1 /*
2 * Copyright (c) 2018, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     codechal_fei_hevc_g9_skl.cpp
24 //! \brief    HEVC FEI dual-pipe encoder for GEN9 SKL.
25 //!
26 
27 #include "codechal_fei_hevc_g9_skl.h"
28 #include "igcodeckrn_g9.h"
29 #include "codeckrnheader.h"
30 
//! Padding size in bytes (64 KiB).
//! NOTE(review): the name suggests a GPU MMU workaround; confirm against the code that uses it.
#define GPUMMU_WA_PADDING (64 * 1024)
32 
33 //! HEVC encoder kernel header structure for G9 SKL
34 struct CODECHAL_ENC_HEVC_KERNEL_HEADER_FEI_G9_SKL
35 {
36     int nKernelCount;                                                       //!< Total number of kernels
37 
38     CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_2xDownSampling_Kernel;             //!< 2x down sampling kernel
39     CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_32x32_PU_ModeDecision_Kernel;      //!< Intra 32x32 PU mode decision kernel
40     CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_16x16_PU_SADComputation_Kernel;    //!< Intra 16x16 PU SAD computation kernel
41     CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_16x16_PU_ModeDecision_Kernel;      //!< Intra 16x16 PU mode decision kernel
42     CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_8x8_PU_Kernel;                     //!< Intra 8x8 PU mode decision kernel
43     CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_8x8_PU_FMode_Kernel;               //!< Intra 8x8 PU final mode decision kernel
44     CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_PB_32x32_PU_IntraCheck;              //!< P/B 32x32 PU intra mode check kernel
45     CODECHAL_KERNEL_HEADER HEVC_FEI_LCUEnc_PB_MB;                               //!< P/B MbEnc Kernel
46     CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_DS4HME;                            //!< 4x Scaling kernel
47     CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_P_HME;                               //!< P frame HME kernel
48     CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_B_HME;                               //!< B frame HME kernel
49     CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_COARSE;                            //!< Intra coarse kernel
50     CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_PB_Pak;                              //!< P/B frame PAK kernel
51     CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_BRC_Blockcopy;                       //!< BRC blockcopy kerenel
52     CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_DS_Combined;                         //!< Down scale and format conversion kernel for 10 bit for KBL
53     CODECHAL_KERNEL_HEADER HEVC_FEI_LCUEnc_P_MB;                                //!< P frame MbEnc kernel
54 };
55 
56 using PCODECHAL_ENC_HEVC_KERNEL_HEADER_FEI_G9_SKL = struct CODECHAL_ENC_HEVC_KERNEL_HEADER_FEI_G9_SKL*;
57 
58 //! HEVC encoder FEI intra 8x8 PU final mode decision kernel curbe for GEN9
59 struct CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9
60 {
61     union {
62         struct {
63             uint32_t       FrameWidth                   : MOS_BITFIELD_RANGE(0, 15);
64             uint32_t       FrameHeight                  : MOS_BITFIELD_RANGE(16, 31);
65         };
66         uint32_t Value;
67     } DW0;
68 
69     union {
70         struct {
71             uint32_t       SliceType                    : MOS_BITFIELD_RANGE(0, 1);
72             uint32_t       PuType                       : MOS_BITFIELD_RANGE(2, 3);
73             uint32_t       PakReordingFlag              : MOS_BITFIELD_BIT(4);
74             uint32_t       ReservedMBZ                  : MOS_BITFIELD_BIT(5);
75             uint32_t       LCUType                      : MOS_BITFIELD_BIT(6);
76             uint32_t       ScreenContentFlag            : MOS_BITFIELD_BIT(7);
77             uint32_t       IntraRefreshEn               : MOS_BITFIELD_RANGE(8, 9);
78             uint32_t       EnableRollingIntra           : MOS_BITFIELD_BIT(10);
79             uint32_t       HalfUpdateMixedLCU           : MOS_BITFIELD_BIT(11);
80             uint32_t       Reserved_12_23               : MOS_BITFIELD_RANGE(12, 23);
81             uint32_t       EnableIntraEarlyExit         : MOS_BITFIELD_BIT(24);
82             uint32_t       BRCEnable                    : MOS_BITFIELD_BIT(25);
83             uint32_t       LCUBRCEnable                 : MOS_BITFIELD_BIT(26);
84             uint32_t       ROIEnable                    : MOS_BITFIELD_BIT(27);
85             uint32_t       FASTSurveillanceFlag         : MOS_BITFIELD_BIT(28);
86             uint32_t       EnableFlexibleParam          : MOS_BITFIELD_BIT(29);
87             uint32_t       EnableQualityImprovement     : MOS_BITFIELD_BIT(30);
88             uint32_t       EnableDebugDump              : MOS_BITFIELD_BIT(31);
89         };
90         uint32_t Value;
91     } DW1;
92 
93     union {
94         struct {
95             uint32_t       LambdaForLuma;
96         };
97         uint32_t Value;
98     } DW2;
99 
100     union {
101         // For inter frame or enable statictics data dump
102         struct {
103             uint32_t       LambdaForDistCalculation;
104         };
105         uint32_t Value;
106     } DW3;
107 
108     union {
109         struct {
110             uint32_t       ModeCostFor8x8PU_TU8;
111         };
112         uint32_t Value;
113     } DW4;
114 
115     union {
116         struct {
117             uint32_t       ModeCostFor8x8PU_TU4;
118         };
119         uint32_t Value;
120     } DW5;
121 
122     union {
123         struct {
124             uint32_t       SATD16x16PuThreshold         : MOS_BITFIELD_RANGE(0, 15);
125             uint32_t       BiasFactorToward8x8          : MOS_BITFIELD_RANGE(16, 31);
126         };
127         uint32_t Value;
128     } DW6;
129 
130     union {
131         struct {
132             uint32_t       Qp                           : MOS_BITFIELD_RANGE(0, 15);
133             uint32_t       QpForInter                   : MOS_BITFIELD_RANGE(16, 31);
134         };
135         uint32_t Value;
136     } DW7;
137 
138     union {
139         struct {
140             uint32_t       SimplifiedFlagForInter       : MOS_BITFIELD_BIT(0);
141             uint32_t       EnableStatsDataDump          : MOS_BITFIELD_BIT(1);
142             uint32_t       Reserved_2_7                 : MOS_BITFIELD_RANGE(2, 7);
143             uint32_t       KBLControlFlag               : MOS_BITFIELD_BIT(8);
144             uint32_t       Reserved_9_31                : MOS_BITFIELD_RANGE(9, 31);
145         };
146         uint32_t Value;
147     } DW8;
148 
149     union {
150         struct {
151             uint32_t       IntraRefreshMBNum            : MOS_BITFIELD_RANGE(0, 15);
152             uint32_t       IntraRefreshUnitInMB         : MOS_BITFIELD_RANGE(16, 23);
153             uint32_t       IntraRefreshQPDelta          : MOS_BITFIELD_RANGE(24, 31);
154         };
155         uint32_t Value;
156     } DW9;
157 
158     union {
159         struct {
160             uint32_t       Reserved;
161         };
162         uint32_t Value;
163     } DW10;
164 
165     union {
166         struct {
167             uint32_t       Reserved;
168         };
169         uint32_t Value;
170     } DW11;
171 
172     union {
173         struct {
174             uint32_t       Reserved;
175         };
176         uint32_t Value;
177     } DW12;
178 
179     union {
180         struct {
181             uint32_t       Reserved;
182         };
183         uint32_t Value;
184     } DW13;
185 
186     union {
187         struct {
188             uint32_t       Reserved;
189         };
190         uint32_t Value;
191     } DW14;
192 
193     union {
194         struct {
195             uint32_t       Reserved;
196         };
197         uint32_t Value;
198     } DW15;
199 
200     union {
201         struct {
202             uint32_t       BTI_PAK_Object;
203         };
204         uint32_t Value;
205     } DW16;
206 
207     union {
208         struct {
209             uint32_t       BTI_VME_8x8_Mode;
210         };
211         uint32_t Value;
212     } DW17;
213 
214     union {
215         struct {
216             uint32_t       BTI_Intra_Mode;
217         };
218         uint32_t Value;
219     } DW18;
220 
221     union {
222         struct {
223             uint32_t       BTI_PAK_Command;
224         };
225         uint32_t Value;
226     } DW19;
227 
228     union {
229         struct {
230             uint32_t       BTI_Slice_Map;
231         };
232         uint32_t Value;
233     } DW20;
234 
235     union {
236         struct {
237             uint32_t       BTI_IntraDist;
238         };
239         uint32_t Value;
240     } DW21;
241 
242     union {
243         struct {
244             uint32_t       BTI_BRC_Input;
245         };
246         uint32_t Value;
247     } DW22;
248 
249     union {
250         struct {
251             uint32_t       BTI_Simplest_Intra;
252         };
253         uint32_t Value;
254     } DW23;
255 
256     union {
257         struct {
258             uint32_t       BTI_LCU_Qp_Surface;
259         };
260         uint32_t Value;
261     } DW24;
262 
263     union {
264         struct {
265             uint32_t       BTI_BRC_Data;
266         };
267         uint32_t Value;
268     } DW25;
269 
270     union {
271         //Output (for inter and statictics data dump only)
272         struct {
273             uint32_t       BTI_Haar_Dist16x16;
274         };
275         uint32_t Value;
276     } DW26;
277 
278     union {
279         // This surface should take the statistics surface from Hevc_LCUEnc_I_32x32_PU_ModeDecision as input
280         struct {
281             uint32_t       BTI_Stats_Data;
282         };
283         uint32_t Value;
284     } DW27;
285 
286     union {
287         // Frame level Statistics data surface
288         struct {
289             uint32_t       BTI_Frame_Stats_Data;
290         };
291         uint32_t Value;
292     } DW28;
293 
294     union {
295         // Frame level CTB Distortion data surface
296         struct {
297             uint32_t       BTI_CTB_Distortion_Surface;
298         };
299         uint32_t Value;
300     } DW29;
301 
302     union {
303         struct {
304             uint32_t       BTI_Debug;
305         };
306         uint32_t Value;
307     } DW30;
308 };
309 
310 using PCODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9 = struct CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9*;
311 C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9)) == 31);
312 
313 //! HEVC encoder FEI B 32x32 PU intra check kernel curbe for GEN9
314 struct CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9
315 {
316     union {
317         struct {
318             uint32_t       FrameWidth                   : MOS_BITFIELD_RANGE(0, 15);
319             uint32_t       FrameHeight                  : MOS_BITFIELD_RANGE(16, 31);
320         };
321         uint32_t Value;
322     } DW0;
323 
324     union {
325         struct {
326             uint32_t       SliceType                    : MOS_BITFIELD_RANGE(0, 1);
327             uint32_t       Reserved                     : MOS_BITFIELD_RANGE(2, 7);
328             uint32_t       Log2MinTUSize                : MOS_BITFIELD_RANGE(8, 15);
329             uint32_t       Flags                        : MOS_BITFIELD_RANGE(16, 23);
330             uint32_t       EnableIntraEarlyExit         : MOS_BITFIELD_BIT(24);
331             uint32_t       HMEEnable                    : MOS_BITFIELD_BIT(25);
332             uint32_t       FASTSurveillanceFlag         : MOS_BITFIELD_BIT(26);
333             uint32_t       Res_27_30                    : MOS_BITFIELD_RANGE(27, 30);
334             uint32_t       EnableDebugDump              : MOS_BITFIELD_BIT(31);
335         };
336         uint32_t Value;
337     } DW1;
338 
339     union {
340         struct {
341             uint32_t       QpValue                      : MOS_BITFIELD_RANGE(0, 15);
342             uint32_t       QpMultiplier                 : MOS_BITFIELD_RANGE(16, 31);
343         };
344         uint32_t Value;
345     } DW2;
346 
347     union {
348         struct {
349             uint32_t       Reserved;
350         };
351         uint32_t Value;
352     } DW3;
353 
354     union {
355         struct {
356             uint32_t       Reserved;
357         };
358         uint32_t Value;
359     } DW4;
360 
361     union {
362         struct {
363             uint32_t       Reserved;
364         };
365         uint32_t Value;
366     } DW5;
367 
368     union {
369         struct {
370             uint32_t       Reserved;
371         };
372         uint32_t Value;
373     } DW6;
374 
375     union {
376         struct {
377             uint32_t       Reserved;
378         };
379         uint32_t Value;
380     } DW7;
381 
382     union {
383         struct {
384             uint32_t       BTI_Per32x32PuIntraCheck;
385         };
386         uint32_t Value;
387     } DW8;
388 
389     union {
390         struct {
391             uint32_t       BTI_Src_Y;
392         };
393         uint32_t Value;
394     } DW9;
395 
396     union {
397         struct {
398             uint32_t       BTI_Src_Y2X;
399         };
400         uint32_t Value;
401     } DW10;
402 
403     union {
404         struct {
405             uint32_t       BTI_Slice_Map;
406         };
407         uint32_t Value;
408     } DW11;
409 
410     union {
411         struct {
412             uint32_t       BTI_VME_Y2X;
413         };
414         uint32_t Value;
415     } DW12;
416 
417     union {
418         struct {
419             uint32_t       BTI_Simplest_Intra;   // output only
420         };
421         uint32_t Value;
422     } DW13;
423 
424     union {
425         struct {
426             uint32_t       BTI_HME_MVPred;
427         };
428         uint32_t Value;
429     } DW14;
430 
431     union {
432         struct {
433             uint32_t       BTI_HME_Dist;
434         };
435         uint32_t Value;
436     } DW15;
437 
438     union {
439         struct {
440             uint32_t       BTI_LCU_Skip;
441         };
442         uint32_t Value;
443     } DW16;
444 
445     union {
446         struct {
447             uint32_t       BTI_Debug;
448         };
449         uint32_t Value;
450     } DW17;
451 };
452 
//! Pointer to the B 32x32 PU intra check curbe.
//! Declared as a pointer type, matching the other P-prefixed curbe aliases in this file.
using PCODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9 = struct CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9*;
454 C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9)) == 18);
455 
456 //! HEVC encoder FEI B Pak kernel curbe for GEN9
457 struct CODECHAL_FEI_HEVC_B_PAK_CURBE_G9
458 {
459     union
460     {
461         struct
462         {
463             uint32_t   FrameWidth                       : MOS_BITFIELD_RANGE(0, 15);
464             uint32_t   FrameHeight                      : MOS_BITFIELD_RANGE(16, 31);
465         };
466         struct
467         {
468             uint32_t   Value;
469         };
470     } DW0;
471 
472     union
473     {
474         struct
475         {
476             uint32_t   Qp                               : MOS_BITFIELD_RANGE(0, 7);
477             uint32_t   Res_8_15                         : MOS_BITFIELD_RANGE(8, 15);
478             uint32_t   MaxVmvR                          : MOS_BITFIELD_RANGE(16, 31);
479         };
480         struct
481         {
482             uint32_t   Value;
483         };
484     } DW1;
485 
486     union
487     {
488         struct
489         {
490             uint32_t   SliceType                        : MOS_BITFIELD_RANGE(0, 1);
491             uint32_t   EnableWA                         : MOS_BITFIELD_BIT(     2);
492             uint32_t   Res_3_7                          : MOS_BITFIELD_RANGE(3, 7);
493             uint32_t   SimplestIntraEnable              : MOS_BITFIELD_BIT(8);
494             uint32_t   BrcEnable                        : MOS_BITFIELD_BIT(9);
495             uint32_t   LcuBrcEnable                     : MOS_BITFIELD_BIT(10);
496             uint32_t   ROIEnable                        : MOS_BITFIELD_BIT(11);
497             uint32_t   FASTSurveillanceFlag             : MOS_BITFIELD_BIT(12);
498             uint32_t   EnableRollingIntra               : MOS_BITFIELD_BIT(13);
499             uint32_t   Res_14                           : MOS_BITFIELD_BIT(14);
500             uint32_t   EnableQualityImprovement         : MOS_BITFIELD_BIT(15);
501             uint32_t   KBLControlFlag                   : MOS_BITFIELD_BIT(16);
502             uint32_t   Res_17_30                        : MOS_BITFIELD_RANGE(17, 30);
503             uint32_t   ScreenContent                    : MOS_BITFIELD_BIT(31);
504         };
505         struct
506         {
507             uint32_t   Value;
508         };
509     } DW2;
510 
511     union
512     {
513         struct
514         {
515             uint32_t   IntraRefreshMBNum                : MOS_BITFIELD_RANGE(0, 15);
516             uint32_t   IntraRefreshUnitInMB             : MOS_BITFIELD_RANGE(16, 23);
517             uint32_t   IntraRefreshQPDelta              : MOS_BITFIELD_RANGE(24, 31);
518         };
519         struct
520         {
521             uint32_t   Value;
522         };
523     } DW3;
524 
525     union
526     {
527         struct
528         {
529             uint32_t Reserved;
530         };
531         struct
532         {
533             uint32_t   Value;
534         };
535     } DW4_15[12];
536 
537     union
538     {
539         struct
540         {
541             uint32_t  BTI_CU_Record;
542         };
543         struct
544         {
545             uint32_t   Value;
546         };
547     } DW16;
548 
549     union
550     {
551         struct
552         {
553             uint32_t  BTI_PAK_Obj;
554         };
555         struct
556         {
557             uint32_t   Value;
558         };
559     } DW17;
560 
561     union
562     {
563         struct
564         {
565             uint32_t  BTI_Slice_Map;
566         };
567         struct
568         {
569             uint32_t   Value;
570         };
571     } DW18;
572 
573     union
574     {
575         struct
576         {
577             uint32_t  BTI_Brc_Input;
578         };
579         struct
580         {
581             uint32_t   Value;
582         };
583     } DW19;
584 
585     union
586     {
587         struct
588         {
589             uint32_t  BTI_LCU_Qp;
590         };
591         struct
592         {
593             uint32_t   Value;
594         };
595     } DW20;
596 
597     union
598     {
599         struct
600         {
601             uint32_t  BTI_Brc_Data;
602         };
603         struct
604         {
605             uint32_t   Value;
606         };
607     } DW21;
608 
609     union
610     {
611         struct
612         {
613             uint32_t  BTI_MB_Data;
614         };
615         struct
616         {
617             uint32_t   Value;
618         };
619     } DW22;
620 
621     union
622     {
623         struct
624         {
625             uint32_t  BTI_MVP_Surface;
626         };
627         struct
628         {
629             uint32_t   Value;
630         };
631     } DW23;
632 
633     union
634     {
635         struct
636         {
637             uint32_t  BTI_WA_PAK_Data;
638         };
639         struct
640         {
641             uint32_t   Value;
642         };
643     } DW24;
644 
645     union
646     {
647         struct
648         {
649             uint32_t  BTI_WA_PAK_Obj;
650         };
651         struct
652         {
653             uint32_t  Value;
654         };
655     } DW25;
656 
657     union
658     {
659         struct
660         {
661             uint32_t  BTI_Debug;
662         };
663         struct
664         {
665             uint32_t   Value;
666         };
667     } DW26;
668 
669 };
670 
671 using PCODECHAL_FEI_HEVC_B_PAK_CURBE_G9 = struct CODECHAL_FEI_HEVC_B_PAK_CURBE_G9*;
672 C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_FEI_HEVC_B_PAK_CURBE_G9)) == 27);
673 
674 //! HEVC encoder B MBEnc kernel curbe for GEN9
675 struct CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9
676 {
677     // DW0
678     union
679     {
680         struct
681         {
682             uint32_t   SkipModeEn                       : MOS_BITFIELD_BIT(0);
683             uint32_t   AdaptiveEn                       : MOS_BITFIELD_BIT(1);
684             uint32_t   BiMixDis                         : MOS_BITFIELD_BIT(2);
685             uint32_t                                    : MOS_BITFIELD_RANGE(3, 4);
686             uint32_t   EarlyImeSuccessEn                : MOS_BITFIELD_BIT(5);
687             uint32_t                                    : MOS_BITFIELD_BIT(6);
688             uint32_t   T8x8FlagForInterEn               : MOS_BITFIELD_BIT(7);
689             uint32_t                                    : MOS_BITFIELD_RANGE(8, 23);
690             uint32_t   EarlyImeStop                     : MOS_BITFIELD_RANGE(24, 31);
691         };
692         struct
693         {
694             uint32_t   Value;
695         };
696     } DW0;
697 
698     // DW1
699     union
700     {
701         struct
702         {
703             uint32_t   MaxNumMVs                        : MOS_BITFIELD_RANGE(0, 5);
704             uint32_t                                    : MOS_BITFIELD_RANGE(6, 15);
705             uint32_t   BiWeight                         : MOS_BITFIELD_RANGE(16, 21);
706             uint32_t                                    : MOS_BITFIELD_RANGE(22, 27);
707             uint32_t   UniMixDisable                    : MOS_BITFIELD_BIT(28);
708             uint32_t                                    : MOS_BITFIELD_RANGE(29, 31);
709         };
710         struct
711         {
712             uint32_t   Value;
713         };
714     } DW1;
715 
716     // DW2
717     union
718     {
719         struct
720         {
721             uint32_t   LenSP                            : MOS_BITFIELD_RANGE(0, 7);
722             uint32_t   MaxNumSU                         : MOS_BITFIELD_RANGE(8, 15);
723             uint32_t   PicWidth                         : MOS_BITFIELD_RANGE(16, 31);
724         };
725         struct
726         {
727             uint32_t   Value;
728         };
729     } DW2;
730 
731     // DW3
732     union
733     {
734         struct
735         {
736             uint32_t   SrcSize                          : MOS_BITFIELD_RANGE(0, 1);
737             uint32_t                                    : MOS_BITFIELD_RANGE(2, 3);
738             uint32_t   MbTypeRemap                      : MOS_BITFIELD_RANGE(4, 5);
739             uint32_t   SrcAccess                        : MOS_BITFIELD_BIT(6);
740             uint32_t   RefAccess                        : MOS_BITFIELD_BIT(7);
741             uint32_t   SearchCtrl                       : MOS_BITFIELD_RANGE(8, 10);
742             uint32_t   DualSearchPathOption             : MOS_BITFIELD_BIT(11);
743             uint32_t   SubPelMode                       : MOS_BITFIELD_RANGE(12, 13);
744             uint32_t   SkipType                         : MOS_BITFIELD_BIT(14);
745             uint32_t   DisableFieldCacheAlloc           : MOS_BITFIELD_BIT(15);
746             uint32_t   InterChromaMode                  : MOS_BITFIELD_BIT(16);
747             uint32_t   FTEnable                         : MOS_BITFIELD_BIT(17);
748             uint32_t   BMEDisableFBR                    : MOS_BITFIELD_BIT(18);
749             uint32_t   BlockBasedSkipEnable             : MOS_BITFIELD_BIT(19);
750             uint32_t   InterSAD                         : MOS_BITFIELD_RANGE(20, 21);
751             uint32_t   IntraSAD                         : MOS_BITFIELD_RANGE(22, 23);
752             uint32_t   SubMbPartMask                    : MOS_BITFIELD_RANGE(24, 30);
753             uint32_t                                    : MOS_BITFIELD_BIT(31);
754         };
755         struct
756         {
757             uint32_t   Value;
758         };
759     } DW3;
760 
761     union
762     {
763         struct
764         {
765             uint32_t   PicHeightMinus1                  : MOS_BITFIELD_RANGE(0, 15);
766             uint32_t   Res_16_22                        : MOS_BITFIELD_RANGE(16,22);
767             uint32_t   EnableQualityImprovement         : MOS_BITFIELD_BIT(23);
768             uint32_t   EnableDebug                      : MOS_BITFIELD_BIT(24);
769             uint32_t   EnableFlexibleParam              : MOS_BITFIELD_BIT(25);
770             uint32_t   EnableStatsDataDump              : MOS_BITFIELD_BIT(26);
771             uint32_t   Res_27                           : MOS_BITFIELD_BIT(27);
772             uint32_t   HMEEnable                        : MOS_BITFIELD_BIT(28);
773             uint32_t   SliceType                        : MOS_BITFIELD_RANGE(29, 30);
774             uint32_t   UseActualRefQPValue              : MOS_BITFIELD_BIT(31);
775         };
776         struct
777         {
778             uint32_t   Value;
779         };
780     } DW4;
781 
782     // DW5
783     union
784     {
785         struct
786         {
787             uint32_t   Res_0_15                         : MOS_BITFIELD_RANGE(0, 15);
788             uint32_t   RefWidth                         : MOS_BITFIELD_RANGE(16, 23);
789             uint32_t   RefHeight                        : MOS_BITFIELD_RANGE(24, 31);
790         };
791         struct
792         {
793             uint32_t   Value;
794         };
795     } DW5;
796 
797     union
798     {
799         struct
800         {
801             uint32_t   FrameWidth                       : MOS_BITFIELD_RANGE(0, 15);
802             uint32_t   FrameHeight                      : MOS_BITFIELD_RANGE(16, 31);
803         };
804         struct
805         {
806             uint32_t   Value;
807         };
808     } DW6;
809 
810     // DW7
811     union
812     {
813         struct
814         {
815             uint32_t   IntraPartMask                    : MOS_BITFIELD_RANGE(0, 4);
816             uint32_t   NonSkipZMvAdded                  : MOS_BITFIELD_BIT(5);
817             uint32_t   NonSkipModeAdded                 : MOS_BITFIELD_BIT(6);
818             uint32_t   LumaIntraSrcCornerSwap           : MOS_BITFIELD_BIT(7);
819             uint32_t                                    : MOS_BITFIELD_RANGE(8, 15);
820             uint32_t   MVCostScaleFactor                : MOS_BITFIELD_RANGE(16, 17);
821             uint32_t   BilinearEnable                   : MOS_BITFIELD_BIT(18);
822             uint32_t   Res_19                           : MOS_BITFIELD_BIT(19);
823             uint32_t   WeightedSADHAAR                  : MOS_BITFIELD_BIT(20);
824             uint32_t   AConlyHAAR                       : MOS_BITFIELD_BIT(21);
825             uint32_t   RefIDCostMode                    : MOS_BITFIELD_BIT(22);
826             uint32_t                                    : MOS_BITFIELD_BIT(23);
827             uint32_t   SkipCenterMask                   : MOS_BITFIELD_RANGE(24, 31);
828         };
829         struct
830         {
831             uint32_t   Value;
832         };
833     } DW7;
834 
835     // DW8
836     union
837     {
838         struct
839         {
840             uint32_t   Mode0Cost                        : MOS_BITFIELD_RANGE(0, 7);
841             uint32_t   Mode1Cost                        : MOS_BITFIELD_RANGE(8, 15);
842             uint32_t   Mode2Cost                        : MOS_BITFIELD_RANGE(16, 23);
843             uint32_t   Mode3Cost                        : MOS_BITFIELD_RANGE(24, 31);
844         };
845         struct
846         {
847             uint32_t   Value;
848         };
849     } DW8;
850 
851     // DW9
852     union
853     {
854         struct
855         {
856             uint32_t   Mode4Cost                        : MOS_BITFIELD_RANGE(0, 7);
857             uint32_t   Mode5Cost                        : MOS_BITFIELD_RANGE(8, 15);
858             uint32_t   Mode6Cost                        : MOS_BITFIELD_RANGE(16, 23);
859             uint32_t   Mode7Cost                        : MOS_BITFIELD_RANGE(24, 31);
860         };
861         struct
862         {
863             uint32_t   Value;
864         };
865     } DW9;
866 
867     // DW10
868     union
869     {
870         struct
871         {
872             uint32_t   Mode8Cost                        : MOS_BITFIELD_RANGE(0, 7);
873             uint32_t   Mode9Cost                        : MOS_BITFIELD_RANGE(8, 15);
874             uint32_t   RefIDCost                        : MOS_BITFIELD_RANGE(16, 23);
875             uint32_t   ChromaIntraModeCost              : MOS_BITFIELD_RANGE(24, 31);
876         };
877         struct
878         {
879             uint32_t   Value;
880         };
881     } DW10;
882 
883     // DW11
884     union
885     {
886         struct
887         {
888             uint32_t   MV0Cost                          : MOS_BITFIELD_RANGE(0, 7);
889             uint32_t   MV1Cost                          : MOS_BITFIELD_RANGE(8, 15);
890             uint32_t   MV2Cost                          : MOS_BITFIELD_RANGE(16, 23);
891             uint32_t   MV3Cost                          : MOS_BITFIELD_RANGE(24, 31);
892         };
893         struct
894         {
895             uint32_t   Value;
896         };
897     } DW11;
898 
899     // DW12
900     union
901     {
902         struct
903         {
904             uint32_t   MV4Cost                          : MOS_BITFIELD_RANGE(0, 7);
905             uint32_t   MV5Cost                          : MOS_BITFIELD_RANGE(8, 15);
906             uint32_t   MV6Cost                          : MOS_BITFIELD_RANGE(16, 23);
907             uint32_t   MV7Cost                          : MOS_BITFIELD_RANGE(24, 31);
908         };
909         struct
910         {
911             uint32_t   Value;
912         };
913     } DW12;
914 
915     // DW13
916     union
917     {
918         struct
919         {
920             uint32_t   QpPrimeY                         : MOS_BITFIELD_RANGE(0, 7);
921             uint32_t   QpPrimeCb                        : MOS_BITFIELD_RANGE(8, 15);
922             uint32_t   QpPrimeCr                        : MOS_BITFIELD_RANGE(16, 23);
923             uint32_t   TargetSizeInWord                 : MOS_BITFIELD_RANGE(24, 31);
924         };
925         struct
926         {
927             uint32_t   Value;
928         };
929     } DW13;
930 
931     // DW14
932     union
933     {
934         struct
935         {
936             uint32_t   SICFwdTransCoeffThreshold_0      : MOS_BITFIELD_RANGE(0, 15);
937             uint32_t   SICFwdTransCoeffThreshold_1      : MOS_BITFIELD_RANGE(16, 23);
938             uint32_t   SICFwdTransCoeffThreshold_2      : MOS_BITFIELD_RANGE(24, 31);
939         };
940         struct
941         {
942             uint32_t   Value;
943         };
944     } DW14;
945 
946     // DW15
947     union
948     {
949         struct
950         {
951             uint32_t   SICFwdTransCoeffThreshold_3      : MOS_BITFIELD_RANGE(0, 7);
952             uint32_t   SICFwdTransCoeffThreshold_4      : MOS_BITFIELD_RANGE(8, 15);
953             uint32_t   SICFwdTransCoeffThreshold_5      : MOS_BITFIELD_RANGE(16, 23);
954             uint32_t   SICFwdTransCoeffThreshold_6      : MOS_BITFIELD_RANGE(24, 31);    // Highest Freq
955         };
956         struct
957         {
958             uint32_t   Value;
959         };
960     } DW15;
961 
962     // DW16
963     union
964     {
965         struct
966         {
967             SearchPathDelta   SPDelta_0;
968             SearchPathDelta   SPDelta_1;
969             SearchPathDelta   SPDelta_2;
970             SearchPathDelta   SPDelta_3;
971         };
972         struct
973         {
974             uint32_t   Value;
975         };
976     } DW16;
977 
978     // DW17
979     union
980     {
981         struct
982         {
983             SearchPathDelta   SPDelta_4;
984             SearchPathDelta   SPDelta_5;
985             SearchPathDelta   SPDelta_6;
986             SearchPathDelta   SPDelta_7;
987         };
988         struct
989         {
990             uint32_t   Value;
991         };
992     } DW17;
993 
994     // DW18
995     union
996     {
997         struct
998         {
999             SearchPathDelta   SPDelta_8;
1000             SearchPathDelta   SPDelta_9;
1001             SearchPathDelta   SPDelta_10;
1002             SearchPathDelta   SPDelta_11;
1003         };
1004         struct
1005         {
1006             uint32_t   Value;
1007         };
1008     } DW18;
1009 
1010     // DW19
1011     union
1012     {
1013         struct
1014         {
1015             SearchPathDelta   SPDelta_12;
1016             SearchPathDelta   SPDelta_13;
1017             SearchPathDelta   SPDelta_14;
1018             SearchPathDelta   SPDelta_15;
1019         };
1020         struct
1021         {
1022             uint32_t   Value;
1023         };
1024     } DW19;
1025 
1026     // DW20
1027     union
1028     {
1029         struct
1030         {
1031             SearchPathDelta   SPDelta_16;
1032             SearchPathDelta   SPDelta_17;
1033             SearchPathDelta   SPDelta_18;
1034             SearchPathDelta   SPDelta_19;
1035         };
1036         struct
1037         {
1038             uint32_t   Value;
1039         };
1040     } DW20;
1041 
1042     // DW21
1043     union
1044     {
1045         struct
1046         {
1047             SearchPathDelta   SPDelta_20;
1048             SearchPathDelta   SPDelta_21;
1049             SearchPathDelta   SPDelta_22;
1050             SearchPathDelta   SPDelta_23;
1051         };
1052         struct
1053         {
1054             uint32_t   Value;
1055         };
1056     } DW21;
1057 
1058     // DW22
1059     union
1060     {
1061         struct
1062         {
1063             SearchPathDelta   SPDelta_24;
1064             SearchPathDelta   SPDelta_25;
1065             SearchPathDelta   SPDelta_26;
1066             SearchPathDelta   SPDelta_27;
1067         };
1068         struct
1069         {
1070             uint32_t   Value;
1071         };
1072     } DW22;
1073 
1074     // DW23
1075     union
1076     {
1077         struct
1078         {
1079             SearchPathDelta   SPDelta_28;
1080             SearchPathDelta   SPDelta_29;
1081             SearchPathDelta   SPDelta_30;
1082             SearchPathDelta   SPDelta_31;
1083         };
1084         struct
1085         {
1086             uint32_t   Value;
1087         };
1088     } DW23;
1089 
1090     // DW24
1091     union
1092     {
1093         struct
1094         {
1095             SearchPathDelta   SPDelta_32;
1096             SearchPathDelta   SPDelta_33;
1097             SearchPathDelta   SPDelta_34;
1098             SearchPathDelta   SPDelta_35;
1099         };
1100         struct
1101         {
1102             uint32_t   Value;
1103         };
1104     } DW24;
1105 
1106     // DW25
1107     union
1108     {
1109         struct
1110         {
1111             SearchPathDelta   SPDelta_36;
1112             SearchPathDelta   SPDelta_37;
1113             SearchPathDelta   SPDelta_38;
1114             SearchPathDelta   SPDelta_39;
1115         };
1116         struct
1117         {
1118             uint32_t   Value;
1119         };
1120     } DW25;
1121 
1122     // DW26
1123     union
1124     {
1125         struct
1126         {
1127             SearchPathDelta   SPDelta_40;
1128             SearchPathDelta   SPDelta_41;
1129             SearchPathDelta   SPDelta_42;
1130             SearchPathDelta   SPDelta_43;
1131         };
1132         struct
1133         {
1134             uint32_t   Value;
1135         };
1136     } DW26;
1137 
1138     // DW27
1139     union
1140     {
1141         struct
1142         {
1143             SearchPathDelta   SPDelta_44;
1144             SearchPathDelta   SPDelta_45;
1145             SearchPathDelta   SPDelta_46;
1146             SearchPathDelta   SPDelta_47;
1147         };
1148         struct
1149         {
1150             uint32_t   Value;
1151         };
1152     } DW27;
1153 
1154     // DW28
1155     union
1156     {
1157         struct
1158         {
1159             SearchPathDelta   SPDelta_48;
1160             SearchPathDelta   SPDelta_49;
1161             SearchPathDelta   SPDelta_50;
1162             SearchPathDelta   SPDelta_51;
1163         };
1164         struct
1165         {
1166             uint32_t   Value;
1167         };
1168     } DW28;
1169 
1170     // DW29
1171     union
1172     {
1173         struct
1174         {
1175             SearchPathDelta   SPDelta_52;
1176             SearchPathDelta   SPDelta_53;
1177             SearchPathDelta   SPDelta_54;
1178             SearchPathDelta   SPDelta_55;
1179         };
1180         struct
1181         {
1182             uint32_t   Value;
1183         };
1184     } DW29;
1185 
1186     // DW30
1187     union
1188     {
1189         struct
1190         {
1191             uint32_t   Intra4x4ModeMask                 : MOS_BITFIELD_RANGE(0, 8);
1192             uint32_t                                    : MOS_BITFIELD_RANGE(9, 15);
1193             uint32_t   Intra8x8ModeMask                 : MOS_BITFIELD_RANGE(16, 24);
1194             uint32_t                                    : MOS_BITFIELD_RANGE(25, 31);
1195         };
1196         struct
1197         {
1198             uint32_t   Value;
1199         };
1200     } DW30;
1201 
1202     // DW31
1203     union
1204     {
1205         struct
1206         {
1207             uint32_t   Intra16x16ModeMask               : MOS_BITFIELD_RANGE(0, 3);
1208             uint32_t   IntraChromaModeMask              : MOS_BITFIELD_RANGE(4, 7);
1209             uint32_t   IntraComputeType                 : MOS_BITFIELD_RANGE(8, 9);
1210             uint32_t                                    : MOS_BITFIELD_RANGE(10, 31);
1211         };
1212         struct
1213         {
1214             uint32_t   Value;
1215         };
1216     } DW31;
1217 
1218     // DW32
1219     union
1220     {
1221         struct
1222         {
1223             uint32_t   SkipVal                          : MOS_BITFIELD_RANGE(0, 15);
1224             uint32_t   MultiPredL0Disable               : MOS_BITFIELD_RANGE(16, 23);
1225             uint32_t   MultiPredL1Disable               : MOS_BITFIELD_RANGE(24, 31);
1226         };
1227         struct
1228         {
1229             uint32_t   Value;
1230         };
1231     } DW32;
1232 
1233     // DW33
1234     union
1235     {
1236         struct
1237         {
1238             uint32_t   Intra16x16NonDCPredPenalty       : MOS_BITFIELD_RANGE(0, 7);
1239             uint32_t   Intra8x8NonDCPredPenalty         : MOS_BITFIELD_RANGE(8, 15);
1240             uint32_t   Intra4x4NonDCPredPenalty         : MOS_BITFIELD_RANGE(16, 23);
1241             uint32_t                                    : MOS_BITFIELD_RANGE(24, 31);
1242         };
1243         struct
1244         {
1245             uint32_t   Value;
1246         };
1247     } DW33;
1248 
1249     union {
1250         struct {
1251             uint32_t       LambdaME;
1252         };
1253         uint32_t Value;
1254     } DW34;
1255 
1256     union {
1257         struct {
1258             uint32_t       SimpIntraInterThreshold      : MOS_BITFIELD_RANGE(0, 15);
1259             uint32_t       ModeCostSp                   : MOS_BITFIELD_RANGE(16, 23);
1260             uint32_t       IntraRefreshEn               : MOS_BITFIELD_RANGE(24, 25);
1261             uint32_t       FirstIntraRefresh            : MOS_BITFIELD_BIT(26);
1262             uint32_t       EnableRollingIntra           : MOS_BITFIELD_BIT(27);
1263             uint32_t       HalfUpdateMixedLCU           : MOS_BITFIELD_BIT(28);
1264             uint32_t       Res_29_31                    : MOS_BITFIELD_RANGE(29, 31);
1265         };
1266         uint32_t Value;
1267     } DW35;
1268 
1269     union {
1270         struct {
1271             uint32_t       NumRefIdxL0MinusOne          : MOS_BITFIELD_RANGE(0, 7);
1272             uint32_t       HMECombinedExtraSUs          : MOS_BITFIELD_RANGE(8, 15);
1273             uint32_t       NumRefIdxL1MinusOne          : MOS_BITFIELD_RANGE(16, 23);
1274             uint32_t       PowerSaving                  : MOS_BITFIELD_BIT(24);
1275             uint32_t       BRCEnable                    : MOS_BITFIELD_BIT(25);
1276             uint32_t       LCUBRCEnable                 : MOS_BITFIELD_BIT(26);
1277             uint32_t       ROIEnable                    : MOS_BITFIELD_BIT(27);
1278             uint32_t       FASTSurveillanceFlag         : MOS_BITFIELD_BIT(28);
1279             uint32_t       CheckAllFractionalEnable     : MOS_BITFIELD_BIT(29);
1280             uint32_t       HMECombinedOverlap           : MOS_BITFIELD_RANGE(30, 31);
1281         };
1282         uint32_t Value;
1283     } DW36;
1284 
1285     union {
1286         struct {
1287             uint32_t       ActualQpRefID0List0          : MOS_BITFIELD_RANGE(0, 7);
1288             uint32_t       ActualQpRefID1List0          : MOS_BITFIELD_RANGE(8, 15);
1289             uint32_t       ActualQpRefID2List0          : MOS_BITFIELD_RANGE(16, 23);
1290             uint32_t       ActualQpRefID3List0          : MOS_BITFIELD_RANGE(24, 31);
1291         };
1292         uint32_t Value;
1293     } DW37;
1294 
1295     union {
1296         struct {
1297             uint32_t       NumIntraRefreshOffFrames     : MOS_BITFIELD_RANGE(0, 15);
1298             uint32_t       NumFrameInGOB                : MOS_BITFIELD_RANGE(16, 31);
1299         };
1300         uint32_t Value;
1301     } DW38;
1302 
1303     union {
1304         struct {
1305             uint32_t       ActualQpRefID0List1          : MOS_BITFIELD_RANGE(0, 7);
1306             uint32_t       ActualQpRefID1List1          : MOS_BITFIELD_RANGE(8, 15);
1307             uint32_t       RefCost                      : MOS_BITFIELD_RANGE(16, 31);
1308         };
1309         uint32_t Value;
1310     } DW39;
1311 
1312     union {
1313         struct {
1314             uint32_t       TransformThreshold0          : MOS_BITFIELD_RANGE(0, 15);
1315             uint32_t       TransformThreshold1          : MOS_BITFIELD_RANGE(16, 31);
1316         };
1317         uint32_t Value;
1318     } DW40;
1319 
1320     union {
1321         struct {
1322             uint32_t       TransformThreshold2          : MOS_BITFIELD_RANGE(0, 15);
1323             uint32_t       TextureIntraCostThreshold    : MOS_BITFIELD_RANGE(16, 31);
1324         };
1325         uint32_t Value;
1326     } DW41;
1327 
1328     union {
1329         struct
1330         {
1331             uint32_t   NumMVPredictorsL0                : MOS_BITFIELD_RANGE(0, 3);
1332             uint32_t   NumMVPredictorsL1                : MOS_BITFIELD_RANGE(4, 7);
1333             uint32_t   Res_8                            : MOS_BITFIELD_BIT(8);
1334             uint32_t   PerLCUQP                         : MOS_BITFIELD_BIT(9);
1335             uint32_t   PerCTBInput                      : MOS_BITFIELD_BIT(10);
1336             uint32_t   CTBDistortionOutput              : MOS_BITFIELD_BIT(11);
1337             uint32_t   MVPredictorBlockSize             : MOS_BITFIELD_RANGE(12, 14);
1338             uint32_t   Res_15                           : MOS_BITFIELD_BIT(15);
1339             uint32_t   MultiPredL0                      : MOS_BITFIELD_RANGE(16, 19);
1340             uint32_t   MultiPredL1                      : MOS_BITFIELD_RANGE(20, 23);
1341             uint32_t   Res_24_31                        : MOS_BITFIELD_RANGE(24, 31);
1342         };
1343         uint32_t Value;
1344     } DW42;
1345 
1346     union {
1347         struct {
1348             uint32_t       Reserved;
1349         };
1350         uint32_t Value;
1351     } DW43;
1352 
1353     union {
1354         struct {
1355             uint32_t       MaxNumMergeCandidates        : MOS_BITFIELD_RANGE(0, 3);
1356             uint32_t       MaxNumRefList0               : MOS_BITFIELD_RANGE(4, 7);
1357             uint32_t       MaxNumRefList1               : MOS_BITFIELD_RANGE(8, 11);
1358             uint32_t       Res_12_15                    : MOS_BITFIELD_RANGE(12, 15);
1359             uint32_t       MaxVmvR                      : MOS_BITFIELD_RANGE(16, 31);
1360         };
1361         uint32_t Value;
1362     } DW44;
1363 
1364     union {
1365         struct {
1366             uint32_t       TemporalMvpEnableFlag        : MOS_BITFIELD_BIT(0);
1367             uint32_t       Res_1_7                      : MOS_BITFIELD_RANGE(1, 7);
1368             uint32_t       Log2ParallelMergeLevel       : MOS_BITFIELD_RANGE(8, 15);
1369             uint32_t       HMECombineLenPslice          : MOS_BITFIELD_RANGE(16, 23);
1370             uint32_t       HMECombineLenBslice          : MOS_BITFIELD_RANGE(24, 31);
1371         };
1372         uint32_t Value;
1373     } DW45;
1374 
1375     union {
1376         struct {
1377             uint32_t       Log2MinTUSize                : MOS_BITFIELD_RANGE(0, 7);
1378             uint32_t       Log2MaxTUSize                : MOS_BITFIELD_RANGE(8, 15);
1379             uint32_t       Log2MinCUSize                : MOS_BITFIELD_RANGE(16, 23);
1380             uint32_t       Log2MaxCUSize                : MOS_BITFIELD_RANGE(24, 31);
1381         };
1382         uint32_t Value;
1383     } DW46;
1384 
1385     union {
1386         struct {
1387             uint32_t       NumRegionsInSlice            : MOS_BITFIELD_RANGE(0, 7);
1388             uint32_t       TypeOfWalkingPattern         : MOS_BITFIELD_RANGE(8, 11);
1389             uint32_t       ChromaFlatnessCheckFlag      : MOS_BITFIELD_BIT(12);
1390             uint32_t       EnableIntraEarlyExit         : MOS_BITFIELD_BIT(13);
1391             uint32_t       SkipIntraKrnFlag             : MOS_BITFIELD_BIT(14);
1392             uint32_t       ScreenContentFlag            : MOS_BITFIELD_BIT(15);
1393             uint32_t       IsLowDelay                   : MOS_BITFIELD_BIT(16);
1394             uint32_t       CollocatedFromL0Flag         : MOS_BITFIELD_BIT(17);
1395             uint32_t       ArbitarySliceFlag            : MOS_BITFIELD_BIT(18);
1396             uint32_t       MultiSliceFlag               : MOS_BITFIELD_BIT(19);
1397             uint32_t       Res_20_23                    : MOS_BITFIELD_RANGE(20, 23);
1398             uint32_t       isCurrRefL0LongTerm          : MOS_BITFIELD_BIT(24);
1399             uint32_t       isCurrRefL1LongTerm          : MOS_BITFIELD_BIT(25);
1400             uint32_t       NumRegionMinus1              : MOS_BITFIELD_RANGE(26, 31);
1401         };
1402         uint32_t Value;
1403     } DW47;
1404 
1405     union {
1406         struct {
1407             uint32_t       CurrentTdL0_0                : MOS_BITFIELD_RANGE(0, 15);
1408             uint32_t       CurrentTdL0_1                : MOS_BITFIELD_RANGE(16, 31);
1409         };
1410         uint32_t Value;
1411     } DW48;
1412 
1413     union {
1414         struct {
1415             uint32_t       CurrentTdL0_2                : MOS_BITFIELD_RANGE(0, 15);
1416             uint32_t       CurrentTdL0_3                : MOS_BITFIELD_RANGE(16, 31);
1417         };
1418         uint32_t Value;
1419     } DW49;
1420 
1421     union {
1422         struct {
1423             uint32_t       CurrentTdL1_0                : MOS_BITFIELD_RANGE(0, 15);
1424             uint32_t       CurrentTdL1_1                : MOS_BITFIELD_RANGE(16, 31);
1425         };
1426         uint32_t Value;
1427     } DW50;
1428 
1429     union {
1430         struct {
1431             uint32_t       IntraRefreshMBNum            : MOS_BITFIELD_RANGE(0, 15);
1432             uint32_t       IntraRefreshUnitInMB         : MOS_BITFIELD_RANGE(16, 23);
1433             uint32_t       IntraRefreshQPDelta          : MOS_BITFIELD_RANGE(24, 31);
1434         };
1435         uint32_t Value;
1436     } DW51;
1437 
1438     union {
1439         struct {
1440             uint32_t       NumofUnitInRegion            : MOS_BITFIELD_RANGE(0, 15);
1441             uint32_t       MaxHeightInRegion            : MOS_BITFIELD_RANGE(16, 31);
1442         };
1443         uint32_t Value;
1444     } DW52;
1445 
1446     union {
1447         struct {
1448             uint32_t       IntraRefreshRefWidth         : MOS_BITFIELD_RANGE(0, 7);
1449             uint32_t       IntraRefreshRefHeight        : MOS_BITFIELD_RANGE(8, 15);
1450             uint32_t       Res_16_31                    : MOS_BITFIELD_RANGE(16, 31);
1451         };
1452         uint32_t Value;
1453     } DW53;
1454 
1455     union {
1456         struct {
1457             uint32_t       Reserved;
1458         };
1459         uint32_t Value;
1460     } DW54;
1461 
1462     union {
1463         struct {
1464             uint32_t       Reserved;
1465         };
1466         uint32_t Value;
1467     } DW55;
1468 
1469     union {
1470         struct {
1471             uint32_t       BTI_CU_Record;
1472         };
1473         uint32_t Value;
1474     } DW56;
1475 
1476     union {
1477         struct {
1478             uint32_t       BTI_PAK_Cmd;
1479         };
1480         uint32_t Value;
1481     } DW57;
1482 
1483     union {
1484         struct {
1485             uint32_t       BTI_Src_Y;
1486         };
1487         uint32_t Value;
1488     } DW58;
1489 
1490     union {
1491         struct {
1492             uint32_t       BTI_Intra_Dist;
1493         };
1494         uint32_t Value;
1495     } DW59;
1496 
1497     union {
1498         struct {
1499             uint32_t       BTI_Min_Dist;
1500         };
1501         uint32_t Value;
1502     } DW60;
1503 
1504     union {
1505         struct {
1506             uint32_t       BTI_HMEMVPredFwdBwdSurfIndex;
1507         };
1508         uint32_t Value;
1509     } DW61;
1510 
1511     union {
1512         struct {
1513             uint32_t       BTI_HMEDistSurfIndex;
1514         };
1515         uint32_t Value;
1516     } DW62;
1517 
1518     union {
1519         struct {
1520             uint32_t       BTI_Slice_Map;
1521         };
1522         uint32_t Value;
1523     } DW63;
1524 
1525     union {
1526         struct {
1527             uint32_t       BTI_VME_Saved_UNI_SIC;
1528         };
1529         uint32_t Value;
1530     } DW64;
1531 
1532     union {
1533         struct {
1534             uint32_t       BTI_Simplest_Intra;
1535         };
1536         uint32_t Value;
1537     } DW65;
1538 
1539     union {
1540         struct {
1541             uint32_t       BTI_Collocated_RefFrame;
1542         };
1543         uint32_t Value;
1544     } DW66;
1545 
1546     union {
1547         struct {
1548             uint32_t       BTI_Reserved;
1549         };
1550         uint32_t Value;
1551     } DW67;
1552 
1553     union {
1554         struct {
1555             uint32_t       BTI_BRC_Input;
1556         };
1557         uint32_t Value;
1558     } DW68;
1559 
1560     union {
1561         struct {
1562             uint32_t       BTI_LCU_QP;
1563         };
1564         uint32_t Value;
1565     } DW69;
1566 
1567     union {
1568         struct {
1569             uint32_t       BTI_BRC_Data;
1570         };
1571         uint32_t Value;
1572     } DW70;
1573 
1574     union {
1575         struct {
1576             uint32_t       BTI_VMEInterPredictionSurfIndex;
1577         };
1578         uint32_t Value;
1579     } DW71;
1580 
1581     union {
1582         //For B frame
1583         struct {
1584             uint32_t       BTI_VMEInterPredictionBSurfIndex;
1585         };
1586         //For P frame
1587         struct {
1588             uint32_t       BTI_ConcurrentThreadMap;
1589         };
1590         uint32_t Value;
1591     } DW72;
1592 
1593     union {
1594         //For B frame
1595         struct {
1596             uint32_t       BTI_ConcurrentThreadMap;
1597         };
1598         //For P frame
1599         struct {
1600             uint32_t       BTI_MB_Data_CurFrame;
1601         };
1602         uint32_t Value;
1603     } DW73;
1604 
1605     union {
1606         //For B frame
1607         struct {
1608             uint32_t       BTI_MB_Data_CurFrame;
1609         };
1610         //For P frame
1611         struct {
1612             uint32_t       BTI_MVP_CurFrame;
1613         };
1614         uint32_t Value;
1615     } DW74;
1616 
1617     union {
1618         //For B frame
1619         struct {
1620             uint32_t       BTI_MVP_CurFrame;
1621         };
1622         //For P frame
1623         struct {
1624             uint32_t       BTI_Haar_Dist16x16;
1625         };
1626         uint32_t Value;
1627     } DW75;
1628 
1629     union {
1630         // this surface need to take same surface name from Hevc_LCUEnc_I_8x8_PU_FMode_inLCU as input
1631         //For B frame
1632         struct {
1633             uint32_t       BTI_Haar_Dist16x16;
1634         };
1635         //For P frame
1636         struct {
1637             uint32_t       BTI_Stats_Data;
1638         };
1639         uint32_t Value;
1640     } DW76;
1641 
1642     union {
1643         //For B frame
1644         struct {
1645             uint32_t       BTI_Stats_Data;
1646         };
1647         //For P frame
1648         struct {
1649             uint32_t       BTI_Frame_Stats_Data;
1650         };
1651         uint32_t Value;
1652     } DW77;
1653 
1654     union {
1655         //For B frame
1656         struct {
1657             uint32_t       BTI_Frame_Stats_Data;
1658         };
1659         //For P frame
1660         struct {
1661             uint32_t       BTI_MVPredictor_Surface;
1662         };
1663         uint32_t Value;
1664     } DW78;
1665 
1666     union {
1667         //For B frame
1668         struct {
1669             uint32_t       BTI_MVPredictor_Surface;
1670         };
1671         //For P frame
1672         struct {
1673             uint32_t       BTI_CTB_Input_Surface;
1674         };
1675         uint32_t Value;
1676     } DW79;
1677 
1678     union {
1679         //For B frame
1680         struct {
1681             uint32_t       BTI_CTB_Input_Surface;
1682         };
1683         //For P frame
1684         struct {
1685             uint32_t       BTI_CTB_Distortion_Output_Surface;
1686         };
1687         uint32_t Value;
1688     } DW80;
1689 
1690     union {
1691         //For B frame
1692         struct {
1693             uint32_t       BTI_CTB_Distortion_Output_Surface;
1694         };
1695         //For P frame
1696         struct {
1697             uint32_t       BTI_Debug;
1698         };
1699         uint32_t Value;
1700     } DW81;
1701 
1702     union {
1703         //For B frame
1704         struct {
1705             uint32_t       BTI_Debug;
1706         };
1707         uint32_t Value;
1708     } DW82;
1709 };
1710 
1711 using PCODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 = struct CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9*;
1712 C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9)) == 83 );
1713 
SetMbEncKernelParams(MHW_KERNEL_PARAM * kernelParams,uint32_t idx)1714 MOS_STATUS CodechalFeiHevcStateG9Skl::SetMbEncKernelParams(MHW_KERNEL_PARAM* kernelParams, uint32_t idx)
1715 {
1716     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1717 
1718     CODECHAL_ENCODE_CHK_NULL_RETURN(kernelParams);
1719 
1720     auto curbeAlignment = m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment();
1721 
1722     kernelParams->iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
1723     kernelParams->iIdCount     = 1;
1724 
1725     switch (idx)
1726     {
1727     case CODECHAL_HEVC_MBENC_2xSCALING:
1728         kernelParams->iBTCount = CODECHAL_HEVC_FEI_SCALING_FRAME_END - CODECHAL_HEVC_FEI_SCALING_FRAME_BEGIN;
1729         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MEDIA_OBJECT_DOWNSCALING_2X_STATIC_DATA_G9), curbeAlignment);
1730         kernelParams->iBlockWidth = 32;
1731         kernelParams->iBlockHeight = 32;
1732         break;
1733 
1734     case CODECHAL_HEVC_MBENC_32x32MD:
1735         kernelParams->iBTCount = CODECHAL_HEVC_FEI_32x32_PU_END - CODECHAL_HEVC_FEI_32x32_PU_BEGIN;
1736         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_I_32x32_PU_MODE_DECISION_CURBE_G9), curbeAlignment);
1737         kernelParams->iBlockWidth = 32;
1738         kernelParams->iBlockHeight = 32;
1739         break;
1740 
1741     case CODECHAL_HEVC_MBENC_16x16SAD:
1742         kernelParams->iBTCount = CODECHAL_HEVC_FEI_16x16_PU_SAD_END - CODECHAL_HEVC_FEI_16x16_PU_SAD_BEGIN;
1743         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_I_16x16_SAD_CURBE_G9), curbeAlignment);
1744         kernelParams->iBlockWidth = 16;
1745         kernelParams->iBlockHeight = 16;
1746         break;
1747 
1748     case CODECHAL_HEVC_MBENC_16x16MD:
1749         kernelParams->iBTCount = CODECHAL_HEVC_FEI_16x16_PU_MD_END - CODECHAL_HEVC_FEI_16x16_PU_MD_BEGIN;
1750         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_I_16x16_PU_MODEDECISION_CURBE_G9), curbeAlignment);
1751         kernelParams->iBlockWidth = 32;
1752         kernelParams->iBlockHeight = 32;
1753         break;
1754 
1755     case CODECHAL_HEVC_MBENC_8x8PU:
1756         kernelParams->iBTCount = CODECHAL_HEVC_FEI_8x8_PU_END - CODECHAL_HEVC_FEI_8x8_PU_BEGIN;
1757         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_I_8x8_PU_CURBE_G9), curbeAlignment);
1758         kernelParams->iBlockWidth = 8;
1759         kernelParams->iBlockHeight = 8;
1760         break;
1761 
1762     case CODECHAL_HEVC_MBENC_8x8FMODE:
1763         kernelParams->iBTCount = CODECHAL_HEVC_FEI_8x8_PU_FMODE_END - CODECHAL_HEVC_FEI_8x8_PU_FMODE_BEGIN;
1764         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9), curbeAlignment);
1765         kernelParams->iBlockWidth = 32;
1766         kernelParams->iBlockHeight = 32;
1767         break;
1768 
1769     case CODECHAL_HEVC_MBENC_32x32INTRACHECK:
1770         kernelParams->iBTCount = CODECHAL_HEVC_FEI_B_32x32_PU_END - CODECHAL_HEVC_FEI_B_32x32_PU_BEGIN;
1771         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9), curbeAlignment);
1772         kernelParams->iBlockWidth = 32;
1773         kernelParams->iBlockHeight = 32;
1774         break;
1775 
1776     case CODECHAL_HEVC_FEI_MBENC_BENC:
1777         kernelParams->iBTCount = CODECHAL_HEVC_FEI_B_MBENC_END - CODECHAL_HEVC_FEI_B_MBENC_BEGIN;
1778         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9), curbeAlignment);
1779         kernelParams->iBlockWidth = 16;
1780         kernelParams->iBlockHeight = 16;
1781         break;
1782 
1783     case CODECHAL_HEVC_FEI_MBENC_BPAK:
1784         kernelParams->iBTCount = CODECHAL_HEVC_FEI_B_PAK_END - CODECHAL_HEVC_FEI_B_PAK_BEGIN;
1785         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_B_PAK_CURBE_G9), curbeAlignment);
1786         kernelParams->iBlockWidth = 32;
1787         kernelParams->iBlockHeight = 32;
1788         break;
1789 
1790     case CODECHAL_HEVC_FEI_MBENC_DS_COMBINED:
1791         if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit))
1792         {
1793             kernelParams->iBTCount = CODECHAL_HEVC_FEI_DS_COMBINED_END - CODECHAL_HEVC_FEI_DS_COMBINED_BEGIN;
1794             uint32_t uiDSCombinedKernelCurbeSize = sizeof(CODECHAL_ENC_HEVC_DS_COMBINED_CURBE_G9);
1795             kernelParams->iCurbeLength = MOS_ALIGN_CEIL(uiDSCombinedKernelCurbeSize, curbeAlignment);
1796             kernelParams->iBlockWidth = 8;
1797             kernelParams->iBlockHeight = 8;
1798         }
1799         else
1800         {
1801             CODECHAL_ENCODE_ASSERT(false);
1802             eStatus = MOS_STATUS_INVALID_PARAMETER;
1803         }
1804         break;
1805 
1806     case CODECHAL_HEVC_FEI_MBENC_PENC:
1807         kernelParams->iBTCount = CODECHAL_HEVC_FEI_P_MBENC_END - CODECHAL_HEVC_FEI_P_MBENC_BEGIN;
1808         //P MBEnc curbe has one less DWord than B MBEnc curbe
1809         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9) - sizeof(uint32_t), (size_t)curbeAlignment);
1810         kernelParams->iBlockWidth = 16;
1811         kernelParams->iBlockHeight = 16;
1812         break;
1813 
1814     default:
1815         CODECHAL_ENCODE_ASSERT(false);
1816         eStatus = MOS_STATUS_INVALID_PARAMETER;
1817     }
1818 
1819     return eStatus;
1820 }
1821 
SetMbEncBindingTable(PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable,uint32_t idx)1822 MOS_STATUS CodechalFeiHevcStateG9Skl::SetMbEncBindingTable(PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable, uint32_t idx)
1823 {
1824     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1825 
1826     CODECHAL_ENCODE_CHK_NULL_RETURN(bindingTable);
1827 
1828     MOS_ZeroMemory(bindingTable, sizeof(*bindingTable));
1829     bindingTable->dwMediaState = ConvertKrnOpsToMediaState(ENC_MBENC, idx);
1830 
1831     switch (idx)
1832     {
1833     case CODECHAL_HEVC_MBENC_2xSCALING:
1834         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_SCALING_FRAME_END - CODECHAL_HEVC_FEI_SCALING_FRAME_BEGIN;
1835         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_SCALING_FRAME_BEGIN;
1836         break;
1837 
1838     case CODECHAL_HEVC_MBENC_32x32MD:
1839         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_32x32_PU_END - CODECHAL_HEVC_FEI_32x32_PU_BEGIN;
1840         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_32x32_PU_BEGIN;
1841         break;
1842 
1843     case CODECHAL_HEVC_MBENC_16x16SAD:
1844         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_16x16_PU_SAD_END - CODECHAL_HEVC_FEI_16x16_PU_SAD_BEGIN;
1845         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_16x16_PU_SAD_BEGIN;
1846         break;
1847 
1848     case CODECHAL_HEVC_MBENC_16x16MD:
1849         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_16x16_PU_MD_END - CODECHAL_HEVC_FEI_16x16_PU_MD_BEGIN;
1850         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_16x16_PU_MD_BEGIN;
1851         break;
1852 
1853     case CODECHAL_HEVC_MBENC_8x8PU:
1854         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_8x8_PU_END - CODECHAL_HEVC_FEI_8x8_PU_BEGIN;
1855         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_8x8_PU_BEGIN;
1856         break;
1857 
1858     case CODECHAL_HEVC_MBENC_8x8FMODE:
1859         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_8x8_PU_FMODE_END - CODECHAL_HEVC_FEI_8x8_PU_FMODE_BEGIN;
1860         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_8x8_PU_FMODE_BEGIN;
1861         break;
1862 
1863     case CODECHAL_HEVC_MBENC_32x32INTRACHECK:
1864         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_B_32x32_PU_END - CODECHAL_HEVC_FEI_B_32x32_PU_BEGIN;
1865         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_B_32x32_PU_BEGIN;
1866         break;
1867 
1868     case CODECHAL_HEVC_FEI_MBENC_BENC:
1869         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_B_MBENC_END - CODECHAL_HEVC_FEI_B_MBENC_BEGIN;
1870         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_B_MBENC_BEGIN;
1871         break;
1872 
1873     case CODECHAL_HEVC_FEI_MBENC_BPAK:
1874         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_B_PAK_END - CODECHAL_HEVC_FEI_B_PAK_BEGIN;
1875         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_B_PAK_BEGIN;
1876         break;
1877 
1878     case CODECHAL_HEVC_FEI_MBENC_DS_COMBINED:
1879         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_DS_COMBINED_END - CODECHAL_HEVC_FEI_DS_COMBINED_BEGIN;
1880         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_DS_COMBINED_BEGIN;
1881         break;
1882 
1883     case CODECHAL_HEVC_FEI_MBENC_PENC:
1884         bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_P_MBENC_END - CODECHAL_HEVC_FEI_P_MBENC_BEGIN;
1885         bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_P_MBENC_BEGIN;
1886         break;
1887 
1888     default:
1889         CODECHAL_ENCODE_ASSERT(false);
1890         eStatus = MOS_STATUS_INVALID_PARAMETER;
1891         return eStatus;
1892     }
1893 
1894     for (uint32_t i = 0; i < bindingTable->dwNumBindingTableEntries; i++)
1895     {
1896         bindingTable->dwBindingTableEntries[i] = i;
1897     }
1898 
1899     return eStatus;
1900 }
1901 
EndKernelCall(CODECHAL_MEDIA_STATE_TYPE mediaStateType,PMHW_KERNEL_STATE kernelState,PMOS_COMMAND_BUFFER cmdBuffer)1902 MOS_STATUS CodechalFeiHevcStateG9Skl::EndKernelCall(
1903     CODECHAL_MEDIA_STATE_TYPE       mediaStateType,
1904     PMHW_KERNEL_STATE               kernelState,
1905     PMOS_COMMAND_BUFFER             cmdBuffer)
1906 {
1907     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1908 
1909     CODECHAL_ENCODE_FUNCTION_ENTER;
1910 
1911     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcStateG9::EndKernelCall(mediaStateType, kernelState, cmdBuffer));
1912 
1913     // skip haar distortion surface, statstics data dump surface
1914     // and frame level statstics data surface because they are not used
1915 #if 0
1916     CODECHAL_DEBUG_TOOL(
1917         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
1918             &m_encStatsBuffers.m_puStatsSurface,
1919             CodechalDbgAttr::attrOutput,
1920             "HEVC_B_MBENC_PU_StatsSurface",
1921             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1922 
1923         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
1924             &m_encStatsBuffers.m_8x8PuHaarDist,
1925             CodechalDbgAttr::attrOutput,
1926             "HEVC_B_MBENC_8X8_PU_HaarDistSurface",
1927             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1928 
1929         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1930             &m_encStatsBuffers.m_8x8PuFrameStats.sResource,
1931             "HEVC_B_MBENC_ConstantData_In",
1932             CodechalDbgAttr::attrOutput,
1933             m_encStatsBuffers.m_8x8PuFrameStats.dwSize,
1934             0,
1935             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1936 
1937         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
1938             &m_encStatsBuffers.m_mbEncStatsSurface,
1939             CodechalDbgAttr::attrOutput,
1940             "HEVC_B_MBENC_MB_ENC_StatsSurface",
1941             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1942 
1943         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1944             &m_encStatsBuffers.m_mbEncFrameStats.sResource,
1945             "HEVC_B_MBENC_ConstantData_In",
1946             CodechalDbgAttr::attrOutput,
1947             m_encStatsBuffers.m_mbEncFrameStats.dwSize,
1948             0,
1949             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1950     )
1951 #endif
1952     return eStatus;
1953 }
1954 
InitKernelState()1955 MOS_STATUS CodechalFeiHevcStateG9Skl::InitKernelState()
1956 {
1957     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1958 
1959     CODECHAL_ENCODE_FUNCTION_ENTER;
1960 
1961     // InitKernelStateMbEnc
1962     m_numMbEncEncKrnStates = CODECHAL_HEVC_FEI_MBENC_NUM_BXT_SKL;
1963 
1964     m_mbEncKernelStates = MOS_NewArray(MHW_KERNEL_STATE, m_numMbEncEncKrnStates);
1965     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
1966 
1967     m_mbEncKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory(
1968         sizeof(GenericBindingTable) * m_numMbEncEncKrnStates);
1969     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable);
1970 
1971     auto krnStateIdx = m_mbEncKernelStates;
1972 
1973     for (uint32_t KrnStateIdx = 0; KrnStateIdx < m_numMbEncEncKrnStates; KrnStateIdx++)
1974     {
1975         auto kernelSize = m_combinedKernelSize;
1976         CODECHAL_KERNEL_HEADER currKrnHeader;
1977 
1978         if (KrnStateIdx == CODECHAL_HEVC_FEI_MBENC_DS_COMBINED &&
1979             m_numMbEncEncKrnStates == CODECHAL_HEVC_FEI_MBENC_NUM_BXT_SKL)  //Ignore. It isn't used on BXT.
1980         {
1981             krnStateIdx++;
1982             continue;
1983         }
1984 
1985         CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
1986             m_kernelBinary,
1987             ENC_MBENC,
1988             KrnStateIdx,
1989             &currKrnHeader,
1990             &kernelSize));
1991 
1992         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMbEncKernelParams(
1993             &krnStateIdx->KernelParams,
1994             KrnStateIdx));
1995 
1996         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMbEncBindingTable(
1997             &m_mbEncKernelBindingTable[KrnStateIdx], KrnStateIdx));
1998 
1999         krnStateIdx->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
2000         krnStateIdx->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
2001         krnStateIdx->KernelParams.iSize = kernelSize;
2002 
2003         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
2004             m_stateHeapInterface,
2005             krnStateIdx->KernelParams.iBTCount,
2006             &krnStateIdx->dwSshSize,
2007             &krnStateIdx->dwBindingTableSize));
2008 
2009         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, krnStateIdx));
2010 
2011         krnStateIdx++;
2012     }
2013 
2014     return eStatus;
2015 }
2016 
GetKernelHeaderAndSize(void * binary,EncOperation operation,uint32_t krnStateIdx,void * krnHeader,uint32_t * krnSize)2017 MOS_STATUS CodechalFeiHevcStateG9Skl::GetKernelHeaderAndSize(
2018     void                           *binary,
2019     EncOperation                   operation,
2020     uint32_t                       krnStateIdx,
2021     void                           *krnHeader,
2022     uint32_t                       *krnSize)
2023 {
2024     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2025 
2026     CODECHAL_ENCODE_FUNCTION_ENTER;
2027 
2028     CODECHAL_ENCODE_CHK_NULL_RETURN(binary);
2029     CODECHAL_ENCODE_CHK_NULL_RETURN(krnHeader);
2030     CODECHAL_ENCODE_CHK_NULL_RETURN(krnSize);
2031 
2032     PCODECHAL_ENC_HEVC_KERNEL_HEADER_FEI_G9_SKL kernelHeaderTable = (PCODECHAL_ENC_HEVC_KERNEL_HEADER_FEI_G9_SKL)binary;
2033     PCODECHAL_KERNEL_HEADER currKrnHeader = nullptr;
2034 
2035     if (operation == ENC_SCALING4X)
2036     {
2037         currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_I_DS4HME;
2038     }
2039     else if (operation == ENC_ME)
2040     {
2041         // SKL supports P frame. P HME index CODECHAL_ENCODE_ME_IDX_P is 0 and B HME index CODECHAL_ENCODE_ME_IDX_B is 1
2042         if (krnStateIdx == 0)
2043         {
2044             currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_P_HME;
2045         }
2046         else
2047         {
2048             currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_B_HME;
2049         }
2050     }
2051     else if (operation == ENC_BRC)
2052     {
2053         switch (krnStateIdx)
2054         {
2055         case CODECHAL_HEVC_BRC_COARSE_INTRA:
2056             currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_I_COARSE;
2057             break;
2058 
2059         default:
2060             CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested");
2061             eStatus = MOS_STATUS_INVALID_PARAMETER;
2062             return eStatus;
2063         }
2064     }
2065     else if (operation == ENC_MBENC)
2066     {
2067         switch (krnStateIdx)
2068         {
2069             case CODECHAL_HEVC_MBENC_2xSCALING:
2070             case CODECHAL_HEVC_MBENC_32x32MD:
2071             case CODECHAL_HEVC_MBENC_16x16SAD:
2072             case CODECHAL_HEVC_MBENC_16x16MD:
2073             case CODECHAL_HEVC_MBENC_8x8PU:
2074             case CODECHAL_HEVC_MBENC_8x8FMODE:
2075             case CODECHAL_HEVC_MBENC_32x32INTRACHECK:
2076             case CODECHAL_HEVC_FEI_MBENC_BENC:
2077                 currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_I_2xDownSampling_Kernel;
2078                 currKrnHeader += krnStateIdx;
2079                 break;
2080 
2081             case CODECHAL_HEVC_FEI_MBENC_BPAK:
2082                 currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_PB_Pak;
2083                 break;
2084 
2085             case CODECHAL_HEVC_FEI_MBENC_DS_COMBINED:
2086                 currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_DS_Combined;
2087                 break;
2088 
2089             case CODECHAL_HEVC_FEI_MBENC_PENC:
2090                 currKrnHeader = &kernelHeaderTable->HEVC_FEI_LCUEnc_P_MB;
2091                 break;
2092 
2093             default:
2094                 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
2095                 eStatus = MOS_STATUS_INVALID_PARAMETER;
2096                 return eStatus;
2097         }
2098     }
2099     else
2100     {
2101         CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
2102         eStatus = MOS_STATUS_INVALID_PARAMETER;
2103         return eStatus;
2104     }
2105 
2106     *((PCODECHAL_KERNEL_HEADER)krnHeader) = *currKrnHeader;
2107 
2108     PCODECHAL_KERNEL_HEADER nextKrnHeader = (currKrnHeader + 1);
2109     PCODECHAL_KERNEL_HEADER invalidEntry = (PCODECHAL_KERNEL_HEADER)(((uint8_t*)binary) + sizeof(*kernelHeaderTable));
2110     uint32_t nextKrnOffset = *krnSize;
2111 
2112     if (nextKrnHeader < invalidEntry)
2113     {
2114         nextKrnOffset = nextKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT;
2115     }
2116     *krnSize = nextKrnOffset - (currKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
2117 
2118     return eStatus;
2119 }
2120 
2121 #ifndef HEVC_FEI_ENABLE_CMRT
2122 
Encode2xScalingKernel()2123 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode2xScalingKernel()
2124 {
2125     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2126 
2127     PerfTagSetting perfTag;
2128     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL);
2129 
2130     uint32_t krnIdx = CODECHAL_HEVC_MBENC_2xSCALING;
2131     auto kernelState = &m_mbEncKernelStates[krnIdx];
2132     auto pScalingBindingTable = &m_mbEncKernelBindingTable[krnIdx];
2133     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
2134     {
2135         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
2136     }
2137 
2138     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo(
2139         m_osInterface,
2140         &m_scaled2xSurface));
2141 
2142     // Setup DSH
2143     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
2144         m_stateHeapInterface,
2145         kernelState,
2146         false,
2147         0,
2148         false,
2149         m_storeData));
2150 
2151     //Setup CURBE
2152     MEDIA_OBJECT_DOWNSCALING_2X_STATIC_DATA_G9  cmd, *curbe = &cmd;
2153     MOS_ZeroMemory(curbe, sizeof(*curbe));
2154     curbe->DW0.PicWidth  = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
2155     curbe->DW0.PicHeight    = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
2156 
2157     uint32_t startBTI = 0;
2158     curbe->DW8.BTI_Src_Y    = pScalingBindingTable->dwBindingTableEntries[startBTI++];
2159     curbe->DW9.BTI_Dst_Y    = pScalingBindingTable->dwBindingTableEntries[startBTI++];
2160 
2161     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_2X_SCALING;
2162     CODECHAL_ENCODE_CHK_STATUS_RETURN(
2163         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
2164 
2165     MOS_COMMAND_BUFFER cmdBuffer;
2166     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
2167         &cmdBuffer,
2168         kernelState,
2169         encFunctionType,
2170         nullptr));
2171 
2172     // Add surface states, 2X scaling uses U16Norm surface format
2173     startBTI = 0;
2174 
2175     // Source surface/s
2176     auto surfaceCodecParams = &m_surfaceParams[SURFACE_RAW_Y];
2177     surfaceCodecParams->bUse16UnormSurfaceFormat = true;
2178 
2179     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2180         kernelState,
2181         &cmdBuffer,
2182         SURFACE_RAW_Y,
2183         &pScalingBindingTable->dwBindingTableEntries[startBTI++]
2184     ));
2185 
2186     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
2187     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceParams(surfaceCodecParams));
2188 
2189     // Destination surface/s
2190     m_scaled2xSurface.dwWidth  = MOS_ALIGN_CEIL((m_frameWidth / SCALE_FACTOR_2x), CODECHAL_MACROBLOCK_WIDTH);
2191     m_scaled2xSurface.dwHeight = MOS_ALIGN_CEIL((m_frameHeight / SCALE_FACTOR_2x), CODECHAL_MACROBLOCK_HEIGHT);
2192 
2193     m_surfaceParams[SURFACE_Y_2X].bUse16UnormSurfaceFormat =
2194     m_surfaceParams[SURFACE_Y_2X].bIsWritable   =
2195     m_surfaceParams[SURFACE_Y_2X].bRenderTarget = true;
2196     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2197         kernelState,
2198         &cmdBuffer,
2199         SURFACE_Y_2X,
2200         &pScalingBindingTable->dwBindingTableEntries[startBTI++]
2201         ));
2202 
2203     if (!m_hwWalker)
2204     {
2205         eStatus = MOS_STATUS_UNKNOWN;
2206         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
2207         return eStatus;
2208     }
2209 
2210     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
2211     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
2212     walkerCodecParams.WalkerMode        = m_walkerMode;
2213     // check kernel of Downscaling 2x kernels for Ultra HME.
2214     walkerCodecParams.dwResolutionX     = MOS_ALIGN_CEIL(m_frameWidth,  32) >> 5;
2215     // The frame kernel process 32x32 input pixels and output 16x16 down sampled pixels
2216     walkerCodecParams.dwResolutionY     = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
2217     /* Enforce no dependency dispatch order for Scaling kernel,  */
2218     walkerCodecParams.bNoDependency     = true;
2219 
2220     MHW_WALKER_PARAMS walkerParams;
2221     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
2222         m_hwInterface,
2223         &walkerParams,
2224         &walkerCodecParams));
2225 
2226     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
2227         &cmdBuffer,
2228         &walkerParams));
2229 
2230     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
2231         encFunctionType,
2232         kernelState,
2233         &cmdBuffer));
2234 
2235     return eStatus;
2236 }
2237 
Encode32x32PuModeDecisionKernel()2238 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode32x32PuModeDecisionKernel()
2239 {
2240     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2241 
2242     PerfTagSetting perfTag;
2243     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_32X32_PU_MD);
2244 
2245     uint32_t krnIdx = CODECHAL_HEVC_MBENC_32x32MD;
2246     auto kernelState = &m_mbEncKernelStates[krnIdx];
2247     auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
2248     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
2249     {
2250         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
2251     }
2252 
2253     // Setup DSH
2254     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
2255         m_stateHeapInterface,
2256         kernelState,
2257         false,
2258         0,
2259         false,
2260         m_storeData));
2261 
2262     //Setup CURBE
2263     uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
2264 
2265     CalcLambda(CODECHAL_ENCODE_HEVC_I_SLICE, INTRA_TRANSFORM_HAAR);
2266     int32_t sliceQp = CalSliceQp();
2267 
2268     double lambdaScalingFactor = 1.0;
2269     double qpLambda = m_qpLambdaMd[CODECHAL_ENCODE_HEVC_I_SLICE][sliceQp];
2270     double squaredQpLambda = qpLambda * qpLambda;
2271     m_fixedPointLambda = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
2272 
2273     CODECHAL_FEI_HEVC_I_32x32_PU_MODE_DECISION_CURBE_G9 cmd, *curbe = &cmd;
2274     MOS_ZeroMemory(curbe, sizeof(*curbe));
2275     curbe->DW0.FrameWidth      = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
2276     curbe->DW0.FrameHeight     = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
2277 
2278     curbe->DW1.EnableDebugDump = false;
2279     curbe->DW1.LCUType         = (log2MaxCUSize==6)? 0 /*64x64*/ : 1 /*32x32*/;
2280     curbe->DW1.PuType          = 0; // 32x32 PU
2281     curbe->DW1.BRCEnable       = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
2282     curbe->DW1.LCUBRCEnable    = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
2283     curbe->DW1.SliceType       = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
2284     curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
2285     curbe->DW1.ROIEnable            = (m_hevcPicParams->NumROI > 0);
2286     curbe->DW1.SliceQp         = sliceQp;
2287     curbe->DW1.EnableStatsDataDump = m_encodeParams.bReportStatisticsEnabled;
2288     curbe->DW1.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
2289 
2290     curbe->DW2.Lambda          = m_fixedPointLambda;
2291 
2292     curbe->DW3.ModeCost32x32   = 0;
2293 
2294     curbe->DW4.EarlyExit       = (uint32_t)-1;
2295     if (curbe->DW1.EnableStatsDataDump)
2296     {
2297         double lambdaMd;
2298         float hadBias = 2.0f;
2299 
2300         lambdaMd = m_qpLambdaMd[curbe->DW1.SliceType][sliceQp];
2301         lambdaMd = lambdaMd * hadBias;
2302         curbe->DW5.NewLambdaForHaarTransform = (uint32_t)(lambdaMd*(1<<10));
2303     }
2304 
2305     uint32_t startIndex = 0;
2306     curbe->DW8.BTI_32x32PU_Output    = bindingTable->dwBindingTableEntries[startIndex++];
2307     curbe->DW9.BTI_Src_Y           = bindingTable->dwBindingTableEntries[startIndex++];
2308     startIndex++; // skip one BTI for Y and UV have the same BTI
2309     curbe->DW10.BTI_Src_Y2x        = bindingTable->dwBindingTableEntries[startIndex++];
2310     curbe->DW11.BTI_Slice_Map      = bindingTable->dwBindingTableEntries[startIndex++];
2311     curbe->DW12.BTI_Src_Y2x_VME    = bindingTable->dwBindingTableEntries[startIndex++];
2312     curbe->DW13.BTI_Brc_Input      = bindingTable->dwBindingTableEntries[startIndex++];
2313     curbe->DW14.BTI_LCU_Qp_Surface = bindingTable->dwBindingTableEntries[startIndex++];
2314     curbe->DW15.BTI_Brc_Data       = bindingTable->dwBindingTableEntries[startIndex++];
2315     curbe->DW16.BTI_Stats_Data     = bindingTable->dwBindingTableEntries[startIndex++];
2316     curbe->DW17.BTI_Kernel_Debug   = bindingTable->dwBindingTableEntries[startIndex++];
2317 
2318     CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
2319 
2320     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION;
2321     CODECHAL_ENCODE_CHK_STATUS_RETURN(
2322         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
2323 
2324     MOS_COMMAND_BUFFER cmdBuffer;
2325     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
2326         &cmdBuffer,
2327         kernelState,
2328         encFunctionType,
2329         nullptr));
2330 
2331     //Add surface states
2332     startIndex = 0;
2333 
2334     // 32x32 PU output
2335     m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bIsWritable   =
2336     m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bRenderTarget = true;
2337     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2338         kernelState,
2339         &cmdBuffer,
2340         SURFACE_32x32_PU_OUTPUT,
2341         &bindingTable->dwBindingTableEntries[startIndex++]));
2342 
2343     // Source Y and UV
2344     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2345         kernelState,
2346         &cmdBuffer,
2347         SURFACE_RAW_Y_UV,
2348         &bindingTable->dwBindingTableEntries[startIndex++]));
2349     startIndex ++; // UV index
2350 
2351     // Source Y2x
2352     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2353         kernelState,
2354         &cmdBuffer,
2355         SURFACE_Y_2X,
2356         &bindingTable->dwBindingTableEntries[startIndex++]));
2357 
2358     // Slice map
2359     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2360         kernelState,
2361         &cmdBuffer,
2362         SURFACE_SLICE_MAP,
2363         &bindingTable->dwBindingTableEntries[startIndex++]));
2364 
2365     // Source Y2x for VME
2366     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2367         kernelState,
2368         &cmdBuffer,
2369         SURFACE_Y_2X_VME,
2370         &bindingTable->dwBindingTableEntries[startIndex++]));
2371 
2372     // BRC Input
2373     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2374         kernelState,
2375         &cmdBuffer,
2376         SURFACE_BRC_INPUT,
2377         &bindingTable->dwBindingTableEntries[startIndex++]));
2378 
2379     // LCU Qp surface
2380     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2381         kernelState,
2382         &cmdBuffer,
2383         SURFACE_LCU_QP,
2384         &bindingTable->dwBindingTableEntries[startIndex++]));
2385 
2386     // BRC data surface
2387     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2388         kernelState,
2389         &cmdBuffer,
2390         SURFACE_BRC_DATA,
2391         &bindingTable->dwBindingTableEntries[startIndex++]));
2392 
2393     // skip statstics data dump surface because it is not used
2394 
2395     if (!m_hwWalker)
2396     {
2397         eStatus = MOS_STATUS_UNKNOWN;
2398         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
2399         return eStatus;
2400     }
2401 
2402     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
2403     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
2404     walkerCodecParams.WalkerMode            = m_walkerMode;
2405     walkerCodecParams.dwResolutionX         = MOS_ALIGN_CEIL(m_frameWidth,  32) >> 5; /* looping for Walker is needed at 8x8 block level */
2406     walkerCodecParams.dwResolutionY         = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
2407     walkerCodecParams.bNoDependency         = true;     /* Enforce no dependency dispatch order for 32x32 MD kernel  */
2408 
2409     MHW_WALKER_PARAMS walkerParams;
2410     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
2411         m_hwInterface,
2412         &walkerParams,
2413         &walkerCodecParams));
2414 
2415     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
2416         &cmdBuffer,
2417         &walkerParams));
2418 
2419     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
2420         encFunctionType,
2421         kernelState,
2422         &cmdBuffer));
2423 
2424     return eStatus;
2425 }
2426 
Encode16x16SadPuComputationKernel()2427 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode16x16SadPuComputationKernel()
2428 {
2429     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2430 
2431     CODECHAL_ENCODE_FUNCTION_ENTER;
2432 
2433     PerfTagSetting perfTag;
2434     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_16X16_SAD);
2435 
2436     uint32_t krnIdx = CODECHAL_HEVC_MBENC_16x16SAD;
2437     auto kernelState = &m_mbEncKernelStates[krnIdx];
2438     auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
2439     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
2440     {
2441         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
2442     }
2443 
2444     //Setup DSH
2445     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
2446         m_stateHeapInterface,
2447         kernelState,
2448         false,
2449         0,
2450         false,
2451         m_storeData));
2452 
2453     // Setup CURBE
2454     CODECHAL_ENC_HEVC_I_16x16_SAD_CURBE_G9 cmd, *curbe = &cmd;
2455 
2456     MOS_ZeroMemory(curbe, sizeof(*curbe));
2457     curbe->DW0.FrameWidth      = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
2458     curbe->DW0.FrameHeight     = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
2459 
2460     curbe->DW1.Log2MaxCUSize   = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
2461     curbe->DW1.Log2MinCUSize   = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
2462     curbe->DW1.Log2MinTUSize   = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
2463     curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
2464 
2465     curbe->DW2.SliceType       = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
2466     curbe->DW2.SimFlagForInter = false;
2467     if(m_hevcPicParams->CodingType != I_TYPE)
2468     {
2469         curbe->DW2.FASTSurveillanceFlag = m_hevcSeqParams->bVideoSurveillance;
2470     }
2471 
2472     uint32_t startIndex = 0;
2473     curbe->DW8.BTI_Src_Y                   = bindingTable->dwBindingTableEntries[startIndex++];
2474     startIndex++; // skip UV BTI
2475     curbe->DW9.BTI_Sad_16x16_PU_Output     = bindingTable->dwBindingTableEntries[startIndex++];
2476     curbe->DW10.BTI_32x32_Pu_ModeDecision  = bindingTable->dwBindingTableEntries[startIndex++];
2477     curbe->DW11.BTI_Slice_Map              = bindingTable->dwBindingTableEntries[startIndex++];
2478     curbe->DW12.BTI_Simplest_Intra         = bindingTable->dwBindingTableEntries[startIndex++];
2479     curbe->DW13.BTI_Debug                  = bindingTable->dwBindingTableEntries[startIndex++];
2480 
2481     CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
2482 
2483     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_16x16_PU_SAD;
2484     CODECHAL_ENCODE_CHK_STATUS_RETURN(
2485         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
2486 
2487     MOS_COMMAND_BUFFER cmdBuffer;
2488     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
2489         &cmdBuffer,
2490         kernelState,
2491         encFunctionType,
2492         nullptr));
2493 
2494     //Add surface states
2495     startIndex = 0;
2496 
2497     // Source Y and UV
2498     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2499         kernelState,
2500         &cmdBuffer,
2501         SURFACE_RAW_Y_UV,
2502         &bindingTable->dwBindingTableEntries[startIndex++]));
2503     startIndex++;
2504 
2505     // 16x16 PU SAD output
2506     m_surfaceParams[SURFACE_16x16PU_SAD].bIsWritable   =
2507     m_surfaceParams[SURFACE_16x16PU_SAD].bRenderTarget = true;
2508     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2509         kernelState,
2510         &cmdBuffer,
2511         SURFACE_16x16PU_SAD,
2512         &bindingTable->dwBindingTableEntries[startIndex++]));
2513 
2514     // 32x32 PU MD data
2515     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2516         kernelState,
2517         &cmdBuffer,
2518         SURFACE_32x32_PU_OUTPUT,
2519         &bindingTable->dwBindingTableEntries[startIndex++]));
2520 
2521     // Slice map
2522     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2523         kernelState,
2524         &cmdBuffer,
2525         SURFACE_SLICE_MAP,
2526         &bindingTable->dwBindingTableEntries[startIndex++]));
2527 
2528     // Simplest Intra
2529     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2530         kernelState,
2531         &cmdBuffer,
2532         SURFACE_SIMPLIFIED_INTRA,
2533         &bindingTable->dwBindingTableEntries[startIndex++]));
2534 
2535     if (!m_hwWalker)
2536     {
2537         eStatus = MOS_STATUS_UNKNOWN;
2538         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
2539         return eStatus;
2540     }
2541 
2542     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
2543     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
2544     walkerCodecParams.WalkerMode        = m_walkerMode;
2545     /* looping for Walker is needed at 16x16 block level */
2546     walkerCodecParams.dwResolutionX     = MOS_ALIGN_CEIL(m_frameWidth,  16) >> 4;
2547     walkerCodecParams.dwResolutionY     = MOS_ALIGN_CEIL(m_frameHeight, 16) >> 4;
2548     /* Enforce no dependency dispatch order for the 16x16 SAD kernel  */
2549     walkerCodecParams.bNoDependency     = true;
2550 
2551     MHW_WALKER_PARAMS walkerParams;
2552     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
2553         m_hwInterface,
2554         &walkerParams,
2555         &walkerCodecParams));
2556 
2557     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
2558         &cmdBuffer,
2559         &walkerParams));
2560 
2561     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
2562         encFunctionType,
2563         kernelState,
2564         &cmdBuffer));
2565 
2566     return eStatus;
2567 }
2568 
Encode16x16PuModeDecisionKernel()2569 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode16x16PuModeDecisionKernel()
2570 {
2571     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2572 
2573     CODECHAL_ENCODE_FUNCTION_ENTER;
2574 
2575     PerfTagSetting perfTag;
2576     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_16X16_PU_MD);
2577 
2578     uint32_t krnIdx = CODECHAL_HEVC_MBENC_16x16MD;
2579     auto kernelState = &m_mbEncKernelStates[krnIdx];
2580     auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
2581     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
2582     {
2583         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
2584     }
2585 
2586     // Setup DSH
2587     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
2588         m_stateHeapInterface,
2589         kernelState,
2590         false,
2591         0,
2592         false,
2593         m_storeData));
2594 
2595     // Setup CURBE
2596     int32_t sliceQp = CalSliceQp();
2597     uint8_t sliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
2598 
2599     double lambdaScaleFactor = 0.46 + sliceQp - 22;
2600     if (lambdaScaleFactor < 0)
2601     {
2602         lambdaScaleFactor = 0.46;
2603     }
2604 
2605     if (lambdaScaleFactor > 15)
2606     {
2607         lambdaScaleFactor = 15;
2608     }
2609 
2610     double squredLambda = lambdaScaleFactor * pow(2.0, ((double)sliceQp-12.0)/6);
2611     m_fixedPointLambdaForLuma = (uint32_t)(squredLambda * (1<<10));
2612 
2613     double lambdaScalingFactor = 1.0;
2614     double qpLambda = m_qpLambdaMd[sliceType][sliceQp];
2615     double squaredQpLambda = qpLambda * qpLambda;
2616     m_fixedPointLambdaForChroma = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
2617 
2618     LoadCosts(sliceType, (uint8_t)sliceQp, INTRA_TRANSFORM_HAAR);
2619 
2620     CODECHAL_FEI_HEVC_I_16x16_PU_MODEDECISION_CURBE_G9 cmd, *curbe = &cmd;
2621     MOS_ZeroMemory(curbe, sizeof(*curbe));
2622 
2623     uint32_t log2MaxCUSize        = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
2624     curbe->DW0.FrameWidth          = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
2625     curbe->DW0.FrameHeight         = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
2626 
2627     curbe->DW1.Log2MaxCUSize       = log2MaxCUSize;
2628     curbe->DW1.Log2MinCUSize       = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
2629     curbe->DW1.Log2MinTUSize       = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
2630     curbe->DW1.SliceQp             = sliceQp;
2631 
2632     curbe->DW2.FixedPoint_Lambda_PredMode = m_fixedPointLambdaForChroma;
2633 
2634     curbe->DW3.LambdaScalingFactor    = 1;
2635     curbe->DW3.SliceType              = sliceType;
2636     curbe->DW3.EnableIntraEarlyExit   = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
2637     curbe->DW3.BRCEnable              = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
2638     curbe->DW3.LCUBRCEnable           = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
2639     curbe->DW3.ROIEnable              = (m_hevcPicParams->NumROI > 0);
2640     curbe->DW3.FASTSurveillanceFlag   = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
2641     curbe->DW3.EnableRollingIntra     = m_hevcPicParams->bEnableRollingIntraRefresh;
2642     //Given only Column Rolling I is supported, if in future, Row Rolling I support to be added, then, need to make change here as per Kernel
2643     curbe->DW3.IntraRefreshEn         = m_hevcPicParams->bEnableRollingIntraRefresh;
2644     curbe->DW3.HalfUpdateMixedLCU     = 0;
2645     curbe->DW3.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
2646 
2647     curbe->DW4.PenaltyForIntra8x8NonDCPredMode = 0;
2648     curbe->DW4.IntraComputeType                = 1;
2649     curbe->DW4.AVCIntra8x8Mask                 = 0;
2650     curbe->DW4.IntraSadAdjust                  = 2;
2651 
2652     double lambdaMd       = sqrt(0.57*pow(2.0, ((double)sliceQp-12.0)/3));
2653     squredLambda          = lambdaMd * lambdaMd;
2654     uint32_t newLambda      = (uint32_t)(squredLambda*(1<<10));
2655     curbe->DW5.FixedPoint_Lambda_CU_Mode_for_Cost_Calculation = newLambda;
2656 
2657     curbe->DW6.ScreenContentFlag    = m_hevcPicParams->bScreenContent;
2658 
2659     curbe->DW7.ModeCostIntraNonPred = m_modeCost[0];
2660     curbe->DW7.ModeCostIntra16x16   = m_modeCost[1];
2661     curbe->DW7.ModeCostIntra8x8     = m_modeCost[2];
2662     curbe->DW7.ModeCostIntra4x4     = m_modeCost[3];
2663 
2664     curbe->DW8.FixedPoint_Lambda_CU_Mode_for_Luma = m_fixedPointLambdaForLuma;
2665 
2666     if (m_hevcPicParams->bEnableRollingIntraRefresh)
2667     {
2668         curbe->DW9.IntraRefreshMBNum    = m_hevcPicParams->IntraInsertionLocation;
2669         curbe->DW9.IntraRefreshQPDelta  = m_hevcPicParams->QpDeltaForInsertedIntra;
2670         curbe->DW9.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
2671     }
2672 
2673     curbe->DW10.SimplifiedFlagForInter = 0;
2674     if (m_encodeParams.bReportStatisticsEnabled)
2675     {
2676         curbe->DW10.HaarTransformMode  = true;
2677     }
2678     else
2679     {
2680         curbe->DW10.HaarTransformMode = (m_hevcPicParams->CodingType == I_TYPE)? false: true;
2681     }
2682 
2683     uint32_t startBTI = 0;
2684     curbe->DW16.BTI_Src_Y              = bindingTable->dwBindingTableEntries[startBTI++];
2685     startBTI++; // skip UV BTI
2686     curbe->DW17.BTI_Sad_16x16_PU       = bindingTable->dwBindingTableEntries[startBTI++];
2687     curbe->DW18.BTI_PAK_Object         = bindingTable->dwBindingTableEntries[startBTI++];
2688     curbe->DW19.BTI_SAD_32x32_PU_mode  = bindingTable->dwBindingTableEntries[startBTI++];
2689     curbe->DW20.BTI_VME_Mode_8x8       = bindingTable->dwBindingTableEntries[startBTI++];
2690     curbe->DW21.BTI_Slice_Map          = bindingTable->dwBindingTableEntries[startBTI++];
2691     curbe->DW22.BTI_VME_Src            = bindingTable->dwBindingTableEntries[startBTI++];
2692     curbe->DW23.BTI_BRC_Input          = bindingTable->dwBindingTableEntries[startBTI++];
2693     curbe->DW24.BTI_Simplest_Intra     = bindingTable->dwBindingTableEntries[startBTI++];
2694     curbe->DW25.BTI_LCU_Qp_Surface     = bindingTable->dwBindingTableEntries[startBTI++];
2695     curbe->DW26.BTI_BRC_Data           = bindingTable->dwBindingTableEntries[startBTI++];
2696     curbe->DW27.BTI_Debug              = bindingTable->dwBindingTableEntries[startBTI++];
2697 
2698     CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
2699 
2700     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_16x16_PU_MODE_DECISION;
2701     CODECHAL_ENCODE_CHK_STATUS_RETURN(
2702         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
2703 
2704     MOS_COMMAND_BUFFER cmdBuffer;
2705     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
2706         &cmdBuffer,
2707         kernelState,
2708         encFunctionType,
2709         nullptr));
2710 
2711     //Add surface states
2712     startBTI = 0;
2713 
2714     // Source Y and UV:
2715     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2716         kernelState,
2717         &cmdBuffer,
2718         SURFACE_RAW_Y_UV,
2719         &bindingTable->dwBindingTableEntries[startBTI++]));
2720     startBTI++;
2721 
2722     // 16x16 PU SAD output
2723     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2724         kernelState,
2725         &cmdBuffer,
2726         SURFACE_16x16PU_SAD,
2727         &bindingTable->dwBindingTableEntries[startBTI++]));
2728 
2729     // PAK object output
2730     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2731         kernelState,
2732         &cmdBuffer,
2733         SURFACE_CU_RECORD,
2734         &bindingTable->dwBindingTableEntries[startBTI++]));
2735 
2736     // 32x32 PU MD data
2737     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2738         kernelState,
2739         &cmdBuffer,
2740         SURFACE_32x32_PU_OUTPUT,
2741         &bindingTable->dwBindingTableEntries[startBTI++]));
2742 
2743     // VME 8x8 mode
2744     m_surfaceParams[SURFACE_VME_8x8].bIsWritable   =
2745     m_surfaceParams[SURFACE_VME_8x8].bRenderTarget = true;
2746     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2747         kernelState,
2748         &cmdBuffer,
2749         SURFACE_VME_8x8,
2750         &bindingTable->dwBindingTableEntries[startBTI++]));
2751 
2752     // Slice map
2753     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2754         kernelState,
2755         &cmdBuffer,
2756         SURFACE_SLICE_MAP,
2757         &bindingTable->dwBindingTableEntries[startBTI++]));
2758 
2759     // Source Y for VME
2760     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2761         kernelState,
2762         &cmdBuffer,
2763         SURFACE_RAW_VME,
2764         &bindingTable->dwBindingTableEntries[startBTI++]));
2765 
2766     // BRC Input
2767     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2768         kernelState,
2769         &cmdBuffer,
2770         SURFACE_BRC_INPUT,
2771         &bindingTable->dwBindingTableEntries[startBTI++]));
2772 
2773     // Simplest Intra
2774     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2775         kernelState,
2776         &cmdBuffer,
2777         SURFACE_SIMPLIFIED_INTRA,
2778         &bindingTable->dwBindingTableEntries[startBTI++]));
2779 
2780     // LCU Qp surface
2781     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2782         kernelState,
2783         &cmdBuffer,
2784         SURFACE_LCU_QP,
2785         &bindingTable->dwBindingTableEntries[startBTI++]));
2786 
2787     // BRC data surface
2788     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2789         kernelState,
2790         &cmdBuffer,
2791         SURFACE_BRC_DATA,
2792         &bindingTable->dwBindingTableEntries[startBTI++]));
2793 
2794     if (!m_hwWalker)
2795     {
2796         eStatus = MOS_STATUS_UNKNOWN;
2797         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
2798         return eStatus;
2799     }
2800 
2801     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
2802     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
2803     walkerCodecParams.WalkerMode        = m_walkerMode;
2804     /* looping for Walker is needed at 32x32 block level in OPT case*/
2805     walkerCodecParams.dwResolutionX     = MOS_ALIGN_CEIL(m_frameWidth,  32) >> 5;
2806     walkerCodecParams.dwResolutionY     = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
2807     walkerCodecParams.bNoDependency     = true;
2808 
2809     MHW_WALKER_PARAMS walkerParams;
2810     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
2811         m_hwInterface,
2812         &walkerParams,
2813         &walkerCodecParams));
2814 
2815     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
2816         &cmdBuffer,
2817         &walkerParams));
2818 
2819     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
2820         encFunctionType,
2821         kernelState,
2822         &cmdBuffer));
2823 
2824     return eStatus;
2825 }
2826 
Encode8x8PUKernel()2827 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PUKernel()
2828 {
2829     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2830 
2831     CODECHAL_ENCODE_FUNCTION_ENTER;
2832 
2833     PerfTagSetting perfTag;
2834     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_PU);
2835 
2836     uint32_t krnIdx = CODECHAL_HEVC_MBENC_8x8PU;
2837     auto kernelState = &m_mbEncKernelStates[krnIdx];
2838     auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
2839     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
2840     {
2841         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
2842     }
2843 
2844     // Setup DSH
2845     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
2846         m_stateHeapInterface,
2847         kernelState,
2848         false,
2849         0,
2850         false,
2851         m_storeData));
2852 
2853     // Setup CURBE
2854     uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
2855     CODECHAL_FEI_HEVC_I_8x8_PU_CURBE_G9 cmd, *curbe = &cmd;
2856     MOS_ZeroMemory(curbe, sizeof(*curbe));
2857 
2858     curbe->DW0.FrameWidth          = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
2859     curbe->DW0.FrameHeight         = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
2860     curbe->DW1.SliceType       = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
2861     curbe->DW1.PuType          = 2; // 8x8
2862     curbe->DW1.DcFilterFlag    = true;
2863     curbe->DW1.AngleRefineFlag = true;
2864     curbe->DW1.LCUType         = (log2MaxCUSize==6)? 0 /*64x64*/ : 1 /*32x32*/;
2865     curbe->DW1.ScreenContentFlag = m_hevcPicParams->bScreenContent;
2866     curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
2867     curbe->DW1.EnableDebugDump = false;
2868     curbe->DW1.BRCEnable       = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
2869     curbe->DW1.LCUBRCEnable    = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
2870     curbe->DW1.ROIEnable       = (m_hevcPicParams->NumROI > 0);
2871     curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
2872     curbe->DW1.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
2873     curbe->DW1.QPValue = CalSliceQp();
2874     if (m_hevcPicParams->bEnableRollingIntraRefresh)
2875     {
2876         curbe->DW1.EnableRollingIntra   = true;
2877         curbe->DW1.IntraRefreshEn       = true;
2878         curbe->DW1.HalfUpdateMixedLCU   = 0;
2879 
2880         curbe->DW5.IntraRefreshMBNum    = m_hevcPicParams->IntraInsertionLocation;
2881         curbe->DW5.IntraRefreshQPDelta  = m_hevcPicParams->QpDeltaForInsertedIntra;
2882         curbe->DW5.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
2883 
2884         int32_t qp = CalSliceQp();
2885         curbe->DW1.QPValue              = (uint32_t)qp;
2886     }
2887 
2888     curbe->DW2.LumaLambda      = m_fixedPointLambdaForLuma;
2889 
2890     curbe->DW3.ChromaLambda    = m_fixedPointLambdaForChroma;
2891 
2892     if (m_encodeParams.bReportStatisticsEnabled)
2893     {
2894         curbe->DW4.HaarTransformFlag   = true;
2895     }
2896     else
2897     {
2898         curbe->DW4.HaarTransformFlag   = (m_hevcPicParams->CodingType == I_TYPE) ? false : true;
2899     }
2900     curbe->DW4.SimplifiedFlagForInter  = false;
2901 
2902     uint32_t startBTI = 0;
2903     curbe->DW8.BTI_Src_Y           = bindingTable->dwBindingTableEntries[startBTI++];
2904     startBTI++; // skip one BTI for Y and UV have the same BTI
2905     curbe->DW9.BTI_Slice_Map       = bindingTable->dwBindingTableEntries[startBTI++];
2906     curbe->DW10.BTI_VME_8x8_Mode    = bindingTable->dwBindingTableEntries[startBTI++];
2907     curbe->DW11.BTI_Intra_Mode     = bindingTable->dwBindingTableEntries[startBTI++];
2908     curbe->DW12.BTI_BRC_Input      = bindingTable->dwBindingTableEntries[startBTI++];
2909     curbe->DW13.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startBTI++];
2910     curbe->DW14.BTI_LCU_Qp_Surface = bindingTable->dwBindingTableEntries[startBTI++];
2911     curbe->DW15.BTI_BRC_Data       = bindingTable->dwBindingTableEntries[startBTI++];
2912     curbe->DW16.BTI_Debug          = bindingTable->dwBindingTableEntries[startBTI++];
2913 
2914     CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
2915 
2916     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_8x8_PU;
2917     CODECHAL_ENCODE_CHK_STATUS_RETURN(
2918         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
2919 
2920     MOS_COMMAND_BUFFER cmdBuffer;
2921     if(m_numMb8x8IntraKernelSplit == 0)
2922     {
2923         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(&cmdBuffer,
2924             kernelState,
2925             encFunctionType,
2926             nullptr));
2927     }
2928     else
2929     {
2930         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2931 
2932         MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
2933         MOS_ZeroMemory(&idParams, sizeof(idParams));
2934         idParams.pKernelState = kernelState;
2935         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
2936             m_stateHeapInterface,
2937             1,
2938             &idParams));
2939 
2940         // Add binding table
2941         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
2942             m_stateHeapInterface,
2943             kernelState));
2944     }
2945 
2946     //Add surface states
2947     startBTI = 0;
2948 
2949     // Source Y and UV
2950     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2951         kernelState,
2952         &cmdBuffer,
2953         SURFACE_RAW_Y_UV,
2954         &bindingTable->dwBindingTableEntries[startBTI++]));
2955     startBTI++;
2956 
2957     // Slice Map
2958     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2959         kernelState,
2960         &cmdBuffer,
2961         SURFACE_SLICE_MAP,
2962         &bindingTable->dwBindingTableEntries[startBTI++]));
2963 
2964     // VME 8x8 mode
2965     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2966         kernelState,
2967         &cmdBuffer,
2968         SURFACE_VME_8x8,
2969         &bindingTable->dwBindingTableEntries[startBTI++]));
2970 
2971     // Intra mode
2972     m_surfaceParams[SURFACE_INTRA_MODE].bIsWritable   =
2973     m_surfaceParams[SURFACE_INTRA_MODE].bRenderTarget = true;
2974     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2975         kernelState,
2976         &cmdBuffer,
2977         SURFACE_INTRA_MODE,
2978         &bindingTable->dwBindingTableEntries[startBTI++]));
2979 
2980     // BRC Input
2981     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2982         kernelState,
2983         &cmdBuffer,
2984         SURFACE_BRC_INPUT,
2985         &bindingTable->dwBindingTableEntries[startBTI++]));
2986 
2987     // Simplest Intra
2988     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2989         kernelState,
2990         &cmdBuffer,
2991         SURFACE_SIMPLIFIED_INTRA,
2992         &bindingTable->dwBindingTableEntries[startBTI++]));
2993 
2994     // LCU Qp surface
2995     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2996         kernelState,
2997         &cmdBuffer,
2998         SURFACE_LCU_QP,
2999         &bindingTable->dwBindingTableEntries[startBTI++]));
3000 
3001     // BRC data surface
3002     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3003         kernelState,
3004         &cmdBuffer,
3005         SURFACE_BRC_DATA,
3006         &bindingTable->dwBindingTableEntries[startBTI++]));
3007 
3008     if (!m_hwWalker)
3009     {
3010         eStatus = MOS_STATUS_UNKNOWN;
3011         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
3012         return eStatus;
3013     }
3014 
3015     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
3016     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
3017     walkerCodecParams.WalkerMode        = m_walkerMode;
3018     // each EU is based on one 8x8 block
3019     walkerCodecParams.dwResolutionX     = MOS_ALIGN_CEIL(m_frameWidth,    CODECHAL_MACROBLOCK_WIDTH)  >> 3;
3020     walkerCodecParams.dwResolutionY     = MOS_ALIGN_CEIL(m_frameHeight,   CODECHAL_MACROBLOCK_HEIGHT) >> 3;
3021     /* Enforce no dependency dispatch order for 8x8 PU kernel  */
3022     walkerCodecParams.bNoDependency     = true;
3023 
3024     if(m_numMb8x8IntraKernelSplit == 0)
3025     {
3026         MHW_WALKER_PARAMS walkerParams;
3027         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
3028             m_hwInterface,
3029             &walkerParams,
3030             &walkerCodecParams));
3031 
3032         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
3033             &cmdBuffer,
3034             &walkerParams));
3035     }
3036     else
3037     {
3038         uint32_t numRowPerSplit = (walkerCodecParams.dwResolutionY + m_numMb8x8IntraKernelSplit - 1) / m_numMb8x8IntraKernelSplit;
3039         uint32_t currentNumRow = 0;
3040 
3041         for(uint32_t i = 0; i < m_numMb8x8IntraKernelSplit; i++)
3042         {
3043             // Program render engine pipe commands
3044             SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
3045             sendKernelCmdsParams.EncFunctionType        = encFunctionType;
3046             sendKernelCmdsParams.pKernelState           = kernelState;
3047             sendKernelCmdsParams.bEnableCustomScoreBoard= true;
3048             sendKernelCmdsParams.pCustomScoreBoard      = &m_walkingPatternParam.ScoreBoard;
3049             CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
3050 
3051             MHW_WALKER_PARAMS walkerParams;
3052             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
3053                 m_hwInterface,
3054                 &walkerParams,
3055                 &walkerCodecParams));
3056 
3057             if(currentNumRow + numRowPerSplit >= walkerCodecParams.dwResolutionY)
3058             {
3059                 // the last split may not have the same number of rows as previous splits
3060                 numRowPerSplit = walkerCodecParams.dwResolutionY - currentNumRow;
3061             }
3062 
3063             walkerParams.LocalStart.y = currentNumRow;
3064             walkerParams.dwLocalLoopExecCount = numRowPerSplit * walkerCodecParams.dwResolutionX;
3065 
3066             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
3067                 &cmdBuffer,
3068                 &walkerParams));
3069 
3070             currentNumRow += numRowPerSplit;
3071             if(currentNumRow >= walkerCodecParams.dwResolutionY)
3072             {
3073                 break;
3074             }
3075         }
3076     }
3077 
3078     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
3079         encFunctionType,
3080         kernelState,
3081         &cmdBuffer));
3082 
3083     return eStatus;
3084 }
3085 
Encode8x8PUFMODEKernel()3086 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PUFMODEKernel()
3087 {
3088     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3089 
3090     CODECHAL_ENCODE_FUNCTION_ENTER;
3091 
3092     PerfTagSetting perfTag;
3093     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_FMODE);
3094 
3095     uint32_t krnIdx = CODECHAL_HEVC_MBENC_8x8FMODE;
3096     auto kernelState = &m_mbEncKernelStates[krnIdx];
3097     auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
3098     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
3099     {
3100         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
3101     }
3102 
3103     // Setup DSH
3104     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
3105         m_stateHeapInterface,
3106         kernelState,
3107         false,
3108         0,
3109         false,
3110         m_storeData));
3111 
3112     // Setup CURBE
3113     int32_t qp = CalSliceQp();
3114     uint32_t sliceQp = (uint32_t)qp;
3115     uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
3116 
3117     CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9 cmd, *curbe = &cmd;
3118     MOS_ZeroMemory(curbe, sizeof(*curbe));
3119     curbe->DW0.FrameWidth                  = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
3120     curbe->DW0.FrameHeight                 = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
3121 
3122     curbe->DW1.SliceType                   = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
3123     curbe->DW1.PuType                      = 2;
3124     curbe->DW1.PakReordingFlag             = (m_hevcPicParams->CodingType == I_TYPE)? true : false;
3125     curbe->DW1.LCUType                     = (log2MaxCUSize == 6)? 0 /*64x64*/: 1 /*32x32*/;
3126     curbe->DW1.ScreenContentFlag           = m_hevcPicParams->bScreenContent;
3127     curbe->DW1.EnableIntraEarlyExit        = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
3128     curbe->DW1.EnableDebugDump             = false;
3129     curbe->DW1.BRCEnable                   = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
3130     curbe->DW1.LCUBRCEnable                = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
3131     curbe->DW1.ROIEnable                   = (m_hevcPicParams->NumROI > 0);
3132     curbe->DW1.FASTSurveillanceFlag        = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
3133     curbe->DW1.EnableRollingIntra          = m_hevcPicParams->bEnableRollingIntraRefresh;
3134     curbe->DW1.IntraRefreshEn              = m_hevcPicParams->bEnableRollingIntraRefresh;
3135     curbe->DW1.HalfUpdateMixedLCU          = 0;
3136     curbe->DW1.EnableQualityImprovement    = m_encodeParams.bQualityImprovementEnable;
3137     curbe->DW2.LambdaForLuma               = m_fixedPointLambdaForLuma;
3138 
3139     if (m_hevcPicParams->CodingType != I_TYPE ||
3140             m_encodeParams.bReportStatisticsEnabled)
3141     {
3142         float hadBias = 2.0f;
3143 
3144         double lambdaMd = m_qpLambdaMd[curbe->DW1.SliceType][sliceQp];
3145         lambdaMd = lambdaMd * hadBias;
3146         curbe->DW3.LambdaForDistCalculation = (uint32_t)(lambdaMd*(1<<10));
3147     }
3148     curbe->DW4.ModeCostFor8x8PU_TU8      = 0;
3149     curbe->DW5.ModeCostFor8x8PU_TU4      = 0;
3150     curbe->DW6.SATD16x16PuThreshold      = MOS_MAX(200 * ((int32_t)sliceQp - 12), 0);
3151     curbe->DW6.BiasFactorToward8x8       = (m_hevcPicParams->bScreenContent) ? 1024 : 1126+102;
3152     curbe->DW7.Qp                        = sliceQp;
3153     curbe->DW7.QpForInter                = 0;
3154     curbe->DW8.SimplifiedFlagForInter    = false;
3155     curbe->DW8.EnableStatsDataDump       = m_encodeParams.bReportStatisticsEnabled;
3156     // KBLControlFlag determines the PAK OBJ format as it varies from Gen9 to Gen9.5+
3157     curbe->DW8.KBLControlFlag            = UsePlatformControlFlag();
3158     curbe->DW9.IntraRefreshMBNum         = m_hevcPicParams->IntraInsertionLocation;
3159     curbe->DW9.IntraRefreshQPDelta       = m_hevcPicParams->QpDeltaForInsertedIntra;
3160     curbe->DW9.IntraRefreshUnitInMB      = m_hevcPicParams->IntraInsertionSize;
3161 
3162     uint32_t startBTI = 0;
3163     curbe->DW16.BTI_PAK_Object           = bindingTable->dwBindingTableEntries[startBTI++];
3164     curbe->DW17.BTI_VME_8x8_Mode         = bindingTable->dwBindingTableEntries[startBTI++];
3165     curbe->DW18.BTI_Intra_Mode           = bindingTable->dwBindingTableEntries[startBTI++];
3166     curbe->DW19.BTI_PAK_Command          = bindingTable->dwBindingTableEntries[startBTI++];
3167     curbe->DW20.BTI_Slice_Map            = bindingTable->dwBindingTableEntries[startBTI++];
3168     curbe->DW21.BTI_IntraDist            = bindingTable->dwBindingTableEntries[startBTI++];
3169     curbe->DW22.BTI_BRC_Input            = bindingTable->dwBindingTableEntries[startBTI++];
3170     curbe->DW23.BTI_Simplest_Intra       = bindingTable->dwBindingTableEntries[startBTI++];
3171     curbe->DW24.BTI_LCU_Qp_Surface       = bindingTable->dwBindingTableEntries[startBTI++];
3172     curbe->DW25.BTI_BRC_Data             = bindingTable->dwBindingTableEntries[startBTI++];
3173     curbe->DW26.BTI_Haar_Dist16x16       = bindingTable->dwBindingTableEntries[startBTI++];
3174     curbe->DW27.BTI_Stats_Data           = bindingTable->dwBindingTableEntries[startBTI++];
3175     curbe->DW28.BTI_Frame_Stats_Data     = bindingTable->dwBindingTableEntries[startBTI++];
3176     curbe->DW29.BTI_CTB_Distortion_Surface = 0;
3177     startBTI++;
3178     curbe->DW30.BTI_Debug                = bindingTable->dwBindingTableEntries[startBTI++];
3179 
3180     CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
3181 
3182     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_8x8_PU_FMODE;
3183     CODECHAL_ENCODE_CHK_STATUS_RETURN(
3184         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
3185 
3186     MOS_COMMAND_BUFFER cmdBuffer;
3187     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
3188         &cmdBuffer,
3189         kernelState,
3190         encFunctionType,
3191         nullptr));
3192 
3193     //Add surface states
3194     startBTI = 0;
3195 
3196     // PAK object
3197     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3198         kernelState,
3199         &cmdBuffer,
3200         SURFACE_CU_RECORD,
3201         &bindingTable->dwBindingTableEntries[startBTI++]));
3202 
3203     // VME 8x8 mode
3204     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3205         kernelState,
3206         &cmdBuffer,
3207         SURFACE_VME_8x8,
3208         &bindingTable->dwBindingTableEntries[startBTI++]));
3209 
3210     // Intra mode
3211     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3212         kernelState,
3213         &cmdBuffer,
3214         SURFACE_INTRA_MODE,
3215         &bindingTable->dwBindingTableEntries[startBTI++]));
3216 
3217     // PAK command
3218     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3219         kernelState,
3220         &cmdBuffer,
3221         SURFACE_HCP_PAK,
3222         &bindingTable->dwBindingTableEntries[startBTI++]));
3223 
3224     // Slice Map
3225     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3226         kernelState,
3227         &cmdBuffer,
3228         SURFACE_SLICE_MAP,
3229         &bindingTable->dwBindingTableEntries[startBTI++]));
3230 
3231     // Intra dist
3232     m_surfaceParams[SURFACE_INTRA_DIST].bIsWritable   =
3233     m_surfaceParams[SURFACE_INTRA_DIST].bRenderTarget = true;
3234     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3235         kernelState,
3236         &cmdBuffer,
3237         SURFACE_INTRA_DIST,
3238         &bindingTable->dwBindingTableEntries[startBTI++]));
3239 
3240     // BRC Input
3241     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3242         kernelState,
3243         &cmdBuffer,
3244         SURFACE_BRC_INPUT,
3245         &bindingTable->dwBindingTableEntries[startBTI++]));
3246 
3247     // Simplest Intra
3248     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3249         kernelState,
3250         &cmdBuffer,
3251         SURFACE_SIMPLIFIED_INTRA,
3252         &bindingTable->dwBindingTableEntries[startBTI++]));
3253 
3254     // LCU Qp surface
3255     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3256         kernelState,
3257         &cmdBuffer,
3258         SURFACE_LCU_QP,
3259         &bindingTable->dwBindingTableEntries[startBTI++]));
3260 
3261     // BRC data surface
3262     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3263         kernelState,
3264         &cmdBuffer,
3265         SURFACE_BRC_DATA,
3266         &bindingTable->dwBindingTableEntries[startBTI++]));
3267 
3268     // skip haar distortion surface, statstics data dump surface
3269     // and frame level statstics data surface because they are not used
3270 
3271     if (!m_hwWalker)
3272     {
3273         eStatus = MOS_STATUS_UNKNOWN;
3274         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
3275         return eStatus;
3276     }
3277 
3278     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
3279     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
3280     walkerCodecParams.WalkerMode        = m_walkerMode;
3281     // each EU is based on one LCU
3282     walkerCodecParams.dwResolutionX     = MOS_ALIGN_CEIL(m_frameWidth,    (1<<log2MaxCUSize)) >> log2MaxCUSize;
3283     walkerCodecParams.dwResolutionY     = MOS_ALIGN_CEIL(m_frameHeight,   (1<<log2MaxCUSize)) >> log2MaxCUSize;
3284     /* Enforce no dependency dispatch order for 8x8 PU FMODE kernel  */
3285     walkerCodecParams.bNoDependency     = true;
3286 
3287     MHW_WALKER_PARAMS walkerParams;
3288     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
3289         m_hwInterface,
3290         &walkerParams,
3291         &walkerCodecParams));
3292 
3293     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
3294         &cmdBuffer,
3295         &walkerParams));
3296 
3297     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
3298         encFunctionType,
3299         kernelState,
3300         &cmdBuffer));
3301 
3302     return eStatus;
3303 }
3304 
Encode32X32BIntraCheckKernel()3305 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode32X32BIntraCheckKernel()
3306 {
3307     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3308 
3309     CODECHAL_ENCODE_FUNCTION_ENTER;
3310 
3311     PerfTagSetting perfTag;
3312     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_32X32_B_IC);
3313 
3314     uint32_t krnIdx = CODECHAL_HEVC_MBENC_32x32INTRACHECK;
3315     auto kernelState = &m_mbEncKernelStates[krnIdx];
3316     auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
3317 
3318     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
3319     {
3320         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
3321     }
3322 
3323     // Setup DSH
3324     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
3325         m_stateHeapInterface,
3326         kernelState,
3327         false,
3328         0,
3329         false,
3330         m_storeData));
3331 
3332     // Setup CURBE
3333     if (m_pictureCodingType == P_TYPE)
3334     {
3335         CalcLambda(CODECHAL_ENCODE_HEVC_P_SLICE, INTRA_TRANSFORM_HAAR);
3336     }
3337     else
3338     {
3339         CalcLambda(CODECHAL_ENCODE_HEVC_B_SLICE, INTRA_TRANSFORM_HAAR);
3340     }
3341     int32_t sliceQp = CalSliceQp();
3342 
3343     double lambdaScalingFactor = 1.0;
3344     double qpLambda = m_qpLambdaMd[CODECHAL_ENCODE_HEVC_I_SLICE][sliceQp];
3345     double squaredQpLambda = qpLambda * qpLambda;
3346     m_fixedPointLambda = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
3347 
3348     CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9 cmd, *curbe = &cmd;
3349     MOS_ZeroMemory(curbe, sizeof(*curbe));
3350     curbe->DW0.FrameWidth      = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
3351     curbe->DW0.FrameHeight     = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
3352 
3353     curbe->DW1.EnableDebugDump = false;
3354     curbe->DW1.EnableIntraEarlyExit = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1;
3355     curbe->DW1.Flags           = 0;
3356     curbe->DW1.Log2MinTUSize   = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
3357     curbe->DW1.SliceType       = m_hevcSliceParams->slice_type;
3358     curbe->DW1.HMEEnable       = 0;
3359     curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
3360 
3361     curbe->DW2.QpMultiplier    = 100;
3362     curbe->DW2.QpValue         = 0;     // MBZ
3363 
3364     uint32_t startIndex = 0;
3365     curbe->DW8.BTI_Per32x32PuIntraCheck    = bindingTable->dwBindingTableEntries[startIndex++];
3366     curbe->DW9.BTI_Src_Y            = bindingTable->dwBindingTableEntries[startIndex++];
3367     startIndex++; // skip one BTI for Y and UV have the same BTI
3368     curbe->DW10.BTI_Src_Y2X         = bindingTable->dwBindingTableEntries[startIndex++];
3369     curbe->DW11.BTI_Slice_Map       = bindingTable->dwBindingTableEntries[startIndex++];
3370     curbe->DW12.BTI_VME_Y2X         = bindingTable->dwBindingTableEntries[startIndex++];
3371     curbe->DW13.BTI_Simplest_Intra  = bindingTable->dwBindingTableEntries[startIndex++];
3372     curbe->DW14.BTI_HME_MVPred      = bindingTable->dwBindingTableEntries[startIndex++];
3373     curbe->DW15.BTI_HME_Dist        = bindingTable->dwBindingTableEntries[startIndex++];
3374     curbe->DW16.BTI_LCU_Skip        = bindingTable->dwBindingTableEntries[startIndex++];
3375     curbe->DW17.BTI_Debug           = bindingTable->dwBindingTableEntries[startIndex++];
3376 
3377     CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
3378 
3379     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_32x32_B_INTRA_CHECK;
3380     CODECHAL_ENCODE_CHK_STATUS_RETURN(
3381         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
3382 
3383     MOS_COMMAND_BUFFER cmdBuffer;
3384     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
3385         &cmdBuffer,
3386         kernelState,
3387         encFunctionType,
3388         nullptr));
3389 
3390     //Add surface states
3391     startIndex = 0;
3392 
3393     // 32x32 PU B Intra Check Output
3394     m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bIsWritable   =
3395     m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bRenderTarget = true;
3396     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3397         kernelState,
3398         &cmdBuffer,
3399         SURFACE_32x32_PU_OUTPUT,
3400         &bindingTable->dwBindingTableEntries[startIndex++]));
3401 
3402     // Source Y and UV
3403     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3404         kernelState,
3405         &cmdBuffer,
3406         SURFACE_RAW_Y_UV,
3407         &bindingTable->dwBindingTableEntries[startIndex++]));
3408     startIndex++;
3409 
3410     // Source Y2x
3411     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3412         kernelState,
3413         &cmdBuffer,
3414         SURFACE_Y_2X,
3415         &bindingTable->dwBindingTableEntries[startIndex++]));
3416 
3417     // Slice map
3418     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3419         kernelState,
3420         &cmdBuffer,
3421         SURFACE_SLICE_MAP,
3422         &bindingTable->dwBindingTableEntries[startIndex++]));
3423 
3424     // Source Y2x for VME
3425     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3426         kernelState,
3427         &cmdBuffer,
3428         SURFACE_Y_2X_VME,
3429         &bindingTable->dwBindingTableEntries[startIndex++]));
3430 
3431     // Simplest Intra
3432     m_surfaceParams[SURFACE_SIMPLIFIED_INTRA].bIsWritable   =
3433     m_surfaceParams[SURFACE_SIMPLIFIED_INTRA].bRenderTarget = true;
3434     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3435         kernelState,
3436         &cmdBuffer,
3437         SURFACE_SIMPLIFIED_INTRA,
3438         &bindingTable->dwBindingTableEntries[startIndex++]));
3439 
3440     // skip SURFACE_HME_MVP and SURFACE_HME_DIST from HME since FEI alsways disables HME
3441     startIndex += 2;
3442 
3443     // LCU Qp/Skip surface
3444     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3445         kernelState,
3446         &cmdBuffer,
3447         SURFACE_LCU_QP,
3448         &bindingTable->dwBindingTableEntries[startIndex++]));
3449 
3450     if (!m_hwWalker)
3451     {
3452         eStatus = MOS_STATUS_UNKNOWN;
3453         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
3454         return eStatus;
3455     }
3456 
3457     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
3458     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
3459     walkerCodecParams.WalkerMode        = m_walkerMode;
3460     /* looping for Walker is needed at 8x8 block level */
3461     walkerCodecParams.dwResolutionX     = MOS_ALIGN_CEIL(m_frameWidth,  32) >> 5;
3462     walkerCodecParams.dwResolutionY     = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
3463     /* Enforce no dependency dispatch order for 32x32 B Intra Check kernel  */
3464     walkerCodecParams.bNoDependency     = true;
3465 
3466     MHW_WALKER_PARAMS walkerParams;
3467     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
3468         m_hwInterface,
3469         &walkerParams,
3470         &walkerCodecParams));
3471 
3472     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
3473         &cmdBuffer,
3474         &walkerParams));
3475 
3476     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
3477         encFunctionType,
3478         kernelState,
3479         &cmdBuffer));
3480 
3481     return eStatus;
3482 }
3483 
Encode8x8BPakKernel(PCODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 pEncBCurbe)3484 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8BPakKernel(
3485     PCODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 pEncBCurbe)
3486 {
3487     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3488 
3489     CODECHAL_ENCODE_FUNCTION_ENTER;
3490 
3491     CODECHAL_ENCODE_CHK_NULL_RETURN(pEncBCurbe);
3492 
3493     PerfTagSetting perfTag;
3494     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_KERNEL);
3495 
3496     uint32_t krnIdx = CODECHAL_HEVC_FEI_MBENC_BPAK;
3497     auto kernelState = &m_mbEncKernelStates[krnIdx];
3498     auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
3499     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
3500     {
3501         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
3502     }
3503 
3504     //Setup DSH
3505     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
3506         m_stateHeapInterface,
3507         kernelState,
3508         false,
3509         0,
3510         false,
3511         m_storeData));
3512 
3513     //Setup CURBE
3514     CODECHAL_FEI_HEVC_B_PAK_CURBE_G9  cmd, *curbe = &cmd;
3515     MOS_ZeroMemory(curbe, sizeof(*curbe));
3516     curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
3517     curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
3518 
3519     curbe->DW1.MaxVmvR                 = pEncBCurbe->DW44.MaxVmvR;
3520     curbe->DW1.Qp                      = pEncBCurbe->DW13.QpPrimeY;
3521     curbe->DW2.BrcEnable               = pEncBCurbe->DW36.BRCEnable;
3522     curbe->DW2.LcuBrcEnable            = pEncBCurbe->DW36.LCUBRCEnable;
3523     curbe->DW2.ScreenContent           = pEncBCurbe->DW47.ScreenContentFlag;
3524     curbe->DW2.SimplestIntraEnable     = pEncBCurbe->DW47.SkipIntraKrnFlag;
3525     curbe->DW2.SliceType               = pEncBCurbe->DW4.SliceType;
3526     curbe->DW2.EnableWA                = 0;
3527     curbe->DW2.ROIEnable               = (m_hevcPicParams->NumROI > 0);
3528     curbe->DW2.FASTSurveillanceFlag    = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
3529     // KBLControlFlag determines the PAK OBJ format as it varies from Gen9 to Gen9.5+
3530     curbe->DW2.KBLControlFlag          = UsePlatformControlFlag();
3531     curbe->DW2.EnableRollingIntra      = m_hevcPicParams->bEnableRollingIntraRefresh;
3532     curbe->DW2.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
3533     curbe->DW3.IntraRefreshQPDelta     = m_hevcPicParams->QpDeltaForInsertedIntra;
3534     curbe->DW3.IntraRefreshMBNum       = m_hevcPicParams->IntraInsertionLocation;
3535     curbe->DW3.IntraRefreshUnitInMB    = m_hevcPicParams->IntraInsertionSize;
3536 
3537     uint32_t startBTI = 0;
3538     curbe->DW16.BTI_CU_Record          = bindingTable->dwBindingTableEntries[startBTI++];
3539     curbe->DW17.BTI_PAK_Obj            = bindingTable->dwBindingTableEntries[startBTI++];
3540     curbe->DW18.BTI_Slice_Map          = bindingTable->dwBindingTableEntries[startBTI++];
3541     curbe->DW19.BTI_Brc_Input          = bindingTable->dwBindingTableEntries[startBTI++];
3542     curbe->DW20.BTI_LCU_Qp             = bindingTable->dwBindingTableEntries[startBTI++];
3543     curbe->DW21.BTI_Brc_Data           = bindingTable->dwBindingTableEntries[startBTI++];
3544     curbe->DW22.BTI_MB_Data            = bindingTable->dwBindingTableEntries[startBTI++];
3545     curbe->DW23.BTI_MVP_Surface        = bindingTable->dwBindingTableEntries[startBTI++];
3546     curbe->DW24.BTI_WA_PAK_Data        = bindingTable->dwBindingTableEntries[startBTI++];
3547     curbe->DW25.BTI_WA_PAK_Obj         = bindingTable->dwBindingTableEntries[startBTI++];
3548     curbe->DW26.BTI_Debug              = bindingTable->dwBindingTableEntries[startBTI++];
3549 
3550     CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
3551 
3552     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_B_PAK;
3553     CODECHAL_ENCODE_CHK_STATUS_RETURN(
3554         AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
3555 
3556     MOS_COMMAND_BUFFER cmdBuffer;
3557     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
3558         &cmdBuffer,
3559         kernelState,
3560         encFunctionType,
3561         nullptr));
3562 
3563     //Add surface states
3564     startBTI = 0;
3565     //0: CU record
3566     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3567         kernelState,
3568         &cmdBuffer,
3569         SURFACE_CU_RECORD,
3570         &bindingTable->dwBindingTableEntries[startBTI++]));
3571 
3572     //1: PAK command
3573     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3574         kernelState,
3575         &cmdBuffer,
3576         SURFACE_HCP_PAK,
3577         &bindingTable->dwBindingTableEntries[startBTI++]));
3578 
3579     //2: slice map
3580     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3581         kernelState,
3582         &cmdBuffer,
3583         SURFACE_SLICE_MAP,
3584         &bindingTable->dwBindingTableEntries[startBTI++]));
3585 
3586     // 3: BRC Input
3587     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3588         kernelState,
3589         &cmdBuffer,
3590         SURFACE_BRC_INPUT,
3591         &bindingTable->dwBindingTableEntries[startBTI++]));
3592 
3593     // 4: LCU Qp
3594     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3595         kernelState,
3596         &cmdBuffer,
3597         SURFACE_LCU_QP,
3598         &bindingTable->dwBindingTableEntries[startBTI++]));
3599 
3600     // 5: LCU BRC constant
3601     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3602         kernelState,
3603         &cmdBuffer,
3604         SURFACE_BRC_DATA,
3605         &bindingTable->dwBindingTableEntries[startBTI++]));
3606 
3607     // 6: MV index buffer or MB data
3608     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3609         kernelState,
3610         &cmdBuffer,
3611         SURFACE_MB_MV_INDEX,
3612         &bindingTable->dwBindingTableEntries[startBTI++]));
3613 
3614     // 7: MVP index buffer
3615     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3616         kernelState,
3617         &cmdBuffer,
3618         SURFACE_MVP_INDEX,
3619         &bindingTable->dwBindingTableEntries[startBTI++]));
3620 
3621     // skip 8 and 9 for SURFACE_WA_CU_RECORD and SURFACE_WA_HCP_PAK
3622 
3623     if (!m_hwWalker)
3624     {
3625         eStatus = MOS_STATUS_UNKNOWN;
3626         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
3627         return eStatus;
3628     }
3629 
3630     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
3631     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
3632     walkerCodecParams.WalkerMode            = m_walkerMode;
3633     /* looping for Walker is needed at 8x8 block level */
3634     walkerCodecParams.dwResolutionX         = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
3635     walkerCodecParams.dwResolutionY         = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
3636     /* Enforce no dependency dispatch order for 32x32 B Intra Check kernel  */
3637     walkerCodecParams.bNoDependency         = true;
3638     walkerCodecParams.wPictureCodingType    = m_pictureCodingType;
3639     walkerCodecParams.bUseScoreboard        = m_useHwScoreboard;
3640 
3641     MHW_WALKER_PARAMS walkerParams;
3642     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
3643         m_hwInterface,
3644         &walkerParams,
3645         &walkerCodecParams));
3646 
3647     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
3648         &cmdBuffer,
3649         &walkerParams));
3650 
3651     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
3652         encFunctionType,
3653         kernelState,
3654         &cmdBuffer));
3655 
3656     return eStatus;
3657 }
3658 
Encode8x8PBMbEncKernel()3659 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PBMbEncKernel()
3660 {
3661     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3662 
3663     CODECHAL_ENCODE_FUNCTION_ENTER;
3664 
3665     PerfTagSetting perfTag;
3666     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_MBENC_KERNEL);
3667 
3668     uint32_t krnIdx = CODECHAL_HEVC_FEI_MBENC_BENC;
3669     if (m_pictureCodingType == P_TYPE)
3670     {
3671         //krnIdx = m_hevcPicParams->bEnableRollingIntraRefresh ? CODECHAL_HEVC_FEI_MBENC_ADV_P : CODECHAL_HEVC_FEI_MBENC_PENC;
3672         krnIdx = CODECHAL_HEVC_FEI_MBENC_PENC;
3673     }
3674     else if (m_pictureCodingType == B_TYPE)
3675     {
3676         // In TU7, we still need the original ENC B kernel to process the I frame
3677         //krnIdx = m_hevcPicParams->bEnableRollingIntraRefresh ? CODECHAL_HEVC_FEI_MBENC_ADV : CODECHAL_HEVC_FEI_MBENC_BENC;
3678         krnIdx = CODECHAL_HEVC_FEI_MBENC_BENC;
3679     }
3680 
3681     auto kernelState = &m_mbEncKernelStates[krnIdx];
3682     auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
3683     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
3684     {
3685         CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
3686     }
3687 
3688     int32_t sliceQp = CalSliceQp();
3689     uint8_t sliceType = PicCodingTypeToSliceType(m_pictureCodingType);
3690 
3691     if (m_feiPicParams->FastIntraMode)
3692     {
3693         // When TU=7, lambda is not computed in the 32x32 MD stage for it is skipped.
3694         CalcLambda(sliceType, INTRA_TRANSFORM_HAAR);
3695     }
3696     LoadCosts(sliceType, (uint8_t)sliceQp, INTRA_TRANSFORM_REGULAR);
3697 
3698     uint8_t mbCodeIdxForTempMVP = 0xFF;
3699     if(m_pictureCodingType != I_TYPE)
3700     {
3701         if(m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC)
3702         {
3703             uint8_t FrameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx;
3704 
3705             mbCodeIdxForTempMVP = m_refList[FrameIdx]->ucScalingIdx;
3706         }
3707 
3708         if(mbCodeIdxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag)
3709         {
3710             // Temporal reference MV index is invalid and so disable the temporal MVP
3711             CODECHAL_ENCODE_ASSERT(false);
3712             m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
3713         }
3714     }
3715 
3716     CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateWalkingControlRegion());
3717 
3718     //Setup DSH
3719     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
3720         m_stateHeapInterface,
3721         kernelState,
3722         false,
3723         0,
3724         false,
3725         m_storeData));
3726 
3727     //Setup CURBE
3728     uint8_t forwardTransformThd[7] = { 0 };
3729     CalcForwardCoeffThd(forwardTransformThd, sliceQp);
3730 
3731     uint32_t curbeSize = 0;
3732     void *defaultCurbe = (void *)GetDefaultCurbeEncBKernel(curbeSize);
3733     CODECHAL_ENCODE_ASSERT(defaultCurbe);
3734 
3735     CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 cmd, *curbe = &cmd;
3736     CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(curbe, sizeof(cmd), defaultCurbe, curbeSize));
3737 
3738     bool transform_8x8_mode_flag = true;
3739     uint32_t SearchPath  = (m_feiPicParams->SearchWindow == 5) ? 2 : 1; // 2 means full search, 1 means diamand search
3740     uint32_t LenSP       = m_feiPicParams->LenSP;
3741     uint32_t RefWidth    = m_feiPicParams->RefWidth;
3742     uint32_t RefHeight   = m_feiPicParams->RefHeight;
3743 
3744     switch (m_feiPicParams->SearchWindow)
3745     {
3746     case 0:
3747         // not use predefined search window
3748         if((m_feiPicParams->SearchPath != 0) && (m_feiPicParams->SearchPath != 1) && (m_feiPicParams->SearchPath != 2))
3749         {
3750             CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC input SearchPath for SearchWindow=0 case!!!.");
3751             eStatus = MOS_STATUS_INVALID_PARAMETER;
3752             return eStatus;
3753         }
3754         SearchPath = m_feiPicParams->SearchPath;
3755         if(((RefWidth * RefHeight) > 2048) || (RefWidth > 64) || (RefHeight > 64))
3756         {
3757             CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC input RefWidth/RefHeight size for SearchWindow=0 case!!!.");
3758             eStatus = MOS_STATUS_INVALID_PARAMETER;
3759             return eStatus;
3760         }
3761         break;
3762     case 1:
3763         // Tiny SUs 24x24 window
3764         RefWidth  = 24;
3765         RefHeight = 24;
3766         LenSP     = 4;
3767         break;
3768     case 2:
3769         // Small SUs 28x28 window
3770         RefWidth  = 28;
3771         RefHeight = 28;
3772         LenSP     = 9;
3773         break;
3774     case 3:
3775         // Diamond SUs 48x40 window
3776         RefWidth  = 48;
3777         RefHeight = 40;
3778         LenSP     = 16;
3779         break;
3780     case 4:
3781         // Large Diamond SUs 48x40 window
3782         RefWidth  = 48;
3783         RefHeight = 40;
3784         LenSP     = 32;
3785         break;
3786     case 5:
3787         // Exhaustive SUs 48x40 window
3788         RefWidth  = 48;
3789         RefHeight = 40;
3790         LenSP     = 48;
3791         if (m_hevcSeqParams->TargetUsage != 7)
3792         {
3793             if (m_pictureCodingType == B_TYPE)
3794             {
3795                 LenSP = 48;
3796             } else {
3797                 LenSP = 57;
3798             }
3799         } else {
3800             LenSP = 25;
3801         }
3802         break;
3803     default:
3804         CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC SearchWindow value for HEVC FEI on SKL!!!.");
3805         eStatus = MOS_STATUS_INVALID_PARAMETER;
3806         return eStatus;
3807     }
3808 
3809     if((m_pictureCodingType == B_TYPE) && (curbe->DW3.BMEDisableFBR == 0))
3810     {
3811         if(RefWidth > 32)
3812         {
3813             RefWidth  = 32;
3814         }
3815         if(RefHeight > 32)
3816         {
3817             RefHeight = 32;
3818         }
3819     }
3820 
3821     curbe->DW0.AdaptiveEn  = m_feiPicParams->AdaptiveSearch;
3822     curbe->DW0.T8x8FlagForInterEn = transform_8x8_mode_flag;
3823     curbe->DW2.PicWidth    = m_picWidthInMb;
3824     curbe->DW2.LenSP       = LenSP;
3825     curbe->DW3.SrcAccess   = curbe->DW3.RefAccess = 0;
3826     if (m_feiPicParams->FastIntraMode)
3827     {
3828         curbe->DW3.FTEnable    = (m_ftqBasedSkip[0x07] >> 1) & 0x01;
3829     }
3830     else
3831     {
3832         curbe->DW3.FTEnable    = (m_ftqBasedSkip[0x04] >> 1) & 0x01;
3833     }
3834     curbe->DW3.SubPelMode  = m_feiPicParams->SubPelMode;
3835 
3836     curbe->DW4.PicHeightMinus1               = m_picHeightInMb - 1;
3837     curbe->DW4.EnableStatsDataDump           = m_encodeParams.bReportStatisticsEnabled;
3838     curbe->DW4.HMEEnable                     = 0;
3839     curbe->DW4.SliceType                     = sliceType;
3840     curbe->DW4.EnableQualityImprovement      = m_encodeParams.bQualityImprovementEnable;
3841     curbe->DW4.UseActualRefQPValue           = false;
3842 
3843     curbe->DW5.RefWidth                      = RefWidth;
3844     curbe->DW5.RefHeight                     = RefHeight;
3845 
3846     curbe->DW7.IntraPartMask                 = 0x3;
3847 
3848     curbe->DW6.FrameWidth                    = m_picWidthInMb  * CODECHAL_MACROBLOCK_WIDTH;
3849     curbe->DW6.FrameHeight                   = m_picHeightInMb * CODECHAL_MACROBLOCK_HEIGHT;
3850 
3851     curbe->DW8.Mode0Cost = m_modeCost[0];
3852     curbe->DW8.Mode1Cost = m_modeCost[1];
3853     curbe->DW8.Mode2Cost = m_modeCost[2];
3854     curbe->DW8.Mode3Cost = m_modeCost[3];
3855 
3856     curbe->DW9.Mode4Cost = m_modeCost[4];
3857     curbe->DW9.Mode5Cost = m_modeCost[5];
3858     curbe->DW9.Mode6Cost = m_modeCost[6];
3859     curbe->DW9.Mode7Cost = m_modeCost[7];
3860 
3861     curbe->DW10.Mode8Cost= m_modeCost[8];
3862     curbe->DW10.Mode9Cost= m_modeCost[9];
3863     curbe->DW10.RefIDCost = m_modeCost[10];
3864     curbe->DW10.ChromaIntraModeCost = m_modeCost[11];
3865 
3866     curbe->DW11.MV0Cost  = m_mvCost[0];
3867     curbe->DW11.MV1Cost  = m_mvCost[1];
3868     curbe->DW11.MV2Cost  = m_mvCost[2];
3869     curbe->DW11.MV3Cost  = m_mvCost[3];
3870 
3871     curbe->DW12.MV4Cost  = m_mvCost[4];
3872     curbe->DW12.MV5Cost  = m_mvCost[5];
3873     curbe->DW12.MV6Cost  = m_mvCost[6];
3874     curbe->DW12.MV7Cost  = m_mvCost[7];
3875 
3876     curbe->DW13.QpPrimeY = sliceQp;
3877     uint8_t bitDepthChromaMinus8 = 0; // support 4:2:0 only
3878     int32_t qpBdOffsetC = 6 * bitDepthChromaMinus8;
3879     int32_t qPi = (int32_t)CodecHal_Clip3((-qpBdOffsetC), 51, (sliceQp + m_hevcPicParams->pps_cb_qp_offset));
3880     int32_t QPc = (qPi < 30) ? qPi : QPcTable[qPi - 30];
3881     curbe->DW13.QpPrimeCb= QPc + qpBdOffsetC;
3882     qPi = (int32_t)CodecHal_Clip3((-qpBdOffsetC), 51, (sliceQp + m_hevcPicParams->pps_cr_qp_offset));
3883     QPc = (qPi < 30) ? qPi : QPcTable[qPi - 30];
3884     curbe->DW13.QpPrimeCr= QPc;
3885 
3886     curbe->DW14.SICFwdTransCoeffThreshold_0 = forwardTransformThd[0];
3887     curbe->DW14.SICFwdTransCoeffThreshold_1 = forwardTransformThd[1];
3888     curbe->DW14.SICFwdTransCoeffThreshold_2 = forwardTransformThd[2];
3889 
3890     curbe->DW15.SICFwdTransCoeffThreshold_3 = forwardTransformThd[3];
3891     curbe->DW15.SICFwdTransCoeffThreshold_4 = forwardTransformThd[4];
3892     curbe->DW15.SICFwdTransCoeffThreshold_5 = forwardTransformThd[5];
3893     curbe->DW15.SICFwdTransCoeffThreshold_6 = forwardTransformThd[6];
3894 
3895     if (SearchPath == 1)
3896     {
3897         // diamond search
3898         if (m_pictureCodingType == P_TYPE)
3899         {
3900             CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe->DW16), 14 * sizeof(uint32_t), &(m_encBTu7PCurbeInit[16]), 14 * sizeof(uint32_t)));
3901         }
3902         else if (m_pictureCodingType == B_TYPE)
3903         {
3904             CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe->DW16), 14 * sizeof(uint32_t), &(m_encBTu7BCurbeInit[16]), 14 * sizeof(uint32_t)));
3905         }
3906     }
3907     else if((SearchPath != 0) && (SearchPath != 2))
3908     {
3909         // default 0 and 2 are full sparil search
3910         CODECHAL_ENCODE_ASSERT(false);
3911     }
3912 
3913     curbe->DW32.SkipVal = m_skipValB[curbe->DW3.BlockBasedSkipEnable][transform_8x8_mode_flag][sliceQp];
3914 
3915     if(m_pictureCodingType == I_TYPE)
3916     {
3917         *(float*)&(curbe->DW34.LambdaME) = 0.0;
3918     }
3919     else if (m_pictureCodingType == P_TYPE)
3920     {
3921         *(float*)&(curbe->DW34.LambdaME) = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_P_SLICE][sliceQp];
3922     }
3923     else
3924     {
3925         *(float*)&(curbe->DW34.LambdaME) = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_B_SLICE][sliceQp];
3926     }
3927 
3928     curbe->DW35.ModeCostSp                 = m_modeCostSp;
3929     curbe->DW35.SimpIntraInterThreshold    = m_simplestIntraInterThreshold;
3930 
3931     curbe->DW36.NumRefIdxL0MinusOne = m_hevcSliceParams->num_ref_idx_l0_active_minus1;
3932     curbe->DW36.NumRefIdxL1MinusOne = m_hevcSliceParams->num_ref_idx_l1_active_minus1;
3933     curbe->DW36.BRCEnable           = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
3934     curbe->DW36.LCUBRCEnable        = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
3935     curbe->DW36.PowerSaving         = m_powerSavingEnabled;
3936     curbe->DW36.ROIEnable           = (m_hevcPicParams->NumROI > 0);
3937     curbe->DW36.FASTSurveillanceFlag= (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
3938 
3939     if(m_pictureCodingType != I_TYPE)
3940     {
3941         curbe->DW37.ActualQpRefID0List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_0);
3942         curbe->DW37.ActualQpRefID1List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_1);
3943         curbe->DW37.ActualQpRefID2List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_2);
3944         curbe->DW37.ActualQpRefID3List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_3);
3945         curbe->DW41.TextureIntraCostThreshold = 500;
3946 
3947         if(m_pictureCodingType == B_TYPE) {
3948             curbe->DW39.ActualQpRefID0List1 = GetQPValueFromRefList(LIST_1, CODECHAL_ENCODE_REF_ID_0);
3949             curbe->DW39.ActualQpRefID1List1 = GetQPValueFromRefList(LIST_1, CODECHAL_ENCODE_REF_ID_1);
3950             float lambda_me = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_B_SLICE][sliceQp];
3951             if (m_encodeParams.bQualityImprovementEnable)
3952             {
3953                 curbe->DW40.TransformThreshold0 = (uint16_t) (lambda_me * 56.25 + 0.5);
3954                 curbe->DW40.TransformThreshold1 = (uint16_t) (lambda_me * 21 + 0.5);
3955                 curbe->DW41.TransformThreshold2 = (uint16_t) (lambda_me * 9 + 0.5);
3956             }
3957         }
3958     }
3959 
3960     curbe->DW42.NumMVPredictorsL0      = m_feiPicParams->NumMVPredictorsL0;
3961     curbe->DW42.NumMVPredictorsL1      = m_feiPicParams->NumMVPredictorsL1;
3962     curbe->DW42.PerLCUQP               = m_encodeParams.bMbQpDataEnabled;
3963     curbe->DW42.PerCTBInput            = m_feiPicParams->bPerCTBInput;
3964     curbe->DW42.CTBDistortionOutput    = m_feiPicParams->bDistortionEnable;
3965     curbe->DW42.MultiPredL0            = m_feiPicParams->MultiPredL0;
3966     curbe->DW42.MultiPredL1            = m_feiPicParams->MultiPredL1;
3967     curbe->DW42.MVPredictorBlockSize   = m_feiPicParams->MVPredictorInput;
3968 
3969     curbe->DW44.MaxVmvR                = 511 * 4;
3970     curbe->DW44.MaxNumMergeCandidates  = m_hevcSliceParams->MaxNumMergeCand;
3971 
3972     if(m_pictureCodingType != I_TYPE)
3973     {
3974         curbe->DW44.MaxNumRefList0         = curbe->DW36.NumRefIdxL0MinusOne + 1;
3975 
3976         curbe->DW45.TemporalMvpEnableFlag  = m_hevcSliceParams->slice_temporal_mvp_enable_flag;
3977         curbe->DW45.HMECombineLenPslice    = 8;
3978         if(m_pictureCodingType == B_TYPE)
3979         {
3980             curbe->DW44.MaxNumRefList1         = curbe->DW36.NumRefIdxL1MinusOne + 1;
3981             curbe->DW45.HMECombineLenBslice    = 8;
3982         }
3983     }
3984 
3985     curbe->DW45.Log2ParallelMergeLevel = m_hevcPicParams->log2_parallel_merge_level_minus2 + 2;
3986 
3987     curbe->DW46.Log2MaxTUSize          = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2;
3988     curbe->DW46.Log2MinTUSize          = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
3989     curbe->DW46.Log2MaxCUSize          = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
3990     curbe->DW46.Log2MinCUSize          = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
3991 
3992     curbe->DW47.NumRegionsInSlice      = m_numRegionsInSlice;
3993     curbe->DW47.TypeOfWalkingPattern   = m_enable26WalkingPattern;
3994     curbe->DW47.ChromaFlatnessCheckFlag= (m_feiPicParams->FastIntraMode) ? 0 : 1;
3995     curbe->DW47.EnableIntraEarlyExit   = (m_feiPicParams->FastIntraMode) ? 0 : 1;
3996     curbe->DW47.SkipIntraKrnFlag       = (m_feiPicParams->FastIntraMode) ? 1 : 0;
3997     curbe->DW47.CollocatedFromL0Flag   = m_hevcSliceParams->collocated_from_l0_flag;
3998     curbe->DW47.IsLowDelay             = m_lowDelay;
3999     curbe->DW47.ScreenContentFlag      = m_hevcPicParams->bScreenContent;
4000     curbe->DW47.MultiSliceFlag         = (m_numSlices > 1);
4001     curbe->DW47.ArbitarySliceFlag      = m_arbitraryNumMbsInSlice;
4002     curbe->DW47.NumRegionMinus1        = m_walkingPatternParam.dwNumRegion - 1;
4003 
4004     if(m_pictureCodingType != I_TYPE)
4005     {
4006         curbe->DW48.CurrentTdL0_0          = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][0]);
4007         curbe->DW48.CurrentTdL0_1          = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][1]);
4008         curbe->DW49.CurrentTdL0_2          = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][2]);
4009         curbe->DW49.CurrentTdL0_3          = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][3]);
4010         if(m_pictureCodingType == B_TYPE) {
4011             curbe->DW50.CurrentTdL1_0          = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[1][0]);
4012             curbe->DW50.CurrentTdL1_1          = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[1][1]);
4013         }
4014     }
4015 
4016     curbe->DW52.NumofUnitInRegion          = m_walkingPatternParam.dwNumUnitsInRegion;
4017     curbe->DW52.MaxHeightInRegion          = m_walkingPatternParam.dwMaxHeightInRegion;
4018 
4019     uint32_t startBTI = 0;
4020     curbe->DW56.BTI_CU_Record                  = bindingTable->dwBindingTableEntries[startBTI++];
4021     curbe->DW57.BTI_PAK_Cmd                    = bindingTable->dwBindingTableEntries[startBTI++];
4022     curbe->DW58.BTI_Src_Y                      = bindingTable->dwBindingTableEntries[startBTI++];
4023     startBTI++; //skip UV index
4024     curbe->DW59.BTI_Intra_Dist                 = bindingTable->dwBindingTableEntries[startBTI++];
4025     curbe->DW60.BTI_Min_Dist                   = bindingTable->dwBindingTableEntries[startBTI++];
4026     curbe->DW61.BTI_HMEMVPredFwdBwdSurfIndex   = bindingTable->dwBindingTableEntries[startBTI++];
4027     curbe->DW62.BTI_HMEDistSurfIndex           = bindingTable->dwBindingTableEntries[startBTI++];
4028     curbe->DW63.BTI_Slice_Map                  = bindingTable->dwBindingTableEntries[startBTI++];
4029     curbe->DW64.BTI_VME_Saved_UNI_SIC          = bindingTable->dwBindingTableEntries[startBTI++];
4030     curbe->DW65.BTI_Simplest_Intra             = bindingTable->dwBindingTableEntries[startBTI++];
4031     curbe->DW66.BTI_Collocated_RefFrame        = bindingTable->dwBindingTableEntries[startBTI++];
4032     curbe->DW67.BTI_Reserved                   = bindingTable->dwBindingTableEntries[startBTI++];
4033     curbe->DW68.BTI_BRC_Input                  = bindingTable->dwBindingTableEntries[startBTI++];
4034     curbe->DW69.BTI_LCU_QP                     = bindingTable->dwBindingTableEntries[startBTI++];
4035     curbe->DW70.BTI_BRC_Data                   = bindingTable->dwBindingTableEntries[startBTI++];
4036     curbe->DW71.BTI_VMEInterPredictionSurfIndex= bindingTable->dwBindingTableEntries[startBTI++];
4037     if(m_pictureCodingType == P_TYPE)
4038     {
4039         //P MBEnc curbe 72~75 are different from B frame.
4040         startBTI += (CODECHAL_HEVC_P_MBENC_CONCURRENT_THD_MAP - CODECHAL_HEVC_P_MBENC_VME_FORWARD_0);
4041         curbe->DW72.BTI_ConcurrentThreadMap= bindingTable->dwBindingTableEntries[startBTI++];
4042         curbe->DW73.BTI_MB_Data_CurFrame   = bindingTable->dwBindingTableEntries[startBTI++];
4043         curbe->DW74.BTI_MVP_CurFrame       = bindingTable->dwBindingTableEntries[startBTI++];
4044         curbe->DW75.BTI_Haar_Dist16x16     = bindingTable->dwBindingTableEntries[startBTI++];
4045         curbe->DW76.BTI_Stats_Data         = bindingTable->dwBindingTableEntries[startBTI++];
4046         curbe->DW77.BTI_Frame_Stats_Data   = bindingTable->dwBindingTableEntries[startBTI++];
4047         curbe->DW78.BTI_MVPredictor_Surface= bindingTable->dwBindingTableEntries[startBTI++];
4048         curbe->DW79.BTI_CTB_Input_Surface  = bindingTable->dwBindingTableEntries[startBTI++];
4049         curbe->DW80.BTI_CTB_Distortion_Output_Surface = bindingTable->dwBindingTableEntries[startBTI++];
4050         curbe->DW81.BTI_Debug              = bindingTable->dwBindingTableEntries[startBTI++];
4051     }
4052     else
4053     {
4054         startBTI += (CODECHAL_HEVC_B_MBENC_VME_BACKWARD_7 - CODECHAL_HEVC_B_MBENC_VME_FORWARD_0 + 1);
4055 
4056         curbe->DW72.BTI_VMEInterPredictionBSurfIndex = bindingTable->dwBindingTableEntries[startBTI++];
4057         startBTI += (CODECHAL_HEVC_B_MBENC_VME_MUL_NOUSE_3 - CODECHAL_HEVC_B_MBENC_VME_MUL_BACKWARD_0 + 1);
4058 
4059         curbe->DW73.BTI_ConcurrentThreadMap= bindingTable->dwBindingTableEntries[startBTI++];
4060         curbe->DW74.BTI_MB_Data_CurFrame   = bindingTable->dwBindingTableEntries[startBTI++];
4061         curbe->DW75.BTI_MVP_CurFrame       = bindingTable->dwBindingTableEntries[startBTI++];
4062         curbe->DW76.BTI_Haar_Dist16x16     = bindingTable->dwBindingTableEntries[startBTI++];
4063         curbe->DW77.BTI_Stats_Data         = bindingTable->dwBindingTableEntries[startBTI++];
4064         curbe->DW78.BTI_Frame_Stats_Data   = bindingTable->dwBindingTableEntries[startBTI++];
4065         curbe->DW79.BTI_MVPredictor_Surface= bindingTable->dwBindingTableEntries[startBTI++];
4066         curbe->DW80.BTI_CTB_Input_Surface  = bindingTable->dwBindingTableEntries[startBTI++];
4067         curbe->DW81.BTI_CTB_Distortion_Output_Surface = bindingTable->dwBindingTableEntries[startBTI++];
4068         curbe->DW82.BTI_Debug              = bindingTable->dwBindingTableEntries[startBTI++];
4069     }
4070 
4071     // Intra refresh is enabled. Program related CURBE fields
4072     if (m_hevcPicParams->bEnableRollingIntraRefresh)
4073     {
4074         curbe->DW35.IntraRefreshEn     = true;
4075         curbe->DW35.FirstIntraRefresh  = m_firstIntraRefresh;
4076         curbe->DW35.HalfUpdateMixedLCU     = 0;
4077         curbe->DW35.EnableRollingIntra     = true;
4078 
4079         curbe->DW38.NumFrameInGOB            = m_frameNumInGob;
4080         curbe->DW38.NumIntraRefreshOffFrames = m_frameNumWithoutIntraRefresh;
4081 
4082         curbe->DW51.IntraRefreshQPDelta        = m_hevcPicParams->QpDeltaForInsertedIntra;
4083         curbe->DW51.IntraRefreshMBNum          = m_hevcPicParams->IntraInsertionLocation;
4084         curbe->DW51.IntraRefreshUnitInMB       = m_hevcPicParams->IntraInsertionSize;
4085 
4086         curbe->DW53.IntraRefreshRefHeight = 40;
4087         curbe->DW53.IntraRefreshRefWidth  = 48;
4088 
4089         m_firstIntraRefresh = false;
4090         m_frameNumWithoutIntraRefresh = 0;
4091     }
4092     else if (m_pictureCodingType != I_TYPE) // don't increment num frames w/o refresh in case of TU7 I frames
4093     {
4094         m_frameNumWithoutIntraRefresh++;
4095     }
4096 
4097     CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
4098 
4099     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_B_MBENC;
4100     if (m_pictureCodingType == P_TYPE)
4101     {
4102         //P frame curbe only use the DW0~DW75
4103         CODECHAL_ENCODE_CHK_STATUS_RETURN(
4104             AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd) - sizeof(uint32_t)));
4105     }
4106     else
4107     {
4108         CODECHAL_ENCODE_CHK_STATUS_RETURN(
4109             AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
4110     }
4111 
4112     MOS_COMMAND_BUFFER cmdBuffer;
4113     if(m_numMbBKernelSplit == 0)
4114     {
4115         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(&cmdBuffer,
4116             kernelState,
4117             encFunctionType,
4118             &m_walkingPatternParam.ScoreBoard));
4119     }
4120     else
4121     {
4122         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
4123 
4124         MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
4125         MOS_ZeroMemory(&idParams, sizeof(idParams));
4126         idParams.pKernelState = kernelState;
4127         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
4128             m_stateHeapInterface,
4129             1,
4130             &idParams));
4131 
4132         // Add binding table
4133         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
4134             m_stateHeapInterface,
4135             kernelState));
4136     }
4137 
4138     //Add surface states
4139     startBTI = 0;
4140 
4141     //0: CU record
4142     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4143         kernelState,
4144         &cmdBuffer,
4145         SURFACE_CU_RECORD,
4146         &bindingTable->dwBindingTableEntries[startBTI++]));
4147 
4148     //1: PAK command
4149     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4150         kernelState,
4151         &cmdBuffer,
4152         SURFACE_HCP_PAK,
4153         &bindingTable->dwBindingTableEntries[startBTI++]));
4154 
4155     //2 and 3 Source Y and UV
4156     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4157         kernelState,
4158         &cmdBuffer,
4159         SURFACE_RAW_Y_UV,
4160         &bindingTable->dwBindingTableEntries[startBTI++]));
4161     startBTI++;
4162 
4163     //4: Intra dist
4164     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4165         kernelState,
4166         &cmdBuffer,
4167         SURFACE_INTRA_DIST,
4168         &bindingTable->dwBindingTableEntries[startBTI++]));
4169 
4170     //5: min distortion
4171     m_surfaceParams[SURFACE_MIN_DIST].bIsWritable   =
4172     m_surfaceParams[SURFACE_MIN_DIST].bRenderTarget = true;
4173     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4174         kernelState,
4175         &cmdBuffer,
4176         SURFACE_MIN_DIST,
4177         &bindingTable->dwBindingTableEntries[startBTI++]));
4178 
4179     // 6 and 7, skip SURFACE_HME_MVP and SURFACE_HME_DIST from HME since FEI alsways disables HME
4180     startBTI += 2;
4181 
4182     //8: slice map
4183     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4184         kernelState,
4185         &cmdBuffer,
4186         SURFACE_SLICE_MAP,
4187         &bindingTable->dwBindingTableEntries[startBTI++]));
4188 
4189     //9: VME UNI and SIC data
4190     m_surfaceParams[SURFACE_VME_UNI_SIC_DATA].bIsWritable   =
4191     m_surfaceParams[SURFACE_VME_UNI_SIC_DATA].bRenderTarget = true;
4192     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4193         kernelState,
4194         &cmdBuffer,
4195         SURFACE_VME_UNI_SIC_DATA,
4196         &bindingTable->dwBindingTableEntries[startBTI++]));
4197 
4198     //10: Simplest Intra
4199     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4200         kernelState,
4201         &cmdBuffer,
4202         SURFACE_SIMPLIFIED_INTRA,
4203         &bindingTable->dwBindingTableEntries[startBTI++]));
4204 
4205     // 11: Reference frame col-located data surface
4206     if(mbCodeIdxForTempMVP == 0xFF)
4207     {
4208         startBTI++;
4209     }
4210     else
4211     {
4212         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4213             kernelState,
4214             &cmdBuffer,
4215             SURFACE_COL_MB_MV,
4216             &bindingTable->dwBindingTableEntries[startBTI++],
4217             m_trackedBuf->GetMvTemporalBuffer(mbCodeIdxForTempMVP)));
4218     }
4219 
4220     // 12: Current frame col-located data surface -- reserved now
4221     startBTI++;
4222 
4223     // 13: BRC Input
4224     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4225         kernelState,
4226         &cmdBuffer,
4227         SURFACE_BRC_INPUT,
4228         &bindingTable->dwBindingTableEntries[startBTI++]));
4229 
4230     // 14: LCU Qp
4231     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4232         kernelState,
4233         &cmdBuffer,
4234         SURFACE_LCU_QP,
4235         &bindingTable->dwBindingTableEntries[startBTI++]));
4236 
4237     // 15: LCU BRC constant
4238     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4239         kernelState,
4240         &cmdBuffer,
4241         SURFACE_BRC_DATA,
4242         &bindingTable->dwBindingTableEntries[startBTI++]));
4243 
4244     // 16 - 32 Current plus forward and backward surface 0-7
4245     //16: Source Y for VME
4246     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4247         kernelState,
4248         &cmdBuffer,
4249         SURFACE_RAW_VME,
4250         &bindingTable->dwBindingTableEntries[startBTI++]));
4251 
4252     for(uint32_t surfaceIdx = 0; surfaceIdx < 8; surfaceIdx++)
4253     {
4254         CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][surfaceIdx];
4255         if(!CodecHal_PictureIsInvalid(refPic) &&
4256             !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4257         {
4258             uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4259 
4260             // Picture Y VME
4261             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4262                 kernelState,
4263                 &cmdBuffer,
4264                 SURFACE_REF_FRAME_VME,
4265                 &bindingTable->dwBindingTableEntries[startBTI++],
4266                 &m_refList[idx]->sRefBuffer,
4267                 curbe->DW6.FrameWidth,
4268                 curbe->DW6.FrameHeight));
4269 
4270         }
4271         else
4272         {
4273             // Skip the binding table index because it is not used
4274             startBTI++;
4275         }
4276 
4277         refPic = m_hevcSliceParams->RefPicList[LIST_1][surfaceIdx];
4278         if(!CodecHal_PictureIsInvalid(refPic) &&
4279             !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4280         {
4281             uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4282 
4283             // Picture Y VME
4284             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4285                 kernelState,
4286                 &cmdBuffer,
4287                 SURFACE_REF_FRAME_VME,
4288                 &bindingTable->dwBindingTableEntries[startBTI++],
4289                 &m_refList[idx]->sRefBuffer,
4290                 curbe->DW6.FrameWidth,
4291                 curbe->DW6.FrameHeight));
4292 
4293         }
4294         else
4295         {
4296             // Skip the binding table index because it is not used
4297             startBTI++;
4298         }
4299     }
4300     CODECHAL_ENCODE_ASSERT(startBTI == CODECHAL_HEVC_B_MBENC_VME_BACKWARD_7 - CODECHAL_HEVC_B_MBENC_BEGIN + 1);
4301 
4302     if (m_pictureCodingType != P_TYPE)
4303     {
4304         //33-41 VME multi-ref BTI -- Current plus [backward, nil][0..3]
4305         //33: Current Y VME surface
4306         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4307             kernelState,
4308             &cmdBuffer,
4309             SURFACE_RAW_VME,
4310             &bindingTable->dwBindingTableEntries[startBTI++]));
4311 
4312         for(uint32_t surfaceIdx = 0; surfaceIdx < 4; surfaceIdx++)
4313         {
4314             CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[1][surfaceIdx];
4315             if(!CodecHal_PictureIsInvalid(refPic) &&
4316                 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4317             {
4318                 uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4319 
4320                 // Picture Y VME
4321                 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4322                     kernelState,
4323                     &cmdBuffer,
4324                     SURFACE_REF_FRAME_VME,
4325                     &bindingTable->dwBindingTableEntries[startBTI++],
4326                     &m_refList[idx]->sRefBuffer,
4327                     curbe->DW6.FrameWidth,
4328                     curbe->DW6.FrameHeight));
4329             }
4330             else
4331             {
4332                 // Skip the binding table index because it is not used
4333                 startBTI++;
4334             }
4335 
4336             // Skip the binding table index because it is not used
4337             startBTI++;
4338         }
4339         CODECHAL_ENCODE_ASSERT(startBTI == CODECHAL_HEVC_B_MBENC_VME_MUL_NOUSE_3 - CODECHAL_HEVC_B_MBENC_BEGIN + 1);
4340     }
4341 
4342     // B 42 or P 33: Concurrent thread
4343     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4344         kernelState,
4345         &cmdBuffer,
4346         (SURFACE_ID)(SURFACE_CONCURRENT_THREAD + m_concurrentThreadIndex),
4347         &bindingTable->dwBindingTableEntries[startBTI++]));
4348 
4349     if (++m_concurrentThreadIndex >= NUM_CONCURRENT_THREAD)
4350     {
4351         m_concurrentThreadIndex = 0;
4352     }
4353 
4354     // B 43 or P 34: MV index buffer
4355     m_surfaceParams[SURFACE_MB_MV_INDEX].bIsWritable   =
4356     m_surfaceParams[SURFACE_MB_MV_INDEX].bRenderTarget = true;
4357     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4358         kernelState,
4359         &cmdBuffer,
4360         SURFACE_MB_MV_INDEX,
4361         &bindingTable->dwBindingTableEntries[startBTI++]));
4362 
4363     // B 44: or P 35: MVP index buffer
4364     m_surfaceParams[SURFACE_MVP_INDEX].bIsWritable   =
4365     m_surfaceParams[SURFACE_MVP_INDEX].bRenderTarget = true;
4366     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4367         kernelState,
4368         &cmdBuffer,
4369         SURFACE_MVP_INDEX,
4370         &bindingTable->dwBindingTableEntries[startBTI++]));
4371 
4372     // skip three BTI for haar distortion surface, statstics data dump surface
4373     // and frame level statstics data surface because they are not used
4374     startBTI += 3;
4375 
4376     // 48: FEI external MVPredictor surface
4377     if (m_feiPicParams->MVPredictorInput)
4378     {
4379         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4380                     kernelState,
4381                     &cmdBuffer,
4382                     SURFACE_FEI_EXTERNAL_MVP,
4383                     &bindingTable->dwBindingTableEntries[startBTI++]));
4384     }
4385     else
4386     {
4387         startBTI++;
4388     }
4389 
4390     if (m_feiPicParams->bPerCTBInput)
4391     {
4392         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4393                     kernelState,
4394                     &cmdBuffer,
4395                     SURFACE_FEI_PER_CTB_CTRL,
4396                     &bindingTable->dwBindingTableEntries[startBTI++]));
4397     }
4398     else
4399     {
4400         startBTI ++;
4401     }
4402     startBTI += 1;
4403 
4404     if (!m_hwWalker)
4405     {
4406         eStatus = MOS_STATUS_UNKNOWN;
4407         CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
4408         return eStatus;
4409     }
4410 
4411     if(m_numMbBKernelSplit == 0)
4412     {
4413         // always use customized media walker
4414         MHW_WALKER_PARAMS walkerParams;
4415         MOS_SecureMemcpy(&walkerParams, sizeof(walkerParams), &m_walkingPatternParam.MediaWalker, sizeof(m_walkingPatternParam.MediaWalker));
4416         walkerParams.ColorCountMinusOne = m_walkingPatternParam.dwNumRegion - 1;
4417 
4418         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
4419             &cmdBuffer,
4420             &walkerParams));
4421     }
4422     else
4423     {
4424         int32_t localOuterLoopExecCount = m_walkingPatternParam.MediaWalker.dwLocalLoopExecCount;
4425         int32_t localInitialStartPointY = m_walkingPatternParam.MediaWalker.LocalStart.y;
4426         int32_t phase = MOS_MIN(m_numMbBKernelSplit, MAX_NUM_KERNEL_SPLIT);
4427         int32_t totalExecCount = localOuterLoopExecCount + 1;
4428         int32_t deltaExecCount = (((totalExecCount+phase - 1) / phase) + 1) & 0xfffe;
4429         int32_t remainExecCount = totalExecCount;
4430 
4431         int32_t deltaY = 0;
4432         if (m_enable26WalkingPattern)
4433         {
4434             deltaY = deltaExecCount / 2;
4435         }
4436         else
4437         {
4438             deltaY = deltaExecCount * 2;
4439         }
4440 
4441         int32_t startPointY[MAX_NUM_KERNEL_SPLIT] = { 0 };
4442         int32_t currentExecCount[MAX_NUM_KERNEL_SPLIT] = { -1 };
4443         currentExecCount[0] = (remainExecCount > deltaExecCount)?(deltaExecCount-1) :  (remainExecCount-1);
4444         startPointY[0] = localInitialStartPointY;
4445 
4446         for (auto i = 1; i < phase; i++)
4447         {
4448             remainExecCount -= deltaExecCount;
4449             if (remainExecCount < 1)
4450             {
4451                 remainExecCount = 1;
4452             }
4453 
4454             currentExecCount[i] = (remainExecCount > deltaExecCount)?(deltaExecCount-1) :  (remainExecCount-1);
4455             startPointY[i] = startPointY[i-1] + deltaY;
4456         }
4457 
4458         for(auto i = 0; i < phase; i++)
4459         {
4460             if(currentExecCount[i] < 0)
4461             {
4462                 break;
4463             }
4464 
4465             // Program render engine pipe commands
4466             SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
4467             sendKernelCmdsParams.EncFunctionType        = encFunctionType;
4468             sendKernelCmdsParams.pKernelState           = kernelState;
4469             sendKernelCmdsParams.bEnableCustomScoreBoard= true;
4470             sendKernelCmdsParams.pCustomScoreBoard      = &m_walkingPatternParam.ScoreBoard;
4471             CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
4472 
4473             // Change walker execution count and local start Y for different phases
4474             m_walkingPatternParam.MediaWalker.dwLocalLoopExecCount = currentExecCount[i];
4475             m_walkingPatternParam.MediaWalker.LocalStart.y = startPointY[i];
4476 
4477             // always use customized media walker
4478             MHW_WALKER_PARAMS walkerParams;
4479             MOS_SecureMemcpy(&walkerParams, sizeof(walkerParams), &m_walkingPatternParam.MediaWalker, sizeof(m_walkingPatternParam.MediaWalker));
4480             walkerParams.ColorCountMinusOne = m_walkingPatternParam.dwNumRegion - 1;
4481 
4482             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
4483                 &cmdBuffer,
4484                 &walkerParams));
4485         }
4486     }
4487 
4488     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
4489         encFunctionType,
4490         kernelState,
4491         &cmdBuffer));
4492 
4493     CODECHAL_DEBUG_TOOL(
4494         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4495             &m_mvIndex.sResource,
4496             CodechalDbgAttr::attrOutput,
4497             "MbData",
4498             m_mvpIndex.dwSize,
4499             0,
4500             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
4501 
4502          CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4503             &m_mvpIndex.sResource,
4504             CodechalDbgAttr::attrOutput,
4505             "MvData",
4506             m_mvpIndex.dwSize,
4507             0,
4508             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
4509     )
4510 
4511     m_lastTaskInPhase = true;
4512     eStatus = Encode8x8BPakKernel(curbe);
4513 
4514     return eStatus;
4515 }
4516 
4517 #else
4518 
Encode2xScalingKernel()4519 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode2xScalingKernel()
4520 {
4521     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4522 
4523     PerfTagSetting perfTag;
4524     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL);
4525 
4526     //Setup CURBE
4527     MEDIA_OBJECT_DOWNSCALING_2X_STATIC_DATA_G9  cmd, *curbe = &cmd;
4528     MOS_ZeroMemory(curbe, sizeof(*curbe));
4529     curbe->DW0.PicWidth  = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
4530     curbe->DW0.PicHeight    = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
4531 
4532     DownScalingKernelParams scalingParams;
4533     MOS_ZeroMemory(&scalingParams, sizeof(scalingParams));
4534 
4535     scalingParams.m_cmSurfDS_TopIn = &m_rawSurfaceToEnc->OsResource;
4536     scalingParams.m_cmSurfDS_TopOut = &m_scaled2xSurface.OsResource;
4537     scalingParams.m_cmSurfTopVProc = nullptr;
4538 
4539     if (m_cmKernelMap.count("2xScaling") == 0)
4540     {
4541         m_cmKernelMap["2xScaling"] = new CMRTKernelDownScalingUMD();
4542         m_cmKernelMap["2xScaling"]->Init((void *)m_osInterface->pOsContext);
4543     }
4544 
4545     m_cmKernelMap["2xScaling"]->SetupCurbe(curbe);
4546 
4547     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_2X_SCALING;
4548     CODECHAL_DEBUG_TOOL(
4549         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
4550             encFunctionType,
4551             (uint8_t *)curbe, sizeof(*curbe)));
4552     )
4553 
4554     m_cmKernelMap["2xScaling"]->AllocateSurfaces(&scalingParams);
4555 
4556     //No need to wait for task finished
4557     m_cmEvent = CM_NO_EVENT;
4558     m_cmKernelMap["2xScaling"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
4559 
4560     return eStatus;
4561 }
4562 
Encode32x32PuModeDecisionKernel()4563 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode32x32PuModeDecisionKernel()
4564 {
4565     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4566 
4567     PerfTagSetting perfTag;
4568     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_32X32_PU_MD);
4569 
4570     //Setup CURBE
4571     uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
4572 
4573     CalcLambda(CODECHAL_ENCODE_HEVC_I_SLICE, INTRA_TRANSFORM_HAAR);
4574     int32_t sliceQp = CalSliceQp();
4575 
4576     double lambdaScalingFactor = 1.0;
4577     double qpLambda = m_qpLambdaMd[CODECHAL_ENCODE_HEVC_I_SLICE][sliceQp];
4578     double squaredQpLambda = qpLambda * qpLambda;
4579     m_fixedPointLambda = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
4580 
4581     CODECHAL_FEI_HEVC_I_32x32_PU_MODE_DECISION_CURBE_G9 cmd, *curbe = &cmd;
4582     MOS_ZeroMemory(curbe, sizeof(*curbe));
4583     curbe->DW0.FrameWidth      = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
4584     curbe->DW0.FrameHeight     = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
4585 
4586     curbe->DW1.EnableDebugDump = false;
4587     curbe->DW1.LCUType         = (log2MaxCUSize==6)? 0 /*64x64*/ : 1 /*32x32*/;
4588     curbe->DW1.PuType          = 0; // 32x32 PU
4589     curbe->DW1.BRCEnable                 = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
4590     curbe->DW1.LCUBRCEnable              = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
4591     curbe->DW1.SliceType                 = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
4592     curbe->DW1.FASTSurveillanceFlag      = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
4593     curbe->DW1.ROIEnable                 = (m_hevcPicParams->NumROI > 0);
4594     curbe->DW1.SliceQp         = sliceQp;
4595     curbe->DW1.EnableStatsDataDump = m_encodeParams.bReportStatisticsEnabled;
4596     curbe->DW1.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
4597 
4598     curbe->DW2.Lambda          = m_fixedPointLambda;
4599 
4600     curbe->DW3.ModeCost32x32   = 0;
4601 
4602     curbe->DW4.EarlyExit       = (uint32_t)-1;
4603     if (curbe->DW1.EnableStatsDataDump)
4604     {
4605         double lambdaMd;
4606         float hadBias = 2.0f;
4607 
4608         lambdaMd = m_qpLambdaMd[curbe->DW1.SliceType][sliceQp];
4609         lambdaMd = lambdaMd * hadBias;
4610         curbe->DW5.NewLambdaForHaarTransform = (uint32_t)(lambdaMd*(1<<10));
4611     }
4612 
4613     IFrameKernelParams I32x32Params;
4614     MOS_ZeroMemory(&I32x32Params, sizeof(I32x32Params));
4615 
4616     I32x32Params.m_cmSurfPer32x32PUDataOut = &m_32x32PuOutputData.sResource;
4617     I32x32Params.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource;
4618     I32x32Params.m_cmSurfCurrY2 = &m_scaled2xSurface.OsResource;
4619     I32x32Params.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource;
4620     I32x32Params.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel);
4621     I32x32Params.m_cmLCUQPSurf = &m_lcuQP.OsResource;
4622     I32x32Params.m_cmBRCConstSurf          = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource;
4623 
4624     if (m_cmKernelMap.count("I_32X32") == 0)
4625     {
4626         m_cmKernelMap["I_32X32"] = new CMRTKernelI32x32UMD();
4627         m_cmKernelMap["I_32X32"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, nullptr);
4628     }
4629 
4630     m_cmKernelMap["I_32X32"]->SetupCurbe(curbe);
4631 
4632     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION;
4633     CODECHAL_DEBUG_TOOL(
4634         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
4635             encFunctionType,
4636             (uint8_t *)curbe, sizeof(*curbe)));
4637     )
4638 
4639     m_cmKernelMap["I_32X32"]->AllocateSurfaces(&I32x32Params);
4640 
4641     //No need to wait for task finished
4642     m_cmEvent = CM_NO_EVENT;
4643     m_cmKernelMap["I_32X32"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
4644 
4645     return eStatus;
4646 }
4647 
Encode16x16SadPuComputationKernel()4648 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode16x16SadPuComputationKernel()
4649 {
4650     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4651 
4652     CODECHAL_ENCODE_FUNCTION_ENTER;
4653 
4654     PerfTagSetting perfTag;
4655     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_16X16_SAD);
4656 
4657     // Setup CURBE
4658     CODECHAL_ENC_HEVC_I_16x16_SAD_CURBE_G9 cmd, *curbe = &cmd;
4659 
4660     MOS_ZeroMemory(curbe, sizeof(*curbe));
4661     curbe->DW0.FrameWidth      = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
4662     curbe->DW0.FrameHeight     = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
4663 
4664     curbe->DW1.Log2MaxCUSize        = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
4665     curbe->DW1.Log2MinCUSize        = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
4666     curbe->DW1.Log2MinTUSize        = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
4667     curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
4668 
4669     curbe->DW2.SliceType       = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
4670     curbe->DW2.SimFlagForInter = false;
4671     if (m_hevcPicParams->CodingType != I_TYPE)
4672     {
4673         curbe->DW2.FASTSurveillanceFlag = m_hevcSeqParams->bVideoSurveillance;
4674     }
4675 
4676     IFrameKernelParams I16x16SadParams;
4677     MOS_ZeroMemory(&I16x16SadParams, sizeof(I16x16SadParams));
4678 
4679     I16x16SadParams.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource;
4680     I16x16SadParams.m_cmSurfPer32x32PUDataOut = &m_32x32PuOutputData.sResource;
4681     I16x16SadParams.m_cmSurfSAD16x16 = &m_sad16x16Pu.sResource;
4682     I16x16SadParams.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource;
4683     I16x16SadParams.m_cmSurfSIF = &m_simplestIntraSurface.OsResource;
4684 
4685     //in case I_32x32 isn't initialized when using FastIntraMode for per-frame control (I: enable; P/B: disable)
4686     if (m_cmKernelMap.count("I_32X32") == 0)
4687     {
4688         m_cmKernelMap["I_32X32"] = new CMRTKernelI32x32UMD();
4689         m_cmKernelMap["I_32X32"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, nullptr);
4690     }
4691 
4692     if (m_cmKernelMap.count("I_16X16_SAD") == 0)
4693     {
4694         m_cmKernelMap["I_16X16_SAD"] = new CMRTKernelI16x16SadUMD();
4695         m_cmKernelMap["I_16X16_SAD"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["I_32X32"]->m_cmProgram);
4696     }
4697 
4698     m_cmKernelMap["I_16X16_SAD"]->SetupCurbe(curbe);
4699 
4700     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_16x16_PU_SAD;
4701     CODECHAL_DEBUG_TOOL(
4702         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
4703             encFunctionType,
4704             (uint8_t *)curbe, sizeof(*curbe)));
4705     )
4706 
4707     m_cmKernelMap["I_16X16_SAD"]->AllocateSurfaces(&I16x16SadParams);
4708 
4709     //No need to wait for task finished
4710     m_cmEvent = CM_NO_EVENT;
4711     m_cmKernelMap["I_16X16_SAD"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
4712 
4713     return eStatus;
4714 }
4715 
Encode16x16PuModeDecisionKernel()4716 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode16x16PuModeDecisionKernel()
4717 {
4718     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4719 
4720     CODECHAL_ENCODE_FUNCTION_ENTER;
4721 
4722     PerfTagSetting perfTag;
4723     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_16X16_PU_MD);
4724 
4725     // Setup CURBE
4726     int32_t sliceQp = CalSliceQp();
4727     uint8_t sliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
4728 
4729     double lambdaScaleFactor = 0.46 + sliceQp - 22;
4730     if (lambdaScaleFactor < 0)
4731     {
4732         lambdaScaleFactor = 0.46;
4733     }
4734 
4735     if (lambdaScaleFactor > 15)
4736     {
4737         lambdaScaleFactor = 15;
4738     }
4739 
4740     double squredLambda = lambdaScaleFactor * pow(2.0, ((double)sliceQp-12.0)/6);
4741     m_fixedPointLambdaForLuma = (uint32_t)(squredLambda * (1<<10));
4742 
4743     double lambdaScalingFactor = 1.0;
4744     double qpLambda = m_qpLambdaMd[sliceType][sliceQp];
4745     double squaredQpLambda = qpLambda * qpLambda;
4746     m_fixedPointLambdaForChroma = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
4747 
4748     LoadCosts(sliceType, (uint8_t)sliceQp, INTRA_TRANSFORM_HAAR);
4749 
4750     CODECHAL_FEI_HEVC_I_16x16_PU_MODEDECISION_CURBE_G9 cmd, *curbe = &cmd;
4751     MOS_ZeroMemory(curbe, sizeof(*curbe));
4752 
4753     uint32_t log2MaxCUSize         = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
4754     curbe->DW0.FrameWidth          = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
4755     curbe->DW0.FrameHeight         = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
4756 
4757     curbe->DW1.Log2MaxCUSize       = log2MaxCUSize;
4758     curbe->DW1.Log2MinCUSize       = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
4759     curbe->DW1.Log2MinTUSize       = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
4760     curbe->DW1.SliceQp             = sliceQp;
4761 
4762     curbe->DW2.FixedPoint_Lambda_PredMode = m_fixedPointLambdaForChroma;
4763 
4764     curbe->DW3.LambdaScalingFactor    = 1;
4765     curbe->DW3.SliceType              = sliceType;
4766     curbe->DW3.EnableIntraEarlyExit   = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
4767     curbe->DW3.BRCEnable              = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
4768     curbe->DW3.LCUBRCEnable           = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
4769     curbe->DW3.ROIEnable              = (m_hevcPicParams->NumROI > 0);
4770     curbe->DW3.FASTSurveillanceFlag   = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
4771     curbe->DW3.EnableRollingIntra     = m_hevcPicParams->bEnableRollingIntraRefresh;
4772     //Given only Column Rolling I is supported, if in future, Row Rolling I support to be added, then, need to make change here as per Kernel
4773     curbe->DW3.IntraRefreshEn            = m_hevcPicParams->bEnableRollingIntraRefresh;
4774     curbe->DW3.HalfUpdateMixedLCU     = 0;
4775     curbe->DW3.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
4776 
4777     curbe->DW4.PenaltyForIntra8x8NonDCPredMode = 0;
4778     curbe->DW4.IntraComputeType                = 1;
4779     curbe->DW4.AVCIntra8x8Mask                 = 0;
4780     curbe->DW4.IntraSadAdjust                  = 2;
4781 
4782     double lambdaMd       = sqrt(0.57*pow(2.0, ((double)sliceQp-12.0)/3));
4783     squredLambda          = lambdaMd * lambdaMd;
4784     uint32_t newLambda      = (uint32_t)(squredLambda*(1<<10));
4785     curbe->DW5.FixedPoint_Lambda_CU_Mode_for_Cost_Calculation = newLambda;
4786 
4787     curbe->DW6.ScreenContentFlag = m_hevcPicParams->bScreenContent;
4788 
4789     curbe->DW7.ModeCostIntraNonPred = m_modeCost[0];
4790     curbe->DW7.ModeCostIntra16x16   = m_modeCost[1];
4791     curbe->DW7.ModeCostIntra8x8     = m_modeCost[2];
4792     curbe->DW7.ModeCostIntra4x4     = m_modeCost[3];
4793 
4794     curbe->DW8.FixedPoint_Lambda_CU_Mode_for_Luma = m_fixedPointLambdaForLuma;
4795 
4796     if (m_hevcPicParams->bEnableRollingIntraRefresh)
4797     {
4798         curbe->DW9.IntraRefreshMBNum    = m_hevcPicParams->IntraInsertionLocation;
4799         curbe->DW9.IntraRefreshQPDelta  = m_hevcPicParams->QpDeltaForInsertedIntra;
4800         curbe->DW9.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
4801     }
4802 
4803     curbe->DW10.SimplifiedFlagForInter = 0;
4804     if (m_encodeParams.bReportStatisticsEnabled)
4805     {
4806         curbe->DW10.HaarTransformMode  = true;
4807     }
4808     else
4809     {
4810         curbe->DW10.HaarTransformMode = (m_hevcPicParams->CodingType == I_TYPE) ? false : true;
4811     }
4812 
4813     IFrameKernelParams I16x16ModeParams;
4814     MOS_ZeroMemory(&I16x16ModeParams, sizeof(I16x16ModeParams));
4815 
4816     I16x16ModeParams.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource;
4817     I16x16ModeParams.m_cmSurfSAD16x16 = &m_sad16x16Pu.sResource;
4818     I16x16ModeParams.m_cmSurfPOCDbuf = &m_resMbCodeSurface;
4819     I16x16ModeParams.m_bufSize = m_mbCodeSize - m_mvOffset;
4820     I16x16ModeParams.m_bufOffset = m_mvOffset;
4821     I16x16ModeParams.m_cmSurfPer32x32PUDataOut = &m_32x32PuOutputData.sResource;
4822     I16x16ModeParams.m_cmSurfVMEMode = &m_vme8x8Mode.sResource;
4823     I16x16ModeParams.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource;
4824     I16x16ModeParams.m_cmSurfSIF = &m_simplestIntraSurface.OsResource;
4825     I16x16ModeParams.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel);
4826     I16x16ModeParams.m_cmLCUQPSurf = &m_lcuQP.OsResource;
4827     I16x16ModeParams.m_cmBRCConstSurf          = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource;
4828 
4829     if (m_cmKernelMap.count("I_16X16_MODE") == 0)
4830     {
4831         m_cmKernelMap["I_16X16_MODE"] = new CMRTKernelI16x16ModeUMD();
4832         m_cmKernelMap["I_16X16_MODE"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["I_32X32"]->m_cmProgram);
4833     }
4834 
4835     m_cmKernelMap["I_16X16_MODE"]->SetupCurbe(curbe);
4836 
4837     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_16x16_PU_MODE_DECISION;
4838     CODECHAL_DEBUG_TOOL(
4839         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
4840             encFunctionType,
4841             (uint8_t *)curbe, sizeof(*curbe)));
4842     )
4843 
4844     m_cmKernelMap["I_16X16_MODE"]->AllocateSurfaces(&I16x16ModeParams);
4845 
4846     //No need to wait for task finished
4847     m_cmEvent = CM_NO_EVENT;
4848     m_cmKernelMap["I_16X16_MODE"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
4849 
4850     return eStatus;
4851 }
4852 
Encode8x8PUKernel()4853 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PUKernel()
4854 {
4855     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4856 
4857     CODECHAL_ENCODE_FUNCTION_ENTER;
4858 
4859     PerfTagSetting perfTag;
4860     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_PU);
4861 
4862     // Setup CURBE
4863     uint32_t                            log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
4864     CODECHAL_FEI_HEVC_I_8x8_PU_CURBE_G9 cmd, *curbe = &cmd;
4865     MOS_ZeroMemory(curbe, sizeof(*curbe));
4866 
4867     curbe->DW0.FrameWidth          = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
4868     curbe->DW0.FrameHeight         = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
4869 
4870     curbe->DW1.SliceType           = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
4871     curbe->DW1.PuType          = 2; // 8x8
4872     curbe->DW1.DcFilterFlag    = true;
4873     curbe->DW1.AngleRefineFlag = true;
4874     curbe->DW1.LCUType         = (log2MaxCUSize==6)? 0 : 1;
4875     curbe->DW1.ScreenContentFlag         = m_hevcPicParams->bScreenContent;
4876     curbe->DW1.EnableIntraEarlyExit      = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
4877     curbe->DW1.EnableDebugDump = false;
4878     curbe->DW1.BRCEnable                 = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
4879     curbe->DW1.LCUBRCEnable              = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
4880     curbe->DW1.ROIEnable                 = (m_hevcPicParams->NumROI > 0);
4881     curbe->DW1.FASTSurveillanceFlag      = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
4882     curbe->DW1.EnableQualityImprovement  = m_encodeParams.bQualityImprovementEnable;
4883     curbe->DW1.QPValue = CalSliceQp();
4884     if (m_hevcPicParams->bEnableRollingIntraRefresh)
4885     {
4886         curbe->DW1.EnableRollingIntra   = true;
4887         curbe->DW1.IntraRefreshEn       = true;
4888         curbe->DW1.HalfUpdateMixedLCU   = 0;
4889 
4890         curbe->DW5.IntraRefreshMBNum    = m_hevcPicParams->IntraInsertionLocation;
4891         curbe->DW5.IntraRefreshQPDelta  = m_hevcPicParams->QpDeltaForInsertedIntra;
4892         curbe->DW5.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
4893 
4894         int32_t qp = CalSliceQp();
4895         curbe->DW1.QPValue              = (uint32_t)qp;
4896     }
4897 
4898     curbe->DW2.LumaLambda      = m_fixedPointLambdaForLuma;
4899 
4900     curbe->DW3.ChromaLambda    = m_fixedPointLambdaForChroma;
4901 
4902     if (m_encodeParams.bReportStatisticsEnabled)
4903     {
4904         curbe->DW4.HaarTransformFlag   = true;
4905     }
4906     else
4907     {
4908         curbe->DW4.HaarTransformFlag = (m_hevcPicParams->CodingType == I_TYPE) ? false : true;
4909     }
4910     curbe->DW4.SimplifiedFlagForInter  = false;
4911 
4912     IFrameKernelParams I8x8Params;
4913     MOS_ZeroMemory(&I8x8Params, sizeof(I8x8Params));
4914 
4915     I8x8Params.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource;
4916     I8x8Params.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource;
4917     I8x8Params.m_cmSurfVMEMode = &m_vme8x8Mode.sResource;
4918     I8x8Params.m_cmSurfMode = &m_intraMode.sResource;
4919     I8x8Params.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel);
4920     I8x8Params.m_cmSurfSIF = &m_simplestIntraSurface.OsResource;
4921     I8x8Params.m_cmLCUQPSurf = &m_lcuQP.OsResource;
4922     I8x8Params.m_cmBRCConstSurf   = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource;
4923 
4924     if (m_cmKernelMap.count("I_8X8") == 0)
4925     {
4926         m_cmKernelMap["I_8X8"] = new CMRTKernelI8x8UMD();
4927         m_cmKernelMap["I_8X8"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["I_32X32"]->m_cmProgram);
4928     }
4929 
4930     m_cmKernelMap["I_8X8"]->SetupCurbe(curbe);
4931 
4932     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_8x8_PU;
4933     CODECHAL_DEBUG_TOOL(
4934         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
4935             encFunctionType,
4936             (uint8_t *)curbe, sizeof(*curbe)));
4937     )
4938 
4939     m_cmKernelMap["I_8X8"]->AllocateSurfaces(&I8x8Params);
4940 
4941     //No need to wait for task finished
4942     m_cmEvent = CM_NO_EVENT;
4943     m_cmKernelMap["I_8X8"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
4944 
4945     return eStatus;
4946 }
4947 
Encode8x8PUFMODEKernel()4948 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PUFMODEKernel()
4949 {
4950     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4951 
4952     CODECHAL_ENCODE_FUNCTION_ENTER;
4953 
4954     PerfTagSetting perfTag;
4955     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_FMODE);
4956 
4957     // Setup CURBE
4958     int32_t qp = CalSliceQp();
4959     uint32_t sliceQp = (uint32_t)qp;
4960     uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
4961 
4962     CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9 cmd, *curbe = &cmd;
4963     MOS_ZeroMemory(curbe, sizeof(*curbe));
4964     curbe->DW0.FrameWidth                  = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
4965     curbe->DW0.FrameHeight                 = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
4966 
4967     curbe->DW1.SliceType                   = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
4968     curbe->DW1.PuType                      = 2;
4969     curbe->DW1.PakReordingFlag             = (m_hevcPicParams->CodingType == I_TYPE) ? true : false;
4970     curbe->DW1.LCUType                     = (log2MaxCUSize == 6)? 0 : 1;
4971     curbe->DW1.ScreenContentFlag           = m_hevcPicParams->bScreenContent;
4972     curbe->DW1.EnableIntraEarlyExit        = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
4973     curbe->DW1.EnableDebugDump             = false;
4974     curbe->DW1.BRCEnable                   = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
4975     curbe->DW1.LCUBRCEnable                = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
4976     curbe->DW1.ROIEnable                   = (m_hevcPicParams->NumROI > 0);
4977     curbe->DW1.FASTSurveillanceFlag        = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
4978     curbe->DW1.EnableRollingIntra          = m_hevcPicParams->bEnableRollingIntraRefresh;
4979     curbe->DW1.IntraRefreshEn              = m_hevcPicParams->bEnableRollingIntraRefresh;
4980     curbe->DW1.HalfUpdateMixedLCU          = 0;
4981     curbe->DW1.EnableQualityImprovement    = m_encodeParams.bQualityImprovementEnable;
4982     curbe->DW2.LambdaForLuma               = m_fixedPointLambdaForLuma;
4983     if (m_hevcPicParams->CodingType != I_TYPE ||
4984         m_encodeParams.bReportStatisticsEnabled)
4985     {
4986         float hadBias = 2.0f;
4987 
4988         double lambdaMd = m_qpLambdaMd[curbe->DW1.SliceType][sliceQp];
4989         lambdaMd = lambdaMd * hadBias;
4990         curbe->DW3.LambdaForDistCalculation = (uint32_t)(lambdaMd*(1<<10));
4991     }
4992     curbe->DW4.ModeCostFor8x8PU_TU8      = 0;
4993     curbe->DW5.ModeCostFor8x8PU_TU4      = 0;
4994     curbe->DW6.SATD16x16PuThreshold      = MOS_MAX(200 * ((int32_t)sliceQp - 12), 0);
4995     curbe->DW6.BiasFactorToward8x8       = (m_hevcPicParams->bScreenContent) ? 1024 : 1126 + 102;
4996     curbe->DW7.Qp                        = sliceQp;
4997     curbe->DW7.QpForInter                = 0;
4998     curbe->DW8.SimplifiedFlagForInter    = false;
4999     curbe->DW8.EnableStatsDataDump       = m_encodeParams.bReportStatisticsEnabled;
5000     // KBLControlFlag determines the PAK OBJ format as it varies from Gen9 to Gen9.5+
5001     curbe->DW8.KBLControlFlag            = UsePlatformControlFlag();
5002     curbe->DW9.IntraRefreshMBNum         = m_hevcPicParams->IntraInsertionLocation;
5003     curbe->DW9.IntraRefreshQPDelta       = m_hevcPicParams->QpDeltaForInsertedIntra;
5004     curbe->DW9.IntraRefreshUnitInMB      = m_hevcPicParams->IntraInsertionSize;
5005 
5006     IFrameKernelParams I8x8ModeParams;
5007     MOS_ZeroMemory(&I8x8ModeParams, sizeof(I8x8ModeParams));
5008 
5009     I8x8ModeParams.m_cmSurfPOCDbuf = &m_resMbCodeSurface;
5010     I8x8ModeParams.m_bufSize = m_mbCodeSize - m_mvOffset;
5011     I8x8ModeParams.m_bufOffset = m_mvOffset;
5012     I8x8ModeParams.m_cmSurfVMEMode = &m_vme8x8Mode.sResource;
5013     I8x8ModeParams.m_cmSurfMode = &m_intraMode.sResource;
5014     I8x8ModeParams.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource;
5015     I8x8ModeParams.m_cmSurfIntraDist = &m_intraDist.sResource;
5016     I8x8ModeParams.m_cmSurfSIF = &m_simplestIntraSurface.OsResource;
5017     I8x8ModeParams.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel);
5018     I8x8ModeParams.m_cmLCUQPSurf = &m_lcuQP.OsResource;
5019     I8x8ModeParams.m_cmBRCConstSurf   = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource;
5020 
5021     if (m_cmKernelMap.count("I_8X8_MODE") == 0)
5022     {
5023         m_cmKernelMap["I_8X8_MODE"] = new CMRTKernelI8x8ModeUMD();
5024         m_cmKernelMap["I_8X8_MODE"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["I_32X32"]->m_cmProgram);
5025     }
5026 
5027     m_cmKernelMap["I_8X8_MODE"]->SetupCurbe(curbe);
5028 
5029     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_8x8_PU_FMODE;
5030     CODECHAL_DEBUG_TOOL(
5031         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
5032             encFunctionType,
5033             (uint8_t *)curbe, sizeof(*curbe)));
5034     )
5035 
5036     m_cmKernelMap["I_8X8_MODE"]->AllocateSurfaces(&I8x8ModeParams);
5037 
5038     //No need to wait for task finished
5039     m_cmEvent = CM_NO_EVENT;
5040     m_cmKernelMap["I_8X8_MODE"]->CreateAndDispatchKernel(m_cmEvent, false, ((!m_singleTaskPhaseSupported)|| m_lastTaskInPhase));
5041 
5042     return eStatus;
5043 }
5044 
Encode32X32BIntraCheckKernel()5045 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode32X32BIntraCheckKernel()
5046 {
5047     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5048 
5049     CODECHAL_ENCODE_FUNCTION_ENTER;
5050 
5051     PerfTagSetting perfTag;
5052     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_32X32_B_IC);
5053 
5054     // Setup CURBE
5055     if (m_pictureCodingType == P_TYPE)
5056     {
5057         CalcLambda(CODECHAL_ENCODE_HEVC_P_SLICE, INTRA_TRANSFORM_HAAR);
5058     }
5059     else
5060     {
5061         CalcLambda(CODECHAL_ENCODE_HEVC_B_SLICE, INTRA_TRANSFORM_HAAR);
5062     }
5063     int32_t sliceQp = CalSliceQp();
5064 
5065     double lambdaScalingFactor = 1.0;
5066     double qpLambda = m_qpLambdaMd[CODECHAL_ENCODE_HEVC_I_SLICE][sliceQp];
5067     double squaredQpLambda = qpLambda * qpLambda;
5068     m_fixedPointLambda = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
5069 
5070     CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9 cmd, *curbe = &cmd;
5071     MOS_ZeroMemory(curbe, sizeof(*curbe));
5072     curbe->DW0.FrameWidth      = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
5073     curbe->DW0.FrameHeight     = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
5074 
5075     curbe->DW1.EnableDebugDump = false;
5076     curbe->DW1.EnableIntraEarlyExit = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1;
5077     curbe->DW1.Flags           = 0;
5078     curbe->DW1.Log2MinTUSize        = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
5079     curbe->DW1.SliceType            = m_hevcSliceParams->slice_type;
5080     curbe->DW1.HMEEnable       = 0;
5081     curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
5082 
5083     curbe->DW2.QpMultiplier    = 100;
5084     curbe->DW2.QpValue         = 0;     // MBZ
5085 
5086     PBFrameKernelParams PB32x32Params;
5087     MOS_ZeroMemory(&PB32x32Params, sizeof(PB32x32Params));
5088 
5089     PB32x32Params.m_cmSurfPer32x32ICOut = &m_32x32PuOutputData.sResource;
5090     PB32x32Params.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource;
5091     PB32x32Params.m_cmSurfCurrY2 = &m_scaled2xSurface.OsResource;
5092     PB32x32Params.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource;
5093     PB32x32Params.m_cmSurfSIF = &m_simplestIntraSurface.OsResource;
5094     PB32x32Params.m_cmLCUQPSurf = &m_lcuQP.OsResource;
5095 
5096     if (m_cmKernelMap.count("PB_32x32") == 0)
5097     {
5098         m_cmKernelMap["PB_32x32"] = new CMRTKernelPB32x32UMD();
5099         m_cmKernelMap["PB_32x32"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, nullptr);
5100     }
5101 
5102     m_cmKernelMap["PB_32x32"]->SetupCurbe(curbe);
5103 
5104     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_32x32_B_INTRA_CHECK;
5105     CODECHAL_DEBUG_TOOL(
5106         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
5107             encFunctionType,
5108             (uint8_t *)curbe, sizeof(*curbe)));
5109     )
5110 
5111     m_cmKernelMap["PB_32x32"]->AllocateSurfaces(&PB32x32Params);
5112 
5113     //No need to wait for task finished
5114     m_cmEvent = CM_NO_EVENT;
5115     m_cmKernelMap["PB_32x32"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
5116 
5117     return eStatus;
5118 }
5119 
Encode8x8BPakKernel(PCODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 pEncBCurbe)5120 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8BPakKernel(
5121     PCODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 pEncBCurbe)
5122 {
5123     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5124 
5125     CODECHAL_ENCODE_FUNCTION_ENTER;
5126 
5127     CODECHAL_ENCODE_CHK_NULL_RETURN(pEncBCurbe);
5128 
5129     PerfTagSetting perfTag;
5130     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_KERNEL);
5131 
5132     //Setup CURBE
5133     CODECHAL_FEI_HEVC_B_PAK_CURBE_G9  cmd, *curbe = &cmd;
5134     MOS_ZeroMemory(curbe, sizeof(*curbe));
5135     curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
5136     curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
5137 
5138     curbe->DW1.MaxVmvR                 = pEncBCurbe->DW44.MaxVmvR;
5139     curbe->DW1.Qp                      = pEncBCurbe->DW13.QpPrimeY;
5140     curbe->DW2.BrcEnable               = pEncBCurbe->DW36.BRCEnable;
5141     curbe->DW2.LcuBrcEnable            = pEncBCurbe->DW36.LCUBRCEnable;
5142     curbe->DW2.ScreenContent           = pEncBCurbe->DW47.ScreenContentFlag;
5143     curbe->DW2.SimplestIntraEnable     = pEncBCurbe->DW47.SkipIntraKrnFlag;
5144     curbe->DW2.SliceType               = pEncBCurbe->DW4.SliceType;
5145     curbe->DW2.EnableWA                = 0;
5146     curbe->DW2.ROIEnable               = (m_hevcPicParams->NumROI > 0);
5147     curbe->DW2.FASTSurveillanceFlag    = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
5148     // KBLControlFlag determines the PAK OBJ format as it varies from Gen9 to Gen9.5+
5149     curbe->DW2.KBLControlFlag          = UsePlatformControlFlag();
5150     curbe->DW2.EnableRollingIntra        = m_hevcPicParams->bEnableRollingIntraRefresh;
5151     curbe->DW2.EnableQualityImprovement  = m_encodeParams.bQualityImprovementEnable;
5152     curbe->DW3.IntraRefreshQPDelta       = m_hevcPicParams->QpDeltaForInsertedIntra;
5153     curbe->DW3.IntraRefreshMBNum         = m_hevcPicParams->IntraInsertionLocation;
5154     curbe->DW3.IntraRefreshUnitInMB      = m_hevcPicParams->IntraInsertionSize;
5155 
5156     PBFrameKernelParams PB8x8PakParams;
5157     MOS_ZeroMemory(&PB8x8PakParams, sizeof(PB8x8PakParams));
5158 
5159     PB8x8PakParams.m_cmSurfPOCDbuf = &m_resMbCodeSurface;
5160     PB8x8PakParams.m_bufSize = m_mbCodeSize - m_mvOffset;
5161     PB8x8PakParams.m_bufOffset = m_mvOffset;
5162     PB8x8PakParams.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource;
5163     PB8x8PakParams.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel);
5164     PB8x8PakParams.m_cmLCUQPSurf = &m_lcuQP.OsResource;
5165     PB8x8PakParams.m_cmBRCConstSurf   = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource;
5166     PB8x8PakParams.m_cmSurfMVIndex = &m_mvIndex.sResource;
5167     PB8x8PakParams.m_cmSurfMVPred = &m_mvpIndex.sResource;
5168 
5169     if (m_cmKernelMap.count("PB_8x8_PAK") == 0)
5170     {
5171         m_cmKernelMap["PB_8x8_PAK"] = new CMRTKernelPB8x8PakUMD();
5172         m_cmKernelMap["PB_8x8_PAK"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["PB_32x32"]->m_cmProgram);
5173     }
5174 
5175     m_cmKernelMap["PB_8x8_PAK"]->SetupCurbe(curbe);
5176 
5177     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_B_PAK;
5178     CODECHAL_DEBUG_TOOL(
5179         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
5180             encFunctionType,
5181             (uint8_t *)curbe, sizeof(*curbe)));
5182     )
5183 
5184     m_cmKernelMap["PB_8x8_PAK"]->AllocateSurfaces(&PB8x8PakParams);
5185 
5186     //No need to wait for task finished
5187     m_cmEvent = CM_NO_EVENT;
5188     m_cmKernelMap["PB_8x8_PAK"]->CreateAndDispatchKernel(m_cmEvent, false, ((!m_singleTaskPhaseSupported)|| m_lastTaskInPhase));
5189 
5190     return eStatus;
5191 }
5192 
Encode8x8PBMbEncKernel()5193 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PBMbEncKernel()
5194 {
5195     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5196 
5197     CODECHAL_ENCODE_FUNCTION_ENTER;
5198 
5199     PerfTagSetting perfTag;
5200     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_MBENC_KERNEL);
5201 
5202     int32_t sliceQp = CalSliceQp();
5203     uint8_t sliceType = PicCodingTypeToSliceType(m_pictureCodingType);
5204 
5205     if (m_feiPicParams->FastIntraMode)
5206     {
5207         // When TU=7, lambda is not computed in the 32x32 MD stage for it is skipped.
5208         CalcLambda(sliceType, INTRA_TRANSFORM_HAAR);
5209     }
5210     LoadCosts(sliceType, (uint8_t)sliceQp, INTRA_TRANSFORM_REGULAR);
5211 
5212     uint8_t mbCodeIdxForTempMVP = 0xFF;
5213     if(m_pictureCodingType != I_TYPE)
5214     {
5215         if (m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC)
5216         {
5217             uint8_t FrameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx;
5218 
5219             mbCodeIdxForTempMVP = m_refList[FrameIdx]->ucScalingIdx;
5220         }
5221 
5222         if (mbCodeIdxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag)
5223         {
5224             // Temporal reference MV index is invalid and so disable the temporal MVP
5225             CODECHAL_ENCODE_ASSERT(false);
5226             m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
5227         }
5228     }
5229     else
5230     {
5231         mbCodeIdxForTempMVP = 0;
5232     }
5233 
5234     CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateWalkingControlRegion());
5235 
5236     //Setup CURBE
5237     uint8_t forwardTransformThd[7] = { 0 };
5238     CalcForwardCoeffThd(forwardTransformThd, sliceQp);
5239 
5240     uint32_t curbeSize = 0;
5241     void *defaultCurbe = (void *)GetDefaultCurbeEncBKernel(curbeSize);
5242     CODECHAL_ENCODE_ASSERT(defaultCurbe);
5243 
5244     CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 cmd, *curbe = &cmd;
5245     CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(curbe, sizeof(cmd), defaultCurbe, curbeSize));
5246 
5247     bool transform_8x8_mode_flag = true;
5248     uint32_t SearchPath              = (m_feiPicParams->SearchWindow == 5) ? 2 : 1;  // 2 means full search, 1 means diamand search
5249     uint32_t LenSP                   = m_feiPicParams->LenSP;
5250     uint32_t RefWidth                = (m_feiPicParams->RefWidth < 20) ? 20 : m_feiPicParams->RefWidth;
5251     uint32_t RefHeight               = (m_feiPicParams->RefHeight < 20) ? 20 : m_feiPicParams->RefHeight;
5252 
5253     switch (m_feiPicParams->SearchWindow)
5254     {
5255     case 0:
5256         // not use predefined search window
5257         if ((m_feiPicParams->SearchPath != 0) && (m_feiPicParams->SearchPath != 1) && (m_feiPicParams->SearchPath != 2))
5258         {
5259             CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC input SearchPath for SearchWindow=0 case!!!.");
5260             eStatus = MOS_STATUS_INVALID_PARAMETER;
5261             return eStatus;
5262         }
5263         SearchPath = m_feiPicParams->SearchPath;
5264         if(((RefWidth * RefHeight) > 2048) || (RefWidth > 64) || (RefHeight > 64))
5265         {
5266             CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC input RefWidth/RefHeight size for SearchWindow=0 case!!!.");
5267             eStatus = MOS_STATUS_INVALID_PARAMETER;
5268             return eStatus;
5269         }
5270         break;
5271     case 1:
5272         // Tiny SUs 24x24 window
5273         RefWidth  = 24;
5274         RefHeight = 24;
5275         LenSP     = 4;
5276         break;
5277     case 2:
5278         // Small SUs 28x28 window
5279         RefWidth  = 28;
5280         RefHeight = 28;
5281         LenSP     = 9;
5282         break;
5283     case 3:
5284         // Diamond SUs 48x40 window
5285         RefWidth  = 48;
5286         RefHeight = 40;
5287         LenSP     = 16;
5288         break;
5289     case 4:
5290         // Large Diamond SUs 48x40 window
5291         RefWidth  = 48;
5292         RefHeight = 40;
5293         LenSP     = 32;
5294         break;
5295     case 5:
5296         // Exhaustive SUs 48x40 window
5297         RefWidth  = 48;
5298         RefHeight = 40;
5299         LenSP     = 48;
5300         if (m_hevcSeqParams->TargetUsage != 7)
5301         {
5302             if (m_pictureCodingType == B_TYPE)
5303             {
5304                 LenSP = 48;
5305             } else {
5306                 LenSP = 57;
5307             }
5308         } else {
5309             LenSP = 25;
5310         }
5311         break;
5312     default:
5313         CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC SearchWindow value for HEVC FEI on SKL!!!.");
5314         eStatus = MOS_STATUS_INVALID_PARAMETER;
5315         return eStatus;
5316     }
5317 
5318     if((m_pictureCodingType == B_TYPE) && (curbe->DW3.BMEDisableFBR == 0))
5319     {
5320         if(RefWidth > 32)
5321         {
5322             RefWidth  = 32;
5323         }
5324         if(RefHeight > 32)
5325         {
5326             RefHeight = 32;
5327         }
5328     }
5329 
5330     curbe->DW0.AdaptiveEn         = m_feiPicParams->AdaptiveSearch;
5331     curbe->DW0.T8x8FlagForInterEn = transform_8x8_mode_flag;
5332     curbe->DW2.PicWidth    = m_picWidthInMb;
5333     curbe->DW2.LenSP       = LenSP;
5334     curbe->DW3.SrcAccess   = curbe->DW3.RefAccess = 0;
5335     if (m_feiPicParams->FastIntraMode)
5336     {
5337         curbe->DW3.FTEnable    = (m_ftqBasedSkip[0x07] >> 1) & 0x01;
5338     }
5339     else
5340     {
5341         curbe->DW3.FTEnable    = (m_ftqBasedSkip[0x04] >> 1) & 0x01;
5342     }
5343     curbe->DW3.SubPelMode                         = m_feiPicParams->SubPelMode;
5344 
5345     curbe->DW4.PicHeightMinus1               = m_picHeightInMb - 1;
5346     curbe->DW4.EnableStatsDataDump           = m_encodeParams.bReportStatisticsEnabled;
5347     curbe->DW4.HMEEnable                     = 0;
5348     curbe->DW4.SliceType                     = sliceType;
5349     curbe->DW4.EnableQualityImprovement      = m_encodeParams.bQualityImprovementEnable;
5350     curbe->DW4.UseActualRefQPValue           = false;
5351 
5352     curbe->DW5.RefWidth                      = RefWidth;
5353     curbe->DW5.RefHeight                     = RefHeight;
5354 
5355     curbe->DW7.IntraPartMask                 = 0x3;
5356 
5357     curbe->DW6.FrameWidth                    = m_picWidthInMb  * CODECHAL_MACROBLOCK_WIDTH;
5358     curbe->DW6.FrameHeight                   = m_picHeightInMb * CODECHAL_MACROBLOCK_HEIGHT;
5359 
5360     curbe->DW8.Mode0Cost = m_modeCost[0];
5361     curbe->DW8.Mode1Cost = m_modeCost[1];
5362     curbe->DW8.Mode2Cost = m_modeCost[2];
5363     curbe->DW8.Mode3Cost = m_modeCost[3];
5364 
5365     curbe->DW9.Mode4Cost = m_modeCost[4];
5366     curbe->DW9.Mode5Cost = m_modeCost[5];
5367     curbe->DW9.Mode6Cost = m_modeCost[6];
5368     curbe->DW9.Mode7Cost = m_modeCost[7];
5369 
5370     curbe->DW10.Mode8Cost= m_modeCost[8];
5371     curbe->DW10.Mode9Cost= m_modeCost[9];
5372     curbe->DW10.RefIDCost = m_modeCost[10];
5373     curbe->DW10.ChromaIntraModeCost = m_modeCost[11];
5374 
5375     curbe->DW11.MV0Cost  = m_mvCost[0];
5376     curbe->DW11.MV1Cost  = m_mvCost[1];
5377     curbe->DW11.MV2Cost  = m_mvCost[2];
5378     curbe->DW11.MV3Cost  = m_mvCost[3];
5379 
5380     curbe->DW12.MV4Cost  = m_mvCost[4];
5381     curbe->DW12.MV5Cost  = m_mvCost[5];
5382     curbe->DW12.MV6Cost  = m_mvCost[6];
5383     curbe->DW12.MV7Cost  = m_mvCost[7];
5384 
5385     curbe->DW13.QpPrimeY = sliceQp;
5386     uint8_t bitDepthChromaMinus8 = 0; // support 4:2:0 only
5387     int32_t qpBdOffsetC = 6 * bitDepthChromaMinus8;
5388     int32_t qPi                  = (int32_t)CodecHal_Clip3((-qpBdOffsetC), 51, (sliceQp + m_hevcPicParams->pps_cb_qp_offset));
5389     int32_t QPc = (qPi < 30) ? qPi : QPcTable[qPi - 30];
5390     curbe->DW13.QpPrimeCb= QPc + qpBdOffsetC;
5391     qPi                          = (int32_t)CodecHal_Clip3((-qpBdOffsetC), 51, (sliceQp + m_hevcPicParams->pps_cr_qp_offset));
5392     QPc = (qPi < 30) ? qPi : QPcTable[qPi - 30];
5393     curbe->DW13.QpPrimeCr= QPc;
5394 
5395     curbe->DW14.SICFwdTransCoeffThreshold_0 = forwardTransformThd[0];
5396     curbe->DW14.SICFwdTransCoeffThreshold_1 = forwardTransformThd[1];
5397     curbe->DW14.SICFwdTransCoeffThreshold_2 = forwardTransformThd[2];
5398 
5399     curbe->DW15.SICFwdTransCoeffThreshold_3 = forwardTransformThd[3];
5400     curbe->DW15.SICFwdTransCoeffThreshold_4 = forwardTransformThd[4];
5401     curbe->DW15.SICFwdTransCoeffThreshold_5 = forwardTransformThd[5];
5402     curbe->DW15.SICFwdTransCoeffThreshold_6 = forwardTransformThd[6];
5403 
5404     if (SearchPath == 1)
5405     {
5406         // diamond search
5407         if (m_pictureCodingType == P_TYPE)
5408         {
5409             CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe->DW16), 14 * sizeof(uint32_t), &(m_encBTu7PCurbeInit[16]), 14 * sizeof(uint32_t)));
5410         }
5411         else if (m_pictureCodingType == B_TYPE)
5412         {
5413             CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe->DW16), 14 * sizeof(uint32_t), &(m_encBTu7BCurbeInit[16]), 14 * sizeof(uint32_t)));
5414         }
5415     }
5416     else if((SearchPath != 0) && (SearchPath != 2))
5417     {
5418         // default 0 and 2 are full sparil search
5419         CODECHAL_ENCODE_ASSERT(false);
5420     }
5421 
5422     curbe->DW32.SkipVal = m_skipValB[curbe->DW3.BlockBasedSkipEnable][transform_8x8_mode_flag][sliceQp];
5423 
5424     if(m_pictureCodingType == I_TYPE)
5425     {
5426         *(float*)&(curbe->DW34.LambdaME) = 0.0;
5427     }
5428     else if (m_pictureCodingType == P_TYPE)
5429     {
5430         *(float*)&(curbe->DW34.LambdaME) = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_P_SLICE][sliceQp];
5431     }
5432     else
5433     {
5434         *(float*)&(curbe->DW34.LambdaME) = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_B_SLICE][sliceQp];
5435     }
5436 
5437     curbe->DW35.ModeCostSp                 = m_modeCostSp;
5438     curbe->DW35.SimpIntraInterThreshold    = m_simplestIntraInterThreshold;
5439 
5440     curbe->DW36.NumRefIdxL0MinusOne  = m_hevcSliceParams->num_ref_idx_l0_active_minus1;
5441     curbe->DW36.NumRefIdxL1MinusOne  = m_hevcSliceParams->num_ref_idx_l1_active_minus1;
5442     curbe->DW36.BRCEnable            = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
5443     curbe->DW36.LCUBRCEnable         = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
5444     curbe->DW36.PowerSaving         = m_powerSavingEnabled;
5445     curbe->DW36.ROIEnable            = (m_hevcPicParams->NumROI > 0);
5446     curbe->DW36.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
5447 
5448     if(m_pictureCodingType != I_TYPE)
5449     {
5450         curbe->DW37.ActualQpRefID0List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_0);
5451         curbe->DW37.ActualQpRefID1List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_1);
5452         curbe->DW37.ActualQpRefID2List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_2);
5453         curbe->DW37.ActualQpRefID3List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_3);
5454         curbe->DW41.TextureIntraCostThreshold = 500;
5455 
5456         if(m_pictureCodingType == B_TYPE) {
5457             curbe->DW39.ActualQpRefID0List1 = GetQPValueFromRefList(LIST_1, CODECHAL_ENCODE_REF_ID_0);
5458             curbe->DW39.ActualQpRefID1List1 = GetQPValueFromRefList(LIST_1, CODECHAL_ENCODE_REF_ID_1);
5459             float lambda_me = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_B_SLICE][sliceQp];
5460             if (m_encodeParams.bQualityImprovementEnable)
5461             {
5462                 curbe->DW40.TransformThreshold0 = (uint16_t) (lambda_me * 56.25 + 0.5);
5463                 curbe->DW40.TransformThreshold1 = (uint16_t) (lambda_me * 21 + 0.5);
5464                 curbe->DW41.TransformThreshold2 = (uint16_t) (lambda_me * 9 + 0.5);
5465             }
5466         }
5467     }
5468 
5469     curbe->DW42.NumMVPredictorsL0      = m_feiPicParams->NumMVPredictorsL0;
5470     curbe->DW42.NumMVPredictorsL1      = m_feiPicParams->NumMVPredictorsL1;
5471     curbe->DW42.PerLCUQP               = m_encodeParams.bMbQpDataEnabled;
5472     curbe->DW42.PerCTBInput            = m_feiPicParams->bPerCTBInput;
5473     curbe->DW42.CTBDistortionOutput    = m_feiPicParams->bDistortionEnable;
5474     curbe->DW42.MultiPredL0            = m_feiPicParams->MultiPredL0;
5475     curbe->DW42.MultiPredL1            = m_feiPicParams->MultiPredL1;
5476     curbe->DW42.MVPredictorBlockSize   = m_feiPicParams->MVPredictorInput;
5477 
5478     curbe->DW44.MaxVmvR                = 511 * 4;
5479     curbe->DW44.MaxNumMergeCandidates  = m_hevcSliceParams->MaxNumMergeCand;
5480 
5481     if(m_pictureCodingType != I_TYPE)
5482     {
5483         curbe->DW44.MaxNumRefList0         = curbe->DW36.NumRefIdxL0MinusOne + 1;
5484 
5485         curbe->DW45.TemporalMvpEnableFlag  = m_hevcSliceParams->slice_temporal_mvp_enable_flag;
5486         curbe->DW45.HMECombineLenPslice    = 8;
5487         if(m_pictureCodingType == B_TYPE)
5488         {
5489             curbe->DW44.MaxNumRefList1         = curbe->DW36.NumRefIdxL1MinusOne + 1;
5490             curbe->DW45.HMECombineLenBslice    = 8;
5491         }
5492     }
5493 
5494     curbe->DW45.Log2ParallelMergeLevel = m_hevcPicParams->log2_parallel_merge_level_minus2 + 2;
5495 
5496     curbe->DW46.Log2MaxTUSize = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2;
5497     curbe->DW46.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
5498     curbe->DW46.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
5499     curbe->DW46.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
5500 
5501     curbe->DW47.NumRegionsInSlice      = m_numRegionsInSlice;
5502     curbe->DW47.TypeOfWalkingPattern   = m_enable26WalkingPattern;
5503     curbe->DW47.ChromaFlatnessCheckFlag= (m_feiPicParams->FastIntraMode) ? 0 : 1;
5504     curbe->DW47.EnableIntraEarlyExit   = (m_feiPicParams->FastIntraMode) ? 0 : 1;
5505     curbe->DW47.SkipIntraKrnFlag       = (m_feiPicParams->FastIntraMode) ? 1 : 0;
5506     curbe->DW47.CollocatedFromL0Flag   = m_hevcSliceParams->collocated_from_l0_flag;
5507     curbe->DW47.IsLowDelay             = m_lowDelay;
5508     curbe->DW47.ScreenContentFlag      = m_hevcPicParams->bScreenContent;
5509     curbe->DW47.MultiSliceFlag         = (m_numSlices > 1);
5510     curbe->DW47.ArbitarySliceFlag      = m_arbitraryNumMbsInSlice;
5511     curbe->DW47.NumRegionMinus1        = m_walkingPatternParam.dwNumRegion - 1;
5512 
5513     if(m_pictureCodingType != I_TYPE)
5514     {
5515         curbe->DW48.CurrentTdL0_0 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][0]);
5516         curbe->DW48.CurrentTdL0_1 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][1]);
5517         curbe->DW49.CurrentTdL0_2 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][2]);
5518         curbe->DW49.CurrentTdL0_3 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][3]);
5519         if(m_pictureCodingType == B_TYPE) {
5520             curbe->DW50.CurrentTdL1_0 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[1][0]);
5521             curbe->DW50.CurrentTdL1_1 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[1][1]);
5522         }
5523     }
5524 
5525     curbe->DW52.NumofUnitInRegion          = m_walkingPatternParam.dwNumUnitsInRegion;
5526     curbe->DW52.MaxHeightInRegion          = m_walkingPatternParam.dwMaxHeightInRegion;
5527 
5528     // Intra refresh is enabled. Program related CURBE fields
5529     if (m_hevcPicParams->bEnableRollingIntraRefresh)
5530     {
5531         curbe->DW35.IntraRefreshEn         = true;
5532         curbe->DW35.FirstIntraRefresh      = m_firstIntraRefresh;
5533         curbe->DW35.HalfUpdateMixedLCU     = 0;
5534         curbe->DW35.EnableRollingIntra     = true;
5535 
5536         curbe->DW38.NumFrameInGOB            = m_frameNumInGob;
5537         curbe->DW38.NumIntraRefreshOffFrames = m_frameNumWithoutIntraRefresh;
5538 
5539         curbe->DW51.IntraRefreshQPDelta  = m_hevcPicParams->QpDeltaForInsertedIntra;
5540         curbe->DW51.IntraRefreshMBNum    = m_hevcPicParams->IntraInsertionLocation;
5541         curbe->DW51.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
5542 
5543         curbe->DW53.IntraRefreshRefHeight = 40;
5544         curbe->DW53.IntraRefreshRefWidth  = 48;
5545 
5546         m_firstIntraRefresh               = false;
5547         m_frameNumWithoutIntraRefresh     = 0;
5548     }
5549     else if (m_pictureCodingType != I_TYPE) // don't increment num frames w/o refresh in case of TU7 I frames
5550     {
5551         m_frameNumWithoutIntraRefresh++;
5552     }
5553 
5554     PBFrameKernelParams PB8x8MbEncParams;
5555     MOS_ZeroMemory(&PB8x8MbEncParams, sizeof(PB8x8MbEncParams));
5556 
5557     PB8x8MbEncParams.m_width = curbe->DW6.FrameWidth;
5558     PB8x8MbEncParams.m_height = curbe->DW6.FrameHeight;
5559 
5560     for(uint32_t surfaceIdx = 0; surfaceIdx < 8; surfaceIdx++)
5561     {
5562         CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][surfaceIdx];
5563         if (!CodecHal_PictureIsInvalid(refPic) &&
5564             !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
5565         {
5566             uint8_t idx                                                 = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
5567             PB8x8MbEncParams.m_cmSurfRef0[PB8x8MbEncParams.m_ucRefNum0] = &m_refList[idx]->sRefBuffer.OsResource;
5568             PB8x8MbEncParams.m_ucRefNum0++;
5569         }
5570 
5571         refPic = m_hevcSliceParams->RefPicList[LIST_1][surfaceIdx];
5572         if (!CodecHal_PictureIsInvalid(refPic) &&
5573             !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
5574         {
5575             uint8_t idx                                                 = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
5576             PB8x8MbEncParams.m_cmSurfRef1[PB8x8MbEncParams.m_ucRefNum1] = &m_refList[idx]->sRefBuffer.OsResource;
5577             PB8x8MbEncParams.m_ucRefNum1++;
5578         }
5579     }
5580 
5581     PB8x8MbEncParams.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource;
5582     PB8x8MbEncParams.m_cmSurfPOCDbuf = &m_resMbCodeSurface;
5583     PB8x8MbEncParams.m_bufSize = m_mbCodeSize - m_mvOffset;
5584     PB8x8MbEncParams.m_bufOffset = m_mvOffset;
5585     if(mbCodeIdxForTempMVP == 0xFF)
5586     {
5587         PB8x8MbEncParams.m_cmSurfColRefData = nullptr;
5588     }
5589     else
5590     {
5591         PB8x8MbEncParams.m_cmSurfColRefData = m_trackedBuf->GetMvTemporalBuffer(mbCodeIdxForTempMVP);
5592     }
5593     PB8x8MbEncParams.m_cmSurfIntraDist = &m_intraDist.sResource;
5594     PB8x8MbEncParams.m_cmSurfMinDist = &m_minDistortion.OsResource;
5595     PB8x8MbEncParams.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource;
5596     PB8x8MbEncParams.m_cmSurfVMEIN = &m_vmeSavedUniSic.sResource;
5597     PB8x8MbEncParams.m_cmSurfSIF = &m_simplestIntraSurface.OsResource;
5598     PB8x8MbEncParams.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel);
5599     PB8x8MbEncParams.m_cmLCUQPSurf = &m_lcuQP.OsResource;
5600     PB8x8MbEncParams.m_cmBRCConstSurf   = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource;
5601     PB8x8MbEncParams.m_cmWaveFrontMap = &m_concurrentThreadSurface[m_concurrentThreadIndex].OsResource;
5602     if (++m_concurrentThreadIndex >= NUM_CONCURRENT_THREAD)
5603     {
5604         m_concurrentThreadIndex = 0;
5605     }
5606     PB8x8MbEncParams.m_cmSurfMVIndex = &m_mvIndex.sResource;
5607     PB8x8MbEncParams.m_cmSurfMVPred = &m_mvpIndex.sResource;
5608     if (m_feiPicParams->MVPredictorInput)
5609     {
5610         PB8x8MbEncParams.m_cmSurfMVPredictor = &m_feiPicParams->resMVPredictor;
5611     }
5612     else
5613     {
5614         PB8x8MbEncParams.m_cmSurfMVPredictor = nullptr;
5615     }
5616 
5617     if (m_feiPicParams->bPerCTBInput)
5618     {
5619         PB8x8MbEncParams.m_cmSurfPerCTBInput = &m_feiPicParams->resCTBCtrl;
5620     }
5621     else
5622     {
5623         PB8x8MbEncParams.m_cmSurfPerCTBInput = nullptr;
5624     }
5625 
5626     //to avoid multi contexts in case per-frame control of FastIntraMode, always use 2xScaling kernel to initialize the context.
5627     if (m_cmKernelMap.count("2xScaling") == 0)
5628     {
5629         m_cmKernelMap["2xScaling"] = new CMRTKernelDownScalingUMD();
5630         m_cmKernelMap["2xScaling"]->Init((void *)m_osInterface->pOsContext);
5631     }
5632 
5633     //in case PB_32x32 isn't initialized when using FastIntraMode for per-frame control (I: disable; P/B: enable)
5634     if (m_cmKernelMap.count("PB_32x32") == 0)
5635     {
5636         m_cmKernelMap["PB_32x32"] = new CMRTKernelPB32x32UMD();
5637         m_cmKernelMap["PB_32x32"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, nullptr);
5638     }
5639 
5640     if (m_pictureCodingType == I_TYPE && m_feiPicParams->FastIntraMode)
5641     {
5642         if (m_cmKernelMap.count("I_8x8_MBENC") == 0)
5643         {
5644             m_cmKernelMap["I_8x8_MBENC"] = new CMRTKernelB8x8MbEncUMD();
5645             m_cmKernelMap["I_8x8_MBENC"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["PB_32x32"]->m_cmProgram);
5646         }
5647 
5648         m_cmKernelMap["I_8x8_MBENC"]->SetupCurbe(curbe);
5649         m_cmKernelMap["I_8x8_MBENC"]->AllocateSurfaces(&PB8x8MbEncParams);
5650 
5651         //No need to wait for task finished
5652         m_cmEvent = CM_NO_EVENT;
5653         m_cmKernelMap["I_8x8_MBENC"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
5654     }
5655     else if (m_pictureCodingType == B_TYPE)
5656     {
5657         if (m_cmKernelMap.count("B_8x8_MBENC") == 0)
5658         {
5659             m_cmKernelMap["B_8x8_MBENC"] = new CMRTKernelB8x8MbEncUMD();
5660             m_cmKernelMap["B_8x8_MBENC"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["PB_32x32"]->m_cmProgram);
5661         }
5662 
5663         m_cmKernelMap["B_8x8_MBENC"]->SetupCurbe(curbe);
5664         m_cmKernelMap["B_8x8_MBENC"]->AllocateSurfaces(&PB8x8MbEncParams);
5665 
5666         //No need to wait for task finished
5667         m_cmEvent = CM_NO_EVENT;
5668         m_cmKernelMap["B_8x8_MBENC"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
5669     }
5670     else if (m_pictureCodingType == P_TYPE)
5671     {
5672         if (m_cmKernelMap.count("P_8x8_MBENC") == 0)
5673         {
5674             m_cmKernelMap["P_8x8_MBENC"] = new CMRTKernelP8x8MbEncUMD();
5675             m_cmKernelMap["P_8x8_MBENC"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["PB_32x32"]->m_cmProgram);
5676         }
5677         m_cmKernelMap["P_8x8_MBENC"]->SetupCurbe(curbe);
5678         m_cmKernelMap["P_8x8_MBENC"]->AllocateSurfaces(&PB8x8MbEncParams);
5679 
5680         //No need to wait for task finished
5681         m_cmEvent = CM_NO_EVENT;
5682         m_cmKernelMap["P_8x8_MBENC"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
5683     }
5684 
5685     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_B_MBENC;
5686     if (m_pictureCodingType == P_TYPE)
5687     {
5688         //P frame curbe only use the DW0~DW75
5689         CODECHAL_DEBUG_TOOL(
5690             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
5691                 encFunctionType,
5692                 (uint8_t *)curbe, sizeof(*curbe) - sizeof(uint32_t)));
5693         )
5694     }
5695     else
5696     {
5697         CODECHAL_DEBUG_TOOL(
5698             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
5699                 encFunctionType,
5700                 (uint8_t *)curbe, sizeof(*curbe)));
5701         )
5702     }
5703 
5704     m_lastTaskInPhase = true;
5705     eStatus = Encode8x8BPakKernel(curbe);
5706     return eStatus;
5707 }
5708 
5709 #endif
5710 
EncodeKernelFunctions()5711 MOS_STATUS CodechalFeiHevcStateG9Skl::EncodeKernelFunctions()
5712 {
5713     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5714 
5715     CODECHAL_ENCODE_FUNCTION_ENTER;
5716 
5717     m_feiPicParams = (CodecEncodeHevcFeiPicParams *)m_encodeParams.pFeiPicParams;
5718 
5719     CODECHAL_DEBUG_TOOL(
5720         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
5721             m_rawSurfaceToEnc,
5722             CodechalDbgAttr::attrEncodeRawInputSurface,
5723             "SrcSurf"));
5724     )
5725 
5726     if (m_pakOnlyTest)
5727     {
5728         // Skip all ENC kernel operations for now it is in the PAK only test mode.
5729         // PAK and CU records will be passed via the app
5730         return eStatus;
5731     }
5732 
5733     if (m_brcEnabled || m_hmeEnabled)
5734     {
5735         eStatus = MOS_STATUS_UNKNOWN;
5736         CODECHAL_ENCODE_ASSERTMESSAGE("HEVC FEI does not support BRC and HMEenabled.");
5737         return eStatus;
5738     }
5739 
5740     if(m_osInterface->bSimIsActive)
5741     {
5742         MOS_LOCK_PARAMS lockFlags;
5743         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
5744         lockFlags.WriteOnly = 1;
5745 
5746         uint8_t*  data = (uint8_t* )m_osInterface->pfnLockResource(m_osInterface, &m_resMbCodeSurface, &lockFlags);
5747         if (data)
5748         {
5749             MOS_ZeroMemory(data, m_mbCodeSize);
5750             m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
5751         }
5752     }
5753 
5754     // Generate slice map for kernel
5755     CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateSliceMap());
5756 
5757     //Reset to use a different performance tag ID for I kernels. Each kernel has a different buffer ID
5758     m_osInterface->pfnResetPerfBufferID(m_osInterface);
5759 
5760     m_firstTaskInPhase = true;
5761     m_lastTaskInPhase  = false;
5762 
5763     // ROI uses the BRC LCU update kernel, even in CQP.  So we will call it
5764     // first if in CQP.  It has no other kernel execution dependencies, even
5765     // that brc is not initialized is not a dependency
5766     if (m_hevcPicParams->NumROI && !m_brcEnabled)
5767     {
5768         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcUpdateLCUBasedKernel(nullptr));
5769     }
5770 
5771     // config LCU QP input
5772     if (m_encodeParams.bMbQpDataEnabled)
5773     {
5774         // Setup Lamda/Cost table for LCU QP mode
5775         auto psBrcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
5776         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(psBrcConstantData));
5777 
5778         if (m_encodeParams.psMbQpDataSurface)
5779         {
5780             CODECHAL_ENCODE_CHK_STATUS_RETURN(Convert1byteTo2bytesQPperLCU(m_encodeParams.psMbQpDataSurface, &m_lcuQP));
5781             m_surfaceParams[SURFACE_LCU_QP].psSurface = &m_lcuQP;
5782         }
5783     }
5784 
5785     CODECHAL_DEBUG_TOOL(
5786         if (m_feiPicParams->bPerBlockQP) {
5787             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
5788                 &m_lcuQP,
5789                 CodechalDbgAttr::attrInput,
5790                 "HEVC_B_MBENC_MB_QP",
5791                 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
5792         }
5793 
5794         if (m_feiPicParams->MVPredictorInput) {
5795             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5796                 &m_feiPicParams->resMVPredictor,
5797                 "HEVC_B_MBENC_ConstantData_In",
5798                 CodechalDbgAttr::attrInput,
5799                 m_feiPicParams->resMVPredictor.iSize,
5800                 0,
5801                 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
5802         })
5803 
5804     if(m_feiPicParams->FastIntraMode)
5805     {
5806         if (m_hevcPicParams->CodingType == I_TYPE)
5807         {
5808             CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PBMbEncKernel());
5809         }
5810     }
5811     else
5812     {
5813         //Step 1: perform 2:1 down-scaling
5814         if (m_hevcSeqParams->bit_depth_luma_minus8 == 0)  // use this for 8 bit only case.
5815         {
5816             CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode2xScalingKernel());
5817         }
5818 
5819         //Step 2: 32x32 PU Mode Decision or 32x32 PU Intra check kernel
5820         if (m_hevcPicParams->CodingType == I_TYPE)
5821         {
5822             CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode32x32PuModeDecisionKernel());
5823         }
5824         else
5825         {
5826             CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode32X32BIntraCheckKernel());
5827         }
5828 
5829         //Step 3: 16x16 SAD Computation
5830         CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode16x16SadPuComputationKernel());
5831 
5832         CODECHAL_DEBUG_TOOL(
5833             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5834                 &m_sad16x16Pu.sResource,
5835                 CodechalDbgAttr::attrOutput,
5836                 "HEVC_16x16_PU_SAD_Out",
5837                 m_sad16x16Pu.dwSize,
5838                 0,
5839                 CODECHAL_MEDIA_STATE_16x16_PU_SAD));
5840         )
5841 
5842         //Step 4: 16x16 PU Mode Decision
5843         CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode16x16PuModeDecisionKernel());
5844 
5845         CODECHAL_DEBUG_TOOL(
5846             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5847                 &m_vme8x8Mode.sResource,
5848                 CodechalDbgAttr::attrOutput,
5849                 "HEVC_16x16_PU_MD_Out",
5850                 m_vme8x8Mode.dwSize,
5851                 0,
5852                 CODECHAL_MEDIA_STATE_16x16_PU_MODE_DECISION));
5853         )
5854 
5855         //Step 5: 8x8 PU
5856         CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PUKernel());
5857 
5858         //Step 6: 8x8 PU FMODE
5859         m_lastTaskInPhase = true;
5860         CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PUFMODEKernel());
5861 
5862         CODECHAL_DEBUG_TOOL(
5863             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
5864                                 &m_scaled2xSurface,
5865                                 CodechalDbgAttr::attrReferenceSurfaces,
5866                                 "2xScaledSurf"))
5867 
5868             if (m_pictureCodingType == I_TYPE)
5869             {
5870                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5871                     &m_32x32PuOutputData.sResource,
5872                     CodechalDbgAttr::attrOutput,
5873                     "HEVC_32x32_PU_MD_Out",
5874                     m_32x32PuOutputData.dwSize,
5875                     0,
5876                     CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION));
5877             }
5878             else
5879             {
5880                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5881                     &m_32x32PuOutputData.sResource,
5882                     CodechalDbgAttr::attrOutput,
5883                     "HEVC_32x32_B_INTRA_CHECK_Out",
5884                     m_32x32PuOutputData.dwSize,
5885                     0,
5886                     CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION));
5887 
5888             }
5889 
5890             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5891                 &m_intraMode.sResource,
5892                 CodechalDbgAttr::attrOutput,
5893                 "HEVC_8x8_PU_MD_Out",
5894                 m_intraMode.dwSize,
5895                 0,
5896                 CODECHAL_MEDIA_STATE_8x8_PU));
5897 
5898             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5899                 &m_intraDist.sResource,
5900                 CodechalDbgAttr::attrOutput,
5901                 "HEVC_8x8_PU_FMOD_Out",
5902                 m_intraDist.dwSize,
5903                 0,
5904                 CODECHAL_MEDIA_STATE_8x8_PU_FMODE));
5905         )
5906     }
5907 
5908     // Sync-wait can be executed after I-kernel is submitted before there is no dependency for I to wait for PAK to be ready
5909     CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForPak());
5910 
5911     //Step 7: B MB ENC kernel for B picture only
5912     if (m_hevcPicParams->CodingType != I_TYPE)
5913     {
5914         m_firstTaskInPhase = true;
5915         m_lastTaskInPhase = false;
5916 
5917         if (m_feiPicParams->MVPredictorInput)
5918         {
5919             CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5920                 &m_surfaceParams[SURFACE_FEI_EXTERNAL_MVP],
5921                 &m_feiPicParams->resMVPredictor,
5922                 m_feiPicParams->resMVPredictor.iSize,
5923                 0,
5924                 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5925                 0,
5926                 false));
5927         }
5928 
5929         if ((m_hevcSeqParams->bit_depth_luma_minus8))
5930         {
5931             bool formatConversionDone[NUM_FORMAT_CONV_FRAMES] = { false };
5932             formatConversionDone[0] = true; // always true since its for the input surface.
5933 
5934             for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
5935             {
5936                 if (!m_picIdx[i].bValid || !m_currUsedRefPic[i])
5937                 {
5938                     continue;
5939                 }
5940 
5941                 uint8_t picIdx = m_picIdx[i].ucPicIdx;
5942                 CODECHAL_ENCODE_ASSERT(picIdx < 127);
5943 
5944                 uint8_t frameStoreId = (uint8_t)m_refIdxMapping[i];
5945 
5946                 if (frameStoreId >= CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC)
5947                 {
5948                     CODECHAL_ENCODE_ASSERT(0);
5949                     eStatus = MOS_STATUS_INVALID_PARAMETER;
5950                     return eStatus;
5951                 }
5952 
5953                 if (formatConversionDone[frameStoreId + 1] != true)
5954                 {
5955                     CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeDSCombinedKernel(dsDisabled, (frameStoreId + 1), picIdx));
5956                     formatConversionDone[frameStoreId + 1] = true;
5957                     m_refList[picIdx]->sRefBuffer          = m_formatConvertedSurface[frameStoreId + 1];
5958                 }
5959             }
5960         }
5961 
5962         CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PBMbEncKernel());
5963     }
5964 #ifdef HEVC_FEI_ENABLE_CMRT
5965 
5966     for (CmKernelMapType::iterator it = m_cmKernelMap.begin(); it != m_cmKernelMap.end(); it++)
5967     {
5968         it->second->DestroySurfResources();
5969     }
5970 
5971 #endif
5972 
5973     // Notify PAK engine once ENC is done
5974     if (!m_pakOnlyTest && !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
5975     {
5976         MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
5977         syncParams.GpuContext = m_renderContext;
5978         syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
5979 
5980         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
5981     }
5982 
5983     return eStatus;
5984 }
5985 
Initialize(CodechalSetting * settings)5986 MOS_STATUS CodechalFeiHevcStateG9Skl::Initialize(CodechalSetting * settings)
5987 {
5988     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5989 
5990     CODECHAL_ENCODE_FUNCTION_ENTER;
5991 
5992     // common initilization
5993     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::Initialize(settings));
5994 
5995     m_cscDsState->EnableMmc();
5996 
5997     m_brcBuffers.dwBrcConstantSurfaceWidth  = BRC_CONSTANT_SURFACE_WIDTH;
5998     m_brcBuffers.dwBrcConstantSurfaceHeight = BRC_CONSTANT_SURFACE_HEIGHT;
5999 
6000     // LCU size is 32x32 in Gen9
6001     m_widthAlignedMaxLcu  = MOS_ALIGN_CEIL(m_frameWidth, 32);
6002     m_heightAlignedMaxLcu = MOS_ALIGN_CEIL(m_frameHeight, 32);
6003 
6004     m_brcEnabled              = false;
6005     m_hmeEnabled              = false;
6006     m_hmeSupported            = false;
6007     m_16xMeUserfeatureControl = false;
6008     m_16xMeSupported          = false;
6009     m_32xMeUserfeatureControl = false;
6010     m_32xMeSupported          = false;
6011 
6012     // regkey setup
6013     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
6014     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6015     MOS_UserFeature_ReadValue_ID(
6016         nullptr,
6017         __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
6018         &userFeatureData,
6019         m_osInterface->pOsContext);
6020     m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false;
6021 
6022     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6023     MOS_UserFeature_ReadValue_ID(
6024         nullptr,
6025         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_26Z_ENABLE_ID,
6026         &userFeatureData,
6027         m_osInterface->pOsContext);
6028     m_enable26WalkingPattern = (userFeatureData.i32Data) ? false : true;
6029 
6030     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6031     eStatus = MOS_UserFeature_ReadValue_ID(
6032         nullptr,
6033         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_REGION_NUMBER_ID,
6034         &userFeatureData,
6035         m_osInterface->pOsContext);
6036 
6037     if (eStatus == MOS_STATUS_SUCCESS)
6038     {
6039         // Region number must be greater than 1
6040         m_numRegionsInSlice = (userFeatureData.i32Data < 1) ? 1 : userFeatureData.i32Data;
6041     }
6042     else
6043     {
6044         // Reset the status to success if regkey is not set
6045         eStatus = MOS_STATUS_SUCCESS;
6046     }
6047 
6048     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6049     MOS_UserFeature_ReadValue_ID(
6050         nullptr,
6051         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_NUM_8x8_INTRA_KERNEL_SPLIT,
6052         &userFeatureData,
6053         m_osInterface->pOsContext);
6054     m_numMb8x8IntraKernelSplit = (userFeatureData.i32Data < 0) ? 0 : userFeatureData.i32Data;
6055 
6056     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6057     MOS_UserFeature_ReadValue_ID(
6058         nullptr,
6059         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_NUM_B_KERNEL_SPLIT,
6060         &userFeatureData,
6061         m_osInterface->pOsContext);
6062     m_numMbBKernelSplit = (userFeatureData.i32Data < 0) ? 0 : userFeatureData.i32Data;
6063 
6064     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6065     MOS_UserFeature_ReadValue_ID(
6066         nullptr,
6067         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_POWER_SAVING,
6068         &userFeatureData,
6069         m_osInterface->pOsContext);
6070     m_powerSavingEnabled = (userFeatureData.i32Data) ? true : false;
6071 
6072     if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit))
6073     {
6074         /* Make the width aligned to a multiple of 32 and then get the no of macroblocks.*/
6075         /* This is done to facilitate the use of format conversion kernel for downscaling to 4x and 2x along with formatconversion of 10 bit data to 8 bit data.
6076         Refer format conversion kernel for further details .
6077         We will use only 4x downscale for HME, Super and ultra HME use the traditional scaling kernels.
6078         */
6079         uint32_t downscaledSurfaceWidth4x = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x* CODECHAL_MACROBLOCK_WIDTH), (CODECHAL_MACROBLOCK_WIDTH * 2));
6080         m_downscaledWidthInMb4x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(downscaledSurfaceWidth4x);
6081 
6082     }
6083 
6084     return eStatus;
6085 }
6086 
GetMaxBtCount()6087 uint32_t CodechalFeiHevcStateG9Skl::GetMaxBtCount()
6088 {
6089     auto wBtIdxAlignment = m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
6090 
6091     // 6 I kernels
6092     uint32_t uiBtCountPhase1 = MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_2xSCALING].KernelParams.iBTCount, wBtIdxAlignment) +
6093                                MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_16x16SAD].KernelParams.iBTCount, wBtIdxAlignment) +
6094                                MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_16x16MD].KernelParams.iBTCount, wBtIdxAlignment) +
6095                                MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_8x8PU].KernelParams.iBTCount, wBtIdxAlignment) +
6096                                MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_8x8FMODE].KernelParams.iBTCount, wBtIdxAlignment);
6097 
6098     uiBtCountPhase1 += MOS_MAX(
6099         MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_32x32MD].KernelParams.iBTCount, wBtIdxAlignment),
6100         MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_32x32INTRACHECK].KernelParams.iBTCount, wBtIdxAlignment));
6101 
6102     if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit))
6103     {
6104         uiBtCountPhase1 += MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_FEI_MBENC_DS_COMBINED].KernelParams.iBTCount, wBtIdxAlignment);
6105     }
6106 
6107     // two B kernels
6108     uint32_t uiBtCountPhase2 = MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_FEI_MBENC_BENC].KernelParams.iBTCount, wBtIdxAlignment) +
6109                                MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_FEI_MBENC_BPAK].KernelParams.iBTCount, wBtIdxAlignment);
6110 
6111     uint32_t uiMaxBtCount = MOS_MAX(uiBtCountPhase1, uiBtCountPhase2);
6112 
6113     return uiMaxBtCount;
6114 }
6115 
AllocateEncResources()6116 MOS_STATUS CodechalFeiHevcStateG9Skl::AllocateEncResources()
6117 {
6118     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6119 
6120     CODECHAL_ENCODE_FUNCTION_ENTER;
6121 
6122     m_sliceMap = (PCODECHAL_ENCODE_HEVC_SLICE_MAP)MOS_AllocAndZeroMemory(
6123         m_widthAlignedMaxLcu * m_heightAlignedMaxLcu * sizeof(m_sliceMap[0]));
6124     CODECHAL_ENCODE_CHK_NULL_RETURN(m_sliceMap);
6125 
6126     uint32_t Downscaling2xWidth  = m_widthAlignedMaxLcu >> 1;
6127     uint32_t Downscaling2xHeight = m_heightAlignedMaxLcu >> 1;
6128     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurface(
6129         &m_scaled2xSurface,
6130         Downscaling2xWidth,
6131         Downscaling2xHeight,
6132         "2x Downscaling"));
6133 
6134     uint32_t uiWidth  = m_widthAlignedMaxLcu >> 3;
6135     uint32_t uiHeight = m_heightAlignedMaxLcu >> 5;
6136     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
6137         &m_sliceMapSurface,
6138         uiWidth,
6139         uiHeight,
6140         "Slice Map"));
6141 
6142     uint32_t uiSize = 32 * (m_widthAlignedMaxLcu >> 5) * (m_heightAlignedMaxLcu >> 5);
6143     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6144         &m_32x32PuOutputData,
6145         uiSize,
6146         "32x32 PU Output Data"));
6147 
6148     uiSize = 8 * 4 * (m_widthAlignedMaxLcu >> 4) * (m_heightAlignedMaxLcu >> 4);
6149     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6150         &m_sad16x16Pu,
6151         uiSize,
6152         "SAD 16x16 PU"));
6153 
6154     // need 64 bytes for statistics report .
6155     uiSize = 64 * (m_widthAlignedMaxLcu >> 4) * (m_heightAlignedMaxLcu >> 4);
6156     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6157         &m_vme8x8Mode,
6158         uiSize,
6159         "VME 8x8 mode"));
6160 
6161     uiSize = 32 * (m_widthAlignedMaxLcu >> 3) * (m_heightAlignedMaxLcu >> 3);
6162     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6163         &m_intraMode,
6164         uiSize,
6165         "Intra mode"));
6166 
6167     uiSize = 16 * (m_widthAlignedMaxLcu >> 4) * (m_heightAlignedMaxLcu >> 4);
6168     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6169         &m_intraDist,
6170         uiSize,
6171         "Intra dist"));
6172 
6173     // Change the surface size
6174     uiWidth  = m_widthAlignedMaxLcu >> 1;
6175     uiHeight = m_heightAlignedMaxLcu >> 4;
6176     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
6177         &m_minDistortion,
6178         uiWidth,
6179         uiHeight,
6180         "Min distortion surface"));
6181 
6182     // Allocate FEI 2D 2bytes LCU QP surface
6183     uiWidth  = MOS_ALIGN_CEIL((m_widthAlignedMaxLcu >> 4), 64);
6184     uiHeight = MOS_ALIGN_CEIL((m_heightAlignedMaxLcu >> 5), 4);
6185     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
6186         &m_lcuQP,
6187         uiWidth,
6188         uiHeight,
6189         "LCU_QP surface"));
6190 
6191     uiWidth = sizeof(CODECHAL_ENCODE_HEVC_WALKING_CONTROL_REGION);
6192     uiHeight = HEVC_CONCURRENT_SURFACE_HEIGHT;
6193     for (uint32_t i = 0; i < NUM_CONCURRENT_THREAD; i++)
6194     {
6195         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
6196             &m_concurrentThreadSurface[i],
6197             uiWidth,
6198             uiHeight,
6199             "Concurrent Thread"));
6200     }
6201 
6202     //uiSize = (dwWidthAlignedMaxLCU * dwHeightAlignedMaxLCU / 4);
6203     uiSize = (m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 4) + GPUMMU_WA_PADDING;
6204     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6205         &m_mvIndex,
6206         uiSize,
6207         "MV index surface"));
6208 
6209     //uiSize = (dwWidthAlignedMaxLCU * dwHeightAlignedMaxLCU / 2);
6210     uiSize = (m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 2) + GPUMMU_WA_PADDING;
6211     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6212         &m_mvpIndex,
6213         uiSize,
6214         "MVP index surface"));
6215 
6216     uiSize = m_widthAlignedMaxLcu * m_heightAlignedMaxLcu;
6217     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6218         &m_vmeSavedUniSic,
6219         uiSize,
6220         "VME Saved UniSic surface"));
6221 
6222     uiWidth  = m_widthAlignedMaxLcu >> 3;
6223     uiHeight = m_heightAlignedMaxLcu >> 5;
6224     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
6225         &m_simplestIntraSurface,
6226         uiWidth,
6227         uiHeight,
6228         "Simplest Intra surface"));
6229 
6230     m_allocator->AllocateResource(m_standard, 1024, 1, brcInputForEncKernel, "brcInputForEncKernel", true);
6231 
6232     if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit))
6233     {
6234         // adding 10 bit support for KBL : output surface for format conversion from 10bit to 8 bit
6235         for (uint32_t i = 0; i < NUM_FORMAT_CONV_FRAMES; i++)
6236         {
6237             if (Mos_ResourceIsNull(&m_formatConvertedSurface[i].OsResource))
6238             {
6239                 uiWidth  = m_widthAlignedMaxLcu;
6240                 uiHeight = m_heightAlignedMaxLcu;
6241 
6242                 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurface(
6243                     &m_formatConvertedSurface[i],
6244                     uiWidth,
6245                     uiHeight,
6246                     "Format Converted Surface"));
6247             }
6248         }
6249 
6250         if (Mos_ResourceIsNull(&m_resMbStatisticsSurface.sResource))
6251         {
6252             uiSize = 52 * m_picWidthInMb * m_picHeightInMb; // 13 DWs or 52 bytes for statistics per MB
6253 
6254             CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6255                 &m_resMbStatisticsSurface,
6256                 uiSize,
6257                 "MB stats surface"));
6258         }
6259     }
6260 
6261     // ROI
6262     // ROI buffer size uses MB units for HEVC, not LCU
6263     uiWidth  = MOS_ALIGN_CEIL(m_picWidthInMb * 4, 64);
6264     uiHeight = MOS_ALIGN_CEIL(m_picHeightInMb, 8);
6265 
6266     MOS_ZeroMemory(&m_roiSurface, sizeof(m_roiSurface));
6267     m_roiSurface.TileType       = MOS_TILE_LINEAR;
6268     m_roiSurface.bArraySpacing  = true;
6269     m_roiSurface.Format         = Format_Buffer_2D;
6270     m_roiSurface.dwWidth        = uiWidth;
6271     m_roiSurface.dwPitch        = uiWidth;
6272     m_roiSurface.dwHeight       = uiHeight;
6273 
6274     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
6275         &m_roiSurface,
6276         uiWidth,
6277         uiHeight,
6278         "ROI Buffer"));
6279 
6280     return eStatus;
6281 }
6282 
FreeEncResources()6283 MOS_STATUS CodechalFeiHevcStateG9Skl::FreeEncResources()
6284 {
6285     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6286 
6287     CODECHAL_ENCODE_FUNCTION_ENTER;
6288 
6289     MOS_Delete(m_meKernelState);
6290     m_meKernelState = nullptr;
6291     MOS_FreeMemory(m_meKernelBindingTable);
6292     m_meKernelBindingTable = nullptr;
6293 
6294     MOS_DeleteArray(m_mbEncKernelStates);
6295     m_mbEncKernelStates = nullptr;
6296     MOS_FreeMemory(m_mbEncKernelBindingTable);
6297     m_mbEncKernelBindingTable = nullptr;
6298 
6299     MOS_DeleteArray(m_brcKernelStates);
6300     m_brcKernelStates = nullptr;
6301     MOS_FreeMemory(m_brcKernelBindingTable);
6302     m_brcKernelBindingTable = nullptr;
6303 
6304     MOS_FreeMemory(m_surfaceParams); m_surfaceParams = nullptr;
6305 
6306     for (auto i = 0; i < NUM_FORMAT_CONV_FRAMES; i++)
6307     {
6308         m_osInterface->pfnFreeResource(
6309             m_osInterface,
6310             &m_formatConvertedSurface[i].OsResource);
6311     }
6312 
6313     m_osInterface->pfnFreeResource(
6314         m_osInterface,
6315         &m_scaled2xSurface.OsResource);
6316 
6317     m_osInterface->pfnFreeResource(
6318         m_osInterface,
6319         &m_resMbStatisticsSurface.sResource);
6320 
6321     m_osInterface->pfnFreeResource(
6322         m_osInterface,
6323         &m_sliceMapSurface.OsResource);
6324 
6325     m_osInterface->pfnFreeResource(
6326         m_osInterface,
6327         &m_32x32PuOutputData.sResource);
6328 
6329     m_osInterface->pfnFreeResource(
6330         m_osInterface,
6331         &m_sad16x16Pu.sResource);
6332 
6333     m_osInterface->pfnFreeResource(
6334         m_osInterface,
6335         &m_vme8x8Mode.sResource);
6336 
6337     m_osInterface->pfnFreeResource(
6338         m_osInterface,
6339         &m_intraMode.sResource);
6340 
6341     m_osInterface->pfnFreeResource(
6342         m_osInterface,
6343         &m_intraDist.sResource);
6344 
6345     m_osInterface->pfnFreeResource(
6346         m_osInterface,
6347         &m_mvIndex.sResource);
6348 
6349     m_osInterface->pfnFreeResource(
6350         m_osInterface,
6351         &m_mvpIndex.sResource);
6352 
6353     m_osInterface->pfnFreeResource(
6354         m_osInterface,
6355         &m_vmeSavedUniSic.sResource);
6356 
6357     m_osInterface->pfnFreeResource(
6358         m_osInterface,
6359         &m_minDistortion.OsResource);
6360 
6361     m_osInterface->pfnFreeResource(
6362         m_osInterface,
6363         &m_lcuQP.OsResource);
6364 
6365     for (uint32_t i = 0; i < NUM_CONCURRENT_THREAD; i++)
6366     {
6367         m_osInterface->pfnFreeResource(
6368             m_osInterface,
6369             &m_concurrentThreadSurface[i].OsResource);
6370     }
6371 
6372     m_osInterface->pfnFreeResource(
6373         m_osInterface,
6374         &m_simplestIntraSurface.OsResource);
6375 
6376     MOS_FreeMemory(m_sliceMap);
6377     m_sliceMap = nullptr;
6378 
6379     m_osInterface->pfnFreeResource(
6380         m_osInterface,
6381         &m_roiSurface.OsResource);
6382 
6383 #ifdef HEVC_FEI_ENABLE_CMRT
6384 
6385      for (CmKernelMapType::iterator it = m_cmKernelMap.begin(); it != m_cmKernelMap.end(); it++)
6386      {
6387           it->second->DestroyKernelResources();
6388      }
6389      if (m_cmKernelMap.count("2xScaling"))
6390      {
6391          m_cmKernelMap["2xScaling"]->DestroyProgramResources();
6392      }
6393      if (m_cmKernelMap.count("I_32x32"))
6394      {
6395          m_cmKernelMap["I_32x32"]->DestroyProgramResources();
6396      }
6397      if (m_cmKernelMap.count("PB_32x32"))
6398      {
6399          m_cmKernelMap["PB_32x32"]->DestroyProgramResources();
6400      }
6401      if (m_cmKernelMap.count("2xScaling"))
6402      {
6403          m_cmKernelMap["2xScaling"]->Destroy();
6404      }
6405 
6406      for (CmKernelMapType::iterator it = m_cmKernelMap.begin(); it != m_cmKernelMap.end(); it++)
6407      {
6408          delete it->second;
6409      }
6410 
6411      m_cmKernelMap.clear();
6412 
6413 #endif
6414 
6415     return eStatus;
6416 }
6417 
InitSurfaceInfoTable()6418 MOS_STATUS CodechalFeiHevcStateG9Skl::InitSurfaceInfoTable()
6419 {
6420     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6421 
6422     m_surfaceParams = (PCODECHAL_SURFACE_CODEC_PARAMS)MOS_AllocAndZeroMemory(
6423         sizeof(*m_surfaceParams) * SURFACE_NUM_TOTAL);
6424     CODECHAL_ENCODE_CHK_NULL_RETURN(m_surfaceParams);
6425 
6426     PCODECHAL_SURFACE_CODEC_PARAMS param = &m_surfaceParams[SURFACE_RAW_Y];
6427     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6428         param,
6429         m_rawSurfaceToEnc,
6430         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
6431         0,
6432         m_verticalLineStride,
6433         false));
6434 
6435     param = &m_surfaceParams[SURFACE_RAW_10bit_Y];
6436     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6437         param,
6438         m_rawSurfaceToEnc,
6439         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
6440         0,
6441         m_verticalLineStride,
6442         false));
6443 
6444     // MB stats surface -- currently not used
6445     param = &m_surfaceParams[SURFACE_RAW_MBSTAT];
6446     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6447         param,
6448         &m_resMbStatisticsSurface.sResource,
6449         m_resMbStatisticsSurface.dwSize,
6450         0,
6451         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6452         0,
6453         true));
6454     param->bRawSurface = true;
6455 
6456     param = &m_surfaceParams[SURFACE_RAW_FC_8bit_Y_UV];
6457     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6458         param,
6459         &m_formatConvertedSurface[0],
6460         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
6461         0,
6462         m_verticalLineStride,
6463         true));  //this should be writable as it is output of formatconversion
6464     param->bUseUVPlane = true;
6465 
6466     param = &m_surfaceParams[SURFACE_RAW_Y_UV];
6467     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6468            param,
6469            m_rawSurfaceToEnc,
6470            m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
6471            0,
6472            m_verticalLineStride,
6473            false));
6474     param->bUseUVPlane    = true;
6475 
6476     param = &m_surfaceParams[SURFACE_RAW_10bit_Y_UV];
6477     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6478             param,
6479             m_rawSurfaceToEnc,
6480             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
6481             0,
6482             m_verticalLineStride,
6483             false));//this should be writable as it is output of formatconversion
6484     param->bUseUVPlane = true;
6485 
6486     param = &m_surfaceParams[SURFACE_Y_2X];
6487     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6488         param,
6489         &m_scaled2xSurface,
6490         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6491         0,
6492         m_verticalLineStride,
6493         false));
6494 
6495     param = &m_surfaceParams[SURFACE_32x32_PU_OUTPUT];
6496     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6497         param,
6498         &m_32x32PuOutputData.sResource,
6499         m_32x32PuOutputData.dwSize,
6500         0,
6501         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6502         0,
6503         false));
6504 
6505     param = &m_surfaceParams[SURFACE_SLICE_MAP];
6506     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6507         param,
6508         &m_sliceMapSurface,
6509         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6510         0,
6511         m_verticalLineStride,
6512         false));
6513 
6514     param = &m_surfaceParams[SURFACE_Y_2X_VME];
6515     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
6516         param,
6517         &m_scaled2xSurface,
6518         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
6519         0));
6520 
6521     param = &m_surfaceParams[SURFACE_BRC_INPUT];
6522     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6523         param,
6524         (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel),
6525         m_allocator->GetResourceSize(m_standard, brcInputForEncKernel),
6526         0,
6527         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6528         0,
6529         false));
6530 
6531     param = &m_surfaceParams[SURFACE_LCU_QP];
6532     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6533         param,
6534         &m_lcuQP,
6535         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6536         0,
6537         m_verticalLineStride,
6538         false));
6539 
6540     param = &m_surfaceParams[SURFACE_ROI];
6541     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6542         param,
6543         &m_roiSurface,
6544         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6545         0,
6546         m_verticalLineStride,
6547         false));
6548 
6549     param = &m_surfaceParams[SURFACE_BRC_DATA];
6550     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6551         param,
6552         &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx],
6553         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6554         0,
6555         m_verticalLineStride,
6556         false));
6557 
6558     param = &m_surfaceParams[SURFACE_SIMPLIFIED_INTRA];
6559     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6560         param,
6561         &m_simplestIntraSurface,
6562         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6563         0,
6564         m_verticalLineStride,
6565         false));
6566 
6567     // skip SURFACE_HME_MVP and SURFACE_HME_DIST from HME since FEI alsways disables HME
6568 
6569     param = &m_surfaceParams[SURFACE_16x16PU_SAD];
6570     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6571         param,
6572         &m_sad16x16Pu.sResource,
6573         m_sad16x16Pu.dwSize,
6574         0,
6575         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6576         0,
6577         false));
6578 
6579     param = &m_surfaceParams[SURFACE_RAW_VME];
6580     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
6581         param,
6582         m_rawSurfaceToEnc,
6583         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
6584         0));
6585 
6586     param = &m_surfaceParams[SURFACE_VME_8x8];
6587     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6588         param,
6589         &m_vme8x8Mode.sResource,
6590         m_vme8x8Mode.dwSize,
6591         0,
6592         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6593         0,
6594         false));
6595 
6596     param = &m_surfaceParams[SURFACE_CU_RECORD];
6597     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6598         param,
6599         &m_resMbCodeSurface,
6600         m_mbCodeSize - m_mvOffset,
6601         m_mvOffset,
6602         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6603         0,
6604         true));
6605 
6606     param = &m_surfaceParams[SURFACE_INTRA_MODE];
6607     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6608         param,
6609         &m_intraMode.sResource,
6610         m_intraMode.dwSize,
6611         0,
6612         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6613         0,
6614         false));
6615 
6616     param = &m_surfaceParams[SURFACE_HCP_PAK];
6617     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6618         param,
6619         &m_resMbCodeSurface,
6620         m_mvOffset,
6621         0,
6622         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6623         0,
6624         true));
6625 
6626     param = &m_surfaceParams[SURFACE_INTRA_DIST];
6627     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6628         param,
6629         &m_intraDist.sResource,
6630         m_intraDist.dwSize,
6631         0,
6632         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6633         0,
6634         false));
6635 
6636     param = &m_surfaceParams[SURFACE_MIN_DIST];
6637     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6638         param,
6639         &m_minDistortion,
6640         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value,
6641         0,
6642         m_verticalLineStride,
6643         false));
6644 
6645     param = &m_surfaceParams[SURFACE_VME_UNI_SIC_DATA];
6646     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6647         param,
6648         &m_vmeSavedUniSic.sResource,
6649         m_vmeSavedUniSic.dwSize,
6650         0,
6651         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6652         0,
6653         false));
6654 
6655     param = &m_surfaceParams[SURFACE_COL_MB_MV];
6656     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6657         param,
6658         nullptr,
6659         m_sizeOfMvTemporalBuffer,
6660         0,
6661         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6662         0,
6663         false));
6664 
6665     m_concurrentThreadIndex = 0;
6666     for (uint32_t i = 0; i < NUM_CONCURRENT_THREAD; i++)
6667     {
6668         param = &m_surfaceParams[SURFACE_CONCURRENT_THREAD + i];
6669         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6670             param,
6671             &m_concurrentThreadSurface[i],
6672             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value,
6673             0,
6674             m_verticalLineStride,
6675             false));
6676     }
6677 
6678     param = &m_surfaceParams[SURFACE_MB_MV_INDEX];
6679     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6680         param,
6681         &m_mvIndex.sResource,
6682         m_mvIndex.dwSize,
6683         0,
6684         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6685         0,
6686         false));
6687 
6688     param = &m_surfaceParams[SURFACE_MVP_INDEX];
6689     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6690         param,
6691         &m_mvpIndex.sResource,
6692         m_mvpIndex.dwSize,
6693         0,
6694         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6695         0,
6696         false));
6697 
6698     param = &m_surfaceParams[SURFACE_REF_FRAME_VME];
6699     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
6700         param,
6701         0,
6702         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
6703         0));
6704 
6705     param = &m_surfaceParams[SURFACE_Y_4X];
6706     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6707         param,
6708         nullptr,
6709         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6710         0,
6711         m_verticalLineStride,
6712         false));
6713 
6714     param = &m_surfaceParams[SURFACE_Y_4X_VME];
6715     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
6716         param,
6717         nullptr,
6718         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
6719         0));
6720 
6721     param = &m_surfaceParams[SURFACE_BRC_HISTORY];
6722     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6723         param,
6724         &m_brcBuffers.resBrcHistoryBuffer,
6725         m_brcHistoryBufferSize,
6726         0,
6727         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6728         0,
6729         true));
6730 
6731     param = &m_surfaceParams[SURFACE_BRC_ME_DIST];
6732     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6733         param,
6734         &m_brcBuffers.sMeBrcDistortionBuffer,
6735         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6736         0,
6737         m_verticalLineStride,
6738         true));
6739 
6740     param = &m_surfaceParams[SURFACE_BRC_PAST_PAK_INFO];
6741     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6742         param,
6743         &m_brcBuffers.resBrcPakStatisticBuffer[0],
6744         m_hevcBrcPakStatisticsSize,
6745         0,
6746         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6747         0,
6748         false));
6749 
6750     param = &m_surfaceParams[SURFACE_BRC_HCP_PIC_STATE];
6751     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6752         param,
6753         &m_brcBuffers.resBrcImageStatesWriteBuffer[0],
6754         m_brcBuffers.dwBrcHcpPicStateSize,
6755         0,
6756         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6757         0,
6758         false));
6759 
6760 #if 0
6761     param = &m_surfaceParams[SURFACE_PU_STATS];
6762     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6763                 param,
6764                 &m_encStatsBuffers.m_puStatsSurface,
6765                 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6766                 0,
6767                 m_verticalLineStride,
6768                 true));
6769 
6770     param = &m_surfaceParams[SURFACE_8X8_PU_HAAR_DIST];
6771     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6772                 param,
6773                 &m_encStatsBuffers.m_8x8PuHaarDist,
6774                 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6775                 0,
6776                 m_verticalLineStride,
6777                 true));
6778 
6779     param = &m_surfaceParams[SURFACE_8X8_PU_FRAME_STATS];
6780     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6781                 param,
6782                 &m_encStatsBuffers.m_8x8PuFrameStats.sResource,
6783                 m_encStatsBuffers.m_8x8PuFrameStats.dwSize,
6784                 0,
6785                 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6786                 0,
6787                 true));
6788 
6789     param = &m_surfaceParams[SURFACE_MB_ENC_STATS];
6790     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6791                 param,
6792                 &m_encStatsBuffers.m_mbEncStatsSurface,
6793                 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6794                 0,
6795                 m_verticalLineStride,
6796                 true));
6797 
6798     param = &m_surfaceParams[SURFACE_MB_ENC_FRAME_STATS];
6799     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6800                 param,
6801                 &m_encStatsBuffers.m_mbEncFrameStats.sResource,
6802                 m_encStatsBuffers.m_mbEncFrameStats.dwSize,
6803                 0,
6804                 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6805                 0,
6806                 true));
6807 
6808     param = &m_surfaceParams[SURFACE_FEI_EXTERNAL_MVP];
6809     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6810         param,
6811         &m_feiPicParams->resMVPredictor,
6812         m_feiPicParams->resMVPredictor.iSize,
6813         0,
6814         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6815         0,
6816         false));
6817 
6818     param = &m_surfaceParams[SURFACE_FEI_PER_LCU_QP];
6819     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6820         param,
6821         &m_feiPicParams->resCTBQp,
6822         m_feiPicParams->resCTBQp.iSize,
6823         0,
6824         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6825         0,
6826         false));
6827 
6828     param = &m_surfaceParams[SURFACE_FEI_PER_CTB_CTRL];
6829     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6830         param,
6831         &m_feiPicParams->resCTBCtrl,
6832         m_feiPicParams->resCTBCtrl.iSize,
6833         0,
6834         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6835         0,
6836         false));
6837 
6838     param = &m_surfaceParams[SURFACE_FEI_CTB_DISTORTION];
6839     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6840         param,
6841         &m_feiPicParams->resDistortion,
6842         m_feiPicParams->resDistortion.iSize,
6843         0,
6844         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6845         0,
6846         false));
6847 #endif
6848 
6849     return eStatus;
6850 }
6851 
SetSequenceStructs()6852 MOS_STATUS CodechalFeiHevcStateG9Skl::SetSequenceStructs()
6853 {
6854     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6855 
6856     CODECHAL_ENCODE_FUNCTION_ENTER;
6857 
6858     m_feiPicParams = (CodecEncodeHevcFeiPicParams *)m_encodeParams.pFeiPicParams;
6859 
6860     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::SetSequenceStructs());
6861 
6862     m_enable26WalkingPattern                 = m_feiPicParams->bForceLCUSplit;
6863     m_numRegionsInSlice                      = m_feiPicParams->NumConcurrentEncFramePartition;
6864     m_encodeParams.bReportStatisticsEnabled  = 0;
6865     m_encodeParams.bQualityImprovementEnable = 0;
6866 
6867     if (m_feiPicParams->FastIntraMode)
6868     {
6869         m_hevcSeqParams->TargetUsage = 0x07;
6870     }
6871 
6872     return eStatus;
6873 }
6874 
CodechalFeiHevcStateG9Skl(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)6875 CodechalFeiHevcStateG9Skl::CodechalFeiHevcStateG9Skl(CodechalHwInterface* hwInterface,
6876     CodechalDebugInterface* debugInterface,
6877     PCODECHAL_STANDARD_INFO standardInfo)
6878     :CodechalEncHevcStateG9(hwInterface, debugInterface, standardInfo)
6879 {
6880     m_kernelBase = (uint8_t *)IGCODECKRN_G9;
6881     m_kuid = IDR_CODEC_HEVC_FEI_COMBINED_KENREL_INTEL;
6882     pfnGetKernelHeaderAndSize = GetKernelHeaderAndSize;
6883     m_noMeKernelForPFrame = false;
6884     m_feiEnable = true;
6885 
6886     MOS_STATUS eStatus = InitMhw();
6887     if (eStatus != MOS_STATUS_SUCCESS)
6888     {
6889         CODECHAL_ENCODE_ASSERTMESSAGE("HEVC FEI encoder MHW initialization failed.");
6890     }
6891 }
6892 
6893