1 /*
2 * Copyright (c) 2017-2020, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     codechal_vdenc_hevc_g11.cpp
24 //! \brief    HEVC VDEnc encoder for GEN11.
25 //!
26 
27 #include "codechal_vdenc_hevc_g11.h"
28 #include "codechal_kernel_header_g11.h"
29 #include "codeckrnheader.h"
30 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
31 #include "igcodeckrn_g11.h"
32 #endif
33 #include "mhw_vdbox_g11_X.h"
34 #include "mhw_vdbox_hcp_g11_X.h"
35 #include "mhw_vdbox_vdenc_g11_X.h"
36 #include "codechal_huc_cmd_initializer_g11.h"
37 #include "codechal_debug_encode_par_g11.h"
38 #ifdef _ENCODE_VDENC_RESERVED
39 #include "codechal_debug_encode_brc.h"
40 #endif
41 
42 const double CodechalVdencHevcStateG11::m_devThreshIFPNEG[] = {
43     0.80, 0.60, 0.34, 0.2,
44 };
45 
46 const double CodechalVdencHevcStateG11::m_devThreshIFPPOS[] = {
47     0.2, 0.4 , 0.66, 0.9,
48 };
49 
50 const double CodechalVdencHevcStateG11::m_devThreshPBFPNEG[] = {
51     0.90, 0.66, 0.46, 0.3,
52 };
53 
54 const double CodechalVdencHevcStateG11::m_devThreshPBFPPOS[] = {
55     0.3, 0.46, 0.70, 0.90,
56 };
57 
58 const double CodechalVdencHevcStateG11::m_devThreshVBRNEG[] = {
59     0.90, 0.70, 0.50, 0.3,
60 };
61 
62 const double CodechalVdencHevcStateG11::m_devThreshVBRPOS[] = {
63     0.4, 0.5, 0.75, 0.90,
64 };
65 
66 const int8_t CodechalVdencHevcStateG11::m_lowdelayDevThreshPB[] = {
67     -45, -33, -23, -15, -8, 0, 15, 25,
68 };
69 const int8_t CodechalVdencHevcStateG11::m_lowdelayDevThreshVBR[] = {
70     -45, -35, -25, -15, -8, 0, 20, 40,
71 };
72 const int8_t CodechalVdencHevcStateG11::m_lowdelayDevThreshI[] = {
73     -40, -30, -17, -10, -5, 0, 10, 20,
74 };
75 
76 const int8_t CodechalVdencHevcStateG11::m_lowdelayDeltaFrmszI[][8] = {
77     { 0,  0, -8, -12, -16, -20, -28, -36 },
78     { 0,  0, -4, -8, -12,  -16, -24, -32 },
79     { 4,  2,  0, -1, -3,  -8, -16, -24 },
80     { 8,  4,  2,  0, -1,  -4,  -8, -16 },
81     { 20, 16,  4,  0, -1,  -4,  -8, -16 },
82     { 24, 20, 16,  8,  4,   0,  -4, -8 },
83     { 28, 24, 20, 16,  8,   4,  0, -8 },
84     { 32, 24, 20, 16, 8,   4,   0, -4 },
85     { 64, 48, 28, 20, 16,  12,  8,  4 },
86 };
87 
88 const int8_t CodechalVdencHevcStateG11::m_lowdelayDeltaFrmszP[][8] = {
89     { -8,  -24, -32, -40, -44, -48, -52, -80 },
90     { -8,  -16, -32, -40, -40,  -44, -44, -56 },
91     { 0,    0,  -12, -20, -24,  -28, -32, -36 },
92     { 8,   4,  0,   0,    -8,   -16,  -24, -32 },
93     { 32,  16,  8, 4,    -4,   -8,  -16,  -20 },
94     { 36,  24,  16, 8,    4,    -2,  -4, -8 },
95     { 40, 36, 24,   20, 16,  8,  0, -8 },
96     { 48, 40, 28,  24, 20,  12,  0, -4 },
97     { 64, 48, 28, 20, 16,  12,  8,  4 },
98 };
99 
100 const int8_t CodechalVdencHevcStateG11::m_lowdelayDeltaFrmszB[][8] = {
101     { 0, -4, -8, -16, -24, -32, -40, -48 },
102     { 1,  0, -4, -8, -16,  -24, -32, -40 },
103     { 4,  2,  0, -1, -3,  -8, -16, -24 },
104     { 8,  4,  2,  0, -1,  -4,  -8, -16 },
105     { 20, 16,  4,  0, -1,  -4,  -8, -16 },
106     { 24, 20, 16,  8,  4,   0,  -4, -8 },
107     { 28, 24, 20, 16,  8,   4,  0, -8 },
108     { 32, 24, 20, 16, 8,   4,   0, -4 },
109     { 64, 48, 28, 20, 16,  12,  8,  4 },
110 };
111 
//! 52-entry lookup table of adaptive weights.
//! NOTE(review): 52 entries presumably means one per QP value 0..51 - confirm with the consumer.
const uint8_t m_qpAdaptiveWeight[52] = {
     6,  6,  6,  6,  6,  6,  6,  6,  6,  7,
     7,  7,  7,  7,  7,  7,  8,  8,  8,  8,
     8,  8,  8,  9,  9, 10, 11, 12, 13, 14,
    16, 17, 18, 20, 21, 23, 24, 26, 28, 30,
    32, 34, 36, 38, 40, 42, 44, 46, 48, 50,
    50, 50
};

//! 52-entry lookup table of boost values.
//! NOTE(review): presumably indexed the same way as m_qpAdaptiveWeight - confirm with the consumer.
const uint8_t m_boostTable[52] = {
     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
     3,  3,  3,  3,  3,  4,  4,  5,  5,  5,
     6,  6,  6,  7,  7,  8,  8,  8,  9,  9,
     9, 10, 10, 10, 11, 11, 11, 11, 11, 11,
    11, 11, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12
};
123 
124 const uint32_t CodechalVdencHevcStateG11::m_hucConstantData[]  = {
125     0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x012c012c, 0x012c012c, 0x012c012c,
126     0x012c012c, 0x012c012c, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00640064,
127     0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064,
128     0x00640064, 0x00640064, 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x012c012c,
129     0x012c012c, 0x012c012c, 0x012c012c, 0x012c012c, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00c800c8,
130     0x00c800c8, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064,
131     0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x503c1e04, 0xffc88c78, 0x3c1e0400, 0xc88c7850,
132     0x140200ff, 0xa0824628, 0x0000ffc8, 0x00000000, 0x04030302, 0x00000000, 0x03030200, 0x0000ff04,
133     0x02020000, 0xffff0303, 0x01000000, 0xff020202, 0x0000ffff, 0x02020100, 0x00fffffe, 0x01010000,
134     0xfffffe02, 0x010000ff, 0xfefe0201, 0x0000ffff, 0xfe010100, 0x00fffffe, 0x01010000, 0x00000000,
135     0x03030200, 0x00000004, 0x03020000, 0x00ff0403, 0x02000000, 0xff030302, 0x000000ff, 0x02020201,
136     0x00ffffff, 0x02010000, 0xfffffe02, 0x01000000, 0xfffe0201, 0x0000ffff, 0xfe020101, 0x00fffffe,
137     0x01010000, 0xfffffefe, 0x01000000, 0x00000001, 0x03020000, 0x00000403, 0x02000000, 0xff040303,
138     0x00000000, 0x03030202, 0x0000ffff, 0x02020100, 0xffffff02, 0x01000000, 0xfffe0202, 0x000000ff,
139     0xfe020101, 0x00ffffff, 0x02010100, 0xfffffefe, 0x01000000, 0xfffefe01, 0x000000ff, 0xe0e00101,
140     0xc0d0d0d0, 0xe0e0b0c0, 0xd0d0d0e0, 0xf0f0c0d0, 0xd0e0e0e0, 0x0408d0d0, 0xe8f0f800, 0x1820dce0,
141     0xf8fc0210, 0x2024ecf0, 0x0008101c, 0x2428f8fc, 0x08101418, 0x2830f800, 0x0c14181c, 0x3040fc00,
142     0x0c10141c, 0xe8f80408, 0xc8d0d4e0, 0xf0f8b0c0, 0xccd4d8e0, 0x0000c0c8, 0xd8dce4f0, 0x0408d0d4,
143     0xf0f80000, 0x0808dce8, 0xf0f80004, 0x0810dce8, 0x00080808, 0x0810f8fc, 0x08080808, 0x1010f800,
144     0x08080808, 0x1020fc00, 0x08080810, 0xfc000408, 0xe0e8f0f8, 0x0001d0d8, 0xe8f0f8fc, 0x0204d8e0,
145     0xf8fdff00, 0x0408e8f0, 0xfcff0002, 0x1014f0f8, 0xfcff0004, 0x1418f0f8, 0x00040810, 0x181cf8fc,
146     0x04081014, 0x1820f800, 0x04081014, 0x3040fc00, 0x0c10141c, 0x40300408, 0x80706050, 0x30a0a090,
147     0x70605040, 0xa0a09080, 0x60504030, 0xa0908070, 0x040201a0, 0x18141008, 0x02012420, 0x0a080604,
148     0x01101010, 0x0c080402, 0x10101010, 0x05030201, 0x02010106, 0x00000503, 0xff030201, 0x02010000,
149     0x000000ff, 0xfffefe01, 0xfdfd0100, 0xfb00ffff, 0xfffffefd, 0xfefdfbfa, 0x030201ff, 0x01010605,
150     0x00050302, 0x03020101, 0x010000ff, 0x0000ff02, 0xffff0100, 0xfe0100ff, 0x00ffffff, 0xfffffefc,
151     0xfefcfb00, 0x0101ffff, 0x01050402, 0x04020101, 0x01010000, 0x0000ff02, 0x00ff0101, 0xff000000,
152     0x0100ffff, 0xfffffffe, 0xfffefd00, 0xfcfb00ff, 0x1efffffe, 0x070d0e10, 0x00003207, 0x00000000,
153     0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
154     0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
155     0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
156     0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
157     0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
158     0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
159     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
160     0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
161     0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
162     0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
163     0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
164     0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
165     0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
166     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
167     0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
168     0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
169     0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
170     0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
171     0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
172     0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
173     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
174     0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
175     0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
176     0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
177     0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
178     0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
179     0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
180     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
181     0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
182     0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
183     0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
184     0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
185     0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
186     0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
187     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
188     0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
189     0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
190     0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
191     0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
192     0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
193     0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
194     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
195     0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
196     0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
197     0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
198     0x00000000, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
199     0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
200     0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
201     0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000,
202     0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff,
203     0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff,
204     0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000,
205     0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff,
206     0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff,
207     0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
208     0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
209     0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
210     0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000,
211     0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff,
212     0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff,
213     0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000,
214     0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff,
215     0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff,
216     0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
217     0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
218     0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
219     0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000,
220     0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff,
221     0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff,
222     0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000,
223     0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff,
224     0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff,
225     0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
226     0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
227     0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
228     0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000,
229     0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff,
230     0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff,
231     0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000,
232     0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff,
233     0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff,
234     0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
235     0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
236     0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
237     0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff
238 };
239 
GetMaxBtCount()240 uint32_t CodechalVdencHevcStateG11::GetMaxBtCount()
241 {
242     CODECHAL_ENCODE_FUNCTION_ENTER;
243 
244     uint32_t maxBtCount = 0;
245 
246 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
247     auto btIdxAlignment = m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
248 
249     // DsConversion kernel
250     maxBtCount = 2 * (MOS_ALIGN_CEIL(m_cscDsState->GetBTCount(), btIdxAlignment));
251 #endif
252 
253     // add ME and stream-in later
254     return maxBtCount;
255 }
256 
InitKernelStateMe()257 MOS_STATUS CodechalVdencHevcStateG11::InitKernelStateMe()
258 {
259     CODECHAL_ENCODE_FUNCTION_ENTER;
260 
261     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
262 
263     CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface);
264     CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface->pStateHeapInterface);
265 
266     uint32_t kernelSize = m_combinedKernelSize;
267     CODECHAL_KERNEL_HEADER currKrnHeader;
268     CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
269         m_kernelBinary,
270         ENC_ME,
271         0,
272         &currKrnHeader,
273         &kernelSize));
274 
275     auto kernelStatePtr = &m_vdencMeKernelState;
276     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
277         VDENC_ME_P,
278         &kernelStatePtr->KernelParams));
279 
280     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
281         VDENC_ME_P,
282         &m_vdencMeKernelBindingTable));
283 
284     kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
285     kernelStatePtr->KernelParams.pBinary =
286         m_kernelBinary +
287         (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
288     kernelStatePtr->KernelParams.iSize = kernelSize;
289 
290     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
291         m_stateHeapInterface,
292         kernelStatePtr->KernelParams.iBTCount,
293         &kernelStatePtr->dwSshSize,
294         &kernelStatePtr->dwBindingTableSize));
295 
296     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
297 
298     return eStatus;
299 }
300 
InitKernelStateStreamIn()301 MOS_STATUS CodechalVdencHevcStateG11::InitKernelStateStreamIn()
302 {
303     CODECHAL_ENCODE_FUNCTION_ENTER;
304     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
305 
306     CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface);
307     CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface->pStateHeapInterface);
308 
309     uint32_t kernelSize = m_combinedKernelSize;
310     CODECHAL_KERNEL_HEADER currKrnHeader;
311     CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
312         m_kernelBinary,
313         VDENC_STREAMIN_HEVC,
314         0,
315         &currKrnHeader,
316         &kernelSize));
317 
318     auto kernelStatePtr = &m_vdencStreaminKernelState;
319     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
320         VDENC_STREAMIN_HEVC,
321         &kernelStatePtr->KernelParams));
322 
323     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
324         VDENC_STREAMIN_HEVC,
325         &m_vdencStreaminKernelBindingTable));
326 
327     kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
328     kernelStatePtr->KernelParams.pBinary =
329         m_kernelBinary +
330         (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
331     kernelStatePtr->KernelParams.iSize = kernelSize;
332 
333     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
334         m_stateHeapInterface,
335         kernelStatePtr->KernelParams.iBTCount,
336         &kernelStatePtr->dwSshSize,
337         &kernelStatePtr->dwBindingTableSize));
338 
339     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
340 
341     return eStatus;
342 }
343 
InitKernelState()344 MOS_STATUS CodechalVdencHevcStateG11::InitKernelState()
345 {
346     CODECHAL_ENCODE_FUNCTION_ENTER;
347 
348     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
349 
350 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
351     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMe());
352     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateStreamIn());
353 #endif
354 
355     return eStatus;
356 }
357 
DecideEncodingPipeNumber()358 MOS_STATUS CodechalVdencHevcStateG11::DecideEncodingPipeNumber()
359 {
360     CODECHAL_ENCODE_FUNCTION_ENTER;
361 
362     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
363 
364     m_numPipePre = m_numPipe;
365     m_numPipe = m_numVdbox;
366 
367     uint8_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
368     uint8_t numTileRows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
369 
370     CODECHAL_ENCODE_VERBOSEMESSAGE("Tile Columns = %d, Tile Rows = %d.", numTileColumns, numTileRows);
371 
372     // Only support 1 colomn or 1 row when only have 1 VDBOX
373     if (m_numVdbox <= 1 && numTileRows > 1 && numTileColumns > 1)
374     {
375         CODECHAL_ENCODE_ASSERTMESSAGE("Only 1 VDBOX detected, and Gen11 only support 1xN or Nx1 tiles for single pipe!");
376         return MOS_STATUS_PLATFORM_NOT_SUPPORTED;
377     }
378 
379     if (numTileColumns > m_numPipe)
380     {
381         m_numPipe = 1;
382     }
383 
384     if (numTileColumns < m_numPipe)
385     {
386         if (numTileColumns >= 1 && numTileColumns <= 4)
387         {
388             m_numPipe = numTileColumns;
389         }
390         else
391         {
392             m_numPipe = 1;  // invalid tile column test cases and switch back to the single VDBOX mode
393         }
394     }
395 
396     m_useVirtualEngine = true;  // always use virtual engine interface for single pipe and scalability mode
397 
398     m_numUsedVdbox       = m_numPipe;
399     m_numberTilesInFrame = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1);
400 
401     if (m_scalabilityState)
402     {
403         // Create/ re-use a GPU context with 2 pipes
404         m_scalabilityState->ucScalablePipeNum = m_numPipe;
405     }
406 
407     CODECHAL_ENCODE_VERBOSEMESSAGE("System VDBOX number = %d, decided pipe num = %d.", m_numVdbox, m_numPipe);
408 
409     return eStatus;
410 }
411 
CheckSupportedFormat(PMOS_SURFACE surface)412 bool CodechalVdencHevcStateG11::CheckSupportedFormat(PMOS_SURFACE surface)
413 {
414     CODECHAL_ENCODE_FUNCTION_ENTER;
415 
416     bool isColorFormatSupported = false;
417 
418     if (nullptr == surface)
419     {
420         CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
421         return isColorFormatSupported;
422     }
423 
424     switch (surface->Format)
425     {
426     case Format_NV12:
427     case Format_NV21:
428     case Format_P010:       // Planar 4:2:0
429     case Format_YUY2:
430     case Format_YUYV:
431     case Format_YVYU:
432     case Format_UYVY:
433     case Format_VYUY:
434     case Format_A8R8G8B8:
435     case Format_A8B8G8R8:
436     case Format_R10G10B10A2:// Packed RGB 4:4:4
437     case Format_B10G10R10A2:// Packed RGB 4:4:4
438     case Format_AYUV:
439     case Format_Y410:       // Packed 4:4:4
440         isColorFormatSupported = true;
441         break;
442     case Format_Y210:       // Packed 4:2:2
443         isColorFormatSupported = surface->TileType == MOS_TILE_Y;
444         break;
445     default:
446         CODECHAL_ENCODE_ASSERTMESSAGE("Input surface color format = %d not supported!", surface->Format);
447         break;
448     }
449 
450     return isColorFormatSupported;
451 }
452 
PlatformCapabilityCheck()453 MOS_STATUS CodechalVdencHevcStateG11::PlatformCapabilityCheck()
454 {
455     CODECHAL_ENCODE_FUNCTION_ENTER;
456 
457     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
458 
459     CODECHAL_ENCODE_CHK_STATUS_RETURN(DecideEncodingPipeNumber());
460 
461     if (MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
462     {
463         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ChkGpuCtxReCreation(this, m_scalabilityState,
464             (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
465     }
466 
467     if (m_frameWidth * m_frameHeight > ENCODE_HEVC_MAX_8K_PIC_WIDTH * ENCODE_HEVC_MAX_8K_PIC_HEIGHT)
468     {
469         eStatus = MOS_STATUS_INVALID_PARAMETER;
470         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Frame resolution greater than 8k not supported");
471     }
472 
473     if (m_hevcSeqParams->SliceSizeControl && m_frameWidth * m_frameHeight < ENCODE_HEVC_MIN_DSS_PIC_WIDTH * ENCODE_HEVC_MIN_DSS_PIC_HEIGHT)
474     {
475         eStatus = MOS_STATUS_INVALID_PARAMETER;
476         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "DSS is not supported when frame resolution less than 320p");
477     }
478 
479     if (m_hevcSeqParams->ParallelBRC)
480     {
481         eStatus = MOS_STATUS_INVALID_PARAMETER;
482         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Parallel BRC is not supported on VDENC");
483     }
484 
485     if (m_hevcSeqParams->bit_depth_luma_minus8 >= 4 || m_hevcSeqParams->bit_depth_chroma_minus8 >= 4)
486     {
487         eStatus = MOS_STATUS_INVALID_PARAMETER;
488         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "12bit encoding is not supported on VDENC");
489     }
490 
491     if (m_hevcSeqParams->chroma_format_idc == 2)
492     {
493         eStatus = MOS_STATUS_INVALID_PARAMETER;
494         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "422 recon format encoding is not supported on HEVC VDENC");
495     }
496 
497     if (m_vdencEnabled && m_chromaFormat == HCP_CHROMA_FORMAT_YUV444 && m_hevcSeqParams->TargetUsage == 7)
498     {
499         CODECHAL_ENCODE_ASSERTMESSAGE("Speed mode is not supported in VDENC 444, resetting TargetUsage to Normal mode\n");
500         m_hevcSeqParams->TargetUsage = 4;
501     }
502 
503     bool oneLcuInTile = false;
504     uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
505     for (auto i = 0; i < numTileColumns; i++)
506     {
507         if (m_hevcPicParams->tile_column_width[i] == 1)
508         {
509             oneLcuInTile = true;
510             break;
511         }
512     }
513 
514     //Commented out in order to enable height of 64 pixels for Row tile on PO silicon for ICL.
515     /* uint16_t numTileTows = pHevcPicParams->num_tile_rows_minus1 + 1;
516     for (auto i = 0; i < numTileTows; i++)
517     {
518         if (pHevcPicParams->tile_row_height[i] == 1)
519         {
520             oneLcuInTile = true;
521             break;
522         }
523     }*/
524 
525     if (oneLcuInTile)
526     {
527         eStatus = MOS_STATUS_INVALID_PARAMETER;
528         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Tile width/ height of 1 LCU is not supported");
529     }
530 
531     // TU configuration for RDOQ
532     if (m_hevcRdoqEnabled)
533     {
534         m_hevcRdoqEnabled = (m_hevcSeqParams->TargetUsage < 7);
535     }
536 
537     // set RDOQ Intra blocks Threshold for Gen11+
538     m_rdoqIntraTuThreshold = 0;
539     if (m_hevcRdoqEnabled)
540     {
541         if (1 == m_hevcSeqParams->TargetUsage)
542         {
543             m_rdoqIntraTuThreshold = 0xffff;
544         }
545         else if (4 == m_hevcSeqParams->TargetUsage)
546         {
547             m_rdoqIntraTuThreshold = m_picWidthInMb * m_picHeightInMb;
548             m_rdoqIntraTuThreshold = MOS_MIN(m_rdoqIntraTuThreshold / 10, 0xffff);
549         }
550     }
551 
552     return eStatus;
553 }
554 
SetStreaminDataPerLcu(PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,void * streaminData)555 void CodechalVdencHevcStateG11::SetStreaminDataPerLcu(
556     PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,
557     void* streaminData)
558 {
559     CODECHAL_ENCODE_FUNCTION_ENTER;
560     PCODECHAL_VDENC_HEVC_STREAMIN_STATE_G11 data = (PCODECHAL_VDENC_HEVC_STREAMIN_STATE_G11)streaminData;
561     if (streaminParams->setQpRoiCtrl)
562     {
563         if (m_vdencNativeROIEnabled || m_brcAdaptiveRegionBoostEnable)
564         {
565             data->DW0.RoiCtrl = streaminParams->roiCtrl;
566         }
567         else
568         {
569             data->DW7.QpEnable = 0xf;
570             data->DW14.ForceQp_0 = streaminParams->forceQp[0];
571             data->DW14.ForceQp_1 = streaminParams->forceQp[1];
572             data->DW14.ForceQp_2 = streaminParams->forceQp[2];
573             data->DW14.ForceQp_3 = streaminParams->forceQp[3];
574         }
575     }
576     else
577     {
578         data->DW0.MaxTuSize = streaminParams->maxTuSize;
579         data->DW0.MaxCuSize = streaminParams->maxCuSize;
580         data->DW0.NumImePredictors = streaminParams->numImePredictors;
581         data->DW0.PuTypeCtrl = streaminParams->puTypeCtrl;
582         data->DW6.NumMergeCandidateCu64x64 = streaminParams->numMergeCandidateCu64x64;
583         data->DW6.NumMergeCandidateCu32x32 = streaminParams->numMergeCandidateCu32x32;
584         data->DW6.NumMergeCandidateCu16x16 = streaminParams->numMergeCandidateCu16x16;
585         data->DW6.NumMergeCandidateCu8x8 = streaminParams->numMergeCandidateCu8x8;
586     }
587 }
588 
AllocatePakResources()589 MOS_STATUS CodechalVdencHevcStateG11::AllocatePakResources()
590 {
591     CODECHAL_ENCODE_FUNCTION_ENTER;
592 
593     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
594 
595     uint32_t mvt_size = MOS_ALIGN_CEIL(((m_frameWidth + 63) >> 6)*((m_frameHeight + 15) >> 4), 2) * CODECHAL_CACHELINE_SIZE;
596     uint32_t mvtb_size = MOS_ALIGN_CEIL(((m_frameWidth + 31) >> 5)*((m_frameHeight + 31) >> 5), 2) * CODECHAL_CACHELINE_SIZE;
597     m_sizeOfMvTemporalBuffer = MOS_MAX(mvt_size, mvtb_size);
598 
599     const uint32_t picWidthInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_LCU_SIZE);        //assume smallest LCU to get max width
600     const uint32_t picHeightInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameHeight, CODECHAL_HEVC_MIN_LCU_SIZE);      //assume smallest LCU to get max height
601 
602     MHW_VDBOX_HCP_BUFFER_SIZE_PARAMS hcpBufSizeParam;
603     MOS_ZeroMemory(&hcpBufSizeParam, sizeof(hcpBufSizeParam));
604     hcpBufSizeParam.ucMaxBitDepth = m_bitDepth;
605     hcpBufSizeParam.ucChromaFormat = m_chromaFormat;
606     // We should move the buffer allocation to picture level if the size is dependent on LCU size
607     hcpBufSizeParam.dwCtbLog2SizeY = 6; //assume Max LCU size
608     hcpBufSizeParam.dwPicWidth = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE);
609     hcpBufSizeParam.dwPicHeight = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE);
610 
611     MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
612     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
613     allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
614     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
615     allocParamsForBufferLinear.Format = Format_Buffer;
616 
617     // Deblocking Filter Row Store Scratch data surface
618     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
619         MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_LINE,
620         &hcpBufSizeParam);
621 
622     if (eStatus != MOS_STATUS_SUCCESS)
623     {
624         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Row Store Scratch Buffer.");
625         return eStatus;
626     }
627 
628     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
629     allocParamsForBufferLinear.pBufName = "DeblockingScratchBuffer";
630 
631     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
632         m_osInterface,
633         &allocParamsForBufferLinear,
634         &m_resDeblockingFilterRowStoreScratchBuffer);
635 
636     if (eStatus != MOS_STATUS_SUCCESS)
637     {
638         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Row Store Scratch Buffer.");
639         return eStatus;
640     }
641 
642     // Deblocking Filter Tile Row Store Scratch data surface
643     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
644         MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_LINE,
645         &hcpBufSizeParam);
646 
647     if (eStatus != MOS_STATUS_SUCCESS)
648     {
649         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Row Store Scratch Buffer.");
650         return eStatus;
651     }
652 
653     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
654     allocParamsForBufferLinear.pBufName = "DeblockingTileRowScratchBuffer";
655 
656     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
657         m_osInterface,
658         &allocParamsForBufferLinear,
659         &m_resDeblockingFilterTileRowStoreScratchBuffer);
660 
661     if (eStatus != MOS_STATUS_SUCCESS)
662     {
663         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Row Store Scratch Buffer.");
664         return eStatus;
665     }
666 
667     // Deblocking Filter Column Row Store Scratch data surface
668     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
669         MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_COL,
670         &hcpBufSizeParam);
671 
672     if (eStatus != MOS_STATUS_SUCCESS)
673     {
674         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Column Store Scratch Buffer.");
675         return eStatus;
676     }
677 
678     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
679     allocParamsForBufferLinear.pBufName = "DeblockingColumnScratchBuffer";
680 
681     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
682         m_osInterface,
683         &allocParamsForBufferLinear,
684         &m_resDeblockingFilterColumnRowStoreScratchBuffer);
685 
686     if (eStatus != MOS_STATUS_SUCCESS)
687     {
688         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Column Row Store Scratch Buffer.");
689         return eStatus;
690     }
691 
692     // Metadata Line buffer
693     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
694         MHW_VDBOX_HCP_INTERNAL_BUFFER_META_LINE,
695         &hcpBufSizeParam);
696 
697     if (eStatus != MOS_STATUS_SUCCESS)
698     {
699         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Line Buffer.");
700         return eStatus;
701     }
702 
703     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
704     allocParamsForBufferLinear.pBufName = "MetadataLineBuffer";
705 
706     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
707         m_osInterface,
708         &allocParamsForBufferLinear,
709         &m_resMetadataLineBuffer);
710 
711     if (eStatus != MOS_STATUS_SUCCESS)
712     {
713         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Line Buffer.");
714         return eStatus;
715     }
716 
717     // Metadata Tile Line buffer
718     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
719         MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_LINE,
720         &hcpBufSizeParam);
721 
722     if (eStatus != MOS_STATUS_SUCCESS)
723     {
724         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Line Buffer.");
725         return eStatus;
726     }
727 
728     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
729     allocParamsForBufferLinear.pBufName = "MetadataTileLineBuffer";
730 
731     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
732         m_osInterface,
733         &allocParamsForBufferLinear,
734         &m_resMetadataTileLineBuffer);
735 
736     if (eStatus != MOS_STATUS_SUCCESS)
737     {
738         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Line Buffer.");
739         return eStatus;
740     }
741 
742     // Metadata Tile Column buffer
743     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
744         MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_COL,
745         &hcpBufSizeParam);
746 
747     if (eStatus != MOS_STATUS_SUCCESS)
748     {
749         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Column Buffer.");
750         return eStatus;
751     }
752 
753     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
754     allocParamsForBufferLinear.pBufName = "MetadataTileColumnBuffer";
755 
756     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
757         m_osInterface,
758         &allocParamsForBufferLinear,
759         &m_resMetadataTileColumnBuffer);
760 
761     if (eStatus != MOS_STATUS_SUCCESS)
762     {
763         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Column Buffer.");
764         return eStatus;
765     }
766 
767     // SAO Line buffer
768     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
769         MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_LINE,
770         &hcpBufSizeParam);
771 
772     if (eStatus != MOS_STATUS_SUCCESS)
773     {
774         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Line Buffer.");
775         return eStatus;
776     }
777 
778     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
779     allocParamsForBufferLinear.pBufName = "SaoLineBuffer";
780 
781     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
782         m_osInterface,
783         &allocParamsForBufferLinear,
784         &m_resSaoLineBuffer);
785 
786     if (eStatus != MOS_STATUS_SUCCESS)
787     {
788         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Line Buffer.");
789         return eStatus;
790     }
791 
792     // SAO Tile Line buffer
793     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
794         MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_LINE,
795         &hcpBufSizeParam);
796 
797     if (eStatus != MOS_STATUS_SUCCESS)
798     {
799         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Line Buffer.");
800         return eStatus;
801     }
802 
803     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
804     allocParamsForBufferLinear.pBufName = "SaoTileLineBuffer";
805 
806     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
807         m_osInterface,
808         &allocParamsForBufferLinear,
809         &m_resSaoTileLineBuffer);
810 
811     if (eStatus != MOS_STATUS_SUCCESS)
812     {
813         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Line Buffer.");
814         return eStatus;
815     }
816 
817     // SAO Tile Column buffer
818     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
819         MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_COL,
820         &hcpBufSizeParam);
821 
822     if (eStatus != MOS_STATUS_SUCCESS)
823     {
824         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Column Buffer.");
825         return eStatus;
826     }
827 
828     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
829     allocParamsForBufferLinear.pBufName = "SaoTileColumnBuffer";
830 
831     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
832         m_osInterface,
833         &allocParamsForBufferLinear,
834         &m_resSaoTileColumnBuffer);
835 
836     if (eStatus != MOS_STATUS_SUCCESS)
837     {
838         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Column Buffer.");
839         return eStatus;
840     }
841 
842     // Lcu ILDB StreamOut buffer
843     // This is not enabled with HCP_PIPE_MODE_SELECT yet, placeholder here
844     allocParamsForBufferLinear.dwBytes = CODECHAL_CACHELINE_SIZE;
845     allocParamsForBufferLinear.pBufName = "LcuILDBStreamOutBuffer";
846 
847     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
848         m_osInterface,
849         &allocParamsForBufferLinear,
850         &m_resLcuIldbStreamOutBuffer);
851 
852     if (eStatus != MOS_STATUS_SUCCESS)
853     {
854         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU ILDB StreamOut Buffer.");
855         return eStatus;
856     }
857 
858     // Lcu Base Address buffer
859     // HEVC Encoder Mode: Slice size is written to this buffer when slice size conformance is enabled.
860     // 1 CL (= 16 DWs = 64 bytes) per slice * Maximum number of slices in a frame.
861     // Align to page for HUC requirement
862     uint32_t maxLcu = picWidthInMinLCU * picHeightInMinLCU;
863     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(maxLcu * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
864     allocParamsForBufferLinear.pBufName = "LcuBaseAddressBuffer";
865 
866     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
867         m_osInterface,
868         &allocParamsForBufferLinear,
869         &m_resLcuBaseAddressBuffer);
870 
871     if (eStatus != MOS_STATUS_SUCCESS)
872     {
873         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU Base Address Buffer.");
874         return eStatus;
875     }
876 
877     // SAO StreamOut buffer
878     uint32_t size = MOS_ALIGN_CEIL(picWidthInMinLCU, 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU;
879     //extra added size to cover tile enabled case, per tile width aligned to 4.  20: max tile column No.
880     size += 3 * 20 * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU;
881     allocParamsForBufferLinear.dwBytes = size;
882     allocParamsForBufferLinear.pBufName = "SaoStreamOutBuffer";
883 
884     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
885         m_osInterface,
886         &allocParamsForBufferLinear,
887         &m_resSaoStreamOutBuffer);
888 
889     if (eStatus != MOS_STATUS_SUCCESS)
890     {
891         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO StreamOut Buffer.");
892         return eStatus;
893     }
894 
895     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
896     allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
897     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
898     allocParamsForBufferLinear.Format = Format_Buffer;
899 
900     // Allocate Frame Statistics Streamout Data Destination Buffer. DW98-100 in HCP PipeBufAddr command
901     size = MOS_ALIGN_CEIL(m_sizeOfHcpPakFrameStats * m_maxTileNumber, CODECHAL_PAGE_SIZE);  //Each tile has 8 cache size bytes of data, Align to page is HuC requirement
902     allocParamsForBufferLinear.dwBytes = size;
903     allocParamsForBufferLinear.pBufName = "FrameStatStreamOutBuffer";
904 
905     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
906         m_osInterface,
907         &allocParamsForBufferLinear,
908         &m_resFrameStatStreamOutBuffer),
909         "Failed to create VDENC FrameStatStreamOutBuffer Buffer");
910 
911     // PAK Statistics buffer
912     size = MOS_ALIGN_CEIL(m_vdencBrcPakStatsBufferSize, CODECHAL_PAGE_SIZE);
913     CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
914         m_standard, size, 1, pakStats, "pakStats"));
915 
916     // Slice Count buffer 1 DW = 4 Bytes
917     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
918     allocParamsForBufferLinear.pBufName = "Slice Count Buffer";
919 
920     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
921         m_osInterface,
922         &allocParamsForBufferLinear,
923         &m_sliceCountBuffer),
924         "Failed to create VDENC Slice Count Buffer");
925 
926     // VDEncMode Timer buffer 1 DW = 4 Bytes
927     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
928     allocParamsForBufferLinear.pBufName = "VDEncMode Timer Buffer";
929 
930     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
931         m_osInterface,
932         &allocParamsForBufferLinear,
933         &m_vdencModeTimerBuffer),
934         "Failed to create VDEncMode Timer Buffer");
935 
936     uint32_t frameWidthInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MIN_CU_SIZE);
937     uint32_t frameHeightInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MIN_CU_SIZE);
938     uint32_t frameWidthInLcus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MAX_LCU_SIZE_G10);
939     uint32_t frameHeightInLcus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MAX_LCU_SIZE_G10);
940 
941     // PAK CU Level Streamout Data:   DW57-59 in HCP pipe buffer address command
942     // One CU has 16-byte. But, each tile needs to be aliged to the cache line
943     size = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE);
944     allocParamsForBufferLinear.dwBytes = size;
945     allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
946 
947     CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
948                                                       m_osInterface,
949                                                       &allocParamsForBufferLinear,
950                                                       &m_resPakcuLevelStreamoutData.sResource));
951                                                       m_resPakcuLevelStreamoutData.dwSize = size;
952     CODECHAL_ENCODE_VERBOSEMESSAGE("first allocate cu steam out buffer, size=0x%x.\n", size);
953 
954     // these 2 buffers are not used so far, but put the correct size calculation here
955     // PAK CU Level Streamout Data:   DW57-59 in HCP pipe buffer address command
956     // One CU has 16-byte. But, each tile needs to be aliged to the cache line
957     //size = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE);
958 
959     // PAK Slice Level Streamut Data. DW60-DW62 in HCP pipe buffer address command
960     // one LCU has one cache line. Use CU as LCU during creation
961     //size = frameWidthInLcus * frameHeightInLcus * CODECHAL_CACHELINE_SIZE;
962 
963     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
964     allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
965     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
966     allocParamsForBufferLinear.Format = Format_Buffer;
967 
968     // Allocate SSE Source Pixel Row Store Buffer
969     m_sizeOfSseSrcPixelRowStoreBufferPerLcu = CODECHAL_CACHELINE_SIZE * (4 + 4) << 1;
970     allocParamsForBufferLinear.dwBytes      = m_sizeOfSseSrcPixelRowStoreBufferPerLcu * (m_widthAlignedMaxLcu + 3);
971     allocParamsForBufferLinear.pBufName = "SseSrcPixelRowStoreBuffer";
972 
973     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
974                                                   m_osInterface,
975                                                   &allocParamsForBufferLinear,
976                                                   &m_resSseSrcPixelRowStoreBuffer),
977         "Failed to create SseSrcPixelRowStoreBuffer");
978 
979     //HCP scalability Sync buffer
980     allocParamsForBufferLinear.dwBytes = CODECHAL_HEVC_MAX_NUM_HCP_PIPE * CODECHAL_CACHELINE_SIZE;
981     allocParamsForBufferLinear.pBufName = "GEN11 HCP scalability Sync buffer ";
982 
983     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
984                                                   m_osInterface,
985                                                   &allocParamsForBufferLinear,
986                                                   &m_resHcpScalabilitySyncBuffer.sResource),
987         "Failed to create GEN11 HCP scalability Sync Buffer");
988 
989     // create the tile coding state parameters
990     m_tileParams = (PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11)MOS_AllocAndZeroMemory(
991         sizeof(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11)* m_maxTileNumber);
992 
993     if (m_enableHWSemaphore)
994     {
995         // Create the HW sync objects which will be used by each reference frame and BRC in GEN11
996         allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
997         allocParamsForBufferLinear.pBufName = "SemaphoreMemory";
998 
999         MOS_LOCK_PARAMS lockFlagsWriteOnly;
1000         MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1001         lockFlagsWriteOnly.WriteOnly = 1;
1002 
1003         uint32_t* data = nullptr;
1004 
1005         for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_refSync); i++)
1006         {
1007             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1008                                                           m_osInterface,
1009                                                           &allocParamsForBufferLinear,
1010                                                           &m_refSync[i].resSemaphoreMem.sResource),
1011                 "Failed to create HW Semaphore Memory.");
1012             m_refSync[i].resSemaphoreMem.dwSize = allocParamsForBufferLinear.dwBytes;
1013 
1014             CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1015                                                 m_osInterface,
1016                                                 &m_refSync[i].resSemaphoreMem.sResource,
1017                                                 &lockFlagsWriteOnly));
1018 
1019             *data = 1;
1020 
1021             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1022                 m_osInterface,
1023                 &m_refSync[i].resSemaphoreMem.sResource));
1024         }
1025 
1026     }
1027 
1028     // create the HW semaphore buffer to sync up between VDBOXes. This is used to WA HW internal lock issue
1029     if (m_enableVdBoxHWSemaphore)
1030     {
1031         allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1032         allocParamsForBufferLinear.pBufName = "VDBOX SemaphoreMemory";
1033 
1034         MOS_LOCK_PARAMS lockFlagsWriteOnly;
1035         MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1036         lockFlagsWriteOnly.WriteOnly = 1;
1037 
1038         uint32_t* data = nullptr;
1039 
1040         for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resVdBoxSemaphoreMem); i++)
1041         {
1042             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1043                                                           m_osInterface,
1044                                                           &allocParamsForBufferLinear,
1045                                                           &m_resVdBoxSemaphoreMem[i].sResource),
1046                 "Failed to create VDBOX HW Semaphore Memory.");
1047 
1048             CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1049                                                 m_osInterface,
1050                                                 &m_resVdBoxSemaphoreMem[i].sResource,
1051                                                 &lockFlagsWriteOnly));
1052 
1053             *data = 1;
1054 
1055             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1056                 m_osInterface,
1057                 &m_resVdBoxSemaphoreMem[i].sResource));
1058         }
1059     }
1060 
1061     uint32_t* data = nullptr;
1062     MOS_LOCK_PARAMS lockFlagsWriteOnly;
1063     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1064     lockFlagsWriteOnly.WriteOnly = 1;
1065 
1066     allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1067     allocParamsForBufferLinear.pBufName = "Pipe Start SemaphoreMemory";
1068 
1069     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1070                                                   m_osInterface,
1071                                                   &allocParamsForBufferLinear,
1072                                                   &m_resPipeStartSemaMem),
1073         "Cannot create Scalability pipe start sync HW semaphore.");
1074 
1075     CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1076                                         m_osInterface,
1077                                         &m_resPipeStartSemaMem,
1078                                         &lockFlagsWriteOnly));
1079 
1080     *data = 0;
1081 
1082     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1083         m_osInterface,
1084         &m_resPipeStartSemaMem));
1085 
1086 
1087     // SyncSemaMem
1088     data                                = nullptr;
1089     allocParamsForBufferLinear.dwBytes  = sizeof(uint32_t);
1090     allocParamsForBufferLinear.pBufName = "SyncSemaphoreMemory";
1091 
1092     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1093                                                   m_osInterface,
1094                                                   &allocParamsForBufferLinear,
1095                                                   &m_resSyncSemaMem),
1096         "Cannot create sync HW semaphore.");
1097 
1098     CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1099                                         m_osInterface,
1100                                         &m_resSyncSemaMem,
1101                                         &lockFlagsWriteOnly));
1102 
1103     *data = 0;
1104 
1105     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1106         m_osInterface,
1107         &m_resSyncSemaMem));
1108 
1109 
1110     data                                = nullptr;
1111     allocParamsForBufferLinear.dwBytes  = sizeof(uint32_t);
1112     allocParamsForBufferLinear.pBufName = "BrcPakSemaphoreMemory";
1113 
1114     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1115                                                   m_osInterface,
1116                                                   &allocParamsForBufferLinear,
1117                                                   &m_resBrcPakSemaphoreMem.sResource),
1118         "Failed to create BRC PAK Semaphore Memory.");
1119 
1120     CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1121                                         m_osInterface,
1122                                         &m_resBrcPakSemaphoreMem.sResource,
1123                                         &lockFlagsWriteOnly));
1124 
1125     *data = 0;
1126 
1127     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1128         m_osInterface,
1129         &m_resBrcPakSemaphoreMem.sResource));
1130 
1131     if (m_hucPakStitchEnabled)
1132     {
1133         uint8_t* data;
1134 
1135         // Pak stitch DMEM
1136         allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG11), CODECHAL_CACHELINE_SIZE);
1137         allocParamsForBufferLinear.pBufName = "PAK Stitch Dmem Buffer";
1138         auto numOfPasses = CODECHAL_VDENC_BRC_NUM_OF_PASSES;
1139         for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
1140         {
1141             for (auto i = 0; i < numOfPasses; i++)
1142             {
1143                 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1144                     m_osInterface->pfnAllocateResource(
1145                         m_osInterface,
1146                         &allocParamsForBufferLinear,
1147                         &m_resHucPakStitchDmemBuffer[k][i]),
1148                     "Failed to allocate PAK Stitch Dmem Buffer.");
1149 
1150                 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1151                 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1152                 lockFlagsWriteOnly.WriteOnly = 1;
1153 
1154                 data = (uint8_t*)m_osInterface->pfnLockResource(
1155                     m_osInterface,
1156                     &m_resHucPakStitchDmemBuffer[k][i],
1157                     &lockFlagsWriteOnly);
1158 
1159                 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1160 
1161                 MOS_ZeroMemory(
1162                     data,
1163                     allocParamsForBufferLinear.dwBytes);
1164 
1165                 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucPakStitchDmemBuffer[k][i]);
1166             }
1167         }
1168 
1169         // BRC Data Buffer
1170         allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
1171         allocParamsForBufferLinear.pBufName = "BRC Data Buffer";
1172 
1173         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1174             m_osInterface->pfnAllocateResource(
1175                 m_osInterface,
1176                 &allocParamsForBufferLinear,
1177                 &m_resBrcDataBuffer),
1178             "Failed to allocate BRC Data Buffer Buffer.");
1179 
1180         MOS_LOCK_PARAMS lockFlags;
1181         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1182         lockFlags.WriteOnly = 1;
1183 
1184         data = (uint8_t*)m_osInterface->pfnLockResource(
1185             m_osInterface,
1186             &m_resBrcDataBuffer,
1187             &lockFlags);
1188 
1189         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1190 
1191         MOS_ZeroMemory(
1192             data,
1193             allocParamsForBufferLinear.dwBytes);
1194 
1195         m_osInterface->pfnUnlockResource(m_osInterface, &m_resBrcDataBuffer);
1196     }
1197 
1198     if (m_numDelay)
1199     {
1200         allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1201         allocParamsForBufferLinear.pBufName = "DelayMinusMemory";
1202 
1203         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1204             m_osInterface,
1205             &allocParamsForBufferLinear,
1206             &m_resDelayMinus), "Failed to allocate delay minus memory.");
1207 
1208         uint8_t* data;
1209         MOS_LOCK_PARAMS lockFlags;
1210         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1211         lockFlags.WriteOnly = 1;
1212         data = (uint8_t*)m_osInterface->pfnLockResource(
1213             m_osInterface,
1214             &m_resDelayMinus,
1215             &lockFlags);
1216 
1217         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1218 
1219         MOS_ZeroMemory(data, sizeof(uint32_t));
1220 
1221         m_osInterface->pfnUnlockResource(m_osInterface, &m_resDelayMinus);
1222     }
1223 
1224     return eStatus;
1225 }
1226 
FreePakResources()1227 MOS_STATUS CodechalVdencHevcStateG11::FreePakResources()
1228 {
1229     CODECHAL_ENCODE_FUNCTION_ENTER;
1230 
1231     m_osInterface->pfnFreeResource(m_osInterface, &m_resSseSrcPixelRowStoreBuffer);
1232     m_osInterface->pfnFreeResource(m_osInterface, &m_resHcpScalabilitySyncBuffer.sResource);
1233     m_osInterface->pfnFreeResource(m_osInterface, &m_resPakcuLevelStreamoutData.sResource);
1234 
1235     for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resTileBasedStatisticsBuffer); i++)
1236     {
1237         m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[i].sResource);
1238     }
1239     for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileRecordBuffer); i++)
1240     {
1241         m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[i].sResource);
1242     }
1243     m_osInterface->pfnFreeResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource);
1244 
1245     MOS_FreeMemory(m_tileParams);
1246 
1247     // command buffer for VE, allocated in MOS_STATUS CodechalEncodeHevcBase::VerifyCommandBufferSize()
1248     for (auto i = 0; i < CODECHAL_NUM_UNCOMPRESSED_SURFACE_HEVC; i++)
1249     {
1250         for (auto j = 0; j < CODECHAL_HEVC_MAX_NUM_HCP_PIPE; j++)
1251         {
1252             for (auto k = 0; k < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; k++)
1253             {
1254                 PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[i][j][k];
1255 
1256                 if (!Mos_ResourceIsNull(&cmdBuffer->OsResource))
1257                 {
1258                     if (cmdBuffer->pCmdBase)
1259                     {
1260                         m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
1261                     }
1262                     m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
1263                 }
1264             }
1265         }
1266     }
1267 
1268     for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_refSync); i++)
1269     {
1270         auto sync = &m_refSync[i];
1271 
1272         if (!Mos_ResourceIsNull(&sync->resSyncObject))
1273         {
1274             // if this object has been signaled before, we need to wait to ensure singal-wait is in pair.
1275             if (sync->uiSemaphoreObjCount || sync->bInUsed)
1276             {
1277                 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
1278                 syncParams.GpuContext = m_renderContext;
1279                 syncParams.presSyncResource = &sync->resSyncObject;
1280                 syncParams.uiSemaphoreCount = sync->uiSemaphoreObjCount;
1281                 m_osInterface->pfnEngineWait(m_osInterface, &syncParams);
1282             }
1283         }
1284         m_osInterface->pfnFreeResource(m_osInterface, &sync->resSemaphoreMem.sResource);
1285     }
1286     m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcPakSemaphoreMem.sResource);
1287     m_osInterface->pfnFreeResource(m_osInterface, &m_resPipeStartSemaMem);
1288     m_osInterface->pfnFreeResource(m_osInterface, &m_resSyncSemaMem);
1289 
1290     for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resVdBoxSemaphoreMem); i++)
1291     {
1292         m_osInterface->pfnFreeResource(m_osInterface, &m_resVdBoxSemaphoreMem[i].sResource);
1293     }
1294 
1295     if (m_hucPakStitchEnabled)
1296     {
1297         m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcDataBuffer);
1298         auto numOfPasses = CODECHAL_VDENC_BRC_NUM_OF_PASSES;
1299         for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
1300         {
1301             for (auto i = 0; i < numOfPasses; i++)
1302             {
1303                 m_osInterface->pfnFreeResource(m_osInterface, &m_resHucPakStitchDmemBuffer[k][i]);
1304             }
1305         }
1306     }
1307 
1308     if (m_numDelay)
1309     {
1310         m_osInterface->pfnFreeResource(m_osInterface, &m_resDelayMinus);
1311     }
1312 
1313     return CodechalVdencHevcState::FreePakResources();
1314 }
1315 
AllocateEncResources()1316 MOS_STATUS CodechalVdencHevcStateG11::AllocateEncResources()
1317 {
1318     CODECHAL_ENCODE_FUNCTION_ENTER;
1319 
1320     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1321 
1322     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::AllocateEncResources());
1323 
1324      if (m_hmeSupported)
1325     {
1326          HmeParams hmeParams;
1327 
1328         MOS_ZeroMemory(&hmeParams, sizeof(hmeParams));
1329         hmeParams.b4xMeDistortionBufferSupported = true;
1330         hmeParams.ps16xMeMvDataBuffer            = &m_s16XMeMvDataBuffer;
1331         hmeParams.ps32xMeMvDataBuffer            = &m_s32XMeMvDataBuffer;
1332         hmeParams.ps4xMeDistortionBuffer         = &m_s4XMeDistortionBuffer;
1333         hmeParams.ps4xMeMvDataBuffer             = &m_s4XMeMvDataBuffer;
1334         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources4xME(&hmeParams));
1335         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources16xME(&hmeParams));
1336         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources32xME(&hmeParams));
1337     }
1338 
1339     return eStatus;
1340 }
1341 
FreeEncResources()1342 MOS_STATUS CodechalVdencHevcStateG11::FreeEncResources()
1343 {
1344     CODECHAL_ENCODE_FUNCTION_ENTER;
1345     // Free ME resources
1346     HmeParams hmeParams;
1347 
1348     MOS_ZeroMemory(&hmeParams, sizeof(hmeParams));
1349     hmeParams.ps16xMeMvDataBuffer    = &m_s16XMeMvDataBuffer;
1350     hmeParams.ps32xMeMvDataBuffer    = &m_s32XMeMvDataBuffer;
1351     hmeParams.ps4xMeDistortionBuffer = &m_s4XMeDistortionBuffer;
1352     hmeParams.ps4xMeMvDataBuffer     = &m_s4XMeMvDataBuffer;
1353     DestroyMEResources(&hmeParams);
1354 
1355     return CodechalVdencHevcState::FreeEncResources();
1356 }
1357 
AllocateBrcResources()1358 MOS_STATUS CodechalVdencHevcStateG11::AllocateBrcResources()
1359 {
1360     CODECHAL_ENCODE_FUNCTION_ENTER;
1361 
1362     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::AllocateBrcResources());
1363     // initiate allocation paramters and lock flags
1364     MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1365     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1366     allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1367     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1368     allocParamsForBufferLinear.Format = Format_Buffer;
1369     // VDEnc Group3 batch buffer (input for HuC FW)
1370     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_hwInterface->m_vdencGroup3BatchBufferSize, CODECHAL_PAGE_SIZE);
1371     allocParamsForBufferLinear.pBufName = "VDENC Group3 Batch Buffer";
1372 
1373     for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
1374     {
1375         for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
1376         {
1377             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1378                 m_osInterface,
1379                 &allocParamsForBufferLinear,
1380                 &m_vdencGroup3BatchBuffer[k][i]),
1381                 "Failed to allocate VDENC Group 3 Batch Buffer");
1382         }
1383     }
1384     return MOS_STATUS_SUCCESS;
1385 }
1386 
FreeBrcResources()1387 MOS_STATUS CodechalVdencHevcStateG11::FreeBrcResources()
1388 {
1389     CODECHAL_ENCODE_FUNCTION_ENTER;
1390 
1391     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::FreeBrcResources());
1392 
1393     for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
1394     {
1395         for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
1396         {
1397             m_osInterface->pfnFreeResource(m_osInterface, &m_vdencGroup3BatchBuffer[k][i]);
1398         }
1399     }
1400     return MOS_STATUS_SUCCESS;
1401 }
1402 
InitializePicture(const EncoderParams & params)1403 MOS_STATUS CodechalVdencHevcStateG11::InitializePicture(const EncoderParams& params)
1404 {
1405     CODECHAL_ENCODE_FUNCTION_ENTER;
1406 
1407     // common initilization
1408     return CodechalVdencHevcState::InitializePicture(params);
1409 }
1410 
SetPictureStructs()1411 MOS_STATUS CodechalVdencHevcStateG11::SetPictureStructs()
1412 {
1413     CODECHAL_ENCODE_FUNCTION_ENTER;
1414 
1415     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1416 
1417     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::SetPictureStructs());
1418 
1419     if ((uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_chromaFormat &&
1420         (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat)
1421     {
1422         if (Format_YUY2 != m_reconSurface.Format)
1423         {
1424             eStatus = MOS_STATUS_INVALID_PARAMETER;
1425             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Recon surface format is not correct!");
1426         }
1427         else if (m_reconSurface.dwHeight < m_oriFrameHeight * 2 ||
1428             m_reconSurface.dwWidth < m_oriFrameWidth / 2)
1429         {
1430             eStatus = MOS_STATUS_INVALID_PARAMETER;
1431             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Recon surface allocation size is not correct!");
1432         }
1433         else
1434         {
1435             // update Recon surface to Variant format
1436             CodechalEncodeHevcBase::UpdateYUY2SurfaceInfo(&m_reconSurface, m_is10BitHevc);
1437         }
1438     }
1439 
1440     return eStatus;
1441 }
1442 
~CodechalVdencHevcStateG11()1443 CodechalVdencHevcStateG11::~CodechalVdencHevcStateG11()
1444 {
1445     CODECHAL_ENCODE_FUNCTION_ENTER;
1446 
1447     if (m_scalabilityState)
1448     {
1449         MOS_FreeMemAndSetNull(m_scalabilityState);
1450     }
1451     //Note: virtual engine interface destroy is done in MOS layer
1452 
1453     CODECHAL_DEBUG_TOOL(
1454         MOS_Delete(m_encodeParState);
1455     )
1456     return;
1457 }
1458 
GetStatusReport(EncodeStatus * encodeStatus,EncodeStatusReport * encodeStatusReport)1459 MOS_STATUS CodechalVdencHevcStateG11::GetStatusReport(
1460     EncodeStatus *encodeStatus,
1461     EncodeStatusReport *encodeStatusReport)
1462 {
1463     CODECHAL_ENCODE_FUNCTION_ENTER;
1464 
1465     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1466 
1467     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus);
1468     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport);
1469 
1470     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpVdencOutputs()));
1471 
1472     if (encodeStatusReport->UsedVdBoxNumber <= 1)
1473     {
1474         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::GetStatusReport(encodeStatus, encodeStatusReport));
1475         return eStatus;
1476     }
1477 
1478     // In case of CQP, PAK integration kernel is not called, so used tile size record from HW
1479     PCODECHAL_ENCODE_BUFFER tileSizeStatusReport = &m_tileRecordBuffer[encodeStatusReport->CurrOriginalPic.FrameIdx];
1480 
1481     MOS_LOCK_PARAMS lockFlags;
1482     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1483     lockFlags.ReadOnly = 1;
1484     HCPPakHWTileSizeRecord_G11* tileStatusReport = (HCPPakHWTileSizeRecord_G11*)m_osInterface->pfnLockResource(
1485         m_osInterface,
1486         &tileSizeStatusReport->sResource,
1487         &lockFlags);
1488     CODECHAL_ENCODE_CHK_NULL_RETURN(tileStatusReport);
1489 
1490     encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
1491     encodeStatusReport->PanicMode = false;
1492     encodeStatusReport->AverageQp = 0;
1493     encodeStatusReport->QpY = 0;
1494     encodeStatusReport->SuggestedQpYDelta = 0;
1495     encodeStatusReport->NumberPasses = 1;
1496     encodeStatusReport->bitstreamSize = 0;
1497     encodeStatus->ImageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQp = 0;
1498     encodeStatusReport->NumberSlices = 0;
1499 
1500     uint32_t* sliceSize = nullptr;
1501 
1502     // pSliceSize is set/ allocated only when dynamic slice is enabled. Cannot use SSC flag here, as it is an asynchronous call
1503     if (encodeStatus->sliceReport.pSliceSize)
1504     {
1505         sliceSize = (uint32_t*)m_osInterface->pfnLockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize, &lockFlags);
1506         CODECHAL_ENCODE_CHK_NULL_RETURN(sliceSize);
1507     }
1508 
1509     uint32_t totalCU = 0, sliceCount = 0;
1510     double sumQp = 0.0;
1511     for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1512     {
1513         if (tileStatusReport[i].Length == 0)
1514         {
1515             encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE;
1516             return eStatus;
1517         }
1518 
1519         encodeStatusReport->bitstreamSize += tileStatusReport[i].Length;
1520         totalCU += (m_tileParams[i].TileHeightInMinCbMinus1 + 1) * (m_tileParams[i].TileWidthInMinCbMinus1 + 1);
1521         sumQp += tileStatusReport[i].Hcp_Qp_Status_Count;
1522 
1523         if (sliceSize)
1524         {
1525             encodeStatusReport->pSliceSizes   = (uint16_t*)sliceSize;
1526             encodeStatusReport->NumberSlices += (uint8_t)tileStatusReport[i].Hcp_Slice_Count_Tile;
1527             uint16_t prevCumulativeSliceSize  = 0;
1528             // HW writes out a DW for each slice size. Copy in place the DW into 16bit fields expected by App
1529             for (uint32_t idx = 0; idx < tileStatusReport[i].Hcp_Slice_Count_Tile; idx++)
1530             {
1531                 // PAK output the sliceSize at 16DW intervals.
1532                 CODECHAL_ENCODE_CHK_NULL_RETURN(&sliceSize[sliceCount * 16]);
1533 
1534                 //convert cummulative slice size to individual, first slice may have PPS/SPS,
1535                 uint32_t CurrAccumulatedSliceSize           = sliceSize[sliceCount * 16];
1536                 encodeStatusReport->pSliceSizes[sliceCount] = CurrAccumulatedSliceSize - prevCumulativeSliceSize;
1537                 prevCumulativeSliceSize += encodeStatusReport->pSliceSizes[sliceCount];
1538                 sliceCount++;
1539             }
1540         }
1541     }
1542 
1543     if (sliceSize)
1544     {
1545         encodeStatusReport->SizeOfSliceSizesBuffer  = sizeof(uint16_t) * encodeStatusReport->NumberSlices;
1546         encodeStatusReport->SliceSizeOverflow       = (encodeStatus->sliceReport.SliceSizeOverflow >> 16) & 1;
1547         m_osInterface->pfnUnlockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize);
1548     }
1549 
1550     CODECHAL_ENCODE_CHK_STATUS_RETURN(CalculatePSNR(encodeStatus, encodeStatusReport));
1551 
1552     if (encodeStatusReport->bitstreamSize == 0 ||
1553         encodeStatusReport->bitstreamSize >m_bitstreamUpperBound)
1554     {
1555         encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
1556         encodeStatusReport->bitstreamSize = 0;
1557         return MOS_STATUS_INVALID_FILE_SIZE;
1558     }
1559 
1560     if (totalCU != 0)
1561     {
1562         encodeStatusReport->QpY = encodeStatusReport->AverageQp =
1563             (uint8_t)((sumQp / (double)totalCU) / 4.0); // due to TU is 4x4 and there are 4 TUs in one CU
1564     }
1565     else
1566     {
1567         return MOS_STATUS_INVALID_PARAMETER;
1568     }
1569 
1570     if (m_enableTileStitchByHW)
1571     {
1572         // clean-up the tile status report buffer
1573         MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * encodeStatusReport->NumberTilesInFrame);
1574         m_osInterface->pfnUnlockResource(m_osInterface, &tileSizeStatusReport->sResource);
1575         return eStatus;
1576     }
1577 
1578     uint8_t *tempBsBuffer = nullptr, *bufPtr = nullptr;
1579     tempBsBuffer = bufPtr = (uint8_t*)MOS_AllocAndZeroMemory(encodeStatusReport->bitstreamSize);
1580     CODECHAL_ENCODE_CHK_NULL_RETURN(tempBsBuffer);
1581 
1582     PCODEC_REF_LIST currRefList = encodeStatus->encodeStatusReport.pCurrRefList;
1583     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1584     lockFlags.ReadOnly = 1;
1585     uint8_t* bitstream = (uint8_t*)m_osInterface->pfnLockResource(
1586         m_osInterface,
1587         &currRefList->resBitstreamBuffer,
1588         &lockFlags);
1589     if (bitstream == nullptr)
1590     {
1591         MOS_FreeMemory(tempBsBuffer);
1592         return MOS_STATUS_NULL_POINTER;
1593     }
1594 
1595     for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1596     {
1597         uint32_t offset = m_tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
1598         uint32_t len = tileStatusReport[i].Length;
1599 
1600         MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
1601         bufPtr += len;
1602     }
1603 
1604     MOS_SecureMemcpy(bitstream, encodeStatusReport->bitstreamSize, tempBsBuffer, encodeStatusReport->bitstreamSize);
1605     MOS_ZeroMemory(&bitstream[encodeStatusReport->bitstreamSize],m_bitstreamUpperBound - encodeStatusReport->bitstreamSize);
1606 
1607     if (bitstream)
1608     {
1609         m_osInterface->pfnUnlockResource(m_osInterface, &currRefList->resBitstreamBuffer);
1610     }
1611 
1612     MOS_FreeMemory(tempBsBuffer);
1613 
1614     if (tileStatusReport)
1615     {
1616         // clean-up the tile status report buffer
1617         MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * encodeStatusReport->NumberTilesInFrame);
1618 
1619         m_osInterface->pfnUnlockResource(m_osInterface, &tileSizeStatusReport->sResource);
1620     }
1621 
1622     return eStatus;
1623 }
1624 
UserFeatureKeyReport()1625 MOS_STATUS CodechalVdencHevcStateG11::UserFeatureKeyReport()
1626 {
1627     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1628 
1629     CODECHAL_ENCODE_FUNCTION_ENTER;
1630 
1631     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::UserFeatureKeyReport());
1632 
1633 #if (_DEBUG || _RELEASE_INTERNAL)
1634     CodecHalEncode_WriteKey64(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE, m_kmdVeOveride.Value, m_osInterface->pOsContext);
1635     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_USED_VDBOX_NUM_ID, m_numPipe, m_osInterface->pOsContext);
1636     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENABLE_ENCODE_VE_CTXSCHEDULING_ID, MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface), m_osInterface->pOsContext);
1637 #endif
1638 
1639     return eStatus;
1640 }
1641 
//!
//! \brief    Run the ENC kernels that precede VDEnc: CSC/downscaling and the HME kernels.
//! \details  The whole body is compiled only when ENABLE_KERNELS is defined and
//!           _FULL_OPEN_SOURCE is not; otherwise the function just returns success.
//!           In debug-tool builds the raw input surface and the ME outputs are dumped.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else the first failing status
//!
MOS_STATUS CodechalVdencHevcStateG11::EncodeKernelFunctions()
{
    CODECHAL_ENCODE_FUNCTION_ENTER;

    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

#if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
    // Debug only: dump the raw surface that is about to be encoded
    CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
        m_rawSurfaceToEnc,
        CodechalDbgAttr::attrEncodeRawInputSurface,
        "SrcSurf")));
    auto singleTaskPhaseSupported = m_singleTaskPhaseSupported;    // local variable to save current setting before overwriting

    // NOTE(review): any CHK_STATUS_RETURN below that fires before the restore further down
    // leaves m_singleTaskPhaseSupported at false - confirm that is acceptable on error paths.
    if (m_16xMeSupported)
    {
        // Single task phase is switched off while the CSC/downscaling kernels are submitted
        m_singleTaskPhaseSupported = false;

        CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
        MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));

        // CSC and 4x DS are never the last task; 16x/32x DS are last only when no later
        // downscaling/HME stage follows them
        cscScalingKernelParams.bLastTaskInPhaseCSC  =
        cscScalingKernelParams.bLastTaskInPhase4xDS = false;
        cscScalingKernelParams.bLastTaskInPhase16xDS    = !(m_32xMeSupported || m_hmeEnabled);
        cscScalingKernelParams.bLastTaskInPhase32xDS    = !m_hmeEnabled;

        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->SetHevcCscFlagAndRawColor());
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams));
    }

    // HME kernels run from the coarsest enabled level down to 4x
    if (m_b16XMeEnabled)
    {
        if (m_b32XMeEnabled)
        {
            //HME_P kernel for 32xME
            CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_32x));
        }

        //HME_P kernel for 16xME
        CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_16x));

        //StreamIn kernel, 4xME
        CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_4x));
    }

    // retrieve SingleTaskPhase setting (SAO will need STP enabled setting)
    m_singleTaskPhaseSupported = singleTaskPhaseSupported;

    // Debug only: dump the ME outputs (MV data, distortion, VDEnc stream-in) when HME is enabled
    CODECHAL_DEBUG_TOOL(
        if (m_hmeEnabled) {
            CODECHAL_ME_OUTPUT_PARAMS meOutputParams;

            // 4x ME: MV data and distortion
            MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams));
            meOutputParams.psMeMvBuffer            = &m_s4XMeMvDataBuffer;
            meOutputParams.psMeBrcDistortionBuffer = nullptr;
            meOutputParams.psMeDistortionBuffer    = &m_s4XMeDistortionBuffer;
            meOutputParams.b16xMeInUse = false;
            meOutputParams.b32xMeInUse = false;

            // The offset argument is non-zero only for a bottom field picture
            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
                &meOutputParams.psMeMvBuffer->OsResource,
                CodechalDbgAttr::attrOutput,
                "MvData",
                meOutputParams.psMeMvBuffer->dwHeight *meOutputParams.psMeMvBuffer->dwPitch,
                CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64) * (m_downscaledFrameFieldHeightInMb4x * 4) : 0,
                CODECHAL_MEDIA_STATE_4X_ME));

            // The BRC distortion dump is disabled; psMeBrcDistortionBuffer is nullptr above
            //CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
            //    &meOutputParams.psMeBrcDistortionBuffer->OsResource,
            //    CodechalDbgAttr::attrOutput,
            //    "BrcDist",
            //    meOutputParams.psMeBrcDistortionBuffer->dwHeight *meOutputParams.psMeBrcDistortionBuffer->dwPitch,
            //    CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64) * MOS_ALIGN_CEIL((m_downscaledFrameFieldHeightInMb4x * 4), 8) : 0,
            //    CODECHAL_MEDIA_STATE_4X_ME));
            if (meOutputParams.psMeDistortionBuffer)
            {
                CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
                    &meOutputParams.psMeDistortionBuffer->OsResource,
                    CodechalDbgAttr::attrOutput,
                    "MeDist",
                    meOutputParams.psMeDistortionBuffer->dwHeight *meOutputParams.psMeDistortionBuffer->dwPitch,
                    CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64) * MOS_ALIGN_CEIL((m_downscaledFrameFieldHeightInMb4x * 4 * 10), 8) : 0,
                    CODECHAL_MEDIA_STATE_4X_ME));
            }
            // 16x ME: MV data only
            if (m_b16XMeEnabled)
            {
                MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams));
                meOutputParams.psMeMvBuffer            = &m_s16XMeMvDataBuffer;
                meOutputParams.psMeBrcDistortionBuffer = nullptr;
                meOutputParams.psMeDistortionBuffer = nullptr;
                meOutputParams.b16xMeInUse = true;
                meOutputParams.b32xMeInUse = false;

                CODECHAL_ENCODE_CHK_STATUS_RETURN(
                    m_debugInterface->DumpBuffer(
                        &meOutputParams.psMeMvBuffer->OsResource,
                        CodechalDbgAttr::attrOutput,
                        "MvData",
                        meOutputParams.psMeMvBuffer->dwHeight *meOutputParams.psMeMvBuffer->dwPitch,
                        CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64) * (m_downscaledFrameFieldHeightInMb16x * 4) : 0,
                        CODECHAL_MEDIA_STATE_16X_ME));
            }
            // 32x ME: MV data only
            if (m_b32XMeEnabled)
            {
                MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams));
                meOutputParams.psMeMvBuffer = &m_s32XMeMvDataBuffer;
                meOutputParams.psMeBrcDistortionBuffer = nullptr;
                meOutputParams.psMeDistortionBuffer = nullptr;
                meOutputParams.b16xMeInUse = false;
                meOutputParams.b32xMeInUse = true;

                CODECHAL_ENCODE_CHK_STATUS_RETURN(
                    m_debugInterface->DumpBuffer(
                        &meOutputParams.psMeMvBuffer->OsResource,
                        CodechalDbgAttr::attrOutput,
                        "MvData",
                        meOutputParams.psMeMvBuffer->dwHeight *meOutputParams.psMeMvBuffer->dwPitch,
                        CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb32x * 32), 64) * (m_downscaledFrameFieldHeightInMb32x * 4) : 0,
                        CODECHAL_MEDIA_STATE_32X_ME));
            }

            // VDEnc stream-in: meOutputParams is filled in here, but the dump below reads
            // m_resVdencStreamInBuffer directly and does not use it
            MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams));
            meOutputParams.pResVdenStreamInBuffer = &(m_resVdencStreamInBuffer[m_currRecycledBufIdx]);
            meOutputParams.psMeMvBuffer = &m_s4XMeMvDataBuffer;
            meOutputParams.psMeDistortionBuffer = &m_s4XMeDistortionBuffer;
            meOutputParams.b16xMeInUse = false;
            meOutputParams.bVdencStreamInInUse = true;
            if (m_vdencStreamInEnabled) {
                CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
                    &m_resVdencStreamInBuffer[m_currRecycledBufIdx],
                    CodechalDbgAttr::attrOutput,
                    "StreaminData",
                    m_picWidthInMb * m_picHeightInMb * CODECHAL_CACHELINE_SIZE,
                    0,
                    CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN));
            }
        })
#endif

    return eStatus;
}
1782 
ReadSliceSize(PMOS_COMMAND_BUFFER cmdBuffer)1783 MOS_STATUS CodechalVdencHevcStateG11::ReadSliceSize(PMOS_COMMAND_BUFFER cmdBuffer)
1784 {
1785     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1786 
1787     CODECHAL_ENCODE_FUNCTION_ENTER;
1788 
1789     // Use FrameStats buffer if in single pipe mode.
1790     if (m_numPipe == 1)
1791     {
1792         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::ReadSliceSize(cmdBuffer));
1793         return eStatus;
1794     }
1795 
1796     // Report slice size to app only when dynamic scaling is enabled
1797     if (!m_hevcSeqParams->SliceSizeControl)
1798     {
1799         return eStatus;
1800     }
1801 
1802     // In multi-tile multi-pipe mode, use PAK integration kernel output
1803     // PAK integration kernel accumulates frame statistics across tiles, which should be used to setup slice size report
1804     MOS_LOCK_PARAMS lockFlags;
1805     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1806     lockFlags.WriteOnly = true;
1807 
1808     uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize + sizeof(uint32_t) * 2);  // encodeStatus is offset by 2 DWs in the resource
1809     uint32_t sizeOfSliceSizesBuffer = MOS_ALIGN_CEIL(m_numLcu * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
1810 
1811     if (IsFirstPipe())
1812     {
1813         if (IsFirstPass())
1814         {
1815             // Create/ Initialize slice report buffer once per frame, to be used across passes
1816             if (Mos_ResourceIsNull(&m_resSliceReport[m_encodeStatusBuf.wCurrIndex]))
1817             {
1818                 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1819                 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1820                 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1821                 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1822                 allocParamsForBufferLinear.Format = Format_Buffer;
1823                 allocParamsForBufferLinear.dwBytes = sizeOfSliceSizesBuffer;
1824 
1825                 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1826                     m_osInterface,
1827                     &allocParamsForBufferLinear,
1828                     &m_resSliceReport[m_encodeStatusBuf.wCurrIndex]),
1829                     "Failed to create HEVC VDEnc Slice Report Buffer ");
1830             }
1831 
1832             // Clear slice size structure to be sent in EncodeStatusReport buffer
1833             uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex], &lockFlags);
1834             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1835             MOS_ZeroMemory(data, sizeOfSliceSizesBuffer);
1836             m_osInterface->pfnUnlockResource(m_osInterface, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex]);
1837 
1838             // Set slice size pointer in slice size structure
1839             MHW_MI_FLUSH_DW_PARAMS  miFlushDwParams;
1840             MOS_ZeroMemory(&miFlushDwParams, sizeof(miFlushDwParams));
1841             miFlushDwParams.pOsResource      = &m_encodeStatusBuf.resStatusBuffer;
1842             miFlushDwParams.dwResourceOffset = CODECHAL_OFFSETOF(EncodeStatusSliceReport, pSliceSize) + baseOffset + m_encodeStatusBuf.dwSliceReportOffset;
1843             miFlushDwParams.dwDataDW1        = (uint32_t)((uint64_t)&m_resSliceReport[m_encodeStatusBuf.wCurrIndex] & 0xFFFFFFFF);
1844             miFlushDwParams.dwDataDW2        = (uint32_t)(((uint64_t)&m_resSliceReport[m_encodeStatusBuf.wCurrIndex] & 0xFFFFFFFF00000000) >> 32);
1845             miFlushDwParams.bQWordEnable     = 1;
1846             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
1847                 cmdBuffer,
1848                 &miFlushDwParams));
1849         }
1850 
1851         // Copy Slice size data buffer from PAK to be sent back to App
1852         CODECHAL_ENCODE_CHK_STATUS_RETURN(CopyDataBlock(cmdBuffer,
1853             &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
1854             m_hevcTileStatsOffset.uiHevcSliceStreamout,
1855             &m_resSliceReport[m_encodeStatusBuf.wCurrIndex],
1856             0,
1857             sizeOfSliceSizesBuffer));
1858 
1859         MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
1860         MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
1861         miCpyMemMemParams.presSrc       = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Slice size overflow is in m_resFrameStatStreamOutBuffer DW0[16]
1862         miCpyMemMemParams.dwSrcOffset   = m_hevcFrameStatsOffset.uiHevcPakStatistics;
1863         miCpyMemMemParams.presDst       = &m_encodeStatusBuf.resStatusBuffer;
1864         miCpyMemMemParams.dwDstOffset   = baseOffset + m_encodeStatusBuf.dwSliceReportOffset;     // Slice size overflow is at DW0 EncodeStatusSliceReport
1865         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
1866     }
1867 
1868     return eStatus;
1869 }
1870 
ExecutePictureLevel()1871 MOS_STATUS CodechalVdencHevcStateG11::ExecutePictureLevel()
1872 {
1873     CODECHAL_ENCODE_FUNCTION_ENTER;
1874 
1875     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1876 
1877     if (IsFirstPipe() && IsFirstPass())
1878     {
1879         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData(m_tileParams));
1880         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileStatistics());
1881     }
1882 
1883     if (m_hevcPicParams->bUsedAsRef || m_brcEnabled)
1884     {
1885         if (m_currRefSync == nullptr)
1886         {
1887             m_currRefSync = &m_refSync[m_currMbCodeIdx];
1888         }
1889     }
1890     else
1891     {
1892         m_currRefSync = nullptr;
1893     }
1894 
1895     if (m_lookaheadPass && (m_hevcSeqParams->MaxAdaptiveGopPicSize > 0))
1896     {
1897         bool forceIntra = m_intraInterval >= m_hevcSeqParams->MaxAdaptiveGopPicSize;
1898         if ((!IsFirstPass() || forceIntra) && (m_hevcPicParams->CodingType != I_TYPE))
1899         {
1900             m_vdencStreamInEnabled = true;
1901         }
1902 
1903         if (!m_lookaheadAdaptiveI)
1904         {
1905             m_intraInterval = forceIntra ? 1 : m_intraInterval + 1;
1906         }
1907     }
1908 
1909     m_firstTaskInPhase = m_singleTaskPhaseSupported? IsFirstPass(): false;
1910     m_lastTaskInPhase = m_singleTaskPhaseSupported? IsLastPass(): true;
1911 
1912     // Per frame maximum HuC kernels is 5 - BRC Init, BRC Update, PAK Int, BRC Update, PAK Int
1913     m_hucCommandsSize = m_hwInterface->m_hucCommandBufferSize * 5;
1914     PerfTagSetting perfTag;
1915     perfTag.Value             = 0;
1916     perfTag.Mode              = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
1917     perfTag.CallType          = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
1918     perfTag.PictureCodingType = m_pictureCodingType;
1919     m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
1920 
1921     if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex())                                                                         \
1922     {
1923         CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
1924         eStatus = MOS_STATUS_INVALID_PARAMETER;
1925         return eStatus;
1926     }
1927 
1928     CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize());
1929 
1930     if (!m_singleTaskPhaseSupportedInPak)
1931     {
1932         // Command buffer or patch list size are too small and so we cannot submit multiple pass of PAKs together
1933         m_firstTaskInPhase = true;
1934         m_lastTaskInPhase = true;
1935     }
1936 
1937     if (m_lookaheadPass)
1938     {
1939         if (m_swLaMode != nullptr)
1940         {
1941             m_lastTaskInPhase = true;
1942         }
1943         else
1944         {
1945             m_lastTaskInPhase = !m_singleTaskPhaseSupported;
1946         }
1947     }
1948 
1949     // PAK pass type for each pass: VDEnc+PAK vs. PAK-only
1950     SetPakPassType();
1951 
1952     bool pakOnlyMultipassEnable;
1953 
1954     // "PAK-Only Multi-Pass Enable" set to zero in first pass and 1 in subsequent passes
1955     // Slice size conformance feature can't be enabled. When SSC enabled, VDENC + PAK 2nd pass needs to be used.
1956     // SAO 2nd pass has to be PAK-only 2nd pass
1957     if (m_numPipe >= 2)
1958     {
1959         int32_t currentPass = GetCurrentPass();
1960 
1961         pakOnlyMultipassEnable = (currentPass != 0) && (m_hevcSeqParams->SAO_enabled_flag) && (!m_hevcSeqParams->SliceSizeControl);
1962     }
1963     else
1964     {
1965         pakOnlyMultipassEnable = m_pakOnlyPass;
1966     }
1967 
1968     bool panicEnabled = (m_brcEnabled) && (m_panicEnable) && (GetCurrentPass() == 1) && !m_pakOnlyPass;
1969 
1970     uint32_t rollingILimit = (m_hevcPicParams->bEnableRollingIntraRefresh == ROLLING_I_ROW) ? MOS_ROUNDUP_DIVIDE(m_frameHeight, 32) : (m_hevcPicParams->bEnableRollingIntraRefresh == ROLLING_I_COLUMN) ? MOS_ROUNDUP_DIVIDE(m_frameWidth, 32) : 0;
1971 
1972     m_refList[m_currReconstructedPic.FrameIdx]->rollingIntraRefreshedPosition =
1973         CodecHal_Clip3(0, rollingILimit, m_hevcPicParams->IntraInsertionLocation + m_hevcPicParams->IntraInsertionSize);
1974 
1975     // For ACQP / BRC, update pic params rolling intra reference location here before cmd buffer is prepared.
1976     PCODEC_PICTURE l0RefFrameList = m_hevcSliceParams->RefPicList[LIST_0];
1977     for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
1978     {
1979         CODEC_PICTURE refPic = l0RefFrameList[refIdx];
1980 
1981         if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
1982         {
1983             uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
1984             m_hevcPicParams->RollingIntraReferenceLocation[refIdx] = m_refList[refPicIdx]->rollingIntraRefreshedPosition;
1985         }
1986     }
1987 
1988     if(IsFirstPipe())
1989     {
1990         CODECHAL_ENCODE_CHK_NULL_RETURN(m_hucCmdInitializer);
1991         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->CmdInitializerSetConstData(
1992             m_osInterface,
1993             m_miInterface,
1994             m_vdencInterface,
1995             m_hevcSeqParams,
1996             m_hevcPicParams,
1997             m_hevcSliceParams,
1998             pakOnlyMultipassEnable,
1999             m_hevcVdencAcqpEnabled,
2000             m_brcEnabled,
2001             m_vdencStreamInEnabled,
2002             m_vdencNativeROIEnabled,
2003             m_brcAdaptiveRegionBoostEnable,
2004             m_hevcVdencRoundingEnabled,
2005             panicEnabled,
2006             GetCurrentPass()));
2007     }
2008 
2009     // clean-up per VDBOX semaphore memory
2010     int32_t currentPipe = GetCurrentPipe();
2011     int32_t currentPass = GetCurrentPass();
2012     if ((currentPipe < 0) || (currentPass < 0))
2013     {
2014         eStatus = MOS_STATUS_INVALID_PARAMETER;
2015         return eStatus;
2016     }
2017 
2018     if (m_numPipe >= 2)
2019     {
2020         // Send Cmd Buffer Header for VE in last pipe only
2021         MOS_COMMAND_BUFFER cmdBuffer;
2022         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2023 
2024         bool requestFrameTracking = m_singleTaskPhaseSupported ? IsFirstPass() : IsLastPass();
2025         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
2026 
2027         if (!m_singleTaskPhaseSupported || (m_singleTaskPhaseSupported && IsFirstPass()))
2028         {
2029             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStopCmd(&cmdBuffer));
2030 
2031             //HW Semaphore cmd to make sure all pipes start encode at the same time
2032             CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSemaMem, 1, MHW_MI_ATOMIC_INC, &cmdBuffer));
2033             CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2034                 &m_resPipeStartSemaMem,
2035                 &cmdBuffer,
2036                 m_numPipe));
2037 
2038             // Program some placeholder cmds to resolve the hazard between pipe sync
2039             MHW_MI_STORE_DATA_PARAMS dataParams;
2040             dataParams.pOsResource = &m_resDelayMinus;
2041             dataParams.dwResourceOffset = 0;
2042             dataParams.dwValue = 0xDE1A;
2043             for (uint32_t i = 0; i < m_numDelay; i++)
2044             {
2045                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2046                     &cmdBuffer,
2047                     &dataParams));
2048             }
2049 
2050             //clean HW semaphore memory
2051             CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSemaMem, 1, MHW_MI_ATOMIC_DEC, &cmdBuffer));
2052 
2053             //Start Watchdog Timer
2054             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStartCmd(&cmdBuffer));
2055         }
2056 
2057         // clean-up per VDBOX semaphore memory, only in the first BRC pass. Same semaphore is re-used across BRC passes for stitch command
2058         if (IsFirstPass())
2059         {
2060             if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[currentPipe].sResource))
2061             {
2062                 CODECHAL_ENCODE_CHK_STATUS_RETURN(
2063                     SetSemaphoreMem(
2064                         &m_resVdBoxSemaphoreMem[currentPipe].sResource,
2065                         &cmdBuffer,
2066                         false));
2067             }
2068         }
2069 
2070         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2071     }
2072 
2073     // Send HuC BRC Init/ Update only on first pipe.
2074     if (m_vdencHucUsed && IsFirstPipe())
2075     {
2076         if (!m_singleTaskPhaseSupported)
2077         {
2078             //Reset earlier set PAK perf tag
2079             m_osInterface->pfnResetPerfBufferID(m_osInterface);
2080 
2081             // STF: HuC+VDEnc+PAK single BB, non-STF: HuC Init/HuC Update/(VDEnc+PAK) in separate BBs
2082             perfTag.Value                = 0;
2083             perfTag.Mode                 = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2084             perfTag.CallType             = CODECHAL_ENCODE_PERFTAG_CALL_BRC_INIT_RESET;
2085             perfTag.PictureCodingType    = m_pictureCodingType;
2086             m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2087         }
2088         m_resVdencBrcUpdateDmemBufferPtr[0] = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo);
2089 
2090         // Invoke BRC init/reset FW
2091         if (m_brcInit || m_brcReset)
2092         {
2093             CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcInitReset());
2094         }
2095 
2096         if (!m_singleTaskPhaseSupported)
2097         {
2098             //Reset performance buffer used for BRC init
2099             m_osInterface->pfnResetPerfBufferID(m_osInterface);
2100             // STF: HuC+VDEnc+PAK single BB, non-STF: HuC Init/HuC Update/(VDEnc+PAK) in separate BBs
2101             perfTag.Value                = 0;
2102             perfTag.Mode                 = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2103             perfTag.CallType             = CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE;
2104             perfTag.PictureCodingType    = m_pictureCodingType;
2105             m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2106         }
2107 
2108         // Invoke BRC update FW
2109         CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcUpdate());
2110         m_brcInit = m_brcReset = false;
2111         if (!m_singleTaskPhaseSupported)
2112         {
2113             //reset performance buffer used for BRC update
2114             m_osInterface->pfnResetPerfBufferID(m_osInterface);
2115         }
2116     }
2117 
2118     // for CQP case, we only need to add the generating cmds in first pass/pipe
2119     // the other pipes share the SLB which is generated in the first pass/pipe
2120     // but we need to sync the generating operation
2121     if (!m_vdencHucUsed && IsFirstPass())
2122     {
2123         if (IsFirstPipe())
2124         {
2125             ConstructBatchBufferHuCCQP(&m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource);
2126         }
2127 
2128         if (m_numPipe > 1)
2129         {
2130             MOS_COMMAND_BUFFER cmdBuffer;
2131             CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2132             //HW Semaphore cmd to make sure all pipes wait until the slb is ready
2133             CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resSyncSemaMem, 1, MHW_MI_ATOMIC_INC, &cmdBuffer));
2134             CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2135                 &m_resSyncSemaMem,
2136                 &cmdBuffer,
2137                 m_numPipe));
2138 
2139             // Program some placeholder cmds to resolve the hazard between pipe sync
2140             MHW_MI_STORE_DATA_PARAMS dataParams;
2141             dataParams.pOsResource = &m_resDelayMinus;
2142             dataParams.dwResourceOffset = 0;
2143             dataParams.dwValue = 0xDE1A;
2144             for (uint32_t i = 0; i < m_numDelay; i++)
2145             {
2146                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2147                     &cmdBuffer,
2148                     &dataParams));
2149             }
2150 
2151             //clean HW semaphore memory
2152             CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resSyncSemaMem, 1, MHW_MI_ATOMIC_DEC, &cmdBuffer));
2153             ReturnCommandBuffer(&cmdBuffer);
2154         }
2155 
2156     }
2157 
2158     MOS_COMMAND_BUFFER cmdBuffer;
2159     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2160 
2161     if (!m_singleTaskPhaseSupported)
2162     {
2163         //PAK Perf Tag
2164         perfTag.Value             = 0;
2165         perfTag.Mode              = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2166         perfTag.CallType          = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
2167         perfTag.PictureCodingType = m_pictureCodingType;
2168         m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2169     }
2170 
2171     if ((!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_hevcVdencAcqpEnabled)) && (m_numPipe == 1))
2172     {
2173         // Send command buffer header at the beginning (OS dependent)
2174         // frame tracking tag is only added in the last command buffer header
2175         bool requestFrameTracking = m_singleTaskPhaseSupported ?
2176             m_firstTaskInPhase :
2177             ((m_lookaheadPass && (!m_swLaMode || (m_currPass < m_numPasses))) ? false : m_lastTaskInPhase);
2178 
2179         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
2180     }
2181 
2182     // Ensure the previous BRC Update is done, before executing PAK
2183     if (m_vdencHucUsed && (m_numPipe >= 2))
2184     {
2185         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resBrcPakSemaphoreMem.sResource, 1, MHW_MI_ATOMIC_INC, &cmdBuffer));
2186         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2187             &m_resBrcPakSemaphoreMem.sResource,
2188             &cmdBuffer,
2189             m_numPipe));
2190 
2191         // Program some placeholder cmds to resolve the hazard between pipe sync
2192         MHW_MI_STORE_DATA_PARAMS dataParams;
2193         dataParams.pOsResource = &m_resDelayMinus;
2194         dataParams.dwResourceOffset = 0;
2195         dataParams.dwValue = 0xDE1A;
2196         for (uint32_t i = 0; i < m_numDelay; i++)
2197         {
2198             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2199                 &cmdBuffer,
2200                 &dataParams));
2201         }
2202 
2203         //clean HW semaphore memory
2204         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resBrcPakSemaphoreMem.sResource, 1, MHW_MI_ATOMIC_DEC, &cmdBuffer));
2205     }
2206 
2207     // ACQP + SSC, ACQP + WP, BRC, BRC + SSC, BRC + WP
2208     // 2nd pass for SSC, WP, BRC needs conditional batch buffer end cmd, which is decided by HUC_STATUS output from HuC
2209     if (currentPass && m_vdencHuCConditional2ndPass && (currentPass != m_uc2NdSaoPass))
2210     {
2211         MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
2212 
2213         // Insert conditional batch buffer end
2214         MOS_ZeroMemory(
2215             &miConditionalBatchBufferEndParams,
2216             sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
2217 
2218         // VDENC uses HuC FW generated semaphore for conditional 2nd pass
2219         miConditionalBatchBufferEndParams.presSemaphoreBuffer =
2220             &m_resPakMmioBuffer;
2221 
2222         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
2223             &cmdBuffer,
2224             &miConditionalBatchBufferEndParams));
2225 
2226         if (m_numPipe == 1)
2227         {
2228             auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
2229             CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
2230             uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2;  // encodeStatus is offset by 2 DWs in the resource
2231 
2232             // Write back the HCP image control register for RC6 may clean it out
2233             MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
2234             MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
2235             miLoadRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2236             miLoadRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOffset;
2237             miLoadRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2238             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams));
2239 
2240             MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2241             MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2242             miStoreRegMemParams.presStoreBuffer = &m_vdencBrcBuffers.resBrcPakStatisticBuffer[m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite];
2243             miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
2244             miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2245             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2246 
2247             MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2248             miStoreRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2249             miStoreRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset;
2250             miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2251             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2252         }
2253     }
2254 
2255     if (!currentPass && m_osInterface->bTagResourceSync)
2256     {
2257         // This is a short term solution to solve the sync tag issue: the sync tag write for PAK is inserted at the end of 2nd pass PAK BB
2258         // which may be skipped in multi-pass PAK enabled case. The idea here is to insert the previous frame's tag at the beginning
2259         // of the BB and keep the current frame's tag at the end of the BB. There will be a delay for tag update but it should be fine
2260         // as long as Dec/VP/Enc won't depend on this PAK so soon.
2261 
2262         PMOS_RESOURCE globalGpuContextSyncTagBuffer = nullptr;
2263 
2264         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
2265             m_osInterface,
2266             globalGpuContextSyncTagBuffer));
2267         CODECHAL_ENCODE_CHK_NULL_RETURN(globalGpuContextSyncTagBuffer);
2268 
2269         MHW_MI_STORE_DATA_PARAMS params;
2270         params.pOsResource = globalGpuContextSyncTagBuffer;
2271         params.dwResourceOffset = m_osInterface->pfnGetGpuStatusTagOffset(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2272         uint32_t value = m_osInterface->pfnGetGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2273         params.dwValue = (value > 0) ? (value - 1) : 0;
2274         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &params));
2275     }
2276 
2277     if (IsFirstPipe() && (!m_lookaheadPass || m_swLaMode))
2278     {
2279         CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2280     }
2281 
2282     MHW_VDBOX_SURFACE_PARAMS srcSurfaceParams;
2283     SetHcpSrcSurfaceParams(srcSurfaceParams);
2284 
2285     MHW_VDBOX_SURFACE_PARAMS reconSurfaceParams;
2286     SetHcpReconSurfaceParams(reconSurfaceParams);
2287 
2288     CODECHAL_ENCODE_CHK_NULL_RETURN(m_pipeBufAddrParams);
2289     *m_pipeBufAddrParams = {};
2290     SetHcpPipeBufAddrParams(*m_pipeBufAddrParams);
2291     m_pipeBufAddrParams->pRawSurfParam = &srcSurfaceParams;
2292     m_pipeBufAddrParams->pDecodedReconParam = &reconSurfaceParams;
2293 #ifdef _MMC_SUPPORTED
2294     SetPipeBufAddr(&cmdBuffer);
2295 #endif
2296     CODECHAL_ENCODE_CHK_NULL_RETURN(m_pipeModeSelectParams);
2297     SetHcpPipeModeSelectParams(*m_pipeModeSelectParams);
2298 
2299     // HuC modifies HCP pipe mode select command, when 2nd pass SAO is required
2300     if (m_vdencHucUsed && m_b2NdSaoPassNeeded)
2301     {
2302         // current location to add cmds in 2nd level batch buffer
2303         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].iCurrent = 0;
2304         // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
2305         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = 0;
2306 
2307         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]));
2308 
2309         // save offset for next 2nd level batch buffer usage
2310         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
2311     }
2312     else
2313     {
2314         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&cmdBuffer, m_pipeModeSelectParams));
2315     }
2316 
2317     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &srcSurfaceParams));
2318 
2319     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &reconSurfaceParams));
2320 
2321     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(&cmdBuffer, m_pipeBufAddrParams));
2322 
2323     MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
2324     SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
2325     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams));
2326 
2327     MHW_VDBOX_QM_PARAMS fqmParams, qmParams;
2328     SetHcpQmStateParams(fqmParams, qmParams);
2329     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpFqmStateCmd(&cmdBuffer, &fqmParams));
2330     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpQmStateCmd(&cmdBuffer, &qmParams));
2331 
2332     SetVdencPipeModeSelectParams(*m_pipeModeSelectParams);
2333     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(&cmdBuffer, m_pipeModeSelectParams));
2334 
2335     MHW_VDBOX_SURFACE_PARAMS dsSurfaceParams[2];
2336     SetVdencSurfaceStateParams(srcSurfaceParams, reconSurfaceParams, dsSurfaceParams[0], dsSurfaceParams[1]);
2337     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencSrcSurfaceStateCmd(&cmdBuffer, &srcSurfaceParams));
2338     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &reconSurfaceParams));
2339     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencDsRefSurfaceStateCmd(&cmdBuffer, &dsSurfaceParams[0], 2));
2340 
2341     SetVdencPipeBufAddrParams(*m_pipeBufAddrParams);
2342     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeBufAddrCmd(&cmdBuffer, m_pipeBufAddrParams));
2343 
2344     MHW_VDBOX_HEVC_PIC_STATE picStateParams;
2345     SetHcpPicStateParams(picStateParams);
2346 
2347     if (m_vdencHucUsed)
2348     {
2349         // 2nd level batch buffer
2350         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
2351         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]));
2352 
2353         // save offset for next 2nd level batch buffer usage
2354         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset += m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2355     }
2356     else
2357     {
2358         // current location to add cmds in 2nd level batch buffer
2359         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].iCurrent = 0;
2360         // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
2361         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = 0;
2362 
2363         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]));
2364         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2365     }
2366 
2367     // Send HEVC_VP9_RDOQ_STATE command
2368     if (m_hevcRdoqEnabled)
2369     {
2370         if (m_pictureCodingType == I_TYPE)
2371         {
2372             if (m_hevcIFrameRdoqEnabled)
2373             {
2374                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&cmdBuffer, &picStateParams));
2375             }
2376         }
2377         else
2378         {
2379             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&cmdBuffer, &picStateParams));
2380         }
2381     }
2382 
2383     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2384 
2385     return eStatus;
2386 }
2387 
ExecuteSliceLevel()2388 MOS_STATUS CodechalVdencHevcStateG11::ExecuteSliceLevel()
2389 {
2390     CODECHAL_ENCODE_FUNCTION_ENTER;
2391 
2392     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2393 
2394     if (!m_hevcPicParams->tiles_enabled_flag)
2395     {
2396         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::ExecuteSliceLevel());
2397 
2398         if (m_lookaheadPass)
2399         {
2400             CODECHAL_ENCODE_CHK_STATUS_RETURN(AnalyzeLookaheadStats());
2401 
2402             CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
2403                 &m_vdencLaStatsBuffer,
2404                 CodechalDbgAttr::attrVdencOutput,
2405                 "_LookaheadStats",
2406                 m_brcLooaheadStatsBufferSize,
2407                 0,
2408                 CODECHAL_NUM_MEDIA_STATES)));
2409         }
2410 
2411         if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2412         {
2413             CODECHAL_DEBUG_TOOL(
2414                 CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpHucDebugOutputBuffers());
2415             )
2416         }
2417     }
2418     else
2419     {
2420         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncTileLevel());
2421     }
2422 
2423     return eStatus;
2424 }
2425 
GetTileInfo(uint32_t xPosition,uint32_t yPosition,uint32_t * tileId,uint32_t * tileEndLCUX,uint32_t * tileEndLCUY)2426 void CodechalVdencHevcStateG11::GetTileInfo(
2427     uint32_t xPosition,
2428     uint32_t yPosition,
2429     uint32_t* tileId,
2430     uint32_t* tileEndLCUX,
2431     uint32_t* tileEndLCUY)
2432 {
2433     *tileId = 0;
2434     uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
2435 
2436     for (uint8_t i = 0; i < m_numTiles; i++)
2437     {
2438         uint32_t tileWidthInLCU  = MOS_ROUNDUP_DIVIDE(((m_tileParams[i].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
2439         uint32_t tileHeightInLCU = MOS_ROUNDUP_DIVIDE(((m_tileParams[i].TileHeightInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
2440         *tileEndLCUX = m_tileParams[i].TileStartLCUX + tileWidthInLCU;
2441         *tileEndLCUY = m_tileParams[i].TileStartLCUY + tileHeightInLCU;
2442 
2443         if (xPosition >= (m_tileParams[i].TileStartLCUX * 2) &&
2444             yPosition >= (m_tileParams[i].TileStartLCUY * 2) &&
2445             xPosition <  (*tileEndLCUX * 2) &&
2446             yPosition <  (*tileEndLCUY * 2))
2447         {
2448             *tileId = i;
2449             break;
2450         }
2451     }
2452 }
2453 
PrepareVDEncStreamInData()2454 MOS_STATUS CodechalVdencHevcStateG11::PrepareVDEncStreamInData()
2455 {
2456     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2457 
2458     CODECHAL_ENCODE_FUNCTION_ENTER;
2459 
2460     if (m_lookaheadPass && m_firstFrame)
2461     {
2462         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupForceIntraStreamIn(&m_resVdencStreamInBuffer[0]));
2463     }
2464 
2465     if (m_hevcPicParams->tiles_enabled_flag)
2466     {
2467         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData(m_tileParams));
2468     }
2469     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::PrepareVDEncStreamInData());
2470 
2471     return eStatus;
2472 }
2473 
SetStreaminDataPerRegion(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,void * streaminData)2474 void CodechalVdencHevcStateG11::SetStreaminDataPerRegion(
2475     uint32_t streamInWidth,
2476     uint32_t top,
2477     uint32_t bottom,
2478     uint32_t left,
2479     uint32_t right,
2480     PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,
2481     void* streaminData)
2482 {
2483     CODECHAL_ENCODE_FUNCTION_ENTER;
2484 
2485     if (!m_hevcPicParams->tiles_enabled_flag)
2486     {
2487         CodechalVdencHevcState::SetStreaminDataPerRegion(streamInWidth, top, bottom, left, right, streaminParams, streaminData);
2488         return;
2489     }
2490 
2491     uint8_t* data = (uint8_t*)streaminData;
2492     uint32_t tileId = 0, tileEndLCUX = 0, tileEndLCUY = 0;
2493     uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
2494     GetTileInfo(left, top, &tileId, &tileEndLCUX, &tileEndLCUY);
2495 
2496     for (auto y = top; y < bottom; y++)
2497     {
2498         for (auto x = left; x < right; x++)
2499         {
2500             uint32_t streamInBaseOffset = 0, offset = 0, xyOffset = 0;
2501 
2502             if (x <  (m_tileParams[tileId].TileStartLCUX * 2) ||
2503                 y <  (m_tileParams[tileId].TileStartLCUY * 2) ||
2504                 x >= (tileEndLCUX * 2) ||
2505                 y >= (tileEndLCUY * 2))
2506             {
2507                 GetTileInfo(x, y, &tileId, &tileEndLCUX, &tileEndLCUY);
2508             }
2509             streamInBaseOffset = m_tileParams[tileId].TileStreaminOffset;
2510 
2511             auto xPositionInTile = x - (m_tileParams[tileId].TileStartLCUX * 2);
2512             auto yPositionInTile = y - (m_tileParams[tileId].TileStartLCUY * 2);
2513             auto tileWidthInLCU  = MOS_ROUNDUP_DIVIDE(((m_tileParams[tileId].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
2514 
2515             StreaminZigZagToLinearMap(tileWidthInLCU * 2, xPositionInTile, yPositionInTile, &offset, &xyOffset);
2516 
2517             SetStreaminDataPerLcu(streaminParams, data + (streamInBaseOffset + offset + xyOffset) * 64);
2518         }
2519     }
2520 }
2521 
SetBrcRoiDeltaQpMap(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,uint8_t regionId,PDeltaQpForROI deltaQpMap)2522 void CodechalVdencHevcStateG11::SetBrcRoiDeltaQpMap(
2523     uint32_t streamInWidth,
2524     uint32_t top,
2525     uint32_t bottom,
2526     uint32_t left,
2527     uint32_t right,
2528     uint8_t regionId,
2529     PDeltaQpForROI deltaQpMap)
2530 {
2531 
2532     CODECHAL_ENCODE_FUNCTION_ENTER;
2533 
2534     if (!m_hevcPicParams->tiles_enabled_flag)
2535     {
2536         CodechalVdencHevcState::SetBrcRoiDeltaQpMap(streamInWidth, top, bottom, left, right, regionId, deltaQpMap);
2537         return;
2538     }
2539 
2540     uint32_t tileId = 0, tileEndLCUX = 0, tileEndLCUY = 0;
2541     uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
2542     GetTileInfo(left, top, &tileId, &tileEndLCUX, &tileEndLCUY);
2543 
2544     for (auto y = top; y < bottom; y++)
2545     {
2546         for (auto x = left; x < right; x++)
2547         {
2548             uint32_t streamInBaseOffset = 0, offset = 0, xyOffset = 0;
2549 
2550             if (x < (m_tileParams[tileId].TileStartLCUX * 2) ||
2551                 y < (m_tileParams[tileId].TileStartLCUY * 2) ||
2552                 x >= (tileEndLCUX * 2) ||
2553                 y >= (tileEndLCUY * 2))
2554             {
2555                 GetTileInfo(x, y, &tileId, &tileEndLCUX, &tileEndLCUY);
2556             }
2557             streamInBaseOffset = m_tileParams[tileId].TileStreaminOffset;
2558 
2559             auto xPositionInTile = x - (m_tileParams[tileId].TileStartLCUX * 2);
2560             auto yPositionInTile = y - (m_tileParams[tileId].TileStartLCUY * 2);
2561             auto tileWidthInLCU  = MOS_ROUNDUP_DIVIDE(((m_tileParams[tileId].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
2562 
2563             StreaminZigZagToLinearMap(tileWidthInLCU * 2, xPositionInTile, yPositionInTile, &offset, &xyOffset);
2564 
2565             (deltaQpMap + (streamInBaseOffset + offset + xyOffset))->iDeltaQp = m_hevcPicParams->ROI[regionId].PriorityLevelOrDQp;
2566         }
2567     }
2568 }
2569 
EncTileLevel()2570 MOS_STATUS CodechalVdencHevcStateG11::EncTileLevel()
2571 {
2572     CODECHAL_ENCODE_FUNCTION_ENTER;
2573 
2574     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2575 
2576     int32_t currentPipe = GetCurrentPipe();
2577     int32_t currentPass = GetCurrentPass();
2578 
2579     if (currentPipe < 0 || currentPass < 0)
2580     {
2581         CODECHAL_ENCODE_ASSERTMESSAGE("Invalid pipe number or pass number");
2582         return MOS_STATUS_INVALID_PARAMETER;
2583     }
2584 
2585     MHW_VDBOX_HEVC_SLICE_STATE_G11 sliceState;
2586     SetHcpSliceStateCommonParams(sliceState);
2587 
2588     MOS_COMMAND_BUFFER cmdBuffer;
2589     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2590 
2591     uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
2592     uint32_t numTileRows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
2593 
2594     for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
2595     {
2596         for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
2597         {
2598             PCODEC_ENCODER_SLCDATA  slcData = m_slcData;
2599             uint32_t                slcCount, idx, sliceNumInTile = 0;
2600 
2601             idx = tileRow * numTileColumns + tileCol;
2602 
2603             if ((m_numPipe > 1) && (tileCol != currentPipe))
2604             {
2605                 continue;
2606             }
2607 
2608             // HCP_TILE_CODING commmand
2609             CODECHAL_ENCODE_CHK_STATUS_RETURN(
2610                 static_cast<MhwVdboxHcpInterfaceG11*>(m_hcpInterface)->AddHcpTileCodingCmd(&cmdBuffer, &m_tileParams[idx]));
2611 
2612             for (slcCount = 0; slcCount < m_numSlices; slcCount++)
2613             {
2614                 bool lastSliceInTile = false, sliceInTile = false;
2615 
2616                 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
2617                     &m_tileParams[idx],
2618                     &sliceInTile,
2619                     &lastSliceInTile));
2620 
2621                 if (!sliceInTile)
2622                 {
2623                     continue;
2624                 }
2625 
2626                 if (IsFirstPass())
2627                 {
2628                     uint32_t startLCU = 0;
2629                     for (uint32_t ii = 0; ii < slcCount; ii++)
2630                     {
2631                         startLCU += m_hevcSliceParams[ii].NumLCUsInSlice;
2632                     }
2633                     slcData[slcCount].CmdOffset = startLCU * (m_hwInterface->GetHcpInterface()->GetHcpPakObjSize()) * sizeof(uint32_t);
2634                 }
2635 
2636                 if (m_hevcVdencAcqpEnabled || m_brcEnabled)
2637                 {
2638                     // save offset for next 2nd level batch buffer usage
2639                     // This is because we don't know how many times HCP_WEIGHTOFFSET_STATE & HCP_PAK_INSERT_OBJECT will be inserted for each slice
2640                     // dwVdencBatchBufferPerSliceConstSize: constant size for each slice
2641                     // m_vdencBatchBufferPerSliceVarSize:   variable size for each slice
2642 
2643                     // starting location for executing slice level cmds
2644                     // To do: Improvize to only add current slice wSlcCount
2645                     m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2646 
2647                     for (uint32_t j = 0; j < slcCount; j++)
2648                     {
2649                         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset
2650                             += (m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_vdencBatchBufferPerSliceVarSize[j]);
2651                     }
2652 
2653                 }
2654 
2655                 SetHcpSliceStateParams(sliceState, slcData, (uint16_t)slcCount, m_tileParams, lastSliceInTile, idx);
2656 
2657                 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHwSliceEncodeCommand(&cmdBuffer, &sliceState));
2658 
2659                 // Send VD_PIPELINE_FLUSH command
2660                 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2661                 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2662                 vdPipelineFlushParams.Flags.bWaitDoneMFX = 1;
2663                 vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
2664                 vdPipelineFlushParams.Flags.bFlushVDENC = 1;
2665                 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2666                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
2667 
2668                 sliceNumInTile++;
2669             } // end of slice
2670 
2671             if (0 == sliceNumInTile)
2672             {
2673                 // One tile must have at least one slice
2674                 CODECHAL_ENCODE_ASSERT(false);
2675                 eStatus = MOS_STATUS_INVALID_PARAMETER;
2676                 break;
2677             }
2678 
2679             if (sliceNumInTile > 1 && (numTileColumns > 1 || numTileRows > 1))
2680             {
2681                 CODECHAL_ENCODE_ASSERTMESSAGE("Multi-slices in a tile is not supported!");
2682                 return MOS_STATUS_INVALID_PARAMETER;
2683             }
2684         } // end of row tile
2685     } // end of column tile
2686 
2687       // Insert end of sequence/stream if set
2688     if ((m_lastPicInStream || m_lastPicInSeq) && IsLastPipe())
2689     {
2690         MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
2691         MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
2692         pakInsertObjectParams.bLastPicInSeq = m_lastPicInSeq;
2693         pakInsertObjectParams.bLastPicInStream = m_lastPicInStream;
2694         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&cmdBuffer, &pakInsertObjectParams));
2695     }
2696 
2697     // Send MI_FLUSH command
2698     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2699     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2700     flushDwParams.bVideoPipelineCacheInvalidate = true;
2701     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2702 
2703     // Send VD_PIPELINE_FLUSH command
2704     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2705     MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2706     vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
2707     vdPipelineFlushParams.Flags.bFlushHEVC = 1;
2708     vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2709     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
2710 
2711     // Send MI_FLUSH command
2712     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2713     flushDwParams.bVideoPipelineCacheInvalidate = true;
2714     if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[currentPipe].sResource))
2715     {
2716         flushDwParams.pOsResource = &m_resVdBoxSemaphoreMem[currentPipe].sResource;
2717         flushDwParams.dwDataDW1 = currentPass+1;
2718     }
2719     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2720 
2721     if (IsFirstPipe())
2722     {
2723         // first pipe needs to ensure all other pipes are ready
2724         for (uint32_t i = 1; i < m_numPipe; i++)
2725         {
2726             if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[i].sResource))
2727             {
2728                 CODECHAL_ENCODE_CHK_STATUS_RETURN(
2729                     SendHWWaitCommand(&m_resVdBoxSemaphoreMem[i].sResource,
2730                         &cmdBuffer,
2731                         currentPass + 1));
2732             }
2733         }
2734 
2735         // Whenever ACQP/ BRC is enabled with tiling, PAK Integration kernel is needed.
2736         if (m_vdencHucUsed)  // ACQP/ BRC need PAK integration kernel to aggregate statistics
2737         {
2738             CODECHAL_ENCODE_CHK_STATUS_RETURN(HucPakIntegrate(&cmdBuffer));
2739         }
2740         // Use HW stitch commands only in the scalable mode
2741         if (m_numPipe > 1 && m_enableTileStitchByHW)
2742         {
2743             HucCopyParams copyParams;
2744             uint32_t index = m_virtualEngineBbIndex;
2745 
2746             copyParams.size = m_hwInterface->m_tileRecordSize;
2747             copyParams.presSrc = &m_tileRecordBuffer[index].sResource;
2748             copyParams.presDst = &m_resBitstreamBuffer;
2749             copyParams.lengthOfTable = (uint8_t)(m_numTiles);
2750 
2751             auto hucCmdInitializer = static_cast<CodechalCmdInitializerG11*>(m_hucCmdInitializer);
2752             CODECHAL_ENCODE_CHK_STATUS_RETURN(hucCmdInitializer->AddCopyCmds(&cmdBuffer, &copyParams));
2753         }
2754 
2755         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(&cmdBuffer));
2756         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSliceSize(&cmdBuffer));
2757 
2758         CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2759 
2760         if (m_numPipe <= 1)  // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
2761         {
2762             CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
2763 
2764             // BRC PAK statistics different for each pass
2765             if (m_brcEnabled)
2766             {
2767                 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStats(&cmdBuffer));
2768             }
2769         }
2770     }
2771 
2772     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2773     {
2774         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
2775     }
2776 
2777     std::string pakPassName = "PAK_PASS[" + std::to_string(GetCurrentPass())+"]";
2778     CODECHAL_DEBUG_TOOL(
2779         CODECHAL_ENCODE_CHK_STATUS_RETURN( m_debugInterface->DumpCmdBuffer(
2780             &cmdBuffer,
2781             CODECHAL_NUM_MEDIA_STATES,
2782             pakPassName.data()));)
2783 
2784     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2785 
2786     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2787     {
2788         bool nullRendering = m_videoContextUsesNullHw;
2789         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, nullRendering));
2790         CODECHAL_DEBUG_TOOL(
2791             CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpHucDebugOutputBuffers());
2792             if (m_mmcState)
2793             {
2794                 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
2795             }
2796         )
2797 
2798         if (IsFirstPipe() &&
2799             IsLastPass() &&
2800             m_signalEnc &&
2801             m_currRefSync &&
2802             !Mos_ResourceIsNull(&m_currRefSync->resSyncObject))
2803         {
2804             // signal semaphore
2805             MOS_SYNC_PARAMS syncParams;
2806             syncParams                  = g_cInitSyncParams;
2807             syncParams.GpuContext       = m_videoContext;
2808             syncParams.presSyncResource = &m_currRefSync->resSyncObject;
2809 
2810             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
2811             m_currRefSync->uiSemaphoreObjCount++;
2812             m_currRefSync->bInUsed = true;
2813             }
2814     }
2815 
2816     // Reset parameters for next PAK execution
2817     if (IsLastPipe() &&
2818         IsLastPass())
2819     {
2820         if (!m_singleTaskPhaseSupported)
2821         {
2822             m_osInterface->pfnResetPerfBufferID(m_osInterface);
2823         }
2824 
2825         m_currPakSliceIdx = (m_currPakSliceIdx + 1) % CODECHAL_HEVC_NUM_PAK_SLICE_BATCH_BUFFERS;
2826 
2827         if (m_hevcSeqParams->ParallelBRC)
2828         {
2829             m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite =
2830                 (m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
2831         }
2832 
2833         m_newPpsHeader = 0;
2834         m_newSeqHeader = 0;
2835         m_frameNum++;
2836     }
2837 
2838     return eStatus;
2839 }
2840 
SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & vdboxPipeModeSelectParams)2841 void CodechalVdencHevcStateG11::SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& vdboxPipeModeSelectParams)
2842 {
2843     CODECHAL_ENCODE_FUNCTION_ENTER;
2844 
2845     CodechalEncodeHevcBase::SetHcpPipeModeSelectParams(vdboxPipeModeSelectParams);
2846 
2847     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11& pipeModeSelectParams = static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11&>(vdboxPipeModeSelectParams);
2848 
2849     if (m_numPipe > 1)
2850     {
2851         // Running in the multiple VDBOX mode
2852         if (IsFirstPipe())
2853         {
2854             pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT;
2855         }
2856         else if (IsLastPipe())
2857         {
2858             pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT;
2859         }
2860         else
2861         {
2862             pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE;
2863         }
2864         pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE;
2865     }
2866     else
2867     {
2868         pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
2869         pipeModeSelectParams.PipeWorkMode    = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
2870     }
2871 }
2872 
ConstructBatchBufferHuCCQP(PMOS_RESOURCE batchBuffer)2873 MOS_STATUS CodechalVdencHevcStateG11::ConstructBatchBufferHuCCQP(PMOS_RESOURCE batchBuffer)
2874 {
2875     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2876     MOS_COMMAND_BUFFER cmdBuffer;
2877     uint8_t data[CODECHAL_PAGE_SIZE] = {0};
2878     uint16_t len = 0;
2879     CodechalCmdInitializerG11* pCmdInitializerG11 = static_cast<CodechalCmdInitializerG11*>(m_hucCmdInitializer);
2880 
2881     CODECHAL_ENCODE_FUNCTION_ENTER;
2882     CODECHAL_ENCODE_CHK_NULL_RETURN(batchBuffer);
2883     CODECHAL_ENCODE_CHK_NULL_RETURN(pCmdInitializerG11);
2884 
2885     MOS_COMMAND_BUFFER constructedCmdBuf;
2886     MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf));
2887 
2888     constructedCmdBuf.pCmdBase = constructedCmdBuf.pCmdPtr = (uint32_t *)data;
2889     constructedCmdBuf.iRemaining                           = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
2890 
2891     constructedCmdBuf.pCmdPtr += (m_insertOffsetAfterCMD1 / 4);
2892     constructedCmdBuf.iOffset += m_insertOffsetAfterCMD1;
2893 
2894     m_picStateCmdStartInBytes = constructedCmdBuf.iOffset;
2895 
2896     // set HCP_PIC_STATE command
2897     MHW_VDBOX_HEVC_PIC_STATE hevcPicState;
2898     SetHcpPicStateParams(hevcPicState);
2899     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPicStateCmd(&constructedCmdBuf, &hevcPicState));
2900     m_cmd2StartInBytes = constructedCmdBuf.iOffset;
2901 
2902     constructedCmdBuf.pCmdPtr += (m_insertOffsetAfterCMD2 / 4);
2903     constructedCmdBuf.iOffset += m_insertOffsetAfterCMD2;
2904 
2905     len = m_cmd2StartInBytes - m_picStateCmdStartInBytes;
2906     pCmdInitializerG11->AddCmdConstData(
2907         CODECHAL_CMD5,
2908         (uint32_t*)(data + m_picStateCmdStartInBytes),
2909         len,
2910         m_picStateCmdStartInBytes);
2911 
2912     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2913     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->CmdInitializerExecute(false, batchBuffer, &cmdBuffer));
2914     ReturnCommandBuffer(&cmdBuffer);
2915 
2916     if (!m_singleTaskPhaseSupported)
2917     {
2918         bool renderingFlags = m_videoContextUsesNullHw;
2919 
2920         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(GetDebugInterface()->DumpCmdBuffer(
2921             &cmdBuffer,
2922             CODECHAL_NUM_MEDIA_STATES,
2923             "HucCmd")));
2924 
2925         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
2926         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->DumpHucCmdInit(batchBuffer)));
2927     }
2928 
2929     return eStatus;
2930 }
2931 
ConstructBatchBufferHuCBRC(PMOS_RESOURCE batchBuffer)2932 MOS_STATUS CodechalVdencHevcStateG11::ConstructBatchBufferHuCBRC(PMOS_RESOURCE batchBuffer)
2933 {
2934     CODECHAL_ENCODE_FUNCTION_ENTER;
2935 
2936     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2937 
2938     CODECHAL_ENCODE_CHK_NULL_RETURN(batchBuffer);
2939 
2940     MOS_LOCK_PARAMS lockFlags;
2941     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2942     lockFlags.WriteOnly = true;
2943 
2944     uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, batchBuffer, &lockFlags);
2945     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
2946 
2947     MOS_COMMAND_BUFFER constructedCmdBuf;
2948     MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf));
2949     constructedCmdBuf.pCmdBase = constructedCmdBuf.pCmdPtr = (uint32_t *)data;
2950     constructedCmdBuf.iRemaining = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
2951 
2952     // 1st Group : PIPE_MODE_SELECT
2953     // set PIPE_MODE_SELECT command
2954     // on Gen11 no need to set "bSaoFirstPass" since it is handled by HW now
2955     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11 pipeModeSelectParams;
2956     SetHcpPipeModeSelectParams(pipeModeSelectParams);
2957 
2958     pipeModeSelectParams.bVdencEnabled = true;
2959     pipeModeSelectParams.bAdvancedRateControlEnable = true;
2960     pipeModeSelectParams.bStreamOutEnabled = !IsLastPass();
2961     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&constructedCmdBuf, &pipeModeSelectParams));
2962 
2963     MHW_BATCH_BUFFER  TempBatchBuffer;
2964     MOS_ZeroMemory(&TempBatchBuffer, sizeof(MHW_BATCH_BUFFER));
2965     TempBatchBuffer.iSize       = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
2966     TempBatchBuffer.pData       = data;
2967 
2968     // set MI_BATCH_BUFFER_END command
2969     int32_t cmdBufOffset = constructedCmdBuf.iOffset;
2970 
2971     TempBatchBuffer.iCurrent    = constructedCmdBuf.iOffset;
2972     TempBatchBuffer.iRemaining  = constructedCmdBuf.iRemaining;
2973     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &TempBatchBuffer));
2974     constructedCmdBuf.pCmdPtr     += (TempBatchBuffer.iCurrent - constructedCmdBuf.iOffset) / 4;
2975     constructedCmdBuf.iOffset      = TempBatchBuffer.iCurrent;
2976     constructedCmdBuf.iRemaining   = TempBatchBuffer.iRemaining;
2977 
2978     m_miBatchBufferEndCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
2979 
2980     CODECHAL_ENCODE_ASSERT(m_hwInterface->m_vdencBatchBuffer1stGroupSize == constructedCmdBuf.iOffset);
2981 
2982     constructedCmdBuf.pCmdPtr += (m_insertOffsetAfterCMD1 / 4);
2983     constructedCmdBuf.iOffset += m_insertOffsetAfterCMD1;
2984 
2985     m_picStateCmdStartInBytes = constructedCmdBuf.iOffset;
2986 
2987     // set HCP_PIC_STATE command
2988     MHW_VDBOX_HEVC_PIC_STATE hevcPicState;
2989     SetHcpPicStateParams(hevcPicState);
2990     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPicStateCmd(&constructedCmdBuf, &hevcPicState));
2991     m_cmd2StartInBytes = constructedCmdBuf.iOffset;
2992 
2993     constructedCmdBuf.pCmdPtr += (m_insertOffsetAfterCMD2 / 4);
2994     constructedCmdBuf.iOffset += m_insertOffsetAfterCMD2;
2995 
2996     // set MI_BATCH_BUFFER_END command
2997     TempBatchBuffer.iCurrent    = constructedCmdBuf.iOffset;
2998     TempBatchBuffer.iRemaining  = constructedCmdBuf.iRemaining;
2999     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &TempBatchBuffer));
3000     constructedCmdBuf.pCmdPtr     += (TempBatchBuffer.iCurrent - constructedCmdBuf.iOffset) / 4;
3001     constructedCmdBuf.iOffset      = TempBatchBuffer.iCurrent;
3002     constructedCmdBuf.iRemaining   = TempBatchBuffer.iRemaining;
3003 
3004     CODECHAL_ENCODE_ASSERT(m_hwInterface->m_vdencBatchBuffer2ndGroupSize + m_hwInterface->m_vdencBatchBuffer1stGroupSize
3005         == constructedCmdBuf.iOffset);
3006 
3007     if (data)
3008     {
3009         m_osInterface->pfnUnlockResource(m_osInterface, batchBuffer);
3010     }
3011 
3012     return eStatus;
3013 }
3014 
ConstructBatchBufferHuCBRCForGroup3(PMOS_RESOURCE batchBuffer)3015 MOS_STATUS CodechalVdencHevcStateG11::ConstructBatchBufferHuCBRCForGroup3(PMOS_RESOURCE batchBuffer)
3016 {
3017     CODECHAL_ENCODE_FUNCTION_ENTER;
3018 
3019     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3020     int32_t cmdBufOffset = 0;
3021 
3022     CODECHAL_ENCODE_CHK_NULL_RETURN(m_slcData);
3023     CODECHAL_ENCODE_CHK_NULL_RETURN(batchBuffer);
3024 
3025     MOS_LOCK_PARAMS lockFlags;
3026     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3027     lockFlags.WriteOnly = true;
3028     uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, batchBuffer, &lockFlags);
3029     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
3030 
3031     MOS_COMMAND_BUFFER constructedCmdBuf;
3032     MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf));
3033     constructedCmdBuf.pCmdBase = constructedCmdBuf.pCmdPtr = (uint32_t *)data;
3034     constructedCmdBuf.iRemaining = MOS_ALIGN_CEIL(m_hwInterface->m_vdencGroup3BatchBufferSize, CODECHAL_PAGE_SIZE);
3035 
3036     // 3rd Group : HCP_WEIGHTSOFFSETS_STATE + HCP_SLICE_STATE + HCP_PAK_INSERT_OBJECT + VDENC_WEIGHT_OFFSETS_STATE
3037     MHW_VDBOX_HEVC_SLICE_STATE_G11 sliceState;
3038     SetHcpSliceStateCommonParams(sliceState);
3039 
3040     // slice level cmds for each slice
3041     PCODEC_ENCODER_SLCDATA slcData = m_slcData;
3042     uint16_t               numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
3043     uint16_t               numTileRows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
3044     for (uint32_t startLCU = 0, slcCount = 0; slcCount < m_numSlices; slcCount++)
3045     {
3046         uint32_t idx = 0;
3047         bool lastSliceInTile = false, sliceInTile = false;
3048 
3049         for (auto tileRow = 0; (tileRow < numTileRows) && !sliceInTile; tileRow++)
3050         {
3051             for (auto tileCol = 0; (tileCol < numTileColumns) && !sliceInTile; tileCol++)
3052             {
3053                 idx = tileRow * numTileColumns + tileCol;
3054                 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
3055                     &m_tileParams[idx],
3056                     &sliceInTile,
3057                     &lastSliceInTile));
3058             }
3059         }
3060 
3061         if (IsFirstPass())
3062         {
3063             slcData[slcCount].CmdOffset = startLCU * (m_hcpInterface->GetHcpPakObjSize()) * sizeof(uint32_t);
3064         }
3065 
3066         SetHcpSliceStateParams(sliceState, slcData, (uint16_t) slcCount, m_tileParams, lastSliceInTile, idx);
3067         m_vdencBatchBufferPerSliceVarSize[slcCount] = 0;
3068 
3069         // set HCP_WEIGHTOFFSET_STATE command
3070         // This slice level command is issued, if the weighted_pred_flag or weighted_bipred_flag equals one.
3071         //        If zero, then this command is not issued.
3072         if (m_hevcVdencWeightedPredEnabled)
3073         {
3074             MHW_VDBOX_HEVC_WEIGHTOFFSET_PARAMS hcpWeightOffsetParams;
3075             MOS_ZeroMemory(&hcpWeightOffsetParams, sizeof(hcpWeightOffsetParams));
3076 
3077             // HuC based WP ignores App based weights
3078             if (!m_hevcPicParams->bEnableGPUWeightedPrediction)
3079             {
3080                 for (auto k = 0; k < 2; k++) // k=0: LIST_0, k=1: LIST_1
3081                 {
3082                     // Luma, Chroma Offset
3083                     for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
3084                     {
3085                         hcpWeightOffsetParams.LumaOffsets[k][i] = (int16_t)m_hevcSliceParams->luma_offset[k][i];
3086                         // Cb, Cr
3087                         for (auto j = 0; j < 2; j++)
3088                         {
3089                             hcpWeightOffsetParams.ChromaOffsets[k][i][j] = (int16_t)m_hevcSliceParams->chroma_offset[k][i][j];
3090                         }
3091                     }
3092 
3093                     // Luma Weight
3094                     CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
3095                         &hcpWeightOffsetParams.LumaWeights[k],
3096                         sizeof(hcpWeightOffsetParams.LumaWeights[k]),
3097                         &m_hevcSliceParams->delta_luma_weight[k],
3098                         sizeof(m_hevcSliceParams->delta_luma_weight[k])));
3099                     // Chroma Weight
3100                     CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
3101                         &hcpWeightOffsetParams.ChromaWeights[k],
3102                         sizeof(hcpWeightOffsetParams.ChromaWeights[k]),
3103                         &m_hevcSliceParams->delta_chroma_weight[k],
3104                         sizeof(m_hevcSliceParams->delta_chroma_weight[k])));
3105                 }
3106             }
3107 
3108             // 1st HCP_WEIGHTOFFSET_STATE cmd - P & B
3109             if (m_hevcSliceParams->slice_type == CODECHAL_ENCODE_HEVC_P_SLICE || m_hevcSliceParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
3110             {
3111                 hcpWeightOffsetParams.ucList = LIST_0;
3112 
3113                 cmdBufOffset = constructedCmdBuf.iOffset;
3114                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpWeightOffsetStateCmd(&constructedCmdBuf, nullptr, &hcpWeightOffsetParams));
3115                 m_hcpWeightOffsetStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
3116                 // 1st HcpWeightOffset cmd is not always inserted (except weighted prediction + P, B slices)
3117                 m_vdencBatchBufferPerSliceVarSize[slcCount] += m_hcpWeightOffsetStateCmdSize;
3118             }
3119 
3120             // 2nd HCP_WEIGHTOFFSET_STATE cmd - B only
3121             if (m_hevcSliceParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
3122             {
3123                 hcpWeightOffsetParams.ucList = LIST_1;
3124 
3125                 cmdBufOffset = constructedCmdBuf.iOffset;
3126                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpWeightOffsetStateCmd(&constructedCmdBuf, nullptr, &hcpWeightOffsetParams));
3127                 m_hcpWeightOffsetStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
3128                 // 2nd HcpWeightOffset cmd is not always inserted (except weighted prediction + B slices)
3129                 m_vdencBatchBufferPerSliceVarSize[slcCount] += m_hcpWeightOffsetStateCmdSize;
3130             }
3131         }
3132 
3133         // set HCP_SLICE_STATE command
3134         cmdBufOffset = constructedCmdBuf.iOffset;
3135         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSliceStateCmd(&constructedCmdBuf, &sliceState));
3136         m_hcpSliceStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
3137 
3138         // set 1st HCP_PAK_INSERT_OBJECT command
3139         // insert AU, SPS, PPS headers before first slice header
3140         if (sliceState.bInsertBeforeSliceHeaders)
3141         {
3142             uint32_t maxBytesInPakInsertObjCmd = ((2 << 11) - 1) * 4; // 12 bits for DwordLength field in PAK_INSERT_OBJ cmd
3143             m_1stPakInsertObjectCmdSize = 0;
3144 
3145             for (auto i = 0; i < HEVC_MAX_NAL_UNIT_TYPE; i++)
3146             {
3147                 uint32_t nalUnitPosiSize = sliceState.ppNalUnitParams[i]->uiSize;
3148                 uint32_t nalUnitPosiOffset = sliceState.ppNalUnitParams[i]->uiOffset;
3149 
3150                 while (nalUnitPosiSize > 0)
3151                 {
3152                     uint32_t bitSize = MOS_MIN(maxBytesInPakInsertObjCmd * 8, nalUnitPosiSize * 8);
3153                     uint32_t offSet = nalUnitPosiOffset;
3154 
3155                     MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
3156                     MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
3157                     pakInsertObjectParams.bEmulationByteBitsInsert = sliceState.ppNalUnitParams[i]->bInsertEmulationBytes;
3158                     pakInsertObjectParams.uiSkipEmulationCheckCount = sliceState.ppNalUnitParams[i]->uiSkipEmulationCheckCount;
3159                     pakInsertObjectParams.pBsBuffer = sliceState.pBsBuffer;
3160                     pakInsertObjectParams.dwBitSize = bitSize;
3161                     pakInsertObjectParams.dwOffset = offSet;
3162 
3163                     if (nalUnitPosiSize > maxBytesInPakInsertObjCmd)
3164                     {
3165                         nalUnitPosiSize -= maxBytesInPakInsertObjCmd;
3166                         nalUnitPosiOffset += maxBytesInPakInsertObjCmd;
3167                     }
3168                     else
3169                     {
3170                         nalUnitPosiSize = 0;
3171                     }
3172 
3173                     cmdBufOffset = constructedCmdBuf.iOffset;
3174                     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&constructedCmdBuf, &pakInsertObjectParams));
3175 
3176                     // this info needed again in BrcUpdate HuC FW const
3177                     m_1stPakInsertObjectCmdSize += (constructedCmdBuf.iOffset - cmdBufOffset);
3178                 }
3179             }
3180             // 1st PakInsertObject cmd is not always inserted for each slice
3181             m_vdencBatchBufferPerSliceVarSize[slcCount] += m_1stPakInsertObjectCmdSize;
3182         }
3183 
3184         // set 2nd HCP_PAK_INSERT_OBJECT command
3185         // Insert slice header
3186         MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
3187         MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
3188         pakInsertObjectParams.bLastHeader = true;
3189         pakInsertObjectParams.bEmulationByteBitsInsert = true;
3190 
3191         // App does the slice header packing, set the skip count passed by the app
3192         pakInsertObjectParams.uiSkipEmulationCheckCount = sliceState.uiSkipEmulationCheckCount;
3193         pakInsertObjectParams.pBsBuffer = sliceState.pBsBuffer;
3194         pakInsertObjectParams.dwBitSize = sliceState.dwLength;
3195         pakInsertObjectParams.dwOffset = sliceState.dwOffset;
3196 
3197         // For HEVC VDEnc Dynamic Slice
3198         if (m_hevcSeqParams->SliceSizeControl)
3199         {
3200             pakInsertObjectParams.bLastHeader = false;
3201             pakInsertObjectParams.bEmulationByteBitsInsert = false;
3202             pakInsertObjectParams.dwBitSize                  = m_hevcSliceParams->BitLengthSliceHeaderStartingPortion;
3203             pakInsertObjectParams.bResetBitstreamStartingPos = true;
3204         }
3205 
3206         uint32_t byteSize = (pakInsertObjectParams.dwBitSize + 7) >> 3;
3207         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(
3208             &constructedCmdBuf,
3209             &pakInsertObjectParams));
3210 
3211         // 2nd PakInsertObject cmd is always inserted for each slice
3212         // so already reflected in dwVdencBatchBufferPerSliceConstSize
3213         m_vdencBatchBufferPerSliceVarSize[slcCount] += (MOS_ALIGN_CEIL(byteSize, sizeof(uint32_t))) / sizeof(uint32_t) * 4;
3214 
3215         // set 3rd HCP_PAK_INSERT_OBJECT command
3216         if (m_hevcSeqParams->SliceSizeControl)
3217         {
3218             // Send HCP_PAK_INSERT_OBJ command. For dynamic slice, we are skipping the beginning part of slice header.
3219             pakInsertObjectParams.bLastHeader = true;
3220             pakInsertObjectParams.dwBitSize   = sliceState.dwLength - m_hevcSliceParams->BitLengthSliceHeaderStartingPortion;
3221             pakInsertObjectParams.dwOffset += ((m_hevcSliceParams->BitLengthSliceHeaderStartingPortion + 7) / 8);  // Skips the first 5 bytes which is Start Code + Nal Unit Header
3222             pakInsertObjectParams.bResetBitstreamStartingPos = true;
3223 
3224             cmdBufOffset = constructedCmdBuf.iOffset;
3225             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(
3226                 &constructedCmdBuf,
3227                 &pakInsertObjectParams));
3228             // 3rd PakInsertObject cmd is not always inserted for each slice
3229             m_vdencBatchBufferPerSliceVarSize[slcCount] += (constructedCmdBuf.iOffset - cmdBufOffset);
3230         }
3231 
3232         // set VDENC_WEIGHT_OFFSETS_STATE command
3233         MHW_VDBOX_VDENC_WEIGHT_OFFSET_PARAMS vdencWeightOffsetParams;
3234         MOS_ZeroMemory(&vdencWeightOffsetParams, sizeof(vdencWeightOffsetParams));
3235         vdencWeightOffsetParams.bWeightedPredEnabled = m_hevcVdencWeightedPredEnabled;
3236 
3237         if (vdencWeightOffsetParams.bWeightedPredEnabled)
3238         {
3239             uint8_t lumaLog2WeightDenom     = m_hevcPicParams->bEnableGPUWeightedPrediction ? 6 : m_hevcSliceParams->luma_log2_weight_denom;
3240             vdencWeightOffsetParams.dwDenom = 1 << lumaLog2WeightDenom;
3241 
3242             // HuC based WP ignores App based weights
3243             if (!m_hevcPicParams->bEnableGPUWeightedPrediction)
3244             {
3245                 // Luma Offsets
3246                 for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
3247                 {
3248                     vdencWeightOffsetParams.LumaOffsets[0][i] = (int16_t)m_hevcSliceParams->luma_offset[0][i];
3249                     vdencWeightOffsetParams.LumaOffsets[1][i] = (int16_t)m_hevcSliceParams->luma_offset[1][i];
3250                 }
3251 
3252                 // Luma Weights
3253                 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(MOS_SecureMemcpy(
3254                                                               &vdencWeightOffsetParams.LumaWeights[0],
3255                                                               sizeof(vdencWeightOffsetParams.LumaWeights[0]),
3256                                                               &m_hevcSliceParams->delta_luma_weight[0],
3257                                                               sizeof(m_hevcSliceParams->delta_luma_weight[0])),
3258                     "Failed to copy luma weight 0 memory.");
3259 
3260                 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(MOS_SecureMemcpy(
3261                                                               &vdencWeightOffsetParams.LumaWeights[1],
3262                                                               sizeof(vdencWeightOffsetParams.LumaWeights[1]),
3263                                                               &m_hevcSliceParams->delta_luma_weight[1],
3264                                                               sizeof(m_hevcSliceParams->delta_luma_weight[1])),
3265                     "Failed to copy luma weight 1 memory.");
3266             }
3267         }
3268 
3269         cmdBufOffset = constructedCmdBuf.iOffset;
3270         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWeightsOffsetsStateCmd(
3271             &constructedCmdBuf,
3272             nullptr,
3273             &vdencWeightOffsetParams));
3274         m_vdencWeightOffsetStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
3275 
3276         MHW_BATCH_BUFFER  TempBatchBuffer;
3277         MOS_ZeroMemory(&TempBatchBuffer, sizeof(MHW_BATCH_BUFFER));
3278         TempBatchBuffer.iSize       = MOS_ALIGN_CEIL(m_hwInterface->m_vdencGroup3BatchBufferSize, CODECHAL_PAGE_SIZE);
3279         TempBatchBuffer.pData       = data;
3280 
3281         TempBatchBuffer.iCurrent    = constructedCmdBuf.iOffset;
3282         TempBatchBuffer.iRemaining  = constructedCmdBuf.iRemaining;
3283         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &TempBatchBuffer));
3284         constructedCmdBuf.pCmdPtr     += (TempBatchBuffer.iCurrent - constructedCmdBuf.iOffset) / 4;
3285         constructedCmdBuf.iOffset      = TempBatchBuffer.iCurrent;
3286         constructedCmdBuf.iRemaining   = TempBatchBuffer.iRemaining;
3287 
3288         m_vdencBatchBufferPerSliceVarSize[slcCount] += ENCODE_VDENC_HEVC_PADDING_DW_SIZE * 4;
3289         for (auto i = 0; i < ENCODE_VDENC_HEVC_PADDING_DW_SIZE; i++)
3290         {
3291             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiNoop(&constructedCmdBuf, nullptr));
3292         }
3293 
3294         startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice;
3295     }
3296 
3297     if (data)
3298     {
3299         m_osInterface->pfnUnlockResource(m_osInterface, batchBuffer);
3300     }
3301 
3302     return eStatus;
3303 }
3304 
SetDmemHuCBrcInitReset()3305 MOS_STATUS CodechalVdencHevcStateG11::SetDmemHuCBrcInitReset()
3306 {
3307     CODECHAL_ENCODE_FUNCTION_ENTER;
3308 
3309     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3310 
3311     MOS_LOCK_PARAMS lockFlagsWriteOnly;
3312     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
3313     lockFlagsWriteOnly.WriteOnly = true;
3314 
3315     // Setup BrcInit DMEM
3316     auto hucVdencBrcInitDmem = (PCODECHAL_VDENC_HEVC_HUC_BRC_INIT_DMEM_G11)m_osInterface->pfnLockResource(
3317         m_osInterface, &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx], &lockFlagsWriteOnly);
3318     CODECHAL_ENCODE_CHK_NULL_RETURN(hucVdencBrcInitDmem);
3319     MOS_ZeroMemory(hucVdencBrcInitDmem, sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_INIT_DMEM_G11));
3320 
3321     hucVdencBrcInitDmem->BRCFunc_U32 = 0;  // 0: Init, 1: Reset
3322     hucVdencBrcInitDmem->UserMaxFrame = GetProfileLevelMaxFrameSize();
3323     hucVdencBrcInitDmem->InitBufFull_U32   = MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit);
3324     hucVdencBrcInitDmem->BufSize_U32       = m_hevcSeqParams->VBVBufferSizeInBit;
3325     hucVdencBrcInitDmem->TargetBitrate_U32 = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;  // DDI in Kbits
3326     hucVdencBrcInitDmem->MaxRate_U32       = m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS;
3327     hucVdencBrcInitDmem->MinRate_U32 = 0;
3328     hucVdencBrcInitDmem->FrameRateM_U32    = m_hevcSeqParams->FrameRate.Numerator;
3329     hucVdencBrcInitDmem->FrameRateD_U32    = m_hevcSeqParams->FrameRate.Denominator;
3330     hucVdencBrcInitDmem->ACQP_U32          = 0;
3331     if (m_hevcSeqParams->UserMaxPBFrameSize > 0)
3332     {
3333         //Backup CodingType as need to set it as B_Tpye to get MaxFrameSize for P/B frames.
3334         auto CodingTypeTemp = m_hevcPicParams->CodingType;
3335         m_hevcPicParams->CodingType = B_TYPE;
3336         hucVdencBrcInitDmem->ProfileLevelMaxFramePB_U32 = GetProfileLevelMaxFrameSize();
3337         m_hevcPicParams->CodingType = CodingTypeTemp;
3338     }
3339     else
3340     {
3341         hucVdencBrcInitDmem->ProfileLevelMaxFramePB_U32 = hucVdencBrcInitDmem->UserMaxFrame;
3342     }
3343 
3344     if (m_brcEnabled)
3345     {
3346         switch (m_hevcSeqParams->RateControlMethod)
3347         {
3348         case RATECONTROL_ICQ:
3349             hucVdencBrcInitDmem->BRCFlag = 0;
3350             break;
3351         case RATECONTROL_CBR:
3352             hucVdencBrcInitDmem->BRCFlag = 1;
3353             break;
3354         case RATECONTROL_VBR:
3355             hucVdencBrcInitDmem->BRCFlag = 2;
3356             hucVdencBrcInitDmem->ACQP_U32 = 0;
3357             break;
3358         case RATECONTROL_VCM:
3359             hucVdencBrcInitDmem->BRCFlag = 3;
3360             break;
3361         case RATECONTROL_QVBR:
3362             hucVdencBrcInitDmem->BRCFlag = 2;
3363             hucVdencBrcInitDmem->ACQP_U32 = m_hevcSeqParams->ICQQualityFactor;
3364             break;
3365         default:
3366             break;
3367         }
3368 
3369         // Low Delay BRC
3370         if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
3371         {
3372             hucVdencBrcInitDmem->BRCFlag = 5;
3373         }
3374 
3375         switch (m_hevcSeqParams->MBBRC)
3376         {
3377         case mbBrcInternal:
3378         case mbBrcEnabled:
3379             hucVdencBrcInitDmem->CuQpCtrl_U8 = 3;
3380             break;
3381         case mbBrcDisabled:
3382             hucVdencBrcInitDmem->CuQpCtrl_U8 = 0;
3383             break;
3384         default:
3385             break;
3386         }
3387     }
3388     else if (m_hevcVdencAcqpEnabled)
3389     {
3390         hucVdencBrcInitDmem->BRCFlag = 0;
3391 
3392         // 0=No CUQP; 1=CUQP for I-frame; 2=CUQP for P/B-frame
3393         // bit operation, bit 1 for I-frame, bit 2 for P/B frame
3394         // In VDENC mode, the field "Cu_Qp_Delta_Enabled_Flag" should always be set to 1.
3395         if (m_hevcSeqParams->QpAdjustment)
3396         {
3397             hucVdencBrcInitDmem->CuQpCtrl_U8 = 3;  // wPictureCodingType I:0, P:1, B:2
3398         }
3399         else
3400         {
3401             hucVdencBrcInitDmem->CuQpCtrl_U8 = 0;  // wPictureCodingType I:0, P:1, B:2
3402         }
3403     }
3404 
3405     hucVdencBrcInitDmem->SSCFlag = m_hevcSeqParams->SliceSizeControl;
3406 
3407     // NumP/NumB in par file are different from GopP/GopB
3408     // definitions of P & B are not consistent
3409     // LDB case, NumP=0 & NumB=100, but GopP=100 & GopB=0
3410 
3411     hucVdencBrcInitDmem->GopP_U16 = m_hevcSeqParams->GopPicSize - m_hevcSeqParams->NumOfBInGop[0] - 1;
3412     hucVdencBrcInitDmem->GopB_U16 = (uint16_t)m_hevcSeqParams->NumOfBInGop[0];
3413 
3414     hucVdencBrcInitDmem->FrameWidth_U16 = (uint16_t)m_frameWidth;
3415     hucVdencBrcInitDmem->FrameHeight_U16 = (uint16_t)m_frameHeight;
3416 
3417     hucVdencBrcInitDmem->GopB1_U16 = (uint16_t)m_hevcSeqParams->NumOfBInGop[1];
3418     hucVdencBrcInitDmem->GopB2_U16 = (uint16_t)m_hevcSeqParams->NumOfBInGop[2];
3419 
3420     hucVdencBrcInitDmem->MinQP_U8 = m_hevcPicParams->BRCMinQp < 10 ? 10 : m_hevcPicParams->BRCMinQp;                                           // Setting values from arch spec
3421     hucVdencBrcInitDmem->MaxQP_U8 = m_hevcPicParams->BRCMaxQp < 10 ? 51 : (m_hevcPicParams->BRCMaxQp > 51 ? 51 : m_hevcPicParams->BRCMaxQp);   // Setting values from arch spec
3422 
3423     hucVdencBrcInitDmem->MaxBRCLevel_U8 = 1;
3424     hucVdencBrcInitDmem->LumaBitDepth_U8   = m_hevcSeqParams->bit_depth_luma_minus8 + 8;
3425     hucVdencBrcInitDmem->ChromaBitDepth_U8 = m_hevcSeqParams->bit_depth_chroma_minus8 + 8;
3426 
3427     if ((hucVdencBrcInitDmem->LowDelayMode_U8 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)))
3428     {
3429         MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshPB0_S8, 8 * sizeof(int8_t), (void *)m_lowdelayDevThreshPB, 8 * sizeof(int8_t));
3430         MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshVBR0_S8, 8 * sizeof(int8_t), (void*)m_lowdelayDevThreshVBR, 8 * sizeof(int8_t));
3431         MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshI0_S8, 8 * sizeof(int8_t), (void*)m_lowdelayDevThreshI, 8 * sizeof(int8_t));
3432     }
3433     else
3434     {
3435         uint64_t inputbitsperframe = uint64_t(hucVdencBrcInitDmem->MaxRate_U32*100. / (hucVdencBrcInitDmem->FrameRateM_U32 * 100.0 / hucVdencBrcInitDmem->FrameRateD_U32));
3436         if (m_brcEnabled && !hucVdencBrcInitDmem->BufSize_U32)
3437         {
3438             CODECHAL_ENCODE_ASSERTMESSAGE("VBV BufSize should not be 0 for BRC case\n");
3439             eStatus = MOS_STATUS_INVALID_PARAMETER;
3440         }
3441         uint64_t vbvsz = hucVdencBrcInitDmem->BufSize_U32;
3442         double bps_ratio = inputbitsperframe / (vbvsz / m_devStdFPS);
3443         if (bps_ratio < m_bpsRatioLow) bps_ratio = m_bpsRatioLow;
3444         if (bps_ratio > m_bpsRatioHigh) bps_ratio = m_bpsRatioHigh;
3445 
3446         for (int i = 0; i < m_numDevThreshlds / 2; i++) {
3447             hucVdencBrcInitDmem->DevThreshPB0_S8[i] = (signed char)(m_negMultPB*pow(m_devThreshPBFPNEG[i], bps_ratio));
3448             hucVdencBrcInitDmem->DevThreshPB0_S8[i + m_numDevThreshlds / 2] = (signed char)(m_postMultPB*pow(m_devThreshPBFPPOS[i], bps_ratio));
3449 
3450             hucVdencBrcInitDmem->DevThreshI0_S8[i] = (signed char)(m_negMultPB*pow(m_devThreshIFPNEG[i], bps_ratio));
3451             hucVdencBrcInitDmem->DevThreshI0_S8[i + m_numDevThreshlds / 2] = (signed char)(m_postMultPB*pow(m_devThreshIFPPOS[i], bps_ratio));
3452 
3453             hucVdencBrcInitDmem->DevThreshVBR0_S8[i] = (signed char)(m_negMultPB*pow(m_devThreshVBRNEG[i], bps_ratio));
3454             hucVdencBrcInitDmem->DevThreshVBR0_S8[i + m_numDevThreshlds / 2] = (signed char)(m_posMultVBR*pow(m_devThreshVBRPOS[i], bps_ratio));
3455         }
3456     }
3457 
3458     MOS_SecureMemcpy(hucVdencBrcInitDmem->InstRateThreshP0_S8, 4 * sizeof(int8_t), (void *)m_instRateThreshP0, 4 * sizeof(int8_t));
3459     MOS_SecureMemcpy(hucVdencBrcInitDmem->InstRateThreshB0_S8, 4 * sizeof(int8_t), (void *)m_instRateThreshB0, 4 * sizeof(int8_t));
3460     MOS_SecureMemcpy(hucVdencBrcInitDmem->InstRateThreshI0_S8, 4 * sizeof(int8_t), (void *)m_instRateThreshI0, 4 * sizeof(int8_t));
3461 
3462     if (m_brcEnabled)
3463     {
3464         // initQPIP, initQPB values will be used for BRC in the future
3465         int32_t initQPIP = 0, initQPB = 0;
3466         ComputeVDEncInitQP(initQPIP, initQPB);
3467         hucVdencBrcInitDmem->InitQPIP_U8 = (uint8_t)initQPIP;
3468         hucVdencBrcInitDmem->InitQPB_U8 = (uint8_t)initQPB;
3469     }
3470     else
3471     {
3472         hucVdencBrcInitDmem->InitQPIP_U8 = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
3473         hucVdencBrcInitDmem->InitQPB_U8  = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
3474     }
3475 
3476     // recommendation
3477     hucVdencBrcInitDmem->TopFrmSzThrForAdapt2Pass_U8 = 32;
3478     hucVdencBrcInitDmem->BotFrmSzThrForAdapt2Pass_U8 = 24;
3479 
3480     MOS_SecureMemcpy(hucVdencBrcInitDmem->EstRateThreshP0_U8, 7 * sizeof(uint8_t), (void*)m_estRateThreshP0, 7 * sizeof(uint8_t));
3481     MOS_SecureMemcpy(hucVdencBrcInitDmem->EstRateThreshB0_U8, 7 * sizeof(uint8_t), (void*)m_estRateThreshB0, 7 * sizeof(uint8_t));
3482     MOS_SecureMemcpy(hucVdencBrcInitDmem->EstRateThreshI0_U8, 7 * sizeof(uint8_t), (void*)m_estRateThreshI0, 7 * sizeof(uint8_t));
3483 
3484     if (m_vdencStreamInEnabled && m_hevcPicParams->NumROI && !m_vdencNativeROIEnabled)
3485     {
3486         hucVdencBrcInitDmem->StreamInROIEnable_U8 = 1;
3487         hucVdencBrcInitDmem->StreamInSurfaceEnable_U8 = 1;
3488     }
3489     // RDOQ adaptation hardened to HW starting Gen11
3490     hucVdencBrcInitDmem->RDOQ_AdaptationEnable_U8 = 0;
3491 
3492     // recommendation
3493     hucVdencBrcInitDmem->TopQPDeltaThrForAdapt2Pass_U8 = 2;
3494     hucVdencBrcInitDmem->BotQPDeltaThrForAdapt2Pass_U8 = 1;
3495     hucVdencBrcInitDmem->SlidingWindow_Size_U32        = 30;
3496 
3497     if ((m_hevcSeqParams->SlidingWindowSize != 0) && (m_hevcSeqParams->MaxBitRatePerSlidingWindow != 0))
3498     {
3499         hucVdencBrcInitDmem->SlidingWindow_Size_U32     = m_hevcSeqParams->SlidingWindowSize;
3500         hucVdencBrcInitDmem->SlidingWindow_MaxRateRatio_U8 = (m_hevcSeqParams->MaxBitRatePerSlidingWindow * 100)/ m_hevcSeqParams->TargetBitRate;
3501     }
3502     else
3503     {
3504         if (m_hevcSeqParams->FrameRate.Denominator == 0)
3505         {
3506             CODECHAL_ENCODE_ASSERTMESSAGE("FrameRate.Deminator is zero!");
3507             return MOS_STATUS_INVALID_PARAMETER;
3508         }
3509         uint32_t framerate = m_hevcSeqParams->FrameRate.Numerator / m_hevcSeqParams->FrameRate.Denominator;
3510         hucVdencBrcInitDmem->SlidingWindow_Size_U32 = MOS_MIN(framerate, 60);
3511         hucVdencBrcInitDmem->SlidingWindow_MaxRateRatio_U8 = 120;
3512     }
3513 
3514     m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx]);
3515 
3516     return eStatus;
3517 }
3518 
SetConstDataHuCBrcUpdate()3519 MOS_STATUS CodechalVdencHevcStateG11::SetConstDataHuCBrcUpdate()
3520 {
3521     CODECHAL_ENCODE_FUNCTION_ENTER;
3522 
3523     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3524 
3525     MOS_LOCK_PARAMS lockFlagsWriteOnly;
3526     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
3527     lockFlagsWriteOnly.WriteOnly = true;
3528 
3529     auto hucConstData = (PCODECHAL_VDENC_HEVC_HUC_BRC_CONSTANT_DATA_G11)m_osInterface->pfnLockResource(
3530         m_osInterface, &m_vdencBrcConstDataBuffer[m_currRecycledBufIdx], &lockFlagsWriteOnly);
3531     CODECHAL_ENCODE_CHK_NULL_RETURN(hucConstData);
3532 
3533     MOS_SecureMemcpy(hucConstData->SLCSZ_THRDELTAI_U16, sizeof(m_hucConstantData), m_hucConstantData, sizeof(m_hucConstantData));
3534 
3535     MOS_SecureMemcpy(hucConstData->RDQPLambdaI, sizeof(m_rdQpLambdaI), m_rdQpLambdaI, sizeof(m_rdQpLambdaI));
3536     MOS_SecureMemcpy(hucConstData->RDQPLambdaP, sizeof(m_rdQpLambdaP), m_rdQpLambdaP, sizeof(m_rdQpLambdaP));
3537 
3538     if (m_hevcVisualQualityImprovement)
3539     {
3540         MOS_SecureMemcpy(hucConstData->SADQPLambdaI, sizeof(m_sadQpLambdaI), m_sadQpLambdaI_VQI, sizeof(m_sadQpLambdaI));
3541         MOS_SecureMemcpy(hucConstData->PenaltyForIntraNonDC32x32PredMode, sizeof(m_penaltyForIntraNonDC32x32PredMode), m_penaltyForIntraNonDC32x32PredMode_VQI, sizeof(m_penaltyForIntraNonDC32x32PredMode_VQI));
3542     }
3543     else
3544     {
3545         MOS_SecureMemcpy(hucConstData->SADQPLambdaI, sizeof(m_sadQpLambdaI), m_sadQpLambdaI, sizeof(m_sadQpLambdaI));
3546         MOS_SecureMemcpy(hucConstData->PenaltyForIntraNonDC32x32PredMode, sizeof(m_penaltyForIntraNonDC32x32PredMode), m_penaltyForIntraNonDC32x32PredMode, sizeof(m_penaltyForIntraNonDC32x32PredMode));
3547     }
3548 
3549     MOS_SecureMemcpy(hucConstData->SADQPLambdaP, sizeof(m_sadQpLambdaP), m_sadQpLambdaP, sizeof(m_sadQpLambdaP));
3550 
3551     if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
3552     {
3553         const int numEstrateThreshlds = 7;
3554 
3555         for (int i = 0; i < numEstrateThreshlds + 1; i++)
3556         {
3557             for (int j = 0; j < m_numDevThreshlds + 1; j++)
3558             {
3559                 hucConstData->FrmSzAdjTabI_S8[(numEstrateThreshlds + 1)*j + i] = m_lowdelayDeltaFrmszI[j][i];
3560                 hucConstData->FrmSzAdjTabP_S8[(numEstrateThreshlds + 1)*j + i] = m_lowdelayDeltaFrmszP[j][i];
3561                 hucConstData->FrmSzAdjTabB_S8[(numEstrateThreshlds + 1)*j + i] = m_lowdelayDeltaFrmszB[j][i];
3562             }
3563         }
3564     }
3565 
3566     // ModeCosts depends on frame type
3567     if (m_pictureCodingType == I_TYPE)
3568     {
3569         MOS_SecureMemcpy(hucConstData->ModeCosts, sizeof(m_hucModeCostsIFrame), m_hucModeCostsIFrame, sizeof(m_hucModeCostsIFrame));
3570     }
3571     else
3572     {
3573         MOS_SecureMemcpy(hucConstData->ModeCosts, sizeof(m_hucModeCostsPbFrame), m_hucModeCostsPbFrame, sizeof(m_hucModeCostsPbFrame));
3574     }
3575 
3576     // starting location in batch buffer for each slice
3577     uint32_t baseLocation = 0; // base location is 0 after move Group3 cmds to region12
3578     uint32_t currentLocation = baseLocation;
3579 
3580     auto slcData = m_slcData;
3581     // HCP_WEIGHTSOFFSETS_STATE + HCP_SLICE_STATE + HCP_PAK_INSERT_OBJECT + VDENC_WEIGHT_OFFSETS_STATE
3582     for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
3583     {
3584         auto hevcSlcParams = &m_hevcSliceParams[slcCount];
3585         // HuC FW require unit in Bytes
3586         hucConstData->Slice[slcCount].SizeOfCMDs
3587             = (uint16_t)(m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_vdencBatchBufferPerSliceVarSize[slcCount]);
3588 
3589         // HCP_WEIGHTOFFSET_STATE cmd
3590         if (m_hevcVdencWeightedPredEnabled)
3591         {
3592             // 1st HCP_WEIGHTOFFSET_STATE cmd - P & B
3593             if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_P_SLICE || hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
3594             {
3595                 hucConstData->Slice[slcCount].HcpWeightOffsetL0_StartInBytes = (uint16_t)currentLocation;   // HCP_WEIGHTOFFSET_L0 starts in byte from beginning of the SLB. 0xFFFF means unavailable in SLB
3596                 currentLocation += m_hcpWeightOffsetStateCmdSize;
3597             }
3598 
3599             // 2nd HCP_WEIGHTOFFSET_STATE cmd - B
3600             if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
3601             {
3602                 hucConstData->Slice[slcCount].HcpWeightOffsetL1_StartInBytes = (uint16_t)currentLocation; // HCP_WEIGHTOFFSET_L1 starts in byte from beginning of the SLB. 0xFFFF means unavailable in SLB
3603                 currentLocation += m_hcpWeightOffsetStateCmdSize;
3604             }
3605         }
3606         else
3607         {
3608             // 0xFFFF means unavailable in SLB
3609             hucConstData->Slice[slcCount].HcpWeightOffsetL0_StartInBytes = 0xFFFF;
3610             hucConstData->Slice[slcCount].HcpWeightOffsetL1_StartInBytes = 0xFFFF;
3611         }
3612 
3613         // HCP_SLICE_STATE cmd
3614         hucConstData->Slice[slcCount].SliceState_StartInBytes = (uint16_t)currentLocation;  // HCP_WEIGHTOFFSET is not needed
3615         currentLocation += m_hcpSliceStateCmdSize;
3616 
3617         // VDENC_WEIGHT_OFFSETS_STATE cmd
3618         hucConstData->Slice[slcCount].VdencWeightOffset_StartInBytes                      // VdencWeightOffset cmd is the last one expect BatchBufferEnd cmd
3619             = (uint16_t)(baseLocation + hucConstData->Slice[slcCount].SizeOfCMDs - m_vdencWeightOffsetStateCmdSize - m_miBatchBufferEndCmdSize - ENCODE_VDENC_HEVC_PADDING_DW_SIZE * 4);
3620 
3621         // logic from PakInsertObject cmd
3622         uint32_t bitSize                = (m_hevcSeqParams->SliceSizeControl) ? (hevcSlcParams->BitLengthSliceHeaderStartingPortion) : slcData[slcCount].BitSize;  // 40 for HEVC VDEnc Dynamic Slice
3623         uint32_t sliceHeaderSizeInBytes = (bitSize + 7) >> 3;
3624 
3625         // 1st PakInsertObject cmd with AU, SPS, PPS headers only exists for the first slice
3626         if (slcCount == 0)
3627         {
3628             // assumes that there is no 3rd PakInsertObject cmd for SSC
3629             currentLocation += m_1stPakInsertObjectCmdSize;
3630         }
3631 
3632         hucConstData->Slice[slcCount].SliceHeaderPIO_StartInBytes = (uint16_t)currentLocation;
3633 
3634         // HuC FW requires true slice header size in bits without byte alignment
3635         hucConstData->Slice[slcCount].SliceHeader_SizeInBits = (uint16_t)(sliceHeaderSizeInBytes * 8);
3636         if (!IsFirstPass())
3637         {
3638             PBSBuffer bsBuffer = &m_bsBuffer;
3639             CODECHAL_ENCODE_CHK_NULL_RETURN(bsBuffer);
3640             CODECHAL_ENCODE_CHK_NULL_RETURN(bsBuffer->pBase);
3641             uint8_t *sliceHeaderLastByte = (uint8_t*)(bsBuffer->pBase + slcData[slcCount].SliceOffset + sliceHeaderSizeInBytes - 1);
3642             for (auto i = 0; i < 8; i++)
3643             {
3644                 uint8_t mask = 1 << i;
3645                 if (*sliceHeaderLastByte & mask)
3646                 {
3647                     hucConstData->Slice[slcCount].SliceHeader_SizeInBits -= (i+1);
3648                     break;
3649                 }
3650             }
3651         }
3652         if (m_hevcVdencWeightedPredEnabled)
3653         {
3654             hucConstData->Slice[slcCount].WeightTable_StartInBits = (uint16_t)hevcSlcParams->PredWeightTableBitOffset;
3655             hucConstData->Slice[slcCount].WeightTable_EndInBits = (uint16_t)(hevcSlcParams->PredWeightTableBitOffset + (hevcSlcParams->PredWeightTableBitLength));
3656         }
3657         else
3658         {
3659             // number of bits from beginning of slice header, 0xffff means not awailable
3660             hucConstData->Slice[slcCount].WeightTable_StartInBits = 0xFFFF;
3661             hucConstData->Slice[slcCount].WeightTable_EndInBits = 0xFFFF;
3662         }
3663 
3664         baseLocation += hucConstData->Slice[slcCount].SizeOfCMDs;
3665         currentLocation = baseLocation;
3666     }
3667 
3668     // Add motion apatative settings
3669     if (m_enableMotionAdaptive)
3670     {
3671         MOS_SecureMemcpy(hucConstData->QPAdaptiveWeight, sizeof(m_qpAdaptiveWeight), m_qpAdaptiveWeight, sizeof(m_qpAdaptiveWeight));
3672         MOS_SecureMemcpy(hucConstData->boostTable, sizeof(m_boostTable), m_boostTable, sizeof(m_boostTable));
3673     }
3674     else
3675     {
3676         MOS_ZeroMemory(hucConstData->QPAdaptiveWeight, sizeof(m_qpAdaptiveWeight));
3677         MOS_ZeroMemory(hucConstData->boostTable, sizeof(m_boostTable));
3678     }
3679 
3680     m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcConstDataBuffer[m_currRecycledBufIdx]);
3681 
3682     return eStatus;
3683 }
3684 
SetRegionsHuCBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)3685 MOS_STATUS CodechalVdencHevcStateG11::SetRegionsHuCBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
3686 {
3687     CODECHAL_ENCODE_FUNCTION_ENTER;
3688 
3689     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3690 
3691     int32_t currentPass = GetCurrentPass();
3692     if (currentPass < 0)
3693     {
3694         eStatus = MOS_STATUS_INVALID_PARAMETER;
3695         return eStatus;
3696     }
3697 
3698     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::SetRegionsHuCBrcUpdate(virtualAddrParams));
3699 
3700     // With multiple tiles, ensure that HuC BRC kernel is fed with vdenc frame level statistics from HuC PAK Int kernel
3701     // Applicable for scalable/ non-scalable mode
3702     if (m_hevcPicParams->tiles_enabled_flag)
3703     {
3704         virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource;  // Region 1  VDEnc Statistics Buffer (Input) - VDENC_HEVC_VP9_FRAME_BASED_STATISTICS_STREAMOUT
3705         virtualAddrParams->regionParams[1].dwOffset   = m_hevcFrameStatsOffset.uiVdencStatistics;
3706     }
3707 
3708     if (m_numPipe > 1)
3709     {
3710         virtualAddrParams->regionParams[2].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource;  // Region 2  PAK Statistics Buffer (Input) - MFX_PAK_FRAME_STATISTICS
3711         virtualAddrParams->regionParams[2].dwOffset   = m_hevcFrameStatsOffset.uiHevcPakStatistics;
3712         virtualAddrParams->regionParams[7].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource;  // Region 7  Slice Stat Streamout (Input)
3713         virtualAddrParams->regionParams[7].dwOffset   = m_hevcFrameStatsOffset.uiHevcSliceStreamout;
3714         // In scalable-mode, use PAK Integration kernel output to get bistream size
3715         virtualAddrParams->regionParams[8].presRegion   = &m_resBrcDataBuffer;
3716     }
3717 
3718     virtualAddrParams->regionParams[12].presRegion = &m_vdencGroup3BatchBuffer[m_currRecycledBufIdx][currentPass];        // Region 12 - SLB buffer for group 3 (Input)
3719 
3720     // Tile reset case, use previous frame BRC data
3721     if ((m_numPipe != m_numPipePre) && IsFirstPass())
3722     {
3723         if (m_numPipePre > 1)
3724         {
3725             virtualAddrParams->regionParams[8].presRegion   = &m_resBrcDataBuffer;
3726         }
3727         else
3728         {
3729             virtualAddrParams->regionParams[8].presRegion   = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo);
3730         }
3731     }
3732 
3733     return eStatus;
3734 }
3735 
SetDmemHuCBrcUpdate()3736 MOS_STATUS CodechalVdencHevcStateG11::SetDmemHuCBrcUpdate()
3737 {
3738     CODECHAL_ENCODE_FUNCTION_ENTER;
3739 
3740     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3741 
3742     MOS_LOCK_PARAMS lockFlagsWriteOnly;
3743     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
3744     lockFlagsWriteOnly.WriteOnly = true;
3745 
3746     // Program update DMEM
3747     auto hucVDEncBrcUpdateDmem = (PCODECHAL_VDENC_HEVC_HUC_BRC_UPDATE_DMEM_G11)m_osInterface->pfnLockResource(
3748         m_osInterface, &m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][GetCurrentPass()], &lockFlagsWriteOnly);
3749     CODECHAL_ENCODE_CHK_NULL_RETURN(hucVDEncBrcUpdateDmem);
3750     MOS_ZeroMemory(hucVDEncBrcUpdateDmem, sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_UPDATE_DMEM_G11));
3751 
3752     hucVDEncBrcUpdateDmem->TARGETSIZE_U32 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)? m_hevcSeqParams->InitVBVBufferFullnessInBit :
3753                                             MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit);
3754     hucVDEncBrcUpdateDmem->FrameID_U32 = m_storeData;    // frame number
3755     MOS_SecureMemcpy(hucVDEncBrcUpdateDmem->startGAdjFrame_U16, 4 * sizeof(uint16_t), (void*)m_startGAdjFrame, 4 * sizeof(uint16_t));
3756     hucVDEncBrcUpdateDmem->TargetSliceSize_U16           = (uint16_t)m_hevcPicParams->MaxSliceSizeInBytes;
3757     auto slbSliceSize = (m_hwInterface->m_vdenc2ndLevelBatchBufferSize - m_hwInterface->m_vdencBatchBuffer1stGroupSize -
3758         m_hwInterface->m_vdencBatchBuffer2ndGroupSize) / ENCODE_HEVC_VDENC_NUM_MAX_SLICES;
3759     hucVDEncBrcUpdateDmem->SLB_Data_SizeInBytes = (uint16_t)(slbSliceSize * m_numSlices);
3760     hucVDEncBrcUpdateDmem->PIPE_MODE_SELECT_StartInBytes = 0xFFFF;    // HuC need not need to modify the pipe mode select command in Gen11+
3761     hucVDEncBrcUpdateDmem->CMD1_StartInBytes = (uint16_t)m_hwInterface->m_vdencBatchBuffer1stGroupSize;
3762     hucVDEncBrcUpdateDmem->PIC_STATE_StartInBytes = (uint16_t)m_picStateCmdStartInBytes;
3763     hucVDEncBrcUpdateDmem->CMD2_StartInBytes = (uint16_t)m_cmd2StartInBytes;
3764 
3765     uint16_t circularFrameIdx = (m_storeData - 1) % 4;
3766 
3767     // initial order before circular shift: current, ref0, ref1, ref2 = 0, 3, 2, 1
3768     // different initial order can be used, but this order (0, 3, 2, 1) is kernel recommendation
3769     hucVDEncBrcUpdateDmem->Current_Data_Offset = ((0 + circularFrameIdx) % 4) * m_weightHistSize;
3770     hucVDEncBrcUpdateDmem->Ref_Data_Offset[0] = ((3 + circularFrameIdx) % 4) * m_weightHistSize;
3771     hucVDEncBrcUpdateDmem->Ref_Data_Offset[1] = ((2 + circularFrameIdx) % 4) * m_weightHistSize;
3772     hucVDEncBrcUpdateDmem->Ref_Data_Offset[2] = ((1 + circularFrameIdx) % 4) * m_weightHistSize;
3773 
3774     hucVDEncBrcUpdateDmem->MaxNumSliceAllowed_U16 = (uint16_t)GetMaxAllowedSlices(m_hevcSeqParams->Level);
3775     hucVDEncBrcUpdateDmem->OpMode_U8         // 1: BRC (including ACQP), 2: Weighted prediction (should not be enabled in first pass)
3776         = (m_hevcVdencWeightedPredEnabled && m_hevcPicParams->bEnableGPUWeightedPrediction && !IsFirstPass()) ? 3 : 1;    // 01: BRC, 10: WP never used,  11: BRC + WP
3777 
3778     // LowDelay B needs to be considered as P frame although wPictureCodingType=3
3779     // wPictureCodingType I:1, P:2, B:3 -> CurrentFrameType I:2, P:0, B:1
3780     hucVDEncBrcUpdateDmem->CurrentFrameType_U8 = (m_pictureCodingType == I_TYPE) ? 2 : 0;
3781 
3782     // Num_Ref_L1 should be always same as Num_Ref_L0
3783     hucVDEncBrcUpdateDmem->Num_Ref_L0_U8 = m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1;
3784     hucVDEncBrcUpdateDmem->Num_Ref_L1_U8 = m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1;
3785     hucVDEncBrcUpdateDmem->Num_Slices    = (uint8_t)m_hevcPicParams->NumSlices;
3786 
3787     // CQP_QPValue_U8 setting is needed since ACQP is also part of ICQ
3788     hucVDEncBrcUpdateDmem->CQP_QPValue_U8 = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
3789     hucVDEncBrcUpdateDmem->CQP_FracQP_U8 = 0;
3790     if (m_hevcPicParams->BRCPrecision == 1)
3791     {
3792         hucVDEncBrcUpdateDmem->MaxNumPass_U8 = 1;
3793     }
3794     else
3795     {
3796         hucVDEncBrcUpdateDmem->MaxNumPass_U8 = CODECHAL_VDENC_BRC_NUM_OF_PASSES;
3797     }
3798 
3799     MOS_SecureMemcpy(hucVDEncBrcUpdateDmem->gRateRatioThreshold_U8, 7 * sizeof(uint8_t), (void*)m_rateRatioThreshold, 7 * sizeof(uint8_t));
3800     MOS_SecureMemcpy(hucVDEncBrcUpdateDmem->startGAdjMult_U8, 5 * sizeof(uint8_t), (void*)m_startGAdjMult, 5 * sizeof(uint8_t));
3801     MOS_SecureMemcpy(hucVDEncBrcUpdateDmem->startGAdjDiv_U8, 5 * sizeof(uint8_t), (void*)m_startGAdjDiv, 5 * sizeof(uint8_t));
3802     MOS_SecureMemcpy(hucVDEncBrcUpdateDmem->gRateRatioThresholdQP_U8, 8 * sizeof(uint8_t), (void*)m_rateRatioThresholdQP, 8 * sizeof(uint8_t));
3803 
3804     hucVDEncBrcUpdateDmem->IPAverageCoeff_U8 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW) ? 0 : 64;
3805     hucVDEncBrcUpdateDmem->CurrentPass_U8 = (uint8_t) GetCurrentPass();
3806 
3807     if ((m_hevcVdencAcqpEnabled && m_hevcSeqParams->QpAdjustment) || (m_brcEnabled && (m_hevcSeqParams->MBBRC != 2)))
3808     {
3809         hucVDEncBrcUpdateDmem->DeltaQPForSadZone0_S8 = -1;
3810         hucVDEncBrcUpdateDmem->DeltaQPForSadZone1_S8 = 0;
3811         hucVDEncBrcUpdateDmem->DeltaQPForSadZone2_S8 = 1;
3812         hucVDEncBrcUpdateDmem->DeltaQPForSadZone3_S8 = 2;
3813         hucVDEncBrcUpdateDmem->DeltaQPForMvZero_S8   = 3;
3814         hucVDEncBrcUpdateDmem->DeltaQPForMvZone0_S8  = -2;
3815         hucVDEncBrcUpdateDmem->DeltaQPForMvZone1_S8  = 0;
3816         hucVDEncBrcUpdateDmem->DeltaQPForMvZone2_S8  = 2;
3817     }
3818 
3819     if (m_hevcVdencWeightedPredEnabled)
3820     {
3821         hucVDEncBrcUpdateDmem->LumaLog2WeightDenom_S8 = 6;
3822         hucVDEncBrcUpdateDmem->ChromaLog2WeightDenom_S8 = 6;
3823     }
3824 
3825     // chroma weights are not confirmed to be supported from HW team yet
3826     hucVDEncBrcUpdateDmem->DisabledFeature_U8 = 0; // bit mask, 1 (bit0): disable chroma weight setting
3827 
3828     hucVDEncBrcUpdateDmem->SlidingWindow_Enable_U8          = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_LOW);
3829     hucVDEncBrcUpdateDmem->LOG_LCU_Size_U8                  = 6;
3830     hucVDEncBrcUpdateDmem->RDOQ_Enable_U8                   = m_hevcRdoqEnabled ? (m_pictureCodingType == I_TYPE ? m_hevcIFrameRdoqEnabled : 1) : 0;
3831     hucVDEncBrcUpdateDmem->ReEncodePositiveQPDeltaThr_S8    = 4;
3832     hucVDEncBrcUpdateDmem->ReEncodeNegativeQPDeltaThr_S8    = -5;
3833     hucVDEncBrcUpdateDmem->SceneChgPrevIntraPctThreshold_U8 = 96;
3834     hucVDEncBrcUpdateDmem->SceneChgCurIntraPctThreshold_U8  = 192;
3835 
3836     hucVDEncBrcUpdateDmem->EnableMotionAdaptive             = m_enableMotionAdaptive;
3837     hucVDEncBrcUpdateDmem->FrameSizeBoostForSceneChange     = 1;
3838     hucVDEncBrcUpdateDmem->ROMCurrent                       = 0;
3839     hucVDEncBrcUpdateDmem->ROMZero                          = 0;
3840     hucVDEncBrcUpdateDmem->TargetFrameSize                  = m_hevcPicParams->TargetFrameSize << 3;// byte to bit
3841 
3842     // reset skip frame statistics
3843     m_numSkipFrames = 0;
3844     m_sizeSkipFrames = 0;
3845 
3846     m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][GetCurrentPass()]);
3847 
3848     return eStatus;
3849 }
3850 
SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceStateParams)3851 void CodechalVdencHevcStateG11::SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE& sliceStateParams)
3852 {
3853     CODECHAL_ENCODE_FUNCTION_ENTER;
3854 
3855     CodechalVdencHevcState::SetHcpSliceStateCommonParams(sliceStateParams);
3856 
3857     static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G11 &>(sliceStateParams).dwNumPipe = m_numPipe;
3858 }
3859 
SetHcpSliceStateParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceState,PCODEC_ENCODER_SLCDATA slcData,uint16_t slcCount,PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 tileCodingParams,bool lastSliceInTile,uint32_t idx)3860 void CodechalVdencHevcStateG11::SetHcpSliceStateParams(
3861     MHW_VDBOX_HEVC_SLICE_STATE&           sliceState,
3862     PCODEC_ENCODER_SLCDATA                slcData,
3863     uint16_t                              slcCount,
3864     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 tileCodingParams,
3865     bool                                  lastSliceInTile,
3866     uint32_t                              idx)
3867 {
3868     CODECHAL_ENCODE_FUNCTION_ENTER;
3869 
3870     CodechalEncodeHevcBase::SetHcpSliceStateParams(sliceState, slcData, slcCount);
3871 
3872     sliceState.bLastSliceInTile = lastSliceInTile ? true : false;
3873     sliceState.bLastSliceInTileColumn = (lastSliceInTile & tileCodingParams[idx].IsLastTileofColumn) ? true : false;
3874     static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G11&>(sliceState).pTileCodingParams = tileCodingParams + idx;
3875     static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G11&>(sliceState).dwTileID = idx;
3876 }
3877 
SetKernelParams(EncOperation operation,MHW_KERNEL_PARAM * kernelParams)3878 MOS_STATUS CodechalVdencHevcStateG11::SetKernelParams(
3879     EncOperation operation,
3880     MHW_KERNEL_PARAM* kernelParams)
3881 {
3882     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3883 
3884     CODECHAL_ENCODE_FUNCTION_ENTER;
3885 
3886     CODECHAL_ENCODE_CHK_NULL_RETURN(kernelParams);
3887 
3888     auto curbeAlignment = m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment();
3889 
3890     kernelParams->iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
3891     kernelParams->iIdCount = 1;
3892 
3893     switch (operation)
3894     {
3895     case VDENC_ME_P:
3896     case VDENC_ME_B:
3897     case VDENC_STREAMIN:
3898     case VDENC_STREAMIN_HEVC:
3899         kernelParams->iBTCount = CODECHAL_VDENC_HME_END_G11 - CODECHAL_VDENC_HME_BEGIN_G11;
3900         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MEDIA_OBJECT_HEVC_VP9_VDENC_ME_CURBE_G11), (size_t)curbeAlignment);
3901         kernelParams->iBlockWidth = 32;
3902         kernelParams->iBlockHeight = 32;
3903         break;
3904     default:
3905         CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
3906         eStatus = MOS_STATUS_INVALID_PARAMETER;
3907     }
3908 
3909     return eStatus;
3910 }
3911 
SetBindingTable(EncOperation operation,PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable)3912 MOS_STATUS CodechalVdencHevcStateG11::SetBindingTable(
3913     EncOperation operation,
3914     PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable)
3915 {
3916     CODECHAL_ENCODE_FUNCTION_ENTER;
3917 
3918     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
3919 
3920     CODECHAL_ENCODE_CHK_NULL_RETURN(bindingTable);
3921 
3922     MOS_ZeroMemory(bindingTable, sizeof(*bindingTable));
3923 
3924     switch (operation)
3925     {
3926         case VDENC_ME_P:
3927         case VDENC_ME_B:
3928         case VDENC_STREAMIN:
3929         case VDENC_STREAMIN_HEVC:
3930             bindingTable->dwNumBindingTableEntries = CODECHAL_VDENC_HME_END_G11 - CODECHAL_VDENC_HME_BEGIN_G11;
3931             bindingTable->dwBindingTableStartOffset = CODECHAL_VDENC_HME_BEGIN_G11;
3932             break;
3933         default:
3934             CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
3935             return MOS_STATUS_INVALID_PARAMETER;
3936     }
3937 
3938     for (uint32_t i = 0; i < bindingTable->dwNumBindingTableEntries; i++)
3939     {
3940         bindingTable->dwBindingTableEntries[i] = i;
3941     }
3942     return eStatus;
3943 }
3944 
EncodeMeKernel(HmeLevel hmeLevel)3945 MOS_STATUS CodechalVdencHevcStateG11::EncodeMeKernel(HmeLevel hmeLevel)
3946 {
3947     CODECHAL_ENCODE_FUNCTION_ENTER;
3948 
3949     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
3950 
3951     auto kernelState = (hmeLevel == HME_LEVEL_4x) ? &m_vdencStreaminKernelState : &m_vdencMeKernelState;
3952     auto encFunctionType = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_MEDIA_STATE_32X_ME :
3953         (hmeLevel == HME_LEVEL_16x) ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;
3954 
3955     // If Single Task Phase is not enabled, use BT count for the kernel state.
3956     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
3957     {
3958         uint32_t maxBtCount = m_singleTaskPhaseSupported ?
3959             m_maxBtCount : kernelState->KernelParams.iBTCount;
3960         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
3961             m_stateHeapInterface,
3962             maxBtCount));
3963         m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
3964         CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
3965     }
3966 
3967     // Set up the DSH/SSH as normal
3968     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
3969         m_stateHeapInterface,
3970         kernelState,
3971         false,
3972         0,
3973         false,
3974         m_storeData));
3975 
3976     MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
3977     MOS_ZeroMemory(&idParams, sizeof(idParams));
3978     idParams.pKernelState = kernelState;
3979     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
3980         m_stateHeapInterface,
3981         1,
3982         &idParams));
3983 
3984     //Setup curbe for StreamIn Kernel
3985     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMeCurbe(hmeLevel));
3986 
3987     CODECHAL_DEBUG_TOOL(
3988     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
3989         encFunctionType,
3990         MHW_DSH_TYPE,
3991         kernelState));
3992     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
3993         encFunctionType,
3994         kernelState));
3995     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
3996         encFunctionType,
3997         MHW_ISH_TYPE,
3998         kernelState));
3999     )
4000 
4001         MOS_COMMAND_BUFFER cmdBuffer;
4002     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
4003 
4004     SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
4005     sendKernelCmdsParams.EncFunctionType = encFunctionType;
4006     sendKernelCmdsParams.pKernelState = kernelState;
4007     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
4008 
4009     // Add binding table
4010     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
4011         m_stateHeapInterface,
4012         kernelState));
4013 
4014     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMeSurfaces(hmeLevel, &cmdBuffer));
4015 
4016     // Dump SSH for ME kernel
4017     CODECHAL_DEBUG_TOOL(
4018         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
4019             encFunctionType,
4020             MHW_SSH_TYPE,
4021             kernelState)));
4022 
4023     uint32_t scalingFactor = (hmeLevel == HME_LEVEL_32x) ? SCALE_FACTOR_32x :
4024         (hmeLevel == HME_LEVEL_16x) ? SCALE_FACTOR_16x : SCALE_FACTOR_4x;
4025 
4026     uint32_t resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / scalingFactor);
4027     uint32_t resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scalingFactor);
4028 
4029     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
4030     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
4031     walkerCodecParams.WalkerMode = m_walkerMode;
4032     walkerCodecParams.dwResolutionX = resolutionX;
4033     walkerCodecParams.dwResolutionY = resolutionY;
4034     walkerCodecParams.bNoDependency = true;
4035     walkerCodecParams.bMbaff = false;
4036     walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
4037     walkerCodecParams.ucGroupId = m_groupId;
4038 
4039     MHW_WALKER_PARAMS walkerParams;
4040     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
4041         m_hwInterface,
4042         &walkerParams,
4043         &walkerCodecParams));
4044 
4045     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
4046         &cmdBuffer,
4047         &walkerParams));
4048 
4049     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
4050 
4051     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
4052     {
4053         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
4054             m_stateHeapInterface));
4055         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
4056     }
4057 
4058     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
4059         &cmdBuffer,
4060         encFunctionType,
4061         nullptr)));
4062 
4063     m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase);
4064 
4065     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
4066 
4067     MHW_MI_STORE_DATA_PARAMS    storeDataParams;
4068 
4069     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
4070     {
4071         m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
4072         m_lastTaskInPhase = false;
4073     }
4074     return eStatus;
4075 }
4076 
SetMeCurbe(HmeLevel hmeLevel)4077 MOS_STATUS CodechalVdencHevcStateG11::SetMeCurbe(HmeLevel hmeLevel)
4078 {
4079     CODECHAL_ENCODE_FUNCTION_ENTER;
4080 
4081     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4082 
4083     CODECHAL_VDENC_HEVC_ME_CURBE_G11 curbe;
4084     CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
4085         &curbe,
4086         sizeof(CODECHAL_VDENC_HEVC_ME_CURBE_G11),
4087         ME_CURBE_INIT,
4088         sizeof(CODECHAL_VDENC_HEVC_ME_CURBE_G11)));
4089 
4090     PMHW_KERNEL_STATE kernelState = (hmeLevel == HME_LEVEL_4x) ? &m_vdencStreaminKernelState : &m_vdencMeKernelState;
4091     bool useMvFromPrevStep;
4092     bool writeDistortions;
4093     uint32_t scaleFactor;
4094     uint32_t  mvShiftFactor = 0;
4095     uint32_t  prevMvReadPosFactor = 0;
4096 
4097     switch (hmeLevel)
4098     {
4099     case HME_LEVEL_32x:
4100         useMvFromPrevStep = false;
4101         writeDistortions = false;
4102         scaleFactor = SCALE_FACTOR_32x;
4103         mvShiftFactor = 1;
4104         prevMvReadPosFactor = 0;
4105         break;
4106     case HME_LEVEL_16x:
4107         useMvFromPrevStep = (m_b32XMeEnabled) ? true : false;
4108         writeDistortions = false;
4109         scaleFactor = SCALE_FACTOR_16x;
4110         mvShiftFactor = 2;
4111         prevMvReadPosFactor = 1;
4112         break;
4113     case HME_LEVEL_4x:
4114         useMvFromPrevStep = (m_b16XMeEnabled) ? true : false;
4115         writeDistortions = true;
4116         scaleFactor = SCALE_FACTOR_4x;
4117         mvShiftFactor = 2;
4118         prevMvReadPosFactor = 0;
4119         break;
4120     default:
4121         eStatus = MOS_STATUS_INVALID_PARAMETER;
4122         return eStatus;
4123         break;
4124     }
4125 
4126     curbe.DW3.SubPelMode = 3;
4127     curbe.DW4.PictureHeightMinus1 = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scaleFactor) - 1;
4128     curbe.DW4.PictureWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth / scaleFactor);
4129     curbe.DW5.QpPrimeY            = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
4130     curbe.DW6.WriteDistortions = writeDistortions;
4131     curbe.DW6.UseMvFromPrevStep = useMvFromPrevStep;
4132     curbe.DW6.SuperCombineDist = 5;//SuperCombineDist_Generic[pHevcSeqParams->TargetUsage]; Harded coded in KCM
4133     curbe.DW6.MaxVmvR = 511 * 4;
4134     curbe.DW15.MvShiftFactor = mvShiftFactor;
4135     curbe.DW15.PrevMvReadPosFactor = prevMvReadPosFactor;
4136 
4137     if (m_pictureCodingType == B_TYPE)
4138     {
4139         // This field is irrelevant since we are not using the bi-direct search.
4140         // set it to 32 to match
4141         curbe.DW1.BiWeight = m_bframeMeBidirectionalWeight;
4142         curbe.DW13.NumRefIdxL1MinusOne = m_hevcSliceParams->num_ref_idx_l1_active_minus1;
4143     }
4144 
4145     if (m_pictureCodingType == P_TYPE || m_pictureCodingType == B_TYPE)
4146     {
4147         curbe.DW13.NumRefIdxL0MinusOne = m_hevcSliceParams->num_ref_idx_l0_active_minus1;
4148     }
4149 
4150     if (hmeLevel == HME_LEVEL_4x)
4151     {
4152         curbe.DW30.ActualMBHeight = m_frameHeight;
4153         curbe.DW30.ActualMBWidth = m_frameWidth;
4154     }
4155     else
4156     {
4157         curbe.DW30.ActualMBHeight = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight);
4158         curbe.DW30.ActualMBWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth);
4159     }
4160 
4161     curbe.DW13.RefStreaminCost = 0;
4162     // This flag is to indicate the ROI source type instead of indicating ROI is enabled or not
4163     curbe.DW13.ROIEnable = 0;
4164 
4165     uint8_t meMethod = (m_pictureCodingType == B_TYPE) ? m_bMeMethodGeneric[m_hevcSeqParams->TargetUsage] : m_meMethodGeneric[m_hevcSeqParams->TargetUsage];
4166     uint8_t tableIdx = (m_pictureCodingType == B_TYPE) ? 1 : 0;
4167     CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe.SPDelta), 14 * sizeof(uint32_t),
4168         m_encodeSearchPath[tableIdx][meMethod], 14 * sizeof(uint32_t)));
4169 
4170     if (hmeLevel == HME_LEVEL_4x)
4171     {
4172         //StreamIn CURBE
4173         curbe.DW6.LCUSize            = 1;//Only LCU64 supported by the VDEnc HW
4174         // Kernel should use driver-prepared stream-in surface during ROI/ MBQP(LCUQP)/ Dirty-Rect
4175         curbe.DW6.InputStreamInEn    = (m_hevcPicParams->NumROI || m_encodeParams.bMbQpDataEnabled || (m_hevcPicParams->NumDirtyRects > 0 && (B_TYPE == m_hevcPicParams->CodingType)));
4176         curbe.DW31.MaxCuSize         = 3;
4177         curbe.DW31.MaxTuSize         = 3;
4178         switch (m_hevcSeqParams->TargetUsage)
4179         {
4180         case 1:
4181         case 4:
4182             curbe.DW36.NumMergeCandCu64x64    = 4;
4183             curbe.DW36.NumMergeCandCu32x32    = 3;
4184             curbe.DW36.NumMergeCandCu16x16    = 2;
4185             curbe.DW36.NumMergeCandCu8x8      = 1;
4186             curbe.DW31.NumImePredictors       = m_imgStateImePredictors;
4187             break;
4188         case 7:
4189             curbe.DW36.NumMergeCandCu64x64    = 2;
4190             curbe.DW36.NumMergeCandCu32x32    = 2;
4191             curbe.DW36.NumMergeCandCu16x16    = 2;
4192             curbe.DW36.NumMergeCandCu8x8      = 0;
4193             curbe.DW31.NumImePredictors       = 4;
4194             break;
4195         }
4196     }
4197 
4198     curbe.DW40._4xMeMvOutputDataSurfIndex = CODECHAL_VDENC_HME_MV_DATA_SURFACE_CM_G11;
4199     curbe.DW41._16xOr32xMeMvInputDataSurfIndex = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_VDENC_32xME_MV_DATA_SURFACE_CM_G11 : CODECHAL_VDENC_16xME_MV_DATA_SURFACE_CM_G11;
4200     curbe.DW42._4xMeOutputDistSurfIndex = CODECHAL_VDENC_HME_DISTORTION_SURFACE_CM_G11;
4201     curbe.DW43._4xMeOutputBrcDistSurfIndex = CODECHAL_VDENC_HME_BRC_DISTORTION_CM_G11;
4202     curbe.DW44.VMEFwdInterPredictionSurfIndex = CODECHAL_VDENC_HME_CURR_FOR_FWD_REF_CM_G11;
4203     curbe.DW45.VMEBwdInterPredictionSurfIndex = CODECHAL_VDENC_HME_CURR_FOR_BWD_REF_CM_G11;
4204     curbe.DW46.VDEncStreamInOutputSurfIndex = CODECHAL_VDENC_HME_VDENC_STREAMIN_OUTPUT_CM_G11;
4205     curbe.DW47.VDEncStreamInInputSurfIndex = CODECHAL_VDENC_HME_VDENC_STREAMIN_INPUT_CM_G11;
4206 
4207     CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
4208         &curbe,
4209         kernelState->dwCurbeOffset,
4210         sizeof(curbe)));
4211 
4212     return eStatus;
4213 }
4214 
SendMeSurfaces(HmeLevel hmeLevel,PMOS_COMMAND_BUFFER cmdBuffer)4215 MOS_STATUS CodechalVdencHevcStateG11::SendMeSurfaces(HmeLevel hmeLevel, PMOS_COMMAND_BUFFER cmdBuffer)
4216 {
4217     CODECHAL_ENCODE_FUNCTION_ENTER;
4218     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4219 
4220     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
4221 
4222     MOS_SURFACE *meMvDataBuffer;
4223     uint32_t downscaledWidthInMb;
4224     uint32_t downscaledHeightInMb;
4225 
4226     if (hmeLevel == HME_LEVEL_32x)
4227     {
4228         meMvDataBuffer = &m_s32XMeMvDataBuffer;
4229         downscaledWidthInMb = m_downscaledWidthInMb32x;
4230         downscaledHeightInMb = m_downscaledHeightInMb32x;
4231     }
4232     else if (hmeLevel == HME_LEVEL_16x)
4233     {
4234         meMvDataBuffer = &m_s16XMeMvDataBuffer;
4235         downscaledWidthInMb = m_downscaledWidthInMb16x;
4236         downscaledHeightInMb = m_downscaledHeightInMb16x;
4237     }
4238     else
4239     {
4240         meMvDataBuffer = &m_s4XMeMvDataBuffer;
4241         downscaledWidthInMb = m_downscaledWidthInMb4x;
4242         downscaledHeightInMb = m_downscaledHeightInMb4x;
4243     }
4244 
4245     auto width = MOS_ALIGN_CEIL(downscaledWidthInMb * 32, 64);
4246     auto height = downscaledHeightInMb * 4 * 10;
4247     // Force the values
4248     meMvDataBuffer->dwWidth = width;
4249     meMvDataBuffer->dwHeight = height;
4250     meMvDataBuffer->dwPitch = width;
4251 
4252     auto kernelState = (hmeLevel == HME_LEVEL_4x) ? &m_vdencStreaminKernelState : &m_vdencMeKernelState;
4253     auto bindingTable = (hmeLevel == HME_LEVEL_4x) ?
4254         &m_vdencStreaminKernelBindingTable : &m_vdencMeKernelBindingTable;
4255     uint32_t meMvBottomFieldOffset = 0;
4256 
4257     CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
4258     MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4259     surfaceCodecParams.bIs2DSurface = true;
4260     surfaceCodecParams.bMediaBlockRW = true;
4261     surfaceCodecParams.psSurface = meMvDataBuffer;
4262     surfaceCodecParams.dwOffset = meMvBottomFieldOffset;
4263     surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value;
4264     surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_MV_DATA_SURFACE_CM_G11];
4265     surfaceCodecParams.bIsWritable = true;
4266     surfaceCodecParams.bRenderTarget = true;
4267     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4268         m_hwInterface,
4269         cmdBuffer,
4270         &surfaceCodecParams,
4271         kernelState));
4272 
4273     if (hmeLevel == HME_LEVEL_16x && m_b32XMeEnabled)
4274     {
4275         // Pass 32x MV to 16x ME operation
4276         MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4277         surfaceCodecParams.bIs2DSurface = true;
4278         surfaceCodecParams.bMediaBlockRW = true;
4279         surfaceCodecParams.psSurface = &m_s32XMeMvDataBuffer;
4280         surfaceCodecParams.dwOffset = 0;
4281         surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value;
4282         surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_32xME_MV_DATA_SURFACE_CM_G11];
4283         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4284             m_hwInterface,
4285             cmdBuffer,
4286             &surfaceCodecParams,
4287             kernelState));
4288     }
4289     else if (!(hmeLevel == HME_LEVEL_32x) && m_b16XMeEnabled)
4290     {
4291         // Pass 16x MV to 4x ME operation
4292         MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4293         surfaceCodecParams.bIs2DSurface = true;
4294         surfaceCodecParams.bMediaBlockRW = true;
4295         surfaceCodecParams.psSurface = &m_s16XMeMvDataBuffer;
4296         surfaceCodecParams.dwOffset = 0;
4297         surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value;
4298         surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_16xME_MV_DATA_SURFACE_CM_G11];
4299         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4300             m_hwInterface,
4301             cmdBuffer,
4302             &surfaceCodecParams,
4303             kernelState));
4304 
4305         MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4306         surfaceCodecParams.bIs2DSurface = true;
4307         surfaceCodecParams.bMediaBlockRW = true;
4308         surfaceCodecParams.psSurface = &m_s4XMeDistortionBuffer;
4309         surfaceCodecParams.dwOffset = 0;
4310         surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_DISTORTION_SURFACE_CM_G11];
4311         surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value;
4312         surfaceCodecParams.bIsWritable = true;
4313         surfaceCodecParams.bRenderTarget = true;
4314         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4315             m_hwInterface,
4316             cmdBuffer,
4317             &surfaceCodecParams,
4318             kernelState));
4319     }
4320 
4321     PMOS_SURFACE currScaledSurface = (hmeLevel == HME_LEVEL_4x) ? m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER) :
4322         ((hmeLevel == HME_LEVEL_16x) ? m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER) : m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER));
4323     MOS_SURFACE refScaledSurface = *currScaledSurface;
4324     bool currFieldPicture = CodecHal_PictureIsField(m_currOriginalPic) ? true : false;
4325     bool currBottomField = CodecHal_PictureIsBottomField(m_currOriginalPic) ? true : false;
4326 
4327     uint8_t currVDirection = (!currFieldPicture) ? CODECHAL_VDIRECTION_FRAME :
4328         ((currBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
4329     uint32_t currScaledBottomFieldOffset = (hmeLevel == HME_LEVEL_4x) ?
4330         (uint32_t)m_scaledBottomFieldOffset : ((hmeLevel == HME_LEVEL_16x) ? (uint32_t)m_scaled16xBottomFieldOffset : (uint32_t)m_scaled32xBottomFieldOffset);
4331 
4332     // Setup references 1...n
4333     // LIST 0 references
4334     for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
4335     {
4336         CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][refIdx];
4337 
4338         if (!CodecHal_PictureIsInvalid(refPic))
4339         {
4340             if (refIdx == 0)
4341             {
4342                 // Current Picture Y - VME
4343                 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4344                 surfaceCodecParams.bUseAdvState = true;
4345                 surfaceCodecParams.psSurface = currScaledSurface;
4346                 surfaceCodecParams.dwOffset = currBottomField ? currScaledBottomFieldOffset : 0;
4347                 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value;
4348                 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_CURR_FOR_FWD_REF_CM_G11];
4349                 surfaceCodecParams.ucVDirection = currVDirection;
4350                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4351                     m_hwInterface,
4352                     cmdBuffer,
4353                     &surfaceCodecParams,
4354                     kernelState));
4355             }
4356 
4357             bool refFieldPicture = CodecHal_PictureIsField(refPic) ? true : false;
4358             bool refBottomField = CodecHal_PictureIsBottomField(refPic) ? true : false;
4359             uint8_t refPicIdx       = m_picIdx[refPic.FrameIdx].ucPicIdx;
4360             uint8_t scaledIdx       = m_refList[refPicIdx]->ucScalingIdx;
4361             if (hmeLevel == HME_LEVEL_4x)
4362             {
4363                 refScaledSurface.OsResource = m_trackedBuf->Get4xDsSurface(scaledIdx)->OsResource;
4364             }
4365             else if (hmeLevel == HME_LEVEL_16x)
4366             {
4367                 refScaledSurface.OsResource = m_trackedBuf->Get16xDsSurface(scaledIdx)->OsResource;
4368             }
4369             else
4370             {
4371                 refScaledSurface.OsResource = m_trackedBuf->Get32xDsSurface(scaledIdx)->OsResource;
4372             }
4373             uint32_t refScaledBottomFieldOffset = refBottomField ? currScaledBottomFieldOffset : 0;
4374 
4375             // L0 Reference Picture Y - VME
4376             MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4377             surfaceCodecParams.bUseAdvState = true;
4378             surfaceCodecParams.psSurface = &refScaledSurface;
4379             surfaceCodecParams.dwOffset = refBottomField ? refScaledBottomFieldOffset : 0;
4380             surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value;
4381             surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_FWD_REF_IDX0_CM_G11 + (refIdx * 2)];
4382             surfaceCodecParams.ucVDirection = !currFieldPicture ? CODECHAL_VDIRECTION_FRAME :
4383                 ((refBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
4384             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4385                 m_hwInterface,
4386                 cmdBuffer,
4387                 &surfaceCodecParams,
4388                 kernelState));
4389         }
4390     }
4391 
4392     //List1
4393     for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l1_active_minus1; refIdx++)
4394     {
4395         CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_1][refIdx];
4396 
4397         if (!CodecHal_PictureIsInvalid(refPic))
4398         {
4399             if (refIdx == 0)
4400             {
4401                 // Current Picture Y - VME
4402                 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4403                 surfaceCodecParams.bUseAdvState = true;
4404                 surfaceCodecParams.psSurface = currScaledSurface;
4405                 surfaceCodecParams.dwOffset = currBottomField ? currScaledBottomFieldOffset : 0;
4406                 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value;
4407                 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_CURR_FOR_BWD_REF_CM_G11];
4408                 surfaceCodecParams.ucVDirection = currVDirection;
4409                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4410                     m_hwInterface,
4411                     cmdBuffer,
4412                     &surfaceCodecParams,
4413                     kernelState));
4414             }
4415 
4416             bool refFieldPicture = CodecHal_PictureIsField(refPic) ? 1 : 0;
4417             bool refBottomField = CodecHal_PictureIsBottomField(refPic) ? 1 : 0;
4418             auto    refPicIdx       = m_picIdx[refPic.FrameIdx].ucPicIdx;
4419             uint8_t scaledIdx       = m_refList[refPicIdx]->ucScalingIdx;
4420 
4421             if (hmeLevel == HME_LEVEL_4x)
4422             {
4423                 refScaledSurface.OsResource = m_trackedBuf->Get4xDsSurface(scaledIdx)->OsResource;
4424             }
4425             else if (hmeLevel == HME_LEVEL_16x)
4426             {
4427                 refScaledSurface.OsResource = m_trackedBuf->Get16xDsSurface(scaledIdx)->OsResource;
4428             }
4429             else
4430             {
4431                 refScaledSurface.OsResource = m_trackedBuf->Get32xDsSurface(scaledIdx)->OsResource;
4432             }
4433             uint32_t refScaledBottomFieldOffset = refBottomField ? currScaledBottomFieldOffset : 0;
4434 
4435             // L1 Reference Picture Y - VME
4436             MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4437             surfaceCodecParams.bUseAdvState = true;
4438             surfaceCodecParams.psSurface = &refScaledSurface;
4439             surfaceCodecParams.dwOffset = refBottomField ? refScaledBottomFieldOffset : 0;
4440             surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value;
4441             surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_BWD_REF_IDX0_CM_G11 + (refIdx * 2)];
4442             surfaceCodecParams.ucVDirection = !currFieldPicture ? CODECHAL_VDIRECTION_FRAME :
4443                 ((refBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
4444             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4445                 m_hwInterface,
4446                 cmdBuffer,
4447                 &surfaceCodecParams,
4448                 kernelState));
4449         }
4450     }
4451 
4452     if (hmeLevel == HME_LEVEL_4x)
4453     {
4454         CODECHAL_ENCODE_CHK_NULL_RETURN(&m_resVdencStreamInBuffer[m_currRecycledBufIdx]);
4455 
4456         auto streamingSize = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32) * (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32) * CODECHAL_CACHELINE_SIZE;
4457 
4458         // Send driver-prepared stream-in surface as input during ROI/ MBQP(LCUQP)/ Dirty-Rect
4459         if (m_hevcPicParams->NumROI || m_encodeParams.bMbQpDataEnabled || (m_hevcPicParams->NumDirtyRects > 0 && (B_TYPE == m_hevcPicParams->CodingType)))
4460         {
4461             MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4462             surfaceCodecParams.dwSize = MOS_BYTES_TO_DWORDS(streamingSize);
4463             surfaceCodecParams.bIs2DSurface = false;
4464             surfaceCodecParams.presBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
4465             surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_VDENC_STREAMIN_CODEC].Value;
4466             surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_VDENC_STREAMIN_INPUT_CM_G11];
4467             surfaceCodecParams.bIsWritable = true;
4468             surfaceCodecParams.bRenderTarget = true;
4469             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4470                 m_hwInterface,
4471                 cmdBuffer,
4472                 &surfaceCodecParams,
4473                 kernelState));
4474         }
4475         else    // Clear stream-in surface otherwise
4476         {
4477             MOS_LOCK_PARAMS lockFlags;
4478             MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4479             lockFlags.WriteOnly = true;
4480 
4481             auto data = m_osInterface->pfnLockResource(
4482                 m_osInterface,
4483                 &m_resVdencStreamInBuffer[m_currRecycledBufIdx],
4484                 &lockFlags);
4485 
4486             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4487 
4488             MOS_ZeroMemory(
4489                 data,
4490                 streamingSize);
4491 
4492             m_osInterface->pfnUnlockResource(
4493                 m_osInterface,
4494                 &m_resVdencStreamInBuffer[m_currRecycledBufIdx]);
4495         }
4496 
4497         MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4498         surfaceCodecParams.dwSize = MOS_BYTES_TO_DWORDS(streamingSize);
4499         surfaceCodecParams.bIs2DSurface = false;
4500         surfaceCodecParams.presBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
4501         surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_VDENC_STREAMIN_CODEC].Value;
4502         surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_VDENC_STREAMIN_OUTPUT_CM_G11];
4503         surfaceCodecParams.bIsWritable = true;
4504         surfaceCodecParams.bRenderTarget = true;
4505         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4506             m_hwInterface,
4507             cmdBuffer,
4508             &surfaceCodecParams,
4509             kernelState));
4510     }
4511 
4512     return eStatus;
4513 }
4514 
4515 MOS_STATUS
GetKernelHeaderAndSize(void * binary,EncOperation operation,uint32_t krnStateIdx,void * krnHeader,uint32_t * krnSize)4516 CodechalVdencHevcStateG11::GetKernelHeaderAndSize(
4517     void                           *binary,
4518     EncOperation                   operation,
4519     uint32_t                       krnStateIdx,
4520     void                           *krnHeader,
4521     uint32_t                       *krnSize)
4522 {
4523     CODECHAL_ENCODE_FUNCTION_ENTER;
4524 
4525     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4526 
4527     CODECHAL_ENCODE_CHK_NULL_RETURN(binary);
4528     CODECHAL_ENCODE_CHK_NULL_RETURN(krnHeader);
4529     CODECHAL_ENCODE_CHK_NULL_RETURN(krnSize);
4530 
4531     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(binary, operation, krnStateIdx, krnHeader, krnSize));
4532 
4533     return eStatus;
4534 }
4535 
GetSystemPipeNumberCommon()4536 MOS_STATUS CodechalVdencHevcStateG11::GetSystemPipeNumberCommon()
4537 {
4538     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4539 
4540     CODECHAL_ENCODE_FUNCTION_ENTER;
4541 
4542     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
4543     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4544 
4545     MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
4546     statusKey = MOS_UserFeature_ReadValue_ID(
4547         nullptr,
4548         __MEDIA_USER_FEATURE_VALUE_ENCODE_DISABLE_SCALABILITY,
4549         &userFeatureData,
4550         m_osInterface->pOsContext);
4551 
4552     bool disableScalability = m_hwInterface->IsDisableScalability();
4553     if (statusKey == MOS_STATUS_SUCCESS)
4554     {
4555         disableScalability = userFeatureData.i32Data ? true : false;
4556     }
4557 
4558     MEDIA_SYSTEM_INFO *gtSystemInfo = m_osInterface->pfnGetGtSystemInfo(m_osInterface);
4559     CODECHAL_ENCODE_CHK_NULL_RETURN(gtSystemInfo);
4560 
4561     if (gtSystemInfo && disableScalability == false)
4562     {
4563         // Both VE mode and media solo mode should be able to get the VDBOX number via the same interface
4564         m_numVdbox = (uint8_t)(gtSystemInfo->VDBoxInfo.NumberOfVDBoxEnabled);
4565     }
4566     else
4567     {
4568         m_numVdbox = 1;
4569     }
4570 
4571     CODECHAL_ENCODE_VERBOSEMESSAGE("System VDBOX number = %d.", m_numVdbox);
4572 
4573     return eStatus;
4574 }
4575 
HucPakIntegrate(PMOS_COMMAND_BUFFER cmdBuffer)4576 MOS_STATUS CodechalVdencHevcStateG11::HucPakIntegrate(
4577     PMOS_COMMAND_BUFFER cmdBuffer)
4578 {
4579     CODECHAL_ENCODE_FUNCTION_ENTER;
4580 
4581     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4582 
4583     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
4584 
4585     CODECHAL_ENCODE_CHK_COND_RETURN(
4586         (m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),
4587         "ERROR - vdbox index exceed the maximum");
4588 
4589     auto mmioRegisters = m_hwInterface->GetHucInterface()->GetMmioRegisters(m_vdboxIndex);
4590 
4591     // load kernel from WOPCM into L2 storage RAM
4592     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
4593     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
4594     imemParams.dwKernelDescriptor = VDBOX_HUC_PAK_INTEGRATION_KERNEL_DESCRIPTOR;
4595 
4596     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucImemStateCmd(cmdBuffer, &imemParams));
4597 
4598     // pipe mode select
4599     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
4600     pipeModeSelectParams.Mode = m_mode;
4601     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
4602 
4603     // DMEM set
4604     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
4605     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrate(&dmemParams));
4606     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucDmemStateCmd(cmdBuffer, &dmemParams));
4607 
4608     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
4609     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrate(&virtualAddrParams));
4610     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams));
4611 
4612     // Write HUC_STATUS2 mask - bit 6 - valid IMEM loaded
4613     MHW_MI_STORE_DATA_PARAMS storeDataParams;
4614     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
4615     storeDataParams.pOsResource = &m_resHucStatus2Buffer;
4616     storeDataParams.dwResourceOffset = 0;
4617     storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatus2ImemLoadedMask();
4618     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
4619 
4620     // Store HUC_STATUS2 register
4621     MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
4622     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
4623     storeRegParams.presStoreBuffer = &m_resHucStatus2Buffer;
4624     storeRegParams.dwOffset = sizeof(uint32_t);
4625     storeRegParams.dwRegister = mmioRegisters->hucStatus2RegOffset;
4626     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams));
4627 
4628     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(cmdBuffer));
4629 
4630     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucStartCmd(cmdBuffer, true));
4631 
4632     // wait Huc completion (use HEVC bit for now)
4633     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
4634     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
4635     vdPipeFlushParams.Flags.bFlushHEVC = 1;
4636     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
4637     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetVdencInterface()->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
4638 
4639     // Flush the engine to ensure memory written out
4640     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
4641     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
4642     flushDwParams.bVideoPipelineCacheInvalidate = true;
4643     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
4644 
4645     EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
4646 
4647     uint32_t baseOffset =
4648         (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2;  // pEncodeStatus is offset by 2 DWs in the resource
4649 
4650                                                                                              // Write HUC_STATUS mask
4651     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
4652     storeDataParams.pOsResource = &encodeStatusBuf.resStatusBuffer;
4653     storeDataParams.dwResourceOffset = baseOffset + encodeStatusBuf.dwHuCStatusMaskOffset;
4654     storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatusReEncodeMask();
4655     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
4656         cmdBuffer,
4657         &storeDataParams));
4658 
4659     // store HUC_STATUS register
4660     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
4661     storeRegParams.presStoreBuffer = &encodeStatusBuf.resStatusBuffer;
4662     storeRegParams.dwOffset = baseOffset + encodeStatusBuf.dwHuCStatusRegOffset;
4663     storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
4664     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(
4665         cmdBuffer,
4666         &storeRegParams));
4667 
4668     return eStatus;
4669 }
4670 
Initialize(CodechalSetting * settings)4671 MOS_STATUS CodechalVdencHevcStateG11::Initialize(CodechalSetting * settings)
4672 {
4673     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4674 
4675     CODECHAL_ENCODE_FUNCTION_ENTER;
4676 
4677     //create child class
4678     m_hucCmdInitializer = MOS_New(CodechalCmdInitializerG11, this);
4679 
4680     // common initilization
4681     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::Initialize(settings));
4682 
4683     m_deltaQpRoiBufferSize = m_deltaQpBufferSize;
4684     m_brcRoiBufferSize = m_roiStreamInBufferSize;
4685     m_maxTileNumber = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE) *
4686         CODECHAL_GET_HEIGHT_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MIN_TILE_SIZE);
4687 
4688     // we need additional buffer for (1) 1 CL for size info at the beginning of each tile column (max of 4 vdbox in scalability mode)
4689     // (2) CL alignment at end of every tile column
4690     // as a result, increase the height by 1 for allocation purposes
4691     m_numLcu = MOS_ROUNDUP_DIVIDE(m_frameWidth, MAX_LCU_SIZE) * (MOS_ROUNDUP_DIVIDE(m_frameHeight, MAX_LCU_SIZE) + 1);
4692     m_mbCodeSize = MOS_ALIGN_CEIL(2 * sizeof(uint32_t) * (m_numLcu * 5 + m_numLcu * 64 * 8), CODECHAL_PAGE_SIZE);
4693     m_mbCodeSize += m_mvOffset;
4694 
4695     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetSystemPipeNumberCommon());
4696 
4697     if (MOS_VE_SUPPORTED(m_osInterface))
4698     {
4699         m_scalabilityState = (PCODECHAL_ENCODE_SCALABILITY_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SCALABILITY_STATE));
4700         CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalabilityState);
4701         //scalability initialize
4702         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_InitializeState(m_scalabilityState, m_hwInterface));
4703     }
4704 
4705     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
4706     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4707     MOS_UserFeature_ReadValue_ID(
4708         nullptr,
4709         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_STITCH,
4710         &userFeatureData,
4711         m_osInterface->pOsContext);
4712     m_enableTileStitchByHW = userFeatureData.i32Data ? true : false;
4713 
4714     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4715     MOS_UserFeature_ReadValue_ID(
4716         nullptr,
4717         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_SEMAPHORE,
4718         &userFeatureData,
4719         m_osInterface->pOsContext);
4720     m_enableHWSemaphore = userFeatureData.i32Data ? true : false;
4721 
4722     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4723     MOS_UserFeature_ReadValue_ID(
4724         nullptr,
4725         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VDBOX_HW_SEMAPHORE,
4726         &userFeatureData,
4727         m_osInterface->pOsContext);
4728     m_enableVdBoxHWSemaphore = userFeatureData.i32Data ? true : false;
4729 
4730     m_numDelay = 15;
4731 
4732 #if (_DEBUG || _RELEASE_INTERNAL)
4733     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4734     MOS_UserFeature_ReadValue_ID(
4735         nullptr,
4736         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE,
4737         &userFeatureData,
4738         m_osInterface->pOsContext);
4739     m_kmdVeOveride.Value = (uint64_t)userFeatureData.i64Data;
4740 #endif
4741 
4742     if (settings->disableUltraHME)
4743     {
4744         m_32xMeSupported = false;
4745         m_b32XMeEnabled = false;
4746     }
4747 
4748     if (settings->disableSuperHME)
4749     {
4750         m_16xMeSupported = false;
4751         m_b16XMeEnabled = false;
4752     }
4753 
4754     return eStatus;
4755 }
4756 
CodechalVdencHevcStateG11(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)4757 CodechalVdencHevcStateG11::CodechalVdencHevcStateG11(
4758     CodechalHwInterface* hwInterface,
4759     CodechalDebugInterface* debugInterface,
4760     PCODECHAL_STANDARD_INFO standardInfo)
4761     :CodechalVdencHevcState(hwInterface, debugInterface, standardInfo)
4762 {
4763     CODECHAL_ENCODE_FUNCTION_ENTER;
4764 
4765     m_useCommonKernel = true;
4766     pfnGetKernelHeaderAndSize = GetKernelHeaderAndSize;
4767     m_useHwScoreboard = false;
4768 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
4769     m_kernelBase = (uint8_t*)IGCODECKRN_G11;
4770 #endif
4771     m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
4772     m_hucPakStitchEnabled = true;
4773     m_scalabilityState = nullptr;
4774     m_brcAdaptiveRegionBoostSupported = true;
4775 
4776     MOS_ZeroMemory(&m_resPakcuLevelStreamoutData, sizeof(m_resPakcuLevelStreamoutData));
4777     MOS_ZeroMemory(&m_resPakSliceLevelStreamoutData, sizeof(m_resPakSliceLevelStreamoutData));
4778     MOS_ZeroMemory(m_resTileBasedStatisticsBuffer, sizeof(m_resTileBasedStatisticsBuffer));
4779     MOS_ZeroMemory(m_tileRecordBuffer, sizeof(m_tileRecordBuffer));
4780     MOS_ZeroMemory(&m_resHuCPakAggregatedFrameStatsBuffer, sizeof(m_resHuCPakAggregatedFrameStatsBuffer));
4781     MOS_ZeroMemory(&m_kmdVeOveride, sizeof(m_kmdVeOveride));
4782     MOS_ZeroMemory(&m_resHcpScalabilitySyncBuffer, sizeof(m_resHcpScalabilitySyncBuffer));
4783 
4784     MOS_ZeroMemory(m_veBatchBuffer, sizeof(m_veBatchBuffer));
4785     MOS_ZeroMemory(&m_realCmdBuffer, sizeof(m_realCmdBuffer));
4786     MOS_ZeroMemory(&m_resBrcSemaphoreMem, sizeof(m_resBrcSemaphoreMem));
4787     MOS_ZeroMemory(&m_resBrcPakSemaphoreMem, sizeof(m_resBrcPakSemaphoreMem));
4788     MOS_ZeroMemory(m_resVdBoxSemaphoreMem, sizeof(m_resVdBoxSemaphoreMem));
4789     MOS_ZeroMemory(&m_resPipeStartSemaMem, sizeof(m_resPipeStartSemaMem));
4790     MOS_ZeroMemory(&m_resSyncSemaMem, sizeof(m_resSyncSemaMem));
4791 
4792     CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_osInterface);
4793     for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
4794     {
4795         for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
4796         {
4797             MOS_ZeroMemory(&m_resHucPakStitchDmemBuffer[k][i], sizeof(m_resHucPakStitchDmemBuffer[k][i]));  // HuC Pak Integration Dmem data for each pass
4798         }
4799     }
4800     MOS_ZeroMemory(&m_resBrcDataBuffer, sizeof(m_resBrcDataBuffer));
4801 
4802     m_vdencBrcInitDmemBufferSize = sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_INIT_DMEM_G11);
4803     m_vdencBrcUpdateDmemBufferSize = sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_UPDATE_DMEM_G11);
4804     m_vdencBrcConstDataBufferSize = sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_CONSTANT_DATA_G11);
4805     m_maxNumSlicesSupported        = CODECHAL_VDENC_HEVC_MAX_SLICE_NUM;
4806 
4807     m_hwInterface->GetStateHeapSettings()->dwNumSyncTags = CODECHAL_ENCODE_HEVC_NUM_SYNC_TAGS;
4808     m_hwInterface->GetStateHeapSettings()->dwDshSize = CODECHAL_INIT_DSH_SIZE_HEVC_ENC;
4809 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
4810     m_kernelBase = (uint8_t*)IGCODECKRN_G11;
4811 #endif
4812 
4813     MOS_STATUS eStatus = CodecHalGetKernelBinaryAndSize(
4814         m_kernelBase,
4815         m_kuidCommon,
4816         &m_kernelBinary,
4817         &m_combinedKernelSize);
4818     CODECHAL_ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
4819 
4820     m_hwInterface->GetStateHeapSettings()->dwIshSize +=
4821         MOS_ALIGN_CEIL(m_combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
4822 
4823     m_osInterface->pfnVirtualEngineSupported(m_osInterface, false, true);
4824     Mos_SetVirtualEngineSupported(m_osInterface, true);
4825 
4826     CODECHAL_DEBUG_TOOL(
4827         CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_encodeParState = MOS_New(CodechalDebugEncodeParG11, this));
4828     )
4829 }
4830 
SetRegionsHuCPakIntegrate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)4831 MOS_STATUS CodechalVdencHevcStateG11::SetRegionsHuCPakIntegrate(
4832     PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS  virtualAddrParams)
4833 {
4834     CODECHAL_ENCODE_FUNCTION_ENTER;
4835 
4836     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
4837 
4838     MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
4839     // Add Virtual addr
4840     virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;  // Region 0 - Tile based input statistics from PAK/ VDEnc
4841     virtualAddrParams->regionParams[0].dwOffset   = 0;
4842     virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource;  // Region 1 - HuC Frame statistics output
4843     virtualAddrParams->regionParams[1].isWritable = true;
4844     virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer;                         // Region 4 - Last Tile bitstream
4845     virtualAddrParams->regionParams[4].dwOffset   = MOS_ALIGN_FLOOR(m_tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
4846     virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer;                         // Region 5 - HuC modifies the last tile bitstream before stitch command
4847     virtualAddrParams->regionParams[5].dwOffset   = MOS_ALIGN_FLOOR(m_tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
4848     virtualAddrParams->regionParams[5].isWritable = true;
4849     virtualAddrParams->regionParams[6].presRegion = &m_vdencBrcHistoryBuffer;                 // Region 6  History Buffer (Input/Output)
4850     virtualAddrParams->regionParams[6].isWritable = true;
4851     virtualAddrParams->regionParams[7].presRegion = &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource;                // Region 7 - HCP PIC state command
4852     virtualAddrParams->regionParams[9].presRegion = &m_resBrcDataBuffer;                           // Region 9  HuC outputs BRC data
4853     virtualAddrParams->regionParams[9].isWritable = true;
4854 
4855     virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource;          // Region 15 [In/Out] - Tile Record Buffer
4856     virtualAddrParams->regionParams[15].dwOffset   = 0;                                                                 // Tile record is at offset 0
4857 
4858     return eStatus;
4859 }
4860 
SetDmemHuCPakIntegrate(PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)4861 MOS_STATUS CodechalVdencHevcStateG11::SetDmemHuCPakIntegrate(
4862     PMHW_VDBOX_HUC_DMEM_STATE_PARAMS    dmemParams)
4863 {
4864     CODECHAL_ENCODE_FUNCTION_ENTER;
4865 
4866     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4867 
4868     MOS_LOCK_PARAMS lockFlagsWriteOnly;
4869     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
4870     lockFlagsWriteOnly.WriteOnly = true;
4871 
4872     int32_t currentPass = GetCurrentPass();
4873     if (currentPass < 0 || currentPass >= CODECHAL_VDENC_BRC_NUM_OF_PASSES)
4874     {
4875         eStatus = MOS_STATUS_INVALID_PARAMETER;
4876         return eStatus;
4877     }
4878 
4879     HucPakStitchDmemVdencG11* hucPakStitchDmem = (HucPakStitchDmemVdencG11*)m_osInterface->pfnLockResource(
4880         m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly);
4881     CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem);
4882     MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemVdencG11));
4883 
4884     // Reset all the offsets to be shared in the huc dmem (6*5 DW's)
4885     memset(hucPakStitchDmem, 0xFF, 120);
4886 
4887     uint16_t numTileRows                        = m_hevcPicParams->num_tile_rows_minus1 + 1;
4888     uint16_t numTileColumns                     = m_hevcPicParams->num_tile_columns_minus1 + 1;
4889     uint16_t numTiles                           = numTileRows*numTileColumns;
4890     uint16_t numTilesPerPipe                    = m_numTiles / m_numPipe;
4891 
4892     hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE;
4893     // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record
4894     hucPakStitchDmem->OffsetInCommandBuffer    = m_tileParams[m_numTiles - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
4895     hucPakStitchDmem->PicWidthInPixel          = (uint16_t)m_frameWidth;
4896     hucPakStitchDmem->PicHeightInPixel         = (uint16_t)m_frameHeight;
4897     hucPakStitchDmem->TotalNumberOfPAKs        = m_numPipe;
4898     hucPakStitchDmem->Codec                    = 2;             // 1: HEVC DP; 2: HEVC VDEnc; 3: VP9 VDEnc
4899     hucPakStitchDmem->MAXPass                  = m_brcEnabled ? CODECHAL_VDENC_BRC_NUM_OF_PASSES : 1;
4900     hucPakStitchDmem->CurrentPass              = (uint8_t) currentPass+1;      // // Current BRC pass [1..MAXPass]
4901     hucPakStitchDmem->MinCUSize                = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
4902     hucPakStitchDmem->CabacZeroWordFlag        = false;
4903     hucPakStitchDmem->bitdepth_luma            = m_hevcSeqParams->bit_depth_luma_minus8 + 8;    // default: 8
4904     hucPakStitchDmem->bitdepth_chroma          = m_hevcSeqParams->bit_depth_chroma_minus8 + 8;  // default: 8
4905     hucPakStitchDmem->ChromaFormatIdc          = m_hevcSeqParams->chroma_format_idc;
4906 
4907     hucPakStitchDmem->LastTileBS_StartInBytes = (m_tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE) & (CODECHAL_PAGE_SIZE - 1);
4908     hucPakStitchDmem->PIC_STATE_StartInBytes   = (uint16_t)m_picStateCmdStartInBytes;
4909 
4910     if (m_numPipe > 1)
4911     {
4912         //Set the kernel output offsets
4913         hucPakStitchDmem->HEVC_PAKSTAT_offset[0]   = m_hevcFrameStatsOffset.uiHevcPakStatistics;
4914         hucPakStitchDmem->HEVC_Streamout_offset[0] = m_hevcFrameStatsOffset.uiHevcSliceStreamout;
4915         hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord;
4916         hucPakStitchDmem->VDENCSTAT_offset[0]      = m_hevcFrameStatsOffset.uiVdencStatistics;
4917 
4918         for (auto i = 0; i < m_numPipe; i++)
4919         {
4920             hucPakStitchDmem->NumTiles[i]   = numTilesPerPipe;
4921             hucPakStitchDmem->NumSlices[i]  = numTilesPerPipe;    // With tiling enabled, DDI restriction to have one slice per tile
4922 
4923             // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic.
4924             // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region.
4925             hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) + m_hevcTileStatsOffset.uiTileSizeRecord;
4926             hucPakStitchDmem->HEVC_PAKSTAT_offset[i + 1]   = (i * numTilesPerPipe * m_hevcStatsSize.uiHevcPakStatistics) + m_hevcTileStatsOffset.uiHevcPakStatistics;
4927             hucPakStitchDmem->VDENCSTAT_offset[i + 1]      = (i * numTilesPerPipe * m_hevcStatsSize.uiVdencStatistics) + m_hevcTileStatsOffset.uiVdencStatistics;
4928             hucPakStitchDmem->HEVC_Streamout_offset[i + 1] = (i * hucPakStitchDmem->NumSlices[i] * CODECHAL_CACHELINE_SIZE) + m_hevcTileStatsOffset.uiHevcSliceStreamout;
4929             // All VP9 surfaces already initialized to 0xFFFFFFFF
4930         }
4931     }
4932     else
4933     {
4934         hucPakStitchDmem->NumTiles[0]               = numTiles;
4935         hucPakStitchDmem->TotalNumberOfPAKs         = m_numPipe;
4936 
4937         // non-scalable mode, only VDEnc statistics need to be aggregated
4938         hucPakStitchDmem->VDENCSTAT_offset[0] = m_hevcFrameStatsOffset.uiVdencStatistics;
4939         hucPakStitchDmem->VDENCSTAT_offset[1] = m_hevcTileStatsOffset.uiVdencStatistics;
4940     }
4941 
4942     m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]));
4943 
4944     MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS));
4945     dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]);
4946     dmemParams->dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG11), CODECHAL_CACHELINE_SIZE);
4947     dmemParams->dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
4948 
4949     return eStatus;
4950 }
4951 
DumpHucDebugOutputBuffers()4952 MOS_STATUS CodechalVdencHevcStateG11::DumpHucDebugOutputBuffers()
4953 {
4954     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4955 
4956     // Virtual Engine does only one submit per pass. Dump all HuC debug outputs
4957     bool dumpDebugBuffers = IsLastPipe();
4958     if (m_singleTaskPhaseSupported)
4959     {
4960         dumpDebugBuffers = dumpDebugBuffers && IsLastPass();
4961     }
4962 
4963     if (dumpDebugBuffers)
4964     {
4965         CODECHAL_DEBUG_TOOL(
4966             if(m_vdencHucUsed)
4967             {
4968                 DumpHucBrcInit();
4969                 DumpHucBrcUpdate(true);
4970                 DumpHucBrcUpdate(false);
4971                 DumpHucPakIntegrate();
4972             }
4973             else
4974             {
4975                 DumpHucCqp();
4976             }
4977         )
4978     }
4979 
4980     return eStatus;
4981 }
4982 
AddVdencWalkerStateCmd(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)4983 MOS_STATUS CodechalVdencHevcStateG11::AddVdencWalkerStateCmd(
4984     PMOS_COMMAND_BUFFER cmdBuffer,
4985     PMHW_VDBOX_HEVC_SLICE_STATE params)
4986 {
4987     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4988 
4989     CODECHAL_ENCODE_FUNCTION_ENTER;
4990 
4991     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
4992     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
4993 
4994     MHW_VDBOX_VDENC_WALKER_STATE_PARAMS_G11 vdencWalkerStateParams;
4995     vdencWalkerStateParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
4996     vdencWalkerStateParams.pHevcEncSeqParams = params->pEncodeHevcSeqParams;
4997     vdencWalkerStateParams.pHevcEncPicParams = params->pEncodeHevcPicParams;
4998     vdencWalkerStateParams.pEncodeHevcSliceParams = params->pEncodeHevcSliceParams;
4999     vdencWalkerStateParams.pTileCodingParams = static_cast<PMHW_VDBOX_HEVC_SLICE_STATE_G11>(params)->pTileCodingParams;
5000     vdencWalkerStateParams.dwTileId = static_cast<PMHW_VDBOX_HEVC_SLICE_STATE_G11>(params)->dwTileID;
5001     switch (static_cast<PMHW_VDBOX_HEVC_SLICE_STATE_G11>(params)->dwNumPipe)
5002     {
5003     case 0:
5004     case 1:
5005         vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_SINGLE_PIPE;
5006         break;
5007     case 2:
5008         vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_TWO_PIPE;
5009         break;
5010     case 4:
5011         vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_FOUR_PIPE;
5012         break;
5013     default:
5014         vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_INVALID;
5015         break;
5016     }
5017     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWalkerStateCmd(cmdBuffer, &vdencWalkerStateParams));
5018 
5019     return eStatus;
5020 }
5021 
CreateMhwParams()5022 void CodechalVdencHevcStateG11::CreateMhwParams()
5023 {
5024     m_sliceStateParams = MOS_New(MHW_VDBOX_HEVC_SLICE_STATE_G11);
5025     m_pipeModeSelectParams = MOS_New(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11);
5026     m_pipeBufAddrParams = MOS_New(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G11);
5027 }
5028 
CalculatePictureStateCommandSize()5029 MOS_STATUS CodechalVdencHevcStateG11::CalculatePictureStateCommandSize()
5030 {
5031     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5032 
5033     CODECHAL_ENCODE_FUNCTION_ENTER;
5034 
5035     MHW_VDBOX_STATE_CMDSIZE_PARAMS_G11 stateCmdSizeParams;
5036     CODECHAL_ENCODE_CHK_STATUS_RETURN(
5037         m_hwInterface->GetHxxStateCommandSize(
5038             CODECHAL_ENCODE_MODE_HEVC,
5039             &m_defaultPictureStatesSize,
5040             &m_defaultPicturePatchListSize,
5041             &stateCmdSizeParams));
5042 
5043     return eStatus;
5044 }
5045 
SetPipeBufAddr(PMOS_COMMAND_BUFFER cmdBuffer)5046 MOS_STATUS CodechalVdencHevcStateG11::SetPipeBufAddr(
5047     PMOS_COMMAND_BUFFER cmdBuffer)
5048 {
5049     CODECHAL_ENCODE_FUNCTION_ENTER;
5050 
5051     return m_mmcState->SetPipeBufAddr(m_pipeBufAddrParams, cmdBuffer);
5052 }
5053 
SetGpuCtxCreatOption()5054 MOS_STATUS CodechalVdencHevcStateG11::SetGpuCtxCreatOption()
5055 {
5056     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5057 
5058     CODECHAL_ENCODE_FUNCTION_ENTER;
5059 
5060     if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
5061     {
5062         CodechalEncoderState::SetGpuCtxCreatOption();
5063     }
5064     else
5065     {
5066         m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED);
5067         CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
5068 
5069         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ConstructParmsForGpuCtxCreation(
5070             m_scalabilityState,
5071             (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
5072     }
5073 
5074     return eStatus;
5075 }
5076 
SetTileData(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 * tileCodingParams)5077 MOS_STATUS CodechalVdencHevcStateG11::SetTileData(
5078     MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11*   tileCodingParams)
5079 {
5080     CODECHAL_ENCODE_FUNCTION_ENTER;
5081 
5082     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
5083 
5084     if (!m_hevcPicParams->tiles_enabled_flag)
5085     {
5086         return eStatus;
5087     }
5088 
5089     uint32_t colBd[100] = { 0 };
5090     uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
5091     for (uint32_t i = 0; i < numTileColumns; i++)
5092     {
5093         colBd[i + 1] = colBd[i] + m_hevcPicParams->tile_column_width[i];
5094     }
5095 
5096     uint32_t rowBd[100] = { 0 };
5097     uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
5098     for (uint32_t i = 0; i < numTileRows; i++)
5099     {
5100         rowBd[i + 1] = rowBd[i] + m_hevcPicParams->tile_row_height[i];
5101     }
5102 
5103     m_numTiles = numTileRows * numTileColumns;
5104     if (m_numTiles > CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_VDENC_MIN_TILE_WIDTH_SIZE) *
5105         CODECHAL_GET_HEIGHT_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_VDENC_MIN_TILE_HEIGHT_SIZE))
5106     {
5107         return MOS_STATUS_INVALID_PARAMETER;
5108     }
5109 
5110     uint32_t const numCuRecordTab[] = { 1, 4, 16, 64 }; //LCU: 8x8->1, 16x16->4, 32x32->16, 64x64->64
5111     uint32_t       numCuRecord      = numCuRecordTab[MOS_MIN(3, m_hevcSeqParams->log2_max_coding_block_size_minus3)];
5112     uint32_t       maxBytePerLCU    = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
5113     maxBytePerLCU = maxBytePerLCU * maxBytePerLCU; // number of pixels per LCU
5114     maxBytePerLCU                   = maxBytePerLCU * 3 / (m_is10BitHevc ? 1 : 2);  //assume 4:2:0 format
5115     uint32_t    bitstreamByteOffset = 0, saoRowstoreOffset = 0, cuLevelStreamoutOffset = 0, sseRowstoreOffset = 0;
5116     int32_t     frameWidthInMinCb  = m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1;
5117     int32_t     frameHeightInMinCb = m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1;
5118     int32_t     shift              = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
5119     uint32_t    ctbSize            = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
5120     uint32_t    streamInWidthinLCU = MOS_ROUNDUP_DIVIDE((frameWidthInMinCb << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
5121     uint32_t    numLcuInPic        = 0;
5122 
5123     for (uint32_t numLcusInTiles = 0, i = 0; i < numTileRows; i++)
5124     {
5125         for (uint32_t j = 0; j < numTileColumns; j++)
5126         {
5127             numLcuInPic += m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j];
5128         }
5129     }
5130 
5131     uint32_t    numSliceInTile     = 0;
5132     uint64_t    activeBitstreamSize = (uint64_t)m_encodeParams.dwBitstreamSize;
5133     // There would be padding at the end of last tile in CBR, reserve dedicated part in the BS buf
5134     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR)
5135     {
5136         // Assume max padding num < target frame size derived from target bit rate and frame rate
5137         uint32_t actualFrameRate = m_hevcSeqParams->FrameRate.Numerator / m_hevcSeqParams->FrameRate.Denominator;
5138         uint64_t reservedPart    = (uint64_t)m_hevcSeqParams->TargetBitRate / 8 / (uint64_t)actualFrameRate * 1024;
5139 
5140         if (reservedPart > activeBitstreamSize)
5141         {
5142             CODECHAL_ENCODE_ASSERTMESSAGE("Frame size cal from target Bit rate is larger than BS buf! Issues in CBR paras!");
5143             return MOS_STATUS_INVALID_PARAMETER;
5144         }
5145 
5146         // Capping the reserved part to 1/10 of bs buf size
5147         if (reservedPart > activeBitstreamSize / 10)
5148         {
5149             reservedPart = activeBitstreamSize / 10;
5150         }
5151 
5152         activeBitstreamSize -= reservedPart;
5153     }
5154 
5155     for (uint32_t numLcusInTiles = 0, i = 0; i < numTileRows; i++)
5156     {
5157         for (uint32_t j = 0; j < numTileColumns; j++)
5158         {
5159             uint32_t idx = i * numTileColumns + j;
5160             uint32_t numLcuInTile = m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j];
5161 
5162             tileCodingParams[idx].TileStartLCUX = colBd[j];
5163             tileCodingParams[idx].TileStartLCUY = rowBd[i];
5164 
5165             tileCodingParams[idx].TileColumnStoreSelect = j % 2;
5166             tileCodingParams[idx].TileRowStoreSelect = i % 2;
5167 
5168             if (j != numTileColumns - 1)
5169             {
5170                 tileCodingParams[idx].TileWidthInMinCbMinus1 = (m_hevcPicParams->tile_column_width[j] << shift) - 1;
5171                 tileCodingParams[idx].IsLastTileofRow = false;
5172             }
5173             else
5174             {
5175                 tileCodingParams[idx].TileWidthInMinCbMinus1 = (frameWidthInMinCb - (colBd[j] << shift)) - 1;
5176                 tileCodingParams[idx].IsLastTileofRow = true;
5177 
5178             }
5179 
5180             if (i != numTileRows - 1)
5181             {
5182                 tileCodingParams[idx].IsLastTileofColumn = false;
5183                 tileCodingParams[idx].TileHeightInMinCbMinus1 = (m_hevcPicParams->tile_row_height[i] << shift) - 1;
5184             }
5185             else
5186             {
5187                 tileCodingParams[idx].TileHeightInMinCbMinus1 = (frameHeightInMinCb - (rowBd[i] << shift)) - 1;
5188                 tileCodingParams[idx].IsLastTileofColumn = true;
5189             }
5190 
5191             tileCodingParams[idx].NumOfTilesInFrame       = m_numTiles;
5192             tileCodingParams[idx].NumOfTileColumnsInFrame = numTileColumns;
5193             tileCodingParams[idx].CuRecordOffset = MOS_ALIGN_CEIL(((numCuRecord * numLcusInTiles) * m_hcpInterface->GetHevcEncCuRecordSize()),
5194                 CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
5195             tileCodingParams[idx].NumberOfActiveBePipes   = (m_numPipe > 1) ? m_numPipe : 1;
5196 
5197             tileCodingParams[idx].PakTileStatisticsOffset = 8 * idx;
5198             tileCodingParams[idx].TileSizeStreamoutOffset = idx;
5199             tileCodingParams[idx].Vp9ProbabilityCounterStreamoutOffset = 0;
5200             tileCodingParams[idx].presHcpSyncBuffer                    = &m_resHcpScalabilitySyncBuffer.sResource;
5201             tileCodingParams[idx].CuLevelStreamoutOffset = cuLevelStreamoutOffset;
5202             tileCodingParams[idx].SliceSizeStreamoutOffset = numSliceInTile;
5203             tileCodingParams[idx].SseRowstoreOffset = sseRowstoreOffset;
5204             tileCodingParams[idx].BitstreamByteOffset = bitstreamByteOffset;
5205             tileCodingParams[idx].SaoRowstoreOffset = saoRowstoreOffset;
5206 
5207             uint32_t tileHeightInLCU = MOS_ROUNDUP_DIVIDE(((tileCodingParams[idx].TileHeightInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
5208             uint32_t tileWidthInLCU  = MOS_ROUNDUP_DIVIDE(((tileCodingParams[idx].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
5209 
5210             //StreamIn data is 4 CLs per LCU
5211             tileCodingParams[idx].TileStreaminOffset = 4 * (tileCodingParams[idx].TileStartLCUY * streamInWidthinLCU + tileCodingParams[idx].TileStartLCUX * tileHeightInLCU);
5212 
5213             cuLevelStreamoutOffset += (tileCodingParams[idx].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[idx].TileHeightInMinCbMinus1 + 1) * 16 / CODECHAL_CACHELINE_SIZE;
5214             sseRowstoreOffset += ((m_hevcPicParams->tile_column_width[j] + 3) * m_sizeOfSseSrcPixelRowStoreBufferPerLcu) / CODECHAL_CACHELINE_SIZE;
5215             saoRowstoreOffset += (MOS_ALIGN_CEIL(m_hevcPicParams->tile_column_width[j], 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU) / CODECHAL_CACHELINE_SIZE;
5216 
5217             uint64_t totalSizeTemp = (uint64_t)activeBitstreamSize * (uint64_t)numLcuInTile;
5218             uint32_t bitStreamSizePerTile = (uint32_t)(totalSizeTemp / (uint64_t)numLcuInPic) + ((totalSizeTemp % (uint64_t)numLcuInPic) ? 1 : 0);
5219             bitstreamByteOffset += MOS_ALIGN_CEIL(bitStreamSizePerTile, CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
5220 
5221             numLcusInTiles += numLcuInTile;
5222 
5223             for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
5224             {
5225                 bool lastSliceInTile = false, sliceInTile = false;
5226                 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
5227                     &tileCodingParams[idx],
5228                     &sliceInTile,
5229                     &lastSliceInTile));
5230                 numSliceInTile += (sliceInTile ? 1 : 0);
5231             }
5232         }
5233 
5234         // same row store buffer for different tile rows.
5235         saoRowstoreOffset = 0;
5236         sseRowstoreOffset = 0;
5237     }
5238 
5239     return eStatus;
5240 }
5241 
IsSliceInTile(uint32_t sliceNumber,PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 currentTile,bool * sliceInTile,bool * lastSliceInTile)5242 MOS_STATUS CodechalVdencHevcStateG11::IsSliceInTile(
5243     uint32_t                                sliceNumber,
5244     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11   currentTile,
5245     bool                                   *sliceInTile,
5246     bool                                   *lastSliceInTile)
5247 {
5248     CODECHAL_ENCODE_FUNCTION_ENTER;
5249 
5250     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
5251 
5252     CODECHAL_ENCODE_CHK_NULL_RETURN(currentTile);
5253     CODECHAL_ENCODE_CHK_NULL_RETURN(sliceInTile);
5254     CODECHAL_ENCODE_CHK_NULL_RETURN(lastSliceInTile);
5255 
5256     if (!m_hevcPicParams->tiles_enabled_flag)
5257     {
5258         *lastSliceInTile = *sliceInTile = true;
5259         return eStatus;
5260     }
5261 
5262     uint32_t shift            = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
5263     uint32_t residual = (1 << shift) - 1;
5264     uint32_t frameWidthInLCU  = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
5265     uint32_t frameHeightInLCU = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
5266 
5267     PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = &m_hevcSliceParams[sliceNumber];
5268     uint32_t sliceStartLCU = hevcSlcParams->slice_segment_address;
5269     uint32_t sliceLCUx = sliceStartLCU % frameWidthInLCU;
5270     uint32_t sliceLCUy = sliceStartLCU / frameWidthInLCU;
5271 
5272     uint32_t tileColumnWidth = (currentTile->TileWidthInMinCbMinus1 + 1 + residual) >> shift;
5273     uint32_t tileRowHeight = (currentTile->TileHeightInMinCbMinus1 + 1 + residual) >> shift;
5274     if (sliceLCUx <  currentTile->TileStartLCUX ||
5275         sliceLCUy <  currentTile->TileStartLCUY ||
5276         sliceLCUx >= currentTile->TileStartLCUX + tileColumnWidth ||
5277         sliceLCUy >= currentTile->TileStartLCUY + tileRowHeight
5278         )
5279     {
5280         // slice start is not in the tile boundary
5281         *lastSliceInTile = *sliceInTile = false;
5282         return eStatus;
5283     }
5284 
5285     sliceLCUx += (hevcSlcParams->NumLCUsInSlice - 1) % tileColumnWidth;
5286     sliceLCUy += (hevcSlcParams->NumLCUsInSlice - 1) / tileColumnWidth;
5287 
5288     if (sliceLCUx >= currentTile->TileStartLCUX + tileColumnWidth)
5289     {
5290         sliceLCUx -= tileColumnWidth;
5291         sliceLCUy++;
5292     }
5293 
5294     if (sliceLCUx <  currentTile->TileStartLCUX ||
5295         sliceLCUy <  currentTile->TileStartLCUY ||
5296         sliceLCUx >= currentTile->TileStartLCUX + tileColumnWidth ||
5297         sliceLCUy >= currentTile->TileStartLCUY + tileRowHeight
5298         )
5299     {
5300         // last LCU of the slice is out of the tile boundary
5301         *lastSliceInTile = *sliceInTile = false;
5302         return eStatus;
5303     }
5304 
5305     *sliceInTile = true;
5306 
5307     sliceLCUx++;
5308     sliceLCUy++;
5309 
5310     // the end of slice is at the boundary of tile
5311     *lastSliceInTile = (
5312         sliceLCUx == currentTile->TileStartLCUX + tileColumnWidth &&
5313         sliceLCUy == currentTile->TileStartLCUY + tileRowHeight);
5314 
5315     return eStatus;
5316 }
5317 
VerifyCommandBufferSize()5318 MOS_STATUS CodechalVdencHevcStateG11::VerifyCommandBufferSize()
5319 {
5320     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5321 
5322     CODECHAL_ENCODE_FUNCTION_ENTER;
5323 
5324     if (UseRenderCommandBuffer() || m_numPipe == 1)
5325     {
5326         // legacy mode & resize CommandBuffer Size for every BRC pass
5327         if (!m_singleTaskPhaseSupported)
5328         {
5329             CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
5330         }
5331         return eStatus;
5332     }
5333 
5334     // virtual engine
5335     uint32_t requestedSize =
5336         m_pictureStatesSize +
5337         m_extraPictureStatesSize +
5338         (m_sliceStatesSize * m_numSlices);
5339 
5340     requestedSize += (requestedSize * m_numPassesInOnePipe + m_hucCommandsSize);
5341 
5342     // Running in the multiple VDBOX mode
5343     int currentPipe = GetCurrentPipe();
5344     if (currentPipe < 0 || currentPipe >= m_numPipe)
5345     {
5346         eStatus = MOS_STATUS_INVALID_PARAMETER;
5347         return eStatus;
5348     }
5349     int currentPass = GetCurrentPass();
5350     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
5351     {
5352         eStatus = MOS_STATUS_INVALID_PARAMETER;
5353         return eStatus;
5354     }
5355 
5356     if (IsFirstPipe() && m_osInterface->bUsesPatchList)
5357     {
5358         CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
5359     }
5360 
5361     PMOS_COMMAND_BUFFER pCmdBuffer;
5362     if (m_osInterface->phasedSubmission)
5363     {
5364         m_osInterface->pfnVerifyCommandBufferSize(m_osInterface, requestedSize, 0);
5365         return eStatus;
5366     }
5367     else
5368     {
5369         pCmdBuffer = m_singleTaskPhaseSupported ? &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass];
5370     }
5371 
5372     if (Mos_ResourceIsNull(&pCmdBuffer->OsResource) ||
5373         m_sizeOfVeBatchBuffer < requestedSize)
5374     {
5375         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
5376 
5377         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
5378         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
5379         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
5380         allocParamsForBufferLinear.Format = Format_Buffer;
5381         allocParamsForBufferLinear.dwBytes = requestedSize;
5382         allocParamsForBufferLinear.pBufName = "Batch buffer for each VDBOX";
5383 
5384         if (!Mos_ResourceIsNull(&pCmdBuffer->OsResource))
5385         {
5386             if (pCmdBuffer->pCmdBase)
5387             {
5388                 m_osInterface->pfnUnlockResource(m_osInterface, &pCmdBuffer->OsResource);
5389             }
5390             m_osInterface->pfnFreeResource(m_osInterface, &pCmdBuffer->OsResource);
5391         }
5392 
5393         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
5394             m_osInterface,
5395             &allocParamsForBufferLinear,
5396             &pCmdBuffer->OsResource));
5397 
5398         m_sizeOfVeBatchBuffer = requestedSize;
5399     }
5400 
5401     if (pCmdBuffer->pCmdBase == nullptr)
5402     {
5403         MOS_LOCK_PARAMS lockParams;
5404         MOS_ZeroMemory(&lockParams, sizeof(lockParams));
5405         lockParams.WriteOnly = true;
5406         pCmdBuffer->pCmdPtr = pCmdBuffer->pCmdBase = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &pCmdBuffer->OsResource, &lockParams);
5407         pCmdBuffer->iRemaining                     = m_sizeOfVeBatchBuffer;
5408         pCmdBuffer->iOffset = 0;
5409 
5410         if (pCmdBuffer->pCmdBase == nullptr)
5411         {
5412             eStatus = MOS_STATUS_NULL_POINTER;
5413             return eStatus;
5414         }
5415     }
5416 
5417     return eStatus;
5418 }
5419 
GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)5420 MOS_STATUS CodechalVdencHevcStateG11::GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)
5421 {
5422     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5423 
5424     CODECHAL_ENCODE_FUNCTION_ENTER;
5425 
5426     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
5427     CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
5428 
5429     if (UseRenderCommandBuffer() || m_numPipe == 1)
5430     {
5431         // legacy mode
5432         m_realCmdBuffer.pCmdBase = m_realCmdBuffer.pCmdPtr = nullptr;
5433         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, 0));
5434         return eStatus;
5435     }
5436 
5437     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0));
5438 
5439     int currentPipe = GetCurrentPipe();
5440     if (currentPipe < 0 || currentPipe >= m_numPipe)
5441     {
5442         eStatus = MOS_STATUS_INVALID_PARAMETER;
5443         return eStatus;
5444     }
5445     int currentPass = GetCurrentPass();
5446     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
5447     {
5448         eStatus = MOS_STATUS_INVALID_PARAMETER;
5449         return eStatus;
5450     }
5451 
5452     if (m_osInterface->phasedSubmission)
5453     {
5454         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, currentPipe + 1));
5455 
5456         CodecHalEncodeScalability_EncodePhaseToSubmissionType(IsFirstPipe(), cmdBuffer);
5457         if (IsLastPipe())
5458         {
5459             cmdBuffer->iSubmissionType |= SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE;
5460         }
5461     }
5462     else
5463     {
5464         *cmdBuffer = m_singleTaskPhaseSupported ? m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass];
5465     }
5466 
5467     if (m_osInterface->osCpInterface->IsCpEnabled() && cmdBuffer->iOffset == 0)
5468     {
5469         // Insert CP Prolog
5470         CODECHAL_ENCODE_NORMALMESSAGE("Adding cp prolog for secure scalable encode");
5471         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, cmdBuffer));
5472     }
5473     return eStatus;
5474 }
5475 
ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)5476 MOS_STATUS CodechalVdencHevcStateG11::ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)
5477 {
5478     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5479 
5480     CODECHAL_ENCODE_FUNCTION_ENTER;
5481 
5482     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
5483 
5484     if (UseRenderCommandBuffer() || m_numPipe == 1)
5485     {
5486         // legacy mode
5487         m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, 0);
5488         return eStatus;
5489     }
5490 
5491     // virtual engine
5492     int currentPipe = GetCurrentPipe();
5493     if (currentPipe < 0 || currentPipe >= m_numPipe)
5494     {
5495         eStatus = MOS_STATUS_INVALID_PARAMETER;
5496         return eStatus;
5497     }
5498     int currentPass = GetCurrentPass();
5499     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
5500     {
5501         eStatus = MOS_STATUS_INVALID_PARAMETER;
5502         return eStatus;
5503     }
5504 
5505     if (m_osInterface->phasedSubmission)
5506     {
5507         m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, currentPipe + 1);
5508         m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
5509     }
5510     else
5511     {
5512         uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
5513         m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][passIndex] = *cmdBuffer;
5514         m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
5515     }
5516 
5517     return eStatus;
5518 }
5519 
SubmitCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer,bool bNullRendering)5520 MOS_STATUS CodechalVdencHevcStateG11::SubmitCommandBuffer(
5521     PMOS_COMMAND_BUFFER cmdBuffer,
5522     bool                bNullRendering)
5523 {
5524     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5525 
5526     CODECHAL_ENCODE_FUNCTION_ENTER;
5527 
5528     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
5529 
5530     if (UseRenderCommandBuffer() || m_numPipe == 1)
5531     {
5532         // legacy mode
5533         if (!UseRenderCommandBuffer() && MOS_VE_SUPPORTED(m_osInterface))  // Set VE Hints for video contexts only
5534         {
5535             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(cmdBuffer));
5536         }
5537         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, bNullRendering));
5538         return eStatus;
5539     }
5540 
5541     bool cmdBufferReadyForSubmit = IsLastPipe();
5542 
5543     // In STF, Hold the command buffer submission till last pass
5544     if (m_singleTaskPhaseSupported)
5545     {
5546         cmdBufferReadyForSubmit = cmdBufferReadyForSubmit && IsLastPass();
5547     }
5548 
5549     if(!cmdBufferReadyForSubmit)
5550     {
5551         return eStatus;
5552     }
5553 
5554     int currentPass = GetCurrentPass();
5555     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
5556     {
5557         eStatus = MOS_STATUS_INVALID_PARAMETER;
5558         return eStatus;
5559     }
5560 
5561     if (m_osInterface->phasedSubmission)
5562     {
5563         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
5564     }
5565     else
5566     {
5567         uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
5568 
5569         for (uint32_t i = 0; i < m_numPipe; i++)
5570         {
5571             PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex];
5572 
5573             if(cmdBuffer->pCmdBase)
5574             {
5575                 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
5576             }
5577 
5578             cmdBuffer->pCmdBase = 0;
5579             cmdBuffer->iOffset = cmdBuffer->iRemaining = 0;
5580         }
5581         m_sizeOfVeBatchBuffer = 0;
5582 
5583         if(eStatus == MOS_STATUS_SUCCESS)
5584         {
5585             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&m_realCmdBuffer));
5586             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
5587         }
5588     }
5589 
5590     return eStatus;
5591 }
5592 
SendPrologWithFrameTracking(PMOS_COMMAND_BUFFER cmdBuffer,bool frameTrackingRequested,MHW_MI_MMIOREGISTERS * mmioRegister)5593 MOS_STATUS CodechalVdencHevcStateG11::SendPrologWithFrameTracking(
5594     PMOS_COMMAND_BUFFER         cmdBuffer,
5595     bool                        frameTrackingRequested,
5596     MHW_MI_MMIOREGISTERS       *mmioRegister)
5597 {
5598     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5599 
5600     CODECHAL_ENCODE_FUNCTION_ENTER;
5601 
5602     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
5603 
5604     if (UseRenderCommandBuffer())
5605     {
5606         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SendPrologWithFrameTracking(cmdBuffer, frameTrackingRequested, mmioRegister));
5607         return eStatus;
5608     }
5609 
5610     if (!IsLastPipe())
5611     {
5612         return eStatus;
5613     }
5614 
5615     PMOS_COMMAND_BUFFER commandBufferInUse;
5616     if (m_realCmdBuffer.pCmdBase)
5617     {
5618         commandBufferInUse = &m_realCmdBuffer;
5619     }
5620     else
5621         if (cmdBuffer && cmdBuffer->pCmdBase)
5622         {
5623             commandBufferInUse = cmdBuffer;
5624         }
5625         else
5626         {
5627             eStatus = MOS_STATUS_INVALID_PARAMETER;
5628             return eStatus;
5629         }
5630 
5631     // initialize command buffer attributes
5632     commandBufferInUse->Attributes.bTurboMode = m_hwInterface->m_turboMode;
5633     commandBufferInUse->Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices;
5634     commandBufferInUse->Attributes.dwNumRequestedSubSlices = m_hwInterface->m_numRequestedSubSlices;
5635     commandBufferInUse->Attributes.dwNumRequestedEUs = m_hwInterface->m_numRequestedEus;
5636     commandBufferInUse->Attributes.bValidPowerGatingRequest = true;
5637 
5638     if (frameTrackingRequested && m_frameTrackingEnabled)
5639     {
5640         commandBufferInUse->Attributes.bEnableMediaFrameTracking = true;
5641         commandBufferInUse->Attributes.resMediaFrameTrackingSurface =
5642             &m_encodeStatusBuf.resStatusBuffer;
5643         commandBufferInUse->Attributes.dwMediaFrameTrackingTag = m_storeData;
5644         // Set media frame tracking address offset(the offset from the encoder status buffer page)
5645         commandBufferInUse->Attributes.dwMediaFrameTrackingAddrOffset = 0;
5646     }
5647 
5648     MHW_GENERIC_PROLOG_PARAMS  genericPrologParams;
5649     MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
5650     genericPrologParams.pOsInterface = m_hwInterface->GetOsInterface();
5651     genericPrologParams.pvMiInterface = m_hwInterface->GetMiInterface();
5652     genericPrologParams.bMmcEnabled = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
5653     genericPrologParams.dwStoreDataValue = m_storeData - 1;
5654 
5655     CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(commandBufferInUse, &genericPrologParams));
5656 
5657     return eStatus;
5658 }
5659 
SetSliceStructs()5660 MOS_STATUS CodechalVdencHevcStateG11::SetSliceStructs()
5661 {
5662     MOS_STATUS                          eStatus = MOS_STATUS_SUCCESS;
5663     eStatus = CodechalEncodeHevcBase::SetSliceStructs();
5664     m_numPassesInOnePipe                        = m_numPasses;
5665     m_numPasses                                 = (m_numPasses + 1) * m_numPipe - 1;
5666     return eStatus;
5667 }
5668 
AllocateTileStatistics()5669 MOS_STATUS CodechalVdencHevcStateG11::AllocateTileStatistics()
5670 {
5671     CODECHAL_ENCODE_FUNCTION_ENTER;
5672 
5673     MOS_STATUS                  eStatus = MOS_STATUS_SUCCESS;
5674 
5675     if (!m_hevcPicParams->tiles_enabled_flag)
5676     {
5677         return eStatus;
5678     }
5679 
5680     auto num_tile_rows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
5681     auto num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1;
5682     auto num_tiles = num_tile_rows*num_tile_columns;
5683 
5684     MOS_ZeroMemory(&m_hevcFrameStatsOffset, sizeof(HEVC_TILE_STATS_INFO));
5685     MOS_ZeroMemory(&m_hevcTileStatsOffset, sizeof(HEVC_TILE_STATS_INFO));
5686     MOS_ZeroMemory(&m_hevcStatsSize, sizeof(HEVC_TILE_STATS_INFO));
5687 
5688     MOS_LOCK_PARAMS lockFlagsWriteOnly;
5689     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
5690     lockFlagsWriteOnly.WriteOnly = true;
5691 
5692     // Set the maximum size based on frame level statistics.
5693     m_hevcStatsSize.uiTileSizeRecord     = CODECHAL_CACHELINE_SIZE;
5694     m_hevcStatsSize.uiHevcPakStatistics  = m_sizeOfHcpPakFrameStats;
5695     m_hevcStatsSize.uiVdencStatistics    = m_vdencEnabled ? CODECHAL_HEVC_VDENC_STATS_SIZE : 0;
5696     m_hevcStatsSize.uiHevcSliceStreamout = CODECHAL_CACHELINE_SIZE;
5697 
5698     // Maintain the offsets to use for patching addresses in to the HuC Pak Integration kernel Aggregated Frame Statistics Output Buffer
5699     // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
5700     m_hevcFrameStatsOffset.uiTileSizeRecord     = 0;  // Tile Size Record is not present in resHuCPakAggregatedFrameStatsBuffer
5701     m_hevcFrameStatsOffset.uiHevcPakStatistics  = 0;
5702     m_hevcFrameStatsOffset.uiVdencStatistics    = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcPakStatistics + m_hevcStatsSize.uiHevcPakStatistics, CODECHAL_PAGE_SIZE);
5703     m_hevcFrameStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiVdencStatistics + m_hevcStatsSize.uiVdencStatistics, CODECHAL_PAGE_SIZE);
5704 
5705     // Frame level statistics
5706     m_hwInterface->m_pakIntAggregatedFrameStatsSize = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcSliceStreamout + (m_hevcStatsSize.uiHevcSliceStreamout * m_numLcu), CODECHAL_PAGE_SIZE);
5707 
5708     // HEVC Frame Statistics Buffer - Output from HuC PAK Integration kernel
5709     if (Mos_ResourceIsNull(&m_resHuCPakAggregatedFrameStatsBuffer.sResource))
5710     {
5711         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
5712         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
5713         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
5714         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
5715         allocParamsForBufferLinear.Format = Format_Buffer;
5716         allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
5717         allocParamsForBufferLinear.pBufName = "HCP Aggregated Frame Statistics Streamout Buffer";
5718 
5719         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
5720             m_osInterface,
5721             &allocParamsForBufferLinear,
5722             &m_resHuCPakAggregatedFrameStatsBuffer.sResource));
5723         m_resHuCPakAggregatedFrameStatsBuffer.dwSize = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
5724 
5725         uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
5726             m_osInterface,
5727             &m_resHuCPakAggregatedFrameStatsBuffer.sResource,
5728             &lockFlagsWriteOnly);
5729 
5730         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
5731         MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
5732         m_osInterface->pfnUnlockResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource);
5733     }
5734 
5735     // Maintain the offsets to use for patching addresses in to the Tile Based Statistics Buffer
5736     // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
5737     m_hevcTileStatsOffset.uiTileSizeRecord     = 0; // TileReord is in a separated resource
5738     m_hevcTileStatsOffset.uiHevcPakStatistics  = 0; // PakStaticstics is head of m_resTileBasedStatisticsBuffer
5739     m_hevcTileStatsOffset.uiVdencStatistics    = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcPakStatistics + (m_hevcStatsSize.uiHevcPakStatistics * num_tiles), CODECHAL_PAGE_SIZE);
5740     m_hevcTileStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiVdencStatistics + (m_hevcStatsSize.uiVdencStatistics * num_tiles), CODECHAL_PAGE_SIZE);
5741     // Combined statistics size for all tiles
5742     m_hwInterface->m_pakIntTileStatsSize = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcSliceStreamout + m_hevcStatsSize.uiHevcSliceStreamout * m_numLcu, CODECHAL_PAGE_SIZE);
5743 
5744     // Tile size record size for all tiles
5745     m_hwInterface->m_tileRecordSize = m_hevcStatsSize.uiTileSizeRecord * num_tiles;
5746 
5747     if (Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource) || m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_pakIntTileStatsSize)
5748     {
5749         if (!Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource))
5750         {
5751             m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource);
5752         }
5753         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
5754         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
5755         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
5756         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
5757         allocParamsForBufferLinear.Format = Format_Buffer;
5758         allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntTileStatsSize;
5759         allocParamsForBufferLinear.pBufName = "HCP Tile Level Statistics Streamout Buffer";
5760 
5761         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
5762             m_osInterface,
5763             &allocParamsForBufferLinear,
5764             &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource));
5765         m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_pakIntTileStatsSize;
5766 
5767         uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource(
5768             m_osInterface,
5769             &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
5770             &lockFlagsWriteOnly);
5771         CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
5772 
5773         MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
5774         m_osInterface->pfnUnlockResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource);
5775     }
5776 
5777     if (Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource) || m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_tileRecordSize)
5778     {
5779         if (!Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource))
5780         {
5781             m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource);
5782         }
5783         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
5784         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
5785         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
5786         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
5787         allocParamsForBufferLinear.Format = Format_Buffer;
5788         allocParamsForBufferLinear.dwBytes = m_hwInterface->m_tileRecordSize;
5789         allocParamsForBufferLinear.pBufName = "Tile Record Buffer";
5790 
5791         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
5792             m_osInterface,
5793             &allocParamsForBufferLinear,
5794             &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource));
5795         m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_tileRecordSize;
5796 
5797         uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource(
5798             m_osInterface,
5799             &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource,
5800             &lockFlagsWriteOnly);
5801         CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
5802 
5803         MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
5804         m_osInterface->pfnUnlockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource);
5805     }
5806 
5807     return eStatus;
5808 }
5809 
SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams)5810 void CodechalVdencHevcStateG11::SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams)
5811 {
5812     CODECHAL_ENCODE_FUNCTION_ENTER;
5813 
5814     CodechalEncodeHevcBase::SetHcpPipeBufAddrParams(pipeBufAddrParams);
5815 
5816     PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex];
5817     if (!Mos_ResourceIsNull(&tileStatisticsBuffer->sResource) && (m_numPipe > 1))
5818     {
5819         pipeBufAddrParams.presLcuBaseAddressBuffer = &tileStatisticsBuffer->sResource;
5820         pipeBufAddrParams.dwLcuStreamOutOffset         = m_hevcTileStatsOffset.uiHevcSliceStreamout;
5821         pipeBufAddrParams.presFrameStatStreamOutBuffer = &tileStatisticsBuffer->sResource;
5822         pipeBufAddrParams.dwFrameStatStreamOutOffset   = m_hevcTileStatsOffset.uiHevcPakStatistics;
5823     }
5824 }
5825 
ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer)5826 MOS_STATUS CodechalVdencHevcStateG11::ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer)
5827 {
5828     CODECHAL_ENCODE_FUNCTION_ENTER;
5829 
5830     MOS_STATUS                  eStatus = MOS_STATUS_SUCCESS;
5831 
5832     // encodeStatus is offset by 2 DWs in the resource
5833     uint32_t sseOffsetinBytes = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2 + m_encodeStatusBuf.dwSumSquareErrorOffset;
5834     for (auto i = 0; i < 6; i++)    // 64 bit SSE values for luma/ chroma channels need to be copied
5835     {
5836         MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
5837         MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
5838         miCpyMemMemParams.presSrc     = m_hevcPicParams->tiles_enabled_flag && (m_numPipe > 1) ? &m_resHuCPakAggregatedFrameStatsBuffer.sResource : &m_resFrameStatStreamOutBuffer;
5839         miCpyMemMemParams.dwSrcOffset = (HEVC_PAK_STATISTICS_SSE_OFFSET + i) * sizeof(uint32_t);    // SSE luma offset is located at DW32 in Frame statistics, followed by chroma
5840         miCpyMemMemParams.presDst = &m_encodeStatusBuf.resStatusBuffer;
5841         miCpyMemMemParams.dwDstOffset = sseOffsetinBytes + i * sizeof(uint32_t);
5842         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
5843     }
5844     return eStatus;
5845 }
5846 
SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS & indObjBaseAddrParams)5847 void CodechalVdencHevcStateG11::SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS& indObjBaseAddrParams)
5848 {
5849     PCODECHAL_ENCODE_BUFFER tileRecordBuffer    = &m_tileRecordBuffer[m_virtualEngineBbIndex];
5850     bool useTileRecordBuffer = !Mos_ResourceIsNull(&tileRecordBuffer->sResource);
5851 
5852     MOS_ZeroMemory(&indObjBaseAddrParams, sizeof(indObjBaseAddrParams));
5853     indObjBaseAddrParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
5854     indObjBaseAddrParams.presMvObjectBuffer = &m_resMbCodeSurface;
5855     indObjBaseAddrParams.dwMvObjectOffset = m_mvOffset;
5856     indObjBaseAddrParams.dwMvObjectSize = m_mbCodeSize - m_mvOffset;
5857     indObjBaseAddrParams.presPakBaseObjectBuffer = &m_resBitstreamBuffer;
5858     indObjBaseAddrParams.dwPakBaseObjectSize = m_bitstreamUpperBound;
5859     indObjBaseAddrParams.presPakTileSizeStasBuffer = useTileRecordBuffer ? &tileRecordBuffer->sResource : nullptr;
5860     indObjBaseAddrParams.dwPakTileSizeStasBufferSize = useTileRecordBuffer ? m_hwInterface->m_tileRecordSize : 0;
5861     indObjBaseAddrParams.dwPakTileSizeRecordOffset   = useTileRecordBuffer ? m_hevcTileStatsOffset.uiTileSizeRecord : 0;
5862 }
5863 
HuCLookaheadInit()5864 MOS_STATUS CodechalVdencHevcStateG11::HuCLookaheadInit()
5865 {
5866     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5867 
5868     CODECHAL_ENCODE_FUNCTION_ENTER;
5869 
5870     m_firstTaskInPhase = !m_singleTaskPhaseSupported;
5871     m_lastTaskInPhase  = !m_singleTaskPhaseSupported;
5872 
5873     // set DMEM
5874     uint32_t initVbvFullness = MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit);
5875     MOS_LOCK_PARAMS lockFlagsWriteOnly;
5876     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
5877     lockFlagsWriteOnly.WriteOnly = true;
5878 
5879     auto dmem = (PCodechalVdencHevcLaDmem)m_osInterface->pfnLockResource(
5880         m_osInterface, &m_vdencLaInitDmemBuffer, &lockFlagsWriteOnly);
5881     CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
5882     MOS_ZeroMemory(dmem, sizeof(CodechalVdencHevcLaDmem));
5883 
5884     uint8_t downscaleRatioIndicator = 2;  // 4x downscaling
5885     if (m_hevcPicParams->DownScaleRatio.fields.X16Minus1_X == 15 && m_hevcPicParams->DownScaleRatio.fields.X16Minus1_Y == 15)
5886     {
5887         downscaleRatioIndicator = 0;  // no downscaling
5888     }
5889 
5890     dmem->lookAheadFunc      = 0;
5891     dmem->lengthAhead        = m_lookaheadDepth;
5892     dmem->vbvBufferSize      = m_hevcSeqParams->VBVBufferSizeInBit / m_averageFrameSize;
5893     dmem->vbvInitialFullness = initVbvFullness / m_averageFrameSize;
5894     dmem->statsRecords       = m_numLaDataEntry;
5895     dmem->avgFrameSizeInByte = m_averageFrameSize >> 3;
5896     dmem->downscaleRatio     = downscaleRatioIndicator;
5897     dmem->PGop               = 4;
5898     dmem->maxGop             = m_hevcSeqParams->MaxAdaptiveGopPicSize;
5899     dmem->minGop             = m_hevcSeqParams->MinAdaptiveGopPicSize;
5900     dmem->adaptiveIDR        = (uint8_t)m_lookaheadAdaptiveI;
5901 
5902     m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaInitDmemBuffer);
5903 
5904     // set HuC regions
5905     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
5906     MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
5907     virtualAddrParams.regionParams[0].presRegion = &m_vdencLaHistoryBuffer;
5908     virtualAddrParams.regionParams[0].isWritable = true;
5909 
5910 #if USE_CODECHAL_DEBUG_TOOL && _ENCODE_VDENC_RESERVED
5911     if (m_swLaMode)
5912     {
5913         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallSwLookaheadImpl(
5914             m_debugInterface,
5915             m_swLaMode,
5916             CODECHAL_MEDIA_STATE_BRC_INIT_RESET,
5917             &m_vdencLaInitDmemBuffer,
5918             nullptr,
5919             &virtualAddrParams));
5920 
5921         return eStatus;
5922     }
5923 #endif
5924 
5925     MOS_COMMAND_BUFFER cmdBuffer;
5926     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
5927 
5928     if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
5929     {
5930         // Send command buffer header at the beginning (OS dependent)
5931         bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : 0;
5932         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
5933     }
5934 
5935     // load kernel from WOPCM into L2 storage RAM
5936     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
5937     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
5938     imemParams.dwKernelDescriptor = m_vdboxHucHevcLaAnalysisKernelDescriptor;
5939 
5940     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
5941 
5942     // pipe mode select
5943     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
5944     pipeModeSelectParams.Mode = m_mode;
5945     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
5946 
5947     // set HuC DMEM param
5948     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
5949     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
5950     dmemParams.presHucDataSource = &m_vdencLaInitDmemBuffer;
5951     dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencLaInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
5952     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
5953     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
5954     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
5955     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
5956     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
5957 
5958     // wait Huc completion (use HEVC bit for now)
5959     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
5960     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
5961     vdPipeFlushParams.Flags.bFlushHEVC = 1;
5962     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
5963     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
5964 
5965     // Flush the engine to ensure memory written out
5966     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
5967     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
5968     flushDwParams.bVideoPipelineCacheInvalidate = true;
5969     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
5970 
5971     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5972     {
5973         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
5974     }
5975 
5976     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
5977 
5978     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5979     {
5980         bool renderingFlags = m_videoContextUsesNullHw;
5981 
5982         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
5983     }
5984 
5985     return eStatus;
5986 }
5987 
HuCLookaheadUpdate()5988 MOS_STATUS CodechalVdencHevcStateG11::HuCLookaheadUpdate()
5989 {
5990     uint8_t currentPass = (uint8_t)GetCurrentPass();
5991     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5992 
5993     CODECHAL_ENCODE_FUNCTION_ENTER;
5994 
5995     m_firstTaskInPhase = !m_singleTaskPhaseSupported;
5996     m_lastTaskInPhase  = (currentPass == m_numPasses);
5997 
5998     // set DMEM
5999     MOS_LOCK_PARAMS lockFlagsWriteOnly;
6000     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6001     lockFlagsWriteOnly.WriteOnly = true;
6002 
6003     auto dmem = (PCodechalVdencHevcLaDmem)m_osInterface->pfnLockResource(
6004         m_osInterface, &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly);
6005     CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
6006     MOS_ZeroMemory(dmem, sizeof(CodechalVdencHevcLaDmem));
6007 
6008     dmem->lookAheadFunc = 1;
6009     dmem->validStatsRecords = m_numValidLaRecords;
6010     dmem->offset = (m_numLaDataEntry + m_currLaDataIdx + 1 - m_numValidLaRecords) % m_numLaDataEntry;
6011     dmem->cqmQpThreshold = m_cqmQpThreshold;
6012     dmem->currentPass = currentPass;
6013 
6014     m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass]);
6015 
6016     // set HuC regions
6017     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
6018     MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
6019     virtualAddrParams.regionParams[0].presRegion = &m_vdencLaHistoryBuffer;
6020     virtualAddrParams.regionParams[0].isWritable = true;
6021     virtualAddrParams.regionParams[1].presRegion = &m_vdencLaStatsBuffer;
6022     virtualAddrParams.regionParams[2].presRegion = &m_vdencLaDataBuffer;
6023     virtualAddrParams.regionParams[2].isWritable = true;
6024 
6025 #if USE_CODECHAL_DEBUG_TOOL && _ENCODE_VDENC_RESERVED
6026     if (m_swLaMode)
6027     {
6028         bool isLaAnalysisRequired = true;
6029         MOS_LOCK_PARAMS lockFlags;
6030         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
6031         lockFlags.ReadOnly = true;
6032 
6033         if (!IsFirstPass())
6034         {
6035             uint32_t *data = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &m_resPakMmioBuffer, &lockFlags);
6036             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
6037             isLaAnalysisRequired = (*data == CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK);
6038             m_osInterface->pfnUnlockResource(m_osInterface, &m_resPakMmioBuffer);
6039         }
6040 
6041         if (isLaAnalysisRequired)
6042         {
6043             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallSwLookaheadImpl(
6044                 m_debugInterface,
6045                 m_swLaMode,
6046                 CODECHAL_MEDIA_STATE_BRC_UPDATE,
6047                 &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass],
6048                 &m_resPakMmioBuffer,
6049                 &virtualAddrParams));
6050 
6051             EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
6052             uint32_t baseOffset = (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize);
6053 
6054             CodechalVdencHevcLaData *data = (CodechalVdencHevcLaData *)m_osInterface->pfnLockResource(m_osInterface, &m_vdencLaDataBuffer, &lockFlags);
6055             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
6056 
6057             LookaheadReport *lookaheadStatus = (LookaheadReport *)(encodeStatusBuf.pEncodeStatus + baseOffset + encodeStatusBuf.dwLookaheadStatusOffset);
6058             lookaheadStatus->targetFrameSize = data[dmem->offset].targetFrameSize;
6059             lookaheadStatus->targetBufferFulness = data[dmem->offset].targetBufferFulness;
6060             lookaheadStatus->encodeHints = data[dmem->offset].encodeHints;
6061             lookaheadStatus->pyramidDeltaQP = data[dmem->offset].pyramidDeltaQP;
6062 
6063             m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaDataBuffer);
6064         }
6065 
6066         return eStatus;
6067     }
6068 #endif
6069 
6070     MOS_COMMAND_BUFFER cmdBuffer;
6071     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
6072 
6073     if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
6074     {
6075         // Send command buffer header at the beginning (OS dependent)
6076         bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
6077         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
6078     }
6079 
6080     if (!IsFirstPass() && m_firstTaskInPhase)
6081     {
6082         // VDENC uses HuC FW generated semaphore for conditional 2nd pass
6083         MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
6084         MOS_ZeroMemory(
6085             &miConditionalBatchBufferEndParams,
6086             sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
6087         miConditionalBatchBufferEndParams.presSemaphoreBuffer =
6088             &m_resPakMmioBuffer;
6089         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
6090             &cmdBuffer,
6091             &miConditionalBatchBufferEndParams));
6092     }
6093 
6094     CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
6095 
6096     // load kernel from WOPCM into L2 storage RAM
6097     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
6098     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
6099     imemParams.dwKernelDescriptor = m_vdboxHucHevcLaAnalysisKernelDescriptor;
6100 
6101     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
6102 
6103     // pipe mode select
6104     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
6105     pipeModeSelectParams.Mode = m_mode;
6106     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
6107 
6108     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
6109 
6110     // set HuC DMEM param
6111     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
6112     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
6113     dmemParams.presHucDataSource = &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass];
6114     dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencLaUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
6115     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
6116     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
6117     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
6118     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
6119     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
6120 
6121     // wait Huc completion (use HEVC bit for now)
6122     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
6123     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
6124     vdPipeFlushParams.Flags.bFlushHEVC = 1;
6125     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
6126     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
6127 
6128     // Flush the engine to ensure memory written out
6129     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
6130     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
6131     flushDwParams.bVideoPipelineCacheInvalidate = true;
6132     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
6133 
6134     if (IsFirstPass())
6135     {
6136         // Write HUC_STATUS mask: DW1 (mask value)
6137         MHW_MI_STORE_DATA_PARAMS storeDataParams;
6138         MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
6139         storeDataParams.pOsResource = &m_resPakMmioBuffer;
6140         storeDataParams.dwResourceOffset = sizeof(uint32_t);
6141         storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK;
6142         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
6143 
6144         // store HUC_STATUS register: DW0 (actual value)
6145         CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
6146         auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex);
6147         MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
6148         MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
6149         storeRegParams.presStoreBuffer = &m_resPakMmioBuffer;
6150         storeRegParams.dwOffset = 0;
6151         storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
6152         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
6153     }
6154 
6155     // Write lookahead status to encode status buffer
6156     MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
6157     EncodeStatusBuffer       encodeStatusBuf = m_encodeStatusBuf;
6158     uint32_t baseOffset =
6159         (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2;  // pEncodeStatus is offset by 2 DWs in the resource
6160     MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
6161     miCpyMemMemParams.presSrc = &m_vdencLaDataBuffer;
6162     miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalVdencHevcLaData) + CODECHAL_OFFSETOF(CodechalVdencHevcLaData, encodeHints);
6163     miCpyMemMemParams.presDst = &encodeStatusBuf.resStatusBuffer;
6164     miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset + CODECHAL_OFFSETOF(LookaheadReport, encodeHints);
6165     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
6166     miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalVdencHevcLaData) + CODECHAL_OFFSETOF(CodechalVdencHevcLaData, targetFrameSize);
6167     miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset + CODECHAL_OFFSETOF(LookaheadReport, targetFrameSize);
6168     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
6169     miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalVdencHevcLaData) + CODECHAL_OFFSETOF(CodechalVdencHevcLaData, targetBufferFulness);
6170     miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset + CODECHAL_OFFSETOF(LookaheadReport, targetBufferFulness);
6171     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
6172     miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalVdencHevcLaData) + CODECHAL_OFFSETOF(CodechalVdencHevcLaData, pyramidDeltaQP);
6173     miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset + CODECHAL_OFFSETOF(LookaheadReport, pyramidDeltaQP);
6174     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
6175 
6176     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
6177     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
6178 
6179     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
6180 
6181     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6182     {
6183         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
6184     }
6185 
6186     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
6187 
6188     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6189     {
6190         bool renderingFlags = m_videoContextUsesNullHw;
6191         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
6192     }
6193 
6194     return eStatus;
6195 }
6196 
AnalyzeLookaheadStats()6197 MOS_STATUS CodechalVdencHevcStateG11::AnalyzeLookaheadStats()
6198 {
6199     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6200 
6201     CODECHAL_ENCODE_FUNCTION_ENTER;
6202 
6203     if (IsFirstPass())
6204     {
6205         m_numValidLaRecords++;
6206     }
6207 
6208     if (m_lookaheadInit)
6209     {
6210         CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCLookaheadInit());
6211         m_lookaheadInit = false;
6212     }
6213 
6214     CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCLookaheadUpdate());
6215     if (IsLastPass() && (m_numValidLaRecords >= m_lookaheadDepth))
6216     {
6217         m_numValidLaRecords--;
6218         m_lookaheadReport = true;
6219     }
6220 
6221     if (m_hevcPicParams->bLastPicInStream)
6222     {
6223         // Flush the last frames
6224         while (m_numValidLaRecords > 0)
6225         {
6226             CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCLookaheadUpdate());
6227             m_numValidLaRecords--;
6228         }
6229     }
6230 
6231     return eStatus;
6232 }
6233 
HuCBrcInitReset()6234 MOS_STATUS CodechalVdencHevcStateG11::HuCBrcInitReset()
6235 {
6236     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6237 
6238     CODECHAL_ENCODE_FUNCTION_ENTER;
6239 
6240     MOS_COMMAND_BUFFER cmdBuffer;
6241     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
6242 
6243     if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && (m_numPipe == 1))
6244     {
6245         // Send command buffer header at the beginning (OS dependent)
6246         bool requestFrameTracking = m_singleTaskPhaseSupported ?
6247             m_firstTaskInPhase : 0;
6248         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
6249     }
6250 
6251     // load kernel from WOPCM into L2 storage RAM
6252     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
6253     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
6254     imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcInitKernelDescriptor;
6255 
6256     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
6257 
6258     // pipe mode select
6259     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
6260     pipeModeSelectParams.Mode = m_mode;
6261     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
6262 
6263     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
6264 
6265     // set HuC DMEM param
6266     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
6267     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
6268     dmemParams.presHucDataSource = &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx];
6269     dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
6270     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
6271     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
6272 
6273     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
6274     MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
6275     virtualAddrParams.regionParams[0].presRegion = &m_vdencBrcHistoryBuffer;
6276     virtualAddrParams.regionParams[0].isWritable = true;
6277     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
6278 
6279     // Store HUC_STATUS2 register bit 6 before HUC_Start command
6280     // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
6281     // (HUC_Start command with last start bit set).
6282     CODECHAL_DEBUG_TOOL(
6283         CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
6284     )
6285 
6286     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
6287 
6288     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
6289 
6290     // wait Huc completion (use HEVC bit for now)
6291     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
6292     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
6293     vdPipeFlushParams.Flags.bFlushHEVC = 1;
6294     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
6295     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
6296 
6297     // Flush the engine to ensure memory written out
6298     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
6299     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
6300     flushDwParams.bVideoPipelineCacheInvalidate = true;
6301     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
6302 
6303     if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && (m_numPipe == 1))
6304     {
6305         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
6306     }
6307 
6308     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
6309 
6310     if (!m_singleTaskPhaseSupported)
6311     {
6312         bool renderingFlags = m_videoContextUsesNullHw;
6313 
6314         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
6315             &cmdBuffer,
6316             CODECHAL_MEDIA_STATE_BRC_INIT_RESET,
6317             nullptr)));
6318 
6319         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
6320     }
6321 
6322     CODECHAL_DEBUG_TOOL(DumpHucBrcInit());
6323 
6324     return eStatus;
6325 }
6326 
ConstructHucCmdForBRC(PMOS_RESOURCE batchBuffer)6327 MOS_STATUS CodechalVdencHevcStateG11::ConstructHucCmdForBRC(PMOS_RESOURCE batchBuffer)
6328 {
6329     MOS_COMMAND_BUFFER cmdBuffer;
6330     int32_t currentPass = GetCurrentPass();
6331     uint16_t len = 0;
6332 
6333     MOS_LOCK_PARAMS lockFlags;
6334     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
6335     lockFlags.ReadOnly = true;
6336 
6337     uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, batchBuffer, &lockFlags);
6338     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
6339 
6340     CodechalCmdInitializerG11* pCmdInitializerG11 = static_cast<CodechalCmdInitializerG11*>(m_hucCmdInitializer);
6341     len = m_cmd2StartInBytes - m_picStateCmdStartInBytes;
6342     pCmdInitializerG11->AddCmdConstData(
6343         CODECHAL_CMD5,
6344         (uint32_t*)(data + m_picStateCmdStartInBytes),
6345         len,
6346         m_picStateCmdStartInBytes);
6347 
6348     m_osInterface->pfnUnlockResource(m_osInterface, batchBuffer);
6349 
6350     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
6351     CODECHAL_ENCODE_CHK_STATUS_RETURN(
6352         m_hucCmdInitializer->CmdInitializerExecute(true, &m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass], &cmdBuffer));
6353     ReturnCommandBuffer(&cmdBuffer);
6354 
6355     if (!m_singleTaskPhaseSupported)
6356     {
6357         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
6358             &cmdBuffer,
6359             CODECHAL_NUM_MEDIA_STATES,
6360             "HucCmd")));
6361 
6362         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, m_videoContextUsesNullHw));
6363         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->DumpHucCmdInit(&m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass])));
6364     }
6365 
6366     return MOS_STATUS_SUCCESS;
6367 }
6368 
HuCBrcUpdate()6369 MOS_STATUS CodechalVdencHevcStateG11::HuCBrcUpdate()
6370 {
6371     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6372 
6373     CODECHAL_ENCODE_FUNCTION_ENTER;
6374 
6375     int32_t currentPass = GetCurrentPass();
6376     if (currentPass < 0)
6377     {
6378         eStatus = MOS_STATUS_INVALID_PARAMETER;
6379         return eStatus;
6380     }
6381 
6382     MOS_COMMAND_BUFFER cmdBuffer;
6383 
6384     CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructBatchBufferHuCBRC(&m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass]));
6385     //For Group 3 cmds, they are constructed by driver, separate them into m_vdencGroup3BatchBuffer to avoid surface misorder under CP use case.
6386     CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructBatchBufferHuCBRCForGroup3(&m_vdencGroup3BatchBuffer[m_currRecycledBufIdx][currentPass]));
6387 
6388     CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructHucCmdForBRC(&m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass]));
6389 
6390     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
6391     if ((!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_brcInit)) && (m_numPipe == 1))
6392     {
6393         // Send command buffer header at the beginning (OS dependent)
6394         bool requestFrameTracking = m_singleTaskPhaseSupported ?
6395             m_firstTaskInPhase : 0;
6396         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
6397     }
6398 
6399     // load kernel from WOPCM into L2 storage RAM
6400     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
6401     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
6402 
6403     if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)  // Low Delay BRC
6404     {
6405         imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcLowdelayKernelDescriptor;
6406     }
6407     else
6408     {
6409         imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcUpdateKernelDescriptor;
6410     }
6411 
6412     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
6413 
6414     // pipe mode select
6415     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
6416     pipeModeSelectParams.Mode = m_mode;
6417     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
6418 
6419     // DMEM set
6420     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
6421 
6422     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
6423     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
6424     dmemParams.presHucDataSource = &(m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass]);
6425     dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
6426     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
6427 
6428     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
6429 
6430     // Set Const Data buffer
6431     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetConstDataHuCBrcUpdate());
6432 
6433     // Add Virtual addr
6434     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCBrcUpdate(&m_virtualAddrParams));
6435     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &m_virtualAddrParams));
6436 
6437     // Store HUC_STATUS2 register bit 6 before HUC_Start command
6438     // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
6439     // (HUC_Start command with last start bit set).
6440     CODECHAL_DEBUG_TOOL(
6441         CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
6442     )
6443 
6444     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
6445 
6446     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
6447 
6448     // wait Huc completion (use HEVC bit for now)
6449     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
6450     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
6451     vdPipeFlushParams.Flags.bFlushHEVC = 1;
6452     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
6453     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
6454 
6455     // Flush the engine to ensure memory written out
6456     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
6457     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
6458     flushDwParams.bVideoPipelineCacheInvalidate = true;
6459     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
6460 
6461     // Write HUC_STATUS mask: DW1 (mask value)
6462     MHW_MI_STORE_DATA_PARAMS storeDataParams;
6463     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
6464     storeDataParams.pOsResource = &m_resPakMmioBuffer;
6465     storeDataParams.dwResourceOffset = sizeof(uint32_t);
6466     storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK;
6467     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
6468 
6469     // store HUC_STATUS register: DW0 (actual value)
6470     CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
6471     auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex);
6472     MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
6473     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
6474     storeRegParams.presStoreBuffer = &m_resPakMmioBuffer;
6475     storeRegParams.dwOffset = 0;
6476     storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
6477     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
6478 
6479     // DW0 & DW1 will considered together for conditional batch buffer end cmd later
6480     if ((!m_singleTaskPhaseSupported) && (m_osInterface->bNoParsingAssistanceInKmd) && (m_numPipe == 1))
6481     {
6482         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
6483     }
6484 
6485     // HuC Input
6486     CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(true));
6487 
6488     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
6489 
6490     if (!m_singleTaskPhaseSupported)
6491     {
6492         bool renderingFlags = m_videoContextUsesNullHw;
6493 
6494         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
6495             &cmdBuffer,
6496             CODECHAL_MEDIA_STATE_BRC_UPDATE,
6497             nullptr)));
6498 
6499         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
6500     }
6501 
6502     // HuC Output
6503     CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(false));
6504 
6505     return eStatus;
6506 }
6507 
SetVdencPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams)6508 void CodechalVdencHevcStateG11::SetVdencPipeBufAddrParams(
6509     MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams)
6510 {
6511     CODECHAL_ENCODE_FUNCTION_ENTER;
6512 
6513     CodechalVdencHevcState::SetVdencPipeBufAddrParams(pipeBufAddrParams);
6514 
6515     PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex];
6516     if (!Mos_ResourceIsNull(&tileStatisticsBuffer->sResource))
6517     {
6518         pipeBufAddrParams.presVdencStreamOutBuffer = &tileStatisticsBuffer->sResource;
6519         pipeBufAddrParams.dwVdencStatsStreamOutOffset = m_hevcTileStatsOffset.uiVdencStatistics;
6520     }
6521 }
6522 
UpdateCmdBufAttribute(PMOS_COMMAND_BUFFER cmdBuffer,bool renderEngineInUse)6523 MOS_STATUS CodechalVdencHevcStateG11::UpdateCmdBufAttribute(
6524     PMOS_COMMAND_BUFFER cmdBuffer,
6525     bool                renderEngineInUse)
6526 {
6527     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6528 
6529     // should not be there. Will remove it in the next change
6530     CODECHAL_ENCODE_FUNCTION_ENTER;
6531     if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
6532     {
6533         PMOS_CMD_BUF_ATTRI_VE attriExt =
6534             (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
6535 
6536         memset(attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE));
6537         attriExt->bUseVirtualEngineHint =
6538             attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse;
6539     }
6540 
6541     return eStatus;
6542 }
6543 
SetAndPopulateVEHintParams(PMOS_COMMAND_BUFFER cmdBuffer)6544 MOS_STATUS CodechalVdencHevcStateG11::SetAndPopulateVEHintParams(
6545     PMOS_COMMAND_BUFFER  cmdBuffer)
6546 {
6547     MOS_STATUS                      eStatus = MOS_STATUS_SUCCESS;
6548 
6549     CODECHAL_ENCODE_FUNCTION_ENTER;
6550 
6551     if (!MOS_VE_SUPPORTED(m_osInterface))
6552     {
6553         return eStatus;
6554     }
6555 
6556     CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS scalSetParms;
6557     MOS_ZeroMemory(&scalSetParms, sizeof(CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS));
6558 
6559     if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
6560     {
6561         scalSetParms.bNeedSyncWithPrevious = true;
6562     }
6563 
6564     int32_t currentPass = GetCurrentPass();
6565     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
6566     {
6567         eStatus = MOS_STATUS_INVALID_PARAMETER;
6568         return eStatus;
6569     }
6570     uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
6571     if (m_numPipe >= 2)
6572     {
6573         for (auto i = 0; i < m_numPipe; i++)
6574         {
6575             scalSetParms.veBatchBuffer[i] = m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex].OsResource;
6576         }
6577     }
6578 
6579     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_SetHintParams(this, m_scalabilityState, &scalSetParms));
6580     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
6581     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_PopulateHintParams(m_scalabilityState, cmdBuffer));
6582 
6583     return eStatus;
6584 }
6585 
6586 #if USE_CODECHAL_DEBUG_TOOL
DumpHucPakIntegrate()6587 MOS_STATUS CodechalVdencHevcStateG11::DumpHucPakIntegrate()
6588 {
6589     int32_t currentPass = GetCurrentPass();
6590     // HuC Input
6591     // HuC DMEM
6592     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
6593         &m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass],
6594         MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG11), CODECHAL_CACHELINE_SIZE),
6595         currentPass,
6596         hucRegionDumpPakIntegrate));
6597 
6598     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
6599         &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
6600         0,
6601         m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize,
6602         0,
6603         "",
6604         true,
6605         currentPass,
6606         hucRegionDumpPakIntegrate));
6607 
6608     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
6609         &m_resHuCPakAggregatedFrameStatsBuffer.sResource,
6610         0,
6611         m_resHuCPakAggregatedFrameStatsBuffer.dwSize,
6612         1,
6613         "",
6614         true,
6615         currentPass,
6616         hucRegionDumpPakIntegrate));
6617 
6618     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
6619         &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource,
6620         0,
6621         m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize,
6622         15,
6623         "",
6624         true,
6625         currentPass,
6626         hucRegionDumpPakIntegrate));
6627 
6628     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
6629         &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource,
6630         0,
6631         m_vdenc2ndLevelBatchBufferSize[m_currRecycledBufIdx],
6632         7,
6633         "",
6634         true,
6635         currentPass,
6636         hucRegionDumpPakIntegrate));
6637 
6638     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
6639         &m_resBitstreamBuffer,
6640         0,
6641         m_encodeParams.dwBitstreamSize,
6642         5,
6643         "",
6644         false,
6645         currentPass,
6646         hucRegionDumpPakIntegrate));
6647 
6648     // Region 6 - BRC History buffer
6649     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
6650         &m_vdencBrcHistoryBuffer,
6651         0,
6652         CODECHAL_VDENC_HEVC_BRC_HISTORY_BUF_SIZE,
6653         6,
6654         "",
6655         false,
6656         currentPass,
6657         hucRegionDumpPakIntegrate));
6658 
6659     // Region 9 - HCP BRC Data Output
6660     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
6661         &m_resBrcDataBuffer,
6662         0,
6663         CODECHAL_CACHELINE_SIZE,
6664         9,
6665         "",
6666         false,
6667         currentPass,
6668         hucRegionDumpPakIntegrate));
6669 
6670     return MOS_STATUS_SUCCESS;
6671 }
6672 
DumpHucCqp()6673 MOS_STATUS CodechalVdencHevcStateG11::DumpHucCqp()
6674 {
6675     CODECHAL_ENCODE_FUNCTION_ENTER;
6676     int32_t currentPass = GetCurrentPass();
6677 
6678     // Region 5 - Output SLB Buffer
6679     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
6680         &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource,
6681         0,
6682         m_hwInterface->m_vdenc2ndLevelBatchBufferSize,
6683         5,
6684         "_Out_Slb",
6685         false,
6686         currentPass,
6687         hucRegionDumpUpdate));
6688 
6689     return MOS_STATUS_SUCCESS;
6690 }
6691 
DumpVdencOutputs()6692 MOS_STATUS CodechalVdencHevcStateG11::DumpVdencOutputs()
6693 {
6694     CODECHAL_ENCODE_FUNCTION_ENTER;
6695 
6696     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::DumpVdencOutputs());
6697 
6698     if (m_hevcPicParams->tiles_enabled_flag)
6699     {
6700         PMOS_RESOURCE presVdencTileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;
6701         auto          num_tiles                     = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1);
6702         auto vdencStatsSizeAllTiles = num_tiles * m_vdencBrcStatsBufferSize;
6703         auto          vdencStatsOffset              = m_hevcTileStatsOffset.uiVdencStatistics;
6704 
6705         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6706             presVdencTileStatisticsBuffer,
6707             CodechalDbgAttr::attrVdencOutput,
6708             "_TileStats",
6709             vdencStatsSizeAllTiles,
6710             vdencStatsOffset,
6711             CODECHAL_NUM_MEDIA_STATES));
6712 
6713         // Slice Size Conformance
6714         if (m_hevcSeqParams->SliceSizeControl)
6715         {
6716             PMOS_RESOURCE presLcuBaseAddressBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;
6717             auto          sliceStreamoutOffset     = m_hevcTileStatsOffset.uiHevcSliceStreamout;
6718             uint32_t size = m_numLcu * CODECHAL_CACHELINE_SIZE;
6719             // Slice Size StreamOut Surface
6720             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6721                 presLcuBaseAddressBuffer,
6722                 CodechalDbgAttr::attrVdencOutput,
6723                 "_SliceSize",
6724                 size,
6725                 sliceStreamoutOffset,
6726                 CODECHAL_NUM_MEDIA_STATES));
6727         }
6728     }
6729     return MOS_STATUS_SUCCESS;
6730 }
6731 #endif
6732