1 /*
2 * Copyright (c) 2017-2020, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_vdenc_hevc_g11.cpp
24 //! \brief HEVC VDEnc encoder for GEN11.
25 //!
26
27 #include "codechal_vdenc_hevc_g11.h"
28 #include "codechal_kernel_header_g11.h"
29 #include "codeckrnheader.h"
30 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
31 #include "igcodeckrn_g11.h"
32 #endif
33 #include "mhw_vdbox_g11_X.h"
34 #include "mhw_vdbox_hcp_g11_X.h"
35 #include "mhw_vdbox_vdenc_g11_X.h"
36 #include "codechal_huc_cmd_initializer_g11.h"
37 #include "codechal_debug_encode_par_g11.h"
38 #ifdef _ENCODE_VDENC_RESERVED
39 #include "codechal_debug_encode_brc.h"
40 #endif
41
42 const double CodechalVdencHevcStateG11::m_devThreshIFPNEG[] = {
43 0.80, 0.60, 0.34, 0.2,
44 };
45
46 const double CodechalVdencHevcStateG11::m_devThreshIFPPOS[] = {
47 0.2, 0.4 , 0.66, 0.9,
48 };
49
50 const double CodechalVdencHevcStateG11::m_devThreshPBFPNEG[] = {
51 0.90, 0.66, 0.46, 0.3,
52 };
53
54 const double CodechalVdencHevcStateG11::m_devThreshPBFPPOS[] = {
55 0.3, 0.46, 0.70, 0.90,
56 };
57
58 const double CodechalVdencHevcStateG11::m_devThreshVBRNEG[] = {
59 0.90, 0.70, 0.50, 0.3,
60 };
61
62 const double CodechalVdencHevcStateG11::m_devThreshVBRPOS[] = {
63 0.4, 0.5, 0.75, 0.90,
64 };
65
66 const int8_t CodechalVdencHevcStateG11::m_lowdelayDevThreshPB[] = {
67 -45, -33, -23, -15, -8, 0, 15, 25,
68 };
69 const int8_t CodechalVdencHevcStateG11::m_lowdelayDevThreshVBR[] = {
70 -45, -35, -25, -15, -8, 0, 20, 40,
71 };
72 const int8_t CodechalVdencHevcStateG11::m_lowdelayDevThreshI[] = {
73 -40, -30, -17, -10, -5, 0, 10, 20,
74 };
75
76 const int8_t CodechalVdencHevcStateG11::m_lowdelayDeltaFrmszI[][8] = {
77 { 0, 0, -8, -12, -16, -20, -28, -36 },
78 { 0, 0, -4, -8, -12, -16, -24, -32 },
79 { 4, 2, 0, -1, -3, -8, -16, -24 },
80 { 8, 4, 2, 0, -1, -4, -8, -16 },
81 { 20, 16, 4, 0, -1, -4, -8, -16 },
82 { 24, 20, 16, 8, 4, 0, -4, -8 },
83 { 28, 24, 20, 16, 8, 4, 0, -8 },
84 { 32, 24, 20, 16, 8, 4, 0, -4 },
85 { 64, 48, 28, 20, 16, 12, 8, 4 },
86 };
87
88 const int8_t CodechalVdencHevcStateG11::m_lowdelayDeltaFrmszP[][8] = {
89 { -8, -24, -32, -40, -44, -48, -52, -80 },
90 { -8, -16, -32, -40, -40, -44, -44, -56 },
91 { 0, 0, -12, -20, -24, -28, -32, -36 },
92 { 8, 4, 0, 0, -8, -16, -24, -32 },
93 { 32, 16, 8, 4, -4, -8, -16, -20 },
94 { 36, 24, 16, 8, 4, -2, -4, -8 },
95 { 40, 36, 24, 20, 16, 8, 0, -8 },
96 { 48, 40, 28, 24, 20, 12, 0, -4 },
97 { 64, 48, 28, 20, 16, 12, 8, 4 },
98 };
99
100 const int8_t CodechalVdencHevcStateG11::m_lowdelayDeltaFrmszB[][8] = {
101 { 0, -4, -8, -16, -24, -32, -40, -48 },
102 { 1, 0, -4, -8, -16, -24, -32, -40 },
103 { 4, 2, 0, -1, -3, -8, -16, -24 },
104 { 8, 4, 2, 0, -1, -4, -8, -16 },
105 { 20, 16, 4, 0, -1, -4, -8, -16 },
106 { 24, 20, 16, 8, 4, 0, -4, -8 },
107 { 28, 24, 20, 16, 8, 4, 0, -8 },
108 { 32, 24, 20, 16, 8, 4, 0, -4 },
109 { 64, 48, 28, 20, 16, 12, 8, 4 },
110 };
111
//!
//! File-scope lookup tables with 52 entries each (indices 0..51). Despite the
//! m_ prefix they are not class members. Both are non-decreasing. The index is
//! presumably a QP value - TODO confirm against the code that reads them.
//!
const uint8_t m_qpAdaptiveWeight[52] =
{
     6,  6,  6,  6,  6,  6,  6,  6,  6,  7,   //  0 ..  9
     7,  7,  7,  7,  7,  7,  8,  8,  8,  8,   // 10 .. 19
     8,  8,  8,  9,  9, 10, 11, 12, 13, 14,   // 20 .. 29
    16, 17, 18, 20, 21, 23, 24, 26, 28, 30,   // 30 .. 39
    32, 34, 36, 38, 40, 42, 44, 46, 48, 50,   // 40 .. 49
    50, 50                                    // 50 .. 51
};

const uint8_t m_boostTable[52] =
{
     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,   //  0 ..  9
     3,  3,  3,  3,  3,  4,  4,  5,  5,  5,   // 10 .. 19
     6,  6,  6,  7,  7,  8,  8,  8,  9,  9,   // 20 .. 29
     9, 10, 10, 10, 11, 11, 11, 11, 11, 11,   // 30 .. 39
    11, 11, 12, 12, 12, 12, 12, 12, 12, 12,   // 40 .. 49
    12, 12                                    // 50 .. 51
};
123
124 const uint32_t CodechalVdencHevcStateG11::m_hucConstantData[] = {
125 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x012c012c, 0x012c012c, 0x012c012c,
126 0x012c012c, 0x012c012c, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00640064,
127 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064,
128 0x00640064, 0x00640064, 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x012c012c,
129 0x012c012c, 0x012c012c, 0x012c012c, 0x012c012c, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00c800c8,
130 0x00c800c8, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064,
131 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x503c1e04, 0xffc88c78, 0x3c1e0400, 0xc88c7850,
132 0x140200ff, 0xa0824628, 0x0000ffc8, 0x00000000, 0x04030302, 0x00000000, 0x03030200, 0x0000ff04,
133 0x02020000, 0xffff0303, 0x01000000, 0xff020202, 0x0000ffff, 0x02020100, 0x00fffffe, 0x01010000,
134 0xfffffe02, 0x010000ff, 0xfefe0201, 0x0000ffff, 0xfe010100, 0x00fffffe, 0x01010000, 0x00000000,
135 0x03030200, 0x00000004, 0x03020000, 0x00ff0403, 0x02000000, 0xff030302, 0x000000ff, 0x02020201,
136 0x00ffffff, 0x02010000, 0xfffffe02, 0x01000000, 0xfffe0201, 0x0000ffff, 0xfe020101, 0x00fffffe,
137 0x01010000, 0xfffffefe, 0x01000000, 0x00000001, 0x03020000, 0x00000403, 0x02000000, 0xff040303,
138 0x00000000, 0x03030202, 0x0000ffff, 0x02020100, 0xffffff02, 0x01000000, 0xfffe0202, 0x000000ff,
139 0xfe020101, 0x00ffffff, 0x02010100, 0xfffffefe, 0x01000000, 0xfffefe01, 0x000000ff, 0xe0e00101,
140 0xc0d0d0d0, 0xe0e0b0c0, 0xd0d0d0e0, 0xf0f0c0d0, 0xd0e0e0e0, 0x0408d0d0, 0xe8f0f800, 0x1820dce0,
141 0xf8fc0210, 0x2024ecf0, 0x0008101c, 0x2428f8fc, 0x08101418, 0x2830f800, 0x0c14181c, 0x3040fc00,
142 0x0c10141c, 0xe8f80408, 0xc8d0d4e0, 0xf0f8b0c0, 0xccd4d8e0, 0x0000c0c8, 0xd8dce4f0, 0x0408d0d4,
143 0xf0f80000, 0x0808dce8, 0xf0f80004, 0x0810dce8, 0x00080808, 0x0810f8fc, 0x08080808, 0x1010f800,
144 0x08080808, 0x1020fc00, 0x08080810, 0xfc000408, 0xe0e8f0f8, 0x0001d0d8, 0xe8f0f8fc, 0x0204d8e0,
145 0xf8fdff00, 0x0408e8f0, 0xfcff0002, 0x1014f0f8, 0xfcff0004, 0x1418f0f8, 0x00040810, 0x181cf8fc,
146 0x04081014, 0x1820f800, 0x04081014, 0x3040fc00, 0x0c10141c, 0x40300408, 0x80706050, 0x30a0a090,
147 0x70605040, 0xa0a09080, 0x60504030, 0xa0908070, 0x040201a0, 0x18141008, 0x02012420, 0x0a080604,
148 0x01101010, 0x0c080402, 0x10101010, 0x05030201, 0x02010106, 0x00000503, 0xff030201, 0x02010000,
149 0x000000ff, 0xfffefe01, 0xfdfd0100, 0xfb00ffff, 0xfffffefd, 0xfefdfbfa, 0x030201ff, 0x01010605,
150 0x00050302, 0x03020101, 0x010000ff, 0x0000ff02, 0xffff0100, 0xfe0100ff, 0x00ffffff, 0xfffffefc,
151 0xfefcfb00, 0x0101ffff, 0x01050402, 0x04020101, 0x01010000, 0x0000ff02, 0x00ff0101, 0xff000000,
152 0x0100ffff, 0xfffffffe, 0xfffefd00, 0xfcfb00ff, 0x1efffffe, 0x070d0e10, 0x00003207, 0x00000000,
153 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
154 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
155 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
156 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
157 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
158 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
159 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
160 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
161 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
162 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
163 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
164 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
165 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
166 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
167 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
168 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
169 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
170 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
171 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
172 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
173 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
174 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
175 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
176 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
177 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
178 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
179 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
180 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
181 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
182 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
183 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
184 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
185 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
186 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
187 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
188 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
189 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
190 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
191 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
192 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
193 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
194 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
195 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
196 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
197 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
198 0x00000000, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
199 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
200 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
201 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000,
202 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff,
203 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff,
204 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000,
205 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff,
206 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff,
207 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
208 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
209 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
210 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000,
211 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff,
212 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff,
213 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000,
214 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff,
215 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff,
216 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
217 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
218 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
219 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000,
220 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff,
221 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff,
222 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000,
223 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff,
224 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff,
225 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
226 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
227 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
228 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000,
229 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff,
230 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff,
231 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000,
232 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff,
233 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff,
234 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
235 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
236 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
237 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff
238 };
239
GetMaxBtCount()240 uint32_t CodechalVdencHevcStateG11::GetMaxBtCount()
241 {
242 CODECHAL_ENCODE_FUNCTION_ENTER;
243
244 uint32_t maxBtCount = 0;
245
246 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
247 auto btIdxAlignment = m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
248
249 // DsConversion kernel
250 maxBtCount = 2 * (MOS_ALIGN_CEIL(m_cscDsState->GetBTCount(), btIdxAlignment));
251 #endif
252
253 // add ME and stream-in later
254 return maxBtCount;
255 }
256
InitKernelStateMe()257 MOS_STATUS CodechalVdencHevcStateG11::InitKernelStateMe()
258 {
259 CODECHAL_ENCODE_FUNCTION_ENTER;
260
261 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
262
263 CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface);
264 CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface->pStateHeapInterface);
265
266 uint32_t kernelSize = m_combinedKernelSize;
267 CODECHAL_KERNEL_HEADER currKrnHeader;
268 CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
269 m_kernelBinary,
270 ENC_ME,
271 0,
272 &currKrnHeader,
273 &kernelSize));
274
275 auto kernelStatePtr = &m_vdencMeKernelState;
276 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
277 VDENC_ME_P,
278 &kernelStatePtr->KernelParams));
279
280 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
281 VDENC_ME_P,
282 &m_vdencMeKernelBindingTable));
283
284 kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
285 kernelStatePtr->KernelParams.pBinary =
286 m_kernelBinary +
287 (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
288 kernelStatePtr->KernelParams.iSize = kernelSize;
289
290 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
291 m_stateHeapInterface,
292 kernelStatePtr->KernelParams.iBTCount,
293 &kernelStatePtr->dwSshSize,
294 &kernelStatePtr->dwBindingTableSize));
295
296 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
297
298 return eStatus;
299 }
300
InitKernelStateStreamIn()301 MOS_STATUS CodechalVdencHevcStateG11::InitKernelStateStreamIn()
302 {
303 CODECHAL_ENCODE_FUNCTION_ENTER;
304 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
305
306 CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface);
307 CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface->pStateHeapInterface);
308
309 uint32_t kernelSize = m_combinedKernelSize;
310 CODECHAL_KERNEL_HEADER currKrnHeader;
311 CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
312 m_kernelBinary,
313 VDENC_STREAMIN_HEVC,
314 0,
315 &currKrnHeader,
316 &kernelSize));
317
318 auto kernelStatePtr = &m_vdencStreaminKernelState;
319 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
320 VDENC_STREAMIN_HEVC,
321 &kernelStatePtr->KernelParams));
322
323 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
324 VDENC_STREAMIN_HEVC,
325 &m_vdencStreaminKernelBindingTable));
326
327 kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
328 kernelStatePtr->KernelParams.pBinary =
329 m_kernelBinary +
330 (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
331 kernelStatePtr->KernelParams.iSize = kernelSize;
332
333 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
334 m_stateHeapInterface,
335 kernelStatePtr->KernelParams.iBTCount,
336 &kernelStatePtr->dwSshSize,
337 &kernelStatePtr->dwBindingTableSize));
338
339 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
340
341 return eStatus;
342 }
343
InitKernelState()344 MOS_STATUS CodechalVdencHevcStateG11::InitKernelState()
345 {
346 CODECHAL_ENCODE_FUNCTION_ENTER;
347
348 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
349
350 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
351 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMe());
352 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateStreamIn());
353 #endif
354
355 return eStatus;
356 }
357
DecideEncodingPipeNumber()358 MOS_STATUS CodechalVdencHevcStateG11::DecideEncodingPipeNumber()
359 {
360 CODECHAL_ENCODE_FUNCTION_ENTER;
361
362 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
363
364 m_numPipePre = m_numPipe;
365 m_numPipe = m_numVdbox;
366
367 uint8_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
368 uint8_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
369
370 CODECHAL_ENCODE_VERBOSEMESSAGE("Tile Columns = %d, Tile Rows = %d.", numTileColumns, numTileRows);
371
372 // Only support 1 colomn or 1 row when only have 1 VDBOX
373 if (m_numVdbox <= 1 && numTileRows > 1 && numTileColumns > 1)
374 {
375 CODECHAL_ENCODE_ASSERTMESSAGE("Only 1 VDBOX detected, and Gen11 only support 1xN or Nx1 tiles for single pipe!");
376 return MOS_STATUS_PLATFORM_NOT_SUPPORTED;
377 }
378
379 if (numTileColumns > m_numPipe)
380 {
381 m_numPipe = 1;
382 }
383
384 if (numTileColumns < m_numPipe)
385 {
386 if (numTileColumns >= 1 && numTileColumns <= 4)
387 {
388 m_numPipe = numTileColumns;
389 }
390 else
391 {
392 m_numPipe = 1; // invalid tile column test cases and switch back to the single VDBOX mode
393 }
394 }
395
396 m_useVirtualEngine = true; // always use virtual engine interface for single pipe and scalability mode
397
398 m_numUsedVdbox = m_numPipe;
399 m_numberTilesInFrame = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1);
400
401 if (m_scalabilityState)
402 {
403 // Create/ re-use a GPU context with 2 pipes
404 m_scalabilityState->ucScalablePipeNum = m_numPipe;
405 }
406
407 CODECHAL_ENCODE_VERBOSEMESSAGE("System VDBOX number = %d, decided pipe num = %d.", m_numVdbox, m_numPipe);
408
409 return eStatus;
410 }
411
CheckSupportedFormat(PMOS_SURFACE surface)412 bool CodechalVdencHevcStateG11::CheckSupportedFormat(PMOS_SURFACE surface)
413 {
414 CODECHAL_ENCODE_FUNCTION_ENTER;
415
416 bool isColorFormatSupported = false;
417
418 if (nullptr == surface)
419 {
420 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
421 return isColorFormatSupported;
422 }
423
424 switch (surface->Format)
425 {
426 case Format_NV12:
427 case Format_NV21:
428 case Format_P010: // Planar 4:2:0
429 case Format_YUY2:
430 case Format_YUYV:
431 case Format_YVYU:
432 case Format_UYVY:
433 case Format_VYUY:
434 case Format_A8R8G8B8:
435 case Format_A8B8G8R8:
436 case Format_R10G10B10A2:// Packed RGB 4:4:4
437 case Format_B10G10R10A2:// Packed RGB 4:4:4
438 case Format_AYUV:
439 case Format_Y410: // Packed 4:4:4
440 isColorFormatSupported = true;
441 break;
442 case Format_Y210: // Packed 4:2:2
443 isColorFormatSupported = surface->TileType == MOS_TILE_Y;
444 break;
445 default:
446 CODECHAL_ENCODE_ASSERTMESSAGE("Input surface color format = %d not supported!", surface->Format);
447 break;
448 }
449
450 return isColorFormatSupported;
451 }
452
PlatformCapabilityCheck()453 MOS_STATUS CodechalVdencHevcStateG11::PlatformCapabilityCheck()
454 {
455 CODECHAL_ENCODE_FUNCTION_ENTER;
456
457 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
458
459 CODECHAL_ENCODE_CHK_STATUS_RETURN(DecideEncodingPipeNumber());
460
461 if (MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
462 {
463 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ChkGpuCtxReCreation(this, m_scalabilityState,
464 (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
465 }
466
467 if (m_frameWidth * m_frameHeight > ENCODE_HEVC_MAX_8K_PIC_WIDTH * ENCODE_HEVC_MAX_8K_PIC_HEIGHT)
468 {
469 eStatus = MOS_STATUS_INVALID_PARAMETER;
470 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Frame resolution greater than 8k not supported");
471 }
472
473 if (m_hevcSeqParams->SliceSizeControl && m_frameWidth * m_frameHeight < ENCODE_HEVC_MIN_DSS_PIC_WIDTH * ENCODE_HEVC_MIN_DSS_PIC_HEIGHT)
474 {
475 eStatus = MOS_STATUS_INVALID_PARAMETER;
476 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "DSS is not supported when frame resolution less than 320p");
477 }
478
479 if (m_hevcSeqParams->ParallelBRC)
480 {
481 eStatus = MOS_STATUS_INVALID_PARAMETER;
482 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Parallel BRC is not supported on VDENC");
483 }
484
485 if (m_hevcSeqParams->bit_depth_luma_minus8 >= 4 || m_hevcSeqParams->bit_depth_chroma_minus8 >= 4)
486 {
487 eStatus = MOS_STATUS_INVALID_PARAMETER;
488 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "12bit encoding is not supported on VDENC");
489 }
490
491 if (m_hevcSeqParams->chroma_format_idc == 2)
492 {
493 eStatus = MOS_STATUS_INVALID_PARAMETER;
494 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "422 recon format encoding is not supported on HEVC VDENC");
495 }
496
497 if (m_vdencEnabled && m_chromaFormat == HCP_CHROMA_FORMAT_YUV444 && m_hevcSeqParams->TargetUsage == 7)
498 {
499 CODECHAL_ENCODE_ASSERTMESSAGE("Speed mode is not supported in VDENC 444, resetting TargetUsage to Normal mode\n");
500 m_hevcSeqParams->TargetUsage = 4;
501 }
502
503 bool oneLcuInTile = false;
504 uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
505 for (auto i = 0; i < numTileColumns; i++)
506 {
507 if (m_hevcPicParams->tile_column_width[i] == 1)
508 {
509 oneLcuInTile = true;
510 break;
511 }
512 }
513
514 //Commented out in order to enable height of 64 pixels for Row tile on PO silicon for ICL.
515 /* uint16_t numTileTows = pHevcPicParams->num_tile_rows_minus1 + 1;
516 for (auto i = 0; i < numTileTows; i++)
517 {
518 if (pHevcPicParams->tile_row_height[i] == 1)
519 {
520 oneLcuInTile = true;
521 break;
522 }
523 }*/
524
525 if (oneLcuInTile)
526 {
527 eStatus = MOS_STATUS_INVALID_PARAMETER;
528 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Tile width/ height of 1 LCU is not supported");
529 }
530
531 // TU configuration for RDOQ
532 if (m_hevcRdoqEnabled)
533 {
534 m_hevcRdoqEnabled = (m_hevcSeqParams->TargetUsage < 7);
535 }
536
537 // set RDOQ Intra blocks Threshold for Gen11+
538 m_rdoqIntraTuThreshold = 0;
539 if (m_hevcRdoqEnabled)
540 {
541 if (1 == m_hevcSeqParams->TargetUsage)
542 {
543 m_rdoqIntraTuThreshold = 0xffff;
544 }
545 else if (4 == m_hevcSeqParams->TargetUsage)
546 {
547 m_rdoqIntraTuThreshold = m_picWidthInMb * m_picHeightInMb;
548 m_rdoqIntraTuThreshold = MOS_MIN(m_rdoqIntraTuThreshold / 10, 0xffff);
549 }
550 }
551
552 return eStatus;
553 }
554
SetStreaminDataPerLcu(PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,void * streaminData)555 void CodechalVdencHevcStateG11::SetStreaminDataPerLcu(
556 PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,
557 void* streaminData)
558 {
559 CODECHAL_ENCODE_FUNCTION_ENTER;
560 PCODECHAL_VDENC_HEVC_STREAMIN_STATE_G11 data = (PCODECHAL_VDENC_HEVC_STREAMIN_STATE_G11)streaminData;
561 if (streaminParams->setQpRoiCtrl)
562 {
563 if (m_vdencNativeROIEnabled || m_brcAdaptiveRegionBoostEnable)
564 {
565 data->DW0.RoiCtrl = streaminParams->roiCtrl;
566 }
567 else
568 {
569 data->DW7.QpEnable = 0xf;
570 data->DW14.ForceQp_0 = streaminParams->forceQp[0];
571 data->DW14.ForceQp_1 = streaminParams->forceQp[1];
572 data->DW14.ForceQp_2 = streaminParams->forceQp[2];
573 data->DW14.ForceQp_3 = streaminParams->forceQp[3];
574 }
575 }
576 else
577 {
578 data->DW0.MaxTuSize = streaminParams->maxTuSize;
579 data->DW0.MaxCuSize = streaminParams->maxCuSize;
580 data->DW0.NumImePredictors = streaminParams->numImePredictors;
581 data->DW0.PuTypeCtrl = streaminParams->puTypeCtrl;
582 data->DW6.NumMergeCandidateCu64x64 = streaminParams->numMergeCandidateCu64x64;
583 data->DW6.NumMergeCandidateCu32x32 = streaminParams->numMergeCandidateCu32x32;
584 data->DW6.NumMergeCandidateCu16x16 = streaminParams->numMergeCandidateCu16x16;
585 data->DW6.NumMergeCandidateCu8x8 = streaminParams->numMergeCandidateCu8x8;
586 }
587 }
588
AllocatePakResources()589 MOS_STATUS CodechalVdencHevcStateG11::AllocatePakResources()
590 {
591 CODECHAL_ENCODE_FUNCTION_ENTER;
592
593 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
594
595 uint32_t mvt_size = MOS_ALIGN_CEIL(((m_frameWidth + 63) >> 6)*((m_frameHeight + 15) >> 4), 2) * CODECHAL_CACHELINE_SIZE;
596 uint32_t mvtb_size = MOS_ALIGN_CEIL(((m_frameWidth + 31) >> 5)*((m_frameHeight + 31) >> 5), 2) * CODECHAL_CACHELINE_SIZE;
597 m_sizeOfMvTemporalBuffer = MOS_MAX(mvt_size, mvtb_size);
598
599 const uint32_t picWidthInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_LCU_SIZE); //assume smallest LCU to get max width
600 const uint32_t picHeightInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameHeight, CODECHAL_HEVC_MIN_LCU_SIZE); //assume smallest LCU to get max height
601
602 MHW_VDBOX_HCP_BUFFER_SIZE_PARAMS hcpBufSizeParam;
603 MOS_ZeroMemory(&hcpBufSizeParam, sizeof(hcpBufSizeParam));
604 hcpBufSizeParam.ucMaxBitDepth = m_bitDepth;
605 hcpBufSizeParam.ucChromaFormat = m_chromaFormat;
606 // We should move the buffer allocation to picture level if the size is dependent on LCU size
607 hcpBufSizeParam.dwCtbLog2SizeY = 6; //assume Max LCU size
608 hcpBufSizeParam.dwPicWidth = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE);
609 hcpBufSizeParam.dwPicHeight = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE);
610
611 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
612 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
613 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
614 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
615 allocParamsForBufferLinear.Format = Format_Buffer;
616
617 // Deblocking Filter Row Store Scratch data surface
618 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
619 MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_LINE,
620 &hcpBufSizeParam);
621
622 if (eStatus != MOS_STATUS_SUCCESS)
623 {
624 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Row Store Scratch Buffer.");
625 return eStatus;
626 }
627
628 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
629 allocParamsForBufferLinear.pBufName = "DeblockingScratchBuffer";
630
631 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
632 m_osInterface,
633 &allocParamsForBufferLinear,
634 &m_resDeblockingFilterRowStoreScratchBuffer);
635
636 if (eStatus != MOS_STATUS_SUCCESS)
637 {
638 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Row Store Scratch Buffer.");
639 return eStatus;
640 }
641
642 // Deblocking Filter Tile Row Store Scratch data surface
643 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
644 MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_LINE,
645 &hcpBufSizeParam);
646
647 if (eStatus != MOS_STATUS_SUCCESS)
648 {
649 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Row Store Scratch Buffer.");
650 return eStatus;
651 }
652
653 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
654 allocParamsForBufferLinear.pBufName = "DeblockingTileRowScratchBuffer";
655
656 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
657 m_osInterface,
658 &allocParamsForBufferLinear,
659 &m_resDeblockingFilterTileRowStoreScratchBuffer);
660
661 if (eStatus != MOS_STATUS_SUCCESS)
662 {
663 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Row Store Scratch Buffer.");
664 return eStatus;
665 }
666
667 // Deblocking Filter Column Row Store Scratch data surface
668 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
669 MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_COL,
670 &hcpBufSizeParam);
671
672 if (eStatus != MOS_STATUS_SUCCESS)
673 {
674 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Column Store Scratch Buffer.");
675 return eStatus;
676 }
677
678 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
679 allocParamsForBufferLinear.pBufName = "DeblockingColumnScratchBuffer";
680
681 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
682 m_osInterface,
683 &allocParamsForBufferLinear,
684 &m_resDeblockingFilterColumnRowStoreScratchBuffer);
685
686 if (eStatus != MOS_STATUS_SUCCESS)
687 {
688 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Column Row Store Scratch Buffer.");
689 return eStatus;
690 }
691
692 // Metadata Line buffer
693 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
694 MHW_VDBOX_HCP_INTERNAL_BUFFER_META_LINE,
695 &hcpBufSizeParam);
696
697 if (eStatus != MOS_STATUS_SUCCESS)
698 {
699 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Line Buffer.");
700 return eStatus;
701 }
702
703 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
704 allocParamsForBufferLinear.pBufName = "MetadataLineBuffer";
705
706 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
707 m_osInterface,
708 &allocParamsForBufferLinear,
709 &m_resMetadataLineBuffer);
710
711 if (eStatus != MOS_STATUS_SUCCESS)
712 {
713 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Line Buffer.");
714 return eStatus;
715 }
716
717 // Metadata Tile Line buffer
718 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
719 MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_LINE,
720 &hcpBufSizeParam);
721
722 if (eStatus != MOS_STATUS_SUCCESS)
723 {
724 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Line Buffer.");
725 return eStatus;
726 }
727
728 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
729 allocParamsForBufferLinear.pBufName = "MetadataTileLineBuffer";
730
731 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
732 m_osInterface,
733 &allocParamsForBufferLinear,
734 &m_resMetadataTileLineBuffer);
735
736 if (eStatus != MOS_STATUS_SUCCESS)
737 {
738 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Line Buffer.");
739 return eStatus;
740 }
741
742 // Metadata Tile Column buffer
743 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
744 MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_COL,
745 &hcpBufSizeParam);
746
747 if (eStatus != MOS_STATUS_SUCCESS)
748 {
749 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Column Buffer.");
750 return eStatus;
751 }
752
753 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
754 allocParamsForBufferLinear.pBufName = "MetadataTileColumnBuffer";
755
756 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
757 m_osInterface,
758 &allocParamsForBufferLinear,
759 &m_resMetadataTileColumnBuffer);
760
761 if (eStatus != MOS_STATUS_SUCCESS)
762 {
763 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Column Buffer.");
764 return eStatus;
765 }
766
767 // SAO Line buffer
768 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
769 MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_LINE,
770 &hcpBufSizeParam);
771
772 if (eStatus != MOS_STATUS_SUCCESS)
773 {
774 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Line Buffer.");
775 return eStatus;
776 }
777
778 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
779 allocParamsForBufferLinear.pBufName = "SaoLineBuffer";
780
781 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
782 m_osInterface,
783 &allocParamsForBufferLinear,
784 &m_resSaoLineBuffer);
785
786 if (eStatus != MOS_STATUS_SUCCESS)
787 {
788 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Line Buffer.");
789 return eStatus;
790 }
791
792 // SAO Tile Line buffer
793 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
794 MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_LINE,
795 &hcpBufSizeParam);
796
797 if (eStatus != MOS_STATUS_SUCCESS)
798 {
799 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Line Buffer.");
800 return eStatus;
801 }
802
803 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
804 allocParamsForBufferLinear.pBufName = "SaoTileLineBuffer";
805
806 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
807 m_osInterface,
808 &allocParamsForBufferLinear,
809 &m_resSaoTileLineBuffer);
810
811 if (eStatus != MOS_STATUS_SUCCESS)
812 {
813 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Line Buffer.");
814 return eStatus;
815 }
816
817 // SAO Tile Column buffer
818 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
819 MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_COL,
820 &hcpBufSizeParam);
821
822 if (eStatus != MOS_STATUS_SUCCESS)
823 {
824 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Column Buffer.");
825 return eStatus;
826 }
827
828 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
829 allocParamsForBufferLinear.pBufName = "SaoTileColumnBuffer";
830
831 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
832 m_osInterface,
833 &allocParamsForBufferLinear,
834 &m_resSaoTileColumnBuffer);
835
836 if (eStatus != MOS_STATUS_SUCCESS)
837 {
838 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Column Buffer.");
839 return eStatus;
840 }
841
842 // Lcu ILDB StreamOut buffer
843 // This is not enabled with HCP_PIPE_MODE_SELECT yet, placeholder here
844 allocParamsForBufferLinear.dwBytes = CODECHAL_CACHELINE_SIZE;
845 allocParamsForBufferLinear.pBufName = "LcuILDBStreamOutBuffer";
846
847 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
848 m_osInterface,
849 &allocParamsForBufferLinear,
850 &m_resLcuIldbStreamOutBuffer);
851
852 if (eStatus != MOS_STATUS_SUCCESS)
853 {
854 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU ILDB StreamOut Buffer.");
855 return eStatus;
856 }
857
858 // Lcu Base Address buffer
859 // HEVC Encoder Mode: Slice size is written to this buffer when slice size conformance is enabled.
860 // 1 CL (= 16 DWs = 64 bytes) per slice * Maximum number of slices in a frame.
861 // Align to page for HUC requirement
862 uint32_t maxLcu = picWidthInMinLCU * picHeightInMinLCU;
863 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(maxLcu * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
864 allocParamsForBufferLinear.pBufName = "LcuBaseAddressBuffer";
865
866 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
867 m_osInterface,
868 &allocParamsForBufferLinear,
869 &m_resLcuBaseAddressBuffer);
870
871 if (eStatus != MOS_STATUS_SUCCESS)
872 {
873 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU Base Address Buffer.");
874 return eStatus;
875 }
876
877 // SAO StreamOut buffer
878 uint32_t size = MOS_ALIGN_CEIL(picWidthInMinLCU, 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU;
879 //extra added size to cover tile enabled case, per tile width aligned to 4. 20: max tile column No.
880 size += 3 * 20 * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU;
881 allocParamsForBufferLinear.dwBytes = size;
882 allocParamsForBufferLinear.pBufName = "SaoStreamOutBuffer";
883
884 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
885 m_osInterface,
886 &allocParamsForBufferLinear,
887 &m_resSaoStreamOutBuffer);
888
889 if (eStatus != MOS_STATUS_SUCCESS)
890 {
891 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO StreamOut Buffer.");
892 return eStatus;
893 }
894
895 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
896 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
897 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
898 allocParamsForBufferLinear.Format = Format_Buffer;
899
900 // Allocate Frame Statistics Streamout Data Destination Buffer. DW98-100 in HCP PipeBufAddr command
901 size = MOS_ALIGN_CEIL(m_sizeOfHcpPakFrameStats * m_maxTileNumber, CODECHAL_PAGE_SIZE); //Each tile has 8 cache size bytes of data, Align to page is HuC requirement
902 allocParamsForBufferLinear.dwBytes = size;
903 allocParamsForBufferLinear.pBufName = "FrameStatStreamOutBuffer";
904
905 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
906 m_osInterface,
907 &allocParamsForBufferLinear,
908 &m_resFrameStatStreamOutBuffer),
909 "Failed to create VDENC FrameStatStreamOutBuffer Buffer");
910
911 // PAK Statistics buffer
912 size = MOS_ALIGN_CEIL(m_vdencBrcPakStatsBufferSize, CODECHAL_PAGE_SIZE);
913 CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
914 m_standard, size, 1, pakStats, "pakStats"));
915
916 // Slice Count buffer 1 DW = 4 Bytes
917 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
918 allocParamsForBufferLinear.pBufName = "Slice Count Buffer";
919
920 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
921 m_osInterface,
922 &allocParamsForBufferLinear,
923 &m_sliceCountBuffer),
924 "Failed to create VDENC Slice Count Buffer");
925
926 // VDEncMode Timer buffer 1 DW = 4 Bytes
927 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
928 allocParamsForBufferLinear.pBufName = "VDEncMode Timer Buffer";
929
930 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
931 m_osInterface,
932 &allocParamsForBufferLinear,
933 &m_vdencModeTimerBuffer),
934 "Failed to create VDEncMode Timer Buffer");
935
936 uint32_t frameWidthInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MIN_CU_SIZE);
937 uint32_t frameHeightInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MIN_CU_SIZE);
938 uint32_t frameWidthInLcus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MAX_LCU_SIZE_G10);
939 uint32_t frameHeightInLcus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MAX_LCU_SIZE_G10);
940
941 // PAK CU Level Streamout Data: DW57-59 in HCP pipe buffer address command
942 // One CU has 16-byte. But, each tile needs to be aliged to the cache line
943 size = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE);
944 allocParamsForBufferLinear.dwBytes = size;
945 allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
946
947 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
948 m_osInterface,
949 &allocParamsForBufferLinear,
950 &m_resPakcuLevelStreamoutData.sResource));
951 m_resPakcuLevelStreamoutData.dwSize = size;
952 CODECHAL_ENCODE_VERBOSEMESSAGE("first allocate cu steam out buffer, size=0x%x.\n", size);
953
954 // these 2 buffers are not used so far, but put the correct size calculation here
955 // PAK CU Level Streamout Data: DW57-59 in HCP pipe buffer address command
956 // One CU has 16-byte. But, each tile needs to be aliged to the cache line
957 //size = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE);
958
959 // PAK Slice Level Streamut Data. DW60-DW62 in HCP pipe buffer address command
960 // one LCU has one cache line. Use CU as LCU during creation
961 //size = frameWidthInLcus * frameHeightInLcus * CODECHAL_CACHELINE_SIZE;
962
963 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
964 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
965 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
966 allocParamsForBufferLinear.Format = Format_Buffer;
967
968 // Allocate SSE Source Pixel Row Store Buffer
969 m_sizeOfSseSrcPixelRowStoreBufferPerLcu = CODECHAL_CACHELINE_SIZE * (4 + 4) << 1;
970 allocParamsForBufferLinear.dwBytes = m_sizeOfSseSrcPixelRowStoreBufferPerLcu * (m_widthAlignedMaxLcu + 3);
971 allocParamsForBufferLinear.pBufName = "SseSrcPixelRowStoreBuffer";
972
973 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
974 m_osInterface,
975 &allocParamsForBufferLinear,
976 &m_resSseSrcPixelRowStoreBuffer),
977 "Failed to create SseSrcPixelRowStoreBuffer");
978
979 //HCP scalability Sync buffer
980 allocParamsForBufferLinear.dwBytes = CODECHAL_HEVC_MAX_NUM_HCP_PIPE * CODECHAL_CACHELINE_SIZE;
981 allocParamsForBufferLinear.pBufName = "GEN11 HCP scalability Sync buffer ";
982
983 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
984 m_osInterface,
985 &allocParamsForBufferLinear,
986 &m_resHcpScalabilitySyncBuffer.sResource),
987 "Failed to create GEN11 HCP scalability Sync Buffer");
988
989 // create the tile coding state parameters
990 m_tileParams = (PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11)MOS_AllocAndZeroMemory(
991 sizeof(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11)* m_maxTileNumber);
992
993 if (m_enableHWSemaphore)
994 {
995 // Create the HW sync objects which will be used by each reference frame and BRC in GEN11
996 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
997 allocParamsForBufferLinear.pBufName = "SemaphoreMemory";
998
999 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1000 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1001 lockFlagsWriteOnly.WriteOnly = 1;
1002
1003 uint32_t* data = nullptr;
1004
1005 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_refSync); i++)
1006 {
1007 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1008 m_osInterface,
1009 &allocParamsForBufferLinear,
1010 &m_refSync[i].resSemaphoreMem.sResource),
1011 "Failed to create HW Semaphore Memory.");
1012 m_refSync[i].resSemaphoreMem.dwSize = allocParamsForBufferLinear.dwBytes;
1013
1014 CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1015 m_osInterface,
1016 &m_refSync[i].resSemaphoreMem.sResource,
1017 &lockFlagsWriteOnly));
1018
1019 *data = 1;
1020
1021 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1022 m_osInterface,
1023 &m_refSync[i].resSemaphoreMem.sResource));
1024 }
1025
1026 }
1027
1028 // create the HW semaphore buffer to sync up between VDBOXes. This is used to WA HW internal lock issue
1029 if (m_enableVdBoxHWSemaphore)
1030 {
1031 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1032 allocParamsForBufferLinear.pBufName = "VDBOX SemaphoreMemory";
1033
1034 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1035 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1036 lockFlagsWriteOnly.WriteOnly = 1;
1037
1038 uint32_t* data = nullptr;
1039
1040 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resVdBoxSemaphoreMem); i++)
1041 {
1042 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1043 m_osInterface,
1044 &allocParamsForBufferLinear,
1045 &m_resVdBoxSemaphoreMem[i].sResource),
1046 "Failed to create VDBOX HW Semaphore Memory.");
1047
1048 CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1049 m_osInterface,
1050 &m_resVdBoxSemaphoreMem[i].sResource,
1051 &lockFlagsWriteOnly));
1052
1053 *data = 1;
1054
1055 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1056 m_osInterface,
1057 &m_resVdBoxSemaphoreMem[i].sResource));
1058 }
1059 }
1060
1061 uint32_t* data = nullptr;
1062 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1063 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1064 lockFlagsWriteOnly.WriteOnly = 1;
1065
1066 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1067 allocParamsForBufferLinear.pBufName = "Pipe Start SemaphoreMemory";
1068
1069 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1070 m_osInterface,
1071 &allocParamsForBufferLinear,
1072 &m_resPipeStartSemaMem),
1073 "Cannot create Scalability pipe start sync HW semaphore.");
1074
1075 CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1076 m_osInterface,
1077 &m_resPipeStartSemaMem,
1078 &lockFlagsWriteOnly));
1079
1080 *data = 0;
1081
1082 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1083 m_osInterface,
1084 &m_resPipeStartSemaMem));
1085
1086
1087 // SyncSemaMem
1088 data = nullptr;
1089 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1090 allocParamsForBufferLinear.pBufName = "SyncSemaphoreMemory";
1091
1092 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1093 m_osInterface,
1094 &allocParamsForBufferLinear,
1095 &m_resSyncSemaMem),
1096 "Cannot create sync HW semaphore.");
1097
1098 CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1099 m_osInterface,
1100 &m_resSyncSemaMem,
1101 &lockFlagsWriteOnly));
1102
1103 *data = 0;
1104
1105 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1106 m_osInterface,
1107 &m_resSyncSemaMem));
1108
1109
1110 data = nullptr;
1111 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1112 allocParamsForBufferLinear.pBufName = "BrcPakSemaphoreMemory";
1113
1114 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1115 m_osInterface,
1116 &allocParamsForBufferLinear,
1117 &m_resBrcPakSemaphoreMem.sResource),
1118 "Failed to create BRC PAK Semaphore Memory.");
1119
1120 CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1121 m_osInterface,
1122 &m_resBrcPakSemaphoreMem.sResource,
1123 &lockFlagsWriteOnly));
1124
1125 *data = 0;
1126
1127 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1128 m_osInterface,
1129 &m_resBrcPakSemaphoreMem.sResource));
1130
1131 if (m_hucPakStitchEnabled)
1132 {
1133 uint8_t* data;
1134
1135 // Pak stitch DMEM
1136 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG11), CODECHAL_CACHELINE_SIZE);
1137 allocParamsForBufferLinear.pBufName = "PAK Stitch Dmem Buffer";
1138 auto numOfPasses = CODECHAL_VDENC_BRC_NUM_OF_PASSES;
1139 for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
1140 {
1141 for (auto i = 0; i < numOfPasses; i++)
1142 {
1143 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1144 m_osInterface->pfnAllocateResource(
1145 m_osInterface,
1146 &allocParamsForBufferLinear,
1147 &m_resHucPakStitchDmemBuffer[k][i]),
1148 "Failed to allocate PAK Stitch Dmem Buffer.");
1149
1150 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1151 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1152 lockFlagsWriteOnly.WriteOnly = 1;
1153
1154 data = (uint8_t*)m_osInterface->pfnLockResource(
1155 m_osInterface,
1156 &m_resHucPakStitchDmemBuffer[k][i],
1157 &lockFlagsWriteOnly);
1158
1159 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1160
1161 MOS_ZeroMemory(
1162 data,
1163 allocParamsForBufferLinear.dwBytes);
1164
1165 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucPakStitchDmemBuffer[k][i]);
1166 }
1167 }
1168
1169 // BRC Data Buffer
1170 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
1171 allocParamsForBufferLinear.pBufName = "BRC Data Buffer";
1172
1173 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1174 m_osInterface->pfnAllocateResource(
1175 m_osInterface,
1176 &allocParamsForBufferLinear,
1177 &m_resBrcDataBuffer),
1178 "Failed to allocate BRC Data Buffer Buffer.");
1179
1180 MOS_LOCK_PARAMS lockFlags;
1181 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1182 lockFlags.WriteOnly = 1;
1183
1184 data = (uint8_t*)m_osInterface->pfnLockResource(
1185 m_osInterface,
1186 &m_resBrcDataBuffer,
1187 &lockFlags);
1188
1189 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1190
1191 MOS_ZeroMemory(
1192 data,
1193 allocParamsForBufferLinear.dwBytes);
1194
1195 m_osInterface->pfnUnlockResource(m_osInterface, &m_resBrcDataBuffer);
1196 }
1197
1198 if (m_numDelay)
1199 {
1200 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1201 allocParamsForBufferLinear.pBufName = "DelayMinusMemory";
1202
1203 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1204 m_osInterface,
1205 &allocParamsForBufferLinear,
1206 &m_resDelayMinus), "Failed to allocate delay minus memory.");
1207
1208 uint8_t* data;
1209 MOS_LOCK_PARAMS lockFlags;
1210 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1211 lockFlags.WriteOnly = 1;
1212 data = (uint8_t*)m_osInterface->pfnLockResource(
1213 m_osInterface,
1214 &m_resDelayMinus,
1215 &lockFlags);
1216
1217 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1218
1219 MOS_ZeroMemory(data, sizeof(uint32_t));
1220
1221 m_osInterface->pfnUnlockResource(m_osInterface, &m_resDelayMinus);
1222 }
1223
1224 return eStatus;
1225 }
1226
FreePakResources()1227 MOS_STATUS CodechalVdencHevcStateG11::FreePakResources()
1228 {
1229 CODECHAL_ENCODE_FUNCTION_ENTER;
1230
1231 m_osInterface->pfnFreeResource(m_osInterface, &m_resSseSrcPixelRowStoreBuffer);
1232 m_osInterface->pfnFreeResource(m_osInterface, &m_resHcpScalabilitySyncBuffer.sResource);
1233 m_osInterface->pfnFreeResource(m_osInterface, &m_resPakcuLevelStreamoutData.sResource);
1234
1235 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resTileBasedStatisticsBuffer); i++)
1236 {
1237 m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[i].sResource);
1238 }
1239 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileRecordBuffer); i++)
1240 {
1241 m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[i].sResource);
1242 }
1243 m_osInterface->pfnFreeResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource);
1244
1245 MOS_FreeMemory(m_tileParams);
1246
1247 // command buffer for VE, allocated in MOS_STATUS CodechalEncodeHevcBase::VerifyCommandBufferSize()
1248 for (auto i = 0; i < CODECHAL_NUM_UNCOMPRESSED_SURFACE_HEVC; i++)
1249 {
1250 for (auto j = 0; j < CODECHAL_HEVC_MAX_NUM_HCP_PIPE; j++)
1251 {
1252 for (auto k = 0; k < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; k++)
1253 {
1254 PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[i][j][k];
1255
1256 if (!Mos_ResourceIsNull(&cmdBuffer->OsResource))
1257 {
1258 if (cmdBuffer->pCmdBase)
1259 {
1260 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
1261 }
1262 m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
1263 }
1264 }
1265 }
1266 }
1267
1268 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_refSync); i++)
1269 {
1270 auto sync = &m_refSync[i];
1271
1272 if (!Mos_ResourceIsNull(&sync->resSyncObject))
1273 {
1274 // if this object has been signaled before, we need to wait to ensure singal-wait is in pair.
1275 if (sync->uiSemaphoreObjCount || sync->bInUsed)
1276 {
1277 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
1278 syncParams.GpuContext = m_renderContext;
1279 syncParams.presSyncResource = &sync->resSyncObject;
1280 syncParams.uiSemaphoreCount = sync->uiSemaphoreObjCount;
1281 m_osInterface->pfnEngineWait(m_osInterface, &syncParams);
1282 }
1283 }
1284 m_osInterface->pfnFreeResource(m_osInterface, &sync->resSemaphoreMem.sResource);
1285 }
1286 m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcPakSemaphoreMem.sResource);
1287 m_osInterface->pfnFreeResource(m_osInterface, &m_resPipeStartSemaMem);
1288 m_osInterface->pfnFreeResource(m_osInterface, &m_resSyncSemaMem);
1289
1290 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resVdBoxSemaphoreMem); i++)
1291 {
1292 m_osInterface->pfnFreeResource(m_osInterface, &m_resVdBoxSemaphoreMem[i].sResource);
1293 }
1294
1295 if (m_hucPakStitchEnabled)
1296 {
1297 m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcDataBuffer);
1298 auto numOfPasses = CODECHAL_VDENC_BRC_NUM_OF_PASSES;
1299 for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
1300 {
1301 for (auto i = 0; i < numOfPasses; i++)
1302 {
1303 m_osInterface->pfnFreeResource(m_osInterface, &m_resHucPakStitchDmemBuffer[k][i]);
1304 }
1305 }
1306 }
1307
1308 if (m_numDelay)
1309 {
1310 m_osInterface->pfnFreeResource(m_osInterface, &m_resDelayMinus);
1311 }
1312
1313 return CodechalVdencHevcState::FreePakResources();
1314 }
1315
AllocateEncResources()1316 MOS_STATUS CodechalVdencHevcStateG11::AllocateEncResources()
1317 {
1318 CODECHAL_ENCODE_FUNCTION_ENTER;
1319
1320 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1321
1322 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::AllocateEncResources());
1323
1324 if (m_hmeSupported)
1325 {
1326 HmeParams hmeParams;
1327
1328 MOS_ZeroMemory(&hmeParams, sizeof(hmeParams));
1329 hmeParams.b4xMeDistortionBufferSupported = true;
1330 hmeParams.ps16xMeMvDataBuffer = &m_s16XMeMvDataBuffer;
1331 hmeParams.ps32xMeMvDataBuffer = &m_s32XMeMvDataBuffer;
1332 hmeParams.ps4xMeDistortionBuffer = &m_s4XMeDistortionBuffer;
1333 hmeParams.ps4xMeMvDataBuffer = &m_s4XMeMvDataBuffer;
1334 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources4xME(&hmeParams));
1335 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources16xME(&hmeParams));
1336 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources32xME(&hmeParams));
1337 }
1338
1339 return eStatus;
1340 }
1341
FreeEncResources()1342 MOS_STATUS CodechalVdencHevcStateG11::FreeEncResources()
1343 {
1344 CODECHAL_ENCODE_FUNCTION_ENTER;
1345 // Free ME resources
1346 HmeParams hmeParams;
1347
1348 MOS_ZeroMemory(&hmeParams, sizeof(hmeParams));
1349 hmeParams.ps16xMeMvDataBuffer = &m_s16XMeMvDataBuffer;
1350 hmeParams.ps32xMeMvDataBuffer = &m_s32XMeMvDataBuffer;
1351 hmeParams.ps4xMeDistortionBuffer = &m_s4XMeDistortionBuffer;
1352 hmeParams.ps4xMeMvDataBuffer = &m_s4XMeMvDataBuffer;
1353 DestroyMEResources(&hmeParams);
1354
1355 return CodechalVdencHevcState::FreeEncResources();
1356 }
1357
AllocateBrcResources()1358 MOS_STATUS CodechalVdencHevcStateG11::AllocateBrcResources()
1359 {
1360 CODECHAL_ENCODE_FUNCTION_ENTER;
1361
1362 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::AllocateBrcResources());
1363 // initiate allocation paramters and lock flags
1364 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1365 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1366 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1367 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1368 allocParamsForBufferLinear.Format = Format_Buffer;
1369 // VDEnc Group3 batch buffer (input for HuC FW)
1370 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_hwInterface->m_vdencGroup3BatchBufferSize, CODECHAL_PAGE_SIZE);
1371 allocParamsForBufferLinear.pBufName = "VDENC Group3 Batch Buffer";
1372
1373 for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
1374 {
1375 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
1376 {
1377 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1378 m_osInterface,
1379 &allocParamsForBufferLinear,
1380 &m_vdencGroup3BatchBuffer[k][i]),
1381 "Failed to allocate VDENC Group 3 Batch Buffer");
1382 }
1383 }
1384 return MOS_STATUS_SUCCESS;
1385 }
1386
FreeBrcResources()1387 MOS_STATUS CodechalVdencHevcStateG11::FreeBrcResources()
1388 {
1389 CODECHAL_ENCODE_FUNCTION_ENTER;
1390
1391 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::FreeBrcResources());
1392
1393 for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
1394 {
1395 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
1396 {
1397 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencGroup3BatchBuffer[k][i]);
1398 }
1399 }
1400 return MOS_STATUS_SUCCESS;
1401 }
1402
InitializePicture(const EncoderParams & params)1403 MOS_STATUS CodechalVdencHevcStateG11::InitializePicture(const EncoderParams& params)
1404 {
1405 CODECHAL_ENCODE_FUNCTION_ENTER;
1406
1407 // common initilization
1408 return CodechalVdencHevcState::InitializePicture(params);
1409 }
1410
SetPictureStructs()1411 MOS_STATUS CodechalVdencHevcStateG11::SetPictureStructs()
1412 {
1413 CODECHAL_ENCODE_FUNCTION_ENTER;
1414
1415 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1416
1417 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::SetPictureStructs());
1418
1419 if ((uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_chromaFormat &&
1420 (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat)
1421 {
1422 if (Format_YUY2 != m_reconSurface.Format)
1423 {
1424 eStatus = MOS_STATUS_INVALID_PARAMETER;
1425 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Recon surface format is not correct!");
1426 }
1427 else if (m_reconSurface.dwHeight < m_oriFrameHeight * 2 ||
1428 m_reconSurface.dwWidth < m_oriFrameWidth / 2)
1429 {
1430 eStatus = MOS_STATUS_INVALID_PARAMETER;
1431 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Recon surface allocation size is not correct!");
1432 }
1433 else
1434 {
1435 // update Recon surface to Variant format
1436 CodechalEncodeHevcBase::UpdateYUY2SurfaceInfo(&m_reconSurface, m_is10BitHevc);
1437 }
1438 }
1439
1440 return eStatus;
1441 }
1442
~CodechalVdencHevcStateG11()1443 CodechalVdencHevcStateG11::~CodechalVdencHevcStateG11()
1444 {
1445 CODECHAL_ENCODE_FUNCTION_ENTER;
1446
1447 if (m_scalabilityState)
1448 {
1449 MOS_FreeMemAndSetNull(m_scalabilityState);
1450 }
1451 //Note: virtual engine interface destroy is done in MOS layer
1452
1453 CODECHAL_DEBUG_TOOL(
1454 MOS_Delete(m_encodeParState);
1455 )
1456 return;
1457 }
1458
GetStatusReport(EncodeStatus * encodeStatus,EncodeStatusReport * encodeStatusReport)1459 MOS_STATUS CodechalVdencHevcStateG11::GetStatusReport(
1460 EncodeStatus *encodeStatus,
1461 EncodeStatusReport *encodeStatusReport)
1462 {
1463 CODECHAL_ENCODE_FUNCTION_ENTER;
1464
1465 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1466
1467 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus);
1468 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport);
1469
1470 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpVdencOutputs()));
1471
1472 if (encodeStatusReport->UsedVdBoxNumber <= 1)
1473 {
1474 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::GetStatusReport(encodeStatus, encodeStatusReport));
1475 return eStatus;
1476 }
1477
1478 // In case of CQP, PAK integration kernel is not called, so used tile size record from HW
1479 PCODECHAL_ENCODE_BUFFER tileSizeStatusReport = &m_tileRecordBuffer[encodeStatusReport->CurrOriginalPic.FrameIdx];
1480
1481 MOS_LOCK_PARAMS lockFlags;
1482 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1483 lockFlags.ReadOnly = 1;
1484 HCPPakHWTileSizeRecord_G11* tileStatusReport = (HCPPakHWTileSizeRecord_G11*)m_osInterface->pfnLockResource(
1485 m_osInterface,
1486 &tileSizeStatusReport->sResource,
1487 &lockFlags);
1488 CODECHAL_ENCODE_CHK_NULL_RETURN(tileStatusReport);
1489
1490 encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
1491 encodeStatusReport->PanicMode = false;
1492 encodeStatusReport->AverageQp = 0;
1493 encodeStatusReport->QpY = 0;
1494 encodeStatusReport->SuggestedQpYDelta = 0;
1495 encodeStatusReport->NumberPasses = 1;
1496 encodeStatusReport->bitstreamSize = 0;
1497 encodeStatus->ImageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQp = 0;
1498 encodeStatusReport->NumberSlices = 0;
1499
1500 uint32_t* sliceSize = nullptr;
1501
1502 // pSliceSize is set/ allocated only when dynamic slice is enabled. Cannot use SSC flag here, as it is an asynchronous call
1503 if (encodeStatus->sliceReport.pSliceSize)
1504 {
1505 sliceSize = (uint32_t*)m_osInterface->pfnLockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize, &lockFlags);
1506 CODECHAL_ENCODE_CHK_NULL_RETURN(sliceSize);
1507 }
1508
1509 uint32_t totalCU = 0, sliceCount = 0;
1510 double sumQp = 0.0;
1511 for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1512 {
1513 if (tileStatusReport[i].Length == 0)
1514 {
1515 encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE;
1516 return eStatus;
1517 }
1518
1519 encodeStatusReport->bitstreamSize += tileStatusReport[i].Length;
1520 totalCU += (m_tileParams[i].TileHeightInMinCbMinus1 + 1) * (m_tileParams[i].TileWidthInMinCbMinus1 + 1);
1521 sumQp += tileStatusReport[i].Hcp_Qp_Status_Count;
1522
1523 if (sliceSize)
1524 {
1525 encodeStatusReport->pSliceSizes = (uint16_t*)sliceSize;
1526 encodeStatusReport->NumberSlices += (uint8_t)tileStatusReport[i].Hcp_Slice_Count_Tile;
1527 uint16_t prevCumulativeSliceSize = 0;
1528 // HW writes out a DW for each slice size. Copy in place the DW into 16bit fields expected by App
1529 for (uint32_t idx = 0; idx < tileStatusReport[i].Hcp_Slice_Count_Tile; idx++)
1530 {
1531 // PAK output the sliceSize at 16DW intervals.
1532 CODECHAL_ENCODE_CHK_NULL_RETURN(&sliceSize[sliceCount * 16]);
1533
1534 //convert cummulative slice size to individual, first slice may have PPS/SPS,
1535 uint32_t CurrAccumulatedSliceSize = sliceSize[sliceCount * 16];
1536 encodeStatusReport->pSliceSizes[sliceCount] = CurrAccumulatedSliceSize - prevCumulativeSliceSize;
1537 prevCumulativeSliceSize += encodeStatusReport->pSliceSizes[sliceCount];
1538 sliceCount++;
1539 }
1540 }
1541 }
1542
1543 if (sliceSize)
1544 {
1545 encodeStatusReport->SizeOfSliceSizesBuffer = sizeof(uint16_t) * encodeStatusReport->NumberSlices;
1546 encodeStatusReport->SliceSizeOverflow = (encodeStatus->sliceReport.SliceSizeOverflow >> 16) & 1;
1547 m_osInterface->pfnUnlockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize);
1548 }
1549
1550 CODECHAL_ENCODE_CHK_STATUS_RETURN(CalculatePSNR(encodeStatus, encodeStatusReport));
1551
1552 if (encodeStatusReport->bitstreamSize == 0 ||
1553 encodeStatusReport->bitstreamSize >m_bitstreamUpperBound)
1554 {
1555 encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
1556 encodeStatusReport->bitstreamSize = 0;
1557 return MOS_STATUS_INVALID_FILE_SIZE;
1558 }
1559
1560 if (totalCU != 0)
1561 {
1562 encodeStatusReport->QpY = encodeStatusReport->AverageQp =
1563 (uint8_t)((sumQp / (double)totalCU) / 4.0); // due to TU is 4x4 and there are 4 TUs in one CU
1564 }
1565 else
1566 {
1567 return MOS_STATUS_INVALID_PARAMETER;
1568 }
1569
1570 if (m_enableTileStitchByHW)
1571 {
1572 // clean-up the tile status report buffer
1573 MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * encodeStatusReport->NumberTilesInFrame);
1574 m_osInterface->pfnUnlockResource(m_osInterface, &tileSizeStatusReport->sResource);
1575 return eStatus;
1576 }
1577
1578 uint8_t *tempBsBuffer = nullptr, *bufPtr = nullptr;
1579 tempBsBuffer = bufPtr = (uint8_t*)MOS_AllocAndZeroMemory(encodeStatusReport->bitstreamSize);
1580 CODECHAL_ENCODE_CHK_NULL_RETURN(tempBsBuffer);
1581
1582 PCODEC_REF_LIST currRefList = encodeStatus->encodeStatusReport.pCurrRefList;
1583 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1584 lockFlags.ReadOnly = 1;
1585 uint8_t* bitstream = (uint8_t*)m_osInterface->pfnLockResource(
1586 m_osInterface,
1587 &currRefList->resBitstreamBuffer,
1588 &lockFlags);
1589 if (bitstream == nullptr)
1590 {
1591 MOS_FreeMemory(tempBsBuffer);
1592 return MOS_STATUS_NULL_POINTER;
1593 }
1594
1595 for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1596 {
1597 uint32_t offset = m_tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
1598 uint32_t len = tileStatusReport[i].Length;
1599
1600 MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
1601 bufPtr += len;
1602 }
1603
1604 MOS_SecureMemcpy(bitstream, encodeStatusReport->bitstreamSize, tempBsBuffer, encodeStatusReport->bitstreamSize);
1605 MOS_ZeroMemory(&bitstream[encodeStatusReport->bitstreamSize],m_bitstreamUpperBound - encodeStatusReport->bitstreamSize);
1606
1607 if (bitstream)
1608 {
1609 m_osInterface->pfnUnlockResource(m_osInterface, &currRefList->resBitstreamBuffer);
1610 }
1611
1612 MOS_FreeMemory(tempBsBuffer);
1613
1614 if (tileStatusReport)
1615 {
1616 // clean-up the tile status report buffer
1617 MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * encodeStatusReport->NumberTilesInFrame);
1618
1619 m_osInterface->pfnUnlockResource(m_osInterface, &tileSizeStatusReport->sResource);
1620 }
1621
1622 return eStatus;
1623 }
1624
UserFeatureKeyReport()1625 MOS_STATUS CodechalVdencHevcStateG11::UserFeatureKeyReport()
1626 {
1627 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1628
1629 CODECHAL_ENCODE_FUNCTION_ENTER;
1630
1631 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::UserFeatureKeyReport());
1632
1633 #if (_DEBUG || _RELEASE_INTERNAL)
1634 CodecHalEncode_WriteKey64(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE, m_kmdVeOveride.Value, m_osInterface->pOsContext);
1635 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_USED_VDBOX_NUM_ID, m_numPipe, m_osInterface->pOsContext);
1636 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENABLE_ENCODE_VE_CTXSCHEDULING_ID, MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface), m_osInterface->pOsContext);
1637 #endif
1638
1639 return eStatus;
1640 }
1641
EncodeKernelFunctions()1642 MOS_STATUS CodechalVdencHevcStateG11::EncodeKernelFunctions()
1643 {
1644 CODECHAL_ENCODE_FUNCTION_ENTER;
1645
1646 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1647
1648 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
1649 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1650 m_rawSurfaceToEnc,
1651 CodechalDbgAttr::attrEncodeRawInputSurface,
1652 "SrcSurf")));
1653 auto singleTaskPhaseSupported = m_singleTaskPhaseSupported; // local variable to save current setting before overwriting
1654
1655 if (m_16xMeSupported)
1656 {
1657 m_singleTaskPhaseSupported = false;
1658
1659 CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
1660 MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));
1661
1662 cscScalingKernelParams.bLastTaskInPhaseCSC =
1663 cscScalingKernelParams.bLastTaskInPhase4xDS = false;
1664 cscScalingKernelParams.bLastTaskInPhase16xDS = !(m_32xMeSupported || m_hmeEnabled);
1665 cscScalingKernelParams.bLastTaskInPhase32xDS = !m_hmeEnabled;
1666
1667 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->SetHevcCscFlagAndRawColor());
1668 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams));
1669 }
1670
1671 if (m_b16XMeEnabled)
1672 {
1673 if (m_b32XMeEnabled)
1674 {
1675 //HME_P kernel for 32xME
1676 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_32x));
1677 }
1678
1679 //HME_P kernel for 16xME
1680 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_16x));
1681
1682 //StreamIn kernel, 4xME
1683 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_4x));
1684 }
1685
1686 // retrieve SingleTaskPhase setting (SAO will need STP enabled setting)
1687 m_singleTaskPhaseSupported = singleTaskPhaseSupported;
1688
1689 CODECHAL_DEBUG_TOOL(
1690 if (m_hmeEnabled) {
1691 CODECHAL_ME_OUTPUT_PARAMS meOutputParams;
1692
1693 MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams));
1694 meOutputParams.psMeMvBuffer = &m_s4XMeMvDataBuffer;
1695 meOutputParams.psMeBrcDistortionBuffer = nullptr;
1696 meOutputParams.psMeDistortionBuffer = &m_s4XMeDistortionBuffer;
1697 meOutputParams.b16xMeInUse = false;
1698 meOutputParams.b32xMeInUse = false;
1699
1700 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1701 &meOutputParams.psMeMvBuffer->OsResource,
1702 CodechalDbgAttr::attrOutput,
1703 "MvData",
1704 meOutputParams.psMeMvBuffer->dwHeight *meOutputParams.psMeMvBuffer->dwPitch,
1705 CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64) * (m_downscaledFrameFieldHeightInMb4x * 4) : 0,
1706 CODECHAL_MEDIA_STATE_4X_ME));
1707
1708 //CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1709 // &meOutputParams.psMeBrcDistortionBuffer->OsResource,
1710 // CodechalDbgAttr::attrOutput,
1711 // "BrcDist",
1712 // meOutputParams.psMeBrcDistortionBuffer->dwHeight *meOutputParams.psMeBrcDistortionBuffer->dwPitch,
1713 // CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64) * MOS_ALIGN_CEIL((m_downscaledFrameFieldHeightInMb4x * 4), 8) : 0,
1714 // CODECHAL_MEDIA_STATE_4X_ME));
1715 if (meOutputParams.psMeDistortionBuffer)
1716 {
1717 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1718 &meOutputParams.psMeDistortionBuffer->OsResource,
1719 CodechalDbgAttr::attrOutput,
1720 "MeDist",
1721 meOutputParams.psMeDistortionBuffer->dwHeight *meOutputParams.psMeDistortionBuffer->dwPitch,
1722 CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64) * MOS_ALIGN_CEIL((m_downscaledFrameFieldHeightInMb4x * 4 * 10), 8) : 0,
1723 CODECHAL_MEDIA_STATE_4X_ME));
1724 }
1725 if (m_b16XMeEnabled)
1726 {
1727 MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams));
1728 meOutputParams.psMeMvBuffer = &m_s16XMeMvDataBuffer;
1729 meOutputParams.psMeBrcDistortionBuffer = nullptr;
1730 meOutputParams.psMeDistortionBuffer = nullptr;
1731 meOutputParams.b16xMeInUse = true;
1732 meOutputParams.b32xMeInUse = false;
1733
1734 CODECHAL_ENCODE_CHK_STATUS_RETURN(
1735 m_debugInterface->DumpBuffer(
1736 &meOutputParams.psMeMvBuffer->OsResource,
1737 CodechalDbgAttr::attrOutput,
1738 "MvData",
1739 meOutputParams.psMeMvBuffer->dwHeight *meOutputParams.psMeMvBuffer->dwPitch,
1740 CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64) * (m_downscaledFrameFieldHeightInMb16x * 4) : 0,
1741 CODECHAL_MEDIA_STATE_16X_ME));
1742 }
1743 if (m_b32XMeEnabled)
1744 {
1745 MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams));
1746 meOutputParams.psMeMvBuffer = &m_s32XMeMvDataBuffer;
1747 meOutputParams.psMeBrcDistortionBuffer = nullptr;
1748 meOutputParams.psMeDistortionBuffer = nullptr;
1749 meOutputParams.b16xMeInUse = false;
1750 meOutputParams.b32xMeInUse = true;
1751
1752 CODECHAL_ENCODE_CHK_STATUS_RETURN(
1753 m_debugInterface->DumpBuffer(
1754 &meOutputParams.psMeMvBuffer->OsResource,
1755 CodechalDbgAttr::attrOutput,
1756 "MvData",
1757 meOutputParams.psMeMvBuffer->dwHeight *meOutputParams.psMeMvBuffer->dwPitch,
1758 CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb32x * 32), 64) * (m_downscaledFrameFieldHeightInMb32x * 4) : 0,
1759 CODECHAL_MEDIA_STATE_32X_ME));
1760 }
1761
1762 MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams));
1763 meOutputParams.pResVdenStreamInBuffer = &(m_resVdencStreamInBuffer[m_currRecycledBufIdx]);
1764 meOutputParams.psMeMvBuffer = &m_s4XMeMvDataBuffer;
1765 meOutputParams.psMeDistortionBuffer = &m_s4XMeDistortionBuffer;
1766 meOutputParams.b16xMeInUse = false;
1767 meOutputParams.bVdencStreamInInUse = true;
1768 if (m_vdencStreamInEnabled) {
1769 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1770 &m_resVdencStreamInBuffer[m_currRecycledBufIdx],
1771 CodechalDbgAttr::attrOutput,
1772 "StreaminData",
1773 m_picWidthInMb * m_picHeightInMb * CODECHAL_CACHELINE_SIZE,
1774 0,
1775 CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN));
1776 }
1777 })
1778 #endif
1779
1780 return eStatus;
1781 }
1782
ReadSliceSize(PMOS_COMMAND_BUFFER cmdBuffer)1783 MOS_STATUS CodechalVdencHevcStateG11::ReadSliceSize(PMOS_COMMAND_BUFFER cmdBuffer)
1784 {
1785 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1786
1787 CODECHAL_ENCODE_FUNCTION_ENTER;
1788
1789 // Use FrameStats buffer if in single pipe mode.
1790 if (m_numPipe == 1)
1791 {
1792 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::ReadSliceSize(cmdBuffer));
1793 return eStatus;
1794 }
1795
1796 // Report slice size to app only when dynamic scaling is enabled
1797 if (!m_hevcSeqParams->SliceSizeControl)
1798 {
1799 return eStatus;
1800 }
1801
1802 // In multi-tile multi-pipe mode, use PAK integration kernel output
1803 // PAK integration kernel accumulates frame statistics across tiles, which should be used to setup slice size report
1804 MOS_LOCK_PARAMS lockFlags;
1805 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1806 lockFlags.WriteOnly = true;
1807
1808 uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize + sizeof(uint32_t) * 2); // encodeStatus is offset by 2 DWs in the resource
1809 uint32_t sizeOfSliceSizesBuffer = MOS_ALIGN_CEIL(m_numLcu * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
1810
1811 if (IsFirstPipe())
1812 {
1813 if (IsFirstPass())
1814 {
1815 // Create/ Initialize slice report buffer once per frame, to be used across passes
1816 if (Mos_ResourceIsNull(&m_resSliceReport[m_encodeStatusBuf.wCurrIndex]))
1817 {
1818 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1819 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1820 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1821 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1822 allocParamsForBufferLinear.Format = Format_Buffer;
1823 allocParamsForBufferLinear.dwBytes = sizeOfSliceSizesBuffer;
1824
1825 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1826 m_osInterface,
1827 &allocParamsForBufferLinear,
1828 &m_resSliceReport[m_encodeStatusBuf.wCurrIndex]),
1829 "Failed to create HEVC VDEnc Slice Report Buffer ");
1830 }
1831
1832 // Clear slice size structure to be sent in EncodeStatusReport buffer
1833 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex], &lockFlags);
1834 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1835 MOS_ZeroMemory(data, sizeOfSliceSizesBuffer);
1836 m_osInterface->pfnUnlockResource(m_osInterface, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex]);
1837
1838 // Set slice size pointer in slice size structure
1839 MHW_MI_FLUSH_DW_PARAMS miFlushDwParams;
1840 MOS_ZeroMemory(&miFlushDwParams, sizeof(miFlushDwParams));
1841 miFlushDwParams.pOsResource = &m_encodeStatusBuf.resStatusBuffer;
1842 miFlushDwParams.dwResourceOffset = CODECHAL_OFFSETOF(EncodeStatusSliceReport, pSliceSize) + baseOffset + m_encodeStatusBuf.dwSliceReportOffset;
1843 miFlushDwParams.dwDataDW1 = (uint32_t)((uint64_t)&m_resSliceReport[m_encodeStatusBuf.wCurrIndex] & 0xFFFFFFFF);
1844 miFlushDwParams.dwDataDW2 = (uint32_t)(((uint64_t)&m_resSliceReport[m_encodeStatusBuf.wCurrIndex] & 0xFFFFFFFF00000000) >> 32);
1845 miFlushDwParams.bQWordEnable = 1;
1846 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
1847 cmdBuffer,
1848 &miFlushDwParams));
1849 }
1850
1851 // Copy Slice size data buffer from PAK to be sent back to App
1852 CODECHAL_ENCODE_CHK_STATUS_RETURN(CopyDataBlock(cmdBuffer,
1853 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
1854 m_hevcTileStatsOffset.uiHevcSliceStreamout,
1855 &m_resSliceReport[m_encodeStatusBuf.wCurrIndex],
1856 0,
1857 sizeOfSliceSizesBuffer));
1858
1859 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
1860 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
1861 miCpyMemMemParams.presSrc = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Slice size overflow is in m_resFrameStatStreamOutBuffer DW0[16]
1862 miCpyMemMemParams.dwSrcOffset = m_hevcFrameStatsOffset.uiHevcPakStatistics;
1863 miCpyMemMemParams.presDst = &m_encodeStatusBuf.resStatusBuffer;
1864 miCpyMemMemParams.dwDstOffset = baseOffset + m_encodeStatusBuf.dwSliceReportOffset; // Slice size overflow is at DW0 EncodeStatusSliceReport
1865 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
1866 }
1867
1868 return eStatus;
1869 }
1870
ExecutePictureLevel()1871 MOS_STATUS CodechalVdencHevcStateG11::ExecutePictureLevel()
1872 {
1873 CODECHAL_ENCODE_FUNCTION_ENTER;
1874
1875 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1876
1877 if (IsFirstPipe() && IsFirstPass())
1878 {
1879 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData(m_tileParams));
1880 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileStatistics());
1881 }
1882
1883 if (m_hevcPicParams->bUsedAsRef || m_brcEnabled)
1884 {
1885 if (m_currRefSync == nullptr)
1886 {
1887 m_currRefSync = &m_refSync[m_currMbCodeIdx];
1888 }
1889 }
1890 else
1891 {
1892 m_currRefSync = nullptr;
1893 }
1894
1895 if (m_lookaheadPass && (m_hevcSeqParams->MaxAdaptiveGopPicSize > 0))
1896 {
1897 bool forceIntra = m_intraInterval >= m_hevcSeqParams->MaxAdaptiveGopPicSize;
1898 if ((!IsFirstPass() || forceIntra) && (m_hevcPicParams->CodingType != I_TYPE))
1899 {
1900 m_vdencStreamInEnabled = true;
1901 }
1902
1903 if (!m_lookaheadAdaptiveI)
1904 {
1905 m_intraInterval = forceIntra ? 1 : m_intraInterval + 1;
1906 }
1907 }
1908
1909 m_firstTaskInPhase = m_singleTaskPhaseSupported? IsFirstPass(): false;
1910 m_lastTaskInPhase = m_singleTaskPhaseSupported? IsLastPass(): true;
1911
1912 // Per frame maximum HuC kernels is 5 - BRC Init, BRC Update, PAK Int, BRC Update, PAK Int
1913 m_hucCommandsSize = m_hwInterface->m_hucCommandBufferSize * 5;
1914 PerfTagSetting perfTag;
1915 perfTag.Value = 0;
1916 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
1917 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
1918 perfTag.PictureCodingType = m_pictureCodingType;
1919 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
1920
1921 if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()) \
1922 {
1923 CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
1924 eStatus = MOS_STATUS_INVALID_PARAMETER;
1925 return eStatus;
1926 }
1927
1928 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize());
1929
1930 if (!m_singleTaskPhaseSupportedInPak)
1931 {
1932 // Command buffer or patch list size are too small and so we cannot submit multiple pass of PAKs together
1933 m_firstTaskInPhase = true;
1934 m_lastTaskInPhase = true;
1935 }
1936
1937 if (m_lookaheadPass)
1938 {
1939 if (m_swLaMode != nullptr)
1940 {
1941 m_lastTaskInPhase = true;
1942 }
1943 else
1944 {
1945 m_lastTaskInPhase = !m_singleTaskPhaseSupported;
1946 }
1947 }
1948
1949 // PAK pass type for each pass: VDEnc+PAK vs. PAK-only
1950 SetPakPassType();
1951
1952 bool pakOnlyMultipassEnable;
1953
1954 // "PAK-Only Multi-Pass Enable" set to zero in first pass and 1 in subsequent passes
1955 // Slice size conformance feature can't be enabled. When SSC enabled, VDENC + PAK 2nd pass needs to be used.
1956 // SAO 2nd pass has to be PAK-only 2nd pass
1957 if (m_numPipe >= 2)
1958 {
1959 int32_t currentPass = GetCurrentPass();
1960
1961 pakOnlyMultipassEnable = (currentPass != 0) && (m_hevcSeqParams->SAO_enabled_flag) && (!m_hevcSeqParams->SliceSizeControl);
1962 }
1963 else
1964 {
1965 pakOnlyMultipassEnable = m_pakOnlyPass;
1966 }
1967
1968 bool panicEnabled = (m_brcEnabled) && (m_panicEnable) && (GetCurrentPass() == 1) && !m_pakOnlyPass;
1969
1970 uint32_t rollingILimit = (m_hevcPicParams->bEnableRollingIntraRefresh == ROLLING_I_ROW) ? MOS_ROUNDUP_DIVIDE(m_frameHeight, 32) : (m_hevcPicParams->bEnableRollingIntraRefresh == ROLLING_I_COLUMN) ? MOS_ROUNDUP_DIVIDE(m_frameWidth, 32) : 0;
1971
1972 m_refList[m_currReconstructedPic.FrameIdx]->rollingIntraRefreshedPosition =
1973 CodecHal_Clip3(0, rollingILimit, m_hevcPicParams->IntraInsertionLocation + m_hevcPicParams->IntraInsertionSize);
1974
1975 // For ACQP / BRC, update pic params rolling intra reference location here before cmd buffer is prepared.
1976 PCODEC_PICTURE l0RefFrameList = m_hevcSliceParams->RefPicList[LIST_0];
1977 for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
1978 {
1979 CODEC_PICTURE refPic = l0RefFrameList[refIdx];
1980
1981 if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
1982 {
1983 uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
1984 m_hevcPicParams->RollingIntraReferenceLocation[refIdx] = m_refList[refPicIdx]->rollingIntraRefreshedPosition;
1985 }
1986 }
1987
1988 if(IsFirstPipe())
1989 {
1990 CODECHAL_ENCODE_CHK_NULL_RETURN(m_hucCmdInitializer);
1991 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->CmdInitializerSetConstData(
1992 m_osInterface,
1993 m_miInterface,
1994 m_vdencInterface,
1995 m_hevcSeqParams,
1996 m_hevcPicParams,
1997 m_hevcSliceParams,
1998 pakOnlyMultipassEnable,
1999 m_hevcVdencAcqpEnabled,
2000 m_brcEnabled,
2001 m_vdencStreamInEnabled,
2002 m_vdencNativeROIEnabled,
2003 m_brcAdaptiveRegionBoostEnable,
2004 m_hevcVdencRoundingEnabled,
2005 panicEnabled,
2006 GetCurrentPass()));
2007 }
2008
2009 // clean-up per VDBOX semaphore memory
2010 int32_t currentPipe = GetCurrentPipe();
2011 int32_t currentPass = GetCurrentPass();
2012 if ((currentPipe < 0) || (currentPass < 0))
2013 {
2014 eStatus = MOS_STATUS_INVALID_PARAMETER;
2015 return eStatus;
2016 }
2017
2018 if (m_numPipe >= 2)
2019 {
2020 // Send Cmd Buffer Header for VE in last pipe only
2021 MOS_COMMAND_BUFFER cmdBuffer;
2022 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2023
2024 bool requestFrameTracking = m_singleTaskPhaseSupported ? IsFirstPass() : IsLastPass();
2025 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
2026
2027 if (!m_singleTaskPhaseSupported || (m_singleTaskPhaseSupported && IsFirstPass()))
2028 {
2029 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStopCmd(&cmdBuffer));
2030
2031 //HW Semaphore cmd to make sure all pipes start encode at the same time
2032 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSemaMem, 1, MHW_MI_ATOMIC_INC, &cmdBuffer));
2033 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2034 &m_resPipeStartSemaMem,
2035 &cmdBuffer,
2036 m_numPipe));
2037
2038 // Program some placeholder cmds to resolve the hazard between pipe sync
2039 MHW_MI_STORE_DATA_PARAMS dataParams;
2040 dataParams.pOsResource = &m_resDelayMinus;
2041 dataParams.dwResourceOffset = 0;
2042 dataParams.dwValue = 0xDE1A;
2043 for (uint32_t i = 0; i < m_numDelay; i++)
2044 {
2045 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2046 &cmdBuffer,
2047 &dataParams));
2048 }
2049
2050 //clean HW semaphore memory
2051 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSemaMem, 1, MHW_MI_ATOMIC_DEC, &cmdBuffer));
2052
2053 //Start Watchdog Timer
2054 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStartCmd(&cmdBuffer));
2055 }
2056
2057 // clean-up per VDBOX semaphore memory, only in the first BRC pass. Same semaphore is re-used across BRC passes for stitch command
2058 if (IsFirstPass())
2059 {
2060 if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[currentPipe].sResource))
2061 {
2062 CODECHAL_ENCODE_CHK_STATUS_RETURN(
2063 SetSemaphoreMem(
2064 &m_resVdBoxSemaphoreMem[currentPipe].sResource,
2065 &cmdBuffer,
2066 false));
2067 }
2068 }
2069
2070 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2071 }
2072
2073 // Send HuC BRC Init/ Update only on first pipe.
2074 if (m_vdencHucUsed && IsFirstPipe())
2075 {
2076 if (!m_singleTaskPhaseSupported)
2077 {
2078 //Reset earlier set PAK perf tag
2079 m_osInterface->pfnResetPerfBufferID(m_osInterface);
2080
2081 // STF: HuC+VDEnc+PAK single BB, non-STF: HuC Init/HuC Update/(VDEnc+PAK) in separate BBs
2082 perfTag.Value = 0;
2083 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2084 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_BRC_INIT_RESET;
2085 perfTag.PictureCodingType = m_pictureCodingType;
2086 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2087 }
2088 m_resVdencBrcUpdateDmemBufferPtr[0] = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo);
2089
2090 // Invoke BRC init/reset FW
2091 if (m_brcInit || m_brcReset)
2092 {
2093 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcInitReset());
2094 }
2095
2096 if (!m_singleTaskPhaseSupported)
2097 {
2098 //Reset performance buffer used for BRC init
2099 m_osInterface->pfnResetPerfBufferID(m_osInterface);
2100 // STF: HuC+VDEnc+PAK single BB, non-STF: HuC Init/HuC Update/(VDEnc+PAK) in separate BBs
2101 perfTag.Value = 0;
2102 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2103 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE;
2104 perfTag.PictureCodingType = m_pictureCodingType;
2105 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2106 }
2107
2108 // Invoke BRC update FW
2109 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcUpdate());
2110 m_brcInit = m_brcReset = false;
2111 if (!m_singleTaskPhaseSupported)
2112 {
2113 //reset performance buffer used for BRC update
2114 m_osInterface->pfnResetPerfBufferID(m_osInterface);
2115 }
2116 }
2117
2118 // for CQP case, we only need to add the generating cmds in first pass/pipe
2119 // the other pipes share the SLB which is generated in the first pass/pipe
2120 // but we need to sync the generating operation
2121 if (!m_vdencHucUsed && IsFirstPass())
2122 {
2123 if (IsFirstPipe())
2124 {
2125 ConstructBatchBufferHuCCQP(&m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource);
2126 }
2127
2128 if (m_numPipe > 1)
2129 {
2130 MOS_COMMAND_BUFFER cmdBuffer;
2131 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2132 //HW Semaphore cmd to make sure all pipes wait until the slb is ready
2133 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resSyncSemaMem, 1, MHW_MI_ATOMIC_INC, &cmdBuffer));
2134 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2135 &m_resSyncSemaMem,
2136 &cmdBuffer,
2137 m_numPipe));
2138
2139 // Program some placeholder cmds to resolve the hazard between pipe sync
2140 MHW_MI_STORE_DATA_PARAMS dataParams;
2141 dataParams.pOsResource = &m_resDelayMinus;
2142 dataParams.dwResourceOffset = 0;
2143 dataParams.dwValue = 0xDE1A;
2144 for (uint32_t i = 0; i < m_numDelay; i++)
2145 {
2146 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2147 &cmdBuffer,
2148 &dataParams));
2149 }
2150
2151 //clean HW semaphore memory
2152 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resSyncSemaMem, 1, MHW_MI_ATOMIC_DEC, &cmdBuffer));
2153 ReturnCommandBuffer(&cmdBuffer);
2154 }
2155
2156 }
2157
2158 MOS_COMMAND_BUFFER cmdBuffer;
2159 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2160
2161 if (!m_singleTaskPhaseSupported)
2162 {
2163 //PAK Perf Tag
2164 perfTag.Value = 0;
2165 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2166 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
2167 perfTag.PictureCodingType = m_pictureCodingType;
2168 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2169 }
2170
2171 if ((!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_hevcVdencAcqpEnabled)) && (m_numPipe == 1))
2172 {
2173 // Send command buffer header at the beginning (OS dependent)
2174 // frame tracking tag is only added in the last command buffer header
2175 bool requestFrameTracking = m_singleTaskPhaseSupported ?
2176 m_firstTaskInPhase :
2177 ((m_lookaheadPass && (!m_swLaMode || (m_currPass < m_numPasses))) ? false : m_lastTaskInPhase);
2178
2179 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
2180 }
2181
2182 // Ensure the previous BRC Update is done, before executing PAK
2183 if (m_vdencHucUsed && (m_numPipe >= 2))
2184 {
2185 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resBrcPakSemaphoreMem.sResource, 1, MHW_MI_ATOMIC_INC, &cmdBuffer));
2186 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2187 &m_resBrcPakSemaphoreMem.sResource,
2188 &cmdBuffer,
2189 m_numPipe));
2190
2191 // Program some placeholder cmds to resolve the hazard between pipe sync
2192 MHW_MI_STORE_DATA_PARAMS dataParams;
2193 dataParams.pOsResource = &m_resDelayMinus;
2194 dataParams.dwResourceOffset = 0;
2195 dataParams.dwValue = 0xDE1A;
2196 for (uint32_t i = 0; i < m_numDelay; i++)
2197 {
2198 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2199 &cmdBuffer,
2200 &dataParams));
2201 }
2202
2203 //clean HW semaphore memory
2204 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resBrcPakSemaphoreMem.sResource, 1, MHW_MI_ATOMIC_DEC, &cmdBuffer));
2205 }
2206
2207 // ACQP + SSC, ACQP + WP, BRC, BRC + SSC, BRC + WP
2208 // 2nd pass for SSC, WP, BRC needs conditional batch buffer end cmd, which is decided by HUC_STATUS output from HuC
2209 if (currentPass && m_vdencHuCConditional2ndPass && (currentPass != m_uc2NdSaoPass))
2210 {
2211 MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
2212
2213 // Insert conditional batch buffer end
2214 MOS_ZeroMemory(
2215 &miConditionalBatchBufferEndParams,
2216 sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
2217
2218 // VDENC uses HuC FW generated semaphore for conditional 2nd pass
2219 miConditionalBatchBufferEndParams.presSemaphoreBuffer =
2220 &m_resPakMmioBuffer;
2221
2222 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
2223 &cmdBuffer,
2224 &miConditionalBatchBufferEndParams));
2225
2226 if (m_numPipe == 1)
2227 {
2228 auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
2229 CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
2230 uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
2231
2232 // Write back the HCP image control register for RC6 may clean it out
2233 MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
2234 MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
2235 miLoadRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2236 miLoadRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOffset;
2237 miLoadRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2238 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams));
2239
2240 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2241 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2242 miStoreRegMemParams.presStoreBuffer = &m_vdencBrcBuffers.resBrcPakStatisticBuffer[m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite];
2243 miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
2244 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2245 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2246
2247 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2248 miStoreRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2249 miStoreRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset;
2250 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2251 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2252 }
2253 }
2254
2255 if (!currentPass && m_osInterface->bTagResourceSync)
2256 {
2257 // This is a short term solution to solve the sync tag issue: the sync tag write for PAK is inserted at the end of 2nd pass PAK BB
2258 // which may be skipped in multi-pass PAK enabled case. The idea here is to insert the previous frame's tag at the beginning
2259 // of the BB and keep the current frame's tag at the end of the BB. There will be a delay for tag update but it should be fine
2260 // as long as Dec/VP/Enc won't depend on this PAK so soon.
2261
2262 PMOS_RESOURCE globalGpuContextSyncTagBuffer = nullptr;
2263
2264 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
2265 m_osInterface,
2266 globalGpuContextSyncTagBuffer));
2267 CODECHAL_ENCODE_CHK_NULL_RETURN(globalGpuContextSyncTagBuffer);
2268
2269 MHW_MI_STORE_DATA_PARAMS params;
2270 params.pOsResource = globalGpuContextSyncTagBuffer;
2271 params.dwResourceOffset = m_osInterface->pfnGetGpuStatusTagOffset(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2272 uint32_t value = m_osInterface->pfnGetGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2273 params.dwValue = (value > 0) ? (value - 1) : 0;
2274 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, ¶ms));
2275 }
2276
2277 if (IsFirstPipe() && (!m_lookaheadPass || m_swLaMode))
2278 {
2279 CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2280 }
2281
2282 MHW_VDBOX_SURFACE_PARAMS srcSurfaceParams;
2283 SetHcpSrcSurfaceParams(srcSurfaceParams);
2284
2285 MHW_VDBOX_SURFACE_PARAMS reconSurfaceParams;
2286 SetHcpReconSurfaceParams(reconSurfaceParams);
2287
2288 CODECHAL_ENCODE_CHK_NULL_RETURN(m_pipeBufAddrParams);
2289 *m_pipeBufAddrParams = {};
2290 SetHcpPipeBufAddrParams(*m_pipeBufAddrParams);
2291 m_pipeBufAddrParams->pRawSurfParam = &srcSurfaceParams;
2292 m_pipeBufAddrParams->pDecodedReconParam = &reconSurfaceParams;
2293 #ifdef _MMC_SUPPORTED
2294 SetPipeBufAddr(&cmdBuffer);
2295 #endif
2296 CODECHAL_ENCODE_CHK_NULL_RETURN(m_pipeModeSelectParams);
2297 SetHcpPipeModeSelectParams(*m_pipeModeSelectParams);
2298
2299 // HuC modifies HCP pipe mode select command, when 2nd pass SAO is required
2300 if (m_vdencHucUsed && m_b2NdSaoPassNeeded)
2301 {
2302 // current location to add cmds in 2nd level batch buffer
2303 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].iCurrent = 0;
2304 // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
2305 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = 0;
2306
2307 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]));
2308
2309 // save offset for next 2nd level batch buffer usage
2310 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
2311 }
2312 else
2313 {
2314 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&cmdBuffer, m_pipeModeSelectParams));
2315 }
2316
2317 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &srcSurfaceParams));
2318
2319 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &reconSurfaceParams));
2320
2321 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(&cmdBuffer, m_pipeBufAddrParams));
2322
2323 MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
2324 SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
2325 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams));
2326
2327 MHW_VDBOX_QM_PARAMS fqmParams, qmParams;
2328 SetHcpQmStateParams(fqmParams, qmParams);
2329 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpFqmStateCmd(&cmdBuffer, &fqmParams));
2330 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpQmStateCmd(&cmdBuffer, &qmParams));
2331
2332 SetVdencPipeModeSelectParams(*m_pipeModeSelectParams);
2333 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(&cmdBuffer, m_pipeModeSelectParams));
2334
2335 MHW_VDBOX_SURFACE_PARAMS dsSurfaceParams[2];
2336 SetVdencSurfaceStateParams(srcSurfaceParams, reconSurfaceParams, dsSurfaceParams[0], dsSurfaceParams[1]);
2337 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencSrcSurfaceStateCmd(&cmdBuffer, &srcSurfaceParams));
2338 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &reconSurfaceParams));
2339 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencDsRefSurfaceStateCmd(&cmdBuffer, &dsSurfaceParams[0], 2));
2340
2341 SetVdencPipeBufAddrParams(*m_pipeBufAddrParams);
2342 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeBufAddrCmd(&cmdBuffer, m_pipeBufAddrParams));
2343
2344 MHW_VDBOX_HEVC_PIC_STATE picStateParams;
2345 SetHcpPicStateParams(picStateParams);
2346
2347 if (m_vdencHucUsed)
2348 {
2349 // 2nd level batch buffer
2350 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
2351 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]));
2352
2353 // save offset for next 2nd level batch buffer usage
2354 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset += m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2355 }
2356 else
2357 {
2358 // current location to add cmds in 2nd level batch buffer
2359 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].iCurrent = 0;
2360 // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
2361 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = 0;
2362
2363 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]));
2364 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2365 }
2366
2367 // Send HEVC_VP9_RDOQ_STATE command
2368 if (m_hevcRdoqEnabled)
2369 {
2370 if (m_pictureCodingType == I_TYPE)
2371 {
2372 if (m_hevcIFrameRdoqEnabled)
2373 {
2374 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&cmdBuffer, &picStateParams));
2375 }
2376 }
2377 else
2378 {
2379 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&cmdBuffer, &picStateParams));
2380 }
2381 }
2382
2383 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2384
2385 return eStatus;
2386 }
2387
ExecuteSliceLevel()2388 MOS_STATUS CodechalVdencHevcStateG11::ExecuteSliceLevel()
2389 {
2390 CODECHAL_ENCODE_FUNCTION_ENTER;
2391
2392 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2393
2394 if (!m_hevcPicParams->tiles_enabled_flag)
2395 {
2396 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::ExecuteSliceLevel());
2397
2398 if (m_lookaheadPass)
2399 {
2400 CODECHAL_ENCODE_CHK_STATUS_RETURN(AnalyzeLookaheadStats());
2401
2402 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
2403 &m_vdencLaStatsBuffer,
2404 CodechalDbgAttr::attrVdencOutput,
2405 "_LookaheadStats",
2406 m_brcLooaheadStatsBufferSize,
2407 0,
2408 CODECHAL_NUM_MEDIA_STATES)));
2409 }
2410
2411 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2412 {
2413 CODECHAL_DEBUG_TOOL(
2414 CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpHucDebugOutputBuffers());
2415 )
2416 }
2417 }
2418 else
2419 {
2420 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncTileLevel());
2421 }
2422
2423 return eStatus;
2424 }
2425
GetTileInfo(uint32_t xPosition,uint32_t yPosition,uint32_t * tileId,uint32_t * tileEndLCUX,uint32_t * tileEndLCUY)2426 void CodechalVdencHevcStateG11::GetTileInfo(
2427 uint32_t xPosition,
2428 uint32_t yPosition,
2429 uint32_t* tileId,
2430 uint32_t* tileEndLCUX,
2431 uint32_t* tileEndLCUY)
2432 {
2433 *tileId = 0;
2434 uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
2435
2436 for (uint8_t i = 0; i < m_numTiles; i++)
2437 {
2438 uint32_t tileWidthInLCU = MOS_ROUNDUP_DIVIDE(((m_tileParams[i].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
2439 uint32_t tileHeightInLCU = MOS_ROUNDUP_DIVIDE(((m_tileParams[i].TileHeightInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
2440 *tileEndLCUX = m_tileParams[i].TileStartLCUX + tileWidthInLCU;
2441 *tileEndLCUY = m_tileParams[i].TileStartLCUY + tileHeightInLCU;
2442
2443 if (xPosition >= (m_tileParams[i].TileStartLCUX * 2) &&
2444 yPosition >= (m_tileParams[i].TileStartLCUY * 2) &&
2445 xPosition < (*tileEndLCUX * 2) &&
2446 yPosition < (*tileEndLCUY * 2))
2447 {
2448 *tileId = i;
2449 break;
2450 }
2451 }
2452 }
2453
PrepareVDEncStreamInData()2454 MOS_STATUS CodechalVdencHevcStateG11::PrepareVDEncStreamInData()
2455 {
2456 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2457
2458 CODECHAL_ENCODE_FUNCTION_ENTER;
2459
2460 if (m_lookaheadPass && m_firstFrame)
2461 {
2462 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupForceIntraStreamIn(&m_resVdencStreamInBuffer[0]));
2463 }
2464
2465 if (m_hevcPicParams->tiles_enabled_flag)
2466 {
2467 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData(m_tileParams));
2468 }
2469 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::PrepareVDEncStreamInData());
2470
2471 return eStatus;
2472 }
2473
SetStreaminDataPerRegion(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,void * streaminData)2474 void CodechalVdencHevcStateG11::SetStreaminDataPerRegion(
2475 uint32_t streamInWidth,
2476 uint32_t top,
2477 uint32_t bottom,
2478 uint32_t left,
2479 uint32_t right,
2480 PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,
2481 void* streaminData)
2482 {
2483 CODECHAL_ENCODE_FUNCTION_ENTER;
2484
2485 if (!m_hevcPicParams->tiles_enabled_flag)
2486 {
2487 CodechalVdencHevcState::SetStreaminDataPerRegion(streamInWidth, top, bottom, left, right, streaminParams, streaminData);
2488 return;
2489 }
2490
2491 uint8_t* data = (uint8_t*)streaminData;
2492 uint32_t tileId = 0, tileEndLCUX = 0, tileEndLCUY = 0;
2493 uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
2494 GetTileInfo(left, top, &tileId, &tileEndLCUX, &tileEndLCUY);
2495
2496 for (auto y = top; y < bottom; y++)
2497 {
2498 for (auto x = left; x < right; x++)
2499 {
2500 uint32_t streamInBaseOffset = 0, offset = 0, xyOffset = 0;
2501
2502 if (x < (m_tileParams[tileId].TileStartLCUX * 2) ||
2503 y < (m_tileParams[tileId].TileStartLCUY * 2) ||
2504 x >= (tileEndLCUX * 2) ||
2505 y >= (tileEndLCUY * 2))
2506 {
2507 GetTileInfo(x, y, &tileId, &tileEndLCUX, &tileEndLCUY);
2508 }
2509 streamInBaseOffset = m_tileParams[tileId].TileStreaminOffset;
2510
2511 auto xPositionInTile = x - (m_tileParams[tileId].TileStartLCUX * 2);
2512 auto yPositionInTile = y - (m_tileParams[tileId].TileStartLCUY * 2);
2513 auto tileWidthInLCU = MOS_ROUNDUP_DIVIDE(((m_tileParams[tileId].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
2514
2515 StreaminZigZagToLinearMap(tileWidthInLCU * 2, xPositionInTile, yPositionInTile, &offset, &xyOffset);
2516
2517 SetStreaminDataPerLcu(streaminParams, data + (streamInBaseOffset + offset + xyOffset) * 64);
2518 }
2519 }
2520 }
2521
SetBrcRoiDeltaQpMap(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,uint8_t regionId,PDeltaQpForROI deltaQpMap)2522 void CodechalVdencHevcStateG11::SetBrcRoiDeltaQpMap(
2523 uint32_t streamInWidth,
2524 uint32_t top,
2525 uint32_t bottom,
2526 uint32_t left,
2527 uint32_t right,
2528 uint8_t regionId,
2529 PDeltaQpForROI deltaQpMap)
2530 {
2531
2532 CODECHAL_ENCODE_FUNCTION_ENTER;
2533
2534 if (!m_hevcPicParams->tiles_enabled_flag)
2535 {
2536 CodechalVdencHevcState::SetBrcRoiDeltaQpMap(streamInWidth, top, bottom, left, right, regionId, deltaQpMap);
2537 return;
2538 }
2539
2540 uint32_t tileId = 0, tileEndLCUX = 0, tileEndLCUY = 0;
2541 uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
2542 GetTileInfo(left, top, &tileId, &tileEndLCUX, &tileEndLCUY);
2543
2544 for (auto y = top; y < bottom; y++)
2545 {
2546 for (auto x = left; x < right; x++)
2547 {
2548 uint32_t streamInBaseOffset = 0, offset = 0, xyOffset = 0;
2549
2550 if (x < (m_tileParams[tileId].TileStartLCUX * 2) ||
2551 y < (m_tileParams[tileId].TileStartLCUY * 2) ||
2552 x >= (tileEndLCUX * 2) ||
2553 y >= (tileEndLCUY * 2))
2554 {
2555 GetTileInfo(x, y, &tileId, &tileEndLCUX, &tileEndLCUY);
2556 }
2557 streamInBaseOffset = m_tileParams[tileId].TileStreaminOffset;
2558
2559 auto xPositionInTile = x - (m_tileParams[tileId].TileStartLCUX * 2);
2560 auto yPositionInTile = y - (m_tileParams[tileId].TileStartLCUY * 2);
2561 auto tileWidthInLCU = MOS_ROUNDUP_DIVIDE(((m_tileParams[tileId].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
2562
2563 StreaminZigZagToLinearMap(tileWidthInLCU * 2, xPositionInTile, yPositionInTile, &offset, &xyOffset);
2564
2565 (deltaQpMap + (streamInBaseOffset + offset + xyOffset))->iDeltaQp = m_hevcPicParams->ROI[regionId].PriorityLevelOrDQp;
2566 }
2567 }
2568 }
2569
EncTileLevel()2570 MOS_STATUS CodechalVdencHevcStateG11::EncTileLevel()
2571 {
2572 CODECHAL_ENCODE_FUNCTION_ENTER;
2573
2574 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2575
2576 int32_t currentPipe = GetCurrentPipe();
2577 int32_t currentPass = GetCurrentPass();
2578
2579 if (currentPipe < 0 || currentPass < 0)
2580 {
2581 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid pipe number or pass number");
2582 return MOS_STATUS_INVALID_PARAMETER;
2583 }
2584
2585 MHW_VDBOX_HEVC_SLICE_STATE_G11 sliceState;
2586 SetHcpSliceStateCommonParams(sliceState);
2587
2588 MOS_COMMAND_BUFFER cmdBuffer;
2589 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2590
2591 uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
2592 uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
2593
2594 for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
2595 {
2596 for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
2597 {
2598 PCODEC_ENCODER_SLCDATA slcData = m_slcData;
2599 uint32_t slcCount, idx, sliceNumInTile = 0;
2600
2601 idx = tileRow * numTileColumns + tileCol;
2602
2603 if ((m_numPipe > 1) && (tileCol != currentPipe))
2604 {
2605 continue;
2606 }
2607
2608 // HCP_TILE_CODING commmand
2609 CODECHAL_ENCODE_CHK_STATUS_RETURN(
2610 static_cast<MhwVdboxHcpInterfaceG11*>(m_hcpInterface)->AddHcpTileCodingCmd(&cmdBuffer, &m_tileParams[idx]));
2611
2612 for (slcCount = 0; slcCount < m_numSlices; slcCount++)
2613 {
2614 bool lastSliceInTile = false, sliceInTile = false;
2615
2616 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
2617 &m_tileParams[idx],
2618 &sliceInTile,
2619 &lastSliceInTile));
2620
2621 if (!sliceInTile)
2622 {
2623 continue;
2624 }
2625
2626 if (IsFirstPass())
2627 {
2628 uint32_t startLCU = 0;
2629 for (uint32_t ii = 0; ii < slcCount; ii++)
2630 {
2631 startLCU += m_hevcSliceParams[ii].NumLCUsInSlice;
2632 }
2633 slcData[slcCount].CmdOffset = startLCU * (m_hwInterface->GetHcpInterface()->GetHcpPakObjSize()) * sizeof(uint32_t);
2634 }
2635
2636 if (m_hevcVdencAcqpEnabled || m_brcEnabled)
2637 {
2638 // save offset for next 2nd level batch buffer usage
2639 // This is because we don't know how many times HCP_WEIGHTOFFSET_STATE & HCP_PAK_INSERT_OBJECT will be inserted for each slice
2640 // dwVdencBatchBufferPerSliceConstSize: constant size for each slice
2641 // m_vdencBatchBufferPerSliceVarSize: variable size for each slice
2642
2643 // starting location for executing slice level cmds
2644 // To do: Improvize to only add current slice wSlcCount
2645 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2646
2647 for (uint32_t j = 0; j < slcCount; j++)
2648 {
2649 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset
2650 += (m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_vdencBatchBufferPerSliceVarSize[j]);
2651 }
2652
2653 }
2654
2655 SetHcpSliceStateParams(sliceState, slcData, (uint16_t)slcCount, m_tileParams, lastSliceInTile, idx);
2656
2657 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHwSliceEncodeCommand(&cmdBuffer, &sliceState));
2658
2659 // Send VD_PIPELINE_FLUSH command
2660 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2661 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2662 vdPipelineFlushParams.Flags.bWaitDoneMFX = 1;
2663 vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
2664 vdPipelineFlushParams.Flags.bFlushVDENC = 1;
2665 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2666 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
2667
2668 sliceNumInTile++;
2669 } // end of slice
2670
2671 if (0 == sliceNumInTile)
2672 {
2673 // One tile must have at least one slice
2674 CODECHAL_ENCODE_ASSERT(false);
2675 eStatus = MOS_STATUS_INVALID_PARAMETER;
2676 break;
2677 }
2678
2679 if (sliceNumInTile > 1 && (numTileColumns > 1 || numTileRows > 1))
2680 {
2681 CODECHAL_ENCODE_ASSERTMESSAGE("Multi-slices in a tile is not supported!");
2682 return MOS_STATUS_INVALID_PARAMETER;
2683 }
2684 } // end of row tile
2685 } // end of column tile
2686
2687 // Insert end of sequence/stream if set
2688 if ((m_lastPicInStream || m_lastPicInSeq) && IsLastPipe())
2689 {
2690 MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
2691 MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
2692 pakInsertObjectParams.bLastPicInSeq = m_lastPicInSeq;
2693 pakInsertObjectParams.bLastPicInStream = m_lastPicInStream;
2694 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&cmdBuffer, &pakInsertObjectParams));
2695 }
2696
2697 // Send MI_FLUSH command
2698 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2699 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2700 flushDwParams.bVideoPipelineCacheInvalidate = true;
2701 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2702
2703 // Send VD_PIPELINE_FLUSH command
2704 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2705 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2706 vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
2707 vdPipelineFlushParams.Flags.bFlushHEVC = 1;
2708 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2709 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
2710
2711 // Send MI_FLUSH command
2712 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2713 flushDwParams.bVideoPipelineCacheInvalidate = true;
2714 if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[currentPipe].sResource))
2715 {
2716 flushDwParams.pOsResource = &m_resVdBoxSemaphoreMem[currentPipe].sResource;
2717 flushDwParams.dwDataDW1 = currentPass+1;
2718 }
2719 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2720
2721 if (IsFirstPipe())
2722 {
2723 // first pipe needs to ensure all other pipes are ready
2724 for (uint32_t i = 1; i < m_numPipe; i++)
2725 {
2726 if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[i].sResource))
2727 {
2728 CODECHAL_ENCODE_CHK_STATUS_RETURN(
2729 SendHWWaitCommand(&m_resVdBoxSemaphoreMem[i].sResource,
2730 &cmdBuffer,
2731 currentPass + 1));
2732 }
2733 }
2734
2735 // Whenever ACQP/ BRC is enabled with tiling, PAK Integration kernel is needed.
2736 if (m_vdencHucUsed) // ACQP/ BRC need PAK integration kernel to aggregate statistics
2737 {
2738 CODECHAL_ENCODE_CHK_STATUS_RETURN(HucPakIntegrate(&cmdBuffer));
2739 }
2740 // Use HW stitch commands only in the scalable mode
2741 if (m_numPipe > 1 && m_enableTileStitchByHW)
2742 {
2743 HucCopyParams copyParams;
2744 uint32_t index = m_virtualEngineBbIndex;
2745
2746 copyParams.size = m_hwInterface->m_tileRecordSize;
2747 copyParams.presSrc = &m_tileRecordBuffer[index].sResource;
2748 copyParams.presDst = &m_resBitstreamBuffer;
2749 copyParams.lengthOfTable = (uint8_t)(m_numTiles);
2750
2751 auto hucCmdInitializer = static_cast<CodechalCmdInitializerG11*>(m_hucCmdInitializer);
2752 CODECHAL_ENCODE_CHK_STATUS_RETURN(hucCmdInitializer->AddCopyCmds(&cmdBuffer, ©Params));
2753 }
2754
2755 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(&cmdBuffer));
2756 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSliceSize(&cmdBuffer));
2757
2758 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2759
2760 if (m_numPipe <= 1) // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
2761 {
2762 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
2763
2764 // BRC PAK statistics different for each pass
2765 if (m_brcEnabled)
2766 {
2767 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStats(&cmdBuffer));
2768 }
2769 }
2770 }
2771
2772 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2773 {
2774 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
2775 }
2776
2777 std::string pakPassName = "PAK_PASS[" + std::to_string(GetCurrentPass())+"]";
2778 CODECHAL_DEBUG_TOOL(
2779 CODECHAL_ENCODE_CHK_STATUS_RETURN( m_debugInterface->DumpCmdBuffer(
2780 &cmdBuffer,
2781 CODECHAL_NUM_MEDIA_STATES,
2782 pakPassName.data()));)
2783
2784 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2785
2786 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2787 {
2788 bool nullRendering = m_videoContextUsesNullHw;
2789 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, nullRendering));
2790 CODECHAL_DEBUG_TOOL(
2791 CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpHucDebugOutputBuffers());
2792 if (m_mmcState)
2793 {
2794 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
2795 }
2796 )
2797
2798 if (IsFirstPipe() &&
2799 IsLastPass() &&
2800 m_signalEnc &&
2801 m_currRefSync &&
2802 !Mos_ResourceIsNull(&m_currRefSync->resSyncObject))
2803 {
2804 // signal semaphore
2805 MOS_SYNC_PARAMS syncParams;
2806 syncParams = g_cInitSyncParams;
2807 syncParams.GpuContext = m_videoContext;
2808 syncParams.presSyncResource = &m_currRefSync->resSyncObject;
2809
2810 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
2811 m_currRefSync->uiSemaphoreObjCount++;
2812 m_currRefSync->bInUsed = true;
2813 }
2814 }
2815
2816 // Reset parameters for next PAK execution
2817 if (IsLastPipe() &&
2818 IsLastPass())
2819 {
2820 if (!m_singleTaskPhaseSupported)
2821 {
2822 m_osInterface->pfnResetPerfBufferID(m_osInterface);
2823 }
2824
2825 m_currPakSliceIdx = (m_currPakSliceIdx + 1) % CODECHAL_HEVC_NUM_PAK_SLICE_BATCH_BUFFERS;
2826
2827 if (m_hevcSeqParams->ParallelBRC)
2828 {
2829 m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite =
2830 (m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
2831 }
2832
2833 m_newPpsHeader = 0;
2834 m_newSeqHeader = 0;
2835 m_frameNum++;
2836 }
2837
2838 return eStatus;
2839 }
2840
SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & vdboxPipeModeSelectParams)2841 void CodechalVdencHevcStateG11::SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& vdboxPipeModeSelectParams)
2842 {
2843 CODECHAL_ENCODE_FUNCTION_ENTER;
2844
2845 CodechalEncodeHevcBase::SetHcpPipeModeSelectParams(vdboxPipeModeSelectParams);
2846
2847 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11& pipeModeSelectParams = static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11&>(vdboxPipeModeSelectParams);
2848
2849 if (m_numPipe > 1)
2850 {
2851 // Running in the multiple VDBOX mode
2852 if (IsFirstPipe())
2853 {
2854 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT;
2855 }
2856 else if (IsLastPipe())
2857 {
2858 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT;
2859 }
2860 else
2861 {
2862 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE;
2863 }
2864 pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE;
2865 }
2866 else
2867 {
2868 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
2869 pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
2870 }
2871 }
2872
ConstructBatchBufferHuCCQP(PMOS_RESOURCE batchBuffer)2873 MOS_STATUS CodechalVdencHevcStateG11::ConstructBatchBufferHuCCQP(PMOS_RESOURCE batchBuffer)
2874 {
2875 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2876 MOS_COMMAND_BUFFER cmdBuffer;
2877 uint8_t data[CODECHAL_PAGE_SIZE] = {0};
2878 uint16_t len = 0;
2879 CodechalCmdInitializerG11* pCmdInitializerG11 = static_cast<CodechalCmdInitializerG11*>(m_hucCmdInitializer);
2880
2881 CODECHAL_ENCODE_FUNCTION_ENTER;
2882 CODECHAL_ENCODE_CHK_NULL_RETURN(batchBuffer);
2883 CODECHAL_ENCODE_CHK_NULL_RETURN(pCmdInitializerG11);
2884
2885 MOS_COMMAND_BUFFER constructedCmdBuf;
2886 MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf));
2887
2888 constructedCmdBuf.pCmdBase = constructedCmdBuf.pCmdPtr = (uint32_t *)data;
2889 constructedCmdBuf.iRemaining = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
2890
2891 constructedCmdBuf.pCmdPtr += (m_insertOffsetAfterCMD1 / 4);
2892 constructedCmdBuf.iOffset += m_insertOffsetAfterCMD1;
2893
2894 m_picStateCmdStartInBytes = constructedCmdBuf.iOffset;
2895
2896 // set HCP_PIC_STATE command
2897 MHW_VDBOX_HEVC_PIC_STATE hevcPicState;
2898 SetHcpPicStateParams(hevcPicState);
2899 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPicStateCmd(&constructedCmdBuf, &hevcPicState));
2900 m_cmd2StartInBytes = constructedCmdBuf.iOffset;
2901
2902 constructedCmdBuf.pCmdPtr += (m_insertOffsetAfterCMD2 / 4);
2903 constructedCmdBuf.iOffset += m_insertOffsetAfterCMD2;
2904
2905 len = m_cmd2StartInBytes - m_picStateCmdStartInBytes;
2906 pCmdInitializerG11->AddCmdConstData(
2907 CODECHAL_CMD5,
2908 (uint32_t*)(data + m_picStateCmdStartInBytes),
2909 len,
2910 m_picStateCmdStartInBytes);
2911
2912 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2913 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->CmdInitializerExecute(false, batchBuffer, &cmdBuffer));
2914 ReturnCommandBuffer(&cmdBuffer);
2915
2916 if (!m_singleTaskPhaseSupported)
2917 {
2918 bool renderingFlags = m_videoContextUsesNullHw;
2919
2920 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(GetDebugInterface()->DumpCmdBuffer(
2921 &cmdBuffer,
2922 CODECHAL_NUM_MEDIA_STATES,
2923 "HucCmd")));
2924
2925 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
2926 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->DumpHucCmdInit(batchBuffer)));
2927 }
2928
2929 return eStatus;
2930 }
2931
ConstructBatchBufferHuCBRC(PMOS_RESOURCE batchBuffer)2932 MOS_STATUS CodechalVdencHevcStateG11::ConstructBatchBufferHuCBRC(PMOS_RESOURCE batchBuffer)
2933 {
2934 CODECHAL_ENCODE_FUNCTION_ENTER;
2935
2936 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2937
2938 CODECHAL_ENCODE_CHK_NULL_RETURN(batchBuffer);
2939
2940 MOS_LOCK_PARAMS lockFlags;
2941 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2942 lockFlags.WriteOnly = true;
2943
2944 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, batchBuffer, &lockFlags);
2945 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
2946
2947 MOS_COMMAND_BUFFER constructedCmdBuf;
2948 MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf));
2949 constructedCmdBuf.pCmdBase = constructedCmdBuf.pCmdPtr = (uint32_t *)data;
2950 constructedCmdBuf.iRemaining = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
2951
2952 // 1st Group : PIPE_MODE_SELECT
2953 // set PIPE_MODE_SELECT command
2954 // on Gen11 no need to set "bSaoFirstPass" since it is handled by HW now
2955 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11 pipeModeSelectParams;
2956 SetHcpPipeModeSelectParams(pipeModeSelectParams);
2957
2958 pipeModeSelectParams.bVdencEnabled = true;
2959 pipeModeSelectParams.bAdvancedRateControlEnable = true;
2960 pipeModeSelectParams.bStreamOutEnabled = !IsLastPass();
2961 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&constructedCmdBuf, &pipeModeSelectParams));
2962
2963 MHW_BATCH_BUFFER TempBatchBuffer;
2964 MOS_ZeroMemory(&TempBatchBuffer, sizeof(MHW_BATCH_BUFFER));
2965 TempBatchBuffer.iSize = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
2966 TempBatchBuffer.pData = data;
2967
2968 // set MI_BATCH_BUFFER_END command
2969 int32_t cmdBufOffset = constructedCmdBuf.iOffset;
2970
2971 TempBatchBuffer.iCurrent = constructedCmdBuf.iOffset;
2972 TempBatchBuffer.iRemaining = constructedCmdBuf.iRemaining;
2973 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &TempBatchBuffer));
2974 constructedCmdBuf.pCmdPtr += (TempBatchBuffer.iCurrent - constructedCmdBuf.iOffset) / 4;
2975 constructedCmdBuf.iOffset = TempBatchBuffer.iCurrent;
2976 constructedCmdBuf.iRemaining = TempBatchBuffer.iRemaining;
2977
2978 m_miBatchBufferEndCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
2979
2980 CODECHAL_ENCODE_ASSERT(m_hwInterface->m_vdencBatchBuffer1stGroupSize == constructedCmdBuf.iOffset);
2981
2982 constructedCmdBuf.pCmdPtr += (m_insertOffsetAfterCMD1 / 4);
2983 constructedCmdBuf.iOffset += m_insertOffsetAfterCMD1;
2984
2985 m_picStateCmdStartInBytes = constructedCmdBuf.iOffset;
2986
2987 // set HCP_PIC_STATE command
2988 MHW_VDBOX_HEVC_PIC_STATE hevcPicState;
2989 SetHcpPicStateParams(hevcPicState);
2990 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPicStateCmd(&constructedCmdBuf, &hevcPicState));
2991 m_cmd2StartInBytes = constructedCmdBuf.iOffset;
2992
2993 constructedCmdBuf.pCmdPtr += (m_insertOffsetAfterCMD2 / 4);
2994 constructedCmdBuf.iOffset += m_insertOffsetAfterCMD2;
2995
2996 // set MI_BATCH_BUFFER_END command
2997 TempBatchBuffer.iCurrent = constructedCmdBuf.iOffset;
2998 TempBatchBuffer.iRemaining = constructedCmdBuf.iRemaining;
2999 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &TempBatchBuffer));
3000 constructedCmdBuf.pCmdPtr += (TempBatchBuffer.iCurrent - constructedCmdBuf.iOffset) / 4;
3001 constructedCmdBuf.iOffset = TempBatchBuffer.iCurrent;
3002 constructedCmdBuf.iRemaining = TempBatchBuffer.iRemaining;
3003
3004 CODECHAL_ENCODE_ASSERT(m_hwInterface->m_vdencBatchBuffer2ndGroupSize + m_hwInterface->m_vdencBatchBuffer1stGroupSize
3005 == constructedCmdBuf.iOffset);
3006
3007 if (data)
3008 {
3009 m_osInterface->pfnUnlockResource(m_osInterface, batchBuffer);
3010 }
3011
3012 return eStatus;
3013 }
3014
ConstructBatchBufferHuCBRCForGroup3(PMOS_RESOURCE batchBuffer)3015 MOS_STATUS CodechalVdencHevcStateG11::ConstructBatchBufferHuCBRCForGroup3(PMOS_RESOURCE batchBuffer)
3016 {
3017 CODECHAL_ENCODE_FUNCTION_ENTER;
3018
3019 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3020 int32_t cmdBufOffset = 0;
3021
3022 CODECHAL_ENCODE_CHK_NULL_RETURN(m_slcData);
3023 CODECHAL_ENCODE_CHK_NULL_RETURN(batchBuffer);
3024
3025 MOS_LOCK_PARAMS lockFlags;
3026 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3027 lockFlags.WriteOnly = true;
3028 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, batchBuffer, &lockFlags);
3029 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
3030
3031 MOS_COMMAND_BUFFER constructedCmdBuf;
3032 MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf));
3033 constructedCmdBuf.pCmdBase = constructedCmdBuf.pCmdPtr = (uint32_t *)data;
3034 constructedCmdBuf.iRemaining = MOS_ALIGN_CEIL(m_hwInterface->m_vdencGroup3BatchBufferSize, CODECHAL_PAGE_SIZE);
3035
3036 // 3rd Group : HCP_WEIGHTSOFFSETS_STATE + HCP_SLICE_STATE + HCP_PAK_INSERT_OBJECT + VDENC_WEIGHT_OFFSETS_STATE
3037 MHW_VDBOX_HEVC_SLICE_STATE_G11 sliceState;
3038 SetHcpSliceStateCommonParams(sliceState);
3039
3040 // slice level cmds for each slice
3041 PCODEC_ENCODER_SLCDATA slcData = m_slcData;
3042 uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
3043 uint16_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
3044 for (uint32_t startLCU = 0, slcCount = 0; slcCount < m_numSlices; slcCount++)
3045 {
3046 uint32_t idx = 0;
3047 bool lastSliceInTile = false, sliceInTile = false;
3048
3049 for (auto tileRow = 0; (tileRow < numTileRows) && !sliceInTile; tileRow++)
3050 {
3051 for (auto tileCol = 0; (tileCol < numTileColumns) && !sliceInTile; tileCol++)
3052 {
3053 idx = tileRow * numTileColumns + tileCol;
3054 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
3055 &m_tileParams[idx],
3056 &sliceInTile,
3057 &lastSliceInTile));
3058 }
3059 }
3060
3061 if (IsFirstPass())
3062 {
3063 slcData[slcCount].CmdOffset = startLCU * (m_hcpInterface->GetHcpPakObjSize()) * sizeof(uint32_t);
3064 }
3065
3066 SetHcpSliceStateParams(sliceState, slcData, (uint16_t) slcCount, m_tileParams, lastSliceInTile, idx);
3067 m_vdencBatchBufferPerSliceVarSize[slcCount] = 0;
3068
3069 // set HCP_WEIGHTOFFSET_STATE command
3070 // This slice level command is issued, if the weighted_pred_flag or weighted_bipred_flag equals one.
3071 // If zero, then this command is not issued.
3072 if (m_hevcVdencWeightedPredEnabled)
3073 {
3074 MHW_VDBOX_HEVC_WEIGHTOFFSET_PARAMS hcpWeightOffsetParams;
3075 MOS_ZeroMemory(&hcpWeightOffsetParams, sizeof(hcpWeightOffsetParams));
3076
3077 // HuC based WP ignores App based weights
3078 if (!m_hevcPicParams->bEnableGPUWeightedPrediction)
3079 {
3080 for (auto k = 0; k < 2; k++) // k=0: LIST_0, k=1: LIST_1
3081 {
3082 // Luma, Chroma Offset
3083 for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
3084 {
3085 hcpWeightOffsetParams.LumaOffsets[k][i] = (int16_t)m_hevcSliceParams->luma_offset[k][i];
3086 // Cb, Cr
3087 for (auto j = 0; j < 2; j++)
3088 {
3089 hcpWeightOffsetParams.ChromaOffsets[k][i][j] = (int16_t)m_hevcSliceParams->chroma_offset[k][i][j];
3090 }
3091 }
3092
3093 // Luma Weight
3094 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
3095 &hcpWeightOffsetParams.LumaWeights[k],
3096 sizeof(hcpWeightOffsetParams.LumaWeights[k]),
3097 &m_hevcSliceParams->delta_luma_weight[k],
3098 sizeof(m_hevcSliceParams->delta_luma_weight[k])));
3099 // Chroma Weight
3100 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
3101 &hcpWeightOffsetParams.ChromaWeights[k],
3102 sizeof(hcpWeightOffsetParams.ChromaWeights[k]),
3103 &m_hevcSliceParams->delta_chroma_weight[k],
3104 sizeof(m_hevcSliceParams->delta_chroma_weight[k])));
3105 }
3106 }
3107
3108 // 1st HCP_WEIGHTOFFSET_STATE cmd - P & B
3109 if (m_hevcSliceParams->slice_type == CODECHAL_ENCODE_HEVC_P_SLICE || m_hevcSliceParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
3110 {
3111 hcpWeightOffsetParams.ucList = LIST_0;
3112
3113 cmdBufOffset = constructedCmdBuf.iOffset;
3114 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpWeightOffsetStateCmd(&constructedCmdBuf, nullptr, &hcpWeightOffsetParams));
3115 m_hcpWeightOffsetStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
3116 // 1st HcpWeightOffset cmd is not always inserted (except weighted prediction + P, B slices)
3117 m_vdencBatchBufferPerSliceVarSize[slcCount] += m_hcpWeightOffsetStateCmdSize;
3118 }
3119
3120 // 2nd HCP_WEIGHTOFFSET_STATE cmd - B only
3121 if (m_hevcSliceParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
3122 {
3123 hcpWeightOffsetParams.ucList = LIST_1;
3124
3125 cmdBufOffset = constructedCmdBuf.iOffset;
3126 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpWeightOffsetStateCmd(&constructedCmdBuf, nullptr, &hcpWeightOffsetParams));
3127 m_hcpWeightOffsetStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
3128 // 2nd HcpWeightOffset cmd is not always inserted (except weighted prediction + B slices)
3129 m_vdencBatchBufferPerSliceVarSize[slcCount] += m_hcpWeightOffsetStateCmdSize;
3130 }
3131 }
3132
3133 // set HCP_SLICE_STATE command
3134 cmdBufOffset = constructedCmdBuf.iOffset;
3135 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSliceStateCmd(&constructedCmdBuf, &sliceState));
3136 m_hcpSliceStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
3137
3138 // set 1st HCP_PAK_INSERT_OBJECT command
3139 // insert AU, SPS, PPS headers before first slice header
3140 if (sliceState.bInsertBeforeSliceHeaders)
3141 {
3142 uint32_t maxBytesInPakInsertObjCmd = ((2 << 11) - 1) * 4; // 12 bits for DwordLength field in PAK_INSERT_OBJ cmd
3143 m_1stPakInsertObjectCmdSize = 0;
3144
3145 for (auto i = 0; i < HEVC_MAX_NAL_UNIT_TYPE; i++)
3146 {
3147 uint32_t nalUnitPosiSize = sliceState.ppNalUnitParams[i]->uiSize;
3148 uint32_t nalUnitPosiOffset = sliceState.ppNalUnitParams[i]->uiOffset;
3149
3150 while (nalUnitPosiSize > 0)
3151 {
3152 uint32_t bitSize = MOS_MIN(maxBytesInPakInsertObjCmd * 8, nalUnitPosiSize * 8);
3153 uint32_t offSet = nalUnitPosiOffset;
3154
3155 MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
3156 MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
3157 pakInsertObjectParams.bEmulationByteBitsInsert = sliceState.ppNalUnitParams[i]->bInsertEmulationBytes;
3158 pakInsertObjectParams.uiSkipEmulationCheckCount = sliceState.ppNalUnitParams[i]->uiSkipEmulationCheckCount;
3159 pakInsertObjectParams.pBsBuffer = sliceState.pBsBuffer;
3160 pakInsertObjectParams.dwBitSize = bitSize;
3161 pakInsertObjectParams.dwOffset = offSet;
3162
3163 if (nalUnitPosiSize > maxBytesInPakInsertObjCmd)
3164 {
3165 nalUnitPosiSize -= maxBytesInPakInsertObjCmd;
3166 nalUnitPosiOffset += maxBytesInPakInsertObjCmd;
3167 }
3168 else
3169 {
3170 nalUnitPosiSize = 0;
3171 }
3172
3173 cmdBufOffset = constructedCmdBuf.iOffset;
3174 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&constructedCmdBuf, &pakInsertObjectParams));
3175
3176 // this info needed again in BrcUpdate HuC FW const
3177 m_1stPakInsertObjectCmdSize += (constructedCmdBuf.iOffset - cmdBufOffset);
3178 }
3179 }
3180 // 1st PakInsertObject cmd is not always inserted for each slice
3181 m_vdencBatchBufferPerSliceVarSize[slcCount] += m_1stPakInsertObjectCmdSize;
3182 }
3183
3184 // set 2nd HCP_PAK_INSERT_OBJECT command
3185 // Insert slice header
3186 MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
3187 MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
3188 pakInsertObjectParams.bLastHeader = true;
3189 pakInsertObjectParams.bEmulationByteBitsInsert = true;
3190
3191 // App does the slice header packing, set the skip count passed by the app
3192 pakInsertObjectParams.uiSkipEmulationCheckCount = sliceState.uiSkipEmulationCheckCount;
3193 pakInsertObjectParams.pBsBuffer = sliceState.pBsBuffer;
3194 pakInsertObjectParams.dwBitSize = sliceState.dwLength;
3195 pakInsertObjectParams.dwOffset = sliceState.dwOffset;
3196
3197 // For HEVC VDEnc Dynamic Slice
3198 if (m_hevcSeqParams->SliceSizeControl)
3199 {
3200 pakInsertObjectParams.bLastHeader = false;
3201 pakInsertObjectParams.bEmulationByteBitsInsert = false;
3202 pakInsertObjectParams.dwBitSize = m_hevcSliceParams->BitLengthSliceHeaderStartingPortion;
3203 pakInsertObjectParams.bResetBitstreamStartingPos = true;
3204 }
3205
3206 uint32_t byteSize = (pakInsertObjectParams.dwBitSize + 7) >> 3;
3207 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(
3208 &constructedCmdBuf,
3209 &pakInsertObjectParams));
3210
3211 // 2nd PakInsertObject cmd is always inserted for each slice
3212 // so already reflected in dwVdencBatchBufferPerSliceConstSize
3213 m_vdencBatchBufferPerSliceVarSize[slcCount] += (MOS_ALIGN_CEIL(byteSize, sizeof(uint32_t))) / sizeof(uint32_t) * 4;
3214
3215 // set 3rd HCP_PAK_INSERT_OBJECT command
3216 if (m_hevcSeqParams->SliceSizeControl)
3217 {
3218 // Send HCP_PAK_INSERT_OBJ command. For dynamic slice, we are skipping the beginning part of slice header.
3219 pakInsertObjectParams.bLastHeader = true;
3220 pakInsertObjectParams.dwBitSize = sliceState.dwLength - m_hevcSliceParams->BitLengthSliceHeaderStartingPortion;
3221 pakInsertObjectParams.dwOffset += ((m_hevcSliceParams->BitLengthSliceHeaderStartingPortion + 7) / 8); // Skips the first 5 bytes which is Start Code + Nal Unit Header
3222 pakInsertObjectParams.bResetBitstreamStartingPos = true;
3223
3224 cmdBufOffset = constructedCmdBuf.iOffset;
3225 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(
3226 &constructedCmdBuf,
3227 &pakInsertObjectParams));
3228 // 3rd PakInsertObject cmd is not always inserted for each slice
3229 m_vdencBatchBufferPerSliceVarSize[slcCount] += (constructedCmdBuf.iOffset - cmdBufOffset);
3230 }
3231
3232 // set VDENC_WEIGHT_OFFSETS_STATE command
3233 MHW_VDBOX_VDENC_WEIGHT_OFFSET_PARAMS vdencWeightOffsetParams;
3234 MOS_ZeroMemory(&vdencWeightOffsetParams, sizeof(vdencWeightOffsetParams));
3235 vdencWeightOffsetParams.bWeightedPredEnabled = m_hevcVdencWeightedPredEnabled;
3236
3237 if (vdencWeightOffsetParams.bWeightedPredEnabled)
3238 {
3239 uint8_t lumaLog2WeightDenom = m_hevcPicParams->bEnableGPUWeightedPrediction ? 6 : m_hevcSliceParams->luma_log2_weight_denom;
3240 vdencWeightOffsetParams.dwDenom = 1 << lumaLog2WeightDenom;
3241
3242 // HuC based WP ignores App based weights
3243 if (!m_hevcPicParams->bEnableGPUWeightedPrediction)
3244 {
3245 // Luma Offsets
3246 for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
3247 {
3248 vdencWeightOffsetParams.LumaOffsets[0][i] = (int16_t)m_hevcSliceParams->luma_offset[0][i];
3249 vdencWeightOffsetParams.LumaOffsets[1][i] = (int16_t)m_hevcSliceParams->luma_offset[1][i];
3250 }
3251
3252 // Luma Weights
3253 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(MOS_SecureMemcpy(
3254 &vdencWeightOffsetParams.LumaWeights[0],
3255 sizeof(vdencWeightOffsetParams.LumaWeights[0]),
3256 &m_hevcSliceParams->delta_luma_weight[0],
3257 sizeof(m_hevcSliceParams->delta_luma_weight[0])),
3258 "Failed to copy luma weight 0 memory.");
3259
3260 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(MOS_SecureMemcpy(
3261 &vdencWeightOffsetParams.LumaWeights[1],
3262 sizeof(vdencWeightOffsetParams.LumaWeights[1]),
3263 &m_hevcSliceParams->delta_luma_weight[1],
3264 sizeof(m_hevcSliceParams->delta_luma_weight[1])),
3265 "Failed to copy luma weight 1 memory.");
3266 }
3267 }
3268
3269 cmdBufOffset = constructedCmdBuf.iOffset;
3270 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWeightsOffsetsStateCmd(
3271 &constructedCmdBuf,
3272 nullptr,
3273 &vdencWeightOffsetParams));
3274 m_vdencWeightOffsetStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
3275
3276 MHW_BATCH_BUFFER TempBatchBuffer;
3277 MOS_ZeroMemory(&TempBatchBuffer, sizeof(MHW_BATCH_BUFFER));
3278 TempBatchBuffer.iSize = MOS_ALIGN_CEIL(m_hwInterface->m_vdencGroup3BatchBufferSize, CODECHAL_PAGE_SIZE);
3279 TempBatchBuffer.pData = data;
3280
3281 TempBatchBuffer.iCurrent = constructedCmdBuf.iOffset;
3282 TempBatchBuffer.iRemaining = constructedCmdBuf.iRemaining;
3283 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &TempBatchBuffer));
3284 constructedCmdBuf.pCmdPtr += (TempBatchBuffer.iCurrent - constructedCmdBuf.iOffset) / 4;
3285 constructedCmdBuf.iOffset = TempBatchBuffer.iCurrent;
3286 constructedCmdBuf.iRemaining = TempBatchBuffer.iRemaining;
3287
3288 m_vdencBatchBufferPerSliceVarSize[slcCount] += ENCODE_VDENC_HEVC_PADDING_DW_SIZE * 4;
3289 for (auto i = 0; i < ENCODE_VDENC_HEVC_PADDING_DW_SIZE; i++)
3290 {
3291 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiNoop(&constructedCmdBuf, nullptr));
3292 }
3293
3294 startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice;
3295 }
3296
3297 if (data)
3298 {
3299 m_osInterface->pfnUnlockResource(m_osInterface, batchBuffer);
3300 }
3301
3302 return eStatus;
3303 }
3304
SetDmemHuCBrcInitReset()3305 MOS_STATUS CodechalVdencHevcStateG11::SetDmemHuCBrcInitReset()
3306 {
3307 CODECHAL_ENCODE_FUNCTION_ENTER;
3308
3309 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3310
3311 MOS_LOCK_PARAMS lockFlagsWriteOnly;
3312 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
3313 lockFlagsWriteOnly.WriteOnly = true;
3314
3315 // Setup BrcInit DMEM
3316 auto hucVdencBrcInitDmem = (PCODECHAL_VDENC_HEVC_HUC_BRC_INIT_DMEM_G11)m_osInterface->pfnLockResource(
3317 m_osInterface, &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx], &lockFlagsWriteOnly);
3318 CODECHAL_ENCODE_CHK_NULL_RETURN(hucVdencBrcInitDmem);
3319 MOS_ZeroMemory(hucVdencBrcInitDmem, sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_INIT_DMEM_G11));
3320
3321 hucVdencBrcInitDmem->BRCFunc_U32 = 0; // 0: Init, 1: Reset
3322 hucVdencBrcInitDmem->UserMaxFrame = GetProfileLevelMaxFrameSize();
3323 hucVdencBrcInitDmem->InitBufFull_U32 = MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit);
3324 hucVdencBrcInitDmem->BufSize_U32 = m_hevcSeqParams->VBVBufferSizeInBit;
3325 hucVdencBrcInitDmem->TargetBitrate_U32 = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS; // DDI in Kbits
3326 hucVdencBrcInitDmem->MaxRate_U32 = m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS;
3327 hucVdencBrcInitDmem->MinRate_U32 = 0;
3328 hucVdencBrcInitDmem->FrameRateM_U32 = m_hevcSeqParams->FrameRate.Numerator;
3329 hucVdencBrcInitDmem->FrameRateD_U32 = m_hevcSeqParams->FrameRate.Denominator;
3330 hucVdencBrcInitDmem->ACQP_U32 = 0;
3331 if (m_hevcSeqParams->UserMaxPBFrameSize > 0)
3332 {
3333 //Backup CodingType as need to set it as B_Tpye to get MaxFrameSize for P/B frames.
3334 auto CodingTypeTemp = m_hevcPicParams->CodingType;
3335 m_hevcPicParams->CodingType = B_TYPE;
3336 hucVdencBrcInitDmem->ProfileLevelMaxFramePB_U32 = GetProfileLevelMaxFrameSize();
3337 m_hevcPicParams->CodingType = CodingTypeTemp;
3338 }
3339 else
3340 {
3341 hucVdencBrcInitDmem->ProfileLevelMaxFramePB_U32 = hucVdencBrcInitDmem->UserMaxFrame;
3342 }
3343
3344 if (m_brcEnabled)
3345 {
3346 switch (m_hevcSeqParams->RateControlMethod)
3347 {
3348 case RATECONTROL_ICQ:
3349 hucVdencBrcInitDmem->BRCFlag = 0;
3350 break;
3351 case RATECONTROL_CBR:
3352 hucVdencBrcInitDmem->BRCFlag = 1;
3353 break;
3354 case RATECONTROL_VBR:
3355 hucVdencBrcInitDmem->BRCFlag = 2;
3356 hucVdencBrcInitDmem->ACQP_U32 = 0;
3357 break;
3358 case RATECONTROL_VCM:
3359 hucVdencBrcInitDmem->BRCFlag = 3;
3360 break;
3361 case RATECONTROL_QVBR:
3362 hucVdencBrcInitDmem->BRCFlag = 2;
3363 hucVdencBrcInitDmem->ACQP_U32 = m_hevcSeqParams->ICQQualityFactor;
3364 break;
3365 default:
3366 break;
3367 }
3368
3369 // Low Delay BRC
3370 if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
3371 {
3372 hucVdencBrcInitDmem->BRCFlag = 5;
3373 }
3374
3375 switch (m_hevcSeqParams->MBBRC)
3376 {
3377 case mbBrcInternal:
3378 case mbBrcEnabled:
3379 hucVdencBrcInitDmem->CuQpCtrl_U8 = 3;
3380 break;
3381 case mbBrcDisabled:
3382 hucVdencBrcInitDmem->CuQpCtrl_U8 = 0;
3383 break;
3384 default:
3385 break;
3386 }
3387 }
3388 else if (m_hevcVdencAcqpEnabled)
3389 {
3390 hucVdencBrcInitDmem->BRCFlag = 0;
3391
3392 // 0=No CUQP; 1=CUQP for I-frame; 2=CUQP for P/B-frame
3393 // bit operation, bit 1 for I-frame, bit 2 for P/B frame
3394 // In VDENC mode, the field "Cu_Qp_Delta_Enabled_Flag" should always be set to 1.
3395 if (m_hevcSeqParams->QpAdjustment)
3396 {
3397 hucVdencBrcInitDmem->CuQpCtrl_U8 = 3; // wPictureCodingType I:0, P:1, B:2
3398 }
3399 else
3400 {
3401 hucVdencBrcInitDmem->CuQpCtrl_U8 = 0; // wPictureCodingType I:0, P:1, B:2
3402 }
3403 }
3404
3405 hucVdencBrcInitDmem->SSCFlag = m_hevcSeqParams->SliceSizeControl;
3406
3407 // NumP/NumB in par file are different from GopP/GopB
3408 // definitions of P & B are not consistent
3409 // LDB case, NumP=0 & NumB=100, but GopP=100 & GopB=0
3410
3411 hucVdencBrcInitDmem->GopP_U16 = m_hevcSeqParams->GopPicSize - m_hevcSeqParams->NumOfBInGop[0] - 1;
3412 hucVdencBrcInitDmem->GopB_U16 = (uint16_t)m_hevcSeqParams->NumOfBInGop[0];
3413
3414 hucVdencBrcInitDmem->FrameWidth_U16 = (uint16_t)m_frameWidth;
3415 hucVdencBrcInitDmem->FrameHeight_U16 = (uint16_t)m_frameHeight;
3416
3417 hucVdencBrcInitDmem->GopB1_U16 = (uint16_t)m_hevcSeqParams->NumOfBInGop[1];
3418 hucVdencBrcInitDmem->GopB2_U16 = (uint16_t)m_hevcSeqParams->NumOfBInGop[2];
3419
3420 hucVdencBrcInitDmem->MinQP_U8 = m_hevcPicParams->BRCMinQp < 10 ? 10 : m_hevcPicParams->BRCMinQp; // Setting values from arch spec
3421 hucVdencBrcInitDmem->MaxQP_U8 = m_hevcPicParams->BRCMaxQp < 10 ? 51 : (m_hevcPicParams->BRCMaxQp > 51 ? 51 : m_hevcPicParams->BRCMaxQp); // Setting values from arch spec
3422
3423 hucVdencBrcInitDmem->MaxBRCLevel_U8 = 1;
3424 hucVdencBrcInitDmem->LumaBitDepth_U8 = m_hevcSeqParams->bit_depth_luma_minus8 + 8;
3425 hucVdencBrcInitDmem->ChromaBitDepth_U8 = m_hevcSeqParams->bit_depth_chroma_minus8 + 8;
3426
3427 if ((hucVdencBrcInitDmem->LowDelayMode_U8 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)))
3428 {
3429 MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshPB0_S8, 8 * sizeof(int8_t), (void *)m_lowdelayDevThreshPB, 8 * sizeof(int8_t));
3430 MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshVBR0_S8, 8 * sizeof(int8_t), (void*)m_lowdelayDevThreshVBR, 8 * sizeof(int8_t));
3431 MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshI0_S8, 8 * sizeof(int8_t), (void*)m_lowdelayDevThreshI, 8 * sizeof(int8_t));
3432 }
3433 else
3434 {
3435 uint64_t inputbitsperframe = uint64_t(hucVdencBrcInitDmem->MaxRate_U32*100. / (hucVdencBrcInitDmem->FrameRateM_U32 * 100.0 / hucVdencBrcInitDmem->FrameRateD_U32));
3436 if (m_brcEnabled && !hucVdencBrcInitDmem->BufSize_U32)
3437 {
3438 CODECHAL_ENCODE_ASSERTMESSAGE("VBV BufSize should not be 0 for BRC case\n");
3439 eStatus = MOS_STATUS_INVALID_PARAMETER;
3440 }
3441 uint64_t vbvsz = hucVdencBrcInitDmem->BufSize_U32;
3442 double bps_ratio = inputbitsperframe / (vbvsz / m_devStdFPS);
3443 if (bps_ratio < m_bpsRatioLow) bps_ratio = m_bpsRatioLow;
3444 if (bps_ratio > m_bpsRatioHigh) bps_ratio = m_bpsRatioHigh;
3445
3446 for (int i = 0; i < m_numDevThreshlds / 2; i++) {
3447 hucVdencBrcInitDmem->DevThreshPB0_S8[i] = (signed char)(m_negMultPB*pow(m_devThreshPBFPNEG[i], bps_ratio));
3448 hucVdencBrcInitDmem->DevThreshPB0_S8[i + m_numDevThreshlds / 2] = (signed char)(m_postMultPB*pow(m_devThreshPBFPPOS[i], bps_ratio));
3449
3450 hucVdencBrcInitDmem->DevThreshI0_S8[i] = (signed char)(m_negMultPB*pow(m_devThreshIFPNEG[i], bps_ratio));
3451 hucVdencBrcInitDmem->DevThreshI0_S8[i + m_numDevThreshlds / 2] = (signed char)(m_postMultPB*pow(m_devThreshIFPPOS[i], bps_ratio));
3452
3453 hucVdencBrcInitDmem->DevThreshVBR0_S8[i] = (signed char)(m_negMultPB*pow(m_devThreshVBRNEG[i], bps_ratio));
3454 hucVdencBrcInitDmem->DevThreshVBR0_S8[i + m_numDevThreshlds / 2] = (signed char)(m_posMultVBR*pow(m_devThreshVBRPOS[i], bps_ratio));
3455 }
3456 }
3457
3458 MOS_SecureMemcpy(hucVdencBrcInitDmem->InstRateThreshP0_S8, 4 * sizeof(int8_t), (void *)m_instRateThreshP0, 4 * sizeof(int8_t));
3459 MOS_SecureMemcpy(hucVdencBrcInitDmem->InstRateThreshB0_S8, 4 * sizeof(int8_t), (void *)m_instRateThreshB0, 4 * sizeof(int8_t));
3460 MOS_SecureMemcpy(hucVdencBrcInitDmem->InstRateThreshI0_S8, 4 * sizeof(int8_t), (void *)m_instRateThreshI0, 4 * sizeof(int8_t));
3461
3462 if (m_brcEnabled)
3463 {
3464 // initQPIP, initQPB values will be used for BRC in the future
3465 int32_t initQPIP = 0, initQPB = 0;
3466 ComputeVDEncInitQP(initQPIP, initQPB);
3467 hucVdencBrcInitDmem->InitQPIP_U8 = (uint8_t)initQPIP;
3468 hucVdencBrcInitDmem->InitQPB_U8 = (uint8_t)initQPB;
3469 }
3470 else
3471 {
3472 hucVdencBrcInitDmem->InitQPIP_U8 = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
3473 hucVdencBrcInitDmem->InitQPB_U8 = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
3474 }
3475
3476 // recommendation
3477 hucVdencBrcInitDmem->TopFrmSzThrForAdapt2Pass_U8 = 32;
3478 hucVdencBrcInitDmem->BotFrmSzThrForAdapt2Pass_U8 = 24;
3479
3480 MOS_SecureMemcpy(hucVdencBrcInitDmem->EstRateThreshP0_U8, 7 * sizeof(uint8_t), (void*)m_estRateThreshP0, 7 * sizeof(uint8_t));
3481 MOS_SecureMemcpy(hucVdencBrcInitDmem->EstRateThreshB0_U8, 7 * sizeof(uint8_t), (void*)m_estRateThreshB0, 7 * sizeof(uint8_t));
3482 MOS_SecureMemcpy(hucVdencBrcInitDmem->EstRateThreshI0_U8, 7 * sizeof(uint8_t), (void*)m_estRateThreshI0, 7 * sizeof(uint8_t));
3483
3484 if (m_vdencStreamInEnabled && m_hevcPicParams->NumROI && !m_vdencNativeROIEnabled)
3485 {
3486 hucVdencBrcInitDmem->StreamInROIEnable_U8 = 1;
3487 hucVdencBrcInitDmem->StreamInSurfaceEnable_U8 = 1;
3488 }
3489 // RDOQ adaptation hardened to HW starting Gen11
3490 hucVdencBrcInitDmem->RDOQ_AdaptationEnable_U8 = 0;
3491
3492 // recommendation
3493 hucVdencBrcInitDmem->TopQPDeltaThrForAdapt2Pass_U8 = 2;
3494 hucVdencBrcInitDmem->BotQPDeltaThrForAdapt2Pass_U8 = 1;
3495 hucVdencBrcInitDmem->SlidingWindow_Size_U32 = 30;
3496
3497 if ((m_hevcSeqParams->SlidingWindowSize != 0) && (m_hevcSeqParams->MaxBitRatePerSlidingWindow != 0))
3498 {
3499 hucVdencBrcInitDmem->SlidingWindow_Size_U32 = m_hevcSeqParams->SlidingWindowSize;
3500 hucVdencBrcInitDmem->SlidingWindow_MaxRateRatio_U8 = (m_hevcSeqParams->MaxBitRatePerSlidingWindow * 100)/ m_hevcSeqParams->TargetBitRate;
3501 }
3502 else
3503 {
3504 if (m_hevcSeqParams->FrameRate.Denominator == 0)
3505 {
3506 CODECHAL_ENCODE_ASSERTMESSAGE("FrameRate.Deminator is zero!");
3507 return MOS_STATUS_INVALID_PARAMETER;
3508 }
3509 uint32_t framerate = m_hevcSeqParams->FrameRate.Numerator / m_hevcSeqParams->FrameRate.Denominator;
3510 hucVdencBrcInitDmem->SlidingWindow_Size_U32 = MOS_MIN(framerate, 60);
3511 hucVdencBrcInitDmem->SlidingWindow_MaxRateRatio_U8 = 120;
3512 }
3513
3514 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx]);
3515
3516 return eStatus;
3517 }
3518
SetConstDataHuCBrcUpdate()3519 MOS_STATUS CodechalVdencHevcStateG11::SetConstDataHuCBrcUpdate()
3520 {
3521 CODECHAL_ENCODE_FUNCTION_ENTER;
3522
3523 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3524
3525 MOS_LOCK_PARAMS lockFlagsWriteOnly;
3526 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
3527 lockFlagsWriteOnly.WriteOnly = true;
3528
3529 auto hucConstData = (PCODECHAL_VDENC_HEVC_HUC_BRC_CONSTANT_DATA_G11)m_osInterface->pfnLockResource(
3530 m_osInterface, &m_vdencBrcConstDataBuffer[m_currRecycledBufIdx], &lockFlagsWriteOnly);
3531 CODECHAL_ENCODE_CHK_NULL_RETURN(hucConstData);
3532
3533 MOS_SecureMemcpy(hucConstData->SLCSZ_THRDELTAI_U16, sizeof(m_hucConstantData), m_hucConstantData, sizeof(m_hucConstantData));
3534
3535 MOS_SecureMemcpy(hucConstData->RDQPLambdaI, sizeof(m_rdQpLambdaI), m_rdQpLambdaI, sizeof(m_rdQpLambdaI));
3536 MOS_SecureMemcpy(hucConstData->RDQPLambdaP, sizeof(m_rdQpLambdaP), m_rdQpLambdaP, sizeof(m_rdQpLambdaP));
3537
3538 if (m_hevcVisualQualityImprovement)
3539 {
3540 MOS_SecureMemcpy(hucConstData->SADQPLambdaI, sizeof(m_sadQpLambdaI), m_sadQpLambdaI_VQI, sizeof(m_sadQpLambdaI));
3541 MOS_SecureMemcpy(hucConstData->PenaltyForIntraNonDC32x32PredMode, sizeof(m_penaltyForIntraNonDC32x32PredMode), m_penaltyForIntraNonDC32x32PredMode_VQI, sizeof(m_penaltyForIntraNonDC32x32PredMode_VQI));
3542 }
3543 else
3544 {
3545 MOS_SecureMemcpy(hucConstData->SADQPLambdaI, sizeof(m_sadQpLambdaI), m_sadQpLambdaI, sizeof(m_sadQpLambdaI));
3546 MOS_SecureMemcpy(hucConstData->PenaltyForIntraNonDC32x32PredMode, sizeof(m_penaltyForIntraNonDC32x32PredMode), m_penaltyForIntraNonDC32x32PredMode, sizeof(m_penaltyForIntraNonDC32x32PredMode));
3547 }
3548
3549 MOS_SecureMemcpy(hucConstData->SADQPLambdaP, sizeof(m_sadQpLambdaP), m_sadQpLambdaP, sizeof(m_sadQpLambdaP));
3550
3551 if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
3552 {
3553 const int numEstrateThreshlds = 7;
3554
3555 for (int i = 0; i < numEstrateThreshlds + 1; i++)
3556 {
3557 for (int j = 0; j < m_numDevThreshlds + 1; j++)
3558 {
3559 hucConstData->FrmSzAdjTabI_S8[(numEstrateThreshlds + 1)*j + i] = m_lowdelayDeltaFrmszI[j][i];
3560 hucConstData->FrmSzAdjTabP_S8[(numEstrateThreshlds + 1)*j + i] = m_lowdelayDeltaFrmszP[j][i];
3561 hucConstData->FrmSzAdjTabB_S8[(numEstrateThreshlds + 1)*j + i] = m_lowdelayDeltaFrmszB[j][i];
3562 }
3563 }
3564 }
3565
3566 // ModeCosts depends on frame type
3567 if (m_pictureCodingType == I_TYPE)
3568 {
3569 MOS_SecureMemcpy(hucConstData->ModeCosts, sizeof(m_hucModeCostsIFrame), m_hucModeCostsIFrame, sizeof(m_hucModeCostsIFrame));
3570 }
3571 else
3572 {
3573 MOS_SecureMemcpy(hucConstData->ModeCosts, sizeof(m_hucModeCostsPbFrame), m_hucModeCostsPbFrame, sizeof(m_hucModeCostsPbFrame));
3574 }
3575
3576 // starting location in batch buffer for each slice
3577 uint32_t baseLocation = 0; // base location is 0 after move Group3 cmds to region12
3578 uint32_t currentLocation = baseLocation;
3579
3580 auto slcData = m_slcData;
3581 // HCP_WEIGHTSOFFSETS_STATE + HCP_SLICE_STATE + HCP_PAK_INSERT_OBJECT + VDENC_WEIGHT_OFFSETS_STATE
3582 for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
3583 {
3584 auto hevcSlcParams = &m_hevcSliceParams[slcCount];
3585 // HuC FW require unit in Bytes
3586 hucConstData->Slice[slcCount].SizeOfCMDs
3587 = (uint16_t)(m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_vdencBatchBufferPerSliceVarSize[slcCount]);
3588
3589 // HCP_WEIGHTOFFSET_STATE cmd
3590 if (m_hevcVdencWeightedPredEnabled)
3591 {
3592 // 1st HCP_WEIGHTOFFSET_STATE cmd - P & B
3593 if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_P_SLICE || hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
3594 {
3595 hucConstData->Slice[slcCount].HcpWeightOffsetL0_StartInBytes = (uint16_t)currentLocation; // HCP_WEIGHTOFFSET_L0 starts in byte from beginning of the SLB. 0xFFFF means unavailable in SLB
3596 currentLocation += m_hcpWeightOffsetStateCmdSize;
3597 }
3598
3599 // 2nd HCP_WEIGHTOFFSET_STATE cmd - B
3600 if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
3601 {
3602 hucConstData->Slice[slcCount].HcpWeightOffsetL1_StartInBytes = (uint16_t)currentLocation; // HCP_WEIGHTOFFSET_L1 starts in byte from beginning of the SLB. 0xFFFF means unavailable in SLB
3603 currentLocation += m_hcpWeightOffsetStateCmdSize;
3604 }
3605 }
3606 else
3607 {
3608 // 0xFFFF means unavailable in SLB
3609 hucConstData->Slice[slcCount].HcpWeightOffsetL0_StartInBytes = 0xFFFF;
3610 hucConstData->Slice[slcCount].HcpWeightOffsetL1_StartInBytes = 0xFFFF;
3611 }
3612
3613 // HCP_SLICE_STATE cmd
3614 hucConstData->Slice[slcCount].SliceState_StartInBytes = (uint16_t)currentLocation; // HCP_WEIGHTOFFSET is not needed
3615 currentLocation += m_hcpSliceStateCmdSize;
3616
3617 // VDENC_WEIGHT_OFFSETS_STATE cmd
3618 hucConstData->Slice[slcCount].VdencWeightOffset_StartInBytes // VdencWeightOffset cmd is the last one expect BatchBufferEnd cmd
3619 = (uint16_t)(baseLocation + hucConstData->Slice[slcCount].SizeOfCMDs - m_vdencWeightOffsetStateCmdSize - m_miBatchBufferEndCmdSize - ENCODE_VDENC_HEVC_PADDING_DW_SIZE * 4);
3620
3621 // logic from PakInsertObject cmd
3622 uint32_t bitSize = (m_hevcSeqParams->SliceSizeControl) ? (hevcSlcParams->BitLengthSliceHeaderStartingPortion) : slcData[slcCount].BitSize; // 40 for HEVC VDEnc Dynamic Slice
3623 uint32_t sliceHeaderSizeInBytes = (bitSize + 7) >> 3;
3624
3625 // 1st PakInsertObject cmd with AU, SPS, PPS headers only exists for the first slice
3626 if (slcCount == 0)
3627 {
3628 // assumes that there is no 3rd PakInsertObject cmd for SSC
3629 currentLocation += m_1stPakInsertObjectCmdSize;
3630 }
3631
3632 hucConstData->Slice[slcCount].SliceHeaderPIO_StartInBytes = (uint16_t)currentLocation;
3633
3634 // HuC FW requires true slice header size in bits without byte alignment
3635 hucConstData->Slice[slcCount].SliceHeader_SizeInBits = (uint16_t)(sliceHeaderSizeInBytes * 8);
3636 if (!IsFirstPass())
3637 {
3638 PBSBuffer bsBuffer = &m_bsBuffer;
3639 CODECHAL_ENCODE_CHK_NULL_RETURN(bsBuffer);
3640 CODECHAL_ENCODE_CHK_NULL_RETURN(bsBuffer->pBase);
3641 uint8_t *sliceHeaderLastByte = (uint8_t*)(bsBuffer->pBase + slcData[slcCount].SliceOffset + sliceHeaderSizeInBytes - 1);
3642 for (auto i = 0; i < 8; i++)
3643 {
3644 uint8_t mask = 1 << i;
3645 if (*sliceHeaderLastByte & mask)
3646 {
3647 hucConstData->Slice[slcCount].SliceHeader_SizeInBits -= (i+1);
3648 break;
3649 }
3650 }
3651 }
3652 if (m_hevcVdencWeightedPredEnabled)
3653 {
3654 hucConstData->Slice[slcCount].WeightTable_StartInBits = (uint16_t)hevcSlcParams->PredWeightTableBitOffset;
3655 hucConstData->Slice[slcCount].WeightTable_EndInBits = (uint16_t)(hevcSlcParams->PredWeightTableBitOffset + (hevcSlcParams->PredWeightTableBitLength));
3656 }
3657 else
3658 {
3659 // number of bits from beginning of slice header, 0xffff means not awailable
3660 hucConstData->Slice[slcCount].WeightTable_StartInBits = 0xFFFF;
3661 hucConstData->Slice[slcCount].WeightTable_EndInBits = 0xFFFF;
3662 }
3663
3664 baseLocation += hucConstData->Slice[slcCount].SizeOfCMDs;
3665 currentLocation = baseLocation;
3666 }
3667
3668 // Add motion apatative settings
3669 if (m_enableMotionAdaptive)
3670 {
3671 MOS_SecureMemcpy(hucConstData->QPAdaptiveWeight, sizeof(m_qpAdaptiveWeight), m_qpAdaptiveWeight, sizeof(m_qpAdaptiveWeight));
3672 MOS_SecureMemcpy(hucConstData->boostTable, sizeof(m_boostTable), m_boostTable, sizeof(m_boostTable));
3673 }
3674 else
3675 {
3676 MOS_ZeroMemory(hucConstData->QPAdaptiveWeight, sizeof(m_qpAdaptiveWeight));
3677 MOS_ZeroMemory(hucConstData->boostTable, sizeof(m_boostTable));
3678 }
3679
3680 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcConstDataBuffer[m_currRecycledBufIdx]);
3681
3682 return eStatus;
3683 }
3684
SetRegionsHuCBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)3685 MOS_STATUS CodechalVdencHevcStateG11::SetRegionsHuCBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
3686 {
3687 CODECHAL_ENCODE_FUNCTION_ENTER;
3688
3689 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3690
3691 int32_t currentPass = GetCurrentPass();
3692 if (currentPass < 0)
3693 {
3694 eStatus = MOS_STATUS_INVALID_PARAMETER;
3695 return eStatus;
3696 }
3697
3698 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::SetRegionsHuCBrcUpdate(virtualAddrParams));
3699
3700 // With multiple tiles, ensure that HuC BRC kernel is fed with vdenc frame level statistics from HuC PAK Int kernel
3701 // Applicable for scalable/ non-scalable mode
3702 if (m_hevcPicParams->tiles_enabled_flag)
3703 {
3704 virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 1 VDEnc Statistics Buffer (Input) - VDENC_HEVC_VP9_FRAME_BASED_STATISTICS_STREAMOUT
3705 virtualAddrParams->regionParams[1].dwOffset = m_hevcFrameStatsOffset.uiVdencStatistics;
3706 }
3707
3708 if (m_numPipe > 1)
3709 {
3710 virtualAddrParams->regionParams[2].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 2 PAK Statistics Buffer (Input) - MFX_PAK_FRAME_STATISTICS
3711 virtualAddrParams->regionParams[2].dwOffset = m_hevcFrameStatsOffset.uiHevcPakStatistics;
3712 virtualAddrParams->regionParams[7].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 7 Slice Stat Streamout (Input)
3713 virtualAddrParams->regionParams[7].dwOffset = m_hevcFrameStatsOffset.uiHevcSliceStreamout;
3714 // In scalable-mode, use PAK Integration kernel output to get bistream size
3715 virtualAddrParams->regionParams[8].presRegion = &m_resBrcDataBuffer;
3716 }
3717
3718 virtualAddrParams->regionParams[12].presRegion = &m_vdencGroup3BatchBuffer[m_currRecycledBufIdx][currentPass]; // Region 12 - SLB buffer for group 3 (Input)
3719
3720 // Tile reset case, use previous frame BRC data
3721 if ((m_numPipe != m_numPipePre) && IsFirstPass())
3722 {
3723 if (m_numPipePre > 1)
3724 {
3725 virtualAddrParams->regionParams[8].presRegion = &m_resBrcDataBuffer;
3726 }
3727 else
3728 {
3729 virtualAddrParams->regionParams[8].presRegion = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo);
3730 }
3731 }
3732
3733 return eStatus;
3734 }
3735
SetDmemHuCBrcUpdate()3736 MOS_STATUS CodechalVdencHevcStateG11::SetDmemHuCBrcUpdate()
3737 {
3738 CODECHAL_ENCODE_FUNCTION_ENTER;
3739
3740 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3741
3742 MOS_LOCK_PARAMS lockFlagsWriteOnly;
3743 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
3744 lockFlagsWriteOnly.WriteOnly = true;
3745
3746 // Program update DMEM
3747 auto hucVDEncBrcUpdateDmem = (PCODECHAL_VDENC_HEVC_HUC_BRC_UPDATE_DMEM_G11)m_osInterface->pfnLockResource(
3748 m_osInterface, &m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][GetCurrentPass()], &lockFlagsWriteOnly);
3749 CODECHAL_ENCODE_CHK_NULL_RETURN(hucVDEncBrcUpdateDmem);
3750 MOS_ZeroMemory(hucVDEncBrcUpdateDmem, sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_UPDATE_DMEM_G11));
3751
3752 hucVDEncBrcUpdateDmem->TARGETSIZE_U32 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)? m_hevcSeqParams->InitVBVBufferFullnessInBit :
3753 MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit);
3754 hucVDEncBrcUpdateDmem->FrameID_U32 = m_storeData; // frame number
3755 MOS_SecureMemcpy(hucVDEncBrcUpdateDmem->startGAdjFrame_U16, 4 * sizeof(uint16_t), (void*)m_startGAdjFrame, 4 * sizeof(uint16_t));
3756 hucVDEncBrcUpdateDmem->TargetSliceSize_U16 = (uint16_t)m_hevcPicParams->MaxSliceSizeInBytes;
3757 auto slbSliceSize = (m_hwInterface->m_vdenc2ndLevelBatchBufferSize - m_hwInterface->m_vdencBatchBuffer1stGroupSize -
3758 m_hwInterface->m_vdencBatchBuffer2ndGroupSize) / ENCODE_HEVC_VDENC_NUM_MAX_SLICES;
3759 hucVDEncBrcUpdateDmem->SLB_Data_SizeInBytes = (uint16_t)(slbSliceSize * m_numSlices);
3760 hucVDEncBrcUpdateDmem->PIPE_MODE_SELECT_StartInBytes = 0xFFFF; // HuC need not need to modify the pipe mode select command in Gen11+
3761 hucVDEncBrcUpdateDmem->CMD1_StartInBytes = (uint16_t)m_hwInterface->m_vdencBatchBuffer1stGroupSize;
3762 hucVDEncBrcUpdateDmem->PIC_STATE_StartInBytes = (uint16_t)m_picStateCmdStartInBytes;
3763 hucVDEncBrcUpdateDmem->CMD2_StartInBytes = (uint16_t)m_cmd2StartInBytes;
3764
3765 uint16_t circularFrameIdx = (m_storeData - 1) % 4;
3766
3767 // initial order before circular shift: current, ref0, ref1, ref2 = 0, 3, 2, 1
3768 // different initial order can be used, but this order (0, 3, 2, 1) is kernel recommendation
3769 hucVDEncBrcUpdateDmem->Current_Data_Offset = ((0 + circularFrameIdx) % 4) * m_weightHistSize;
3770 hucVDEncBrcUpdateDmem->Ref_Data_Offset[0] = ((3 + circularFrameIdx) % 4) * m_weightHistSize;
3771 hucVDEncBrcUpdateDmem->Ref_Data_Offset[1] = ((2 + circularFrameIdx) % 4) * m_weightHistSize;
3772 hucVDEncBrcUpdateDmem->Ref_Data_Offset[2] = ((1 + circularFrameIdx) % 4) * m_weightHistSize;
3773
3774 hucVDEncBrcUpdateDmem->MaxNumSliceAllowed_U16 = (uint16_t)GetMaxAllowedSlices(m_hevcSeqParams->Level);
3775 hucVDEncBrcUpdateDmem->OpMode_U8 // 1: BRC (including ACQP), 2: Weighted prediction (should not be enabled in first pass)
3776 = (m_hevcVdencWeightedPredEnabled && m_hevcPicParams->bEnableGPUWeightedPrediction && !IsFirstPass()) ? 3 : 1; // 01: BRC, 10: WP never used, 11: BRC + WP
3777
3778 // LowDelay B needs to be considered as P frame although wPictureCodingType=3
3779 // wPictureCodingType I:1, P:2, B:3 -> CurrentFrameType I:2, P:0, B:1
3780 hucVDEncBrcUpdateDmem->CurrentFrameType_U8 = (m_pictureCodingType == I_TYPE) ? 2 : 0;
3781
3782 // Num_Ref_L1 should be always same as Num_Ref_L0
3783 hucVDEncBrcUpdateDmem->Num_Ref_L0_U8 = m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1;
3784 hucVDEncBrcUpdateDmem->Num_Ref_L1_U8 = m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1;
3785 hucVDEncBrcUpdateDmem->Num_Slices = (uint8_t)m_hevcPicParams->NumSlices;
3786
3787 // CQP_QPValue_U8 setting is needed since ACQP is also part of ICQ
3788 hucVDEncBrcUpdateDmem->CQP_QPValue_U8 = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
3789 hucVDEncBrcUpdateDmem->CQP_FracQP_U8 = 0;
3790 if (m_hevcPicParams->BRCPrecision == 1)
3791 {
3792 hucVDEncBrcUpdateDmem->MaxNumPass_U8 = 1;
3793 }
3794 else
3795 {
3796 hucVDEncBrcUpdateDmem->MaxNumPass_U8 = CODECHAL_VDENC_BRC_NUM_OF_PASSES;
3797 }
3798
3799 MOS_SecureMemcpy(hucVDEncBrcUpdateDmem->gRateRatioThreshold_U8, 7 * sizeof(uint8_t), (void*)m_rateRatioThreshold, 7 * sizeof(uint8_t));
3800 MOS_SecureMemcpy(hucVDEncBrcUpdateDmem->startGAdjMult_U8, 5 * sizeof(uint8_t), (void*)m_startGAdjMult, 5 * sizeof(uint8_t));
3801 MOS_SecureMemcpy(hucVDEncBrcUpdateDmem->startGAdjDiv_U8, 5 * sizeof(uint8_t), (void*)m_startGAdjDiv, 5 * sizeof(uint8_t));
3802 MOS_SecureMemcpy(hucVDEncBrcUpdateDmem->gRateRatioThresholdQP_U8, 8 * sizeof(uint8_t), (void*)m_rateRatioThresholdQP, 8 * sizeof(uint8_t));
3803
3804 hucVDEncBrcUpdateDmem->IPAverageCoeff_U8 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW) ? 0 : 64;
3805 hucVDEncBrcUpdateDmem->CurrentPass_U8 = (uint8_t) GetCurrentPass();
3806
3807 if ((m_hevcVdencAcqpEnabled && m_hevcSeqParams->QpAdjustment) || (m_brcEnabled && (m_hevcSeqParams->MBBRC != 2)))
3808 {
3809 hucVDEncBrcUpdateDmem->DeltaQPForSadZone0_S8 = -1;
3810 hucVDEncBrcUpdateDmem->DeltaQPForSadZone1_S8 = 0;
3811 hucVDEncBrcUpdateDmem->DeltaQPForSadZone2_S8 = 1;
3812 hucVDEncBrcUpdateDmem->DeltaQPForSadZone3_S8 = 2;
3813 hucVDEncBrcUpdateDmem->DeltaQPForMvZero_S8 = 3;
3814 hucVDEncBrcUpdateDmem->DeltaQPForMvZone0_S8 = -2;
3815 hucVDEncBrcUpdateDmem->DeltaQPForMvZone1_S8 = 0;
3816 hucVDEncBrcUpdateDmem->DeltaQPForMvZone2_S8 = 2;
3817 }
3818
3819 if (m_hevcVdencWeightedPredEnabled)
3820 {
3821 hucVDEncBrcUpdateDmem->LumaLog2WeightDenom_S8 = 6;
3822 hucVDEncBrcUpdateDmem->ChromaLog2WeightDenom_S8 = 6;
3823 }
3824
3825 // chroma weights are not confirmed to be supported from HW team yet
3826 hucVDEncBrcUpdateDmem->DisabledFeature_U8 = 0; // bit mask, 1 (bit0): disable chroma weight setting
3827
3828 hucVDEncBrcUpdateDmem->SlidingWindow_Enable_U8 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_LOW);
3829 hucVDEncBrcUpdateDmem->LOG_LCU_Size_U8 = 6;
3830 hucVDEncBrcUpdateDmem->RDOQ_Enable_U8 = m_hevcRdoqEnabled ? (m_pictureCodingType == I_TYPE ? m_hevcIFrameRdoqEnabled : 1) : 0;
3831 hucVDEncBrcUpdateDmem->ReEncodePositiveQPDeltaThr_S8 = 4;
3832 hucVDEncBrcUpdateDmem->ReEncodeNegativeQPDeltaThr_S8 = -5;
3833 hucVDEncBrcUpdateDmem->SceneChgPrevIntraPctThreshold_U8 = 96;
3834 hucVDEncBrcUpdateDmem->SceneChgCurIntraPctThreshold_U8 = 192;
3835
3836 hucVDEncBrcUpdateDmem->EnableMotionAdaptive = m_enableMotionAdaptive;
3837 hucVDEncBrcUpdateDmem->FrameSizeBoostForSceneChange = 1;
3838 hucVDEncBrcUpdateDmem->ROMCurrent = 0;
3839 hucVDEncBrcUpdateDmem->ROMZero = 0;
3840 hucVDEncBrcUpdateDmem->TargetFrameSize = m_hevcPicParams->TargetFrameSize << 3;// byte to bit
3841
3842 // reset skip frame statistics
3843 m_numSkipFrames = 0;
3844 m_sizeSkipFrames = 0;
3845
3846 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][GetCurrentPass()]);
3847
3848 return eStatus;
3849 }
3850
SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceStateParams)3851 void CodechalVdencHevcStateG11::SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE& sliceStateParams)
3852 {
3853 CODECHAL_ENCODE_FUNCTION_ENTER;
3854
3855 CodechalVdencHevcState::SetHcpSliceStateCommonParams(sliceStateParams);
3856
3857 static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G11 &>(sliceStateParams).dwNumPipe = m_numPipe;
3858 }
3859
SetHcpSliceStateParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceState,PCODEC_ENCODER_SLCDATA slcData,uint16_t slcCount,PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 tileCodingParams,bool lastSliceInTile,uint32_t idx)3860 void CodechalVdencHevcStateG11::SetHcpSliceStateParams(
3861 MHW_VDBOX_HEVC_SLICE_STATE& sliceState,
3862 PCODEC_ENCODER_SLCDATA slcData,
3863 uint16_t slcCount,
3864 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 tileCodingParams,
3865 bool lastSliceInTile,
3866 uint32_t idx)
3867 {
3868 CODECHAL_ENCODE_FUNCTION_ENTER;
3869
3870 CodechalEncodeHevcBase::SetHcpSliceStateParams(sliceState, slcData, slcCount);
3871
3872 sliceState.bLastSliceInTile = lastSliceInTile ? true : false;
3873 sliceState.bLastSliceInTileColumn = (lastSliceInTile & tileCodingParams[idx].IsLastTileofColumn) ? true : false;
3874 static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G11&>(sliceState).pTileCodingParams = tileCodingParams + idx;
3875 static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G11&>(sliceState).dwTileID = idx;
3876 }
3877
SetKernelParams(EncOperation operation,MHW_KERNEL_PARAM * kernelParams)3878 MOS_STATUS CodechalVdencHevcStateG11::SetKernelParams(
3879 EncOperation operation,
3880 MHW_KERNEL_PARAM* kernelParams)
3881 {
3882 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3883
3884 CODECHAL_ENCODE_FUNCTION_ENTER;
3885
3886 CODECHAL_ENCODE_CHK_NULL_RETURN(kernelParams);
3887
3888 auto curbeAlignment = m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment();
3889
3890 kernelParams->iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
3891 kernelParams->iIdCount = 1;
3892
3893 switch (operation)
3894 {
3895 case VDENC_ME_P:
3896 case VDENC_ME_B:
3897 case VDENC_STREAMIN:
3898 case VDENC_STREAMIN_HEVC:
3899 kernelParams->iBTCount = CODECHAL_VDENC_HME_END_G11 - CODECHAL_VDENC_HME_BEGIN_G11;
3900 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MEDIA_OBJECT_HEVC_VP9_VDENC_ME_CURBE_G11), (size_t)curbeAlignment);
3901 kernelParams->iBlockWidth = 32;
3902 kernelParams->iBlockHeight = 32;
3903 break;
3904 default:
3905 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
3906 eStatus = MOS_STATUS_INVALID_PARAMETER;
3907 }
3908
3909 return eStatus;
3910 }
3911
SetBindingTable(EncOperation operation,PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable)3912 MOS_STATUS CodechalVdencHevcStateG11::SetBindingTable(
3913 EncOperation operation,
3914 PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable)
3915 {
3916 CODECHAL_ENCODE_FUNCTION_ENTER;
3917
3918 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3919
3920 CODECHAL_ENCODE_CHK_NULL_RETURN(bindingTable);
3921
3922 MOS_ZeroMemory(bindingTable, sizeof(*bindingTable));
3923
3924 switch (operation)
3925 {
3926 case VDENC_ME_P:
3927 case VDENC_ME_B:
3928 case VDENC_STREAMIN:
3929 case VDENC_STREAMIN_HEVC:
3930 bindingTable->dwNumBindingTableEntries = CODECHAL_VDENC_HME_END_G11 - CODECHAL_VDENC_HME_BEGIN_G11;
3931 bindingTable->dwBindingTableStartOffset = CODECHAL_VDENC_HME_BEGIN_G11;
3932 break;
3933 default:
3934 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
3935 return MOS_STATUS_INVALID_PARAMETER;
3936 }
3937
3938 for (uint32_t i = 0; i < bindingTable->dwNumBindingTableEntries; i++)
3939 {
3940 bindingTable->dwBindingTableEntries[i] = i;
3941 }
3942 return eStatus;
3943 }
3944
EncodeMeKernel(HmeLevel hmeLevel)3945 MOS_STATUS CodechalVdencHevcStateG11::EncodeMeKernel(HmeLevel hmeLevel)
3946 {
3947 CODECHAL_ENCODE_FUNCTION_ENTER;
3948
3949 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3950
3951 auto kernelState = (hmeLevel == HME_LEVEL_4x) ? &m_vdencStreaminKernelState : &m_vdencMeKernelState;
3952 auto encFunctionType = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_MEDIA_STATE_32X_ME :
3953 (hmeLevel == HME_LEVEL_16x) ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;
3954
3955 // If Single Task Phase is not enabled, use BT count for the kernel state.
3956 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
3957 {
3958 uint32_t maxBtCount = m_singleTaskPhaseSupported ?
3959 m_maxBtCount : kernelState->KernelParams.iBTCount;
3960 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
3961 m_stateHeapInterface,
3962 maxBtCount));
3963 m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
3964 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
3965 }
3966
3967 // Set up the DSH/SSH as normal
3968 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
3969 m_stateHeapInterface,
3970 kernelState,
3971 false,
3972 0,
3973 false,
3974 m_storeData));
3975
3976 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
3977 MOS_ZeroMemory(&idParams, sizeof(idParams));
3978 idParams.pKernelState = kernelState;
3979 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
3980 m_stateHeapInterface,
3981 1,
3982 &idParams));
3983
3984 //Setup curbe for StreamIn Kernel
3985 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMeCurbe(hmeLevel));
3986
3987 CODECHAL_DEBUG_TOOL(
3988 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
3989 encFunctionType,
3990 MHW_DSH_TYPE,
3991 kernelState));
3992 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
3993 encFunctionType,
3994 kernelState));
3995 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
3996 encFunctionType,
3997 MHW_ISH_TYPE,
3998 kernelState));
3999 )
4000
4001 MOS_COMMAND_BUFFER cmdBuffer;
4002 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
4003
4004 SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
4005 sendKernelCmdsParams.EncFunctionType = encFunctionType;
4006 sendKernelCmdsParams.pKernelState = kernelState;
4007 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
4008
4009 // Add binding table
4010 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
4011 m_stateHeapInterface,
4012 kernelState));
4013
4014 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMeSurfaces(hmeLevel, &cmdBuffer));
4015
4016 // Dump SSH for ME kernel
4017 CODECHAL_DEBUG_TOOL(
4018 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
4019 encFunctionType,
4020 MHW_SSH_TYPE,
4021 kernelState)));
4022
4023 uint32_t scalingFactor = (hmeLevel == HME_LEVEL_32x) ? SCALE_FACTOR_32x :
4024 (hmeLevel == HME_LEVEL_16x) ? SCALE_FACTOR_16x : SCALE_FACTOR_4x;
4025
4026 uint32_t resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / scalingFactor);
4027 uint32_t resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scalingFactor);
4028
4029 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
4030 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
4031 walkerCodecParams.WalkerMode = m_walkerMode;
4032 walkerCodecParams.dwResolutionX = resolutionX;
4033 walkerCodecParams.dwResolutionY = resolutionY;
4034 walkerCodecParams.bNoDependency = true;
4035 walkerCodecParams.bMbaff = false;
4036 walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
4037 walkerCodecParams.ucGroupId = m_groupId;
4038
4039 MHW_WALKER_PARAMS walkerParams;
4040 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
4041 m_hwInterface,
4042 &walkerParams,
4043 &walkerCodecParams));
4044
4045 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
4046 &cmdBuffer,
4047 &walkerParams));
4048
4049 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
4050
4051 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
4052 {
4053 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
4054 m_stateHeapInterface));
4055 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
4056 }
4057
4058 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
4059 &cmdBuffer,
4060 encFunctionType,
4061 nullptr)));
4062
4063 m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase);
4064
4065 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
4066
4067 MHW_MI_STORE_DATA_PARAMS storeDataParams;
4068
4069 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
4070 {
4071 m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
4072 m_lastTaskInPhase = false;
4073 }
4074 return eStatus;
4075 }
4076
SetMeCurbe(HmeLevel hmeLevel)4077 MOS_STATUS CodechalVdencHevcStateG11::SetMeCurbe(HmeLevel hmeLevel)
4078 {
4079 CODECHAL_ENCODE_FUNCTION_ENTER;
4080
4081 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4082
4083 CODECHAL_VDENC_HEVC_ME_CURBE_G11 curbe;
4084 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
4085 &curbe,
4086 sizeof(CODECHAL_VDENC_HEVC_ME_CURBE_G11),
4087 ME_CURBE_INIT,
4088 sizeof(CODECHAL_VDENC_HEVC_ME_CURBE_G11)));
4089
4090 PMHW_KERNEL_STATE kernelState = (hmeLevel == HME_LEVEL_4x) ? &m_vdencStreaminKernelState : &m_vdencMeKernelState;
4091 bool useMvFromPrevStep;
4092 bool writeDistortions;
4093 uint32_t scaleFactor;
4094 uint32_t mvShiftFactor = 0;
4095 uint32_t prevMvReadPosFactor = 0;
4096
4097 switch (hmeLevel)
4098 {
4099 case HME_LEVEL_32x:
4100 useMvFromPrevStep = false;
4101 writeDistortions = false;
4102 scaleFactor = SCALE_FACTOR_32x;
4103 mvShiftFactor = 1;
4104 prevMvReadPosFactor = 0;
4105 break;
4106 case HME_LEVEL_16x:
4107 useMvFromPrevStep = (m_b32XMeEnabled) ? true : false;
4108 writeDistortions = false;
4109 scaleFactor = SCALE_FACTOR_16x;
4110 mvShiftFactor = 2;
4111 prevMvReadPosFactor = 1;
4112 break;
4113 case HME_LEVEL_4x:
4114 useMvFromPrevStep = (m_b16XMeEnabled) ? true : false;
4115 writeDistortions = true;
4116 scaleFactor = SCALE_FACTOR_4x;
4117 mvShiftFactor = 2;
4118 prevMvReadPosFactor = 0;
4119 break;
4120 default:
4121 eStatus = MOS_STATUS_INVALID_PARAMETER;
4122 return eStatus;
4123 break;
4124 }
4125
4126 curbe.DW3.SubPelMode = 3;
4127 curbe.DW4.PictureHeightMinus1 = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scaleFactor) - 1;
4128 curbe.DW4.PictureWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth / scaleFactor);
4129 curbe.DW5.QpPrimeY = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
4130 curbe.DW6.WriteDistortions = writeDistortions;
4131 curbe.DW6.UseMvFromPrevStep = useMvFromPrevStep;
4132 curbe.DW6.SuperCombineDist = 5;//SuperCombineDist_Generic[pHevcSeqParams->TargetUsage]; Harded coded in KCM
4133 curbe.DW6.MaxVmvR = 511 * 4;
4134 curbe.DW15.MvShiftFactor = mvShiftFactor;
4135 curbe.DW15.PrevMvReadPosFactor = prevMvReadPosFactor;
4136
4137 if (m_pictureCodingType == B_TYPE)
4138 {
4139 // This field is irrelevant since we are not using the bi-direct search.
4140 // set it to 32 to match
4141 curbe.DW1.BiWeight = m_bframeMeBidirectionalWeight;
4142 curbe.DW13.NumRefIdxL1MinusOne = m_hevcSliceParams->num_ref_idx_l1_active_minus1;
4143 }
4144
4145 if (m_pictureCodingType == P_TYPE || m_pictureCodingType == B_TYPE)
4146 {
4147 curbe.DW13.NumRefIdxL0MinusOne = m_hevcSliceParams->num_ref_idx_l0_active_minus1;
4148 }
4149
4150 if (hmeLevel == HME_LEVEL_4x)
4151 {
4152 curbe.DW30.ActualMBHeight = m_frameHeight;
4153 curbe.DW30.ActualMBWidth = m_frameWidth;
4154 }
4155 else
4156 {
4157 curbe.DW30.ActualMBHeight = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight);
4158 curbe.DW30.ActualMBWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth);
4159 }
4160
4161 curbe.DW13.RefStreaminCost = 0;
4162 // This flag is to indicate the ROI source type instead of indicating ROI is enabled or not
4163 curbe.DW13.ROIEnable = 0;
4164
4165 uint8_t meMethod = (m_pictureCodingType == B_TYPE) ? m_bMeMethodGeneric[m_hevcSeqParams->TargetUsage] : m_meMethodGeneric[m_hevcSeqParams->TargetUsage];
4166 uint8_t tableIdx = (m_pictureCodingType == B_TYPE) ? 1 : 0;
4167 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe.SPDelta), 14 * sizeof(uint32_t),
4168 m_encodeSearchPath[tableIdx][meMethod], 14 * sizeof(uint32_t)));
4169
4170 if (hmeLevel == HME_LEVEL_4x)
4171 {
4172 //StreamIn CURBE
4173 curbe.DW6.LCUSize = 1;//Only LCU64 supported by the VDEnc HW
4174 // Kernel should use driver-prepared stream-in surface during ROI/ MBQP(LCUQP)/ Dirty-Rect
4175 curbe.DW6.InputStreamInEn = (m_hevcPicParams->NumROI || m_encodeParams.bMbQpDataEnabled || (m_hevcPicParams->NumDirtyRects > 0 && (B_TYPE == m_hevcPicParams->CodingType)));
4176 curbe.DW31.MaxCuSize = 3;
4177 curbe.DW31.MaxTuSize = 3;
4178 switch (m_hevcSeqParams->TargetUsage)
4179 {
4180 case 1:
4181 case 4:
4182 curbe.DW36.NumMergeCandCu64x64 = 4;
4183 curbe.DW36.NumMergeCandCu32x32 = 3;
4184 curbe.DW36.NumMergeCandCu16x16 = 2;
4185 curbe.DW36.NumMergeCandCu8x8 = 1;
4186 curbe.DW31.NumImePredictors = m_imgStateImePredictors;
4187 break;
4188 case 7:
4189 curbe.DW36.NumMergeCandCu64x64 = 2;
4190 curbe.DW36.NumMergeCandCu32x32 = 2;
4191 curbe.DW36.NumMergeCandCu16x16 = 2;
4192 curbe.DW36.NumMergeCandCu8x8 = 0;
4193 curbe.DW31.NumImePredictors = 4;
4194 break;
4195 }
4196 }
4197
4198 curbe.DW40._4xMeMvOutputDataSurfIndex = CODECHAL_VDENC_HME_MV_DATA_SURFACE_CM_G11;
4199 curbe.DW41._16xOr32xMeMvInputDataSurfIndex = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_VDENC_32xME_MV_DATA_SURFACE_CM_G11 : CODECHAL_VDENC_16xME_MV_DATA_SURFACE_CM_G11;
4200 curbe.DW42._4xMeOutputDistSurfIndex = CODECHAL_VDENC_HME_DISTORTION_SURFACE_CM_G11;
4201 curbe.DW43._4xMeOutputBrcDistSurfIndex = CODECHAL_VDENC_HME_BRC_DISTORTION_CM_G11;
4202 curbe.DW44.VMEFwdInterPredictionSurfIndex = CODECHAL_VDENC_HME_CURR_FOR_FWD_REF_CM_G11;
4203 curbe.DW45.VMEBwdInterPredictionSurfIndex = CODECHAL_VDENC_HME_CURR_FOR_BWD_REF_CM_G11;
4204 curbe.DW46.VDEncStreamInOutputSurfIndex = CODECHAL_VDENC_HME_VDENC_STREAMIN_OUTPUT_CM_G11;
4205 curbe.DW47.VDEncStreamInInputSurfIndex = CODECHAL_VDENC_HME_VDENC_STREAMIN_INPUT_CM_G11;
4206
4207 CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
4208 &curbe,
4209 kernelState->dwCurbeOffset,
4210 sizeof(curbe)));
4211
4212 return eStatus;
4213 }
4214
SendMeSurfaces(HmeLevel hmeLevel,PMOS_COMMAND_BUFFER cmdBuffer)4215 MOS_STATUS CodechalVdencHevcStateG11::SendMeSurfaces(HmeLevel hmeLevel, PMOS_COMMAND_BUFFER cmdBuffer)
4216 {
4217 CODECHAL_ENCODE_FUNCTION_ENTER;
4218 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4219
4220 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
4221
4222 MOS_SURFACE *meMvDataBuffer;
4223 uint32_t downscaledWidthInMb;
4224 uint32_t downscaledHeightInMb;
4225
4226 if (hmeLevel == HME_LEVEL_32x)
4227 {
4228 meMvDataBuffer = &m_s32XMeMvDataBuffer;
4229 downscaledWidthInMb = m_downscaledWidthInMb32x;
4230 downscaledHeightInMb = m_downscaledHeightInMb32x;
4231 }
4232 else if (hmeLevel == HME_LEVEL_16x)
4233 {
4234 meMvDataBuffer = &m_s16XMeMvDataBuffer;
4235 downscaledWidthInMb = m_downscaledWidthInMb16x;
4236 downscaledHeightInMb = m_downscaledHeightInMb16x;
4237 }
4238 else
4239 {
4240 meMvDataBuffer = &m_s4XMeMvDataBuffer;
4241 downscaledWidthInMb = m_downscaledWidthInMb4x;
4242 downscaledHeightInMb = m_downscaledHeightInMb4x;
4243 }
4244
4245 auto width = MOS_ALIGN_CEIL(downscaledWidthInMb * 32, 64);
4246 auto height = downscaledHeightInMb * 4 * 10;
4247 // Force the values
4248 meMvDataBuffer->dwWidth = width;
4249 meMvDataBuffer->dwHeight = height;
4250 meMvDataBuffer->dwPitch = width;
4251
4252 auto kernelState = (hmeLevel == HME_LEVEL_4x) ? &m_vdencStreaminKernelState : &m_vdencMeKernelState;
4253 auto bindingTable = (hmeLevel == HME_LEVEL_4x) ?
4254 &m_vdencStreaminKernelBindingTable : &m_vdencMeKernelBindingTable;
4255 uint32_t meMvBottomFieldOffset = 0;
4256
4257 CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
4258 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4259 surfaceCodecParams.bIs2DSurface = true;
4260 surfaceCodecParams.bMediaBlockRW = true;
4261 surfaceCodecParams.psSurface = meMvDataBuffer;
4262 surfaceCodecParams.dwOffset = meMvBottomFieldOffset;
4263 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value;
4264 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_MV_DATA_SURFACE_CM_G11];
4265 surfaceCodecParams.bIsWritable = true;
4266 surfaceCodecParams.bRenderTarget = true;
4267 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4268 m_hwInterface,
4269 cmdBuffer,
4270 &surfaceCodecParams,
4271 kernelState));
4272
4273 if (hmeLevel == HME_LEVEL_16x && m_b32XMeEnabled)
4274 {
4275 // Pass 32x MV to 16x ME operation
4276 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4277 surfaceCodecParams.bIs2DSurface = true;
4278 surfaceCodecParams.bMediaBlockRW = true;
4279 surfaceCodecParams.psSurface = &m_s32XMeMvDataBuffer;
4280 surfaceCodecParams.dwOffset = 0;
4281 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value;
4282 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_32xME_MV_DATA_SURFACE_CM_G11];
4283 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4284 m_hwInterface,
4285 cmdBuffer,
4286 &surfaceCodecParams,
4287 kernelState));
4288 }
4289 else if (!(hmeLevel == HME_LEVEL_32x) && m_b16XMeEnabled)
4290 {
4291 // Pass 16x MV to 4x ME operation
4292 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4293 surfaceCodecParams.bIs2DSurface = true;
4294 surfaceCodecParams.bMediaBlockRW = true;
4295 surfaceCodecParams.psSurface = &m_s16XMeMvDataBuffer;
4296 surfaceCodecParams.dwOffset = 0;
4297 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value;
4298 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_16xME_MV_DATA_SURFACE_CM_G11];
4299 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4300 m_hwInterface,
4301 cmdBuffer,
4302 &surfaceCodecParams,
4303 kernelState));
4304
4305 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4306 surfaceCodecParams.bIs2DSurface = true;
4307 surfaceCodecParams.bMediaBlockRW = true;
4308 surfaceCodecParams.psSurface = &m_s4XMeDistortionBuffer;
4309 surfaceCodecParams.dwOffset = 0;
4310 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_DISTORTION_SURFACE_CM_G11];
4311 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value;
4312 surfaceCodecParams.bIsWritable = true;
4313 surfaceCodecParams.bRenderTarget = true;
4314 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4315 m_hwInterface,
4316 cmdBuffer,
4317 &surfaceCodecParams,
4318 kernelState));
4319 }
4320
4321 PMOS_SURFACE currScaledSurface = (hmeLevel == HME_LEVEL_4x) ? m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER) :
4322 ((hmeLevel == HME_LEVEL_16x) ? m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER) : m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER));
4323 MOS_SURFACE refScaledSurface = *currScaledSurface;
4324 bool currFieldPicture = CodecHal_PictureIsField(m_currOriginalPic) ? true : false;
4325 bool currBottomField = CodecHal_PictureIsBottomField(m_currOriginalPic) ? true : false;
4326
4327 uint8_t currVDirection = (!currFieldPicture) ? CODECHAL_VDIRECTION_FRAME :
4328 ((currBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
4329 uint32_t currScaledBottomFieldOffset = (hmeLevel == HME_LEVEL_4x) ?
4330 (uint32_t)m_scaledBottomFieldOffset : ((hmeLevel == HME_LEVEL_16x) ? (uint32_t)m_scaled16xBottomFieldOffset : (uint32_t)m_scaled32xBottomFieldOffset);
4331
4332 // Setup references 1...n
4333 // LIST 0 references
4334 for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
4335 {
4336 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][refIdx];
4337
4338 if (!CodecHal_PictureIsInvalid(refPic))
4339 {
4340 if (refIdx == 0)
4341 {
4342 // Current Picture Y - VME
4343 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4344 surfaceCodecParams.bUseAdvState = true;
4345 surfaceCodecParams.psSurface = currScaledSurface;
4346 surfaceCodecParams.dwOffset = currBottomField ? currScaledBottomFieldOffset : 0;
4347 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value;
4348 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_CURR_FOR_FWD_REF_CM_G11];
4349 surfaceCodecParams.ucVDirection = currVDirection;
4350 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4351 m_hwInterface,
4352 cmdBuffer,
4353 &surfaceCodecParams,
4354 kernelState));
4355 }
4356
4357 bool refFieldPicture = CodecHal_PictureIsField(refPic) ? true : false;
4358 bool refBottomField = CodecHal_PictureIsBottomField(refPic) ? true : false;
4359 uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
4360 uint8_t scaledIdx = m_refList[refPicIdx]->ucScalingIdx;
4361 if (hmeLevel == HME_LEVEL_4x)
4362 {
4363 refScaledSurface.OsResource = m_trackedBuf->Get4xDsSurface(scaledIdx)->OsResource;
4364 }
4365 else if (hmeLevel == HME_LEVEL_16x)
4366 {
4367 refScaledSurface.OsResource = m_trackedBuf->Get16xDsSurface(scaledIdx)->OsResource;
4368 }
4369 else
4370 {
4371 refScaledSurface.OsResource = m_trackedBuf->Get32xDsSurface(scaledIdx)->OsResource;
4372 }
4373 uint32_t refScaledBottomFieldOffset = refBottomField ? currScaledBottomFieldOffset : 0;
4374
4375 // L0 Reference Picture Y - VME
4376 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4377 surfaceCodecParams.bUseAdvState = true;
4378 surfaceCodecParams.psSurface = &refScaledSurface;
4379 surfaceCodecParams.dwOffset = refBottomField ? refScaledBottomFieldOffset : 0;
4380 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value;
4381 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_FWD_REF_IDX0_CM_G11 + (refIdx * 2)];
4382 surfaceCodecParams.ucVDirection = !currFieldPicture ? CODECHAL_VDIRECTION_FRAME :
4383 ((refBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
4384 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4385 m_hwInterface,
4386 cmdBuffer,
4387 &surfaceCodecParams,
4388 kernelState));
4389 }
4390 }
4391
4392 //List1
4393 for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l1_active_minus1; refIdx++)
4394 {
4395 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_1][refIdx];
4396
4397 if (!CodecHal_PictureIsInvalid(refPic))
4398 {
4399 if (refIdx == 0)
4400 {
4401 // Current Picture Y - VME
4402 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4403 surfaceCodecParams.bUseAdvState = true;
4404 surfaceCodecParams.psSurface = currScaledSurface;
4405 surfaceCodecParams.dwOffset = currBottomField ? currScaledBottomFieldOffset : 0;
4406 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value;
4407 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_CURR_FOR_BWD_REF_CM_G11];
4408 surfaceCodecParams.ucVDirection = currVDirection;
4409 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4410 m_hwInterface,
4411 cmdBuffer,
4412 &surfaceCodecParams,
4413 kernelState));
4414 }
4415
4416 bool refFieldPicture = CodecHal_PictureIsField(refPic) ? 1 : 0;
4417 bool refBottomField = CodecHal_PictureIsBottomField(refPic) ? 1 : 0;
4418 auto refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
4419 uint8_t scaledIdx = m_refList[refPicIdx]->ucScalingIdx;
4420
4421 if (hmeLevel == HME_LEVEL_4x)
4422 {
4423 refScaledSurface.OsResource = m_trackedBuf->Get4xDsSurface(scaledIdx)->OsResource;
4424 }
4425 else if (hmeLevel == HME_LEVEL_16x)
4426 {
4427 refScaledSurface.OsResource = m_trackedBuf->Get16xDsSurface(scaledIdx)->OsResource;
4428 }
4429 else
4430 {
4431 refScaledSurface.OsResource = m_trackedBuf->Get32xDsSurface(scaledIdx)->OsResource;
4432 }
4433 uint32_t refScaledBottomFieldOffset = refBottomField ? currScaledBottomFieldOffset : 0;
4434
4435 // L1 Reference Picture Y - VME
4436 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4437 surfaceCodecParams.bUseAdvState = true;
4438 surfaceCodecParams.psSurface = &refScaledSurface;
4439 surfaceCodecParams.dwOffset = refBottomField ? refScaledBottomFieldOffset : 0;
4440 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value;
4441 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_BWD_REF_IDX0_CM_G11 + (refIdx * 2)];
4442 surfaceCodecParams.ucVDirection = !currFieldPicture ? CODECHAL_VDIRECTION_FRAME :
4443 ((refBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
4444 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4445 m_hwInterface,
4446 cmdBuffer,
4447 &surfaceCodecParams,
4448 kernelState));
4449 }
4450 }
4451
4452 if (hmeLevel == HME_LEVEL_4x)
4453 {
4454 CODECHAL_ENCODE_CHK_NULL_RETURN(&m_resVdencStreamInBuffer[m_currRecycledBufIdx]);
4455
4456 auto streamingSize = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32) * (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32) * CODECHAL_CACHELINE_SIZE;
4457
4458 // Send driver-prepared stream-in surface as input during ROI/ MBQP(LCUQP)/ Dirty-Rect
4459 if (m_hevcPicParams->NumROI || m_encodeParams.bMbQpDataEnabled || (m_hevcPicParams->NumDirtyRects > 0 && (B_TYPE == m_hevcPicParams->CodingType)))
4460 {
4461 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4462 surfaceCodecParams.dwSize = MOS_BYTES_TO_DWORDS(streamingSize);
4463 surfaceCodecParams.bIs2DSurface = false;
4464 surfaceCodecParams.presBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
4465 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_VDENC_STREAMIN_CODEC].Value;
4466 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_VDENC_STREAMIN_INPUT_CM_G11];
4467 surfaceCodecParams.bIsWritable = true;
4468 surfaceCodecParams.bRenderTarget = true;
4469 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4470 m_hwInterface,
4471 cmdBuffer,
4472 &surfaceCodecParams,
4473 kernelState));
4474 }
4475 else // Clear stream-in surface otherwise
4476 {
4477 MOS_LOCK_PARAMS lockFlags;
4478 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4479 lockFlags.WriteOnly = true;
4480
4481 auto data = m_osInterface->pfnLockResource(
4482 m_osInterface,
4483 &m_resVdencStreamInBuffer[m_currRecycledBufIdx],
4484 &lockFlags);
4485
4486 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4487
4488 MOS_ZeroMemory(
4489 data,
4490 streamingSize);
4491
4492 m_osInterface->pfnUnlockResource(
4493 m_osInterface,
4494 &m_resVdencStreamInBuffer[m_currRecycledBufIdx]);
4495 }
4496
4497 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
4498 surfaceCodecParams.dwSize = MOS_BYTES_TO_DWORDS(streamingSize);
4499 surfaceCodecParams.bIs2DSurface = false;
4500 surfaceCodecParams.presBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
4501 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_VDENC_STREAMIN_CODEC].Value;
4502 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_VDENC_STREAMIN_OUTPUT_CM_G11];
4503 surfaceCodecParams.bIsWritable = true;
4504 surfaceCodecParams.bRenderTarget = true;
4505 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4506 m_hwInterface,
4507 cmdBuffer,
4508 &surfaceCodecParams,
4509 kernelState));
4510 }
4511
4512 return eStatus;
4513 }
4514
4515 MOS_STATUS
GetKernelHeaderAndSize(void * binary,EncOperation operation,uint32_t krnStateIdx,void * krnHeader,uint32_t * krnSize)4516 CodechalVdencHevcStateG11::GetKernelHeaderAndSize(
4517 void *binary,
4518 EncOperation operation,
4519 uint32_t krnStateIdx,
4520 void *krnHeader,
4521 uint32_t *krnSize)
4522 {
4523 CODECHAL_ENCODE_FUNCTION_ENTER;
4524
4525 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4526
4527 CODECHAL_ENCODE_CHK_NULL_RETURN(binary);
4528 CODECHAL_ENCODE_CHK_NULL_RETURN(krnHeader);
4529 CODECHAL_ENCODE_CHK_NULL_RETURN(krnSize);
4530
4531 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(binary, operation, krnStateIdx, krnHeader, krnSize));
4532
4533 return eStatus;
4534 }
4535
GetSystemPipeNumberCommon()4536 MOS_STATUS CodechalVdencHevcStateG11::GetSystemPipeNumberCommon()
4537 {
4538 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4539
4540 CODECHAL_ENCODE_FUNCTION_ENTER;
4541
4542 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
4543 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4544
4545 MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
4546 statusKey = MOS_UserFeature_ReadValue_ID(
4547 nullptr,
4548 __MEDIA_USER_FEATURE_VALUE_ENCODE_DISABLE_SCALABILITY,
4549 &userFeatureData,
4550 m_osInterface->pOsContext);
4551
4552 bool disableScalability = m_hwInterface->IsDisableScalability();
4553 if (statusKey == MOS_STATUS_SUCCESS)
4554 {
4555 disableScalability = userFeatureData.i32Data ? true : false;
4556 }
4557
4558 MEDIA_SYSTEM_INFO *gtSystemInfo = m_osInterface->pfnGetGtSystemInfo(m_osInterface);
4559 CODECHAL_ENCODE_CHK_NULL_RETURN(gtSystemInfo);
4560
4561 if (gtSystemInfo && disableScalability == false)
4562 {
4563 // Both VE mode and media solo mode should be able to get the VDBOX number via the same interface
4564 m_numVdbox = (uint8_t)(gtSystemInfo->VDBoxInfo.NumberOfVDBoxEnabled);
4565 }
4566 else
4567 {
4568 m_numVdbox = 1;
4569 }
4570
4571 CODECHAL_ENCODE_VERBOSEMESSAGE("System VDBOX number = %d.", m_numVdbox);
4572
4573 return eStatus;
4574 }
4575
HucPakIntegrate(PMOS_COMMAND_BUFFER cmdBuffer)4576 MOS_STATUS CodechalVdencHevcStateG11::HucPakIntegrate(
4577 PMOS_COMMAND_BUFFER cmdBuffer)
4578 {
4579 CODECHAL_ENCODE_FUNCTION_ENTER;
4580
4581 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4582
4583 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
4584
4585 CODECHAL_ENCODE_CHK_COND_RETURN(
4586 (m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),
4587 "ERROR - vdbox index exceed the maximum");
4588
4589 auto mmioRegisters = m_hwInterface->GetHucInterface()->GetMmioRegisters(m_vdboxIndex);
4590
4591 // load kernel from WOPCM into L2 storage RAM
4592 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
4593 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
4594 imemParams.dwKernelDescriptor = VDBOX_HUC_PAK_INTEGRATION_KERNEL_DESCRIPTOR;
4595
4596 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucImemStateCmd(cmdBuffer, &imemParams));
4597
4598 // pipe mode select
4599 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
4600 pipeModeSelectParams.Mode = m_mode;
4601 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
4602
4603 // DMEM set
4604 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
4605 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrate(&dmemParams));
4606 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucDmemStateCmd(cmdBuffer, &dmemParams));
4607
4608 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
4609 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrate(&virtualAddrParams));
4610 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams));
4611
4612 // Write HUC_STATUS2 mask - bit 6 - valid IMEM loaded
4613 MHW_MI_STORE_DATA_PARAMS storeDataParams;
4614 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
4615 storeDataParams.pOsResource = &m_resHucStatus2Buffer;
4616 storeDataParams.dwResourceOffset = 0;
4617 storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatus2ImemLoadedMask();
4618 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
4619
4620 // Store HUC_STATUS2 register
4621 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
4622 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
4623 storeRegParams.presStoreBuffer = &m_resHucStatus2Buffer;
4624 storeRegParams.dwOffset = sizeof(uint32_t);
4625 storeRegParams.dwRegister = mmioRegisters->hucStatus2RegOffset;
4626 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams));
4627
4628 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(cmdBuffer));
4629
4630 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucStartCmd(cmdBuffer, true));
4631
4632 // wait Huc completion (use HEVC bit for now)
4633 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
4634 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
4635 vdPipeFlushParams.Flags.bFlushHEVC = 1;
4636 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
4637 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetVdencInterface()->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
4638
4639 // Flush the engine to ensure memory written out
4640 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
4641 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
4642 flushDwParams.bVideoPipelineCacheInvalidate = true;
4643 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
4644
4645 EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
4646
4647 uint32_t baseOffset =
4648 (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource
4649
4650 // Write HUC_STATUS mask
4651 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
4652 storeDataParams.pOsResource = &encodeStatusBuf.resStatusBuffer;
4653 storeDataParams.dwResourceOffset = baseOffset + encodeStatusBuf.dwHuCStatusMaskOffset;
4654 storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatusReEncodeMask();
4655 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
4656 cmdBuffer,
4657 &storeDataParams));
4658
4659 // store HUC_STATUS register
4660 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
4661 storeRegParams.presStoreBuffer = &encodeStatusBuf.resStatusBuffer;
4662 storeRegParams.dwOffset = baseOffset + encodeStatusBuf.dwHuCStatusRegOffset;
4663 storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
4664 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(
4665 cmdBuffer,
4666 &storeRegParams));
4667
4668 return eStatus;
4669 }
4670
Initialize(CodechalSetting * settings)4671 MOS_STATUS CodechalVdencHevcStateG11::Initialize(CodechalSetting * settings)
4672 {
4673 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4674
4675 CODECHAL_ENCODE_FUNCTION_ENTER;
4676
4677 //create child class
4678 m_hucCmdInitializer = MOS_New(CodechalCmdInitializerG11, this);
4679
4680 // common initilization
4681 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::Initialize(settings));
4682
4683 m_deltaQpRoiBufferSize = m_deltaQpBufferSize;
4684 m_brcRoiBufferSize = m_roiStreamInBufferSize;
4685 m_maxTileNumber = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE) *
4686 CODECHAL_GET_HEIGHT_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MIN_TILE_SIZE);
4687
4688 // we need additional buffer for (1) 1 CL for size info at the beginning of each tile column (max of 4 vdbox in scalability mode)
4689 // (2) CL alignment at end of every tile column
4690 // as a result, increase the height by 1 for allocation purposes
4691 m_numLcu = MOS_ROUNDUP_DIVIDE(m_frameWidth, MAX_LCU_SIZE) * (MOS_ROUNDUP_DIVIDE(m_frameHeight, MAX_LCU_SIZE) + 1);
4692 m_mbCodeSize = MOS_ALIGN_CEIL(2 * sizeof(uint32_t) * (m_numLcu * 5 + m_numLcu * 64 * 8), CODECHAL_PAGE_SIZE);
4693 m_mbCodeSize += m_mvOffset;
4694
4695 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetSystemPipeNumberCommon());
4696
4697 if (MOS_VE_SUPPORTED(m_osInterface))
4698 {
4699 m_scalabilityState = (PCODECHAL_ENCODE_SCALABILITY_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SCALABILITY_STATE));
4700 CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalabilityState);
4701 //scalability initialize
4702 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_InitializeState(m_scalabilityState, m_hwInterface));
4703 }
4704
4705 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
4706 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4707 MOS_UserFeature_ReadValue_ID(
4708 nullptr,
4709 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_STITCH,
4710 &userFeatureData,
4711 m_osInterface->pOsContext);
4712 m_enableTileStitchByHW = userFeatureData.i32Data ? true : false;
4713
4714 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4715 MOS_UserFeature_ReadValue_ID(
4716 nullptr,
4717 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_SEMAPHORE,
4718 &userFeatureData,
4719 m_osInterface->pOsContext);
4720 m_enableHWSemaphore = userFeatureData.i32Data ? true : false;
4721
4722 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4723 MOS_UserFeature_ReadValue_ID(
4724 nullptr,
4725 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VDBOX_HW_SEMAPHORE,
4726 &userFeatureData,
4727 m_osInterface->pOsContext);
4728 m_enableVdBoxHWSemaphore = userFeatureData.i32Data ? true : false;
4729
4730 m_numDelay = 15;
4731
4732 #if (_DEBUG || _RELEASE_INTERNAL)
4733 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4734 MOS_UserFeature_ReadValue_ID(
4735 nullptr,
4736 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE,
4737 &userFeatureData,
4738 m_osInterface->pOsContext);
4739 m_kmdVeOveride.Value = (uint64_t)userFeatureData.i64Data;
4740 #endif
4741
4742 if (settings->disableUltraHME)
4743 {
4744 m_32xMeSupported = false;
4745 m_b32XMeEnabled = false;
4746 }
4747
4748 if (settings->disableSuperHME)
4749 {
4750 m_16xMeSupported = false;
4751 m_b16XMeEnabled = false;
4752 }
4753
4754 return eStatus;
4755 }
4756
CodechalVdencHevcStateG11(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)4757 CodechalVdencHevcStateG11::CodechalVdencHevcStateG11(
4758 CodechalHwInterface* hwInterface,
4759 CodechalDebugInterface* debugInterface,
4760 PCODECHAL_STANDARD_INFO standardInfo)
4761 :CodechalVdencHevcState(hwInterface, debugInterface, standardInfo)
4762 {
4763 CODECHAL_ENCODE_FUNCTION_ENTER;
4764
4765 m_useCommonKernel = true;
4766 pfnGetKernelHeaderAndSize = GetKernelHeaderAndSize;
4767 m_useHwScoreboard = false;
4768 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
4769 m_kernelBase = (uint8_t*)IGCODECKRN_G11;
4770 #endif
4771 m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
4772 m_hucPakStitchEnabled = true;
4773 m_scalabilityState = nullptr;
4774 m_brcAdaptiveRegionBoostSupported = true;
4775
4776 MOS_ZeroMemory(&m_resPakcuLevelStreamoutData, sizeof(m_resPakcuLevelStreamoutData));
4777 MOS_ZeroMemory(&m_resPakSliceLevelStreamoutData, sizeof(m_resPakSliceLevelStreamoutData));
4778 MOS_ZeroMemory(m_resTileBasedStatisticsBuffer, sizeof(m_resTileBasedStatisticsBuffer));
4779 MOS_ZeroMemory(m_tileRecordBuffer, sizeof(m_tileRecordBuffer));
4780 MOS_ZeroMemory(&m_resHuCPakAggregatedFrameStatsBuffer, sizeof(m_resHuCPakAggregatedFrameStatsBuffer));
4781 MOS_ZeroMemory(&m_kmdVeOveride, sizeof(m_kmdVeOveride));
4782 MOS_ZeroMemory(&m_resHcpScalabilitySyncBuffer, sizeof(m_resHcpScalabilitySyncBuffer));
4783
4784 MOS_ZeroMemory(m_veBatchBuffer, sizeof(m_veBatchBuffer));
4785 MOS_ZeroMemory(&m_realCmdBuffer, sizeof(m_realCmdBuffer));
4786 MOS_ZeroMemory(&m_resBrcSemaphoreMem, sizeof(m_resBrcSemaphoreMem));
4787 MOS_ZeroMemory(&m_resBrcPakSemaphoreMem, sizeof(m_resBrcPakSemaphoreMem));
4788 MOS_ZeroMemory(m_resVdBoxSemaphoreMem, sizeof(m_resVdBoxSemaphoreMem));
4789 MOS_ZeroMemory(&m_resPipeStartSemaMem, sizeof(m_resPipeStartSemaMem));
4790 MOS_ZeroMemory(&m_resSyncSemaMem, sizeof(m_resSyncSemaMem));
4791
4792 CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_osInterface);
4793 for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
4794 {
4795 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
4796 {
4797 MOS_ZeroMemory(&m_resHucPakStitchDmemBuffer[k][i], sizeof(m_resHucPakStitchDmemBuffer[k][i])); // HuC Pak Integration Dmem data for each pass
4798 }
4799 }
4800 MOS_ZeroMemory(&m_resBrcDataBuffer, sizeof(m_resBrcDataBuffer));
4801
4802 m_vdencBrcInitDmemBufferSize = sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_INIT_DMEM_G11);
4803 m_vdencBrcUpdateDmemBufferSize = sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_UPDATE_DMEM_G11);
4804 m_vdencBrcConstDataBufferSize = sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_CONSTANT_DATA_G11);
4805 m_maxNumSlicesSupported = CODECHAL_VDENC_HEVC_MAX_SLICE_NUM;
4806
4807 m_hwInterface->GetStateHeapSettings()->dwNumSyncTags = CODECHAL_ENCODE_HEVC_NUM_SYNC_TAGS;
4808 m_hwInterface->GetStateHeapSettings()->dwDshSize = CODECHAL_INIT_DSH_SIZE_HEVC_ENC;
4809 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
4810 m_kernelBase = (uint8_t*)IGCODECKRN_G11;
4811 #endif
4812
4813 MOS_STATUS eStatus = CodecHalGetKernelBinaryAndSize(
4814 m_kernelBase,
4815 m_kuidCommon,
4816 &m_kernelBinary,
4817 &m_combinedKernelSize);
4818 CODECHAL_ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
4819
4820 m_hwInterface->GetStateHeapSettings()->dwIshSize +=
4821 MOS_ALIGN_CEIL(m_combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
4822
4823 m_osInterface->pfnVirtualEngineSupported(m_osInterface, false, true);
4824 Mos_SetVirtualEngineSupported(m_osInterface, true);
4825
4826 CODECHAL_DEBUG_TOOL(
4827 CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_encodeParState = MOS_New(CodechalDebugEncodeParG11, this));
4828 )
4829 }
4830
SetRegionsHuCPakIntegrate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)4831 MOS_STATUS CodechalVdencHevcStateG11::SetRegionsHuCPakIntegrate(
4832 PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
4833 {
4834 CODECHAL_ENCODE_FUNCTION_ENTER;
4835
4836 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4837
4838 MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
4839 // Add Virtual addr
4840 virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 0 - Tile based input statistics from PAK/ VDEnc
4841 virtualAddrParams->regionParams[0].dwOffset = 0;
4842 virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 1 - HuC Frame statistics output
4843 virtualAddrParams->regionParams[1].isWritable = true;
4844 virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer; // Region 4 - Last Tile bitstream
4845 virtualAddrParams->regionParams[4].dwOffset = MOS_ALIGN_FLOOR(m_tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
4846 virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer; // Region 5 - HuC modifies the last tile bitstream before stitch command
4847 virtualAddrParams->regionParams[5].dwOffset = MOS_ALIGN_FLOOR(m_tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
4848 virtualAddrParams->regionParams[5].isWritable = true;
4849 virtualAddrParams->regionParams[6].presRegion = &m_vdencBrcHistoryBuffer; // Region 6 History Buffer (Input/Output)
4850 virtualAddrParams->regionParams[6].isWritable = true;
4851 virtualAddrParams->regionParams[7].presRegion = &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource; // Region 7 - HCP PIC state command
4852 virtualAddrParams->regionParams[9].presRegion = &m_resBrcDataBuffer; // Region 9 HuC outputs BRC data
4853 virtualAddrParams->regionParams[9].isWritable = true;
4854
4855 virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource; // Region 15 [In/Out] - Tile Record Buffer
4856 virtualAddrParams->regionParams[15].dwOffset = 0; // Tile record is at offset 0
4857
4858 return eStatus;
4859 }
4860
SetDmemHuCPakIntegrate(PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)4861 MOS_STATUS CodechalVdencHevcStateG11::SetDmemHuCPakIntegrate(
4862 PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)
4863 {
4864 CODECHAL_ENCODE_FUNCTION_ENTER;
4865
4866 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4867
4868 MOS_LOCK_PARAMS lockFlagsWriteOnly;
4869 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
4870 lockFlagsWriteOnly.WriteOnly = true;
4871
4872 int32_t currentPass = GetCurrentPass();
4873 if (currentPass < 0 || currentPass >= CODECHAL_VDENC_BRC_NUM_OF_PASSES)
4874 {
4875 eStatus = MOS_STATUS_INVALID_PARAMETER;
4876 return eStatus;
4877 }
4878
4879 HucPakStitchDmemVdencG11* hucPakStitchDmem = (HucPakStitchDmemVdencG11*)m_osInterface->pfnLockResource(
4880 m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly);
4881 CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem);
4882 MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemVdencG11));
4883
4884 // Reset all the offsets to be shared in the huc dmem (6*5 DW's)
4885 memset(hucPakStitchDmem, 0xFF, 120);
4886
4887 uint16_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
4888 uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
4889 uint16_t numTiles = numTileRows*numTileColumns;
4890 uint16_t numTilesPerPipe = m_numTiles / m_numPipe;
4891
4892 hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE;
4893 // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record
4894 hucPakStitchDmem->OffsetInCommandBuffer = m_tileParams[m_numTiles - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
4895 hucPakStitchDmem->PicWidthInPixel = (uint16_t)m_frameWidth;
4896 hucPakStitchDmem->PicHeightInPixel = (uint16_t)m_frameHeight;
4897 hucPakStitchDmem->TotalNumberOfPAKs = m_numPipe;
4898 hucPakStitchDmem->Codec = 2; // 1: HEVC DP; 2: HEVC VDEnc; 3: VP9 VDEnc
4899 hucPakStitchDmem->MAXPass = m_brcEnabled ? CODECHAL_VDENC_BRC_NUM_OF_PASSES : 1;
4900 hucPakStitchDmem->CurrentPass = (uint8_t) currentPass+1; // // Current BRC pass [1..MAXPass]
4901 hucPakStitchDmem->MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
4902 hucPakStitchDmem->CabacZeroWordFlag = false;
4903 hucPakStitchDmem->bitdepth_luma = m_hevcSeqParams->bit_depth_luma_minus8 + 8; // default: 8
4904 hucPakStitchDmem->bitdepth_chroma = m_hevcSeqParams->bit_depth_chroma_minus8 + 8; // default: 8
4905 hucPakStitchDmem->ChromaFormatIdc = m_hevcSeqParams->chroma_format_idc;
4906
4907 hucPakStitchDmem->LastTileBS_StartInBytes = (m_tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE) & (CODECHAL_PAGE_SIZE - 1);
4908 hucPakStitchDmem->PIC_STATE_StartInBytes = (uint16_t)m_picStateCmdStartInBytes;
4909
4910 if (m_numPipe > 1)
4911 {
4912 //Set the kernel output offsets
4913 hucPakStitchDmem->HEVC_PAKSTAT_offset[0] = m_hevcFrameStatsOffset.uiHevcPakStatistics;
4914 hucPakStitchDmem->HEVC_Streamout_offset[0] = m_hevcFrameStatsOffset.uiHevcSliceStreamout;
4915 hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord;
4916 hucPakStitchDmem->VDENCSTAT_offset[0] = m_hevcFrameStatsOffset.uiVdencStatistics;
4917
4918 for (auto i = 0; i < m_numPipe; i++)
4919 {
4920 hucPakStitchDmem->NumTiles[i] = numTilesPerPipe;
4921 hucPakStitchDmem->NumSlices[i] = numTilesPerPipe; // With tiling enabled, DDI restriction to have one slice per tile
4922
4923 // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic.
4924 // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region.
4925 hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) + m_hevcTileStatsOffset.uiTileSizeRecord;
4926 hucPakStitchDmem->HEVC_PAKSTAT_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiHevcPakStatistics) + m_hevcTileStatsOffset.uiHevcPakStatistics;
4927 hucPakStitchDmem->VDENCSTAT_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiVdencStatistics) + m_hevcTileStatsOffset.uiVdencStatistics;
4928 hucPakStitchDmem->HEVC_Streamout_offset[i + 1] = (i * hucPakStitchDmem->NumSlices[i] * CODECHAL_CACHELINE_SIZE) + m_hevcTileStatsOffset.uiHevcSliceStreamout;
4929 // All VP9 surfaces already initialized to 0xFFFFFFFF
4930 }
4931 }
4932 else
4933 {
4934 hucPakStitchDmem->NumTiles[0] = numTiles;
4935 hucPakStitchDmem->TotalNumberOfPAKs = m_numPipe;
4936
4937 // non-scalable mode, only VDEnc statistics need to be aggregated
4938 hucPakStitchDmem->VDENCSTAT_offset[0] = m_hevcFrameStatsOffset.uiVdencStatistics;
4939 hucPakStitchDmem->VDENCSTAT_offset[1] = m_hevcTileStatsOffset.uiVdencStatistics;
4940 }
4941
4942 m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]));
4943
4944 MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS));
4945 dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]);
4946 dmemParams->dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG11), CODECHAL_CACHELINE_SIZE);
4947 dmemParams->dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
4948
4949 return eStatus;
4950 }
4951
DumpHucDebugOutputBuffers()4952 MOS_STATUS CodechalVdencHevcStateG11::DumpHucDebugOutputBuffers()
4953 {
4954 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4955
4956 // Virtual Engine does only one submit per pass. Dump all HuC debug outputs
4957 bool dumpDebugBuffers = IsLastPipe();
4958 if (m_singleTaskPhaseSupported)
4959 {
4960 dumpDebugBuffers = dumpDebugBuffers && IsLastPass();
4961 }
4962
4963 if (dumpDebugBuffers)
4964 {
4965 CODECHAL_DEBUG_TOOL(
4966 if(m_vdencHucUsed)
4967 {
4968 DumpHucBrcInit();
4969 DumpHucBrcUpdate(true);
4970 DumpHucBrcUpdate(false);
4971 DumpHucPakIntegrate();
4972 }
4973 else
4974 {
4975 DumpHucCqp();
4976 }
4977 )
4978 }
4979
4980 return eStatus;
4981 }
4982
AddVdencWalkerStateCmd(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)4983 MOS_STATUS CodechalVdencHevcStateG11::AddVdencWalkerStateCmd(
4984 PMOS_COMMAND_BUFFER cmdBuffer,
4985 PMHW_VDBOX_HEVC_SLICE_STATE params)
4986 {
4987 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4988
4989 CODECHAL_ENCODE_FUNCTION_ENTER;
4990
4991 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
4992 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
4993
4994 MHW_VDBOX_VDENC_WALKER_STATE_PARAMS_G11 vdencWalkerStateParams;
4995 vdencWalkerStateParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
4996 vdencWalkerStateParams.pHevcEncSeqParams = params->pEncodeHevcSeqParams;
4997 vdencWalkerStateParams.pHevcEncPicParams = params->pEncodeHevcPicParams;
4998 vdencWalkerStateParams.pEncodeHevcSliceParams = params->pEncodeHevcSliceParams;
4999 vdencWalkerStateParams.pTileCodingParams = static_cast<PMHW_VDBOX_HEVC_SLICE_STATE_G11>(params)->pTileCodingParams;
5000 vdencWalkerStateParams.dwTileId = static_cast<PMHW_VDBOX_HEVC_SLICE_STATE_G11>(params)->dwTileID;
5001 switch (static_cast<PMHW_VDBOX_HEVC_SLICE_STATE_G11>(params)->dwNumPipe)
5002 {
5003 case 0:
5004 case 1:
5005 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_SINGLE_PIPE;
5006 break;
5007 case 2:
5008 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_TWO_PIPE;
5009 break;
5010 case 4:
5011 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_FOUR_PIPE;
5012 break;
5013 default:
5014 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_INVALID;
5015 break;
5016 }
5017 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWalkerStateCmd(cmdBuffer, &vdencWalkerStateParams));
5018
5019 return eStatus;
5020 }
5021
CreateMhwParams()5022 void CodechalVdencHevcStateG11::CreateMhwParams()
5023 {
5024 m_sliceStateParams = MOS_New(MHW_VDBOX_HEVC_SLICE_STATE_G11);
5025 m_pipeModeSelectParams = MOS_New(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11);
5026 m_pipeBufAddrParams = MOS_New(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G11);
5027 }
5028
CalculatePictureStateCommandSize()5029 MOS_STATUS CodechalVdencHevcStateG11::CalculatePictureStateCommandSize()
5030 {
5031 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5032
5033 CODECHAL_ENCODE_FUNCTION_ENTER;
5034
5035 MHW_VDBOX_STATE_CMDSIZE_PARAMS_G11 stateCmdSizeParams;
5036 CODECHAL_ENCODE_CHK_STATUS_RETURN(
5037 m_hwInterface->GetHxxStateCommandSize(
5038 CODECHAL_ENCODE_MODE_HEVC,
5039 &m_defaultPictureStatesSize,
5040 &m_defaultPicturePatchListSize,
5041 &stateCmdSizeParams));
5042
5043 return eStatus;
5044 }
5045
SetPipeBufAddr(PMOS_COMMAND_BUFFER cmdBuffer)5046 MOS_STATUS CodechalVdencHevcStateG11::SetPipeBufAddr(
5047 PMOS_COMMAND_BUFFER cmdBuffer)
5048 {
5049 CODECHAL_ENCODE_FUNCTION_ENTER;
5050
5051 return m_mmcState->SetPipeBufAddr(m_pipeBufAddrParams, cmdBuffer);
5052 }
5053
SetGpuCtxCreatOption()5054 MOS_STATUS CodechalVdencHevcStateG11::SetGpuCtxCreatOption()
5055 {
5056 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5057
5058 CODECHAL_ENCODE_FUNCTION_ENTER;
5059
5060 if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
5061 {
5062 CodechalEncoderState::SetGpuCtxCreatOption();
5063 }
5064 else
5065 {
5066 m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED);
5067 CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
5068
5069 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ConstructParmsForGpuCtxCreation(
5070 m_scalabilityState,
5071 (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
5072 }
5073
5074 return eStatus;
5075 }
5076
SetTileData(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 * tileCodingParams)5077 MOS_STATUS CodechalVdencHevcStateG11::SetTileData(
5078 MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11* tileCodingParams)
5079 {
5080 CODECHAL_ENCODE_FUNCTION_ENTER;
5081
5082 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5083
5084 if (!m_hevcPicParams->tiles_enabled_flag)
5085 {
5086 return eStatus;
5087 }
5088
5089 uint32_t colBd[100] = { 0 };
5090 uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
5091 for (uint32_t i = 0; i < numTileColumns; i++)
5092 {
5093 colBd[i + 1] = colBd[i] + m_hevcPicParams->tile_column_width[i];
5094 }
5095
5096 uint32_t rowBd[100] = { 0 };
5097 uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
5098 for (uint32_t i = 0; i < numTileRows; i++)
5099 {
5100 rowBd[i + 1] = rowBd[i] + m_hevcPicParams->tile_row_height[i];
5101 }
5102
5103 m_numTiles = numTileRows * numTileColumns;
5104 if (m_numTiles > CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_VDENC_MIN_TILE_WIDTH_SIZE) *
5105 CODECHAL_GET_HEIGHT_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_VDENC_MIN_TILE_HEIGHT_SIZE))
5106 {
5107 return MOS_STATUS_INVALID_PARAMETER;
5108 }
5109
5110 uint32_t const numCuRecordTab[] = { 1, 4, 16, 64 }; //LCU: 8x8->1, 16x16->4, 32x32->16, 64x64->64
5111 uint32_t numCuRecord = numCuRecordTab[MOS_MIN(3, m_hevcSeqParams->log2_max_coding_block_size_minus3)];
5112 uint32_t maxBytePerLCU = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
5113 maxBytePerLCU = maxBytePerLCU * maxBytePerLCU; // number of pixels per LCU
5114 maxBytePerLCU = maxBytePerLCU * 3 / (m_is10BitHevc ? 1 : 2); //assume 4:2:0 format
5115 uint32_t bitstreamByteOffset = 0, saoRowstoreOffset = 0, cuLevelStreamoutOffset = 0, sseRowstoreOffset = 0;
5116 int32_t frameWidthInMinCb = m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1;
5117 int32_t frameHeightInMinCb = m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1;
5118 int32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
5119 uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
5120 uint32_t streamInWidthinLCU = MOS_ROUNDUP_DIVIDE((frameWidthInMinCb << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
5121 uint32_t numLcuInPic = 0;
5122
5123 for (uint32_t numLcusInTiles = 0, i = 0; i < numTileRows; i++)
5124 {
5125 for (uint32_t j = 0; j < numTileColumns; j++)
5126 {
5127 numLcuInPic += m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j];
5128 }
5129 }
5130
5131 uint32_t numSliceInTile = 0;
5132 uint64_t activeBitstreamSize = (uint64_t)m_encodeParams.dwBitstreamSize;
5133 // There would be padding at the end of last tile in CBR, reserve dedicated part in the BS buf
5134 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR)
5135 {
5136 // Assume max padding num < target frame size derived from target bit rate and frame rate
5137 uint32_t actualFrameRate = m_hevcSeqParams->FrameRate.Numerator / m_hevcSeqParams->FrameRate.Denominator;
5138 uint64_t reservedPart = (uint64_t)m_hevcSeqParams->TargetBitRate / 8 / (uint64_t)actualFrameRate * 1024;
5139
5140 if (reservedPart > activeBitstreamSize)
5141 {
5142 CODECHAL_ENCODE_ASSERTMESSAGE("Frame size cal from target Bit rate is larger than BS buf! Issues in CBR paras!");
5143 return MOS_STATUS_INVALID_PARAMETER;
5144 }
5145
5146 // Capping the reserved part to 1/10 of bs buf size
5147 if (reservedPart > activeBitstreamSize / 10)
5148 {
5149 reservedPart = activeBitstreamSize / 10;
5150 }
5151
5152 activeBitstreamSize -= reservedPart;
5153 }
5154
5155 for (uint32_t numLcusInTiles = 0, i = 0; i < numTileRows; i++)
5156 {
5157 for (uint32_t j = 0; j < numTileColumns; j++)
5158 {
5159 uint32_t idx = i * numTileColumns + j;
5160 uint32_t numLcuInTile = m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j];
5161
5162 tileCodingParams[idx].TileStartLCUX = colBd[j];
5163 tileCodingParams[idx].TileStartLCUY = rowBd[i];
5164
5165 tileCodingParams[idx].TileColumnStoreSelect = j % 2;
5166 tileCodingParams[idx].TileRowStoreSelect = i % 2;
5167
5168 if (j != numTileColumns - 1)
5169 {
5170 tileCodingParams[idx].TileWidthInMinCbMinus1 = (m_hevcPicParams->tile_column_width[j] << shift) - 1;
5171 tileCodingParams[idx].IsLastTileofRow = false;
5172 }
5173 else
5174 {
5175 tileCodingParams[idx].TileWidthInMinCbMinus1 = (frameWidthInMinCb - (colBd[j] << shift)) - 1;
5176 tileCodingParams[idx].IsLastTileofRow = true;
5177
5178 }
5179
5180 if (i != numTileRows - 1)
5181 {
5182 tileCodingParams[idx].IsLastTileofColumn = false;
5183 tileCodingParams[idx].TileHeightInMinCbMinus1 = (m_hevcPicParams->tile_row_height[i] << shift) - 1;
5184 }
5185 else
5186 {
5187 tileCodingParams[idx].TileHeightInMinCbMinus1 = (frameHeightInMinCb - (rowBd[i] << shift)) - 1;
5188 tileCodingParams[idx].IsLastTileofColumn = true;
5189 }
5190
5191 tileCodingParams[idx].NumOfTilesInFrame = m_numTiles;
5192 tileCodingParams[idx].NumOfTileColumnsInFrame = numTileColumns;
5193 tileCodingParams[idx].CuRecordOffset = MOS_ALIGN_CEIL(((numCuRecord * numLcusInTiles) * m_hcpInterface->GetHevcEncCuRecordSize()),
5194 CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
5195 tileCodingParams[idx].NumberOfActiveBePipes = (m_numPipe > 1) ? m_numPipe : 1;
5196
5197 tileCodingParams[idx].PakTileStatisticsOffset = 8 * idx;
5198 tileCodingParams[idx].TileSizeStreamoutOffset = idx;
5199 tileCodingParams[idx].Vp9ProbabilityCounterStreamoutOffset = 0;
5200 tileCodingParams[idx].presHcpSyncBuffer = &m_resHcpScalabilitySyncBuffer.sResource;
5201 tileCodingParams[idx].CuLevelStreamoutOffset = cuLevelStreamoutOffset;
5202 tileCodingParams[idx].SliceSizeStreamoutOffset = numSliceInTile;
5203 tileCodingParams[idx].SseRowstoreOffset = sseRowstoreOffset;
5204 tileCodingParams[idx].BitstreamByteOffset = bitstreamByteOffset;
5205 tileCodingParams[idx].SaoRowstoreOffset = saoRowstoreOffset;
5206
5207 uint32_t tileHeightInLCU = MOS_ROUNDUP_DIVIDE(((tileCodingParams[idx].TileHeightInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
5208 uint32_t tileWidthInLCU = MOS_ROUNDUP_DIVIDE(((tileCodingParams[idx].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
5209
5210 //StreamIn data is 4 CLs per LCU
5211 tileCodingParams[idx].TileStreaminOffset = 4 * (tileCodingParams[idx].TileStartLCUY * streamInWidthinLCU + tileCodingParams[idx].TileStartLCUX * tileHeightInLCU);
5212
5213 cuLevelStreamoutOffset += (tileCodingParams[idx].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[idx].TileHeightInMinCbMinus1 + 1) * 16 / CODECHAL_CACHELINE_SIZE;
5214 sseRowstoreOffset += ((m_hevcPicParams->tile_column_width[j] + 3) * m_sizeOfSseSrcPixelRowStoreBufferPerLcu) / CODECHAL_CACHELINE_SIZE;
5215 saoRowstoreOffset += (MOS_ALIGN_CEIL(m_hevcPicParams->tile_column_width[j], 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU) / CODECHAL_CACHELINE_SIZE;
5216
5217 uint64_t totalSizeTemp = (uint64_t)activeBitstreamSize * (uint64_t)numLcuInTile;
5218 uint32_t bitStreamSizePerTile = (uint32_t)(totalSizeTemp / (uint64_t)numLcuInPic) + ((totalSizeTemp % (uint64_t)numLcuInPic) ? 1 : 0);
5219 bitstreamByteOffset += MOS_ALIGN_CEIL(bitStreamSizePerTile, CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
5220
5221 numLcusInTiles += numLcuInTile;
5222
5223 for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
5224 {
5225 bool lastSliceInTile = false, sliceInTile = false;
5226 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
5227 &tileCodingParams[idx],
5228 &sliceInTile,
5229 &lastSliceInTile));
5230 numSliceInTile += (sliceInTile ? 1 : 0);
5231 }
5232 }
5233
5234 // same row store buffer for different tile rows.
5235 saoRowstoreOffset = 0;
5236 sseRowstoreOffset = 0;
5237 }
5238
5239 return eStatus;
5240 }
5241
IsSliceInTile(uint32_t sliceNumber,PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 currentTile,bool * sliceInTile,bool * lastSliceInTile)5242 MOS_STATUS CodechalVdencHevcStateG11::IsSliceInTile(
5243 uint32_t sliceNumber,
5244 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 currentTile,
5245 bool *sliceInTile,
5246 bool *lastSliceInTile)
5247 {
5248 CODECHAL_ENCODE_FUNCTION_ENTER;
5249
5250 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5251
5252 CODECHAL_ENCODE_CHK_NULL_RETURN(currentTile);
5253 CODECHAL_ENCODE_CHK_NULL_RETURN(sliceInTile);
5254 CODECHAL_ENCODE_CHK_NULL_RETURN(lastSliceInTile);
5255
5256 if (!m_hevcPicParams->tiles_enabled_flag)
5257 {
5258 *lastSliceInTile = *sliceInTile = true;
5259 return eStatus;
5260 }
5261
5262 uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
5263 uint32_t residual = (1 << shift) - 1;
5264 uint32_t frameWidthInLCU = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
5265 uint32_t frameHeightInLCU = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
5266
5267 PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = &m_hevcSliceParams[sliceNumber];
5268 uint32_t sliceStartLCU = hevcSlcParams->slice_segment_address;
5269 uint32_t sliceLCUx = sliceStartLCU % frameWidthInLCU;
5270 uint32_t sliceLCUy = sliceStartLCU / frameWidthInLCU;
5271
5272 uint32_t tileColumnWidth = (currentTile->TileWidthInMinCbMinus1 + 1 + residual) >> shift;
5273 uint32_t tileRowHeight = (currentTile->TileHeightInMinCbMinus1 + 1 + residual) >> shift;
5274 if (sliceLCUx < currentTile->TileStartLCUX ||
5275 sliceLCUy < currentTile->TileStartLCUY ||
5276 sliceLCUx >= currentTile->TileStartLCUX + tileColumnWidth ||
5277 sliceLCUy >= currentTile->TileStartLCUY + tileRowHeight
5278 )
5279 {
5280 // slice start is not in the tile boundary
5281 *lastSliceInTile = *sliceInTile = false;
5282 return eStatus;
5283 }
5284
5285 sliceLCUx += (hevcSlcParams->NumLCUsInSlice - 1) % tileColumnWidth;
5286 sliceLCUy += (hevcSlcParams->NumLCUsInSlice - 1) / tileColumnWidth;
5287
5288 if (sliceLCUx >= currentTile->TileStartLCUX + tileColumnWidth)
5289 {
5290 sliceLCUx -= tileColumnWidth;
5291 sliceLCUy++;
5292 }
5293
5294 if (sliceLCUx < currentTile->TileStartLCUX ||
5295 sliceLCUy < currentTile->TileStartLCUY ||
5296 sliceLCUx >= currentTile->TileStartLCUX + tileColumnWidth ||
5297 sliceLCUy >= currentTile->TileStartLCUY + tileRowHeight
5298 )
5299 {
5300 // last LCU of the slice is out of the tile boundary
5301 *lastSliceInTile = *sliceInTile = false;
5302 return eStatus;
5303 }
5304
5305 *sliceInTile = true;
5306
5307 sliceLCUx++;
5308 sliceLCUy++;
5309
5310 // the end of slice is at the boundary of tile
5311 *lastSliceInTile = (
5312 sliceLCUx == currentTile->TileStartLCUX + tileColumnWidth &&
5313 sliceLCUy == currentTile->TileStartLCUY + tileRowHeight);
5314
5315 return eStatus;
5316 }
5317
VerifyCommandBufferSize()5318 MOS_STATUS CodechalVdencHevcStateG11::VerifyCommandBufferSize()
5319 {
5320 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5321
5322 CODECHAL_ENCODE_FUNCTION_ENTER;
5323
5324 if (UseRenderCommandBuffer() || m_numPipe == 1)
5325 {
5326 // legacy mode & resize CommandBuffer Size for every BRC pass
5327 if (!m_singleTaskPhaseSupported)
5328 {
5329 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
5330 }
5331 return eStatus;
5332 }
5333
5334 // virtual engine
5335 uint32_t requestedSize =
5336 m_pictureStatesSize +
5337 m_extraPictureStatesSize +
5338 (m_sliceStatesSize * m_numSlices);
5339
5340 requestedSize += (requestedSize * m_numPassesInOnePipe + m_hucCommandsSize);
5341
5342 // Running in the multiple VDBOX mode
5343 int currentPipe = GetCurrentPipe();
5344 if (currentPipe < 0 || currentPipe >= m_numPipe)
5345 {
5346 eStatus = MOS_STATUS_INVALID_PARAMETER;
5347 return eStatus;
5348 }
5349 int currentPass = GetCurrentPass();
5350 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
5351 {
5352 eStatus = MOS_STATUS_INVALID_PARAMETER;
5353 return eStatus;
5354 }
5355
5356 if (IsFirstPipe() && m_osInterface->bUsesPatchList)
5357 {
5358 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
5359 }
5360
5361 PMOS_COMMAND_BUFFER pCmdBuffer;
5362 if (m_osInterface->phasedSubmission)
5363 {
5364 m_osInterface->pfnVerifyCommandBufferSize(m_osInterface, requestedSize, 0);
5365 return eStatus;
5366 }
5367 else
5368 {
5369 pCmdBuffer = m_singleTaskPhaseSupported ? &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass];
5370 }
5371
5372 if (Mos_ResourceIsNull(&pCmdBuffer->OsResource) ||
5373 m_sizeOfVeBatchBuffer < requestedSize)
5374 {
5375 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
5376
5377 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
5378 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
5379 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
5380 allocParamsForBufferLinear.Format = Format_Buffer;
5381 allocParamsForBufferLinear.dwBytes = requestedSize;
5382 allocParamsForBufferLinear.pBufName = "Batch buffer for each VDBOX";
5383
5384 if (!Mos_ResourceIsNull(&pCmdBuffer->OsResource))
5385 {
5386 if (pCmdBuffer->pCmdBase)
5387 {
5388 m_osInterface->pfnUnlockResource(m_osInterface, &pCmdBuffer->OsResource);
5389 }
5390 m_osInterface->pfnFreeResource(m_osInterface, &pCmdBuffer->OsResource);
5391 }
5392
5393 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
5394 m_osInterface,
5395 &allocParamsForBufferLinear,
5396 &pCmdBuffer->OsResource));
5397
5398 m_sizeOfVeBatchBuffer = requestedSize;
5399 }
5400
5401 if (pCmdBuffer->pCmdBase == nullptr)
5402 {
5403 MOS_LOCK_PARAMS lockParams;
5404 MOS_ZeroMemory(&lockParams, sizeof(lockParams));
5405 lockParams.WriteOnly = true;
5406 pCmdBuffer->pCmdPtr = pCmdBuffer->pCmdBase = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &pCmdBuffer->OsResource, &lockParams);
5407 pCmdBuffer->iRemaining = m_sizeOfVeBatchBuffer;
5408 pCmdBuffer->iOffset = 0;
5409
5410 if (pCmdBuffer->pCmdBase == nullptr)
5411 {
5412 eStatus = MOS_STATUS_NULL_POINTER;
5413 return eStatus;
5414 }
5415 }
5416
5417 return eStatus;
5418 }
5419
GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)5420 MOS_STATUS CodechalVdencHevcStateG11::GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)
5421 {
5422 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5423
5424 CODECHAL_ENCODE_FUNCTION_ENTER;
5425
5426 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
5427 CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
5428
5429 if (UseRenderCommandBuffer() || m_numPipe == 1)
5430 {
5431 // legacy mode
5432 m_realCmdBuffer.pCmdBase = m_realCmdBuffer.pCmdPtr = nullptr;
5433 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, 0));
5434 return eStatus;
5435 }
5436
5437 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0));
5438
5439 int currentPipe = GetCurrentPipe();
5440 if (currentPipe < 0 || currentPipe >= m_numPipe)
5441 {
5442 eStatus = MOS_STATUS_INVALID_PARAMETER;
5443 return eStatus;
5444 }
5445 int currentPass = GetCurrentPass();
5446 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
5447 {
5448 eStatus = MOS_STATUS_INVALID_PARAMETER;
5449 return eStatus;
5450 }
5451
5452 if (m_osInterface->phasedSubmission)
5453 {
5454 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, currentPipe + 1));
5455
5456 CodecHalEncodeScalability_EncodePhaseToSubmissionType(IsFirstPipe(), cmdBuffer);
5457 if (IsLastPipe())
5458 {
5459 cmdBuffer->iSubmissionType |= SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE;
5460 }
5461 }
5462 else
5463 {
5464 *cmdBuffer = m_singleTaskPhaseSupported ? m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass];
5465 }
5466
5467 if (m_osInterface->osCpInterface->IsCpEnabled() && cmdBuffer->iOffset == 0)
5468 {
5469 // Insert CP Prolog
5470 CODECHAL_ENCODE_NORMALMESSAGE("Adding cp prolog for secure scalable encode");
5471 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, cmdBuffer));
5472 }
5473 return eStatus;
5474 }
5475
ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)5476 MOS_STATUS CodechalVdencHevcStateG11::ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)
5477 {
5478 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5479
5480 CODECHAL_ENCODE_FUNCTION_ENTER;
5481
5482 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
5483
5484 if (UseRenderCommandBuffer() || m_numPipe == 1)
5485 {
5486 // legacy mode
5487 m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, 0);
5488 return eStatus;
5489 }
5490
5491 // virtual engine
5492 int currentPipe = GetCurrentPipe();
5493 if (currentPipe < 0 || currentPipe >= m_numPipe)
5494 {
5495 eStatus = MOS_STATUS_INVALID_PARAMETER;
5496 return eStatus;
5497 }
5498 int currentPass = GetCurrentPass();
5499 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
5500 {
5501 eStatus = MOS_STATUS_INVALID_PARAMETER;
5502 return eStatus;
5503 }
5504
5505 if (m_osInterface->phasedSubmission)
5506 {
5507 m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, currentPipe + 1);
5508 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
5509 }
5510 else
5511 {
5512 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
5513 m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][passIndex] = *cmdBuffer;
5514 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
5515 }
5516
5517 return eStatus;
5518 }
5519
SubmitCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer,bool bNullRendering)5520 MOS_STATUS CodechalVdencHevcStateG11::SubmitCommandBuffer(
5521 PMOS_COMMAND_BUFFER cmdBuffer,
5522 bool bNullRendering)
5523 {
5524 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5525
5526 CODECHAL_ENCODE_FUNCTION_ENTER;
5527
5528 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
5529
5530 if (UseRenderCommandBuffer() || m_numPipe == 1)
5531 {
5532 // legacy mode
5533 if (!UseRenderCommandBuffer() && MOS_VE_SUPPORTED(m_osInterface)) // Set VE Hints for video contexts only
5534 {
5535 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(cmdBuffer));
5536 }
5537 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, bNullRendering));
5538 return eStatus;
5539 }
5540
5541 bool cmdBufferReadyForSubmit = IsLastPipe();
5542
5543 // In STF, Hold the command buffer submission till last pass
5544 if (m_singleTaskPhaseSupported)
5545 {
5546 cmdBufferReadyForSubmit = cmdBufferReadyForSubmit && IsLastPass();
5547 }
5548
5549 if(!cmdBufferReadyForSubmit)
5550 {
5551 return eStatus;
5552 }
5553
5554 int currentPass = GetCurrentPass();
5555 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
5556 {
5557 eStatus = MOS_STATUS_INVALID_PARAMETER;
5558 return eStatus;
5559 }
5560
5561 if (m_osInterface->phasedSubmission)
5562 {
5563 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
5564 }
5565 else
5566 {
5567 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
5568
5569 for (uint32_t i = 0; i < m_numPipe; i++)
5570 {
5571 PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex];
5572
5573 if(cmdBuffer->pCmdBase)
5574 {
5575 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
5576 }
5577
5578 cmdBuffer->pCmdBase = 0;
5579 cmdBuffer->iOffset = cmdBuffer->iRemaining = 0;
5580 }
5581 m_sizeOfVeBatchBuffer = 0;
5582
5583 if(eStatus == MOS_STATUS_SUCCESS)
5584 {
5585 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&m_realCmdBuffer));
5586 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
5587 }
5588 }
5589
5590 return eStatus;
5591 }
5592
SendPrologWithFrameTracking(PMOS_COMMAND_BUFFER cmdBuffer,bool frameTrackingRequested,MHW_MI_MMIOREGISTERS * mmioRegister)5593 MOS_STATUS CodechalVdencHevcStateG11::SendPrologWithFrameTracking(
5594 PMOS_COMMAND_BUFFER cmdBuffer,
5595 bool frameTrackingRequested,
5596 MHW_MI_MMIOREGISTERS *mmioRegister)
5597 {
5598 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5599
5600 CODECHAL_ENCODE_FUNCTION_ENTER;
5601
5602 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
5603
5604 if (UseRenderCommandBuffer())
5605 {
5606 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SendPrologWithFrameTracking(cmdBuffer, frameTrackingRequested, mmioRegister));
5607 return eStatus;
5608 }
5609
5610 if (!IsLastPipe())
5611 {
5612 return eStatus;
5613 }
5614
5615 PMOS_COMMAND_BUFFER commandBufferInUse;
5616 if (m_realCmdBuffer.pCmdBase)
5617 {
5618 commandBufferInUse = &m_realCmdBuffer;
5619 }
5620 else
5621 if (cmdBuffer && cmdBuffer->pCmdBase)
5622 {
5623 commandBufferInUse = cmdBuffer;
5624 }
5625 else
5626 {
5627 eStatus = MOS_STATUS_INVALID_PARAMETER;
5628 return eStatus;
5629 }
5630
5631 // initialize command buffer attributes
5632 commandBufferInUse->Attributes.bTurboMode = m_hwInterface->m_turboMode;
5633 commandBufferInUse->Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices;
5634 commandBufferInUse->Attributes.dwNumRequestedSubSlices = m_hwInterface->m_numRequestedSubSlices;
5635 commandBufferInUse->Attributes.dwNumRequestedEUs = m_hwInterface->m_numRequestedEus;
5636 commandBufferInUse->Attributes.bValidPowerGatingRequest = true;
5637
5638 if (frameTrackingRequested && m_frameTrackingEnabled)
5639 {
5640 commandBufferInUse->Attributes.bEnableMediaFrameTracking = true;
5641 commandBufferInUse->Attributes.resMediaFrameTrackingSurface =
5642 &m_encodeStatusBuf.resStatusBuffer;
5643 commandBufferInUse->Attributes.dwMediaFrameTrackingTag = m_storeData;
5644 // Set media frame tracking address offset(the offset from the encoder status buffer page)
5645 commandBufferInUse->Attributes.dwMediaFrameTrackingAddrOffset = 0;
5646 }
5647
5648 MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
5649 MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
5650 genericPrologParams.pOsInterface = m_hwInterface->GetOsInterface();
5651 genericPrologParams.pvMiInterface = m_hwInterface->GetMiInterface();
5652 genericPrologParams.bMmcEnabled = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
5653 genericPrologParams.dwStoreDataValue = m_storeData - 1;
5654
5655 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(commandBufferInUse, &genericPrologParams));
5656
5657 return eStatus;
5658 }
5659
SetSliceStructs()5660 MOS_STATUS CodechalVdencHevcStateG11::SetSliceStructs()
5661 {
5662 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5663 eStatus = CodechalEncodeHevcBase::SetSliceStructs();
5664 m_numPassesInOnePipe = m_numPasses;
5665 m_numPasses = (m_numPasses + 1) * m_numPipe - 1;
5666 return eStatus;
5667 }
5668
AllocateTileStatistics()5669 MOS_STATUS CodechalVdencHevcStateG11::AllocateTileStatistics()
5670 {
5671 CODECHAL_ENCODE_FUNCTION_ENTER;
5672
5673 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5674
5675 if (!m_hevcPicParams->tiles_enabled_flag)
5676 {
5677 return eStatus;
5678 }
5679
5680 auto num_tile_rows = m_hevcPicParams->num_tile_rows_minus1 + 1;
5681 auto num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1;
5682 auto num_tiles = num_tile_rows*num_tile_columns;
5683
5684 MOS_ZeroMemory(&m_hevcFrameStatsOffset, sizeof(HEVC_TILE_STATS_INFO));
5685 MOS_ZeroMemory(&m_hevcTileStatsOffset, sizeof(HEVC_TILE_STATS_INFO));
5686 MOS_ZeroMemory(&m_hevcStatsSize, sizeof(HEVC_TILE_STATS_INFO));
5687
5688 MOS_LOCK_PARAMS lockFlagsWriteOnly;
5689 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
5690 lockFlagsWriteOnly.WriteOnly = true;
5691
5692 // Set the maximum size based on frame level statistics.
5693 m_hevcStatsSize.uiTileSizeRecord = CODECHAL_CACHELINE_SIZE;
5694 m_hevcStatsSize.uiHevcPakStatistics = m_sizeOfHcpPakFrameStats;
5695 m_hevcStatsSize.uiVdencStatistics = m_vdencEnabled ? CODECHAL_HEVC_VDENC_STATS_SIZE : 0;
5696 m_hevcStatsSize.uiHevcSliceStreamout = CODECHAL_CACHELINE_SIZE;
5697
5698 // Maintain the offsets to use for patching addresses in to the HuC Pak Integration kernel Aggregated Frame Statistics Output Buffer
5699 // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
5700 m_hevcFrameStatsOffset.uiTileSizeRecord = 0; // Tile Size Record is not present in resHuCPakAggregatedFrameStatsBuffer
5701 m_hevcFrameStatsOffset.uiHevcPakStatistics = 0;
5702 m_hevcFrameStatsOffset.uiVdencStatistics = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcPakStatistics + m_hevcStatsSize.uiHevcPakStatistics, CODECHAL_PAGE_SIZE);
5703 m_hevcFrameStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiVdencStatistics + m_hevcStatsSize.uiVdencStatistics, CODECHAL_PAGE_SIZE);
5704
5705 // Frame level statistics
5706 m_hwInterface->m_pakIntAggregatedFrameStatsSize = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcSliceStreamout + (m_hevcStatsSize.uiHevcSliceStreamout * m_numLcu), CODECHAL_PAGE_SIZE);
5707
5708 // HEVC Frame Statistics Buffer - Output from HuC PAK Integration kernel
5709 if (Mos_ResourceIsNull(&m_resHuCPakAggregatedFrameStatsBuffer.sResource))
5710 {
5711 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
5712 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
5713 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
5714 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
5715 allocParamsForBufferLinear.Format = Format_Buffer;
5716 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
5717 allocParamsForBufferLinear.pBufName = "HCP Aggregated Frame Statistics Streamout Buffer";
5718
5719 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
5720 m_osInterface,
5721 &allocParamsForBufferLinear,
5722 &m_resHuCPakAggregatedFrameStatsBuffer.sResource));
5723 m_resHuCPakAggregatedFrameStatsBuffer.dwSize = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
5724
5725 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
5726 m_osInterface,
5727 &m_resHuCPakAggregatedFrameStatsBuffer.sResource,
5728 &lockFlagsWriteOnly);
5729
5730 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
5731 MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
5732 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource);
5733 }
5734
5735 // Maintain the offsets to use for patching addresses in to the Tile Based Statistics Buffer
5736 // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
5737 m_hevcTileStatsOffset.uiTileSizeRecord = 0; // TileReord is in a separated resource
5738 m_hevcTileStatsOffset.uiHevcPakStatistics = 0; // PakStaticstics is head of m_resTileBasedStatisticsBuffer
5739 m_hevcTileStatsOffset.uiVdencStatistics = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcPakStatistics + (m_hevcStatsSize.uiHevcPakStatistics * num_tiles), CODECHAL_PAGE_SIZE);
5740 m_hevcTileStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiVdencStatistics + (m_hevcStatsSize.uiVdencStatistics * num_tiles), CODECHAL_PAGE_SIZE);
5741 // Combined statistics size for all tiles
5742 m_hwInterface->m_pakIntTileStatsSize = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcSliceStreamout + m_hevcStatsSize.uiHevcSliceStreamout * m_numLcu, CODECHAL_PAGE_SIZE);
5743
5744 // Tile size record size for all tiles
5745 m_hwInterface->m_tileRecordSize = m_hevcStatsSize.uiTileSizeRecord * num_tiles;
5746
5747 if (Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource) || m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_pakIntTileStatsSize)
5748 {
5749 if (!Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource))
5750 {
5751 m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource);
5752 }
5753 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
5754 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
5755 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
5756 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
5757 allocParamsForBufferLinear.Format = Format_Buffer;
5758 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntTileStatsSize;
5759 allocParamsForBufferLinear.pBufName = "HCP Tile Level Statistics Streamout Buffer";
5760
5761 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
5762 m_osInterface,
5763 &allocParamsForBufferLinear,
5764 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource));
5765 m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_pakIntTileStatsSize;
5766
5767 uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource(
5768 m_osInterface,
5769 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
5770 &lockFlagsWriteOnly);
5771 CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
5772
5773 MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
5774 m_osInterface->pfnUnlockResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource);
5775 }
5776
5777 if (Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource) || m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_tileRecordSize)
5778 {
5779 if (!Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource))
5780 {
5781 m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource);
5782 }
5783 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
5784 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
5785 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
5786 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
5787 allocParamsForBufferLinear.Format = Format_Buffer;
5788 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_tileRecordSize;
5789 allocParamsForBufferLinear.pBufName = "Tile Record Buffer";
5790
5791 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
5792 m_osInterface,
5793 &allocParamsForBufferLinear,
5794 &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource));
5795 m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_tileRecordSize;
5796
5797 uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource(
5798 m_osInterface,
5799 &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource,
5800 &lockFlagsWriteOnly);
5801 CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
5802
5803 MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
5804 m_osInterface->pfnUnlockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource);
5805 }
5806
5807 return eStatus;
5808 }
5809
SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams)5810 void CodechalVdencHevcStateG11::SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams)
5811 {
5812 CODECHAL_ENCODE_FUNCTION_ENTER;
5813
5814 CodechalEncodeHevcBase::SetHcpPipeBufAddrParams(pipeBufAddrParams);
5815
5816 PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex];
5817 if (!Mos_ResourceIsNull(&tileStatisticsBuffer->sResource) && (m_numPipe > 1))
5818 {
5819 pipeBufAddrParams.presLcuBaseAddressBuffer = &tileStatisticsBuffer->sResource;
5820 pipeBufAddrParams.dwLcuStreamOutOffset = m_hevcTileStatsOffset.uiHevcSliceStreamout;
5821 pipeBufAddrParams.presFrameStatStreamOutBuffer = &tileStatisticsBuffer->sResource;
5822 pipeBufAddrParams.dwFrameStatStreamOutOffset = m_hevcTileStatsOffset.uiHevcPakStatistics;
5823 }
5824 }
5825
ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer)5826 MOS_STATUS CodechalVdencHevcStateG11::ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer)
5827 {
5828 CODECHAL_ENCODE_FUNCTION_ENTER;
5829
5830 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5831
5832 // encodeStatus is offset by 2 DWs in the resource
5833 uint32_t sseOffsetinBytes = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2 + m_encodeStatusBuf.dwSumSquareErrorOffset;
5834 for (auto i = 0; i < 6; i++) // 64 bit SSE values for luma/ chroma channels need to be copied
5835 {
5836 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
5837 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
5838 miCpyMemMemParams.presSrc = m_hevcPicParams->tiles_enabled_flag && (m_numPipe > 1) ? &m_resHuCPakAggregatedFrameStatsBuffer.sResource : &m_resFrameStatStreamOutBuffer;
5839 miCpyMemMemParams.dwSrcOffset = (HEVC_PAK_STATISTICS_SSE_OFFSET + i) * sizeof(uint32_t); // SSE luma offset is located at DW32 in Frame statistics, followed by chroma
5840 miCpyMemMemParams.presDst = &m_encodeStatusBuf.resStatusBuffer;
5841 miCpyMemMemParams.dwDstOffset = sseOffsetinBytes + i * sizeof(uint32_t);
5842 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
5843 }
5844 return eStatus;
5845 }
5846
SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS & indObjBaseAddrParams)5847 void CodechalVdencHevcStateG11::SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS& indObjBaseAddrParams)
5848 {
5849 PCODECHAL_ENCODE_BUFFER tileRecordBuffer = &m_tileRecordBuffer[m_virtualEngineBbIndex];
5850 bool useTileRecordBuffer = !Mos_ResourceIsNull(&tileRecordBuffer->sResource);
5851
5852 MOS_ZeroMemory(&indObjBaseAddrParams, sizeof(indObjBaseAddrParams));
5853 indObjBaseAddrParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
5854 indObjBaseAddrParams.presMvObjectBuffer = &m_resMbCodeSurface;
5855 indObjBaseAddrParams.dwMvObjectOffset = m_mvOffset;
5856 indObjBaseAddrParams.dwMvObjectSize = m_mbCodeSize - m_mvOffset;
5857 indObjBaseAddrParams.presPakBaseObjectBuffer = &m_resBitstreamBuffer;
5858 indObjBaseAddrParams.dwPakBaseObjectSize = m_bitstreamUpperBound;
5859 indObjBaseAddrParams.presPakTileSizeStasBuffer = useTileRecordBuffer ? &tileRecordBuffer->sResource : nullptr;
5860 indObjBaseAddrParams.dwPakTileSizeStasBufferSize = useTileRecordBuffer ? m_hwInterface->m_tileRecordSize : 0;
5861 indObjBaseAddrParams.dwPakTileSizeRecordOffset = useTileRecordBuffer ? m_hevcTileStatsOffset.uiTileSizeRecord : 0;
5862 }
5863
HuCLookaheadInit()5864 MOS_STATUS CodechalVdencHevcStateG11::HuCLookaheadInit()
5865 {
5866 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5867
5868 CODECHAL_ENCODE_FUNCTION_ENTER;
5869
5870 m_firstTaskInPhase = !m_singleTaskPhaseSupported;
5871 m_lastTaskInPhase = !m_singleTaskPhaseSupported;
5872
5873 // set DMEM
5874 uint32_t initVbvFullness = MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit);
5875 MOS_LOCK_PARAMS lockFlagsWriteOnly;
5876 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
5877 lockFlagsWriteOnly.WriteOnly = true;
5878
5879 auto dmem = (PCodechalVdencHevcLaDmem)m_osInterface->pfnLockResource(
5880 m_osInterface, &m_vdencLaInitDmemBuffer, &lockFlagsWriteOnly);
5881 CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
5882 MOS_ZeroMemory(dmem, sizeof(CodechalVdencHevcLaDmem));
5883
5884 uint8_t downscaleRatioIndicator = 2; // 4x downscaling
5885 if (m_hevcPicParams->DownScaleRatio.fields.X16Minus1_X == 15 && m_hevcPicParams->DownScaleRatio.fields.X16Minus1_Y == 15)
5886 {
5887 downscaleRatioIndicator = 0; // no downscaling
5888 }
5889
5890 dmem->lookAheadFunc = 0;
5891 dmem->lengthAhead = m_lookaheadDepth;
5892 dmem->vbvBufferSize = m_hevcSeqParams->VBVBufferSizeInBit / m_averageFrameSize;
5893 dmem->vbvInitialFullness = initVbvFullness / m_averageFrameSize;
5894 dmem->statsRecords = m_numLaDataEntry;
5895 dmem->avgFrameSizeInByte = m_averageFrameSize >> 3;
5896 dmem->downscaleRatio = downscaleRatioIndicator;
5897 dmem->PGop = 4;
5898 dmem->maxGop = m_hevcSeqParams->MaxAdaptiveGopPicSize;
5899 dmem->minGop = m_hevcSeqParams->MinAdaptiveGopPicSize;
5900 dmem->adaptiveIDR = (uint8_t)m_lookaheadAdaptiveI;
5901
5902 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaInitDmemBuffer);
5903
5904 // set HuC regions
5905 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
5906 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
5907 virtualAddrParams.regionParams[0].presRegion = &m_vdencLaHistoryBuffer;
5908 virtualAddrParams.regionParams[0].isWritable = true;
5909
5910 #if USE_CODECHAL_DEBUG_TOOL && _ENCODE_VDENC_RESERVED
5911 if (m_swLaMode)
5912 {
5913 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallSwLookaheadImpl(
5914 m_debugInterface,
5915 m_swLaMode,
5916 CODECHAL_MEDIA_STATE_BRC_INIT_RESET,
5917 &m_vdencLaInitDmemBuffer,
5918 nullptr,
5919 &virtualAddrParams));
5920
5921 return eStatus;
5922 }
5923 #endif
5924
5925 MOS_COMMAND_BUFFER cmdBuffer;
5926 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
5927
5928 if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
5929 {
5930 // Send command buffer header at the beginning (OS dependent)
5931 bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : 0;
5932 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
5933 }
5934
5935 // load kernel from WOPCM into L2 storage RAM
5936 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
5937 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
5938 imemParams.dwKernelDescriptor = m_vdboxHucHevcLaAnalysisKernelDescriptor;
5939
5940 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
5941
5942 // pipe mode select
5943 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
5944 pipeModeSelectParams.Mode = m_mode;
5945 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
5946
5947 // set HuC DMEM param
5948 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
5949 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
5950 dmemParams.presHucDataSource = &m_vdencLaInitDmemBuffer;
5951 dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencLaInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
5952 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
5953 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
5954 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
5955 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
5956 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
5957
5958 // wait Huc completion (use HEVC bit for now)
5959 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
5960 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
5961 vdPipeFlushParams.Flags.bFlushHEVC = 1;
5962 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
5963 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
5964
5965 // Flush the engine to ensure memory written out
5966 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
5967 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
5968 flushDwParams.bVideoPipelineCacheInvalidate = true;
5969 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
5970
5971 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5972 {
5973 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
5974 }
5975
5976 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
5977
5978 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5979 {
5980 bool renderingFlags = m_videoContextUsesNullHw;
5981
5982 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
5983 }
5984
5985 return eStatus;
5986 }
5987
HuCLookaheadUpdate()5988 MOS_STATUS CodechalVdencHevcStateG11::HuCLookaheadUpdate()
5989 {
5990 uint8_t currentPass = (uint8_t)GetCurrentPass();
5991 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5992
5993 CODECHAL_ENCODE_FUNCTION_ENTER;
5994
5995 m_firstTaskInPhase = !m_singleTaskPhaseSupported;
5996 m_lastTaskInPhase = (currentPass == m_numPasses);
5997
5998 // set DMEM
5999 MOS_LOCK_PARAMS lockFlagsWriteOnly;
6000 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6001 lockFlagsWriteOnly.WriteOnly = true;
6002
6003 auto dmem = (PCodechalVdencHevcLaDmem)m_osInterface->pfnLockResource(
6004 m_osInterface, &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly);
6005 CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
6006 MOS_ZeroMemory(dmem, sizeof(CodechalVdencHevcLaDmem));
6007
6008 dmem->lookAheadFunc = 1;
6009 dmem->validStatsRecords = m_numValidLaRecords;
6010 dmem->offset = (m_numLaDataEntry + m_currLaDataIdx + 1 - m_numValidLaRecords) % m_numLaDataEntry;
6011 dmem->cqmQpThreshold = m_cqmQpThreshold;
6012 dmem->currentPass = currentPass;
6013
6014 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass]);
6015
6016 // set HuC regions
6017 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
6018 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
6019 virtualAddrParams.regionParams[0].presRegion = &m_vdencLaHistoryBuffer;
6020 virtualAddrParams.regionParams[0].isWritable = true;
6021 virtualAddrParams.regionParams[1].presRegion = &m_vdencLaStatsBuffer;
6022 virtualAddrParams.regionParams[2].presRegion = &m_vdencLaDataBuffer;
6023 virtualAddrParams.regionParams[2].isWritable = true;
6024
6025 #if USE_CODECHAL_DEBUG_TOOL && _ENCODE_VDENC_RESERVED
6026 if (m_swLaMode)
6027 {
6028 bool isLaAnalysisRequired = true;
6029 MOS_LOCK_PARAMS lockFlags;
6030 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
6031 lockFlags.ReadOnly = true;
6032
6033 if (!IsFirstPass())
6034 {
6035 uint32_t *data = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &m_resPakMmioBuffer, &lockFlags);
6036 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
6037 isLaAnalysisRequired = (*data == CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK);
6038 m_osInterface->pfnUnlockResource(m_osInterface, &m_resPakMmioBuffer);
6039 }
6040
6041 if (isLaAnalysisRequired)
6042 {
6043 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallSwLookaheadImpl(
6044 m_debugInterface,
6045 m_swLaMode,
6046 CODECHAL_MEDIA_STATE_BRC_UPDATE,
6047 &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass],
6048 &m_resPakMmioBuffer,
6049 &virtualAddrParams));
6050
6051 EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
6052 uint32_t baseOffset = (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize);
6053
6054 CodechalVdencHevcLaData *data = (CodechalVdencHevcLaData *)m_osInterface->pfnLockResource(m_osInterface, &m_vdencLaDataBuffer, &lockFlags);
6055 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
6056
6057 LookaheadReport *lookaheadStatus = (LookaheadReport *)(encodeStatusBuf.pEncodeStatus + baseOffset + encodeStatusBuf.dwLookaheadStatusOffset);
6058 lookaheadStatus->targetFrameSize = data[dmem->offset].targetFrameSize;
6059 lookaheadStatus->targetBufferFulness = data[dmem->offset].targetBufferFulness;
6060 lookaheadStatus->encodeHints = data[dmem->offset].encodeHints;
6061 lookaheadStatus->pyramidDeltaQP = data[dmem->offset].pyramidDeltaQP;
6062
6063 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaDataBuffer);
6064 }
6065
6066 return eStatus;
6067 }
6068 #endif
6069
6070 MOS_COMMAND_BUFFER cmdBuffer;
6071 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
6072
6073 if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
6074 {
6075 // Send command buffer header at the beginning (OS dependent)
6076 bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
6077 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
6078 }
6079
6080 if (!IsFirstPass() && m_firstTaskInPhase)
6081 {
6082 // VDENC uses HuC FW generated semaphore for conditional 2nd pass
6083 MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
6084 MOS_ZeroMemory(
6085 &miConditionalBatchBufferEndParams,
6086 sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
6087 miConditionalBatchBufferEndParams.presSemaphoreBuffer =
6088 &m_resPakMmioBuffer;
6089 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
6090 &cmdBuffer,
6091 &miConditionalBatchBufferEndParams));
6092 }
6093
6094 CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
6095
6096 // load kernel from WOPCM into L2 storage RAM
6097 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
6098 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
6099 imemParams.dwKernelDescriptor = m_vdboxHucHevcLaAnalysisKernelDescriptor;
6100
6101 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
6102
6103 // pipe mode select
6104 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
6105 pipeModeSelectParams.Mode = m_mode;
6106 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
6107
6108 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
6109
6110 // set HuC DMEM param
6111 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
6112 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
6113 dmemParams.presHucDataSource = &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass];
6114 dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencLaUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
6115 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
6116 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
6117 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
6118 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
6119 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
6120
6121 // wait Huc completion (use HEVC bit for now)
6122 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
6123 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
6124 vdPipeFlushParams.Flags.bFlushHEVC = 1;
6125 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
6126 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
6127
6128 // Flush the engine to ensure memory written out
6129 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
6130 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
6131 flushDwParams.bVideoPipelineCacheInvalidate = true;
6132 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
6133
6134 if (IsFirstPass())
6135 {
6136 // Write HUC_STATUS mask: DW1 (mask value)
6137 MHW_MI_STORE_DATA_PARAMS storeDataParams;
6138 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
6139 storeDataParams.pOsResource = &m_resPakMmioBuffer;
6140 storeDataParams.dwResourceOffset = sizeof(uint32_t);
6141 storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK;
6142 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
6143
6144 // store HUC_STATUS register: DW0 (actual value)
6145 CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
6146 auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex);
6147 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
6148 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
6149 storeRegParams.presStoreBuffer = &m_resPakMmioBuffer;
6150 storeRegParams.dwOffset = 0;
6151 storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
6152 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
6153 }
6154
6155 // Write lookahead status to encode status buffer
6156 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
6157 EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
6158 uint32_t baseOffset =
6159 (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource
6160 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
6161 miCpyMemMemParams.presSrc = &m_vdencLaDataBuffer;
6162 miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalVdencHevcLaData) + CODECHAL_OFFSETOF(CodechalVdencHevcLaData, encodeHints);
6163 miCpyMemMemParams.presDst = &encodeStatusBuf.resStatusBuffer;
6164 miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset + CODECHAL_OFFSETOF(LookaheadReport, encodeHints);
6165 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
6166 miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalVdencHevcLaData) + CODECHAL_OFFSETOF(CodechalVdencHevcLaData, targetFrameSize);
6167 miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset + CODECHAL_OFFSETOF(LookaheadReport, targetFrameSize);
6168 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
6169 miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalVdencHevcLaData) + CODECHAL_OFFSETOF(CodechalVdencHevcLaData, targetBufferFulness);
6170 miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset + CODECHAL_OFFSETOF(LookaheadReport, targetBufferFulness);
6171 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
6172 miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalVdencHevcLaData) + CODECHAL_OFFSETOF(CodechalVdencHevcLaData, pyramidDeltaQP);
6173 miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset + CODECHAL_OFFSETOF(LookaheadReport, pyramidDeltaQP);
6174 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
6175
6176 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
6177 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
6178
6179 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
6180
6181 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6182 {
6183 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
6184 }
6185
6186 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
6187
6188 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6189 {
6190 bool renderingFlags = m_videoContextUsesNullHw;
6191 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
6192 }
6193
6194 return eStatus;
6195 }
6196
AnalyzeLookaheadStats()6197 MOS_STATUS CodechalVdencHevcStateG11::AnalyzeLookaheadStats()
6198 {
6199 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6200
6201 CODECHAL_ENCODE_FUNCTION_ENTER;
6202
6203 if (IsFirstPass())
6204 {
6205 m_numValidLaRecords++;
6206 }
6207
6208 if (m_lookaheadInit)
6209 {
6210 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCLookaheadInit());
6211 m_lookaheadInit = false;
6212 }
6213
6214 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCLookaheadUpdate());
6215 if (IsLastPass() && (m_numValidLaRecords >= m_lookaheadDepth))
6216 {
6217 m_numValidLaRecords--;
6218 m_lookaheadReport = true;
6219 }
6220
6221 if (m_hevcPicParams->bLastPicInStream)
6222 {
6223 // Flush the last frames
6224 while (m_numValidLaRecords > 0)
6225 {
6226 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCLookaheadUpdate());
6227 m_numValidLaRecords--;
6228 }
6229 }
6230
6231 return eStatus;
6232 }
6233
HuCBrcInitReset()6234 MOS_STATUS CodechalVdencHevcStateG11::HuCBrcInitReset()
6235 {
6236 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6237
6238 CODECHAL_ENCODE_FUNCTION_ENTER;
6239
6240 MOS_COMMAND_BUFFER cmdBuffer;
6241 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
6242
6243 if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && (m_numPipe == 1))
6244 {
6245 // Send command buffer header at the beginning (OS dependent)
6246 bool requestFrameTracking = m_singleTaskPhaseSupported ?
6247 m_firstTaskInPhase : 0;
6248 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
6249 }
6250
6251 // load kernel from WOPCM into L2 storage RAM
6252 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
6253 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
6254 imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcInitKernelDescriptor;
6255
6256 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
6257
6258 // pipe mode select
6259 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
6260 pipeModeSelectParams.Mode = m_mode;
6261 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
6262
6263 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
6264
6265 // set HuC DMEM param
6266 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
6267 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
6268 dmemParams.presHucDataSource = &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx];
6269 dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
6270 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
6271 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
6272
6273 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
6274 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
6275 virtualAddrParams.regionParams[0].presRegion = &m_vdencBrcHistoryBuffer;
6276 virtualAddrParams.regionParams[0].isWritable = true;
6277 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
6278
6279 // Store HUC_STATUS2 register bit 6 before HUC_Start command
6280 // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
6281 // (HUC_Start command with last start bit set).
6282 CODECHAL_DEBUG_TOOL(
6283 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
6284 )
6285
6286 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
6287
6288 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
6289
6290 // wait Huc completion (use HEVC bit for now)
6291 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
6292 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
6293 vdPipeFlushParams.Flags.bFlushHEVC = 1;
6294 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
6295 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
6296
6297 // Flush the engine to ensure memory written out
6298 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
6299 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
6300 flushDwParams.bVideoPipelineCacheInvalidate = true;
6301 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
6302
6303 if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && (m_numPipe == 1))
6304 {
6305 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
6306 }
6307
6308 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
6309
6310 if (!m_singleTaskPhaseSupported)
6311 {
6312 bool renderingFlags = m_videoContextUsesNullHw;
6313
6314 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
6315 &cmdBuffer,
6316 CODECHAL_MEDIA_STATE_BRC_INIT_RESET,
6317 nullptr)));
6318
6319 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
6320 }
6321
6322 CODECHAL_DEBUG_TOOL(DumpHucBrcInit());
6323
6324 return eStatus;
6325 }
6326
ConstructHucCmdForBRC(PMOS_RESOURCE batchBuffer)6327 MOS_STATUS CodechalVdencHevcStateG11::ConstructHucCmdForBRC(PMOS_RESOURCE batchBuffer)
6328 {
6329 MOS_COMMAND_BUFFER cmdBuffer;
6330 int32_t currentPass = GetCurrentPass();
6331 uint16_t len = 0;
6332
6333 MOS_LOCK_PARAMS lockFlags;
6334 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
6335 lockFlags.ReadOnly = true;
6336
6337 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, batchBuffer, &lockFlags);
6338 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
6339
6340 CodechalCmdInitializerG11* pCmdInitializerG11 = static_cast<CodechalCmdInitializerG11*>(m_hucCmdInitializer);
6341 len = m_cmd2StartInBytes - m_picStateCmdStartInBytes;
6342 pCmdInitializerG11->AddCmdConstData(
6343 CODECHAL_CMD5,
6344 (uint32_t*)(data + m_picStateCmdStartInBytes),
6345 len,
6346 m_picStateCmdStartInBytes);
6347
6348 m_osInterface->pfnUnlockResource(m_osInterface, batchBuffer);
6349
6350 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
6351 CODECHAL_ENCODE_CHK_STATUS_RETURN(
6352 m_hucCmdInitializer->CmdInitializerExecute(true, &m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass], &cmdBuffer));
6353 ReturnCommandBuffer(&cmdBuffer);
6354
6355 if (!m_singleTaskPhaseSupported)
6356 {
6357 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
6358 &cmdBuffer,
6359 CODECHAL_NUM_MEDIA_STATES,
6360 "HucCmd")));
6361
6362 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, m_videoContextUsesNullHw));
6363 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->DumpHucCmdInit(&m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass])));
6364 }
6365
6366 return MOS_STATUS_SUCCESS;
6367 }
6368
HuCBrcUpdate()6369 MOS_STATUS CodechalVdencHevcStateG11::HuCBrcUpdate()
6370 {
6371 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6372
6373 CODECHAL_ENCODE_FUNCTION_ENTER;
6374
6375 int32_t currentPass = GetCurrentPass();
6376 if (currentPass < 0)
6377 {
6378 eStatus = MOS_STATUS_INVALID_PARAMETER;
6379 return eStatus;
6380 }
6381
6382 MOS_COMMAND_BUFFER cmdBuffer;
6383
6384 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructBatchBufferHuCBRC(&m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass]));
6385 //For Group 3 cmds, they are constructed by driver, separate them into m_vdencGroup3BatchBuffer to avoid surface misorder under CP use case.
6386 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructBatchBufferHuCBRCForGroup3(&m_vdencGroup3BatchBuffer[m_currRecycledBufIdx][currentPass]));
6387
6388 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructHucCmdForBRC(&m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass]));
6389
6390 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
6391 if ((!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_brcInit)) && (m_numPipe == 1))
6392 {
6393 // Send command buffer header at the beginning (OS dependent)
6394 bool requestFrameTracking = m_singleTaskPhaseSupported ?
6395 m_firstTaskInPhase : 0;
6396 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
6397 }
6398
6399 // load kernel from WOPCM into L2 storage RAM
6400 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
6401 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
6402
6403 if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW) // Low Delay BRC
6404 {
6405 imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcLowdelayKernelDescriptor;
6406 }
6407 else
6408 {
6409 imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcUpdateKernelDescriptor;
6410 }
6411
6412 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
6413
6414 // pipe mode select
6415 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
6416 pipeModeSelectParams.Mode = m_mode;
6417 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
6418
6419 // DMEM set
6420 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
6421
6422 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
6423 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
6424 dmemParams.presHucDataSource = &(m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass]);
6425 dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
6426 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
6427
6428 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
6429
6430 // Set Const Data buffer
6431 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetConstDataHuCBrcUpdate());
6432
6433 // Add Virtual addr
6434 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCBrcUpdate(&m_virtualAddrParams));
6435 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &m_virtualAddrParams));
6436
6437 // Store HUC_STATUS2 register bit 6 before HUC_Start command
6438 // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
6439 // (HUC_Start command with last start bit set).
6440 CODECHAL_DEBUG_TOOL(
6441 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
6442 )
6443
6444 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
6445
6446 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
6447
6448 // wait Huc completion (use HEVC bit for now)
6449 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
6450 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
6451 vdPipeFlushParams.Flags.bFlushHEVC = 1;
6452 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
6453 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
6454
6455 // Flush the engine to ensure memory written out
6456 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
6457 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
6458 flushDwParams.bVideoPipelineCacheInvalidate = true;
6459 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
6460
6461 // Write HUC_STATUS mask: DW1 (mask value)
6462 MHW_MI_STORE_DATA_PARAMS storeDataParams;
6463 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
6464 storeDataParams.pOsResource = &m_resPakMmioBuffer;
6465 storeDataParams.dwResourceOffset = sizeof(uint32_t);
6466 storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK;
6467 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
6468
6469 // store HUC_STATUS register: DW0 (actual value)
6470 CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
6471 auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex);
6472 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
6473 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
6474 storeRegParams.presStoreBuffer = &m_resPakMmioBuffer;
6475 storeRegParams.dwOffset = 0;
6476 storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
6477 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
6478
6479 // DW0 & DW1 will considered together for conditional batch buffer end cmd later
6480 if ((!m_singleTaskPhaseSupported) && (m_osInterface->bNoParsingAssistanceInKmd) && (m_numPipe == 1))
6481 {
6482 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
6483 }
6484
6485 // HuC Input
6486 CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(true));
6487
6488 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
6489
6490 if (!m_singleTaskPhaseSupported)
6491 {
6492 bool renderingFlags = m_videoContextUsesNullHw;
6493
6494 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
6495 &cmdBuffer,
6496 CODECHAL_MEDIA_STATE_BRC_UPDATE,
6497 nullptr)));
6498
6499 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
6500 }
6501
6502 // HuC Output
6503 CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(false));
6504
6505 return eStatus;
6506 }
6507
SetVdencPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams)6508 void CodechalVdencHevcStateG11::SetVdencPipeBufAddrParams(
6509 MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams)
6510 {
6511 CODECHAL_ENCODE_FUNCTION_ENTER;
6512
6513 CodechalVdencHevcState::SetVdencPipeBufAddrParams(pipeBufAddrParams);
6514
6515 PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex];
6516 if (!Mos_ResourceIsNull(&tileStatisticsBuffer->sResource))
6517 {
6518 pipeBufAddrParams.presVdencStreamOutBuffer = &tileStatisticsBuffer->sResource;
6519 pipeBufAddrParams.dwVdencStatsStreamOutOffset = m_hevcTileStatsOffset.uiVdencStatistics;
6520 }
6521 }
6522
UpdateCmdBufAttribute(PMOS_COMMAND_BUFFER cmdBuffer,bool renderEngineInUse)6523 MOS_STATUS CodechalVdencHevcStateG11::UpdateCmdBufAttribute(
6524 PMOS_COMMAND_BUFFER cmdBuffer,
6525 bool renderEngineInUse)
6526 {
6527 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6528
6529 // should not be there. Will remove it in the next change
6530 CODECHAL_ENCODE_FUNCTION_ENTER;
6531 if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
6532 {
6533 PMOS_CMD_BUF_ATTRI_VE attriExt =
6534 (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
6535
6536 memset(attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE));
6537 attriExt->bUseVirtualEngineHint =
6538 attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse;
6539 }
6540
6541 return eStatus;
6542 }
6543
SetAndPopulateVEHintParams(PMOS_COMMAND_BUFFER cmdBuffer)6544 MOS_STATUS CodechalVdencHevcStateG11::SetAndPopulateVEHintParams(
6545 PMOS_COMMAND_BUFFER cmdBuffer)
6546 {
6547 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6548
6549 CODECHAL_ENCODE_FUNCTION_ENTER;
6550
6551 if (!MOS_VE_SUPPORTED(m_osInterface))
6552 {
6553 return eStatus;
6554 }
6555
6556 CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS scalSetParms;
6557 MOS_ZeroMemory(&scalSetParms, sizeof(CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS));
6558
6559 if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
6560 {
6561 scalSetParms.bNeedSyncWithPrevious = true;
6562 }
6563
6564 int32_t currentPass = GetCurrentPass();
6565 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
6566 {
6567 eStatus = MOS_STATUS_INVALID_PARAMETER;
6568 return eStatus;
6569 }
6570 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
6571 if (m_numPipe >= 2)
6572 {
6573 for (auto i = 0; i < m_numPipe; i++)
6574 {
6575 scalSetParms.veBatchBuffer[i] = m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex].OsResource;
6576 }
6577 }
6578
6579 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_SetHintParams(this, m_scalabilityState, &scalSetParms));
6580 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
6581 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_PopulateHintParams(m_scalabilityState, cmdBuffer));
6582
6583 return eStatus;
6584 }
6585
6586 #if USE_CODECHAL_DEBUG_TOOL
DumpHucPakIntegrate()6587 MOS_STATUS CodechalVdencHevcStateG11::DumpHucPakIntegrate()
6588 {
6589 int32_t currentPass = GetCurrentPass();
6590 // HuC Input
6591 // HuC DMEM
6592 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
6593 &m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass],
6594 MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG11), CODECHAL_CACHELINE_SIZE),
6595 currentPass,
6596 hucRegionDumpPakIntegrate));
6597
6598 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
6599 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
6600 0,
6601 m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize,
6602 0,
6603 "",
6604 true,
6605 currentPass,
6606 hucRegionDumpPakIntegrate));
6607
6608 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
6609 &m_resHuCPakAggregatedFrameStatsBuffer.sResource,
6610 0,
6611 m_resHuCPakAggregatedFrameStatsBuffer.dwSize,
6612 1,
6613 "",
6614 true,
6615 currentPass,
6616 hucRegionDumpPakIntegrate));
6617
6618 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
6619 &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource,
6620 0,
6621 m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize,
6622 15,
6623 "",
6624 true,
6625 currentPass,
6626 hucRegionDumpPakIntegrate));
6627
6628 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
6629 &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource,
6630 0,
6631 m_vdenc2ndLevelBatchBufferSize[m_currRecycledBufIdx],
6632 7,
6633 "",
6634 true,
6635 currentPass,
6636 hucRegionDumpPakIntegrate));
6637
6638 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
6639 &m_resBitstreamBuffer,
6640 0,
6641 m_encodeParams.dwBitstreamSize,
6642 5,
6643 "",
6644 false,
6645 currentPass,
6646 hucRegionDumpPakIntegrate));
6647
6648 // Region 6 - BRC History buffer
6649 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
6650 &m_vdencBrcHistoryBuffer,
6651 0,
6652 CODECHAL_VDENC_HEVC_BRC_HISTORY_BUF_SIZE,
6653 6,
6654 "",
6655 false,
6656 currentPass,
6657 hucRegionDumpPakIntegrate));
6658
6659 // Region 9 - HCP BRC Data Output
6660 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
6661 &m_resBrcDataBuffer,
6662 0,
6663 CODECHAL_CACHELINE_SIZE,
6664 9,
6665 "",
6666 false,
6667 currentPass,
6668 hucRegionDumpPakIntegrate));
6669
6670 return MOS_STATUS_SUCCESS;
6671 }
6672
DumpHucCqp()6673 MOS_STATUS CodechalVdencHevcStateG11::DumpHucCqp()
6674 {
6675 CODECHAL_ENCODE_FUNCTION_ENTER;
6676 int32_t currentPass = GetCurrentPass();
6677
6678 // Region 5 - Output SLB Buffer
6679 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
6680 &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource,
6681 0,
6682 m_hwInterface->m_vdenc2ndLevelBatchBufferSize,
6683 5,
6684 "_Out_Slb",
6685 false,
6686 currentPass,
6687 hucRegionDumpUpdate));
6688
6689 return MOS_STATUS_SUCCESS;
6690 }
6691
DumpVdencOutputs()6692 MOS_STATUS CodechalVdencHevcStateG11::DumpVdencOutputs()
6693 {
6694 CODECHAL_ENCODE_FUNCTION_ENTER;
6695
6696 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::DumpVdencOutputs());
6697
6698 if (m_hevcPicParams->tiles_enabled_flag)
6699 {
6700 PMOS_RESOURCE presVdencTileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;
6701 auto num_tiles = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1);
6702 auto vdencStatsSizeAllTiles = num_tiles * m_vdencBrcStatsBufferSize;
6703 auto vdencStatsOffset = m_hevcTileStatsOffset.uiVdencStatistics;
6704
6705 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6706 presVdencTileStatisticsBuffer,
6707 CodechalDbgAttr::attrVdencOutput,
6708 "_TileStats",
6709 vdencStatsSizeAllTiles,
6710 vdencStatsOffset,
6711 CODECHAL_NUM_MEDIA_STATES));
6712
6713 // Slice Size Conformance
6714 if (m_hevcSeqParams->SliceSizeControl)
6715 {
6716 PMOS_RESOURCE presLcuBaseAddressBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;
6717 auto sliceStreamoutOffset = m_hevcTileStatsOffset.uiHevcSliceStreamout;
6718 uint32_t size = m_numLcu * CODECHAL_CACHELINE_SIZE;
6719 // Slice Size StreamOut Surface
6720 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6721 presLcuBaseAddressBuffer,
6722 CodechalDbgAttr::attrVdencOutput,
6723 "_SliceSize",
6724 size,
6725 sliceStreamoutOffset,
6726 CODECHAL_NUM_MEDIA_STATES));
6727 }
6728 }
6729 return MOS_STATUS_SUCCESS;
6730 }
6731 #endif
6732