/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//!
//! \file     vphal_render_composite_g8.cpp
//! \brief    Composite related VPHAL functions
//! \details  Unified VP HAL Composite module including render initialization,
//!           resource allocation/free and rendering
//!
#include "vphal_render_composite_g8.h"
29 
//! Thread limit value associated with the BDW GT2 IEF workaround.
//! NOTE(review): the visible code in this file returns waBdwGt2ThreadLimit from
//! GetThreadCountForVfeState() rather than this macro - confirm whether the
//! macro is still referenced anywhere before relying on it.
#define VPHAL_COMP_WA_BDW_GT2_IEF_THREAD_LIMIT  96
31 
SubmitStatesFillGenSpecificStaticData(PVPHAL_RENDERING_DATA_COMPOSITE pRenderingData,PVPHAL_SURFACE pTarget,MEDIA_OBJECT_KA2_STATIC_DATA * pStatic)32 void CompositeStateG8::SubmitStatesFillGenSpecificStaticData(
33     PVPHAL_RENDERING_DATA_COMPOSITE     pRenderingData,
34     PVPHAL_SURFACE                      pTarget,
35     MEDIA_OBJECT_KA2_STATIC_DATA        *pStatic)
36 {
37     PVPHAL_SURFACE                      pSurface;
38 
39     //Set shift offset for interlace scaling
40     //Vertical Frame Origin for Layer 0 - Layer 7
41     //Format = Single precision floating point
42     pSurface = pRenderingData->pLayers[0]; // only using primary layer [0]
43     if (nullptr != pSurface && pSurface->bInterlacedScaling)
44     {
45         if (pSurface->SampleType == SAMPLE_INTERLEAVED_EVEN_FIRST_TOP_FIELD || pSurface->SampleType == SAMPLE_INTERLEAVED_ODD_FIRST_TOP_FIELD)
46         {
47             //use the cropping size, not the surface size
48             pStatic->DW11.TopBottomDelta = (float)(1.0 / (pSurface->rcDst.bottom - pSurface->rcDst.top) - 1.0 / (pSurface->rcSrc.bottom - pSurface->rcSrc.top));
49         }
50         else
51         {
52             pStatic->DW11.TopBottomDelta = (float)(-(1.0 / (pSurface->rcDst.bottom - pSurface->rcDst.top) - 1.0 / (pSurface->rcSrc.bottom - pSurface->rcSrc.top)));
53         }
54     }
55 
56     // Set ChromaSitting
57     pStatic->DW10.ChromaSitingLocation = GetOutputChromaSitting(pTarget);
58 
59     if (pRenderingData->iLayers > 0)
60     {
61         pStatic->DW09.ObjKa2Gen8.IEFByPassEnable = pRenderingData->pLayers[0]->bIEF ? false : true;
62     }
63 
64     // Set alpha calculation flag. The bit definitions are different for GEN8 and GEN9+.
65     // Set Bit-17
66     pStatic->DW09.ObjKa2Gen8.AlphaChannelCalculation = pRenderingData->bAlphaCalculateEnable ? true : false;
67 }
68 
GetPlaneOffsetOverrideParam(PRENDERHAL_SURFACE pRenderHalSurface,PRENDERHAL_SURFACE_STATE_PARAMS pParams,PRENDERHAL_OFFSET_OVERRIDE pOverride)69 PRENDERHAL_OFFSET_OVERRIDE CompositeStateG8::GetPlaneOffsetOverrideParam(
70     PRENDERHAL_SURFACE              pRenderHalSurface,
71     PRENDERHAL_SURFACE_STATE_PARAMS pParams,
72     PRENDERHAL_OFFSET_OVERRIDE      pOverride)
73 {
74     uint32_t                        uBytesPerPixelShift  = 0;
75     uint32_t                        uYPlaneTopLvlIndexY  = 0;
76     uint32_t                        uYPlane2ndLvlIndexY  = 0;
77     uint32_t                        uYPlaneTopLvlIndexX  = 0;
78     uint32_t                        uYPlane2ndLvlIndexX  = 0;
79     uint32_t                        uUVPlaneTopLvlIndexY = 0;
80     uint32_t                        uUVPlane2ndLvlIndexY = 0;
81     uint32_t                        uUVPlaneTopLvlIndexX = 0;
82     uint32_t                        uUVPlane2ndLvlIndexX = 0;
83     uint32_t                        uSurfPitch           = 0;
84     PMOS_SURFACE                    pSurface;
85     RECT                            tempRect;
86     PRENDERHAL_OFFSET_OVERRIDE      returnOverride = nullptr;
87 
88     if ((pRenderHalSurface == nullptr)          ||
89         (pParams == nullptr)                    ||
90         (pOverride == nullptr))
91     {
92         return nullptr;
93     }
94 
95     pSurface = &pRenderHalSurface->OsSurface;
96 
97     if (!pParams->b32MWColorFillKern)
98     {
99         return nullptr;
100     }
101 
102     // Due to lacking of native 32x32 block support, we use 2x2 x (16x16) instead,
103     // so only the left-top block can perform none zero X/Y origin correctly.
104     // Due to limitation of X/Y offset in Surface State, it can be programmed
105     // within a tile only to make left/top coordinate falling into (0, 0) block.
106     // Adjust Surface Base Address to tile alignment.
107     // The same approach below is applied to both X and Y direction.
108     //
109     // |   32 DWORDs    |   32 DWORDs    | 16 pxls| 16 pxls|
110     // |----------------|----------------|--------|--------|-----
111     // |      tile      |      tile      |        |   left |
112     // |                |                |        |     |  |
113     //                              base address  |     |
114     //                                adjustment  |     |
115     //                                            |     |
116     //                                     X/Y Offset   |
117     //                                  in Surface State|
118     //                                                  |
119     //                                       DestHorizontalBlockOrigin
120 
121     if (pParams->b32MWColorFillKern == true)
122     {
123         uint32_t uiOld_YplaneHeight   = pSurface->dwHeight;
124         tempRect                      = pRenderHalSurface->rcDst;
125         // Backup Original Surface Pitch for surface offset tuning solution later
126         uSurfPitch                    = pSurface->dwPitch;
127 
128         pSurface->YPlaneOffset.iXOffset = tempRect.left;
129         pSurface->YPlaneOffset.iYOffset = tempRect.top;
130 
131         // This is to preserve lowest 4 bits of left/top value for DW69.block original
132         // i.e. pRenderHalSurface->rcDst.left    = tempRect.left % VPHAL_MACROBLOCK_SIZE
133         //      pRenderHalSurface->rcDst.top     = tempRect.top  % VPHAL_MACROBLOCK_SIZE
134         pRenderHalSurface->rcDst.left    = tempRect.left & (VPHAL_MACROBLOCK_SIZE - 1);
135         pRenderHalSurface->rcDst.top     = tempRect.top  & (VPHAL_MACROBLOCK_SIZE - 1);
136 
137         // Due to we offset/shifted surface base address so that we need update width/height and right/bottom
138         pSurface->dwWidth       = pRenderHalSurface->rcDst.right     = tempRect.right  - MOS_ALIGN_FLOOR(tempRect.left, VPHAL_MACROBLOCK_SIZE);
139         pSurface->dwHeight      = pRenderHalSurface->rcDst.bottom    = tempRect.bottom - MOS_ALIGN_FLOOR(tempRect.top, VPHAL_MACROBLOCK_SIZE);
140 
141         switch (pSurface->Format)
142         {
143             case Format_A8B8G8R8:
144             case Format_X8B8G8R8:
145             case Format_A8R8G8B8:
146             case Format_X8R8G8B8:
147             case Format_R10G10B10A2:
148                     uBytesPerPixelShift = 2;    // 4 bytes per pixel
149                break;
150             // packeted format
151             case Format_YUY2:
152             case Format_YUYV:
153             case Format_YVYU:
154             case Format_UYVY:
155             case Format_VYUY:
156                     uBytesPerPixelShift = 1;    // 2 bytes per pixel
157                 break;
158             // planar format
159             case Format_P010:
160             case Format_P016:
161                     uBytesPerPixelShift = 1;    // 2 bytes per pixel
162                 break;
163             // planar format
164             case Format_NV12:
165                     uBytesPerPixelShift = 0;    // 1 bytes per pixel
166                 break;
167             default:
168                     uBytesPerPixelShift = 0;
169                 break;
170         }
171 
172         // # of tiles in Y direction for base address adjustment
173         uYPlaneTopLvlIndexY     = tempRect.top >> VPHAL_YTILE_H_SHIFTBITS;
174 
175         // Yoffset within tile and above 16x16 block
176         // It's to retrieve 2ndLvlIndex bits field through bitwise operations.
177         // The intersection part of two AND MASKS would be data bits what we need.
178         uYPlane2ndLvlIndexY     = (tempRect.top & (VPHAL_YTILE_H_ALIGNMENT - 1)) &
179                 ~(VPHAL_MACROBLOCK_SIZE - 1);
180 
181         // # of tiles in X direction for base address adjustment
182         // it's to simply shifting left VPHAL_YTILE_W_SHIFTBITS and
183         // then right shifting uBytesPerPixelShift number of bits
184         uYPlaneTopLvlIndexX     = tempRect.left >> (VPHAL_YTILE_W_SHIFTBITS - uBytesPerPixelShift);
185 
186         // Xoffset within tile and above 16x16 block, in DWORD
187         uYPlane2ndLvlIndexX     = ((tempRect.left &
188                 ((VPHAL_YTILE_W_ALIGNMENT >> uBytesPerPixelShift)-1)) &
189                 ~(VPHAL_MACROBLOCK_SIZE - 1)) >> (2 - uBytesPerPixelShift);
190 
191         // NV12/P010/P016 is using two planes so that we have to caculate TopLvl/2ndLvl index for UV plane
192         if (pSurface->Format == Format_NV12 ||
193             pSurface->Format == Format_P010 ||
194             pSurface->Format == Format_P016)
195         {
196             //      (original) old Y plane base ->| +------------------+     |
197             //                                    | |                  |     |uiOld_YplaneHeight
198             //                                    | |                  |     |
199             //                                   ......   Y-plane     ...   ...
200             //        (rebased) new Y plane base->| |(tile-aligned)    |     |  |
201             //                                    | |                  |     |  |
202             // uiOld_YplaneBase_to_New_UVplaneBase| |////colorfill/////|     |  |
203             //                                    | |//////area////////|     |  |uiNew_YplaneHeight
204             //                old UV plane base ->| +------------------+     |  |
205             //                                    | |                  |  |
206             //                 new UV plane base->| |(tile-aligned)    |  |
207             //                                  |   |                  |  |
208             //          32 > YOffsetFor_UV_Plane|   |     UV-plane     |  | uiOldUVplaneBaseToBottom
209             //     16 > new UV plane rect.top|  |   |                  |  | =tempRect.bottom/2
210             //                                    | |                  |  |
211             //                 uiNew_UVplaneHeight| |//colorfill area//|  |
212             //               =uiNew_YplaneHeight/2| +------------------+  |<-UV plane rect.bottom (old == new)
213             //
214 
215             uint32_t uiOldUVplaneBaseToBottom            = tempRect.bottom / 2;
216             uint32_t uiNew_UVplaneHeight                 = pSurface->dwHeight / 2; // (= uiNew_YplaneHeight / 2);
217             uint32_t uiOld_YplaneBase_to_UVplaneBottom   = uiOld_YplaneHeight + uiOldUVplaneBaseToBottom;
218             uint32_t uiOldYBaseToNewUVBase_Unaligned     = uiOld_YplaneBase_to_UVplaneBottom - uiNew_UVplaneHeight;
219 
220             // uiOld_YplaneBase_to_New_UVplaneBase is diff_in_rows(new UV plane base, old Y plane base), which is tile (32-rows) alignment.
221             uint32_t uiOld_YplaneBase_to_New_UVplaneBase = MOS_ALIGN_FLOOR(uiOldYBaseToNewUVBase_Unaligned, VPHAL_YTILE_H_ALIGNMENT);
222 
223             uUVPlaneTopLvlIndexY = tempRect.top >> (VPHAL_YTILE_H_SHIFTBITS + 1);
224             // uUVPlane2ndLvlIndexY is YOffsetFor_UV_Plane, which is 16-rows alignment offset + new UV plane rect.top
225             uUVPlane2ndLvlIndexY = uiOldYBaseToNewUVBase_Unaligned - uiOld_YplaneBase_to_New_UVplaneBase;
226             uUVPlaneTopLvlIndexX = uYPlaneTopLvlIndexX;
227             uUVPlane2ndLvlIndexX = uYPlane2ndLvlIndexX;
228         }
229 
230         // Y plane adjustments/overrides
231         pOverride->iYOffsetAdjust  = uYPlaneTopLvlIndexY * (uSurfPitch / VPHAL_YTILE_W_ALIGNMENT) * MHW_PAGE_SIZE +
232                                     uYPlaneTopLvlIndexX * MHW_PAGE_SIZE;
233         pOverride->iYOffsetX       = uYPlane2ndLvlIndexX;
234         pOverride->iYOffsetY       = uYPlane2ndLvlIndexY;
235 
236         // UV plane adjustments/overrides
237         pOverride->iUVOffsetAdjust = uUVPlaneTopLvlIndexY * (uSurfPitch >> VPHAL_YTILE_W_SHIFTBITS) * MHW_PAGE_SIZE +
238                                     uUVPlaneTopLvlIndexX * MHW_PAGE_SIZE;
239         pOverride->iUVOffsetX      = uUVPlane2ndLvlIndexX;
240         pOverride->iUVOffsetY      = uUVPlane2ndLvlIndexY;
241 
242         // calculation is done, assign return value to the pointer to Override Data
243         returnOverride = pOverride;
244     }
245 
246     return returnOverride;
247 }
248 
GetThreadCountForVfeState(PVPHAL_RENDERING_DATA_COMPOSITE pRenderingData,PVPHAL_SURFACE pTarget)249 int32_t CompositeStateG8::GetThreadCountForVfeState(
250     PVPHAL_RENDERING_DATA_COMPOSITE     pRenderingData,
251     PVPHAL_SURFACE                      pTarget)
252 {
253     // Solution for BDW GT2 IEF performance issue
254     if (!m_pPerfData->CompMaxThreads.bEnabled           &&
255         MEDIA_IS_SKU(m_pRenderHal->pSkuTable, FtrGT2)                &&
256         pRenderingData->pLayers[0]                      &&
257         pRenderingData->pLayers[0]->bIEF)
258     {
259         return waBdwGt2ThreadLimit;
260     }
261     else
262     {
263         return CompositeState::GetThreadCountForVfeState(pRenderingData, pTarget);
264     }
265 }
266