xref: /aosp_15_r20/external/intel-media-driver/media_driver/agnostic/gen8/cm/cm_hal_g8.cpp (revision ba62d9d3abf0e404f2022b4cd7a85e107f48596f)
1 /*
2 * Copyright (c) 2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file      cm_hal_g8.cpp
24 //! \brief     Common HAL CM Gen8 functions
25 //!
26 
27 #include "cm_hal_g8.h"
28 #include "cm_common.h"
29 #include "renderhal_platform_interface.h"
30 #include "mhw_state_heap_hwcmd_g8_X.h"
31 #if defined(ENABLE_KERNELS) && (!defined(_FULL_OPEN_SOURCE))
32 #include "cm_gpucopy_kernel_g8.h"
33 #include "cm_gpuinit_kernel_g8.h"
34 #else
// Fallback definitions for the Gen8 GPU-copy / GPU-init kernel binaries.
// This branch is compiled when the prebuilt kernel headers above are not
// included, so each blob is defined as "absent": null data, zero size.

// GPU copy kernel ISA.
unsigned char *pGPUCopy_kernel_isa_gen8      = nullptr;
unsigned int   iGPUCopy_kernel_isa_size_gen8 = 0;

// GPU init kernel ISA.
unsigned char *pGPUInit_kernel_isa_Gen8      = nullptr;
unsigned int   iGPUInit_kernel_isa_size_Gen8 = 0;
39 
40 #endif
41 
// Nanoseconds per render tick on Gen8 (meaning taken from the macro name;
// it is not referenced in the visible part of this file).
#define CM_NS_PER_TICK_RENDER_G8        (80)

//!
//! \brief    Gen8 memory object control value, viewable either as individual
//!           bit-fields (Gen8) or as the raw 32-bit word (value).
//! \details  HwSetSurfaceMemoryObjectControl() fills this in and hands the raw
//!           word to the surface state parameters.
//!
union CM_HAL_MEMORY_OBJECT_CONTROL_G8
{
    // Bit-field view. Declaration order and widths define the layout,
    // so they must not be rearranged.
    struct
    {
        uint32_t age          : 2;   // age field
        uint32_t              : 1;   // unnamed padding bit
        uint32_t targetCache  : 2;   // target cache selector
        uint32_t cacheControl : 2;   // cache control / memory type
        uint32_t              : 25;  // unnamed padding up to 32 bits
    } Gen8;

    // Raw 32-bit view of the same storage.
    uint32_t value;
};
57 
58 #if (_RELEASE_INTERNAL || _DEBUG)
59 #if defined(CM_DIRECT_GUC_SUPPORT)
SubmitDummyCommands(PMHW_BATCH_BUFFER batchBuffer,int32_t taskId,PCM_HAL_KERNEL_PARAM * kernelParam,void ** cmdBuffer)60 MOS_STATUS CM_HAL_G8_X::SubmitDummyCommands(
61     PMHW_BATCH_BUFFER       batchBuffer,
62     int32_t                 taskId,
63     PCM_HAL_KERNEL_PARAM    *kernelParam,
64     void                    **cmdBuffer)
65 {
66     return MOS_STATUS_UNIMPLEMENTED;
67 
68 }
69 #endif
70 #endif
71 
SubmitCommands(PMHW_BATCH_BUFFER batchBuffer,int32_t taskId,PCM_HAL_KERNEL_PARAM * kernelParam,void ** cmdBuffer)72 MOS_STATUS CM_HAL_G8_X::SubmitCommands(
73     PMHW_BATCH_BUFFER       batchBuffer,
74     int32_t                 taskId,
75     PCM_HAL_KERNEL_PARAM    *kernelParam,
76     void                    **cmdBuffer)
77 {
78     MOS_STATUS                      eStatus        = MOS_STATUS_SUCCESS;
79     PCM_HAL_STATE                   state          = m_cmState;
80     PMOS_INTERFACE                  osInterface    = m_cmState->osInterface;
81     PRENDERHAL_INTERFACE_LEGACY     renderHal      = m_cmState->renderHal;
82     MhwRenderInterface              *mhwRender     = renderHal->pMhwRenderInterface;
83     PMHW_MI_INTERFACE               mhwMiInterface = renderHal->pMhwMiInterface;
84     PRENDERHAL_STATE_HEAP           stateHeap      = renderHal->pStateHeap;
85     MHW_PIPE_CONTROL_PARAMS         pipeCtrlParams   = g_cRenderHal_InitPipeControlParams;
86     MHW_MEDIA_STATE_FLUSH_PARAM     flushParam      = g_cRenderHal_InitMediaStateFlushParams;
87     MHW_ID_LOAD_PARAMS              idLoadParams;
88     int32_t                         remaining      = 0;
89     bool                            enableWalker    = state->walkerParams.CmWalkerEnable;
90     bool                            enableGpGpu     = state->taskParam->blGpGpuWalkerEnabled;
91     MOS_COMMAND_BUFFER              mosCmdBuffer;
92     uint32_t                        syncTag;
93     int64_t                         *taskSyncLocation;
94     int32_t                         syncOffset;
95     int32_t                         tmp;
96     PCM_HAL_TASK_PARAM              taskParam = state->taskParam;
97     PCM_HAL_BB_ARGS                 bbCmArgs;
98     RENDERHAL_GENERIC_PROLOG_PARAMS genericPrologParams = {};
99     MOS_RESOURCE                    *osResource;
100     uint32_t                        tag;
101     uint32_t                        tagOffset = 0;
102     bool                            slmUsed = false;
103 
104     MOS_ZeroMemory(&mosCmdBuffer, sizeof(MOS_COMMAND_BUFFER));
105 
106     // get the tag
107     tag = renderHal->trackerProducer.GetNextTracker(renderHal->currentTrackerIndex);
108 
109     // Get the task sync offset
110     syncOffset     = state->pfnGetTaskSyncLocation(state, taskId);
111 
112     // Initialize the location
113     taskSyncLocation                 = (int64_t*)(state->renderTimeStampResource.data + syncOffset);
114     *taskSyncLocation                = CM_INVALID_INDEX;
115     *(taskSyncLocation + 1)          = CM_INVALID_INDEX;
116     if(state->cbbEnabled)
117     {
118         *(taskSyncLocation + 2)      = tag;
119         *(taskSyncLocation + 3)      = state->renderHal->currentTrackerIndex;
120     }
121 
122     // Update power option of this command;
123     CM_CHK_MOSSTATUS_GOTOFINISH( state->pfnUpdatePowerOption( state, &state->powerOption ) );
124 
125     // Register batch buffer for rendering
126     if (!enableWalker && !enableGpGpu)
127     {
128         CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
129             osInterface,
130             &batchBuffer->OsResource,
131             true,
132             true));
133     }
134 
135     // Register Timestamp Buffer
136     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
137         osInterface,
138         &state->renderTimeStampResource.osResource,
139         true,
140         true));
141 
142     // Allocate all available space, unused buffer will be returned later
143     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnGetCommandBuffer(osInterface, &mosCmdBuffer, 0));
144     remaining = mosCmdBuffer.iRemaining;
145 
146     // Enable preemption flag in the command buffer header
147     // The flag is required for both Middle Batch Buffer(Thread Group) and Middle Thread preemptions.
148     if (enableGpGpu)
149     {
150         if (taskParam->slmSize == 0 && taskParam->hasBarrier == false)
151         {
152             state->renderHal->pfnEnableGpgpuMiddleBatchBufferPreemption(state->renderHal);
153         }
154     }
155 
156     // use frame tracking to write the tracker ID to CM tracker resource
157     renderHal->trackerProducer.GetLatestTrackerResource(renderHal->currentTrackerIndex, &osResource, &tagOffset);
158     renderHal->pfnSetupPrologParams(renderHal, &genericPrologParams, osResource, tagOffset, tag);
159     FrameTrackerTokenFlat_SetProducer(&stateHeap->pCurMediaState->trackerToken, &renderHal->trackerProducer);
160     FrameTrackerTokenFlat_Merge(&stateHeap->pCurMediaState->trackerToken, renderHal->currentTrackerIndex, tag);
161 
162     // Record registers by unified media profiler in the beginning
163     if (state->perfProfiler != nullptr)
164     {
165         CM_CHK_MOSSTATUS_GOTOFINISH(state->perfProfiler->AddPerfCollectStartCmd((void *)state, state->osInterface, mhwMiInterface, &mosCmdBuffer));
166     }
167 
168     //Send the First PipeControl Command to indicate the beginning of execution
169     pipeCtrlParams = g_cRenderHal_InitPipeControlParams;
170     pipeCtrlParams.presDest          = &state->renderTimeStampResource.osResource;
171     pipeCtrlParams.dwResourceOffset  = syncOffset;
172     pipeCtrlParams.dwPostSyncOp      = MHW_FLUSH_WRITE_TIMESTAMP_REG;
173     pipeCtrlParams.dwFlushMode       = MHW_FLUSH_WRITE_CACHE;
174     CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtrlParams));
175 
176     // Initialize command buffer and insert prolog
177     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnInitCommandBuffer(renderHal, &mosCmdBuffer, &genericPrologParams));
178 
179     // update tracker tag used with CM tracker resource
180     renderHal->trackerProducer.StepForward(renderHal->currentTrackerIndex);
181 
182     // Increment sync tag
183     syncTag = renderHal->pStateHeap->dwNextTag++;
184 
185     // Check if any task to use SLM
186     for (uint32_t i = 0; i < state->taskParam->numKernels; i ++)
187     {
188         if (kernelParam[i]->slmSize > 0)
189         {
190             slmUsed = true;
191             break;
192         }
193     }
194 
195     //Check GPGPU task param
196     if (taskParam->slmSize > 0) {
197         slmUsed = true;
198     }
199 
200     //enable BDW L3 config
201     state->l3Settings.enableSlm = slmUsed;
202     HalCm_GetLegacyRenderHalL3Setting( &state->l3Settings, &renderHal->L3CacheSettings );
203     renderHal->pfnEnableL3Caching(renderHal, &renderHal->L3CacheSettings);
204     mhwRender->SetL3Cache(&mosCmdBuffer);
205 
206     if (renderHal->bSIPKernel)
207     {
208         CM_CHK_MOSSTATUS_GOTOFINISH(SetupHwDebugControl(renderHal, &mosCmdBuffer));
209     }
210 
211     // Send Pipeline Select command
212     CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddPipelineSelectCmd(&mosCmdBuffer, enableGpGpu));
213 
214     // Send State Base Address command
215     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendStateBaseAddress(renderHal, &mosCmdBuffer));
216 
217     // Send Surface States
218     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendSurfaces(renderHal, &mosCmdBuffer));
219 
220     if ( renderHal->bSIPKernel)
221     {
222         // Send SIP State
223         CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendSipStateCmd(renderHal, &mosCmdBuffer));
224     }
225 
226     // Setup VFE State params. Each Renderer MUST call pfnSetVfeStateParams().
227     // See comment in RenderHal_SetVfeStateParams() for details.
228     tmp = RENDERHAL_USE_MEDIA_THREADS_MAX;
229     if (state->maxHWThreadValues.userFeatureValue != 0)
230     {
231         if( state->maxHWThreadValues.userFeatureValue < renderHal->pHwCaps->dwMaxThreads)
232         {
233             tmp = state->maxHWThreadValues.userFeatureValue;
234         }
235     }
236     else if (state->maxHWThreadValues.apiValue != 0)
237     {
238         if( state->maxHWThreadValues.apiValue < renderHal->pHwCaps->dwMaxThreads)
239         {
240             tmp = state->maxHWThreadValues.apiValue;
241         }
242     }
243 
244     renderHal->pfnSetVfeStateParams(
245         renderHal,
246         MEDIASTATE_DEBUG_COUNTER_FREE_RUNNING,
247         tmp,
248         state->taskParam->vfeCurbeSize,
249         state->taskParam->urbEntrySize,
250         &state->scoreboardParams);
251 
252     // Send VFE State
253     CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddMediaVfeCmd(&mosCmdBuffer,
254                      renderHal->pRenderHalPltInterface->GetVfeStateParameters()));
255 
256     // Send CURBE Load
257     if (state->taskParam->vfeCurbeSize > 0)
258     {
259         CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendCurbeLoad(renderHal, &mosCmdBuffer));
260     }
261 
262     // Send Interface Descriptor Load
263     if (state->dshEnabled)
264     {
265         PRENDERHAL_DYNAMIC_STATE dynamicState = ((PRENDERHAL_MEDIA_STATE_LEGACY)stateHeap->pCurMediaState)->pDynamicState;
266         idLoadParams.dwInterfaceDescriptorStartOffset = dynamicState->memoryBlock.GetOffset() +
267                                                         dynamicState->MediaID.dwOffset;
268         idLoadParams.dwInterfaceDescriptorLength      = dynamicState->MediaID.iCount * stateHeap->dwSizeMediaID;
269     }
270     else
271     {
272         idLoadParams.dwInterfaceDescriptorStartOffset = stateHeap->pCurMediaState->dwOffset + stateHeap->dwOffsetMediaID;
273         idLoadParams.dwInterfaceDescriptorLength      = renderHal->StateHeapSettings.iMediaIDs * stateHeap->dwSizeMediaID;
274     }
275     idLoadParams.pKernelState = nullptr;
276     CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddMediaIDLoadCmd(&mosCmdBuffer, &idLoadParams));
277 
278     if (enableWalker)
279     {
280         // send media walker command, if required
281         for (uint32_t i = 0; i < state->taskParam->numKernels; i ++)
282         {
283             // Insert CONDITIONAL_BATCH_BUFFER_END
284             if ( taskParam->conditionalEndBitmap & ((uint64_t)1 << (i)))
285             {
286                 // this could be batch buffer end so need to update sync tag, media state flush, write end timestamp
287 
288                 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendSyncTag(renderHal, &mosCmdBuffer));
289 
290                 // WA for BDW/CHV
291                 if (MEDIA_IS_WA(renderHal->pWaTable, WaMSFWithNoWatermarkTSGHang))
292                 {
293                     flushParam.bFlushToGo = 1;
294                     CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMediaStateFlush(&mosCmdBuffer, nullptr, &flushParam));
295                 }
296                 else if (MEDIA_IS_WA(renderHal->pWaTable, WaAddMediaStateFlushCmd))
297                 {
298                     flushParam.bFlushToGo = 0;
299                     CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMediaStateFlush(&mosCmdBuffer, nullptr, &flushParam));
300                 }
301 
302                 // Insert a pipe control for synchronization since this Conditional Batch Buffer End command
303                 // will use value written by previous kernel. Also needed since this may be the Batch Buffer End
304                 pipeCtrlParams = g_cRenderHal_InitPipeControlParams;
305                 pipeCtrlParams.presDest = &state->renderTimeStampResource.osResource;
306                 pipeCtrlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
307                 pipeCtrlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
308                                 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtrlParams));
309 
310                 // issue a PIPE_CONTROL to write timestamp
311                 pipeCtrlParams = g_cRenderHal_InitPipeControlParams;
312                 pipeCtrlParams.presDest = &state->renderTimeStampResource.osResource;
313                 pipeCtrlParams.dwResourceOffset = syncOffset + sizeof(uint64_t);
314                 pipeCtrlParams.dwPostSyncOp = MHW_FLUSH_WRITE_TIMESTAMP_REG;
315                 pipeCtrlParams.dwFlushMode = MHW_FLUSH_READ_CACHE;
316                                 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtrlParams));
317 
318                 // Insert conditional batch buffer end
319                 mhwMiInterface->AddMiConditionalBatchBufferEndCmd(&mosCmdBuffer, &taskParam->conditionalBBEndParams[i]);
320             }
321 
322             //Insert PIPE_CONTROL at two cases:
323             // 1. synchronization is set
324             // 2. the next kernel has dependency pattern
325             if((i > 0) && ((taskParam->syncBitmap & ((uint64_t)1 << (i-1))) ||
326                 (kernelParam[i]->kernelThreadSpaceParam.patternType != CM_NONE_DEPENDENCY)))
327             {
328                 //Insert a pipe control as synchronization
329                 pipeCtrlParams = g_cRenderHal_InitPipeControlParams;
330                 pipeCtrlParams.presDest = &state->renderTimeStampResource.osResource;
331                 pipeCtrlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
332                 pipeCtrlParams.dwFlushMode = MHW_FLUSH_CUSTOM;
333                 pipeCtrlParams.bInvalidateTextureCache = true;
334                 pipeCtrlParams.bFlushRenderTargetCache = true;
335                 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtrlParams));
336             }
337 
338             CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnSendMediaWalkerState(state, kernelParam[i], &mosCmdBuffer));
339         }
340 
341         // WA for BDW/CHV
342         if (MEDIA_IS_WA(renderHal->pWaTable, WaMSFWithNoWatermarkTSGHang))
343         {
344             flushParam.bFlushToGo = 1;
345             CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMediaStateFlush(&mosCmdBuffer, nullptr, &flushParam));
346         }
347         else if (MEDIA_IS_WA(renderHal->pWaTable, WaAddMediaStateFlushCmd))
348         {
349             flushParam.bFlushToGo = 0;
350             CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMediaStateFlush(&mosCmdBuffer, nullptr, &flushParam));
351         }
352     }
353     else if (enableGpGpu)
354     {
355         // send GPGPU walker command, if required
356         for (uint32_t i = 0; i < state->taskParam->numKernels; i ++)
357         {
358             //Insert PIPE_CONTROL as synchronization if synchronization is set
359             if((i > 0) && (taskParam->syncBitmap & ((uint64_t)1 << (i-1))))
360             {
361                 //Insert a pipe control as synchronization
362                 pipeCtrlParams = g_cRenderHal_InitPipeControlParams;
363                 pipeCtrlParams.presDest = &state->renderTimeStampResource.osResource;
364                 pipeCtrlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
365                 pipeCtrlParams.dwFlushMode = MHW_FLUSH_CUSTOM;
366                 pipeCtrlParams.bInvalidateTextureCache = true;
367                 pipeCtrlParams.bFlushRenderTargetCache = true;
368                 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtrlParams));
369             }
370 
371             CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnSendGpGpuWalkerState(state, kernelParam[i], &mosCmdBuffer));
372         }
373 
374         // WA for BDW/CHV
375         if (MEDIA_IS_WA(renderHal->pWaTable, WaMSFWithNoWatermarkTSGHang))
376         {
377             flushParam.bFlushToGo = 1;
378             CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMediaStateFlush(&mosCmdBuffer, nullptr, &flushParam));
379         }
380         else if (MEDIA_IS_WA(renderHal->pWaTable, WaAddMediaStateFlushCmd))
381         {
382             flushParam.bFlushToGo = 0;
383             CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMediaStateFlush(&mosCmdBuffer, nullptr, &flushParam));
384         }
385 
386     }
387     else
388     {
389         // Send Start batch buffer command
390         CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiBatchBufferStartCmd(
391             &mosCmdBuffer,
392             batchBuffer));
393 
394         CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
395         bbCmArgs = (PCM_HAL_BB_ARGS) batchBuffer->pPrivateData;
396 
397         if ( (bbCmArgs->refCount == 1) ||
398                  (state->taskParam->reuseBBUpdateMask == 1) )
399         {
400             // Add BB end command
401             mhwMiInterface->AddMiBatchBufferEnd(nullptr, batchBuffer);
402         }
403         else //reuse BB
404         {
405             // Skip BB end command
406             mhwMiInterface->SkipMiBatchBufferEndBb(batchBuffer);
407         }
408 
409         // UnLock the batch buffer
410         if ( (bbCmArgs->refCount == 1) ||
411              (state->taskParam->reuseBBUpdateMask == 1) )
412         {
413             CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnUnlockBB(renderHal, batchBuffer));
414         }
415     }
416 
417     // issue a PIPE_CONTROL to flush all caches and the stall the CS before
418     // issuing a PIPE_CONTROL to write the timestamp
419     pipeCtrlParams = g_cRenderHal_InitPipeControlParams;
420     pipeCtrlParams.presDest      = &state->renderTimeStampResource.osResource;
421     pipeCtrlParams.dwPostSyncOp  = MHW_FLUSH_NOWRITE;
422     pipeCtrlParams.dwFlushMode   = MHW_FLUSH_WRITE_CACHE;
423     CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtrlParams));
424 
425     if (state->svmBufferUsed)
426     {
427         // Find the SVM slot, patch it into this dummy pipe_control
428         for (uint32_t i = 0; i < state->cmDeviceParam.maxBufferTableSize; i++)
429         {
430             //Only register SVM resource here
431             if (state->bufferTable[i].address)
432             {
433                 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
434                     osInterface,
435                     &state->bufferTable[i].osResource,
436                     true,
437                     false));
438             }
439         }
440     }
441 
442     if ( slmUsed & state->pfnIsWASLMinL3Cache())
443     {
444         //Disable SLM in L3 when command submitted
445         state->l3Settings.enableSlm = false;
446         HalCm_GetLegacyRenderHalL3Setting( &state->l3Settings, &renderHal->L3CacheSettings );
447         renderHal->pfnEnableL3Caching(renderHal, &renderHal->L3CacheSettings);
448         mhwRender->SetL3Cache(&mosCmdBuffer);
449     }
450 
451     // Send Sync Tag
452     if (!state->dshEnabled || !(enableWalker || enableGpGpu))
453     {
454         CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSendSyncTag( renderHal, &mosCmdBuffer ) );
455     }
456 
457     // Update tracker resource
458     CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnUpdateTrackerResource(state, &mosCmdBuffer, tag));
459 
460     // issue a PIPE_CONTROL to write timestamp
461     syncOffset += sizeof(uint64_t);
462     pipeCtrlParams = g_cRenderHal_InitPipeControlParams;
463     pipeCtrlParams.presDest          = &state->renderTimeStampResource.osResource;
464     pipeCtrlParams.dwResourceOffset  = syncOffset;
465     pipeCtrlParams.dwPostSyncOp      = MHW_FLUSH_WRITE_TIMESTAMP_REG;
466     pipeCtrlParams.dwFlushMode       = MHW_FLUSH_READ_CACHE;
467     CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtrlParams));
468 
469     // Record registers by unified media profiler in the end
470     if (state->perfProfiler != nullptr)
471     {
472         CM_CHK_MOSSTATUS_GOTOFINISH(state->perfProfiler->AddPerfCollectEndCmd((void *)state, state->osInterface, mhwMiInterface, &mosCmdBuffer));
473     }
474 
475     //Couple to the BB_START , otherwise GPU Hang without it in KMD.
476     CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiBatchBufferEnd(&mosCmdBuffer, nullptr));
477 
478     // Return unused command buffer space to OS
479     osInterface->pfnReturnCommandBuffer(osInterface, &mosCmdBuffer, 0);
480 
481 #if MDF_COMMAND_BUFFER_DUMP
482     if (state->dumpCommandBuffer)
483     {
484         state->pfnDumpCommadBuffer(state, &mosCmdBuffer, 0, mhw_state_heap_g8_X::RENDER_SURFACE_STATE_CMD::byteSize);
485     }
486 #endif
487 
488 
489 #if MDF_SURFACE_STATE_DUMP
490     if (state->dumpSurfaceState)
491     {
492         state->pfnDumpSurfaceState(state, 0, mhw_state_heap_g8_X::RENDER_SURFACE_STATE_CMD::byteSize);
493 
494     }
495 #endif
496 
497     CM_CHK_MOSSTATUS_GOTOFINISH( state->pfnGetGpuTime( state, &state->taskTimeStamp->submitTimeInGpu[ taskId ] ) );
498     CM_CHK_MOSSTATUS_GOTOFINISH( state->pfnGetGlobalTime( &state->taskTimeStamp->submitTimeInCpu[ taskId ] ) );
499 
500     // Submit command buffer
501     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnSubmitCommandBuffer(osInterface,
502         &mosCmdBuffer,
503         state->nullHwRenderCm));
504 
505     if (state->nullHwRenderCm == false)
506     {
507         stateHeap->pCurMediaState->bBusy = true;
508         if ( !enableWalker && !enableGpGpu )
509         {
510             batchBuffer->bBusy     = true;
511             batchBuffer->dwSyncTag = syncTag;
512         }
513     }
514 
515     // reset API call number of HW threads
516     state->maxHWThreadValues.apiValue = 0;
517 
518     // reset EU saturation
519     state->euSaturationEnabled = false;
520 
521     renderHal->bEUSaturationNoSSD   = false;
522 
523     state->pfnReferenceCommandBuffer(&mosCmdBuffer.OsResource, cmdBuffer);
524 
525     eStatus = MOS_STATUS_SUCCESS;
526 
527 finish:
528     // Failed -> discard all changes in Command Buffer
529     if (eStatus != MOS_STATUS_SUCCESS)
530     {
531         // Buffer overflow - display overflow size
532         if (mosCmdBuffer.iRemaining < 0)
533         {
534             CM_ASSERTMESSAGE("Command Buffer overflow by %d bytes.", -mosCmdBuffer.iRemaining);
535         }
536 
537         // Move command buffer back to beginning
538         tmp = remaining - mosCmdBuffer.iRemaining;
539         mosCmdBuffer.iRemaining  = remaining;
540         mosCmdBuffer.iOffset    -= tmp;
541         mosCmdBuffer.pCmdPtr     = mosCmdBuffer.pCmdBase + mosCmdBuffer.iOffset/sizeof(uint32_t);
542 
543         // Return unused command buffer space to OS
544         osInterface->pfnReturnCommandBuffer(osInterface, &mosCmdBuffer, 0);
545     }
546 
547     return eStatus;
548 }
549 
SetMediaWalkerParams(CM_WALKING_PARAMETERS engineeringParams,PCM_HAL_WALKER_PARAMS walkerParams)550 MOS_STATUS CM_HAL_G8_X::SetMediaWalkerParams(
551     CM_WALKING_PARAMETERS          engineeringParams,
552     PCM_HAL_WALKER_PARAMS          walkerParams)
553 {
554 
555     MEDIA_OBJECT_WALKER_CMD_G6 mediaWalkerCmd;
556     mediaWalkerCmd.DW5.value = engineeringParams.Value[0];
557     walkerParams->scoreboardMask = mediaWalkerCmd.DW5.scoreboardMask;
558 
559     mediaWalkerCmd.DW6.value = engineeringParams.Value[1];
560     walkerParams->colorCountMinusOne = mediaWalkerCmd.DW6.colorCountMinusOne;
561     walkerParams->midLoopUnitX = mediaWalkerCmd.DW6.midLoopUnitX;
562     walkerParams->midLoopUnitY = mediaWalkerCmd.DW6.midLoopUnitY;
563     walkerParams->middleLoopExtraSteps = mediaWalkerCmd.DW6.midLoopExtraSteps;
564 
565     mediaWalkerCmd.DW7.value = engineeringParams.Value[2];
566     walkerParams->localLoopExecCount = mediaWalkerCmd.DW7.localLoopExecCount;
567     walkerParams->globalLoopExecCount = mediaWalkerCmd.DW7.globalLoopExecCount;
568 
569     mediaWalkerCmd.DW8.value = engineeringParams.Value[3];
570     walkerParams->blockResolution.x = mediaWalkerCmd.DW8.blockResolutionX;
571     walkerParams->blockResolution.y = mediaWalkerCmd.DW8.blockResolutionY;
572 
573     mediaWalkerCmd.DW9.value = engineeringParams.Value[4];
574     walkerParams->localStart.x = mediaWalkerCmd.DW9.localStartX;
575     walkerParams->localStart.y = mediaWalkerCmd.DW9.localStartY;
576 
577     mediaWalkerCmd.DW11.value = engineeringParams.Value[6];
578     walkerParams->localOutLoopStride.x = mediaWalkerCmd.DW11.localOuterLoopStrideX;
579     walkerParams->localOutLoopStride.y = mediaWalkerCmd.DW11.localOuterLoopStrideY;
580 
581     mediaWalkerCmd.DW12.value = engineeringParams.Value[7];
582     walkerParams->localInnerLoopUnit.x = mediaWalkerCmd.DW12.localInnerLoopUnitX;
583     walkerParams->localInnerLoopUnit.y = mediaWalkerCmd.DW12.localInnerLoopUnitY;
584 
585     mediaWalkerCmd.DW13.value = engineeringParams.Value[8];
586     walkerParams->globalResolution.x = mediaWalkerCmd.DW13.globalResolutionX;
587     walkerParams->globalResolution.y = mediaWalkerCmd.DW13.globalResolutionY;
588 
589     mediaWalkerCmd.DW14.value = engineeringParams.Value[9];
590     walkerParams->globalStart.x = mediaWalkerCmd.DW14.globalStartX;
591     walkerParams->globalStart.y = mediaWalkerCmd.DW14.globalStartY;
592 
593     mediaWalkerCmd.DW15.value = engineeringParams.Value[10];
594     walkerParams->globalOutlerLoopStride.x = mediaWalkerCmd.DW15.globalOuterLoopStrideX;
595     walkerParams->globalOutlerLoopStride.y = mediaWalkerCmd.DW15.globalOuterLoopStrideY;
596 
597     mediaWalkerCmd.DW16.value = engineeringParams.Value[11];
598     walkerParams->globalInnerLoopUnit.x = mediaWalkerCmd.DW16.globalInnerLoopUnitX;
599     walkerParams->globalInnerLoopUnit.y = mediaWalkerCmd.DW16.globalInnerLoopUnitY;
600 
601     walkerParams->localEnd.x = 0;
602     walkerParams->localEnd.y = 0;
603 
604     return MOS_STATUS_SUCCESS;
605 }
606 
HwSetSurfaceMemoryObjectControl(uint16_t memObjCtl,PRENDERHAL_SURFACE_STATE_PARAMS surfStateParams)607 MOS_STATUS CM_HAL_G8_X::HwSetSurfaceMemoryObjectControl(
608     uint16_t                        memObjCtl,
609     PRENDERHAL_SURFACE_STATE_PARAMS surfStateParams )
610 {
611     MOS_STATUS                      eStatus = MOS_STATUS_SUCCESS;
612     PRENDERHAL_INTERFACE            renderHal = m_cmState->renderHal;
613     CM_HAL_MEMORY_OBJECT_CONTROL_G8 cacheType;
614 
615     MOS_ZeroMemory( &cacheType, sizeof( CM_HAL_MEMORY_OBJECT_CONTROL_G8 ) );
616 
617     if ( ( memObjCtl & CM_MEMOBJCTL_CACHE_MASK ) >> 8 == CM_INVALID_MEMOBJCTL )
618     {
619         CM_CHK_NULL_GOTOFINISH_MOSERROR(renderHal->pOsInterface->pfnGetGmmClientContext(renderHal->pOsInterface));
620         cacheType.value = renderHal->pOsInterface->pfnGetGmmClientContext(renderHal->pOsInterface)->CachePolicyGetMemoryObject(nullptr, CM_RESOURCE_USAGE_SurfaceState).DwordValue;
621 
622         // for default value and SVM surface, override the cache control from WB to WT
623         if ( ( ( memObjCtl & 0xF0 ) >> 4 ) == 2 )
624         {
625             cacheType.Gen8.cacheControl = 2;
626         }
627     }
628     else
629     {
630         // Get the cache type of the memory object.
631         // Since memObjCtl is composed with cache type(8:15), memory type(4:7), ages(0:3), rearranging is needed
632         cacheType.Gen8.age = ( memObjCtl & 0xF );
633         cacheType.Gen8.cacheControl = ( memObjCtl & 0xF0 ) >> 4;
634         cacheType.Gen8.targetCache = ( memObjCtl & CM_MEMOBJCTL_CACHE_MASK ) >> 8;
635     }
636 
637     surfStateParams->MemObjCtl = cacheType.value;
638 
639 finish:
640     return eStatus;
641 }
642 
RegisterSampler8x8(PCM_HAL_SAMPLER_8X8_PARAM param)643 MOS_STATUS CM_HAL_G8_X::RegisterSampler8x8(
644     PCM_HAL_SAMPLER_8X8_PARAM    param)
645 {
646     MOS_STATUS                  eStatus = MOS_STATUS_SUCCESS;
647     PMHW_SAMPLER_STATE_PARAM    samplerEntry = nullptr;
648     PCM_HAL_SAMPLER_8X8_ENTRY   sampler8x8Entry = nullptr;
649     PCM_HAL_STATE               state = m_cmState;
650 
651     if (param->sampler8x8State.stateType == CM_SAMPLER8X8_AVS)
652     {
653         for (uint32_t i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++) {
654             if (!state->samplerTable[i].bInUse) {
655                 samplerEntry = &state->samplerTable[i];
656                 param->handle = (uint32_t)i << 16;
657                 samplerEntry->bInUse = true;
658                 break;
659             }
660         }
661 
662         int16_t samplerIndex = 0;
663         for (uint32_t i = 0; i < state->cmDeviceParam.maxSampler8x8TableSize; i++) {
664             if (!state->sampler8x8Table[i].inUse) {
665                 sampler8x8Entry = &state->sampler8x8Table[i];
666                 samplerIndex = (int16_t)i;
667                 param->handle |= (uint32_t)(i & 0xffff);
668                 sampler8x8Entry->inUse = true;
669                 break;
670             }
671         }
672 
673         if (!samplerEntry || !sampler8x8Entry) {
674             CM_ASSERTMESSAGE("Sampler or AVS table is full");
675             return MOS_STATUS_NULL_POINTER;
676         }
677 
678         //State data from application
679         samplerEntry->SamplerType                  = MHW_SAMPLER_TYPE_AVS;
680         samplerEntry->ElementType                  = MHW_Sampler64Elements;
681         samplerEntry->Avs                          = param->sampler8x8State.avsParam.avsState;
682         samplerEntry->Avs.stateID                  = samplerIndex;
683         samplerEntry->Avs.iTable8x8_Index          = samplerIndex;  // Used for calculating the Media offset of 8x8 table
684         samplerEntry->Avs.pMhwSamplerAvsTableParam = &sampler8x8Entry->sampler8x8State.mhwSamplerAvsTableParam;
685 
686         if (samplerEntry->Avs.EightTapAFEnable)
687             param->sampler8x8State.avsParam.avsTable.adaptiveFilterAllChannels = true;
688         else
689             param->sampler8x8State.avsParam.avsTable.adaptiveFilterAllChannels = false;
690 
691         RegisterSampler8x8AVSTable(&sampler8x8Entry->sampler8x8State,
692                                    &param->sampler8x8State.avsParam.avsTable);
693 
694         sampler8x8Entry->sampler8x8State.stateType  = CM_SAMPLER8X8_AVS;
695     }
696     else if (param->sampler8x8State.stateType == CM_SAMPLER8X8_MISC)
697     {
698         for (uint32_t i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++)
699         {
700             if (!state->samplerTable[i].bInUse)
701             {
702                 samplerEntry = &state->samplerTable[i];
703                 param->handle = (uint32_t)i << 16;
704                 samplerEntry->bInUse = true;
705                 break;
706             }
707         }
708 
709         if ( samplerEntry == nullptr )
710         {
711             return MOS_STATUS_INVALID_HANDLE;
712         }
713         samplerEntry->SamplerType  = MHW_SAMPLER_TYPE_MISC;
714 
715         samplerEntry->Misc.byteHeight = param->sampler8x8State.miscState.DW0.Height;
716         samplerEntry->Misc.byteWidth  = param->sampler8x8State.miscState.DW0.Width;
717         samplerEntry->Misc.wRow[0]    = param->sampler8x8State.miscState.DW0.Row0;
718         samplerEntry->Misc.wRow[1]    = param->sampler8x8State.miscState.DW1.Row1;
719         samplerEntry->Misc.wRow[2]    = param->sampler8x8State.miscState.DW1.Row2;
720         samplerEntry->Misc.wRow[3]    = param->sampler8x8State.miscState.DW2.Row3;
721         samplerEntry->Misc.wRow[4]    = param->sampler8x8State.miscState.DW2.Row4;
722         samplerEntry->Misc.wRow[5]    = param->sampler8x8State.miscState.DW3.Row5;
723         samplerEntry->Misc.wRow[6]    = param->sampler8x8State.miscState.DW3.Row6;
724         samplerEntry->Misc.wRow[7]    = param->sampler8x8State.miscState.DW4.Row7;
725         samplerEntry->Misc.wRow[8]    = param->sampler8x8State.miscState.DW4.Row8;
726         samplerEntry->Misc.wRow[9]    = param->sampler8x8State.miscState.DW5.Row9;
727         samplerEntry->Misc.wRow[10]   = param->sampler8x8State.miscState.DW5.Row10;
728         samplerEntry->Misc.wRow[11]   = param->sampler8x8State.miscState.DW6.Row11;
729         samplerEntry->Misc.wRow[12]   = param->sampler8x8State.miscState.DW6.Row12;
730         samplerEntry->Misc.wRow[13]   = param->sampler8x8State.miscState.DW7.Row13;
731         samplerEntry->Misc.wRow[14]   = param->sampler8x8State.miscState.DW7.Row14;
732     }
733     else if (param->sampler8x8State.stateType == CM_SAMPLER8X8_CONV)
734     {
735         for (uint32_t i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++)
736         {
737             if (!state->samplerTable[i].bInUse) {
738                 samplerEntry = &state->samplerTable[i];
739                 param->handle = (uint32_t)i << 16;
740                 samplerEntry->bInUse = true;
741                 break;
742             }
743         }
744 
745         if ( samplerEntry == nullptr )
746         {
747             return MOS_STATUS_INVALID_HANDLE;
748         }
749 
750         MOS_ZeroMemory(&samplerEntry->Convolve, sizeof(samplerEntry->Convolve));
751 
752         samplerEntry->SamplerType  = MHW_SAMPLER_TYPE_CONV;
753 
754         samplerEntry->Convolve.ui8Height               = param->sampler8x8State.convolveState.height;
755         samplerEntry->Convolve.ui8Width                = param->sampler8x8State.convolveState.width;
756         samplerEntry->Convolve.ui8ScaledDownValue      = param->sampler8x8State.convolveState.scaleDownValue;
757         samplerEntry->Convolve.ui8SizeOfTheCoefficient = param->sampler8x8State.convolveState.coeffSize;
758 
759         samplerEntry->ElementType = MHW_Sampler64Elements;
760 
761         for ( int i = 0; i < CM_NUM_CONVOLVE_ROWS_BDW; i++ )
762         {
763             MHW_SAMPLER_CONVOLVE_COEFF_TABLE *coeffTable  = &(samplerEntry->Convolve.CoeffTable[i]);
764             CM_HAL_CONVOLVE_COEFF_TABLE      *sourceTable = &(param->sampler8x8State.convolveState.table[i]);
765             if ( samplerEntry->Convolve.ui8SizeOfTheCoefficient == 1 )
766             {
767                 coeffTable->wFilterCoeff[0]  = FloatToS3_12( sourceTable->FilterCoeff_0_0 );
768                 coeffTable->wFilterCoeff[1]  = FloatToS3_12( sourceTable->FilterCoeff_0_1 );
769                 coeffTable->wFilterCoeff[2]  = FloatToS3_12( sourceTable->FilterCoeff_0_2 );
770                 coeffTable->wFilterCoeff[3]  = FloatToS3_12( sourceTable->FilterCoeff_0_3 );
771                 coeffTable->wFilterCoeff[4]  = FloatToS3_12( sourceTable->FilterCoeff_0_4 );
772                 coeffTable->wFilterCoeff[5]  = FloatToS3_12( sourceTable->FilterCoeff_0_5 );
773                 coeffTable->wFilterCoeff[6]  = FloatToS3_12( sourceTable->FilterCoeff_0_6 );
774                 coeffTable->wFilterCoeff[7]  = FloatToS3_12( sourceTable->FilterCoeff_0_7 );
775                 coeffTable->wFilterCoeff[8]  = FloatToS3_12( sourceTable->FilterCoeff_0_8 );
776                 coeffTable->wFilterCoeff[9]  = FloatToS3_12( sourceTable->FilterCoeff_0_9 );
777                 coeffTable->wFilterCoeff[10] = FloatToS3_12( sourceTable->FilterCoeff_0_10 );
778                 coeffTable->wFilterCoeff[11] = FloatToS3_12( sourceTable->FilterCoeff_0_11 );
779                 coeffTable->wFilterCoeff[12] = FloatToS3_12( sourceTable->FilterCoeff_0_12 );
780                 coeffTable->wFilterCoeff[13] = FloatToS3_12( sourceTable->FilterCoeff_0_13 );
781                 coeffTable->wFilterCoeff[14] = FloatToS3_12( sourceTable->FilterCoeff_0_14 );
782                 coeffTable->wFilterCoeff[15] = FloatToS3_12( sourceTable->FilterCoeff_0_15 );
783             }
784             else
785             {
786                 coeffTable->wFilterCoeff[0]  = FloatToS3_4( sourceTable->FilterCoeff_0_0 );
787                 coeffTable->wFilterCoeff[1]  = FloatToS3_4( sourceTable->FilterCoeff_0_1 );
788                 coeffTable->wFilterCoeff[2]  = FloatToS3_4( sourceTable->FilterCoeff_0_2 );
789                 coeffTable->wFilterCoeff[3]  = FloatToS3_4( sourceTable->FilterCoeff_0_3 );
790                 coeffTable->wFilterCoeff[4]  = FloatToS3_4( sourceTable->FilterCoeff_0_4 );
791                 coeffTable->wFilterCoeff[5]  = FloatToS3_4( sourceTable->FilterCoeff_0_5 );
792                 coeffTable->wFilterCoeff[6]  = FloatToS3_4( sourceTable->FilterCoeff_0_6 );
793                 coeffTable->wFilterCoeff[7]  = FloatToS3_4( sourceTable->FilterCoeff_0_7 );
794                 coeffTable->wFilterCoeff[8]  = FloatToS3_4( sourceTable->FilterCoeff_0_8 );
795                 coeffTable->wFilterCoeff[9]  = FloatToS3_4( sourceTable->FilterCoeff_0_9 );
796                 coeffTable->wFilterCoeff[10] = FloatToS3_4( sourceTable->FilterCoeff_0_10 );
797                 coeffTable->wFilterCoeff[11] = FloatToS3_4( sourceTable->FilterCoeff_0_11 );
798                 coeffTable->wFilterCoeff[12] = FloatToS3_4( sourceTable->FilterCoeff_0_12 );
799                 coeffTable->wFilterCoeff[13] = FloatToS3_4( sourceTable->FilterCoeff_0_13 );
800                 coeffTable->wFilterCoeff[14] = FloatToS3_4( sourceTable->FilterCoeff_0_14 );
801                 coeffTable->wFilterCoeff[15] = FloatToS3_4( sourceTable->FilterCoeff_0_15 );
802             }
803         }
804 
805     }
806 
807     return eStatus;
808 }
809 
SetupHwDebugControl(PRENDERHAL_INTERFACE renderHal,PMOS_COMMAND_BUFFER cmdBuffer)810 MOS_STATUS CM_HAL_G8_X::SetupHwDebugControl(
811     PRENDERHAL_INTERFACE   renderHal,
812     PMOS_COMMAND_BUFFER    cmdBuffer)
813 {
814     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
815 
816     if (!renderHal || !cmdBuffer)
817     {
818         return MOS_STATUS_NULL_POINTER;
819     }
820 
821     MHW_MI_LOAD_REGISTER_IMM_PARAMS loadRegImm;
822     MOS_ZeroMemory(&loadRegImm, sizeof(MHW_MI_LOAD_REGISTER_IMM_PARAMS));
823 
824     // INSTPM, global debug enable
825     loadRegImm.dwRegister = INSTPM;
826     loadRegImm.dwData = (INSTPM_GLOBAL_DEBUG_ENABLE << 16) | INSTPM_GLOBAL_DEBUG_ENABLE;
827     eStatus = renderHal->pMhwMiInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &loadRegImm);
828     if(eStatus != MOS_STATUS_SUCCESS)
829     {
830         return eStatus;
831     }
832 
833     // TD_CTL, force thread breakpoint enable
834     // Also enable external exception, because the source-level debugger has to
835     // be able to interrupt runing EU threads.
836     loadRegImm.dwRegister = TD_CTL;
837     loadRegImm.dwData = TD_CTL_FORCE_THREAD_BKPT_ENABLE | TD_CTL_FORCE_EXT_EXCEPTION_ENABLE;
838     eStatus = renderHal->pMhwMiInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &loadRegImm);
839 
840     return eStatus;
841 }
842 
RegisterSampler8x8AVSTable(PCM_HAL_SAMPLER_8X8_TABLE sampler8x8AvsTable,PCM_AVS_TABLE_STATE_PARAMS avsTable)843 MOS_STATUS CM_HAL_G8_X::RegisterSampler8x8AVSTable(
844     PCM_HAL_SAMPLER_8X8_TABLE  sampler8x8AvsTable,
845     PCM_AVS_TABLE_STATE_PARAMS avsTable)
846 {
847     MOS_ZeroMemory(&sampler8x8AvsTable->mhwSamplerAvsTableParam, sizeof(sampler8x8AvsTable->mhwSamplerAvsTableParam));
848 
849     sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea8Pixels = MEDIASTATE_AVS_TRANSITION_AREA_8_PIXELS;
850     sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea4Pixels = MEDIASTATE_AVS_TRANSITION_AREA_4_PIXELS;
851     sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative8Pixels  = MEDIASTATE_AVS_MAX_DERIVATIVE_8_PIXELS;
852     sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative4Pixels  = MEDIASTATE_AVS_MAX_DERIVATIVE_4_PIXELS;
853     sampler8x8AvsTable->mhwSamplerAvsTableParam.byteDefaultSharpnessLevel = MEDIASTATE_AVS_SHARPNESS_LEVEL_SHARP;
854 
855     sampler8x8AvsTable->mhwSamplerAvsTableParam.bEnableRGBAdaptive         = false;
856     sampler8x8AvsTable->mhwSamplerAvsTableParam.bAdaptiveFilterAllChannels = avsTable->adaptiveFilterAllChannels;
857     sampler8x8AvsTable->mhwSamplerAvsTableParam.bBypassXAdaptiveFiltering  = true;
858     sampler8x8AvsTable->mhwSamplerAvsTableParam.bBypassYAdaptiveFiltering  = true;
859 
860     // Assign the coefficient table;
861     for (uint32_t i = 0; i < CM_NUM_HW_POLYPHASE_TABLES_G8; i++)
862     {
863         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[0] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_0;
864         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[1] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_1;
865         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[2] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_2;
866         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[3] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_3;
867         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[4] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_4;
868         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[5] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_5;
869         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[6] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_6;
870         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[7] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_7;
871 
872         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[0] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_0;
873         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[1] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_1;
874         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[2] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_2;
875         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[3] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_3;
876         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[4] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_4;
877         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[5] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_5;
878         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[6] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_6;
879         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[7] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_7;
880 
881         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneXFilterCoefficient[0] = (uint8_t)avsTable->tbl1X[i].FilterCoeff_0_2;
882         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneXFilterCoefficient[1] = (uint8_t)avsTable->tbl1X[i].FilterCoeff_0_3;
883         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneXFilterCoefficient[2] = (uint8_t)avsTable->tbl1X[i].FilterCoeff_0_4;
884         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneXFilterCoefficient[3] = (uint8_t)avsTable->tbl1X[i].FilterCoeff_0_5;
885 
886         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneYFilterCoefficient[0] = (uint8_t)avsTable->tbl1Y[i].FilterCoeff_0_2;
887         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneYFilterCoefficient[1] = (uint8_t)avsTable->tbl1Y[i].FilterCoeff_0_3;
888         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneYFilterCoefficient[2] = (uint8_t)avsTable->tbl1Y[i].FilterCoeff_0_4;
889         sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneYFilterCoefficient[3] = (uint8_t)avsTable->tbl1Y[i].FilterCoeff_0_5;
890     }
891 
892     sampler8x8AvsTable->mhwSamplerAvsTableParam.byteDefaultSharpnessLevel = avsTable->defaultSharpLevel;
893     sampler8x8AvsTable->mhwSamplerAvsTableParam.bBypassXAdaptiveFiltering = avsTable->bypassXAF;
894     sampler8x8AvsTable->mhwSamplerAvsTableParam.bBypassYAdaptiveFiltering = avsTable->bypassYAF;
895 
896     if (!avsTable->bypassXAF  && !avsTable->bypassYAF) {
897         sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative8Pixels  = avsTable->maxDerivative8Pixels;
898         sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative4Pixels  = avsTable->maxDerivative4Pixels;
899         sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea8Pixels = avsTable->transitionArea8Pixels;
900         sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea4Pixels = avsTable->transitionArea4Pixels;
901     }
902 
903     return MOS_STATUS_SUCCESS;
904 }
905 
UpdatePlatformInfoFromPower(PCM_PLATFORM_INFO platformInfo,bool euSaturated)906 MOS_STATUS CM_HAL_G8_X::UpdatePlatformInfoFromPower(
907     PCM_PLATFORM_INFO platformInfo,
908     bool              euSaturated)
909 {
910     PCM_HAL_STATE              state     = m_cmState;
911     PRENDERHAL_INTERFACE       renderHal = state->renderHal;
912     CM_POWER_OPTION            cmPower;
913 
914     if ( state->requestSingleSlice ||
915          renderHal->bRequestSingleSlice ||
916         (state->powerOption.nSlice != 0 && state->powerOption.nSlice < platformInfo->numSlices))
917     {
918         platformInfo->numSubSlices = platformInfo->numSubSlices / platformInfo->numSlices;
919         if (state->powerOption.nSlice > 1)
920         {
921             platformInfo->numSubSlices *= state->powerOption.nSlice;
922             platformInfo->numSlices     = state->powerOption.nSlice;
923         }
924         else
925         {
926             platformInfo->numSlices     = 1;
927         }
928     }
929     else if (euSaturated)
930     {
931         // No SSD and EU Saturation, request maximum number of slices/subslices/EUs
932         cmPower.nSlice    = (uint16_t)platformInfo->numSlices;
933         cmPower.nSubSlice = (uint16_t)platformInfo->numSubSlices;
934         cmPower.nEU       = (uint16_t)(platformInfo->numEUsPerSubSlice * platformInfo->numSubSlices);
935 
936         state->pfnSetPowerOption(state, &cmPower);
937     }
938 
939     return MOS_STATUS_SUCCESS;
940 }
941 
GetExpectedGtSystemConfig(PCM_EXPECTED_GT_SYSTEM_INFO expectedConfig)942 MOS_STATUS CM_HAL_G8_X::GetExpectedGtSystemConfig(
943     PCM_EXPECTED_GT_SYSTEM_INFO expectedConfig)
944 {
945     if (m_genGT == PLATFORM_INTEL_GT1)
946     {
947         expectedConfig->numSlices    = BDW_GT1_MAX_NUM_SLICES;
948         expectedConfig->numSubSlices = BDW_GT1_MAX_NUM_SUBSLICES;
949     }
950     else if( m_genGT == PLATFORM_INTEL_GT1_5 )
951     {
952         expectedConfig->numSlices    = BDW_GT1_5_MAX_NUM_SLICES;
953         expectedConfig->numSubSlices = BDW_GT1_5_MAX_NUM_SUBSLICES;
954     }
955     else if (m_genGT == PLATFORM_INTEL_GT2)
956     {
957         expectedConfig->numSlices    = BDW_GT2_MAX_NUM_SLICES;
958         expectedConfig->numSubSlices = BDW_GT2_MAX_NUM_SUBSLICES;
959     }
960     else if (m_genGT == PLATFORM_INTEL_GT3)
961     {
962         expectedConfig->numSlices    = BDW_GT3_MAX_NUM_SLICES;
963         expectedConfig->numSubSlices = BDW_GT3_MAX_NUM_SUBSLICES;
964     }
965     else
966     {
967         expectedConfig->numSlices    = 0;
968         expectedConfig->numSubSlices = 0;
969     }
970 
971     return MOS_STATUS_SUCCESS;
972 }
973 
AllocateSIPCSRResource()974 MOS_STATUS CM_HAL_G8_X::AllocateSIPCSRResource()
975 {
976     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
977     if (Mos_ResourceIsNull(&m_cmState->sipResource.osResource))
978     {
979         eStatus = HalCm_AllocateSipResource(m_cmState); // create  sip resource if it does not exist
980     }
981 
982     return eStatus;
983 }
984 
GetCopyKernelIsa(void * & isa,uint32_t & isaSize)985 MOS_STATUS CM_HAL_G8_X::GetCopyKernelIsa(void  *&isa, uint32_t &isaSize)
986 {
987 
988     isa = (void *)pGPUCopy_kernel_isa_gen8;
989     isaSize = iGPUCopy_kernel_isa_size_gen8;
990 
991     return MOS_STATUS_SUCCESS;
992 }
993 
GetInitKernelIsa(void * & isa,uint32_t & isaSize)994 MOS_STATUS CM_HAL_G8_X::GetInitKernelIsa(void  *&isa, uint32_t &isaSize)
995 {
996     isa = (void *)pGPUInit_kernel_isa_Gen8;
997     isaSize = iGPUInit_kernel_isa_size_Gen8;
998 
999     return MOS_STATUS_SUCCESS;
1000 }
1001 
GetMediaWalkerMaxThreadWidth()1002 uint32_t CM_HAL_G8_X::GetMediaWalkerMaxThreadWidth()
1003 {
1004     return CM_MAX_THREADSPACE_WIDTH_FOR_MW;
1005 }
1006 
GetMediaWalkerMaxThreadHeight()1007 uint32_t CM_HAL_G8_X::GetMediaWalkerMaxThreadHeight()
1008 {
1009     return CM_MAX_THREADSPACE_HEIGHT_FOR_MW;
1010 }
1011 
GetHwSurfaceBTIInfo(PCM_SURFACE_BTI_INFO btiInfo)1012 MOS_STATUS CM_HAL_G8_X::GetHwSurfaceBTIInfo(
1013           PCM_SURFACE_BTI_INFO btiInfo)
1014 {
1015     if (btiInfo == nullptr)
1016     {
1017         return MOS_STATUS_NULL_POINTER;
1018     }
1019 
1020     btiInfo->normalSurfaceStart      =  CM_NULL_SURFACE_BINDING_INDEX + 1;
1021     btiInfo->normalSurfaceEnd        =  CM_GLOBAL_SURFACE_INDEX_START - 1;
1022     btiInfo->reservedSurfaceStart    =  CM_GLOBAL_SURFACE_INDEX_START;
1023     btiInfo->reservedSurfaceEnd      =  CM_GLOBAL_SURFACE_INDEX_START +  CM_GLOBAL_SURFACE_NUMBER + CM_GTPIN_SURFACE_NUMBER;
1024 
1025     return MOS_STATUS_SUCCESS;
1026 }
1027 
SetSuggestedL3Conf(L3_SUGGEST_CONFIG l3Config)1028 MOS_STATUS CM_HAL_G8_X::SetSuggestedL3Conf(
1029             L3_SUGGEST_CONFIG l3Config)
1030 {
1031     if (l3Config >= sizeof(BDW_L3_PLANE)/sizeof(L3ConfigRegisterValues))
1032     {
1033         return MOS_STATUS_INVALID_PARAMETER;
1034     }
1035     return HalCm_SetL3Cache((L3ConfigRegisterValues *)&BDW_L3_PLANE[l3Config],
1036                                      &m_cmState->l3Settings);
1037 }
1038 
GetGenStepInfo(char * & stepInfoStr)1039 MOS_STATUS CM_HAL_G8_X::GetGenStepInfo(char*& stepInfoStr)
1040 {
1041     const char *genSteppingInfoTable[] = { "A0", "XX", "XX", "B0", "D0", "E0", "F0",
1042                                            "G0", "G1", "H0", "J0" };
1043 
1044     uint32_t genStepId = m_cmState->platform.usRevId;
1045 
1046     uint32_t tablesize = sizeof(genSteppingInfoTable) / sizeof(char *);
1047 
1048     if (genStepId < tablesize)
1049     {
1050         stepInfoStr = (char *)genSteppingInfoTable[genStepId];
1051     }
1052     else
1053     {
1054         stepInfoStr = nullptr;
1055     }
1056 
1057     return MOS_STATUS_SUCCESS;
1058 }
1059 
ColorCountSanityCheck(uint32_t colorCount)1060 int32_t CM_HAL_G8_X::ColorCountSanityCheck(uint32_t colorCount)
1061 {
1062     if (colorCount == CM_INVALID_COLOR_COUNT || colorCount > CM_THREADSPACE_MAX_COLOR_COUNT)
1063     {
1064         CM_ASSERTMESSAGE("Error: Invalid color count.");
1065         return CM_INVALID_ARG_VALUE;
1066     }
1067     return CM_SUCCESS;
1068 }
1069 
MemoryObjectCtrlPolicyCheck(uint32_t memCtrl)1070 bool CM_HAL_G8_X::MemoryObjectCtrlPolicyCheck(uint32_t memCtrl)
1071 {
1072     if ( memCtrl > MEMORY_OBJECT_CONTROL_BDW_L3_LLC_ELLC_ALLOWED )
1073     {
1074         return false;
1075     }
1076 
1077     return true;
1078 }
1079 
GetConvSamplerIndex(PMHW_SAMPLER_STATE_PARAM samplerParam,char * samplerIndexTable,int32_t nSamp8X8Num,int32_t nSampConvNum)1080 int32_t CM_HAL_G8_X::GetConvSamplerIndex(
1081     PMHW_SAMPLER_STATE_PARAM  samplerParam,
1082     char                     *samplerIndexTable,
1083     int32_t                   nSamp8X8Num,
1084     int32_t                   nSampConvNum)
1085 {
1086 
1087     //  2D convolve BDW
1088     int32_t samplerIndex = 1 + (nSamp8X8Num + nSampConvNum) * 2;
1089     while (samplerIndexTable[samplerIndex] != CM_INVALID_INDEX)
1090     {
1091         samplerIndex += 2;
1092     }
1093 
1094     return samplerIndex;
1095 }
1096 
SetL3CacheConfig(const L3ConfigRegisterValues * values,PCmHalL3Settings cmHalL3Setting)1097 MOS_STATUS CM_HAL_G8_X::SetL3CacheConfig(
1098             const L3ConfigRegisterValues *values,
1099             PCmHalL3Settings cmHalL3Setting)
1100 {
1101     return HalCm_SetL3Cache( values, cmHalL3Setting );
1102 }
1103 
GetSamplerParamInfoForSamplerType(PMHW_SAMPLER_STATE_PARAM mhwSamplerParam,SamplerParam & samplerParam)1104 MOS_STATUS CM_HAL_G8_X::GetSamplerParamInfoForSamplerType(
1105             PMHW_SAMPLER_STATE_PARAM mhwSamplerParam,
1106             SamplerParam  &samplerParam)
1107 {
1108     const unsigned int samplerElementSize[MAX_ELEMENT_TYPE_COUNT] = {16, 32, 64, 128, 1024, 2048};
1109 
1110     // gets element_type
1111     switch (mhwSamplerParam->SamplerType)
1112     {
1113         case MHW_SAMPLER_TYPE_CONV:
1114         case MHW_SAMPLER_TYPE_AVS:
1115             samplerParam.elementType = MHW_Sampler64Elements;
1116             break;
1117         case MHW_SAMPLER_TYPE_MISC:
1118             samplerParam.elementType = MHW_Sampler2Elements;
1119             break;
1120         case MHW_SAMPLER_TYPE_3D:
1121             samplerParam.elementType = MHW_Sampler1Element;
1122             break;
1123         default:
1124             samplerParam.elementType = MHW_Sampler1Element;
1125             break;
1126     }
1127 
1128     // bti_stepping for convolve or AVS is 2, other cases are 1.
1129     if ((mhwSamplerParam->SamplerType == MHW_SAMPLER_TYPE_CONV) ||
1130         (mhwSamplerParam->SamplerType == MHW_SAMPLER_TYPE_AVS))
1131     {
1132         samplerParam.btiStepping = 2;
1133     }
1134     else
1135     {
1136         samplerParam.btiStepping = 1;
1137     }
1138 
1139     // gets multiplier
1140     samplerParam.btiMultiplier = samplerElementSize[samplerParam.elementType] / samplerParam.btiStepping;
1141 
1142     // gets size
1143     samplerParam.size = samplerElementSize[samplerParam.elementType];
1144 
1145     return MOS_STATUS_SUCCESS;
1146 }
1147 
ConverTicksToNanoSecondsDefault(uint64_t ticks)1148 uint64_t CM_HAL_G8_X::ConverTicksToNanoSecondsDefault(uint64_t ticks)
1149 {
1150     return (uint64_t)(ticks * CM_NS_PER_TICK_RENDER_G8);
1151 }
1152 
1153