1 /*
2 * Copyright (c) 2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file cm_hal_g8.cpp
24 //! \brief Common HAL CM Gen8 functions
25 //!
26
27 #include "cm_hal_g8.h"
28 #include "cm_common.h"
29 #include "renderhal_platform_interface.h"
30 #include "mhw_state_heap_hwcmd_g8_X.h"
31 #if defined(ENABLE_KERNELS) && (!defined(_FULL_OPEN_SOURCE))
32 #include "cm_gpucopy_kernel_g8.h"
33 #include "cm_gpuinit_kernel_g8.h"
34 #else
// Fallback definitions used when the prebuilt Gen8 kernel headers are not
// included: each kernel binary is reported as absent (null pointer, size 0).

// GPU copy kernel
unsigned char *pGPUCopy_kernel_isa_gen8      = nullptr;
unsigned int  iGPUCopy_kernel_isa_size_gen8  = 0;

// GPU init kernel
unsigned char *pGPUInit_kernel_isa_Gen8      = nullptr;
unsigned int  iGPUInit_kernel_isa_size_Gen8  = 0;
39
40 #endif
41
// NOTE(review): presumably the number of nanoseconds per render-engine
// timestamp tick on Gen8; not referenced in the visible part of this file,
// confirm against its users.
#define CM_NS_PER_TICK_RENDER_G8    (80)
43
//!
//! \brief    Gen8 memory object control value, viewable either as individual
//!           bit-fields or as one raw 32-bit word.
//! \details  HwSetSurfaceMemoryObjectControl() fills the fields (or the raw
//!           word) and copies \c value into the surface state parameters.
//!
union CM_HAL_MEMORY_OBJECT_CONTROL_G8
{
    struct
    {
        uint32_t age          : 2;   // age field
        uint32_t              : 1;   // unnamed padding bit
        uint32_t targetCache  : 2;   // target cache selection
        uint32_t cacheControl : 2;   // cache control mode
        uint32_t              : 25;  // unnamed padding up to 32 bits
    } Gen8;

    uint32_t value;                  // all fields as a single DWORD
};
57
58 #if (_RELEASE_INTERNAL || _DEBUG)
59 #if defined(CM_DIRECT_GUC_SUPPORT)
SubmitDummyCommands(PMHW_BATCH_BUFFER batchBuffer,int32_t taskId,PCM_HAL_KERNEL_PARAM * kernelParam,void ** cmdBuffer)60 MOS_STATUS CM_HAL_G8_X::SubmitDummyCommands(
61 PMHW_BATCH_BUFFER batchBuffer,
62 int32_t taskId,
63 PCM_HAL_KERNEL_PARAM *kernelParam,
64 void **cmdBuffer)
65 {
66 return MOS_STATUS_UNIMPLEMENTED;
67
68 }
69 #endif
70 #endif
71
SubmitCommands(PMHW_BATCH_BUFFER batchBuffer,int32_t taskId,PCM_HAL_KERNEL_PARAM * kernelParam,void ** cmdBuffer)72 MOS_STATUS CM_HAL_G8_X::SubmitCommands(
73 PMHW_BATCH_BUFFER batchBuffer,
74 int32_t taskId,
75 PCM_HAL_KERNEL_PARAM *kernelParam,
76 void **cmdBuffer)
77 {
78 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
79 PCM_HAL_STATE state = m_cmState;
80 PMOS_INTERFACE osInterface = m_cmState->osInterface;
81 PRENDERHAL_INTERFACE_LEGACY renderHal = m_cmState->renderHal;
82 MhwRenderInterface *mhwRender = renderHal->pMhwRenderInterface;
83 PMHW_MI_INTERFACE mhwMiInterface = renderHal->pMhwMiInterface;
84 PRENDERHAL_STATE_HEAP stateHeap = renderHal->pStateHeap;
85 MHW_PIPE_CONTROL_PARAMS pipeCtrlParams = g_cRenderHal_InitPipeControlParams;
86 MHW_MEDIA_STATE_FLUSH_PARAM flushParam = g_cRenderHal_InitMediaStateFlushParams;
87 MHW_ID_LOAD_PARAMS idLoadParams;
88 int32_t remaining = 0;
89 bool enableWalker = state->walkerParams.CmWalkerEnable;
90 bool enableGpGpu = state->taskParam->blGpGpuWalkerEnabled;
91 MOS_COMMAND_BUFFER mosCmdBuffer;
92 uint32_t syncTag;
93 int64_t *taskSyncLocation;
94 int32_t syncOffset;
95 int32_t tmp;
96 PCM_HAL_TASK_PARAM taskParam = state->taskParam;
97 PCM_HAL_BB_ARGS bbCmArgs;
98 RENDERHAL_GENERIC_PROLOG_PARAMS genericPrologParams = {};
99 MOS_RESOURCE *osResource;
100 uint32_t tag;
101 uint32_t tagOffset = 0;
102 bool slmUsed = false;
103
104 MOS_ZeroMemory(&mosCmdBuffer, sizeof(MOS_COMMAND_BUFFER));
105
106 // get the tag
107 tag = renderHal->trackerProducer.GetNextTracker(renderHal->currentTrackerIndex);
108
109 // Get the task sync offset
110 syncOffset = state->pfnGetTaskSyncLocation(state, taskId);
111
112 // Initialize the location
113 taskSyncLocation = (int64_t*)(state->renderTimeStampResource.data + syncOffset);
114 *taskSyncLocation = CM_INVALID_INDEX;
115 *(taskSyncLocation + 1) = CM_INVALID_INDEX;
116 if(state->cbbEnabled)
117 {
118 *(taskSyncLocation + 2) = tag;
119 *(taskSyncLocation + 3) = state->renderHal->currentTrackerIndex;
120 }
121
122 // Update power option of this command;
123 CM_CHK_MOSSTATUS_GOTOFINISH( state->pfnUpdatePowerOption( state, &state->powerOption ) );
124
125 // Register batch buffer for rendering
126 if (!enableWalker && !enableGpGpu)
127 {
128 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
129 osInterface,
130 &batchBuffer->OsResource,
131 true,
132 true));
133 }
134
135 // Register Timestamp Buffer
136 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
137 osInterface,
138 &state->renderTimeStampResource.osResource,
139 true,
140 true));
141
142 // Allocate all available space, unused buffer will be returned later
143 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnGetCommandBuffer(osInterface, &mosCmdBuffer, 0));
144 remaining = mosCmdBuffer.iRemaining;
145
146 // Enable preemption flag in the command buffer header
147 // The flag is required for both Middle Batch Buffer(Thread Group) and Middle Thread preemptions.
148 if (enableGpGpu)
149 {
150 if (taskParam->slmSize == 0 && taskParam->hasBarrier == false)
151 {
152 state->renderHal->pfnEnableGpgpuMiddleBatchBufferPreemption(state->renderHal);
153 }
154 }
155
156 // use frame tracking to write the tracker ID to CM tracker resource
157 renderHal->trackerProducer.GetLatestTrackerResource(renderHal->currentTrackerIndex, &osResource, &tagOffset);
158 renderHal->pfnSetupPrologParams(renderHal, &genericPrologParams, osResource, tagOffset, tag);
159 FrameTrackerTokenFlat_SetProducer(&stateHeap->pCurMediaState->trackerToken, &renderHal->trackerProducer);
160 FrameTrackerTokenFlat_Merge(&stateHeap->pCurMediaState->trackerToken, renderHal->currentTrackerIndex, tag);
161
162 // Record registers by unified media profiler in the beginning
163 if (state->perfProfiler != nullptr)
164 {
165 CM_CHK_MOSSTATUS_GOTOFINISH(state->perfProfiler->AddPerfCollectStartCmd((void *)state, state->osInterface, mhwMiInterface, &mosCmdBuffer));
166 }
167
168 //Send the First PipeControl Command to indicate the beginning of execution
169 pipeCtrlParams = g_cRenderHal_InitPipeControlParams;
170 pipeCtrlParams.presDest = &state->renderTimeStampResource.osResource;
171 pipeCtrlParams.dwResourceOffset = syncOffset;
172 pipeCtrlParams.dwPostSyncOp = MHW_FLUSH_WRITE_TIMESTAMP_REG;
173 pipeCtrlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
174 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtrlParams));
175
176 // Initialize command buffer and insert prolog
177 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnInitCommandBuffer(renderHal, &mosCmdBuffer, &genericPrologParams));
178
179 // update tracker tag used with CM tracker resource
180 renderHal->trackerProducer.StepForward(renderHal->currentTrackerIndex);
181
182 // Increment sync tag
183 syncTag = renderHal->pStateHeap->dwNextTag++;
184
185 // Check if any task to use SLM
186 for (uint32_t i = 0; i < state->taskParam->numKernels; i ++)
187 {
188 if (kernelParam[i]->slmSize > 0)
189 {
190 slmUsed = true;
191 break;
192 }
193 }
194
195 //Check GPGPU task param
196 if (taskParam->slmSize > 0) {
197 slmUsed = true;
198 }
199
200 //enable BDW L3 config
201 state->l3Settings.enableSlm = slmUsed;
202 HalCm_GetLegacyRenderHalL3Setting( &state->l3Settings, &renderHal->L3CacheSettings );
203 renderHal->pfnEnableL3Caching(renderHal, &renderHal->L3CacheSettings);
204 mhwRender->SetL3Cache(&mosCmdBuffer);
205
206 if (renderHal->bSIPKernel)
207 {
208 CM_CHK_MOSSTATUS_GOTOFINISH(SetupHwDebugControl(renderHal, &mosCmdBuffer));
209 }
210
211 // Send Pipeline Select command
212 CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddPipelineSelectCmd(&mosCmdBuffer, enableGpGpu));
213
214 // Send State Base Address command
215 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendStateBaseAddress(renderHal, &mosCmdBuffer));
216
217 // Send Surface States
218 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendSurfaces(renderHal, &mosCmdBuffer));
219
220 if ( renderHal->bSIPKernel)
221 {
222 // Send SIP State
223 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendSipStateCmd(renderHal, &mosCmdBuffer));
224 }
225
226 // Setup VFE State params. Each Renderer MUST call pfnSetVfeStateParams().
227 // See comment in RenderHal_SetVfeStateParams() for details.
228 tmp = RENDERHAL_USE_MEDIA_THREADS_MAX;
229 if (state->maxHWThreadValues.userFeatureValue != 0)
230 {
231 if( state->maxHWThreadValues.userFeatureValue < renderHal->pHwCaps->dwMaxThreads)
232 {
233 tmp = state->maxHWThreadValues.userFeatureValue;
234 }
235 }
236 else if (state->maxHWThreadValues.apiValue != 0)
237 {
238 if( state->maxHWThreadValues.apiValue < renderHal->pHwCaps->dwMaxThreads)
239 {
240 tmp = state->maxHWThreadValues.apiValue;
241 }
242 }
243
244 renderHal->pfnSetVfeStateParams(
245 renderHal,
246 MEDIASTATE_DEBUG_COUNTER_FREE_RUNNING,
247 tmp,
248 state->taskParam->vfeCurbeSize,
249 state->taskParam->urbEntrySize,
250 &state->scoreboardParams);
251
252 // Send VFE State
253 CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddMediaVfeCmd(&mosCmdBuffer,
254 renderHal->pRenderHalPltInterface->GetVfeStateParameters()));
255
256 // Send CURBE Load
257 if (state->taskParam->vfeCurbeSize > 0)
258 {
259 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendCurbeLoad(renderHal, &mosCmdBuffer));
260 }
261
262 // Send Interface Descriptor Load
263 if (state->dshEnabled)
264 {
265 PRENDERHAL_DYNAMIC_STATE dynamicState = ((PRENDERHAL_MEDIA_STATE_LEGACY)stateHeap->pCurMediaState)->pDynamicState;
266 idLoadParams.dwInterfaceDescriptorStartOffset = dynamicState->memoryBlock.GetOffset() +
267 dynamicState->MediaID.dwOffset;
268 idLoadParams.dwInterfaceDescriptorLength = dynamicState->MediaID.iCount * stateHeap->dwSizeMediaID;
269 }
270 else
271 {
272 idLoadParams.dwInterfaceDescriptorStartOffset = stateHeap->pCurMediaState->dwOffset + stateHeap->dwOffsetMediaID;
273 idLoadParams.dwInterfaceDescriptorLength = renderHal->StateHeapSettings.iMediaIDs * stateHeap->dwSizeMediaID;
274 }
275 idLoadParams.pKernelState = nullptr;
276 CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddMediaIDLoadCmd(&mosCmdBuffer, &idLoadParams));
277
278 if (enableWalker)
279 {
280 // send media walker command, if required
281 for (uint32_t i = 0; i < state->taskParam->numKernels; i ++)
282 {
283 // Insert CONDITIONAL_BATCH_BUFFER_END
284 if ( taskParam->conditionalEndBitmap & ((uint64_t)1 << (i)))
285 {
286 // this could be batch buffer end so need to update sync tag, media state flush, write end timestamp
287
288 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendSyncTag(renderHal, &mosCmdBuffer));
289
290 // WA for BDW/CHV
291 if (MEDIA_IS_WA(renderHal->pWaTable, WaMSFWithNoWatermarkTSGHang))
292 {
293 flushParam.bFlushToGo = 1;
294 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMediaStateFlush(&mosCmdBuffer, nullptr, &flushParam));
295 }
296 else if (MEDIA_IS_WA(renderHal->pWaTable, WaAddMediaStateFlushCmd))
297 {
298 flushParam.bFlushToGo = 0;
299 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMediaStateFlush(&mosCmdBuffer, nullptr, &flushParam));
300 }
301
302 // Insert a pipe control for synchronization since this Conditional Batch Buffer End command
303 // will use value written by previous kernel. Also needed since this may be the Batch Buffer End
304 pipeCtrlParams = g_cRenderHal_InitPipeControlParams;
305 pipeCtrlParams.presDest = &state->renderTimeStampResource.osResource;
306 pipeCtrlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
307 pipeCtrlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
308 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtrlParams));
309
310 // issue a PIPE_CONTROL to write timestamp
311 pipeCtrlParams = g_cRenderHal_InitPipeControlParams;
312 pipeCtrlParams.presDest = &state->renderTimeStampResource.osResource;
313 pipeCtrlParams.dwResourceOffset = syncOffset + sizeof(uint64_t);
314 pipeCtrlParams.dwPostSyncOp = MHW_FLUSH_WRITE_TIMESTAMP_REG;
315 pipeCtrlParams.dwFlushMode = MHW_FLUSH_READ_CACHE;
316 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtrlParams));
317
318 // Insert conditional batch buffer end
319 mhwMiInterface->AddMiConditionalBatchBufferEndCmd(&mosCmdBuffer, &taskParam->conditionalBBEndParams[i]);
320 }
321
322 //Insert PIPE_CONTROL at two cases:
323 // 1. synchronization is set
324 // 2. the next kernel has dependency pattern
325 if((i > 0) && ((taskParam->syncBitmap & ((uint64_t)1 << (i-1))) ||
326 (kernelParam[i]->kernelThreadSpaceParam.patternType != CM_NONE_DEPENDENCY)))
327 {
328 //Insert a pipe control as synchronization
329 pipeCtrlParams = g_cRenderHal_InitPipeControlParams;
330 pipeCtrlParams.presDest = &state->renderTimeStampResource.osResource;
331 pipeCtrlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
332 pipeCtrlParams.dwFlushMode = MHW_FLUSH_CUSTOM;
333 pipeCtrlParams.bInvalidateTextureCache = true;
334 pipeCtrlParams.bFlushRenderTargetCache = true;
335 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtrlParams));
336 }
337
338 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnSendMediaWalkerState(state, kernelParam[i], &mosCmdBuffer));
339 }
340
341 // WA for BDW/CHV
342 if (MEDIA_IS_WA(renderHal->pWaTable, WaMSFWithNoWatermarkTSGHang))
343 {
344 flushParam.bFlushToGo = 1;
345 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMediaStateFlush(&mosCmdBuffer, nullptr, &flushParam));
346 }
347 else if (MEDIA_IS_WA(renderHal->pWaTable, WaAddMediaStateFlushCmd))
348 {
349 flushParam.bFlushToGo = 0;
350 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMediaStateFlush(&mosCmdBuffer, nullptr, &flushParam));
351 }
352 }
353 else if (enableGpGpu)
354 {
355 // send GPGPU walker command, if required
356 for (uint32_t i = 0; i < state->taskParam->numKernels; i ++)
357 {
358 //Insert PIPE_CONTROL as synchronization if synchronization is set
359 if((i > 0) && (taskParam->syncBitmap & ((uint64_t)1 << (i-1))))
360 {
361 //Insert a pipe control as synchronization
362 pipeCtrlParams = g_cRenderHal_InitPipeControlParams;
363 pipeCtrlParams.presDest = &state->renderTimeStampResource.osResource;
364 pipeCtrlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
365 pipeCtrlParams.dwFlushMode = MHW_FLUSH_CUSTOM;
366 pipeCtrlParams.bInvalidateTextureCache = true;
367 pipeCtrlParams.bFlushRenderTargetCache = true;
368 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtrlParams));
369 }
370
371 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnSendGpGpuWalkerState(state, kernelParam[i], &mosCmdBuffer));
372 }
373
374 // WA for BDW/CHV
375 if (MEDIA_IS_WA(renderHal->pWaTable, WaMSFWithNoWatermarkTSGHang))
376 {
377 flushParam.bFlushToGo = 1;
378 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMediaStateFlush(&mosCmdBuffer, nullptr, &flushParam));
379 }
380 else if (MEDIA_IS_WA(renderHal->pWaTable, WaAddMediaStateFlushCmd))
381 {
382 flushParam.bFlushToGo = 0;
383 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMediaStateFlush(&mosCmdBuffer, nullptr, &flushParam));
384 }
385
386 }
387 else
388 {
389 // Send Start batch buffer command
390 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiBatchBufferStartCmd(
391 &mosCmdBuffer,
392 batchBuffer));
393
394 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
395 bbCmArgs = (PCM_HAL_BB_ARGS) batchBuffer->pPrivateData;
396
397 if ( (bbCmArgs->refCount == 1) ||
398 (state->taskParam->reuseBBUpdateMask == 1) )
399 {
400 // Add BB end command
401 mhwMiInterface->AddMiBatchBufferEnd(nullptr, batchBuffer);
402 }
403 else //reuse BB
404 {
405 // Skip BB end command
406 mhwMiInterface->SkipMiBatchBufferEndBb(batchBuffer);
407 }
408
409 // UnLock the batch buffer
410 if ( (bbCmArgs->refCount == 1) ||
411 (state->taskParam->reuseBBUpdateMask == 1) )
412 {
413 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnUnlockBB(renderHal, batchBuffer));
414 }
415 }
416
417 // issue a PIPE_CONTROL to flush all caches and the stall the CS before
418 // issuing a PIPE_CONTROL to write the timestamp
419 pipeCtrlParams = g_cRenderHal_InitPipeControlParams;
420 pipeCtrlParams.presDest = &state->renderTimeStampResource.osResource;
421 pipeCtrlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
422 pipeCtrlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
423 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtrlParams));
424
425 if (state->svmBufferUsed)
426 {
427 // Find the SVM slot, patch it into this dummy pipe_control
428 for (uint32_t i = 0; i < state->cmDeviceParam.maxBufferTableSize; i++)
429 {
430 //Only register SVM resource here
431 if (state->bufferTable[i].address)
432 {
433 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
434 osInterface,
435 &state->bufferTable[i].osResource,
436 true,
437 false));
438 }
439 }
440 }
441
442 if ( slmUsed & state->pfnIsWASLMinL3Cache())
443 {
444 //Disable SLM in L3 when command submitted
445 state->l3Settings.enableSlm = false;
446 HalCm_GetLegacyRenderHalL3Setting( &state->l3Settings, &renderHal->L3CacheSettings );
447 renderHal->pfnEnableL3Caching(renderHal, &renderHal->L3CacheSettings);
448 mhwRender->SetL3Cache(&mosCmdBuffer);
449 }
450
451 // Send Sync Tag
452 if (!state->dshEnabled || !(enableWalker || enableGpGpu))
453 {
454 CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSendSyncTag( renderHal, &mosCmdBuffer ) );
455 }
456
457 // Update tracker resource
458 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnUpdateTrackerResource(state, &mosCmdBuffer, tag));
459
460 // issue a PIPE_CONTROL to write timestamp
461 syncOffset += sizeof(uint64_t);
462 pipeCtrlParams = g_cRenderHal_InitPipeControlParams;
463 pipeCtrlParams.presDest = &state->renderTimeStampResource.osResource;
464 pipeCtrlParams.dwResourceOffset = syncOffset;
465 pipeCtrlParams.dwPostSyncOp = MHW_FLUSH_WRITE_TIMESTAMP_REG;
466 pipeCtrlParams.dwFlushMode = MHW_FLUSH_READ_CACHE;
467 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtrlParams));
468
469 // Record registers by unified media profiler in the end
470 if (state->perfProfiler != nullptr)
471 {
472 CM_CHK_MOSSTATUS_GOTOFINISH(state->perfProfiler->AddPerfCollectEndCmd((void *)state, state->osInterface, mhwMiInterface, &mosCmdBuffer));
473 }
474
475 //Couple to the BB_START , otherwise GPU Hang without it in KMD.
476 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiBatchBufferEnd(&mosCmdBuffer, nullptr));
477
478 // Return unused command buffer space to OS
479 osInterface->pfnReturnCommandBuffer(osInterface, &mosCmdBuffer, 0);
480
481 #if MDF_COMMAND_BUFFER_DUMP
482 if (state->dumpCommandBuffer)
483 {
484 state->pfnDumpCommadBuffer(state, &mosCmdBuffer, 0, mhw_state_heap_g8_X::RENDER_SURFACE_STATE_CMD::byteSize);
485 }
486 #endif
487
488
489 #if MDF_SURFACE_STATE_DUMP
490 if (state->dumpSurfaceState)
491 {
492 state->pfnDumpSurfaceState(state, 0, mhw_state_heap_g8_X::RENDER_SURFACE_STATE_CMD::byteSize);
493
494 }
495 #endif
496
497 CM_CHK_MOSSTATUS_GOTOFINISH( state->pfnGetGpuTime( state, &state->taskTimeStamp->submitTimeInGpu[ taskId ] ) );
498 CM_CHK_MOSSTATUS_GOTOFINISH( state->pfnGetGlobalTime( &state->taskTimeStamp->submitTimeInCpu[ taskId ] ) );
499
500 // Submit command buffer
501 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnSubmitCommandBuffer(osInterface,
502 &mosCmdBuffer,
503 state->nullHwRenderCm));
504
505 if (state->nullHwRenderCm == false)
506 {
507 stateHeap->pCurMediaState->bBusy = true;
508 if ( !enableWalker && !enableGpGpu )
509 {
510 batchBuffer->bBusy = true;
511 batchBuffer->dwSyncTag = syncTag;
512 }
513 }
514
515 // reset API call number of HW threads
516 state->maxHWThreadValues.apiValue = 0;
517
518 // reset EU saturation
519 state->euSaturationEnabled = false;
520
521 renderHal->bEUSaturationNoSSD = false;
522
523 state->pfnReferenceCommandBuffer(&mosCmdBuffer.OsResource, cmdBuffer);
524
525 eStatus = MOS_STATUS_SUCCESS;
526
527 finish:
528 // Failed -> discard all changes in Command Buffer
529 if (eStatus != MOS_STATUS_SUCCESS)
530 {
531 // Buffer overflow - display overflow size
532 if (mosCmdBuffer.iRemaining < 0)
533 {
534 CM_ASSERTMESSAGE("Command Buffer overflow by %d bytes.", -mosCmdBuffer.iRemaining);
535 }
536
537 // Move command buffer back to beginning
538 tmp = remaining - mosCmdBuffer.iRemaining;
539 mosCmdBuffer.iRemaining = remaining;
540 mosCmdBuffer.iOffset -= tmp;
541 mosCmdBuffer.pCmdPtr = mosCmdBuffer.pCmdBase + mosCmdBuffer.iOffset/sizeof(uint32_t);
542
543 // Return unused command buffer space to OS
544 osInterface->pfnReturnCommandBuffer(osInterface, &mosCmdBuffer, 0);
545 }
546
547 return eStatus;
548 }
549
SetMediaWalkerParams(CM_WALKING_PARAMETERS engineeringParams,PCM_HAL_WALKER_PARAMS walkerParams)550 MOS_STATUS CM_HAL_G8_X::SetMediaWalkerParams(
551 CM_WALKING_PARAMETERS engineeringParams,
552 PCM_HAL_WALKER_PARAMS walkerParams)
553 {
554
555 MEDIA_OBJECT_WALKER_CMD_G6 mediaWalkerCmd;
556 mediaWalkerCmd.DW5.value = engineeringParams.Value[0];
557 walkerParams->scoreboardMask = mediaWalkerCmd.DW5.scoreboardMask;
558
559 mediaWalkerCmd.DW6.value = engineeringParams.Value[1];
560 walkerParams->colorCountMinusOne = mediaWalkerCmd.DW6.colorCountMinusOne;
561 walkerParams->midLoopUnitX = mediaWalkerCmd.DW6.midLoopUnitX;
562 walkerParams->midLoopUnitY = mediaWalkerCmd.DW6.midLoopUnitY;
563 walkerParams->middleLoopExtraSteps = mediaWalkerCmd.DW6.midLoopExtraSteps;
564
565 mediaWalkerCmd.DW7.value = engineeringParams.Value[2];
566 walkerParams->localLoopExecCount = mediaWalkerCmd.DW7.localLoopExecCount;
567 walkerParams->globalLoopExecCount = mediaWalkerCmd.DW7.globalLoopExecCount;
568
569 mediaWalkerCmd.DW8.value = engineeringParams.Value[3];
570 walkerParams->blockResolution.x = mediaWalkerCmd.DW8.blockResolutionX;
571 walkerParams->blockResolution.y = mediaWalkerCmd.DW8.blockResolutionY;
572
573 mediaWalkerCmd.DW9.value = engineeringParams.Value[4];
574 walkerParams->localStart.x = mediaWalkerCmd.DW9.localStartX;
575 walkerParams->localStart.y = mediaWalkerCmd.DW9.localStartY;
576
577 mediaWalkerCmd.DW11.value = engineeringParams.Value[6];
578 walkerParams->localOutLoopStride.x = mediaWalkerCmd.DW11.localOuterLoopStrideX;
579 walkerParams->localOutLoopStride.y = mediaWalkerCmd.DW11.localOuterLoopStrideY;
580
581 mediaWalkerCmd.DW12.value = engineeringParams.Value[7];
582 walkerParams->localInnerLoopUnit.x = mediaWalkerCmd.DW12.localInnerLoopUnitX;
583 walkerParams->localInnerLoopUnit.y = mediaWalkerCmd.DW12.localInnerLoopUnitY;
584
585 mediaWalkerCmd.DW13.value = engineeringParams.Value[8];
586 walkerParams->globalResolution.x = mediaWalkerCmd.DW13.globalResolutionX;
587 walkerParams->globalResolution.y = mediaWalkerCmd.DW13.globalResolutionY;
588
589 mediaWalkerCmd.DW14.value = engineeringParams.Value[9];
590 walkerParams->globalStart.x = mediaWalkerCmd.DW14.globalStartX;
591 walkerParams->globalStart.y = mediaWalkerCmd.DW14.globalStartY;
592
593 mediaWalkerCmd.DW15.value = engineeringParams.Value[10];
594 walkerParams->globalOutlerLoopStride.x = mediaWalkerCmd.DW15.globalOuterLoopStrideX;
595 walkerParams->globalOutlerLoopStride.y = mediaWalkerCmd.DW15.globalOuterLoopStrideY;
596
597 mediaWalkerCmd.DW16.value = engineeringParams.Value[11];
598 walkerParams->globalInnerLoopUnit.x = mediaWalkerCmd.DW16.globalInnerLoopUnitX;
599 walkerParams->globalInnerLoopUnit.y = mediaWalkerCmd.DW16.globalInnerLoopUnitY;
600
601 walkerParams->localEnd.x = 0;
602 walkerParams->localEnd.y = 0;
603
604 return MOS_STATUS_SUCCESS;
605 }
606
HwSetSurfaceMemoryObjectControl(uint16_t memObjCtl,PRENDERHAL_SURFACE_STATE_PARAMS surfStateParams)607 MOS_STATUS CM_HAL_G8_X::HwSetSurfaceMemoryObjectControl(
608 uint16_t memObjCtl,
609 PRENDERHAL_SURFACE_STATE_PARAMS surfStateParams )
610 {
611 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
612 PRENDERHAL_INTERFACE renderHal = m_cmState->renderHal;
613 CM_HAL_MEMORY_OBJECT_CONTROL_G8 cacheType;
614
615 MOS_ZeroMemory( &cacheType, sizeof( CM_HAL_MEMORY_OBJECT_CONTROL_G8 ) );
616
617 if ( ( memObjCtl & CM_MEMOBJCTL_CACHE_MASK ) >> 8 == CM_INVALID_MEMOBJCTL )
618 {
619 CM_CHK_NULL_GOTOFINISH_MOSERROR(renderHal->pOsInterface->pfnGetGmmClientContext(renderHal->pOsInterface));
620 cacheType.value = renderHal->pOsInterface->pfnGetGmmClientContext(renderHal->pOsInterface)->CachePolicyGetMemoryObject(nullptr, CM_RESOURCE_USAGE_SurfaceState).DwordValue;
621
622 // for default value and SVM surface, override the cache control from WB to WT
623 if ( ( ( memObjCtl & 0xF0 ) >> 4 ) == 2 )
624 {
625 cacheType.Gen8.cacheControl = 2;
626 }
627 }
628 else
629 {
630 // Get the cache type of the memory object.
631 // Since memObjCtl is composed with cache type(8:15), memory type(4:7), ages(0:3), rearranging is needed
632 cacheType.Gen8.age = ( memObjCtl & 0xF );
633 cacheType.Gen8.cacheControl = ( memObjCtl & 0xF0 ) >> 4;
634 cacheType.Gen8.targetCache = ( memObjCtl & CM_MEMOBJCTL_CACHE_MASK ) >> 8;
635 }
636
637 surfStateParams->MemObjCtl = cacheType.value;
638
639 finish:
640 return eStatus;
641 }
642
RegisterSampler8x8(PCM_HAL_SAMPLER_8X8_PARAM param)643 MOS_STATUS CM_HAL_G8_X::RegisterSampler8x8(
644 PCM_HAL_SAMPLER_8X8_PARAM param)
645 {
646 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
647 PMHW_SAMPLER_STATE_PARAM samplerEntry = nullptr;
648 PCM_HAL_SAMPLER_8X8_ENTRY sampler8x8Entry = nullptr;
649 PCM_HAL_STATE state = m_cmState;
650
651 if (param->sampler8x8State.stateType == CM_SAMPLER8X8_AVS)
652 {
653 for (uint32_t i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++) {
654 if (!state->samplerTable[i].bInUse) {
655 samplerEntry = &state->samplerTable[i];
656 param->handle = (uint32_t)i << 16;
657 samplerEntry->bInUse = true;
658 break;
659 }
660 }
661
662 int16_t samplerIndex = 0;
663 for (uint32_t i = 0; i < state->cmDeviceParam.maxSampler8x8TableSize; i++) {
664 if (!state->sampler8x8Table[i].inUse) {
665 sampler8x8Entry = &state->sampler8x8Table[i];
666 samplerIndex = (int16_t)i;
667 param->handle |= (uint32_t)(i & 0xffff);
668 sampler8x8Entry->inUse = true;
669 break;
670 }
671 }
672
673 if (!samplerEntry || !sampler8x8Entry) {
674 CM_ASSERTMESSAGE("Sampler or AVS table is full");
675 return MOS_STATUS_NULL_POINTER;
676 }
677
678 //State data from application
679 samplerEntry->SamplerType = MHW_SAMPLER_TYPE_AVS;
680 samplerEntry->ElementType = MHW_Sampler64Elements;
681 samplerEntry->Avs = param->sampler8x8State.avsParam.avsState;
682 samplerEntry->Avs.stateID = samplerIndex;
683 samplerEntry->Avs.iTable8x8_Index = samplerIndex; // Used for calculating the Media offset of 8x8 table
684 samplerEntry->Avs.pMhwSamplerAvsTableParam = &sampler8x8Entry->sampler8x8State.mhwSamplerAvsTableParam;
685
686 if (samplerEntry->Avs.EightTapAFEnable)
687 param->sampler8x8State.avsParam.avsTable.adaptiveFilterAllChannels = true;
688 else
689 param->sampler8x8State.avsParam.avsTable.adaptiveFilterAllChannels = false;
690
691 RegisterSampler8x8AVSTable(&sampler8x8Entry->sampler8x8State,
692 ¶m->sampler8x8State.avsParam.avsTable);
693
694 sampler8x8Entry->sampler8x8State.stateType = CM_SAMPLER8X8_AVS;
695 }
696 else if (param->sampler8x8State.stateType == CM_SAMPLER8X8_MISC)
697 {
698 for (uint32_t i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++)
699 {
700 if (!state->samplerTable[i].bInUse)
701 {
702 samplerEntry = &state->samplerTable[i];
703 param->handle = (uint32_t)i << 16;
704 samplerEntry->bInUse = true;
705 break;
706 }
707 }
708
709 if ( samplerEntry == nullptr )
710 {
711 return MOS_STATUS_INVALID_HANDLE;
712 }
713 samplerEntry->SamplerType = MHW_SAMPLER_TYPE_MISC;
714
715 samplerEntry->Misc.byteHeight = param->sampler8x8State.miscState.DW0.Height;
716 samplerEntry->Misc.byteWidth = param->sampler8x8State.miscState.DW0.Width;
717 samplerEntry->Misc.wRow[0] = param->sampler8x8State.miscState.DW0.Row0;
718 samplerEntry->Misc.wRow[1] = param->sampler8x8State.miscState.DW1.Row1;
719 samplerEntry->Misc.wRow[2] = param->sampler8x8State.miscState.DW1.Row2;
720 samplerEntry->Misc.wRow[3] = param->sampler8x8State.miscState.DW2.Row3;
721 samplerEntry->Misc.wRow[4] = param->sampler8x8State.miscState.DW2.Row4;
722 samplerEntry->Misc.wRow[5] = param->sampler8x8State.miscState.DW3.Row5;
723 samplerEntry->Misc.wRow[6] = param->sampler8x8State.miscState.DW3.Row6;
724 samplerEntry->Misc.wRow[7] = param->sampler8x8State.miscState.DW4.Row7;
725 samplerEntry->Misc.wRow[8] = param->sampler8x8State.miscState.DW4.Row8;
726 samplerEntry->Misc.wRow[9] = param->sampler8x8State.miscState.DW5.Row9;
727 samplerEntry->Misc.wRow[10] = param->sampler8x8State.miscState.DW5.Row10;
728 samplerEntry->Misc.wRow[11] = param->sampler8x8State.miscState.DW6.Row11;
729 samplerEntry->Misc.wRow[12] = param->sampler8x8State.miscState.DW6.Row12;
730 samplerEntry->Misc.wRow[13] = param->sampler8x8State.miscState.DW7.Row13;
731 samplerEntry->Misc.wRow[14] = param->sampler8x8State.miscState.DW7.Row14;
732 }
733 else if (param->sampler8x8State.stateType == CM_SAMPLER8X8_CONV)
734 {
735 for (uint32_t i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++)
736 {
737 if (!state->samplerTable[i].bInUse) {
738 samplerEntry = &state->samplerTable[i];
739 param->handle = (uint32_t)i << 16;
740 samplerEntry->bInUse = true;
741 break;
742 }
743 }
744
745 if ( samplerEntry == nullptr )
746 {
747 return MOS_STATUS_INVALID_HANDLE;
748 }
749
750 MOS_ZeroMemory(&samplerEntry->Convolve, sizeof(samplerEntry->Convolve));
751
752 samplerEntry->SamplerType = MHW_SAMPLER_TYPE_CONV;
753
754 samplerEntry->Convolve.ui8Height = param->sampler8x8State.convolveState.height;
755 samplerEntry->Convolve.ui8Width = param->sampler8x8State.convolveState.width;
756 samplerEntry->Convolve.ui8ScaledDownValue = param->sampler8x8State.convolveState.scaleDownValue;
757 samplerEntry->Convolve.ui8SizeOfTheCoefficient = param->sampler8x8State.convolveState.coeffSize;
758
759 samplerEntry->ElementType = MHW_Sampler64Elements;
760
761 for ( int i = 0; i < CM_NUM_CONVOLVE_ROWS_BDW; i++ )
762 {
763 MHW_SAMPLER_CONVOLVE_COEFF_TABLE *coeffTable = &(samplerEntry->Convolve.CoeffTable[i]);
764 CM_HAL_CONVOLVE_COEFF_TABLE *sourceTable = &(param->sampler8x8State.convolveState.table[i]);
765 if ( samplerEntry->Convolve.ui8SizeOfTheCoefficient == 1 )
766 {
767 coeffTable->wFilterCoeff[0] = FloatToS3_12( sourceTable->FilterCoeff_0_0 );
768 coeffTable->wFilterCoeff[1] = FloatToS3_12( sourceTable->FilterCoeff_0_1 );
769 coeffTable->wFilterCoeff[2] = FloatToS3_12( sourceTable->FilterCoeff_0_2 );
770 coeffTable->wFilterCoeff[3] = FloatToS3_12( sourceTable->FilterCoeff_0_3 );
771 coeffTable->wFilterCoeff[4] = FloatToS3_12( sourceTable->FilterCoeff_0_4 );
772 coeffTable->wFilterCoeff[5] = FloatToS3_12( sourceTable->FilterCoeff_0_5 );
773 coeffTable->wFilterCoeff[6] = FloatToS3_12( sourceTable->FilterCoeff_0_6 );
774 coeffTable->wFilterCoeff[7] = FloatToS3_12( sourceTable->FilterCoeff_0_7 );
775 coeffTable->wFilterCoeff[8] = FloatToS3_12( sourceTable->FilterCoeff_0_8 );
776 coeffTable->wFilterCoeff[9] = FloatToS3_12( sourceTable->FilterCoeff_0_9 );
777 coeffTable->wFilterCoeff[10] = FloatToS3_12( sourceTable->FilterCoeff_0_10 );
778 coeffTable->wFilterCoeff[11] = FloatToS3_12( sourceTable->FilterCoeff_0_11 );
779 coeffTable->wFilterCoeff[12] = FloatToS3_12( sourceTable->FilterCoeff_0_12 );
780 coeffTable->wFilterCoeff[13] = FloatToS3_12( sourceTable->FilterCoeff_0_13 );
781 coeffTable->wFilterCoeff[14] = FloatToS3_12( sourceTable->FilterCoeff_0_14 );
782 coeffTable->wFilterCoeff[15] = FloatToS3_12( sourceTable->FilterCoeff_0_15 );
783 }
784 else
785 {
786 coeffTable->wFilterCoeff[0] = FloatToS3_4( sourceTable->FilterCoeff_0_0 );
787 coeffTable->wFilterCoeff[1] = FloatToS3_4( sourceTable->FilterCoeff_0_1 );
788 coeffTable->wFilterCoeff[2] = FloatToS3_4( sourceTable->FilterCoeff_0_2 );
789 coeffTable->wFilterCoeff[3] = FloatToS3_4( sourceTable->FilterCoeff_0_3 );
790 coeffTable->wFilterCoeff[4] = FloatToS3_4( sourceTable->FilterCoeff_0_4 );
791 coeffTable->wFilterCoeff[5] = FloatToS3_4( sourceTable->FilterCoeff_0_5 );
792 coeffTable->wFilterCoeff[6] = FloatToS3_4( sourceTable->FilterCoeff_0_6 );
793 coeffTable->wFilterCoeff[7] = FloatToS3_4( sourceTable->FilterCoeff_0_7 );
794 coeffTable->wFilterCoeff[8] = FloatToS3_4( sourceTable->FilterCoeff_0_8 );
795 coeffTable->wFilterCoeff[9] = FloatToS3_4( sourceTable->FilterCoeff_0_9 );
796 coeffTable->wFilterCoeff[10] = FloatToS3_4( sourceTable->FilterCoeff_0_10 );
797 coeffTable->wFilterCoeff[11] = FloatToS3_4( sourceTable->FilterCoeff_0_11 );
798 coeffTable->wFilterCoeff[12] = FloatToS3_4( sourceTable->FilterCoeff_0_12 );
799 coeffTable->wFilterCoeff[13] = FloatToS3_4( sourceTable->FilterCoeff_0_13 );
800 coeffTable->wFilterCoeff[14] = FloatToS3_4( sourceTable->FilterCoeff_0_14 );
801 coeffTable->wFilterCoeff[15] = FloatToS3_4( sourceTable->FilterCoeff_0_15 );
802 }
803 }
804
805 }
806
807 return eStatus;
808 }
809
SetupHwDebugControl(PRENDERHAL_INTERFACE renderHal,PMOS_COMMAND_BUFFER cmdBuffer)810 MOS_STATUS CM_HAL_G8_X::SetupHwDebugControl(
811 PRENDERHAL_INTERFACE renderHal,
812 PMOS_COMMAND_BUFFER cmdBuffer)
813 {
814 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
815
816 if (!renderHal || !cmdBuffer)
817 {
818 return MOS_STATUS_NULL_POINTER;
819 }
820
821 MHW_MI_LOAD_REGISTER_IMM_PARAMS loadRegImm;
822 MOS_ZeroMemory(&loadRegImm, sizeof(MHW_MI_LOAD_REGISTER_IMM_PARAMS));
823
824 // INSTPM, global debug enable
825 loadRegImm.dwRegister = INSTPM;
826 loadRegImm.dwData = (INSTPM_GLOBAL_DEBUG_ENABLE << 16) | INSTPM_GLOBAL_DEBUG_ENABLE;
827 eStatus = renderHal->pMhwMiInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &loadRegImm);
828 if(eStatus != MOS_STATUS_SUCCESS)
829 {
830 return eStatus;
831 }
832
833 // TD_CTL, force thread breakpoint enable
834 // Also enable external exception, because the source-level debugger has to
835 // be able to interrupt runing EU threads.
836 loadRegImm.dwRegister = TD_CTL;
837 loadRegImm.dwData = TD_CTL_FORCE_THREAD_BKPT_ENABLE | TD_CTL_FORCE_EXT_EXCEPTION_ENABLE;
838 eStatus = renderHal->pMhwMiInterface->AddMiLoadRegisterImmCmd(cmdBuffer, &loadRegImm);
839
840 return eStatus;
841 }
842
RegisterSampler8x8AVSTable(PCM_HAL_SAMPLER_8X8_TABLE sampler8x8AvsTable,PCM_AVS_TABLE_STATE_PARAMS avsTable)843 MOS_STATUS CM_HAL_G8_X::RegisterSampler8x8AVSTable(
844 PCM_HAL_SAMPLER_8X8_TABLE sampler8x8AvsTable,
845 PCM_AVS_TABLE_STATE_PARAMS avsTable)
846 {
847 MOS_ZeroMemory(&sampler8x8AvsTable->mhwSamplerAvsTableParam, sizeof(sampler8x8AvsTable->mhwSamplerAvsTableParam));
848
849 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea8Pixels = MEDIASTATE_AVS_TRANSITION_AREA_8_PIXELS;
850 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea4Pixels = MEDIASTATE_AVS_TRANSITION_AREA_4_PIXELS;
851 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative8Pixels = MEDIASTATE_AVS_MAX_DERIVATIVE_8_PIXELS;
852 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative4Pixels = MEDIASTATE_AVS_MAX_DERIVATIVE_4_PIXELS;
853 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteDefaultSharpnessLevel = MEDIASTATE_AVS_SHARPNESS_LEVEL_SHARP;
854
855 sampler8x8AvsTable->mhwSamplerAvsTableParam.bEnableRGBAdaptive = false;
856 sampler8x8AvsTable->mhwSamplerAvsTableParam.bAdaptiveFilterAllChannels = avsTable->adaptiveFilterAllChannels;
857 sampler8x8AvsTable->mhwSamplerAvsTableParam.bBypassXAdaptiveFiltering = true;
858 sampler8x8AvsTable->mhwSamplerAvsTableParam.bBypassYAdaptiveFiltering = true;
859
860 // Assign the coefficient table;
861 for (uint32_t i = 0; i < CM_NUM_HW_POLYPHASE_TABLES_G8; i++)
862 {
863 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[0] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_0;
864 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[1] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_1;
865 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[2] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_2;
866 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[3] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_3;
867 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[4] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_4;
868 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[5] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_5;
869 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[6] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_6;
870 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[7] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_7;
871
872 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[0] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_0;
873 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[1] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_1;
874 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[2] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_2;
875 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[3] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_3;
876 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[4] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_4;
877 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[5] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_5;
878 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[6] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_6;
879 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[7] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_7;
880
881 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneXFilterCoefficient[0] = (uint8_t)avsTable->tbl1X[i].FilterCoeff_0_2;
882 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneXFilterCoefficient[1] = (uint8_t)avsTable->tbl1X[i].FilterCoeff_0_3;
883 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneXFilterCoefficient[2] = (uint8_t)avsTable->tbl1X[i].FilterCoeff_0_4;
884 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneXFilterCoefficient[3] = (uint8_t)avsTable->tbl1X[i].FilterCoeff_0_5;
885
886 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneYFilterCoefficient[0] = (uint8_t)avsTable->tbl1Y[i].FilterCoeff_0_2;
887 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneYFilterCoefficient[1] = (uint8_t)avsTable->tbl1Y[i].FilterCoeff_0_3;
888 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneYFilterCoefficient[2] = (uint8_t)avsTable->tbl1Y[i].FilterCoeff_0_4;
889 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneYFilterCoefficient[3] = (uint8_t)avsTable->tbl1Y[i].FilterCoeff_0_5;
890 }
891
892 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteDefaultSharpnessLevel = avsTable->defaultSharpLevel;
893 sampler8x8AvsTable->mhwSamplerAvsTableParam.bBypassXAdaptiveFiltering = avsTable->bypassXAF;
894 sampler8x8AvsTable->mhwSamplerAvsTableParam.bBypassYAdaptiveFiltering = avsTable->bypassYAF;
895
896 if (!avsTable->bypassXAF && !avsTable->bypassYAF) {
897 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative8Pixels = avsTable->maxDerivative8Pixels;
898 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative4Pixels = avsTable->maxDerivative4Pixels;
899 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea8Pixels = avsTable->transitionArea8Pixels;
900 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea4Pixels = avsTable->transitionArea4Pixels;
901 }
902
903 return MOS_STATUS_SUCCESS;
904 }
905
UpdatePlatformInfoFromPower(PCM_PLATFORM_INFO platformInfo,bool euSaturated)906 MOS_STATUS CM_HAL_G8_X::UpdatePlatformInfoFromPower(
907 PCM_PLATFORM_INFO platformInfo,
908 bool euSaturated)
909 {
910 PCM_HAL_STATE state = m_cmState;
911 PRENDERHAL_INTERFACE renderHal = state->renderHal;
912 CM_POWER_OPTION cmPower;
913
914 if ( state->requestSingleSlice ||
915 renderHal->bRequestSingleSlice ||
916 (state->powerOption.nSlice != 0 && state->powerOption.nSlice < platformInfo->numSlices))
917 {
918 platformInfo->numSubSlices = platformInfo->numSubSlices / platformInfo->numSlices;
919 if (state->powerOption.nSlice > 1)
920 {
921 platformInfo->numSubSlices *= state->powerOption.nSlice;
922 platformInfo->numSlices = state->powerOption.nSlice;
923 }
924 else
925 {
926 platformInfo->numSlices = 1;
927 }
928 }
929 else if (euSaturated)
930 {
931 // No SSD and EU Saturation, request maximum number of slices/subslices/EUs
932 cmPower.nSlice = (uint16_t)platformInfo->numSlices;
933 cmPower.nSubSlice = (uint16_t)platformInfo->numSubSlices;
934 cmPower.nEU = (uint16_t)(platformInfo->numEUsPerSubSlice * platformInfo->numSubSlices);
935
936 state->pfnSetPowerOption(state, &cmPower);
937 }
938
939 return MOS_STATUS_SUCCESS;
940 }
941
GetExpectedGtSystemConfig(PCM_EXPECTED_GT_SYSTEM_INFO expectedConfig)942 MOS_STATUS CM_HAL_G8_X::GetExpectedGtSystemConfig(
943 PCM_EXPECTED_GT_SYSTEM_INFO expectedConfig)
944 {
945 if (m_genGT == PLATFORM_INTEL_GT1)
946 {
947 expectedConfig->numSlices = BDW_GT1_MAX_NUM_SLICES;
948 expectedConfig->numSubSlices = BDW_GT1_MAX_NUM_SUBSLICES;
949 }
950 else if( m_genGT == PLATFORM_INTEL_GT1_5 )
951 {
952 expectedConfig->numSlices = BDW_GT1_5_MAX_NUM_SLICES;
953 expectedConfig->numSubSlices = BDW_GT1_5_MAX_NUM_SUBSLICES;
954 }
955 else if (m_genGT == PLATFORM_INTEL_GT2)
956 {
957 expectedConfig->numSlices = BDW_GT2_MAX_NUM_SLICES;
958 expectedConfig->numSubSlices = BDW_GT2_MAX_NUM_SUBSLICES;
959 }
960 else if (m_genGT == PLATFORM_INTEL_GT3)
961 {
962 expectedConfig->numSlices = BDW_GT3_MAX_NUM_SLICES;
963 expectedConfig->numSubSlices = BDW_GT3_MAX_NUM_SUBSLICES;
964 }
965 else
966 {
967 expectedConfig->numSlices = 0;
968 expectedConfig->numSubSlices = 0;
969 }
970
971 return MOS_STATUS_SUCCESS;
972 }
973
AllocateSIPCSRResource()974 MOS_STATUS CM_HAL_G8_X::AllocateSIPCSRResource()
975 {
976 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
977 if (Mos_ResourceIsNull(&m_cmState->sipResource.osResource))
978 {
979 eStatus = HalCm_AllocateSipResource(m_cmState); // create sip resource if it does not exist
980 }
981
982 return eStatus;
983 }
984
GetCopyKernelIsa(void * & isa,uint32_t & isaSize)985 MOS_STATUS CM_HAL_G8_X::GetCopyKernelIsa(void *&isa, uint32_t &isaSize)
986 {
987
988 isa = (void *)pGPUCopy_kernel_isa_gen8;
989 isaSize = iGPUCopy_kernel_isa_size_gen8;
990
991 return MOS_STATUS_SUCCESS;
992 }
993
GetInitKernelIsa(void * & isa,uint32_t & isaSize)994 MOS_STATUS CM_HAL_G8_X::GetInitKernelIsa(void *&isa, uint32_t &isaSize)
995 {
996 isa = (void *)pGPUInit_kernel_isa_Gen8;
997 isaSize = iGPUInit_kernel_isa_size_Gen8;
998
999 return MOS_STATUS_SUCCESS;
1000 }
1001
GetMediaWalkerMaxThreadWidth()1002 uint32_t CM_HAL_G8_X::GetMediaWalkerMaxThreadWidth()
1003 {
1004 return CM_MAX_THREADSPACE_WIDTH_FOR_MW;
1005 }
1006
GetMediaWalkerMaxThreadHeight()1007 uint32_t CM_HAL_G8_X::GetMediaWalkerMaxThreadHeight()
1008 {
1009 return CM_MAX_THREADSPACE_HEIGHT_FOR_MW;
1010 }
1011
GetHwSurfaceBTIInfo(PCM_SURFACE_BTI_INFO btiInfo)1012 MOS_STATUS CM_HAL_G8_X::GetHwSurfaceBTIInfo(
1013 PCM_SURFACE_BTI_INFO btiInfo)
1014 {
1015 if (btiInfo == nullptr)
1016 {
1017 return MOS_STATUS_NULL_POINTER;
1018 }
1019
1020 btiInfo->normalSurfaceStart = CM_NULL_SURFACE_BINDING_INDEX + 1;
1021 btiInfo->normalSurfaceEnd = CM_GLOBAL_SURFACE_INDEX_START - 1;
1022 btiInfo->reservedSurfaceStart = CM_GLOBAL_SURFACE_INDEX_START;
1023 btiInfo->reservedSurfaceEnd = CM_GLOBAL_SURFACE_INDEX_START + CM_GLOBAL_SURFACE_NUMBER + CM_GTPIN_SURFACE_NUMBER;
1024
1025 return MOS_STATUS_SUCCESS;
1026 }
1027
SetSuggestedL3Conf(L3_SUGGEST_CONFIG l3Config)1028 MOS_STATUS CM_HAL_G8_X::SetSuggestedL3Conf(
1029 L3_SUGGEST_CONFIG l3Config)
1030 {
1031 if (l3Config >= sizeof(BDW_L3_PLANE)/sizeof(L3ConfigRegisterValues))
1032 {
1033 return MOS_STATUS_INVALID_PARAMETER;
1034 }
1035 return HalCm_SetL3Cache((L3ConfigRegisterValues *)&BDW_L3_PLANE[l3Config],
1036 &m_cmState->l3Settings);
1037 }
1038
GetGenStepInfo(char * & stepInfoStr)1039 MOS_STATUS CM_HAL_G8_X::GetGenStepInfo(char*& stepInfoStr)
1040 {
1041 const char *genSteppingInfoTable[] = { "A0", "XX", "XX", "B0", "D0", "E0", "F0",
1042 "G0", "G1", "H0", "J0" };
1043
1044 uint32_t genStepId = m_cmState->platform.usRevId;
1045
1046 uint32_t tablesize = sizeof(genSteppingInfoTable) / sizeof(char *);
1047
1048 if (genStepId < tablesize)
1049 {
1050 stepInfoStr = (char *)genSteppingInfoTable[genStepId];
1051 }
1052 else
1053 {
1054 stepInfoStr = nullptr;
1055 }
1056
1057 return MOS_STATUS_SUCCESS;
1058 }
1059
ColorCountSanityCheck(uint32_t colorCount)1060 int32_t CM_HAL_G8_X::ColorCountSanityCheck(uint32_t colorCount)
1061 {
1062 if (colorCount == CM_INVALID_COLOR_COUNT || colorCount > CM_THREADSPACE_MAX_COLOR_COUNT)
1063 {
1064 CM_ASSERTMESSAGE("Error: Invalid color count.");
1065 return CM_INVALID_ARG_VALUE;
1066 }
1067 return CM_SUCCESS;
1068 }
1069
MemoryObjectCtrlPolicyCheck(uint32_t memCtrl)1070 bool CM_HAL_G8_X::MemoryObjectCtrlPolicyCheck(uint32_t memCtrl)
1071 {
1072 if ( memCtrl > MEMORY_OBJECT_CONTROL_BDW_L3_LLC_ELLC_ALLOWED )
1073 {
1074 return false;
1075 }
1076
1077 return true;
1078 }
1079
GetConvSamplerIndex(PMHW_SAMPLER_STATE_PARAM samplerParam,char * samplerIndexTable,int32_t nSamp8X8Num,int32_t nSampConvNum)1080 int32_t CM_HAL_G8_X::GetConvSamplerIndex(
1081 PMHW_SAMPLER_STATE_PARAM samplerParam,
1082 char *samplerIndexTable,
1083 int32_t nSamp8X8Num,
1084 int32_t nSampConvNum)
1085 {
1086
1087 // 2D convolve BDW
1088 int32_t samplerIndex = 1 + (nSamp8X8Num + nSampConvNum) * 2;
1089 while (samplerIndexTable[samplerIndex] != CM_INVALID_INDEX)
1090 {
1091 samplerIndex += 2;
1092 }
1093
1094 return samplerIndex;
1095 }
1096
SetL3CacheConfig(const L3ConfigRegisterValues * values,PCmHalL3Settings cmHalL3Setting)1097 MOS_STATUS CM_HAL_G8_X::SetL3CacheConfig(
1098 const L3ConfigRegisterValues *values,
1099 PCmHalL3Settings cmHalL3Setting)
1100 {
1101 return HalCm_SetL3Cache( values, cmHalL3Setting );
1102 }
1103
GetSamplerParamInfoForSamplerType(PMHW_SAMPLER_STATE_PARAM mhwSamplerParam,SamplerParam & samplerParam)1104 MOS_STATUS CM_HAL_G8_X::GetSamplerParamInfoForSamplerType(
1105 PMHW_SAMPLER_STATE_PARAM mhwSamplerParam,
1106 SamplerParam &samplerParam)
1107 {
1108 const unsigned int samplerElementSize[MAX_ELEMENT_TYPE_COUNT] = {16, 32, 64, 128, 1024, 2048};
1109
1110 // gets element_type
1111 switch (mhwSamplerParam->SamplerType)
1112 {
1113 case MHW_SAMPLER_TYPE_CONV:
1114 case MHW_SAMPLER_TYPE_AVS:
1115 samplerParam.elementType = MHW_Sampler64Elements;
1116 break;
1117 case MHW_SAMPLER_TYPE_MISC:
1118 samplerParam.elementType = MHW_Sampler2Elements;
1119 break;
1120 case MHW_SAMPLER_TYPE_3D:
1121 samplerParam.elementType = MHW_Sampler1Element;
1122 break;
1123 default:
1124 samplerParam.elementType = MHW_Sampler1Element;
1125 break;
1126 }
1127
1128 // bti_stepping for convolve or AVS is 2, other cases are 1.
1129 if ((mhwSamplerParam->SamplerType == MHW_SAMPLER_TYPE_CONV) ||
1130 (mhwSamplerParam->SamplerType == MHW_SAMPLER_TYPE_AVS))
1131 {
1132 samplerParam.btiStepping = 2;
1133 }
1134 else
1135 {
1136 samplerParam.btiStepping = 1;
1137 }
1138
1139 // gets multiplier
1140 samplerParam.btiMultiplier = samplerElementSize[samplerParam.elementType] / samplerParam.btiStepping;
1141
1142 // gets size
1143 samplerParam.size = samplerElementSize[samplerParam.elementType];
1144
1145 return MOS_STATUS_SUCCESS;
1146 }
1147
ConverTicksToNanoSecondsDefault(uint64_t ticks)1148 uint64_t CM_HAL_G8_X::ConverTicksToNanoSecondsDefault(uint64_t ticks)
1149 {
1150 return (uint64_t)(ticks * CM_NS_PER_TICK_RENDER_G8);
1151 }
1152
1153