1 /*
2 * Copyright (c) 2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     mhw_render_xe_hpg_impl.h
24 //! \brief    MHW render interface common base for Xe_HPG
25 //! \details
26 //!
27 
28 #ifndef __MHW_RENDER_XE_HPG_IMPL_H__
29 #define __MHW_RENDER_XE_HPG_IMPL_H__
30 
31 #include "mhw_render_impl.h"
32 #include "mhw_render_hwcmd_xe_hpg.h"
33 #include "mhw_render_itf.h"
34 #include "mhw_impl.h"
35 
36 namespace mhw
37 {
38 namespace render
39 {
40 namespace xe_hpg
41 {
42 class Impl : public render::Impl<mhw::render::xe_hpg::Cmd>
43 {
44 public:
Impl(PMOS_INTERFACE osItf)45     Impl(PMOS_INTERFACE osItf) : base_t(osItf)
46     {
47         MHW_FUNCTION_ENTER;
48 
49         InitMmioRegisters();
50     };
51 
InitMmioRegisters()52     MOS_STATUS InitMmioRegisters() override
53     {
54         MHW_FUNCTION_ENTER;
55         MHW_MI_MMIOREGISTERS* mmioRegisters = &m_mmioRegisters;
56         mmioRegisters->generalPurposeRegister0LoOffset  = CS_GENERAL_PURPOSE_REGISTER0_LO_OFFSET;
57         mmioRegisters->generalPurposeRegister0HiOffset  = CS_GENERAL_PURPOSE_REGISTER0_HI_OFFSET;
58         mmioRegisters->generalPurposeRegister4LoOffset  = CS_GENERAL_PURPOSE_REGISTER4_LO_OFFSET;
59         mmioRegisters->generalPurposeRegister4HiOffset  = CS_GENERAL_PURPOSE_REGISTER4_HI_OFFSET;
60         mmioRegisters->generalPurposeRegister11LoOffset = CS_GENERAL_PURPOSE_REGISTER11_LO_OFFSET;
61         mmioRegisters->generalPurposeRegister11HiOffset = CS_GENERAL_PURPOSE_REGISTER11_HI_OFFSET;
62         mmioRegisters->generalPurposeRegister12LoOffset = CS_GENERAL_PURPOSE_REGISTER12_LO_OFFSET;
63         mmioRegisters->generalPurposeRegister12HiOffset = CS_GENERAL_PURPOSE_REGISTER12_HI_OFFSET;
64 
65         return MOS_STATUS_SUCCESS;
66     }
67 
EnableL3Caching(mhw::render::MHW_RENDER_ENGINE_L3_CACHE_SETTINGS * cacheSettings)68     MOS_STATUS EnableL3Caching(mhw::render::MHW_RENDER_ENGINE_L3_CACHE_SETTINGS *cacheSettings) override
69     {
70         // L3 Caching enabled by default
71         m_l3CacheConfig.bL3CachingEnabled               = true;
72         m_l3CacheConfig.dwRcsL3CacheAllocReg_Register   = M_MMIO_RCS_L3ALLOCREG;
73         m_l3CacheConfig.dwRcsL3CacheTcCntlReg_Register  = M_MMIO_RCS_TCCNTLREG;
74         m_l3CacheConfig.dwCcs0L3CacheAllocReg_Register  = M_MMIO_CCS0_L3ALLOCREG;
75         m_l3CacheConfig.dwCcs0L3CacheTcCntlReg_Register = M_MMIO_CCS0_TCCNTLREG;
76         if (cacheSettings)
77         {
78             MHW_RENDER_ENGINE_L3_CACHE_SETTINGS *cacheSettingsHpg = (MHW_RENDER_ENGINE_L3_CACHE_SETTINGS*)cacheSettings;
79             m_l3CacheConfig.dwL3CacheAllocReg_Setting  = cacheSettingsHpg->dwAllocReg;
80             m_l3CacheConfig.dwL3CacheTcCntlReg_Setting = cacheSettingsHpg->dwTcCntlReg;
81             // update default settings is needed from CM HAL call
82             if (cacheSettingsHpg->bUpdateDefault)
83             {
84                 m_l3CacheAllocRegisterValueDefault  = cacheSettingsHpg->dwAllocReg;
85                 m_l3CacheTcCntlRegisterValueDefault = cacheSettingsHpg->dwTcCntlReg;
86             }
87         }
88         else // Use the default setting if regkey is not set
89         {
90             // different default settings after CM HAL call
91             m_l3CacheConfig.dwL3CacheAllocReg_Setting  = m_l3CacheAllocRegisterValueDefault;
92             m_l3CacheConfig.dwL3CacheTcCntlReg_Setting = m_l3CacheTcCntlRegisterValueDefault;
93         }
94 
95         return MOS_STATUS_SUCCESS;
96     }
97 
SetL3Cache(PMOS_COMMAND_BUFFER cmdBuffer,std::shared_ptr<mhw::mi::Itf> miItf)98     MOS_STATUS SetL3Cache(PMOS_COMMAND_BUFFER cmdBuffer, std::shared_ptr<mhw::mi::Itf> miItf) override
99     {
100         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
101 
102         MHW_MI_CHK_NULL(cmdBuffer);
103         MHW_MI_CHK_NULL(miItf);
104 
105         if (m_l3CacheConfig.bL3CachingEnabled)
106         {
107             //L3CacheAllocReg_Setting and L3CacheTcCntlReg_Setting
108             if ((m_l3CacheConfig.dwL3CacheAllocReg_Setting != 0) || (m_l3CacheConfig.dwL3CacheTcCntlReg_Setting != 0))
109             {
110                 //update L3 AllocReg setting for RCS; CCS L3 AllocReg setting will be dulicated from RCS
111                 auto& l3CachePar = miItf->MHW_GETPAR_F(MI_LOAD_REGISTER_IMM)();
112                 l3CachePar = {};
113                 l3CachePar.dwRegister = m_l3CacheConfig.dwRcsL3CacheAllocReg_Register;
114                 l3CachePar.dwData     = m_l3CacheConfig.dwL3CacheAllocReg_Setting;
115                 MHW_MI_CHK_STATUS(miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer));
116 
117                 //update L3 TcCntlReg setting for RCS; CCS L3 TcCntlReg setting will be dulicated from RCS
118                 auto& rcsL3CacheTcCntlPar = miItf->MHW_GETPAR_F(MI_LOAD_REGISTER_IMM)();
119                 rcsL3CacheTcCntlPar = {};
120                 rcsL3CacheTcCntlPar.dwRegister = m_l3CacheConfig.dwRcsL3CacheTcCntlReg_Register;
121                 rcsL3CacheTcCntlPar.dwData     = m_l3CacheConfig.dwL3CacheTcCntlReg_Setting;
122                 MHW_MI_CHK_STATUS(miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer));
123 
124             }
125         }
126 
127         return eStatus;
128     }
129 
_MHW_SETCMD_OVERRIDE_DECL(_3DSTATE_CHROMA_KEY)130     _MHW_SETCMD_OVERRIDE_DECL(_3DSTATE_CHROMA_KEY)
131     {
132         _MHW_SETCMD_CALLBASE(_3DSTATE_CHROMA_KEY);
133 
134         cmd.DW1.ChromakeyTableIndex = params.dwIndex;
135         cmd.DW2.ChromakeyLowValue   = params.dwLow;
136         cmd.DW3.ChromakeyHighValue  = params.dwHigh;
137 
138         return MOS_STATUS_SUCCESS;
139     }
140 
_MHW_SETCMD_OVERRIDE_DECL(PIPELINE_SELECT)141     _MHW_SETCMD_OVERRIDE_DECL(PIPELINE_SELECT)
142     {
143         _MHW_SETCMD_CALLBASE(PIPELINE_SELECT);
144         cmd.DW0.PipelineSelection = (params.gpGpuPipe) ? cmd.PIPELINE_SELECTION_GPGPU : cmd.PIPELINE_SELECTION_MEDIA;
145         cmd.DW0.MaskBits = 0x13;
146         return MOS_STATUS_SUCCESS;
147     }
148 
_MHW_SETCMD_OVERRIDE_DECL(STATE_COMPUTE_MODE)149     _MHW_SETCMD_OVERRIDE_DECL(STATE_COMPUTE_MODE)
150     {
151         _MHW_SETCMD_CALLBASE(STATE_COMPUTE_MODE);
152         cmd.DW1.MaskBits         = 0xFFFF;
153         cmd.DW1.LargeGrfMode     = 0;
154         cmd.DW1.ForceNonCoherent = 2;
155 
156         return MOS_STATUS_SUCCESS;
157     }
158 
_MHW_SETCMD_OVERRIDE_DECL(CFE_STATE)159     _MHW_SETCMD_OVERRIDE_DECL(CFE_STATE)
160     {
161         _MHW_SETCMD_CALLBASE(CFE_STATE);
162 
163         cmd.DW3.MaximumNumberOfThreads     = params.dwMaximumNumberofThreads;
164         cmd.DW1_2.ScratchSpaceBuffer       = params.ScratchSpaceBuffer;
165         cmd.DW3.FusedEuDispatch            = false; // enabled Fused EU Mode
166         cmd.DW3.NumberOfWalkers            = params.NumberOfWalkers;
167         cmd.DW3.SingleSliceDispatchCcsMode = params.SingleSliceDispatchCcsMode;
168 
169         return MOS_STATUS_SUCCESS;
170     }
171 
_MHW_SETCMD_OVERRIDE_DECL(COMPUTE_WALKER)172     _MHW_SETCMD_OVERRIDE_DECL(COMPUTE_WALKER)
173     {
174         _MHW_SETCMD_CALLBASE(COMPUTE_WALKER);
175 
176         cmd.DW2.IndirectDataLength = params.IndirectDataLength;
177         cmd.DW3.IndirectDataStartAddress = params.IndirectDataStartAddress >> MHW_COMPUTE_INDIRECT_SHIFT;
178 
179         cmd.DW4.SIMDSize = mhw::render::xe_hpg::Cmd::COMPUTE_WALKER_CMD::SIMD_SIZE_SIMD32;
180 
181         cmd.DW5.ExecutionMask = 0xffffffff;
182         cmd.DW6.LocalXMaximum = params.ThreadWidth - 1;
183         cmd.DW6.LocalYMaximum = params.ThreadHeight - 1;
184         cmd.DW6.LocalZMaximum = params.ThreadDepth - 1;
185 
186         cmd.DW7.ThreadGroupIDXDimension = params.GroupWidth;
187         cmd.DW8.ThreadGroupIDYDimension = params.GroupHeight;
188         cmd.DW9.ThreadGroupIDZDimension = params.GroupDepth;
189         cmd.DW10.ThreadGroupIDStartingX = params.GroupStartingX;
190         cmd.DW11.ThreadGroupIDStartingY = params.GroupStartingY;
191         cmd.DW12.ThreadGroupIDStartingZ = params.GroupStartingZ;
192 
193         cmd.interface_descriptor_data.DW0_1.KernelStartPointer = params.dwKernelOffset >> MHW_KERNEL_OFFSET_SHIFT;
194         cmd.interface_descriptor_data.DW3.SamplerCount = params.dwSamplerCount;
195         cmd.interface_descriptor_data.DW3.SamplerStatePointer = params.dwSamplerOffset >> MHW_SAMPLER_SHIFT;
196         cmd.interface_descriptor_data.DW4.BindingTablePointer = MOS_ROUNDUP_SHIFT(params.dwBindingTableOffset, MHW_BINDING_TABLE_ID_SHIFT);
197         cmd.interface_descriptor_data.DW5.NumberOfThreadsInGpgpuThreadGroup = params.dwNumberofThreadsInGPGPUGroup;
198         cmd.interface_descriptor_data.DW5.SharedLocalMemorySize = params.dwSharedLocalMemorySize;
199         if (params.dwSharedLocalMemorySize > 0)
200         {
201             cmd.interface_descriptor_data.DW6_7.PreferredSlmAllocationSizePerSubslice = mhw::render::xe_hpg::Cmd::COMPUTE_WALKER_CMD::INTERFACE_DESCRIPTOR_DATA_G12HP_CMD::PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES96K;
202         }
203         else  // if (params.dwSharedLocalMemorySize == 0)
204         {
205             cmd.interface_descriptor_data.DW6_7.PreferredSlmAllocationSizePerSubslice = params.forcePreferredSLMZero ?
206                 mhw::render::xe_hpg::Cmd::COMPUTE_WALKER_CMD::INTERFACE_DESCRIPTOR_DATA_G12HP_CMD::PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODESMAX :
207                 mhw::render::xe_hpg::Cmd::COMPUTE_WALKER_CMD::INTERFACE_DESCRIPTOR_DATA_G12HP_CMD::PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES0K;
208         }
209         // when Barriers is not 0, the EU fusion will close.
210         // Assigns barrier count.
211         if (params.bBarrierEnable)
212         {   // Bits [28:30] represent the number of barriers.
213             cmd.interface_descriptor_data.DW5.Reserved188 = 1;
214         }
215 
216         if (nullptr != params.postsyncResource)
217         {
218             MHW_RESOURCE_PARAMS resourceParams = {};
219 
220             InitMocsParams(resourceParams, &cmd.postsync_data.DW0.Value, 5, 10);
221             resourceParams.presResource = params.postsyncResource;
222             resourceParams.pdwCmd = cmd.postsync_data.DW1_2.Value;
223             resourceParams.dwLocationInCmd = 24;
224             resourceParams.dwOffset = params.resourceOffset;
225             resourceParams.bIsWritable = true;
226             MHW_MI_CHK_STATUS(AddResourceToCmd(
227                 this->m_osItf,
228                 this->m_currentCmdBuf,
229                 &resourceParams));
230             cmd.postsync_data.DW0.Operation = mhw::render::xe_hpg::Cmd::COMPUTE_WALKER_CMD::POSTSYNC_DATA_CMD::POSTSYNC_OPERATION_WRITE_TIMESTAMP;
231         }
232 
233         return MOS_STATUS_SUCCESS;
234     }
235 
_MHW_SETCMD_OVERRIDE_DECL(STATE_BASE_ADDRESS)236     _MHW_SETCMD_OVERRIDE_DECL(STATE_BASE_ADDRESS)
237     {
238         _MHW_SETCMD_CALLBASE(STATE_BASE_ADDRESS);
239 
240         cmd.DW3.L1CachePolicy                                   = params.l1CacheConfig;
241 
242         return MOS_STATUS_SUCCESS;
243     }
244 
245 protected:
246     using base_t = render::Impl<mhw::render::xe_hpg::Cmd>;
247     uint32_t    m_l3CacheTcCntlRegisterValueDefault = 0x80000080;
248     uint32_t    m_l3CacheAllocRegisterValueDefault  = 0xD0000020;
249 
250 MEDIA_CLASS_DEFINE_END(mhw__render__xe_hpg__Impl)
251 };
252 
253 }  // namespace xe_hpg
254 }  // namespace render
255 }  // namespace mhw
256 
257 #endif  // __MHW_RENDER_XE_HPG_IMPL_H__
258