xref: /aosp_15_r20/external/mesa3d/src/amd/addrlib/src/gfx11/gfx11addrlib.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2 ************************************************************************************************************************
3 *
4 *  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
5 *  SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8 
9 /**
10 ************************************************************************************************************************
11 * @file  gfx11addrlib.cpp
12 * @brief Contain the implementation for the Gfx11Lib class.
13 ************************************************************************************************************************
14 */
15 
16 #include "gfx11addrlib.h"
17 #include "gfx11_gb_reg.h"
18 
19 #include "amdgpu_asic_addr.h"
20 
21 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
22 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
23 
24 namespace Addr
25 {
26 /**
27 ************************************************************************************************************************
28 *   Gfx11HwlInit
29 *
30 *   @brief
31 *       Creates an Gfx11Lib object.
32 *
33 *   @return
34 *       Returns an Gfx11Lib object pointer.
35 ************************************************************************************************************************
36 */
Gfx11HwlInit(const Client * pClient)37 Addr::Lib* Gfx11HwlInit(const Client* pClient)
38 {
39     return V2::Gfx11Lib::CreateObj(pClient);
40 }
41 
42 namespace V2
43 {
44 
45 ////////////////////////////////////////////////////////////////////////////////////////////////////
46 //                               Static Const Member
47 ////////////////////////////////////////////////////////////////////////////////////////////////////
48 
49 const SwizzleModeFlags Gfx11Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
50 {//Linear 256B  4KB  64KB  256KB   Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
51     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR
52     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
53     {{0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_D
54     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
55 
56     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
57     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_S
58     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_D
59     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
60 
61     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
62     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_S
63     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_D
64     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
65 
66     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
67     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
68     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
69     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
70 
71     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
72     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_S_T
73     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_D_T
74     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
75 
76     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
77     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_S_X
78     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_D_X
79     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
80 
81     {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_Z_X
82     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_S_X
83     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_D_X
84     {{0,    0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_64KB_R_X
85 
86     {{0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_256KB_Z_X
87     {{0,    0,    0,    0,    1,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_256KB_S_X
88     {{0,    0,    0,    0,    1,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_256KB_D_X
89     {{0,    0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_256KB_R_X
90     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR_GENERAL
91 };
92 
93 const Dim3d Gfx11Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
94 
95 const Dim3d Gfx11Lib::Block256K_Log2_3d[] = {{6, 6, 6}, {5, 6, 6}, {5, 6, 5}, {5, 5, 5}, {4, 5, 5}};
96 const Dim3d Gfx11Lib::Block64K_Log2_3d[]  = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
97 const Dim3d Gfx11Lib::Block4K_Log2_3d[]   = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
98 
99 /**
100 ************************************************************************************************************************
101 *   Gfx11Lib::Gfx11Lib
102 *
103 *   @brief
104 *       Constructor
105 *
106 ************************************************************************************************************************
107 */
Gfx11Lib(const Client * pClient)108 Gfx11Lib::Gfx11Lib(const Client* pClient)
109     :
110     Lib(pClient),
111     m_numPkrLog2(0),
112     m_numSaLog2(0),
113     m_colorBaseIndex(0),
114     m_htileBaseIndex(0),
115     m_dccBaseIndex(0)
116 {
117     memset(&m_settings, 0, sizeof(m_settings));
118     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
119 }
120 
121 /**
122 ************************************************************************************************************************
123 *   Gfx11Lib::~Gfx11Lib
124 *
125 *   @brief
126 *       Destructor
127 ************************************************************************************************************************
128 */
~Gfx11Lib()129 Gfx11Lib::~Gfx11Lib()
130 {
131 }
132 
133 /**
134 ************************************************************************************************************************
135 *   Gfx11Lib::HwlComputeHtileInfo
136 *
137 *   @brief
138 *       Interface function stub of AddrComputeHtilenfo
139 *
140 *   @return
141 *       ADDR_E_RETURNCODE
142 ************************************************************************************************************************
143 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const144 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileInfo(
145     const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,    ///< [in] input structure
146     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut    ///< [out] output structure
147     ) const
148 {
149     ADDR_E_RETURNCODE ret = ADDR_OK;
150 
151     if ((pIn->swizzleMode != ADDR_SW_64KB_Z_X)  &&
152         (pIn->swizzleMode != ADDR_SW_256KB_Z_X) &&
153         (pIn->hTileFlags.pipeAligned != TRUE))
154     {
155         ret = ADDR_INVALIDPARAMS;
156     }
157     else
158     {
159         Dim3d         metaBlk     = {};
160         const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx11DataDepthStencil,
161                                                    ADDR_RSRC_TEX_2D,
162                                                    pIn->swizzleMode,
163                                                    0,
164                                                    0,
165                                                    TRUE,
166                                                    &metaBlk);
167 
168         pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
169         pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
170         pOut->baseAlign     = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
171         pOut->metaBlkWidth  = metaBlk.w;
172         pOut->metaBlkHeight = metaBlk.h;
173 
174         if (pIn->numMipLevels > 1)
175         {
176             ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
177 
178             UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
179 
180             for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
181             {
182                 UINT_32 mipWidth, mipHeight;
183 
184                 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
185 
186                 mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
187                 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
188 
189                 const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
190                 const UINT_32 heightInM    = mipHeight / metaBlk.h;
191                 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
192 
193                 if (pOut->pMipInfo != NULL)
194                 {
195                     pOut->pMipInfo[i].inMiptail = FALSE;
196                     pOut->pMipInfo[i].offset    = offset;
197                     pOut->pMipInfo[i].sliceSize = mipSliceSize;
198                 }
199 
200                 offset += mipSliceSize;
201             }
202 
203             pOut->sliceSize          = offset;
204             pOut->metaBlkNumPerSlice = offset / metaBlkSize;
205             pOut->htileBytes         = pOut->sliceSize * pIn->numSlices;
206 
207             if (pOut->pMipInfo != NULL)
208             {
209                 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
210                 {
211                     pOut->pMipInfo[i].inMiptail = TRUE;
212                     pOut->pMipInfo[i].offset    = 0;
213                     pOut->pMipInfo[i].sliceSize = 0;
214                 }
215 
216                 if (pIn->firstMipIdInTail != pIn->numMipLevels)
217                 {
218                     pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
219                 }
220             }
221         }
222         else
223         {
224             const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
225             const UINT_32 heightInM = pOut->height / metaBlk.h;
226 
227             pOut->metaBlkNumPerSlice    = pitchInM * heightInM;
228             pOut->sliceSize             = pOut->metaBlkNumPerSlice * metaBlkSize;
229             pOut->htileBytes            = pOut->sliceSize * pIn->numSlices;
230 
231             if (pOut->pMipInfo != NULL)
232             {
233                 pOut->pMipInfo[0].inMiptail = FALSE;
234                 pOut->pMipInfo[0].offset    = 0;
235                 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
236             }
237         }
238 
239         // Get the HTILE address equation (copied from HtileAddrFromCoord).
240         // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
241         const UINT_32  index         = m_htileBaseIndex;
242         const UINT_8* patIdxTable = GFX11_HTILE_PATIDX;
243 
244         ADDR_C_ASSERT(sizeof(GFX11_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
245         pOut->equation.gfx10_bits = (UINT_16 *)GFX11_HTILE_SW_PATTERN[patIdxTable[index]];
246     }
247 
248     return ret;
249 }
250 
251 /**
252 ************************************************************************************************************************
253 *   Gfx11Lib::HwlComputeDccInfo
254 *
255 *   @brief
256 *       Interface function to compute DCC key info
257 *
258 *   @return
259 *       ADDR_E_RETURNCODE
260 ************************************************************************************************************************
261 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const262 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeDccInfo(
263     const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,    ///< [in] input structure
264     ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut    ///< [out] output structure
265     ) const
266 {
267     ADDR_E_RETURNCODE ret = ADDR_OK;
268 
269     if (IsLinear(pIn->swizzleMode))
270     {
271         ret = ADDR_INVALIDPARAMS;
272     }
273     else if (pIn->dccKeyFlags.pipeAligned &&
274              (IsStandardSwizzle(pIn->swizzleMode) ||
275               IsDisplaySwizzle(pIn->swizzleMode)))
276     {
277         ret = ADDR_INVALIDPARAMS;
278     }
279     else
280     {
281         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
282         const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
283         Dim3d         compBlock   = {};
284 
285         GetCompressedBlockSizeLog2(Gfx11DataColor,
286                                    pIn->resourceType,
287                                    pIn->swizzleMode,
288                                    elemLog2,
289                                    numFragLog2,
290                                    &compBlock);
291         pOut->compressBlkWidth  = 1 << compBlock.w;
292         pOut->compressBlkHeight = 1 << compBlock.h;
293         pOut->compressBlkDepth  = 1 << compBlock.d;
294 
295         if (ret == ADDR_OK)
296         {
297             Dim3d         metaBlk     = {};
298             const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx11DataColor,
299                                                        pIn->resourceType,
300                                                        pIn->swizzleMode,
301                                                        elemLog2,
302                                                        numFragLog2,
303                                                        pIn->dccKeyFlags.pipeAligned,
304                                                        &metaBlk);
305 
306             pOut->dccRamBaseAlign   = metaBlkSize;
307             pOut->metaBlkWidth      = metaBlk.w;
308             pOut->metaBlkHeight     = metaBlk.h;
309             pOut->metaBlkDepth      = metaBlk.d;
310             pOut->metaBlkSize       = metaBlkSize;
311 
312             pOut->pitch             = PowTwoAlign(pIn->unalignedWidth,     metaBlk.w);
313             pOut->height            = PowTwoAlign(pIn->unalignedHeight,    metaBlk.h);
314             pOut->depth             = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
315 
316             if (pIn->numMipLevels > 1)
317             {
318                 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
319 
320                 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
321 
322                 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
323                 {
324                     UINT_32 mipWidth, mipHeight;
325 
326                     GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
327 
328                     mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
329                     mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
330 
331                     const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
332                     const UINT_32 heightInM    = mipHeight / metaBlk.h;
333                     const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
334 
335                     if (pOut->pMipInfo != NULL)
336                     {
337                         pOut->pMipInfo[i].inMiptail = FALSE;
338                         pOut->pMipInfo[i].offset    = offset;
339                         pOut->pMipInfo[i].sliceSize = mipSliceSize;
340                     }
341 
342                     offset += mipSliceSize;
343                 }
344 
345                 pOut->dccRamSliceSize    = offset;
346                 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
347                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
348 
349                 if (pOut->pMipInfo != NULL)
350                 {
351                     for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
352                     {
353                         pOut->pMipInfo[i].inMiptail = TRUE;
354                         pOut->pMipInfo[i].offset    = 0;
355                         pOut->pMipInfo[i].sliceSize = 0;
356                     }
357 
358                     if (pIn->firstMipIdInTail != pIn->numMipLevels)
359                     {
360                         pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
361                     }
362                 }
363             }
364             else
365             {
366                 const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
367                 const UINT_32 heightInM = pOut->height / metaBlk.h;
368 
369                 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
370                 pOut->dccRamSliceSize    = pOut->metaBlkNumPerSlice * metaBlkSize;
371                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
372 
373                 if (pOut->pMipInfo != NULL)
374                 {
375                     pOut->pMipInfo[0].inMiptail = FALSE;
376                     pOut->pMipInfo[0].offset    = 0;
377                     pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
378                 }
379             }
380 
381             // Get the DCC address equation (copied from DccAddrFromCoord)
382             const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
383             const UINT_32 numPipeLog2 = m_pipesLog2;
384             UINT_32       index       = m_dccBaseIndex + elemLog2;
385             const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_64KB_R_X) ?
386                                         GFX11_DCC_64K_R_X_PATIDX : GFX11_DCC_256K_R_X_PATIDX;
387 
388             if (pIn->dccKeyFlags.pipeAligned)
389             {
390                 index += MaxNumOfBpp;
391 
392                 if (m_numPkrLog2 < 2)
393                 {
394                     index += m_pipesLog2 * MaxNumOfBpp;
395                 }
396                 else
397                 {
398                     // 4 groups for "m_numPkrLog2 < 2" case
399                     index += 4 * MaxNumOfBpp;
400 
401                     const UINT_32 dccPipePerPkr = 3;
402 
403                     index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
404                              (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
405                 }
406             }
407 
408             ADDR_C_ASSERT(sizeof(GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
409             pOut->equation.gfx10_bits = (UINT_16*)GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]];
410         }
411     }
412 
413     return ret;
414 }
415 
416 /**
417 ************************************************************************************************************************
418 *   Gfx11Lib::HwlComputeHtileAddrFromCoord
419 *
420 *   @brief
421 *       Interface function stub of AddrComputeHtileAddrFromCoord
422 *
423 *   @return
424 *       ADDR_E_RETURNCODE
425 ************************************************************************************************************************
426 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)427 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileAddrFromCoord(
428     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
429     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
430 {
431     ADDR_E_RETURNCODE returnCode = ADDR_OK;
432 
433     if (pIn->numMipLevels > 1)
434     {
435         returnCode = ADDR_NOTIMPLEMENTED;
436     }
437     else
438     {
439         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};
440         input.size            = sizeof(input);
441         input.hTileFlags      = pIn->hTileFlags;
442         input.depthFlags      = pIn->depthflags;
443         input.swizzleMode     = pIn->swizzleMode;
444         input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
445         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
446         input.numSlices       = Max(pIn->numSlices,       1u);
447         input.numMipLevels    = 1;
448 
449         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};
450         output.size = sizeof(output);
451 
452         returnCode = ComputeHtileInfo(&input, &output);
453 
454         if (returnCode == ADDR_OK)
455         {
456             const UINT_32  numSampleLog2 = Log2(pIn->numSamples);
457             const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
458             const UINT_32  index         = m_htileBaseIndex + numSampleLog2;
459             const UINT_8*  patIdxTable   = GFX11_HTILE_PATIDX;
460             const UINT_32  blkSizeLog2   = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
461             const UINT_32  blkMask       = (1 << blkSizeLog2) - 1;
462             const UINT_32  blkOffset     = ComputeOffsetFromSwizzlePattern(GFX11_HTILE_SW_PATTERN[patIdxTable[index]],
463                                                                            blkSizeLog2 + 1, // +1 for nibble offset
464                                                                            pIn->x,
465                                                                            pIn->y,
466                                                                            pIn->slice,
467                                                                            0);
468             const UINT_32 xb       = pIn->x / output.metaBlkWidth;
469             const UINT_32 yb       = pIn->y / output.metaBlkHeight;
470             const UINT_32 pb       = output.pitch / output.metaBlkWidth;
471             const UINT_32 blkIndex = (yb * pb) + xb;
472             const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
473 
474             pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
475                          (blkIndex * (1 << blkSizeLog2)) +
476                          ((blkOffset >> 1) ^ pipeXor);
477         }
478     }
479 
480     return returnCode;
481 }
482 
483 /**
484 ************************************************************************************************************************
485 *   Gfx11Lib::HwlComputeHtileCoordFromAddr
486 *
487 *   @brief
488 *       Interface function stub of AddrComputeHtileCoordFromAddr
489 *
490 *   @return
491 *       ADDR_E_RETURNCODE
492 ************************************************************************************************************************
493 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)494 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeHtileCoordFromAddr(
495     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
496     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut)   ///< [out] output structure
497 {
498     ADDR_NOT_IMPLEMENTED();
499 
500     return ADDR_OK;
501 }
502 
503 /**
504 ************************************************************************************************************************
505 *   Gfx11Lib::HwlSupportComputeDccAddrFromCoord
506 *
507 *   @brief
508 *       Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
509 *
510 *   @return
511 *       ADDR_E_RETURNCODE
512 ************************************************************************************************************************
513 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)514 ADDR_E_RETURNCODE Gfx11Lib::HwlSupportComputeDccAddrFromCoord(
515     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
516 {
517     ADDR_E_RETURNCODE returnCode = ADDR_OK;
518 
519     if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
520         ((pIn->swizzleMode != ADDR_SW_64KB_R_X) &&
521          (pIn->swizzleMode != ADDR_SW_256KB_R_X)) ||
522         (pIn->dccKeyFlags.linear == TRUE) ||
523         (pIn->numFrags > 1) ||
524         (pIn->numMipLevels > 1) ||
525         (pIn->mipId > 0))
526     {
527         returnCode = ADDR_NOTSUPPORTED;
528     }
529     else if ((pIn->pitch == 0)         ||
530              (pIn->metaBlkWidth == 0)  ||
531              (pIn->metaBlkHeight == 0) ||
532              (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
533     {
534         returnCode = ADDR_NOTSUPPORTED;
535     }
536 
537     return returnCode;
538 }
539 
540 /**
541 ************************************************************************************************************************
542 *   Gfx11Lib::HwlComputeDccAddrFromCoord
543 *
544 *   @brief
545 *       Interface function stub of AddrComputeDccAddrFromCoord
546 *
547 *   @return
548 *       N/A
549 ************************************************************************************************************************
550 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)551 VOID Gfx11Lib::HwlComputeDccAddrFromCoord(
552     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,  ///< [in] input structure
553     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] output structure
554 {
555     const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
556     const UINT_32 numPipeLog2 = m_pipesLog2;
557     const UINT_32 pipeMask    = (1 << numPipeLog2) - 1;
558     UINT_32       index       = m_dccBaseIndex + elemLog2;
559     const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_64KB_R_X) ?
560                                 GFX11_DCC_64K_R_X_PATIDX : GFX11_DCC_256K_R_X_PATIDX;
561 
562     if (pIn->dccKeyFlags.pipeAligned)
563     {
564         index += MaxNumOfBpp;
565 
566         if (m_numPkrLog2 < 2)
567         {
568             index += m_pipesLog2 * MaxNumOfBpp;
569         }
570         else
571         {
572             // 4 groups for "m_numPkrLog2 < 2" case
573             index += 4 * MaxNumOfBpp;
574 
575             const UINT_32 dccPipePerPkr = 3;
576 
577             index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
578                      (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
579         }
580     }
581 
582     const UINT_32  blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
583     const UINT_32  blkMask     = (1 << blkSizeLog2) - 1;
584     const UINT_32  blkOffset   = ComputeOffsetFromSwizzlePattern(GFX11_DCC_R_X_SW_PATTERN[patIdxTable[index]],
585                                                                  blkSizeLog2 + 1, // +1 for nibble offset
586                                                                  pIn->x,
587                                                                  pIn->y,
588                                                                  pIn->slice,
589                                                                  0);
590     const UINT_32 xb       = pIn->x / pIn->metaBlkWidth;
591     const UINT_32 yb       = pIn->y / pIn->metaBlkHeight;
592     const UINT_32 pb       = pIn->pitch / pIn->metaBlkWidth;
593     const UINT_32 blkIndex = (yb * pb) + xb;
594     const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
595 
596     pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
597                  (blkIndex * (1 << blkSizeLog2)) +
598                  ((blkOffset >> 1) ^ pipeXor);
599 }
600 
601 /**
602 ************************************************************************************************************************
603 *   Gfx11Lib::HwlInitGlobalParams
604 *
605 *   @brief
606 *       Initializes global parameters
607 *
608 *   @return
609 *       TRUE if all settings are valid
610 *
611 ************************************************************************************************************************
612 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)613 BOOL_32 Gfx11Lib::HwlInitGlobalParams(
614     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
615 {
616     BOOL_32              valid = TRUE;
617     GB_ADDR_CONFIG_GFX11 gbAddrConfig;
618 
619     gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
620 
621     switch (gbAddrConfig.bits.NUM_PIPES)
622     {
623         case ADDR_CONFIG_1_PIPE:
624             m_pipes     = 1;
625             m_pipesLog2 = 0;
626             break;
627         case ADDR_CONFIG_2_PIPE:
628             m_pipes     = 2;
629             m_pipesLog2 = 1;
630             break;
631         case ADDR_CONFIG_4_PIPE:
632             m_pipes     = 4;
633             m_pipesLog2 = 2;
634             break;
635         case ADDR_CONFIG_8_PIPE:
636             m_pipes     = 8;
637             m_pipesLog2 = 3;
638             break;
639         case ADDR_CONFIG_16_PIPE:
640             m_pipes     = 16;
641             m_pipesLog2 = 4;
642             break;
643         case ADDR_CONFIG_32_PIPE:
644             m_pipes     = 32;
645             m_pipesLog2 = 5;
646             break;
647         case ADDR_CONFIG_64_PIPE:
648             m_pipes     = 64;
649             m_pipesLog2 = 6;
650             break;
651         default:
652             ADDR_ASSERT_ALWAYS();
653             valid = FALSE;
654             break;
655     }
656 
657     switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
658     {
659         case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
660             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
661             m_pipeInterleaveLog2  = 8;
662             break;
663         case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
664             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
665             m_pipeInterleaveLog2  = 9;
666             break;
667         case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
668             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
669             m_pipeInterleaveLog2  = 10;
670             break;
671         case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
672             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
673             m_pipeInterleaveLog2  = 11;
674             break;
675         default:
676             ADDR_ASSERT_ALWAYS();
677             valid = FALSE;
678             break;
679     }
680 
681     // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
682     // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
683     // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
684     ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
685 
686     // These fields are deprecated on GFX11; they do nothing on HW.
687     m_maxCompFrag     = 1;
688     m_maxCompFragLog2 = 0;
689 
690     // Skip unaligned case
691     m_htileBaseIndex += MaxNumOfAA;
692 
693     m_htileBaseIndex += m_pipesLog2 * MaxNumOfAA;
694     m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
695 
696     m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
697     m_numSaLog2  = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
698 
699     ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
700 
701     if (m_numPkrLog2 >= 2)
702     {
703         m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
704         m_htileBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
705     }
706 
707     // There is no so-called VAR swizzle mode on GFX11 and instead there are 4 256KB swizzle modes. Here we treat 256KB
708     // swizzle mode as "VAR" swizzle mode for reusing existing facilities (e.g GetBlockSizeLog2()) provided by base class
709     m_blockVarSizeLog2 = 18;
710 
711     if (valid)
712     {
713         InitEquationTable();
714     }
715 
716     return valid;
717 }
718 
719 /**
720 ************************************************************************************************************************
721 *   Gfx11Lib::HwlConvertChipFamily
722 *
723 *   @brief
724 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
725 *   @return
726 *       ChipFamily
727 ************************************************************************************************************************
728 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)729 ChipFamily Gfx11Lib::HwlConvertChipFamily(
730     UINT_32 chipFamily,        ///< [in] chip family defined in atiih.h
731     UINT_32 chipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
732 {
733     ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
734 
735     switch (chipFamily)
736     {
737         case FAMILY_NV3:
738             if (ASICREV_IS_NAVI31_P(chipRevision))
739             {
740             }
741             if (ASICREV_IS_NAVI32_P(chipRevision))
742             {
743             }
744             if (ASICREV_IS_NAVI33_P(chipRevision))
745             {
746             }
747             break;
748 
749         case FAMILY_GFX1150:
750             {
751                 m_settings.isGfx1150 = 1;
752             }
753             break;
754         case FAMILY_GFX1103:
755             m_settings.isGfx1103 = 1;
756             break;
757         default:
758             ADDR_ASSERT(!"Unknown chip family");
759             break;
760     }
761 
762     m_configFlags.use32bppFor422Fmt = TRUE;
763 
764     return family;
765 }
766 
767 /**
768 ************************************************************************************************************************
769 *   Gfx11Lib::GetBlk256SizeLog2
770 *
771 *   @brief
772 *       Get block 256 size
773 *
774 *   @return
775 *       N/A
776 ************************************************************************************************************************
777 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const778 void Gfx11Lib::GetBlk256SizeLog2(
779     AddrResourceType resourceType,      ///< [in] Resource type
780     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
781     UINT_32          elemLog2,          ///< [in] element size log2
782     UINT_32          numSamplesLog2,    ///< [in] number of samples
783     Dim3d*           pBlock             ///< [out] block size
784     ) const
785 {
786     if (IsThin(resourceType, swizzleMode))
787     {
788         UINT_32 blockBits = 8 - elemLog2;
789 
790         // On GFX11, Z and R modes are the same thing.
791         if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
792         {
793             blockBits -= numSamplesLog2;
794         }
795 
796         pBlock->w = (blockBits >> 1) + (blockBits & 1);
797         pBlock->h = (blockBits >> 1);
798         pBlock->d = 0;
799     }
800     else
801     {
802         ADDR_ASSERT(IsThick(resourceType, swizzleMode));
803 
804         UINT_32 blockBits = 8 - elemLog2;
805 
806         pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
807         pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
808         pBlock->h = (blockBits / 3);
809     }
810 }
811 
812 /**
813 ************************************************************************************************************************
814 *   Gfx11Lib::GetCompressedBlockSizeLog2
815 *
816 *   @brief
817 *       Get compress block size
818 *
819 *   @return
820 *       N/A
821 ************************************************************************************************************************
822 */
GetCompressedBlockSizeLog2(Gfx11DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const823 void Gfx11Lib::GetCompressedBlockSizeLog2(
824     Gfx11DataType    dataType,          ///< [in] Data type
825     AddrResourceType resourceType,      ///< [in] Resource type
826     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
827     UINT_32          elemLog2,          ///< [in] element size log2
828     UINT_32          numSamplesLog2,    ///< [in] number of samples
829     Dim3d*           pBlock             ///< [out] block size
830     ) const
831 {
832     if (dataType == Gfx11DataColor)
833     {
834         GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
835     }
836     else
837     {
838         ADDR_ASSERT(dataType == Gfx11DataDepthStencil);
839         pBlock->w = 3;
840         pBlock->h = 3;
841         pBlock->d = 0;
842     }
843 }
844 
845 /**
846 ************************************************************************************************************************
847 *   Gfx11Lib::GetMetaOverlapLog2
848 *
849 *   @brief
850 *       Get meta block overlap
851 *
852 *   @return
853 *       N/A
854 ************************************************************************************************************************
855 */
GetMetaOverlapLog2(Gfx11DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const856 INT_32 Gfx11Lib::GetMetaOverlapLog2(
857     Gfx11DataType    dataType,          ///< [in] Data type
858     AddrResourceType resourceType,      ///< [in] Resource type
859     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
860     UINT_32          elemLog2,          ///< [in] element size log2
861     UINT_32          numSamplesLog2     ///< [in] number of samples
862     ) const
863 {
864     Dim3d compBlock;
865     Dim3d microBlock;
866 
867     GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
868     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
869 
870     const INT_32 blkSizeLog2    = GetBlockSizeLog2(swizzleMode);
871     const INT_32 compSizeLog2   = compBlock.w  + compBlock.h  + compBlock.d;
872     const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
873     const INT_32 maxSizeLog2    = Max(compSizeLog2, blk256SizeLog2);
874     const INT_32 numPipesLog2   = GetEffectiveNumPipes();
875     INT_32       overlap        = numPipesLog2 - maxSizeLog2;
876 
877     if (numPipesLog2 > 1)
878     {
879         overlap++;
880     }
881 
882     // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
883     if ((elemLog2 == 4) && (numSamplesLog2 == 3) && (blkSizeLog2 == 16))
884     {
885         overlap--;
886     }
887     overlap += 16 - blkSizeLog2;
888     overlap = Max(overlap, 0);
889     return overlap;
890 }
891 
892 /**
893 ************************************************************************************************************************
894 *   Gfx11Lib::Get3DMetaOverlapLog2
895 *
896 *   @brief
897 *       Get 3d meta block overlap
898 *
899 *   @return
900 *       N/A
901 ************************************************************************************************************************
902 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const903 INT_32 Gfx11Lib::Get3DMetaOverlapLog2(
904     AddrResourceType resourceType,      ///< [in] Resource type
905     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
906     UINT_32          elemLog2           ///< [in] element size log2
907     ) const
908 {
909     Dim3d microBlock;
910     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
911 
912     INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
913 
914     overlap++;
915 
916     if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
917     {
918         overlap = 0;
919     }
920     return overlap;
921 }
922 
923 /**
924 ************************************************************************************************************************
925 *   Gfx11Lib::GetPipeRotateAmount
926 *
927 *   @brief
928 *       Get pipe rotate amount
929 *
930 *   @return
931 *       Pipe rotate amount
932 ************************************************************************************************************************
933 */
934 
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const935 INT_32 Gfx11Lib::GetPipeRotateAmount(
936     AddrResourceType resourceType,      ///< [in] Resource type
937     AddrSwizzleMode  swizzleMode        ///< [in] Swizzle mode
938     ) const
939 {
940     INT_32 amount = 0;
941 
942     if ((m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
943     {
944         amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
945                  1 : m_pipesLog2 - (m_numSaLog2 + 1);
946     }
947 
948     return amount;
949 }
950 
951 /**
952 ************************************************************************************************************************
953 *   Gfx11Lib::GetMetaBlkSize
954 *
955 *   @brief
956 *       Get metadata block size
957 *
958 *   @return
959 *       Meta block size
960 ************************************************************************************************************************
961 */
GetMetaBlkSize(Gfx11DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const962 UINT_32 Gfx11Lib::GetMetaBlkSize(
963     Gfx11DataType    dataType,          ///< [in] Data type
964     AddrResourceType resourceType,      ///< [in] Resource type
965     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
966     UINT_32          elemLog2,          ///< [in] element size log2
967     UINT_32          numSamplesLog2,    ///< [in] number of samples
968     BOOL_32          pipeAlign,         ///< [in] pipe align
969     Dim3d*           pBlock             ///< [out] block size
970     ) const
971 {
972     INT_32 metablkSizeLog2;
973 
974     const INT_32 metaElemSizeLog2   = GetMetaElementSizeLog2(dataType);
975     const INT_32 metaCacheSizeLog2  = GetMetaCacheSizeLog2(dataType);
976     const INT_32 compBlkSizeLog2    = (dataType == Gfx11DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
977     const INT_32 metaBlkSamplesLog2 = numSamplesLog2;
978     const INT_32 dataBlkSizeLog2    = GetBlockSizeLog2(swizzleMode);
979     INT_32       numPipesLog2       = m_pipesLog2;
980 
981     if (IsThin(resourceType, swizzleMode))
982     {
983         if ((pipeAlign == FALSE) ||
984             (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
985             (IsDisplaySwizzle(resourceType, swizzleMode)  == TRUE))
986         {
987             if (pipeAlign)
988             {
989                 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
990                 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
991             }
992             else
993             {
994                 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
995             }
996         }
997         else
998         {
999             if ((m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1000             {
1001                 numPipesLog2++;
1002             }
1003 
1004             INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1005 
1006             if (numPipesLog2 >= 4)
1007             {
1008                 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1009 
1010                 // In 16Bpe 8xaa, we have an extra overlap bit
1011                 if ((pipeRotateLog2 > 0)  &&
1012                     (elemLog2 == 4)       &&
1013                     (numSamplesLog2 == 3) &&
1014                     (IsZOrderSwizzle(swizzleMode) ||
1015                      IsRtOptSwizzle(swizzleMode)  ||
1016                      (GetEffectiveNumPipes() > 3)))
1017                 {
1018                     overlapLog2++;
1019                 }
1020 
1021                 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1022                 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1023             }
1024             else
1025             {
1026                 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1027             }
1028 
1029             if (dataType == Gfx11DataDepthStencil)
1030             {
1031                 // For htile surfaces, pad meta block size to 2K * num_pipes
1032                 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1033             }
1034 
1035             /* This chunk is not part of upstream addrlib. See !28268 */
1036             const INT_32 compFragLog2 = numSamplesLog2;
1037 
1038             if  (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1039             {
1040                 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1041 
1042                 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1043             }
1044             /* End of the non-upstream chunk. */
1045         }
1046 
1047         const INT_32 metablkBitsLog2 =
1048             metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1049         pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1050         pBlock->h = 1 << (metablkBitsLog2 >> 1);
1051         pBlock->d = 1;
1052     }
1053     else
1054     {
1055         ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1056 
1057         if (pipeAlign)
1058         {
1059             if ((m_pipesLog2 == m_numSaLog2 + 1) &&
1060                 (m_pipesLog2 > 1)                &&
1061                 IsRbAligned(resourceType, swizzleMode))
1062             {
1063                 numPipesLog2++;
1064             }
1065 
1066             const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1067 
1068             metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1069             metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1070             metablkSizeLog2 = Max(metablkSizeLog2, 12);
1071         }
1072         else
1073         {
1074             metablkSizeLog2 = 12;
1075         }
1076 
1077         const INT_32 metablkBitsLog2 =
1078             metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1079         pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1080         pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1081         pBlock->d = 1 << (metablkBitsLog2 / 3);
1082     }
1083 
1084     return (1 << static_cast<UINT_32>(metablkSizeLog2));
1085 }
1086 
1087 /**
1088 ************************************************************************************************************************
1089 *   Gfx11Lib::ConvertSwizzlePatternToEquation
1090 *
1091 *   @brief
1092 *       Convert swizzle pattern to equation.
1093 *
1094 *   @return
1095 *       N/A
1096 ************************************************************************************************************************
1097 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,AddrResourceType rsrcType,AddrSwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const1098 VOID Gfx11Lib::ConvertSwizzlePatternToEquation(
1099     UINT_32                elemLog2,  ///< [in] element bytes log2
1100     AddrResourceType       rsrcType,  ///< [in] resource type
1101     AddrSwizzleMode        swMode,    ///< [in] swizzle mode
1102     const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern infor
1103     ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
1104     const
1105 {
1106     ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
1107     GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1108 
1109     const ADDR_BIT_SETTING* pSwizzle      = fullSwizzlePattern;
1110     const UINT_32           blockSizeLog2 = GetBlockSizeLog2(swMode);
1111     memset(pEquation, 0, sizeof(ADDR_EQUATION));
1112     pEquation->numBits            = blockSizeLog2;
1113     pEquation->numBitComponents   = pPatInfo->maxItemCount;
1114     pEquation->stackedDepthSlices = FALSE;
1115 
1116     for (UINT_32 i = 0; i < elemLog2; i++)
1117     {
1118         pEquation->addr[i].channel = 0;
1119         pEquation->addr[i].valid   = 1;
1120         pEquation->addr[i].index   = i;
1121     }
1122 
1123     if (IsXor(swMode) == FALSE)
1124     {
1125         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1126         {
1127             ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1128 
1129             if (pSwizzle[i].x != 0)
1130             {
1131                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1132 
1133                 pEquation->addr[i].channel = 0;
1134                 pEquation->addr[i].valid   = 1;
1135                 pEquation->addr[i].index   = Log2(pSwizzle[i].x) + elemLog2;
1136             }
1137             else if (pSwizzle[i].y != 0)
1138             {
1139                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1140 
1141                 pEquation->addr[i].channel = 1;
1142                 pEquation->addr[i].valid   = 1;
1143                 pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1144             }
1145             else
1146             {
1147                 ADDR_ASSERT(pSwizzle[i].z != 0);
1148                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1149 
1150                 pEquation->addr[i].channel = 2;
1151                 pEquation->addr[i].valid   = 1;
1152                 pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1153             }
1154 
1155             pEquation->xor1[i].value = 0;
1156             pEquation->xor2[i].value = 0;
1157         }
1158     }
1159     else if (IsThin(rsrcType, swMode))
1160     {
1161         Dim3d dim;
1162         ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1163 
1164         const UINT_32 blkXLog2 = Log2(dim.w);
1165         const UINT_32 blkYLog2 = Log2(dim.h);
1166         const UINT_32 blkXMask = dim.w - 1;
1167         const UINT_32 blkYMask = dim.h - 1;
1168 
1169         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT];
1170         UINT_32          xMask = 0;
1171         UINT_32          yMask = 0;
1172         UINT_32          bMask = (1 << elemLog2) - 1;
1173 
1174         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1175         {
1176             if (IsPow2(pSwizzle[i].value))
1177             {
1178                 if (pSwizzle[i].x != 0)
1179                 {
1180                     ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1181                     xMask |= pSwizzle[i].x;
1182 
1183                     const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1184 
1185                     ADDR_ASSERT(xLog2 < blkXLog2);
1186 
1187                     pEquation->addr[i].channel = 0;
1188                     pEquation->addr[i].valid   = 1;
1189                     pEquation->addr[i].index   = xLog2 + elemLog2;
1190                 }
1191                 else
1192                 {
1193                     ADDR_ASSERT(pSwizzle[i].y != 0);
1194                     ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1195                     yMask |= pSwizzle[i].y;
1196 
1197                     pEquation->addr[i].channel = 1;
1198                     pEquation->addr[i].valid   = 1;
1199                     pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1200 
1201                     ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1202                 }
1203 
1204                 swizzle[i].value = 0;
1205                 bMask |= 1 << i;
1206             }
1207             else
1208             {
1209                 if (pSwizzle[i].z != 0)
1210                 {
1211                     ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1212 
1213                     pEquation->xor2[i].channel = 2;
1214                     pEquation->xor2[i].valid   = 1;
1215                     pEquation->xor2[i].index   = Log2(pSwizzle[i].z);
1216                 }
1217 
1218                 swizzle[i].x = pSwizzle[i].x;
1219                 swizzle[i].y = pSwizzle[i].y;
1220                 swizzle[i].z = swizzle[i].s = 0;
1221 
1222                 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1223 
1224                 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1225 
1226                 if (xHi != 0)
1227                 {
1228                     ADDR_ASSERT(IsPow2(xHi));
1229                     ADDR_ASSERT(pEquation->xor1[i].value == 0);
1230 
1231                     pEquation->xor1[i].channel = 0;
1232                     pEquation->xor1[i].valid   = 1;
1233                     pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1234 
1235                     swizzle[i].x &= blkXMask;
1236                 }
1237 
1238                 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1239 
1240                 if (yHi != 0)
1241                 {
1242                     ADDR_ASSERT(IsPow2(yHi));
1243 
1244                     if (xHi == 0)
1245                     {
1246                         ADDR_ASSERT(pEquation->xor1[i].value == 0);
1247                         pEquation->xor1[i].channel = 1;
1248                         pEquation->xor1[i].valid   = 1;
1249                         pEquation->xor1[i].index   = Log2(yHi);
1250                     }
1251                     else
1252                     {
1253                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1254                         pEquation->xor2[i].channel = 1;
1255                         pEquation->xor2[i].valid   = 1;
1256                         pEquation->xor2[i].index   = Log2(yHi);
1257                     }
1258 
1259                     swizzle[i].y &= blkYMask;
1260                 }
1261 
1262                 if (swizzle[i].value == 0)
1263                 {
1264                     bMask |= 1 << i;
1265                 }
1266             }
1267         }
1268 
1269         const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1270         const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1271 
1272         ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1273 
1274         while (bMask != blockMask)
1275         {
1276             for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1277             {
1278                 if ((bMask & (1 << i)) == 0)
1279                 {
1280                     if (IsPow2(swizzle[i].value))
1281                     {
1282                         if (swizzle[i].x != 0)
1283                         {
1284                             ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1285                             xMask |= swizzle[i].x;
1286 
1287                             const UINT_32 xLog2 = Log2(swizzle[i].x);
1288 
1289                             ADDR_ASSERT(xLog2 < blkXLog2);
1290 
1291                             pEquation->addr[i].channel = 0;
1292                             pEquation->addr[i].valid   = 1;
1293                             pEquation->addr[i].index   = xLog2 + elemLog2;
1294                         }
1295                         else
1296                         {
1297                             ADDR_ASSERT(swizzle[i].y != 0);
1298                             ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1299                             yMask |= swizzle[i].y;
1300 
1301                             pEquation->addr[i].channel = 1;
1302                             pEquation->addr[i].valid   = 1;
1303                             pEquation->addr[i].index   = Log2(swizzle[i].y);
1304 
1305                             ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1306                         }
1307 
1308                         swizzle[i].value = 0;
1309                         bMask |= 1 << i;
1310                     }
1311                     else
1312                     {
1313                         const UINT_32 x = swizzle[i].x & xMask;
1314                         const UINT_32 y = swizzle[i].y & yMask;
1315 
1316                         if (x != 0)
1317                         {
1318                             ADDR_ASSERT(IsPow2(x));
1319 
1320                             if (pEquation->xor1[i].value == 0)
1321                             {
1322                                 pEquation->xor1[i].channel = 0;
1323                                 pEquation->xor1[i].valid   = 1;
1324                                 pEquation->xor1[i].index   = Log2(x) + elemLog2;
1325                             }
1326                             else
1327                             {
1328                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1329                                 pEquation->xor2[i].channel = 0;
1330                                 pEquation->xor2[i].valid   = 1;
1331                                 pEquation->xor2[i].index   = Log2(x) + elemLog2;
1332                             }
1333                         }
1334 
1335                         if (y != 0)
1336                         {
1337                             ADDR_ASSERT(IsPow2(y));
1338 
1339                             if (pEquation->xor1[i].value == 0)
1340                             {
1341                                 pEquation->xor1[i].channel = 1;
1342                                 pEquation->xor1[i].valid   = 1;
1343                                 pEquation->xor1[i].index   = Log2(y);
1344                             }
1345                             else
1346                             {
1347                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1348                                 pEquation->xor2[i].channel = 1;
1349                                 pEquation->xor2[i].valid   = 1;
1350                                 pEquation->xor2[i].index   = Log2(y);
1351                             }
1352                         }
1353 
1354                         swizzle[i].x &= ~x;
1355                         swizzle[i].y &= ~y;
1356                     }
1357                 }
1358             }
1359         }
1360 
1361         ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1362     }
1363     else
1364     {
1365         const Dim3d& blkDim = (blockSizeLog2 == Log2Size256K) ?
1366                               Block256K_Log2_3d[elemLog2] :
1367                               ((blockSizeLog2 == Log2Size4K) ? Block4K_Log2_3d[elemLog2] : Block64K_Log2_3d[elemLog2]);
1368 
1369         const UINT_32 blkXLog2 = blkDim.w;
1370         const UINT_32 blkYLog2 = blkDim.h;
1371         const UINT_32 blkZLog2 = blkDim.d;
1372         const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1373         const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1374         const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1375 
1376         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1377         UINT_32          xMask = 0;
1378         UINT_32          yMask = 0;
1379         UINT_32          zMask = 0;
1380         UINT_32          bMask = (1 << elemLog2) - 1;
1381 
1382         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1383         {
1384             if (IsPow2(pSwizzle[i].value))
1385             {
1386                 if (pSwizzle[i].x != 0)
1387                 {
1388                     ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1389                     xMask |= pSwizzle[i].x;
1390 
1391                     const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1392 
1393                     ADDR_ASSERT(xLog2 < blkXLog2);
1394 
1395                     pEquation->addr[i].channel = 0;
1396                     pEquation->addr[i].valid   = 1;
1397                     pEquation->addr[i].index   = xLog2 + elemLog2;
1398                 }
1399                 else if (pSwizzle[i].y != 0)
1400                 {
1401                     ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1402                     yMask |= pSwizzle[i].y;
1403 
1404                     pEquation->addr[i].channel = 1;
1405                     pEquation->addr[i].valid   = 1;
1406                     pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1407 
1408                     ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1409                 }
1410                 else
1411                 {
1412                     ADDR_ASSERT(pSwizzle[i].z != 0);
1413                     ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1414                     zMask |= pSwizzle[i].z;
1415 
1416                     pEquation->addr[i].channel = 2;
1417                     pEquation->addr[i].valid   = 1;
1418                     pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1419 
1420                     ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1421                 }
1422 
1423                 swizzle[i].value = 0;
1424                 bMask |= 1 << i;
1425             }
1426             else
1427             {
1428                 swizzle[i].x = pSwizzle[i].x;
1429                 swizzle[i].y = pSwizzle[i].y;
1430                 swizzle[i].z = pSwizzle[i].z;
1431                 swizzle[i].s = 0;
1432 
1433                 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1434 
1435                 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1436                 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1437                 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1438 
1439                 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1440 
1441                 if (xHi != 0)
1442                 {
1443                     ADDR_ASSERT(IsPow2(xHi));
1444                     ADDR_ASSERT(pEquation->xor1[i].value == 0);
1445 
1446                     pEquation->xor1[i].channel = 0;
1447                     pEquation->xor1[i].valid   = 1;
1448                     pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1449 
1450                     swizzle[i].x &= blkXMask;
1451                 }
1452 
1453                 if (yHi != 0)
1454                 {
1455                     ADDR_ASSERT(IsPow2(yHi));
1456 
1457                     if (pEquation->xor1[i].value == 0)
1458                     {
1459                         pEquation->xor1[i].channel = 1;
1460                         pEquation->xor1[i].valid   = 1;
1461                         pEquation->xor1[i].index   = Log2(yHi);
1462                     }
1463                     else
1464                     {
1465                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1466                         pEquation->xor2[i].channel = 1;
1467                         pEquation->xor2[i].valid   = 1;
1468                         pEquation->xor2[i].index   = Log2(yHi);
1469                     }
1470 
1471                     swizzle[i].y &= blkYMask;
1472                 }
1473 
1474                 if (zHi != 0)
1475                 {
1476                     ADDR_ASSERT(IsPow2(zHi));
1477 
1478                     if (pEquation->xor1[i].value == 0)
1479                     {
1480                         pEquation->xor1[i].channel = 2;
1481                         pEquation->xor1[i].valid   = 1;
1482                         pEquation->xor1[i].index   = Log2(zHi);
1483                     }
1484                     else
1485                     {
1486                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1487                         pEquation->xor2[i].channel = 2;
1488                         pEquation->xor2[i].valid   = 1;
1489                         pEquation->xor2[i].index   = Log2(zHi);
1490                     }
1491 
1492                     swizzle[i].z &= blkZMask;
1493                 }
1494 
1495                 if (swizzle[i].value == 0)
1496                 {
1497                     bMask |= 1 << i;
1498                 }
1499             }
1500         }
1501 
1502         const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1503         const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1504 
1505         ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1506 
1507         while (bMask != blockMask)
1508         {
1509             for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1510             {
1511                 if ((bMask & (1 << i)) == 0)
1512                 {
1513                     if (IsPow2(swizzle[i].value))
1514                     {
1515                         if (swizzle[i].x != 0)
1516                         {
1517                             ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1518                             xMask |= swizzle[i].x;
1519 
1520                             const UINT_32 xLog2 = Log2(swizzle[i].x);
1521 
1522                             ADDR_ASSERT(xLog2 < blkXLog2);
1523 
1524                             pEquation->addr[i].channel = 0;
1525                             pEquation->addr[i].valid   = 1;
1526                             pEquation->addr[i].index   = xLog2 + elemLog2;
1527                         }
1528                         else if (swizzle[i].y != 0)
1529                         {
1530                             ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1531                             yMask |= swizzle[i].y;
1532 
1533                             pEquation->addr[i].channel = 1;
1534                             pEquation->addr[i].valid   = 1;
1535                             pEquation->addr[i].index   = Log2(swizzle[i].y);
1536 
1537                             ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1538                         }
1539                         else
1540                         {
1541                             ADDR_ASSERT(swizzle[i].z != 0);
1542                             ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1543                             zMask |= swizzle[i].z;
1544 
1545                             pEquation->addr[i].channel = 2;
1546                             pEquation->addr[i].valid   = 1;
1547                             pEquation->addr[i].index   = Log2(swizzle[i].z);
1548 
1549                             ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1550                         }
1551 
1552                         swizzle[i].value = 0;
1553                         bMask |= 1 << i;
1554                     }
1555                     else
1556                     {
1557                         const UINT_32 x = swizzle[i].x & xMask;
1558                         const UINT_32 y = swizzle[i].y & yMask;
1559                         const UINT_32 z = swizzle[i].z & zMask;
1560 
1561                         if (x != 0)
1562                         {
1563                             ADDR_ASSERT(IsPow2(x));
1564 
1565                             if (pEquation->xor1[i].value == 0)
1566                             {
1567                                 pEquation->xor1[i].channel = 0;
1568                                 pEquation->xor1[i].valid   = 1;
1569                                 pEquation->xor1[i].index   = Log2(x) + elemLog2;
1570                             }
1571                             else
1572                             {
1573                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1574                                 pEquation->xor2[i].channel = 0;
1575                                 pEquation->xor2[i].valid   = 1;
1576                                 pEquation->xor2[i].index   = Log2(x) + elemLog2;
1577                             }
1578                         }
1579 
1580                         if (y != 0)
1581                         {
1582                             ADDR_ASSERT(IsPow2(y));
1583 
1584                             if (pEquation->xor1[i].value == 0)
1585                             {
1586                                 pEquation->xor1[i].channel = 1;
1587                                 pEquation->xor1[i].valid   = 1;
1588                                 pEquation->xor1[i].index   = Log2(y);
1589                             }
1590                             else
1591                             {
1592                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1593                                 pEquation->xor2[i].channel = 1;
1594                                 pEquation->xor2[i].valid   = 1;
1595                                 pEquation->xor2[i].index   = Log2(y);
1596                             }
1597                         }
1598 
1599                         if (z != 0)
1600                         {
1601                             ADDR_ASSERT(IsPow2(z));
1602 
1603                             if (pEquation->xor1[i].value == 0)
1604                             {
1605                                 pEquation->xor1[i].channel = 2;
1606                                 pEquation->xor1[i].valid   = 1;
1607                                 pEquation->xor1[i].index   = Log2(z);
1608                             }
1609                             else
1610                             {
1611                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1612                                 pEquation->xor2[i].channel = 2;
1613                                 pEquation->xor2[i].valid   = 1;
1614                                 pEquation->xor2[i].index   = Log2(z);
1615                             }
1616                         }
1617 
1618                         swizzle[i].x &= ~x;
1619                         swizzle[i].y &= ~y;
1620                         swizzle[i].z &= ~z;
1621                     }
1622                 }
1623             }
1624         }
1625 
1626         ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1627     }
1628 }
1629 
1630 /**
1631 ************************************************************************************************************************
1632 *   Gfx11Lib::InitEquationTable
1633 *
1634 *   @brief
1635 *       Initialize Equation table.
1636 *
1637 *   @return
1638 *       N/A
1639 ************************************************************************************************************************
1640 */
InitEquationTable()1641 VOID Gfx11Lib::InitEquationTable()
1642 {
1643     memset(m_equationTable, 0, sizeof(m_equationTable));
1644 
1645     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1646     {
1647         const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1648 
1649         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1650         {
1651             const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1652 
1653             for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1654             {
1655                 UINT_32                equationIndex = ADDR_INVALID_EQUATION_INDEX;
1656                 const ADDR_SW_PATINFO* pPatInfo      = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1657 
1658                 if (pPatInfo != NULL)
1659                 {
1660                     ADDR_ASSERT(IsValidSwMode(swMode));
1661 
1662                     if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex
1663                     {
1664                         ADDR_EQUATION equation = {};
1665 
1666                         // Passing in pPatInfo to get the addr equation
1667                         ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
1668 
1669                         equationIndex = m_numEquations;
1670                         ADDR_ASSERT(equationIndex < EquationTableSize);
1671                         // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo
1672                         m_equationTable[equationIndex] = equation;
1673                         // Increment m_numEquations
1674                         m_numEquations++;
1675                     }
1676                     else // There is no equationIndex
1677                     {
1678                         // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X
1679                         ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
1680                         ADDR_ASSERT(rsrcType == ADDR_RSRC_TEX_3D);
1681                         ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
1682                     }
1683                 }
1684 
1685                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
1686             }
1687         }
1688     }
1689 }
1690 
1691 /**
1692 ************************************************************************************************************************
1693 *   Gfx11Lib::HwlGetEquationIndex
1694 *
1695 *   @brief
1696 *       Interface function stub of GetEquationIndex
1697 *
1698 *   @return
1699 *       ADDR_E_RETURNCODE
1700 ************************************************************************************************************************
1701 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const1702 UINT_32 Gfx11Lib::HwlGetEquationIndex(
1703     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
1704     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
1705     ) const
1706 {
1707     UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
1708 
1709     if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
1710         (pIn->resourceType == ADDR_RSRC_TEX_3D))
1711     {
1712         const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
1713         const UINT_32 swModeIdx   = static_cast<UINT_32>(pIn->swizzleMode);
1714         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
1715 
1716         equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
1717     }
1718 
1719     if (pOut->pMipInfo != NULL)
1720     {
1721         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
1722         {
1723             pOut->pMipInfo[i].equationIndex = equationIdx;
1724         }
1725     }
1726 
1727     return equationIdx;
1728 }
1729 
1730 /**
1731 ************************************************************************************************************************
1732 *   Gfx11Lib::GetValidDisplaySwizzleModes
1733 *
1734 *   @brief
1735 *       Get valid swizzle modes mask for displayable surface
1736 *
1737 *   @return
1738 *       Valid swizzle modes mask for displayable surface
1739 ************************************************************************************************************************
1740 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const1741 UINT_32 Gfx11Lib::GetValidDisplaySwizzleModes(
1742     UINT_32 bpp
1743     ) const
1744 {
1745     UINT_32 swModeMask = 0;
1746 
1747     if (bpp <= 64)
1748     {
1749         const ChipFamily  family = GetChipFamily();
1750 
1751         swModeMask = Dcn32SwModeMask;
1752 
1753         if (false
1754             || (m_settings.isGfx1103)
1755             || (m_settings.isGfx1150)
1756            )
1757         {
1758             // Not all GPUs support displaying with 256kB swizzle modes.
1759             swModeMask &= ~((1u << ADDR_SW_256KB_D_X) |
1760                             (1u << ADDR_SW_256KB_R_X));
1761         }
1762     }
1763 
1764     return swModeMask;
1765 }
1766 
1767 /**
1768 ************************************************************************************************************************
1769 *   Gfx11Lib::IsValidDisplaySwizzleMode
1770 *
1771 *   @brief
1772 *       Check if a swizzle mode is supported by display engine
1773 *
1774 *   @return
1775 *       TRUE is swizzle mode is supported by display engine
1776 ************************************************************************************************************************
1777 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const1778 BOOL_32 Gfx11Lib::IsValidDisplaySwizzleMode(
1779     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
1780     ) const
1781 {
1782     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
1783 
1784     return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
1785 }
1786 
1787 /**
1788 ************************************************************************************************************************
1789 *   Gfx11Lib::GetMaxNumMipsInTail
1790 *
1791 *   @brief
1792 *       Return max number of mips in tails
1793 *
1794 *   @return
1795 *       Max number of mips in tails
1796 ************************************************************************************************************************
1797 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const1798 UINT_32 Gfx11Lib::GetMaxNumMipsInTail(
1799     UINT_32 blockSizeLog2,     ///< block size log2
1800     BOOL_32 isThin             ///< is thin or thick
1801     ) const
1802 {
1803     UINT_32 effectiveLog2 = blockSizeLog2;
1804 
1805     if (isThin == FALSE)
1806     {
1807         effectiveLog2 -= (blockSizeLog2 - 8) / 3;
1808     }
1809 
1810     return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
1811 }
1812 
1813 /**
1814 ************************************************************************************************************************
1815 *   Gfx11Lib::HwlComputePipeBankXor
1816 *
1817 *   @brief
1818 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
1819 *
1820 *   @return
1821 *       PipeBankXor value
1822 ************************************************************************************************************************
1823 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const1824 ADDR_E_RETURNCODE Gfx11Lib::HwlComputePipeBankXor(
1825     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,     ///< [in] input structure
1826     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut     ///< [out] output structure
1827     ) const
1828 {
1829     if (IsNonPrtXor(pIn->swizzleMode))
1830     {
1831         pOut->pipeBankXor = 0;
1832     }
1833     else
1834     {
1835         pOut->pipeBankXor = 0;
1836     }
1837 
1838     return ADDR_OK;
1839 }
1840 
1841 /**
1842 ************************************************************************************************************************
1843 *   Gfx11Lib::HwlComputeSlicePipeBankXor
1844 *
1845 *   @brief
1846 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
1847 *
1848 *   @return
1849 *       PipeBankXor value
1850 ************************************************************************************************************************
1851 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const1852 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSlicePipeBankXor(
1853     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,   ///< [in] input structure
1854     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut   ///< [out] output structure
1855     ) const
1856 {
1857     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1858 
1859     if (IsNonPrtXor(pIn->swizzleMode))
1860     {
1861         if (pIn->bpe == 0)
1862         {
1863             ADDR_ASSERT_ALWAYS();
1864 
1865             // Require a valid bytes-per-element value passed from client...
1866             returnCode = ADDR_INVALIDPARAMS;
1867         }
1868         else
1869         {
1870             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
1871                                                                     pIn->resourceType,
1872                                                                     Log2(pIn->bpe >> 3),
1873                                                                     1);
1874 
1875             if (pPatInfo != NULL)
1876             {
1877                 ADDR_BIT_SETTING fullSwizzlePattern[20];
1878                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1879 
1880                 const UINT_32 pipeBankXorOffset =
1881                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
1882                                                     GetBlockSizeLog2(pIn->swizzleMode),
1883                                                     0,
1884                                                     0,
1885                                                     pIn->slice,
1886                                                     0);
1887 
1888                 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
1889 
1890                 // Should have no bit set under pipe interleave
1891                 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
1892 
1893                 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
1894             }
1895             else
1896             {
1897                 // Should never come here...
1898                 ADDR_NOT_IMPLEMENTED();
1899 
1900                 returnCode = ADDR_NOTSUPPORTED;
1901             }
1902         }
1903     }
1904     else
1905     {
1906         pOut->pipeBankXor = 0;
1907     }
1908 
1909     return returnCode;
1910 }
1911 
1912 /**
1913 ************************************************************************************************************************
1914 *   Gfx11Lib::HwlComputeSubResourceOffsetForSwizzlePattern
1915 *
1916 *   @brief
1917 *       Compute sub resource offset to support swizzle pattern
1918 *
1919 *   @return
1920 *       Offset
1921 ************************************************************************************************************************
1922 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const1923 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
1924     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,    ///< [in] input structure
1925     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut    ///< [out] output structure
1926     ) const
1927 {
1928     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
1929 
1930     pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
1931 
1932     return ADDR_OK;
1933 }
1934 
1935 /**
1936 ************************************************************************************************************************
1937 *   Gfx11Lib::HwlComputeNonBlockCompressedView
1938 *
1939 *   @brief
1940 *       Compute non-block-compressed view for a given mipmap level/slice.
1941 *
1942 *   @return
1943 *       ADDR_E_RETURNCODE
1944 ************************************************************************************************************************
1945 */
HwlComputeNonBlockCompressedView(const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const1946 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeNonBlockCompressedView(
1947     const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,    ///< [in] input structure
1948     ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut    ///< [out] output structure
1949     ) const
1950 {
1951     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1952 
1953     if (IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE)
1954     {
1955         // Only thin swizzle mode can have a NonBC view...
1956         returnCode = ADDR_INVALIDPARAMS;
1957     }
1958     else if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
1959              ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
1960     {
1961         // Only support BC1~BC7, ASTC, or ETC2 for now...
1962         returnCode = ADDR_NOTSUPPORTED;
1963     }
1964     else
1965     {
1966         UINT_32 bcWidth, bcHeight;
1967         UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
1968 
1969         ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
1970         infoIn.flags        = pIn->flags;
1971         infoIn.swizzleMode  = pIn->swizzleMode;
1972         infoIn.resourceType = pIn->resourceType;
1973         infoIn.bpp          = bpp;
1974         infoIn.width        = RoundUpQuotient(pIn->width, bcWidth);
1975         infoIn.height       = RoundUpQuotient(pIn->height, bcHeight);
1976         infoIn.numSlices    = pIn->numSlices;
1977         infoIn.numMipLevels = pIn->numMipLevels;
1978         infoIn.numSamples   = 1;
1979         infoIn.numFrags     = 1;
1980 
1981         ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
1982 
1983         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
1984         infoOut.pMipInfo = mipInfo;
1985 
1986         const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;
1987 
1988         if (tiled)
1989         {
1990             returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
1991         }
1992         else
1993         {
1994             returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
1995         }
1996 
1997         if (returnCode == ADDR_OK)
1998         {
1999             ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
2000             subOffIn.swizzleMode      = infoIn.swizzleMode;
2001             subOffIn.resourceType     = infoIn.resourceType;
2002             subOffIn.slice            = pIn->slice;
2003             subOffIn.sliceSize        = infoOut.sliceSize;
2004             subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
2005             subOffIn.mipTailOffset    = mipInfo[pIn->mipId].mipTailOffset;
2006 
2007             ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
2008 
2009             // For any mipmap level, move nonBc view base address by offset
2010             HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
2011             pOut->offset = subOffOut.offset;
2012 
2013             ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
2014             slicePbXorIn.bpe             = infoIn.bpp;
2015             slicePbXorIn.swizzleMode     = infoIn.swizzleMode;
2016             slicePbXorIn.resourceType    = infoIn.resourceType;
2017             slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
2018             slicePbXorIn.slice           = pIn->slice;
2019 
2020             ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
2021 
2022             // For any mipmap level, nonBc view should use computed pbXor
2023             HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
2024             pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
2025 
2026             const BOOL_32 inTail           = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
2027             const UINT_32 requestMipWidth  = RoundUpQuotient(Max(pIn->width >> pIn->mipId, 1u), bcWidth);
2028             const UINT_32 requestMipHeight = RoundUpQuotient(Max(pIn->height >> pIn->mipId, 1u), bcHeight);
2029 
2030             if (inTail)
2031             {
2032                 // For mipmap level that is in mip tail block, hack a lot of things...
2033                 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
2034                 // are fit in tail block:
2035 
2036                 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
2037                 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
2038 
2039                 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
2040                 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
2041 
2042                 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
2043                 pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);
2044 
2045                 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
2046                 pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
2047             }
2048             // This check should cover at least mipId == 0
2049             else if (requestMipWidth << pIn->mipId == infoIn.width)
2050             {
2051                 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
2052                 // - only one mipmap level and mipId = 0
2053                 pOut->mipId        = 0;
2054                 pOut->numMipLevels = 1;
2055 
2056                 // (mip0) width = requestMipWidth
2057                 pOut->unalignedWidth = requestMipWidth;
2058 
2059                 // (mip0) height = requestMipHeight
2060                 pOut->unalignedHeight = requestMipHeight;
2061             }
2062             else
2063             {
2064                 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
2065                 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
2066                 // because single mip view may have different pitch value than original (multiple) mip view...
2067                 // A simple case would be:
2068                 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
2069                 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
2070                 //   mip0 width = 0x101/mip1 width = 0x80
2071                 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
2072                 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
2073 
2074                 // - 2 levels and mipId = 1
2075                 pOut->mipId        = 1;
2076                 pOut->numMipLevels = 2;
2077 
2078                 const UINT_32 upperMipWidth  = RoundUpQuotient(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth);
2079                 const UINT_32 upperMipHeight = RoundUpQuotient(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight);
2080 
2081                 const BOOL_32 needToAvoidInTail =
2082                     tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
2083                     TRUE : FALSE;
2084 
2085                 const UINT_32 hwMipWidth  = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
2086                 const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);
2087 
2088                 const BOOL_32 needExtraWidth =
2089                     ((upperMipWidth < requestMipWidth * 2) ||
2090                      ((upperMipWidth == requestMipWidth * 2) &&
2091                       ((needToAvoidInTail == TRUE) ||
2092                        (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;
2093 
2094                 const BOOL_32 needExtraHeight =
2095                     ((upperMipHeight < requestMipHeight * 2) ||
2096                      ((upperMipHeight == requestMipHeight * 2) &&
2097                       ((needToAvoidInTail == TRUE) ||
2098                        (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;
2099 
2100                 // (mip0) width = requestLastMipLevelWidth
2101                 pOut->unalignedWidth  = upperMipWidth + (needExtraWidth ? 1: 0);
2102 
2103                 // (mip0) height = requestLastMipLevelHeight
2104                 pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
2105             }
2106 
2107             // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
2108             ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
2109             // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
2110             ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
2111         }
2112     }
2113 
2114     return returnCode;
2115 }
2116 
2117 /**
2118 ************************************************************************************************************************
2119 *   Gfx11Lib::ValidateNonSwModeParams
2120 *
2121 *   @brief
2122 *       Validate compute surface info params except swizzle mode
2123 *
2124 *   @return
2125 *       TRUE if parameters are valid, FALSE otherwise
2126 ************************************************************************************************************************
2127 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2128 BOOL_32 Gfx11Lib::ValidateNonSwModeParams(
2129     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2130 {
2131     BOOL_32 valid = TRUE;
2132 
2133     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8))
2134     {
2135         ADDR_ASSERT_ALWAYS();
2136         valid = FALSE;
2137     }
2138     else if (pIn->flags.fmask == 1)
2139     {
2140         // There is no FMASK for GFX11 ASICs
2141         ADDR_ASSERT_ALWAYS();
2142         valid = FALSE;
2143     }
2144     else if (pIn->numSamples > 8)
2145     {
2146         // There is no EQAA support for GFX11 ASICs, so the max number of sample is 8
2147         ADDR_ASSERT_ALWAYS();
2148         valid = FALSE;
2149     }
2150     else if ((pIn->numFrags != 0) && (pIn->numSamples != pIn->numFrags))
2151     {
2152         // There is no EQAA support for GFX11 ASICs, so the number of sample has to be same as number of fragment
2153         ADDR_ASSERT_ALWAYS();
2154         valid = FALSE;
2155     }
2156 
2157     const ADDR2_SURFACE_FLAGS flags    = pIn->flags;
2158     const AddrResourceType    rsrcType = pIn->resourceType;
2159     const BOOL_32             mipmap   = (pIn->numMipLevels > 1);
2160     const BOOL_32             msaa     = (pIn->numSamples > 1);
2161     const BOOL_32             display  = flags.display;
2162     const BOOL_32             tex3d    = IsTex3d(rsrcType);
2163     const BOOL_32             tex2d    = IsTex2d(rsrcType);
2164     const BOOL_32             tex1d    = IsTex1d(rsrcType);
2165     const BOOL_32             stereo   = flags.qbStereo;
2166 
2167     // Resource type check
2168     if (tex1d)
2169     {
2170         if (msaa || display || stereo)
2171         {
2172             ADDR_ASSERT_ALWAYS();
2173             valid = FALSE;
2174         }
2175     }
2176     else if (tex2d)
2177     {
2178         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2179         {
2180             ADDR_ASSERT_ALWAYS();
2181             valid = FALSE;
2182         }
2183     }
2184     else if (tex3d)
2185     {
2186         if (msaa || display || stereo)
2187         {
2188             ADDR_ASSERT_ALWAYS();
2189             valid = FALSE;
2190         }
2191     }
2192     else
2193     {
2194         ADDR_ASSERT_ALWAYS();
2195         valid = FALSE;
2196     }
2197 
2198     return valid;
2199 }
2200 
2201 /**
2202 ************************************************************************************************************************
2203 *   Gfx11Lib::ValidateSwModeParams
2204 *
2205 *   @brief
2206 *       Validate compute surface info related to swizzle mode
2207 *
2208 *   @return
2209 *       TRUE if parameters are valid, FALSE otherwise
2210 ************************************************************************************************************************
2211 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2212 BOOL_32 Gfx11Lib::ValidateSwModeParams(
2213     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2214 {
2215     BOOL_32 valid = TRUE;
2216 
2217     if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2218     {
2219         ADDR_ASSERT_ALWAYS();
2220         valid = FALSE;
2221     }
2222     else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2223     {
2224         ADDR_ASSERT_ALWAYS();
2225         valid = FALSE;
2226     }
2227 
2228     const ADDR2_SURFACE_FLAGS flags       = pIn->flags;
2229     const AddrResourceType    rsrcType    = pIn->resourceType;
2230     const AddrSwizzleMode     swizzle     = pIn->swizzleMode;
2231     const BOOL_32             msaa        = (pIn->numSamples > 1);
2232     const BOOL_32             zbuffer     = flags.depth || flags.stencil;
2233     const BOOL_32             color       = flags.color;
2234     const BOOL_32             display     = flags.display;
2235     const BOOL_32             tex3d       = IsTex3d(rsrcType);
2236     const BOOL_32             tex2d       = IsTex2d(rsrcType);
2237     const BOOL_32             tex1d       = IsTex1d(rsrcType);
2238     const BOOL_32             thin3d      = flags.view3dAs2dArray;
2239     const BOOL_32             linear      = IsLinear(swizzle);
2240     const BOOL_32             blk256B     = IsBlock256b(swizzle);
2241     const BOOL_32             isNonPrtXor = IsNonPrtXor(swizzle);
2242     const BOOL_32             prt         = flags.prt;
2243 
2244     // Misc check
2245     if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numSamples)))
2246     {
2247         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2248         ADDR_ASSERT_ALWAYS();
2249         valid = FALSE;
2250     }
2251 
2252     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2253     {
2254         ADDR_ASSERT_ALWAYS();
2255         valid = FALSE;
2256     }
2257 
2258     if ((pIn->bpp == 96) && (linear == FALSE))
2259     {
2260         ADDR_ASSERT_ALWAYS();
2261         valid = FALSE;
2262     }
2263 
2264     const UINT_32 swizzleMask = 1 << swizzle;
2265 
2266     // Resource type check
2267     if (tex1d)
2268     {
2269         if ((swizzleMask & Gfx11Rsrc1dSwModeMask) == 0)
2270         {
2271             ADDR_ASSERT_ALWAYS();
2272             valid = FALSE;
2273         }
2274     }
2275     else if (tex2d)
2276     {
2277         if ((swizzleMask & Gfx11Rsrc2dSwModeMask) == 0)
2278         {
2279             ADDR_ASSERT_ALWAYS();
2280             valid = FALSE;
2281         }
2282         else if (prt && ((swizzleMask & Gfx11Rsrc2dPrtSwModeMask) == 0))
2283         {
2284             ADDR_ASSERT_ALWAYS();
2285             valid = FALSE;
2286         }
2287     }
2288     else if (tex3d)
2289     {
2290         if (((swizzleMask & Gfx11Rsrc3dSwModeMask) == 0) ||
2291             (prt && ((swizzleMask & Gfx11Rsrc3dPrtSwModeMask) == 0)) ||
2292             (thin3d && ((swizzleMask & Gfx11Rsrc3dViewAs2dSwModeMask) == 0)))
2293         {
2294             ADDR_ASSERT_ALWAYS();
2295             valid = FALSE;
2296         }
2297     }
2298 
2299     // Swizzle type check
2300     if (linear)
2301     {
2302         if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2303         {
2304             ADDR_ASSERT_ALWAYS();
2305             valid = FALSE;
2306         }
2307     }
2308     else if (IsZOrderSwizzle(swizzle))
2309     {
2310         if ((pIn->bpp > 64)                         ||
2311             (msaa && (color || (pIn->bpp > 32)))    ||
2312             ElemLib::IsBlockCompressed(pIn->format) ||
2313             ElemLib::IsMacroPixelPacked(pIn->format))
2314         {
2315             ADDR_ASSERT_ALWAYS();
2316             valid = FALSE;
2317         }
2318     }
2319     else if (IsStandardSwizzle(rsrcType, swizzle))
2320     {
2321         if (zbuffer || msaa)
2322         {
2323             ADDR_ASSERT_ALWAYS();
2324             valid = FALSE;
2325         }
2326     }
2327     else if (IsDisplaySwizzle(rsrcType, swizzle))
2328     {
2329         if (zbuffer || msaa)
2330         {
2331             ADDR_ASSERT_ALWAYS();
2332             valid = FALSE;
2333         }
2334     }
2335     else if (IsRtOptSwizzle(swizzle))
2336     {
2337         if (zbuffer)
2338         {
2339             ADDR_ASSERT_ALWAYS();
2340             valid = FALSE;
2341         }
2342     }
2343     else
2344     {
2345         ADDR_ASSERT_ALWAYS();
2346         valid = FALSE;
2347     }
2348 
2349     // Block type check
2350     if (blk256B)
2351     {
2352         if (zbuffer || tex3d || msaa)
2353         {
2354             ADDR_ASSERT_ALWAYS();
2355             valid = FALSE;
2356         }
2357     }
2358 
2359     return valid;
2360 }
2361 
2362 /**
2363 ************************************************************************************************************************
2364 *   Gfx11Lib::HwlComputeSurfaceInfoSanityCheck
2365 *
2366 *   @brief
2367 *       Compute surface info sanity check
2368 *
2369 *   @return
2370 *       Offset
2371 ************************************************************************************************************************
2372 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2373 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoSanityCheck(
2374     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
2375     ) const
2376 {
2377     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2378 }
2379 
2380 /**
2381 ************************************************************************************************************************
2382 *   Gfx11Lib::HwlGetPreferredSurfaceSetting
2383 *
2384 *   @brief
2385 *       Internal function to get suggested surface information for cliet to use
2386 *
2387 *   @return
2388 *       ADDR_E_RETURNCODE
2389 ************************************************************************************************************************
2390 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2391 ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting(
2392     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input structure
2393     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut  ///< [out] output structure
2394     ) const
2395 {
2396     ADDR_E_RETURNCODE returnCode = ADDR_OK;
2397 
2398     if (pIn->flags.fmask)
2399     {
2400         // There is no FMASK for GFX11 ASICs.
2401         ADDR_ASSERT_ALWAYS();
2402 
2403         returnCode = ADDR_INVALIDPARAMS;
2404     }
2405     else
2406     {
2407         UINT_32 bpp    = pIn->bpp;
2408         UINT_32 width  = Max(pIn->width, 1u);
2409         UINT_32 height = Max(pIn->height, 1u);
2410 
2411         // Set format to INVALID will skip this conversion
2412         if (pIn->format != ADDR_FMT_INVALID)
2413         {
2414             ElemMode elemMode = ADDR_UNCOMPRESSED;
2415             UINT_32 expandX, expandY;
2416 
2417             // Get compression/expansion factors and element mode which indicates compression/expansion
2418             bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2419                                                 &elemMode,
2420                                                 &expandX,
2421                                                 &expandY);
2422 
2423             UINT_32 basePitch = 0;
2424             GetElemLib()->AdjustSurfaceInfo(elemMode,
2425                                             expandX,
2426                                             expandY,
2427                                             &bpp,
2428                                             &basePitch,
2429                                             &width,
2430                                             &height);
2431         }
2432 
2433         const UINT_32 numSlices    = Max(pIn->numSlices,    1u);
2434         const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2435         const UINT_32 numSamples   = Max(pIn->numSamples,   1u);
2436         const BOOL_32 msaa         = numSamples > 1;
2437 
2438         // Pre sanity check on non swizzle mode parameters
2439         ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2440         localIn.flags        = pIn->flags;
2441         localIn.resourceType = pIn->resourceType;
2442         localIn.format       = pIn->format;
2443         localIn.bpp          = bpp;
2444         localIn.width        = width;
2445         localIn.height       = height;
2446         localIn.numSlices    = numSlices;
2447         localIn.numMipLevels = numMipLevels;
2448         localIn.numSamples   = numSamples;
2449         localIn.numFrags     = numSamples;
2450 
2451         if (ValidateNonSwModeParams(&localIn))
2452         {
2453             // Forbid swizzle mode(s) by client setting
2454             ADDR2_SWMODE_SET allowedSwModeSet = {};
2455             allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx11LinearSwModeMask;
2456             allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx11Blk256BSwModeMask;
2457             allowedSwModeSet.value |=
2458                 pIn->forbiddenBlock.macroThin4KB ? 0 :
2459                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx11Blk4KBSwModeMask);
2460             allowedSwModeSet.value |=
2461                 pIn->forbiddenBlock.macroThick4KB ? 0 :
2462                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick4KBSwModeMask : 0);
2463             allowedSwModeSet.value |=
2464                 pIn->forbiddenBlock.macroThin64KB ? 0 :
2465                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThin64KBSwModeMask : Gfx11Blk64KBSwModeMask);
2466             allowedSwModeSet.value |=
2467                 pIn->forbiddenBlock.macroThick64KB ? 0 :
2468                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick64KBSwModeMask : 0);
2469             allowedSwModeSet.value |=
2470                 pIn->forbiddenBlock.gfx11.thin256KB ? 0 :
2471                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThin256KBSwModeMask : Gfx11Blk256KBSwModeMask);
2472             allowedSwModeSet.value |=
2473                 pIn->forbiddenBlock.gfx11.thick256KB ? 0 :
2474                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx11Rsrc3dThick256KBSwModeMask : 0);
2475 
2476             if (pIn->preferredSwSet.value != 0)
2477             {
2478                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx11ZSwModeMask;
2479                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx11StandardSwModeMask;
2480                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx11DisplaySwModeMask;
2481                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx11RenderSwModeMask;
2482             }
2483 
2484             if (pIn->noXor)
2485             {
2486                 allowedSwModeSet.value &= ~Gfx11XorSwModeMask;
2487             }
2488 
2489             if (pIn->maxAlign > 0)
2490             {
2491                 if (pIn->maxAlign < Size256K)
2492                 {
2493                     allowedSwModeSet.value &= ~Gfx11Blk256KBSwModeMask;
2494                 }
2495 
2496                 if (pIn->maxAlign < Size64K)
2497                 {
2498                     allowedSwModeSet.value &= ~Gfx11Blk64KBSwModeMask;
2499                 }
2500 
2501                 if (pIn->maxAlign < Size4K)
2502                 {
2503                     allowedSwModeSet.value &= ~Gfx11Blk4KBSwModeMask;
2504                 }
2505 
2506                 if (pIn->maxAlign < Size256)
2507                 {
2508                     allowedSwModeSet.value &= ~Gfx11Blk256BSwModeMask;
2509                 }
2510             }
2511 
2512             // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2513             switch (pIn->resourceType)
2514             {
2515                 case ADDR_RSRC_TEX_1D:
2516                     allowedSwModeSet.value &= Gfx11Rsrc1dSwModeMask;
2517                     break;
2518 
2519                 case ADDR_RSRC_TEX_2D:
2520                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc2dPrtSwModeMask : Gfx11Rsrc2dSwModeMask;
2521                     break;
2522 
2523                 case ADDR_RSRC_TEX_3D:
2524                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc3dPrtSwModeMask : Gfx11Rsrc3dSwModeMask;
2525 
2526                     if (pIn->flags.view3dAs2dArray)
2527                     {
2528                         // SW_LINEAR can be used for 3D thin images, including BCn image format.
2529                         allowedSwModeSet.value &= Gfx11Rsrc3dViewAs2dSwModeMask;
2530                     }
2531                     break;
2532 
2533                 default:
2534                     ADDR_ASSERT_ALWAYS();
2535                     allowedSwModeSet.value = 0;
2536                     break;
2537             }
2538 
2539             if (ElemLib::IsBlockCompressed(pIn->format)  ||
2540                 ElemLib::IsMacroPixelPacked(pIn->format) ||
2541                 (bpp > 64)                               ||
2542                 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2543             {
2544                 allowedSwModeSet.value &= ~Gfx11ZSwModeMask;
2545             }
2546 
2547             if (pIn->format == ADDR_FMT_32_32_32)
2548             {
2549                 allowedSwModeSet.value &= Gfx11LinearSwModeMask;
2550             }
2551 
2552             if (msaa)
2553             {
2554                 allowedSwModeSet.value &= Gfx11MsaaSwModeMask;
2555             }
2556 
2557             if (pIn->flags.depth || pIn->flags.stencil)
2558             {
2559                 allowedSwModeSet.value &= Gfx11ZSwModeMask;
2560             }
2561 
2562             if (pIn->flags.requireMetadata)
2563             {
2564                 // Linear images can never be compressed
2565                 allowedSwModeSet.value &= ~Gfx11LinearSwModeMask;
2566                 if (pIn->flags.color)
2567                 {
2568                     // 256B formats must not be pipe-aligned (can't use in CB)
2569                     allowedSwModeSet.value &= ~(Gfx11Blk256BSwModeMask);
2570                     // D/S formats must not be pipe-aligned
2571                     allowedSwModeSet.value &= ~(Gfx11DisplaySwModeMask | Gfx11StandardSwModeMask);
2572                 }
2573             }
2574 
2575             if (pIn->flags.display)
2576             {
2577                 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
2578             }
2579 
2580             if (allowedSwModeSet.value != 0)
2581             {
2582 #if DEBUG
2583                 // Post sanity check, at least AddrLib should accept the output generated by its own
2584                 UINT_32 validateSwModeSet = allowedSwModeSet.value;
2585 
2586                 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
2587                 {
2588                     if (validateSwModeSet & 1)
2589                     {
2590                         localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
2591                         ADDR_ASSERT(ValidateSwModeParams(&localIn));
2592                     }
2593 
2594                     validateSwModeSet >>= 1;
2595                 }
2596 #endif
2597 
2598                 pOut->resourceType   = pIn->resourceType;
2599                 pOut->validSwModeSet = allowedSwModeSet;
2600                 pOut->canXor         = (allowedSwModeSet.value & Gfx11XorSwModeMask) ? TRUE : FALSE;
2601 
2602                 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType, &(pOut->validBlockSet));
2603                 GetAllowedSwSet(allowedSwModeSet, &(pOut->validSwTypeSet));
2604 
2605                 pOut->clientPreferredSwSet = pIn->preferredSwSet;
2606 
2607                 if (pOut->clientPreferredSwSet.value == 0)
2608                 {
2609                     pOut->clientPreferredSwSet.value = AddrSwSetAll;
2610                 }
2611 
2612                 // Apply optional restrictions
2613                 if (pIn->flags.needEquation)
2614                 {
2615                     UINT_32 components = pIn->flags.allowExtEquation ?  ADDR_MAX_EQUATION_COMP :
2616                                                                         ADDR_MAX_LEGACY_EQUATION_COMP;
2617                     FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
2618                 }
2619 
2620                 if (allowedSwModeSet.value == Gfx11LinearSwModeMask)
2621                 {
2622                     pOut->swizzleMode = ADDR_SW_LINEAR;
2623                 }
2624                 else
2625                 {
2626                     const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
2627 
2628                     if ((height > 1) && (computeMinSize == FALSE))
2629                     {
2630                         // Always ignore linear swizzle mode if:
2631                         // 1. This is a (2D/3D) resource with height > 1
2632                         // 2. Client doesn't require computing minimize size
2633                         allowedSwModeSet.swLinear = 0;
2634                     }
2635 
2636                     ADDR2_BLOCK_SET allowedBlockSet = {};
2637                     GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType, &allowedBlockSet);
2638 
2639                     // Determine block size if there are 2 or more block type candidates
2640                     if (IsPow2(allowedBlockSet.value) == FALSE)
2641                     {
2642                         AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
2643 
2644                         swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
2645 
2646                         if (pOut->resourceType == ADDR_RSRC_TEX_3D)
2647                         {
2648                             swMode[AddrBlockThick4KB]   = ADDR_SW_4KB_S_X;
2649                             swMode[AddrBlockThin64KB]   = ADDR_SW_64KB_R_X;
2650                             swMode[AddrBlockThick64KB]  = ADDR_SW_64KB_S_X;
2651                             swMode[AddrBlockThin256KB]  = ADDR_SW_256KB_R_X;
2652                             swMode[AddrBlockThick256KB] = ADDR_SW_256KB_S_X;
2653                         }
2654                         else
2655                         {
2656                             swMode[AddrBlockMicro]     = ADDR_SW_256B_D;
2657                             swMode[AddrBlockThin4KB]   = ADDR_SW_4KB_D_X;
2658                             swMode[AddrBlockThin64KB]  = ADDR_SW_64KB_D_X;
2659                             swMode[AddrBlockThin256KB] = ADDR_SW_256KB_D_X;
2660                         }
2661 
2662                         UINT_64 padSize[AddrBlockMaxTiledType] = {};
2663 
2664                         const UINT_32 ratioLow           = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
2665                         const UINT_32 ratioHi            = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
2666                         const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
2667                         UINT_32       minSizeBlk         = AddrBlockMicro;
2668                         UINT_64       minSize            = 0;
2669 
2670                         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
2671 
2672                         for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
2673                         {
2674                             if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
2675                             {
2676                                 localIn.swizzleMode = swMode[i];
2677 
2678                                 if (localIn.swizzleMode == ADDR_SW_LINEAR)
2679                                 {
2680                                     returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
2681                                 }
2682                                 else
2683                                 {
2684                                     returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
2685                                 }
2686 
2687                                 if (returnCode == ADDR_OK)
2688                                 {
2689                                     padSize[i] = localOut.surfSize;
2690 
2691                                     if ((minSize == 0) ||
2692                                         Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
2693                                     {
2694                                         minSize    = padSize[i];
2695                                         minSizeBlk = i;
2696                                     }
2697                                 }
2698                                 else
2699                                 {
2700                                     ADDR_ASSERT_ALWAYS();
2701                                     break;
2702                                 }
2703                             }
2704                         }
2705 
2706                         if (pIn->memoryBudget > 1.0)
2707                         {
2708                             // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
2709                             // smaller-block type again in coming loop
2710                             switch (minSizeBlk)
2711                             {
2712                                 case AddrBlockThick256KB:
2713                                     allowedBlockSet.gfx11.thin256KB = 0;
2714                                 case AddrBlockThin256KB:
2715                                     allowedBlockSet.macroThick64KB = 0;
2716                                 case AddrBlockThick64KB:
2717                                     allowedBlockSet.macroThin64KB = 0;
2718                                 case AddrBlockThin64KB:
2719                                     allowedBlockSet.macroThick4KB = 0;
2720                                 case AddrBlockThick4KB:
2721                                     allowedBlockSet.macroThin4KB = 0;
2722                                 case AddrBlockThin4KB:
2723                                     allowedBlockSet.micro  = 0;
2724                                 case AddrBlockMicro:
2725                                     allowedBlockSet.linear = 0;
2726                                 case AddrBlockLinear:
2727                                     break;
2728 
2729                                 default:
2730                                     ADDR_ASSERT_ALWAYS();
2731                                     break;
2732                             }
2733 
2734                             for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
2735                             {
2736                                 if ((i != minSizeBlk) &&
2737                                     Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
2738                                 {
2739                                     if (Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
2740                                     {
2741                                         // Clear the block type if the memory waste is unacceptable
2742                                         allowedBlockSet.value &= ~(1u << (i - 1));
2743                                     }
2744                                 }
2745                             }
2746 
2747                             // Remove linear block type if 2 or more block types are allowed
2748                             if (IsPow2(allowedBlockSet.value) == FALSE)
2749                             {
2750                                 allowedBlockSet.linear = 0;
2751                             }
2752 
2753                             // Select the biggest allowed block type
2754                             minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
2755 
2756                             if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
2757                             {
2758                                 minSizeBlk = AddrBlockLinear;
2759                             }
2760                         }
2761 
2762                         switch (minSizeBlk)
2763                         {
2764                             case AddrBlockLinear:
2765                                 allowedSwModeSet.value &= Gfx11LinearSwModeMask;
2766                                 break;
2767 
2768                             case AddrBlockMicro:
2769                                 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2770                                 allowedSwModeSet.value &= Gfx11Blk256BSwModeMask;
2771                                 break;
2772 
2773                             case AddrBlockThin4KB:
2774                                 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
2775                                 allowedSwModeSet.value &= Gfx11Blk4KBSwModeMask;
2776                                 break;
2777 
2778                             case AddrBlockThick4KB:
2779                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2780                                 allowedSwModeSet.value &= Gfx11Rsrc3dThick4KBSwModeMask;
2781                                 break;
2782 
2783                             case AddrBlockThin64KB:
2784                                 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2785                                                           Gfx11Rsrc3dThin64KBSwModeMask : Gfx11Blk64KBSwModeMask;
2786                                 break;
2787 
2788                             case AddrBlockThick64KB:
2789                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2790                                 allowedSwModeSet.value &= Gfx11Rsrc3dThick64KBSwModeMask;
2791                                 break;
2792 
2793                             case AddrBlockThin256KB:
2794                                 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
2795                                                           Gfx11Rsrc3dThin256KBSwModeMask : Gfx11Blk256KBSwModeMask;
2796                                 break;
2797 
2798                             case AddrBlockThick256KB:
2799                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
2800                                 allowedSwModeSet.value &= Gfx11Rsrc3dThick256KBSwModeMask;
2801                                 break;
2802 
2803                             default:
2804                                 ADDR_ASSERT_ALWAYS();
2805                                 allowedSwModeSet.value = 0;
2806                                 break;
2807                         }
2808                     }
2809 
2810                     // Block type should be determined.
2811                     GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType, &allowedBlockSet);
2812                     ADDR_ASSERT(IsPow2(allowedBlockSet.value));
2813 
2814                     ADDR2_SWTYPE_SET allowedSwSet = {};
2815                     GetAllowedSwSet(allowedSwModeSet, &allowedSwSet);
2816 
2817                     // Determine swizzle type if there are 2 or more swizzle type candidates
2818                     if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
2819                     {
2820                         if (ElemLib::IsBlockCompressed(pIn->format))
2821                         {
2822                             if (allowedSwSet.sw_D)
2823                             {
2824                                 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2825                             }
2826                             else if (allowedSwSet.sw_S)
2827                             {
2828                                 allowedSwModeSet.value &= Gfx11StandardSwModeMask;
2829                             }
2830                             else
2831                             {
2832                                 ADDR_ASSERT(allowedSwSet.sw_R);
2833                                 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2834                             }
2835                         }
2836                         else if (ElemLib::IsMacroPixelPacked(pIn->format))
2837                         {
2838                             if (allowedSwSet.sw_S)
2839                             {
2840                                 allowedSwModeSet.value &= Gfx11StandardSwModeMask;
2841                             }
2842                             else if (allowedSwSet.sw_D)
2843                             {
2844                                 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2845                             }
2846                             else
2847                             {
2848                                 ADDR_ASSERT(allowedSwSet.sw_R);
2849                                 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2850                             }
2851                         }
2852                         else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2853                         {
2854                             if (pIn->flags.color && allowedSwSet.sw_R)
2855                             {
2856                                 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2857                             }
2858                             else if (allowedSwSet.sw_S)
2859                             {
2860                                 allowedSwModeSet.value &= Gfx11StandardSwModeMask;
2861                             }
2862                             else if (allowedSwSet.sw_D)
2863                             {
2864                                 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2865                             }
2866                             else
2867                             {
2868                                 ADDR_ASSERT(allowedSwSet.sw_Z);
2869                                 allowedSwModeSet.value &= Gfx11ZSwModeMask;
2870                             }
2871                         }
2872                         else
2873                         {
2874                             if (allowedSwSet.sw_R)
2875                             {
2876                                 allowedSwModeSet.value &= Gfx11RenderSwModeMask;
2877                             }
2878                             else if (allowedSwSet.sw_D)
2879                             {
2880                                 allowedSwModeSet.value &= Gfx11DisplaySwModeMask;
2881                             }
2882                             else if (allowedSwSet.sw_Z)
2883                             {
2884                                 allowedSwModeSet.value &= Gfx11ZSwModeMask;
2885                             }
2886                             else
2887                             {
2888                                 ADDR_ASSERT_ALWAYS();
2889                             }
2890                         }
2891 
2892                         // Swizzle type should be determined.
2893                         GetAllowedSwSet(allowedSwModeSet, &allowedSwSet);
2894                         ADDR_ASSERT(IsPow2(allowedSwSet.value));
2895                     }
2896 
2897                     // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
2898                     // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
2899                     // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
2900                     pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
2901                 }
2902             }
2903             else
2904             {
2905                 // Invalid combination...
2906                 ADDR_ASSERT_ALWAYS();
2907                 returnCode = ADDR_INVALIDPARAMS;
2908             }
2909         }
2910         else
2911         {
2912             // Invalid combination...
2913             ADDR_ASSERT_ALWAYS();
2914             returnCode = ADDR_INVALIDPARAMS;
2915         }
2916     }
2917 
2918     return returnCode;
2919 }
2920 
2921 /**
2922 ************************************************************************************************************************
2923 *   Gfx11Lib::HwlGetPossibleSwizzleModes
2924 *
2925 *   @brief
2926 *       Returns a list of swizzle modes that are valid from the hardware's perspective for the client to choose from
2927 *
2928 *   @return
2929 *       ADDR_E_RETURNCODE
2930 ************************************************************************************************************************
2931 */
HwlGetPossibleSwizzleModes(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2932 ADDR_E_RETURNCODE Gfx11Lib::HwlGetPossibleSwizzleModes(
2933     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input structure
2934     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut  ///< [out] output structure
2935     ) const
2936 {
2937     ADDR_E_RETURNCODE returnCode = ADDR_OK;
2938 
2939     if (pIn->flags.fmask)
2940     {
2941         // There is no FMASK for GFX11 ASICs.
2942         ADDR_ASSERT_ALWAYS();
2943 
2944         returnCode = ADDR_INVALIDPARAMS;
2945     }
2946     else
2947     {
2948         UINT_32 bpp    = pIn->bpp;
2949         UINT_32 width  = Max(pIn->width, 1u);
2950         UINT_32 height = Max(pIn->height, 1u);
2951 
2952         // Set format to INVALID will skip this conversion
2953         if (pIn->format != ADDR_FMT_INVALID)
2954         {
2955             ElemMode elemMode = ADDR_UNCOMPRESSED;
2956             UINT_32 expandX, expandY;
2957 
2958             // Get compression/expansion factors and element mode which indicates compression/expansion
2959             bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2960                 &elemMode,
2961                 &expandX,
2962                 &expandY);
2963 
2964             UINT_32 basePitch = 0;
2965             GetElemLib()->AdjustSurfaceInfo(elemMode,
2966                 expandX,
2967                 expandY,
2968                 &bpp,
2969                 &basePitch,
2970                 &width,
2971                 &height);
2972         }
2973 
2974         const UINT_32 numSlices    = Max(pIn->numSlices, 1u);
2975         const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2976         const UINT_32 numSamples   = Max(pIn->numSamples, 1u);
2977         const BOOL_32 msaa         = numSamples > 1;
2978 
2979         // Pre sanity check on non swizzle mode parameters
2980         ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2981         localIn.flags = pIn->flags;
2982         localIn.resourceType = pIn->resourceType;
2983         localIn.format = pIn->format;
2984         localIn.bpp = bpp;
2985         localIn.width = width;
2986         localIn.height = height;
2987         localIn.numSlices = numSlices;
2988         localIn.numMipLevels = numMipLevels;
2989         localIn.numSamples = numSamples;
2990         localIn.numFrags = numSamples;
2991 
2992         if (ValidateNonSwModeParams(&localIn))
2993         {
2994             // Allow appropriate swizzle modes by default
2995             ADDR2_SWMODE_SET allowedSwModeSet = {};
2996             allowedSwModeSet.value |= Gfx11LinearSwModeMask | Gfx11Blk256BSwModeMask;
2997             if (pIn->resourceType == ADDR_RSRC_TEX_3D)
2998             {
2999                 allowedSwModeSet.value |= Gfx11Rsrc3dThick4KBSwModeMask  |
3000                                           Gfx11Rsrc3dThin64KBSwModeMask  |
3001                                           Gfx11Rsrc3dThick64KBSwModeMask |
3002                                           Gfx11Rsrc3dThin256KBSwModeMask |
3003                                           Gfx11Rsrc3dThick256KBSwModeMask;
3004             }
3005             else
3006             {
3007                 allowedSwModeSet.value |= Gfx11Blk4KBSwModeMask | Gfx11Blk64KBSwModeMask | Gfx11Blk256KBSwModeMask;
3008             }
3009 
3010             // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3011             switch (pIn->resourceType)
3012             {
3013             case ADDR_RSRC_TEX_1D:
3014                 allowedSwModeSet.value &= Gfx11Rsrc1dSwModeMask;
3015                 break;
3016 
3017             case ADDR_RSRC_TEX_2D:
3018                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc2dPrtSwModeMask : Gfx11Rsrc2dSwModeMask;
3019                 break;
3020 
3021             case ADDR_RSRC_TEX_3D:
3022                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc3dPrtSwModeMask : Gfx11Rsrc3dSwModeMask;
3023 
3024                 if (pIn->flags.view3dAs2dArray)
3025                 {
3026                     // SW_LINEAR can be used for 3D thin images, including BCn image format.
3027                     allowedSwModeSet.value &= Gfx11Rsrc3dViewAs2dSwModeMask;
3028                 }
3029                 break;
3030 
3031             default:
3032                 ADDR_ASSERT_ALWAYS();
3033                 allowedSwModeSet.value = 0;
3034                 break;
3035             }
3036 
3037             // TODO: figure out if following restrictions are correct on GFX11...
3038             if (ElemLib::IsBlockCompressed(pIn->format) ||
3039                 ElemLib::IsMacroPixelPacked(pIn->format) ||
3040                 (bpp > 64) ||
3041                 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3042             {
3043                 allowedSwModeSet.value &= ~Gfx11ZSwModeMask;
3044             }
3045 
3046             if (pIn->format == ADDR_FMT_32_32_32)
3047             {
3048                 allowedSwModeSet.value &= Gfx11LinearSwModeMask;
3049             }
3050 
3051             if (msaa)
3052             {
3053                 allowedSwModeSet.value &= Gfx11MsaaSwModeMask;
3054             }
3055 
3056             if (pIn->flags.depth || pIn->flags.stencil)
3057             {
3058                 allowedSwModeSet.value &= Gfx11ZSwModeMask;
3059             }
3060 
3061             if (pIn->flags.requireMetadata)
3062             {
3063                 // Linear images can never be compressed
3064                 allowedSwModeSet.value &= ~Gfx11LinearSwModeMask;
3065                 if (pIn->flags.color)
3066                 {
3067                     // 256B formats must not be pipe-aligned (can't use in CB)
3068                     allowedSwModeSet.value &= ~(Gfx11Blk256BSwModeMask);
3069                     // D/S formats must not be pipe-aligned
3070                     allowedSwModeSet.value &= ~(Gfx11DisplaySwModeMask | Gfx11StandardSwModeMask);
3071                 }
3072             }
3073 
3074             if (pIn->flags.display)
3075             {
3076                 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
3077             }
3078 
3079             if (allowedSwModeSet.value != 0)
3080             {
3081 #if DEBUG
3082                 // Post sanity check, at least AddrLib should accept the output generated by its own
3083                 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3084 
3085                 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3086                 {
3087                     if (validateSwModeSet & 1)
3088                     {
3089                         localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3090                         ADDR_ASSERT(ValidateSwModeParams(&localIn));
3091                     }
3092 
3093                     validateSwModeSet >>= 1;
3094                 }
3095 #endif
3096 
3097                 pOut->resourceType = pIn->resourceType;
3098                 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3099 
3100                 if (pOut->clientPreferredSwSet.value == 0)
3101                 {
3102                     pOut->clientPreferredSwSet.value = AddrSwSetAll;
3103                 }
3104 
3105                 if (pIn->flags.needEquation)
3106                 {
3107                     UINT_32 components = pIn->flags.allowExtEquation ?  ADDR_MAX_EQUATION_COMP :
3108                                                                         ADDR_MAX_LEGACY_EQUATION_COMP;
3109                     FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
3110                 }
3111 
3112                 pOut->validSwModeSet = allowedSwModeSet;
3113                 pOut->canXor = (allowedSwModeSet.value & Gfx11XorSwModeMask) ? TRUE : FALSE;
3114             }
3115             else
3116             {
3117                 // Invalid combination...
3118                 ADDR_ASSERT_ALWAYS();
3119                 returnCode = ADDR_INVALIDPARAMS;
3120             }
3121         }
3122         else
3123         {
3124             // Invalid combination...
3125             ADDR_ASSERT_ALWAYS();
3126             returnCode = ADDR_INVALIDPARAMS;
3127         }
3128     }
3129 
3130     return returnCode;
3131 }
3132 
3133 /**
3134 ************************************************************************************************************************
3135 *   Gfx11Lib::HwlGetAllowedBlockSet
3136 *
3137 *   @brief
3138 *       Returns the set of allowed block sizes given the allowed swizzle modes and resource type
3139 *
3140 *   @return
3141 *       ADDR_E_RETURNCODE
3142 ************************************************************************************************************************
3143 */
HwlGetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet,AddrResourceType rsrcType,ADDR2_BLOCK_SET * pAllowedBlockSet) const3144 ADDR_E_RETURNCODE Gfx11Lib::HwlGetAllowedBlockSet(
3145     ADDR2_SWMODE_SET allowedSwModeSet,  ///< [in] allowed swizzle modes
3146     AddrResourceType rsrcType,          ///< [in] resource type
3147     ADDR2_BLOCK_SET* pAllowedBlockSet   ///< [out] allowed block sizes
3148     ) const
3149 {
3150     ADDR2_BLOCK_SET allowedBlockSet = {};
3151 
3152     allowedBlockSet.micro  = (allowedSwModeSet.value & Gfx11Blk256BSwModeMask) ? TRUE : FALSE;
3153     allowedBlockSet.linear = (allowedSwModeSet.value & Gfx11LinearSwModeMask)  ? TRUE : FALSE;
3154 
3155     if (rsrcType == ADDR_RSRC_TEX_3D)
3156     {
3157         allowedBlockSet.macroThick4KB    = (allowedSwModeSet.value & Gfx11Rsrc3dThick4KBSwModeMask)   ? TRUE : FALSE;
3158         allowedBlockSet.macroThin64KB    = (allowedSwModeSet.value & Gfx11Rsrc3dThin64KBSwModeMask)   ? TRUE : FALSE;
3159         allowedBlockSet.macroThick64KB   = (allowedSwModeSet.value & Gfx11Rsrc3dThick64KBSwModeMask)  ? TRUE : FALSE;
3160         allowedBlockSet.gfx11.thin256KB  = (allowedSwModeSet.value & Gfx11Rsrc3dThin256KBSwModeMask)  ? TRUE : FALSE;
3161         allowedBlockSet.gfx11.thick256KB = (allowedSwModeSet.value & Gfx11Rsrc3dThick256KBSwModeMask) ? TRUE : FALSE;
3162     }
3163     else
3164     {
3165         allowedBlockSet.macroThin4KB    = (allowedSwModeSet.value & Gfx11Blk4KBSwModeMask)   ? TRUE : FALSE;
3166         allowedBlockSet.macroThin64KB   = (allowedSwModeSet.value & Gfx11Blk64KBSwModeMask)  ? TRUE : FALSE;
3167         allowedBlockSet.gfx11.thin256KB = (allowedSwModeSet.value & Gfx11Blk256KBSwModeMask) ? TRUE : FALSE;
3168     }
3169 
3170     *pAllowedBlockSet = allowedBlockSet;
3171     return ADDR_OK;
3172 }
3173 
3174 /**
3175 ************************************************************************************************************************
3176 *   Gfx11Lib::HwlGetAllowedSwSet
3177 *
3178 *   @brief
3179 *       Returns the set of allowed swizzle types given the allowed swizzle modes
3180 *   @return
3181 *       ADDR_E_RETURNCODE
3182 ************************************************************************************************************************
3183 */
HwlGetAllowedSwSet(ADDR2_SWMODE_SET allowedSwModeSet,ADDR2_SWTYPE_SET * pAllowedSwSet) const3184 ADDR_E_RETURNCODE Gfx11Lib::HwlGetAllowedSwSet(
3185     ADDR2_SWMODE_SET  allowedSwModeSet, ///< [in] allowed swizzle modes
3186     ADDR2_SWTYPE_SET* pAllowedSwSet     ///< [out] allowed swizzle types
3187     ) const
3188 {
3189     ADDR2_SWTYPE_SET allowedSwSet = {};
3190 
3191     allowedSwSet.sw_Z = (allowedSwModeSet.value & Gfx11ZSwModeMask)        ? TRUE : FALSE;
3192     allowedSwSet.sw_S = (allowedSwModeSet.value & Gfx11StandardSwModeMask) ? TRUE : FALSE;
3193     allowedSwSet.sw_D = (allowedSwModeSet.value & Gfx11DisplaySwModeMask)  ? TRUE : FALSE;
3194     allowedSwSet.sw_R = (allowedSwModeSet.value & Gfx11RenderSwModeMask)   ? TRUE : FALSE;
3195 
3196     *pAllowedSwSet = allowedSwSet;
3197     return ADDR_OK;
3198 }
3199 
3200 /**
3201 ************************************************************************************************************************
3202 *   Gfx11Lib::ComputeStereoInfo
3203 *
3204 *   @brief
3205 *       Compute height alignment and right eye pipeBankXor for stereo surface
3206 *
3207 *   @return
3208 *       Error code
3209 *
3210 ************************************************************************************************************************
3211 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const3212 ADDR_E_RETURNCODE Gfx11Lib::ComputeStereoInfo(
3213     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,        ///< Compute surface info
3214     UINT_32*                                pAlignY,    ///< Stereo requested additional alignment in Y
3215     UINT_32*                                pRightXor   ///< Right eye xor
3216     ) const
3217 {
3218     ADDR_E_RETURNCODE ret = ADDR_OK;
3219 
3220     *pRightXor = 0;
3221 
3222     if (IsNonPrtXor(pIn->swizzleMode))
3223     {
3224         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3225         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
3226         const UINT_32 rsrcType    = static_cast<UINT_32>(pIn->resourceType) - 1;
3227         const UINT_32 swMode      = static_cast<UINT_32>(pIn->swizzleMode);
3228         const UINT_32 eqIndex     = m_equationLookupTable[rsrcType][swMode][elemLog2];
3229 
3230         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3231         {
3232             UINT_32 yMax     = 0;
3233             UINT_32 yPosMask = 0;
3234 
3235             // First get "max y bit"
3236             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3237             {
3238                 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
3239 
3240                 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3241                     (m_equationTable[eqIndex].addr[i].index > yMax))
3242                 {
3243                     yMax = m_equationTable[eqIndex].addr[i].index;
3244                 }
3245 
3246                 if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3247                     (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3248                     (m_equationTable[eqIndex].xor1[i].index > yMax))
3249                 {
3250                     yMax = m_equationTable[eqIndex].xor1[i].index;
3251                 }
3252 
3253                 if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3254                     (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3255                     (m_equationTable[eqIndex].xor2[i].index > yMax))
3256                 {
3257                     yMax = m_equationTable[eqIndex].xor2[i].index;
3258                 }
3259             }
3260 
3261             // Then loop again for populating a position mask of "max Y bit"
3262             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3263             {
3264                 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3265                     (m_equationTable[eqIndex].addr[i].index == yMax))
3266                 {
3267                     yPosMask |= 1u << i;
3268                 }
3269                 else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3270                          (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3271                          (m_equationTable[eqIndex].xor1[i].index == yMax))
3272                 {
3273                     yPosMask |= 1u << i;
3274                 }
3275                 else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3276                          (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3277                          (m_equationTable[eqIndex].xor2[i].index == yMax))
3278                 {
3279                     yPosMask |= 1u << i;
3280                 }
3281             }
3282 
3283             const UINT_32 additionalAlign = 1 << yMax;
3284 
3285             if (additionalAlign >= *pAlignY)
3286             {
3287                 *pAlignY = additionalAlign;
3288 
3289                 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3290 
3291                 if ((alignedHeight >> yMax) & 1)
3292                 {
3293                     *pRightXor = yPosMask >> m_pipeInterleaveLog2;
3294                 }
3295             }
3296         }
3297         else
3298         {
3299             ret = ADDR_INVALIDPARAMS;
3300         }
3301     }
3302 
3303     return ret;
3304 }
3305 
3306 /**
3307 ************************************************************************************************************************
3308 *   Gfx11Lib::HwlComputeSurfaceInfoTiled
3309 *
3310 *   @brief
3311 *       Internal function to calculate alignment for tiled surface
3312 *
3313 *   @return
3314 *       ADDR_E_RETURNCODE
3315 ************************************************************************************************************************
3316 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3317 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoTiled(
3318      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3319      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3320      ) const
3321 {
3322     ADDR_E_RETURNCODE ret;
3323 
3324     // Mip chain dimesion and epitch has no meaning in GFX11, set to default value
3325     pOut->mipChainPitch    = 0;
3326     pOut->mipChainHeight   = 0;
3327     pOut->mipChainSlice    = 0;
3328     pOut->epitchIsHeight   = FALSE;
3329 
3330     // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3331     pOut->mipChainInTail   = FALSE;
3332     pOut->firstMipIdInTail = pIn->numMipLevels;
3333 
3334     if (IsBlock256b(pIn->swizzleMode))
3335     {
3336         ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3337     }
3338     else
3339     {
3340         ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3341     }
3342 
3343     return ret;
3344 }
3345 
3346 /**
3347 ************************************************************************************************************************
3348 *   Gfx11Lib::ComputeSurfaceInfoMicroTiled
3349 *
3350 *   @brief
3351 *       Internal function to calculate alignment for micro tiled surface
3352 *
3353 *   @return
3354 *       ADDR_E_RETURNCODE
3355 ************************************************************************************************************************
3356 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3357 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceInfoMicroTiled(
3358      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3359      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3360      ) const
3361 {
3362     ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3363                                                          &pOut->blockHeight,
3364                                                          &pOut->blockSlices,
3365                                                          pIn->bpp,
3366                                                          pIn->numSamples,
3367                                                          pIn->resourceType,
3368                                                          pIn->swizzleMode);
3369 
3370     if (ret == ADDR_OK)
3371     {
3372         const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3373 
3374         pOut->pitch     = PowTwoAlign(pIn->width,  pOut->blockWidth);
3375         pOut->height    = PowTwoAlign(pIn->height, pOut->blockHeight);
3376         pOut->numSlices = pIn->numSlices;
3377         pOut->baseAlign = blockSize;
3378 
3379         if (pIn->numMipLevels > 1)
3380         {
3381             const UINT_32 mip0Width    = pIn->width;
3382             const UINT_32 mip0Height   = pIn->height;
3383             UINT_64       mipSliceSize = 0;
3384 
3385             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3386             {
3387                 UINT_32 mipWidth, mipHeight;
3388 
3389                 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3390 
3391                 const UINT_32 mipActualWidth  = PowTwoAlign(mipWidth,  pOut->blockWidth);
3392                 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3393 
3394                 if (pOut->pMipInfo != NULL)
3395                 {
3396                     pOut->pMipInfo[i].pitch            = mipActualWidth;
3397                     pOut->pMipInfo[i].height           = mipActualHeight;
3398                     pOut->pMipInfo[i].depth            = 1;
3399                     pOut->pMipInfo[i].offset           = mipSliceSize;
3400                     pOut->pMipInfo[i].mipTailOffset    = 0;
3401                     pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3402                 }
3403 
3404                 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3405             }
3406 
3407             pOut->sliceSize = mipSliceSize;
3408             pOut->surfSize  = mipSliceSize * pOut->numSlices;
3409         }
3410         else
3411         {
3412             pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3413             pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3414 
3415             if (pOut->pMipInfo != NULL)
3416             {
3417                 pOut->pMipInfo[0].pitch            = pOut->pitch;
3418                 pOut->pMipInfo[0].height           = pOut->height;
3419                 pOut->pMipInfo[0].depth            = 1;
3420                 pOut->pMipInfo[0].offset           = 0;
3421                 pOut->pMipInfo[0].mipTailOffset    = 0;
3422                 pOut->pMipInfo[0].macroBlockOffset = 0;
3423             }
3424         }
3425 
3426     }
3427 
3428     return ret;
3429 }
3430 
3431 /**
3432 ************************************************************************************************************************
3433 *   Gfx11Lib::ComputeSurfaceInfoMacroTiled
3434 *
3435 *   @brief
3436 *       Internal function to calculate alignment for macro tiled surface
3437 *
3438 *   @return
3439 *       ADDR_E_RETURNCODE
3440 ************************************************************************************************************************
3441 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3442 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceInfoMacroTiled(
3443      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3444      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3445      ) const
3446 {
3447     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3448                                                                 &pOut->blockHeight,
3449                                                                 &pOut->blockSlices,
3450                                                                 pIn->bpp,
3451                                                                 pIn->numSamples,
3452                                                                 pIn->resourceType,
3453                                                                 pIn->swizzleMode);
3454 
3455     if (returnCode == ADDR_OK)
3456     {
3457         UINT_32 heightAlign = pOut->blockHeight;
3458 
3459         if (pIn->flags.qbStereo)
3460         {
3461             UINT_32 rightXor = 0;
3462 
3463             returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);
3464 
3465             if (returnCode == ADDR_OK)
3466             {
3467                 pOut->pStereoInfo->rightSwizzle = rightXor;
3468             }
3469         }
3470 
3471         if (returnCode == ADDR_OK)
3472         {
3473             const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3474             const UINT_32 blockSize     = 1 << blockSizeLog2;
3475 
3476             pOut->pitch     = PowTwoAlign(pIn->width,     pOut->blockWidth);
3477             pOut->height    = PowTwoAlign(pIn->height,    heightAlign);
3478             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3479             pOut->baseAlign = blockSize;
3480 
3481             if (pIn->numMipLevels > 1)
3482             {
3483                 const Dim3d  tailMaxDim         = GetMipTailDim(pIn->resourceType,
3484                                                                 pIn->swizzleMode,
3485                                                                 pOut->blockWidth,
3486                                                                 pOut->blockHeight,
3487                                                                 pOut->blockSlices);
3488                 const UINT_32 mip0Width         = pIn->width;
3489                 const UINT_32 mip0Height        = pIn->height;
3490                 const BOOL_32 isThin            = IsThin(pIn->resourceType, pIn->swizzleMode);
3491                 const UINT_32 mip0Depth         = isThin ? 1 : pIn->numSlices;
3492                 const UINT_32 maxMipsInTail     = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3493                 const UINT_32 index             = Log2(pIn->bpp >> 3);
3494                 UINT_32       firstMipInTail    = pIn->numMipLevels;
3495                 UINT_64       mipChainSliceSize = 0;
3496                 UINT_64       mipSize[MaxMipLevels];
3497                 UINT_64       mipSliceSize[MaxMipLevels];
3498 
3499                 // For htile, we need to make z16 and stencil enter the mip tail at the same time as z32 would
3500                 Dim3d fixedTailMaxDim = tailMaxDim;
3501                 if (IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3502                 {
3503                     fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3504                     fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3505                 }
3506 
3507                 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3508                 {
3509                     UINT_32 mipWidth, mipHeight, mipDepth;
3510 
3511                     GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3512 
3513                     if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3514                     {
3515                         firstMipInTail     = i;
3516                         mipChainSliceSize += blockSize / pOut->blockSlices;
3517                         break;
3518                     }
3519                     else
3520                     {
3521                         const UINT_32 pitch     = PowTwoAlign(mipWidth,  pOut->blockWidth);
3522                         const UINT_32 height    = PowTwoAlign(mipHeight, pOut->blockHeight);
3523                         const UINT_32 depth     = PowTwoAlign(mipDepth,  pOut->blockSlices);
3524                         const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3525 
3526                         mipSize[i]         = sliceSize * depth;
3527                         mipSliceSize[i]    = sliceSize * pOut->blockSlices;
3528                         mipChainSliceSize += sliceSize;
3529 
3530                         if (pOut->pMipInfo != NULL)
3531                         {
3532                             pOut->pMipInfo[i].pitch  = pitch;
3533                             pOut->pMipInfo[i].height = height;
3534                             pOut->pMipInfo[i].depth  = depth;
3535                         }
3536                     }
3537                 }
3538 
3539                 pOut->sliceSize        = mipChainSliceSize;
3540                 pOut->surfSize         = mipChainSliceSize * pOut->numSlices;
3541                 pOut->mipChainInTail   = (firstMipInTail == 0) ? TRUE : FALSE;
3542                 pOut->firstMipIdInTail = firstMipInTail;
3543 
3544                 if (pOut->pMipInfo != NULL)
3545                 {
3546                     UINT_64 offset         = 0;
3547                     UINT_64 macroBlkOffset = 0;
3548                     UINT_32 tailMaxDepth   = 0;
3549 
3550                     if (firstMipInTail != pIn->numMipLevels)
3551                     {
3552                         UINT_32 mipWidth, mipHeight;
3553 
3554                         GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3555                                    &mipWidth, &mipHeight, &tailMaxDepth);
3556 
3557                         offset         = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3558                         macroBlkOffset = blockSize;
3559                     }
3560 
3561                     for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3562                     {
3563                         pOut->pMipInfo[i].offset           = offset;
3564                         pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3565                         pOut->pMipInfo[i].mipTailOffset    = 0;
3566 
3567                         offset         += mipSize[i];
3568                         macroBlkOffset += mipSliceSize[i];
3569                     }
3570 
3571                     UINT_32 pitch  = tailMaxDim.w;
3572                     UINT_32 height = tailMaxDim.h;
3573                     UINT_32 depth  = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3574 
3575                     tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3576 
3577                     for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3578                     {
3579                         const UINT_32 m         = maxMipsInTail - 1 - (i - firstMipInTail);
3580                         const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3581 
3582                         pOut->pMipInfo[i].offset           = mipOffset * tailMaxDepth;
3583                         pOut->pMipInfo[i].mipTailOffset    = mipOffset;
3584                         pOut->pMipInfo[i].macroBlockOffset = 0;
3585 
3586                         pOut->pMipInfo[i].pitch  = pitch;
3587                         pOut->pMipInfo[i].height = height;
3588                         pOut->pMipInfo[i].depth  = depth;
3589 
3590                         UINT_32 mipX = ((mipOffset >> 9)  & 1)  |
3591                                        ((mipOffset >> 10) & 2)  |
3592                                        ((mipOffset >> 11) & 4)  |
3593                                        ((mipOffset >> 12) & 8)  |
3594                                        ((mipOffset >> 13) & 16) |
3595                                        ((mipOffset >> 14) & 32);
3596                         UINT_32 mipY = ((mipOffset >> 8)  & 1)  |
3597                                        ((mipOffset >> 9)  & 2)  |
3598                                        ((mipOffset >> 10) & 4)  |
3599                                        ((mipOffset >> 11) & 8)  |
3600                                        ((mipOffset >> 12) & 16) |
3601                                        ((mipOffset >> 13) & 32);
3602 
3603                         if (blockSizeLog2 & 1)
3604                         {
3605                             const UINT_32 temp = mipX;
3606                             mipX = mipY;
3607                             mipY = temp;
3608 
3609                             if (index & 1)
3610                             {
3611                                 mipY = (mipY << 1) | (mipX & 1);
3612                                 mipX = mipX >> 1;
3613                             }
3614                         }
3615 
3616                         if (isThin)
3617                         {
3618                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3619                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3620                             pOut->pMipInfo[i].mipTailCoordZ = 0;
3621 
3622                             pitch  = Max(pitch  >> 1, Block256_2d[index].w);
3623                             height = Max(height >> 1, Block256_2d[index].h);
3624                             depth  = 1;
3625                         }
3626                         else
3627                         {
3628                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3629                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3630                             pOut->pMipInfo[i].mipTailCoordZ = 0;
3631 
3632                             pitch  = Max(pitch  >> 1, Block256_3d[index].w);
3633                             height = Max(height >> 1, Block256_3d[index].h);
3634                             depth  = PowTwoAlign(Max(depth  >> 1, 1u), Block256_3d[index].d);
3635                         }
3636                     }
3637                 }
3638             }
3639             else
3640             {
3641                 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numSamples;
3642                 pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3643 
3644                 if (pOut->pMipInfo != NULL)
3645                 {
3646                     pOut->pMipInfo[0].pitch            = pOut->pitch;
3647                     pOut->pMipInfo[0].height           = pOut->height;
3648                     pOut->pMipInfo[0].depth            = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
3649                     pOut->pMipInfo[0].offset           = 0;
3650                     pOut->pMipInfo[0].mipTailOffset    = 0;
3651                     pOut->pMipInfo[0].macroBlockOffset = 0;
3652                     pOut->pMipInfo[0].mipTailCoordX    = 0;
3653                     pOut->pMipInfo[0].mipTailCoordY    = 0;
3654                     pOut->pMipInfo[0].mipTailCoordZ    = 0;
3655                 }
3656             }
3657         }
3658     }
3659 
3660     return returnCode;
3661 }
3662 
3663 /**
3664 ************************************************************************************************************************
3665 *   Gfx11Lib::HwlComputeSurfaceAddrFromCoordTiled
3666 *
3667 *   @brief
3668 *       Internal function to calculate address from coord for tiled swizzle surface
3669 *
3670 *   @return
3671 *       ADDR_E_RETURNCODE
3672 ************************************************************************************************************************
3673 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3674 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceAddrFromCoordTiled(
3675      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
3676      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
3677      ) const
3678 {
3679     ADDR_E_RETURNCODE ret;
3680 
3681     if (IsBlock256b(pIn->swizzleMode))
3682     {
3683         ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3684     }
3685     else
3686     {
3687         ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3688     }
3689 
3690     return ret;
3691 }
3692 
3693 /**
3694 ************************************************************************************************************************
3695 *   Gfx11Lib::ComputeOffsetFromEquation
3696 *
3697 *   @brief
3698 *       Compute offset from equation
3699 *
3700 *   @return
3701 *       Offset
3702 ************************************************************************************************************************
3703 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const3704 UINT_32 Gfx11Lib::ComputeOffsetFromEquation(
3705     const ADDR_EQUATION* pEq,   ///< Equation
3706     UINT_32              x,     ///< x coord in bytes
3707     UINT_32              y,     ///< y coord in pixel
3708     UINT_32              z      ///< z coord in slice
3709     ) const
3710 {
3711     UINT_32 offset = 0;
3712 
3713     for (UINT_32 i = 0; i < pEq->numBits; i++)
3714     {
3715         UINT_32 v = 0;
3716 
3717         for (UINT_32 c = 0; c < pEq->numBitComponents; c++)
3718         {
3719             if (pEq->comps[c][i].valid)
3720             {
3721                 if (pEq->comps[c][i].channel == 0)
3722                 {
3723                     v ^= (x >> pEq->comps[c][i].index) & 1;
3724                 }
3725                 else if (pEq->comps[c][i].channel == 1)
3726                 {
3727                     v ^= (y >> pEq->comps[c][i].index) & 1;
3728                 }
3729                 else
3730                 {
3731                     ADDR_ASSERT(pEq->comps[c][i].channel == 2);
3732                     v ^= (z >> pEq->comps[c][i].index) & 1;
3733                 }
3734             }
3735         }
3736 
3737         offset |= (v << i);
3738     }
3739 
3740     return offset;
3741 }
3742 
3743 /**
3744 ************************************************************************************************************************
3745 *   Gfx11Lib::ComputeOffsetFromSwizzlePattern
3746 *
3747 *   @brief
3748 *       Compute offset from swizzle pattern
3749 *
3750 *   @return
3751 *       Offset
3752 ************************************************************************************************************************
3753 */
ComputeOffsetFromSwizzlePattern(const UINT_64 * pPattern,UINT_32 numBits,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const3754 UINT_32 Gfx11Lib::ComputeOffsetFromSwizzlePattern(
3755     const UINT_64* pPattern,    ///< Swizzle pattern
3756     UINT_32        numBits,     ///< Number of bits in pattern
3757     UINT_32        x,           ///< x coord in pixel
3758     UINT_32        y,           ///< y coord in pixel
3759     UINT_32        z,           ///< z coord in slice
3760     UINT_32        s            ///< sample id
3761     ) const
3762 {
3763     UINT_32                 offset          = 0;
3764     const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3765 
3766     for (UINT_32 i = 0; i < numBits; i++)
3767     {
3768         UINT_32 v = 0;
3769 
3770         if (pSwizzlePattern[i].x != 0)
3771         {
3772             UINT_16 mask  = pSwizzlePattern[i].x;
3773             UINT_32 xBits = x;
3774 
3775             while (mask != 0)
3776             {
3777                 if (mask & 1)
3778                 {
3779                     v ^= xBits & 1;
3780                 }
3781 
3782                 xBits >>= 1;
3783                 mask  >>= 1;
3784             }
3785         }
3786 
3787         if (pSwizzlePattern[i].y != 0)
3788         {
3789             UINT_16 mask  = pSwizzlePattern[i].y;
3790             UINT_32 yBits = y;
3791 
3792             while (mask != 0)
3793             {
3794                 if (mask & 1)
3795                 {
3796                     v ^= yBits & 1;
3797                 }
3798 
3799                 yBits >>= 1;
3800                 mask  >>= 1;
3801             }
3802         }
3803 
3804         if (pSwizzlePattern[i].z != 0)
3805         {
3806             UINT_16 mask  = pSwizzlePattern[i].z;
3807             UINT_32 zBits = z;
3808 
3809             while (mask != 0)
3810             {
3811                 if (mask & 1)
3812                 {
3813                     v ^= zBits & 1;
3814                 }
3815 
3816                 zBits >>= 1;
3817                 mask  >>= 1;
3818             }
3819         }
3820 
3821         if (pSwizzlePattern[i].s != 0)
3822         {
3823             UINT_16 mask  = pSwizzlePattern[i].s;
3824             UINT_32 sBits = s;
3825 
3826             while (mask != 0)
3827             {
3828                 if (mask & 1)
3829                 {
3830                     v ^= sBits & 1;
3831                 }
3832 
3833                 sBits >>= 1;
3834                 mask  >>= 1;
3835             }
3836         }
3837 
3838         offset |= (v << i);
3839     }
3840 
3841     return offset;
3842 }
3843 
3844 /**
3845 ************************************************************************************************************************
3846 *   Gfx11Lib::GetSwizzlePatternInfo
3847 *
3848 *   @brief
3849 *       Get swizzle pattern
3850 *
3851 *   @return
3852 *       Swizzle pattern information
3853 ************************************************************************************************************************
3854 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const3855 const ADDR_SW_PATINFO* Gfx11Lib::GetSwizzlePatternInfo(
3856     AddrSwizzleMode  swizzleMode,       ///< Swizzle mode
3857     AddrResourceType resourceType,      ///< Resource type
3858     UINT_32          elemLog2,          ///< Element size in bytes log2
3859     UINT_32          numFrag            ///< Number of fragment
3860     ) const
3861 {
3862     const UINT_32          index       = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
3863     const ADDR_SW_PATINFO* patInfo     = NULL;
3864     const UINT_32          swizzleMask = 1 << swizzleMode;
3865     const BOOL_32          isBlock256k = IsBlock256kb(swizzleMode);
3866     const BOOL_32          isBlock64K  = IsBlock64kb(swizzleMode);
3867 
3868     if (IsLinear(swizzleMode) == FALSE)
3869     {
3870         if (resourceType == ADDR_RSRC_TEX_3D)
3871         {
3872             ADDR_ASSERT(numFrag == 1);
3873 
3874             if ((swizzleMask & Gfx11Rsrc3dSwModeMask) != 0)
3875             {
3876                 if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
3877                 {
3878                     if (isBlock256k)
3879                     {
3880                         ADDR_ASSERT((swizzleMode == ADDR_SW_256KB_Z_X) || (swizzleMode == ADDR_SW_256KB_R_X));
3881                         patInfo = GFX11_SW_256K_ZR_X_1xaa_PATINFO;
3882                     }
3883                     else if (isBlock64K)
3884                     {
3885                         ADDR_ASSERT((swizzleMode == ADDR_SW_64KB_Z_X) || (swizzleMode == ADDR_SW_64KB_R_X));
3886                         patInfo = GFX11_SW_64K_ZR_X_1xaa_PATINFO;
3887                     }
3888                     else
3889                     {
3890                         ADDR_ASSERT_ALWAYS();
3891                     }
3892                 }
3893                 else if (IsDisplaySwizzle(resourceType, swizzleMode))
3894                 {
3895                     if (isBlock256k)
3896                     {
3897                         ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_D_X);
3898                         // patInfo = GFX11_SW_256K_D3_X_PATINFO;
3899                     }
3900                     else if (isBlock64K)
3901                     {
3902                         ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
3903                         patInfo = GFX11_SW_64K_D3_X_PATINFO;
3904                     }
3905                     else
3906                     {
3907                         ADDR_ASSERT_ALWAYS();
3908                     }
3909                 }
3910                 else
3911                 {
3912                     ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
3913 
3914                     if (isBlock256k)
3915                     {
3916                         ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_S_X);
3917                         patInfo = GFX11_SW_256K_S3_X_PATINFO;
3918                     }
3919                     else if (isBlock64K)
3920                     {
3921                         if (swizzleMode == ADDR_SW_64KB_S)
3922                         {
3923                             patInfo = GFX11_SW_64K_S3_PATINFO;
3924                         }
3925                         else if (swizzleMode == ADDR_SW_64KB_S_X)
3926                         {
3927                             patInfo = GFX11_SW_64K_S3_X_PATINFO;
3928                         }
3929                         else if (swizzleMode == ADDR_SW_64KB_S_T)
3930                         {
3931                             patInfo = GFX11_SW_64K_S3_T_PATINFO;
3932                         }
3933                         else
3934                         {
3935                             ADDR_ASSERT_ALWAYS();
3936                         }
3937                     }
3938                     else if (IsBlock4kb(swizzleMode))
3939                     {
3940                         if (swizzleMode == ADDR_SW_4KB_S)
3941                         {
3942                             patInfo = GFX11_SW_4K_S3_PATINFO;
3943                         }
3944                         else if (swizzleMode == ADDR_SW_4KB_S_X)
3945                         {
3946                             patInfo = GFX11_SW_4K_S3_X_PATINFO;
3947                         }
3948                         else
3949                         {
3950                             ADDR_ASSERT_ALWAYS();
3951                         }
3952                     }
3953                     else
3954                     {
3955                         ADDR_ASSERT_ALWAYS();
3956                     }
3957                 }
3958             }
3959         }
3960         else
3961         {
3962             if ((swizzleMask & Gfx11Rsrc2dSwModeMask) != 0)
3963             {
3964                 if (IsBlock256b(swizzleMode))
3965                 {
3966                     ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
3967                     patInfo = GFX11_SW_256_D_PATINFO;
3968                 }
3969                 else if (IsBlock4kb(swizzleMode))
3970                 {
3971                     if (swizzleMode == ADDR_SW_4KB_D)
3972                     {
3973                         patInfo = GFX11_SW_4K_D_PATINFO;
3974                     }
3975                     else if (swizzleMode == ADDR_SW_4KB_D_X)
3976                     {
3977                         patInfo = GFX11_SW_4K_D_X_PATINFO;
3978                     }
3979                     else
3980                     {
3981                         ADDR_ASSERT_ALWAYS();
3982                     }
3983                 }
3984                 else if (isBlock64K)
3985                 {
3986                     if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
3987                     {
3988                         if (numFrag == 1)
3989                         {
3990                             patInfo = GFX11_SW_64K_ZR_X_1xaa_PATINFO;
3991                         }
3992                         else if (numFrag == 2)
3993                         {
3994                             patInfo = GFX11_SW_64K_ZR_X_2xaa_PATINFO;
3995                         }
3996                         else if (numFrag == 4)
3997                         {
3998                             patInfo = GFX11_SW_64K_ZR_X_4xaa_PATINFO;
3999                         }
4000                         else if (numFrag == 8)
4001                         {
4002                             patInfo = GFX11_SW_64K_ZR_X_8xaa_PATINFO;
4003                         }
4004                         else
4005                         {
4006                             ADDR_ASSERT_ALWAYS();
4007                         }
4008                     }
4009                     else if (IsDisplaySwizzle(resourceType, swizzleMode))
4010                     {
4011                         if (swizzleMode == ADDR_SW_64KB_D)
4012                         {
4013                             patInfo = GFX11_SW_64K_D_PATINFO;
4014                         }
4015                         else if (swizzleMode == ADDR_SW_64KB_D_X)
4016                         {
4017                             patInfo = GFX11_SW_64K_D_X_PATINFO;
4018                         }
4019                         else if (swizzleMode == ADDR_SW_64KB_D_T)
4020                         {
4021                             patInfo = GFX11_SW_64K_D_T_PATINFO;
4022                         }
4023                         else
4024                         {
4025                             ADDR_ASSERT_ALWAYS();
4026                         }
4027                     }
4028                     else
4029                     {
4030                         ADDR_ASSERT_ALWAYS();
4031                     }
4032                 }
4033                 else if (isBlock256k)
4034                 {
4035                     if (IsZOrderSwizzle(swizzleMode) || IsRtOptSwizzle(swizzleMode))
4036                     {
4037                         if (numFrag == 1)
4038                         {
4039                             patInfo = GFX11_SW_256K_ZR_X_1xaa_PATINFO;
4040                         }
4041                         else if (numFrag == 2)
4042                         {
4043                             patInfo = GFX11_SW_256K_ZR_X_2xaa_PATINFO;
4044                         }
4045                         else if (numFrag == 4)
4046                         {
4047                             patInfo = GFX11_SW_256K_ZR_X_4xaa_PATINFO;
4048                         }
4049                         else if (numFrag == 8)
4050                         {
4051                             patInfo = GFX11_SW_256K_ZR_X_8xaa_PATINFO;
4052                         }
4053                         else
4054                         {
4055                             ADDR_ASSERT_ALWAYS();
4056                         }
4057                     }
4058                     else if (IsDisplaySwizzle(resourceType, swizzleMode))
4059                     {
4060                         ADDR_ASSERT(swizzleMode == ADDR_SW_256KB_D_X);
4061                         patInfo = GFX11_SW_256K_D_X_PATINFO;
4062                     }
4063                     else
4064                     {
4065                         ADDR_ASSERT_ALWAYS();
4066                     }
4067                 }
4068                 else
4069                 {
4070                     ADDR_ASSERT_ALWAYS();
4071                 }
4072             }
4073         }
4074     }
4075 
4076     return (patInfo != NULL) ? &patInfo[index] : NULL;
4077 }
4078 
4079 /**
4080 ************************************************************************************************************************
4081 *   Gfx11Lib::ComputeSurfaceAddrFromCoordMicroTiled
4082 *
4083 *   @brief
4084 *       Internal function to calculate address from coord for micro tiled swizzle surface
4085 *
4086 *   @return
4087 *       ADDR_E_RETURNCODE
4088 ************************************************************************************************************************
4089 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4090 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceAddrFromCoordMicroTiled(
4091      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4092      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4093      ) const
4094 {
4095     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
4096     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4097     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
4098 
4099     localIn.swizzleMode  = pIn->swizzleMode;
4100     localIn.flags        = pIn->flags;
4101     localIn.resourceType = pIn->resourceType;
4102     localIn.bpp          = pIn->bpp;
4103     localIn.width        = Max(pIn->unalignedWidth,  1u);
4104     localIn.height       = Max(pIn->unalignedHeight, 1u);
4105     localIn.numSlices    = Max(pIn->numSlices,       1u);
4106     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
4107     localIn.numSamples   = Max(pIn->numSamples,      1u);
4108     localIn.numFrags     = localIn.numSamples;
4109     localOut.pMipInfo    = mipInfo;
4110 
4111     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
4112 
4113     if (ret == ADDR_OK)
4114     {
4115         const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4116         const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
4117         const UINT_32 swMode   = static_cast<UINT_32>(pIn->swizzleMode);
4118         const UINT_32 eqIndex  = m_equationLookupTable[rsrcType][swMode][elemLog2];
4119 
4120         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4121         {
4122             const UINT_32 pb           = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4123             const UINT_32 yb           = pIn->y / localOut.blockHeight;
4124             const UINT_32 xb           = pIn->x / localOut.blockWidth;
4125             const UINT_32 blockIndex   = yb * pb + xb;
4126             const UINT_32 blockSize    = 256;
4127             const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4128                                                                    pIn->x << elemLog2,
4129                                                                    pIn->y,
4130                                                                    0);
4131             pOut->addr = localOut.sliceSize * pIn->slice +
4132                          mipInfo[pIn->mipId].macroBlockOffset +
4133                          (blockIndex * blockSize) +
4134                          blk256Offset;
4135         }
4136         else
4137         {
4138             ret = ADDR_INVALIDPARAMS;
4139         }
4140     }
4141 
4142     return ret;
4143 }
4144 
4145 /**
4146 ************************************************************************************************************************
4147 *   Gfx11Lib::ComputeSurfaceAddrFromCoordMacroTiled
4148 *
4149 *   @brief
4150 *       Internal function to calculate address from coord for macro tiled swizzle surface
4151 *
4152 *   @return
4153 *       ADDR_E_RETURNCODE
4154 ************************************************************************************************************************
4155 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4156 ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4157      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4158      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4159      ) const
4160 {
4161     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
4162     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4163     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
4164 
4165     localIn.swizzleMode  = pIn->swizzleMode;
4166     localIn.flags        = pIn->flags;
4167     localIn.resourceType = pIn->resourceType;
4168     localIn.bpp          = pIn->bpp;
4169     localIn.width        = Max(pIn->unalignedWidth,  1u);
4170     localIn.height       = Max(pIn->unalignedHeight, 1u);
4171     localIn.numSlices    = Max(pIn->numSlices,       1u);
4172     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
4173     localIn.numSamples   = Max(pIn->numSamples,      1u);
4174     localIn.numFrags     = localIn.numSamples;
4175     localOut.pMipInfo    = mipInfo;
4176 
4177     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4178 
4179     if (ret == ADDR_OK)
4180     {
4181         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
4182         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4183         const UINT_32 blkMask     = (1 << blkSizeLog2) - 1;
4184         const UINT_32 pipeMask    = (1 << m_pipesLog2) - 1;
4185         const UINT_32 bankMask    = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4186         const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4187                                     (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4188 
4189         if (localIn.numSamples > 1)
4190         {
4191             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4192                                                                     pIn->resourceType,
4193                                                                     elemLog2,
4194                                                                     localIn.numSamples);
4195 
4196             if (pPatInfo != NULL)
4197             {
4198                 const UINT_32 pb     = localOut.pitch / localOut.blockWidth;
4199                 const UINT_32 yb     = pIn->y / localOut.blockHeight;
4200                 const UINT_32 xb     = pIn->x / localOut.blockWidth;
4201                 const UINT_64 blkIdx = yb * pb + xb;
4202 
4203                 ADDR_BIT_SETTING fullSwizzlePattern[20];
4204                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4205 
4206                 const UINT_32 blkOffset =
4207                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4208                                                     blkSizeLog2,
4209                                                     pIn->x,
4210                                                     pIn->y,
4211                                                     pIn->slice,
4212                                                     pIn->sample);
4213 
4214                 pOut->addr = (localOut.sliceSize * pIn->slice) +
4215                              (blkIdx << blkSizeLog2) +
4216                              (blkOffset ^ pipeBankXor);
4217             }
4218             else
4219             {
4220                 ret = ADDR_INVALIDPARAMS;
4221             }
4222         }
4223         else
4224         {
4225             const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4226             const UINT_32 swMode  = static_cast<UINT_32>(pIn->swizzleMode);
4227             const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4228 
4229             if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4230             {
4231                 const BOOL_32 inTail    = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4232                 const BOOL_32 isThin    = IsThin(pIn->resourceType, pIn->swizzleMode);
4233                 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4234                 const UINT_32 sliceId   = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4235                 const UINT_32 x         = inTail ? (pIn->x     + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4236                 const UINT_32 y         = inTail ? (pIn->y     + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4237                 const UINT_32 z         = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4238                 const UINT_32 pb        = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4239                 const UINT_32 yb        = pIn->y / localOut.blockHeight;
4240                 const UINT_32 xb        = pIn->x / localOut.blockWidth;
4241                 const UINT_64 blkIdx    = yb * pb + xb;
4242                 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4243                                                                     x << elemLog2,
4244                                                                     y,
4245                                                                     z);
4246                 pOut->addr = sliceSize * sliceId +
4247                              mipInfo[pIn->mipId].macroBlockOffset +
4248                              (blkIdx << blkSizeLog2) +
4249                              (blkOffset ^ pipeBankXor);
4250             }
4251             else
4252             {
4253                 ret = ADDR_INVALIDPARAMS;
4254             }
4255         }
4256     }
4257 
4258     return ret;
4259 }
4260 
4261 /**
4262 ************************************************************************************************************************
4263 *   Gfx11Lib::HwlComputeMaxBaseAlignments
4264 *
4265 *   @brief
4266 *       Gets maximum alignments
4267 *   @return
4268 *       maximum alignments
4269 ************************************************************************************************************************
4270 */
HwlComputeMaxBaseAlignments() const4271 UINT_32 Gfx11Lib::HwlComputeMaxBaseAlignments() const
4272 {
4273     return Size256K;
4274 }
4275 
4276 /**
4277 ************************************************************************************************************************
4278 *   Gfx11Lib::HwlComputeMaxMetaBaseAlignments
4279 *
4280 *   @brief
4281 *       Gets maximum alignments for metadata
4282 *   @return
4283 *       maximum alignments for metadata
4284 ************************************************************************************************************************
4285 */
HwlComputeMaxMetaBaseAlignments() const4286 UINT_32 Gfx11Lib::HwlComputeMaxMetaBaseAlignments() const
4287 {
4288     Dim3d metaBlk;
4289 
4290     // Max base alignment for Htile
4291     const AddrSwizzleMode ValidSwizzleModeForHtile[] =
4292     {
4293         ADDR_SW_64KB_Z_X,
4294         ADDR_SW_256KB_Z_X,
4295     };
4296 
4297     UINT_32 maxBaseAlignHtile = 0;
4298 
4299     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForHtile) / sizeof(ValidSwizzleModeForHtile[0]); swIdx++)
4300     {
4301         for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4302         {
4303             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4304             {
4305                 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx11DataDepthStencil,
4306                                                                 ADDR_RSRC_TEX_2D,
4307                                                                 ValidSwizzleModeForHtile[swIdx],
4308                                                                 bppLog2,
4309                                                                 numFragLog2,
4310                                                                 TRUE,
4311                                                                 &metaBlk);
4312 
4313                 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4314             }
4315         }
4316     }
4317 
4318     // Max base alignment for 2D Dcc
4319     // swizzle mode support DCC...
4320     const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4321     {
4322         ADDR_SW_64KB_R_X,
4323         ADDR_SW_256KB_R_X,
4324     };
4325 
4326     UINT_32 maxBaseAlignDcc2D = 0;
4327 
4328     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4329     {
4330         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4331         {
4332             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4333             {
4334                 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx11DataColor,
4335                                                              ADDR_RSRC_TEX_2D,
4336                                                              ValidSwizzleModeForDcc2D[swIdx],
4337                                                              bppLog2,
4338                                                              numFragLog2,
4339                                                              TRUE,
4340                                                              &metaBlk);
4341 
4342                 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4343             }
4344         }
4345     }
4346 
4347     // Max base alignment for 3D Dcc
4348     const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4349     {
4350         ADDR_SW_64KB_S_X,
4351         ADDR_SW_64KB_D_X,
4352         ADDR_SW_64KB_R_X,
4353         ADDR_SW_256KB_S_X,
4354         ADDR_SW_256KB_D_X,
4355         ADDR_SW_256KB_R_X,
4356     };
4357 
4358     UINT_32 maxBaseAlignDcc3D = 0;
4359 
4360     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4361     {
4362         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4363         {
4364             const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx11DataColor,
4365                                                          ADDR_RSRC_TEX_3D,
4366                                                          ValidSwizzleModeForDcc3D[swIdx],
4367                                                          bppLog2,
4368                                                          0,
4369                                                          TRUE,
4370                                                          &metaBlk);
4371 
4372             maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4373         }
4374     }
4375 
4376     return Max(maxBaseAlignHtile, Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4377 }
4378 
4379 /**
4380 ************************************************************************************************************************
4381 *   Gfx11Lib::GetMetaElementSizeLog2
4382 *
4383 *   @brief
4384 *       Gets meta data element size log2
4385 *   @return
4386 *       Meta data element size log2
4387 ************************************************************************************************************************
4388 */
GetMetaElementSizeLog2(Gfx11DataType dataType)4389 INT_32 Gfx11Lib::GetMetaElementSizeLog2(
4390     Gfx11DataType dataType) ///< Data surface type
4391 {
4392     INT_32 elemSizeLog2 = 0;
4393 
4394     if (dataType == Gfx11DataColor)
4395     {
4396         elemSizeLog2 = 0;
4397     }
4398     else
4399     {
4400         ADDR_ASSERT(dataType == Gfx11DataDepthStencil);
4401         elemSizeLog2 = 2;
4402     }
4403 
4404     return elemSizeLog2;
4405 }
4406 
4407 /**
4408 ************************************************************************************************************************
4409 *   Gfx11Lib::GetMetaCacheSizeLog2
4410 *
4411 *   @brief
4412 *       Gets meta data cache line size log2
4413 *   @return
4414 *       Meta data cache line size log2
4415 ************************************************************************************************************************
4416 */
GetMetaCacheSizeLog2(Gfx11DataType dataType)4417 INT_32 Gfx11Lib::GetMetaCacheSizeLog2(
4418     Gfx11DataType dataType) ///< Data surface type
4419 {
4420     INT_32 cacheSizeLog2 = 0;
4421 
4422     if (dataType == Gfx11DataColor)
4423     {
4424         cacheSizeLog2 = 6;
4425     }
4426     else
4427     {
4428         ADDR_ASSERT(dataType == Gfx11DataDepthStencil);
4429         cacheSizeLog2 = 8;
4430     }
4431 
4432     return cacheSizeLog2;
4433 }
4434 
4435 /**
4436 ************************************************************************************************************************
4437 *   Gfx11Lib::HwlComputeSurfaceInfoLinear
4438 *
4439 *   @brief
4440 *       Internal function to calculate alignment for linear surface
4441 *
4442 *   @return
4443 *       ADDR_E_RETURNCODE
4444 ************************************************************************************************************************
4445 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4446 ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoLinear(
4447      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4448      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4449      ) const
4450 {
4451     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4452 
4453     if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4454     {
4455         returnCode = ADDR_INVALIDPARAMS;
4456     }
4457     else
4458     {
4459         const UINT_32 elementBytes = pIn->bpp >> 3;
4460         const UINT_32 pitchAlign   = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4461         const UINT_32 mipDepth     = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4462         UINT_32       pitch        = PowTwoAlign(pIn->width, pitchAlign);
4463         UINT_32       actualHeight = pIn->height;
4464         UINT_64       sliceSize    = 0;
4465 
4466         if (pIn->numMipLevels > 1)
4467         {
4468             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4469             {
4470                 UINT_32 mipWidth, mipHeight;
4471 
4472                 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4473 
4474                 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4475 
4476                 if (pOut->pMipInfo != NULL)
4477                 {
4478                     pOut->pMipInfo[i].pitch            = mipActualWidth;
4479                     pOut->pMipInfo[i].height           = mipHeight;
4480                     pOut->pMipInfo[i].depth            = mipDepth;
4481                     pOut->pMipInfo[i].offset           = sliceSize;
4482                     pOut->pMipInfo[i].mipTailOffset    = 0;
4483                     pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4484                 }
4485 
4486                 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4487             }
4488         }
4489         else
4490         {
4491             returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4492 
4493             if (returnCode == ADDR_OK)
4494             {
4495                 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4496 
4497                 if (pOut->pMipInfo != NULL)
4498                 {
4499                     pOut->pMipInfo[0].pitch            = pitch;
4500                     pOut->pMipInfo[0].height           = actualHeight;
4501                     pOut->pMipInfo[0].depth            = mipDepth;
4502                     pOut->pMipInfo[0].offset           = 0;
4503                     pOut->pMipInfo[0].mipTailOffset    = 0;
4504                     pOut->pMipInfo[0].macroBlockOffset = 0;
4505                 }
4506             }
4507         }
4508 
4509         if (returnCode == ADDR_OK)
4510         {
4511             pOut->pitch          = pitch;
4512             pOut->height         = actualHeight;
4513             pOut->numSlices      = pIn->numSlices;
4514             pOut->sliceSize      = sliceSize;
4515             pOut->surfSize       = sliceSize * pOut->numSlices;
4516             pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4517             pOut->blockWidth     = pitchAlign;
4518             pOut->blockHeight    = 1;
4519             pOut->blockSlices    = 1;
4520 
4521             // Following members are useless on GFX11
4522             pOut->mipChainPitch  = 0;
4523             pOut->mipChainHeight = 0;
4524             pOut->mipChainSlice  = 0;
4525             pOut->epitchIsHeight = FALSE;
4526 
4527             // Post calculation validate
4528             ADDR_ASSERT(pOut->sliceSize > 0);
4529         }
4530     }
4531 
4532     return returnCode;
4533 }
4534 
4535 } // V2
4536 } // Addr
4537