xref: /aosp_15_r20/external/mesa3d/src/intel/common/intel_genX_state_brw.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (c) 2022 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef INTEL_GENX_STATE_BRW_H
25 #define INTEL_GENX_STATE_BRW_H
26 
27 #ifndef GFX_VERx10
28 #error This file should only be included by genX files.
29 #endif
30 
31 #include <stdbool.h>
32 
33 #include "dev/intel_device_info.h"
34 #include "genxml/gen_macros.h"
35 
36 #ifdef __cplusplus
37 extern "C" {
38 #endif
39 
40 #if GFX_VER >= 7
41 
42 static inline void
43 intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps,
44                             const struct intel_device_info *devinfo,
45                             const struct brw_wm_prog_data *prog_data,
46                             unsigned rasterization_samples,
47                             enum intel_msaa_flags msaa_flags)
48 {
49    assert(rasterization_samples != 0);
50 
51    bool enable_8  = prog_data->dispatch_8;
52    bool enable_16 = prog_data->dispatch_16;
53    bool enable_32 = prog_data->dispatch_32;
54    uint8_t dispatch_multi = prog_data->dispatch_multi;
55 
56 #if GFX_VER >= 20
57    if (ps->RenderTargetFastClearEnable) {
58       /* Bspec 57340 (r59562):
59        *
60        *   Clearing shader must use SIMD16 dispatch mode.
61        *
62        * The spec doesn't state if a fast-clear shader can be multi-poly. We
63        * just assume it can't.
64        */
65       assert(enable_16);
66       enable_32 = enable_8 = false;
67       dispatch_multi = 0;
68    }
69 #elif GFX_VER >= 9
70    /* SKL PRMs, Volume 2a: Command Reference: Instructions:
71     *    3DSTATE_PS_BODY::8 Pixel Dispatch Enable:
72     *
73     *    "When Render Target Fast Clear Enable is ENABLED or Render Target
74     *     Resolve Type = RESOLVE_PARTIAL or RESOLVE_FULL, this bit must be
75     *     DISABLED."
76     */
77    if (ps->RenderTargetFastClearEnable ||
78        ps->RenderTargetResolveType == RESOLVE_PARTIAL ||
79        ps->RenderTargetResolveType == RESOLVE_FULL)
80       enable_8 = false;
81 #elif GFX_VER == 8
82    /* BDW has the same wording as SKL, except some of the fields mentioned
83     * don't exist...
84     */
85    if (ps->RenderTargetFastClearEnable ||
86        ps->RenderTargetResolveEnable)
87       enable_8 = false;
88 #endif
89 
90    const bool is_persample_dispatch =
91       brw_wm_prog_data_is_persample(prog_data, msaa_flags);
92 
93    if (is_persample_dispatch) {
94       /* TGL PRMs, Volume 2d: Command Reference: Structures:
95        *    3DSTATE_PS_BODY::32 Pixel Dispatch Enable:
96        *
97        *    "Must not be enabled when dispatch rate is sample AND NUM_MULTISAMPLES > 1."
98        */
99       if (GFX_VER >= 12 && rasterization_samples > 1)
100          enable_32 = false;
101 
102       /* Starting with SandyBridge (where we first get MSAA), the different
103        * pixel dispatch combinations are grouped into classifications A
104        * through F (SNB PRM Vol. 2 Part 1 Section 7.7.1).  On most hardware
105        * generations, the only configurations supporting persample dispatch
106        * are those in which only one dispatch width is enabled.
107        *
108        * The Gfx12 hardware spec has a similar dispatch grouping table, but
109        * the following conflicting restriction applies (from the page on
110        * "Structure_3DSTATE_PS_BODY"), so we need to keep the SIMD16 shader:
111        *
112        *  "SIMD32 may only be enabled if SIMD16 or (dual)SIMD8 is also
113        *   enabled."
114        */
115       if (enable_32 || enable_16)
116          enable_8 = false;
117       if (GFX_VER < 12 && enable_32)
118          enable_16 = false;
119    }
120 
121    /* The docs for 3DSTATE_PS::32 Pixel Dispatch Enable say:
122     *
123     *    "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16,
124     *     SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch
125     *     mode."
126     *
127     * 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8.
128     */
129    if (GFX_VER >= 9 && rasterization_samples == 16 && !is_persample_dispatch) {
130       assert(enable_8 || enable_16);
131       enable_32 = false;
132    }
133 
134    assert(enable_8 || enable_16 || enable_32 ||
135           (GFX_VER >= 12 && dispatch_multi));
136    assert(!dispatch_multi || (GFX_VER >= 12 && !enable_8));
137 
138 #if GFX_VER >= 20
139    if (dispatch_multi) {
140       ps->Kernel0Enable = true;
141       ps->Kernel0SIMDWidth = (dispatch_multi == 32 ? PS_SIMD32 : PS_SIMD16);
142       ps->Kernel0MaximumPolysperThread =
143          prog_data->max_polygons - 1;
144       switch (dispatch_multi / prog_data->max_polygons) {
145       case 8:
146          ps->Kernel0PolyPackingPolicy = POLY_PACK8_FIXED;
147          break;
148       case 16:
149          ps->Kernel0PolyPackingPolicy = POLY_PACK16_FIXED;
150          break;
151       default:
152          unreachable("Invalid polygon width");
153       }
154    } else if (enable_16) {
155       ps->Kernel0Enable = true;
156       ps->Kernel0SIMDWidth = PS_SIMD16;
157       ps->Kernel0PolyPackingPolicy = POLY_PACK16_FIXED;
158    }
159 
160    if (enable_32) {
161       ps->Kernel1Enable = true;
162       ps->Kernel1SIMDWidth = PS_SIMD32;
163    } else if (enable_16 && dispatch_multi == 16) {
164       ps->Kernel1Enable = true;
165       ps->Kernel1SIMDWidth = PS_SIMD16;
166    }
167 #else
168    ps->_8PixelDispatchEnable = enable_8 || (GFX_VER == 12 && dispatch_multi);
169    ps->_16PixelDispatchEnable = enable_16;
170    ps->_32PixelDispatchEnable = enable_32;
171 #endif
172 }
173 
174 #endif
175 
176 #ifdef __cplusplus
177 }
178 #endif
179 
180 #endif /* INTEL_GENX_STATE_BRW_H */
181