/*
 * Copyright © 2006-2022 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_fs.h"
#include "brw_fs_builder.h"

using namespace brw;
vs_thread_payload::vs_thread_payload(const fs_visitor &v)
{
   unsigned r = 0;

   /* R0: Thread header. */
   r += reg_unit(v.devinfo);

   /* R1: URB handles. */
   urb_handles = brw_ud8_grf(r, 0);
   r += reg_unit(v.devinfo);

   num_regs = r;
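   /* For example, on a platform where reg_unit() == 1 this works out to
    * just two GRFs: g0 holds the thread header and g1 the URB handles.
    */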
}

tcs_thread_payload::tcs_thread_payload(const fs_visitor &v)
{
   struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(v.prog_data);
   struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(v.prog_data);
   struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) v.key;

   if (vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH) {
      patch_urb_output = brw_ud1_grf(0, 0);
      primitive_id = brw_vec1_grf(0, 1);

      /* r1-r4 contain the ICP handles. */
      icp_handle_start = brw_ud8_grf(1, 0);

      num_regs = 5;
   } else {
      assert(vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_MULTI_PATCH);
      assert(tcs_key->input_vertices <= BRW_MAX_TCS_INPUT_VERTICES);

      unsigned r = 0;

      r += reg_unit(v.devinfo);

      patch_urb_output = brw_ud8_grf(r, 0);
      r += reg_unit(v.devinfo);

      if (tcs_prog_data->include_primitive_id) {
         primitive_id = brw_vec8_grf(r, 0);
         r += reg_unit(v.devinfo);
      }

      /* ICP handles occupy the next 1-32 registers. */
      icp_handle_start = brw_ud8_grf(r, 0);
      r += brw_tcs_prog_key_input_vertices(tcs_key) * reg_unit(v.devinfo);

      num_regs = r;
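      /* For example, MULTI_PATCH with a primitive ID and 16 input vertices
       * on a reg_unit() == 1 platform: r0 header, r1 patch URB output,
       * r2 primitive ID, r3-r18 ICP handles, for num_regs == 19.
       */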
   }
}

tes_thread_payload::tes_thread_payload(const fs_visitor &v)
{
   unsigned r = 0;

   /* R0: Thread Header. */
   patch_urb_input = retype(brw_vec1_grf(0, 0), BRW_TYPE_UD);
   primitive_id = brw_vec1_grf(0, 1);
   r += reg_unit(v.devinfo);

   /* R1-3: gl_TessCoord.xyz. */
   for (unsigned i = 0; i < 3; i++) {
      coords[i] = brw_vec8_grf(r, 0);
      r += reg_unit(v.devinfo);
   }

   /* R4: URB output handles. */
   urb_output = brw_ud8_grf(r, 0);
   r += reg_unit(v.devinfo);

   num_regs = r;
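   /* With reg_unit() == 1 this matches the register numbers named above:
    * g0 header, g1-g3 tessellation coordinates, g4 URB output handles,
    * for num_regs == 5.
    */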
}

gs_thread_payload::gs_thread_payload(fs_visitor &v)
{
   struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(v.prog_data);
   struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(v.prog_data);
   const fs_builder bld = fs_builder(&v).at_end();

   /* R0: thread header. */
   unsigned r = reg_unit(v.devinfo);

   /* R1: output URB handles. */
   urb_handles = bld.vgrf(BRW_TYPE_UD);
   bld.AND(urb_handles, brw_ud8_grf(r, 0),
           v.devinfo->ver >= 20 ? brw_imm_ud(0xFFFFFF) : brw_imm_ud(0xFFFF));
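   /* Pre-Xe2 parts keep the handle in bits 15:0; on Xe2+ the field is
    * wider, hence the 24-bit mask there.
    */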

   /* R1: Instance ID stored in bits 31:27 */
   instance_id = bld.vgrf(BRW_TYPE_UD);
   bld.SHR(instance_id, brw_ud8_grf(r, 0), brw_imm_ud(27u));

   r += reg_unit(v.devinfo);

   if (gs_prog_data->include_primitive_id) {
      primitive_id = brw_ud8_grf(r, 0);
      r += reg_unit(v.devinfo);
   }

   /* Always enable VUE handles so we can safely use pull model if needed.
    *
    * The push model for a GS uses a ton of register space even for trivial
    * scenarios with just a few inputs, so just make things easier and a bit
    * safer by always having pull model available.
    */
   gs_prog_data->base.include_vue_handles = true;

   /* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
   icp_handle_start = brw_ud8_grf(r, 0);
   r += v.nir->info.gs.vertices_in * reg_unit(v.devinfo);

   num_regs = r;

   /* Use a maximum of 24 registers for push-model inputs. */
   const unsigned max_push_components = 24;

   /* If pushing our inputs would take too many registers, reduce the URB read
    * length (which is in HWords, or 8 registers), and resort to pulling.
    *
    * Note that the GS reads <URB Read Length> HWords for every vertex - so we
    * have to multiply by VerticesIn to obtain the total storage requirement.
    */
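   /* For example: with vertices_in == 3 and urb_read_length == 2, pushing
    * would take 8 * 2 * 3 == 48 registers, which exceeds 24, so the read
    * length gets clamped to ROUND_DOWN_TO(24 / 3, 8) / 8 == 1 HWord per
    * vertex.
    */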
   if (8 * vue_prog_data->urb_read_length * v.nir->info.gs.vertices_in >
       max_push_components) {
      vue_prog_data->urb_read_length =
         ROUND_DOWN_TO(max_push_components / v.nir->info.gs.vertices_in, 8) / 8;
   }
}

static inline void
setup_fs_payload_gfx20(fs_thread_payload &payload,
                       const fs_visitor &v,
                       bool &source_depth_to_render_target)
{
   struct brw_wm_prog_data *prog_data = brw_wm_prog_data(v.prog_data);
   const unsigned payload_width = 16;
   assert(v.dispatch_width % payload_width == 0);
   assert(v.devinfo->ver >= 20);

   for (unsigned j = 0; j < v.dispatch_width / payload_width; j++) {
      /* R0-1: PS thread payload header, masks and pixel X/Y coordinates. */
      payload.num_regs++;
      payload.subspan_coord_reg[j] = payload.num_regs++;
   }
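   /* In SIMD32, the loop above runs twice (dispatch_width / payload_width
    * == 2), so the header and subspan coordinates occupy four registers
    * before the interpolation payload below begins.
    */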

   for (unsigned j = 0; j < v.dispatch_width / payload_width; j++) {
      /* R2-13: Barycentric interpolation coordinates. These appear
       * in the same order that they appear in the brw_barycentric_mode
       * enum. Each set of coordinates occupies 2 64B registers per
       * SIMD16 half. Coordinates only appear if they were enabled
       * using the "Barycentric Interpolation Mode" bits in WM_STATE.
       */
      for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
         if (prog_data->barycentric_interp_modes & (1 << i)) {
            payload.barycentric_coord_reg[i][j] = payload.num_regs;
            payload.num_regs += payload_width / 4;
         }
      }

      /* R14: Interpolated depth if "Pixel Shader Uses Source Depth" is set. */
      if (prog_data->uses_src_depth) {
         payload.source_depth_reg[j] = payload.num_regs;
         payload.num_regs += payload_width / 8;
      }

      /* R15: Interpolated W if "Pixel Shader Uses Source W" is set. */
      if (prog_data->uses_src_w) {
         payload.source_w_reg[j] = payload.num_regs;
         payload.num_regs += payload_width / 8;
      }

      /* R16: MSAA input coverage mask if "Pixel Shader Uses Input
       * Coverage Mask" is set.
       */
      if (prog_data->uses_sample_mask) {
         payload.sample_mask_in_reg[j] = payload.num_regs;
         payload.num_regs += payload_width / 8;
      }

      /* R19: MSAA position XY offsets if "Position XY Offset Select"
       * is either POSOFFSET_CENTROID or POSOFFSET_SAMPLE. Note that
       * this is delivered as a single SIMD32 vector, inconsistently
       * with most other PS payload fields.
       */
      if (prog_data->uses_pos_offset && j == 0) {
         for (unsigned k = 0; k < 2; k++) {
            payload.sample_pos_reg[k] = payload.num_regs;
            payload.num_regs++;
         }
      }

      /* R22: Sample offsets. */
      if (prog_data->uses_sample_offsets && j == 0) {
         payload.sample_offsets_reg = payload.num_regs;
         payload.num_regs += 2;
      }
   }

   /* RP0: Source Depth and/or W Attribute Vertex Deltas and/or
    * Perspective Bary Planes.
    */
   if (prog_data->uses_depth_w_coefficients ||
       prog_data->uses_pc_bary_coefficients) {
      payload.depth_w_coef_reg = payload.pc_bary_coef_reg = payload.num_regs;
      payload.num_regs += 2 * v.max_polygons;
   }
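   /* Each polygon handled by the thread gets its own set of coefficient
    * registers, hence the max_polygons factors here and below.
    */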

   /* RP4: Non-Perspective Bary planes. */
   if (prog_data->uses_npc_bary_coefficients) {
      payload.npc_bary_coef_reg = payload.num_regs;
      payload.num_regs += 2 * v.max_polygons;
   }

   if (v.nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
      source_depth_to_render_target = true;
   }
}

static inline void
setup_fs_payload_gfx9(fs_thread_payload &payload,
                      const fs_visitor &v,
                      bool &source_depth_to_render_target)
{
   struct brw_wm_prog_data *prog_data = brw_wm_prog_data(v.prog_data);

   const unsigned payload_width = MIN2(16, v.dispatch_width);
   assert(v.dispatch_width % payload_width == 0);
   assert(v.devinfo->ver < 20);

   payload.num_regs = 0;

   /* R0: PS thread payload header. */
   payload.num_regs++;

   for (unsigned j = 0; j < v.dispatch_width / payload_width; j++) {
      /* R1: masks, pixel X/Y coordinates. */
      payload.subspan_coord_reg[j] = payload.num_regs++;
   }

   for (unsigned j = 0; j < v.dispatch_width / payload_width; j++) {
      /* R3-26: barycentric interpolation coordinates. These appear in the
       * same order that they appear in the brw_barycentric_mode enum. Each
       * set of coordinates occupies 2 registers if dispatch width == 8 and 4
       * registers if dispatch width == 16. Coordinates only appear if they
       * were enabled using the "Barycentric Interpolation Mode" bits in
       * WM_STATE.
       */
      for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
         if (prog_data->barycentric_interp_modes & (1 << i)) {
            payload.barycentric_coord_reg[i][j] = payload.num_regs;
            payload.num_regs += payload_width / 4;
         }
      }

      /* R27-28: interpolated depth if uses source depth */
      if (prog_data->uses_src_depth) {
         payload.source_depth_reg[j] = payload.num_regs;
         payload.num_regs += payload_width / 8;
      }

      /* R29-30: interpolated W set if GFX6_WM_USES_SOURCE_W. */
      if (prog_data->uses_src_w) {
         payload.source_w_reg[j] = payload.num_regs;
         payload.num_regs += payload_width / 8;
      }

      /* R31: MSAA position offsets. */
      if (prog_data->uses_pos_offset) {
         payload.sample_pos_reg[j] = payload.num_regs;
         payload.num_regs++;
      }

      /* R32-33: MSAA input coverage mask */
      if (prog_data->uses_sample_mask) {
         payload.sample_mask_in_reg[j] = payload.num_regs;
         payload.num_regs += payload_width / 8;
      }
   }
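   /* In SIMD32, the per-half loop above runs twice, so each enabled field
    * appears once per SIMD16 half; e.g. interpolated source depth then
    * takes two blocks of payload_width / 8 == 2 registers each.
    */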

   /* R66: Source Depth and/or W Attribute Vertex Deltas. */
   if (prog_data->uses_depth_w_coefficients) {
      payload.depth_w_coef_reg = payload.num_regs;
      payload.num_regs += v.max_polygons;
   }

   /* R68: Perspective bary planes. */
   if (prog_data->uses_pc_bary_coefficients) {
      payload.pc_bary_coef_reg = payload.num_regs;
      payload.num_regs += v.max_polygons;
   }

   /* R70: Non-perspective bary planes. */
   if (prog_data->uses_npc_bary_coefficients) {
      payload.npc_bary_coef_reg = payload.num_regs;
      payload.num_regs += v.max_polygons;
   }

   /* R72: Sample offsets. */
   if (prog_data->uses_sample_offsets) {
      payload.sample_offsets_reg = payload.num_regs;
      payload.num_regs++;
   }

   if (v.nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
      source_depth_to_render_target = true;
   }
}

fs_thread_payload::fs_thread_payload(const fs_visitor &v,
                                     bool &source_depth_to_render_target)
   : subspan_coord_reg(),
     source_depth_reg(),
     source_w_reg(),
     aa_dest_stencil_reg(),
     dest_depth_reg(),
     sample_pos_reg(),
     sample_mask_in_reg(),
     barycentric_coord_reg(),
     depth_w_coef_reg(),
     pc_bary_coef_reg(),
     npc_bary_coef_reg(),
     sample_offsets_reg()
{
   if (v.devinfo->ver >= 20)
      setup_fs_payload_gfx20(*this, v, source_depth_to_render_target);
   else
      setup_fs_payload_gfx9(*this, v, source_depth_to_render_target);
}

cs_thread_payload::cs_thread_payload(const fs_visitor &v)
{
   struct brw_cs_prog_data *prog_data = brw_cs_prog_data(v.prog_data);

   unsigned r = reg_unit(v.devinfo);

   /* See nir_setup_uniforms for subgroup_id in earlier versions. */
   if (v.devinfo->verx10 >= 125) {
      subgroup_id_ = brw_ud1_grf(0, 2);

      for (int i = 0; i < 3; i++) {
         if (prog_data->generate_local_id & (1 << i)) {
            local_invocation_id[i] = brw_uw8_grf(r, 0);
            r += reg_unit(v.devinfo);
            if (v.devinfo->ver < 20 && v.dispatch_width == 32)
               r += reg_unit(v.devinfo);
         } else {
            local_invocation_id[i] = brw_imm_uw(0);
         }
      }
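      /* Local invocation IDs are 16-bit values, so a pre-Xe2 SIMD32
       * dispatch needs twice the register space per enabled dimension,
       * hence the extra reg_unit() added above.
       */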

      /* TODO: Fill out uses_btd_stack_ids automatically */
      if (prog_data->uses_btd_stack_ids)
         r += reg_unit(v.devinfo);
   }

   num_regs = r;
}

void
cs_thread_payload::load_subgroup_id(const fs_builder &bld,
                                    brw_reg &dest) const
{
   auto devinfo = bld.shader->devinfo;
   dest = retype(dest, BRW_TYPE_UD);

   if (subgroup_id_.file != BAD_FILE) {
      assert(devinfo->verx10 >= 125);
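      /* INTEL_MASK(7, 0) == 0xff, so this keeps only the low byte of the
       * payload dword holding the subgroup ID.
       */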
      bld.AND(dest, subgroup_id_, brw_imm_ud(INTEL_MASK(7, 0)));
   } else {
      assert(devinfo->verx10 < 125);
      assert(gl_shader_stage_is_compute(bld.shader->stage));
      int index = brw_get_subgroup_id_param_index(devinfo,
                                                  bld.shader->prog_data);
      bld.MOV(dest, brw_uniform_reg(index, BRW_TYPE_UD));
   }
}

task_mesh_thread_payload::task_mesh_thread_payload(fs_visitor &v)
   : cs_thread_payload(v)
{
   /* Task and Mesh Shader Payloads (SIMD8 and SIMD16)
    *
    * R0: Header
    * R1: Local_ID.X[0-7 or 0-15]
    * R2: Inline Parameter
    *
    * Task and Mesh Shader Payloads (SIMD32)
    *
    * R0: Header
    * R1: Local_ID.X[0-15]
    * R2: Local_ID.X[16-31]
    * R3: Inline Parameter
    *
    * Local_ID.X values are 16 bits.
    *
    * The inline parameter is optional but always present, since we use it
    * to pass the address to descriptors.
    */

   const fs_builder bld = fs_builder(&v).at_end();

   unsigned r = 0;
   assert(subgroup_id_.file != BAD_FILE);
   extended_parameter_0 = retype(brw_vec1_grf(0, 3), BRW_TYPE_UD);

   if (v.devinfo->ver >= 20) {
      urb_output = brw_ud1_grf(1, 0);
   } else {
      urb_output = bld.vgrf(BRW_TYPE_UD);
      /* In both mesh and task shader payloads, the lower 16 bits of g0.6
       * are an offset within the Slice's Local URB, which says where the
       * shader is supposed to output its data.
       */
      bld.AND(urb_output, brw_ud1_grf(0, 6), brw_imm_ud(0xFFFF));
   }
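   /* On Xe2+ the output handle is read directly from R1, with no masking
    * needed (compare the pre-Xe2 path above).
    */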

   if (v.stage == MESA_SHADER_MESH) {
      /* g0.7 is the Task Shader URB Entry Offset, which contains both an
       * offset within the Slice's Local URB (bits 0:15) and a slice selector
       * (bits 16:24). The slice selector can be non-zero when a mesh shader
       * is spawned on a slice other than the one where the task shader ran.
       * Bit 24 says that a Slice ID is present and bits 16:23 are the Slice
       * ID.
       */
      task_urb_input = brw_ud1_grf(0, 7);
   }
   r += reg_unit(v.devinfo);

   local_index = brw_uw8_grf(r, 0);
   r += reg_unit(v.devinfo);
   if (v.devinfo->ver < 20 && v.dispatch_width == 32)
      r += reg_unit(v.devinfo);

   inline_parameter = brw_ud1_grf(r, 0);
   r += reg_unit(v.devinfo);

   num_regs = r;
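   /* For example, a SIMD16 dispatch with reg_unit() == 1 matches the
    * layout table above exactly: r0 header, r1 Local_ID.X, r2 inline
    * parameter, for num_regs == 3.
    */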
}

bs_thread_payload::bs_thread_payload(const fs_visitor &v)
{
   unsigned r = 0;

   /* R0: Thread header. */
   r += reg_unit(v.devinfo);

   /* R1: Stack IDs. */
   r += reg_unit(v.devinfo);

   /* R2: Inline Parameter. Used for argument addresses. */
   global_arg_ptr = brw_ud1_grf(r, 0);
   local_arg_ptr = brw_ud1_grf(r, 2);
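   /* Each argument pointer is a 64-bit address, which is why the global
    * and local pointers sit at dword offsets 0 and 2 of the inline
    * parameter register.
    */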
   r += reg_unit(v.devinfo);

   num_regs = r;
}

void
bs_thread_payload::load_shader_type(const fs_builder &bld, brw_reg &dest) const
{
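   /* The shader type is encoded in the low nibble of g0.3, so copy the
    * dword and mask off the remaining bits.
    */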
   brw_reg ud_dest = retype(dest, BRW_TYPE_UD);
   bld.MOV(ud_dest, retype(brw_vec1_grf(0, 3), ud_dest.type));
   bld.AND(ud_dest, ud_dest, brw_imm_ud(0xf));
}