/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "genxml/genX_rt_pack.h"

#include "common/intel_compute_slm.h"
#include "common/intel_genX_state_elk.h"
#include "common/intel_l3_config.h"
#include "common/intel_sample_positions.h"
#include "nir/nir_xfb_info.h"
#include "vk_util.h"
#include "vk_format.h"
#include "vk_log.h"
#include "vk_render_pass.h"

static uint32_t
vertex_element_comp_control(enum isl_format format, unsigned comp)
{
   uint8_t bits;
   switch (comp) {
   case 0: bits = isl_format_layouts[format].channels.r.bits; break;
   case 1: bits = isl_format_layouts[format].channels.g.bits; break;
   case 2: bits = isl_format_layouts[format].channels.b.bits; break;
   case 3: bits = isl_format_layouts[format].channels.a.bits; break;
   default: unreachable("Invalid component");
   }

   /*
    * Take into account hardware restrictions when dealing with 64-bit floats.
    *
    * From Broadwell spec, command reference structures, page 586:
    *    "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
    *    64-bit components are stored in the URB without any conversion. In
    *    this case, vertex elements must be written as 128 or 256 bits, with
    *    VFCOMP_STORE_0 being used to pad the output as required. E.g., if
    *    R64_PASSTHRU is used to copy a 64-bit Red component into the URB,
    *    Component 1 must be specified as VFCOMP_STORE_0 (with Components 2,3
    *    set to VFCOMP_NOSTORE) in order to output a 128-bit vertex element,
    *    or Components 1-3 must be specified as VFCOMP_STORE_0 in order to
    *    output a 256-bit vertex element. Likewise, use of R64G64B64_PASSTHRU
    *    requires Component 3 to be specified as VFCOMP_STORE_0 in order to
    *    output a 256-bit vertex element."
    */
   if (bits) {
      return VFCOMP_STORE_SRC;
   } else if (comp >= 2 &&
              !isl_format_layouts[format].channels.b.bits &&
              isl_format_layouts[format].channels.r.type == ISL_RAW) {
      /* When emitting 64-bit attributes, we need to write either 128 or 256
       * bit chunks, using VFCOMP_NOSTORE when not writing the chunk, and
       * VFCOMP_STORE_0 to pad the written chunk */
      return VFCOMP_NOSTORE;
   } else if (comp < 3 ||
              isl_format_layouts[format].channels.r.type == ISL_RAW) {
      /* Note we need to pad with value 0, not 1, due to hardware restrictions
       * (see comment above) */
      return VFCOMP_STORE_0;
   } else if (isl_format_layouts[format].channels.r.type == ISL_UINT ||
              isl_format_layouts[format].channels.r.type == ISL_SINT) {
      assert(comp == 3);
      return VFCOMP_STORE_1_INT;
   } else {
      assert(comp == 3);
      return VFCOMP_STORE_1_FP;
   }
}
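
/* Worked example, derived from the rules above: R64G64_PASSTHRU has bits in
 * channels r and g only, so components 0-1 return VFCOMP_STORE_SRC and
 * components 2-3 return VFCOMP_NOSTORE, producing a 128-bit element.
 * R64G64B64_PASSTHRU instead pads component 3 with VFCOMP_STORE_0, rounding
 * the element up to 256 bits exactly as the PRM quote requires.
 */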

static void
emit_vertex_input(struct anv_graphics_pipeline *pipeline,
                  const struct vk_vertex_input_state *vi)
{
   const struct elk_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   /* Pull inputs_read out of the VS prog data */
   const uint64_t inputs_read = vs_prog_data->inputs_read;
   const uint64_t double_inputs_read =
      vs_prog_data->double_inputs_read & inputs_read;
   assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
   const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
   const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0;
   const bool needs_svgs_elem = vs_prog_data->uses_vertexid ||
                                vs_prog_data->uses_instanceid ||
                                vs_prog_data->uses_firstvertex ||
                                vs_prog_data->uses_baseinstance;

   uint32_t elem_count = __builtin_popcount(elements) -
      __builtin_popcount(elements_double) / 2;
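   /* 64-bit (double) attributes are marked in elements_double; consistent
    * with the per-attribute slot computation below, each pair of
    * double-marked input slots packs into a single vertex element, so we
    * subtract half their population count here.
    */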

   const uint32_t total_elems =
      MAX2(1, elem_count + needs_svgs_elem + vs_prog_data->uses_drawid);

   uint32_t *p;

   const uint32_t num_dwords = 1 + total_elems * 2;
   p = anv_batch_emitn(&pipeline->base.batch, num_dwords,
                       GENX(3DSTATE_VERTEX_ELEMENTS));
   if (!p)
      return;

   for (uint32_t i = 0; i < total_elems; i++) {
      /* The SKL docs for VERTEX_ELEMENT_STATE say:
       *
       *    "All elements must be valid from Element[0] to the last valid
       *    element. (I.e. if Element[2] is valid then Element[1] and
       *    Element[0] must also be valid)."
       *
       * The SKL docs for 3D_Vertex_Component_Control say:
       *
       *    "Don't store this component. (Not valid for Component 0, but can
       *    be used for Component 1-3)."
       *
       * So we can't just leave a vertex element blank and hope for the best.
       * We have to tell the VF hardware to put something in it; so we just
       * store a bunch of zeros.
       *
       * TODO: Compact vertex elements so we never end up with holes.
       */
      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .Valid = true,
         .Component0Control = VFCOMP_STORE_0,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + i * 2], &element);
   }

   u_foreach_bit(a, vi->attributes_valid) {
      enum isl_format format = anv_get_isl_format(pipeline->base.device->info,
                                                  vi->attributes[a].format,
                                                  VK_IMAGE_ASPECT_COLOR_BIT,
                                                  VK_IMAGE_TILING_LINEAR);
      assume(format < ISL_NUM_FORMATS);

      uint32_t binding = vi->attributes[a].binding;
      assert(binding < MAX_VBS);

      if ((elements & (1 << a)) == 0)
         continue; /* Binding unused */

      uint32_t slot =
         __builtin_popcount(elements & ((1 << a) - 1)) -
         DIV_ROUND_UP(__builtin_popcount(elements_double &
                                         ((1 << a) - 1)), 2);

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = vi->attributes[a].binding,
         .Valid = true,
         .SourceElementFormat = format,
         .EdgeFlagEnable = false,
         .SourceElementOffset = vi->attributes[a].offset,
         .Component0Control = vertex_element_comp_control(format, 0),
         .Component1Control = vertex_element_comp_control(format, 1),
         .Component2Control = vertex_element_comp_control(format, 2),
         .Component3Control = vertex_element_comp_control(format, 3),
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element);

#if GFX_VER >= 8
      /* On Broadwell and later, we have a separate VF_INSTANCING packet
       * that controls instancing. On Haswell and prior, that's part of
       * VERTEX_BUFFER_STATE which we emit later.
       */
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         bool per_instance = pipeline->vb[binding].instanced;
         uint32_t divisor = pipeline->vb[binding].instance_divisor *
                            pipeline->instance_multiplier;

         vfi.InstancingEnable = per_instance;
         vfi.VertexElementIndex = slot;
         vfi.InstanceDataStepRate = per_instance ? divisor : 1;
      }
#endif
   }

   const uint32_t id_slot = elem_count;
   if (needs_svgs_elem) {
      /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
       *    "Within a VERTEX_ELEMENT_STATE structure, if a Component
       *    Control field is set to something other than VFCOMP_STORE_SRC,
       *    no higher-numbered Component Control fields may be set to
       *    VFCOMP_STORE_SRC"
       *
       * This means that if we have BaseInstance, we need BaseVertex as
       * well. Just do all or nothing.
       */
      uint32_t base_ctrl = (vs_prog_data->uses_firstvertex ||
                            vs_prog_data->uses_baseinstance) ?
                           VFCOMP_STORE_SRC : VFCOMP_STORE_0;

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = ANV_SVGS_VB_INDEX,
         .Valid = true,
         .SourceElementFormat = ISL_FORMAT_R32G32_UINT,
         .Component0Control = base_ctrl,
         .Component1Control = base_ctrl,
#if GFX_VER >= 8
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
#else
         .Component2Control = VFCOMP_STORE_VID,
         .Component3Control = VFCOMP_STORE_IID,
#endif
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element);

#if GFX_VER >= 8
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.VertexElementIndex = id_slot;
      }
#endif
   }

#if GFX_VER >= 8
   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.VertexIDEnable = vs_prog_data->uses_vertexid;
      sgvs.VertexIDComponentNumber = 2;
      sgvs.VertexIDElementOffset = id_slot;
      sgvs.InstanceIDEnable = vs_prog_data->uses_instanceid;
      sgvs.InstanceIDComponentNumber = 3;
      sgvs.InstanceIDElementOffset = id_slot;
   }
#endif
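
   /* On gfx8+, the 3DSTATE_VF_SGVS packet above injects VertexID into
    * component 2 and InstanceID into component 3 of the SVGS element, which
    * is why that element leaves those components as VFCOMP_STORE_0
    * placeholders. On gfx7, the same data comes from the
    * VFCOMP_STORE_VID/VFCOMP_STORE_IID controls instead.
    */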

   const uint32_t drawid_slot = elem_count + needs_svgs_elem;
   if (vs_prog_data->uses_drawid) {
      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = ANV_DRAWID_VB_INDEX,
         .Valid = true,
         .SourceElementFormat = ISL_FORMAT_R32_UINT,
         .Component0Control = VFCOMP_STORE_SRC,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL,
                                      &p[1 + drawid_slot * 2],
                                      &element);

#if GFX_VER >= 8
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.VertexElementIndex = drawid_slot;
      }
#endif
   }
}

void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
                     const struct intel_l3_config *l3_config,
                     VkShaderStageFlags active_stages,
                     const unsigned entry_size[4],
                     enum intel_urb_deref_block_size *deref_block_size)
{
   const struct intel_device_info *devinfo = device->info;
   struct intel_urb_config urb_cfg = {
      .size = { entry_size[0], entry_size[1], entry_size[2], entry_size[3], },
   };

   bool constrained;
   intel_get_urb_config(devinfo, l3_config,
                        active_stages &
                           VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
                        active_stages & VK_SHADER_STAGE_GEOMETRY_BIT,
                        &urb_cfg, deref_block_size, &constrained);

#if GFX_VERx10 == 70
   /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
    *
    *    "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
    *    needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
    *    3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
    *    3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
    *    needs to be sent before any combination of VS associated 3DSTATE."
    */
   anv_batch_emit(batch, GFX7_PIPE_CONTROL, pc) {
      pc.DepthStallEnable = true;
      pc.PostSyncOperation = WriteImmediateData;
      pc.Address = device->workaround_address;
   }
#endif

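   /* The 3DSTATE_URB_{VS,HS,DS,GS} commands share a single layout and have
    * consecutive 3D command sub-opcodes, so the loop below emits all four by
    * bumping the sub-opcode on the VS template.
    */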
   for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
      anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
         urb._3DCommandSubOpcode += i;
         urb.VSURBStartingAddress = urb_cfg.start[i];
         urb.VSURBEntryAllocationSize = urb_cfg.size[i] - 1;
         urb.VSNumberofURBEntries = urb_cfg.entries[i];
      }
   }
}

static void
emit_urb_setup(struct anv_graphics_pipeline *pipeline,
               enum intel_urb_deref_block_size *deref_block_size)
{
   unsigned entry_size[4];
   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
      const struct elk_vue_prog_data *prog_data =
         !anv_pipeline_has_stage(pipeline, i) ? NULL :
         (const struct elk_vue_prog_data *) pipeline->shaders[i]->prog_data;

      entry_size[i] = prog_data ? prog_data->urb_entry_size : 1;
   }

   genX(emit_urb_setup)(pipeline->base.device, &pipeline->base.batch,
                        pipeline->base.l3_config,
                        pipeline->active_stages, entry_size,
                        deref_block_size);
}

static void
emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
{
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE), sbe);
#if GFX_VER >= 8
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ), sbe);
#endif
      return;
   }

   struct GENX(3DSTATE_SBE) sbe = {
      GENX(3DSTATE_SBE_header),
      .AttributeSwizzleEnable = anv_pipeline_is_primitive(pipeline),
      .PointSpriteTextureCoordinateOrigin = UPPERLEFT,
      .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs,
      .ConstantInterpolationEnable = wm_prog_data->flat_inputs,
   };

#if GFX_VER >= 8
   /* On Broadwell, they broke 3DSTATE_SBE into two packets */
   struct GENX(3DSTATE_SBE_SWIZ) swiz = {
      GENX(3DSTATE_SBE_SWIZ_header),
   };
#else
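/* On gfx7 the attribute swizzle fields live directly in 3DSTATE_SBE, so
 * alias "swiz" to the SBE struct and fill the same fields either way.
 */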
# define swiz sbe
#endif

   const struct intel_vue_map *fs_input_map =
      &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;

   int first_slot = elk_compute_first_urb_slot_required(wm_prog_data->inputs,
                                                        fs_input_map);
   assert(first_slot % 2 == 0);
   unsigned urb_entry_read_offset = first_slot / 2;
   int max_source_attr = 0;
   for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) {
      uint8_t attr = wm_prog_data->urb_setup_attribs[idx];
      int input_index = wm_prog_data->urb_setup[attr];

      assert(0 <= input_index);

      /* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in the
       * VUE header
       */
      if (attr == VARYING_SLOT_VIEWPORT ||
          attr == VARYING_SLOT_LAYER ||
          attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
         continue;
      }

      if (attr == VARYING_SLOT_PNTC) {
         sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
         continue;
      }

      const int slot = fs_input_map->varying_to_slot[attr];

      if (slot == -1) {
         /* This attribute does not exist in the VUE--that means that the
          * vertex shader did not write to it. It could be that it's a
          * regular varying read by the fragment shader but not written by
          * the vertex shader or it's gl_PrimitiveID. In the first case the
          * value is undefined, in the second it needs to be gl_PrimitiveID.
          */
         swiz.Attribute[input_index].ConstantSource = PRIM_ID;
         swiz.Attribute[input_index].ComponentOverrideX = true;
         swiz.Attribute[input_index].ComponentOverrideY = true;
         swiz.Attribute[input_index].ComponentOverrideZ = true;
         swiz.Attribute[input_index].ComponentOverrideW = true;
         continue;
      }

      /* We have to subtract two slots to account for the URB entry output
       * read offset in the VS and GS stages.
       */
      const int source_attr = slot - 2 * urb_entry_read_offset;
      assert(source_attr >= 0 && source_attr < 32);
      max_source_attr = MAX2(max_source_attr, source_attr);
      /* The hardware can only apply swizzle overrides to the first 16
       * attributes; the remaining (up to 16 more) have to be lined up so
       * that the input index equals the output index. We'll need to do some
       * tweaking to make sure that's the case.
       */
      if (input_index < 16)
         swiz.Attribute[input_index].SourceAttribute = source_attr;
      else
         assert(source_attr == input_index);
   }

   sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
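   /* The read length is counted in 256-bit URB units, i.e. pairs of vec4
    * varying slots, so round the highest source attribute up to a whole
    * pair.
    */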
   sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);
#if GFX_VER >= 8
   sbe.ForceVertexURBEntryReadOffset = true;
   sbe.ForceVertexURBEntryReadLength = true;
#endif

   uint32_t *dw = anv_batch_emit_dwords(&pipeline->base.batch,
                                        GENX(3DSTATE_SBE_length));
   if (!dw)
      return;
   GENX(3DSTATE_SBE_pack)(&pipeline->base.batch, dw, &sbe);

#if GFX_VER >= 8
   dw = anv_batch_emit_dwords(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ_length));
   if (!dw)
      return;
   GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->base.batch, dw, &swiz);
#endif
}

/** Returns the final polygon mode for rasterization
 *
 * This function takes into account polygon mode, primitive topology and the
 * different shader stages which might generate their own type of primitives.
 */
VkPolygonMode
genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
                          VkPrimitiveTopology primitive_topology)
{
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
      switch (get_gs_prog_data(pipeline)->output_topology) {
      case _3DPRIM_POINTLIST:
         return VK_POLYGON_MODE_POINT;

      case _3DPRIM_LINELIST:
      case _3DPRIM_LINESTRIP:
      case _3DPRIM_LINELOOP:
         return VK_POLYGON_MODE_LINE;

      case _3DPRIM_TRILIST:
      case _3DPRIM_TRIFAN:
      case _3DPRIM_TRISTRIP:
      case _3DPRIM_RECTLIST:
      case _3DPRIM_QUADLIST:
      case _3DPRIM_QUADSTRIP:
      case _3DPRIM_POLYGON:
         return pipeline->polygon_mode;
      }
      unreachable("Unsupported GS output topology");
   } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      switch (get_tes_prog_data(pipeline)->output_topology) {
      case INTEL_TESS_OUTPUT_TOPOLOGY_POINT:
         return VK_POLYGON_MODE_POINT;

      case INTEL_TESS_OUTPUT_TOPOLOGY_LINE:
         return VK_POLYGON_MODE_LINE;

      case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW:
      case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW:
         return pipeline->polygon_mode;
      }
      unreachable("Unsupported TES output topology");
   } else {
      switch (primitive_topology) {
      case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
         return VK_POLYGON_MODE_POINT;

      case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
      case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
      case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
      case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
         return VK_POLYGON_MODE_LINE;

      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
         return pipeline->polygon_mode;

      default:
         unreachable("Unsupported primitive topology");
      }
   }
}

uint32_t
genX(ms_rasterization_mode)(struct anv_graphics_pipeline *pipeline,
                            VkPolygonMode raster_mode)
{
#if GFX_VER <= 7
   if (raster_mode == VK_POLYGON_MODE_LINE) {
      switch (pipeline->line_mode) {
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
         return MSRASTMODE_ON_PATTERN;

      case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
         return MSRASTMODE_OFF_PIXEL;

      default:
         unreachable("Unsupported line rasterization mode");
      }
   } else {
      return pipeline->rasterization_samples > 1 ?
             MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
   }
#else
   unreachable("Only on gen7");
#endif
}

const uint32_t genX(vk_to_intel_cullmode)[] = {
   [VK_CULL_MODE_NONE] = CULLMODE_NONE,
   [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT,
   [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK,
   [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH
};

const uint32_t genX(vk_to_intel_fillmode)[] = {
   [VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID,
   [VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME,
   [VK_POLYGON_MODE_POINT] = FILL_MODE_POINT,
};

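/* The hardware FrontWinding field expects 1 for counter-clockwise and 0 for
 * clockwise, hence the explicit values below rather than named enums.
 */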
const uint32_t genX(vk_to_intel_front_face)[] = {
   [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1,
   [VK_FRONT_FACE_CLOCKWISE] = 0
};

void
genX(rasterization_mode)(VkPolygonMode raster_mode,
                         VkLineRasterizationModeEXT line_mode,
                         float line_width,
                         uint32_t *api_mode,
                         bool *msaa_rasterization_enable)
{
#if GFX_VER >= 8
   if (raster_mode == VK_POLYGON_MODE_LINE) {
      /* Unfortunately, configuring our line rasterization hardware on gfx8
       * and later is rather painful. Instead of giving us bits to tell the
       * hardware what line mode to use like we had on gfx7, we now have an
       * arcane combination of API Mode and MSAA enable bits which do things
       * in a table which are expected to magically put the hardware into the
       * right mode for your API. Sadly, Vulkan isn't any of the APIs the
       * hardware people thought of so nothing works the way you want it to.
       *
       * Look at the table titled "Multisample Rasterization Modes" in Vol 7
       * of the Skylake PRM for more details.
       */
      switch (line_mode) {
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
         *api_mode = DX100;
         /* The algorithm the HW uses to draw wide lines doesn't quite match
          * what the CTS expects, at least for rectangular lines, so we set
          * this to false here, making it draw parallelograms instead, which
          * work well enough.
          */
         *msaa_rasterization_enable = line_width < 1.0078125;
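         /* 1.0078125 is 1 + 1/128; assuming the hardware's 1/128 line-width
          * granularity (an assumption noted here, not taken from the PRM
          * quote above), this keeps MSAA rasterization enabled only for
          * lines that are effectively one pixel wide.
          */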
         break;

      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
      case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
         *api_mode = DX9OGL;
         *msaa_rasterization_enable = false;
         break;

      default:
         unreachable("Unsupported line rasterization mode");
      }
   } else {
      *api_mode = DX100;
      *msaa_rasterization_enable = true;
   }
#else
   unreachable("Invalid call");
#endif
}

static void
emit_rs_state(struct anv_graphics_pipeline *pipeline,
              const struct vk_input_assembly_state *ia,
              const struct vk_rasterization_state *rs,
              const struct vk_multisample_state *ms,
              const struct vk_render_pass_state *rp,
              enum intel_urb_deref_block_size urb_deref_block_size)
{
   struct GENX(3DSTATE_SF) sf = {
      GENX(3DSTATE_SF_header),
   };

   sf.ViewportTransformEnable = true;
   sf.StatisticsEnable = true;
   sf.VertexSubPixelPrecisionSelect = _8Bit;
   sf.AALineDistanceMode = true;

   switch (rs->provoking_vertex) {
   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
      sf.TriangleStripListProvokingVertexSelect = 0;
      sf.LineStripListProvokingVertexSelect = 0;
      sf.TriangleFanProvokingVertexSelect = 1;
      break;

   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
      sf.TriangleStripListProvokingVertexSelect = 2;
      sf.LineStripListProvokingVertexSelect = 1;
      sf.TriangleFanProvokingVertexSelect = 2;
      break;

   default:
      unreachable("Invalid provoking vertex mode");
   }

#if GFX_VERx10 == 75
   sf.LineStippleEnable = rs->line.stipple.enable;
#endif

   bool point_from_shader;
   const struct elk_vue_prog_data *last_vue_prog_data =
      anv_pipeline_get_last_vue_prog_data(pipeline);
   point_from_shader = last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ;

   if (point_from_shader) {
      sf.PointWidthSource = Vertex;
   } else {
      sf.PointWidthSource = State;
      sf.PointWidth = 1.0;
   }

#if GFX_VER >= 8
   struct GENX(3DSTATE_RASTER) raster = {
      GENX(3DSTATE_RASTER_header),
   };
#else
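/* On gfx7 the rasterizer fields live in 3DSTATE_SF, so alias "raster" to
 * the SF struct; the #undef below drops the alias before packing.
 */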
# define raster sf
#endif

   /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
    * "Multisample Modes State".
    */
#if GFX_VER >= 8
   /* NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the BDW and SKL PMA fix
    * computations. If we ever set this bit to a different value, they will
    * need to be updated accordingly.
    */
   raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0;
   raster.ForceMultisampling = false;
#endif

   raster.FrontFaceFillMode = genX(vk_to_intel_fillmode)[rs->polygon_mode];
   raster.BackFaceFillMode = genX(vk_to_intel_fillmode)[rs->polygon_mode];
   raster.ScissorRectangleEnable = true;

#if GFX_VER >= 8
   raster.ViewportZClipTestEnable = pipeline->depth_clip_enable;
#endif

#if GFX_VER == 7
   /* Gfx7 requires that we provide the depth format in 3DSTATE_SF so that it
    * can get the depth offsets correct.
    */
   if (rp != NULL &&
       rp->depth_attachment_format != VK_FORMAT_UNDEFINED) {
      assert(vk_format_has_depth(rp->depth_attachment_format));
      enum isl_format isl_format =
         anv_get_isl_format(pipeline->base.device->info,
                            rp->depth_attachment_format,
                            VK_IMAGE_ASPECT_DEPTH_BIT,
                            VK_IMAGE_TILING_OPTIMAL);
      sf.DepthBufferSurfaceFormat =
         isl_format_get_depth_format(isl_format, false);
   }
#endif

#if GFX_VER >= 8
   GENX(3DSTATE_SF_pack)(NULL, pipeline->gfx8.sf, &sf);
   GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gfx8.raster, &raster);
#else
# undef raster
   GENX(3DSTATE_SF_pack)(NULL, &pipeline->gfx7.sf, &sf);
#endif
}

static void
emit_ms_state(struct anv_graphics_pipeline *pipeline,
              const struct vk_multisample_state *ms)
{
#if GFX_VER >= 8
   /* On Gfx8+ 3DSTATE_MULTISAMPLE only holds the number of samples. */
   genX(emit_multisample)(&pipeline->base.batch,
                          pipeline->rasterization_samples,
                          NULL);
#endif

   /* From the Vulkan 1.0 spec:
    *    If pSampleMask is NULL, it is treated as if the mask has all bits
    *    enabled, i.e. no coverage is removed from fragments.
    *
    * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
    */
#if GFX_VER >= 8
   uint32_t sample_mask = 0xffff;
#else
   uint32_t sample_mask = 0xff;
#endif

   if (ms != NULL)
      sample_mask &= ms->sample_mask;

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
      sm.SampleMask = sample_mask;
   }
}

const uint32_t genX(vk_to_intel_logic_op)[] = {
   [VK_LOGIC_OP_COPY] = LOGICOP_COPY,
   [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR,
   [VK_LOGIC_OP_AND] = LOGICOP_AND,
   [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE,
   [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED,
   [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP,
   [VK_LOGIC_OP_XOR] = LOGICOP_XOR,
   [VK_LOGIC_OP_OR] = LOGICOP_OR,
   [VK_LOGIC_OP_NOR] = LOGICOP_NOR,
   [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV,
   [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT,
   [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE,
   [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED,
   [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED,
   [VK_LOGIC_OP_NAND] = LOGICOP_NAND,
   [VK_LOGIC_OP_SET] = LOGICOP_SET,
};

static const uint32_t vk_to_intel_blend[] = {
   [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO,
   [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE,
   [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR,
   [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR,
   [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA,
   [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA,
   [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR,
   [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA,
   [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE,
   [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR,
   [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA,
};

static const uint32_t vk_to_intel_blend_op[] = {
   [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD,
   [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT,
   [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT,
   [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN,
   [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX,
};

const uint32_t genX(vk_to_intel_compare_op)[] = {
   [VK_COMPARE_OP_NEVER] = PREFILTEROP_NEVER,
   [VK_COMPARE_OP_LESS] = PREFILTEROP_LESS,
   [VK_COMPARE_OP_EQUAL] = PREFILTEROP_EQUAL,
   [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROP_LEQUAL,
   [VK_COMPARE_OP_GREATER] = PREFILTEROP_GREATER,
   [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROP_NOTEQUAL,
   [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROP_GEQUAL,
   [VK_COMPARE_OP_ALWAYS] = PREFILTEROP_ALWAYS,
};

const uint32_t genX(vk_to_intel_stencil_op)[] = {
   [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP,
   [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO,
   [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE,
   [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT,
   [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT,
   [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT,
   [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR,
   [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR,
};

const uint32_t genX(vk_to_intel_primitive_type)[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};

static bool
is_dual_src_blend_factor(VkBlendFactor factor)
{
   return factor == VK_BLEND_FACTOR_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_SRC1_ALPHA ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
}

static inline uint32_t *
write_disabled_blend(uint32_t *state)
{
   struct GENX(BLEND_STATE_ENTRY) entry = {
      .WriteDisableAlpha = true,
      .WriteDisableRed = true,
      .WriteDisableGreen = true,
      .WriteDisableBlue = true,
   };
   GENX(BLEND_STATE_ENTRY_pack)(NULL, state, &entry);
   return state + GENX(BLEND_STATE_ENTRY_length);
}

static void
emit_cb_state(struct anv_graphics_pipeline *pipeline,
              const struct vk_color_blend_state *cb,
              const struct vk_multisample_state *ms)
{
   struct anv_device *device = pipeline->base.device;
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   struct GENX(BLEND_STATE) blend_state = {
#if GFX_VER >= 8
      .AlphaToCoverageEnable = ms && ms->alpha_to_coverage_enable,
      .AlphaToOneEnable = ms && ms->alpha_to_one_enable,
#endif
   };

   uint32_t surface_count = 0;
   struct anv_pipeline_bind_map *map;
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
      surface_count = map->surface_count;
   }

   const struct intel_device_info *devinfo = pipeline->base.device->info;
   uint32_t *blend_state_start = devinfo->ver >= 8 ?
      pipeline->gfx8.blend_state : pipeline->gfx7.blend_state;
   uint32_t *state_pos = blend_state_start;

   state_pos += GENX(BLEND_STATE_length);
#if GFX_VER >= 8
   struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 };
#endif
   for (unsigned i = 0; i < surface_count; i++) {
      struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];

      /* All color attachments are at the beginning of the binding table */
      if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
         break;

      /* We can have at most 8 attachments */
      assert(i < MAX_RTS);

      if (cb == NULL || binding->index >= cb->attachment_count) {
         state_pos = write_disabled_blend(state_pos);
         continue;
      }

      const struct vk_color_blend_attachment_state *a =
         &cb->attachments[binding->index];

      struct GENX(BLEND_STATE_ENTRY) entry = {
#if GFX_VER < 8
         .AlphaToCoverageEnable = ms && ms->alpha_to_coverage_enable,
         .AlphaToOneEnable = ms && ms->alpha_to_one_enable,
#endif
         .LogicOpEnable = cb->logic_op_enable,

         /* Vulkan specification 1.2.168, VkLogicOp:
          *
          *    "Logical operations are controlled by the logicOpEnable and
          *    logicOp members of VkPipelineColorBlendStateCreateInfo. If
          *    logicOpEnable is VK_TRUE, then a logical operation selected by
          *    logicOp is applied between each color attachment and the
          *    fragment’s corresponding output value, and blending of all
          *    attachments is treated as if it were disabled."
          *
          * From the Broadwell PRM Volume 2d: Command Reference: Structures:
          * BLEND_STATE_ENTRY:
          *
          *    "Enabling LogicOp and Color Buffer Blending at the same time is
          *    UNDEFINED"
          */
         .ColorBufferBlendEnable = !cb->logic_op_enable && a->blend_enable,
         .ColorClampRange = COLORCLAMP_RTFORMAT,
         .PreBlendColorClampEnable = true,
         .PostBlendColorClampEnable = true,
         .SourceBlendFactor = vk_to_intel_blend[a->src_color_blend_factor],
         .DestinationBlendFactor = vk_to_intel_blend[a->dst_color_blend_factor],
         .ColorBlendFunction = vk_to_intel_blend_op[a->color_blend_op],
         .SourceAlphaBlendFactor = vk_to_intel_blend[a->src_alpha_blend_factor],
         .DestinationAlphaBlendFactor = vk_to_intel_blend[a->dst_alpha_blend_factor],
         .AlphaBlendFunction = vk_to_intel_blend_op[a->alpha_blend_op],
      };

      if (a->src_color_blend_factor != a->src_alpha_blend_factor ||
          a->dst_color_blend_factor != a->dst_alpha_blend_factor ||
          a->color_blend_op != a->alpha_blend_op) {
#if GFX_VER >= 8
         blend_state.IndependentAlphaBlendEnable = true;
#else
         entry.IndependentAlphaBlendEnable = true;
#endif
      }

      /* The Dual Source Blending documentation says:
       *
       *    "If SRC1 is included in a src/dst blend factor and
       *    a DualSource RT Write message is not used, results
       *    are UNDEFINED. (This reflects the same restriction in DX APIs,
       *    where undefined results are produced if “o1” is not written
       *    by a PS – there are no default values defined)."
       *
       * There is no way to gracefully fix this undefined situation
       * so we just disable the blending to prevent possible issues.
       */
      if (!wm_prog_data->dual_src_blend &&
          (is_dual_src_blend_factor(a->src_color_blend_factor) ||
           is_dual_src_blend_factor(a->dst_color_blend_factor) ||
           is_dual_src_blend_factor(a->src_alpha_blend_factor) ||
           is_dual_src_blend_factor(a->dst_alpha_blend_factor))) {
         vk_logw(VK_LOG_OBJS(&device->vk.base),
                 "Enabled dual-src blend factors without writing both targets "
                 "in the shader. Disabling blending to avoid GPU hangs.");
         entry.ColorBufferBlendEnable = false;
      }

      /* Our hardware applies the blend factor prior to the blend function
       * regardless of what function is used. Technically, this means the
       * hardware can do MORE than GL or Vulkan specify. However, it also
       * means that, for MIN and MAX, we have to stomp the blend factor to
       * ONE to make it a no-op.
       */
      if (a->color_blend_op == VK_BLEND_OP_MIN ||
          a->color_blend_op == VK_BLEND_OP_MAX) {
         entry.SourceBlendFactor = BLENDFACTOR_ONE;
         entry.DestinationBlendFactor = BLENDFACTOR_ONE;
      }
      if (a->alpha_blend_op == VK_BLEND_OP_MIN ||
          a->alpha_blend_op == VK_BLEND_OP_MAX) {
         entry.SourceAlphaBlendFactor = BLENDFACTOR_ONE;
         entry.DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
      }
      GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry);
      state_pos += GENX(BLEND_STATE_ENTRY_length);
#if GFX_VER >= 8
      if (i == 0)
         bs0 = entry;
#endif
   }

#if GFX_VER >= 8
   struct GENX(3DSTATE_PS_BLEND) blend = {
      GENX(3DSTATE_PS_BLEND_header),
   };
   blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable;
   blend.ColorBufferBlendEnable = bs0.ColorBufferBlendEnable;
   blend.SourceAlphaBlendFactor = bs0.SourceAlphaBlendFactor;
   blend.DestinationAlphaBlendFactor = bs0.DestinationAlphaBlendFactor;
   blend.SourceBlendFactor = bs0.SourceBlendFactor;
   blend.DestinationBlendFactor = bs0.DestinationBlendFactor;
   blend.AlphaTestEnable = false;
   blend.IndependentAlphaBlendEnable = blend_state.IndependentAlphaBlendEnable;

   GENX(3DSTATE_PS_BLEND_pack)(NULL, pipeline->gfx8.ps_blend, &blend);
#endif

   GENX(BLEND_STATE_pack)(NULL, blend_state_start, &blend_state);
}

static void
emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
                  const struct vk_input_assembly_state *ia,
                  const struct vk_viewport_state *vp,
                  const struct vk_rasterization_state *rs)
{
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   (void) wm_prog_data;

   struct GENX(3DSTATE_CLIP) clip = {
      GENX(3DSTATE_CLIP_header),
   };

   clip.ClipEnable = true;
   clip.StatisticsEnable = true;
   clip.EarlyCullEnable = true;
   clip.APIMode = pipeline->negative_one_to_one ? APIMODE_OGL : APIMODE_D3D;
   clip.GuardbandClipTestEnable = true;

#if GFX_VER >= 8
   clip.VertexSubPixelPrecisionSelect = _8Bit;
#endif
   clip.ClipMode = CLIPMODE_NORMAL;

   switch (rs->provoking_vertex) {
   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
      clip.TriangleStripListProvokingVertexSelect = 0;
      clip.LineStripListProvokingVertexSelect = 0;
      clip.TriangleFanProvokingVertexSelect = 1;
      break;

   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
      clip.TriangleStripListProvokingVertexSelect = 2;
      clip.LineStripListProvokingVertexSelect = 1;
      clip.TriangleFanProvokingVertexSelect = 2;
      break;

   default:
      unreachable("Invalid provoking vertex mode");
   }

   clip.MinimumPointWidth = 0.125;
   clip.MaximumPointWidth = 255.875;

   const struct elk_vue_prog_data *last =
      anv_pipeline_get_last_vue_prog_data(pipeline);

   /* From the Vulkan 1.0.45 spec:
    *
    *    "If the last active vertex processing stage shader entry point's
    *    interface does not include a variable decorated with ViewportIndex,
    *    then the first viewport is used."
    */
   if (vp && (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT)) {
      clip.MaximumVPIndex = vp->viewport_count > 0 ?
                            vp->viewport_count - 1 : 0;
   } else {
      clip.MaximumVPIndex = 0;
   }

   /* From the Vulkan 1.0.45 spec:
    *
    *    "If the last active vertex processing stage shader entry point's
    *    interface does not include a variable decorated with Layer, then the
    *    first layer is used."
    */
   clip.ForceZeroRTAIndexEnable =
      !(last->vue_map.slots_valid & VARYING_BIT_LAYER);

#if GFX_VER == 7
   clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask;
   clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask;
   clip.FrontWinding = genX(vk_to_intel_front_face)[rs->front_face];
   clip.CullMode = genX(vk_to_intel_cullmode)[rs->cull_mode];
   clip.ViewportZClipTestEnable = pipeline->depth_clip_enable;
#endif

   clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
      wm_prog_data->uses_nonperspective_interp_modes : 0;

   GENX(3DSTATE_CLIP_pack)(NULL, pipeline->gfx7.clip, &clip);
}

static void
emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
                       const struct vk_rasterization_state *rs)
{
   const struct elk_vue_prog_data *prog_data =
      anv_pipeline_get_last_vue_prog_data(pipeline);
   const struct intel_vue_map *vue_map = &prog_data->vue_map;

   nir_xfb_info *xfb_info;
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
      xfb_info = pipeline->shaders[MESA_SHADER_GEOMETRY]->xfb_info;
   else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
      xfb_info = pipeline->shaders[MESA_SHADER_TESS_EVAL]->xfb_info;
   else
      xfb_info = pipeline->shaders[MESA_SHADER_VERTEX]->xfb_info;

   if (xfb_info) {
      struct GENX(SO_DECL) so_decl[MAX_XFB_STREAMS][128];
      int next_offset[MAX_XFB_BUFFERS] = {0, 0, 0, 0};
      int decls[MAX_XFB_STREAMS] = {0, 0, 0, 0};

      memset(so_decl, 0, sizeof(so_decl));

      for (unsigned i = 0; i < xfb_info->output_count; i++) {
         const nir_xfb_output_info *output = &xfb_info->outputs[i];
         unsigned buffer = output->buffer;
         unsigned stream = xfb_info->buffer_to_stream[buffer];

         /* Our hardware is unusual in that it requires us to program SO_DECLs
          * for fake "hole" components, rather than simply taking the offset
          * for each real varying. Each hole can have size 1, 2, 3, or 4; we
          * program as many size = 4 holes as we can, then a final hole to
          * accommodate the final 1, 2, or 3 remaining.
          */
         int hole_dwords = (output->offset - next_offset[buffer]) / 4;
         while (hole_dwords > 0) {
            so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) {
               .HoleFlag = 1,
               .OutputBufferSlot = buffer,
               .ComponentMask = (1 << MIN2(hole_dwords, 4)) - 1,
            };
            hole_dwords -= 4;
         }
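
         /* For example, a 28-byte (7-dword) gap becomes one hole with
          * ComponentMask 0xf (4 dwords) followed by one with ComponentMask
          * 0x7 (3 dwords).
          */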

         int varying = output->location;
         uint8_t component_mask = output->component_mask;
         /* VARYING_SLOT_PSIZ contains four scalar fields packed together:
          * - VARYING_SLOT_PRIMITIVE_SHADING_RATE in VARYING_SLOT_PSIZ.x
          * - VARYING_SLOT_LAYER in VARYING_SLOT_PSIZ.y
          * - VARYING_SLOT_VIEWPORT in VARYING_SLOT_PSIZ.z
          * - VARYING_SLOT_PSIZ in VARYING_SLOT_PSIZ.w
          */
         if (varying == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
            varying = VARYING_SLOT_PSIZ;
            component_mask = 1 << 0; // SO_DECL_COMPMASK_X
         } else if (varying == VARYING_SLOT_LAYER) {
            varying = VARYING_SLOT_PSIZ;
            component_mask = 1 << 1; // SO_DECL_COMPMASK_Y
         } else if (varying == VARYING_SLOT_VIEWPORT) {
            varying = VARYING_SLOT_PSIZ;
            component_mask = 1 << 2; // SO_DECL_COMPMASK_Z
         } else if (varying == VARYING_SLOT_PSIZ) {
            component_mask = 1 << 3; // SO_DECL_COMPMASK_W
         }

         next_offset[buffer] = output->offset +
                               __builtin_popcount(component_mask) * 4;

         const int slot = vue_map->varying_to_slot[varying];
         if (slot < 0) {
            /* This can happen if the shader never writes to the varying.
             * Insert a hole instead of actual varying data.
             */
            so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) {
               .HoleFlag = true,
               .OutputBufferSlot = buffer,
               .ComponentMask = component_mask,
            };
         } else {
            so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) {
               .OutputBufferSlot = buffer,
               .RegisterIndex = slot,
               .ComponentMask = component_mask,
            };
         }
      }

      int max_decls = 0;
      for (unsigned s = 0; s < MAX_XFB_STREAMS; s++)
         max_decls = MAX2(max_decls, decls[s]);

      uint8_t sbs[MAX_XFB_STREAMS] = { };
      for (unsigned b = 0; b < MAX_XFB_BUFFERS; b++) {
         if (xfb_info->buffers_written & (1 << b))
            sbs[xfb_info->buffer_to_stream[b]] |= 1 << b;
      }

      /* Wa_16011773973:
       * If SOL is enabled and SO_DECL state has to be programmed,
       *    1. Send 3D State SOL state with SOL disabled
       *    2. Send SO_DECL NP state
       *    3. Send 3D State SOL with SOL Enabled
       */
      if (intel_device_info_is_dg2(pipeline->base.device->info))
         anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_STREAMOUT), so);

      uint32_t *dw = anv_batch_emitn(&pipeline->base.batch, 3 + 2 * max_decls,
                                     GENX(3DSTATE_SO_DECL_LIST),
                                     .StreamtoBufferSelects0 = sbs[0],
                                     .StreamtoBufferSelects1 = sbs[1],
                                     .StreamtoBufferSelects2 = sbs[2],
                                     .StreamtoBufferSelects3 = sbs[3],
                                     .NumEntries0 = decls[0],
                                     .NumEntries1 = decls[1],
                                     .NumEntries2 = decls[2],
                                     .NumEntries3 = decls[3]);

      for (int i = 0; i < max_decls; i++) {
         GENX(SO_DECL_ENTRY_pack)(NULL, dw + 3 + i * 2,
            &(struct GENX(SO_DECL_ENTRY)) {
               .Stream0Decl = so_decl[0][i],
               .Stream1Decl = so_decl[1][i],
               .Stream2Decl = so_decl[2][i],
               .Stream3Decl = so_decl[3][i],
            });
      }
   }

#if GFX_VER == 7
# define streamout_state_dw pipeline->gfx7.streamout_state
#else
# define streamout_state_dw pipeline->gfx8.streamout_state
#endif

   struct GENX(3DSTATE_STREAMOUT) so = {
      GENX(3DSTATE_STREAMOUT_header),
   };

   if (xfb_info) {
      so.SOFunctionEnable = true;
      so.SOStatisticsEnable = true;

      switch (rs->provoking_vertex) {
      case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
         so.ReorderMode = LEADING;
         break;

      case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
         so.ReorderMode = TRAILING;
         break;

      default:
         unreachable("Invalid provoking vertex mode");
      }

      so.RenderStreamSelect = rs->rasterization_stream;

#if GFX_VER >= 8
      so.Buffer0SurfacePitch = xfb_info->buffers[0].stride;
      so.Buffer1SurfacePitch = xfb_info->buffers[1].stride;
      so.Buffer2SurfacePitch = xfb_info->buffers[2].stride;
      so.Buffer3SurfacePitch = xfb_info->buffers[3].stride;
#else
      pipeline->gfx7.xfb_bo_pitch[0] = xfb_info->buffers[0].stride;
      pipeline->gfx7.xfb_bo_pitch[1] = xfb_info->buffers[1].stride;
      pipeline->gfx7.xfb_bo_pitch[2] = xfb_info->buffers[2].stride;
      pipeline->gfx7.xfb_bo_pitch[3] = xfb_info->buffers[3].stride;

      /* On Gfx7, the SO buffer enables live in 3DSTATE_STREAMOUT which
       * is a bit inconvenient because we don't know what buffers will
       * actually be enabled until draw time. We do our best here by
       * setting them based on buffers_written and we disable them
       * as-needed at draw time by setting EndAddress = BaseAddress.
       */
      so.SOBufferEnable0 = xfb_info->buffers_written & (1 << 0);
      so.SOBufferEnable1 = xfb_info->buffers_written & (1 << 1);
      so.SOBufferEnable2 = xfb_info->buffers_written & (1 << 2);
      so.SOBufferEnable3 = xfb_info->buffers_written & (1 << 3);
#endif

      int urb_entry_read_offset = 0;
      int urb_entry_read_length =
         (prog_data->vue_map.num_slots + 1) / 2 - urb_entry_read_offset;
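
      /* Like SBE's read length, this is in 256-bit units: (num_slots + 1) / 2
       * rounds the VUE slot count up to a whole pair of vec4 slots.
       */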

      /* We always read the whole vertex. This could be reduced at some
       * point by reading less and offsetting the register index in the
       * SO_DECLs.
       */
      so.Stream0VertexReadOffset = urb_entry_read_offset;
      so.Stream0VertexReadLength = urb_entry_read_length - 1;
      so.Stream1VertexReadOffset = urb_entry_read_offset;
      so.Stream1VertexReadLength = urb_entry_read_length - 1;
      so.Stream2VertexReadOffset = urb_entry_read_offset;
      so.Stream2VertexReadLength = urb_entry_read_length - 1;
      so.Stream3VertexReadOffset = urb_entry_read_offset;
      so.Stream3VertexReadLength = urb_entry_read_length - 1;
   }

   GENX(3DSTATE_STREAMOUT_pack)(NULL, streamout_state_dw, &so);
}

static uint32_t
get_sampler_count(const struct anv_shader_bin *bin)
{
   uint32_t count_by_4 = DIV_ROUND_UP(bin->bind_map.sampler_count, 4);

   /* We can potentially have way more than 32 samplers and that's ok.
    * However, the 3DSTATE_XS packets only have 3 bits to specify how
    * many to pre-fetch and all values above 4 are marked reserved.
    */
   return MIN2(count_by_4, 4);
}

static UNUSED struct anv_address
get_scratch_address(struct anv_pipeline *pipeline,
                    gl_shader_stage stage,
                    const struct anv_shader_bin *bin)
{
   return (struct anv_address) {
      .bo = anv_scratch_pool_alloc(pipeline->device,
                                   &pipeline->device->scratch_pool,
                                   stage, bin->prog_data->total_scratch),
      .offset = 0,
   };
}

static UNUSED uint32_t
get_scratch_space(const struct anv_shader_bin *bin)
{
   return ffs(bin->prog_data->total_scratch / 2048);
}
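
/* PerThreadScratchSpace uses a power-of-two encoding where 0 selects 1kB,
 * 1 selects 2kB, and so on, so ffs(total_scratch / 2048) maps a power-of-two
 * scratch size to its encoding (e.g. 4096 bytes -> ffs(2) = 2 -> 4kB). This
 * note assumes total_scratch is zero or a power of two of at least 1kB.
 */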

static void
emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
{
   const struct intel_device_info *devinfo = pipeline->base.device->info;
   const struct elk_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   const struct anv_shader_bin *vs_bin =
      pipeline->shaders[MESA_SHADER_VERTEX];

   assert(anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX));

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VS), vs) {
      vs.Enable = true;
      vs.StatisticsEnable = true;
      vs.KernelStartPointer = vs_bin->kernel.offset;
#if GFX_VER >= 8
      vs.SIMD8DispatchEnable =
         vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;
#endif

      assert(!vs_prog_data->base.base.use_alt_mode);
      vs.SingleVertexDispatch = false;
      vs.VectorMaskEnable = false;
      vs.SamplerCount = get_sampler_count(vs_bin);
      vs.BindingTableEntryCount = vs_bin->bind_map.surface_count;
      vs.FloatingPointMode = IEEE754;
      vs.IllegalOpcodeExceptionEnable = false;
      vs.SoftwareExceptionEnable = false;
      vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;

      vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length;
      vs.VertexURBEntryReadOffset = 0;
      vs.DispatchGRFStartRegisterForURBData =
         vs_prog_data->base.base.dispatch_grf_start_reg;

#if GFX_VER >= 8
      vs.UserClipDistanceClipTestEnableBitmask =
         vs_prog_data->base.clip_distance_mask;
      vs.UserClipDistanceCullTestEnableBitmask =
         vs_prog_data->base.cull_distance_mask;
#endif

      vs.PerThreadScratchSpace = get_scratch_space(vs_bin);
      vs.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_VERTEX, vs_bin);
   }
}

static void
emit_3dstate_hs_te_ds(struct anv_graphics_pipeline *pipeline,
                      const struct vk_tessellation_state *ts)
{
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_HS), hs);
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TE), te);
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_DS), ds);
      return;
   }

   const struct intel_device_info *devinfo = pipeline->base.device->info;
   const struct anv_shader_bin *tcs_bin =
      pipeline->shaders[MESA_SHADER_TESS_CTRL];
   const struct anv_shader_bin *tes_bin =
      pipeline->shaders[MESA_SHADER_TESS_EVAL];

   const struct elk_tcs_prog_data *tcs_prog_data = get_tcs_prog_data(pipeline);
   const struct elk_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline);

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_HS), hs) {
      hs.Enable = true;
      hs.StatisticsEnable = true;
      hs.KernelStartPointer = tcs_bin->kernel.offset;
      hs.SamplerCount = get_sampler_count(tcs_bin);
      hs.BindingTableEntryCount = tcs_bin->bind_map.surface_count;

      hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
      hs.IncludeVertexHandles = true;
      hs.InstanceCount = tcs_prog_data->instances - 1;

      hs.VertexURBEntryReadLength = 0;
      hs.VertexURBEntryReadOffset = 0;
      hs.DispatchGRFStartRegisterForURBData =
         tcs_prog_data->base.base.dispatch_grf_start_reg & 0x1f;

      hs.PerThreadScratchSpace = get_scratch_space(tcs_bin);
      hs.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_TESS_CTRL, tcs_bin);
   }

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TE), te) {
      te.Partitioning = tes_prog_data->partitioning;

      if (ts->domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) {
         te.OutputTopology = tes_prog_data->output_topology;
      } else {
         /* When the origin is upper-left, we have to flip the winding order */
         if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) {
            te.OutputTopology = OUTPUT_TRI_CW;
         } else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) {
            te.OutputTopology = OUTPUT_TRI_CCW;
         } else {
            te.OutputTopology = tes_prog_data->output_topology;
         }
      }

      te.TEDomain = tes_prog_data->domain;
      te.TEEnable = true;
      te.MaximumTessellationFactorOdd = 63.0;
      te.MaximumTessellationFactorNotOdd = 64.0;
   }

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_DS), ds) {
      ds.Enable = true;
      ds.StatisticsEnable = true;
      ds.KernelStartPointer = tes_bin->kernel.offset;
      ds.SamplerCount = get_sampler_count(tes_bin);
      ds.BindingTableEntryCount = tes_bin->bind_map.surface_count;
      ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;

      ds.ComputeWCoordinateEnable =
         tes_prog_data->domain == INTEL_TESS_DOMAIN_TRI;

      ds.PatchURBEntryReadLength = tes_prog_data->base.urb_read_length;
      ds.PatchURBEntryReadOffset = 0;
      ds.DispatchGRFStartRegisterForURBData =
         tes_prog_data->base.base.dispatch_grf_start_reg;

#if GFX_VER >= 8
      ds.DispatchMode =
         tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8 ?
         DISPATCH_MODE_SIMD8_SINGLE_PATCH :
         DISPATCH_MODE_SIMD4X2;

      ds.UserClipDistanceClipTestEnableBitmask =
         tes_prog_data->base.clip_distance_mask;
      ds.UserClipDistanceCullTestEnableBitmask =
         tes_prog_data->base.cull_distance_mask;
#endif

      ds.PerThreadScratchSpace = get_scratch_space(tes_bin);
      ds.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_TESS_EVAL, tes_bin);
   }
}
1465
1466 static void
emit_3dstate_gs(struct anv_graphics_pipeline * pipeline,const struct vk_rasterization_state * rs)1467 emit_3dstate_gs(struct anv_graphics_pipeline *pipeline,
1468 const struct vk_rasterization_state *rs)
1469 {
1470 const struct intel_device_info *devinfo = pipeline->base.device->info;
1471 const struct anv_shader_bin *gs_bin =
1472 pipeline->shaders[MESA_SHADER_GEOMETRY];
1473
1474 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
1475 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_GS), gs);
1476 return;
1477 }
1478
1479 const struct elk_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
1480
1481 anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_GS), gs) {
1482 gs.Enable = true;
1483 gs.StatisticsEnable = true;
1484 gs.KernelStartPointer = gs_bin->kernel.offset;
1485 gs.DispatchMode = gs_prog_data->base.dispatch_mode;
1486
1487 gs.SingleProgramFlow = false;
1488 gs.VectorMaskEnable = false;
1489 gs.SamplerCount = get_sampler_count(gs_bin);
1490 gs.BindingTableEntryCount = gs_bin->bind_map.surface_count;
1491 gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles;
1492 gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
1493
1494 if (GFX_VER == 8) {
1495 /* Broadwell is weird. It needs us to divide by 2. */
1496 gs.MaximumNumberofThreads = devinfo->max_gs_threads / 2 - 1;
1497 } else {
1498 gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1;
1499 }
1500
1501 gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
1502 gs.OutputTopology = gs_prog_data->output_topology;
1503 gs.ControlDataFormat = gs_prog_data->control_data_format;
1504 gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords;
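      /* InstanceControl holds the GS invocation (instance) count minus one;
       * the MAX2 guards against prog data reporting zero invocations for a
       * non-instanced geometry shader.
       */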
      gs.InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1;

      switch (rs->provoking_vertex) {
      case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
         gs.ReorderMode = LEADING;
         break;

      case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
         gs.ReorderMode = TRAILING;
         break;

      default:
         unreachable("Invalid provoking vertex mode");
      }

#if GFX_VER >= 8
      gs.ExpectedVertexCount = gs_prog_data->vertices_in;
      gs.StaticOutput = gs_prog_data->static_vertex_count >= 0;
      gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count >= 0 ?
         gs_prog_data->static_vertex_count : 0;
#endif

      gs.VertexURBEntryReadOffset = 0;
      gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
      gs.DispatchGRFStartRegisterForURBData =
         gs_prog_data->base.base.dispatch_grf_start_reg;

#if GFX_VER >= 8
      gs.UserClipDistanceClipTestEnableBitmask =
         gs_prog_data->base.clip_distance_mask;
      gs.UserClipDistanceCullTestEnableBitmask =
         gs_prog_data->base.cull_distance_mask;
#endif

      gs.PerThreadScratchSpace = get_scratch_space(gs_bin);
      gs.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_GEOMETRY, gs_bin);
   }
}

static bool
state_has_ds_self_dep(const struct vk_graphics_pipeline_state *state)
{
   return state->pipeline_flags &
          VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
}

static void
emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
                const struct vk_input_assembly_state *ia,
                const struct vk_rasterization_state *rs,
                const struct vk_multisample_state *ms,
                const struct vk_color_blend_state *cb,
                const struct vk_graphics_pipeline_state *state)
{
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   struct GENX(3DSTATE_WM) wm = {
      GENX(3DSTATE_WM_header),
   };
   wm.StatisticsEnable = true;
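   /* _05pixels and _10pixels are the genxml encodings for anti-aliasing
    * region widths of 0.5 and 1.0 pixels respectively.
    */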
   wm.LineEndCapAntialiasingRegionWidth = _05pixels;
   wm.LineAntialiasingRegionWidth = _10pixels;
   wm.PointRasterizationRule = RASTRULE_UPPER_LEFT;

   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      if (wm_prog_data->early_fragment_tests) {
         wm.EarlyDepthStencilControl = EDSC_PREPS;
      } else if (wm_prog_data->has_side_effects) {
         wm.EarlyDepthStencilControl = EDSC_PSEXEC;
      } else {
         wm.EarlyDepthStencilControl = EDSC_NORMAL;
      }

#if GFX_VER >= 8
      /* Gen8 hardware tries to compute ThreadDispatchEnable for us but
       * doesn't take into account KillPixels when no depth or stencil
       * writes are enabled. In order for occlusion queries to work
       * correctly with no attachments, we need to force-enable PS thread
       * dispatch.
       *
       * The BDW docs are pretty clear that this bit isn't validated
       * and probably shouldn't be used in production:
       *
       *    "This must always be set to Normal. This field should not be
       *    tested for functional validation."
       *
       * Unfortunately, however, the other mechanism we have for doing this
       * is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW.
       * Given two bad options, we choose the one which works.
       */
      pipeline->force_fragment_thread_dispatch =
         wm_prog_data->has_side_effects ||
         wm_prog_data->uses_kill;
#endif

      wm.BarycentricInterpolationMode =
         elk_wm_prog_data_barycentric_modes(wm_prog_data, 0);

#if GFX_VER < 8
      wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
      wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
      wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
      wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;

      /* If the subpass has a depth or stencil self-dependency, then we
       * need to force the hardware to do the depth/stencil write *after*
       * fragment shader execution. Otherwise, the writes may hit memory
       * before we get around to fetching from the input attachment and we
       * may get the depth or stencil value from the current draw rather
       * than the previous one.
       */
      wm.PixelShaderKillsPixel = state_has_ds_self_dep(state) ||
                                 wm_prog_data->uses_kill ||
                                 wm_prog_data->uses_omask;

      pipeline->force_fragment_thread_dispatch =
         wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF ||
         wm_prog_data->has_side_effects ||
         wm.PixelShaderKillsPixel;

      if (ms != NULL && ms->rasterization_samples > 1) {
         if (elk_wm_prog_data_is_persample(wm_prog_data, 0)) {
            wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
         } else {
            wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
         }
      } else {
         wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
      }
#endif

      wm.LineStippleEnable = rs->line.stipple.enable;
   }

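   /* 3DSTATE_WM isn't emitted into the pipeline batch here; the packed
    * dwords are stashed in the pipeline object so the command buffer can
    * merge them with dynamic state when the draw is recorded.
    */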
   const struct intel_device_info *devinfo = pipeline->base.device->info;
   uint32_t *dws = devinfo->ver >= 8 ? pipeline->gfx8.wm : pipeline->gfx7.wm;
   GENX(3DSTATE_WM_pack)(NULL, dws, &wm);
}

static void
emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
                const struct vk_multisample_state *ms,
                const struct vk_color_blend_state *cb)
{
   UNUSED const struct intel_device_info *devinfo =
      pipeline->base.device->info;
   const struct anv_shader_bin *fs_bin =
      pipeline->shaders[MESA_SHADER_FRAGMENT];

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
#if GFX_VER == 7
         /* Even if no fragments are ever dispatched, gfx7 hardware hangs if
          * we don't at least set the maximum number of threads.
          */
         ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
#endif
      }
      return;
   }

   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

#if GFX_VER < 8
   /* The hardware wedges if you have this bit set but don't turn on any dual
    * source blend factors.
    */
   bool dual_src_blend = false;
   if (wm_prog_data->dual_src_blend && cb) {
      for (uint32_t i = 0; i < cb->attachment_count; i++) {
         const struct vk_color_blend_attachment_state *a =
            &cb->attachments[i];

         if (a->blend_enable &&
             (is_dual_src_blend_factor(a->src_color_blend_factor) ||
              is_dual_src_blend_factor(a->dst_color_blend_factor) ||
              is_dual_src_blend_factor(a->src_alpha_blend_factor) ||
              is_dual_src_blend_factor(a->dst_alpha_blend_factor))) {
            dual_src_blend = true;
            break;
         }
      }
   }
#endif

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                  ms != NULL ? ms->rasterization_samples : 1,
                                  0 /* msaa_flags */);

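      /* The three kernel start pointers correspond to the up-to-three
       * dispatch widths (SIMD8/16/32) that intel_set_ps_dispatch_state()
       * enabled above; elk_wm_prog_data_prog_offset() returns the offset of
       * the shader variant bound to each dispatch slot.
       */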
      ps.KernelStartPointer0 = fs_bin->kernel.offset +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = fs_bin->kernel.offset +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
      ps.KernelStartPointer2 = fs_bin->kernel.offset +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 2);

      ps.SingleProgramFlow = false;
      ps.VectorMaskEnable = GFX_VER >= 8 &&
                            wm_prog_data->uses_vmask;
      ps.SamplerCount = get_sampler_count(fs_bin);
      ps.BindingTableEntryCount = fs_bin->bind_map.surface_count;
      ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
                              wm_prog_data->base.ubo_ranges[0].length;
      ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
                                  POSOFFSET_SAMPLE : POSOFFSET_NONE;
#if GFX_VER < 8
      ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
      ps.DualSourceBlendEnable = dual_src_blend;
#endif

#if GFX_VERx10 == 75
      /* Haswell requires the sample mask to be set in this packet as well
       * as in 3DSTATE_SAMPLE_MASK; the values should match.
       */
      ps.SampleMask = 0xff;
#endif

#if GFX_VER >= 8
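      /* On gfx8 this field appears to be encoded as the thread count minus
       * two rather than minus one, hence the extra subtraction on BDW.
       */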
      ps.MaximumNumberofThreadsPerPSD =
         devinfo->max_threads_per_psd - (GFX_VER == 8 ? 2 : 1);
#else
      ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
#endif

      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);

      ps.PerThreadScratchSpace = get_scratch_space(fs_bin);
      ps.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_FRAGMENT, fs_bin);
   }
}

#if GFX_VER >= 8
static void
emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
                      const struct vk_rasterization_state *rs,
                      const struct vk_graphics_pipeline_state *state)
{
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_EXTRA), ps);
      return;
   }

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_EXTRA), ps) {
      ps.PixelShaderValid = true;
      ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
      ps.PixelShaderIsPerSample =
         elk_wm_prog_data_is_persample(wm_prog_data, 0);
      ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
      ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
      ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;

      /* If the subpass has a depth or stencil self-dependency, then we need
       * to force the hardware to do the depth/stencil write *after* fragment
       * shader execution. Otherwise, the writes may hit memory before we get
       * around to fetching from the input attachment and we may get the depth
       * or stencil value from the current draw rather than the previous one.
       */
      ps.PixelShaderKillsPixel = state_has_ds_self_dep(state) ||
                                 wm_prog_data->uses_kill;

      ps.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
   }
}
#endif

static void
emit_3dstate_vf_statistics(struct anv_graphics_pipeline *pipeline)
{
   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_STATISTICS), vfs) {
      vfs.StatisticsEnable = true;
   }
}

static void
compute_kill_pixel(struct anv_graphics_pipeline *pipeline,
                   const struct vk_multisample_state *ms,
                   const struct vk_graphics_pipeline_state *state)
{
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      pipeline->kill_pixel = false;
      return;
   }

   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   /* This computes the KillPixel portion of the computation for whether or
    * not we want to enable the PMA fix on gfx8 or gfx9. It's given by this
    * chunk of the giant formula:
    *
    *    (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *     3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *     3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *     3DSTATE_PS_BLEND::AlphaTestEnable ||
    *     3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
    *
    * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable is always false and so is
    * 3DSTATE_PS_BLEND::AlphaTestEnable since Vulkan doesn't have a concept
    * of an alpha test.
    */
   pipeline->kill_pixel =
      state_has_ds_self_dep(state) ||
      wm_prog_data->uses_kill ||
      wm_prog_data->uses_omask ||
      (ms && ms->alpha_to_coverage_enable);
}

void
genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
                             const struct vk_graphics_pipeline_state *state)
{
   enum intel_urb_deref_block_size urb_deref_block_size;
   emit_urb_setup(pipeline, &urb_deref_block_size);

   assert(state->rs != NULL);
   emit_rs_state(pipeline, state->ia, state->rs, state->ms, state->rp,
                 urb_deref_block_size);
   emit_ms_state(pipeline, state->ms);
   emit_cb_state(pipeline, state->cb, state->ms);
   compute_kill_pixel(pipeline, state->ms, state);

   emit_3dstate_clip(pipeline, state->ia, state->vp, state->rs);

#if 0
   /* From gfx7_vs_state.c */

   /**
    * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
    * Geometry > Geometry Shader > State:
    *
    *    "Note: Because of corruption in IVB:GT2, software needs to flush the
    *    whole fixed function pipeline when the GS enable changes value in
    *    the 3DSTATE_GS."
    *
    * The hardware architects have clarified that in this context "flush the
    * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
    * Stall" bit set.
    */
   if (device->info->platform == INTEL_PLATFORM_IVB)
      gfx7_emit_vs_workaround_flush(elk);
#endif

   emit_vertex_input(pipeline, state->vi);

   emit_3dstate_vs(pipeline);
   emit_3dstate_hs_te_ds(pipeline, state->ts);
   emit_3dstate_gs(pipeline, state->rs);

   emit_3dstate_vf_statistics(pipeline);

   emit_3dstate_streamout(pipeline, state->rs);

   emit_3dstate_sbe(pipeline);
   emit_3dstate_wm(pipeline, state->ia, state->rs,
                   state->ms, state->cb, state);
   emit_3dstate_ps(pipeline, state->ms, state->cb);
#if GFX_VER >= 8
   emit_3dstate_ps_extra(pipeline, state->rs, state);
#endif
}

void
genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
{
   struct anv_device *device = pipeline->base.device;
   const struct intel_device_info *devinfo = device->info;
   const struct elk_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);

   anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);

   const struct intel_cs_dispatch_info dispatch =
      elk_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
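
   /* CURBE space is measured in 256-bit registers: the per-thread push
    * constants are replicated once per hardware thread while the
    * cross-thread constants are stored only once, and the total is rounded
    * up to an even register count (the allocation granularity, as we
    * understand it).
    */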
   const uint32_t vfe_curbe_allocation =
      ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
            cs_prog_data->push.cross_thread.regs, 2);

   const struct anv_shader_bin *cs_bin = pipeline->cs;

   anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) {
#if GFX_VER > 7
      vfe.StackSize = 0;
#else
      vfe.GPGPUMode = true;
#endif
      vfe.MaximumNumberofThreads =
         devinfo->max_cs_threads * devinfo->subslice_total - 1;
      vfe.NumberofURBEntries = GFX_VER <= 7 ? 0 : 2;
      vfe.ResetGatewayTimer = true;
      vfe.BypassGatewayControl = true;
      vfe.URBEntryAllocationSize = GFX_VER <= 7 ? 0 : 2;
      vfe.CURBEAllocationSize = vfe_curbe_allocation;

      if (cs_bin->prog_data->total_scratch) {
         if (GFX_VER >= 8) {
            /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
             * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
             */
            vfe.PerThreadScratchSpace =
               ffs(cs_bin->prog_data->total_scratch) - 11;
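            /* Worked example: total_scratch is a power of two >= 1k, so
             * e.g. 4096 bytes gives ffs(4096) = 13 and 13 - 11 = 2, the
             * encoding for 4k in the table above.
             */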
         } else if (GFX_VERx10 == 75) {
            /* Haswell's Per Thread Scratch Space is in the range [0, 10]
             * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
             */
            vfe.PerThreadScratchSpace =
               ffs(cs_bin->prog_data->total_scratch) - 12;
         } else {
            /* IVB and BYT use the range [0, 11] to mean [1kB, 12kB]
             * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
             */
            vfe.PerThreadScratchSpace =
               cs_bin->prog_data->total_scratch / 1024 - 1;
         }
         vfe.ScratchSpaceBasePointer =
            get_scratch_address(&pipeline->base, MESA_SHADER_COMPUTE, cs_bin);
      }
   }

   struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
      .KernelStartPointer =
         cs_bin->kernel.offset +
         elk_cs_prog_data_prog_offset(cs_prog_data, dispatch.simd_size),
      .SamplerCount = get_sampler_count(cs_bin),
      /* We add 1 because the CS indirect parameters buffer isn't accounted
       * for in bind_map.surface_count.
       */
      .BindingTableEntryCount = 1 + MIN2(cs_bin->bind_map.surface_count, 30),
      .BarrierEnable = cs_prog_data->uses_barrier,
      .SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER, cs_prog_data->base.total_shared),

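      /* Push constants come in two pieces: a per-thread chunk that the
       * hardware replicates for each of the dispatch.threads threads, and
       * (on HSW and later) a cross-thread chunk that is read only once for
       * the whole thread group.
       */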
#if GFX_VERx10 != 75
      .ConstantURBEntryReadOffset = 0,
#endif
      .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
#if GFX_VERx10 >= 75
      .CrossThreadConstantDataReadLength =
         cs_prog_data->push.cross_thread.regs,
#endif

      .NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
   };
   GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
                                        pipeline->interface_descriptor_data,
                                        &desc);
}
