1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <[email protected]>
30 */
31
32
33 #ifndef BRW_EU_H
34 #define BRW_EU_H
35
36 #include <stdbool.h>
37 #include <stdio.h>
38 #include "brw_inst.h"
39 #include "brw_compiler.h"
40 #include "brw_eu_defines.h"
41 #include "brw_isa_info.h"
42 #include "brw_reg.h"
43
44 #include "intel_wa.h"
45 #include "util/bitset.h"
46
47 #ifdef __cplusplus
48 extern "C" {
49 #endif
50
struct disasm_info;

/* Maximum depth of the push/pop instruction-state stack in brw_codegen. */
#define BRW_EU_MAX_INSN_STACK 5

/* Default state bundle applied to every newly emitted instruction.  A small
 * stack of these in struct brw_codegen lets clients push/pop the defaults.
 */
struct brw_insn_state {
   /* One of BRW_EXECUTE_* */
   unsigned exec_size:3;

   /* Group in units of channels */
   unsigned group:5;

   /* One of BRW_MASK_* */
   unsigned mask_control:1;

   /* Scheduling info for Gfx12+ */
   struct tgl_swsb swsb;

   /* Destination saturation modifier */
   bool saturate:1;

   /* One of BRW_ALIGN_* */
   unsigned access_mode:1;

   /* One of BRW_PREDICATE_* */
   enum brw_predicate predicate:4;

   /* Invert the sense of the predicate */
   bool pred_inv:1;

   /* Flag subreg.  Bottom bit is subreg, top bit is reg */
   unsigned flag_subreg:2;

   /* Accumulator write-enable control */
   bool acc_wr_control:1;
};
83
84
/* A helper for accessing the last instruction emitted.  This makes it easy
 * to set various bits on an instruction without having to create temporary
 * variables and assign the emitted instruction to those.
 */
#define brw_last_inst (&p->store[p->nr_insn - 1])
90
/* State tracked while emitting a stream of brw_inst instructions. */
struct brw_codegen {
   brw_inst *store;               /* growable array of emitted instructions */
   int store_size;                /* allocated capacity of store */
   unsigned nr_insn;              /* number of instructions emitted so far */
   unsigned int next_insn_offset; /* byte offset of the next instruction */

   /* Allocation context for store/if_stack/loop_stack/relocs --
    * presumably a ralloc context; confirm against brw_init_codegen.
    */
   void *mem_ctx;

   /* Allow clients to push/pop instruction state:
    */
   struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK];
   struct brw_insn_state *current;   /* top of the state stack */

   const struct brw_isa_info *isa;
   const struct intel_device_info *devinfo;

   /* Control flow stacks:
    * - if_stack contains IF and ELSE instructions which must be patched
    *   (and popped) once the matching ENDIF instruction is encountered.
    *
    *   Just store the instruction pointer (an index).
    */
   int *if_stack;
   int if_stack_depth;
   int if_stack_array_size;

   /**
    * loop_stack contains the instruction pointers of the starts of loops
    * which must be patched (and popped) once the matching WHILE instruction
    * is encountered.
    */
   int *loop_stack;
   int loop_stack_depth;
   int loop_stack_array_size;

   /* Relocations to be applied when the final program is assembled. */
   struct brw_shader_reloc *relocs;
   int num_relocs;
   int reloc_array_size;
};
130
/* Linked-list node naming a byte offset in the program -- presumably used
 * by the disassembler (see struct disasm_info); confirm against brw_disasm.
 */
struct brw_label {
   int offset;               /* byte offset of the labelled instruction */
   int number;               /* sequential label number */
   struct brw_label *next;
};
136
/* Push/pop and get/set accessors for the default instruction state bundle
 * (struct brw_insn_state) applied to each newly emitted instruction.
 */
void brw_pop_insn_state( struct brw_codegen *p );
void brw_push_insn_state( struct brw_codegen *p );
unsigned brw_get_default_exec_size(struct brw_codegen *p);
unsigned brw_get_default_group(struct brw_codegen *p);
unsigned brw_get_default_access_mode(struct brw_codegen *p);
struct tgl_swsb brw_get_default_swsb(struct brw_codegen *p);
void brw_set_default_exec_size(struct brw_codegen *p, unsigned value);
void brw_set_default_mask_control( struct brw_codegen *p, unsigned value );
void brw_set_default_saturate( struct brw_codegen *p, bool enable );
void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode );
void brw_inst_set_group(const struct intel_device_info *devinfo,
                        brw_inst *inst, unsigned group);
void brw_set_default_group(struct brw_codegen *p, unsigned group);
void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc);
void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse);
void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg);
void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value);
void brw_set_default_swsb(struct brw_codegen *p, struct tgl_swsb value);
155
/* Code generation entry points: initialization, final program retrieval,
 * debug dump/override hooks, and low-level emission helpers.
 */
void brw_init_codegen(const struct brw_isa_info *isa,
                      struct brw_codegen *p, void *mem_ctx);
bool brw_has_jip(const struct intel_device_info *devinfo, enum opcode opcode);
bool brw_has_uip(const struct intel_device_info *devinfo, enum opcode opcode);
const struct brw_shader_reloc *brw_get_shader_relocs(struct brw_codegen *p,
                                                     unsigned *num_relocs);
const unsigned *brw_get_program( struct brw_codegen *p, unsigned *sz );

bool brw_should_dump_shader_bin(void);
void brw_dump_shader_bin(void *assembly, int start_offset, int end_offset,
                         const char *identifier);

bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,
                               const char *identifier);

void brw_realign(struct brw_codegen *p, unsigned alignment);
int brw_append_data(struct brw_codegen *p, void *data,
                    unsigned size, unsigned alignment);
brw_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode);
void brw_add_reloc(struct brw_codegen *p, uint32_t id,
                   enum brw_shader_reloc_type type,
                   uint32_t offset, uint32_t delta);
void brw_set_dest(struct brw_codegen *p, brw_inst *insn, struct brw_reg dest);
void brw_set_src0(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);
180
/* Helpers for regular instructions: each ALUn(OP) invocation below declares
 * an emitter brw_OP() taking a destination and n source registers.
 */
#define ALU1(OP)                                \
brw_inst *brw_##OP(struct brw_codegen *p,       \
                   struct brw_reg dest,         \
                   struct brw_reg src0);

#define ALU2(OP)                                \
brw_inst *brw_##OP(struct brw_codegen *p,       \
                   struct brw_reg dest,         \
                   struct brw_reg src0,         \
                   struct brw_reg src1);

#define ALU3(OP)                                \
brw_inst *brw_##OP(struct brw_codegen *p,       \
                   struct brw_reg dest,         \
                   struct brw_reg src0,         \
                   struct brw_reg src1,         \
                   struct brw_reg src2);

ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(F32TO16)
ALU1(F16TO32)
ALU2(ADD)
ALU3(ADD3)
ALU2(AVG)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
ALU3(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

/* The ALUn helpers are only needed for the declaration list above. */
#undef ALU1
#undef ALU2
#undef ALU3
250
251 static inline unsigned
252 reg_unit(const struct intel_device_info *devinfo)
253 {
254 return devinfo->ver >= 20 ? 2 : 1;
255 }
256
257
258 /* Helpers for SEND instruction:
259 */
260
261 /**
262 * Construct a message descriptor immediate with the specified common
263 * descriptor controls.
264 */
265 static inline uint32_t
brw_message_desc(const struct intel_device_info * devinfo,unsigned msg_length,unsigned response_length,bool header_present)266 brw_message_desc(const struct intel_device_info *devinfo,
267 unsigned msg_length,
268 unsigned response_length,
269 bool header_present)
270 {
271 assert(msg_length % reg_unit(devinfo) == 0);
272 assert(response_length % reg_unit(devinfo) == 0);
273 return (SET_BITS(msg_length / reg_unit(devinfo), 28, 25) |
274 SET_BITS(response_length / reg_unit(devinfo), 24, 20) |
275 SET_BITS(header_present, 19, 19));
276 }
277
/* Extract the message length (in physical registers) from a descriptor. */
static inline unsigned
brw_message_desc_mlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 28, 25) * reg_unit(devinfo);
}

/* Extract the response length (in physical registers) from a descriptor. */
static inline unsigned
brw_message_desc_rlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 24, 20) * reg_unit(devinfo);
}

/* Whether the message carries a header GRF (descriptor bit 19). */
static inline bool
brw_message_desc_header_present(ASSERTED
                                const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 19, 19);
}
297
298 static inline unsigned
brw_message_ex_desc(const struct intel_device_info * devinfo,unsigned ex_msg_length)299 brw_message_ex_desc(const struct intel_device_info *devinfo,
300 unsigned ex_msg_length)
301 {
302 assert(ex_msg_length % reg_unit(devinfo) == 0);
303 return devinfo->ver >= 20 ?
304 SET_BITS(ex_msg_length / reg_unit(devinfo), 10, 6) :
305 SET_BITS(ex_msg_length / reg_unit(devinfo), 9, 6);
306 }
307
308 static inline unsigned
brw_message_ex_desc_ex_mlen(const struct intel_device_info * devinfo,uint32_t ex_desc)309 brw_message_ex_desc_ex_mlen(const struct intel_device_info *devinfo,
310 uint32_t ex_desc)
311 {
312 return devinfo->ver >= 20 ?
313 GET_BITS(ex_desc, 10, 6) * reg_unit(devinfo) :
314 GET_BITS(ex_desc, 9, 6) * reg_unit(devinfo);
315 }
316
/**
 * Construct a URB message descriptor with the given message type and
 * per-slot-offset / channel-mask / global-offset controls.
 */
static inline uint32_t
brw_urb_desc(const struct intel_device_info *devinfo,
             unsigned msg_type,
             bool per_slot_offset_present,
             bool channel_mask_present,
             unsigned global_offset)
{
   return (SET_BITS(per_slot_offset_present, 17, 17) |
           SET_BITS(channel_mask_present, 15, 15) |
           SET_BITS(global_offset, 14, 4) |
           SET_BITS(msg_type, 3, 0));
}

/* Extract the URB message type (bits 3:0) from a descriptor. */
static inline uint32_t
brw_urb_desc_msg_type(ASSERTED const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   return GET_BITS(desc, 3, 0);
}

/* Descriptor for a URB fence message; only valid on LSC platforms. */
static inline uint32_t
brw_urb_fence_desc(const struct intel_device_info *devinfo)
{
   assert(devinfo->has_lsc);
   return brw_urb_desc(devinfo, GFX125_URB_OPCODE_FENCE, false, false, 0);
}
343
344 /**
345 * Construct a message descriptor immediate with the specified sampler
346 * function controls.
347 */
348 static inline uint32_t
brw_sampler_desc(const struct intel_device_info * devinfo,unsigned binding_table_index,unsigned sampler,unsigned msg_type,unsigned simd_mode,unsigned return_format)349 brw_sampler_desc(const struct intel_device_info *devinfo,
350 unsigned binding_table_index,
351 unsigned sampler,
352 unsigned msg_type,
353 unsigned simd_mode,
354 unsigned return_format)
355 {
356 const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
357 SET_BITS(sampler, 11, 8));
358
359 /* From GFX20 Bspec: Shared Functions - Message Descriptor -
360 * Sampling Engine:
361 *
362 * Message Type[5] 31 This bit represents the upper bit of message type
363 * 6-bit encoding (c.f. [16:12]). This bit is set
364 * for messages with programmable offsets.
365 */
366 if (devinfo->ver >= 20)
367 return desc | SET_BITS(msg_type & 0x1F, 16, 12) |
368 SET_BITS(simd_mode & 0x3, 18, 17) |
369 SET_BITS(simd_mode >> 2, 29, 29) |
370 SET_BITS(return_format, 30, 30) |
371 SET_BITS(msg_type >> 5, 31, 31);
372
373 /* From the CHV Bspec: Shared Functions - Message Descriptor -
374 * Sampling Engine:
375 *
376 * SIMD Mode[2] 29 This field is the upper bit of the 3-bit
377 * SIMD Mode field.
378 */
379 return desc | SET_BITS(msg_type, 16, 12) |
380 SET_BITS(simd_mode & 0x3, 18, 17) |
381 SET_BITS(simd_mode >> 2, 29, 29) |
382 SET_BITS(return_format, 30, 30);
383 }
384
/* Extract the binding table index (bits 7:0) from a sampler descriptor. */
static inline unsigned
brw_sampler_desc_binding_table_index(UNUSED
                                     const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

/* Extract the sampler index (bits 11:8) from a sampler descriptor. */
static inline unsigned
brw_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   return GET_BITS(desc, 11, 8);
}

/* Extract the message type; on Gfx20+ bit 31 is the sixth (top) type bit. */
static inline unsigned
brw_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 20)
      return GET_BITS(desc, 31, 31) << 5 | GET_BITS(desc, 16, 12);
   else
      return GET_BITS(desc, 16, 12);
}

/* Reassemble the 3-bit SIMD mode from its split encoding (18:17 and 29). */
static inline unsigned
brw_sampler_desc_simd_mode(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   return GET_BITS(desc, 18, 17) | GET_BITS(desc, 29, 29) << 2;
}

/* Extract the return format bit (30) from a sampler descriptor. */
static inline unsigned
brw_sampler_desc_return_format(ASSERTED const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   return GET_BITS(desc, 30, 30);
}
422
423 /**
424 * Construct a message descriptor for the dataport
425 */
426 static inline uint32_t
brw_dp_desc(const struct intel_device_info * devinfo,unsigned binding_table_index,unsigned msg_type,unsigned msg_control)427 brw_dp_desc(const struct intel_device_info *devinfo,
428 unsigned binding_table_index,
429 unsigned msg_type,
430 unsigned msg_control)
431 {
432 return SET_BITS(binding_table_index, 7, 0) |
433 SET_BITS(msg_control, 13, 8) |
434 SET_BITS(msg_type, 18, 14);
435 }
436
/* Extract the binding table index (bits 7:0) from a dataport descriptor. */
static inline unsigned
brw_dp_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

/* Extract the message type (bits 18:14) from a dataport descriptor. */
static inline unsigned
brw_dp_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 18, 14);
}

/* Extract the message control field (bits 13:8) from a dataport descriptor. */
static inline unsigned
brw_dp_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 13, 8);
}
455
456 /**
457 * Construct a message descriptor immediate with the specified dataport read
458 * function controls.
459 */
460 static inline uint32_t
brw_dp_read_desc(const struct intel_device_info * devinfo,unsigned binding_table_index,unsigned msg_control,unsigned msg_type,unsigned target_cache)461 brw_dp_read_desc(const struct intel_device_info *devinfo,
462 unsigned binding_table_index,
463 unsigned msg_control,
464 unsigned msg_type,
465 unsigned target_cache)
466 {
467 return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
468 }
469
470 static inline unsigned
brw_dp_read_desc_msg_type(const struct intel_device_info * devinfo,uint32_t desc)471 brw_dp_read_desc_msg_type(const struct intel_device_info *devinfo,
472 uint32_t desc)
473 {
474 return brw_dp_desc_msg_type(devinfo, desc);
475 }
476
477 static inline unsigned
brw_dp_read_desc_msg_control(const struct intel_device_info * devinfo,uint32_t desc)478 brw_dp_read_desc_msg_control(const struct intel_device_info *devinfo,
479 uint32_t desc)
480 {
481 return brw_dp_desc_msg_control(devinfo, desc);
482 }
483
484 /**
485 * Construct a message descriptor immediate with the specified dataport write
486 * function controls.
487 */
488 static inline uint32_t
brw_dp_write_desc(const struct intel_device_info * devinfo,unsigned binding_table_index,unsigned msg_control,unsigned msg_type,unsigned send_commit_msg)489 brw_dp_write_desc(const struct intel_device_info *devinfo,
490 unsigned binding_table_index,
491 unsigned msg_control,
492 unsigned msg_type,
493 unsigned send_commit_msg)
494 {
495 assert(!send_commit_msg);
496 return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control) |
497 SET_BITS(send_commit_msg, 17, 17);
498 }
499
500 static inline unsigned
brw_dp_write_desc_msg_type(const struct intel_device_info * devinfo,uint32_t desc)501 brw_dp_write_desc_msg_type(const struct intel_device_info *devinfo,
502 uint32_t desc)
503 {
504 return brw_dp_desc_msg_type(devinfo, desc);
505 }
506
507 static inline unsigned
brw_dp_write_desc_msg_control(const struct intel_device_info * devinfo,uint32_t desc)508 brw_dp_write_desc_msg_control(const struct intel_device_info *devinfo,
509 uint32_t desc)
510 {
511 return brw_dp_desc_msg_control(devinfo, desc);
512 }
513
514 /**
515 * Construct a message descriptor immediate with the specified dataport
516 * surface function controls.
517 */
518 static inline uint32_t
brw_dp_surface_desc(const struct intel_device_info * devinfo,unsigned msg_type,unsigned msg_control)519 brw_dp_surface_desc(const struct intel_device_info *devinfo,
520 unsigned msg_type,
521 unsigned msg_control)
522 {
523 /* We'll OR in the binding table index later */
524 return brw_dp_desc(devinfo, 0, msg_type, msg_control);
525 }
526
527 static inline uint32_t
brw_dp_untyped_atomic_desc(const struct intel_device_info * devinfo,unsigned exec_size,unsigned atomic_op,bool response_expected)528 brw_dp_untyped_atomic_desc(const struct intel_device_info *devinfo,
529 unsigned exec_size, /**< 0 for SIMD4x2 */
530 unsigned atomic_op,
531 bool response_expected)
532 {
533 assert(exec_size <= 8 || exec_size == 16);
534
535 unsigned msg_type;
536 if (exec_size > 0) {
537 msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
538 } else {
539 msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
540 }
541
542 const unsigned msg_control =
543 SET_BITS(atomic_op, 3, 0) |
544 SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) |
545 SET_BITS(response_expected, 5, 5);
546
547 return brw_dp_surface_desc(devinfo, msg_type, msg_control);
548 }
549
/**
 * Construct a message descriptor for an untyped surface float atomic
 * operation (SIMD8/SIMD16 only -- no SIMD4x2 variant).
 */
static inline uint32_t
brw_dp_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
                                 unsigned exec_size,
                                 unsigned atomic_op,
                                 bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);

   assert(exec_size > 0);
   const unsigned msg_type = GFX9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP;

   /* Note the float atomic opcode field is only 2 bits wide here. */
   const unsigned msg_control =
      SET_BITS(atomic_op, 1, 0) |
      SET_BITS(exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
568
/**
 * Build the 4-bit MDC_CMASK channel-mask encoding: the bits for channels
 * at index >= num_channels are set.  See MDC_CMASK in the SKL PRM Vol 2d.
 */
static inline unsigned
brw_mdc_cmask(unsigned num_channels)
{
   const unsigned shifted = 0xf << num_channels;
   return shifted & 0xf;
}
575
/**
 * Construct a message descriptor for an untyped surface read or write.
 */
static inline uint32_t
brw_dp_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned num_channels,
                               bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE :
              HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
598
/* Map a data bit size (8/16/32) to the byte-scattered-message data-size
 * encoding.
 */
static inline unsigned
brw_mdc_ds(unsigned bit_size)
{
   switch (bit_size) {
   case 8:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
   case 16:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
   case 32:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
   default:
      unreachable("Unsupported bit_size for byte scattered messages");
   }
}
613
/**
 * Construct a message descriptor for a byte scattered read or write.
 */
static inline uint32_t
brw_dp_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                              unsigned exec_size,
                              unsigned bit_size,
                              bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE :
              HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ;

   /* No SIMD4x2 variant exists for byte scattered messages. */
   assert(exec_size > 0);
   const unsigned msg_control =
      SET_BITS(exec_size == 16, 0, 0) |
      SET_BITS(brw_mdc_ds(bit_size), 3, 2);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
633
/**
 * Construct a message descriptor for a dword scattered read or write.
 */
static inline uint32_t
brw_dp_dword_scattered_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size,
                               bool write)
{
   assert(exec_size == 8 || exec_size == 16);

   const unsigned msg_type =
      write ? GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE :
              GFX7_DATAPORT_DC_DWORD_SCATTERED_READ;

   const unsigned msg_control =
      SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
      SET_BITS(exec_size == 16, 0, 0);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
651
/**
 * Construct a message descriptor for an OWORD block read or write.
 */
static inline uint32_t
brw_dp_oword_block_rw_desc(const struct intel_device_info *devinfo,
                           bool align_16B,
                           unsigned num_dwords,
                           bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   const unsigned msg_type =
      write ?      GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE :
      align_16B ?  GFX7_DATAPORT_DC_OWORD_BLOCK_READ :
                   GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ;

   const unsigned msg_control =
      SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
671
/**
 * Construct a message descriptor for an A64 (stateless) untyped surface
 * read or write; uses the non-coherent stateless binding table index.
 */
static inline uint32_t
brw_dp_a64_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                                   unsigned exec_size, /**< 0 for SIMD4x2 */
                                   unsigned num_channels,
                                   bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE :
              GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
695
/**
 * Construct a message descriptor for an A64 (stateless) OWORD block read
 * or write.
 */
static inline uint32_t
brw_dp_a64_oword_block_rw_desc(const struct intel_device_info *devinfo,
                               bool align_16B,
                               unsigned num_dwords,
                               bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   unsigned msg_type =
      write ? GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ;

   /* Bits 4:3 select the unaligned sub-variant of the block message. */
   unsigned msg_control =
      SET_BITS(!align_16B, 4, 3) |
      SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
716
717 /**
718 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
719 * Skylake PRM).
720 */
721 static inline uint32_t
brw_mdc_a64_ds(unsigned elems)722 brw_mdc_a64_ds(unsigned elems)
723 {
724 switch (elems) {
725 case 1: return 0;
726 case 2: return 1;
727 case 4: return 2;
728 case 8: return 3;
729 default:
730 unreachable("Unsupported elmeent count for A64 scattered message");
731 }
732 }
733
/**
 * Construct a message descriptor for an A64 (stateless) byte scattered
 * read or write.
 */
static inline uint32_t
brw_dp_a64_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                                  unsigned exec_size, /**< 0 for SIMD4x2 */
                                  unsigned bit_size,
                                  bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ;

   /* Data size is encoded as an element count of (bit_size / 8) bytes. */
   const unsigned msg_control =
      SET_BITS(GFX8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) |
      SET_BITS(brw_mdc_a64_ds(bit_size / 8), 3, 2) |
      SET_BITS(exec_size == 16, 4, 4);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
754
/**
 * Construct a message descriptor for an A64 (stateless) untyped integer
 * atomic operation.  16-bit data requires Gfx12+.
 */
static inline uint32_t
brw_dp_a64_untyped_atomic_desc(const struct intel_device_info *devinfo,
                               ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned bit_size,
                               unsigned atomic_op,
                               bool response_expected)
{
   assert(exec_size == 8);
   assert(bit_size == 16 || bit_size == 32 || bit_size == 64);
   assert(devinfo->ver >= 12 || bit_size >= 32);

   const unsigned msg_type = bit_size == 16 ?
      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP :
      GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;

   /* Bit 4 selects 64-bit data; bit 5 requests a writeback response. */
   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(bit_size == 64, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
778
779 static inline uint32_t
brw_dp_a64_untyped_atomic_float_desc(const struct intel_device_info * devinfo,ASSERTED unsigned exec_size,unsigned bit_size,unsigned atomic_op,bool response_expected)780 brw_dp_a64_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
781 ASSERTED unsigned exec_size,
782 unsigned bit_size,
783 unsigned atomic_op,
784 bool response_expected)
785 {
786 assert(exec_size == 8);
787 assert(bit_size == 16 || bit_size == 32);
788 assert(devinfo->ver >= 12 || bit_size == 32);
789
790 assert(exec_size > 0);
791 const unsigned msg_type = bit_size == 32 ?
792 GFX9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP :
793 GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP;
794
795 const unsigned msg_control =
796 SET_BITS(atomic_op, 1, 0) |
797 SET_BITS(response_expected, 5, 5);
798
799 return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
800 msg_type, msg_control);
801 }
802
803 static inline uint32_t
brw_dp_typed_atomic_desc(const struct intel_device_info * devinfo,unsigned exec_size,unsigned exec_group,unsigned atomic_op,bool response_expected)804 brw_dp_typed_atomic_desc(const struct intel_device_info *devinfo,
805 unsigned exec_size,
806 unsigned exec_group,
807 unsigned atomic_op,
808 bool response_expected)
809 {
810 assert(exec_size > 0 || exec_group == 0);
811 assert(exec_group % 8 == 0);
812
813 const unsigned msg_type =
814 exec_size == 0 ? HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2 :
815 HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;
816
817 const bool high_sample_mask = (exec_group / 8) % 2 == 1;
818
819 const unsigned msg_control =
820 SET_BITS(atomic_op, 3, 0) |
821 SET_BITS(high_sample_mask, 4, 4) |
822 SET_BITS(response_expected, 5, 5);
823
824 return brw_dp_surface_desc(devinfo, msg_type, msg_control);
825 }
826
/**
 * Construct a message descriptor for a typed surface read or write.
 */
static inline uint32_t
brw_dp_typed_surface_rw_desc(const struct intel_device_info *devinfo,
                             unsigned exec_size,
                             unsigned exec_group,
                             unsigned num_channels,
                             bool write)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   /* Typed surface reads and writes don't support SIMD16 */
   assert(exec_size <= 8);

   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE :
              HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;

   /* See also MDC_SG3 in the SKL PRM Vol 2d. */
   const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */
                               1 + ((exec_group / 8) % 2);

   const unsigned msg_control =
      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(slot_group, 5, 4);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
854
/**
 * Construct a render-cache (framebuffer) message descriptor: binding table
 * index in bits 7:0, message control in 13:8, message type in 17:14.
 */
static inline uint32_t
brw_fb_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   uint32_t desc = SET_BITS(binding_table_index, 7, 0);
   desc |= SET_BITS(msg_control, 13, 8);
   desc |= SET_BITS(msg_type, 17, 14);
   return desc;
}
865
/* Extract the binding table index (bits 7:0) from a framebuffer descriptor. */
static inline unsigned
brw_fb_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

/* Extract the message control field (bits 13:8). */
static inline uint32_t
brw_fb_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 13, 8);
}

/* Extract the message type (bits 17:14). */
static inline unsigned
brw_fb_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 17, 14);
}
884
/**
 * Construct a render-target-read message descriptor.
 */
static inline uint32_t
brw_fb_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned exec_size,
                 bool per_sample)
{
   /* Render target reads only come in SIMD8 and SIMD16 variants. */
   assert(exec_size == 8 || exec_size == 16);

   return brw_fb_desc(devinfo, binding_table_index,
                      GFX9_DATAPORT_RC_RENDER_TARGET_READ, msg_control) |
          SET_BITS(per_sample, 13, 13) |
          SET_BITS(exec_size == 8, 8, 8) /* Render Target Message Subtype */;
}
899
/**
 * Construct a render-target-write message descriptor.  Coarse writes
 * require Gfx10+.
 */
static inline uint32_t
brw_fb_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  bool last_render_target,
                  bool coarse_write)
{
   const unsigned msg_type = GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;

   assert(devinfo->ver >= 10 || !coarse_write);

   return brw_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
          SET_BITS(last_render_target, 12, 12) |
          SET_BITS(coarse_write, 18, 18);
}
915
static inline bool
brw_fb_write_desc_last_render_target(const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   /* "Last Render Target Select" is bit 12 of the descriptor. */
   return GET_BITS(desc, 12, 12) != 0;
}
922
923 static inline bool
brw_fb_write_desc_coarse_write(const struct intel_device_info * devinfo,uint32_t desc)924 brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
925 uint32_t desc)
926 {
927 assert(devinfo->ver >= 10);
928 return GET_BITS(desc, 18, 18);
929 }
930
931 static inline bool
lsc_opcode_has_cmask(enum lsc_opcode opcode)932 lsc_opcode_has_cmask(enum lsc_opcode opcode)
933 {
934 return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK;
935 }
936
937 static inline bool
lsc_opcode_has_transpose(enum lsc_opcode opcode)938 lsc_opcode_has_transpose(enum lsc_opcode opcode)
939 {
940 return opcode == LSC_OP_LOAD || opcode == LSC_OP_STORE;
941 }
942
943 static inline bool
lsc_opcode_is_store(enum lsc_opcode opcode)944 lsc_opcode_is_store(enum lsc_opcode opcode)
945 {
946 return opcode == LSC_OP_STORE ||
947 opcode == LSC_OP_STORE_CMASK;
948 }
949
950 static inline bool
lsc_opcode_is_atomic(enum lsc_opcode opcode)951 lsc_opcode_is_atomic(enum lsc_opcode opcode)
952 {
953 switch (opcode) {
954 case LSC_OP_ATOMIC_INC:
955 case LSC_OP_ATOMIC_DEC:
956 case LSC_OP_ATOMIC_LOAD:
957 case LSC_OP_ATOMIC_STORE:
958 case LSC_OP_ATOMIC_ADD:
959 case LSC_OP_ATOMIC_SUB:
960 case LSC_OP_ATOMIC_MIN:
961 case LSC_OP_ATOMIC_MAX:
962 case LSC_OP_ATOMIC_UMIN:
963 case LSC_OP_ATOMIC_UMAX:
964 case LSC_OP_ATOMIC_CMPXCHG:
965 case LSC_OP_ATOMIC_FADD:
966 case LSC_OP_ATOMIC_FSUB:
967 case LSC_OP_ATOMIC_FMIN:
968 case LSC_OP_ATOMIC_FMAX:
969 case LSC_OP_ATOMIC_FCMPXCHG:
970 case LSC_OP_ATOMIC_AND:
971 case LSC_OP_ATOMIC_OR:
972 case LSC_OP_ATOMIC_XOR:
973 return true;
974
975 default:
976 return false;
977 }
978 }
979
980 static inline bool
lsc_opcode_is_atomic_float(enum lsc_opcode opcode)981 lsc_opcode_is_atomic_float(enum lsc_opcode opcode)
982 {
983 switch (opcode) {
984 case LSC_OP_ATOMIC_FADD:
985 case LSC_OP_ATOMIC_FSUB:
986 case LSC_OP_ATOMIC_FMIN:
987 case LSC_OP_ATOMIC_FMAX:
988 case LSC_OP_ATOMIC_FCMPXCHG:
989 return true;
990
991 default:
992 return false;
993 }
994 }
995
996 static inline unsigned
lsc_op_num_data_values(unsigned _op)997 lsc_op_num_data_values(unsigned _op)
998 {
999 enum lsc_opcode op = (enum lsc_opcode) _op;
1000
1001 switch (op) {
1002 case LSC_OP_ATOMIC_CMPXCHG:
1003 case LSC_OP_ATOMIC_FCMPXCHG:
1004 return 2;
1005 case LSC_OP_ATOMIC_INC:
1006 case LSC_OP_ATOMIC_DEC:
1007 case LSC_OP_LOAD:
1008 case LSC_OP_LOAD_CMASK:
1009 case LSC_OP_FENCE:
1010 /* XXX: actually check docs */
1011 return 0;
1012 default:
1013 return 1;
1014 }
1015 }
1016
1017 static inline unsigned
lsc_op_to_legacy_atomic(unsigned _op)1018 lsc_op_to_legacy_atomic(unsigned _op)
1019 {
1020 enum lsc_opcode op = (enum lsc_opcode) _op;
1021
1022 switch (op) {
1023 case LSC_OP_ATOMIC_INC:
1024 return BRW_AOP_INC;
1025 case LSC_OP_ATOMIC_DEC:
1026 return BRW_AOP_DEC;
1027 case LSC_OP_ATOMIC_STORE:
1028 return BRW_AOP_MOV;
1029 case LSC_OP_ATOMIC_ADD:
1030 return BRW_AOP_ADD;
1031 case LSC_OP_ATOMIC_SUB:
1032 return BRW_AOP_SUB;
1033 case LSC_OP_ATOMIC_MIN:
1034 return BRW_AOP_IMIN;
1035 case LSC_OP_ATOMIC_MAX:
1036 return BRW_AOP_IMAX;
1037 case LSC_OP_ATOMIC_UMIN:
1038 return BRW_AOP_UMIN;
1039 case LSC_OP_ATOMIC_UMAX:
1040 return BRW_AOP_UMAX;
1041 case LSC_OP_ATOMIC_CMPXCHG:
1042 return BRW_AOP_CMPWR;
1043 case LSC_OP_ATOMIC_FADD:
1044 return BRW_AOP_FADD;
1045 case LSC_OP_ATOMIC_FMIN:
1046 return BRW_AOP_FMIN;
1047 case LSC_OP_ATOMIC_FMAX:
1048 return BRW_AOP_FMAX;
1049 case LSC_OP_ATOMIC_FCMPXCHG:
1050 return BRW_AOP_FCMPWR;
1051 case LSC_OP_ATOMIC_AND:
1052 return BRW_AOP_AND;
1053 case LSC_OP_ATOMIC_OR:
1054 return BRW_AOP_OR;
1055 case LSC_OP_ATOMIC_XOR:
1056 return BRW_AOP_XOR;
1057 /* No LSC op maps to BRW_AOP_PREDEC */
1058 case LSC_OP_ATOMIC_LOAD:
1059 case LSC_OP_ATOMIC_FSUB:
1060 unreachable("no corresponding legacy atomic operation");
1061 case LSC_OP_LOAD:
1062 case LSC_OP_LOAD_CMASK:
1063 case LSC_OP_STORE:
1064 case LSC_OP_STORE_CMASK:
1065 case LSC_OP_FENCE:
1066 unreachable("not an atomic op");
1067 }
1068
1069 unreachable("invalid LSC op");
1070 }
1071
1072 static inline uint32_t
lsc_data_size_bytes(enum lsc_data_size data_size)1073 lsc_data_size_bytes(enum lsc_data_size data_size)
1074 {
1075 switch (data_size) {
1076 case LSC_DATA_SIZE_D8:
1077 return 1;
1078 case LSC_DATA_SIZE_D16:
1079 return 2;
1080 case LSC_DATA_SIZE_D32:
1081 case LSC_DATA_SIZE_D8U32:
1082 case LSC_DATA_SIZE_D16U32:
1083 case LSC_DATA_SIZE_D16BF32:
1084 return 4;
1085 case LSC_DATA_SIZE_D64:
1086 return 8;
1087 default:
1088 unreachable("Unsupported data payload size.");
1089 }
1090 }
1091
1092 static inline uint32_t
lsc_addr_size_bytes(enum lsc_addr_size addr_size)1093 lsc_addr_size_bytes(enum lsc_addr_size addr_size)
1094 {
1095 switch (addr_size) {
1096 case LSC_ADDR_SIZE_A16: return 2;
1097 case LSC_ADDR_SIZE_A32: return 4;
1098 case LSC_ADDR_SIZE_A64: return 8;
1099 default:
1100 unreachable("Unsupported address size.");
1101 }
1102 }
1103
1104 static inline uint32_t
lsc_vector_length(enum lsc_vect_size vect_size)1105 lsc_vector_length(enum lsc_vect_size vect_size)
1106 {
1107 switch (vect_size) {
1108 case LSC_VECT_SIZE_V1: return 1;
1109 case LSC_VECT_SIZE_V2: return 2;
1110 case LSC_VECT_SIZE_V3: return 3;
1111 case LSC_VECT_SIZE_V4: return 4;
1112 case LSC_VECT_SIZE_V8: return 8;
1113 case LSC_VECT_SIZE_V16: return 16;
1114 case LSC_VECT_SIZE_V32: return 32;
1115 case LSC_VECT_SIZE_V64: return 64;
1116 default:
1117 unreachable("Unsupported size of vector");
1118 }
1119 }
1120
1121 static inline enum lsc_vect_size
lsc_vect_size(unsigned vect_size)1122 lsc_vect_size(unsigned vect_size)
1123 {
1124 switch(vect_size) {
1125 case 1: return LSC_VECT_SIZE_V1;
1126 case 2: return LSC_VECT_SIZE_V2;
1127 case 3: return LSC_VECT_SIZE_V3;
1128 case 4: return LSC_VECT_SIZE_V4;
1129 case 8: return LSC_VECT_SIZE_V8;
1130 case 16: return LSC_VECT_SIZE_V16;
1131 case 32: return LSC_VECT_SIZE_V32;
1132 case 64: return LSC_VECT_SIZE_V64;
1133 default:
1134 unreachable("Unsupported vector size for dataport");
1135 }
1136 }
1137
1138 static inline uint32_t
lsc_msg_desc(const struct intel_device_info * devinfo,enum lsc_opcode opcode,enum lsc_addr_surface_type addr_type,enum lsc_addr_size addr_sz,enum lsc_data_size data_sz,unsigned num_channels_or_cmask,bool transpose,unsigned cache_ctrl)1139 lsc_msg_desc(const struct intel_device_info *devinfo,
1140 enum lsc_opcode opcode,
1141 enum lsc_addr_surface_type addr_type,
1142 enum lsc_addr_size addr_sz,
1143 enum lsc_data_size data_sz, unsigned num_channels_or_cmask,
1144 bool transpose, unsigned cache_ctrl)
1145 {
1146 assert(devinfo->has_lsc);
1147 assert(!transpose || lsc_opcode_has_transpose(opcode));
1148
1149 unsigned msg_desc =
1150 SET_BITS(opcode, 5, 0) |
1151 SET_BITS(addr_sz, 8, 7) |
1152 SET_BITS(data_sz, 11, 9) |
1153 SET_BITS(transpose, 15, 15) |
1154 (devinfo->ver >= 20 ? SET_BITS(cache_ctrl, 19, 16) :
1155 SET_BITS(cache_ctrl, 19, 17)) |
1156 SET_BITS(addr_type, 30, 29);
1157
1158 if (lsc_opcode_has_cmask(opcode))
1159 msg_desc |= SET_BITS(num_channels_or_cmask, 15, 12);
1160 else
1161 msg_desc |= SET_BITS(lsc_vect_size(num_channels_or_cmask), 14, 12);
1162
1163 return msg_desc;
1164 }
1165
1166 static inline enum lsc_opcode
lsc_msg_desc_opcode(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1167 lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo,
1168 uint32_t desc)
1169 {
1170 assert(devinfo->has_lsc);
1171 return (enum lsc_opcode) GET_BITS(desc, 5, 0);
1172 }
1173
1174 static inline enum lsc_addr_size
lsc_msg_desc_addr_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1175 lsc_msg_desc_addr_size(UNUSED const struct intel_device_info *devinfo,
1176 uint32_t desc)
1177 {
1178 assert(devinfo->has_lsc);
1179 return (enum lsc_addr_size) GET_BITS(desc, 8, 7);
1180 }
1181
1182 static inline enum lsc_data_size
lsc_msg_desc_data_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1183 lsc_msg_desc_data_size(UNUSED const struct intel_device_info *devinfo,
1184 uint32_t desc)
1185 {
1186 assert(devinfo->has_lsc);
1187 return (enum lsc_data_size) GET_BITS(desc, 11, 9);
1188 }
1189
1190 static inline enum lsc_vect_size
lsc_msg_desc_vect_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1191 lsc_msg_desc_vect_size(UNUSED const struct intel_device_info *devinfo,
1192 uint32_t desc)
1193 {
1194 assert(devinfo->has_lsc);
1195 assert(!lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
1196 return (enum lsc_vect_size) GET_BITS(desc, 14, 12);
1197 }
1198
1199 static inline enum lsc_cmask
lsc_msg_desc_cmask(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1200 lsc_msg_desc_cmask(UNUSED const struct intel_device_info *devinfo,
1201 uint32_t desc)
1202 {
1203 assert(devinfo->has_lsc);
1204 assert(lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
1205 return (enum lsc_cmask) GET_BITS(desc, 15, 12);
1206 }
1207
1208 static inline bool
lsc_msg_desc_transpose(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1209 lsc_msg_desc_transpose(UNUSED const struct intel_device_info *devinfo,
1210 uint32_t desc)
1211 {
1212 assert(devinfo->has_lsc);
1213 return GET_BITS(desc, 15, 15);
1214 }
1215
1216 static inline unsigned
lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1217 lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo,
1218 uint32_t desc)
1219 {
1220 assert(devinfo->has_lsc);
1221 return devinfo->ver >= 20 ? GET_BITS(desc, 19, 16) : GET_BITS(desc, 19, 17);
1222 }
1223
1224 static inline unsigned
lsc_msg_dest_len(const struct intel_device_info * devinfo,enum lsc_data_size data_sz,unsigned n)1225 lsc_msg_dest_len(const struct intel_device_info *devinfo,
1226 enum lsc_data_size data_sz, unsigned n)
1227 {
1228 return DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * n,
1229 reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo);
1230 }
1231
1232 static inline unsigned
lsc_msg_addr_len(const struct intel_device_info * devinfo,enum lsc_addr_size addr_sz,unsigned n)1233 lsc_msg_addr_len(const struct intel_device_info *devinfo,
1234 enum lsc_addr_size addr_sz, unsigned n)
1235 {
1236 return DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * n,
1237 reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo);
1238 }
1239
1240 static inline enum lsc_addr_surface_type
lsc_msg_desc_addr_type(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1241 lsc_msg_desc_addr_type(UNUSED const struct intel_device_info *devinfo,
1242 uint32_t desc)
1243 {
1244 assert(devinfo->has_lsc);
1245 return (enum lsc_addr_surface_type) GET_BITS(desc, 30, 29);
1246 }
1247
1248 static inline uint32_t
lsc_fence_msg_desc(UNUSED const struct intel_device_info * devinfo,enum lsc_fence_scope scope,enum lsc_flush_type flush_type,bool route_to_lsc)1249 lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
1250 enum lsc_fence_scope scope,
1251 enum lsc_flush_type flush_type,
1252 bool route_to_lsc)
1253 {
1254 assert(devinfo->has_lsc);
1255
1256 #if INTEL_NEEDS_WA_22017182272
1257 assert(flush_type != LSC_FLUSH_TYPE_DISCARD);
1258 #endif
1259
1260 return SET_BITS(LSC_OP_FENCE, 5, 0) |
1261 SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) |
1262 SET_BITS(scope, 11, 9) |
1263 SET_BITS(flush_type, 14, 12) |
1264 SET_BITS(route_to_lsc, 18, 18) |
1265 SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
1266 }
1267
1268 static inline enum lsc_fence_scope
lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1269 lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info *devinfo,
1270 uint32_t desc)
1271 {
1272 assert(devinfo->has_lsc);
1273 return (enum lsc_fence_scope) GET_BITS(desc, 11, 9);
1274 }
1275
1276 static inline enum lsc_flush_type
lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1277 lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info *devinfo,
1278 uint32_t desc)
1279 {
1280 assert(devinfo->has_lsc);
1281 return (enum lsc_flush_type) GET_BITS(desc, 14, 12);
1282 }
1283
1284 static inline enum lsc_backup_fence_routing
lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1285 lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info *devinfo,
1286 uint32_t desc)
1287 {
1288 assert(devinfo->has_lsc);
1289 return (enum lsc_backup_fence_routing) GET_BITS(desc, 18, 18);
1290 }
1291
1292 static inline uint32_t
lsc_bti_ex_desc(const struct intel_device_info * devinfo,unsigned bti)1293 lsc_bti_ex_desc(const struct intel_device_info *devinfo, unsigned bti)
1294 {
1295 assert(devinfo->has_lsc);
1296 return SET_BITS(bti, 31, 24) |
1297 SET_BITS(0, 23, 12); /* base offset */
1298 }
1299
1300 static inline unsigned
lsc_bti_ex_desc_base_offset(const struct intel_device_info * devinfo,uint32_t ex_desc)1301 lsc_bti_ex_desc_base_offset(const struct intel_device_info *devinfo,
1302 uint32_t ex_desc)
1303 {
1304 assert(devinfo->has_lsc);
1305 return GET_BITS(ex_desc, 23, 12);
1306 }
1307
1308 static inline unsigned
lsc_bti_ex_desc_index(const struct intel_device_info * devinfo,uint32_t ex_desc)1309 lsc_bti_ex_desc_index(const struct intel_device_info *devinfo,
1310 uint32_t ex_desc)
1311 {
1312 assert(devinfo->has_lsc);
1313 return GET_BITS(ex_desc, 31, 24);
1314 }
1315
1316 static inline unsigned
lsc_flat_ex_desc_base_offset(const struct intel_device_info * devinfo,uint32_t ex_desc)1317 lsc_flat_ex_desc_base_offset(const struct intel_device_info *devinfo,
1318 uint32_t ex_desc)
1319 {
1320 assert(devinfo->has_lsc);
1321 return GET_BITS(ex_desc, 31, 12);
1322 }
1323
1324 static inline uint32_t
lsc_bss_ex_desc(const struct intel_device_info * devinfo,unsigned surface_state_index)1325 lsc_bss_ex_desc(const struct intel_device_info *devinfo,
1326 unsigned surface_state_index)
1327 {
1328 assert(devinfo->has_lsc);
1329 return SET_BITS(surface_state_index, 31, 6);
1330 }
1331
1332 static inline unsigned
lsc_bss_ex_desc_index(const struct intel_device_info * devinfo,uint32_t ex_desc)1333 lsc_bss_ex_desc_index(const struct intel_device_info *devinfo,
1334 uint32_t ex_desc)
1335 {
1336 assert(devinfo->has_lsc);
1337 return GET_BITS(ex_desc, 31, 6);
1338 }
1339
static inline uint32_t
brw_mdc_sm2(unsigned exec_size)
{
   /* Encode SIMD8/SIMD16 as the one-bit SM2 descriptor field:
    * 0 -> SIMD8, 1 -> SIMD16.
    */
   assert(exec_size == 8 || exec_size == 16);
   return exec_size == 16 ? 1 : 0;
}
1346
static inline uint32_t
brw_mdc_sm2_exec_size(uint32_t sm2)
{
   /* Inverse of brw_mdc_sm2(): 0 -> SIMD8, 1 -> SIMD16. */
   assert(sm2 <= 1);
   return sm2 ? 16 : 8;
}
1353
1354 static inline uint32_t
brw_btd_spawn_desc(ASSERTED const struct intel_device_info * devinfo,unsigned exec_size,unsigned msg_type)1355 brw_btd_spawn_desc(ASSERTED const struct intel_device_info *devinfo,
1356 unsigned exec_size, unsigned msg_type)
1357 {
1358 assert(devinfo->has_ray_tracing);
1359 assert(devinfo->ver < 20 || exec_size == 16);
1360
1361 return SET_BITS(0, 19, 19) | /* No header */
1362 SET_BITS(msg_type, 17, 14) |
1363 SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
1364 }
1365
1366 static inline uint32_t
brw_btd_spawn_msg_type(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1367 brw_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
1368 uint32_t desc)
1369 {
1370 return GET_BITS(desc, 17, 14);
1371 }
1372
1373 static inline uint32_t
brw_btd_spawn_exec_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1374 brw_btd_spawn_exec_size(UNUSED const struct intel_device_info *devinfo,
1375 uint32_t desc)
1376 {
1377 return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
1378 }
1379
1380 static inline uint32_t
brw_rt_trace_ray_desc(ASSERTED const struct intel_device_info * devinfo,unsigned exec_size)1381 brw_rt_trace_ray_desc(ASSERTED const struct intel_device_info *devinfo,
1382 unsigned exec_size)
1383 {
1384 assert(devinfo->has_ray_tracing);
1385 assert(devinfo->ver < 20 || exec_size == 16);
1386
1387 return SET_BITS(0, 19, 19) | /* No header */
1388 SET_BITS(0, 17, 14) | /* Message type */
1389 SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
1390 }
1391
1392 static inline uint32_t
brw_rt_trace_ray_desc_exec_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1393 brw_rt_trace_ray_desc_exec_size(UNUSED const struct intel_device_info *devinfo,
1394 uint32_t desc)
1395 {
1396 return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
1397 }
1398
1399 /**
1400 * Construct a message descriptor immediate with the specified pixel
1401 * interpolator function controls.
1402 */
1403 static inline uint32_t
brw_pixel_interp_desc(UNUSED const struct intel_device_info * devinfo,unsigned msg_type,bool noperspective,bool coarse_pixel_rate,unsigned exec_size,unsigned group)1404 brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
1405 unsigned msg_type,
1406 bool noperspective,
1407 bool coarse_pixel_rate,
1408 unsigned exec_size,
1409 unsigned group)
1410 {
1411 assert(exec_size == 8 || exec_size == 16);
1412 const bool simd_mode = exec_size == 16;
1413 const bool slot_group = group >= 16;
1414
1415 assert(devinfo->ver >= 10 || !coarse_pixel_rate);
1416 return (SET_BITS(slot_group, 11, 11) |
1417 SET_BITS(msg_type, 13, 12) |
1418 SET_BITS(!!noperspective, 14, 14) |
1419 SET_BITS(coarse_pixel_rate, 15, 15) |
1420 SET_BITS(simd_mode, 16, 16));
1421 }
1422
1423 /**
1424 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
1425 * desc. If \p desc is not an immediate it will be transparently loaded to an
1426 * address register using an OR instruction.
1427 */
1428 void
1429 brw_send_indirect_message(struct brw_codegen *p,
1430 unsigned sfid,
1431 struct brw_reg dst,
1432 struct brw_reg payload,
1433 struct brw_reg desc,
1434 unsigned desc_imm,
1435 bool eot);
1436
1437 void
1438 brw_send_indirect_split_message(struct brw_codegen *p,
1439 unsigned sfid,
1440 struct brw_reg dst,
1441 struct brw_reg payload0,
1442 struct brw_reg payload1,
1443 struct brw_reg desc,
1444 unsigned desc_imm,
1445 struct brw_reg ex_desc,
1446 unsigned ex_desc_imm,
1447 bool ex_desc_scratch,
1448 bool ex_bso,
1449 bool eot);
1450
1451 void gfx6_math(struct brw_codegen *p,
1452 struct brw_reg dest,
1453 unsigned function,
1454 struct brw_reg src0,
1455 struct brw_reg src1);
1456
/**
 * Return the generation-specific jump distance scaling factor.
 *
 * Given the number of instructions to jump, we need to scale by
 * some number to obtain the actual jump distance to program in an
 * instruction.
 */
static inline unsigned
brw_jump_scale(const struct intel_device_info *devinfo)
{
   /* Jump targets are measured in bytes; a full-size instruction is
    * 16 bytes.
    */
   return 16;
}
1470
1471 void brw_barrier(struct brw_codegen *p, struct brw_reg src);
1472
1473 /* If/else/endif. Works by manipulating the execution flags on each
1474 * channel.
1475 */
1476 brw_inst *brw_IF(struct brw_codegen *p, unsigned execute_size);
1477
1478 void brw_ELSE(struct brw_codegen *p);
1479 void brw_ENDIF(struct brw_codegen *p);
1480
1481 /* DO/WHILE loops:
1482 */
1483 brw_inst *brw_DO(struct brw_codegen *p, unsigned execute_size);
1484
1485 brw_inst *brw_WHILE(struct brw_codegen *p);
1486
1487 brw_inst *brw_BREAK(struct brw_codegen *p);
1488 brw_inst *brw_CONT(struct brw_codegen *p);
1489 brw_inst *brw_HALT(struct brw_codegen *p);
1490
1491 /* Forward jumps:
1492 */
1493 brw_inst *brw_JMPI(struct brw_codegen *p, struct brw_reg index,
1494 unsigned predicate_control);
1495
1496 void brw_NOP(struct brw_codegen *p);
1497
1498 void brw_WAIT(struct brw_codegen *p);
1499
1500 void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func);
1501
1502 /* Special case: there is never a destination, execution size will be
1503 * taken from src0:
1504 */
1505 void brw_CMP(struct brw_codegen *p,
1506 struct brw_reg dest,
1507 unsigned conditional,
1508 struct brw_reg src0,
1509 struct brw_reg src1);
1510
1511 void brw_CMPN(struct brw_codegen *p,
1512 struct brw_reg dest,
1513 unsigned conditional,
1514 struct brw_reg src0,
1515 struct brw_reg src1);
1516
1517 brw_inst *brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth,
1518 unsigned rcount, struct brw_reg dest, struct brw_reg src0,
1519 struct brw_reg src1, struct brw_reg src2);
1520
1521 void
1522 brw_memory_fence(struct brw_codegen *p,
1523 struct brw_reg dst,
1524 struct brw_reg src,
1525 enum opcode send_op,
1526 enum brw_message_target sfid,
1527 uint32_t desc,
1528 bool commit_enable,
1529 unsigned bti);
1530
1531 void
1532 brw_broadcast(struct brw_codegen *p,
1533 struct brw_reg dst,
1534 struct brw_reg src,
1535 struct brw_reg idx);
1536
1537 void
1538 brw_float_controls_mode(struct brw_codegen *p,
1539 unsigned mode, unsigned mask);
1540
1541 void
1542 brw_update_reloc_imm(const struct brw_isa_info *isa,
1543 brw_inst *inst,
1544 uint32_t value);
1545
1546 void
1547 brw_MOV_reloc_imm(struct brw_codegen *p,
1548 struct brw_reg dst,
1549 enum brw_reg_type src_type,
1550 uint32_t id, uint32_t base);
1551
1552 unsigned
1553 brw_num_sources_from_inst(const struct brw_isa_info *isa,
1554 const brw_inst *inst);
1555
1556 void brw_set_src1(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);
1557
1558 void brw_set_desc_ex(struct brw_codegen *p, brw_inst *insn,
1559 unsigned desc, unsigned ex_desc);
1560
1561 static inline void
brw_set_desc(struct brw_codegen * p,brw_inst * insn,unsigned desc)1562 brw_set_desc(struct brw_codegen *p, brw_inst *insn, unsigned desc)
1563 {
1564 brw_set_desc_ex(p, insn, desc, 0);
1565 }
1566
1567 void brw_set_uip_jip(struct brw_codegen *p, int start_offset);
1568
1569 enum brw_conditional_mod brw_negate_cmod(enum brw_conditional_mod cmod);
1570 enum brw_conditional_mod brw_swap_cmod(enum brw_conditional_mod cmod);
1571
1572 /* brw_eu_compact.c */
1573 void brw_compact_instructions(struct brw_codegen *p, int start_offset,
1574 struct disasm_info *disasm);
1575 void brw_uncompact_instruction(const struct brw_isa_info *isa,
1576 brw_inst *dst, brw_compact_inst *src);
1577 bool brw_try_compact_instruction(const struct brw_isa_info *isa,
1578 brw_compact_inst *dst, const brw_inst *src);
1579
1580 void brw_debug_compact_uncompact(const struct brw_isa_info *isa,
1581 brw_inst *orig, brw_inst *uncompacted);
1582
1583 /* brw_eu_validate.c */
1584 bool brw_validate_instruction(const struct brw_isa_info *isa,
1585 const brw_inst *inst, int offset,
1586 unsigned inst_size,
1587 struct disasm_info *disasm);
1588 bool brw_validate_instructions(const struct brw_isa_info *isa,
1589 const void *assembly, int start_offset, int end_offset,
1590 struct disasm_info *disasm);
1591
1592 static inline int
next_offset(const struct intel_device_info * devinfo,void * store,int offset)1593 next_offset(const struct intel_device_info *devinfo, void *store, int offset)
1594 {
1595 brw_inst *insn = (brw_inst *)((char *)store + offset);
1596
1597 if (brw_inst_cmpt_control(devinfo, insn))
1598 return offset + 8;
1599 else
1600 return offset + 16;
1601 }
1602
1603 /** Maximum SEND message length */
1604 #define BRW_MAX_MSG_LENGTH 15
1605
1606 #ifdef __cplusplus
1607 }
1608 #endif
1609
1610 #endif
1611