/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <[email protected]>
  */


#ifndef BRW_EU_H
#define BRW_EU_H

#include <stdbool.h>
#include <stdio.h>
#include "brw_inst.h"
#include "brw_compiler.h"
#include "brw_eu_defines.h"
#include "brw_isa_info.h"
#include "brw_reg.h"

#include "intel_wa.h"
#include "util/bitset.h"

#ifdef __cplusplus
extern "C" {
#endif

struct disasm_info;

#define BRW_EU_MAX_INSN_STACK 5

struct brw_insn_state {
   /* One of BRW_EXECUTE_* */
   unsigned exec_size:3;

   /* Group in units of channels */
   unsigned group:5;

   /* One of BRW_MASK_* */
   unsigned mask_control:1;

   /* Scheduling info for Gfx12+ */
   struct tgl_swsb swsb;

   bool saturate:1;

   /* One of BRW_ALIGN_* */
   unsigned access_mode:1;

   /* One of BRW_PREDICATE_* */
   enum brw_predicate predicate:4;

   bool pred_inv:1;

   /* Flag subreg.  Bottom bit is subreg, top bit is reg */
   unsigned flag_subreg:2;

   bool acc_wr_control:1;
};


/* A helper for accessing the last instruction emitted.  This makes it easy
 * to set various bits on an instruction without having to create a temporary
 * variable and assign the emitted instruction to it.
 */
#define brw_last_inst (&p->store[p->nr_insn - 1])
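
/* Hypothetical usage sketch (added; not part of the original header): tweak
 * fields on the most recently emitted instruction without naming it.
 * Assumes a live struct brw_codegen *p and brw_reg values dst/a/b built with
 * the constructors from brw_reg.h.
 *
 *    brw_ADD(p, dst, a, b);
 *    brw_inst_set_group(p->devinfo, brw_last_inst, 8);
 *
 * brw_inst_set_group() is declared below; other brw_inst_set_*() setters
 * from brw_inst.h can be used the same way.
 */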

struct brw_codegen {
   brw_inst *store;
   int store_size;
   unsigned nr_insn;
   unsigned int next_insn_offset;

   void *mem_ctx;

   /* Allow clients to push/pop instruction state:
    */
   struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK];
   struct brw_insn_state *current;

   const struct brw_isa_info *isa;
   const struct intel_device_info *devinfo;

   /* Control flow stacks:
    * - if_stack contains IF and ELSE instructions which must be patched
    *   (and popped) once the matching ENDIF instruction is encountered.
    *
    *   Just store the instruction pointer (an index).
    */
   int *if_stack;
   int if_stack_depth;
   int if_stack_array_size;

   /**
    * loop_stack contains the instruction pointers of the starts of loops which
    * must be patched (and popped) once the matching WHILE instruction is
    * encountered.
    */
   int *loop_stack;
   int loop_stack_depth;
   int loop_stack_array_size;

   struct brw_shader_reloc *relocs;
   int num_relocs;
   int reloc_array_size;
};
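
/* A minimal sketch of the typical codegen flow (added; not part of the
 * original header).  It assumes an initialized brw_isa_info, a ralloc memory
 * context, and register constructors from brw_reg.h such as brw_vec8_grf()
 * and brw_imm_f(); the exact register setup depends on the shader being
 * emitted.
 *
 *    struct brw_codegen p = {0};
 *    brw_init_codegen(isa, &p, mem_ctx);
 *
 *    brw_push_insn_state(&p);
 *    brw_set_default_exec_size(&p, BRW_EXECUTE_8);
 *    brw_MOV(&p, brw_vec8_grf(2, 0), brw_imm_f(1.0f));
 *    brw_pop_insn_state(&p);
 *
 *    unsigned sz;
 *    const unsigned *assembly = brw_get_program(&p, &sz);
 */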

struct brw_label {
   int offset;
   int number;
   struct brw_label *next;
};

void brw_pop_insn_state( struct brw_codegen *p );
void brw_push_insn_state( struct brw_codegen *p );
unsigned brw_get_default_exec_size(struct brw_codegen *p);
unsigned brw_get_default_group(struct brw_codegen *p);
unsigned brw_get_default_access_mode(struct brw_codegen *p);
struct tgl_swsb brw_get_default_swsb(struct brw_codegen *p);
void brw_set_default_exec_size(struct brw_codegen *p, unsigned value);
void brw_set_default_mask_control( struct brw_codegen *p, unsigned value );
void brw_set_default_saturate( struct brw_codegen *p, bool enable );
void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode );
void brw_inst_set_group(const struct intel_device_info *devinfo,
                        brw_inst *inst, unsigned group);
void brw_set_default_group(struct brw_codegen *p, unsigned group);
void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc);
void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse);
void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg);
void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value);
void brw_set_default_swsb(struct brw_codegen *p, struct tgl_swsb value);

void brw_init_codegen(const struct brw_isa_info *isa,
                      struct brw_codegen *p, void *mem_ctx);
bool brw_has_jip(const struct intel_device_info *devinfo, enum opcode opcode);
bool brw_has_uip(const struct intel_device_info *devinfo, enum opcode opcode);
const struct brw_shader_reloc *brw_get_shader_relocs(struct brw_codegen *p,
                                                     unsigned *num_relocs);
const unsigned *brw_get_program( struct brw_codegen *p, unsigned *sz );

bool brw_should_dump_shader_bin(void);
void brw_dump_shader_bin(void *assembly, int start_offset, int end_offset,
                         const char *identifier);

bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,
                               const char *identifier);

void brw_realign(struct brw_codegen *p, unsigned alignment);
int brw_append_data(struct brw_codegen *p, void *data,
                    unsigned size, unsigned alignment);
brw_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode);
void brw_add_reloc(struct brw_codegen *p, uint32_t id,
                   enum brw_shader_reloc_type type,
                   uint32_t offset, uint32_t delta);
void brw_set_dest(struct brw_codegen *p, brw_inst *insn, struct brw_reg dest);
void brw_set_src0(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);

/* Helpers for regular instructions:
 */
#define ALU1(OP)				\
brw_inst *brw_##OP(struct brw_codegen *p,	\
	      struct brw_reg dest,		\
	      struct brw_reg src0);

#define ALU2(OP)				\
brw_inst *brw_##OP(struct brw_codegen *p,	\
	      struct brw_reg dest,		\
	      struct brw_reg src0,		\
	      struct brw_reg src1);

#define ALU3(OP)				\
brw_inst *brw_##OP(struct brw_codegen *p,	\
	      struct brw_reg dest,		\
	      struct brw_reg src0,		\
	      struct brw_reg src1,		\
	      struct brw_reg src2);

ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(F32TO16)
ALU1(F16TO32)
ALU2(ADD)
ALU3(ADD3)
ALU2(AVG)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
ALU3(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

#undef ALU1
#undef ALU2
#undef ALU3

static inline unsigned
reg_unit(const struct intel_device_info *devinfo)
{
   return devinfo->ver >= 20 ? 2 : 1;
}
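
/* Note added for readers (not from the original header): lengths in this
 * file are expressed in 32-byte REG_SIZE units, as used by lsc_msg_dest_len()
 * and lsc_msg_addr_len() below.  On Xe2+ (ver >= 20) the native GRF is twice
 * that size, so the descriptor helpers divide by reg_unit() when packing a
 * length field and multiply by it when unpacking; on earlier generations
 * reg_unit() is 1 and the scaling is a no-op.
 */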


/* Helpers for SEND instruction:
 */

/**
 * Construct a message descriptor immediate with the specified common
 * descriptor controls.
 */
static inline uint32_t
brw_message_desc(const struct intel_device_info *devinfo,
                 unsigned msg_length,
                 unsigned response_length,
                 bool header_present)
{
   assert(msg_length % reg_unit(devinfo) == 0);
   assert(response_length % reg_unit(devinfo) == 0);
   return (SET_BITS(msg_length / reg_unit(devinfo), 28, 25) |
           SET_BITS(response_length / reg_unit(devinfo), 24, 20) |
           SET_BITS(header_present, 19, 19));
}
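
/* Worked example (added; not in the original header): on a pre-Xe2 part,
 * where reg_unit() == 1,
 *
 *    brw_message_desc(devinfo, 2, 1, true)
 *       == (2u << 25) | (1u << 20) | (1u << 19)
 *       == 0x04180000
 *
 * and the accessors below recover the fields again:
 * brw_message_desc_mlen() -> 2, brw_message_desc_rlen() -> 1,
 * brw_message_desc_header_present() -> true.
 */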

static inline unsigned
brw_message_desc_mlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 28, 25) * reg_unit(devinfo);
}

static inline unsigned
brw_message_desc_rlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 24, 20) * reg_unit(devinfo);
}

static inline bool
brw_message_desc_header_present(ASSERTED
                                const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 19, 19);
}

static inline unsigned
brw_message_ex_desc(const struct intel_device_info *devinfo,
                    unsigned ex_msg_length)
{
   assert(ex_msg_length % reg_unit(devinfo) == 0);
   return devinfo->ver >= 20 ?
      SET_BITS(ex_msg_length / reg_unit(devinfo), 10, 6) :
      SET_BITS(ex_msg_length / reg_unit(devinfo), 9, 6);
}

static inline unsigned
brw_message_ex_desc_ex_mlen(const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   return devinfo->ver >= 20 ?
      GET_BITS(ex_desc, 10, 6) * reg_unit(devinfo) :
      GET_BITS(ex_desc, 9, 6) * reg_unit(devinfo);
}

static inline uint32_t
brw_urb_desc(const struct intel_device_info *devinfo,
             unsigned msg_type,
             bool per_slot_offset_present,
             bool channel_mask_present,
             unsigned global_offset)
{
   return (SET_BITS(per_slot_offset_present, 17, 17) |
           SET_BITS(channel_mask_present, 15, 15) |
           SET_BITS(global_offset, 14, 4) |
           SET_BITS(msg_type, 3, 0));
}

static inline uint32_t
brw_urb_desc_msg_type(ASSERTED const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   return GET_BITS(desc, 3, 0);
}

static inline uint32_t
brw_urb_fence_desc(const struct intel_device_info *devinfo)
{
   assert(devinfo->has_lsc);
   return brw_urb_desc(devinfo, GFX125_URB_OPCODE_FENCE, false, false, 0);
}

/**
 * Construct a message descriptor immediate with the specified sampler
 * function controls.
 */
static inline uint32_t
brw_sampler_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned sampler,
                 unsigned msg_type,
                 unsigned simd_mode,
                 unsigned return_format)
{
   const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
                          SET_BITS(sampler, 11, 8));

   /* From GFX20 Bspec: Shared Functions - Message Descriptor -
    * Sampling Engine:
    *
    *    Message Type[5]  31  This bit represents the upper bit of message type
    *                         6-bit encoding (c.f. [16:12]). This bit is set
    *                         for messages with programmable offsets.
    */
   if (devinfo->ver >= 20)
      return desc | SET_BITS(msg_type & 0x1F, 16, 12) |
             SET_BITS(simd_mode & 0x3, 18, 17) |
             SET_BITS(simd_mode >> 2, 29, 29) |
             SET_BITS(return_format, 30, 30) |
             SET_BITS(msg_type >> 5, 31, 31);

   /* From the CHV Bspec: Shared Functions - Message Descriptor -
    * Sampling Engine:
    *
    *   SIMD Mode[2]  29    This field is the upper bit of the 3-bit
    *                       SIMD Mode field.
    */
   return desc | SET_BITS(msg_type, 16, 12) |
          SET_BITS(simd_mode & 0x3, 18, 17) |
          SET_BITS(simd_mode >> 2, 29, 29) |
          SET_BITS(return_format, 30, 30);
}

static inline unsigned
brw_sampler_desc_binding_table_index(UNUSED
                                     const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline unsigned
brw_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   return GET_BITS(desc, 11, 8);
}

static inline unsigned
brw_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 20)
      return GET_BITS(desc, 31, 31) << 5 | GET_BITS(desc, 16, 12);
   else
      return GET_BITS(desc, 16, 12);
}

static inline unsigned
brw_sampler_desc_simd_mode(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   return GET_BITS(desc, 18, 17) | GET_BITS(desc, 29, 29) << 2;
}

static inline unsigned
brw_sampler_desc_return_format(ASSERTED const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   return GET_BITS(desc, 30, 30);
}

/**
 * Construct a message descriptor for the dataport
 */
static inline uint32_t
brw_dp_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   return SET_BITS(binding_table_index, 7, 0) |
          SET_BITS(msg_control, 13, 8) |
          SET_BITS(msg_type, 18, 14);
}

static inline unsigned
brw_dp_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline unsigned
brw_dp_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 18, 14);
}

static inline unsigned
brw_dp_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 13, 8);
}

/**
 * Construct a message descriptor immediate with the specified dataport read
 * function controls.
 */
static inline uint32_t
brw_dp_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned msg_type,
                 unsigned target_cache)
{
   return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
}

static inline unsigned
brw_dp_read_desc_msg_type(const struct intel_device_info *devinfo,
                          uint32_t desc)
{
   return brw_dp_desc_msg_type(devinfo, desc);
}

static inline unsigned
brw_dp_read_desc_msg_control(const struct intel_device_info *devinfo,
                             uint32_t desc)
{
   return brw_dp_desc_msg_control(devinfo, desc);
}

/**
 * Construct a message descriptor immediate with the specified dataport write
 * function controls.
 */
static inline uint32_t
brw_dp_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  unsigned msg_type,
                  unsigned send_commit_msg)
{
   assert(!send_commit_msg);
   return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control) |
          SET_BITS(send_commit_msg, 17, 17);
}

static inline unsigned
brw_dp_write_desc_msg_type(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   return brw_dp_desc_msg_type(devinfo, desc);
}

static inline unsigned
brw_dp_write_desc_msg_control(const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   return brw_dp_desc_msg_control(devinfo, desc);
}

/**
 * Construct a message descriptor immediate with the specified dataport
 * surface function controls.
 */
static inline uint32_t
brw_dp_surface_desc(const struct intel_device_info *devinfo,
                    unsigned msg_type,
                    unsigned msg_control)
{
   /* We'll OR in the binding table index later */
   return brw_dp_desc(devinfo, 0, msg_type, msg_control);
}

static inline uint32_t
brw_dp_untyped_atomic_desc(const struct intel_device_info *devinfo,
                           unsigned exec_size, /**< 0 for SIMD4x2 */
                           unsigned atomic_op,
                           bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (exec_size > 0) {
      msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
   } else {
      msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
   }

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_dp_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
                                 unsigned exec_size,
                                 unsigned atomic_op,
                                 bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);

   assert(exec_size > 0);
   const unsigned msg_type = GFX9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 1, 0) |
      SET_BITS(exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline unsigned
brw_mdc_cmask(unsigned num_channels)
{
   /* See also MDC_CMASK in the SKL PRM Vol 2d. */
   return 0xf & (0xf << num_channels);
}
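
/* Example (added; not in the original header): per the MDC_CMASK encoding
 * referenced above, a set bit marks a channel that is not enabled, so
 * brw_mdc_cmask(4) == 0x0 (RGBA all enabled), brw_mdc_cmask(2) == 0xc
 * (only RG enabled), and brw_mdc_cmask(0) == 0xf (nothing enabled).
 */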

static inline uint32_t
brw_dp_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned num_channels,
                               bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE :
              HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline unsigned
brw_mdc_ds(unsigned bit_size)
{
   switch (bit_size) {
   case 8:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
   case 16:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
   case 32:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
   default:
      unreachable("Unsupported bit_size for byte scattered messages");
   }
}

static inline uint32_t
brw_dp_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                              unsigned exec_size,
                              unsigned bit_size,
                              bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE :
              HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ;

   assert(exec_size > 0);
   const unsigned msg_control =
      SET_BITS(exec_size == 16, 0, 0) |
      SET_BITS(brw_mdc_ds(bit_size), 3, 2);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_dp_dword_scattered_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size,
                               bool write)
{
   assert(exec_size == 8 || exec_size == 16);

   const unsigned msg_type =
      write ? GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE :
              GFX7_DATAPORT_DC_DWORD_SCATTERED_READ;

   const unsigned msg_control =
      SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
      SET_BITS(exec_size == 16, 0, 0);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_dp_oword_block_rw_desc(const struct intel_device_info *devinfo,
                           bool align_16B,
                           unsigned num_dwords,
                           bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   const unsigned msg_type =
      write ?     GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE :
      align_16B ? GFX7_DATAPORT_DC_OWORD_BLOCK_READ :
                  GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ;

   const unsigned msg_control =
      SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_dp_a64_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                                   unsigned exec_size, /**< 0 for SIMD4x2 */
                                   unsigned num_channels,
                                   bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE :
              GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
brw_dp_a64_oword_block_rw_desc(const struct intel_device_info *devinfo,
                               bool align_16B,
                               unsigned num_dwords,
                               bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   unsigned msg_type =
      write ? GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ;

   unsigned msg_control =
      SET_BITS(!align_16B, 4, 3) |
      SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

/**
 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
 * Skylake PRM).
 */
static inline uint32_t
brw_mdc_a64_ds(unsigned elems)
{
   switch (elems) {
   case 1:  return 0;
   case 2:  return 1;
   case 4:  return 2;
   case 8:  return 3;
   default:
      unreachable("Unsupported element count for A64 scattered message");
   }
}

static inline uint32_t
brw_dp_a64_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                                  unsigned exec_size, /**< 0 for SIMD4x2 */
                                  unsigned bit_size,
                                  bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ;

   const unsigned msg_control =
      SET_BITS(GFX8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) |
      SET_BITS(brw_mdc_a64_ds(bit_size / 8), 3, 2) |
      SET_BITS(exec_size == 16, 4, 4);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
brw_dp_a64_untyped_atomic_desc(const struct intel_device_info *devinfo,
                               ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned bit_size,
                               unsigned atomic_op,
                               bool response_expected)
{
   assert(exec_size == 8);
   assert(bit_size == 16 || bit_size == 32 || bit_size == 64);
   assert(devinfo->ver >= 12 || bit_size >= 32);

   const unsigned msg_type = bit_size == 16 ?
      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP :
      GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(bit_size == 64, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
brw_dp_a64_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
                                     ASSERTED unsigned exec_size,
                                     unsigned bit_size,
                                     unsigned atomic_op,
                                     bool response_expected)
{
   assert(exec_size == 8);
   assert(bit_size == 16 || bit_size == 32);
   assert(devinfo->ver >= 12 || bit_size == 32);

   assert(exec_size > 0);
   const unsigned msg_type = bit_size == 32 ?
      GFX9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP :
      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 1, 0) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
brw_dp_typed_atomic_desc(const struct intel_device_info *devinfo,
                         unsigned exec_size,
                         unsigned exec_group,
                         unsigned atomic_op,
                         bool response_expected)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   const unsigned msg_type =
      exec_size == 0 ? HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2 :
                       HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;

   const bool high_sample_mask = (exec_group / 8) % 2 == 1;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(high_sample_mask, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_dp_typed_surface_rw_desc(const struct intel_device_info *devinfo,
                             unsigned exec_size,
                             unsigned exec_group,
                             unsigned num_channels,
                             bool write)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   /* Typed surface reads and writes don't support SIMD16 */
   assert(exec_size <= 8);

   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE :
              HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;

   /* See also MDC_SG3 in the SKL PRM Vol 2d. */
   const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */
                               1 + ((exec_group / 8) % 2);

   const unsigned msg_control =
      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(slot_group, 5, 4);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_fb_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   return SET_BITS(binding_table_index, 7, 0) |
          SET_BITS(msg_control, 13, 8) |
          SET_BITS(msg_type, 17, 14);
}

static inline unsigned
brw_fb_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline uint32_t
brw_fb_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 13, 8);
}

static inline unsigned
brw_fb_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 17, 14);
}

static inline uint32_t
brw_fb_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned exec_size,
                 bool per_sample)
{
   assert(exec_size == 8 || exec_size == 16);

   return brw_fb_desc(devinfo, binding_table_index,
                      GFX9_DATAPORT_RC_RENDER_TARGET_READ, msg_control) |
          SET_BITS(per_sample, 13, 13) |
          SET_BITS(exec_size == 8, 8, 8) /* Render Target Message Subtype */;
}

static inline uint32_t
brw_fb_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  bool last_render_target,
                  bool coarse_write)
{
   const unsigned msg_type = GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;

   assert(devinfo->ver >= 10 || !coarse_write);

   return brw_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
          SET_BITS(last_render_target, 12, 12) |
          SET_BITS(coarse_write, 18, 18);
}

static inline bool
brw_fb_write_desc_last_render_target(const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   return GET_BITS(desc, 12, 12);
}

static inline bool
brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver >= 10);
   return GET_BITS(desc, 18, 18);
}

static inline bool
lsc_opcode_has_cmask(enum lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK;
}

static inline bool
lsc_opcode_has_transpose(enum lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD || opcode == LSC_OP_STORE;
}

static inline bool
lsc_opcode_is_store(enum lsc_opcode opcode)
{
   return opcode == LSC_OP_STORE ||
          opcode == LSC_OP_STORE_CMASK;
}

static inline bool
lsc_opcode_is_atomic(enum lsc_opcode opcode)
{
   switch (opcode) {
   case LSC_OP_ATOMIC_INC:
   case LSC_OP_ATOMIC_DEC:
   case LSC_OP_ATOMIC_LOAD:
   case LSC_OP_ATOMIC_STORE:
   case LSC_OP_ATOMIC_ADD:
   case LSC_OP_ATOMIC_SUB:
   case LSC_OP_ATOMIC_MIN:
   case LSC_OP_ATOMIC_MAX:
   case LSC_OP_ATOMIC_UMIN:
   case LSC_OP_ATOMIC_UMAX:
   case LSC_OP_ATOMIC_CMPXCHG:
   case LSC_OP_ATOMIC_FADD:
   case LSC_OP_ATOMIC_FSUB:
   case LSC_OP_ATOMIC_FMIN:
   case LSC_OP_ATOMIC_FMAX:
   case LSC_OP_ATOMIC_FCMPXCHG:
   case LSC_OP_ATOMIC_AND:
   case LSC_OP_ATOMIC_OR:
   case LSC_OP_ATOMIC_XOR:
      return true;

   default:
      return false;
   }
}

static inline bool
lsc_opcode_is_atomic_float(enum lsc_opcode opcode)
{
   switch (opcode) {
   case LSC_OP_ATOMIC_FADD:
   case LSC_OP_ATOMIC_FSUB:
   case LSC_OP_ATOMIC_FMIN:
   case LSC_OP_ATOMIC_FMAX:
   case LSC_OP_ATOMIC_FCMPXCHG:
      return true;

   default:
      return false;
   }
}

static inline unsigned
lsc_op_num_data_values(unsigned _op)
{
   enum lsc_opcode op = (enum lsc_opcode) _op;

   switch (op) {
   case LSC_OP_ATOMIC_CMPXCHG:
   case LSC_OP_ATOMIC_FCMPXCHG:
      return 2;
   case LSC_OP_ATOMIC_INC:
   case LSC_OP_ATOMIC_DEC:
   case LSC_OP_LOAD:
   case LSC_OP_LOAD_CMASK:
   case LSC_OP_FENCE:
      /* XXX: actually check docs */
      return 0;
   default:
      return 1;
   }
}

static inline unsigned
lsc_op_to_legacy_atomic(unsigned _op)
{
   enum lsc_opcode op = (enum lsc_opcode) _op;

   switch (op) {
   case LSC_OP_ATOMIC_INC:
      return BRW_AOP_INC;
   case LSC_OP_ATOMIC_DEC:
      return BRW_AOP_DEC;
   case LSC_OP_ATOMIC_STORE:
      return BRW_AOP_MOV;
   case LSC_OP_ATOMIC_ADD:
      return BRW_AOP_ADD;
   case LSC_OP_ATOMIC_SUB:
      return BRW_AOP_SUB;
   case LSC_OP_ATOMIC_MIN:
      return BRW_AOP_IMIN;
   case LSC_OP_ATOMIC_MAX:
      return BRW_AOP_IMAX;
   case LSC_OP_ATOMIC_UMIN:
      return BRW_AOP_UMIN;
   case LSC_OP_ATOMIC_UMAX:
      return BRW_AOP_UMAX;
   case LSC_OP_ATOMIC_CMPXCHG:
      return BRW_AOP_CMPWR;
   case LSC_OP_ATOMIC_FADD:
      return BRW_AOP_FADD;
   case LSC_OP_ATOMIC_FMIN:
      return BRW_AOP_FMIN;
   case LSC_OP_ATOMIC_FMAX:
      return BRW_AOP_FMAX;
   case LSC_OP_ATOMIC_FCMPXCHG:
      return BRW_AOP_FCMPWR;
   case LSC_OP_ATOMIC_AND:
      return BRW_AOP_AND;
   case LSC_OP_ATOMIC_OR:
      return BRW_AOP_OR;
   case LSC_OP_ATOMIC_XOR:
      return BRW_AOP_XOR;
   /* No LSC op maps to BRW_AOP_PREDEC */
   case LSC_OP_ATOMIC_LOAD:
   case LSC_OP_ATOMIC_FSUB:
      unreachable("no corresponding legacy atomic operation");
   case LSC_OP_LOAD:
   case LSC_OP_LOAD_CMASK:
   case LSC_OP_STORE:
   case LSC_OP_STORE_CMASK:
   case LSC_OP_FENCE:
      unreachable("not an atomic op");
   }

   unreachable("invalid LSC op");
}

static inline uint32_t
lsc_data_size_bytes(enum lsc_data_size data_size)
{
   switch (data_size) {
   case LSC_DATA_SIZE_D8:
      return 1;
   case LSC_DATA_SIZE_D16:
      return 2;
   case LSC_DATA_SIZE_D32:
   case LSC_DATA_SIZE_D8U32:
   case LSC_DATA_SIZE_D16U32:
   case LSC_DATA_SIZE_D16BF32:
      return 4;
   case LSC_DATA_SIZE_D64:
      return 8;
   default:
      unreachable("Unsupported data payload size.");
   }
}

static inline uint32_t
lsc_addr_size_bytes(enum lsc_addr_size addr_size)
{
   switch (addr_size) {
   case LSC_ADDR_SIZE_A16: return 2;
   case LSC_ADDR_SIZE_A32: return 4;
   case LSC_ADDR_SIZE_A64: return 8;
   default:
      unreachable("Unsupported address size.");
   }
}

static inline uint32_t
lsc_vector_length(enum lsc_vect_size vect_size)
{
   switch (vect_size) {
   case LSC_VECT_SIZE_V1: return 1;
   case LSC_VECT_SIZE_V2: return 2;
   case LSC_VECT_SIZE_V3: return 3;
   case LSC_VECT_SIZE_V4: return 4;
   case LSC_VECT_SIZE_V8: return 8;
   case LSC_VECT_SIZE_V16: return 16;
   case LSC_VECT_SIZE_V32: return 32;
   case LSC_VECT_SIZE_V64: return 64;
   default:
      unreachable("Unsupported size of vector");
   }
}

static inline enum lsc_vect_size
lsc_vect_size(unsigned vect_size)
{
   switch(vect_size) {
   case 1:  return LSC_VECT_SIZE_V1;
   case 2:  return LSC_VECT_SIZE_V2;
   case 3:  return LSC_VECT_SIZE_V3;
   case 4:  return LSC_VECT_SIZE_V4;
   case 8:  return LSC_VECT_SIZE_V8;
   case 16: return LSC_VECT_SIZE_V16;
   case 32: return LSC_VECT_SIZE_V32;
   case 64: return LSC_VECT_SIZE_V64;
   default:
      unreachable("Unsupported vector size for dataport");
   }
}

static inline uint32_t
lsc_msg_desc(const struct intel_device_info *devinfo,
             enum lsc_opcode opcode,
             enum lsc_addr_surface_type addr_type,
             enum lsc_addr_size addr_sz,
             enum lsc_data_size data_sz, unsigned num_channels_or_cmask,
             bool transpose, unsigned cache_ctrl)
{
   assert(devinfo->has_lsc);
   assert(!transpose || lsc_opcode_has_transpose(opcode));

   unsigned msg_desc =
      SET_BITS(opcode, 5, 0) |
      SET_BITS(addr_sz, 8, 7) |
      SET_BITS(data_sz, 11, 9) |
      SET_BITS(transpose, 15, 15) |
      (devinfo->ver >= 20 ? SET_BITS(cache_ctrl, 19, 16) :
                            SET_BITS(cache_ctrl, 19, 17)) |
      SET_BITS(addr_type, 30, 29);

   if (lsc_opcode_has_cmask(opcode))
      msg_desc |= SET_BITS(num_channels_or_cmask, 15, 12);
   else
      msg_desc |= SET_BITS(lsc_vect_size(num_channels_or_cmask), 14, 12);

   return msg_desc;
}
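
/* Illustrative call (added; not in the original header): building the
 * descriptor for an A64 flat load of a vec4 of dwords.  The cache_ctrl value
 * is left symbolic since the cache-control encodings live in
 * brw_eu_defines.h and differ between generations.
 *
 *    uint32_t desc = lsc_msg_desc(devinfo, LSC_OP_LOAD,
 *                                 LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64,
 *                                 LSC_DATA_SIZE_D32, 4, false, cache_ctrl);
 *
 * where 4 is the vector length and false disables the transposed layout.
 * The accessors below (lsc_msg_desc_opcode(), lsc_msg_desc_vect_size(), ...)
 * decode the same fields back out of the immediate.
 */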

static inline enum lsc_opcode
lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo,
                    uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_opcode) GET_BITS(desc, 5, 0);
}

static inline enum lsc_addr_size
lsc_msg_desc_addr_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_addr_size) GET_BITS(desc, 8, 7);
}

static inline enum lsc_data_size
lsc_msg_desc_data_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_data_size) GET_BITS(desc, 11, 9);
}

static inline enum lsc_vect_size
lsc_msg_desc_vect_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   assert(!lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
   return (enum lsc_vect_size) GET_BITS(desc, 14, 12);
}

static inline enum lsc_cmask
lsc_msg_desc_cmask(UNUSED const struct intel_device_info *devinfo,
                   uint32_t desc)
{
   assert(devinfo->has_lsc);
   assert(lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
   return (enum lsc_cmask) GET_BITS(desc, 15, 12);
}

static inline bool
lsc_msg_desc_transpose(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 15, 15);
}

static inline unsigned
lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   assert(devinfo->has_lsc);
   return devinfo->ver >= 20 ? GET_BITS(desc, 19, 16) : GET_BITS(desc, 19, 17);
}

static inline unsigned
lsc_msg_dest_len(const struct intel_device_info *devinfo,
                 enum lsc_data_size data_sz, unsigned n)
{
   return DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * n,
                       reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo);
}

static inline unsigned
lsc_msg_addr_len(const struct intel_device_info *devinfo,
                 enum lsc_addr_size addr_sz, unsigned n)
{
   return DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * n,
                       reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo);
}
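
/* Worked example (added; not in the original header), assuming REG_SIZE is
 * the 32-byte unit from brw_reg.h: a SIMD16 A64 message carries 16 * 8 = 128
 * bytes of addresses, so lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A64, 16)
 * is DIV_ROUND_UP(128, 32) == 4 on a pre-Xe2 part (reg_unit() == 1); on Xe2+
 * the result is rounded up to whole native registers but is still expressed
 * in REG_SIZE units.
 */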

static inline enum lsc_addr_surface_type
lsc_msg_desc_addr_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_addr_surface_type) GET_BITS(desc, 30, 29);
}

static inline uint32_t
lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
                   enum lsc_fence_scope scope,
                   enum lsc_flush_type flush_type,
                   bool route_to_lsc)
{
   assert(devinfo->has_lsc);

#if INTEL_NEEDS_WA_22017182272
   assert(flush_type != LSC_FLUSH_TYPE_DISCARD);
#endif

   return SET_BITS(LSC_OP_FENCE, 5, 0) |
          SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) |
          SET_BITS(scope, 11, 9) |
          SET_BITS(flush_type, 14, 12) |
          SET_BITS(route_to_lsc, 18, 18) |
          SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
}

static inline enum lsc_fence_scope
lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_fence_scope) GET_BITS(desc, 11, 9);
}

static inline enum lsc_flush_type
lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_flush_type) GET_BITS(desc, 14, 12);
}

static inline enum lsc_backup_fence_routing
lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info *devinfo,
                                  uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_backup_fence_routing) GET_BITS(desc, 18, 18);
}

static inline uint32_t
lsc_bti_ex_desc(const struct intel_device_info *devinfo, unsigned bti)
{
   assert(devinfo->has_lsc);
   return SET_BITS(bti, 31, 24) |
          SET_BITS(0, 23, 12);  /* base offset */
}

static inline unsigned
lsc_bti_ex_desc_base_offset(const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 23, 12);
}

static inline unsigned
lsc_bti_ex_desc_index(const struct intel_device_info *devinfo,
                      uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 24);
}

static inline unsigned
lsc_flat_ex_desc_base_offset(const struct intel_device_info *devinfo,
                             uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 12);
}

static inline uint32_t
lsc_bss_ex_desc(const struct intel_device_info *devinfo,
                unsigned surface_state_index)
{
   assert(devinfo->has_lsc);
   return SET_BITS(surface_state_index, 31, 6);
}

static inline unsigned
lsc_bss_ex_desc_index(const struct intel_device_info *devinfo,
                      uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 6);
}

static inline uint32_t
brw_mdc_sm2(unsigned exec_size)
{
   assert(exec_size == 8 || exec_size == 16);
   return exec_size > 8;
}

static inline uint32_t
brw_mdc_sm2_exec_size(uint32_t sm2)
{
   assert(sm2 <= 1);
   return 8 << sm2;
}

static inline uint32_t
brw_btd_spawn_desc(ASSERTED const struct intel_device_info *devinfo,
                   unsigned exec_size, unsigned msg_type)
{
   assert(devinfo->has_ray_tracing);
   assert(devinfo->ver < 20 || exec_size == 16);

   return SET_BITS(0, 19, 19) | /* No header */
          SET_BITS(msg_type, 17, 14) |
          SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
}

static inline uint32_t
brw_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   return GET_BITS(desc, 17, 14);
}

static inline uint32_t
brw_btd_spawn_exec_size(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
}

static inline uint32_t
brw_rt_trace_ray_desc(ASSERTED const struct intel_device_info *devinfo,
                      unsigned exec_size)
{
   assert(devinfo->has_ray_tracing);
   assert(devinfo->ver < 20 || exec_size == 16);

   return SET_BITS(0, 19, 19) | /* No header */
          SET_BITS(0, 17, 14) | /* Message type */
          SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
}

static inline uint32_t
brw_rt_trace_ray_desc_exec_size(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
}

/**
 * Construct a message descriptor immediate with the specified pixel
 * interpolator function controls.
 */
static inline uint32_t
brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
                      unsigned msg_type,
                      bool noperspective,
                      bool coarse_pixel_rate,
                      unsigned exec_size,
                      unsigned group)
{
   assert(exec_size == 8 || exec_size == 16);
   const bool simd_mode = exec_size == 16;
   const bool slot_group = group >= 16;

   assert(devinfo->ver >= 10 || !coarse_pixel_rate);
   return (SET_BITS(slot_group, 11, 11) |
           SET_BITS(msg_type, 13, 12) |
           SET_BITS(!!noperspective, 14, 14) |
           SET_BITS(coarse_pixel_rate, 15, 15) |
           SET_BITS(simd_mode, 16, 16));
}

/**
 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
 * desc.  If \p desc is not an immediate it will be transparently loaded to an
 * address register using an OR instruction.
 */
void
brw_send_indirect_message(struct brw_codegen *p,
                          unsigned sfid,
                          struct brw_reg dst,
                          struct brw_reg payload,
                          struct brw_reg desc,
                          unsigned desc_imm,
                          bool eot);

void
brw_send_indirect_split_message(struct brw_codegen *p,
                                unsigned sfid,
                                struct brw_reg dst,
                                struct brw_reg payload0,
                                struct brw_reg payload1,
                                struct brw_reg desc,
                                unsigned desc_imm,
                                struct brw_reg ex_desc,
                                unsigned ex_desc_imm,
                                bool ex_desc_scratch,
                                bool ex_bso,
                                bool eot);

void gfx6_math(struct brw_codegen *p,
	       struct brw_reg dest,
	       unsigned function,
	       struct brw_reg src0,
	       struct brw_reg src1);

/**
 * Return the generation-specific jump distance scaling factor.
 *
 * Given the number of instructions to jump, we need to scale by
 * some number to obtain the actual jump distance to program in an
 * instruction.
 */
static inline unsigned
brw_jump_scale(const struct intel_device_info *devinfo)
{
   /* Broadwell measures jump targets in bytes. */
   return 16;
}

void brw_barrier(struct brw_codegen *p, struct brw_reg src);

/* If/else/endif.  Works by manipulating the execution flags on each
 * channel.
 */
brw_inst *brw_IF(struct brw_codegen *p, unsigned execute_size);

void brw_ELSE(struct brw_codegen *p);
void brw_ENDIF(struct brw_codegen *p);

/* DO/WHILE loops:
 */
brw_inst *brw_DO(struct brw_codegen *p, unsigned execute_size);

brw_inst *brw_WHILE(struct brw_codegen *p);

brw_inst *brw_BREAK(struct brw_codegen *p);
brw_inst *brw_CONT(struct brw_codegen *p);
brw_inst *brw_HALT(struct brw_codegen *p);

/* Forward jumps:
 */
brw_inst *brw_JMPI(struct brw_codegen *p, struct brw_reg index,
                   unsigned predicate_control);

void brw_NOP(struct brw_codegen *p);

void brw_WAIT(struct brw_codegen *p);

void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func);

/* Special case: there is never a destination, execution size will be
 * taken from src0:
 */
void brw_CMP(struct brw_codegen *p,
	     struct brw_reg dest,
	     unsigned conditional,
	     struct brw_reg src0,
	     struct brw_reg src1);

void brw_CMPN(struct brw_codegen *p,
              struct brw_reg dest,
              unsigned conditional,
              struct brw_reg src0,
              struct brw_reg src1);

brw_inst *brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth,
                   unsigned rcount, struct brw_reg dest, struct brw_reg src0,
                   struct brw_reg src1, struct brw_reg src2);

void
brw_memory_fence(struct brw_codegen *p,
                 struct brw_reg dst,
                 struct brw_reg src,
                 enum opcode send_op,
                 enum brw_message_target sfid,
                 uint32_t desc,
                 bool commit_enable,
                 unsigned bti);

void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx);

void
brw_float_controls_mode(struct brw_codegen *p,
                        unsigned mode, unsigned mask);

void
brw_update_reloc_imm(const struct brw_isa_info *isa,
                     brw_inst *inst,
                     uint32_t value);

void
brw_MOV_reloc_imm(struct brw_codegen *p,
                  struct brw_reg dst,
                  enum brw_reg_type src_type,
                  uint32_t id, uint32_t base);

unsigned
brw_num_sources_from_inst(const struct brw_isa_info *isa,
                          const brw_inst *inst);

void brw_set_src1(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);

void brw_set_desc_ex(struct brw_codegen *p, brw_inst *insn,
                     unsigned desc, unsigned ex_desc);

static inline void
brw_set_desc(struct brw_codegen *p, brw_inst *insn, unsigned desc)
{
   brw_set_desc_ex(p, insn, desc, 0);
}

void brw_set_uip_jip(struct brw_codegen *p, int start_offset);

enum brw_conditional_mod brw_negate_cmod(enum brw_conditional_mod cmod);
enum brw_conditional_mod brw_swap_cmod(enum brw_conditional_mod cmod);

/* brw_eu_compact.c */
void brw_compact_instructions(struct brw_codegen *p, int start_offset,
                              struct disasm_info *disasm);
void brw_uncompact_instruction(const struct brw_isa_info *isa,
                               brw_inst *dst, brw_compact_inst *src);
bool brw_try_compact_instruction(const struct brw_isa_info *isa,
                                 brw_compact_inst *dst, const brw_inst *src);

void brw_debug_compact_uncompact(const struct brw_isa_info *isa,
                                 brw_inst *orig, brw_inst *uncompacted);

/* brw_eu_validate.c */
bool brw_validate_instruction(const struct brw_isa_info *isa,
                              const brw_inst *inst, int offset,
                              unsigned inst_size,
                              struct disasm_info *disasm);
bool brw_validate_instructions(const struct brw_isa_info *isa,
                               const void *assembly, int start_offset, int end_offset,
                               struct disasm_info *disasm);

static inline int
next_offset(const struct intel_device_info *devinfo, void *store, int offset)
{
   brw_inst *insn = (brw_inst *)((char *)store + offset);

   if (brw_inst_cmpt_control(devinfo, insn))
      return offset + 8;
   else
      return offset + 16;
}
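
/* Sketch (added; not in the original header): iterating over emitted code.
 * A full native instruction is 16 bytes and a compacted one is 8, so
 * next_offset() gives the correct stride when walking p->store up to
 * p->next_insn_offset:
 *
 *    for (int offset = 0; offset < (int)p->next_insn_offset;
 *         offset = next_offset(p->devinfo, p->store, offset)) {
 *       brw_inst *insn = (brw_inst *)((char *)p->store + offset);
 *       ...
 *    }
 */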

/** Maximum SEND message length */
#define BRW_MAX_MSG_LENGTH 15

#ifdef __cplusplus
}
#endif

#endif