/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <[email protected]>
  */


#ifndef ELK_EU_H
#define ELK_EU_H

#include <stdbool.h>
#include <stdio.h>
#include "elk_inst.h"
#include "elk_compiler.h"
#include "elk_eu_defines.h"
#include "elk_isa_info.h"
#include "elk_reg.h"

#include "util/bitset.h"

#ifdef __cplusplus
extern "C" {
#endif

struct elk_disasm_info;

#define ELK_EU_MAX_INSN_STACK 5

struct elk_insn_state {
   /* One of ELK_EXECUTE_* */
   unsigned exec_size:3;

   /* Group in units of channels */
   unsigned group:5;

   /* Compression control on gfx4-5 */
   bool compressed:1;

   /* One of ELK_MASK_* */
   unsigned mask_control:1;

   bool saturate:1;

   /* One of ELK_ALIGN_* */
   unsigned access_mode:1;

   /* One of ELK_PREDICATE_* */
   enum elk_predicate predicate:4;

   bool pred_inv:1;

   /* Flag subreg.  Bottom bit is subreg, top bit is reg */
   unsigned flag_subreg:2;

   bool acc_wr_control:1;
};


/* A helper for accessing the last instruction emitted.  This makes it easy
 * to set various bits on an instruction without having to create a
 * temporary variable and assign the emitted instruction to it.
 */
#define elk_last_inst (&p->store[p->nr_insn - 1])
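
/* For instance, rather than capturing the return value in a temporary
 * (a sketch; elk_inst_set_pred_control and the register arguments stand in
 * for whatever post-emission tweak the caller needs):
 *
 *    elk_ADD(p, dst, src0, src1);
 *    elk_inst_set_pred_control(p->devinfo, elk_last_inst,
 *                              ELK_PREDICATE_NORMAL);
 */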

struct elk_codegen {
   elk_inst *store;
   int store_size;
   unsigned nr_insn;
   unsigned int next_insn_offset;

   void *mem_ctx;

   /* Allow clients to push/pop instruction state:
    */
   struct elk_insn_state stack[ELK_EU_MAX_INSN_STACK];
   struct elk_insn_state *current;

   /** Whether or not the user wants automatic exec sizes
    *
    * If true, codegen will try to automatically infer the exec size of an
    * instruction from the width of the destination register.  If false, it
    * will take whatever is set by elk_set_default_exec_size verbatim.
    *
    * This is set to true by default in elk_init_codegen.
    */
   bool automatic_exec_sizes;

   bool single_program_flow;
   const struct elk_isa_info *isa;
   const struct intel_device_info *devinfo;

   /* Control flow stacks:
    * - if_stack contains IF and ELSE instructions which must be patched
    *   (and popped) once the matching ENDIF instruction is encountered.
    *
122     */
123    int *if_stack;
124    int if_stack_depth;
125    int if_stack_array_size;
126 
127    /**
128     * loop_stack contains the instruction pointers of the starts of loops which
129     * must be patched (and popped) once the matching WHILE instruction is
130     * encountered.
131     */
132    int *loop_stack;
133    /**
134     * pre-gfx6, the BREAK and CONT instructions had to tell how many IF/ENDIF
135     * blocks they were popping out of, to fix up the mask stack.  This tracks
136     * the IF/ENDIF nesting in each current nested loop level.
137     */
138    int *if_depth_in_loop;
139    int loop_stack_depth;
140    int loop_stack_array_size;
141 
142    struct elk_shader_reloc *relocs;
143    int num_relocs;
144    int reloc_array_size;
145 };
146 
147 struct elk_label {
148    int offset;
149    int number;
150    struct elk_label *next;
151 };
152 
153 void elk_pop_insn_state( struct elk_codegen *p );
154 void elk_push_insn_state( struct elk_codegen *p );
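
/* A typical use of the state stack: override a few instruction-state
 * defaults around a helper sequence, then restore the caller's settings.
 * A minimal sketch (the MOV, its register arguments and ELK_MASK_DISABLE
 * stand in for whatever the caller actually emits):
 *
 *    elk_push_insn_state(p);
 *    elk_set_default_mask_control(p, ELK_MASK_DISABLE);
 *    elk_MOV(p, header, elk_imm_ud(0));
 *    elk_pop_insn_state(p);
 */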
unsigned elk_get_default_exec_size(struct elk_codegen *p);
unsigned elk_get_default_group(struct elk_codegen *p);
unsigned elk_get_default_access_mode(struct elk_codegen *p);
void elk_set_default_exec_size(struct elk_codegen *p, unsigned value);
void elk_set_default_mask_control( struct elk_codegen *p, unsigned value );
void elk_set_default_saturate( struct elk_codegen *p, bool enable );
void elk_set_default_access_mode( struct elk_codegen *p, unsigned access_mode );
void elk_inst_set_compression(const struct intel_device_info *devinfo,
                              elk_inst *inst, bool on);
void elk_set_default_compression(struct elk_codegen *p, bool on);
void elk_inst_set_group(const struct intel_device_info *devinfo,
                        elk_inst *inst, unsigned group);
void elk_set_default_group(struct elk_codegen *p, unsigned group);
void elk_set_default_compression_control(struct elk_codegen *p, enum elk_compression c);
void elk_set_default_predicate_control(struct elk_codegen *p, enum elk_predicate pc);
void elk_set_default_predicate_inverse(struct elk_codegen *p, bool predicate_inverse);
void elk_set_default_flag_reg(struct elk_codegen *p, int reg, int subreg);
void elk_set_default_acc_write_control(struct elk_codegen *p, unsigned value);

void elk_init_codegen(const struct elk_isa_info *isa,
                      struct elk_codegen *p, void *mem_ctx);
bool elk_has_jip(const struct intel_device_info *devinfo, enum elk_opcode opcode);
bool elk_has_uip(const struct intel_device_info *devinfo, enum elk_opcode opcode);
const struct elk_shader_reloc *elk_get_shader_relocs(struct elk_codegen *p,
                                                     unsigned *num_relocs);
const unsigned *elk_get_program( struct elk_codegen *p, unsigned *sz );

bool elk_should_dump_shader_bin(void);
void elk_dump_shader_bin(void *assembly, int start_offset, int end_offset,
                         const char *identifier);

bool elk_try_override_assembly(struct elk_codegen *p, int start_offset,
                               const char *identifier);

void elk_realign(struct elk_codegen *p, unsigned alignment);
int elk_append_data(struct elk_codegen *p, void *data,
                    unsigned size, unsigned alignment);
elk_inst *elk_next_insn(struct elk_codegen *p, unsigned opcode);
void elk_add_reloc(struct elk_codegen *p, uint32_t id,
                   enum elk_shader_reloc_type type,
                   uint32_t offset, uint32_t delta);
void elk_set_dest(struct elk_codegen *p, elk_inst *insn, struct elk_reg dest);
void elk_set_src0(struct elk_codegen *p, elk_inst *insn, struct elk_reg reg);

void elk_gfx6_resolve_implied_move(struct elk_codegen *p,
                                   struct elk_reg *src,
                                   unsigned msg_reg_nr);

/* Helpers for regular instructions:
 */
#define ALU1(OP)                                \
elk_inst *elk_##OP(struct elk_codegen *p,       \
                   struct elk_reg dest,         \
                   struct elk_reg src0);

#define ALU2(OP)                                \
elk_inst *elk_##OP(struct elk_codegen *p,       \
                   struct elk_reg dest,         \
                   struct elk_reg src0,         \
                   struct elk_reg src1);

#define ALU3(OP)                                \
elk_inst *elk_##OP(struct elk_codegen *p,       \
                   struct elk_reg dest,         \
                   struct elk_reg src0,         \
                   struct elk_reg src1,         \
                   struct elk_reg src2);

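/* For example, ALU2(ADD) expands to the declaration:
 *
 *    elk_inst *elk_ADD(struct elk_codegen *p,
 *                      struct elk_reg dest,
 *                      struct elk_reg src0,
 *                      struct elk_reg src1);
 */
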
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(F32TO16)
ALU1(F16TO32)
ALU2(ADD)
ALU2(AVG)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
ALU3(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

#undef ALU1
#undef ALU2
#undef ALU3

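/* ELK only targets Gfx8 and earlier hardware, where message and payload
 * lengths are always counted in units of a single 32-byte GRF, so the
 * register unit is hard-coded to 1 here (the assumption being that larger
 * register units only appear on newer platforms handled outside of elk).
 */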
static inline unsigned
reg_unit(const struct intel_device_info *devinfo)
{
   return 1;
}


/* Helpers for SEND instruction:
 */

/**
 * Construct a message descriptor immediate with the specified common
 * descriptor controls.
 */
static inline uint32_t
elk_message_desc(const struct intel_device_info *devinfo,
                 unsigned msg_length,
                 unsigned response_length,
                 bool header_present)
{
   if (devinfo->ver >= 5) {
      assert(msg_length % reg_unit(devinfo) == 0);
      assert(response_length % reg_unit(devinfo) == 0);
      return (SET_BITS(msg_length / reg_unit(devinfo), 28, 25) |
              SET_BITS(response_length / reg_unit(devinfo), 24, 20) |
              SET_BITS(header_present, 19, 19));
   } else {
      return (SET_BITS(msg_length, 23, 20) |
              SET_BITS(response_length, 19, 16));
   }
}
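
/* Worked example (pure arithmetic on the packing above): on gfx5+,
 * elk_message_desc(devinfo, 2, 1, true) places mlen=2 in bits 28:25,
 * rlen=1 in bits 24:20 and the header-present flag in bit 19, giving
 * 0x04180000; elk_message_desc_mlen() and elk_message_desc_rlen() below
 * recover the original 2 and 1.
 */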

static inline unsigned
elk_message_desc_mlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 5)
      return GET_BITS(desc, 28, 25) * reg_unit(devinfo);
   else
      return GET_BITS(desc, 23, 20);
}

static inline unsigned
elk_message_desc_rlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 5)
      return GET_BITS(desc, 24, 20) * reg_unit(devinfo);
   else
      return GET_BITS(desc, 19, 16);
}

static inline bool
elk_message_desc_header_present(ASSERTED
                                const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   assert(devinfo->ver >= 5);
   return GET_BITS(desc, 19, 19);
}

static inline unsigned
elk_message_ex_desc(const struct intel_device_info *devinfo,
                    unsigned ex_msg_length)
{
   assert(ex_msg_length % reg_unit(devinfo) == 0);
   return SET_BITS(ex_msg_length / reg_unit(devinfo), 9, 6);
}

static inline unsigned
elk_message_ex_desc_ex_mlen(const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   return GET_BITS(ex_desc, 9, 6) * reg_unit(devinfo);
}

static inline uint32_t
elk_urb_desc(const struct intel_device_info *devinfo,
             unsigned msg_type,
             bool per_slot_offset_present,
             bool channel_mask_present,
             unsigned global_offset)
{
   if (devinfo->ver >= 8) {
      return (SET_BITS(per_slot_offset_present, 17, 17) |
              SET_BITS(channel_mask_present, 15, 15) |
              SET_BITS(global_offset, 14, 4) |
              SET_BITS(msg_type, 3, 0));
   } else if (devinfo->ver >= 7) {
      assert(!channel_mask_present);
      return (SET_BITS(per_slot_offset_present, 16, 16) |
              SET_BITS(global_offset, 13, 3) |
              SET_BITS(msg_type, 3, 0));
   } else {
      unreachable("unhandled URB write generation");
   }
}

static inline uint32_t
elk_urb_desc_msg_type(ASSERTED const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->ver >= 7);
   return GET_BITS(desc, 3, 0);
}

/**
 * Construct a message descriptor immediate with the specified sampler
 * function controls.
 */
static inline uint32_t
elk_sampler_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned sampler,
                 unsigned msg_type,
                 unsigned simd_mode,
                 unsigned return_format)
{
   const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
                          SET_BITS(sampler, 11, 8));

   /* From the CHV Bspec: Shared Functions - Message Descriptor -
    * Sampling Engine:
    *
    *   SIMD Mode[2]  29    This field is the upper bit of the 3-bit
    *                       SIMD Mode field.
    */
   if (devinfo->ver >= 8)
      return desc | SET_BITS(msg_type, 16, 12) |
             SET_BITS(simd_mode & 0x3, 18, 17) |
             SET_BITS(simd_mode >> 2, 29, 29) |
             SET_BITS(return_format, 30, 30);
   if (devinfo->ver >= 7)
      return (desc | SET_BITS(msg_type, 16, 12) |
              SET_BITS(simd_mode, 18, 17));
   else if (devinfo->ver >= 5)
      return (desc | SET_BITS(msg_type, 15, 12) |
              SET_BITS(simd_mode, 17, 16));
   else if (devinfo->verx10 >= 45)
      return desc | SET_BITS(msg_type, 15, 12);
   else
      return (desc | SET_BITS(return_format, 13, 12) |
              SET_BITS(msg_type, 15, 14));
}
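
/* Packing illustration for the split field: a 3-bit simd_mode of 5 (0b101)
 * is stored on gfx8+ as 0b01 in bits 18:17 and 0b1 in bit 29;
 * elk_sampler_desc_simd_mode() below reassembles the full 3-bit value.
 */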

static inline unsigned
elk_sampler_desc_binding_table_index(UNUSED
                                     const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline unsigned
elk_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   return GET_BITS(desc, 11, 8);
}

static inline unsigned
elk_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 16, 12);
   else if (devinfo->verx10 >= 45)
      return GET_BITS(desc, 15, 12);
   else
      return GET_BITS(desc, 15, 14);
}

static inline unsigned
elk_sampler_desc_simd_mode(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   assert(devinfo->ver >= 5);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 18, 17) | GET_BITS(desc, 29, 29) << 2;
   else if (devinfo->ver >= 7)
      return GET_BITS(desc, 18, 17);
   else
      return GET_BITS(desc, 17, 16);
}

static inline unsigned
elk_sampler_desc_return_format(ASSERTED const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->verx10 == 40 || devinfo->ver >= 8);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 30, 30);
   else
      return GET_BITS(desc, 13, 12);
}

/**
 * Construct a message descriptor for the dataport
 */
static inline uint32_t
elk_dp_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   /* Prior to gfx6, things are too inconsistent; use the dp_read/write_desc
    * helpers instead.
    */
   assert(devinfo->ver >= 6);
   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
   if (devinfo->ver >= 8) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 18, 14));
   } else if (devinfo->ver >= 7) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 17, 14));
   } else {
      return (desc | SET_BITS(msg_control, 12, 8) |
              SET_BITS(msg_type, 16, 13));
   }
}

static inline unsigned
elk_dp_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline unsigned
elk_dp_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 18, 14);
   else if (devinfo->ver >= 7)
      return GET_BITS(desc, 17, 14);
   else
      return GET_BITS(desc, 16, 13);
}

static inline unsigned
elk_dp_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 13, 8);
   else
      return GET_BITS(desc, 12, 8);
}

/**
 * Construct a message descriptor immediate with the specified dataport read
 * function controls.
 */
static inline uint32_t
elk_dp_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned msg_type,
                 unsigned target_cache)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
   else if (devinfo->verx10 >= 45)
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 10, 8) |
              SET_BITS(msg_type, 13, 11) |
              SET_BITS(target_cache, 15, 14));
   else
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(msg_type, 13, 12) |
              SET_BITS(target_cache, 15, 14));
}

static inline unsigned
elk_dp_read_desc_msg_type(const struct intel_device_info *devinfo,
                          uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc_msg_type(devinfo, desc);
   else if (devinfo->verx10 >= 45)
      return GET_BITS(desc, 13, 11);
   else
      return GET_BITS(desc, 13, 12);
}

static inline unsigned
elk_dp_read_desc_msg_control(const struct intel_device_info *devinfo,
                             uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc_msg_control(devinfo, desc);
   else if (devinfo->verx10 >= 45)
      return GET_BITS(desc, 10, 8);
   else
      return GET_BITS(desc, 11, 8);
}

/**
 * Construct a message descriptor immediate with the specified dataport write
 * function controls.
 */
static inline uint32_t
elk_dp_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  unsigned msg_type,
                  unsigned send_commit_msg)
{
   assert(devinfo->ver <= 6 || !send_commit_msg);
   if (devinfo->ver >= 6) {
      return elk_dp_desc(devinfo, binding_table_index, msg_type, msg_control) |
             SET_BITS(send_commit_msg, 17, 17);
   } else {
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(msg_type, 14, 12) |
              SET_BITS(send_commit_msg, 15, 15));
   }
}

static inline unsigned
elk_dp_write_desc_msg_type(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc_msg_type(devinfo, desc);
   else
      return GET_BITS(desc, 14, 12);
}

static inline unsigned
elk_dp_write_desc_msg_control(const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc_msg_control(devinfo, desc);
   else
      return GET_BITS(desc, 11, 8);
}

static inline bool
elk_dp_write_desc_write_commit(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver <= 6);
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 17, 17);
   else
      return GET_BITS(desc, 15, 15);
}

/**
 * Construct a message descriptor immediate with the specified dataport
 * surface function controls.
 */
static inline uint32_t
elk_dp_surface_desc(const struct intel_device_info *devinfo,
                    unsigned msg_type,
                    unsigned msg_control)
{
   assert(devinfo->ver >= 7);
   /* We'll OR in the binding table index later */
   return elk_dp_desc(devinfo, 0, msg_type, msg_control);
}

static inline uint32_t
elk_dp_untyped_atomic_desc(const struct intel_device_info *devinfo,
                           unsigned exec_size, /**< 0 for SIMD4x2 */
                           unsigned atomic_op,
                           bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (devinfo->verx10 >= 75) {
      if (exec_size > 0) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
      } else {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
      }
   } else {
      msg_type = GFX7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
   }

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}
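
/* In the msg_control packing above, bit 4 is set only for SIMD8 operation
 * (0 < exec_size <= 8); SIMD16 and the SIMD4x2 case (exec_size == 0) leave
 * it clear.  E.g. a SIMD8 atomic with a response yields
 * msg_control = atomic_op | (1 << 4) | (1 << 5).
 */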

static inline unsigned
elk_mdc_cmask(unsigned num_channels)
{
   /* See also MDC_CMASK in the SKL PRM Vol 2d. */
   return 0xf & (0xf << num_channels);
}

static inline unsigned
lsc_cmask(unsigned num_channels)
{
   assert(num_channels > 0 && num_channels <= 4);
   return BITSET_MASK(num_channels);
}
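
/* The two channel-mask encodings are inverses of each other:
 * elk_mdc_cmask() returns a disable mask (num_channels = 2 -> 0xc,
 * channels 2 and 3 masked off), whereas lsc_cmask() returns an enable mask
 * (num_channels = 2 -> 0x3, channels 0 and 1 selected).
 */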

static inline uint32_t
elk_dp_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned num_channels,
                               bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (write) {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE;
      } else {
         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_WRITE;
      }
   } else {
      /* Read */
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
      } else {
         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_READ;
      }
   }

   /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
   if (write && devinfo->verx10 == 70 && exec_size == 0)
      exec_size = 8;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline unsigned
elk_mdc_ds(unsigned bit_size)
{
   switch (bit_size) {
   case 8:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
   case 16:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
   case 32:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
   default:
      unreachable("Unsupported bit_size for byte scattered messages");
   }
}

static inline uint32_t
elk_dp_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                              unsigned exec_size,
                              unsigned bit_size,
                              bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   assert(devinfo->verx10 >= 75);
   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE :
              HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ;

   assert(exec_size > 0);
   const unsigned msg_control =
      SET_BITS(exec_size == 16, 0, 0) |
      SET_BITS(elk_mdc_ds(bit_size), 3, 2);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_dword_scattered_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size,
                               bool write)
{
   assert(exec_size == 8 || exec_size == 16);

   unsigned msg_type;
   if (write) {
      if (devinfo->ver >= 6) {
         msg_type = GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
      } else {
         msg_type = ELK_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
      }
   } else {
      if (devinfo->ver >= 7) {
         msg_type = GFX7_DATAPORT_DC_DWORD_SCATTERED_READ;
      } else if (devinfo->verx10 >= 45) {
         msg_type = G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
      } else {
         msg_type = ELK_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
      }
   }

   const unsigned msg_control =
      SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
      SET_BITS(exec_size == 16, 0, 0);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_oword_block_rw_desc(const struct intel_device_info *devinfo,
                           bool align_16B,
                           unsigned num_dwords,
                           bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   const unsigned msg_type =
      write ?     GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE :
      align_16B ? GFX7_DATAPORT_DC_OWORD_BLOCK_READ :
                  GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ;

   const unsigned msg_control =
      SET_BITS(ELK_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_a64_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                                   unsigned exec_size, /**< 0 for SIMD4x2 */
                                   unsigned num_channels,
                                   bool write)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 8);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE :
              GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
elk_dp_a64_oword_block_rw_desc(const struct intel_device_info *devinfo,
                               bool align_16B,
                               unsigned num_dwords,
                               bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE :
              GFX8_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ;

   unsigned msg_control =
      SET_BITS(!align_16B, 4, 3) |
      SET_BITS(ELK_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

/**
 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
 * Skylake PRM).
 */
static inline uint32_t
elk_mdc_a64_ds(unsigned elems)
{
   switch (elems) {
   case 1:  return 0;
   case 2:  return 1;
   case 4:  return 2;
   case 8:  return 3;
   default:
      unreachable("Unsupported element count for A64 scattered message");
   }
}

static inline uint32_t
elk_dp_a64_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                                  unsigned exec_size, /**< 0 for SIMD4x2 */
                                  unsigned bit_size,
                                  bool write)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 8);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ;

   const unsigned msg_control =
      SET_BITS(GFX8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) |
      SET_BITS(elk_mdc_a64_ds(bit_size / 8), 3, 2) |
      SET_BITS(exec_size == 16, 4, 4);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
elk_dp_a64_untyped_atomic_desc(const struct intel_device_info *devinfo,
                               ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned bit_size,
                               unsigned atomic_op,
                               bool response_expected)
{
   assert(exec_size == 8);
   assert(devinfo->ver >= 8);
   assert(bit_size == 32 || bit_size == 64);

   const unsigned msg_type = GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(bit_size == 64, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
elk_dp_typed_atomic_desc(const struct intel_device_info *devinfo,
                         unsigned exec_size,
                         unsigned exec_group,
                         unsigned atomic_op,
                         bool response_expected)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   unsigned msg_type;
   if (devinfo->verx10 >= 75) {
      if (exec_size == 0) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2;
      } else {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;
      }
   } else {
      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
      assert(exec_size > 0);
      msg_type = GFX7_DATAPORT_RC_TYPED_ATOMIC_OP;
   }

   const bool high_sample_mask = (exec_group / 8) % 2 == 1;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(high_sample_mask, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_typed_surface_rw_desc(const struct intel_device_info *devinfo,
                             unsigned exec_size,
                             unsigned exec_group,
                             unsigned num_channels,
                             bool write)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   /* Typed surface reads and writes don't support SIMD16 */
   assert(exec_size <= 8);

   unsigned msg_type;
   if (write) {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE;
      } else {
         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE;
      }
   } else {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;
      } else {
         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_READ;
      }
   }

   /* See also MDC_SG3 in the SKL PRM Vol 2d. */
   unsigned msg_control;
   if (devinfo->verx10 >= 75) {
      /* See also MDC_SG3 in the SKL PRM Vol 2d. */
      const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */
                                  1 + ((exec_group / 8) % 2);

      msg_control =
         SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
         SET_BITS(slot_group, 5, 4);
   } else {
      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
      assert(exec_size > 0);
      const unsigned slot_group = ((exec_group / 8) % 2);

      msg_control =
         SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
         SET_BITS(slot_group, 5, 5);
   }

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_fb_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   /* Prior to gfx6, things are too inconsistent; use the fb_(read|write)_desc
    * helpers instead.
    */
   assert(devinfo->ver >= 6);
   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
   if (devinfo->ver >= 7) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 17, 14));
   } else {
      return (desc | SET_BITS(msg_control, 12, 8) |
              SET_BITS(msg_type, 16, 13));
   }
}

static inline unsigned
elk_fb_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline uint32_t
elk_fb_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 13, 8);
   else
      return GET_BITS(desc, 12, 8);
}

static inline unsigned
elk_fb_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 17, 14);
   else
      return GET_BITS(desc, 16, 13);
}

static inline uint32_t
elk_fb_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  bool last_render_target,
                  bool coarse_write)
{
   const unsigned msg_type =
      devinfo->ver >= 6 ?
      GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE :
      ELK_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;

   assert(!coarse_write);

   if (devinfo->ver >= 6) {
      return elk_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
             SET_BITS(last_render_target, 12, 12) |
             SET_BITS(coarse_write, 18, 18);
   } else {
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(last_render_target, 11, 11) |
              SET_BITS(msg_type, 14, 12));
   }
}

static inline unsigned
elk_fb_write_desc_msg_type(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_fb_desc_msg_type(devinfo, desc);
   else
      return GET_BITS(desc, 14, 12);
}

static inline unsigned
elk_fb_write_desc_msg_control(const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_fb_desc_msg_control(devinfo, desc);
   else
      return GET_BITS(desc, 11, 8);
}

static inline bool
elk_fb_write_desc_last_render_target(const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 12, 12);
   else
      return GET_BITS(desc, 11, 11);
}

static inline bool
elk_fb_write_desc_write_commit(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver <= 6);
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 17, 17);
   else
      return GET_BITS(desc, 15, 15);
}

static inline bool
elk_lsc_opcode_has_cmask(enum elk_lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK;
}

static inline bool
elk_lsc_opcode_has_transpose(enum elk_lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD || opcode == LSC_OP_STORE;
}

static inline bool
elk_lsc_opcode_is_store(enum elk_lsc_opcode opcode)
{
   return opcode == LSC_OP_STORE ||
          opcode == LSC_OP_STORE_CMASK;
}

static inline bool
elk_lsc_opcode_is_atomic(enum elk_lsc_opcode opcode)
{
   switch (opcode) {
   case LSC_OP_ATOMIC_INC:
   case LSC_OP_ATOMIC_DEC:
   case LSC_OP_ATOMIC_LOAD:
   case LSC_OP_ATOMIC_STORE:
   case LSC_OP_ATOMIC_ADD:
   case LSC_OP_ATOMIC_SUB:
   case LSC_OP_ATOMIC_MIN:
   case LSC_OP_ATOMIC_MAX:
   case LSC_OP_ATOMIC_UMIN:
   case LSC_OP_ATOMIC_UMAX:
   case LSC_OP_ATOMIC_CMPXCHG:
   case LSC_OP_ATOMIC_FADD:
   case LSC_OP_ATOMIC_FSUB:
   case LSC_OP_ATOMIC_FMIN:
   case LSC_OP_ATOMIC_FMAX:
   case LSC_OP_ATOMIC_FCMPXCHG:
   case LSC_OP_ATOMIC_AND:
   case LSC_OP_ATOMIC_OR:
   case LSC_OP_ATOMIC_XOR:
      return true;

   default:
      return false;
   }
}

static inline bool
elk_lsc_opcode_is_atomic_float(enum elk_lsc_opcode opcode)
{
   switch (opcode) {
   case LSC_OP_ATOMIC_FADD:
   case LSC_OP_ATOMIC_FSUB:
   case LSC_OP_ATOMIC_FMIN:
   case LSC_OP_ATOMIC_FMAX:
   case LSC_OP_ATOMIC_FCMPXCHG:
      return true;

   default:
      return false;
   }
}

static inline unsigned
lsc_op_num_data_values(unsigned _op)
{
   enum elk_lsc_opcode op = (enum elk_lsc_opcode) _op;

   switch (op) {
   case LSC_OP_ATOMIC_CMPXCHG:
   case LSC_OP_ATOMIC_FCMPXCHG:
      return 2;
   case LSC_OP_ATOMIC_INC:
   case LSC_OP_ATOMIC_DEC:
   case LSC_OP_LOAD:
   case LSC_OP_LOAD_CMASK:
   case LSC_OP_FENCE:
      /* XXX: actually check docs */
      return 0;
   default:
      return 1;
   }
}

static inline unsigned
lsc_op_to_legacy_atomic(unsigned _op)
{
   enum elk_lsc_opcode op = (enum elk_lsc_opcode) _op;

   switch (op) {
   case LSC_OP_ATOMIC_INC:
      return ELK_AOP_INC;
   case LSC_OP_ATOMIC_DEC:
      return ELK_AOP_DEC;
   case LSC_OP_ATOMIC_STORE:
      return ELK_AOP_MOV;
   case LSC_OP_ATOMIC_ADD:
      return ELK_AOP_ADD;
   case LSC_OP_ATOMIC_SUB:
      return ELK_AOP_SUB;
   case LSC_OP_ATOMIC_MIN:
      return ELK_AOP_IMIN;
   case LSC_OP_ATOMIC_MAX:
      return ELK_AOP_IMAX;
   case LSC_OP_ATOMIC_UMIN:
      return ELK_AOP_UMIN;
   case LSC_OP_ATOMIC_UMAX:
      return ELK_AOP_UMAX;
   case LSC_OP_ATOMIC_CMPXCHG:
      return ELK_AOP_CMPWR;
   case LSC_OP_ATOMIC_FADD:
      return ELK_AOP_FADD;
   case LSC_OP_ATOMIC_FMIN:
      return ELK_AOP_FMIN;
   case LSC_OP_ATOMIC_FMAX:
      return ELK_AOP_FMAX;
   case LSC_OP_ATOMIC_FCMPXCHG:
      return ELK_AOP_FCMPWR;
   case LSC_OP_ATOMIC_AND:
      return ELK_AOP_AND;
   case LSC_OP_ATOMIC_OR:
      return ELK_AOP_OR;
   case LSC_OP_ATOMIC_XOR:
      return ELK_AOP_XOR;
   /* No LSC op maps to ELK_AOP_PREDEC */
   case LSC_OP_ATOMIC_LOAD:
   case LSC_OP_ATOMIC_FSUB:
      unreachable("no corresponding legacy atomic operation");
   case LSC_OP_LOAD:
   case LSC_OP_LOAD_CMASK:
   case LSC_OP_STORE:
   case LSC_OP_STORE_CMASK:
   case LSC_OP_FENCE:
      unreachable("not an atomic op");
   }

   unreachable("invalid LSC op");
}

static inline uint32_t
lsc_data_size_bytes(enum lsc_data_size data_size)
{
   switch (data_size) {
   case LSC_DATA_SIZE_D8:
      return 1;
   case LSC_DATA_SIZE_D16:
      return 2;
   case LSC_DATA_SIZE_D32:
   case LSC_DATA_SIZE_D8U32:
   case LSC_DATA_SIZE_D16U32:
   case LSC_DATA_SIZE_D16BF32:
      return 4;
   case LSC_DATA_SIZE_D64:
      return 8;
   default:
      unreachable("Unsupported data payload size.");
   }
}

static inline uint32_t
lsc_addr_size_bytes(enum lsc_addr_size addr_size)
{
   switch (addr_size) {
   case LSC_ADDR_SIZE_A16: return 2;
   case LSC_ADDR_SIZE_A32: return 4;
   case LSC_ADDR_SIZE_A64: return 8;
   default:
      unreachable("Unsupported address size.");
   }
}

static inline uint32_t
lsc_vector_length(enum lsc_vect_size vect_size)
{
   switch (vect_size) {
   case LSC_VECT_SIZE_V1: return 1;
   case LSC_VECT_SIZE_V2: return 2;
   case LSC_VECT_SIZE_V3: return 3;
   case LSC_VECT_SIZE_V4: return 4;
   case LSC_VECT_SIZE_V8: return 8;
   case LSC_VECT_SIZE_V16: return 16;
   case LSC_VECT_SIZE_V32: return 32;
   case LSC_VECT_SIZE_V64: return 64;
   default:
      unreachable("Unsupported size of vector");
   }
}

static inline enum lsc_vect_size
lsc_vect_size(unsigned vect_size)
{
   switch(vect_size) {
   case 1:  return LSC_VECT_SIZE_V1;
   case 2:  return LSC_VECT_SIZE_V2;
   case 3:  return LSC_VECT_SIZE_V3;
   case 4:  return LSC_VECT_SIZE_V4;
   case 8:  return LSC_VECT_SIZE_V8;
   case 16: return LSC_VECT_SIZE_V16;
   case 32: return LSC_VECT_SIZE_V32;
   case 64: return LSC_VECT_SIZE_V64;
   default:
      unreachable("Unsupported vector size for dataport");
   }
}

static inline uint32_t
lsc_msg_desc_wcmask(UNUSED const struct intel_device_info *devinfo,
             enum elk_lsc_opcode opcode, unsigned simd_size,
             enum lsc_addr_surface_type addr_type,
             enum lsc_addr_size addr_sz, unsigned num_coordinates,
             enum lsc_data_size data_sz, unsigned num_channels,
             bool transpose, unsigned cache_ctrl, bool has_dest, unsigned cmask)
{
   assert(devinfo->has_lsc);

   unsigned dest_length = !has_dest ? 0 :
      DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * num_channels * simd_size,
                   reg_unit(devinfo) * REG_SIZE);

   unsigned src0_length =
      DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * num_coordinates * simd_size,
                   reg_unit(devinfo) * REG_SIZE);

   assert(!transpose || elk_lsc_opcode_has_transpose(opcode));

   unsigned msg_desc =
      SET_BITS(opcode, 5, 0) |
      SET_BITS(addr_sz, 8, 7) |
      SET_BITS(data_sz, 11, 9) |
      SET_BITS(transpose, 15, 15) |
      SET_BITS(cache_ctrl, 19, 17) |
      SET_BITS(dest_length, 24, 20) |
      SET_BITS(src0_length, 28, 25) |
      SET_BITS(addr_type, 30, 29);

   if (elk_lsc_opcode_has_cmask(opcode))
      msg_desc |= SET_BITS(cmask ? cmask : lsc_cmask(num_channels), 15, 12);
   else
      msg_desc |= SET_BITS(lsc_vect_size(num_channels), 14, 12);

   return msg_desc;
}

static inline uint32_t
lsc_msg_desc(UNUSED const struct intel_device_info *devinfo,
             enum elk_lsc_opcode opcode, unsigned simd_size,
             enum lsc_addr_surface_type addr_type,
             enum lsc_addr_size addr_sz, unsigned num_coordinates,
             enum lsc_data_size data_sz, unsigned num_channels,
             bool transpose, unsigned cache_ctrl, bool has_dest)
{
   return lsc_msg_desc_wcmask(devinfo, opcode, simd_size, addr_type, addr_sz,
         num_coordinates, data_sz, num_channels, transpose, cache_ctrl,
         has_dest, 0);
}
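
/* Worked length computation (assuming a 32-byte GRF, i.e. REG_SIZE == 32
 * and reg_unit() == 1): a SIMD16 LSC_OP_LOAD of one 32-bit channel with A32
 * addresses gets dest_length = DIV_ROUND_UP(4 * 1 * 16, 32) = 2 GRFs and
 * src0_length = DIV_ROUND_UP(4 * 1 * 16, 32) = 2 GRFs of address payload.
 */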
1365 
1366 static inline enum elk_lsc_opcode
lsc_msg_desc_opcode(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1367 lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo,
1368                     uint32_t desc)
1369 {
1370    assert(devinfo->has_lsc);
1371    return (enum elk_lsc_opcode) GET_BITS(desc, 5, 0);
1372 }
1373 
1374 static inline enum lsc_addr_size
lsc_msg_desc_addr_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1375 lsc_msg_desc_addr_size(UNUSED const struct intel_device_info *devinfo,
1376                        uint32_t desc)
1377 {
1378    assert(devinfo->has_lsc);
1379    return (enum lsc_addr_size) GET_BITS(desc, 8, 7);
1380 }
1381 
1382 static inline enum lsc_data_size
lsc_msg_desc_data_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1383 lsc_msg_desc_data_size(UNUSED const struct intel_device_info *devinfo,
1384                        uint32_t desc)
1385 {
1386    assert(devinfo->has_lsc);
1387    return (enum lsc_data_size) GET_BITS(desc, 11, 9);
1388 }
1389 
1390 static inline enum lsc_vect_size
lsc_msg_desc_vect_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1391 lsc_msg_desc_vect_size(UNUSED const struct intel_device_info *devinfo,
1392                        uint32_t desc)
1393 {
1394    assert(devinfo->has_lsc);
1395    assert(!elk_lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
1396    return (enum lsc_vect_size) GET_BITS(desc, 14, 12);
1397 }
1398 
1399 static inline enum lsc_cmask
lsc_msg_desc_cmask(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1400 lsc_msg_desc_cmask(UNUSED const struct intel_device_info *devinfo,
1401                    uint32_t desc)
1402 {
1403    assert(devinfo->has_lsc);
1404    assert(elk_lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
1405    return (enum lsc_cmask) GET_BITS(desc, 15, 12);
1406 }
1407 
1408 static inline bool
lsc_msg_desc_transpose(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1409 lsc_msg_desc_transpose(UNUSED const struct intel_device_info *devinfo,
1410                        uint32_t desc)
1411 {
1412    assert(devinfo->has_lsc);
1413    return GET_BITS(desc, 15, 15);
1414 }
1415 
1416 static inline unsigned
lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1417 lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo,
1418                         uint32_t desc)
1419 {
1420    assert(devinfo->has_lsc);
1421    return GET_BITS(desc, 19, 17);
1422 }
1423 
1424 static inline unsigned
lsc_msg_desc_dest_len(const struct intel_device_info * devinfo,uint32_t desc)1425 lsc_msg_desc_dest_len(const struct intel_device_info *devinfo,
1426                       uint32_t desc)
1427 {
1428    assert(devinfo->has_lsc);
1429    return GET_BITS(desc, 24, 20) * reg_unit(devinfo);
1430 }
1431 
1432 static inline unsigned
lsc_msg_desc_src0_len(const struct intel_device_info * devinfo,uint32_t desc)1433 lsc_msg_desc_src0_len(const struct intel_device_info *devinfo,
1434                       uint32_t desc)
1435 {
1436    assert(devinfo->has_lsc);
1437    return GET_BITS(desc, 28, 25) * reg_unit(devinfo);
1438 }
1439 
1440 static inline enum lsc_addr_surface_type
lsc_msg_desc_addr_type(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1441 lsc_msg_desc_addr_type(UNUSED const struct intel_device_info *devinfo,
1442                        uint32_t desc)
1443 {
1444    assert(devinfo->has_lsc);
1445    return (enum lsc_addr_surface_type) GET_BITS(desc, 30, 29);
1446 }
1447 

static inline uint32_t
lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
                   enum lsc_fence_scope scope,
                   enum lsc_flush_type flush_type,
                   bool route_to_lsc)
{
   assert(devinfo->has_lsc);
   return SET_BITS(LSC_OP_FENCE, 5, 0) |
          SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) |
          SET_BITS(scope, 11, 9) |
          SET_BITS(flush_type, 14, 12) |
          SET_BITS(route_to_lsc, 18, 18) |
          SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
}

static inline enum lsc_fence_scope
lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_fence_scope) GET_BITS(desc, 11, 9);
}

static inline enum lsc_flush_type
lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_flush_type) GET_BITS(desc, 14, 12);
}

static inline enum lsc_backup_fence_routing
lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info *devinfo,
                                  uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_backup_fence_routing) GET_BITS(desc, 18, 18);
}
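
/* Usage sketch (editor's illustration): the fence builder and the
 * decoders above round-trip, for any `scope`/`flush_type` values from
 * the corresponding enums:
 *
 *    uint32_t fence = lsc_fence_msg_desc(devinfo, scope, flush_type,
 *                                        false);
 *    assert(lsc_fence_msg_desc_scope(devinfo, fence) == scope);
 *    assert(lsc_fence_msg_desc_flush_type(devinfo, fence) == flush_type);
 *
 * The opcode, A32 address size, and flat surface type are fixed by the
 * builder and are not parameters.
 */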

static inline uint32_t
lsc_bti_ex_desc(const struct intel_device_info *devinfo, unsigned bti)
{
   assert(devinfo->has_lsc);
   return SET_BITS(bti, 31, 24) |
          SET_BITS(0, 23, 12);  /* base offset */
}

static inline unsigned
lsc_bti_ex_desc_base_offset(const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 23, 12);
}

static inline unsigned
lsc_bti_ex_desc_index(const struct intel_device_info *devinfo,
                      uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 24);
}

static inline unsigned
lsc_flat_ex_desc_base_offset(const struct intel_device_info *devinfo,
                             uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 12);
}

static inline uint32_t
lsc_bss_ex_desc(const struct intel_device_info *devinfo,
                unsigned surface_state_index)
{
   assert(devinfo->has_lsc);
   return SET_BITS(surface_state_index, 31, 6);
}

static inline unsigned
lsc_bss_ex_desc_index(const struct intel_device_info *devinfo,
                      uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 6);
}
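
/* Usage sketch (editor's illustration): the extended-descriptor helpers
 * above are simple pack/unpack pairs, e.g. for a binding table index
 * `bti`:
 *
 *    uint32_t ex_desc = lsc_bti_ex_desc(devinfo, bti);
 *    assert(lsc_bti_ex_desc_index(devinfo, ex_desc) == bti);
 *    assert(lsc_bti_ex_desc_base_offset(devinfo, ex_desc) == 0);
 *
 * lsc_bss_ex_desc()/lsc_bss_ex_desc_index() behave the same way for a
 * bindless surface state index.
 */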

static inline uint32_t
elk_mdc_sm2(unsigned exec_size)
{
   assert(exec_size == 8 || exec_size == 16);
   return exec_size > 8;
}

static inline uint32_t
elk_mdc_sm2_exec_size(uint32_t sm2)
{
   assert(sm2 <= 1);
   return 8 << sm2;
}
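
/* Usage sketch (editor's illustration): SIMD8 encodes as 0 and SIMD16
 * as 1, so the two helpers above are inverses:
 *
 *    assert(elk_mdc_sm2_exec_size(elk_mdc_sm2(8)) == 8);
 *    assert(elk_mdc_sm2_exec_size(elk_mdc_sm2(16)) == 16);
 */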

static inline uint32_t
elk_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   return GET_BITS(desc, 17, 14);
}

static inline uint32_t
elk_btd_spawn_exec_size(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   return elk_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
}

/**
 * Construct a message descriptor immediate with the specified pixel
 * interpolator function controls.
 */
static inline uint32_t
elk_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
                      unsigned msg_type,
                      bool noperspective,
                      bool coarse_pixel_rate,
                      unsigned exec_size,
                      unsigned group)
{
   assert(exec_size == 8 || exec_size == 16);
   const bool simd_mode = exec_size == 16;
   const bool slot_group = group >= 16;

   assert(!coarse_pixel_rate);
   return (SET_BITS(slot_group, 11, 11) |
           SET_BITS(msg_type, 13, 12) |
           SET_BITS(!!noperspective, 14, 14) |
           SET_BITS(coarse_pixel_rate, 15, 15) |
           SET_BITS(simd_mode, 16, 16));
}
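
/* Usage sketch (editor's illustration): a SIMD16 descriptor for the
 * upper slot group, with `msg_type` supplied by the caller:
 *
 *    uint32_t desc = elk_pixel_interp_desc(devinfo, msg_type,
 *                                          false,  // noperspective
 *                                          false,  // coarse_pixel_rate
 *                                          16,     // exec_size
 *                                          16);    // group
 *
 * coarse_pixel_rate must be false here; the assert above rejects it.
 */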

void elk_urb_WRITE(struct elk_codegen *p,
                   struct elk_reg dest,
                   unsigned msg_reg_nr,
                   struct elk_reg src0,
                   enum elk_urb_write_flags flags,
                   unsigned msg_length,
                   unsigned response_length,
                   unsigned offset,
                   unsigned swizzle);

/**
 * Send a message to shared unit \p sfid with a possibly indirect descriptor
 * \p desc.  If \p desc is not an immediate it will be transparently loaded
 * into an address register using an OR instruction.
 */
void
elk_send_indirect_message(struct elk_codegen *p,
                          unsigned sfid,
                          struct elk_reg dst,
                          struct elk_reg payload,
                          struct elk_reg desc,
                          unsigned desc_imm,
                          bool eot);
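
/* Usage sketch (editor's illustration): with a fully immediate
 * descriptor, pass the bits in `desc_imm` and an immediate zero as
 * `desc` (elk_imm_ud() is assumed from elk_reg.h); only a non-immediate
 * `desc` triggers the address-register OR described above:
 *
 *    elk_send_indirect_message(p, sfid, dst, payload,
 *                              elk_imm_ud(0), desc_imm, false);
 */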

void
elk_send_indirect_split_message(struct elk_codegen *p,
                                unsigned sfid,
                                struct elk_reg dst,
                                struct elk_reg payload0,
                                struct elk_reg payload1,
                                struct elk_reg desc,
                                unsigned desc_imm,
                                struct elk_reg ex_desc,
                                unsigned ex_desc_imm,
                                bool ex_desc_scratch,
                                bool ex_bso,
                                bool eot);

void elk_ff_sync(struct elk_codegen *p,
                 struct elk_reg dest,
                 unsigned msg_reg_nr,
                 struct elk_reg src0,
                 bool allocate,
                 unsigned response_length,
                 bool eot);

void elk_svb_write(struct elk_codegen *p,
                   struct elk_reg dest,
                   unsigned msg_reg_nr,
                   struct elk_reg src0,
                   unsigned binding_table_index,
                   bool send_commit_msg);

elk_inst *elk_fb_WRITE(struct elk_codegen *p,
                       struct elk_reg payload,
                       struct elk_reg implied_header,
                       unsigned msg_control,
                       unsigned binding_table_index,
                       unsigned msg_length,
                       unsigned response_length,
                       bool eot,
                       bool last_render_target,
                       bool header_present);

void elk_SAMPLE(struct elk_codegen *p,
                struct elk_reg dest,
                unsigned msg_reg_nr,
                struct elk_reg src0,
                unsigned binding_table_index,
                unsigned sampler,
                unsigned msg_type,
                unsigned response_length,
                unsigned msg_length,
                unsigned header_present,
                unsigned simd_mode,
                unsigned return_format);

void elk_adjust_sampler_state_pointer(struct elk_codegen *p,
                                      struct elk_reg header,
                                      struct elk_reg sampler_index);

void elk_gfx4_math(struct elk_codegen *p,
                   struct elk_reg dest,
                   unsigned function,
                   unsigned msg_reg_nr,
                   struct elk_reg src,
                   unsigned precision);

void elk_gfx6_math(struct elk_codegen *p,
                   struct elk_reg dest,
                   unsigned function,
                   struct elk_reg src0,
                   struct elk_reg src1);

void elk_oword_block_read(struct elk_codegen *p,
                          struct elk_reg dest,
                          struct elk_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index);

unsigned elk_scratch_surface_idx(const struct elk_codegen *p);

void elk_oword_block_read_scratch(struct elk_codegen *p,
                                  struct elk_reg dest,
                                  struct elk_reg mrf,
                                  int num_regs,
                                  unsigned offset);

void elk_oword_block_write_scratch(struct elk_codegen *p,
                                   struct elk_reg mrf,
                                   int num_regs,
                                   unsigned offset);

void elk_gfx7_block_read_scratch(struct elk_codegen *p,
                                 struct elk_reg dest,
                                 int num_regs,
                                 unsigned offset);

/**
 * Return the generation-specific jump distance scaling factor.
 *
 * Given the number of instructions to jump, we need to scale by
 * some number to obtain the actual jump distance to program in an
 * instruction.
 */
static inline unsigned
elk_jump_scale(const struct intel_device_info *devinfo)
{
   /* Broadwell and later measure jump targets in bytes. */
   if (devinfo->ver >= 8)
      return 16;

   /* Ironlake and later measure jump targets in 64-bit data chunks (in
    * order to support compaction), so each 128-bit instruction requires
    * 2 chunks.
    */
   if (devinfo->ver >= 5)
      return 2;

   /* Gfx4 simply uses the number of 128-bit instructions. */
   return 1;
}
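
/* Usage sketch (editor's illustration): convert an instruction count
 * into the distance actually programmed in a branch:
 *
 *    int jmp = insn_count * elk_jump_scale(devinfo);
 *
 * e.g. a 3-instruction forward jump is encoded as 48 on Gfx8+ (bytes),
 * 6 on Gfx5-7 (64-bit chunks), and 3 on Gfx4 (instructions).
 */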

void elk_barrier(struct elk_codegen *p, struct elk_reg src);

/* If/else/endif.  Works by manipulating the execution flags on each
 * channel.
 */
elk_inst *elk_IF(struct elk_codegen *p, unsigned execute_size);
elk_inst *elk_gfx6_IF(struct elk_codegen *p, enum elk_conditional_mod conditional,
                      struct elk_reg src0, struct elk_reg src1);

void elk_ELSE(struct elk_codegen *p);
void elk_ENDIF(struct elk_codegen *p);
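
/* Usage sketch (editor's illustration): emit an if/else/endif block
 * (ELK_EXECUTE_8 is an assumed ELK_EXECUTE_* spelling from
 * elk_eu_defines.h):
 *
 *    elk_IF(p, ELK_EXECUTE_8);
 *    ...   // "then" instructions
 *    elk_ELSE(p);
 *    ...   // "else" instructions
 *    elk_ENDIF(p);
 */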

/* DO/WHILE loops:
 */
elk_inst *elk_DO(struct elk_codegen *p, unsigned execute_size);

elk_inst *elk_WHILE(struct elk_codegen *p);

elk_inst *elk_BREAK(struct elk_codegen *p);
elk_inst *elk_CONT(struct elk_codegen *p);
elk_inst *elk_HALT(struct elk_codegen *p);
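
/* Usage sketch (editor's illustration): a loop is bracketed by
 * elk_DO()/elk_WHILE(), and the body may emit elk_BREAK(p) or
 * elk_CONT(p):
 *
 *    elk_DO(p, ELK_EXECUTE_8);
 *    ...   // loop body
 *    elk_WHILE(p);
 */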

/* Forward jumps:
 */
void elk_land_fwd_jump(struct elk_codegen *p, int jmp_insn_idx);

elk_inst *elk_JMPI(struct elk_codegen *p, struct elk_reg index,
                   unsigned predicate_control);

void elk_NOP(struct elk_codegen *p);

void elk_WAIT(struct elk_codegen *p);

/* Special case: there is never a destination; the execution size is
 * taken from src0:
 */
void elk_CMP(struct elk_codegen *p,
             struct elk_reg dest,
             unsigned conditional,
             struct elk_reg src0,
             struct elk_reg src1);

void elk_CMPN(struct elk_codegen *p,
              struct elk_reg dest,
              unsigned conditional,
              struct elk_reg src0,
              struct elk_reg src1);

void
elk_untyped_atomic(struct elk_codegen *p,
                   struct elk_reg dst,
                   struct elk_reg payload,
                   struct elk_reg surface,
                   unsigned atomic_op,
                   unsigned msg_length,
                   bool response_expected,
                   bool header_present);

void
elk_untyped_surface_read(struct elk_codegen *p,
                         struct elk_reg dst,
                         struct elk_reg payload,
                         struct elk_reg surface,
                         unsigned msg_length,
                         unsigned num_channels);

void
elk_untyped_surface_write(struct elk_codegen *p,
                          struct elk_reg payload,
                          struct elk_reg surface,
                          unsigned msg_length,
                          unsigned num_channels,
                          bool header_present);

void
elk_memory_fence(struct elk_codegen *p,
                 struct elk_reg dst,
                 struct elk_reg src,
                 enum elk_opcode send_op,
                 enum elk_message_target sfid,
                 uint32_t desc,
                 bool commit_enable,
                 unsigned bti);

void
elk_pixel_interpolator_query(struct elk_codegen *p,
                             struct elk_reg dest,
                             struct elk_reg mrf,
                             bool noperspective,
                             bool coarse_pixel_rate,
                             unsigned mode,
                             struct elk_reg data,
                             unsigned msg_length,
                             unsigned response_length);

void
elk_find_live_channel(struct elk_codegen *p,
                      struct elk_reg dst,
                      bool last);

void
elk_broadcast(struct elk_codegen *p,
              struct elk_reg dst,
              struct elk_reg src,
              struct elk_reg idx);

void
elk_float_controls_mode(struct elk_codegen *p,
                        unsigned mode, unsigned mask);

void
elk_update_reloc_imm(const struct elk_isa_info *isa,
                     elk_inst *inst,
                     uint32_t value);

void
elk_MOV_reloc_imm(struct elk_codegen *p,
                  struct elk_reg dst,
                  enum elk_reg_type src_type,
                  uint32_t id);

unsigned
elk_num_sources_from_inst(const struct elk_isa_info *isa,
                          const elk_inst *inst);

/***********************************************************************
 * elk_eu_util.c:
 */

void elk_copy_indirect_to_indirect(struct elk_codegen *p,
                                   struct elk_indirect dst_ptr,
                                   struct elk_indirect src_ptr,
                                   unsigned count);

void elk_copy_from_indirect(struct elk_codegen *p,
                            struct elk_reg dst,
                            struct elk_indirect ptr,
                            unsigned count);

void elk_copy4(struct elk_codegen *p,
               struct elk_reg dst,
               struct elk_reg src,
               unsigned count);

void elk_copy8(struct elk_codegen *p,
               struct elk_reg dst,
               struct elk_reg src,
               unsigned count);

void elk_math_invert(struct elk_codegen *p,
                     struct elk_reg dst,
                     struct elk_reg src);

void elk_set_src1(struct elk_codegen *p, elk_inst *insn, struct elk_reg reg);

void elk_set_desc_ex(struct elk_codegen *p, elk_inst *insn,
                     unsigned desc, unsigned ex_desc);

static inline void
elk_set_desc(struct elk_codegen *p, elk_inst *insn, unsigned desc)
{
   elk_set_desc_ex(p, insn, desc, 0);
}
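
/* Usage sketch (editor's illustration): after emitting a SEND-like
 * instruction `insn`, fill in its 32-bit descriptor with no extended
 * descriptor:
 *
 *    elk_set_desc(p, insn, desc);
 */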

void elk_set_uip_jip(struct elk_codegen *p, int start_offset);

enum elk_conditional_mod elk_negate_cmod(enum elk_conditional_mod cmod);
enum elk_conditional_mod elk_swap_cmod(enum elk_conditional_mod cmod);

/* elk_eu_compact.c */
void elk_compact_instructions(struct elk_codegen *p, int start_offset,
                              struct elk_disasm_info *disasm);
void elk_uncompact_instruction(const struct elk_isa_info *isa,
                               elk_inst *dst, elk_compact_inst *src);
bool elk_try_compact_instruction(const struct elk_isa_info *isa,
                                 elk_compact_inst *dst, const elk_inst *src);

void elk_debug_compact_uncompact(const struct elk_isa_info *isa,
                                 elk_inst *orig, elk_inst *uncompacted);

/* elk_eu_validate.c */
bool elk_validate_instruction(const struct elk_isa_info *isa,
                              const elk_inst *inst, int offset,
                              unsigned inst_size,
                              struct elk_disasm_info *disasm);
bool elk_validate_instructions(const struct elk_isa_info *isa,
                               const void *assembly, int start_offset, int end_offset,
                               struct elk_disasm_info *disasm);

static inline int
next_offset(const struct intel_device_info *devinfo, void *store, int offset)
{
   elk_inst *insn = (elk_inst *)((char *)store + offset);

   if (elk_inst_cmpt_control(devinfo, insn))
      return offset + 8;
   else
      return offset + 16;
}
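
/* Usage sketch (editor's illustration): walk a stream that mixes
 * compacted (8-byte) and uncompacted (16-byte) instructions:
 *
 *    for (int offset = start; offset < end;
 *         offset = next_offset(devinfo, store, offset)) {
 *       elk_inst *insn = (elk_inst *)((char *)store + offset);
 *       ...
 *    }
 */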

/** Maximum SEND message length */
#define ELK_MAX_MSG_LENGTH 15

/** First MRF register used by spills */
#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)

/** First MRF register used by pull loads */
#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)
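
/* Editor's note: only Gfx6 separates the two ranges; elsewhere both
 * start at MRF 13:
 *
 *    assert(FIRST_SPILL_MRF(6) == 21 && FIRST_PULL_LOAD_MRF(6) == 16);
 *    assert(FIRST_SPILL_MRF(7) == 13 && FIRST_PULL_LOAD_MRF(7) == 13);
 */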

#ifdef __cplusplus
}
#endif

#endif