1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <[email protected]>
30 */
31
32
33 #ifndef ELK_EU_H
34 #define ELK_EU_H
35
36 #include <stdbool.h>
37 #include <stdio.h>
38 #include "elk_inst.h"
39 #include "elk_compiler.h"
40 #include "elk_eu_defines.h"
41 #include "elk_isa_info.h"
42 #include "elk_reg.h"
43
44 #include "util/bitset.h"
45
46 #ifdef __cplusplus
47 extern "C" {
48 #endif
49
struct elk_disasm_info;

/* Maximum depth of the default-instruction-state stack in elk_codegen. */
#define ELK_EU_MAX_INSN_STACK 5
53
/* Default instruction state saved and restored by elk_push_insn_state() /
 * elk_pop_insn_state().  Packed into bitfields to keep the stack compact.
 */
struct elk_insn_state {
   /* One of ELK_EXECUTE_* */
   unsigned exec_size:3;

   /* Group in units of channels */
   unsigned group:5;

   /* Compression control on gfx4-5 */
   bool compressed:1;

   /* One of ELK_MASK_* */
   unsigned mask_control:1;

   /* Whether destination writes are saturated */
   bool saturate:1;

   /* One of ELK_ALIGN_* */
   unsigned access_mode:1;

   /* One of ELK_PREDICATE_* */
   enum elk_predicate predicate:4;

   /* Invert the sense of the predicate */
   bool pred_inv:1;

   /* Flag subreg.  Bottom bit is subreg, top bit is reg */
   unsigned flag_subreg:2;

   /* AccWrCtrl: whether the accumulator is also written */
   bool acc_wr_control:1;
};
82
83
/* A helper for accessing the last instruction emitted.  This makes it easy
 * to set various bits on an instruction without having to create a temporary
 * variable and assign the emitted instruction to it.
 */
#define elk_last_inst (&p->store[p->nr_insn - 1])
89
/* Assembler state: accumulates instructions in `store` and tracks the
 * default instruction state plus unresolved control-flow fixups.
 */
struct elk_codegen {
   elk_inst *store;               /* emitted instruction buffer */
   int store_size;                /* capacity of `store`, in instructions */
   unsigned nr_insn;              /* instructions emitted so far */
   unsigned int next_insn_offset; /* byte offset of the next instruction */

   void *mem_ctx;                 /* ralloc-style context owning allocations
                                   * (presumably; see elk_init_codegen) */

   /* Allow clients to push/pop instruction state:
    */
   struct elk_insn_state stack[ELK_EU_MAX_INSN_STACK];
   struct elk_insn_state *current;  /* top of `stack`: the active defaults */

   /** Whether or not the user wants automatic exec sizes
    *
    * If true, codegen will try to automatically infer the exec size of an
    * instruction from the width of the destination register.  If false, it
    * will take whatever is set by elk_set_default_exec_size verbatim.
    *
    * This is set to true by default in elk_init_codegen.
    */
   bool automatic_exec_sizes;

   bool single_program_flow;
   const struct elk_isa_info *isa;
   const struct intel_device_info *devinfo;

   /* Control flow stacks:
    * - if_stack contains IF and ELSE instructions which must be patched
    *   (and popped) once the matching ENDIF instruction is encountered.
    *
    *   Just store the instruction pointer (an index).
    */
   int *if_stack;
   int if_stack_depth;        /* current nesting depth */
   int if_stack_array_size;   /* allocated capacity of if_stack */

   /**
    * loop_stack contains the instruction pointers of the starts of loops which
    * must be patched (and popped) once the matching WHILE instruction is
    * encountered.
    */
   int *loop_stack;
   /**
    * pre-gfx6, the BREAK and CONT instructions had to tell how many IF/ENDIF
    * blocks they were popping out of, to fix up the mask stack.  This tracks
    * the IF/ENDIF nesting in each current nested loop level.
    */
   int *if_depth_in_loop;
   int loop_stack_depth;      /* current loop nesting depth */
   int loop_stack_array_size; /* allocated capacity of loop_stack */

   /* Relocations to be applied to the finished program. */
   struct elk_shader_reloc *relocs;
   int num_relocs;
   int reloc_array_size;
};
146
/* A program label: an offset paired with a sequential number, chained
 * into a singly-linked list.
 */
struct elk_label {
   int offset;
   int number;
   struct elk_label *next;
};
152
/* Push/pop the default-instruction-state stack. */
void elk_pop_insn_state( struct elk_codegen *p );
void elk_push_insn_state( struct elk_codegen *p );

/* Getters and setters for the current default instruction state. */
unsigned elk_get_default_exec_size(struct elk_codegen *p);
unsigned elk_get_default_group(struct elk_codegen *p);
unsigned elk_get_default_access_mode(struct elk_codegen *p);
void elk_set_default_exec_size(struct elk_codegen *p, unsigned value);
void elk_set_default_mask_control( struct elk_codegen *p, unsigned value );
void elk_set_default_saturate( struct elk_codegen *p, bool enable );
void elk_set_default_access_mode( struct elk_codegen *p, unsigned access_mode );
void elk_inst_set_compression(const struct intel_device_info *devinfo,
                              elk_inst *inst, bool on);
void elk_set_default_compression(struct elk_codegen *p, bool on);
void elk_inst_set_group(const struct intel_device_info *devinfo,
                        elk_inst *inst, unsigned group);
void elk_set_default_group(struct elk_codegen *p, unsigned group);
void elk_set_default_compression_control(struct elk_codegen *p, enum elk_compression c);
void elk_set_default_predicate_control(struct elk_codegen *p, enum elk_predicate pc);
void elk_set_default_predicate_inverse(struct elk_codegen *p, bool predicate_inverse);
void elk_set_default_flag_reg(struct elk_codegen *p, int reg, int subreg);
void elk_set_default_acc_write_control(struct elk_codegen *p, unsigned value);

/* Assembler lifecycle and program extraction. */
void elk_init_codegen(const struct elk_isa_info *isa,
                      struct elk_codegen *p, void *mem_ctx);
bool elk_has_jip(const struct intel_device_info *devinfo, enum elk_opcode opcode);
bool elk_has_uip(const struct intel_device_info *devinfo, enum elk_opcode opcode);
const struct elk_shader_reloc *elk_get_shader_relocs(struct elk_codegen *p,
                                                     unsigned *num_relocs);
const unsigned *elk_get_program( struct elk_codegen *p, unsigned *sz );

/* Debug helpers: dumping and overriding the generated binary. */
bool elk_should_dump_shader_bin(void);
void elk_dump_shader_bin(void *assembly, int start_offset, int end_offset,
                         const char *identifier);

bool elk_try_override_assembly(struct elk_codegen *p, int start_offset,
                               const char *identifier);

/* Low-level emission: alignment, raw data, and instruction fields. */
void elk_realign(struct elk_codegen *p, unsigned alignment);
int elk_append_data(struct elk_codegen *p, void *data,
                    unsigned size, unsigned alignment);
elk_inst *elk_next_insn(struct elk_codegen *p, unsigned opcode);
void elk_add_reloc(struct elk_codegen *p, uint32_t id,
                   enum elk_shader_reloc_type type,
                   uint32_t offset, uint32_t delta);
void elk_set_dest(struct elk_codegen *p, elk_inst *insn, struct elk_reg dest);
void elk_set_src0(struct elk_codegen *p, elk_inst *insn, struct elk_reg reg);

void elk_gfx6_resolve_implied_move(struct elk_codegen *p,
                                   struct elk_reg *src,
                                   unsigned msg_reg_nr);
202
/* Helpers for regular instructions:
 *
 * ALUn(OP) declares an emitter elk_OP(p, dest, src0, ... srcN-1) returning
 * the emitted instruction; e.g. ALU2(ADD) declares elk_ADD().
 */
#define ALU1(OP)                                \
elk_inst *elk_##OP(struct elk_codegen *p,       \
                   struct elk_reg dest,         \
                   struct elk_reg src0);

#define ALU2(OP)                                \
elk_inst *elk_##OP(struct elk_codegen *p,       \
                   struct elk_reg dest,         \
                   struct elk_reg src0,         \
                   struct elk_reg src1);

#define ALU3(OP)                                \
elk_inst *elk_##OP(struct elk_codegen *p,       \
                   struct elk_reg dest,         \
                   struct elk_reg src0,         \
                   struct elk_reg src1,         \
                   struct elk_reg src2);

/* One prototype per supported ALU opcode. */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(F32TO16)
ALU1(F16TO32)
ALU2(ADD)
ALU2(AVG)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
ALU3(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

#undef ALU1
#undef ALU2
#undef ALU3
270
/* Size of the hardware register addressing unit.  Always 1 on the
 * generations this compiler targets; kept as a function so shared code
 * can divide/multiply by it uniformly.
 */
static inline unsigned
reg_unit(const struct intel_device_info *devinfo)
{
   (void) devinfo;
   return 1;
}
276
277
278 /* Helpers for SEND instruction:
279 */
280
281 /**
282 * Construct a message descriptor immediate with the specified common
283 * descriptor controls.
284 */
285 static inline uint32_t
elk_message_desc(const struct intel_device_info * devinfo,unsigned msg_length,unsigned response_length,bool header_present)286 elk_message_desc(const struct intel_device_info *devinfo,
287 unsigned msg_length,
288 unsigned response_length,
289 bool header_present)
290 {
291 if (devinfo->ver >= 5) {
292 assert(msg_length % reg_unit(devinfo) == 0);
293 assert(response_length % reg_unit(devinfo) == 0);
294 return (SET_BITS(msg_length / reg_unit(devinfo), 28, 25) |
295 SET_BITS(response_length / reg_unit(devinfo), 24, 20) |
296 SET_BITS(header_present, 19, 19));
297 } else {
298 return (SET_BITS(msg_length, 23, 20) |
299 SET_BITS(response_length, 19, 16));
300 }
301 }
302
303 static inline unsigned
elk_message_desc_mlen(const struct intel_device_info * devinfo,uint32_t desc)304 elk_message_desc_mlen(const struct intel_device_info *devinfo, uint32_t desc)
305 {
306 if (devinfo->ver >= 5)
307 return GET_BITS(desc, 28, 25) * reg_unit(devinfo);
308 else
309 return GET_BITS(desc, 23, 20);
310 }
311
312 static inline unsigned
elk_message_desc_rlen(const struct intel_device_info * devinfo,uint32_t desc)313 elk_message_desc_rlen(const struct intel_device_info *devinfo, uint32_t desc)
314 {
315 if (devinfo->ver >= 5)
316 return GET_BITS(desc, 24, 20) * reg_unit(devinfo);
317 else
318 return GET_BITS(desc, 19, 16);
319 }
320
321 static inline bool
elk_message_desc_header_present(ASSERTED const struct intel_device_info * devinfo,uint32_t desc)322 elk_message_desc_header_present(ASSERTED
323 const struct intel_device_info *devinfo,
324 uint32_t desc)
325 {
326 assert(devinfo->ver >= 5);
327 return GET_BITS(desc, 19, 19);
328 }
329
/* Construct an extended message descriptor with the given extended message
 * length, encoded in register units in bits 9:6.
 */
static inline unsigned
elk_message_ex_desc(const struct intel_device_info *devinfo,
                    unsigned ex_msg_length)
{
   assert(ex_msg_length % reg_unit(devinfo) == 0);
   return SET_BITS(ex_msg_length / reg_unit(devinfo), 9, 6);
}
337
/* Decode the extended message length (in registers) from an extended
 * descriptor; inverse of elk_message_ex_desc().
 */
static inline unsigned
elk_message_ex_desc_ex_mlen(const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   const unsigned encoded = GET_BITS(ex_desc, 9, 6);
   return encoded * reg_unit(devinfo);
}
344
/* Construct a URB message descriptor.  Field positions shift between
 * gfx7 and gfx8; channel masks only exist on gfx8+.
 */
static inline uint32_t
elk_urb_desc(const struct intel_device_info *devinfo,
             unsigned msg_type,
             bool per_slot_offset_present,
             bool channel_mask_present,
             unsigned global_offset)
{
   if (devinfo->ver >= 8) {
      return (SET_BITS(per_slot_offset_present, 17, 17) |
              SET_BITS(channel_mask_present, 15, 15) |
              SET_BITS(global_offset, 14, 4) |
              SET_BITS(msg_type, 3, 0));
   } else if (devinfo->ver >= 7) {
      /* No channel-mask bit in the gfx7 layout. */
      assert(!channel_mask_present);
      return (SET_BITS(per_slot_offset_present, 16, 16) |
              SET_BITS(global_offset, 13, 3) |
              SET_BITS(msg_type, 3, 0));
   } else {
      unreachable("unhandled URB write generation");
   }
}
366
367 static inline uint32_t
elk_urb_desc_msg_type(ASSERTED const struct intel_device_info * devinfo,uint32_t desc)368 elk_urb_desc_msg_type(ASSERTED const struct intel_device_info *devinfo,
369 uint32_t desc)
370 {
371 assert(devinfo->ver >= 7);
372 return GET_BITS(desc, 3, 0);
373 }
374
375 /**
376 * Construct a message descriptor immediate with the specified sampler
377 * function controls.
378 */
379 static inline uint32_t
elk_sampler_desc(const struct intel_device_info * devinfo,unsigned binding_table_index,unsigned sampler,unsigned msg_type,unsigned simd_mode,unsigned return_format)380 elk_sampler_desc(const struct intel_device_info *devinfo,
381 unsigned binding_table_index,
382 unsigned sampler,
383 unsigned msg_type,
384 unsigned simd_mode,
385 unsigned return_format)
386 {
387 const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
388 SET_BITS(sampler, 11, 8));
389
390 /* From the CHV Bspec: Shared Functions - Message Descriptor -
391 * Sampling Engine:
392 *
393 * SIMD Mode[2] 29 This field is the upper bit of the 3-bit
394 * SIMD Mode field.
395 */
396 if (devinfo->ver >= 8)
397 return desc | SET_BITS(msg_type, 16, 12) |
398 SET_BITS(simd_mode & 0x3, 18, 17) |
399 SET_BITS(simd_mode >> 2, 29, 29) |
400 SET_BITS(return_format, 30, 30);
401 if (devinfo->ver >= 7)
402 return (desc | SET_BITS(msg_type, 16, 12) |
403 SET_BITS(simd_mode, 18, 17));
404 else if (devinfo->ver >= 5)
405 return (desc | SET_BITS(msg_type, 15, 12) |
406 SET_BITS(simd_mode, 17, 16));
407 else if (devinfo->verx10 >= 45)
408 return desc | SET_BITS(msg_type, 15, 12);
409 else
410 return (desc | SET_BITS(return_format, 13, 12) |
411 SET_BITS(msg_type, 15, 14));
412 }
413
414 static inline unsigned
elk_sampler_desc_binding_table_index(UNUSED const struct intel_device_info * devinfo,uint32_t desc)415 elk_sampler_desc_binding_table_index(UNUSED
416 const struct intel_device_info *devinfo,
417 uint32_t desc)
418 {
419 return GET_BITS(desc, 7, 0);
420 }
421
422 static inline unsigned
elk_sampler_desc_sampler(UNUSED const struct intel_device_info * devinfo,uint32_t desc)423 elk_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo,
424 uint32_t desc)
425 {
426 return GET_BITS(desc, 11, 8);
427 }
428
429 static inline unsigned
elk_sampler_desc_msg_type(const struct intel_device_info * devinfo,uint32_t desc)430 elk_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
431 {
432 if (devinfo->ver >= 7)
433 return GET_BITS(desc, 16, 12);
434 else if (devinfo->verx10 >= 45)
435 return GET_BITS(desc, 15, 12);
436 else
437 return GET_BITS(desc, 15, 14);
438 }
439
/* Decode the (up to 3-bit) SIMD mode from a sampler message descriptor;
 * inverse of elk_sampler_desc().
 */
static inline unsigned
elk_sampler_desc_simd_mode(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   assert(devinfo->ver >= 5);
   if (devinfo->ver >= 8)
      /* Bit 29 carries the upper bit of the 3-bit SIMD mode. */
      return GET_BITS(desc, 18, 17) | GET_BITS(desc, 29, 29) << 2;
   else if (devinfo->ver >= 7)
      return GET_BITS(desc, 18, 17);
   else
      return GET_BITS(desc, 17, 16);
}
452
/* Decode the return format from a sampler message descriptor.  The field
 * only exists on original gfx4 (verx10 == 40) and on gfx8+.
 */
static inline unsigned
elk_sampler_desc_return_format(ASSERTED const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->verx10 == 40 || devinfo->ver >= 8);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 30, 30);
   else
      return GET_BITS(desc, 13, 12);
}
463
464 /**
465 * Construct a message descriptor for the dataport
466 */
467 static inline uint32_t
elk_dp_desc(const struct intel_device_info * devinfo,unsigned binding_table_index,unsigned msg_type,unsigned msg_control)468 elk_dp_desc(const struct intel_device_info *devinfo,
469 unsigned binding_table_index,
470 unsigned msg_type,
471 unsigned msg_control)
472 {
473 /* Prior to gfx6, things are too inconsistent; use the dp_read/write_desc
474 * helpers instead.
475 */
476 assert(devinfo->ver >= 6);
477 const unsigned desc = SET_BITS(binding_table_index, 7, 0);
478 if (devinfo->ver >= 8) {
479 return (desc | SET_BITS(msg_control, 13, 8) |
480 SET_BITS(msg_type, 18, 14));
481 } else if (devinfo->ver >= 7) {
482 return (desc | SET_BITS(msg_control, 13, 8) |
483 SET_BITS(msg_type, 17, 14));
484 } else {
485 return (desc | SET_BITS(msg_control, 12, 8) |
486 SET_BITS(msg_type, 16, 13));
487 }
488 }
489
490 static inline unsigned
elk_dp_desc_binding_table_index(UNUSED const struct intel_device_info * devinfo,uint32_t desc)491 elk_dp_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
492 uint32_t desc)
493 {
494 return GET_BITS(desc, 7, 0);
495 }
496
497 static inline unsigned
elk_dp_desc_msg_type(const struct intel_device_info * devinfo,uint32_t desc)498 elk_dp_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
499 {
500 assert(devinfo->ver >= 6);
501 if (devinfo->ver >= 8)
502 return GET_BITS(desc, 18, 14);
503 else if (devinfo->ver >= 7)
504 return GET_BITS(desc, 17, 14);
505 else
506 return GET_BITS(desc, 16, 13);
507 }
508
509 static inline unsigned
elk_dp_desc_msg_control(const struct intel_device_info * devinfo,uint32_t desc)510 elk_dp_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
511 {
512 assert(devinfo->ver >= 6);
513 if (devinfo->ver >= 7)
514 return GET_BITS(desc, 13, 8);
515 else
516 return GET_BITS(desc, 12, 8);
517 }
518
519 /**
520 * Construct a message descriptor immediate with the specified dataport read
521 * function controls.
522 */
523 static inline uint32_t
elk_dp_read_desc(const struct intel_device_info * devinfo,unsigned binding_table_index,unsigned msg_control,unsigned msg_type,unsigned target_cache)524 elk_dp_read_desc(const struct intel_device_info *devinfo,
525 unsigned binding_table_index,
526 unsigned msg_control,
527 unsigned msg_type,
528 unsigned target_cache)
529 {
530 if (devinfo->ver >= 6)
531 return elk_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
532 else if (devinfo->verx10 >= 45)
533 return (SET_BITS(binding_table_index, 7, 0) |
534 SET_BITS(msg_control, 10, 8) |
535 SET_BITS(msg_type, 13, 11) |
536 SET_BITS(target_cache, 15, 14));
537 else
538 return (SET_BITS(binding_table_index, 7, 0) |
539 SET_BITS(msg_control, 11, 8) |
540 SET_BITS(msg_type, 13, 12) |
541 SET_BITS(target_cache, 15, 14));
542 }
543
544 static inline unsigned
elk_dp_read_desc_msg_type(const struct intel_device_info * devinfo,uint32_t desc)545 elk_dp_read_desc_msg_type(const struct intel_device_info *devinfo,
546 uint32_t desc)
547 {
548 if (devinfo->ver >= 6)
549 return elk_dp_desc_msg_type(devinfo, desc);
550 else if (devinfo->verx10 >= 45)
551 return GET_BITS(desc, 13, 11);
552 else
553 return GET_BITS(desc, 13, 12);
554 }
555
556 static inline unsigned
elk_dp_read_desc_msg_control(const struct intel_device_info * devinfo,uint32_t desc)557 elk_dp_read_desc_msg_control(const struct intel_device_info *devinfo,
558 uint32_t desc)
559 {
560 if (devinfo->ver >= 6)
561 return elk_dp_desc_msg_control(devinfo, desc);
562 else if (devinfo->verx10 >= 45)
563 return GET_BITS(desc, 10, 8);
564 else
565 return GET_BITS(desc, 11, 8);
566 }
567
568 /**
569 * Construct a message descriptor immediate with the specified dataport write
570 * function controls.
571 */
572 static inline uint32_t
elk_dp_write_desc(const struct intel_device_info * devinfo,unsigned binding_table_index,unsigned msg_control,unsigned msg_type,unsigned send_commit_msg)573 elk_dp_write_desc(const struct intel_device_info *devinfo,
574 unsigned binding_table_index,
575 unsigned msg_control,
576 unsigned msg_type,
577 unsigned send_commit_msg)
578 {
579 assert(devinfo->ver <= 6 || !send_commit_msg);
580 if (devinfo->ver >= 6) {
581 return elk_dp_desc(devinfo, binding_table_index, msg_type, msg_control) |
582 SET_BITS(send_commit_msg, 17, 17);
583 } else {
584 return (SET_BITS(binding_table_index, 7, 0) |
585 SET_BITS(msg_control, 11, 8) |
586 SET_BITS(msg_type, 14, 12) |
587 SET_BITS(send_commit_msg, 15, 15));
588 }
589 }
590
591 static inline unsigned
elk_dp_write_desc_msg_type(const struct intel_device_info * devinfo,uint32_t desc)592 elk_dp_write_desc_msg_type(const struct intel_device_info *devinfo,
593 uint32_t desc)
594 {
595 if (devinfo->ver >= 6)
596 return elk_dp_desc_msg_type(devinfo, desc);
597 else
598 return GET_BITS(desc, 14, 12);
599 }
600
601 static inline unsigned
elk_dp_write_desc_msg_control(const struct intel_device_info * devinfo,uint32_t desc)602 elk_dp_write_desc_msg_control(const struct intel_device_info *devinfo,
603 uint32_t desc)
604 {
605 if (devinfo->ver >= 6)
606 return elk_dp_desc_msg_control(devinfo, desc);
607 else
608 return GET_BITS(desc, 11, 8);
609 }
610
/* Decode the send-commit bit from a dataport write descriptor.  The feature
 * only exists up to gfx6, so the first branch effectively means ver == 6.
 */
static inline bool
elk_dp_write_desc_write_commit(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver <= 6);
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 17, 17);
   else
      return GET_BITS(desc, 15, 15);
}
621
622 /**
623 * Construct a message descriptor immediate with the specified dataport
624 * surface function controls.
625 */
626 static inline uint32_t
elk_dp_surface_desc(const struct intel_device_info * devinfo,unsigned msg_type,unsigned msg_control)627 elk_dp_surface_desc(const struct intel_device_info *devinfo,
628 unsigned msg_type,
629 unsigned msg_control)
630 {
631 assert(devinfo->ver >= 7);
632 /* We'll OR in the binding table index later */
633 return elk_dp_desc(devinfo, 0, msg_type, msg_control);
634 }
635
/* Construct a descriptor for an untyped atomic dataport message.
 * exec_size == 0 selects the SIMD4x2 variant (HSW+ only).
 */
static inline uint32_t
elk_dp_untyped_atomic_desc(const struct intel_device_info *devinfo,
                           unsigned exec_size, /**< 0 for SIMD4x2 */
                           unsigned atomic_op,
                           bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (devinfo->verx10 >= 75) {
      if (exec_size > 0) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
      } else {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
      }
   } else {
      msg_type = GFX7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
   }

   /* Bit 4 selects SIMD8 mode; bit 5 requests return data. */
   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}
662
/* See also MDC_CMASK in the SKL PRM Vol 2d: a set bit disables the
 * corresponding channel, so enabling the first num_channels channels
 * clears the low bits of the 4-bit mask.
 */
static inline unsigned
elk_mdc_cmask(unsigned num_channels)
{
   return (0xf << num_channels) & 0xf;
}
669
/* LSC channel mask: the low num_channels bits set (opposite polarity to
 * elk_mdc_cmask(), where a set bit disables a channel).
 */
static inline unsigned
lsc_cmask(unsigned num_channels)
{
   assert(num_channels > 0 && num_channels <= 4);
   /* Equivalent to BITSET_MASK(num_channels) for 1..4 channels. */
   return (1u << num_channels) - 1;
}
676
/* Construct a descriptor for an untyped surface read or write message.
 * exec_size == 0 selects SIMD4x2 (reads only on IVB; see below).
 */
static inline uint32_t
elk_dp_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned num_channels,
                               bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (write) {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE;
      } else {
         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_WRITE;
      }
   } else {
      /* Read */
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
      } else {
         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_READ;
      }
   }

   /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
   if (write && devinfo->verx10 == 70 && exec_size == 0)
      exec_size = 8;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}
715
/* Map a data bit-size (8/16/32) to the MDC_DS data-size encoding used by
 * byte scattered messages.
 */
static inline unsigned
elk_mdc_ds(unsigned bit_size)
{
   switch (bit_size) {
   case 8:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
   case 16:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
   case 32:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
   default:
      unreachable("Unsupported bit_size for byte scattered messages");
   }
}
730
/* Construct a descriptor for a byte scattered read or write message
 * (HSW+ only; no SIMD4x2 variant exists, hence exec_size > 0).
 */
static inline uint32_t
elk_dp_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                              unsigned exec_size,
                              unsigned bit_size,
                              bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   assert(devinfo->verx10 >= 75);
   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE :
              HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ;

   assert(exec_size > 0);
   /* Bit 0 selects SIMD16; bits 3:2 carry the data size. */
   const unsigned msg_control =
      SET_BITS(exec_size == 16, 0, 0) |
      SET_BITS(elk_mdc_ds(bit_size), 3, 2);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}
751
/* Construct a descriptor for a dword scattered read or write message.
 *
 * NOTE(review): elk_dp_surface_desc() asserts devinfo->ver >= 7, yet the
 * msg_type selection below handles pre-gfx7 generations — confirm whether
 * the older branches are reachable through any caller.
 */
static inline uint32_t
elk_dp_dword_scattered_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size,
                               bool write)
{
   assert(exec_size == 8 || exec_size == 16);

   unsigned msg_type;
   if (write) {
      if (devinfo->ver >= 6) {
         msg_type = GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
      } else {
         msg_type = ELK_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
      }
   } else {
      if (devinfo->ver >= 7) {
         msg_type = GFX7_DATAPORT_DC_DWORD_SCATTERED_READ;
      } else if (devinfo->verx10 >= 45) {
         msg_type = G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
      } else {
         msg_type = ELK_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
      }
   }

   const unsigned msg_control =
      SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
      SET_BITS(exec_size == 16, 0, 0);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}
782
/* Construct a descriptor for an OWORD block read or write message.
 * num_dwords is converted to the block-size encoding in bits 2:0.
 */
static inline uint32_t
elk_dp_oword_block_rw_desc(const struct intel_device_info *devinfo,
                           bool align_16B,
                           unsigned num_dwords,
                           bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   const unsigned msg_type =
      write ?     GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE :
      align_16B ? GFX7_DATAPORT_DC_OWORD_BLOCK_READ :
                  GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ;

   const unsigned msg_control =
      SET_BITS(ELK_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}
802
/* Construct a descriptor for an A64 (stateless, 64-bit address) untyped
 * surface read or write message (gfx8+).
 */
static inline uint32_t
elk_dp_a64_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                                   unsigned exec_size, /**< 0 for SIMD4x2 */
                                   unsigned num_channels,
                                   bool write)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 8);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE :
              GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
827
/* Construct a descriptor for an A64 OWORD block read or write message. */
static inline uint32_t
elk_dp_a64_oword_block_rw_desc(const struct intel_device_info *devinfo,
                               bool align_16B,
                               unsigned num_dwords,
                               bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE :
              GFX8_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ;

   /* Unaligned reads use a distinct sub-type encoding in bits 4:3. */
   unsigned msg_control =
      SET_BITS(!align_16B, 4, 3) |
      SET_BITS(ELK_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
848
849 /**
850 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
851 * Skylake PRM).
852 */
853 static inline uint32_t
elk_mdc_a64_ds(unsigned elems)854 elk_mdc_a64_ds(unsigned elems)
855 {
856 switch (elems) {
857 case 1: return 0;
858 case 2: return 1;
859 case 4: return 2;
860 case 8: return 3;
861 default:
862 unreachable("Unsupported elmeent count for A64 scattered message");
863 }
864 }
865
/* Construct a descriptor for an A64 byte scattered read or write message
 * (gfx8+).  bit_size is converted to element count via elk_mdc_a64_ds().
 */
static inline uint32_t
elk_dp_a64_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                                  unsigned exec_size, /**< 0 for SIMD4x2 */
                                  unsigned bit_size,
                                  bool write)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 8);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ;

   /* Bits 1:0 sub-type, 3:2 data size, 4 selects SIMD16. */
   const unsigned msg_control =
      SET_BITS(GFX8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) |
      SET_BITS(elk_mdc_a64_ds(bit_size / 8), 3, 2) |
      SET_BITS(exec_size == 16, 4, 4);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
887
/* Construct a descriptor for an A64 untyped atomic message (gfx8+,
 * SIMD8 only; 32- or 64-bit operands).
 */
static inline uint32_t
elk_dp_a64_untyped_atomic_desc(const struct intel_device_info *devinfo,
                               ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned bit_size,
                               unsigned atomic_op,
                               bool response_expected)
{
   assert(exec_size == 8);
   assert(devinfo->ver >= 8);
   assert(bit_size == 32 || bit_size == 64);

   const unsigned msg_type = GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;

   /* Bit 4 selects 64-bit operands; bit 5 requests return data. */
   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(bit_size == 64, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
909
/* Construct a descriptor for a typed atomic message.  exec_size == 0
 * selects the SIMD4x2 variant (HSW+ only); exec_group picks which half
 * of the sample mask applies.
 */
static inline uint32_t
elk_dp_typed_atomic_desc(const struct intel_device_info *devinfo,
                         unsigned exec_size,
                         unsigned exec_group,
                         unsigned atomic_op,
                         bool response_expected)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   unsigned msg_type;
   if (devinfo->verx10 >= 75) {
      if (exec_size == 0) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2;
      } else {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;
      }
   } else {
      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
      assert(exec_size > 0);
      msg_type = GFX7_DATAPORT_RC_TYPED_ATOMIC_OP;
   }

   /* Odd 8-channel groups use the high half of the sample mask. */
   const bool high_sample_mask = (exec_group / 8) % 2 == 1;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(high_sample_mask, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}
942
/* Construct a descriptor for a typed surface read or write message.
 * exec_size == 0 selects the SIMD4x2 variant (HSW+ only); SIMD16 is not
 * supported by typed surface messages.
 */
static inline uint32_t
elk_dp_typed_surface_rw_desc(const struct intel_device_info *devinfo,
                             unsigned exec_size,
                             unsigned exec_group,
                             unsigned num_channels,
                             bool write)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   /* Typed surface reads and writes don't support SIMD16 */
   assert(exec_size <= 8);

   unsigned msg_type;
   if (write) {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE;
      } else {
         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE;
      }
   } else {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;
      } else {
         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_READ;
      }
   }

   unsigned msg_control;
   if (devinfo->verx10 >= 75) {
      /* See also MDC_SG3 in the SKL PRM Vol 2d. */
      const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */
                                  1 + ((exec_group / 8) % 2);

      msg_control =
         SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
         SET_BITS(slot_group, 5, 4);
   } else {
      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
      assert(exec_size > 0);
      const unsigned slot_group = ((exec_group / 8) % 2);

      msg_control =
         SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
         SET_BITS(slot_group, 5, 5);
   }

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}
993
static inline uint32_t
elk_fb_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   /* Build a render-target (framebuffer) message descriptor.  The field
    * layout changed on gfx7: msg_control widened to bits 13:8 and msg_type
    * moved up to bits 17:14.
    */
   /* Prior to gen6, things are too inconsistent; use the fb_(read|write)_desc
    * helpers instead.
    */
   assert(devinfo->ver >= 6);
   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
   if (devinfo->ver >= 7) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 17, 14));
   } else {
      return (desc | SET_BITS(msg_control, 12, 8) |
              SET_BITS(msg_type, 16, 13));
   }
}
1013
1014 static inline unsigned
elk_fb_desc_binding_table_index(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1015 elk_fb_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
1016 uint32_t desc)
1017 {
1018 return GET_BITS(desc, 7, 0);
1019 }
1020
1021 static inline uint32_t
elk_fb_desc_msg_control(const struct intel_device_info * devinfo,uint32_t desc)1022 elk_fb_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
1023 {
1024 assert(devinfo->ver >= 6);
1025 if (devinfo->ver >= 7)
1026 return GET_BITS(desc, 13, 8);
1027 else
1028 return GET_BITS(desc, 12, 8);
1029 }
1030
1031 static inline unsigned
elk_fb_desc_msg_type(const struct intel_device_info * devinfo,uint32_t desc)1032 elk_fb_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
1033 {
1034 assert(devinfo->ver >= 6);
1035 if (devinfo->ver >= 7)
1036 return GET_BITS(desc, 17, 14);
1037 else
1038 return GET_BITS(desc, 16, 13);
1039 }
1040
static inline uint32_t
elk_fb_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  bool last_render_target,
                  bool coarse_write)
{
   /* Build a render-target-write message descriptor.  Gfx6+ layers the
    * last-render-target and coarse-write bits on top of elk_fb_desc();
    * pre-gfx6 uses its own packing.  Coarse writes are not supported here.
    */
   const unsigned msg_type =
      devinfo->ver >= 6 ?
      GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE :
      ELK_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;

   assert(!coarse_write);

   if (devinfo->ver >= 6) {
      return elk_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
             SET_BITS(last_render_target, 12, 12) |
             SET_BITS(coarse_write, 18, 18);
   } else {
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(last_render_target, 11, 11) |
              SET_BITS(msg_type, 14, 12));
   }
}
1066
1067 static inline unsigned
elk_fb_write_desc_msg_type(const struct intel_device_info * devinfo,uint32_t desc)1068 elk_fb_write_desc_msg_type(const struct intel_device_info *devinfo,
1069 uint32_t desc)
1070 {
1071 if (devinfo->ver >= 6)
1072 return elk_fb_desc_msg_type(devinfo, desc);
1073 else
1074 return GET_BITS(desc, 14, 12);
1075 }
1076
1077 static inline unsigned
elk_fb_write_desc_msg_control(const struct intel_device_info * devinfo,uint32_t desc)1078 elk_fb_write_desc_msg_control(const struct intel_device_info *devinfo,
1079 uint32_t desc)
1080 {
1081 if (devinfo->ver >= 6)
1082 return elk_fb_desc_msg_control(devinfo, desc);
1083 else
1084 return GET_BITS(desc, 11, 8);
1085 }
1086
1087 static inline bool
elk_fb_write_desc_last_render_target(const struct intel_device_info * devinfo,uint32_t desc)1088 elk_fb_write_desc_last_render_target(const struct intel_device_info *devinfo,
1089 uint32_t desc)
1090 {
1091 if (devinfo->ver >= 6)
1092 return GET_BITS(desc, 12, 12);
1093 else
1094 return GET_BITS(desc, 11, 11);
1095 }
1096
1097 static inline bool
elk_fb_write_desc_write_commit(const struct intel_device_info * devinfo,uint32_t desc)1098 elk_fb_write_desc_write_commit(const struct intel_device_info *devinfo,
1099 uint32_t desc)
1100 {
1101 assert(devinfo->ver <= 6);
1102 if (devinfo->ver >= 6)
1103 return GET_BITS(desc, 17, 17);
1104 else
1105 return GET_BITS(desc, 15, 15);
1106 }
1107
1108 static inline bool
elk_lsc_opcode_has_cmask(enum elk_lsc_opcode opcode)1109 elk_lsc_opcode_has_cmask(enum elk_lsc_opcode opcode)
1110 {
1111 return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK;
1112 }
1113
1114 static inline bool
elk_lsc_opcode_has_transpose(enum elk_lsc_opcode opcode)1115 elk_lsc_opcode_has_transpose(enum elk_lsc_opcode opcode)
1116 {
1117 return opcode == LSC_OP_LOAD || opcode == LSC_OP_STORE;
1118 }
1119
1120 static inline bool
elk_lsc_opcode_is_store(enum elk_lsc_opcode opcode)1121 elk_lsc_opcode_is_store(enum elk_lsc_opcode opcode)
1122 {
1123 return opcode == LSC_OP_STORE ||
1124 opcode == LSC_OP_STORE_CMASK;
1125 }
1126
static inline bool
elk_lsc_opcode_is_atomic(enum elk_lsc_opcode opcode)
{
   /* True for every LSC atomic opcode (integer and floating-point).  Kept
    * as an explicit case list so new atomic opcodes must be classified
    * deliberately.
    */
   switch (opcode) {
   case LSC_OP_ATOMIC_INC:
   case LSC_OP_ATOMIC_DEC:
   case LSC_OP_ATOMIC_LOAD:
   case LSC_OP_ATOMIC_STORE:
   case LSC_OP_ATOMIC_ADD:
   case LSC_OP_ATOMIC_SUB:
   case LSC_OP_ATOMIC_MIN:
   case LSC_OP_ATOMIC_MAX:
   case LSC_OP_ATOMIC_UMIN:
   case LSC_OP_ATOMIC_UMAX:
   case LSC_OP_ATOMIC_CMPXCHG:
   case LSC_OP_ATOMIC_FADD:
   case LSC_OP_ATOMIC_FSUB:
   case LSC_OP_ATOMIC_FMIN:
   case LSC_OP_ATOMIC_FMAX:
   case LSC_OP_ATOMIC_FCMPXCHG:
   case LSC_OP_ATOMIC_AND:
   case LSC_OP_ATOMIC_OR:
   case LSC_OP_ATOMIC_XOR:
      return true;

   default:
      return false;
   }
}
1156
1157 static inline bool
elk_lsc_opcode_is_atomic_float(enum elk_lsc_opcode opcode)1158 elk_lsc_opcode_is_atomic_float(enum elk_lsc_opcode opcode)
1159 {
1160 switch (opcode) {
1161 case LSC_OP_ATOMIC_FADD:
1162 case LSC_OP_ATOMIC_FSUB:
1163 case LSC_OP_ATOMIC_FMIN:
1164 case LSC_OP_ATOMIC_FMAX:
1165 case LSC_OP_ATOMIC_FCMPXCHG:
1166 return true;
1167
1168 default:
1169 return false;
1170 }
1171 }
1172
static inline unsigned
lsc_op_num_data_values(unsigned _op)
{
   /* Number of data operands the given LSC opcode consumes: 2 for the
    * compare-exchange atomics, 0 for loads/fences/inc/dec, 1 otherwise.
    */
   enum elk_lsc_opcode op = (enum elk_lsc_opcode) _op;

   switch (op) {
   case LSC_OP_ATOMIC_CMPXCHG:
   case LSC_OP_ATOMIC_FCMPXCHG:
      return 2;
   case LSC_OP_ATOMIC_INC:
   case LSC_OP_ATOMIC_DEC:
   case LSC_OP_LOAD:
   case LSC_OP_LOAD_CMASK:
   case LSC_OP_FENCE:
      /* XXX: actually check docs */
      return 0;
   default:
      return 1;
   }
}
1193
static inline unsigned
lsc_op_to_legacy_atomic(unsigned _op)
{
   /* Translate an LSC atomic opcode to the legacy dataport ELK_AOP_*
    * encoding.  Opcodes with no legacy counterpart (and non-atomic
    * opcodes) are compile-time-checked errors via unreachable().
    */
   enum elk_lsc_opcode op = (enum elk_lsc_opcode) _op;

   switch (op) {
   case LSC_OP_ATOMIC_INC:
      return ELK_AOP_INC;
   case LSC_OP_ATOMIC_DEC:
      return ELK_AOP_DEC;
   case LSC_OP_ATOMIC_STORE:
      return ELK_AOP_MOV;
   case LSC_OP_ATOMIC_ADD:
      return ELK_AOP_ADD;
   case LSC_OP_ATOMIC_SUB:
      return ELK_AOP_SUB;
   case LSC_OP_ATOMIC_MIN:
      return ELK_AOP_IMIN;
   case LSC_OP_ATOMIC_MAX:
      return ELK_AOP_IMAX;
   case LSC_OP_ATOMIC_UMIN:
      return ELK_AOP_UMIN;
   case LSC_OP_ATOMIC_UMAX:
      return ELK_AOP_UMAX;
   case LSC_OP_ATOMIC_CMPXCHG:
      return ELK_AOP_CMPWR;
   case LSC_OP_ATOMIC_FADD:
      return ELK_AOP_FADD;
   case LSC_OP_ATOMIC_FMIN:
      return ELK_AOP_FMIN;
   case LSC_OP_ATOMIC_FMAX:
      return ELK_AOP_FMAX;
   case LSC_OP_ATOMIC_FCMPXCHG:
      return ELK_AOP_FCMPWR;
   case LSC_OP_ATOMIC_AND:
      return ELK_AOP_AND;
   case LSC_OP_ATOMIC_OR:
      return ELK_AOP_OR;
   case LSC_OP_ATOMIC_XOR:
      return ELK_AOP_XOR;
   /* No LSC op maps to ELK_AOP_PREDEC */
   case LSC_OP_ATOMIC_LOAD:
   case LSC_OP_ATOMIC_FSUB:
      unreachable("no corresponding legacy atomic operation");
   case LSC_OP_LOAD:
   case LSC_OP_LOAD_CMASK:
   case LSC_OP_STORE:
   case LSC_OP_STORE_CMASK:
   case LSC_OP_FENCE:
      unreachable("not an atomic op");
   }

   unreachable("invalid LSC op");
}
1248
static inline uint32_t
lsc_data_size_bytes(enum lsc_data_size data_size)
{
   /* Size in bytes of one data element for the given LSC data-size
    * encoding.  The D8U32/D16U32/D16BF32 variants occupy a full 32-bit
    * slot in the payload, so they count as 4 bytes.
    */
   switch (data_size) {
   case LSC_DATA_SIZE_D8:
      return 1;
   case LSC_DATA_SIZE_D16:
      return 2;
   case LSC_DATA_SIZE_D32:
   case LSC_DATA_SIZE_D8U32:
   case LSC_DATA_SIZE_D16U32:
   case LSC_DATA_SIZE_D16BF32:
      return 4;
   case LSC_DATA_SIZE_D64:
      return 8;
   default:
      unreachable("Unsupported data payload size.");
   }
}
1268
1269 static inline uint32_t
lsc_addr_size_bytes(enum lsc_addr_size addr_size)1270 lsc_addr_size_bytes(enum lsc_addr_size addr_size)
1271 {
1272 switch (addr_size) {
1273 case LSC_ADDR_SIZE_A16: return 2;
1274 case LSC_ADDR_SIZE_A32: return 4;
1275 case LSC_ADDR_SIZE_A64: return 8;
1276 default:
1277 unreachable("Unsupported address size.");
1278 }
1279 }
1280
static inline uint32_t
lsc_vector_length(enum lsc_vect_size vect_size)
{
   /* Number of elements for the given LSC vector-size encoding. */
   switch (vect_size) {
   case LSC_VECT_SIZE_V1: return 1;
   case LSC_VECT_SIZE_V2: return 2;
   case LSC_VECT_SIZE_V3: return 3;
   case LSC_VECT_SIZE_V4: return 4;
   case LSC_VECT_SIZE_V8: return 8;
   case LSC_VECT_SIZE_V16: return 16;
   case LSC_VECT_SIZE_V32: return 32;
   case LSC_VECT_SIZE_V64: return 64;
   default:
      unreachable("Unsupported size of vector");
   }
}
1297
static inline enum lsc_vect_size
lsc_vect_size(unsigned vect_size)
{
   /* Inverse of lsc_vector_length(): map an element count to the LSC
    * vector-size encoding.  Only the hardware-supported counts are valid.
    */
   switch(vect_size) {
   case 1: return LSC_VECT_SIZE_V1;
   case 2: return LSC_VECT_SIZE_V2;
   case 3: return LSC_VECT_SIZE_V3;
   case 4: return LSC_VECT_SIZE_V4;
   case 8: return LSC_VECT_SIZE_V8;
   case 16: return LSC_VECT_SIZE_V16;
   case 32: return LSC_VECT_SIZE_V32;
   case 64: return LSC_VECT_SIZE_V64;
   default:
      unreachable("Unsupported vector size for dataport");
   }
}
1314
static inline uint32_t
lsc_msg_desc_wcmask(UNUSED const struct intel_device_info *devinfo,
             enum elk_lsc_opcode opcode, unsigned simd_size,
             enum lsc_addr_surface_type addr_type,
             enum lsc_addr_size addr_sz, unsigned num_coordinates,
             enum lsc_data_size data_sz, unsigned num_channels,
             bool transpose, unsigned cache_ctrl, bool has_dest, unsigned cmask)
{
   /* Assemble a full LSC message descriptor.  Payload lengths are computed
    * in whole registers from the SIMD width and element sizes.  If cmask
    * is 0 for a CMASK opcode, the channel mask is derived from
    * num_channels; non-CMASK opcodes encode a vector size instead.
    */
   assert(devinfo->has_lsc);

   /* Destination (writeback) length in registers; 0 when no response. */
   unsigned dest_length = !has_dest ? 0 :
      DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * num_channels * simd_size,
                   reg_unit(devinfo) * REG_SIZE);

   /* Address payload length in registers. */
   unsigned src0_length =
      DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * num_coordinates * simd_size,
                   reg_unit(devinfo) * REG_SIZE);

   assert(!transpose || elk_lsc_opcode_has_transpose(opcode));

   unsigned msg_desc =
      SET_BITS(opcode, 5, 0) |
      SET_BITS(addr_sz, 8, 7) |
      SET_BITS(data_sz, 11, 9) |
      SET_BITS(transpose, 15, 15) |
      SET_BITS(cache_ctrl, 19, 17) |
      SET_BITS(dest_length, 24, 20) |
      SET_BITS(src0_length, 28, 25) |
      SET_BITS(addr_type, 30, 29);

   if (elk_lsc_opcode_has_cmask(opcode))
      msg_desc |= SET_BITS(cmask ? cmask : lsc_cmask(num_channels), 15, 12);
   else
      msg_desc |= SET_BITS(lsc_vect_size(num_channels), 14, 12);

   return msg_desc;
}
1352
1353 static inline uint32_t
lsc_msg_desc(UNUSED const struct intel_device_info * devinfo,enum elk_lsc_opcode opcode,unsigned simd_size,enum lsc_addr_surface_type addr_type,enum lsc_addr_size addr_sz,unsigned num_coordinates,enum lsc_data_size data_sz,unsigned num_channels,bool transpose,unsigned cache_ctrl,bool has_dest)1354 lsc_msg_desc(UNUSED const struct intel_device_info *devinfo,
1355 enum elk_lsc_opcode opcode, unsigned simd_size,
1356 enum lsc_addr_surface_type addr_type,
1357 enum lsc_addr_size addr_sz, unsigned num_coordinates,
1358 enum lsc_data_size data_sz, unsigned num_channels,
1359 bool transpose, unsigned cache_ctrl, bool has_dest)
1360 {
1361 return lsc_msg_desc_wcmask(devinfo, opcode, simd_size, addr_type, addr_sz,
1362 num_coordinates, data_sz, num_channels, transpose, cache_ctrl,
1363 has_dest, 0);
1364 }
1365
1366 static inline enum elk_lsc_opcode
lsc_msg_desc_opcode(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1367 lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo,
1368 uint32_t desc)
1369 {
1370 assert(devinfo->has_lsc);
1371 return (enum elk_lsc_opcode) GET_BITS(desc, 5, 0);
1372 }
1373
1374 static inline enum lsc_addr_size
lsc_msg_desc_addr_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1375 lsc_msg_desc_addr_size(UNUSED const struct intel_device_info *devinfo,
1376 uint32_t desc)
1377 {
1378 assert(devinfo->has_lsc);
1379 return (enum lsc_addr_size) GET_BITS(desc, 8, 7);
1380 }
1381
1382 static inline enum lsc_data_size
lsc_msg_desc_data_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1383 lsc_msg_desc_data_size(UNUSED const struct intel_device_info *devinfo,
1384 uint32_t desc)
1385 {
1386 assert(devinfo->has_lsc);
1387 return (enum lsc_data_size) GET_BITS(desc, 11, 9);
1388 }
1389
1390 static inline enum lsc_vect_size
lsc_msg_desc_vect_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1391 lsc_msg_desc_vect_size(UNUSED const struct intel_device_info *devinfo,
1392 uint32_t desc)
1393 {
1394 assert(devinfo->has_lsc);
1395 assert(!elk_lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
1396 return (enum lsc_vect_size) GET_BITS(desc, 14, 12);
1397 }
1398
1399 static inline enum lsc_cmask
lsc_msg_desc_cmask(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1400 lsc_msg_desc_cmask(UNUSED const struct intel_device_info *devinfo,
1401 uint32_t desc)
1402 {
1403 assert(devinfo->has_lsc);
1404 assert(elk_lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
1405 return (enum lsc_cmask) GET_BITS(desc, 15, 12);
1406 }
1407
1408 static inline bool
lsc_msg_desc_transpose(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1409 lsc_msg_desc_transpose(UNUSED const struct intel_device_info *devinfo,
1410 uint32_t desc)
1411 {
1412 assert(devinfo->has_lsc);
1413 return GET_BITS(desc, 15, 15);
1414 }
1415
1416 static inline unsigned
lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1417 lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo,
1418 uint32_t desc)
1419 {
1420 assert(devinfo->has_lsc);
1421 return GET_BITS(desc, 19, 17);
1422 }
1423
1424 static inline unsigned
lsc_msg_desc_dest_len(const struct intel_device_info * devinfo,uint32_t desc)1425 lsc_msg_desc_dest_len(const struct intel_device_info *devinfo,
1426 uint32_t desc)
1427 {
1428 assert(devinfo->has_lsc);
1429 return GET_BITS(desc, 24, 20) * reg_unit(devinfo);
1430 }
1431
1432 static inline unsigned
lsc_msg_desc_src0_len(const struct intel_device_info * devinfo,uint32_t desc)1433 lsc_msg_desc_src0_len(const struct intel_device_info *devinfo,
1434 uint32_t desc)
1435 {
1436 assert(devinfo->has_lsc);
1437 return GET_BITS(desc, 28, 25) * reg_unit(devinfo);
1438 }
1439
1440 static inline enum lsc_addr_surface_type
lsc_msg_desc_addr_type(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1441 lsc_msg_desc_addr_type(UNUSED const struct intel_device_info *devinfo,
1442 uint32_t desc)
1443 {
1444 assert(devinfo->has_lsc);
1445 return (enum lsc_addr_surface_type) GET_BITS(desc, 30, 29);
1446 }
1447
static inline uint32_t
lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
                   enum lsc_fence_scope scope,
                   enum lsc_flush_type flush_type,
                   bool route_to_lsc)
{
   /* Build an LSC fence message descriptor.  Fences always use a flat A32
    * "address"; the scope and flush type select what gets flushed, and
    * route_to_lsc sets the backup-routing bit.
    */
   assert(devinfo->has_lsc);
   return SET_BITS(LSC_OP_FENCE, 5, 0) |
          SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) |
          SET_BITS(scope, 11, 9) |
          SET_BITS(flush_type, 14, 12) |
          SET_BITS(route_to_lsc, 18, 18) |
          SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
}
1462
1463 static inline enum lsc_fence_scope
lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1464 lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info *devinfo,
1465 uint32_t desc)
1466 {
1467 assert(devinfo->has_lsc);
1468 return (enum lsc_fence_scope) GET_BITS(desc, 11, 9);
1469 }
1470
1471 static inline enum lsc_flush_type
lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1472 lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info *devinfo,
1473 uint32_t desc)
1474 {
1475 assert(devinfo->has_lsc);
1476 return (enum lsc_flush_type) GET_BITS(desc, 14, 12);
1477 }
1478
1479 static inline enum lsc_backup_fence_routing
lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1480 lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info *devinfo,
1481 uint32_t desc)
1482 {
1483 assert(devinfo->has_lsc);
1484 return (enum lsc_backup_fence_routing) GET_BITS(desc, 18, 18);
1485 }
1486
static inline uint32_t
lsc_bti_ex_desc(const struct intel_device_info *devinfo, unsigned bti)
{
   /* Build an LSC extended descriptor for binding-table-indexed access:
    * BTI in bits 31:24, base offset (always 0 here) in bits 23:12.
    */
   assert(devinfo->has_lsc);
   return SET_BITS(bti, 31, 24) |
          SET_BITS(0, 23, 12);  /* base offset */
}
1494
1495 static inline unsigned
lsc_bti_ex_desc_base_offset(const struct intel_device_info * devinfo,uint32_t ex_desc)1496 lsc_bti_ex_desc_base_offset(const struct intel_device_info *devinfo,
1497 uint32_t ex_desc)
1498 {
1499 assert(devinfo->has_lsc);
1500 return GET_BITS(ex_desc, 23, 12);
1501 }
1502
1503 static inline unsigned
lsc_bti_ex_desc_index(const struct intel_device_info * devinfo,uint32_t ex_desc)1504 lsc_bti_ex_desc_index(const struct intel_device_info *devinfo,
1505 uint32_t ex_desc)
1506 {
1507 assert(devinfo->has_lsc);
1508 return GET_BITS(ex_desc, 31, 24);
1509 }
1510
1511 static inline unsigned
lsc_flat_ex_desc_base_offset(const struct intel_device_info * devinfo,uint32_t ex_desc)1512 lsc_flat_ex_desc_base_offset(const struct intel_device_info *devinfo,
1513 uint32_t ex_desc)
1514 {
1515 assert(devinfo->has_lsc);
1516 return GET_BITS(ex_desc, 31, 12);
1517 }
1518
1519 static inline uint32_t
lsc_bss_ex_desc(const struct intel_device_info * devinfo,unsigned surface_state_index)1520 lsc_bss_ex_desc(const struct intel_device_info *devinfo,
1521 unsigned surface_state_index)
1522 {
1523 assert(devinfo->has_lsc);
1524 return SET_BITS(surface_state_index, 31, 6);
1525 }
1526
1527 static inline unsigned
lsc_bss_ex_desc_index(const struct intel_device_info * devinfo,uint32_t ex_desc)1528 lsc_bss_ex_desc_index(const struct intel_device_info *devinfo,
1529 uint32_t ex_desc)
1530 {
1531 assert(devinfo->has_lsc);
1532 return GET_BITS(ex_desc, 31, 6);
1533 }
1534
static inline uint32_t
elk_mdc_sm2(unsigned exec_size)
{
   /* MDC_SM2 SIMD-mode encoding: 0 = SIMD8, 1 = SIMD16. */
   assert(exec_size == 8 || exec_size == 16);
   return exec_size == 16 ? 1 : 0;
}
1541
static inline uint32_t
elk_mdc_sm2_exec_size(uint32_t sm2)
{
   /* Inverse of elk_mdc_sm2(): decode the SIMD mode back to a width. */
   assert(sm2 <= 1);
   return sm2 ? 16 : 8;
}
1548
1549 static inline uint32_t
elk_btd_spawn_msg_type(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1550 elk_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
1551 uint32_t desc)
1552 {
1553 return GET_BITS(desc, 17, 14);
1554 }
1555
1556 static inline uint32_t
elk_btd_spawn_exec_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1557 elk_btd_spawn_exec_size(UNUSED const struct intel_device_info *devinfo,
1558 uint32_t desc)
1559 {
1560 return elk_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
1561 }
1562
1563 /**
1564 * Construct a message descriptor immediate with the specified pixel
1565 * interpolator function controls.
1566 */
1567 static inline uint32_t
elk_pixel_interp_desc(UNUSED const struct intel_device_info * devinfo,unsigned msg_type,bool noperspective,bool coarse_pixel_rate,unsigned exec_size,unsigned group)1568 elk_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
1569 unsigned msg_type,
1570 bool noperspective,
1571 bool coarse_pixel_rate,
1572 unsigned exec_size,
1573 unsigned group)
1574 {
1575 assert(exec_size == 8 || exec_size == 16);
1576 const bool simd_mode = exec_size == 16;
1577 const bool slot_group = group >= 16;
1578
1579 assert(!coarse_pixel_rate);
1580 return (SET_BITS(slot_group, 11, 11) |
1581 SET_BITS(msg_type, 13, 12) |
1582 SET_BITS(!!noperspective, 14, 14) |
1583 SET_BITS(coarse_pixel_rate, 15, 15) |
1584 SET_BITS(simd_mode, 16, 16));
1585 }
1586
1587 void elk_urb_WRITE(struct elk_codegen *p,
1588 struct elk_reg dest,
1589 unsigned msg_reg_nr,
1590 struct elk_reg src0,
1591 enum elk_urb_write_flags flags,
1592 unsigned msg_length,
1593 unsigned response_length,
1594 unsigned offset,
1595 unsigned swizzle);
1596
1597 /**
1598 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
1599 * desc. If \p desc is not an immediate it will be transparently loaded to an
1600 * address register using an OR instruction.
1601 */
1602 void
1603 elk_send_indirect_message(struct elk_codegen *p,
1604 unsigned sfid,
1605 struct elk_reg dst,
1606 struct elk_reg payload,
1607 struct elk_reg desc,
1608 unsigned desc_imm,
1609 bool eot);
1610
1611 void
1612 elk_send_indirect_split_message(struct elk_codegen *p,
1613 unsigned sfid,
1614 struct elk_reg dst,
1615 struct elk_reg payload0,
1616 struct elk_reg payload1,
1617 struct elk_reg desc,
1618 unsigned desc_imm,
1619 struct elk_reg ex_desc,
1620 unsigned ex_desc_imm,
1621 bool ex_desc_scratch,
1622 bool ex_bso,
1623 bool eot);
1624
1625 void elk_ff_sync(struct elk_codegen *p,
1626 struct elk_reg dest,
1627 unsigned msg_reg_nr,
1628 struct elk_reg src0,
1629 bool allocate,
1630 unsigned response_length,
1631 bool eot);
1632
1633 void elk_svb_write(struct elk_codegen *p,
1634 struct elk_reg dest,
1635 unsigned msg_reg_nr,
1636 struct elk_reg src0,
1637 unsigned binding_table_index,
1638 bool send_commit_msg);
1639
1640 elk_inst *elk_fb_WRITE(struct elk_codegen *p,
1641 struct elk_reg payload,
1642 struct elk_reg implied_header,
1643 unsigned msg_control,
1644 unsigned binding_table_index,
1645 unsigned msg_length,
1646 unsigned response_length,
1647 bool eot,
1648 bool last_render_target,
1649 bool header_present);
1650
1651 void elk_SAMPLE(struct elk_codegen *p,
1652 struct elk_reg dest,
1653 unsigned msg_reg_nr,
1654 struct elk_reg src0,
1655 unsigned binding_table_index,
1656 unsigned sampler,
1657 unsigned msg_type,
1658 unsigned response_length,
1659 unsigned msg_length,
1660 unsigned header_present,
1661 unsigned simd_mode,
1662 unsigned return_format);
1663
1664 void elk_adjust_sampler_state_pointer(struct elk_codegen *p,
1665 struct elk_reg header,
1666 struct elk_reg sampler_index);
1667
1668 void elk_gfx4_math(struct elk_codegen *p,
1669 struct elk_reg dest,
1670 unsigned function,
1671 unsigned msg_reg_nr,
1672 struct elk_reg src,
1673 unsigned precision );
1674
1675 void elk_gfx6_math(struct elk_codegen *p,
1676 struct elk_reg dest,
1677 unsigned function,
1678 struct elk_reg src0,
1679 struct elk_reg src1);
1680
1681 void elk_oword_block_read(struct elk_codegen *p,
1682 struct elk_reg dest,
1683 struct elk_reg mrf,
1684 uint32_t offset,
1685 uint32_t bind_table_index);
1686
1687 unsigned elk_scratch_surface_idx(const struct elk_codegen *p);
1688
1689 void elk_oword_block_read_scratch(struct elk_codegen *p,
1690 struct elk_reg dest,
1691 struct elk_reg mrf,
1692 int num_regs,
1693 unsigned offset);
1694
1695 void elk_oword_block_write_scratch(struct elk_codegen *p,
1696 struct elk_reg mrf,
1697 int num_regs,
1698 unsigned offset);
1699
1700 void elk_gfx7_block_read_scratch(struct elk_codegen *p,
1701 struct elk_reg dest,
1702 int num_regs,
1703 unsigned offset);
1704
1705 /**
1706 * Return the generation-specific jump distance scaling factor.
1707 *
1708 * Given the number of instructions to jump, we need to scale by
1709 * some number to obtain the actual jump distance to program in an
1710 * instruction.
1711 */
1712 static inline unsigned
elk_jump_scale(const struct intel_device_info * devinfo)1713 elk_jump_scale(const struct intel_device_info *devinfo)
1714 {
1715 /* Broadwell measures jump targets in bytes. */
1716 if (devinfo->ver >= 8)
1717 return 16;
1718
1719 /* Ironlake and later measure jump targets in 64-bit data chunks (in order
1720 * (to support compaction), so each 128-bit instruction requires 2 chunks.
1721 */
1722 if (devinfo->ver >= 5)
1723 return 2;
1724
1725 /* Gfx4 simply uses the number of 128-bit instructions. */
1726 return 1;
1727 }
1728
1729 void elk_barrier(struct elk_codegen *p, struct elk_reg src);
1730
1731 /* If/else/endif. Works by manipulating the execution flags on each
1732 * channel.
1733 */
1734 elk_inst *elk_IF(struct elk_codegen *p, unsigned execute_size);
1735 elk_inst *elk_gfx6_IF(struct elk_codegen *p, enum elk_conditional_mod conditional,
1736 struct elk_reg src0, struct elk_reg src1);
1737
1738 void elk_ELSE(struct elk_codegen *p);
1739 void elk_ENDIF(struct elk_codegen *p);
1740
1741 /* DO/WHILE loops:
1742 */
1743 elk_inst *elk_DO(struct elk_codegen *p, unsigned execute_size);
1744
1745 elk_inst *elk_WHILE(struct elk_codegen *p);
1746
1747 elk_inst *elk_BREAK(struct elk_codegen *p);
1748 elk_inst *elk_CONT(struct elk_codegen *p);
1749 elk_inst *elk_HALT(struct elk_codegen *p);
1750
1751 /* Forward jumps:
1752 */
1753 void elk_land_fwd_jump(struct elk_codegen *p, int jmp_insn_idx);
1754
1755 elk_inst *elk_JMPI(struct elk_codegen *p, struct elk_reg index,
1756 unsigned predicate_control);
1757
1758 void elk_NOP(struct elk_codegen *p);
1759
1760 void elk_WAIT(struct elk_codegen *p);
1761
1762 /* Special case: there is never a destination, execution size will be
1763 * taken from src0:
1764 */
1765 void elk_CMP(struct elk_codegen *p,
1766 struct elk_reg dest,
1767 unsigned conditional,
1768 struct elk_reg src0,
1769 struct elk_reg src1);
1770
1771 void elk_CMPN(struct elk_codegen *p,
1772 struct elk_reg dest,
1773 unsigned conditional,
1774 struct elk_reg src0,
1775 struct elk_reg src1);
1776
1777 void
1778 elk_untyped_atomic(struct elk_codegen *p,
1779 struct elk_reg dst,
1780 struct elk_reg payload,
1781 struct elk_reg surface,
1782 unsigned atomic_op,
1783 unsigned msg_length,
1784 bool response_expected,
1785 bool header_present);
1786
1787 void
1788 elk_untyped_surface_read(struct elk_codegen *p,
1789 struct elk_reg dst,
1790 struct elk_reg payload,
1791 struct elk_reg surface,
1792 unsigned msg_length,
1793 unsigned num_channels);
1794
1795 void
1796 elk_untyped_surface_write(struct elk_codegen *p,
1797 struct elk_reg payload,
1798 struct elk_reg surface,
1799 unsigned msg_length,
1800 unsigned num_channels,
1801 bool header_present);
1802
1803 void
1804 elk_memory_fence(struct elk_codegen *p,
1805 struct elk_reg dst,
1806 struct elk_reg src,
1807 enum elk_opcode send_op,
1808 enum elk_message_target sfid,
1809 uint32_t desc,
1810 bool commit_enable,
1811 unsigned bti);
1812
1813 void
1814 elk_pixel_interpolator_query(struct elk_codegen *p,
1815 struct elk_reg dest,
1816 struct elk_reg mrf,
1817 bool noperspective,
1818 bool coarse_pixel_rate,
1819 unsigned mode,
1820 struct elk_reg data,
1821 unsigned msg_length,
1822 unsigned response_length);
1823
1824 void
1825 elk_find_live_channel(struct elk_codegen *p,
1826 struct elk_reg dst,
1827 bool last);
1828
1829 void
1830 elk_broadcast(struct elk_codegen *p,
1831 struct elk_reg dst,
1832 struct elk_reg src,
1833 struct elk_reg idx);
1834
1835 void
1836 elk_float_controls_mode(struct elk_codegen *p,
1837 unsigned mode, unsigned mask);
1838
1839 void
1840 elk_update_reloc_imm(const struct elk_isa_info *isa,
1841 elk_inst *inst,
1842 uint32_t value);
1843
1844 void
1845 elk_MOV_reloc_imm(struct elk_codegen *p,
1846 struct elk_reg dst,
1847 enum elk_reg_type src_type,
1848 uint32_t id);
1849
1850 unsigned
1851 elk_num_sources_from_inst(const struct elk_isa_info *isa,
1852 const elk_inst *inst);
1853
1854 /***********************************************************************
1855 * elk_eu_util.c:
1856 */
1857
1858 void elk_copy_indirect_to_indirect(struct elk_codegen *p,
1859 struct elk_indirect dst_ptr,
1860 struct elk_indirect src_ptr,
1861 unsigned count);
1862
1863 void elk_copy_from_indirect(struct elk_codegen *p,
1864 struct elk_reg dst,
1865 struct elk_indirect ptr,
1866 unsigned count);
1867
1868 void elk_copy4(struct elk_codegen *p,
1869 struct elk_reg dst,
1870 struct elk_reg src,
1871 unsigned count);
1872
1873 void elk_copy8(struct elk_codegen *p,
1874 struct elk_reg dst,
1875 struct elk_reg src,
1876 unsigned count);
1877
1878 void elk_math_invert( struct elk_codegen *p,
1879 struct elk_reg dst,
1880 struct elk_reg src);
1881
1882 void elk_set_src1(struct elk_codegen *p, elk_inst *insn, struct elk_reg reg);
1883
1884 void elk_set_desc_ex(struct elk_codegen *p, elk_inst *insn,
1885 unsigned desc, unsigned ex_desc);
1886
static inline void
elk_set_desc(struct elk_codegen *p, elk_inst *insn, unsigned desc)
{
   /* Convenience wrapper: set the message descriptor with a zero extended
    * descriptor.
    */
   elk_set_desc_ex(p, insn, desc, 0);
}
1892
1893 void elk_set_uip_jip(struct elk_codegen *p, int start_offset);
1894
1895 enum elk_conditional_mod elk_negate_cmod(enum elk_conditional_mod cmod);
1896 enum elk_conditional_mod elk_swap_cmod(enum elk_conditional_mod cmod);
1897
1898 /* elk_eu_compact.c */
1899 void elk_compact_instructions(struct elk_codegen *p, int start_offset,
1900 struct elk_disasm_info *disasm);
1901 void elk_uncompact_instruction(const struct elk_isa_info *isa,
1902 elk_inst *dst, elk_compact_inst *src);
1903 bool elk_try_compact_instruction(const struct elk_isa_info *isa,
1904 elk_compact_inst *dst, const elk_inst *src);
1905
1906 void elk_debug_compact_uncompact(const struct elk_isa_info *isa,
1907 elk_inst *orig, elk_inst *uncompacted);
1908
1909 /* elk_eu_validate.c */
1910 bool elk_validate_instruction(const struct elk_isa_info *isa,
1911 const elk_inst *inst, int offset,
1912 unsigned inst_size,
1913 struct elk_disasm_info *disasm);
1914 bool elk_validate_instructions(const struct elk_isa_info *isa,
1915 const void *assembly, int start_offset, int end_offset,
1916 struct elk_disasm_info *disasm);
1917
1918 static inline int
next_offset(const struct intel_device_info * devinfo,void * store,int offset)1919 next_offset(const struct intel_device_info *devinfo, void *store, int offset)
1920 {
1921 elk_inst *insn = (elk_inst *)((char *)store + offset);
1922
1923 if (elk_inst_cmpt_control(devinfo, insn))
1924 return offset + 8;
1925 else
1926 return offset + 16;
1927 }
1928
1929 /** Maximum SEND message length */
1930 #define ELK_MAX_MSG_LENGTH 15
1931
/** First MRF register used by spills */
#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)

/** First MRF register used by pull loads */
#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)
1937
1938 #ifdef __cplusplus
1939 }
1940 #endif
1941
1942 #endif
1943