xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/brw_eu_emit.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics to
4  develop this 3D driver.
5 
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13 
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <[email protected]>
30   */
31 
32 
33 #include "brw_eu_defines.h"
34 #include "brw_eu.h"
35 
36 #include "util/ralloc.h"
37 
/**
 * Encode the destination operand \p dest into \p inst.
 *
 * Three encodings exist depending on opcode and hardware generation:
 * Gfx12+ SEND(C) (compact: only reg file and reg number), pre-Gfx12
 * split-send SENDS(C) (16-byte-granular subreg), and the generic ALU
 * path (full file/type/region/writemask encoding).  Per-generation
 * restrictions are checked with asserts.
 */
void
brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (dest.file == FIXED_GRF)
      assert(dest.nr < XE2_MAX_GRF);

   /* The hardware has a restriction where a destination of size Byte with
    * a stride of 1 is only allowed for a packed byte MOV. For any other
    * instruction, the stride must be at least 2, even when the destination
    * is the NULL register.
    */
   if (dest.file == ARF &&
       dest.nr == BRW_ARF_NULL &&
       brw_type_size_bytes(dest.type) == 1 &&
       dest.hstride == BRW_HORIZONTAL_STRIDE_1) {
      dest.hstride = BRW_HORIZONTAL_STRIDE_2;
   }

   if (devinfo->ver >= 12 &&
       (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
      /* Gfx12+ SEND(C): destination must be a direct, unmodified GRF/ARF
       * starting at subregister 0; only file and register number are
       * actually encoded.
       */
      assert(dest.file == FIXED_GRF ||
             dest.file == ARF);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr == 0);
      assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
             (dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              dest.vstride == dest.width + 1));
      assert(!dest.negate && !dest.abs);
      brw_inst_set_dst_reg_file(devinfo, inst, dest.file);
      brw_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));

   } else if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
              brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      /* Pre-Gfx12 split-send: the subregister is encoded in 16-byte
       * units (hence the % 16 / / 16 below).
       */
      assert(devinfo->ver < 12);
      assert(dest.file == FIXED_GRF ||
             dest.file == ARF);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr % 16 == 0);
      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
             dest.vstride == dest.width + 1);
      assert(!dest.negate && !dest.abs);
      brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
      brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
      brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file);
   } else {
      /* Generic ALU destination: encode file/type, then the fields that
       * differ between direct and register-indirect addressing and between
       * Align1 and Align16 access modes.
       */
      brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
      brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);

      if (dest.address_mode == BRW_ADDRESS_DIRECT) {
         brw_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_dst_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));
            /* A zero stride is not encodable for a destination; promote
             * it to 1.
             */
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
            brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
            if (dest.file == FIXED_GRF) {
               assert(dest.writemask != 0);
            }
            /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
             *    Although Dst.HorzStride is a don't care for Align16, HW needs
             *    this to be programmed as "01".
             */
            brw_inst_set_dst_hstride(devinfo, inst, 1);
         }
      } else {
         brw_inst_set_dst_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));

         /* These are different sizes in align1 vs align16:
          */
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
                                          dest.indirect_offset);
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
                                           dest.indirect_offset);
            /* even ignored in da16, still need to set as '01' */
            brw_inst_set_dst_hstride(devinfo, inst, 1);
         }
      }
   }
}
129 
/**
 * Encode source operand 0 (\p reg) into \p inst.
 *
 * SEND-family opcodes treat src0 purely as the GRF where the message
 * payload starts, so modifiers and regions are ignored there; ordinary
 * instructions get the full file/type/modifier/region/immediate encoding.
 */
void
brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == FIXED_GRF)
      assert(reg.nr < XE2_MAX_GRF);

   if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND  ||
       brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC ||
       brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
       brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      /* Any source modifiers or regions will be ignored, since this just
       * identifies the GRF to start reading the message contents from.
       * Check for some likely failures.
       */
      assert(!reg.negate);
      assert(!reg.abs);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
   }

   if (devinfo->ver >= 12 &&
       (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
      /* Gfx12+ SEND(C): only the register file and number are encoded. */
      assert(reg.file != IMM);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_send_src0_reg_file(devinfo, inst, reg.file);
      brw_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));

   } else if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
              brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      /* Pre-Gfx12 split-send: subregister encoded in 16-byte units. */
      assert(reg.file == FIXED_GRF);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr % 16 == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
      brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
   } else {
      brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
      brw_inst_set_src0_abs(devinfo, inst, reg.abs);
      brw_inst_set_src0_negate(devinfo, inst, reg.negate);
      brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);

      if (reg.file == IMM) {
         /* Immediates are stored in the dedicated immediate field; 64-bit
          * values (DF/Q/UQ) take the wide form.
          */
         if (reg.type == BRW_TYPE_DF)
            brw_inst_set_imm_df(devinfo, inst, reg.df);
         else if (reg.type == BRW_TYPE_UQ ||
                  reg.type == BRW_TYPE_Q)
            brw_inst_set_imm_uq(devinfo, inst, reg.u64);
         else
            brw_inst_set_imm_ud(devinfo, inst, reg.ud);

         /* Pre-Gfx12, a <= 32-bit immediate in src0 requires src1's file
          * and hardware type fields to be filled in to match.
          */
         if (devinfo->ver < 12 && brw_type_size_bytes(reg.type) < 8) {
            brw_inst_set_src1_reg_file(devinfo, inst,
                                       ARF);
            brw_inst_set_src1_reg_hw_type(devinfo, inst,
                                          brw_inst_src0_reg_hw_type(devinfo, inst));
         }
      } else {
         if (reg.address_mode == BRW_ADDRESS_DIRECT) {
            brw_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
            if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_inst_set_src0_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
            } else {
               brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
            }
         } else {
            brw_inst_set_src0_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));

            if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
            } else {
               brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
            }
         }

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            /* A width-1 region under SIMD1 execution collapses to the
             * scalar <0,1,0> region.
             */
            if (reg.width == BRW_WIDTH_1 &&
                brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_inst_set_src0_hstride(devinfo, inst, reg.hstride);
               brw_inst_set_src0_width(devinfo, inst, reg.width);
               brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_inst_set_src0_da16_swiz_x(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_inst_set_src0_da16_swiz_y(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_inst_set_src0_da16_swiz_z(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_inst_set_src0_da16_swiz_w(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}
247 
248 
/**
 * Encode source operand 1 (\p reg) into \p inst.
 *
 * Split-send opcodes (SENDS/SENDSC, and plain SEND(C) on Gfx12+) use the
 * dedicated send-src1 fields; every other instruction gets the regular
 * src1 encoding, where only 32-bit immediates are allowed.
 */
void
brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == FIXED_GRF)
      assert(reg.nr < XE2_MAX_GRF);

   if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
       brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC ||
       (devinfo->ver >= 12 &&
        (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
         brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC))) {
      /* Send-style src1: a direct, unmodified GRF/ARF at subregister 0;
       * only file and register number are encoded.
       */
      assert(reg.file == FIXED_GRF ||
             reg.file == ARF);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_send_src1_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
      brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file);
   } else {
      /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
       *
       *    "Accumulator registers may be accessed explicitly as src0
       *    operands only."
       */
      assert(reg.file != ARF ||
             (reg.nr & 0xF0) != BRW_ARF_ACCUMULATOR);

      brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
      brw_inst_set_src1_abs(devinfo, inst, reg.abs);
      brw_inst_set_src1_negate(devinfo, inst, reg.negate);

      /* Only src1 can be immediate in two-argument instructions.
       */
      assert(brw_inst_src0_reg_file(devinfo, inst) != IMM);

      if (reg.file == IMM) {
         /* two-argument instructions can only use 32-bit immediates */
         assert(brw_type_size_bytes(reg.type) < 8);
         brw_inst_set_imm_ud(devinfo, inst, reg.ud);
      } else {
         /* This is a hardware restriction, which may or may not be lifted
          * in the future:
          */
         assert (reg.address_mode == BRW_ADDRESS_DIRECT);
         /* assert (reg.file == FIXED_GRF); */

         brw_inst_set_src1_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_src1_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
         } else {
            brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
         }

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            /* A width-1 region under SIMD1 execution collapses to the
             * scalar <0,1,0> region.
             */
            if (reg.width == BRW_WIDTH_1 &&
                brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_inst_set_src1_hstride(devinfo, inst, reg.hstride);
               brw_inst_set_src1_width(devinfo, inst, reg.width);
               brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_inst_set_src1_da16_swiz_x(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_inst_set_src1_da16_swiz_y(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_inst_set_src1_da16_swiz_z(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_inst_set_src1_da16_swiz_w(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}
340 
341 /**
342  * Specify the descriptor and extended descriptor immediate for a SEND(C)
343  * message instruction.
344  */
345 void
brw_set_desc_ex(struct brw_codegen * p,brw_inst * inst,unsigned desc,unsigned ex_desc)346 brw_set_desc_ex(struct brw_codegen *p, brw_inst *inst,
347                 unsigned desc, unsigned ex_desc)
348 {
349    const struct intel_device_info *devinfo = p->devinfo;
350    assert(brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
351           brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC);
352    if (devinfo->ver < 12)
353       brw_inst_set_src1_file_type(devinfo, inst,
354                                   IMM, BRW_TYPE_UD);
355    brw_inst_set_send_desc(devinfo, inst, desc);
356    if (devinfo->ver >= 9)
357       brw_inst_set_send_ex_desc(devinfo, inst, ex_desc);
358 }
359 
/* Copy the codegen-level default instruction state (\p state) into the
 * per-instruction fields of \p insn: execution size/group, access mode,
 * masking, software scoreboard (Gfx12+ only), saturate, predication and
 * flag register selection.
 */
static void
brw_inst_set_state(const struct brw_isa_info *isa,
                   brw_inst *insn,
                   const struct brw_insn_state *state)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   brw_inst_set_exec_size(devinfo, insn, state->exec_size);
   brw_inst_set_group(devinfo, insn, state->group);
   brw_inst_set_access_mode(devinfo, insn, state->access_mode);
   brw_inst_set_mask_control(devinfo, insn, state->mask_control);
   /* SWSB (software scoreboard) encoding exists only on Gfx12+. */
   if (devinfo->ver >= 12)
      brw_inst_set_swsb(devinfo, insn, tgl_swsb_encode(devinfo, state->swsb, brw_inst_opcode(isa, insn)));
   brw_inst_set_saturate(devinfo, insn, state->saturate);
   brw_inst_set_pred_control(devinfo, insn, state->predicate);
   brw_inst_set_pred_inv(devinfo, insn, state->pred_inv);

   /* Align16 three-source instructions keep the flag register selection in
    * different bit fields than everything else.
    */
   if (is_3src(isa, brw_inst_opcode(isa, insn)) &&
       state->access_mode == BRW_ALIGN_16) {
      brw_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      brw_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   } else {
      brw_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      brw_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   }

   /* AccWrCtrl is only encoded before Gfx20. */
   if (devinfo->ver < 20)
      brw_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
}
389 
390 static brw_inst *
brw_append_insns(struct brw_codegen * p,unsigned nr_insn,unsigned alignment)391 brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned alignment)
392 {
393    assert(util_is_power_of_two_or_zero(sizeof(brw_inst)));
394    assert(util_is_power_of_two_or_zero(alignment));
395    const unsigned align_insn = MAX2(alignment / sizeof(brw_inst), 1);
396    const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
397    const unsigned new_nr_insn = start_insn + nr_insn;
398 
399    if (p->store_size < new_nr_insn) {
400       p->store_size = util_next_power_of_two(new_nr_insn * sizeof(brw_inst));
401       p->store = reralloc(p->mem_ctx, p->store, brw_inst, p->store_size);
402    }
403 
404    /* Memset any padding due to alignment to 0.  We don't want to be hashing
405     * or caching a bunch of random bits we got from a memory allocation.
406     */
407    if (p->nr_insn < start_insn) {
408       memset(&p->store[p->nr_insn], 0,
409              (start_insn - p->nr_insn) * sizeof(brw_inst));
410    }
411 
412    assert(p->next_insn_offset == p->nr_insn * sizeof(brw_inst));
413    p->nr_insn = new_nr_insn;
414    p->next_insn_offset = new_nr_insn * sizeof(brw_inst);
415 
416    return &p->store[start_insn];
417 }
418 
/* Align the next instruction offset to \p alignment bytes without emitting
 * anything (appends zero instructions; any padding is zero-filled by
 * brw_append_insns).
 */
void
brw_realign(struct brw_codegen *p, unsigned alignment)
{
   brw_append_insns(p, 0, alignment);
}
424 
425 int
brw_append_data(struct brw_codegen * p,void * data,unsigned size,unsigned alignment)426 brw_append_data(struct brw_codegen *p, void *data,
427                 unsigned size, unsigned alignment)
428 {
429    unsigned nr_insn = DIV_ROUND_UP(size, sizeof(brw_inst));
430    void *dst = brw_append_insns(p, nr_insn, alignment);
431    memcpy(dst, data, size);
432 
433    /* If it's not a whole number of instructions, memset the end */
434    if (size < nr_insn * sizeof(brw_inst))
435       memset(dst + size, 0, nr_insn * sizeof(brw_inst) - size);
436 
437    return dst - (void *)p->store;
438 }
439 
440 #define next_insn brw_next_insn
441 brw_inst *
brw_next_insn(struct brw_codegen * p,unsigned opcode)442 brw_next_insn(struct brw_codegen *p, unsigned opcode)
443 {
444    brw_inst *insn = brw_append_insns(p, 1, sizeof(brw_inst));
445 
446    memset(insn, 0, sizeof(*insn));
447    brw_inst_set_opcode(p->isa, insn, opcode);
448 
449    /* Apply the default instruction state */
450    brw_inst_set_state(p->isa, insn, p->current);
451 
452    return insn;
453 }
454 
455 void
brw_add_reloc(struct brw_codegen * p,uint32_t id,enum brw_shader_reloc_type type,uint32_t offset,uint32_t delta)456 brw_add_reloc(struct brw_codegen *p, uint32_t id,
457               enum brw_shader_reloc_type type,
458               uint32_t offset, uint32_t delta)
459 {
460    if (p->num_relocs + 1 > p->reloc_array_size) {
461       p->reloc_array_size = MAX2(16, p->reloc_array_size * 2);
462       p->relocs = reralloc(p->mem_ctx, p->relocs,
463                            struct brw_shader_reloc, p->reloc_array_size);
464    }
465 
466    p->relocs[p->num_relocs++] = (struct brw_shader_reloc) {
467       .id = id,
468       .type = type,
469       .offset = offset,
470       .delta = delta,
471    };
472 }
473 
474 static brw_inst *
brw_alu1(struct brw_codegen * p,unsigned opcode,struct brw_reg dest,struct brw_reg src)475 brw_alu1(struct brw_codegen *p, unsigned opcode,
476          struct brw_reg dest, struct brw_reg src)
477 {
478    brw_inst *insn = next_insn(p, opcode);
479    brw_set_dest(p, insn, dest);
480    brw_set_src0(p, insn, src);
481    return insn;
482 }
483 
484 static brw_inst *
brw_alu2(struct brw_codegen * p,unsigned opcode,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)485 brw_alu2(struct brw_codegen *p, unsigned opcode,
486          struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
487 {
488    /* 64-bit immediates are only supported on 1-src instructions */
489    assert(src0.file != IMM ||
490           brw_type_size_bytes(src0.type) <= 4);
491    assert(src1.file != IMM ||
492           brw_type_size_bytes(src1.type) <= 4);
493 
494    brw_inst *insn = next_insn(p, opcode);
495    brw_set_dest(p, insn, dest);
496    brw_set_src0(p, insn, src0);
497    brw_set_src1(p, insn, src1);
498    return insn;
499 }
500 
501 static int
get_3src_subreg_nr(struct brw_reg reg)502 get_3src_subreg_nr(struct brw_reg reg)
503 {
504    /* Normally, SubRegNum is in bytes (0..31).  However, 3-src instructions
505     * use 32-bit units (components 0..7).  Since they only support F/D/UD
506     * types, this doesn't lose any flexibility, but uses fewer bits.
507     */
508    return reg.subnr / 4;
509 }
510 
511 static enum gfx10_align1_3src_vertical_stride
to_3src_align1_vstride(const struct intel_device_info * devinfo,enum brw_vertical_stride vstride)512 to_3src_align1_vstride(const struct intel_device_info *devinfo,
513                        enum brw_vertical_stride vstride)
514 {
515    switch (vstride) {
516    case BRW_VERTICAL_STRIDE_0:
517       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0;
518    case BRW_VERTICAL_STRIDE_1:
519       assert(devinfo->ver >= 12);
520       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1;
521    case BRW_VERTICAL_STRIDE_2:
522       assert(devinfo->ver < 12);
523       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2;
524    case BRW_VERTICAL_STRIDE_4:
525       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4;
526    case BRW_VERTICAL_STRIDE_8:
527    case BRW_VERTICAL_STRIDE_16:
528       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8;
529    default:
530       unreachable("invalid vstride");
531    }
532 }
533 
534 
535 static enum gfx10_align1_3src_src_horizontal_stride
to_3src_align1_hstride(enum brw_horizontal_stride hstride)536 to_3src_align1_hstride(enum brw_horizontal_stride hstride)
537 {
538    switch (hstride) {
539    case BRW_HORIZONTAL_STRIDE_0:
540       return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0;
541    case BRW_HORIZONTAL_STRIDE_1:
542       return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1;
543    case BRW_HORIZONTAL_STRIDE_2:
544       return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2;
545    case BRW_HORIZONTAL_STRIDE_4:
546       return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4;
547    default:
548       unreachable("invalid hstride");
549    }
550 }
551 
/* Emit a three-source ALU instruction (e.g. MAD/LRP-style opcodes).
 *
 * Align1 mode uses the Gfx10+ a1 encoding (per-source types, compressed
 * stride encodings, optional immediates in src0/src2); Align16 mode uses
 * the legacy a16 encoding (shared type, swizzles, writemask, rep-ctrl).
 * All operands must use direct addressing.
 */
static brw_inst *
brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
         struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *inst = next_insn(p, opcode);

   assert(dest.nr < XE2_MAX_GRF);

   /* Gfx10+: at most one of src0/src2 may be an immediate. */
   if (devinfo->ver >= 10)
      assert(!(src0.file == IMM &&
               src2.file == IMM));

   assert(src0.file == IMM || src0.nr < XE2_MAX_GRF);
   assert(src1.file != IMM && src1.nr < XE2_MAX_GRF);
   assert(src2.file == IMM || src2.nr < XE2_MAX_GRF);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
      /* Align1 destination: a GRF or the accumulator; subreg is encoded
       * in 8-byte units.
       */
      assert(dest.file == FIXED_GRF ||
             (dest.file == ARF &&
              (dest.nr & 0xF0) == BRW_ARF_ACCUMULATOR));

      brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, dest.file);
      brw_inst_set_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
      brw_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest) / 8);
      brw_inst_set_3src_a1_dst_hstride(devinfo, inst, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1);

      /* Execution type (float vs. integer pipeline) follows the
       * destination type.
       */
      if (brw_type_is_float(dest.type)) {
         brw_inst_set_3src_a1_exec_type(devinfo, inst,
                                        BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
      } else {
         brw_inst_set_3src_a1_exec_type(devinfo, inst,
                                        BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
      }

      brw_inst_set_3src_a1_dst_type(devinfo, inst, dest.type);
      brw_inst_set_3src_a1_src0_type(devinfo, inst, src0.type);
      brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type);
      brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type);

      if (src0.file == IMM) {
         brw_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud);
      } else {
         brw_inst_set_3src_a1_src0_vstride(
            devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride));
         brw_inst_set_3src_a1_src0_hstride(devinfo, inst,
                                           to_3src_align1_hstride(src0.hstride));
         brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));
         brw_inst_set_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
         brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
         brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
      }
      brw_inst_set_3src_a1_src1_vstride(
         devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride));
      brw_inst_set_3src_a1_src1_hstride(devinfo, inst,
                                        to_3src_align1_hstride(src1.hstride));

      brw_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
      /* ARF src1 can only be the accumulator (asserted below). */
      if (src1.file == ARF) {
         brw_inst_set_3src_src1_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
      } else {
         brw_inst_set_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
      }
      brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
      brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);

      if (src2.file == IMM) {
         brw_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud);
      } else {
         brw_inst_set_3src_a1_src2_hstride(devinfo, inst,
                                           to_3src_align1_hstride(src2.hstride));
         /* no vstride on src2 */
         brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
         brw_inst_set_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
         brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
         brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
      }

      assert(src0.file == FIXED_GRF ||
             src0.file == IMM);
      assert(src1.file == FIXED_GRF ||
             (src1.file == ARF &&
              src1.nr == BRW_ARF_ACCUMULATOR));
      assert(src2.file == FIXED_GRF ||
             src2.file == IMM);

      /* Gfx12+ encodes an explicit "is immediate" bit for src0/src2;
       * earlier generations encode the register file directly.
       */
      if (devinfo->ver >= 12) {
         if (src0.file == IMM) {
            brw_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1);
         } else {
            brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file);
         }

         brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file);

         if (src2.file == IMM) {
            brw_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1);
         } else {
            brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file);
         }
      } else {
         brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file);
         brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file);
         brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file);
      }

   } else {
      /* Align16: GRF-only operands, subregs in 32-bit units, swizzles and
       * a destination writemask instead of strides.
       */
      assert(dest.file == FIXED_GRF);
      assert(dest.type == BRW_TYPE_F  ||
             dest.type == BRW_TYPE_DF ||
             dest.type == BRW_TYPE_D  ||
             dest.type == BRW_TYPE_UD ||
             dest.type == BRW_TYPE_HF);
      brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
      brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4);
      brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);

      assert(src0.file == FIXED_GRF);
      brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
      brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0));
      brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
      brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
      brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
      brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst,
                                          src0.vstride == BRW_VERTICAL_STRIDE_0);

      assert(src1.file == FIXED_GRF);
      brw_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle);
      brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1));
      brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
      brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
      brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
      brw_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst,
                                          src1.vstride == BRW_VERTICAL_STRIDE_0);

      assert(src2.file == FIXED_GRF);
      brw_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle);
      brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2));
      brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
      brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
      brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
      brw_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst,
                                          src2.vstride == BRW_VERTICAL_STRIDE_0);

      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types.  The MAD and LRP emitters ensure
       * that all four types are float.  The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
      brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);

      /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType:
       *
       *    "Three source instructions can use operands with mixed-mode
       *     precision. When SrcType field is set to :f or :hf it defines
       *     precision for source 0 only, and fields Src1Type and Src2Type
       *     define precision for other source operands:
       *
       *     0b = :f. Single precision Float (32-bit).
       *     1b = :hf. Half precision Float (16-bit)."
       */
      if (src1.type == BRW_TYPE_HF)
         brw_inst_set_3src_a16_src1_type(devinfo, inst, 1);

      if (src2.type == BRW_TYPE_HF)
         brw_inst_set_3src_a16_src2_type(devinfo, inst, 1);
   }

   return inst;
}
728 
729 static brw_inst *
brw_dpas_three_src(struct brw_codegen * p,enum opcode opcode,enum gfx12_systolic_depth sdepth,unsigned rcount,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1,struct brw_reg src2)730 brw_dpas_three_src(struct brw_codegen *p, enum opcode opcode,
731                    enum gfx12_systolic_depth sdepth, unsigned rcount, struct brw_reg dest,
732                    struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
733 {
734    const struct intel_device_info *devinfo = p->devinfo;
735    brw_inst *inst = next_insn(p, opcode);
736 
737    assert(dest.file == FIXED_GRF);
738    brw_inst_set_dpas_3src_dst_reg_file(devinfo, inst,
739                                        FIXED_GRF);
740    brw_inst_set_dpas_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
741    brw_inst_set_dpas_3src_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));
742 
743    if (brw_type_is_float(dest.type)) {
744       brw_inst_set_dpas_3src_exec_type(devinfo, inst,
745                                        BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
746    } else {
747       brw_inst_set_dpas_3src_exec_type(devinfo, inst,
748                                        BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
749    }
750 
751    brw_inst_set_dpas_3src_sdepth(devinfo, inst, sdepth);
752    brw_inst_set_dpas_3src_rcount(devinfo, inst, rcount - 1);
753 
754    brw_inst_set_dpas_3src_dst_type(devinfo, inst, dest.type);
755    brw_inst_set_dpas_3src_src0_type(devinfo, inst, src0.type);
756    brw_inst_set_dpas_3src_src1_type(devinfo, inst, src1.type);
757    brw_inst_set_dpas_3src_src2_type(devinfo, inst, src2.type);
758 
759    assert(src0.file == FIXED_GRF ||
760           (src0.file == ARF &&
761            src0.nr == BRW_ARF_NULL));
762 
763    brw_inst_set_dpas_3src_src0_reg_file(devinfo, inst, src0.file);
764    brw_inst_set_dpas_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
765    brw_inst_set_dpas_3src_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));
766 
767    assert(src1.file == FIXED_GRF);
768 
769    brw_inst_set_dpas_3src_src1_reg_file(devinfo, inst, src1.file);
770    brw_inst_set_dpas_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
771    brw_inst_set_dpas_3src_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
772    brw_inst_set_dpas_3src_src1_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);
773 
774    assert(src2.file == FIXED_GRF);
775 
776    brw_inst_set_dpas_3src_src2_reg_file(devinfo, inst, src2.file);
777    brw_inst_set_dpas_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
778    brw_inst_set_dpas_3src_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
779    brw_inst_set_dpas_3src_src2_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);
780 
781    return inst;
782 }
783 
784 /***********************************************************************
785  * Convenience routines.
786  */
/* Emit a one-source ALU instruction using the current default state. */
#define ALU1(OP)					\
brw_inst *brw_##OP(struct brw_codegen *p,		\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

/* Emit a two-source ALU instruction using the current default state. */
#define ALU2(OP)					\
brw_inst *brw_##OP(struct brw_codegen *p,		\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Emit a three-source ALU instruction.  In align-16 mode a scalar source
 * (vertical stride 0) is rewritten to an XXXX swizzle so that component X
 * is replicated.
 */
#define ALU3(OP)					\
brw_inst *brw_##OP(struct brw_codegen *p,		\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)   			\
{                                                       \
   if (p->current->access_mode == BRW_ALIGN_16) {       \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)        \
         src0.swizzle = BRW_SWIZZLE_XXXX;               \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)        \
         src1.swizzle = BRW_SWIZZLE_XXXX;               \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)        \
         src2.swizzle = BRW_SWIZZLE_XXXX;               \
   }                                                    \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* Like ALU3, but additionally asserts that the destination is F or DF and
 * that every source matches the destination's float type.
 */
#define ALU3F(OP)                                               \
brw_inst *brw_##OP(struct brw_codegen *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0,           \
                                 struct brw_reg src1,           \
                                 struct brw_reg src2)           \
{                                                               \
   assert(dest.type == BRW_TYPE_F ||                   \
          dest.type == BRW_TYPE_DF);                   \
   if (dest.type == BRW_TYPE_F) {                      \
      assert(src0.type == BRW_TYPE_F);                 \
      assert(src1.type == BRW_TYPE_F);                 \
      assert(src2.type == BRW_TYPE_F);                 \
   } else if (dest.type == BRW_TYPE_DF) {              \
      assert(src0.type == BRW_TYPE_DF);                \
      assert(src1.type == BRW_TYPE_DF);                \
      assert(src2.type == BRW_TYPE_DF);                \
   }                                                            \
                                                                \
   if (p->current->access_mode == BRW_ALIGN_16) {               \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                \
         src0.swizzle = BRW_SWIZZLE_XXXX;                       \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                \
         src1.swizzle = BRW_SWIZZLE_XXXX;                       \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                \
         src2.swizzle = BRW_SWIZZLE_XXXX;                       \
   }                                                            \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
851 
/* Instantiate the brw_SEL(), brw_NOT(), ... emitter wrappers from the ALU
 * macros above.  Opcodes with extra checking (ADD, MUL, AVG, ...) are
 * written out by hand below instead.
 */
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
ALU3(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)
ALU3(ADD3)
ALU1(MOV)
889 
890 brw_inst *
891 brw_ADD(struct brw_codegen *p, struct brw_reg dest,
892         struct brw_reg src0, struct brw_reg src1)
893 {
894    /* 6.2.2: add */
895    if (src0.type == BRW_TYPE_F ||
896        (src0.file == IMM &&
897 	src0.type == BRW_TYPE_VF)) {
898       assert(src1.type != BRW_TYPE_UD);
899       assert(src1.type != BRW_TYPE_D);
900    }
901 
902    if (src1.type == BRW_TYPE_F ||
903        (src1.file == IMM &&
904 	src1.type == BRW_TYPE_VF)) {
905       assert(src0.type != BRW_TYPE_UD);
906       assert(src0.type != BRW_TYPE_D);
907    }
908 
909    return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
910 }
911 
912 brw_inst *
brw_AVG(struct brw_codegen * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)913 brw_AVG(struct brw_codegen *p, struct brw_reg dest,
914         struct brw_reg src0, struct brw_reg src1)
915 {
916    assert(dest.type == src0.type);
917    assert(src0.type == src1.type);
918    switch (src0.type) {
919    case BRW_TYPE_B:
920    case BRW_TYPE_UB:
921    case BRW_TYPE_W:
922    case BRW_TYPE_UW:
923    case BRW_TYPE_D:
924    case BRW_TYPE_UD:
925       break;
926    default:
927       unreachable("Bad type for brw_AVG");
928    }
929 
930    return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
931 }
932 
933 brw_inst *
brw_MUL(struct brw_codegen * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)934 brw_MUL(struct brw_codegen *p, struct brw_reg dest,
935         struct brw_reg src0, struct brw_reg src1)
936 {
937    /* 6.32.38: mul */
938    if (src0.type == BRW_TYPE_D ||
939        src0.type == BRW_TYPE_UD ||
940        src1.type == BRW_TYPE_D ||
941        src1.type == BRW_TYPE_UD) {
942       assert(dest.type != BRW_TYPE_F);
943    }
944 
945    if (src0.type == BRW_TYPE_F ||
946        (src0.file == IMM &&
947 	src0.type == BRW_TYPE_VF)) {
948       assert(src1.type != BRW_TYPE_UD);
949       assert(src1.type != BRW_TYPE_D);
950    }
951 
952    if (src1.type == BRW_TYPE_F ||
953        (src1.file == IMM &&
954 	src1.type == BRW_TYPE_VF)) {
955       assert(src0.type != BRW_TYPE_UD);
956       assert(src0.type != BRW_TYPE_D);
957    }
958 
959    assert(src0.file != ARF ||
960 	  src0.nr != BRW_ARF_ACCUMULATOR);
961    assert(src1.file != ARF ||
962 	  src1.nr != BRW_ARF_ACCUMULATOR);
963 
964    return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
965 }
966 
967 brw_inst *
brw_LINE(struct brw_codegen * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)968 brw_LINE(struct brw_codegen *p, struct brw_reg dest,
969          struct brw_reg src0, struct brw_reg src1)
970 {
971    src0.vstride = BRW_VERTICAL_STRIDE_0;
972    src0.width = BRW_WIDTH_1;
973    src0.hstride = BRW_HORIZONTAL_STRIDE_0;
974    return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1);
975 }
976 
977 brw_inst *
brw_PLN(struct brw_codegen * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)978 brw_PLN(struct brw_codegen *p, struct brw_reg dest,
979         struct brw_reg src0, struct brw_reg src1)
980 {
981    src0.vstride = BRW_VERTICAL_STRIDE_0;
982    src0.width = BRW_WIDTH_1;
983    src0.hstride = BRW_HORIZONTAL_STRIDE_0;
984    src1.vstride = BRW_VERTICAL_STRIDE_8;
985    src1.width = BRW_WIDTH_8;
986    src1.hstride = BRW_HORIZONTAL_STRIDE_1;
987    return brw_alu2(p, BRW_OPCODE_PLN, dest, src0, src1);
988 }
989 
990 brw_inst *
brw_DPAS(struct brw_codegen * p,enum gfx12_systolic_depth sdepth,unsigned rcount,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1,struct brw_reg src2)991 brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth,
992          unsigned rcount, struct brw_reg dest, struct brw_reg src0,
993          struct brw_reg src1, struct brw_reg src2)
994 {
995    return brw_dpas_three_src(p, BRW_OPCODE_DPAS, sdepth, rcount, dest, src0,
996                              src1, src2);
997 }
998 
brw_NOP(struct brw_codegen * p)999 void brw_NOP(struct brw_codegen *p)
1000 {
1001    brw_inst *insn = next_insn(p, BRW_OPCODE_NOP);
1002    memset(insn, 0, sizeof(*insn));
1003    brw_inst_set_opcode(p->isa, insn, BRW_OPCODE_NOP);
1004 }
1005 
brw_SYNC(struct brw_codegen * p,enum tgl_sync_function func)1006 void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func)
1007 {
1008    brw_inst *insn = next_insn(p, BRW_OPCODE_SYNC);
1009    brw_inst_set_cond_modifier(p->devinfo, insn, func);
1010 }
1011 
1012 /***********************************************************************
1013  * Comparisons, if/else/endif
1014  */
1015 
1016 brw_inst *
brw_JMPI(struct brw_codegen * p,struct brw_reg index,unsigned predicate_control)1017 brw_JMPI(struct brw_codegen *p, struct brw_reg index,
1018          unsigned predicate_control)
1019 {
1020    const struct intel_device_info *devinfo = p->devinfo;
1021    struct brw_reg ip = brw_ip_reg();
1022    brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);
1023 
1024    brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
1025    brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
1026    brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
1027    brw_inst_set_pred_control(devinfo, inst, predicate_control);
1028 
1029    return inst;
1030 }
1031 
1032 static void
push_if_stack(struct brw_codegen * p,brw_inst * inst)1033 push_if_stack(struct brw_codegen *p, brw_inst *inst)
1034 {
1035    p->if_stack[p->if_stack_depth] = inst - p->store;
1036 
1037    p->if_stack_depth++;
1038    if (p->if_stack_array_size <= p->if_stack_depth) {
1039       p->if_stack_array_size *= 2;
1040       p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
1041 			     p->if_stack_array_size);
1042    }
1043 }
1044 
1045 static brw_inst *
pop_if_stack(struct brw_codegen * p)1046 pop_if_stack(struct brw_codegen *p)
1047 {
1048    p->if_stack_depth--;
1049    return &p->store[p->if_stack[p->if_stack_depth]];
1050 }
1051 
1052 static void
push_loop_stack(struct brw_codegen * p,brw_inst * inst)1053 push_loop_stack(struct brw_codegen *p, brw_inst *inst)
1054 {
1055    if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) {
1056       p->loop_stack_array_size *= 2;
1057       p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
1058 			       p->loop_stack_array_size);
1059    }
1060 
1061    p->loop_stack[p->loop_stack_depth] = inst - p->store;
1062    p->loop_stack_depth++;
1063 }
1064 
1065 static brw_inst *
get_inner_do_insn(struct brw_codegen * p)1066 get_inner_do_insn(struct brw_codegen *p)
1067 {
1068    return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
1069 }
1070 
1071 /* EU takes the value from the flag register and pushes it onto some
1072  * sort of a stack (presumably merging with any flag value already on
1073  * the stack).  Within an if block, the flags at the top of the stack
1074  * control execution on each channel of the unit, eg. on each of the
1075  * 16 pixel values in our wm programs.
1076  *
1077  * When the matching 'else' instruction is reached (presumably by
1078  * countdown of the instruction count patched in by our ELSE/ENDIF
1079  * functions), the relevant flags are inverted.
1080  *
1081  * When the matching 'endif' instruction is reached, the flags are
1082  * popped off.  If the stack is now empty, normal execution resumes.
1083  */
1084 brw_inst *
brw_IF(struct brw_codegen * p,unsigned execute_size)1085 brw_IF(struct brw_codegen *p, unsigned execute_size)
1086 {
1087    const struct intel_device_info *devinfo = p->devinfo;
1088    brw_inst *insn;
1089 
1090    insn = next_insn(p, BRW_OPCODE_IF);
1091 
1092    /* Override the defaults for this instruction:
1093     */
1094    brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_TYPE_D)));
1095    if (devinfo->ver < 12)
1096       brw_set_src0(p, insn, brw_imm_d(0));
1097    brw_inst_set_jip(devinfo, insn, 0);
1098    brw_inst_set_uip(devinfo, insn, 0);
1099 
1100    brw_inst_set_exec_size(devinfo, insn, execute_size);
1101    brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1102    brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL);
1103    brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
1104 
1105    push_if_stack(p, insn);
1106    return insn;
1107 }
1108 
1109 /**
1110  * Patch IF and ELSE instructions with appropriate jump targets.
1111  */
static void
patch_IF_ELSE(struct brw_codegen *p,
              brw_inst *if_inst, brw_inst *else_inst, brw_inst *endif_inst)
{
   const struct intel_device_info *devinfo = p->devinfo;

   /* else_inst may legitimately be NULL (an IF with no ELSE); the other
    * two instructions are required.
    */
   assert(if_inst != NULL && brw_inst_opcode(p->isa, if_inst) == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || brw_inst_opcode(p->isa, else_inst) == BRW_OPCODE_ELSE);

   /* Jump distances are scaled: br units of offset per instruction. */
   unsigned br = brw_jump_scale(devinfo);

   assert(brw_inst_opcode(p->isa, endif_inst) == BRW_OPCODE_ENDIF);
   /* ENDIF inherits the IF's execution size. */
   brw_inst_set_exec_size(devinfo, endif_inst, brw_inst_exec_size(devinfo, if_inst));

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
      brw_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst));
   } else {
      brw_inst_set_exec_size(devinfo, else_inst, brw_inst_exec_size(devinfo, if_inst));

      /* Patch ELSE -> ENDIF */
      /* The IF instruction's JIP should point just past the ELSE */
      brw_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1));
      /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
      brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));

      if (devinfo->ver < 11) {
         /* Set the ELSE instruction to use branch_ctrl with a join
          * jump target pointing at the NOP inserted right before
          * the ENDIF instruction in order to make sure it is
          * executed in all cases, since attempting to do the same
          * as on other generations could cause the EU to jump at
          * the instruction immediately after the ENDIF due to
          * Wa_220160235, which could cause the program to continue
          * running with all channels disabled.
          *
          * The matching NOP is emitted by brw_ENDIF(); hence the
          * "- 1" in the offset below.
          */
         brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst - 1));
         brw_inst_set_branch_control(devinfo, else_inst, true);
      } else {
         brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst));
      }

      /* Since we don't set branch_ctrl on Gfx11+, the ELSE's
       * JIP and UIP both should point to ENDIF on those
       * platforms.
       */
      brw_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst));
   }
}
1163 
1164 void
brw_ELSE(struct brw_codegen * p)1165 brw_ELSE(struct brw_codegen *p)
1166 {
1167    const struct intel_device_info *devinfo = p->devinfo;
1168    brw_inst *insn;
1169 
1170    insn = next_insn(p, BRW_OPCODE_ELSE);
1171 
1172    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
1173    if (devinfo->ver < 12)
1174       brw_set_src0(p, insn, brw_imm_d(0));
1175    brw_inst_set_jip(devinfo, insn, 0);
1176    brw_inst_set_uip(devinfo, insn, 0);
1177 
1178    brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1179    brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
1180 
1181    push_if_stack(p, insn);
1182 }
1183 
void
brw_ENDIF(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn = NULL;
   brw_inst *else_inst = NULL;
   brw_inst *if_inst = NULL;
   brw_inst *tmp;

   assert(p->if_stack_depth > 0);

   /* Peek at the top of the if-stack without popping: if the innermost
    * control-flow instruction is an ELSE on pre-Gfx11, a join NOP must be
    * emitted before the ENDIF (see the workaround comment below).
    */
   if (devinfo->ver < 11 &&
       brw_inst_opcode(p->isa, &p->store[p->if_stack[
                             p->if_stack_depth - 1]]) == BRW_OPCODE_ELSE) {
      /* Insert a NOP to be specified as join instruction within the
       * ELSE block, which is valid for an ELSE instruction with
       * branch_ctrl on.  The ELSE instruction will be set to jump
       * here instead of to the ENDIF instruction, since attempting to
       * do the latter would prevent the ENDIF from being executed in
       * some cases due to Wa_220160235, which could cause the program
       * to continue running with all channels disabled.
       */
      brw_NOP(p);
   }

   /*
    * A single next_insn() may change the base address of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   tmp = pop_if_stack(p);
   if (brw_inst_opcode(p->isa, tmp) == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   brw_set_src0(p, insn, brw_imm_d(0));

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   /* ENDIF's own JIP points at the next instruction; the IF/ELSE targets
    * are fixed up by patch_IF_ELSE() now that all three are known.
    */
   brw_inst_set_jip(devinfo, insn, 2);
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1232 
1233 brw_inst *
brw_BREAK(struct brw_codegen * p)1234 brw_BREAK(struct brw_codegen *p)
1235 {
1236    const struct intel_device_info *devinfo = p->devinfo;
1237    brw_inst *insn;
1238 
1239    insn = next_insn(p, BRW_OPCODE_BREAK);
1240    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
1241    brw_set_src0(p, insn, brw_imm_d(0x0));
1242    brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1243    brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1244 
1245    return insn;
1246 }
1247 
1248 brw_inst *
brw_CONT(struct brw_codegen * p)1249 brw_CONT(struct brw_codegen *p)
1250 {
1251    const struct intel_device_info *devinfo = p->devinfo;
1252    brw_inst *insn;
1253 
1254    insn = next_insn(p, BRW_OPCODE_CONTINUE);
1255    brw_set_dest(p, insn, brw_ip_reg());
1256    brw_set_src0(p, insn, brw_imm_d(0x0));
1257 
1258    brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1259    brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1260    return insn;
1261 }
1262 
1263 brw_inst *
brw_HALT(struct brw_codegen * p)1264 brw_HALT(struct brw_codegen *p)
1265 {
1266    const struct intel_device_info *devinfo = p->devinfo;
1267    brw_inst *insn;
1268 
1269    insn = next_insn(p, BRW_OPCODE_HALT);
1270    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
1271    if (devinfo->ver < 12) {
1272       brw_set_src0(p, insn, brw_imm_d(0x0));
1273    }
1274 
1275    brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1276    brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1277    return insn;
1278 }
1279 
1280 /* DO/WHILE loop:
1281  *
1282  * The DO/WHILE is just an unterminated loop -- break or continue are
1283  * used for control within the loop.  We have a few ways they can be
1284  * done.
1285  *
1286  * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1287  * jip and no DO instruction.
1288  *
1289  * For gfx6, there's no more mask stack, so no need for DO.  WHILE
1290  * just points back to the first instruction of the loop.
1291  */
1292 brw_inst *
brw_DO(struct brw_codegen * p,unsigned execute_size)1293 brw_DO(struct brw_codegen *p, unsigned execute_size)
1294 {
1295    push_loop_stack(p, &p->store[p->nr_insn]);
1296    return &p->store[p->nr_insn];
1297 }
1298 
1299 brw_inst *
brw_WHILE(struct brw_codegen * p)1300 brw_WHILE(struct brw_codegen *p)
1301 {
1302    const struct intel_device_info *devinfo = p->devinfo;
1303    brw_inst *insn, *do_insn;
1304    unsigned br = brw_jump_scale(devinfo);
1305 
1306    insn = next_insn(p, BRW_OPCODE_WHILE);
1307    do_insn = get_inner_do_insn(p);
1308 
1309    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
1310    if (devinfo->ver < 12)
1311       brw_set_src0(p, insn, brw_imm_d(0));
1312    brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
1313 
1314    brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1315 
1316    brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1317 
1318    p->loop_stack_depth--;
1319 
1320    return insn;
1321 }
1322 
brw_CMP(struct brw_codegen * p,struct brw_reg dest,unsigned conditional,struct brw_reg src0,struct brw_reg src1)1323 void brw_CMP(struct brw_codegen *p,
1324 	     struct brw_reg dest,
1325 	     unsigned conditional,
1326 	     struct brw_reg src0,
1327 	     struct brw_reg src1)
1328 {
1329    const struct intel_device_info *devinfo = p->devinfo;
1330    brw_inst *insn = next_insn(p, BRW_OPCODE_CMP);
1331 
1332    brw_inst_set_cond_modifier(devinfo, insn, conditional);
1333    brw_set_dest(p, insn, dest);
1334    brw_set_src0(p, insn, src0);
1335    brw_set_src1(p, insn, src1);
1336 }
1337 
brw_CMPN(struct brw_codegen * p,struct brw_reg dest,unsigned conditional,struct brw_reg src0,struct brw_reg src1)1338 void brw_CMPN(struct brw_codegen *p,
1339               struct brw_reg dest,
1340               unsigned conditional,
1341               struct brw_reg src0,
1342               struct brw_reg src1)
1343 {
1344    const struct intel_device_info *devinfo = p->devinfo;
1345    brw_inst *insn = next_insn(p, BRW_OPCODE_CMPN);
1346 
1347    brw_inst_set_cond_modifier(devinfo, insn, conditional);
1348    brw_set_dest(p, insn, dest);
1349    brw_set_src0(p, insn, src0);
1350    brw_set_src1(p, insn, src1);
1351 }
1352 
1353 /***********************************************************************
1354  * Helpers for the various SEND message types:
1355  */
1356 
gfx6_math(struct brw_codegen * p,struct brw_reg dest,unsigned function,struct brw_reg src0,struct brw_reg src1)1357 void gfx6_math(struct brw_codegen *p,
1358 	       struct brw_reg dest,
1359 	       unsigned function,
1360 	       struct brw_reg src0,
1361 	       struct brw_reg src1)
1362 {
1363    const struct intel_device_info *devinfo = p->devinfo;
1364    brw_inst *insn = next_insn(p, BRW_OPCODE_MATH);
1365 
1366    assert(dest.file == FIXED_GRF);
1367 
1368    assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1369 
1370    if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
1371        function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
1372        function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1373       assert(src0.type != BRW_TYPE_F);
1374       assert(src1.type != BRW_TYPE_F);
1375       assert(src1.file == FIXED_GRF ||
1376              src1.file == IMM);
1377       /* From BSpec 6647/47428 "[Instruction] Extended Math Function":
1378        *     INT DIV function does not support source modifiers.
1379        */
1380       assert(!src0.negate);
1381       assert(!src0.abs);
1382       assert(!src1.negate);
1383       assert(!src1.abs);
1384    } else {
1385       assert(src0.type == BRW_TYPE_F ||
1386              (src0.type == BRW_TYPE_HF && devinfo->ver >= 9));
1387       assert(src1.type == BRW_TYPE_F ||
1388              (src1.type == BRW_TYPE_HF && devinfo->ver >= 9));
1389    }
1390 
1391    brw_inst_set_math_function(devinfo, insn, function);
1392 
1393    brw_set_dest(p, insn, dest);
1394    brw_set_src0(p, insn, src0);
1395    brw_set_src1(p, insn, src1);
1396 }
1397 
/* Emit a SEND whose descriptor may be either immediate or held in a
 * register.  For a register descriptor, the descriptor is first OR'ed
 * with desc_imm into address register a0.0 so callers can supply extra
 * immediate descriptor bits either way.
 */
void
brw_send_indirect_message(struct brw_codegen *p,
                          unsigned sfid,
                          struct brw_reg dst,
                          struct brw_reg payload,
                          struct brw_reg desc,
                          unsigned desc_imm,
                          bool eot)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_inst *send;

   dst = retype(dst, BRW_TYPE_UW);

   assert(desc.type == BRW_TYPE_UD);

   if (desc.file == IMM) {
      /* Fully immediate descriptor: encode it directly in the SEND. */
      send = next_insn(p, BRW_OPCODE_SEND);
      brw_set_src0(p, send, retype(payload, BRW_TYPE_UD));
      brw_set_desc(p, send, desc.ud | desc_imm);
   } else {
      /* Save the default SWSB so the OR below can be made a source
       * dependency and the SEND a destination dependency of it.
       */
      const struct tgl_swsb swsb = brw_get_default_swsb(p);
      struct brw_reg addr = retype(brw_address_reg(0), BRW_TYPE_UD);

      /* The address-register load must be a scalar, unpredicated,
       * WE_all ALIGN1 operation regardless of the current defaults.
       */
      brw_push_insn_state(p);
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_flag_reg(p, 0, 0);
      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));

      /* Load the indirect descriptor to an address register using OR so the
       * caller can specify additional descriptor bits with the desc_imm
       * immediate.
       */
      brw_OR(p, addr, desc, brw_imm_ud(desc_imm));

      brw_pop_insn_state(p);

      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
      send = next_insn(p, BRW_OPCODE_SEND);
      brw_set_src0(p, send, retype(payload, BRW_TYPE_UD));

      /* On Gfx12+ the register descriptor is selected via a dedicated
       * bit; older generations take it as src1.
       */
      if (devinfo->ver >= 12)
         brw_inst_set_send_sel_reg32_desc(devinfo, send, true);
      else
         brw_set_src1(p, send, addr);
   }

   brw_set_dest(p, send, dst);
   brw_inst_set_sfid(devinfo, send, sfid);
   brw_inst_set_eot(devinfo, send, eot);
}
1452 
1453 void
brw_send_indirect_split_message(struct brw_codegen *p,
                                unsigned sfid,
                                struct brw_reg dst,
                                struct brw_reg payload0,
                                struct brw_reg payload1,
                                struct brw_reg desc,
                                unsigned desc_imm,
                                struct brw_reg ex_desc,
                                unsigned ex_desc_imm,
                                bool ex_desc_scratch,
                                bool ex_bso,
                                bool eot)
{
   /* Emit a split SEND/SENDS with two payload sources.  "desc" and
    * "ex_desc" may each be either an immediate or a register; a register
    * descriptor is first combined with its immediate part into an address
    * register (a0.0 for desc, a0.2 for ex_desc) before the SEND is emitted.
    */
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_inst *send;

   dst = retype(dst, BRW_TYPE_UW);

   assert(desc.type == BRW_TYPE_UD);

   if (desc.file == IMM) {
      /* Fully immediate descriptor: just fold in the extra bits. */
      desc.ud |= desc_imm;
   } else {
      const struct tgl_swsb swsb = brw_get_default_swsb(p);
      struct brw_reg addr = retype(brw_address_reg(0), BRW_TYPE_UD);

      /* The descriptor load must be a scalar, unpredicated, WE_all
       * instruction regardless of the current defaults.
       */
      brw_push_insn_state(p);
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_flag_reg(p, 0, 0);
      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));

      /* Load the indirect descriptor to an address register using OR so the
       * caller can specify additional descriptor bits with the desc_imm
       * immediate.
       */
      brw_OR(p, addr, desc, brw_imm_ud(desc_imm));

      brw_pop_insn_state(p);
      desc = addr;

      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
   }

   if (ex_desc.file == IMM &&
       !ex_desc_scratch &&
       (devinfo->ver >= 12 ||
        ((ex_desc.ud | ex_desc_imm) & INTEL_MASK(15, 12)) == 0)) {
      /* ATS-M PRMs, Volume 2d: Command Reference: Structures,
       * EU_INSTRUCTION_SEND instruction
       *
       *    "ExBSO: Exists If: ([ExDesc.IsReg]==true)"
       */
      assert(!ex_bso);
      ex_desc.ud |= ex_desc_imm;
   } else {
      const struct tgl_swsb swsb = brw_get_default_swsb(p);
      struct brw_reg addr = retype(brw_address_reg(2), BRW_TYPE_UD);

      /* On Xe2+ ExBSO addressing is implicitly enabled for the UGM
       * shared function.
       */
      ex_bso |= (devinfo->ver >= 20 && sfid == GFX12_SFID_UGM);

      brw_push_insn_state(p);
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_flag_reg(p, 0, 0);
      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));

      /* Load the indirect extended descriptor to an address register using OR
       * so the caller can specify additional descriptor bits with the
       * desc_imm immediate.
       *
       * Even though the instruction dispatcher always pulls the SFID and EOT
       * fields from the instruction itself, actual external unit which
       * processes the message gets the SFID and EOT from the extended
       * descriptor which comes from the address register.  If we don't OR
       * those two bits in, the external unit may get confused and hang.
       */
      unsigned imm_part = ex_bso ? 0 : (ex_desc_imm | sfid | eot << 5);

      if (ex_desc_scratch) {
         assert(devinfo->verx10 >= 125);
         /* Extract bits 31:10 of the r0.5 thread payload field, which
          * holds the scratch surface offset ORed in below.
          */
         brw_AND(p, addr,
                 retype(brw_vec1_grf(0, 5), BRW_TYPE_UD),
                 brw_imm_ud(INTEL_MASK(31, 10)));

         if (devinfo->ver >= 20 && sfid == GFX12_SFID_UGM) {
            const unsigned ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc_imm);
            assert(ex_desc_imm == brw_message_ex_desc(devinfo, ex_mlen));
            brw_SHR(p, addr, addr, brw_imm_ud(4));
         } else {
            /* Or the scratch surface offset together with the immediate part
             * of the extended descriptor.
             */
            brw_OR(p, addr, addr, brw_imm_ud(imm_part));
         }

      } else if (ex_desc.file == IMM) {
         /* ex_desc bits 15:12 don't exist in the instruction encoding prior
          * to Gfx12, so we may have fallen back to an indirect extended
          * descriptor.
          */
         brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part));
      } else {
         brw_OR(p, addr, ex_desc, brw_imm_ud(imm_part));
      }

      brw_pop_insn_state(p);
      ex_desc = addr;

      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
   }

   send = next_insn(p, devinfo->ver >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, retype(payload0, BRW_TYPE_UD));
   brw_set_src1(p, send, retype(payload1, BRW_TYPE_UD));

   if (desc.file == IMM) {
      brw_inst_set_send_sel_reg32_desc(devinfo, send, 0);
      brw_inst_set_send_desc(devinfo, send, desc.ud);
   } else {
      /* A register descriptor must be in a0.0. */
      assert(desc.file == ARF);
      assert(desc.nr == BRW_ARF_ADDRESS);
      assert(desc.subnr == 0);
      brw_inst_set_send_sel_reg32_desc(devinfo, send, 1);
   }

   if (ex_desc.file == IMM) {
      brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
      brw_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud);
   } else {
      /* A register extended descriptor must be a DWord-aligned address
       * subregister.
       */
      assert(ex_desc.file == ARF);
      assert(ex_desc.nr == BRW_ARF_ADDRESS);
      assert((ex_desc.subnr & 0x3) == 0);
      brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
      brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, phys_subnr(devinfo, ex_desc) >> 2);

      if (devinfo->ver >= 20 && sfid == GFX12_SFID_UGM) {
         const unsigned ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc_imm);
         brw_inst_set_bits(send, 103, 99, ex_mlen / reg_unit(devinfo));
      }
   }

   if (ex_bso) {
      /* The send instruction ExBSO field does not exist with UGM on Gfx20+,
       * it is assumed.
       *
       * BSpec 56890
       */
      if (devinfo->ver < 20 || sfid != GFX12_SFID_UGM)
         brw_inst_set_send_ex_bso(devinfo, send, true);
      brw_inst_set_send_src1_len(devinfo, send, GET_BITS(ex_desc_imm, 10, 6));
   }
   brw_inst_set_sfid(devinfo, send, sfid);
   brw_inst_set_eot(devinfo, send, eot);
}
1617 
1618 static bool
while_jumps_before_offset(const struct intel_device_info * devinfo,brw_inst * insn,int while_offset,int start_offset)1619 while_jumps_before_offset(const struct intel_device_info *devinfo,
1620                           brw_inst *insn, int while_offset, int start_offset)
1621 {
1622    int scale = 16 / brw_jump_scale(devinfo);
1623    int jip = brw_inst_jip(devinfo, insn);
1624    assert(jip < 0);
1625    return while_offset + jip * scale <= start_offset;
1626 }
1627 
1628 
1629 static int
brw_find_next_block_end(struct brw_codegen * p,int start_offset)1630 brw_find_next_block_end(struct brw_codegen *p, int start_offset)
1631 {
1632    int offset;
1633    void *store = p->store;
1634    const struct intel_device_info *devinfo = p->devinfo;
1635 
1636    int depth = 0;
1637 
1638    for (offset = next_offset(devinfo, store, start_offset);
1639         offset < p->next_insn_offset;
1640         offset = next_offset(devinfo, store, offset)) {
1641       brw_inst *insn = store + offset;
1642 
1643       switch (brw_inst_opcode(p->isa, insn)) {
1644       case BRW_OPCODE_IF:
1645          depth++;
1646          break;
1647       case BRW_OPCODE_ENDIF:
1648          if (depth == 0)
1649             return offset;
1650          depth--;
1651          break;
1652       case BRW_OPCODE_WHILE:
1653          /* If the while doesn't jump before our instruction, it's the end
1654           * of a sibling do...while loop.  Ignore it.
1655           */
1656          if (!while_jumps_before_offset(devinfo, insn, offset, start_offset))
1657             continue;
1658          FALLTHROUGH;
1659       case BRW_OPCODE_ELSE:
1660       case BRW_OPCODE_HALT:
1661          if (depth == 0)
1662             return offset;
1663          break;
1664       default:
1665          break;
1666       }
1667    }
1668 
1669    return 0;
1670 }
1671 
1672 /* There is no DO instruction on gfx6, so to find the end of the loop
1673  * we have to see if the loop is jumping back before our start
1674  * instruction.
1675  */
1676 static int
brw_find_loop_end(struct brw_codegen * p,int start_offset)1677 brw_find_loop_end(struct brw_codegen *p, int start_offset)
1678 {
1679    const struct intel_device_info *devinfo = p->devinfo;
1680    int offset;
1681    void *store = p->store;
1682 
1683    /* Always start after the instruction (such as a WHILE) we're trying to fix
1684     * up.
1685     */
1686    for (offset = next_offset(devinfo, store, start_offset);
1687         offset < p->next_insn_offset;
1688         offset = next_offset(devinfo, store, offset)) {
1689       brw_inst *insn = store + offset;
1690 
1691       if (brw_inst_opcode(p->isa, insn) == BRW_OPCODE_WHILE) {
1692 	 if (while_jumps_before_offset(devinfo, insn, offset, start_offset))
1693 	    return offset;
1694       }
1695    }
1696    assert(!"not reached");
1697    return start_offset;
1698 }
1699 
1700 /* After program generation, go back and update the UIP and JIP of
1701  * BREAK, CONT, and HALT instructions to their correct locations.
1702  */
1703 void
brw_set_uip_jip(struct brw_codegen * p,int start_offset)1704 brw_set_uip_jip(struct brw_codegen *p, int start_offset)
1705 {
1706    const struct intel_device_info *devinfo = p->devinfo;
1707    int offset;
1708    int br = brw_jump_scale(devinfo);
1709    int scale = 16 / br;
1710    void *store = p->store;
1711 
1712    for (offset = start_offset; offset < p->next_insn_offset; offset += 16) {
1713       brw_inst *insn = store + offset;
1714       assert(brw_inst_cmpt_control(devinfo, insn) == 0);
1715 
1716       switch (brw_inst_opcode(p->isa, insn)) {
1717       case BRW_OPCODE_BREAK: {
1718          int block_end_offset = brw_find_next_block_end(p, offset);
1719          assert(block_end_offset != 0);
1720          brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
1721 	 /* Gfx7 UIP points to WHILE; Gfx6 points just after it */
1722          brw_inst_set_uip(devinfo, insn,
1723 	    (brw_find_loop_end(p, offset) - offset) / scale);
1724 	 break;
1725       }
1726 
1727       case BRW_OPCODE_CONTINUE: {
1728          int block_end_offset = brw_find_next_block_end(p, offset);
1729          assert(block_end_offset != 0);
1730          brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
1731          brw_inst_set_uip(devinfo, insn,
1732             (brw_find_loop_end(p, offset) - offset) / scale);
1733 
1734          assert(brw_inst_uip(devinfo, insn) != 0);
1735          assert(brw_inst_jip(devinfo, insn) != 0);
1736 	 break;
1737       }
1738 
1739       case BRW_OPCODE_ENDIF: {
1740          int block_end_offset = brw_find_next_block_end(p, offset);
1741          int32_t jump = (block_end_offset == 0) ?
1742                         1 * br : (block_end_offset - offset) / scale;
1743          brw_inst_set_jip(devinfo, insn, jump);
1744 	 break;
1745       }
1746 
1747       case BRW_OPCODE_HALT: {
1748 	 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
1749 	  *
1750 	  *    "In case of the halt instruction not inside any conditional
1751 	  *     code block, the value of <JIP> and <UIP> should be the
1752 	  *     same. In case of the halt instruction inside conditional code
1753 	  *     block, the <UIP> should be the end of the program, and the
1754 	  *     <JIP> should be end of the most inner conditional code block."
1755 	  *
1756 	  * The uip will have already been set by whoever set up the
1757 	  * instruction.
1758 	  */
1759          int block_end_offset = brw_find_next_block_end(p, offset);
1760 	 if (block_end_offset == 0) {
1761             brw_inst_set_jip(devinfo, insn, brw_inst_uip(devinfo, insn));
1762 	 } else {
1763             brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
1764 	 }
1765          assert(brw_inst_uip(devinfo, insn) != 0);
1766          assert(brw_inst_jip(devinfo, insn) != 0);
1767 	 break;
1768       }
1769 
1770       default:
1771          break;
1772       }
1773    }
1774 }
1775 
1776 static void
brw_set_memory_fence_message(struct brw_codegen * p,struct brw_inst * insn,enum brw_message_target sfid,bool commit_enable,unsigned bti)1777 brw_set_memory_fence_message(struct brw_codegen *p,
1778                              struct brw_inst *insn,
1779                              enum brw_message_target sfid,
1780                              bool commit_enable,
1781                              unsigned bti)
1782 {
1783    const struct intel_device_info *devinfo = p->devinfo;
1784 
1785    brw_set_desc(p, insn, brw_message_desc(
1786                    devinfo, 1, (commit_enable ? 1 : 0), true));
1787 
1788    brw_inst_set_sfid(devinfo, insn, sfid);
1789 
1790    switch (sfid) {
1791    case GFX6_SFID_DATAPORT_RENDER_CACHE:
1792       brw_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_RC_MEMORY_FENCE);
1793       break;
1794    case GFX7_SFID_DATAPORT_DATA_CACHE:
1795       brw_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_DC_MEMORY_FENCE);
1796       break;
1797    default:
1798       unreachable("Not reached");
1799    }
1800 
1801    if (commit_enable)
1802       brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5);
1803 
1804    assert(devinfo->ver >= 11 || bti == 0);
1805    brw_inst_set_binding_table_index(devinfo, insn, bti);
1806 }
1807 
/* Program an LSC-era (devices with devinfo->has_lsc) memory fence message
 * on the given SEND instruction.  "desc" carries the requested fence scope
 * and flush type, which may be overridden below for URB and TGM targets
 * and for Wa_14012437816.
 */
static void
gfx12_set_memory_fence_message(struct brw_codegen *p,
                               struct brw_inst *insn,
                               enum brw_message_target sfid,
                               uint32_t desc)
{
   const unsigned mlen = 1 * reg_unit(p->devinfo); /* g0 header */
   /* Completion signaled by write to register. No data returned. */
   const unsigned rlen = 1 * reg_unit(p->devinfo);

   brw_inst_set_sfid(p->devinfo, insn, sfid);

   /* On Gfx12.5 URB is not listed as port usable for fences with the LSC (see
    * BSpec 53578 for Gfx12.5, BSpec 57330 for Gfx20), so we completely ignore
    * the descriptor value and rebuild a legacy URB fence descriptor.
    */
   if (sfid == BRW_SFID_URB && p->devinfo->ver < 20) {
      brw_set_desc(p, insn, brw_urb_fence_desc(p->devinfo) |
                            brw_message_desc(p->devinfo, mlen, rlen, true));
   } else {
      enum lsc_fence_scope scope = lsc_fence_msg_desc_scope(p->devinfo, desc);
      enum lsc_flush_type flush_type = lsc_fence_msg_desc_flush_type(p->devinfo, desc);

      /* Typed (TGM) fences are forced to tile scope with an evict flush. */
      if (sfid == GFX12_SFID_TGM) {
         scope = LSC_FENCE_TILE;
         flush_type = LSC_FLUSH_TYPE_EVICT;
      }

      /* Wa_14012437816:
       *
       *   "For any fence greater than local scope, always set flush type to
       *    at least invalidate so that fence goes on properly."
       *
       *   "The bug is if flush_type is 'None', the scope is always downgraded
       *    to 'local'."
       *
       * Here set scope to NONE_6 instead of NONE, which has the same effect
       * as NONE but avoids the downgrade to scope LOCAL.
       */
      if (intel_needs_workaround(p->devinfo, 14012437816) &&
          scope > LSC_FENCE_LOCAL &&
          flush_type == LSC_FLUSH_TYPE_NONE) {
         flush_type = LSC_FLUSH_TYPE_NONE_6;
      }

      brw_set_desc(p, insn, lsc_fence_msg_desc(p->devinfo, scope,
                                               flush_type, false) |
                            brw_message_desc(p->devinfo, mlen, rlen, false));
   }
}
1858 
1859 void
brw_memory_fence(struct brw_codegen * p,struct brw_reg dst,struct brw_reg src,enum opcode send_op,enum brw_message_target sfid,uint32_t desc,bool commit_enable,unsigned bti)1860 brw_memory_fence(struct brw_codegen *p,
1861                  struct brw_reg dst,
1862                  struct brw_reg src,
1863                  enum opcode send_op,
1864                  enum brw_message_target sfid,
1865                  uint32_t desc,
1866                  bool commit_enable,
1867                  unsigned bti)
1868 {
1869    const struct intel_device_info *devinfo = p->devinfo;
1870 
1871    dst = retype(vec1(dst), BRW_TYPE_UW);
1872    src = retype(vec1(src), BRW_TYPE_UD);
1873 
1874    /* Set dst as destination for dependency tracking, the MEMORY_FENCE
1875     * message doesn't write anything back.
1876     */
1877    struct brw_inst *insn = next_insn(p, send_op);
1878    brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
1879    brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
1880    brw_set_dest(p, insn, dst);
1881    brw_set_src0(p, insn, src);
1882 
1883    /* All DG2 hardware requires LSC for fence messages, even A-step */
1884    if (devinfo->has_lsc)
1885       gfx12_set_memory_fence_message(p, insn, sfid, desc);
1886    else
1887       brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
1888 }
1889 
/* Broadcast a single component of "src", selected by "idx", into "dst".
 * "idx" may be an immediate or a register; in the register case the
 * component is fetched with indirect addressing through a0.
 */
void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);

   /* The broadcast is emitted as scalar instructions with masking off. */
   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_exec_size(p, BRW_EXECUTE_1);

   assert(src.file == FIXED_GRF &&
          src.address_mode == BRW_ADDRESS_DIRECT);
   assert(!src.abs && !src.negate);

   /* Gen12.5 adds the following region restriction:
    *
    *    "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float
    *    and Quad-Word data must not be used."
    *
    * We require the source and destination types to match so stomp to an
    * unsigned integer type.
    */
   assert(src.type == dst.type);
   src.type = dst.type =
      brw_type_with_size(BRW_TYPE_UD, brw_type_size_bits(src.type));

   if ((src.vstride == 0 && src.hstride == 0) ||
       idx.file == IMM) {
      /* Trivial, the source is already uniform or the index is a constant.
       * We will typically not get here if the optimizer is doing its job, but
       * asserting would be mean.
       */
      const unsigned i = idx.file == IMM ? idx.ud : 0;
      src = stride(suboffset(src, i), 0, 1, 0);

      if (brw_type_size_bytes(src.type) > 4 && !devinfo->has_64bit_int) {
         /* Without 64-bit integer support, move the component as two
          * separate dwords.
          */
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
                    subscript(src, BRW_TYPE_D, 0));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 1),
                    subscript(src, BRW_TYPE_D, 1));
      } else {
         brw_MOV(p, dst, src);
      }
   } else {
      /* From the Haswell PRM section "Register Region Restrictions":
       *
       *    "The lower bits of the AddressImmediate must not overflow to
       *    change the register address.  The lower 5 bits of Address
       *    Immediate when added to lower 5 bits of address register gives
       *    the sub-register offset. The upper bits of Address Immediate
       *    when added to upper bits of address register gives the register
       *    address. Any overflow from sub-register offset is dropped."
       *
       * Fortunately, for broadcast, we never have a sub-register offset so
       * this isn't an issue.
       */
      assert(src.subnr == 0);

      const struct brw_reg addr =
         retype(brw_address_reg(0), BRW_TYPE_UD);
      unsigned offset = src.nr * REG_SIZE + src.subnr;
      /* Limit in bytes of the signed indirect addressing immediate. */
      const unsigned limit = 512;

      /* Address register setup must be unpredicated and unmasked. */
      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_flag_reg(p, 0, 0);

      /* Take into account the component size and horizontal stride. */
      assert(src.vstride == src.hstride + src.width);
      brw_SHL(p, addr, vec1(idx),
              brw_imm_ud(util_logbase2(brw_type_size_bytes(src.type)) +
                         src.hstride - 1));

      /* We can only address up to limit bytes using the indirect
       * addressing immediate, account for the difference if the source
       * register is above this limit.
       */
      if (offset >= limit) {
         brw_set_default_swsb(p, tgl_swsb_regdist(1));
         brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
         offset = offset % limit;
      }

      brw_pop_insn_state(p);

      brw_set_default_swsb(p, tgl_swsb_regdist(1));

      /* Use indirect addressing to fetch the specified component. */
      if (brw_type_size_bytes(src.type) > 4 &&
          (intel_device_info_is_9lp(devinfo) || !devinfo->has_64bit_int)) {
         /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
          *
          *   "When source or destination datatype is 64b or operation is
          *    integer DWord multiply, indirect addressing must not be
          *    used."
          *
          * We may also not support Q/UQ types.
          *
          * To work around both of these, we do two integer MOVs instead
          * of one 64-bit MOV.  Because no double value should ever cross
          * a register boundary, it's safe to use the immediate offset in
          * the indirect here to handle adding 4 bytes to the offset and
          * avoid the extra ADD to the register file.
          */
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
                    retype(brw_vec1_indirect(addr.subnr, offset),
                           BRW_TYPE_D));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 1),
                    retype(brw_vec1_indirect(addr.subnr, offset + 4),
                           BRW_TYPE_D));
      } else {
         brw_MOV(p, dst,
                 retype(brw_vec1_indirect(addr.subnr, offset), src.type));
      }
   }

   brw_pop_insn_state(p);
}
2015 
2016 
2017 /**
2018  * Emit the SEND message for a barrier
2019  */
2020 void
brw_barrier(struct brw_codegen * p,struct brw_reg src)2021 brw_barrier(struct brw_codegen *p, struct brw_reg src)
2022 {
2023    const struct intel_device_info *devinfo = p->devinfo;
2024    struct brw_inst *inst;
2025 
2026    brw_push_insn_state(p);
2027    brw_set_default_access_mode(p, BRW_ALIGN_1);
2028    inst = next_insn(p, BRW_OPCODE_SEND);
2029    brw_set_dest(p, inst, retype(brw_null_reg(), BRW_TYPE_UW));
2030    brw_set_src0(p, inst, src);
2031    brw_set_src1(p, inst, brw_null_reg());
2032    brw_set_desc(p, inst, brw_message_desc(devinfo,
2033                                           1 * reg_unit(devinfo), 0, false));
2034 
2035    brw_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY);
2036    brw_inst_set_gateway_subfuncid(devinfo, inst,
2037                                   BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);
2038 
2039    brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
2040    brw_pop_insn_state(p);
2041 }
2042 
2043 
2044 /**
2045  * Emit the wait instruction for a barrier
2046  */
2047 void
brw_WAIT(struct brw_codegen * p)2048 brw_WAIT(struct brw_codegen *p)
2049 {
2050    const struct intel_device_info *devinfo = p->devinfo;
2051    struct brw_inst *insn;
2052 
2053    struct brw_reg src = brw_notification_reg();
2054 
2055    insn = next_insn(p, BRW_OPCODE_WAIT);
2056    brw_set_dest(p, insn, src);
2057    brw_set_src0(p, insn, src);
2058    brw_set_src1(p, insn, brw_null_reg());
2059 
2060    brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
2061    brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
2062 }
2063 
2064 void
brw_float_controls_mode(struct brw_codegen * p,unsigned mode,unsigned mask)2065 brw_float_controls_mode(struct brw_codegen *p,
2066                         unsigned mode, unsigned mask)
2067 {
2068    assert(p->current->mask_control == BRW_MASK_DISABLE);
2069 
2070    /* From the Skylake PRM, Volume 7, page 760:
2071     *  "Implementation Restriction on Register Access: When the control
2072     *   register is used as an explicit source and/or destination, hardware
2073     *   does not ensure execution pipeline coherency. Software must set the
2074     *   thread control field to ‘switch’ for an instruction that uses
2075     *   control register as an explicit operand."
2076     *
2077     * On Gfx12+ this is implemented in terms of SWSB annotations instead.
2078     */
2079    brw_set_default_swsb(p, tgl_swsb_regdist(1));
2080 
2081    brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
2082                             brw_imm_ud(~mask));
2083    brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
2084    if (p->devinfo->ver < 12)
2085       brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
2086 
2087    if (mode) {
2088       brw_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
2089                                  brw_imm_ud(mode));
2090       brw_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1);
2091       if (p->devinfo->ver < 12)
2092          brw_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
2093    }
2094 
2095    if (p->devinfo->ver >= 12)
2096       brw_SYNC(p, TGL_SYNC_NOP);
2097 }
2098 
2099 void
brw_update_reloc_imm(const struct brw_isa_info * isa,brw_inst * inst,uint32_t value)2100 brw_update_reloc_imm(const struct brw_isa_info *isa,
2101                      brw_inst *inst,
2102                      uint32_t value)
2103 {
2104    const struct intel_device_info *devinfo = isa->devinfo;
2105 
2106    /* Sanity check that the instruction is a MOV of an immediate */
2107    assert(brw_inst_opcode(isa, inst) == BRW_OPCODE_MOV);
2108    assert(brw_inst_src0_reg_file(devinfo, inst) == IMM);
2109 
2110    /* If it was compacted, we can't safely rewrite */
2111    assert(brw_inst_cmpt_control(devinfo, inst) == 0);
2112 
2113    brw_inst_set_imm_ud(devinfo, inst, value);
2114 }
2115 
2116 /* A default value for constants that will be patched at run-time.
2117  * We pick an arbitrary value that prevents instruction compaction.
2118  */
2119 #define DEFAULT_PATCH_IMM 0x4a7cc037
2120 
2121 void
brw_MOV_reloc_imm(struct brw_codegen * p,struct brw_reg dst,enum brw_reg_type src_type,uint32_t id,uint32_t base)2122 brw_MOV_reloc_imm(struct brw_codegen *p,
2123                   struct brw_reg dst,
2124                   enum brw_reg_type src_type,
2125                   uint32_t id,
2126                   uint32_t base)
2127 {
2128    assert(brw_type_size_bytes(src_type) == 4);
2129    assert(brw_type_size_bytes(dst.type) == 4);
2130 
2131    brw_add_reloc(p, id, BRW_SHADER_RELOC_TYPE_MOV_IMM,
2132                  p->next_insn_offset, base);
2133 
2134    brw_MOV(p, dst, retype(brw_imm_ud(DEFAULT_PATCH_IMM), src_type));
2135 }
2136