/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <[email protected]>
 */


#include "brw_eu_defines.h"
#include "brw_eu.h"

#include "util/ralloc.h"

void
brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (dest.file == FIXED_GRF)
      assert(dest.nr < XE2_MAX_GRF);

   /* The hardware has a restriction where a destination of size Byte with
    * a stride of 1 is only allowed for a packed byte MOV. For any other
    * instruction, the stride must be at least 2, even when the destination
    * is the NULL register.
    */
   if (dest.file == ARF &&
       dest.nr == BRW_ARF_NULL &&
       brw_type_size_bytes(dest.type) == 1 &&
       dest.hstride == BRW_HORIZONTAL_STRIDE_1) {
      dest.hstride = BRW_HORIZONTAL_STRIDE_2;
   }
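
   /* Illustrative example (a sketch, not emitted here): something like
    *
    *    add (8) null<1>:b g1<8,8,1>:b g2<8,8,1>:b
    *
    * would be rewritten by the fixup above to use a null<2>:b destination,
    * since a byte destination with stride 1 is only legal for a packed
    * byte MOV.
    */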

   if (devinfo->ver >= 12 &&
       (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
      assert(dest.file == FIXED_GRF || dest.file == ARF);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr == 0);
      assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
             (dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              dest.vstride == dest.width + 1));
      assert(!dest.negate && !dest.abs);
      brw_inst_set_dst_reg_file(devinfo, inst, dest.file);
      brw_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));

   } else if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
              brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      assert(devinfo->ver < 12);
      assert(dest.file == FIXED_GRF || dest.file == ARF);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr % 16 == 0);
      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
             dest.vstride == dest.width + 1);
      assert(!dest.negate && !dest.abs);
      brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
      brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
      brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file);
   } else {
      brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
      brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);

      if (dest.address_mode == BRW_ADDRESS_DIRECT) {
         brw_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_dst_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
            brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
            if (dest.file == FIXED_GRF) {
               assert(dest.writemask != 0);
            }
            /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
             *    Although Dst.HorzStride is a don't care for Align16, HW
             *    needs this to be programmed as "01".
             */
            brw_inst_set_dst_hstride(devinfo, inst, 1);
         }
      } else {
         brw_inst_set_dst_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));

         /* These are different sizes in align1 vs align16:
          */
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
                                          dest.indirect_offset);
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
                                           dest.indirect_offset);
            /* even though it is ignored in da16, it still needs to be '01' */
            brw_inst_set_dst_hstride(devinfo, inst, 1);
         }
      }
   }
}

void
brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == FIXED_GRF)
      assert(reg.nr < XE2_MAX_GRF);

   if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
       brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC ||
       brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
       brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      /* Any source modifiers or regions will be ignored, since this just
       * identifies the GRF to start reading the message contents from.
       * Check for some likely failures.
       */
      assert(!reg.negate);
      assert(!reg.abs);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
   }

   if (devinfo->ver >= 12 &&
       (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
      assert(reg.file != IMM);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_send_src0_reg_file(devinfo, inst, reg.file);
      brw_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));

   } else if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
              brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      assert(reg.file == FIXED_GRF);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr % 16 == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
      brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
   } else {
      brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
      brw_inst_set_src0_abs(devinfo, inst, reg.abs);
      brw_inst_set_src0_negate(devinfo, inst, reg.negate);
      brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);

      if (reg.file == IMM) {
         if (reg.type == BRW_TYPE_DF)
            brw_inst_set_imm_df(devinfo, inst, reg.df);
         else if (reg.type == BRW_TYPE_UQ || reg.type == BRW_TYPE_Q)
            brw_inst_set_imm_uq(devinfo, inst, reg.u64);
         else
            brw_inst_set_imm_ud(devinfo, inst, reg.ud);

         if (devinfo->ver < 12 && brw_type_size_bytes(reg.type) < 8) {
            brw_inst_set_src1_reg_file(devinfo, inst, ARF);
            brw_inst_set_src1_reg_hw_type(devinfo, inst,
                                          brw_inst_src0_reg_hw_type(devinfo, inst));
         }
      } else {
         if (reg.address_mode == BRW_ADDRESS_DIRECT) {
            brw_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
            if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_inst_set_src0_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
            } else {
               brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
            }
         } else {
            brw_inst_set_src0_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));

            if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
            } else {
               brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
            }
         }

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            if (reg.width == BRW_WIDTH_1 &&
                brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_inst_set_src0_hstride(devinfo, inst, reg.hstride);
               brw_inst_set_src0_width(devinfo, inst, reg.width);
               brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_inst_set_src0_da16_swiz_x(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_inst_set_src0_da16_swiz_y(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_inst_set_src0_da16_swiz_z(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_inst_set_src0_da16_swiz_w(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}


void
brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == FIXED_GRF)
      assert(reg.nr < XE2_MAX_GRF);

   if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
       brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC ||
       (devinfo->ver >= 12 &&
        (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
         brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC))) {
      assert(reg.file == FIXED_GRF || reg.file == ARF);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_send_src1_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
      brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file);
   } else {
      /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
       *
       *    "Accumulator registers may be accessed explicitly as src0
       *     operands only."
       */
      assert(reg.file != ARF ||
             (reg.nr & 0xF0) != BRW_ARF_ACCUMULATOR);

      brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
      brw_inst_set_src1_abs(devinfo, inst, reg.abs);
      brw_inst_set_src1_negate(devinfo, inst, reg.negate);

      /* Only src1 can be an immediate in two-argument instructions. */
      assert(brw_inst_src0_reg_file(devinfo, inst) != IMM);

      if (reg.file == IMM) {
         /* two-argument instructions can only use 32-bit immediates */
         assert(brw_type_size_bytes(reg.type) < 8);
         brw_inst_set_imm_ud(devinfo, inst, reg.ud);
      } else {
         /* This is a hardware restriction, which may or may not be lifted
          * in the future:
          */
         assert(reg.address_mode == BRW_ADDRESS_DIRECT);
         /* assert (reg.file == FIXED_GRF); */

         brw_inst_set_src1_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_src1_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
         } else {
            brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
         }

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            if (reg.width == BRW_WIDTH_1 &&
                brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_inst_set_src1_hstride(devinfo, inst, reg.hstride);
               brw_inst_set_src1_width(devinfo, inst, reg.width);
               brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_inst_set_src1_da16_swiz_x(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_inst_set_src1_da16_swiz_y(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_inst_set_src1_da16_swiz_z(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_inst_set_src1_da16_swiz_w(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}

/**
 * Specify the descriptor and extended descriptor immediate for a SEND(C)
 * message instruction.
 */
void
brw_set_desc_ex(struct brw_codegen *p, brw_inst *inst,
                unsigned desc, unsigned ex_desc)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
          brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC);
   if (devinfo->ver < 12)
      brw_inst_set_src1_file_type(devinfo, inst, IMM, BRW_TYPE_UD);
   brw_inst_set_send_desc(devinfo, inst, desc);
   if (devinfo->ver >= 9)
      brw_inst_set_send_ex_desc(devinfo, inst, ex_desc);
}
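
/* Callers that don't need an extended descriptor typically go through a
 * brw_set_desc() convenience wrapper (used by brw_send_indirect_message()
 * below), which presumably just forwards ex_desc == 0 to this function.
 */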

static void
brw_inst_set_state(const struct brw_isa_info *isa,
                   brw_inst *insn,
                   const struct brw_insn_state *state)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   brw_inst_set_exec_size(devinfo, insn, state->exec_size);
   brw_inst_set_group(devinfo, insn, state->group);
   brw_inst_set_access_mode(devinfo, insn, state->access_mode);
   brw_inst_set_mask_control(devinfo, insn, state->mask_control);
   if (devinfo->ver >= 12)
      brw_inst_set_swsb(devinfo, insn,
                        tgl_swsb_encode(devinfo, state->swsb,
                                        brw_inst_opcode(isa, insn)));
   brw_inst_set_saturate(devinfo, insn, state->saturate);
   brw_inst_set_pred_control(devinfo, insn, state->predicate);
   brw_inst_set_pred_inv(devinfo, insn, state->pred_inv);

   if (is_3src(isa, brw_inst_opcode(isa, insn)) &&
       state->access_mode == BRW_ALIGN_16) {
      brw_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      brw_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   } else {
      brw_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      brw_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   }

   if (devinfo->ver < 20)
      brw_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
}

static brw_inst *
brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned alignment)
{
   assert(util_is_power_of_two_or_zero(sizeof(brw_inst)));
   assert(util_is_power_of_two_or_zero(alignment));
   const unsigned align_insn = MAX2(alignment / sizeof(brw_inst), 1);
   const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
   const unsigned new_nr_insn = start_insn + nr_insn;

   if (p->store_size < new_nr_insn) {
      p->store_size = util_next_power_of_two(new_nr_insn * sizeof(brw_inst));
      p->store = reralloc(p->mem_ctx, p->store, brw_inst, p->store_size);
   }

   /* Memset any padding due to alignment to 0. We don't want to be hashing
    * or caching a bunch of random bits we got from a memory allocation.
    */
   if (p->nr_insn < start_insn) {
      memset(&p->store[p->nr_insn], 0,
             (start_insn - p->nr_insn) * sizeof(brw_inst));
   }

   assert(p->next_insn_offset == p->nr_insn * sizeof(brw_inst));
   p->nr_insn = new_nr_insn;
   p->next_insn_offset = new_nr_insn * sizeof(brw_inst);

   return &p->store[start_insn];
}
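
/* A worked example of the alignment math above (a sketch; assumes the usual
 * 16-byte brw_inst): brw_append_insns(p, 2, 64) gives align_insn == 4, so a
 * buffer currently holding 5 instructions pads up to start_insn == 8, the 3
 * skipped slots are zeroed, and the new instructions occupy slots 8 and 9.
 */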

void
brw_realign(struct brw_codegen *p, unsigned alignment)
{
   brw_append_insns(p, 0, alignment);
}

int
brw_append_data(struct brw_codegen *p, void *data,
                unsigned size, unsigned alignment)
{
   unsigned nr_insn = DIV_ROUND_UP(size, sizeof(brw_inst));
   void *dst = brw_append_insns(p, nr_insn, alignment);
   memcpy(dst, data, size);

   /* If it's not a whole number of instructions, memset the end */
   if (size < nr_insn * sizeof(brw_inst))
      memset(dst + size, 0, nr_insn * sizeof(brw_inst) - size);

   return dst - (void *)p->store;
}

#define next_insn brw_next_insn
brw_inst *
brw_next_insn(struct brw_codegen *p, unsigned opcode)
{
   brw_inst *insn = brw_append_insns(p, 1, sizeof(brw_inst));

   memset(insn, 0, sizeof(*insn));
   brw_inst_set_opcode(p->isa, insn, opcode);

   /* Apply the default instruction state */
   brw_inst_set_state(p->isa, insn, p->current);

   return insn;
}

void
brw_add_reloc(struct brw_codegen *p, uint32_t id,
              enum brw_shader_reloc_type type,
              uint32_t offset, uint32_t delta)
{
   if (p->num_relocs + 1 > p->reloc_array_size) {
      p->reloc_array_size = MAX2(16, p->reloc_array_size * 2);
      p->relocs = reralloc(p->mem_ctx, p->relocs,
                           struct brw_shader_reloc, p->reloc_array_size);
   }

   p->relocs[p->num_relocs++] = (struct brw_shader_reloc) {
      .id = id,
      .type = type,
      .offset = offset,
      .delta = delta,
   };
}
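
/* Note the geometric growth above: the relocation array goes 0 -> 16 -> 32
 * -> 64 entries as relocations are appended, so repeated brw_add_reloc()
 * calls incur amortized O(1) reallocation cost.
 */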

static brw_inst *
brw_alu1(struct brw_codegen *p, unsigned opcode,
         struct brw_reg dest, struct brw_reg src)
{
   brw_inst *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   return insn;
}

static brw_inst *
brw_alu2(struct brw_codegen *p, unsigned opcode,
         struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
{
   /* 64-bit immediates are only supported on 1-src instructions */
   assert(src0.file != IMM || brw_type_size_bytes(src0.type) <= 4);
   assert(src1.file != IMM || brw_type_size_bytes(src1.type) <= 4);

   brw_inst *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
   return insn;
}

static int
get_3src_subreg_nr(struct brw_reg reg)
{
   /* Normally, SubRegNum is in bytes (0..31). However, 3-src instructions
    * use 32-bit units (components 0..7). Since they only support F/D/UD
    * types, this doesn't lose any flexibility, but uses fewer bits.
    */
   return reg.subnr / 4;
}
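
/* E.g. a 3-src operand at byte subnr 12 (the fourth dword of the register)
 * is encoded by the helper above as component 3.
 */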

static enum gfx10_align1_3src_vertical_stride
to_3src_align1_vstride(const struct intel_device_info *devinfo,
                       enum brw_vertical_stride vstride)
{
   switch (vstride) {
   case BRW_VERTICAL_STRIDE_0:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0;
   case BRW_VERTICAL_STRIDE_1:
      assert(devinfo->ver >= 12);
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1;
   case BRW_VERTICAL_STRIDE_2:
      assert(devinfo->ver < 12);
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2;
   case BRW_VERTICAL_STRIDE_4:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4;
   case BRW_VERTICAL_STRIDE_8:
   case BRW_VERTICAL_STRIDE_16:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8;
   default:
      unreachable("invalid vstride");
   }
}


static enum gfx10_align1_3src_src_horizontal_stride
to_3src_align1_hstride(enum brw_horizontal_stride hstride)
{
   switch (hstride) {
   case BRW_HORIZONTAL_STRIDE_0:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0;
   case BRW_HORIZONTAL_STRIDE_1:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1;
   case BRW_HORIZONTAL_STRIDE_2:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2;
   case BRW_HORIZONTAL_STRIDE_4:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4;
   default:
      unreachable("invalid hstride");
   }
}

static brw_inst *
brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
         struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *inst = next_insn(p, opcode);

   assert(dest.nr < XE2_MAX_GRF);

   if (devinfo->ver >= 10)
      assert(!(src0.file == IMM && src2.file == IMM));

   assert(src0.file == IMM || src0.nr < XE2_MAX_GRF);
   assert(src1.file != IMM && src1.nr < XE2_MAX_GRF);
   assert(src2.file == IMM || src2.nr < XE2_MAX_GRF);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
      assert(dest.file == FIXED_GRF ||
             (dest.file == ARF &&
              (dest.nr & 0xF0) == BRW_ARF_ACCUMULATOR));

      brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, dest.file);
      brw_inst_set_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
      brw_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest) / 8);
      brw_inst_set_3src_a1_dst_hstride(devinfo, inst, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1);

      if (brw_type_is_float(dest.type)) {
         brw_inst_set_3src_a1_exec_type(devinfo, inst,
                                        BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
      } else {
         brw_inst_set_3src_a1_exec_type(devinfo, inst,
                                        BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
      }

      brw_inst_set_3src_a1_dst_type(devinfo, inst, dest.type);
      brw_inst_set_3src_a1_src0_type(devinfo, inst, src0.type);
      brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type);
      brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type);

      if (src0.file == IMM) {
         brw_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud);
      } else {
         brw_inst_set_3src_a1_src0_vstride(
            devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride));
         brw_inst_set_3src_a1_src0_hstride(devinfo, inst,
                                           to_3src_align1_hstride(src0.hstride));
         brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));
         brw_inst_set_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
         brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
         brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
      }
      brw_inst_set_3src_a1_src1_vstride(
         devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride));
      brw_inst_set_3src_a1_src1_hstride(devinfo, inst,
                                        to_3src_align1_hstride(src1.hstride));

      brw_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
      if (src1.file == ARF) {
         brw_inst_set_3src_src1_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
      } else {
         brw_inst_set_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
      }
      brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
      brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);

      if (src2.file == IMM) {
         brw_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud);
      } else {
         brw_inst_set_3src_a1_src2_hstride(devinfo, inst,
                                           to_3src_align1_hstride(src2.hstride));
         /* no vstride on src2 */
         brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
         brw_inst_set_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
         brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
         brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
      }

      assert(src0.file == FIXED_GRF || src0.file == IMM);
      assert(src1.file == FIXED_GRF ||
             (src1.file == ARF && src1.nr == BRW_ARF_ACCUMULATOR));
      assert(src2.file == FIXED_GRF || src2.file == IMM);

      if (devinfo->ver >= 12) {
         if (src0.file == IMM) {
            brw_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1);
         } else {
            brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file);
         }

         brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file);

         if (src2.file == IMM) {
            brw_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1);
         } else {
            brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file);
         }
      } else {
         brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file);
         brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file);
         brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file);
      }

   } else {
      assert(dest.file == FIXED_GRF);
      assert(dest.type == BRW_TYPE_F ||
             dest.type == BRW_TYPE_DF ||
             dest.type == BRW_TYPE_D ||
             dest.type == BRW_TYPE_UD ||
             dest.type == BRW_TYPE_HF);
      brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
      brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4);
      brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);

      assert(src0.file == FIXED_GRF);
      brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
      brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0));
      brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
      brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
      brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
      brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst,
                                          src0.vstride == BRW_VERTICAL_STRIDE_0);

      assert(src1.file == FIXED_GRF);
      brw_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle);
      brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1));
      brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
      brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
      brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
      brw_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst,
                                          src1.vstride == BRW_VERTICAL_STRIDE_0);

      assert(src2.file == FIXED_GRF);
      brw_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle);
      brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2));
      brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
      brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
      brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
      brw_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst,
                                          src2.vstride == BRW_VERTICAL_STRIDE_0);

      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types.  The MAD and LRP emitters ensure
       * that all four types are float.  The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
      brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);

      /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType:
       *
       *    "Three source instructions can use operands with mixed-mode
       *     precision. When SrcType field is set to :f or :hf it defines
       *     precision for source 0 only, and fields Src1Type and Src2Type
       *     define precision for other source operands:
       *
       *     0b = :f. Single precision Float (32-bit).
       *     1b = :hf. Half precision Float (16-bit)."
       */
      if (src1.type == BRW_TYPE_HF)
         brw_inst_set_3src_a16_src1_type(devinfo, inst, 1);

      if (src2.type == BRW_TYPE_HF)
         brw_inst_set_3src_a16_src2_type(devinfo, inst, 1);
   }

   return inst;
}

static brw_inst *
brw_dpas_three_src(struct brw_codegen *p, enum opcode opcode,
                   enum gfx12_systolic_depth sdepth, unsigned rcount,
                   struct brw_reg dest, struct brw_reg src0,
                   struct brw_reg src1, struct brw_reg src2)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *inst = next_insn(p, opcode);

   assert(dest.file == FIXED_GRF);
   brw_inst_set_dpas_3src_dst_reg_file(devinfo, inst, FIXED_GRF);
   brw_inst_set_dpas_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
   brw_inst_set_dpas_3src_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));

   if (brw_type_is_float(dest.type)) {
      brw_inst_set_dpas_3src_exec_type(devinfo, inst,
                                       BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
   } else {
      brw_inst_set_dpas_3src_exec_type(devinfo, inst,
                                       BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
   }

   brw_inst_set_dpas_3src_sdepth(devinfo, inst, sdepth);
   brw_inst_set_dpas_3src_rcount(devinfo, inst, rcount - 1);

   brw_inst_set_dpas_3src_dst_type(devinfo, inst, dest.type);
   brw_inst_set_dpas_3src_src0_type(devinfo, inst, src0.type);
   brw_inst_set_dpas_3src_src1_type(devinfo, inst, src1.type);
   brw_inst_set_dpas_3src_src2_type(devinfo, inst, src2.type);

   assert(src0.file == FIXED_GRF ||
          (src0.file == ARF && src0.nr == BRW_ARF_NULL));

   brw_inst_set_dpas_3src_src0_reg_file(devinfo, inst, src0.file);
   brw_inst_set_dpas_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
   brw_inst_set_dpas_3src_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));

   assert(src1.file == FIXED_GRF);

   brw_inst_set_dpas_3src_src1_reg_file(devinfo, inst, src1.file);
   brw_inst_set_dpas_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
   brw_inst_set_dpas_3src_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
   brw_inst_set_dpas_3src_src1_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);

   assert(src2.file == FIXED_GRF);

   brw_inst_set_dpas_3src_src2_reg_file(devinfo, inst, src2.file);
   brw_inst_set_dpas_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
   brw_inst_set_dpas_3src_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
   brw_inst_set_dpas_3src_src2_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);

   return inst;
}

/***********************************************************************
 * Convenience routines.
 */
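
/* Each ALUn(OP) macro below stamps out a thin public wrapper around the
 * corresponding brw_alun() helper; e.g. ALU2(AND) expands to roughly:
 *
 *    brw_inst *brw_AND(struct brw_codegen *p, struct brw_reg dest,
 *                      struct brw_reg src0, struct brw_reg src1)
 *    {
 *       return brw_alu2(p, BRW_OPCODE_AND, dest, src0, src1);
 *    }
 */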
#define ALU1(OP)                                            \
brw_inst *brw_##OP(struct brw_codegen *p,                   \
                   struct brw_reg dest,                     \
                   struct brw_reg src0)                     \
{                                                           \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);         \
}

#define ALU2(OP)                                                  \
brw_inst *brw_##OP(struct brw_codegen *p,                         \
                   struct brw_reg dest,                           \
                   struct brw_reg src0,                           \
                   struct brw_reg src1)                           \
{                                                                 \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);         \
}

#define ALU3(OP)                                                        \
brw_inst *brw_##OP(struct brw_codegen *p,                               \
                   struct brw_reg dest,                                 \
                   struct brw_reg src0,                                 \
                   struct brw_reg src1,                                 \
                   struct brw_reg src2)                                 \
{                                                                       \
   if (p->current->access_mode == BRW_ALIGN_16) {                       \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                        \
         src0.swizzle = BRW_SWIZZLE_XXXX;                               \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                        \
         src1.swizzle = BRW_SWIZZLE_XXXX;                               \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                        \
         src2.swizzle = BRW_SWIZZLE_XXXX;                               \
   }                                                                    \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);         \
}

#define ALU3F(OP)                                                       \
brw_inst *brw_##OP(struct brw_codegen *p,                               \
                   struct brw_reg dest,                                 \
                   struct brw_reg src0,                                 \
                   struct brw_reg src1,                                 \
                   struct brw_reg src2)                                 \
{                                                                       \
   assert(dest.type == BRW_TYPE_F ||                                    \
          dest.type == BRW_TYPE_DF);                                    \
   if (dest.type == BRW_TYPE_F) {                                       \
      assert(src0.type == BRW_TYPE_F);                                  \
      assert(src1.type == BRW_TYPE_F);                                  \
      assert(src2.type == BRW_TYPE_F);                                  \
   } else if (dest.type == BRW_TYPE_DF) {                               \
      assert(src0.type == BRW_TYPE_DF);                                 \
      assert(src1.type == BRW_TYPE_DF);                                 \
      assert(src2.type == BRW_TYPE_DF);                                 \
   }                                                                    \
                                                                        \
   if (p->current->access_mode == BRW_ALIGN_16) {                       \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                        \
         src0.swizzle = BRW_SWIZZLE_XXXX;                               \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                        \
         src1.swizzle = BRW_SWIZZLE_XXXX;                               \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                        \
         src2.swizzle = BRW_SWIZZLE_XXXX;                               \
   }                                                                    \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);         \
}

ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
ALU3(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)
ALU3(ADD3)
ALU1(MOV)

brw_inst *
brw_ADD(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   /* 6.2.2: add */
   if (src0.type == BRW_TYPE_F ||
       (src0.file == IMM &&
        src0.type == BRW_TYPE_VF)) {
      assert(src1.type != BRW_TYPE_UD);
      assert(src1.type != BRW_TYPE_D);
   }

   if (src1.type == BRW_TYPE_F ||
       (src1.file == IMM &&
        src1.type == BRW_TYPE_VF)) {
      assert(src0.type != BRW_TYPE_UD);
      assert(src0.type != BRW_TYPE_D);
   }

   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}

brw_inst *
brw_AVG(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   assert(dest.type == src0.type);
   assert(src0.type == src1.type);
   switch (src0.type) {
   case BRW_TYPE_B:
   case BRW_TYPE_UB:
   case BRW_TYPE_W:
   case BRW_TYPE_UW:
   case BRW_TYPE_D:
   case BRW_TYPE_UD:
      break;
   default:
      unreachable("Bad type for brw_AVG");
   }

   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
}

brw_inst *
brw_MUL(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_TYPE_D ||
       src0.type == BRW_TYPE_UD ||
       src1.type == BRW_TYPE_D ||
       src1.type == BRW_TYPE_UD) {
      assert(dest.type != BRW_TYPE_F);
   }

   if (src0.type == BRW_TYPE_F ||
       (src0.file == IMM &&
        src0.type == BRW_TYPE_VF)) {
      assert(src1.type != BRW_TYPE_UD);
      assert(src1.type != BRW_TYPE_D);
   }

   if (src1.type == BRW_TYPE_F ||
       (src1.file == IMM &&
        src1.type == BRW_TYPE_VF)) {
      assert(src0.type != BRW_TYPE_UD);
      assert(src0.type != BRW_TYPE_D);
   }

   assert(src0.file != ARF || src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != ARF || src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}

brw_inst *
brw_LINE(struct brw_codegen *p, struct brw_reg dest,
         struct brw_reg src0, struct brw_reg src1)
{
   src0.vstride = BRW_VERTICAL_STRIDE_0;
   src0.width = BRW_WIDTH_1;
   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
   return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1);
}

brw_inst *
brw_PLN(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   src0.vstride = BRW_VERTICAL_STRIDE_0;
   src0.width = BRW_WIDTH_1;
   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
   src1.vstride = BRW_VERTICAL_STRIDE_8;
   src1.width = BRW_WIDTH_8;
   src1.hstride = BRW_HORIZONTAL_STRIDE_1;
   return brw_alu2(p, BRW_OPCODE_PLN, dest, src0, src1);
}

brw_inst *
brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth,
         unsigned rcount, struct brw_reg dest, struct brw_reg src0,
         struct brw_reg src1, struct brw_reg src2)
{
   return brw_dpas_three_src(p, BRW_OPCODE_DPAS, sdepth, rcount, dest, src0,
                             src1, src2);
}

void brw_NOP(struct brw_codegen *p)
{
   brw_inst *insn = next_insn(p, BRW_OPCODE_NOP);
   memset(insn, 0, sizeof(*insn));
   brw_inst_set_opcode(p->isa, insn, BRW_OPCODE_NOP);
}

void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func)
{
   brw_inst *insn = next_insn(p, BRW_OPCODE_SYNC);
   brw_inst_set_cond_modifier(p->devinfo, insn, func);
}

/***********************************************************************
 * Comparisons, if/else/endif
 */

brw_inst *
brw_JMPI(struct brw_codegen *p, struct brw_reg index,
         unsigned predicate_control)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_reg ip = brw_ip_reg();
   brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);

   brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
   brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
   brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
   brw_inst_set_pred_control(devinfo, inst, predicate_control);

   return inst;
}

static void
push_if_stack(struct brw_codegen *p, brw_inst *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
                             p->if_stack_array_size);
   }
}

static brw_inst *
pop_if_stack(struct brw_codegen *p)
{
   p->if_stack_depth--;
   return &p->store[p->if_stack[p->if_stack_depth]];
}

static void
push_loop_stack(struct brw_codegen *p, brw_inst *inst)
{
   if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) {
      p->loop_stack_array_size *= 2;
      p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
                               p->loop_stack_array_size);
   }

   p->loop_stack[p->loop_stack_depth] = inst - p->store;
   p->loop_stack_depth++;
}

static brw_inst *
get_inner_do_insn(struct brw_codegen *p)
{
   return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
}

/* The EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, e.g. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
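
/* A minimal usage sketch of the interface below (assuming a flag register
 * already holds the per-channel condition, e.g. written by brw_CMP()):
 *
 *    brw_IF(p, BRW_EXECUTE_8);
 *    ...                        // "then" block
 *    brw_ELSE(p);
 *    ...                        // "else" block
 *    brw_ENDIF(p);              // patches the IF/ELSE JIP/UIP fields
 */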
brw_inst *
brw_IF(struct brw_codegen *p, unsigned execute_size)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_TYPE_D)));
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   brw_inst_set_jip(devinfo, insn, 0);
   brw_inst_set_uip(devinfo, insn, 0);

   brw_inst_set_exec_size(devinfo, insn, execute_size);
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   push_if_stack(p, insn);
   return insn;
}

/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_codegen *p,
              brw_inst *if_inst, brw_inst *else_inst, brw_inst *endif_inst)
{
   const struct intel_device_info *devinfo = p->devinfo;

   assert(if_inst != NULL && brw_inst_opcode(p->isa, if_inst) == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || brw_inst_opcode(p->isa, else_inst) == BRW_OPCODE_ELSE);

   unsigned br = brw_jump_scale(devinfo);

   assert(brw_inst_opcode(p->isa, endif_inst) == BRW_OPCODE_ENDIF);
   brw_inst_set_exec_size(devinfo, endif_inst, brw_inst_exec_size(devinfo, if_inst));

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
      brw_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst));
   } else {
      brw_inst_set_exec_size(devinfo, else_inst, brw_inst_exec_size(devinfo, if_inst));

      /* Patch ELSE -> ENDIF */
      /* The IF instruction's JIP should point just past the ELSE */
      brw_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1));
      /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
      brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));

      if (devinfo->ver < 11) {
         /* Set the ELSE instruction to use branch_ctrl with a join
          * jump target pointing at the NOP inserted right before
          * the ENDIF instruction in order to make sure it is
          * executed in all cases, since attempting to do the same
          * as on other generations could cause the EU to jump at
          * the instruction immediately after the ENDIF due to
          * Wa_220160235, which could cause the program to continue
          * running with all channels disabled.
          */
         brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst - 1));
         brw_inst_set_branch_control(devinfo, else_inst, true);
      } else {
         brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst));
      }

      /* Since we don't set branch_ctrl on Gfx11+, the ELSE's
       * JIP and UIP both should point to ENDIF on those
       * platforms.
       */
      brw_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst));
   }
}

void
brw_ELSE(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   brw_inst_set_jip(devinfo, insn, 0);
   brw_inst_set_uip(devinfo, insn, 0);

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   push_if_stack(p, insn);
}

void
brw_ENDIF(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn = NULL;
   brw_inst *else_inst = NULL;
   brw_inst *if_inst = NULL;
   brw_inst *tmp;

   assert(p->if_stack_depth > 0);

   if (devinfo->ver < 11 &&
       brw_inst_opcode(p->isa, &p->store[p->if_stack[
                             p->if_stack_depth - 1]]) == BRW_OPCODE_ELSE) {
      /* Insert a NOP to be specified as join instruction within the
       * ELSE block, which is valid for an ELSE instruction with
       * branch_ctrl on.  The ELSE instruction will be set to jump
       * here instead of to the ENDIF instruction, since attempting to
       * do the latter would prevent the ENDIF from being executed in
       * some cases due to Wa_220160235, which could cause the program
       * to continue running with all channels disabled.
       */
      brw_NOP(p);
   }

   /*
    * A single next_insn() may change the base address of the instruction
    * store memory (p->store), so call it first, before computing any pointer
    * into the store from an index.
    */
   insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   tmp = pop_if_stack(p);
   if (brw_inst_opcode(p->isa, tmp) == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   brw_set_src0(p, insn, brw_imm_d(0));

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   brw_inst_set_jip(devinfo, insn, 2);
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}

brw_inst *
brw_BREAK(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   brw_set_src0(p, insn, brw_imm_d(0x0));
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));

   return insn;
}

brw_inst *
brw_CONT(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_imm_d(0x0));

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
   return insn;
}

brw_inst *
brw_HALT(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   if (devinfo->ver < 12) {
      brw_set_src0(p, insn, brw_imm_d(0x0));
   }

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
   return insn;
}

/* DO/WHILE loop:
 *
 * The DO/WHILE is just an unterminated loop -- break or continue are
 * used for control within the loop.  We have a few ways they can be
 * done.
 *
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
 * jip and no DO instruction.
 *
 * For gfx6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */
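
/* A typical emission sequence (a sketch):
 *
 *    brw_DO(p, BRW_EXECUTE_8);  // records the loop head, emits nothing
 *    ...                        // body; may emit brw_BREAK()/brw_CONT()
 *    brw_WHILE(p);              // JIP jumps back to the recorded head
 */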
1292 brw_inst *
brw_DO(struct brw_codegen * p,unsigned execute_size)1293 brw_DO(struct brw_codegen *p, unsigned execute_size)
1294 {
1295 push_loop_stack(p, &p->store[p->nr_insn]);
1296 return &p->store[p->nr_insn];
1297 }
1298
1299 brw_inst *
brw_WHILE(struct brw_codegen * p)1300 brw_WHILE(struct brw_codegen *p)
1301 {
1302 const struct intel_device_info *devinfo = p->devinfo;
1303 brw_inst *insn, *do_insn;
1304 unsigned br = brw_jump_scale(devinfo);
1305
1306 insn = next_insn(p, BRW_OPCODE_WHILE);
1307 do_insn = get_inner_do_insn(p);
1308
1309 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
1310 if (devinfo->ver < 12)
1311 brw_set_src0(p, insn, brw_imm_d(0));
1312 brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
1313
1314 brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1315
1316 brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1317
1318 p->loop_stack_depth--;
1319
1320 return insn;
1321 }
1322
brw_CMP(struct brw_codegen * p,struct brw_reg dest,unsigned conditional,struct brw_reg src0,struct brw_reg src1)1323 void brw_CMP(struct brw_codegen *p,
1324 struct brw_reg dest,
1325 unsigned conditional,
1326 struct brw_reg src0,
1327 struct brw_reg src1)
1328 {
1329 const struct intel_device_info *devinfo = p->devinfo;
1330 brw_inst *insn = next_insn(p, BRW_OPCODE_CMP);
1331
1332 brw_inst_set_cond_modifier(devinfo, insn, conditional);
1333 brw_set_dest(p, insn, dest);
1334 brw_set_src0(p, insn, src0);
1335 brw_set_src1(p, insn, src1);
1336 }
1337
brw_CMPN(struct brw_codegen * p,struct brw_reg dest,unsigned conditional,struct brw_reg src0,struct brw_reg src1)1338 void brw_CMPN(struct brw_codegen *p,
1339 struct brw_reg dest,
1340 unsigned conditional,
1341 struct brw_reg src0,
1342 struct brw_reg src1)
1343 {
1344 const struct intel_device_info *devinfo = p->devinfo;
1345 brw_inst *insn = next_insn(p, BRW_OPCODE_CMPN);
1346
1347 brw_inst_set_cond_modifier(devinfo, insn, conditional);
1348 brw_set_dest(p, insn, dest);
1349 brw_set_src0(p, insn, src0);
1350 brw_set_src1(p, insn, src1);
1351 }
1352
1353 /***********************************************************************
1354 * Helpers for the various SEND message types:
1355 */
1356
gfx6_math(struct brw_codegen * p,struct brw_reg dest,unsigned function,struct brw_reg src0,struct brw_reg src1)1357 void gfx6_math(struct brw_codegen *p,
1358 struct brw_reg dest,
1359 unsigned function,
1360 struct brw_reg src0,
1361 struct brw_reg src1)
1362 {
1363 const struct intel_device_info *devinfo = p->devinfo;
1364 brw_inst *insn = next_insn(p, BRW_OPCODE_MATH);
1365
1366 assert(dest.file == FIXED_GRF);
1367
1368 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1369
1370 if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
1371 function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
1372 function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1373 assert(src0.type != BRW_TYPE_F);
1374 assert(src1.type != BRW_TYPE_F);
1375 assert(src1.file == FIXED_GRF ||
1376 src1.file == IMM);
1377 /* From BSpec 6647/47428 "[Instruction] Extended Math Function":
1378 * INT DIV function does not support source modifiers.
1379 */
1380 assert(!src0.negate);
1381 assert(!src0.abs);
1382 assert(!src1.negate);
1383 assert(!src1.abs);
1384 } else {
1385 assert(src0.type == BRW_TYPE_F ||
1386 (src0.type == BRW_TYPE_HF && devinfo->ver >= 9));
1387 assert(src1.type == BRW_TYPE_F ||
1388 (src1.type == BRW_TYPE_HF && devinfo->ver >= 9));
1389 }
1390
1391 brw_inst_set_math_function(devinfo, insn, function);
1392
1393 brw_set_dest(p, insn, dest);
1394 brw_set_src0(p, insn, src0);
1395 brw_set_src1(p, insn, src1);
1396 }
1397
1398 void
brw_send_indirect_message(struct brw_codegen * p,unsigned sfid,struct brw_reg dst,struct brw_reg payload,struct brw_reg desc,unsigned desc_imm,bool eot)1399 brw_send_indirect_message(struct brw_codegen *p,
1400 unsigned sfid,
1401 struct brw_reg dst,
1402 struct brw_reg payload,
1403 struct brw_reg desc,
1404 unsigned desc_imm,
1405 bool eot)
1406 {
1407 const struct intel_device_info *devinfo = p->devinfo;
1408 struct brw_inst *send;
1409
1410 dst = retype(dst, BRW_TYPE_UW);
1411
1412 assert(desc.type == BRW_TYPE_UD);
1413
1414 if (desc.file == IMM) {
1415 send = next_insn(p, BRW_OPCODE_SEND);
1416 brw_set_src0(p, send, retype(payload, BRW_TYPE_UD));
1417 brw_set_desc(p, send, desc.ud | desc_imm);
1418 } else {
1419 const struct tgl_swsb swsb = brw_get_default_swsb(p);
1420 struct brw_reg addr = retype(brw_address_reg(0), BRW_TYPE_UD);
1421
1422 brw_push_insn_state(p);
1423 brw_set_default_access_mode(p, BRW_ALIGN_1);
1424 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
1425 brw_set_default_exec_size(p, BRW_EXECUTE_1);
1426 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
1427 brw_set_default_flag_reg(p, 0, 0);
1428 brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
1429
1430 /* Load the indirect descriptor to an address register using OR so the
1431 * caller can specify additional descriptor bits with the desc_imm
1432 * immediate.
1433 */
1434 brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
1435
1436 brw_pop_insn_state(p);
1437
1438 brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
1439 send = next_insn(p, BRW_OPCODE_SEND);
1440 brw_set_src0(p, send, retype(payload, BRW_TYPE_UD));
1441
1442 if (devinfo->ver >= 12)
1443 brw_inst_set_send_sel_reg32_desc(devinfo, send, true);
1444 else
1445 brw_set_src1(p, send, addr);
1446 }
1447
1448 brw_set_dest(p, send, dst);
1449 brw_inst_set_sfid(devinfo, send, sfid);
1450 brw_inst_set_eot(devinfo, send, eot);
1451 }
1452
1453 void
brw_send_indirect_split_message(struct brw_codegen * p,unsigned sfid,struct brw_reg dst,struct brw_reg payload0,struct brw_reg payload1,struct brw_reg desc,unsigned desc_imm,struct brw_reg ex_desc,unsigned ex_desc_imm,bool ex_desc_scratch,bool ex_bso,bool eot)1454 brw_send_indirect_split_message(struct brw_codegen *p,
1455 unsigned sfid,
1456 struct brw_reg dst,
1457 struct brw_reg payload0,
1458 struct brw_reg payload1,
1459 struct brw_reg desc,
1460 unsigned desc_imm,
1461 struct brw_reg ex_desc,
1462 unsigned ex_desc_imm,
1463 bool ex_desc_scratch,
1464 bool ex_bso,
1465 bool eot)
1466 {
1467 const struct intel_device_info *devinfo = p->devinfo;
1468 struct brw_inst *send;
1469
1470 dst = retype(dst, BRW_TYPE_UW);
1471
1472 assert(desc.type == BRW_TYPE_UD);
1473
1474 if (desc.file == IMM) {
1475 desc.ud |= desc_imm;
1476 } else {
1477 const struct tgl_swsb swsb = brw_get_default_swsb(p);
1478 struct brw_reg addr = retype(brw_address_reg(0), BRW_TYPE_UD);
1479
1480 brw_push_insn_state(p);
1481 brw_set_default_access_mode(p, BRW_ALIGN_1);
1482 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
1483 brw_set_default_exec_size(p, BRW_EXECUTE_1);
1484 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
1485 brw_set_default_flag_reg(p, 0, 0);
1486 brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
1487
1488 /* Load the indirect descriptor to an address register using OR so the
1489 * caller can specify additional descriptor bits with the desc_imm
1490 * immediate.
1491 */
1492 brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
1493
1494 brw_pop_insn_state(p);
1495 desc = addr;
1496
1497 brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
1498 }
1499
1500 if (ex_desc.file == IMM &&
1501 !ex_desc_scratch &&
1502 (devinfo->ver >= 12 ||
1503 ((ex_desc.ud | ex_desc_imm) & INTEL_MASK(15, 12)) == 0)) {
1504 /* ATS-M PRMs, Volume 2d: Command Reference: Structures,
1505 * EU_INSTRUCTION_SEND instruction
1506 *
1507 * "ExBSO: Exists If: ([ExDesc.IsReg]==true)"
1508 */
1509 assert(!ex_bso);
1510 ex_desc.ud |= ex_desc_imm;
1511 } else {
1512 const struct tgl_swsb swsb = brw_get_default_swsb(p);
1513 struct brw_reg addr = retype(brw_address_reg(2), BRW_TYPE_UD);
1514
1515 /* On Xe2+ ExBSO addressing is implicitly enabled for the UGM
1516 * shared function.
1517 */
1518 ex_bso |= (devinfo->ver >= 20 && sfid == GFX12_SFID_UGM);
1519
1520 brw_push_insn_state(p);
1521 brw_set_default_access_mode(p, BRW_ALIGN_1);
1522 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
1523 brw_set_default_exec_size(p, BRW_EXECUTE_1);
1524 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
1525 brw_set_default_flag_reg(p, 0, 0);
1526 brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
1527
      /* Load the indirect extended descriptor to an address register using OR
       * so the caller can specify additional descriptor bits with the
       * ex_desc_imm immediate.
       *
       * Even though the instruction dispatcher always pulls the SFID and EOT
       * fields from the instruction itself, the actual external unit that
       * processes the message gets the SFID and EOT from the extended
       * descriptor coming from the address register.  If we don't OR those
       * fields in, the external unit may get confused and hang.
       */
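      /* With ExBSO none of the immediate descriptor bits are OR'ed into the
       * address register; the src1 length is instead encoded in the
       * instruction itself (see brw_inst_set_send_src1_len() below).
       */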
      unsigned imm_part = ex_bso ? 0 : (ex_desc_imm | sfid | eot << 5);

      if (ex_desc_scratch) {
         assert(devinfo->verx10 >= 125);
         brw_AND(p, addr,
                 retype(brw_vec1_grf(0, 5), BRW_TYPE_UD),
                 brw_imm_ud(INTEL_MASK(31, 10)));

         if (devinfo->ver >= 20 && sfid == GFX12_SFID_UGM) {
            const unsigned ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc_imm);
            assert(ex_desc_imm == brw_message_ex_desc(devinfo, ex_mlen));
            brw_SHR(p, addr, addr, brw_imm_ud(4));
         } else {
            /* Or the scratch surface offset together with the immediate part
             * of the extended descriptor.
             */
            brw_OR(p, addr, addr, brw_imm_ud(imm_part));
         }

      } else if (ex_desc.file == IMM) {
         /* ex_desc bits 15:12 don't exist in the instruction encoding prior
          * to Gfx12, so we may have fallen back to an indirect extended
          * descriptor.
          */
         brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part));
      } else {
         brw_OR(p, addr, ex_desc, brw_imm_ud(imm_part));
      }

      brw_pop_insn_state(p);
      ex_desc = addr;

      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
   }

   send = next_insn(p, devinfo->ver >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, retype(payload0, BRW_TYPE_UD));
   brw_set_src1(p, send, retype(payload1, BRW_TYPE_UD));

   if (desc.file == IMM) {
      brw_inst_set_send_sel_reg32_desc(devinfo, send, 0);
      brw_inst_set_send_desc(devinfo, send, desc.ud);
   } else {
      assert(desc.file == ARF);
      assert(desc.nr == BRW_ARF_ADDRESS);
      assert(desc.subnr == 0);
      brw_inst_set_send_sel_reg32_desc(devinfo, send, 1);
   }

   if (ex_desc.file == IMM) {
      brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
      brw_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud);
   } else {
      assert(ex_desc.file == ARF);
      assert(ex_desc.nr == BRW_ARF_ADDRESS);
      assert((ex_desc.subnr & 0x3) == 0);
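      /* The extended-descriptor address subregister number is encoded in
       * DWord units, hence the 4-byte alignment requirement above and the
       * shift by 2 below.
       */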
      brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
      brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, phys_subnr(devinfo, ex_desc) >> 2);

      if (devinfo->ver >= 20 && sfid == GFX12_SFID_UGM) {
         const unsigned ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc_imm);
         brw_inst_set_bits(send, 103, 99, ex_mlen / reg_unit(devinfo));
      }
   }

   if (ex_bso) {
      /* The send instruction ExBSO field does not exist with UGM on Gfx20+;
       * there it is implied.
       *
       * BSpec 56890
       */
      if (devinfo->ver < 20 || sfid != GFX12_SFID_UGM)
         brw_inst_set_send_ex_bso(devinfo, send, true);
      brw_inst_set_send_src1_len(devinfo, send, GET_BITS(ex_desc_imm, 10, 6));
   }
   brw_inst_set_sfid(devinfo, send, sfid);
   brw_inst_set_eot(devinfo, send, eot);
}

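/* Check whether the WHILE instruction at while_offset jumps back to or
 * before start_offset.  WHILE always jumps backwards, so its JIP must be
 * negative.
 */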
static bool
while_jumps_before_offset(const struct intel_device_info *devinfo,
                          brw_inst *insn, int while_offset, int start_offset)
{
   int scale = 16 / brw_jump_scale(devinfo);
   int jip = brw_inst_jip(devinfo, insn);
   assert(jip < 0);
   return while_offset + jip * scale <= start_offset;
}


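/* Scan forward from start_offset for the instruction that ends the current
 * control flow block (an ELSE, ENDIF or HALT at the same nesting depth, or
 * the WHILE of an enclosing loop), skipping over nested IF...ENDIF pairs.
 * Returns 0 if no block end is found before the end of the program.
 */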
static int
brw_find_next_block_end(struct brw_codegen *p, int start_offset)
{
   int offset;
   void *store = p->store;
   const struct intel_device_info *devinfo = p->devinfo;

   int depth = 0;

   for (offset = next_offset(devinfo, store, start_offset);
        offset < p->next_insn_offset;
        offset = next_offset(devinfo, store, offset)) {
      brw_inst *insn = store + offset;

      switch (brw_inst_opcode(p->isa, insn)) {
      case BRW_OPCODE_IF:
         depth++;
         break;
      case BRW_OPCODE_ENDIF:
         if (depth == 0)
            return offset;
         depth--;
         break;
      case BRW_OPCODE_WHILE:
         /* If the while doesn't jump before our instruction, it's the end
          * of a sibling do...while loop.  Ignore it.
          */
         if (!while_jumps_before_offset(devinfo, insn, offset, start_offset))
            continue;
         FALLTHROUGH;
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_HALT:
         if (depth == 0)
            return offset;
         break;
      default:
         break;
      }
   }

   return 0;
}

/* There is no DO instruction on Gfx6, so to find the end of the loop
 * we have to see if the loop is jumping back before our start
 * instruction.
 */
static int
brw_find_loop_end(struct brw_codegen *p, int start_offset)
{
   const struct intel_device_info *devinfo = p->devinfo;
   int offset;
   void *store = p->store;

   /* Always start after the instruction (such as a WHILE) we're trying to fix
    * up.
    */
   for (offset = next_offset(devinfo, store, start_offset);
        offset < p->next_insn_offset;
        offset = next_offset(devinfo, store, offset)) {
      brw_inst *insn = store + offset;

      if (brw_inst_opcode(p->isa, insn) == BRW_OPCODE_WHILE) {
         if (while_jumps_before_offset(devinfo, insn, offset, start_offset))
            return offset;
      }
   }
   assert(!"not reached");
   return start_offset;
}

/* After program generation, go back and update the UIP and JIP of
 * BREAK, CONTINUE, ENDIF, and HALT instructions to their correct locations.
 */
void
brw_set_uip_jip(struct brw_codegen *p, int start_offset)
{
   const struct intel_device_info *devinfo = p->devinfo;
   int offset;
   int br = brw_jump_scale(devinfo);
   int scale = 16 / br;
   void *store = p->store;

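   /* Offsets below are in bytes.  Each uncompacted instruction is 16 bytes
    * (compaction is asserted off), and jump distances are encoded in units
    * of (16 / brw_jump_scale()) bytes, which is what the divisions by scale
    * compute.
    */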
   for (offset = start_offset; offset < p->next_insn_offset; offset += 16) {
      brw_inst *insn = store + offset;
      assert(brw_inst_cmpt_control(devinfo, insn) == 0);

      switch (brw_inst_opcode(p->isa, insn)) {
      case BRW_OPCODE_BREAK: {
         int block_end_offset = brw_find_next_block_end(p, offset);
         assert(block_end_offset != 0);
         brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         /* Gfx7 UIP points to WHILE; Gfx6 points just after it */
         brw_inst_set_uip(devinfo, insn,
                          (brw_find_loop_end(p, offset) - offset) / scale);
         break;
      }

      case BRW_OPCODE_CONTINUE: {
         int block_end_offset = brw_find_next_block_end(p, offset);
         assert(block_end_offset != 0);
         brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         brw_inst_set_uip(devinfo, insn,
                          (brw_find_loop_end(p, offset) - offset) / scale);

         assert(brw_inst_uip(devinfo, insn) != 0);
         assert(brw_inst_jip(devinfo, insn) != 0);
         break;
      }

      case BRW_OPCODE_ENDIF: {
         int block_end_offset = brw_find_next_block_end(p, offset);
         int32_t jump = (block_end_offset == 0) ?
                        1 * br : (block_end_offset - offset) / scale;
         brw_inst_set_jip(devinfo, insn, jump);
         break;
      }

      case BRW_OPCODE_HALT: {
         /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
          *
          *    "In case of the halt instruction not inside any conditional
          *     code block, the value of <JIP> and <UIP> should be the
          *     same. In case of the halt instruction inside conditional code
          *     block, the <UIP> should be the end of the program, and the
          *     <JIP> should be end of the most inner conditional code block."
          *
          * The uip will have already been set by whoever set up the
          * instruction.
          */
         int block_end_offset = brw_find_next_block_end(p, offset);
         if (block_end_offset == 0) {
            brw_inst_set_jip(devinfo, insn, brw_inst_uip(devinfo, insn));
         } else {
            brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         }
         assert(brw_inst_uip(devinfo, insn) != 0);
         assert(brw_inst_jip(devinfo, insn) != 0);
         break;
      }

      default:
         break;
      }
   }
}

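/* Build the descriptor for a legacy (pre-LSC) memory fence message: a
 * single-register message to the render or data cache data port, with an
 * optional commit-enable writeback.
 */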
static void
brw_set_memory_fence_message(struct brw_codegen *p,
                             struct brw_inst *insn,
                             enum brw_message_target sfid,
                             bool commit_enable,
                             unsigned bti)
{
   const struct intel_device_info *devinfo = p->devinfo;

   brw_set_desc(p, insn, brw_message_desc(
                   devinfo, 1, (commit_enable ? 1 : 0), true));

   brw_inst_set_sfid(devinfo, insn, sfid);

   switch (sfid) {
   case GFX6_SFID_DATAPORT_RENDER_CACHE:
      brw_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_RC_MEMORY_FENCE);
      break;
   case GFX7_SFID_DATAPORT_DATA_CACHE:
      brw_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_DC_MEMORY_FENCE);
      break;
   default:
      unreachable("Not reached");
   }

   if (commit_enable)
      brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5);

   assert(devinfo->ver >= 11 || bti == 0);
   brw_inst_set_binding_table_index(devinfo, insn, bti);
}

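/* LSC counterpart of the above: build the fence descriptor from the scope
 * and flush-type fields of the incoming LSC descriptor, applying hardware
 * workarounds where needed.
 */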
static void
gfx12_set_memory_fence_message(struct brw_codegen *p,
                               struct brw_inst *insn,
                               enum brw_message_target sfid,
                               uint32_t desc)
{
   const unsigned mlen = 1 * reg_unit(p->devinfo); /* g0 header */
   /* Completion is signaled by a register writeback; no data is returned. */
   const unsigned rlen = 1 * reg_unit(p->devinfo);

   brw_inst_set_sfid(p->devinfo, insn, sfid);

   /* On Gfx12.5 the URB is not listed as a port usable for fences with the
    * LSC (see BSpec 53578 for Gfx12.5, BSpec 57330 for Gfx20), so we
    * completely ignore the descriptor value and rebuild a legacy URB fence
    * descriptor.
    */
   if (sfid == BRW_SFID_URB && p->devinfo->ver < 20) {
      brw_set_desc(p, insn, brw_urb_fence_desc(p->devinfo) |
                            brw_message_desc(p->devinfo, mlen, rlen, true));
   } else {
      enum lsc_fence_scope scope = lsc_fence_msg_desc_scope(p->devinfo, desc);
      enum lsc_flush_type flush_type = lsc_fence_msg_desc_flush_type(p->devinfo, desc);

      if (sfid == GFX12_SFID_TGM) {
         scope = LSC_FENCE_TILE;
         flush_type = LSC_FLUSH_TYPE_EVICT;
      }

      /* Wa_14012437816:
       *
       *    "For any fence greater than local scope, always set flush type to
       *     at least invalidate so that fence goes on properly."
       *
       *    "The bug is if flush_type is 'None', the scope is always downgraded
       *     to 'local'."
       *
       * Here we set the flush type to NONE_6 instead of NONE, which has the
       * same effect as NONE but avoids the downgrade of the scope to LOCAL.
       */
      if (intel_needs_workaround(p->devinfo, 14012437816) &&
          scope > LSC_FENCE_LOCAL &&
          flush_type == LSC_FLUSH_TYPE_NONE) {
         flush_type = LSC_FLUSH_TYPE_NONE_6;
      }

      brw_set_desc(p, insn, lsc_fence_msg_desc(p->devinfo, scope,
                                               flush_type, false) |
                            brw_message_desc(p->devinfo, mlen, rlen, false));
   }
}

void
brw_memory_fence(struct brw_codegen *p,
                 struct brw_reg dst,
                 struct brw_reg src,
                 enum opcode send_op,
                 enum brw_message_target sfid,
                 uint32_t desc,
                 bool commit_enable,
                 unsigned bti)
{
   const struct intel_device_info *devinfo = p->devinfo;

   dst = retype(vec1(dst), BRW_TYPE_UW);
   src = retype(vec1(src), BRW_TYPE_UD);

   /* Set dst as the destination for dependency tracking; the MEMORY_FENCE
    * message doesn't write anything back.
    */
   struct brw_inst *insn = next_insn(p, send_op);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
   brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
   brw_set_dest(p, insn, dst);
   brw_set_src0(p, insn, src);

   /* All DG2 hardware requires LSC for fence messages, even A-step */
   if (devinfo->has_lsc)
      gfx12_set_memory_fence_message(p, insn, sfid, desc);
   else
      brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
}

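/* Emit code that copies the value in the channel of src selected by idx
 * (either an immediate or a scalar register) into the scalar dst.
 */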
void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);

   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_exec_size(p, BRW_EXECUTE_1);

   assert(src.file == FIXED_GRF &&
          src.address_mode == BRW_ADDRESS_DIRECT);
   assert(!src.abs && !src.negate);

   /* Gfx12.5 adds the following region restriction:
    *
    *    "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float
    *     and Quad-Word data must not be used."
    *
    * We require the source and destination types to match, so stomp both to
    * an unsigned integer type.
    */
   assert(src.type == dst.type);
   src.type = dst.type =
      brw_type_with_size(BRW_TYPE_UD, brw_type_size_bits(src.type));

   if ((src.vstride == 0 && src.hstride == 0) ||
       idx.file == IMM) {
      /* Trivial, the source is already uniform or the index is a constant.
       * We will typically not get here if the optimizer is doing its job,
       * but asserting would be mean.
       */
      const unsigned i = idx.file == IMM ? idx.ud : 0;
      src = stride(suboffset(src, i), 0, 1, 0);

      if (brw_type_size_bytes(src.type) > 4 && !devinfo->has_64bit_int) {
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
                    subscript(src, BRW_TYPE_D, 0));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 1),
                    subscript(src, BRW_TYPE_D, 1));
      } else {
         brw_MOV(p, dst, src);
      }
   } else {
      /* From the Haswell PRM section "Register Region Restrictions":
       *
       *    "The lower bits of the AddressImmediate must not overflow to
       *     change the register address.  The lower 5 bits of Address
       *     Immediate when added to lower 5 bits of address register gives
       *     the sub-register offset.  The upper bits of Address Immediate
       *     when added to upper bits of address register gives the register
       *     address.  Any overflow from sub-register offset is dropped."
       *
       * Fortunately, for broadcast, we never have a sub-register offset so
       * this isn't an issue.
       */
      assert(src.subnr == 0);

      const struct brw_reg addr =
         retype(brw_address_reg(0), BRW_TYPE_UD);
      unsigned offset = src.nr * REG_SIZE + src.subnr;
      /* Limit in bytes of the signed indirect addressing immediate. */
      const unsigned limit = 512;

      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_flag_reg(p, 0, 0);

      /* Take into account the component size and horizontal stride. */
      assert(src.vstride == src.hstride + src.width);
      brw_SHL(p, addr, vec1(idx),
              brw_imm_ud(util_logbase2(brw_type_size_bytes(src.type)) +
                         src.hstride - 1));

      /* We can only address up to limit bytes using the indirect addressing
       * immediate; account for the difference if the source register is
       * above this limit.
       */
      if (offset >= limit) {
         brw_set_default_swsb(p, tgl_swsb_regdist(1));
         brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
         offset = offset % limit;
      }

      brw_pop_insn_state(p);

      brw_set_default_swsb(p, tgl_swsb_regdist(1));

      /* Use indirect addressing to fetch the specified component. */
      if (brw_type_size_bytes(src.type) > 4 &&
          (intel_device_info_is_9lp(devinfo) || !devinfo->has_64bit_int)) {
         /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
          *
          *    "When source or destination datatype is 64b or operation is
          *     integer DWord multiply, indirect addressing must not be
          *     used."
          *
          * We may also not support Q/UQ types.
          *
          * To work around both of these, we do two integer MOVs instead of
          * one 64-bit MOV.  Because no double value should ever cross a
          * register boundary, it's safe to use the immediate offset in the
          * indirect here to handle adding 4 bytes to the offset and avoid
          * the extra ADD to the register file.
          */
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
                    retype(brw_vec1_indirect(addr.subnr, offset),
                           BRW_TYPE_D));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 1),
                    retype(brw_vec1_indirect(addr.subnr, offset + 4),
                           BRW_TYPE_D));
      } else {
         brw_MOV(p, dst,
                    retype(brw_vec1_indirect(addr.subnr, offset), src.type));
      }
   }

   brw_pop_insn_state(p);
}


/**
 * Emit the SEND message for a barrier
 */
void
brw_barrier(struct brw_codegen *p, struct brw_reg src)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_inst *inst;

   brw_push_insn_state(p);
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   inst = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, inst, retype(brw_null_reg(), BRW_TYPE_UW));
   brw_set_src0(p, inst, src);
   brw_set_src1(p, inst, brw_null_reg());
   brw_set_desc(p, inst, brw_message_desc(devinfo,
                                          1 * reg_unit(devinfo), 0, false));

   brw_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY);
   brw_inst_set_gateway_subfuncid(devinfo, inst,
                                  BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);

   brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
   brw_pop_insn_state(p);
}


/**
 * Emit the wait instruction for a barrier
 */
void
brw_WAIT(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_inst *insn;

   struct brw_reg src = brw_notification_reg();

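   /* WAIT stalls the thread until the notification count in n0 becomes
    * non-zero, which for barriers is signaled by the message gateway once
    * all threads have arrived.
    */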
   insn = next_insn(p, BRW_OPCODE_WAIT);
   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());

   brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
}

void
brw_float_controls_mode(struct brw_codegen *p,
                        unsigned mode, unsigned mask)
{
   assert(p->current->mask_control == BRW_MASK_DISABLE);

   /* From the Skylake PRM, Volume 7, page 760:
    *
    *    "Implementation Restriction on Register Access: When the control
    *     register is used as an explicit source and/or destination, hardware
    *     does not ensure execution pipeline coherency. Software must set the
    *     thread control field to ‘switch’ for an instruction that uses
    *     control register as an explicit operand."
    *
    * On Gfx12+ this is implemented in terms of SWSB annotations instead.
    */
   brw_set_default_swsb(p, tgl_swsb_regdist(1));

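   /* cr0.0 holds the floating-point controls (FP mode, rounding mode, and
    * denorm handling): clear the bits covered by mask, then OR in the
    * requested mode bits.
    */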
   brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
                            brw_imm_ud(~mask));
   brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
   if (p->devinfo->ver < 12)
      brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);

   if (mode) {
      brw_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
                                 brw_imm_ud(mode));
      brw_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1);
      if (p->devinfo->ver < 12)
         brw_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
   }

   if (p->devinfo->ver >= 12)
      brw_SYNC(p, TGL_SYNC_NOP);
}

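/* Patch a previously emitted MOV of an immediate, identified through a
 * shader relocation, with its final run-time value.
 */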
void
brw_update_reloc_imm(const struct brw_isa_info *isa,
                     brw_inst *inst,
                     uint32_t value)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   /* Sanity check that the instruction is a MOV of an immediate */
   assert(brw_inst_opcode(isa, inst) == BRW_OPCODE_MOV);
   assert(brw_inst_src0_reg_file(devinfo, inst) == IMM);

   /* If it was compacted, we can't safely rewrite it */
   assert(brw_inst_cmpt_control(devinfo, inst) == 0);

   brw_inst_set_imm_ud(devinfo, inst, value);
}

/* A default value for constants that will be patched at run-time.
 * We pick an arbitrary value that prevents instruction compaction.
 */
#define DEFAULT_PATCH_IMM 0x4a7cc037

void
brw_MOV_reloc_imm(struct brw_codegen *p,
                  struct brw_reg dst,
                  enum brw_reg_type src_type,
                  uint32_t id,
                  uint32_t base)
{
   assert(brw_type_size_bytes(src_type) == 4);
   assert(brw_type_size_bytes(dst.type) == 4);

   brw_add_reloc(p, id, BRW_SHADER_RELOC_TYPE_MOV_IMM,
                 p->next_insn_offset, base);

   brw_MOV(p, dst, retype(brw_imm_ud(DEFAULT_PATCH_IMM), src_type));
}
