/*
 * Copyright © 2017-2018 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

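/* Presumably, building this file with GPU set to 600 makes the ir3 headers
 * below resolve the cat6 instruction-builder macros to their a6xx encodings
 * (the a4xx/a5xx handlers live in a sibling file built with a lower value).
 */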
#define GPU 600

#include "ir3_context.h"
#include "ir3_image.h"

/*
 * Handlers for instructions changed/added in a6xx:
 *
 * Starting with a6xx, isam and stib are used for SSBOs as well; stib and the
 * atomic instructions (used for both SSBO and image) use a new instruction
 * encoding compared to a4xx/a5xx.
 */

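/* Split an SSBO offset source into a register offset plus a small immediate
 * offset.  When the compiler supports immediate offsets for SSBO access
 * (has_ssbo_imm_offsets), ir3_lower_imm_offset() does the split; otherwise
 * the whole offset stays in the register, the immediate is zero, and the
 * intrinsic base is required to already be zero.
 */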
static void
lower_ssbo_offset(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                  nir_src *offset_src,
                  struct ir3_instruction **offset, unsigned *imm_offset)
{
   if (ctx->compiler->has_ssbo_imm_offsets) {
      ir3_lower_imm_offset(ctx, intr, offset_src, 7, offset, imm_offset);
   } else {
      assert(nir_intrinsic_base(intr) == 0);
      *offset = ir3_get_src(ctx, offset_src)[0];
      *imm_offset = 0;
   }
}

/* src[] = { buffer_index, offset }. No const_index */
static void
emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                         struct ir3_instruction **dst)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *offset;
   struct ir3_instruction *ldib;
   unsigned imm_offset_val;

   lower_ssbo_offset(ctx, intr, &intr->src[2], &offset, &imm_offset_val);
   struct ir3_instruction *imm_offset = create_immed(b, imm_offset_val);

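   /* ldib is the a6xx bindless IBO (image/buffer object) load, shared between
    * SSBOs and image buffers: the first source is the IBO descriptor, followed
    * by the register offset and the immediate offset lowered above.
    */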
   ldib = ir3_LDIB(b, ir3_ssbo_to_ibo(ctx, intr->src[0]), 0, offset, 0,
                   imm_offset, 0);
   ldib->dsts[0]->wrmask = MASK(intr->num_components);
   ldib->cat6.iim_val = intr->num_components;
   ldib->cat6.d = 1;
   switch (intr->def.bit_size) {
   case 8:
      /* This encodes the 8-bit SSBO load and matches blob's encoding of
       * imageBuffer access using VK_FORMAT_R8 and the dedicated 8-bit
       * descriptor. No vectorization is possible.
       */
      assert(intr->num_components == 1);

      ldib->cat6.type = TYPE_U16;
      ldib->cat6.typed = true;
      break;
   case 16:
      ldib->cat6.type = TYPE_U16;
      break;
   default:
      ldib->cat6.type = TYPE_U32;
      break;
   }
   ldib->barrier_class = IR3_BARRIER_BUFFER_R;
   ldib->barrier_conflict = IR3_BARRIER_BUFFER_W;

   if (imm_offset_val) {
      assert(ctx->compiler->has_ssbo_imm_offsets);
      ldib->flags |= IR3_INSTR_IMM_OFFSET;
   }

   ir3_handle_bindless_cat6(ldib, intr->src[0]);
   ir3_handle_nonuniform(ldib, intr);

   ir3_split_dest(b, dst, ldib, 0, intr->num_components);
}

/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
static void
emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *stib, *val, *offset;
   unsigned wrmask = nir_intrinsic_write_mask(intr);
   unsigned ncomp = ffs(~wrmask) - 1;
   unsigned imm_offset_val;

   assert(wrmask == BITFIELD_MASK(intr->num_components));

   /* src0 is offset, src1 is immediate offset, src2 is value:
    */
   val = ir3_create_collect(b, ir3_get_src(ctx, &intr->src[0]), ncomp);

   /* Any 8-bit store will be done on a single-component value that additionally
    * has to be masked to clear up the higher bits or it will malfunction.
    */
   if (intr->src[0].ssa->bit_size == 8) {
      assert(ncomp == 1);

      struct ir3_instruction *mask = create_immed_typed(b, 0xff, TYPE_U8);
      val = ir3_AND_B(b, val, 0, mask, 0);
      val->dsts[0]->flags |= IR3_REG_HALF;
   }

   lower_ssbo_offset(ctx, intr, &intr->src[3], &offset, &imm_offset_val);
   struct ir3_instruction *imm_offset = create_immed(b, imm_offset_val);

   stib = ir3_STIB(b, ir3_ssbo_to_ibo(ctx, intr->src[1]), 0, offset, 0,
                   imm_offset, 0, val, 0);
   stib->cat6.iim_val = ncomp;
   stib->cat6.d = 1;
   switch (intr->src[0].ssa->bit_size) {
   case 8:
      /* As with ldib, this encodes the 8-bit SSBO store and matches blob's
       * encoding of imageBuffer access using VK_FORMAT_R8 and the extra 8-bit
       * descriptor. No vectorization is possible and we have to override the
       * relevant field anyway.
       */
      stib->cat6.type = TYPE_U16;
      stib->cat6.iim_val = 4;
      stib->cat6.typed = true;
      break;
   case 16:
      stib->cat6.type = TYPE_U16;
      break;
   default:
      stib->cat6.type = TYPE_U32;
      break;
   }
   stib->barrier_class = IR3_BARRIER_BUFFER_W;
   stib->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;

   if (imm_offset_val) {
      assert(ctx->compiler->has_ssbo_imm_offsets);
      stib->flags |= IR3_INSTR_IMM_OFFSET;
   }

   ir3_handle_bindless_cat6(stib, intr->src[1]);
   ir3_handle_nonuniform(stib, intr);

   array_insert(b, b->keeps, stib);
}

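/* Map a NIR atomic op onto the corresponding ir3_ATOMIC_B_* (bindless
 * SSBO/image) instruction.  Signed and unsigned min/max share an opcode; the
 * callers distinguish them via cat6.type.
 */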
static struct ir3_instruction *
emit_atomic(struct ir3_block *b,
            nir_atomic_op op,
            struct ir3_instruction *ibo,
            struct ir3_instruction *src0,
            struct ir3_instruction *src1)
{
   switch (op) {
   case nir_atomic_op_iadd:
      return ir3_ATOMIC_B_ADD(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_imin:
      return ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_umin:
      return ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_imax:
      return ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_umax:
      return ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_iand:
      return ir3_ATOMIC_B_AND(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_ior:
      return ir3_ATOMIC_B_OR(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_ixor:
      return ir3_ATOMIC_B_XOR(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_xchg:
      return ir3_ATOMIC_B_XCHG(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_cmpxchg:
      return ir3_ATOMIC_B_CMPXCHG(b, ibo, 0, src0, 0, src1, 0);
   default:
      unreachable("boo");
   }
}

/*
 * SSBO atomic intrinsics
 *
 * All of the SSBO atomic memory operations read a value from memory,
 * compute a new value using one of the operations below, write the new
 * value to memory, and return the original value read.
 *
 * All operations take 3 sources except CompSwap that takes 4. These
 * sources represent:
 *
 * 0: The SSBO buffer index.
 * 1: The offset into the SSBO buffer of the variable that the atomic
 *    operation will operate on.
 * 2: The data parameter to the atomic function (i.e. the value to add
 *    in, etc).
 * 3: For CompSwap only: the second data parameter.
 */
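/* Note: as consumed below, the ir3-lowered SSBO atomic intrinsics carry extra
 * sources relative to the generic layout above: the data is src[2], and the
 * offset used by the a6xx encoding follows it -- src[3], or src[4] for
 * cmpxchg, where src[3] is the compare value.
 */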
static struct ir3_instruction *
emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *atomic, *ibo, *src0, *src1, *data, *dummy;
   nir_atomic_op op = nir_intrinsic_atomic_op(intr);
   type_t type = nir_atomic_op_type(op) == nir_type_int ? TYPE_S32 : TYPE_U32;

   ibo = ir3_ssbo_to_ibo(ctx, intr->src[0]);

   data = ir3_get_src(ctx, &intr->src[2])[0];

   /* So this gets a bit creative:
    *
    *   src0   - vecN offset/coords
    *   src1.x - is actually destination register
    *   src1.y - is 'data' except for cmpxchg where src1.y is 'compare'
    *   src1.z - is 'data' for cmpxchg
    *
    * Combining src and dest kinda doesn't work out so well with how
    * scheduling and RA work. So we create a dummy src2 which is tied to the
    * destination in RA (i.e. must be allocated to the same vec2/vec3
    * register) and then immediately extract the first component.
    *
    * Note that nir already multiplies the offset by four
    */
   dummy = create_immed(b, 0);

   if (op == nir_atomic_op_cmpxchg) {
      src0 = ir3_get_src(ctx, &intr->src[4])[0];
      struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[3])[0];
      src1 = ir3_collect(b, dummy, compare, data);
   } else {
      src0 = ir3_get_src(ctx, &intr->src[3])[0];
      src1 = ir3_collect(b, dummy, data);
   }

   atomic = emit_atomic(b, op, ibo, src0, src1);
   atomic->cat6.iim_val = 1;
   atomic->cat6.d = 1;
   atomic->cat6.type = type;
   atomic->barrier_class = IR3_BARRIER_BUFFER_W;
   atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
   ir3_handle_bindless_cat6(atomic, intr->src[0]);

   /* even if nothing consumes the result, we can't DCE the instruction: */
   array_insert(b, b->keeps, atomic);

   atomic->dsts[0]->wrmask = src1->dsts[0]->wrmask;
   ir3_reg_tie(atomic->dsts[0], atomic->srcs[2]);
   ir3_handle_nonuniform(atomic, intr);
   struct ir3_instruction *split;
   ir3_split_dest(b, &split, atomic, 0, 1);
   return split;
}

/* src[] = { deref, coord, sample_index }. const_index[] = {} */
static void
emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                          struct ir3_instruction **dst)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *ldib;
   struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]);
   unsigned ncoords = ir3_get_image_coords(intr, NULL);

   ldib = ir3_LDIB(b, ir3_image_to_ibo(ctx, intr->src[0]), 0,
                   ir3_create_collect(b, coords, ncoords), 0,
                   create_immed(b, 0), 0);
   ldib->dsts[0]->wrmask = MASK(intr->num_components);
   ldib->cat6.iim_val = intr->num_components;
   ldib->cat6.d = ncoords;
   ldib->cat6.type = ir3_get_type_for_image_intrinsic(intr);
   ldib->cat6.typed = true;
   ldib->barrier_class = IR3_BARRIER_IMAGE_R;
   ldib->barrier_conflict = IR3_BARRIER_IMAGE_W;
   ir3_handle_bindless_cat6(ldib, intr->src[0]);
   ir3_handle_nonuniform(ldib, intr);

   ir3_split_dest(b, dst, ldib, 0, intr->num_components);
}

/* src[] = { deref, coord, sample_index, value }. const_index[] = {} */
static void
emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *stib;
   struct ir3_instruction *const *value = ir3_get_src(ctx, &intr->src[3]);
   struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]);
   unsigned ncoords = ir3_get_image_coords(intr, NULL);
   enum pipe_format format = nir_intrinsic_format(intr);
   unsigned ncomp = ir3_get_num_components_for_image_format(format);

   /* src0 is coords, src1 is the immediate offset (always zero here), src2 is
    * value:
    */
   stib =
      ir3_STIB(b, ir3_image_to_ibo(ctx, intr->src[0]), 0,
               ir3_create_collect(b, coords, ncoords), 0, create_immed(b, 0), 0,
               ir3_create_collect(b, value, ncomp), 0);
   stib->cat6.iim_val = ncomp;
   stib->cat6.d = ncoords;
   stib->cat6.type = ir3_get_type_for_image_intrinsic(intr);
   stib->cat6.typed = true;
   stib->barrier_class = IR3_BARRIER_IMAGE_W;
   stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W;
   ir3_handle_bindless_cat6(stib, intr->src[0]);
   ir3_handle_nonuniform(stib, intr);

   array_insert(b, b->keeps, stib);
}

/* src[] = { deref, coord, sample_index, value, compare }. const_index[] = {} */
static struct ir3_instruction *
emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *atomic, *ibo, *src0, *src1, *dummy;
   struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]);
   struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[3])[0];
   unsigned ncoords = ir3_get_image_coords(intr, NULL);
   nir_atomic_op op = nir_intrinsic_atomic_op(intr);

   ibo = ir3_image_to_ibo(ctx, intr->src[0]);

   /* So this gets a bit creative:
    *
    *   src0   - vecN offset/coords
    *   src1.x - is actually destination register
    *   src1.y - is 'value' except for cmpxchg where src1.y is 'compare'
    *   src1.z - is 'value' for cmpxchg
    *
    * Combining src and dest kinda doesn't work out so well with how
    * scheduling and RA work. So we create a dummy src2 which is tied to the
    * destination in RA (i.e. must be allocated to the same vec2/vec3
    * register) and then immediately extract the first component.
    */
   dummy = create_immed(b, 0);
   src0 = ir3_create_collect(b, coords, ncoords);

   if (op == nir_atomic_op_cmpxchg) {
      struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[4])[0];
      src1 = ir3_collect(b, dummy, compare, value);
   } else {
      src1 = ir3_collect(b, dummy, value);
   }

   atomic = emit_atomic(b, op, ibo, src0, src1);
   atomic->cat6.iim_val = 1;
   atomic->cat6.d = ncoords;
   atomic->cat6.type = ir3_get_type_for_image_intrinsic(intr);
   atomic->cat6.typed = true;
   atomic->barrier_class = IR3_BARRIER_IMAGE_W;
   atomic->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W;
   ir3_handle_bindless_cat6(atomic, intr->src[0]);

   /* even if nothing consumes the result, we can't DCE the instruction: */
   array_insert(b, b->keeps, atomic);

   atomic->dsts[0]->wrmask = src1->dsts[0]->wrmask;
   ir3_reg_tie(atomic->dsts[0], atomic->srcs[2]);
   ir3_handle_nonuniform(atomic, intr);
   struct ir3_instruction *split;
   ir3_split_dest(b, &split, atomic, 0, 1);
   return split;
}

static void
emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                          struct ir3_instruction **dst)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *ibo = ir3_image_to_ibo(ctx, intr->src[0]);
   struct ir3_instruction *resinfo = ir3_RESINFO(b, ibo, 0);
   resinfo->cat6.iim_val = 1;
   resinfo->cat6.d = intr->num_components;
   resinfo->cat6.type = TYPE_U32;
   resinfo->cat6.typed = false;
   /* resinfo has no writemask and always writes out 3 components: */
   compile_assert(ctx, intr->num_components <= 3);
   resinfo->dsts[0]->wrmask = MASK(3);
   ir3_handle_bindless_cat6(resinfo, intr->src[0]);
   ir3_handle_nonuniform(resinfo, intr);

   ir3_split_dest(b, dst, resinfo, 0, intr->num_components);
}

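/* load_global_ir3 carries the 64-bit address as two 32-bit halves in src[0]
 * and a separate offset in src[1].  Constant offsets small enough for the
 * immediate field are folded into ir3_LDG; anything else goes through the
 * ir3_LDG_A variant with the offset in a register.
 */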
static void
emit_intrinsic_load_global_ir3(struct ir3_context *ctx,
                               nir_intrinsic_instr *intr,
                               struct ir3_instruction **dst)
{
   struct ir3_block *b = ctx->block;
   unsigned dest_components = nir_intrinsic_dest_components(intr);
   struct ir3_instruction *addr, *offset;

   addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[0])[0],
                      ir3_get_src(ctx, &intr->src[0])[1]);

   struct ir3_instruction *load;

   bool const_offset_in_bounds =
      nir_src_is_const(intr->src[1]) &&
      nir_src_as_int(intr->src[1]) < (1 << 8) &&
      nir_src_as_int(intr->src[1]) > -(1 << 8);

   if (const_offset_in_bounds) {
      load = ir3_LDG(b, addr, 0,
                     create_immed(b, nir_src_as_int(intr->src[1]) * 4),
                     0, create_immed(b, dest_components), 0);
   } else {
      unsigned shift = ctx->compiler->gen >= 7 ? 2 : 0;
      offset = ir3_get_src(ctx, &intr->src[1])[0];
      if (shift) {
         /* A7XX TODO: Move to NIR for it to be properly optimized? */
         offset = ir3_SHL_B(b, offset, 0, create_immed(b, shift), 0);
      }
      load =
         ir3_LDG_A(b, addr, 0, offset, 0, create_immed(b, 0), 0,
                   create_immed(b, 0), 0, create_immed(b, dest_components), 0);
   }

   load->cat6.type = type_uint_size(intr->def.bit_size);
   load->dsts[0]->wrmask = MASK(dest_components);

   load->barrier_class = IR3_BARRIER_BUFFER_R;
   load->barrier_conflict = IR3_BARRIER_BUFFER_W;

   ir3_split_dest(b, dst, load, 0, dest_components);
}

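/* store_global_ir3 mirrors the load above: the value lives in src[0], the
 * 64-bit address (as two 32-bit halves) in src[1], and the offset in src[2].
 */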
static void
emit_intrinsic_store_global_ir3(struct ir3_context *ctx,
                                nir_intrinsic_instr *intr)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *value, *addr, *offset;
   unsigned ncomp = nir_intrinsic_src_components(intr, 0);

   addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[1])[0],
                      ir3_get_src(ctx, &intr->src[1])[1]);

   value = ir3_create_collect(b, ir3_get_src(ctx, &intr->src[0]), ncomp);

   struct ir3_instruction *stg;

   bool const_offset_in_bounds = nir_src_is_const(intr->src[2]) &&
                                 nir_src_as_int(intr->src[2]) < (1 << 10) &&
                                 nir_src_as_int(intr->src[2]) > -(1 << 10);

   if (const_offset_in_bounds) {
      stg = ir3_STG(b, addr, 0,
                    create_immed(b, nir_src_as_int(intr->src[2]) * 4), 0,
                    value, 0,
                    create_immed(b, ncomp), 0);
   } else {
      offset = ir3_get_src(ctx, &intr->src[2])[0];
      if (ctx->compiler->gen >= 7) {
         /* A7XX TODO: Move to NIR for it to be properly optimized? */
         offset = ir3_SHL_B(b, offset, 0, create_immed(b, 2), 0);
      }
      stg =
         ir3_STG_A(b, addr, 0, offset, 0, create_immed(b, 0), 0,
                   create_immed(b, 0), 0, value, 0, create_immed(b, ncomp), 0);
   }

   stg->cat6.type = type_uint_size(intr->src[0].ssa->bit_size);
   stg->cat6.iim_val = 1;

   array_insert(b, b->keeps, stg);

   stg->barrier_class = IR3_BARRIER_BUFFER_W;
   stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
}

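/* Global atomics take the 64-bit address directly; for cmpxchg the compare
 * value and the new value are packed into a vec2 source.  Unlike the SSBO and
 * image paths there is no dummy/tied destination component, but the result
 * still has to be kept alive even when unused.
 */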
static struct ir3_instruction *
emit_intrinsic_atomic_global(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *addr, *atomic, *src1;
   struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[1])[0];
   nir_atomic_op op = nir_intrinsic_atomic_op(intr);
   type_t type = nir_atomic_op_type(op) == nir_type_int ? TYPE_S32 : TYPE_U32;

   addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[0])[0],
                      ir3_get_src(ctx, &intr->src[0])[1]);

   if (op == nir_atomic_op_cmpxchg) {
      struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[2])[0];
      src1 = ir3_collect(b, compare, value);
   } else {
      src1 = value;
   }

   switch (op) {
   case nir_atomic_op_iadd:
      atomic = ir3_ATOMIC_G_ADD(b, addr, 0, src1, 0);
      break;
   case nir_atomic_op_imin:
      atomic = ir3_ATOMIC_G_MIN(b, addr, 0, src1, 0);
      type = TYPE_S32;
      break;
   case nir_atomic_op_umin:
      atomic = ir3_ATOMIC_G_MIN(b, addr, 0, src1, 0);
      break;
   case nir_atomic_op_imax:
      atomic = ir3_ATOMIC_G_MAX(b, addr, 0, src1, 0);
      type = TYPE_S32;
      break;
   case nir_atomic_op_umax:
      atomic = ir3_ATOMIC_G_MAX(b, addr, 0, src1, 0);
      break;
   case nir_atomic_op_iand:
      atomic = ir3_ATOMIC_G_AND(b, addr, 0, src1, 0);
      break;
   case nir_atomic_op_ior:
      atomic = ir3_ATOMIC_G_OR(b, addr, 0, src1, 0);
      break;
   case nir_atomic_op_ixor:
      atomic = ir3_ATOMIC_G_XOR(b, addr, 0, src1, 0);
      break;
   case nir_atomic_op_xchg:
      atomic = ir3_ATOMIC_G_XCHG(b, addr, 0, src1, 0);
      break;
   case nir_atomic_op_cmpxchg:
      atomic = ir3_ATOMIC_G_CMPXCHG(b, addr, 0, src1, 0);
      break;
   default:
      unreachable("Unknown global atomic op");
   }

   atomic->cat6.iim_val = 1;
   atomic->cat6.d = 1;
   atomic->cat6.type = type;
   atomic->barrier_class = IR3_BARRIER_BUFFER_W;
   atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;

   /* even if nothing consumes the result, we can't DCE the instruction: */
   array_insert(b, b->keeps, atomic);

   return atomic;
}

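/* Per-generation table of intrinsic emitters; the common ir3 frontend
 * dispatches through this for a6xx+ compiles.
 */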
const struct ir3_context_funcs ir3_a6xx_funcs = {
   .emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo,
   .emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo,
   .emit_intrinsic_atomic_ssbo = emit_intrinsic_atomic_ssbo,
   .emit_intrinsic_load_image = emit_intrinsic_load_image,
   .emit_intrinsic_store_image = emit_intrinsic_store_image,
   .emit_intrinsic_atomic_image = emit_intrinsic_atomic_image,
   .emit_intrinsic_image_size = emit_intrinsic_image_size,
   .emit_intrinsic_load_global_ir3 = emit_intrinsic_load_global_ir3,
   .emit_intrinsic_store_global_ir3 = emit_intrinsic_store_global_ir3,
   .emit_intrinsic_atomic_global = emit_intrinsic_atomic_global,
};