/*
 * Copyright 2021 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include "agx_compiler.h"
#include "agx_opcodes.h"

/* Binary patches needed for branch offsets */
struct agx_branch_fixup {
   /* Offset into the binary to patch */
   off_t offset;

   /* The value patched in will be the target block's offset */
   agx_block *block;

   /* If true, skips to the last instruction of the target block */
   bool skip_to_end;
};

static void
pack_assert_internal(const agx_instr *I, bool condition, const char *msg)
{
   if (!condition) {
      printf("Packing assertion failed for instruction:\n\n");
      agx_print_instr(I, stdout);
      printf("\n%s\n", msg);
      abort();
   }
}

#define pack_assert_msg(I, cond, msg)                                          \
   pack_assert_internal(I, cond, msg ": " #cond)

#define pack_assert(I, cond) pack_assert_internal(I, cond, #cond)
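
/* For example, pack_assert_msg(I, reg.value < 0x100, "register bound") would
 * report "register bound: reg.value < 0x100" on failure, while
 * pack_assert(I, cond) reports just the stringified condition.
 */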
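/* Registers are addressed in 16-bit units, so 32-bit values must occupy an
 * even-aligned register pair and 64-bit values a quad; 16-bit values need no
 * alignment.
 */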
static void
assert_register_is_aligned(const agx_instr *I, agx_index reg)
{
   pack_assert_msg(I, reg.type == AGX_INDEX_REGISTER, "expecting a register");

   switch (reg.size) {
   case AGX_SIZE_16:
      return;
   case AGX_SIZE_32:
      pack_assert_msg(I, (reg.value & 1) == 0, "unaligned reg");
      return;
   case AGX_SIZE_64:
      pack_assert_msg(I, (reg.value & 3) == 0, "unaligned reg");
      return;
   }

   unreachable("Invalid register size");
}

/* Texturing has its own operands */
static unsigned
agx_pack_sample_coords(const agx_instr *I, agx_index index, bool *flag,
                       bool *is_16)
{
   /* TODO: Do we have a use case for 16-bit coords? */
   pack_assert_msg(I, index.size == AGX_SIZE_32, "32-bit coordinates");
   pack_assert_msg(I, index.value < 0x100, "coordinate register bound");

   *is_16 = false;
   *flag = index.discard;
   return index.value;
}

static unsigned
agx_pack_texture(const agx_instr *I, agx_index base, agx_index index,
                 unsigned *packed_base, unsigned *flag)
{
   if (base.type == AGX_INDEX_IMMEDIATE) {
      pack_assert(I, base.value == 0);

      /* Texture state registers */
      *packed_base = 0;

      if (index.type == AGX_INDEX_REGISTER) {
         pack_assert(I, index.size == AGX_SIZE_16);
         *flag = 1;
      } else {
         pack_assert(I, index.type == AGX_INDEX_IMMEDIATE);
         *flag = 0;
      }
   } else {
      pack_assert(I, base.type == AGX_INDEX_UNIFORM);
      pack_assert(I, base.size == AGX_SIZE_64);
      pack_assert(I, (base.value & 3) == 0);
      pack_assert(I, index.size == AGX_SIZE_32);

      /* Bindless */
      *packed_base = base.value >> 2;
      *flag = 3;
   }

   return index.value;
}
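
/* In agx_pack_texture, *flag = 0 selects an immediate index into the texture
 * state registers, 1 a register index, and 3 the bindless path through a
 * 64-bit uniform base address.
 */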

static unsigned
agx_pack_sampler(const agx_instr *I, agx_index index, bool *flag)
{
   if (index.type == AGX_INDEX_REGISTER) {
      pack_assert(I, index.size == AGX_SIZE_16);
      *flag = 1;
   } else {
      pack_assert(I, index.type == AGX_INDEX_IMMEDIATE);
      *flag = 0;
   }

   return index.value;
}

static unsigned
agx_pack_sample_compare_offset(const agx_instr *I, agx_index index)
{
   if (index.type == AGX_INDEX_NULL)
      return 0;

   pack_assert(I, index.size == AGX_SIZE_32);
   pack_assert(I, index.value < 0x100);
   assert_register_is_aligned(I, index);
   return index.value;
}

static unsigned
agx_pack_lod(const agx_instr *I, agx_index index, unsigned *lod_mode)
{
   /* For automatic LOD, the LOD field is unused. Assert as much. */
   if ((*lod_mode) == AGX_LOD_MODE_AUTO_LOD) {
      pack_assert(I, index.type == AGX_INDEX_IMMEDIATE);
      pack_assert(I, index.value == 0);
      return 0;
   }

   if (index.type == AGX_INDEX_UNIFORM) {
      /* Translate LOD mode from register mode to uniform mode */
      pack_assert(I,
                  ((*lod_mode) & BITFIELD_BIT(2)) && "must start as reg mode");
      *lod_mode = (*lod_mode) & ~BITFIELD_BIT(2);
      pack_assert(I, index.value < 0x200);
   } else {
      /* Otherwise must be registers */
      pack_assert(I, index.type == AGX_INDEX_REGISTER);
      pack_assert(I, index.value < 0x100);
   }

   return index.value;
}
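
/* Bit 2 of the LOD mode thus distinguishes the register variants (set) from
 * the uniform variants (clear) of the modes that take an explicit LOD.
 */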

static unsigned
agx_pack_pbe_source(const agx_instr *I, agx_index index, bool *flag)
{
   pack_assert(I, index.size == AGX_SIZE_16 || index.size == AGX_SIZE_32);
   assert_register_is_aligned(I, index);

   *flag = (index.size == AGX_SIZE_32);
   return index.value;
}

static unsigned
agx_pack_pbe_lod(const agx_instr *I, agx_index index, bool *flag)
{
   pack_assert(I, index.size == AGX_SIZE_16);

   if (index.type == AGX_INDEX_IMMEDIATE)
      *flag = true;
   else if (index.type == AGX_INDEX_REGISTER)
      *flag = false;
   else
      unreachable("Invalid PBE LOD type");

   return index.value;
}

/* Load/stores have their own operands */

static unsigned
agx_pack_memory_reg(const agx_instr *I, agx_index index, bool *flag)
{
   assert_register_is_aligned(I, index);

   *flag = (index.size >= AGX_SIZE_32);
   return index.value;
}

static unsigned
agx_pack_memory_base(const agx_instr *I, agx_index index, bool *flag)
{
   pack_assert(I, index.size == AGX_SIZE_64);
   pack_assert(I, (index.value & 1) == 0);

   /* Can't seem to access high uniforms from memory instructions */
   pack_assert(I, index.value < 0x100);

   if (index.type == AGX_INDEX_UNIFORM) {
      *flag = 1;
   } else {
      pack_assert(I, index.type == AGX_INDEX_REGISTER);
      *flag = 0;
   }

   return index.value;
}

static unsigned
agx_pack_memory_index(const agx_instr *I, agx_index index, bool *flag)
{
   if (index.type == AGX_INDEX_IMMEDIATE) {
      pack_assert(I, index.value < 0x10000);
      *flag = 1;

      return index.value;
   } else {
      pack_assert(I, index.type == AGX_INDEX_REGISTER);
      pack_assert(I, index.size == AGX_SIZE_32);
      pack_assert(I, (index.value & 1) == 0);
      pack_assert(I, index.value < 0x100);

      *flag = 0;
      return index.value;
   }
}

static uint16_t
agx_pack_local_base(const agx_instr *I, agx_index index, unsigned *flags)
{
   pack_assert(I, index.size == AGX_SIZE_16);

   if (index.type == AGX_INDEX_IMMEDIATE) {
      pack_assert(I, index.value == 0);
      *flags = 2;
      return 0;
   } else if (index.type == AGX_INDEX_UNIFORM) {
      *flags = 1 | ((index.value >> 8) << 1);
      return index.value & BITFIELD_MASK(7);
   } else {
      assert_register_is_aligned(I, index);
      *flags = 0;
      return index.value;
   }
}

static uint16_t
agx_pack_local_index(const agx_instr *I, agx_index index, bool *flag)
{
   pack_assert(I, index.size == AGX_SIZE_16);

   if (index.type == AGX_INDEX_IMMEDIATE) {
      pack_assert(I, index.value < 0x10000);
      *flag = 1;
      return index.value;
   } else {
      assert_register_is_aligned(I, index);
      *flag = 0;
      return index.value;
   }
}

static unsigned
agx_pack_atomic_source(const agx_instr *I, agx_index index)
{
   pack_assert_msg(I, index.size == AGX_SIZE_32, "no 64-bit atomics yet");
   assert_register_is_aligned(I, index);
   return index.value;
}

static unsigned
agx_pack_atomic_dest(const agx_instr *I, agx_index index, bool *flag)
{
   /* Atomic destinations are optional (e.g. for update with no return) */
   if (index.type == AGX_INDEX_NULL) {
      *flag = 0;
      return 0;
   }

   /* Otherwise, they must be registers */
   pack_assert_msg(I, index.size == AGX_SIZE_32, "no 64-bit atomics yet");
   assert_register_is_aligned(I, index);
   *flag = 1;
   return index.value;
}

/* ALU goes through a common path */

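/* The packed ALU destination puts the cache flag in bit 0, a "32-bit or
 * wider" flag in bit 1, and the register number shifted left by 2. The
 * 64-bit flag in bit 2 overlaps the register's low bit, which the alignment
 * rules guarantee is zero for 64-bit registers. As a worked example with
 * hypothetical values, a cached 32-bit destination in r4 packs as
 * (1 << 0) | (1 << 1) | (4 << 2) = 0x13.
 */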
static unsigned
agx_pack_alu_dst(const agx_instr *I, agx_index dest)
{
   assert_register_is_aligned(I, dest);
   unsigned reg = dest.value;
   enum agx_size size = dest.size;
   pack_assert(I, reg < 0x100);

   return (dest.cache ? (1 << 0) : 0) | ((size >= AGX_SIZE_32) ? (1 << 1) : 0) |
          ((size == AGX_SIZE_64) ? (1 << 2) : 0) | ((reg << 2));
}

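/* ALU sources pack into 12 bits: the low 6 bits of the operand value in bits
 * 0-5, operand bits 6-7 in bits 10-11, and bits 6-9 encoding the operand
 * kind (immediate, uniform, or register with cache/discard hint and size).
 * As a hypothetical example, 32-bit uniform u9 packs as
 * 9 | (1 << 7) | (0x1 << 8) = 0x189.
 */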
static unsigned
agx_pack_alu_src(const agx_instr *I, agx_index src)
{
   unsigned value = src.value;
   enum agx_size size = src.size;

   if (src.type == AGX_INDEX_IMMEDIATE) {
      /* Flags 0 for an 8-bit immediate */
      pack_assert(I, value < 0x100);

      return (value & BITFIELD_MASK(6)) | ((value >> 6) << 10);
   } else if (src.type == AGX_INDEX_UNIFORM) {
      pack_assert(I, size == AGX_SIZE_16 || size == AGX_SIZE_32);
      pack_assert(I, value < AGX_NUM_UNIFORMS);

      return (value & BITFIELD_MASK(6)) |
             ((value & BITFIELD_BIT(8)) ? (1 << 6) : 0) |
             ((size == AGX_SIZE_32) ? (1 << 7) : 0) | (0x1 << 8) |
             (((value >> 6) & BITFIELD_MASK(2)) << 10);
   } else {
      assert_register_is_aligned(I, src);
      pack_assert(I, !(src.cache && src.discard));

      unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;
      unsigned size_flag = (size == AGX_SIZE_64)   ? 0x3
                           : (size == AGX_SIZE_32) ? 0x2
                           : (size == AGX_SIZE_16) ? 0x0
                                                   : 0x0;

      return (value & BITFIELD_MASK(6)) | (hint << 6) | (size_flag << 8) |
             (((value >> 6) & BITFIELD_MASK(2)) << 10);
   }
}

static unsigned
agx_pack_cmpsel_src(const agx_instr *I, agx_index src, enum agx_size dest_size)
{
   unsigned value = src.value;
   ASSERTED enum agx_size size = src.size;

   if (src.type == AGX_INDEX_IMMEDIATE) {
      /* Flags 0x4 for an 8-bit immediate */
      pack_assert(I, value < 0x100);

      return (value & BITFIELD_MASK(6)) | (0x4 << 6) | ((value >> 6) << 10);
   } else if (src.type == AGX_INDEX_UNIFORM) {
      pack_assert(I, size == AGX_SIZE_16 || size == AGX_SIZE_32);
      pack_assert(I, size == dest_size);
      pack_assert(I, value < 0x200);

      return (value & BITFIELD_MASK(6)) | ((value >> 8) << 6) | (0x3 << 7) |
             (((value >> 6) & BITFIELD_MASK(2)) << 10);
   } else {
      pack_assert(I, src.type == AGX_INDEX_REGISTER);
      pack_assert(I, !(src.cache && src.discard));
      pack_assert(I, size == AGX_SIZE_16 || size == AGX_SIZE_32);
      pack_assert(I, size == dest_size);
      assert_register_is_aligned(I, src);

      unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;

      return (value & BITFIELD_MASK(6)) | (hint << 6) |
             (((value >> 6) & BITFIELD_MASK(2)) << 10);
   }
}

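/* Sample mask sources use the same low-6/high-2 register split, with bit 7
 * set when the source is an immediate rather than a register.
 */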
static unsigned
agx_pack_sample_mask_src(const agx_instr *I, agx_index src)
{
   unsigned value = src.value;
   unsigned packed_value =
      (value & BITFIELD_MASK(6)) | (((value >> 6) & BITFIELD_MASK(2)) << 10);

   if (src.type == AGX_INDEX_IMMEDIATE) {
      pack_assert(I, value < 0x100);
      return packed_value | (1 << 7);
   } else {
      pack_assert(I, src.type == AGX_INDEX_REGISTER);
      assert_register_is_aligned(I, src);
      pack_assert(I, !(src.cache && src.discard));

      return packed_value;
   }
}

static unsigned
agx_pack_float_mod(agx_index src)
{
   return (src.abs ? (1 << 0) : 0) | (src.neg ? (1 << 1) : 0);
}

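/* Returns true if every non-null destination and source is 16-bit, used
 * below to select the shorter 16-bit encoding where one exists.
 */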
static bool
agx_all_16(agx_instr *I)
{
   agx_foreach_dest(I, d) {
      if (!agx_is_null(I->dest[d]) && I->dest[d].size != AGX_SIZE_16)
         return false;
   }

   agx_foreach_src(I, s) {
      if (!agx_is_null(I->src[s]) && I->src[s].size != AGX_SIZE_16)
         return false;
   }

   return true;
}

/* Generic pack for ALU instructions, which are quite regular */

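/* Each packed source is 12 bits: the low 10 land in the main word at bit
 * 16 + 12 * s, while the top 2 go into the 16-bit extend word, which holds
 * the destination's top bits at its high end and each source's below, in
 * descending order.
 */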
static void
agx_pack_alu(struct util_dynarray *emission, agx_instr *I)
{
   struct agx_opcode_info info = agx_opcodes_info[I->op];
   bool is_16 = agx_all_16(I) && info.encoding_16.exact;
   struct agx_encoding encoding = is_16 ? info.encoding_16 : info.encoding;

   pack_assert_msg(I, encoding.exact, "invalid encoding");

   uint64_t raw = encoding.exact;
   uint16_t extend = 0;

   // TODO: assert saturable
   if (I->saturate)
      raw |= (1 << 6);

   if (info.nr_dests) {
      pack_assert(I, info.nr_dests == 1);
      unsigned D = agx_pack_alu_dst(I, I->dest[0]);
      unsigned extend_offset = (sizeof(extend) * 8) - 4;

      raw |= (D & BITFIELD_MASK(8)) << 7;
      extend |= ((D >> 8) << extend_offset);

      if (info.immediates & AGX_IMMEDIATE_INVERT_COND) {
         raw |= (uint64_t)(I->invert_cond) << 47;
      }
   } else if (info.immediates & AGX_IMMEDIATE_NEST) {
      raw |= (I->invert_cond << 8);
      raw |= (I->nest << 11);
      raw |= (I->icond << 13);
   }

   for (unsigned s = 0; s < info.nr_srcs; ++s) {
      bool is_cmpsel = (s >= 2) && (I->op == AGX_OPCODE_ICMPSEL ||
                                    I->op == AGX_OPCODE_FCMPSEL);

      unsigned src = is_cmpsel
                        ? agx_pack_cmpsel_src(I, I->src[s], I->dest[0].size)
                        : agx_pack_alu_src(I, I->src[s]);

      unsigned src_short = (src & BITFIELD_MASK(10));
      unsigned src_extend = (src >> 10);

      /* The size bit is always zero for 16-bit encodings, so it is omitted */
      if (is_16 && !is_cmpsel)
         pack_assert(I, (src_short & (1 << 9)) == 0);

      if (info.is_float || (I->op == AGX_OPCODE_FCMPSEL && !is_cmpsel)) {
         unsigned fmod = agx_pack_float_mod(I->src[s]);
         unsigned fmod_offset = is_16 ? 9 : 10;
         src_short |= (fmod << fmod_offset);
      } else if (I->op == AGX_OPCODE_IMAD || I->op == AGX_OPCODE_IADD) {
         /* Force unsigned for immediates so uadd_sat works properly */
         bool zext = I->src[s].abs || I->src[s].type == AGX_INDEX_IMMEDIATE;
         bool extends = I->src[s].size < AGX_SIZE_64;

         unsigned sxt = (extends && !zext) ? (1 << 10) : 0;

         unsigned negate_src = (I->op == AGX_OPCODE_IMAD) ? 2 : 1;
         pack_assert(I, !I->src[s].neg || s == negate_src);
         src_short |= sxt;
      }

      /* Sources come at predictable offsets */
      unsigned offset = 16 + (12 * s);
      raw |= (((uint64_t)src_short) << offset);

      /* Destination and each source get extended in reverse order */
      unsigned extend_offset = (sizeof(extend) * 8) - ((s + 3) * 2);
      extend |= (src_extend << extend_offset);
   }

   if ((I->op == AGX_OPCODE_IMAD && I->src[2].neg) ||
       (I->op == AGX_OPCODE_IADD && I->src[1].neg))
      raw |= (1 << 27);

   if (info.immediates & AGX_IMMEDIATE_TRUTH_TABLE) {
      raw |= (I->truth_table & 0x3) << 26;
      raw |= (uint64_t)(I->truth_table >> 2) << 38;
   } else if (info.immediates & AGX_IMMEDIATE_SHIFT) {
      pack_assert(I, I->shift <= 4);
      raw |= (uint64_t)(I->shift & 1) << 39;
      raw |= (uint64_t)(I->shift >> 1) << 52;
   } else if (info.immediates & AGX_IMMEDIATE_BFI_MASK) {
      raw |= (uint64_t)(I->bfi_mask & 0x3) << 38;
      raw |= (uint64_t)((I->bfi_mask >> 2) & 0x3) << 50;
      raw |= (uint64_t)((I->bfi_mask >> 4) & 0x1) << 63;
   } else if (info.immediates & AGX_IMMEDIATE_SIMD_OP) {
      raw |= (uint64_t)(I->simd_op & 0x1) << 28;
      raw |= (uint64_t)((I->simd_op >> 1) & 0x7) << 38;
      raw |= (uint64_t)((I->simd_op >> 4) & 0x1) << 47;
   } else if (info.immediates & AGX_IMMEDIATE_SR) {
      raw |= (uint64_t)(I->sr & 0x3F) << 16;
      raw |= (uint64_t)(I->sr >> 6) << 26;
   } else if (info.immediates & AGX_IMMEDIATE_WRITEOUT)
      raw |= (uint64_t)(I->imm) << 8;
   else if (info.immediates & AGX_IMMEDIATE_IMM)
      raw |= (uint64_t)(I->imm) << 16;
   else if (info.immediates & AGX_IMMEDIATE_ROUND)
      raw |= (uint64_t)(I->imm) << 26;
   else if (info.immediates & (AGX_IMMEDIATE_FCOND | AGX_IMMEDIATE_ICOND))
      raw |= (uint64_t)(I->fcond) << 61;

   /* Determine length bit */
   unsigned length = encoding.length_short;
   uint64_t short_mask = BITFIELD64_MASK(8 * length);
   bool length_bit = (extend || (raw & ~short_mask));

   if (encoding.extensible && length_bit) {
      raw |= (1 << 15);
      length += (length > 8) ? 4 : 2;
   }

   /* Pack! */
   if (length <= sizeof(uint64_t)) {
      unsigned extend_offset = ((length - sizeof(extend)) * 8);

      /* XXX: Encode these special cases better */
      switch (I->op) {
      case AGX_OPCODE_IADD:
      case AGX_OPCODE_ICMP_BALLOT:
      case AGX_OPCODE_ICMP_QUAD_BALLOT:
      case AGX_OPCODE_FCMP_BALLOT:
      case AGX_OPCODE_FCMP_QUAD_BALLOT:
         extend_offset -= 16;
         break;
      default:
         break;
      }

      raw |= (uint64_t)extend << extend_offset;
      memcpy(util_dynarray_grow_bytes(emission, 1, length), &raw, length);
   } else {
      /* So far, ALU instructions longer than 8 bytes only use the extra
       * space to store the extend bits */
      unsigned extend_offset = (((length - sizeof(extend)) * 8) - 64);
      unsigned hi = ((uint64_t)extend) << extend_offset;

      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      memcpy(util_dynarray_grow_bytes(emission, 1, length - 8), &hi,
             length - 8);
   }
}

static void
agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
               agx_instr *I, bool needs_g13x_coherency)
{
   switch (I->op) {
   case AGX_OPCODE_LD_TILE:
   case AGX_OPCODE_ST_TILE: {
      bool load = (I->op == AGX_OPCODE_LD_TILE);
      unsigned D = agx_pack_alu_dst(I, load ? I->dest[0] : I->src[0]);
      pack_assert(I, I->mask < 0x10);
      pack_assert(I, I->pixel_offset < 0x200);

      agx_index sample_index = load ? I->src[0] : I->src[1];
      agx_index coords = load ? I->src[1] : I->src[2];
      pack_assert(I, sample_index.type == AGX_INDEX_REGISTER ||
                        sample_index.type == AGX_INDEX_IMMEDIATE);
      pack_assert(I, sample_index.size == AGX_SIZE_16);
      unsigned St = (sample_index.type == AGX_INDEX_REGISTER) ? 1 : 0;
      unsigned S = sample_index.value;
      pack_assert(I, S < 0x100);

      pack_assert(I, I->explicit_coords == (coords.type == AGX_INDEX_REGISTER));
      unsigned C = I->explicit_coords ? coords.value : 0;

      uint64_t raw = agx_opcodes_info[I->op].encoding.exact |
                     ((uint64_t)(D & BITFIELD_MASK(8)) << 7) | (St << 22) |
                     ((uint64_t)(I->format) << 24) |
                     ((uint64_t)(C & BITFIELD_MASK(6)) << 16) |
                     ((uint64_t)(I->pixel_offset & BITFIELD_MASK(7)) << 28) |
                     (load || I->explicit_coords ? (1ull << 35) : 0) |
                     ((uint64_t)(I->mask) << 36) |
                     ((uint64_t)(I->pixel_offset >> 7) << 40) |
                     ((uint64_t)(S & BITFIELD_MASK(6)) << 42) |
                     (I->explicit_coords ? (1ull << 55) : 0) |
                     ((uint64_t)(S >> 6) << 56) | ((uint64_t)(C >> 6) << 58) |
                     (((uint64_t)(D >> 8)) << 60);

      unsigned size = 8;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_SAMPLE_MASK: {
      unsigned S = agx_pack_sample_mask_src(I, I->src[1]);
      unsigned T = I->src[0].value;
      bool Tt = I->src[0].type == AGX_INDEX_IMMEDIATE;
      pack_assert(I, Tt || I->src[0].type == AGX_INDEX_REGISTER);
      uint32_t raw = 0xc1 | (Tt ? BITFIELD_BIT(8) : 0) |
                     ((T & BITFIELD_MASK(6)) << 9) | ((S & 0xff) << 16) |
                     ((T >> 6) << 24) | ((S >> 8) << 26);

      unsigned size = 4;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_WAIT: {
      uint64_t raw =
         agx_opcodes_info[I->op].encoding.exact | (I->scoreboard << 8);

      unsigned size = 2;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_ITER:
   case AGX_OPCODE_ITERPROJ:
   case AGX_OPCODE_LDCF: {
      bool flat = (I->op == AGX_OPCODE_LDCF);
      bool perspective = (I->op == AGX_OPCODE_ITERPROJ);
      unsigned D = agx_pack_alu_dst(I, I->dest[0]);
      unsigned channels = (I->channels & 0x3);

      agx_index src_I = I->src[0];
      pack_assert(I, src_I.type == AGX_INDEX_IMMEDIATE ||
                        src_I.type == AGX_INDEX_REGISTER);

      unsigned cf_I = src_I.value;
      unsigned cf_J = 0;

      if (perspective) {
         agx_index src_J = I->src[1];
         pack_assert(I, src_J.type == AGX_INDEX_IMMEDIATE);
         cf_J = src_J.value;
      }

      pack_assert(I, cf_I < 0x100);
      pack_assert(I, cf_J < 0x100);

      enum agx_interpolation interp = I->interpolation;
      agx_index sample_index = flat ? agx_null() : I->src[perspective ? 2 : 1];

      /* Fix up the interpolation enum to distinguish the sample index source */
      if (interp == AGX_INTERPOLATION_SAMPLE) {
         if (sample_index.type == AGX_INDEX_REGISTER)
            interp = AGX_INTERPOLATION_SAMPLE_REGISTER;
         else
            pack_assert(I, sample_index.type == AGX_INDEX_IMMEDIATE);
      } else {
         sample_index = agx_zero();
      }

      bool kill = false;    // TODO: optimize
      bool forward = false; // TODO: optimize

      uint64_t raw =
         0x21 | (flat ? (1 << 7) : 0) | (perspective ? (1 << 6) : 0) |
         ((D & 0xFF) << 7) | (1ull << 15) | /* XXX */
         ((cf_I & BITFIELD_MASK(6)) << 16) |
         ((src_I.type == AGX_INDEX_REGISTER) ? (1 << 23) : 0) |
         ((cf_J & BITFIELD_MASK(6)) << 24) | (((uint64_t)channels) << 30) |
         (((uint64_t)sample_index.value) << 32) | (forward ? (1ull << 46) : 0) |
         (((uint64_t)interp) << 48) | (kill ? (1ull << 52) : 0) |
         (((uint64_t)(D >> 8)) << 56) | ((uint64_t)(cf_I >> 6) << 58) |
         ((uint64_t)(cf_J >> 6) << 60);

      unsigned size = 8;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_ST_VARY: {
      agx_index index_src = I->src[0];
      agx_index value = I->src[1];

      pack_assert(I, index_src.type == AGX_INDEX_IMMEDIATE ||
                        index_src.type == AGX_INDEX_REGISTER);
      pack_assert(I, index_src.value < BITFIELD_MASK(8));
      pack_assert(I, value.type == AGX_INDEX_REGISTER);
      pack_assert(I, value.size == AGX_SIZE_32);

      uint64_t raw = 0x11 | (I->last ? (1 << 7) : 0) |
                     ((value.value & 0x3F) << 9) |
                     (((uint64_t)(index_src.value & 0x3F)) << 16) |
                     (index_src.type == AGX_INDEX_IMMEDIATE ? (1 << 23) : 0) |
                     ((value.value >> 6) << 24) |
                     ((index_src.value >> 6) << 26) | (0x8u << 28); /* XXX */

      unsigned size = 4;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_DEVICE_LOAD:
   case AGX_OPCODE_DEVICE_STORE:
   case AGX_OPCODE_UNIFORM_STORE: {
      bool is_device_store = I->op == AGX_OPCODE_DEVICE_STORE;
      bool is_uniform_store = I->op == AGX_OPCODE_UNIFORM_STORE;
      bool is_store = is_device_store || is_uniform_store;
      bool has_base = !is_uniform_store;

      /* Uniform stores are required to be 16-bit. The encoding that should be
       * 32-bit annoyingly doesn't work. Fix up the format and size so we can
       * use scalar 32-bit values in the IR and avoid special casing earlier in
       * the compiler.
       */
      enum agx_format format = is_uniform_store ? AGX_FORMAT_I16 : I->format;
      agx_index reg = is_store ? I->src[0] : I->dest[0];
      unsigned mask = I->mask;

      if (is_uniform_store && reg.size != AGX_SIZE_16) {
         if (reg.size == AGX_SIZE_64) {
            assert(mask == 1);
            mask = BITFIELD_MASK(4);
         } else {
            assert(reg.size == AGX_SIZE_32);
            assert(mask == 1 || mask == 3);
            mask = BITFIELD_MASK(mask == 3 ? 4 : 2);
         }

         reg.size = AGX_SIZE_16;
      }
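      /* For example, a 32-bit store with mask = 1 becomes a 16-bit store of
       * the same register pair with mask = 0x3. */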

      unsigned offset_src = (has_base ? 1 : 0) + (is_store ? 1 : 0);

      bool Rt, At = false, Ot;
      unsigned R = agx_pack_memory_reg(I, reg, &Rt);
      unsigned A =
         has_base ? agx_pack_memory_base(I, I->src[is_store ? 1 : 0], &At) : 0;
      unsigned O = agx_pack_memory_index(I, I->src[offset_src], &Ot);
      unsigned u1 = is_uniform_store ? 0 : 1; // XXX
      unsigned u3 = 0;
      unsigned u4 = is_uniform_store ? 0 : 4; // XXX
      unsigned u5 = 0;
      bool L = true; /* TODO: when would you want short? */

      pack_assert(I, mask != 0);
      pack_assert(I, format <= 0x10);

      uint64_t raw =
         agx_opcodes_info[I->op].encoding.exact |
         ((format & BITFIELD_MASK(3)) << 7) | ((R & BITFIELD_MASK(6)) << 10) |
         ((A & BITFIELD_MASK(4)) << 16) | ((O & BITFIELD_MASK(4)) << 20) |
         (Ot ? (1 << 24) : 0) | (I->src[offset_src].abs ? (1 << 25) : 0) |
         (is_uniform_store ? (2 << 25) : 0) | (u1 << 26) | (At << 27) |
         (u3 << 28) | (I->scoreboard << 30) |
         (((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
         (((uint64_t)((A >> 4) & BITFIELD_MASK(4))) << 36) |
         (((uint64_t)((R >> 6) & BITFIELD_MASK(2))) << 40) |
         (((uint64_t)I->shift) << 42) | (((uint64_t)u4) << 44) |
         (L ? (1ull << 47) : 0) | (((uint64_t)(format >> 3)) << 48) |
         (((uint64_t)Rt) << 49) | (((uint64_t)u5) << 50) |
         (((uint64_t)mask) << 52) | (((uint64_t)(O >> 8)) << 56);

      unsigned size = L ? 8 : 6;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_LOCAL_LOAD:
   case AGX_OPCODE_LOCAL_STORE: {
      bool is_load = I->op == AGX_OPCODE_LOCAL_LOAD;
      bool L = true; /* TODO: when would you want short? */
      unsigned At;
      bool Rt, Ot;

      unsigned R =
         agx_pack_memory_reg(I, is_load ? I->dest[0] : I->src[0], &Rt);
      unsigned A = agx_pack_local_base(I, is_load ? I->src[0] : I->src[1], &At);
      unsigned O =
         agx_pack_local_index(I, is_load ? I->src[1] : I->src[2], &Ot);

      uint64_t raw =
         agx_opcodes_info[I->op].encoding.exact | (Rt ? BITFIELD64_BIT(8) : 0) |
         ((R & BITFIELD_MASK(6)) << 9) | (L ? BITFIELD64_BIT(15) : 0) |
         ((A & BITFIELD_MASK(6)) << 16) | (At << 22) | (I->format << 24) |
         ((O & BITFIELD64_MASK(6)) << 28) | (Ot ? BITFIELD64_BIT(34) : 0) |
         (((uint64_t)I->mask) << 36) | (((uint64_t)(O >> 6)) << 48) |
         (((uint64_t)(A >> 6)) << 58) | (((uint64_t)(R >> 6)) << 60);

      unsigned size = L ? 8 : 6;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_ATOMIC: {
      bool At, Ot, Rt;
      unsigned A = agx_pack_memory_base(I, I->src[1], &At);
      unsigned O = agx_pack_memory_index(I, I->src[2], &Ot);
      unsigned R = agx_pack_atomic_dest(I, I->dest[0], &Rt);
      unsigned S = agx_pack_atomic_source(I, I->src[0]);

      uint64_t raw =
         agx_opcodes_info[I->op].encoding.exact |
         (((uint64_t)I->atomic_opc) << 6) | ((R & BITFIELD_MASK(6)) << 10) |
         ((A & BITFIELD_MASK(4)) << 16) | ((O & BITFIELD_MASK(4)) << 20) |
         (Ot ? (1 << 24) : 0) | (I->src[2].abs ? (1 << 25) : 0) | (At << 27) |
         (I->scoreboard << 30) |
         (((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
         (((uint64_t)((A >> 4) & BITFIELD_MASK(4))) << 36) |
         (((uint64_t)(R >> 6)) << 40) |
         (needs_g13x_coherency ? BITFIELD64_BIT(45) : 0) |
         (Rt ? BITFIELD64_BIT(47) : 0) | (((uint64_t)S) << 48) |
         (((uint64_t)(O >> 8)) << 56);

      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      break;
   }

   case AGX_OPCODE_LOCAL_ATOMIC: {
      bool L = true; /* TODO: Don't force */

      unsigned At;
      bool Rt = false, Ot;

      bool Ra = I->dest[0].type != AGX_INDEX_NULL;
      unsigned R = Ra ? agx_pack_memory_reg(I, I->dest[0], &Rt) : 0;
      unsigned S = agx_pack_atomic_source(I, I->src[0]);
      unsigned A = agx_pack_local_base(I, I->src[1], &At);
      unsigned O = agx_pack_local_index(I, I->src[2], &Ot);

      uint64_t raw =
         agx_opcodes_info[I->op].encoding.exact | (Rt ? BITFIELD64_BIT(8) : 0) |
         ((R & BITFIELD_MASK(6)) << 9) | (L ? BITFIELD64_BIT(15) : 0) |
         ((A & BITFIELD_MASK(6)) << 16) | (At << 22) |
         (((uint64_t)I->atomic_opc) << 24) | ((O & BITFIELD64_MASK(6)) << 28) |
         (Ot ? BITFIELD64_BIT(34) : 0) | (Ra ? BITFIELD64_BIT(38) : 0) |
         (((uint64_t)(O >> 6)) << 48) | (((uint64_t)(A >> 6)) << 58) |
         (((uint64_t)(R >> 6)) << 60);

      uint64_t raw2 = S;

      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      memcpy(util_dynarray_grow_bytes(emission, 1, 2), &raw2, 2);
      break;
   }

   case AGX_OPCODE_TEXTURE_LOAD:
   case AGX_OPCODE_IMAGE_LOAD:
   case AGX_OPCODE_TEXTURE_SAMPLE: {
      pack_assert(I, I->mask != 0);
      pack_assert(I, I->format <= 0x10);

      bool Rt, Ct, St, Cs;
      unsigned Tt;
      unsigned U;
      enum agx_lod_mode lod_mode = I->lod_mode;

      unsigned R = agx_pack_memory_reg(I, I->dest[0], &Rt);
      unsigned C = agx_pack_sample_coords(I, I->src[0], &Ct, &Cs);
      unsigned T = agx_pack_texture(I, I->src[2], I->src[3], &U, &Tt);
      unsigned S = agx_pack_sampler(I, I->src[4], &St);
      unsigned O = agx_pack_sample_compare_offset(I, I->src[5]);
      unsigned D = agx_pack_lod(I, I->src[1], &lod_mode);

      unsigned q1 = I->shadow;
      unsigned q2 = I->query_lod ? 2 : 0;
      unsigned q3 = 12;  // XXX
      unsigned kill = 0; // helper invocation kill bit

      /* Set bit 43 for image loads. This seems to make sure that image loads
       * get the value written by the latest image store, not some other image
       * store that was already in flight, fixing
       *
       *    KHR-GLES31.core.shader_image_load_store.basic-glsl-misc-fs
       *
       * Apple seems to set this bit unconditionally for read/write image loads
       * and never for readonly image loads. Some sort of cache control.
       */
      if (I->op == AGX_OPCODE_IMAGE_LOAD)
         q3 |= 1;

      uint32_t extend = ((U & BITFIELD_MASK(5)) << 0) | (kill << 5) |
                        ((I->dim >> 3) << 7) | ((R >> 6) << 8) |
                        ((C >> 6) << 10) | ((D >> 6) << 12) | ((T >> 6) << 14) |
                        ((O & BITFIELD_MASK(6)) << 16) | (I->gather << 23) |
                        (I->offset << 27) | ((S >> 6) << 28) | ((O >> 6) << 30);

      bool L = (extend != 0);

      uint64_t raw =
         0x31 | ((I->op != AGX_OPCODE_TEXTURE_SAMPLE) ? (1 << 6) : 0) |
         (Rt ? (1 << 8) : 0) | ((R & BITFIELD_MASK(6)) << 9) |
         (L ? (1 << 15) : 0) | ((C & BITFIELD_MASK(6)) << 16) |
         (Ct ? (1 << 22) : 0) | (q1 << 23) | ((D & BITFIELD_MASK(6)) << 24) |
         (q2 << 30) | (((uint64_t)(T & BITFIELD_MASK(6))) << 32) |
         (((uint64_t)Tt) << 38) |
         (((uint64_t)(I->dim & BITFIELD_MASK(3))) << 40) |
         (((uint64_t)q3) << 43) | (((uint64_t)I->mask) << 48) |
         (((uint64_t)lod_mode) << 52) |
         (((uint64_t)(S & BITFIELD_MASK(6))) << 56) | (((uint64_t)St) << 62) |
         (((uint64_t)I->scoreboard) << 63);

      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      if (L)
         memcpy(util_dynarray_grow_bytes(emission, 1, 4), &extend, 4);

      break;
   }

   case AGX_OPCODE_IMAGE_WRITE: {
      bool Ct, Dt, Rt, Cs;
      unsigned Tt;
      unsigned U;

      unsigned R = agx_pack_pbe_source(I, I->src[0], &Rt);
      unsigned C = agx_pack_sample_coords(I, I->src[1], &Ct, &Cs);
      unsigned D = agx_pack_pbe_lod(I, I->src[2], &Dt);
      unsigned T = agx_pack_texture(I, I->src[3], I->src[4], &U, &Tt);
      bool rtz = false;

      pack_assert(I, U < (1 << 5));
      pack_assert(I, D < (1 << 8));
      pack_assert(I, R < (1 << 8));
      pack_assert(I, C < (1 << 8));
      pack_assert(I, T < (1 << 8));
      pack_assert(I, Tt < (1 << 2));

      uint64_t raw = agx_opcodes_info[I->op].encoding.exact |
                     (Rt ? (1 << 8) : 0) | ((R & BITFIELD_MASK(6)) << 9) |
                     ((C & BITFIELD_MASK(6)) << 16) | (Ct ? (1 << 22) : 0) |
                     ((D & BITFIELD_MASK(6)) << 24) | (Dt ? (1u << 31) : 0) |
                     (((uint64_t)(T & BITFIELD_MASK(6))) << 32) |
                     (((uint64_t)Tt) << 38) |
                     (((uint64_t)I->dim & BITFIELD_MASK(3)) << 40) |
                     (Cs ? (1ull << 47) : 0) | (((uint64_t)U) << 48) |
                     (rtz ? (1ull << 53) : 0) |
                     ((I->dim & BITFIELD_BIT(4)) ? (1ull << 55) : 0) |
                     (((uint64_t)R >> 6) << 56) | (((uint64_t)C >> 6) << 58) |
                     (((uint64_t)D >> 6) << 60) | (((uint64_t)T >> 6) << 62);

      if (raw >> 48) {
         raw |= BITFIELD_BIT(15);
         memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      } else {
         memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
      }

      break;
   }

   case AGX_OPCODE_BLOCK_IMAGE_STORE: {
      enum agx_format F = I->format;
      pack_assert(I, F < 0x10);

      unsigned Tt = 0;
      pack_assert(I, Tt < 0x4);

      unsigned U;
      unsigned T = agx_pack_texture(I, I->src[0], I->src[1], &U, &Tt);
      pack_assert(I, T < 0x100);
      pack_assert(I, U < (1 << 5));

      bool Cs = false;
      bool Ct = I->src[3].discard;
      unsigned C = I->src[3].value;

      agx_index offset = I->src[2];
      pack_assert(I, offset.size == AGX_SIZE_32);
      assert_register_is_aligned(I, offset);
      unsigned R = offset.value;

      bool unk1 = true;

      /* This bit behaves oddly in combination with the texture state index
       * and the tilebuffer offset. Probably best not to use it for now.
       */
      unsigned unk3 = 1;

      uint32_t word0 = agx_opcodes_info[I->op].encoding.exact |
                       (1 << 15) /* we always set length bit for now */ |
                       ((F & 1) << 8) | ((R & BITFIELD_MASK(6)) << 9) |
                       ((C & BITFIELD_MASK(6)) << 16) | (Ct ? (1 << 22) : 0) |
                       (I->explicit_coords ? (1 << 23) : 0) |
                       (unk1 ? (1u << 31) : 0);

      uint32_t word1 = (T & BITFIELD_MASK(6)) | (Tt << 6) |
                       ((I->dim & BITFIELD_MASK(3)) << 8) | (9 << 11) |
                       (Cs ? (1 << 15) : 0) | (((uint64_t)U) << 16) |
                       ((I->dim & BITFIELD_BIT(3)) ? (1u << 23) : 0) |
                       ((R >> 6) << 24) | ((C >> 6) << 26);

      uint32_t word2 = (F >> 1) | (unk3 ? (1 << 3) : 0) | ((T >> 6) << 14);

      memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word0, 4);
      memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word1, 4);
      memcpy(util_dynarray_grow_bytes(emission, 1, 2), &word2, 2);
      break;
   }

   case AGX_OPCODE_ZS_EMIT: {
      agx_index S = I->src[0];
      if (S.type == AGX_INDEX_IMMEDIATE)
         pack_assert(I, S.value < BITFIELD_BIT(8));
      else
         assert_register_is_aligned(I, S);

      agx_index T = I->src[1];
      assert_register_is_aligned(I, T);

      pack_assert(I, I->zs >= 1 && I->zs <= 3);

      uint32_t word0 = agx_opcodes_info[I->op].encoding.exact |
                       ((S.type == AGX_INDEX_IMMEDIATE) ? (1 << 8) : 0) |
                       ((S.value & BITFIELD_MASK(6)) << 9) |
                       ((T.value & BITFIELD_MASK(6)) << 16) |
                       ((T.value >> 6) << 26) | ((S.value >> 6) << 24) |
                       (I->zs << 29);

      memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word0, 4);
      break;
   }

   case AGX_OPCODE_JMP_EXEC_ANY:
   case AGX_OPCODE_JMP_EXEC_NONE:
   case AGX_OPCODE_JMP_EXEC_NONE_AFTER: {
      /* We don't implement indirect branches */
      pack_assert(I, I->target != NULL);

      /* We'll fix the offset later. */
      struct agx_branch_fixup fixup = {
         .block = I->target,
         .offset = emission->size,
         .skip_to_end = I->op == AGX_OPCODE_JMP_EXEC_NONE_AFTER,
      };

      util_dynarray_append(fixups, struct agx_branch_fixup, fixup);

      /* The rest of the instruction is fixed */
      struct agx_opcode_info info = agx_opcodes_info[I->op];
      uint64_t raw = info.encoding.exact;
      memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
      break;
   }

   case AGX_OPCODE_DOORBELL: {
      pack_assert(I, I->imm < BITFIELD_MASK(8));
      struct agx_opcode_info info = agx_opcodes_info[I->op];
      uint64_t raw = info.encoding.exact | (I->imm << 40);
      memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
      break;
   }

   case AGX_OPCODE_STACK_UNMAP:
   case AGX_OPCODE_STACK_MAP: {
      agx_index value = I->op == AGX_OPCODE_STACK_MAP ? I->src[0] : I->dest[0];

      pack_assert(I, value.type == AGX_INDEX_REGISTER);
      pack_assert(I, value.size == AGX_SIZE_32);
      pack_assert(I, I->imm < BITFIELD_MASK(16));

      unsigned q1 = 0;  // XXX
      unsigned q2 = 0;  // XXX
      unsigned q3 = 0;  // XXX
      unsigned q4 = 16; // XXX
      unsigned q5 = 16; // XXX

      struct agx_opcode_info info = agx_opcodes_info[I->op];
      uint64_t raw =
         info.encoding.exact | (q1 << 8) | ((value.value & 0x3F) << 10) |
         ((I->imm & 0xF) << 20) | (1UL << 24) | // XXX
         (1UL << 26) |                          // XXX
         (q2 << 30) | ((uint64_t)((I->imm >> 4) & 0xF) << 32) |
         ((uint64_t)q3 << 37) | ((uint64_t)(value.value >> 6) << 40) |
         ((uint64_t)q4 << 42) | (1UL << 47) | // XXX
         ((uint64_t)q5 << 48) | ((uint64_t)(I->imm >> 8) << 56);

      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      break;
   }

   case AGX_OPCODE_STACK_LOAD:
   case AGX_OPCODE_STACK_STORE: {
      enum agx_format format = I->format;
      unsigned mask = I->mask;

      bool is_load = I->op == AGX_OPCODE_STACK_LOAD;
      bool L = true; /* TODO: when would you want short? */

      pack_assert(I, mask != 0);
      pack_assert(I, format <= 0x10);

      bool Rt, Ot;
      unsigned R =
         agx_pack_memory_reg(I, is_load ? I->dest[0] : I->src[0], &Rt);
      unsigned O =
         agx_pack_memory_index(I, is_load ? I->src[0] : I->src[1], &Ot);

      unsigned i1 = 1; // XXX
      unsigned i2 = 0; // XXX
      unsigned i5 = 4; // XXX

      uint64_t raw =
         agx_opcodes_info[I->op].encoding.exact |
         ((format & BITFIELD_MASK(2)) << 8) | ((R & BITFIELD_MASK(6)) << 10) |
         ((O & BITFIELD_MASK(4)) << 20) | (Ot ? (1 << 24) : 0) |
         ((uint64_t)i1 << 26) | ((uint64_t)I->scoreboard << 30) |
         (((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
         ((uint64_t)i2 << 36) |
         (((uint64_t)((R >> 6) & BITFIELD_MASK(2))) << 40) |
         ((uint64_t)i5 << 44) | (L ? (1UL << 47) : 0) |
         (((uint64_t)(format >> 2)) << 50) | (((uint64_t)Rt) << 49) |
         (((uint64_t)mask) << 52) | (((uint64_t)(O >> 8)) << 56);

      unsigned size = L ? 8 : 6;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }
   case AGX_OPCODE_STACK_ADJUST: {
      struct agx_opcode_info info = agx_opcodes_info[I->op];

      unsigned i0 = 0; // XXX
      unsigned i1 = 1; // XXX
      unsigned i2 = 2; // XXX
      unsigned i3 = 0; // XXX
      unsigned i4 = 0; // XXX

      uint64_t raw =
         info.encoding.exact | ((uint64_t)i0 << 8) | ((uint64_t)i1 << 26) |
         ((uint64_t)i2 << 36) | ((uint64_t)i3 << 44) | ((uint64_t)i4 << 50) |
         ((I->stack_size & 0xF) << 20) |
         ((uint64_t)((I->stack_size >> 4) & 0xF) << 32) | (1UL << 47) | // XXX
         ((uint64_t)(I->stack_size >> 8) << 56);

      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      break;
   }

   default:
      agx_pack_alu(emission, I);
      return;
   }
}

/* Relative branches may be emitted before their targets, so we patch the
 * binary to fix up the branch offsets after the main emit */

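/* For example, a jump emitted at byte offset 0x20 whose target block starts
 * at byte offset 0x80 gets the 32-bit displacement 0x60 written at offset
 * 0x22, two bytes into the instruction.
 */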
static void
agx_fixup_branch(struct util_dynarray *emission, struct agx_branch_fixup fix)
{
   /* Branch offset is 2 bytes into the jump instruction */
   uint8_t *location = ((uint8_t *)emission->data) + fix.offset + 2;

   off_t target = fix.skip_to_end ? fix.block->last_offset : fix.block->offset;

   /* Offsets are relative to the jump instruction */
   int32_t patch = (int32_t)target - (int32_t)fix.offset;

   /* Patch the binary */
   memcpy(location, &patch, sizeof(patch));
}

void
agx_pack_binary(agx_context *ctx, struct util_dynarray *emission)
{
   struct util_dynarray fixups;
   util_dynarray_init(&fixups, ctx);

   agx_foreach_block(ctx, block) {
      /* Relative to the start of the binary, the block begins at the current
       * number of bytes emitted */
      block->offset = emission->size;

      agx_foreach_instr_in_block(block, ins) {
         block->last_offset = emission->size;
         agx_pack_instr(emission, &fixups, ins,
                        ctx->key->dev.needs_g13x_coherency);
      }
   }

   util_dynarray_foreach(&fixups, struct agx_branch_fixup, fixup)
      agx_fixup_branch(emission, *fixup);

   util_dynarray_fini(&fixups);

   /* Dougall calls the instruction in this footer "trap". Match the blob. */
   if (!ctx->key->no_stop || ctx->is_preamble) {
      for (unsigned i = 0; i < 8; ++i) {
         uint16_t trap = agx_opcodes_info[AGX_OPCODE_TRAP].encoding.exact;
         util_dynarray_append(emission, uint16_t, trap);
      }
   }
}