xref: /aosp_15_r20/external/mesa3d/src/freedreno/afuc/emu.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Google, Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include <assert.h>
7 #include <ctype.h>
8 #include <errno.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <sys/mman.h>
13 #include <unistd.h>
14 
15 #include "util/u_math.h"
16 
17 #include "freedreno_pm4.h"
18 
19 #include "afuc-isa.h"
20 
21 #include "emu.h"
22 #include "util.h"
23 
/*
 * Rotate-left helpers.  'x' is expected to be an unsigned value of the
 * matching width (32 or 64 bits).
 *
 * Both shift counts are masked to the operand width so that a rotate by 0
 * (or by any multiple of the width) no longer shifts by the full width of
 * the type, which is undefined behaviour in C.  For counts in 1..width-1
 * the result is unchanged.
 *
 * Note that both 'x' and 'r' are evaluated more than once.
 */
#define rotl32(x,r) (((x) << ((r) & 31)) | ((x) >> (-(r) & 31)))
#define rotl64(x,r) (((x) << ((r) & 63)) | ((x) >> (-(r) & 63)))
26 
/* Register handles used by the instruction emulation in this file: SP and
 * STACK0 are accessed by the 'ret' and 'call' handling (stack pointer and
 * first stack slot in the SQE register file), and DRAW_STATE_SET_HDR is the
 * control register that the 'cwrite' handling special-cases.
 */
EMU_SQE_REG(SP);
EMU_SQE_REG(STACK0);
EMU_CONTROL_REG(DRAW_STATE_SET_HDR);
30 
/**
 * AFUC emulator.  Supports a6xx, plus the a7xx firmware variants that are
 * special-cased via the fw_id / gpuver checks in emu_init().
 *
 * TODO to add a5xx it might be easier to compile this multiple times
 * with conditional compile to deal with differences between generations.
 */
37 
38 static uint32_t
emu_alu(struct emu * emu,afuc_opc opc,uint32_t src1,uint32_t src2)39 emu_alu(struct emu *emu, afuc_opc opc, uint32_t src1, uint32_t src2)
40 {
41    uint64_t tmp;
42    switch (opc) {
43    case OPC_ADD:
44       tmp = (uint64_t)src1 + (uint64_t)src2;
45       emu->carry = tmp >> 32;
46       return (uint32_t)tmp;
47    case OPC_ADDHI:
48       return src1 + src2 + emu->carry;
49    case OPC_SUB:
50       tmp = (uint64_t)src1 - (uint64_t)src2;
51       emu->carry = tmp >> 32;
52       return (uint32_t)tmp;
53    case OPC_SUBHI:
54       return src1 - src2 + emu->carry;
55    case OPC_AND:
56       return src1 & src2;
57    case OPC_OR:
58       return src1 | src2;
59    case OPC_XOR:
60       return src1 ^ src2;
61    case OPC_NOT:
62       return ~src1;
63    case OPC_SHL:
64       return src1 << src2;
65    case OPC_USHR:
66       return src1 >> src2;
67    case OPC_ISHR:
68       return (int32_t)src1 >> src2;
69    case OPC_ROT:
70       if (src2 & 0x80000000)
71          return rotl64(src1, -*(int32_t *)&src2);
72       else
73          return rotl32(src1, src2);
74    case OPC_MUL8:
75       return (src1 & 0xff) * (src2 & 0xff);
76    case OPC_MIN:
77       return MIN2(src1, src2);
78    case OPC_MAX:
79       return MAX2(src1, src2);
80    case OPC_CMP:
81       if (src1 > src2)
82          return 0x00;
83       else if (src1 == src2)
84          return 0x2b;
85       return 0x1e;
86    case OPC_BIC:
87       return src1 & ~src2;
88    case OPC_MSB:
89       if (!src2)
90          return 0;
91       return util_last_bit(src2) - 1;
92    case OPC_SETBIT: {
93       unsigned bit = src2 >> 1;
94       unsigned val = src2 & 1;
95       return (src1 & ~(1u << bit)) | (val << bit);
96    }
97    default:
98       printf("unhandled alu opc: 0x%02x\n", opc);
99       exit(1);
100    }
101 }
102 
103 /**
104  * Helper to calculate load/store address based on LOAD_STORE_HI
105  */
106 static uintptr_t
load_store_addr(struct emu * emu,unsigned gpr)107 load_store_addr(struct emu *emu, unsigned gpr)
108 {
109    EMU_CONTROL_REG(LOAD_STORE_HI);
110 
111    uintptr_t addr = emu_get_reg32(emu, &LOAD_STORE_HI);
112    addr <<= 32;
113 
114    return addr + emu_get_gpr_reg(emu, gpr);
115 }
116 
117 static void
emu_instr(struct emu * emu,struct afuc_instr * instr)118 emu_instr(struct emu *emu, struct afuc_instr *instr)
119 {
120    uint32_t rem = emu_get_gpr_reg(emu, REG_REM);
121 
122    switch (instr->opc) {
123    case OPC_NOP:
124       break;
125    case OPC_MSB:
126    case OPC_ADD ... OPC_BIC: {
127       uint32_t val = emu_alu(emu, instr->opc,
128                              emu_get_gpr_reg(emu, instr->src1),
129                              instr->has_immed ? instr->immed :
130                              emu_get_gpr_reg_alu(emu, instr->src2, instr->peek));
131       emu_set_gpr_reg(emu, instr->dst, val);
132 
133       if (instr->xmov) {
134          unsigned m = MIN2(instr->xmov, rem);
135 
136          assert(m <= 3);
137 
138          if (m == 1) {
139             emu_set_gpr_reg(emu, REG_REM, --rem);
140             emu_dump_state_change(emu);
141             emu_set_gpr_reg(emu, REG_DATA,
142                             emu_get_gpr_reg(emu, instr->src2));
143          } else if (m == 2) {
144             emu_set_gpr_reg(emu, REG_REM, --rem);
145             emu_dump_state_change(emu);
146             emu_set_gpr_reg(emu, REG_DATA,
147                             emu_get_gpr_reg(emu, instr->src2));
148             emu_set_gpr_reg(emu, REG_REM, --rem);
149             emu_dump_state_change(emu);
150             emu_set_gpr_reg(emu, REG_DATA,
151                             emu_get_gpr_reg(emu, instr->src2));
152          } else if (m == 3) {
153             emu_set_gpr_reg(emu, REG_REM, --rem);
154             emu_dump_state_change(emu);
155             emu_set_gpr_reg(emu, REG_DATA,
156                             emu_get_gpr_reg(emu, instr->src2));
157             emu_set_gpr_reg(emu, REG_REM, --rem);
158             emu_dump_state_change(emu);
159             emu_set_gpr_reg(emu, instr->dst,
160                             emu_get_gpr_reg(emu, instr->src2));
161             emu_set_gpr_reg(emu, REG_REM, --rem);
162             emu_dump_state_change(emu);
163             emu_set_gpr_reg(emu, REG_DATA,
164                             emu_get_gpr_reg(emu, instr->src2));
165          }
166       }
167       break;
168    }
169    case OPC_MOVI: {
170       uint32_t val = instr->immed << instr->shift;
171       emu_set_gpr_reg(emu, instr->dst, val);
172       break;
173    }
174    case OPC_SETBITI: {
175       uint32_t src = emu_get_gpr_reg(emu, instr->src1);
176       emu_set_gpr_reg(emu, instr->dst, src | (1u << instr->bit));
177       break;
178    }
179    case OPC_CLRBIT: {
180       uint32_t src = emu_get_gpr_reg(emu, instr->src1);
181       emu_set_gpr_reg(emu, instr->dst, src & ~(1u << instr->bit));
182       break;
183    }
184    case OPC_UBFX: {
185       uint32_t src = emu_get_gpr_reg(emu, instr->src1);
186       unsigned lo = instr->bit, hi = instr->immed;
187       uint32_t dst = (src >> lo) & BITFIELD_MASK(hi - lo + 1);
188       emu_set_gpr_reg(emu, instr->dst, dst);
189       break;
190    }
191    case OPC_BFI: {
192       uint32_t src = emu_get_gpr_reg(emu, instr->src1);
193       unsigned lo = instr->bit, hi = instr->immed;
194       src = (src & BITFIELD_MASK(hi - lo + 1)) << lo;
195       emu_set_gpr_reg(emu, instr->dst, emu_get_gpr_reg(emu, instr->dst) | src);
196       break;
197    }
198    case OPC_CWRITE: {
199       uint32_t src1 = emu_get_gpr_reg(emu, instr->src1);
200       uint32_t src2 = emu_get_gpr_reg(emu, instr->src2);
201       uint32_t reg = src2 + instr->immed;
202 
203       if (instr->preincrement) {
204          emu_set_gpr_reg(emu, instr->src2, reg);
205       }
206 
207       emu_set_control_reg(emu, reg, src1);
208 
209       for (unsigned i = 0; i < instr->sds; i++) {
210          uint32_t src1 = emu_get_gpr_reg(emu, instr->src1);
211 
212          /* TODO: There is likely a DRAW_STATE_SET_BASE register on a6xx, as
213           * there is on a7xx, and we should be writing that instead of setting
214           * the base directly.
215           */
216          if (reg == emu_reg_offset(&DRAW_STATE_SET_HDR))
217             emu_set_draw_state_base(emu, i, src1);
218       }
219       break;
220    }
221    case OPC_CREAD: {
222       uint32_t src1 = emu_get_gpr_reg(emu, instr->src1);
223 
224       if (instr->preincrement) {
225          emu_set_gpr_reg(emu, instr->src1, src1 + instr->immed);
226       }
227 
228       emu_set_gpr_reg(emu, instr->dst,
229                       emu_get_control_reg(emu, src1 + instr->immed));
230       break;
231    }
232    case OPC_SWRITE: {
233       uint32_t src1 = emu_get_gpr_reg(emu, instr->src1);
234       uint32_t src2 = emu_get_gpr_reg(emu, instr->src2);
235 
236       if (instr->preincrement) {
237          emu_set_gpr_reg(emu, instr->src2, src2 + instr->immed);
238       }
239 
240       emu_set_sqe_reg(emu, src2 + instr->immed, src1);
241       break;
242    }
243    case OPC_SREAD: {
244       uint32_t src1 = emu_get_gpr_reg(emu, instr->src1);
245 
246       if (instr->preincrement) {
247          emu_set_gpr_reg(emu, instr->src1, src1 + instr->immed);
248       }
249 
250       emu_set_gpr_reg(emu, instr->dst,
251                       emu_get_sqe_reg(emu, src1 + instr->immed));
252       break;
253    }
254    case OPC_LOAD: {
255       uintptr_t addr = load_store_addr(emu, instr->src1) +
256             instr->immed;
257 
258       if (instr->preincrement) {
259          uint32_t src1 = emu_get_gpr_reg(emu, instr->src1);
260          emu_set_gpr_reg(emu, instr->src1, src1 + instr->immed);
261       }
262 
263       uint32_t val = emu_mem_read_dword(emu, addr);
264 
265       emu_set_gpr_reg(emu, instr->dst, val);
266 
267       break;
268    }
269    case OPC_STORE: {
270       uintptr_t addr = load_store_addr(emu, instr->src2) +
271             instr->immed;
272 
273       if (instr->preincrement) {
274          uint32_t src2 = emu_get_gpr_reg(emu, instr->src2);
275          emu_set_gpr_reg(emu, instr->src2, src2 + instr->immed);
276       }
277 
278       uint32_t val = emu_get_gpr_reg(emu, instr->src1);
279 
280       emu_mem_write_dword(emu, addr, val);
281 
282       break;
283    }
284    case OPC_BRNEI ... OPC_BREQB: {
285       uint32_t off = emu->gpr_regs.pc + instr->offset;
286       uint32_t src = emu_get_gpr_reg(emu, instr->src1);
287 
288       if (instr->opc == OPC_BRNEI) {
289          if (src != instr->immed)
290             emu->branch_target = off;
291       } else if (instr->opc == OPC_BREQI) {
292          if (src == instr->immed)
293             emu->branch_target = off;
294       } else if (instr->opc == OPC_BRNEB) {
295          if (!(src & (1 << instr->bit)))
296             emu->branch_target = off;
297       } else if (instr->opc == OPC_BREQB) {
298          if (src & (1 << instr->bit))
299             emu->branch_target = off;
300       } else {
301          assert(0);
302       }
303       break;
304    }
305    case OPC_RET: {
306       unsigned sp = emu_get_reg32(emu, &SP);
307       assert(sp > 0);
308 
309       /* counter-part to 'call' instruction, also has a delay slot: */
310       emu->branch_target = emu_get_sqe_reg(emu, emu_reg_offset(&STACK0) + sp - 1);
311       emu_set_reg32(emu, &SP, sp - 1);
312 
313       break;
314    }
315    case OPC_CALL: {
316       unsigned sp = emu_get_reg32(emu, &SP);
317       assert(sp + emu_reg_offset(&STACK0) < ARRAY_SIZE(emu->sqe_regs.val));
318 
319       /* call looks to have same delay-slot behavior as branch/etc, so
320        * presumably the return PC is two instructions later:
321        */
322       emu_set_sqe_reg(emu, emu_reg_offset(&STACK0) + sp, emu->gpr_regs.pc + 2);
323       emu_set_reg32(emu, &SP, sp + 1);
324       emu->branch_target = instr->literal;
325 
326       break;
327    }
328    case OPC_WAITIN: {
329       assert(!emu->branch_target);
330       emu->run_mode = false;
331       emu->waitin = true;
332       break;
333    }
334    case OPC_BL: {
335       emu_set_gpr_reg(emu, REG_LR, emu->gpr_regs.pc + 2);
336       emu->branch_target = instr->literal;
337       break;
338    }
339    case OPC_JUMPR: {
340       emu->branch_target = emu_get_gpr_reg(emu, instr->src1);
341       break;
342    }
343    case OPC_SRET: {
344       emu->branch_target = emu_get_gpr_reg(emu, REG_LR);
345       /* TODO: read $sp and check for stack overflow? */
346       break;
347    }
348    case OPC_SETSECURE: {
349       // TODO this acts like a conditional branch, but in which case
350       // does it branch?
351       break;
352    }
353    default:
354       printf("unhandled opc: 0x%02x\n", instr->opc);
355       exit(1);
356    }
357 
358    if (instr->rep) {
359       assert(rem > 0);
360       emu_set_gpr_reg(emu, REG_REM, --rem);
361    }
362 }
363 
364 void
emu_step(struct emu * emu)365 emu_step(struct emu *emu)
366 {
367    struct afuc_instr *instr;
368    bool decoded =
369       afuc_isa_decode((void *)&instr, (void *)&emu->instrs[emu->gpr_regs.pc],
370                       &(struct isa_decode_options){
371                          .gpu_id = gpuver,
372                       });
373 
374    if (!decoded) {
375       uint32_t instr_val = emu->instrs[emu->gpr_regs.pc];
376       if ((instr_val >> 27) == 0) {
377          /* This is printed as an undecoded literal to show the immediate
378           * payload, but when executing it's just a NOP.
379           */
380          instr = calloc(1, sizeof(struct afuc_instr));
381          instr->opc = OPC_NOP;
382       } else {
383          printf("unmatched instruction: 0x%08x\n", instr_val);
384          exit(1);
385       }
386    }
387 
388    emu_main_prompt(emu);
389 
390    uint32_t branch_target = emu->branch_target;
391    emu->branch_target = 0;
392 
393    bool waitin = emu->waitin;
394    emu->waitin = false;
395 
396    if (instr->rep) {
397       do {
398          if (!emu_get_gpr_reg(emu, REG_REM))
399             break;
400 
401          emu_clear_state_change(emu);
402          emu_instr(emu, instr);
403 
404          /* defer last state-change dump until after any
405           * post-delay-slot handling below:
406           */
407          if (emu_get_gpr_reg(emu, REG_REM))
408             emu_dump_state_change(emu);
409       } while (true);
410    } else {
411       emu_clear_state_change(emu);
412       emu_instr(emu, instr);
413    }
414 
415    emu->gpr_regs.pc++;
416 
417    if (branch_target) {
418       emu->gpr_regs.pc = branch_target;
419    }
420 
421    if (waitin) {
422       uint32_t hdr = emu_get_gpr_reg(emu, 1);
423       uint32_t id, count;
424 
425       if (pkt_is_type4(hdr)) {
426          id = afuc_pm4_id("PKT4");
427          count = type4_pkt_size(hdr);
428 
429          /* Possibly a hack, not sure what the hw actually
430           * does here, but we want to mask out the pkt
431           * type field from the hdr, so that PKT4 handler
432           * doesn't see it and interpret it as part as the
433           * register offset:
434           */
435          emu->gpr_regs.val[1] &= 0x0fffffff;
436       } else if (pkt_is_type7(hdr)) {
437          id = cp_type7_opcode(hdr);
438          count = type7_pkt_size(hdr);
439       } else {
440          printf("Invalid opcode: 0x%08x\n", hdr);
441          exit(1);  /* GPU goes *boom* */
442       }
443 
444       assert(id < ARRAY_SIZE(emu->jmptbl));
445 
446       emu_set_gpr_reg(emu, REG_REM, count);
447       emu->gpr_regs.pc = emu->jmptbl[id];
448    }
449 
450    emu_dump_state_change(emu);
451 
452    free(instr);
453 }
454 
455 void
emu_run_bootstrap(struct emu * emu)456 emu_run_bootstrap(struct emu *emu)
457 {
458    EMU_CONTROL_REG(THREAD_SYNC);
459 
460    emu->quiet = true;
461    emu->run_mode = true;
462    emu->bootstrap_mode = true;
463    emu->bootstrap_finished = false;
464 
465    if (gpuver == 6 && emu->processor == EMU_PROC_LPAC) {
466       /* Emulate what the SQE bootstrap routine does after launching LPAC */
467       emu_set_reg32(emu, &THREAD_SYNC, 1u << 0);
468    }
469 
470    while (!emu->bootstrap_finished && !emu->waitin) {
471       emu_step(emu);
472    }
473 
474    emu->bootstrap_mode = false;
475 }
476 
477 
478 static void
check_access(struct emu * emu,uintptr_t gpuaddr,unsigned sz)479 check_access(struct emu *emu, uintptr_t gpuaddr, unsigned sz)
480 {
481    if ((gpuaddr % sz) != 0) {
482       printf("unaligned access fault: %p\n", (void *)gpuaddr);
483       exit(1);
484    }
485 
486    if ((gpuaddr + sz) >= EMU_MEMORY_SIZE) {
487       printf("iova fault: %p\n", (void *)gpuaddr);
488       exit(1);
489    }
490 }
491 
492 uint32_t
emu_mem_read_dword(struct emu * emu,uintptr_t gpuaddr)493 emu_mem_read_dword(struct emu *emu, uintptr_t gpuaddr)
494 {
495    check_access(emu, gpuaddr, 4);
496    return *(uint32_t *)(emu->gpumem + gpuaddr);
497 }
498 
499 static void
mem_write_dword(struct emu * emu,uintptr_t gpuaddr,uint32_t val)500 mem_write_dword(struct emu *emu, uintptr_t gpuaddr, uint32_t val)
501 {
502    check_access(emu, gpuaddr, 4);
503    *(uint32_t *)(emu->gpumem + gpuaddr) = val;
504 }
505 
506 void
emu_mem_write_dword(struct emu * emu,uintptr_t gpuaddr,uint32_t val)507 emu_mem_write_dword(struct emu *emu, uintptr_t gpuaddr, uint32_t val)
508 {
509    mem_write_dword(emu, gpuaddr, val);
510    assert(emu->gpumem_written == ~0);
511    emu->gpumem_written = gpuaddr;
512 }
513 
514 void
emu_init(struct emu * emu)515 emu_init(struct emu *emu)
516 {
517    emu->gpumem = mmap(NULL, EMU_MEMORY_SIZE,
518                       PROT_READ | PROT_WRITE,
519                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE,
520                       0, 0);
521    if (emu->gpumem == MAP_FAILED) {
522       printf("Could not allocate GPU memory: %s\n", strerror(errno));
523       exit(1);
524    }
525 
526    /* Copy the instructions into GPU memory: */
527    for (unsigned i = 0; i < emu->sizedwords; i++) {
528       mem_write_dword(emu, EMU_INSTR_BASE + (4 * i), emu->instrs[i]);
529    }
530 
531    EMU_GPU_REG(CP_SQE_INSTR_BASE);
532    EMU_GPU_REG(CP_LPAC_SQE_INSTR_BASE);
533    EMU_CONTROL_REG(BV_INSTR_BASE);
534    EMU_CONTROL_REG(LPAC_INSTR_BASE);
535 
536    /* Setup the address of the SQE fw, just use the normal CPU ptr address: */
537    switch (emu->processor) {
538    case EMU_PROC_SQE:
539       emu_set_reg64(emu, &CP_SQE_INSTR_BASE, EMU_INSTR_BASE);
540       break;
541    case EMU_PROC_BV:
542       emu_set_reg64(emu, &BV_INSTR_BASE, EMU_INSTR_BASE);
543       break;
544    case EMU_PROC_LPAC:
545       if (gpuver >= 7)
546          emu_set_reg64(emu, &LPAC_INSTR_BASE, EMU_INSTR_BASE);
547       else
548          emu_set_reg64(emu, &CP_LPAC_SQE_INSTR_BASE, EMU_INSTR_BASE);
549       break;
550    }
551 
552    if (emu->fw_id == AFUC_A750) {
553       emu_set_control_reg(emu, 0, 7 << 28);
554       emu_set_control_reg(emu, 2, 0x40 << 8);
555    } else if (emu->fw_id == AFUC_A730 || emu->fw_id == AFUC_A740) {
556       emu_set_control_reg(emu, 0xef, 1 << 21);
557       emu_set_control_reg(emu, 0, 7 << 28);
558    } else if (emu->fw_id == AFUC_A660) {
559       emu_set_control_reg(emu, 0, 3 << 28);
560    } else if (emu->fw_id == AFUC_A650) {
561       emu_set_control_reg(emu, 0, 1 << 28);
562    }
563 }
564 
565 void
emu_fini(struct emu * emu)566 emu_fini(struct emu *emu)
567 {
568    uint32_t *instrs = emu->instrs;
569    unsigned sizedwords = emu->sizedwords;
570    unsigned fw_id = emu->fw_id;
571 
572    munmap(emu->gpumem, EMU_MEMORY_SIZE);
573    memset(emu, 0, sizeof(*emu));
574 
575    emu->instrs = instrs;
576    emu->sizedwords = sizedwords;
577    emu->fw_id = fw_id;
578 }
579