xref: /aosp_15_r20/external/mesa3d/src/nouveau/mme/mme_fermi_sim.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Mary Guillemard
3  * SPDX-License-Identifier: MIT
4  */
5 #include "mme_fermi_sim.h"
6 
7 #include <inttypes.h>
8 
9 #include "mme_fermi.h"
10 #include "util/u_math.h"
11 
12 #include "nv_push_cl9097.h"
13 #include "nv_push_cl902d.h"
14 
/* Execution state of one simulated Fermi MME macro invocation. */
struct mme_fermi_sim {
   /* Callbacks (and their opaque handle) used for all external
    * interaction: parameter loads, state reads, and method emission.
    */
   const struct mme_sim_state_ops *state_ops;
   void *state_handler;

   /* Current method address/increment, as programmed by set_mthd(). */
   struct {
      unsigned mthd:16;  /* method address, in bytes */
      unsigned inc:4;    /* per-emit increment, in dwords */
      bool has_mthd:1;   /* true once set_mthd() has been called */
   } mthd;

   /* $r1..$r7; $r0 is hard-wired to zero (see load_reg/store_reg) */
   uint32_t regs[7];
   uint32_t alu_carry;   /* carry/borrow out of the last ADD/SUB ALU op */
   uint16_t ip;          /* instruction pointer of the current inst */
   uint16_t next_ip;     /* where execution continues after this inst */
};
30 
load_param(struct mme_fermi_sim * sim)31 static uint32_t load_param(struct mme_fermi_sim *sim)
32 {
33    return sim->state_ops->load(sim->state_handler);
34 }
35 
load_reg(struct mme_fermi_sim * sim,enum mme_fermi_reg reg)36 static uint32_t load_reg(struct mme_fermi_sim *sim, enum mme_fermi_reg reg)
37 {
38    if (reg == MME_FERMI_REG_ZERO) {
39       return 0;
40    }
41 
42    return sim->regs[reg - 1];
43 }
44 
store_reg(struct mme_fermi_sim * sim,enum mme_fermi_reg reg,uint32_t val)45 static void store_reg(struct mme_fermi_sim *sim, enum mme_fermi_reg reg, uint32_t val)
46 {
47    if (reg == MME_FERMI_REG_ZERO) {
48       return;
49    }
50 
51    sim->regs[reg - 1] = val;
52 }
53 
load_imm(const struct mme_fermi_inst * inst)54 static int32_t load_imm(const struct mme_fermi_inst *inst)
55 {
56    return util_mask_sign_extend(inst->imm, 18);
57 }
58 
load_state(struct mme_fermi_sim * sim,uint16_t addr)59 static uint32_t load_state(struct mme_fermi_sim *sim, uint16_t addr)
60 {
61    return sim->state_ops->state(sim->state_handler, addr);
62 }
63 
/* Extract `size` bits of `value` starting at `src_bit` and shift the
 * extracted field left by `dst_bit`.  Bit positions above 31 yield 0.
 */
static uint32_t eval_bfe_lsl(uint32_t value, uint32_t src_bit, uint32_t dst_bit, uint8_t size)
{
   if (src_bit > 31 || dst_bit > 31)
      return 0;

   const uint32_t mask = (size >= 32) ? ~0u : ((1u << size) - 1u);
   return ((value >> src_bit) & mask) << dst_bit;
}
72 
eval_op(struct mme_fermi_sim * sim,const struct mme_fermi_inst * inst)73 static uint32_t eval_op(struct mme_fermi_sim *sim, const struct mme_fermi_inst *inst) {
74    assert(inst->op != MME_FERMI_OP_BRANCH);
75 
76    uint32_t x = load_reg(sim, inst->src[0]);
77    uint32_t y = load_reg(sim, inst->src[1]);
78 
79    switch (inst->op) {
80       case MME_FERMI_OP_ALU_REG: {
81          uint32_t res = 0;
82 
83          switch (inst->alu_op) {
84             case MME_FERMI_ALU_OP_ADD:
85                res = x + y;
86                sim->alu_carry = res < x;
87                break;
88             case MME_FERMI_ALU_OP_ADDC:
89                res = x + y + sim->alu_carry;
90                sim->alu_carry = res < x;
91                break;
92             case MME_FERMI_ALU_OP_SUB:
93                res = x - y;
94                sim->alu_carry = res > x;
95                break;
96             case MME_FERMI_ALU_OP_SUBB:
97                res = x - y - sim->alu_carry;
98                sim->alu_carry = res > x;
99                break;
100             case MME_FERMI_ALU_OP_XOR:
101                res = x ^ y;
102                break;
103             case MME_FERMI_ALU_OP_OR:
104                res = x | y;
105                break;
106             case MME_FERMI_ALU_OP_AND:
107                res = x & y;
108                break;
109             case MME_FERMI_ALU_OP_AND_NOT:
110                res = x & ~y;
111                break;
112             case MME_FERMI_ALU_OP_NAND:
113                res = ~(x & y);
114                break;
115             default:
116                unreachable("Unhandled ALU op");
117          }
118 
119          return res;
120       }
121       case MME_FERMI_OP_ADD_IMM:
122          return x + load_imm(inst);
123       case MME_FERMI_OP_MERGE:
124          return (x & ~(BITFIELD_MASK(inst->bitfield.size) << inst->bitfield.dst_bit)) | (((y >> inst->bitfield.src_bit) & BITFIELD_MASK(inst->bitfield.size)) << inst->bitfield.dst_bit);
125       case MME_FERMI_OP_BFE_LSL_IMM:
126          return eval_bfe_lsl(y, x, inst->bitfield.dst_bit, inst->bitfield.size);
127       case MME_FERMI_OP_BFE_LSL_REG:
128          return eval_bfe_lsl(y, inst->bitfield.src_bit, x, inst->bitfield.size);
129       case MME_FERMI_OP_STATE:
130          return load_state(sim, (x + load_imm(inst)) * 4);
131       // TODO: reverse MME_FERMI_OP_UNK6
132       default:
133          unreachable("Unhandled op");
134    }
135 }
136 
137 static void
set_mthd(struct mme_fermi_sim * sim,uint32_t val)138 set_mthd(struct mme_fermi_sim *sim, uint32_t val)
139 {
140    sim->mthd.mthd = (val & 0xfff) << 2;
141    sim->mthd.inc = (val >> 12) & 0xf;
142    sim->mthd.has_mthd = true;
143 }
144 
145 static void
emit_mthd(struct mme_fermi_sim * sim,uint32_t val)146 emit_mthd(struct mme_fermi_sim *sim, uint32_t val)
147 {
148    // TODO: understand what happens on hardware when no mthd has been set.
149    if (!sim->mthd.has_mthd)
150       return;
151 
152    sim->state_ops->mthd(sim->state_handler, sim->mthd.mthd, val);
153    sim->mthd.mthd += sim->mthd.inc * 4;
154 }
155 
/*
 * Execute a single macro instruction.
 *
 * Branches only update sim->next_ip (the caller handles delay slots);
 * every other op computes a value via eval_op() and then applies the
 * instruction's assignment operation, which may write a register, set
 * the current method address, and/or emit method data.
 */
static void
eval_inst(struct mme_fermi_sim *sim, const struct mme_fermi_inst *inst)
{
   if (inst->op == MME_FERMI_OP_BRANCH) {
      /* The branch condition tests src[0] against zero. */
      uint32_t val = load_reg(sim, inst->src[0]);
      bool cond = inst->branch.not_zero ? val != 0 : val == 0;

      if (cond) {
         /* Branch target is IP-relative, signed 18-bit offset. */
         int32_t offset = load_imm(inst);
         assert((int)sim->ip + offset >= 0);
         assert((int)sim->ip + offset < 0x1000);
         sim->next_ip = sim->ip + offset;
      }
   } else {
      uint32_t scratch = eval_op(sim, inst);
      switch (inst->assign_op) {
         case MME_FERMI_ASSIGN_OP_LOAD:
            /* dst = next parameter; the ALU result is discarded. */
            store_reg(sim, inst->dst, load_param(sim));
            break;
         case MME_FERMI_ASSIGN_OP_MOVE:
            store_reg(sim, inst->dst, scratch);
            break;
         case MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR:
            store_reg(sim, inst->dst, scratch);
            set_mthd(sim, scratch);
            break;
         case MME_FERMI_ASSIGN_OP_LOAD_EMIT:
            /* dst = next parameter, then emit the ALU result. */
            store_reg(sim, inst->dst, load_param(sim));
            emit_mthd(sim, scratch);
            break;
         case MME_FERMI_ASSIGN_OP_MOVE_EMIT:
            store_reg(sim, inst->dst, scratch);
            emit_mthd(sim, scratch);
            break;
         case MME_FERMI_ASSIGN_OP_LOAD_SET_MADDR:
            /* NOTE(review): identical to MOVE_SET_MADDR above — it stores
             * the ALU result, not a loaded parameter, despite the "LOAD"
             * in the name.  Confirm against hardware behavior.
             */
            store_reg(sim, inst->dst, scratch);
            set_mthd(sim, scratch);
            break;
         case MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR_LOAD_EMIT:
            store_reg(sim, inst->dst, scratch);
            set_mthd(sim, scratch);
            /* Emit the next parameter at the just-set method address. */
            emit_mthd(sim, load_param(sim));
            break;
         case MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR_LOAD_EMIT_HIGH:
            store_reg(sim, inst->dst, scratch);
            set_mthd(sim, scratch);
            /* Emit bits 17:12 of the result. */
            emit_mthd(sim, (scratch >> 12) & 0x3f);
            break;
         default:
            unreachable("Unhandled ASSIGN op");
      }
   }
}
209 
/*
 * Run a Fermi MME macro to completion.
 *
 * Executes instructions starting at IP 0 until an instruction with
 * end_next is reached, then executes exactly one more instruction (the
 * exit delay slot).  Branches also have a delay slot unless
 * branch.no_delay is set.  All external interaction (parameter loads,
 * state reads, method emission) goes through state_ops/state_handler.
 */
void
mme_fermi_sim_core(uint32_t inst_count, const struct mme_fermi_inst *insts,
                   const struct mme_sim_state_ops *state_ops,
                   void *state_handler)
{
   struct mme_fermi_sim sim = {
      .state_ops = state_ops,
      .state_handler = state_handler,
   };

   sim.ip = 0;
   /* First preload first argument in R1*/
   store_reg(&sim, MME_FERMI_REG_R1, load_param(&sim));

   bool end_next = false;
   bool ignore_next_exit = false;
   bool should_delay_branch = false;

   while (!end_next) {
      assert(sim.ip < inst_count);
      const struct mme_fermi_inst *inst = &insts[sim.ip];

      /* If the previous instruction was a delayed branch, next_ip already
       * holds the branch target; don't clobber it.
       */
      if (!should_delay_branch) {
         sim.next_ip = sim.ip + 1;
      }

      eval_inst(&sim, inst);

      should_delay_branch = inst->op == MME_FERMI_OP_BRANCH && !inst->branch.no_delay;

      if (should_delay_branch) {
         /* Execute the delay-slot instruction (right after the branch)
          * before jumping to next_ip on the following iteration.
          */
         sim.ip = sim.ip + 1;
      } else {
         sim.ip = sim.next_ip;
      }

      /* A delayed branch that itself has end_next set suppresses the
       * end_next of the delay-slot instruction that follows it.
       */
      if (inst->end_next && should_delay_branch) {
         ignore_next_exit = true;
         continue;
      }

      end_next = inst->end_next && !ignore_next_exit;
      ignore_next_exit = false;
   }

   // Handle delay slot at exit
   assert(sim.ip < inst_count);
   eval_inst(&sim, &insts[sim.ip]);
}
259 
/* Backing state for the default client used by mme_fermi_sim(). */
struct mme_fermi_state_sim {
   /* Remaining macro parameters, consumed front-to-back by _load() */
   uint32_t param_count;
   const uint32_t *params;

   /* Bound memory ranges */
   uint32_t mem_count;
   struct mme_fermi_sim_mem *mems;

   /* SET_MME_SHADOW_SCRATCH(i) */
   uint32_t scratch[MME_FERMI_SCRATCH_COUNT];

   /* Latched SET_REPORT_SEMAPHORE_{A,B,C} values; the write to bound
    * memory happens when SET_REPORT_SEMAPHORE_D arrives.
    */
   struct {
      uint32_t addr_hi;
      uint32_t addr_lo;
      uint32_t data;
   } report_sem;
};
277 
278 static uint32_t *
find_mem(struct mme_fermi_state_sim * sim,uint64_t addr,const char * op_desc)279 find_mem(struct mme_fermi_state_sim *sim, uint64_t addr, const char *op_desc)
280 {
281    for (uint32_t i = 0; i < sim->mem_count; i++) {
282       if (addr < sim->mems[i].addr)
283          continue;
284 
285       uint64_t offset = addr - sim->mems[i].addr;
286       if (offset >= sim->mems[i].size)
287          continue;
288 
289       assert(sim->mems[i].data != NULL);
290       return (uint32_t *)((char *)sim->mems[i].data + offset);
291    }
292 
293    fprintf(stderr, "FAULT in %s at address 0x%"PRIx64"\n", op_desc, addr);
294    abort();
295 }
296 
297 static uint32_t
mme_fermi_state_sim_load(void * _sim)298 mme_fermi_state_sim_load(void *_sim)
299 {
300    struct mme_fermi_state_sim *sim = _sim;
301 
302    assert(sim->param_count > 0);
303    uint32_t data = *sim->params;
304    sim->params++;
305    sim->param_count--;
306 
307    return data;
308 }
309 
310 static uint32_t
mme_fermi_state_sim_state(void * _sim,uint16_t addr)311 mme_fermi_state_sim_state(void *_sim, uint16_t addr)
312 {
313    struct mme_fermi_state_sim *sim = _sim;
314    assert(addr % 4 == 0);
315 
316    if (NV9097_SET_MME_SHADOW_SCRATCH(0) <= addr &&
317        addr < NV9097_CALL_MME_MACRO(0)) {
318       uint32_t i = (addr - NV9097_SET_MME_SHADOW_SCRATCH(0)) / 4;
319       assert(i <= ARRAY_SIZE(sim->scratch));
320       return sim->scratch[i];
321    }
322 
323    return 0;
324 }
325 
326 static void
mme_fermi_state_sim_mthd(void * _sim,uint16_t addr,uint32_t data)327 mme_fermi_state_sim_mthd(void *_sim, uint16_t addr, uint32_t data)
328 {
329    struct mme_fermi_state_sim *sim = _sim;
330    assert(addr % 4 == 0);
331 
332    switch (addr) {
333    case NV9097_SET_REPORT_SEMAPHORE_A:
334       sim->report_sem.addr_hi = data;
335       break;
336    case NV9097_SET_REPORT_SEMAPHORE_B:
337       sim->report_sem.addr_lo = data;
338       break;
339    case NV9097_SET_REPORT_SEMAPHORE_C:
340       sim->report_sem.data = data;
341       break;
342    case NV9097_SET_REPORT_SEMAPHORE_D: {
343       assert(data == 0x10000000);
344       uint64_t sem_report_addr =
345          ((uint64_t)sim->report_sem.addr_hi << 32) | sim->report_sem.addr_lo;
346       uint32_t *mem = find_mem(sim, sem_report_addr, "SET_REPORT_SEMAPHORE");
347       *mem = sim->report_sem.data;
348       break;
349    }
350    default:
351       if (NV9097_SET_MME_SHADOW_SCRATCH(0) <= addr &&
352           addr < NV9097_CALL_MME_MACRO(0)) {
353          uint32_t i = (addr - NV9097_SET_MME_SHADOW_SCRATCH(0)) / 4;
354          assert(i <= ARRAY_SIZE(sim->scratch));
355          sim->scratch[i] = data;
356       } else {
357          fprintf(stdout, "%s:\n", P_PARSE_NV9097_MTHD(addr));
358          P_DUMP_NV9097_MTHD_DATA(stdout, addr, data, "    ");
359       }
360       break;
361    }
362 }
363 
/* Dispatch table wiring the default state simulator into
 * mme_fermi_sim_core().
 */
static const struct mme_sim_state_ops mme_fermi_state_sim_ops = {
   .load = mme_fermi_state_sim_load,
   .state = mme_fermi_state_sim_state,
   .mthd = mme_fermi_state_sim_mthd,
};
369 
370 void
mme_fermi_sim(uint32_t inst_count,const struct mme_fermi_inst * insts,uint32_t param_count,const uint32_t * params,uint32_t mem_count,struct mme_fermi_sim_mem * mems)371 mme_fermi_sim(uint32_t inst_count, const struct mme_fermi_inst *insts,
372               uint32_t param_count, const uint32_t *params,
373               uint32_t mem_count, struct mme_fermi_sim_mem *mems)
374 {
375    const uint32_t zero = 0;
376    struct mme_fermi_state_sim state_sim = {
377       /* We need at least one param because the first param is always
378        * preloaded into $r1.
379        */
380       .param_count = MAX2(1, param_count),
381       .params = param_count == 0 ? &zero : params,
382       .mem_count = mem_count,
383       .mems = mems,
384    };
385 
386    mme_fermi_sim_core(inst_count, insts, &mme_fermi_state_sim_ops, &state_sim);
387 }
388