1 /*
2 * Copyright © 2022 Mary Guillemard
3 * SPDX-License-Identifier: MIT
4 */
5 #include "mme_fermi_sim.h"
6
7 #include <inttypes.h>
8
9 #include "mme_fermi.h"
10 #include "util/u_math.h"
11
12 #include "nv_push_cl9097.h"
13 #include "nv_push_cl902d.h"
14
/* Execution state for one run of the Fermi MME simulator core. */
struct mme_fermi_sim {
   /* Callbacks used for parameter loads, state reads and method emission,
    * together with the opaque pointer that is handed back to each of them.
    */
   const struct mme_sim_state_ops *state_ops;
   void *state_handler;

   /* Current method target, written by set_mthd() and used by emit_mthd(). */
   struct {
      /* Method address passed to the mthd callback; advanced by inc * 4
       * after every emit.
       */
      unsigned mthd:16;
      /* Increment applied to mthd after every emit */
      unsigned inc:4;
      /* Set once set_mthd() has run; emit_mthd() does nothing before that */
      bool has_mthd:1;
   } mthd;

   /* Storage for every register except MME_FERMI_REG_ZERO; register `reg`
    * lives at regs[reg - 1].
    */
   uint32_t regs[7];
   /* Carry/borrow written by the ADD, ADDC, SUB and SUBB ALU ops and
    * consumed by ADDC and SUBB.
    */
   uint32_t alu_carry;
   /* Index of the instruction currently being evaluated */
   uint16_t ip;
   /* Index the core continues at; overwritten by taken branches */
   uint16_t next_ip;
};
30
load_param(struct mme_fermi_sim * sim)31 static uint32_t load_param(struct mme_fermi_sim *sim)
32 {
33 return sim->state_ops->load(sim->state_handler);
34 }
35
load_reg(struct mme_fermi_sim * sim,enum mme_fermi_reg reg)36 static uint32_t load_reg(struct mme_fermi_sim *sim, enum mme_fermi_reg reg)
37 {
38 if (reg == MME_FERMI_REG_ZERO) {
39 return 0;
40 }
41
42 return sim->regs[reg - 1];
43 }
44
store_reg(struct mme_fermi_sim * sim,enum mme_fermi_reg reg,uint32_t val)45 static void store_reg(struct mme_fermi_sim *sim, enum mme_fermi_reg reg, uint32_t val)
46 {
47 if (reg == MME_FERMI_REG_ZERO) {
48 return;
49 }
50
51 sim->regs[reg - 1] = val;
52 }
53
load_imm(const struct mme_fermi_inst * inst)54 static int32_t load_imm(const struct mme_fermi_inst *inst)
55 {
56 return util_mask_sign_extend(inst->imm, 18);
57 }
58
load_state(struct mme_fermi_sim * sim,uint16_t addr)59 static uint32_t load_state(struct mme_fermi_sim *sim, uint16_t addr)
60 {
61 return sim->state_ops->state(sim->state_handler, addr);
62 }
63
/* Extracts `size` bits of `value` starting at `src_bit` and shifts the
 * extracted field left by `dst_bit`. Yields 0 when either bit position is
 * above 31.
 */
static uint32_t eval_bfe_lsl(uint32_t value, uint32_t src_bit, uint32_t dst_bit, uint8_t size)
{
   const bool shifts_in_range = src_bit <= 31 && dst_bit <= 31;

   if (!shifts_in_range) {
      return 0;
   }

   const uint32_t field = (value >> src_bit) & BITFIELD_MASK(size);

   return field << dst_bit;
}
72
eval_op(struct mme_fermi_sim * sim,const struct mme_fermi_inst * inst)73 static uint32_t eval_op(struct mme_fermi_sim *sim, const struct mme_fermi_inst *inst) {
74 assert(inst->op != MME_FERMI_OP_BRANCH);
75
76 uint32_t x = load_reg(sim, inst->src[0]);
77 uint32_t y = load_reg(sim, inst->src[1]);
78
79 switch (inst->op) {
80 case MME_FERMI_OP_ALU_REG: {
81 uint32_t res = 0;
82
83 switch (inst->alu_op) {
84 case MME_FERMI_ALU_OP_ADD:
85 res = x + y;
86 sim->alu_carry = res < x;
87 break;
88 case MME_FERMI_ALU_OP_ADDC:
89 res = x + y + sim->alu_carry;
90 sim->alu_carry = res < x;
91 break;
92 case MME_FERMI_ALU_OP_SUB:
93 res = x - y;
94 sim->alu_carry = res > x;
95 break;
96 case MME_FERMI_ALU_OP_SUBB:
97 res = x - y - sim->alu_carry;
98 sim->alu_carry = res > x;
99 break;
100 case MME_FERMI_ALU_OP_XOR:
101 res = x ^ y;
102 break;
103 case MME_FERMI_ALU_OP_OR:
104 res = x | y;
105 break;
106 case MME_FERMI_ALU_OP_AND:
107 res = x & y;
108 break;
109 case MME_FERMI_ALU_OP_AND_NOT:
110 res = x & ~y;
111 break;
112 case MME_FERMI_ALU_OP_NAND:
113 res = ~(x & y);
114 break;
115 default:
116 unreachable("Unhandled ALU op");
117 }
118
119 return res;
120 }
121 case MME_FERMI_OP_ADD_IMM:
122 return x + load_imm(inst);
123 case MME_FERMI_OP_MERGE:
124 return (x & ~(BITFIELD_MASK(inst->bitfield.size) << inst->bitfield.dst_bit)) | (((y >> inst->bitfield.src_bit) & BITFIELD_MASK(inst->bitfield.size)) << inst->bitfield.dst_bit);
125 case MME_FERMI_OP_BFE_LSL_IMM:
126 return eval_bfe_lsl(y, x, inst->bitfield.dst_bit, inst->bitfield.size);
127 case MME_FERMI_OP_BFE_LSL_REG:
128 return eval_bfe_lsl(y, inst->bitfield.src_bit, x, inst->bitfield.size);
129 case MME_FERMI_OP_STATE:
130 return load_state(sim, (x + load_imm(inst)) * 4);
131 // TODO: reverse MME_FERMI_OP_UNK6
132 default:
133 unreachable("Unhandled op");
134 }
135 }
136
137 static void
set_mthd(struct mme_fermi_sim * sim,uint32_t val)138 set_mthd(struct mme_fermi_sim *sim, uint32_t val)
139 {
140 sim->mthd.mthd = (val & 0xfff) << 2;
141 sim->mthd.inc = (val >> 12) & 0xf;
142 sim->mthd.has_mthd = true;
143 }
144
145 static void
emit_mthd(struct mme_fermi_sim * sim,uint32_t val)146 emit_mthd(struct mme_fermi_sim *sim, uint32_t val)
147 {
148 // TODO: understand what happens on hardware when no mthd has been set.
149 if (!sim->mthd.has_mthd)
150 return;
151
152 sim->state_ops->mthd(sim->state_handler, sim->mthd.mthd, val);
153 sim->mthd.mthd += sim->mthd.inc * 4;
154 }
155
156 static void
eval_inst(struct mme_fermi_sim * sim,const struct mme_fermi_inst * inst)157 eval_inst(struct mme_fermi_sim *sim, const struct mme_fermi_inst *inst)
158 {
159 if (inst->op == MME_FERMI_OP_BRANCH) {
160 uint32_t val = load_reg(sim, inst->src[0]);
161 bool cond = inst->branch.not_zero ? val != 0 : val == 0;
162
163 if (cond) {
164 int32_t offset = load_imm(inst);
165 assert((int)sim->ip + offset >= 0);
166 assert((int)sim->ip + offset < 0x1000);
167 sim->next_ip = sim->ip + offset;
168 }
169 } else {
170 uint32_t scratch = eval_op(sim, inst);
171 switch (inst->assign_op) {
172 case MME_FERMI_ASSIGN_OP_LOAD:
173 store_reg(sim, inst->dst, load_param(sim));
174 break;
175 case MME_FERMI_ASSIGN_OP_MOVE:
176 store_reg(sim, inst->dst, scratch);
177 break;
178 case MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR:
179 store_reg(sim, inst->dst, scratch);
180 set_mthd(sim, scratch);
181 break;
182 case MME_FERMI_ASSIGN_OP_LOAD_EMIT:
183 store_reg(sim, inst->dst, load_param(sim));
184 emit_mthd(sim, scratch);
185 break;
186 case MME_FERMI_ASSIGN_OP_MOVE_EMIT:
187 store_reg(sim, inst->dst, scratch);
188 emit_mthd(sim, scratch);
189 break;
190 case MME_FERMI_ASSIGN_OP_LOAD_SET_MADDR:
191 store_reg(sim, inst->dst, scratch);
192 set_mthd(sim, scratch);
193 break;
194 case MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR_LOAD_EMIT:
195 store_reg(sim, inst->dst, scratch);
196 set_mthd(sim, scratch);
197 emit_mthd(sim, load_param(sim));
198 break;
199 case MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR_LOAD_EMIT_HIGH:
200 store_reg(sim, inst->dst, scratch);
201 set_mthd(sim, scratch);
202 emit_mthd(sim, (scratch >> 12) & 0x3f);
203 break;
204 default:
205 unreachable("Unhandled ASSIGN op");
206 }
207 }
208 }
209
/*
 * Runs a Fermi MME program on the simulator.
 *
 * inst_count/insts describe the program; state_ops/state_handler provide the
 * callbacks used for parameter loads, state reads and method emission.
 *
 * Execution starts at instruction 0 with the first parameter already loaded
 * into R1. Once an instruction with end_next set has been evaluated, the
 * instruction at the following ip is evaluated as well and the function
 * returns. An end_next flag on a delayed branch does not end the loop, and
 * it also suppresses the end_next flag of the instruction evaluated right
 * after that branch.
 */
void
mme_fermi_sim_core(uint32_t inst_count, const struct mme_fermi_inst *insts,
                   const struct mme_sim_state_ops *state_ops,
                   void *state_handler)
{
   struct mme_fermi_sim sim = {
      .state_ops = state_ops,
      .state_handler = state_handler,
   };

   sim.ip = 0;
   /* Preload the first argument into R1 */
   store_reg(&sim, MME_FERMI_REG_R1, load_param(&sim));

   bool end_next = false;
   bool ignore_next_exit = false;
   /* True while the previously evaluated instruction was a delayed branch,
    * i.e. the current instruction sits in its delay slot.
    */
   bool should_delay_branch = false;

   while (!end_next) {
      assert(sim.ip < inst_count);
      const struct mme_fermi_inst *inst = &insts[sim.ip];

      /* In a delay slot next_ip still holds the value left by the branch,
       * so it must not be reset to the fall-through address.
       */
      if (!should_delay_branch) {
         sim.next_ip = sim.ip + 1;
      }

      eval_inst(&sim, inst);

      should_delay_branch = inst->op == MME_FERMI_OP_BRANCH && !inst->branch.no_delay;

      /* A delayed branch first steps to the instruction right after it;
       * everything else continues at next_ip.
       *
       * NOTE(review): when a delayed branch is not taken, next_ip is still
       * the delay slot's own index, so the delay slot appears to be
       * evaluated twice - confirm against hardware behavior.
       */
      if (should_delay_branch) {
         sim.ip = sim.ip + 1;
      } else {
         sim.ip = sim.next_ip;
      }

      if (inst->end_next && should_delay_branch) {
         ignore_next_exit = true;
         continue;
      }

      end_next = inst->end_next && !ignore_next_exit;
      ignore_next_exit = false;
   }

   // Handle delay slot at exit
   assert(sim.ip < inst_count);
   eval_inst(&sim, &insts[sim.ip]);
}
259
/* State handler used by mme_fermi_sim(): supplies parameters, shadow scratch
 * state and a minimal report-semaphore model on top of bound memory ranges.
 */
struct mme_fermi_state_sim {
   /* Remaining parameters; mme_fermi_state_sim_load() consumes them front
    * to back.
    */
   uint32_t param_count;
   const uint32_t *params;

   /* Bound memory ranges */
   uint32_t mem_count;
   struct mme_fermi_sim_mem *mems;

   /* SET_MME_SHADOW_SCRATCH(i) */
   uint32_t scratch[MME_FERMI_SCRATCH_COUNT];

   /* Values latched by SET_REPORT_SEMAPHORE_A/B/C; SET_REPORT_SEMAPHORE_D
    * writes `data` to the address formed from addr_hi:addr_lo.
    */
   struct {
      uint32_t addr_hi;
      uint32_t addr_lo;
      uint32_t data;
   } report_sem;
};
277
278 static uint32_t *
find_mem(struct mme_fermi_state_sim * sim,uint64_t addr,const char * op_desc)279 find_mem(struct mme_fermi_state_sim *sim, uint64_t addr, const char *op_desc)
280 {
281 for (uint32_t i = 0; i < sim->mem_count; i++) {
282 if (addr < sim->mems[i].addr)
283 continue;
284
285 uint64_t offset = addr - sim->mems[i].addr;
286 if (offset >= sim->mems[i].size)
287 continue;
288
289 assert(sim->mems[i].data != NULL);
290 return (uint32_t *)((char *)sim->mems[i].data + offset);
291 }
292
293 fprintf(stderr, "FAULT in %s at address 0x%"PRIx64"\n", op_desc, addr);
294 abort();
295 }
296
297 static uint32_t
mme_fermi_state_sim_load(void * _sim)298 mme_fermi_state_sim_load(void *_sim)
299 {
300 struct mme_fermi_state_sim *sim = _sim;
301
302 assert(sim->param_count > 0);
303 uint32_t data = *sim->params;
304 sim->params++;
305 sim->param_count--;
306
307 return data;
308 }
309
310 static uint32_t
mme_fermi_state_sim_state(void * _sim,uint16_t addr)311 mme_fermi_state_sim_state(void *_sim, uint16_t addr)
312 {
313 struct mme_fermi_state_sim *sim = _sim;
314 assert(addr % 4 == 0);
315
316 if (NV9097_SET_MME_SHADOW_SCRATCH(0) <= addr &&
317 addr < NV9097_CALL_MME_MACRO(0)) {
318 uint32_t i = (addr - NV9097_SET_MME_SHADOW_SCRATCH(0)) / 4;
319 assert(i <= ARRAY_SIZE(sim->scratch));
320 return sim->scratch[i];
321 }
322
323 return 0;
324 }
325
326 static void
mme_fermi_state_sim_mthd(void * _sim,uint16_t addr,uint32_t data)327 mme_fermi_state_sim_mthd(void *_sim, uint16_t addr, uint32_t data)
328 {
329 struct mme_fermi_state_sim *sim = _sim;
330 assert(addr % 4 == 0);
331
332 switch (addr) {
333 case NV9097_SET_REPORT_SEMAPHORE_A:
334 sim->report_sem.addr_hi = data;
335 break;
336 case NV9097_SET_REPORT_SEMAPHORE_B:
337 sim->report_sem.addr_lo = data;
338 break;
339 case NV9097_SET_REPORT_SEMAPHORE_C:
340 sim->report_sem.data = data;
341 break;
342 case NV9097_SET_REPORT_SEMAPHORE_D: {
343 assert(data == 0x10000000);
344 uint64_t sem_report_addr =
345 ((uint64_t)sim->report_sem.addr_hi << 32) | sim->report_sem.addr_lo;
346 uint32_t *mem = find_mem(sim, sem_report_addr, "SET_REPORT_SEMAPHORE");
347 *mem = sim->report_sem.data;
348 break;
349 }
350 default:
351 if (NV9097_SET_MME_SHADOW_SCRATCH(0) <= addr &&
352 addr < NV9097_CALL_MME_MACRO(0)) {
353 uint32_t i = (addr - NV9097_SET_MME_SHADOW_SCRATCH(0)) / 4;
354 assert(i <= ARRAY_SIZE(sim->scratch));
355 sim->scratch[i] = data;
356 } else {
357 fprintf(stdout, "%s:\n", P_PARSE_NV9097_MTHD(addr));
358 P_DUMP_NV9097_MTHD_DATA(stdout, addr, data, " ");
359 }
360 break;
361 }
362 }
363
/* Callback table that routes the simulator core's parameter loads, state
 * reads and method emits to the mme_fermi_state_sim handlers in this file.
 */
static const struct mme_sim_state_ops mme_fermi_state_sim_ops = {
   .load = mme_fermi_state_sim_load,
   .state = mme_fermi_state_sim_state,
   .mthd = mme_fermi_state_sim_mthd,
};
369
370 void
mme_fermi_sim(uint32_t inst_count,const struct mme_fermi_inst * insts,uint32_t param_count,const uint32_t * params,uint32_t mem_count,struct mme_fermi_sim_mem * mems)371 mme_fermi_sim(uint32_t inst_count, const struct mme_fermi_inst *insts,
372 uint32_t param_count, const uint32_t *params,
373 uint32_t mem_count, struct mme_fermi_sim_mem *mems)
374 {
375 const uint32_t zero = 0;
376 struct mme_fermi_state_sim state_sim = {
377 /* We need at least one param because the first param is always
378 * preloaded into $r1.
379 */
380 .param_count = MAX2(1, param_count),
381 .params = param_count == 0 ? &zero : params,
382 .mem_count = mem_count,
383 .mems = mems,
384 };
385
386 mme_fermi_sim_core(inst_count, insts, &mme_fermi_state_sim_ops, &state_sim);
387 }
388