1 /*
2 * Copyright © 2022 Collabora Ltd.
3 * SPDX-License-Identifier: MIT
4 */
5 #include "mme_tu104_sim.h"
6
7 #include <inttypes.h>
8
9 #include "mme_tu104.h"
10 #include "util/u_math.h"
11
12 #include "nv_push_clc597.h"
13
struct mme_tu104_sim {
   /* Callbacks used to interact with the simulated GPU state */
   const struct mme_sim_state_ops *state_ops;
   void *state_handler;

   /* Values fetched for MME_TU104_REG_LOAD0/LOAD1 this instruction */
   uint32_t load[2];

   /* Currently accumulating method: header plus up to 8 data dwords that
    * are flushed to state_ops->mthd() as a batch.
    */
   struct {
      unsigned mthd:16;     /* current method address, in bytes */
      unsigned inc:4;       /* per-dword method increment */
      bool has_mthd:1;      /* true once a method header has been set */
      unsigned _pad:5;
      unsigned data_len:8;  /* number of valid entries in data[] */
      uint32_t data[8];
   } mthd;

   /* Bitmask of GPRs that have been written; used by load_reg() to catch
    * reads of uninitialized registers.
    */
   uint32_t set_regs;
   uint32_t regs[23];
   uint32_t alu_res[2];  /* results of ALU0/ALU1 for this instruction */
   uint32_t alu_carry;   /* carry/borrow/high bits from ADD/SUB/MUL */

   uint16_t ip;       /* instruction pointer of the current instruction */
   uint16_t next_ip;  /* where execution continues afterwards */
   bool stop;         /* set by a taken branch with the 0xf000 offset */

   /* State for the LOOP op */
   uint32_t loop_count;  /* remaining extra iterations */
   uint16_t loop_start;  /* ip of the LOOP instruction itself */
   uint16_t loop_end;    /* ip of the last instruction of the loop body */
};
42
43 static bool
inst_loads_reg(const struct mme_tu104_inst * inst,enum mme_tu104_reg reg)44 inst_loads_reg(const struct mme_tu104_inst *inst,
45 enum mme_tu104_reg reg)
46 {
47 return inst->pred == reg ||
48 inst->alu[0].src[0] == reg ||
49 inst->alu[0].src[1] == reg ||
50 inst->alu[1].src[0] == reg ||
51 inst->alu[1].src[1] == reg;
52 }
53
54 static bool
inst_loads_out(const struct mme_tu104_inst * inst,enum mme_tu104_out_op out)55 inst_loads_out(const struct mme_tu104_inst *inst,
56 enum mme_tu104_out_op out)
57 {
58 return inst->out[0].mthd == out ||
59 inst->out[0].emit == out ||
60 inst->out[1].mthd == out ||
61 inst->out[1].emit == out;
62 }
63
64 static void
load_params(struct mme_tu104_sim * sim,const struct mme_tu104_inst * inst)65 load_params(struct mme_tu104_sim *sim,
66 const struct mme_tu104_inst *inst)
67 {
68 const bool has_load0 = inst_loads_reg(inst, MME_TU104_REG_LOAD0) ||
69 inst_loads_out(inst, MME_TU104_OUT_OP_LOAD0);
70 const bool has_load1 = inst_loads_reg(inst, MME_TU104_REG_LOAD1) ||
71 inst_loads_out(inst, MME_TU104_OUT_OP_LOAD1);
72 assert(has_load0 || !has_load1);
73
74 if (has_load0)
75 sim->load[0] = sim->state_ops->load(sim->state_handler);
76
77 if (has_load1)
78 sim->load[1] = sim->state_ops->load(sim->state_handler);
79 }
80
81 static uint32_t
load_state(struct mme_tu104_sim * sim,uint16_t state)82 load_state(struct mme_tu104_sim *sim, uint16_t state)
83 {
84 return sim->state_ops->state(sim->state_handler, state);
85 }
86
87 static void
flush_mthd(struct mme_tu104_sim * sim)88 flush_mthd(struct mme_tu104_sim *sim)
89 {
90 if (!sim->mthd.has_mthd)
91 return;
92
93 for (uint32_t i = 0; i < sim->mthd.data_len; i++) {
94 sim->state_ops->mthd(sim->state_handler,
95 sim->mthd.mthd,
96 sim->mthd.data[i]);
97 sim->mthd.mthd += sim->mthd.inc * 4;
98 }
99
100 sim->mthd.has_mthd = false;
101 }
102
103 static void
eval_extended(struct mme_tu104_sim * sim,uint32_t x,uint32_t y)104 eval_extended(struct mme_tu104_sim *sim,
105 uint32_t x, uint32_t y)
106 {
107 /* The only extended method we know about appears to be some sort of
108 * barrier required when using READ_FIFOED.
109 */
110 assert(x == 0x1000);
111 assert(y == 1);
112 flush_mthd(sim);
113 if (sim->state_ops->barrier)
114 sim->state_ops->barrier(sim->state_handler);
115 }
116
/* Reads the value of a source register.
 *
 * imm_idx is the index of the ALU doing the read and selects which
 * immediate IMM/IMMPAIR refer to; callers whose operand can never be an
 * immediate (the predicate) pass -1.
 */
static uint32_t
load_reg(struct mme_tu104_sim *sim,
         const struct mme_tu104_inst *inst,
         uint32_t imm_idx, enum mme_tu104_reg reg)
{
   if (reg <= MME_TU104_REG_R23) {
      /* Reading a GPR that was never written is a macro bug */
      assert(sim->set_regs & BITFIELD_BIT(reg));
      return sim->regs[reg];
   }

   switch (reg) {
   case MME_TU104_REG_ZERO:
      return 0;
   case MME_TU104_REG_IMM:
      assert(imm_idx < 2);
      /* Immediates are treated as signed for ALU ops */
      return (int16_t)inst->imm[imm_idx];
   case MME_TU104_REG_IMMPAIR:
      /* Like IMM but reads the other ALU's immediate */
      assert(imm_idx < 2);
      /* Immediates are treated as signed for ALU ops */
      return (int16_t)inst->imm[1 - imm_idx];
   case MME_TU104_REG_IMM32:
      /* imm[0] is the high half, imm[1] the low half */
      return ((uint32_t)inst->imm[0] << 16) | inst->imm[1];
   case MME_TU104_REG_LOAD0:
      return sim->load[0];
   case MME_TU104_REG_LOAD1:
      return sim->load[1];
   default:
      unreachable("Unhandled register type");
   }
}
148
/* Computes the 4-bit execution-enable mask for an instruction.
 *
 * Bits 0-3 gate ALU0, ALU1, OUT0 and OUT1 respectively.  The predicate
 * mode string has one character per slot; 'U' never equals the 'T'/'F'
 * truth letter, so unconditional slots are always enabled.
 * NOTE(review): for 'T'/'F' characters this enables the slot when the
 * character does NOT match the predicate's truth value -- confirm
 * against the mme_tu104_pred_to_str() encoding.
 */
static uint8_t
load_pred(struct mme_tu104_sim *sim,
          const struct mme_tu104_inst *inst)
{
   /* UUUU enables everything; don't touch the predicate register */
   if (inst->pred_mode == MME_TU104_PRED_UUUU)
      return 0xf;

   uint32_t val = load_reg(sim, inst, -1, inst->pred);
   const char *pred = mme_tu104_pred_to_str(inst->pred_mode);

   uint8_t mask = 0;
   for (unsigned i = 0; i < 4; i++) {
      if (pred[i] != (val ? 'T' : 'F'))
         mask |= BITFIELD_BIT(i);
   }

   return mask;
}
167
168 static void
store_reg(struct mme_tu104_sim * sim,enum mme_tu104_reg reg,uint32_t val)169 store_reg(struct mme_tu104_sim *sim,
170 enum mme_tu104_reg reg,
171 uint32_t val)
172 {
173 if (reg <= MME_TU104_REG_R23) {
174 sim->set_regs |= BITFIELD_BIT(reg);
175 sim->regs[reg] = val;
176 } else if (reg <= MME_TU104_REG_ZERO) {
177 /* Do nothing */
178 } else {
179 unreachable("Unhandled register type");
180 }
181 }
182
183 static bool
eval_cond(enum mme_tu104_alu_op op,uint32_t x,uint32_t y)184 eval_cond(enum mme_tu104_alu_op op, uint32_t x, uint32_t y)
185 {
186 switch (op) {
187 case MME_TU104_ALU_OP_BLT:
188 case MME_TU104_ALU_OP_SLT:
189 return (int32_t)x < (int32_t)y;
190 case MME_TU104_ALU_OP_BLTU:
191 case MME_TU104_ALU_OP_SLTU:
192 return (uint32_t)x < (uint32_t)y;
193 case MME_TU104_ALU_OP_BLE:
194 case MME_TU104_ALU_OP_SLE:
195 return (int32_t)x <= (int32_t)y;
196 case MME_TU104_ALU_OP_BLEU:
197 case MME_TU104_ALU_OP_SLEU:
198 return (uint32_t)x <= (uint32_t)y;
199 case MME_TU104_ALU_OP_BEQ:
200 case MME_TU104_ALU_OP_SEQ:
201 return x == y;
202 default:
203 unreachable("Not a comparison op");
204 }
205 }
206
207 static void
eval_alu(struct mme_tu104_sim * sim,const struct mme_tu104_inst * inst,uint32_t alu_idx)208 eval_alu(struct mme_tu104_sim *sim,
209 const struct mme_tu104_inst *inst,
210 uint32_t alu_idx)
211 {
212 const struct mme_tu104_alu *alu = &inst->alu[alu_idx];
213 const uint32_t x = load_reg(sim, inst, alu_idx, alu->src[0]);
214 const uint32_t y = load_reg(sim, inst, alu_idx, alu->src[1]);
215
216 uint32_t res = 0;
217 switch (inst->alu[alu_idx].op) {
218 case MME_TU104_ALU_OP_ADD:
219 res = x + y;
220 sim->alu_carry = res < x;
221 break;
222 case MME_TU104_ALU_OP_ADDC:
223 assert(alu_idx == 1);
224 assert(inst->alu[0].op == MME_TU104_ALU_OP_ADD);
225 res = x + y + sim->alu_carry;
226 break;
227 case MME_TU104_ALU_OP_SUB:
228 res = x - y;
229 sim->alu_carry = res > x;
230 break;
231 case MME_TU104_ALU_OP_SUBB:
232 assert(alu_idx == 1);
233 assert(inst->alu[0].op == MME_TU104_ALU_OP_SUB);
234 res = x - y - sim->alu_carry;
235 break;
236 case MME_TU104_ALU_OP_MUL: {
237 /* Sign extend but use uint64_t for the multiply so that we avoid
238 * undefined behavior from possible signed multiply roll-over.
239 */
240 const uint64_t x_u64 = (int64_t)(int32_t)x;
241 const uint64_t y_u64 = (int64_t)(int32_t)y;
242 const uint64_t xy_u64 = x_u64 * y_u64;
243 res = xy_u64;
244 sim->alu_carry = xy_u64 >> 32;
245 break;
246 }
247 case MME_TU104_ALU_OP_MULH:
248 assert(inst->alu[alu_idx].src[0] == MME_TU104_REG_ZERO);
249 assert(inst->alu[alu_idx].src[1] == MME_TU104_REG_ZERO);
250 res = sim->alu_carry;
251 break;
252 case MME_TU104_ALU_OP_MULU: {
253 const uint64_t x_u64 = x;
254 const uint64_t y_u64 = y;
255 const uint64_t xy_u64 = x_u64 * y_u64;
256 res = xy_u64;
257 sim->alu_carry = xy_u64 >> 32;
258 break;
259 }
260 case MME_TU104_ALU_OP_EXTENDED:
261 eval_extended(sim, x, y);
262 break;
263 case MME_TU104_ALU_OP_CLZ:
264 res = __builtin_clz(x);
265 break;
266 case MME_TU104_ALU_OP_SLL:
267 res = x << (y & 31);
268 break;
269 case MME_TU104_ALU_OP_SRL:
270 res = x >> (y & 31);
271 break;
272 case MME_TU104_ALU_OP_SRA:
273 res = (int32_t)x >> (y & 31);
274 break;
275 case MME_TU104_ALU_OP_AND:
276 res = x & y;
277 break;
278 case MME_TU104_ALU_OP_NAND:
279 res = ~(x & y);
280 break;
281 case MME_TU104_ALU_OP_OR:
282 res = x | y;
283 break;
284 case MME_TU104_ALU_OP_XOR:
285 res = x ^ y;
286 break;
287 case MME_TU104_ALU_OP_MERGE: {
288 uint16_t immed = inst->imm[alu_idx];
289 uint32_t dst_pos = (immed >> 10) & 0x3f;
290 uint32_t bits = (immed >> 5) & 0x1f;
291 uint32_t src_pos = (immed >> 0) & 0x1f;
292 res = (x & ~(BITFIELD_MASK(bits) << dst_pos)) |
293 (((y >> src_pos) & BITFIELD_MASK(bits)) << dst_pos);
294 break;
295 }
296 case MME_TU104_ALU_OP_SLT:
297 case MME_TU104_ALU_OP_SLTU:
298 case MME_TU104_ALU_OP_SLE:
299 case MME_TU104_ALU_OP_SLEU:
300 case MME_TU104_ALU_OP_SEQ:
301 res = eval_cond(inst->alu[alu_idx].op, x, y) ? ~0u : 0u;
302 break;
303 case MME_TU104_ALU_OP_STATE:
304 flush_mthd(sim);
305 res = load_state(sim, (uint16_t)(x + y) * 4);
306 break;
307 case MME_TU104_ALU_OP_LOOP:
308 assert(sim->loop_count == 0);
309 assert(inst->alu[alu_idx].dst == MME_TU104_REG_ZERO);
310 assert(inst->alu[alu_idx].src[1] == MME_TU104_REG_ZERO);
311 sim->loop_count = MAX2(1, x) - 1;
312 sim->loop_start = sim->ip;
313 sim->loop_end = sim->ip + inst->imm[alu_idx] - 1;
314 assert(sim->loop_end > sim->ip);
315 break;
316 case MME_TU104_ALU_OP_JAL: {
317 assert(inst->alu[alu_idx].dst == MME_TU104_REG_ZERO);
318 assert(inst->alu[alu_idx].src[0] == MME_TU104_REG_ZERO);
319 assert(inst->alu[alu_idx].src[1] == MME_TU104_REG_ZERO);
320 /* No idea what bit 15 does. The NVIDIA blob always sets it. */
321 assert(inst->imm[alu_idx] & BITFIELD_BIT(15));
322 uint16_t offset = (inst->imm[alu_idx] & BITFIELD_MASK(15));
323 sim->next_ip = sim->ip + offset;
324 res = 0;
325 break;
326 }
327 case MME_TU104_ALU_OP_BLT:
328 case MME_TU104_ALU_OP_BLTU:
329 case MME_TU104_ALU_OP_BLE:
330 case MME_TU104_ALU_OP_BLEU:
331 case MME_TU104_ALU_OP_BEQ: {
332 assert(inst->alu[alu_idx].dst == MME_TU104_REG_ZERO);
333 bool expect = (inst->imm[alu_idx] & BITFIELD_BIT(15)) != 0;
334 if (eval_cond(inst->alu[alu_idx].op, x, y) == expect) {
335 int16_t offset = util_mask_sign_extend(inst->imm[alu_idx], 13);
336 if ((uint16_t)offset == 0xf000) {
337 sim->stop = true;
338 break;
339 }
340
341 assert((int)sim->ip + offset >= 0);
342 assert((int)sim->ip + offset < 0x1000);
343 sim->next_ip = sim->ip + offset;
344 }
345 break;
346 }
347 case MME_TU104_ALU_OP_DREAD: {
348 assert(inst->alu[alu_idx].src[1] == MME_TU104_REG_ZERO);
349 uint32_t *dram = sim->state_ops->map_dram(sim->state_handler, x);
350 res = *dram;
351 break;
352 }
353 case MME_TU104_ALU_OP_DWRITE: {
354 assert(inst->alu[alu_idx].dst == MME_TU104_REG_ZERO);
355 uint32_t *dram = sim->state_ops->map_dram(sim->state_handler, x);
356 *dram = y;
357 break;
358 }
359 default:
360 unreachable("Unhandled ALU op");
361 }
362
363 sim->alu_res[alu_idx] = res;
364 store_reg(sim, inst->alu[alu_idx].dst, res);
365 }
366
367 static uint32_t
load_out(struct mme_tu104_sim * sim,const struct mme_tu104_inst * inst,enum mme_tu104_out_op op)368 load_out(struct mme_tu104_sim *sim,
369 const struct mme_tu104_inst *inst,
370 enum mme_tu104_out_op op)
371 {
372 switch (op) {
373 case MME_TU104_OUT_OP_ALU0:
374 return sim->alu_res[0];
375 case MME_TU104_OUT_OP_ALU1:
376 return sim->alu_res[1];
377 case MME_TU104_OUT_OP_LOAD0:
378 return sim->load[0];
379 case MME_TU104_OUT_OP_LOAD1:
380 return sim->load[1];
381 case MME_TU104_OUT_OP_IMM0:
382 return inst->imm[0];
383 case MME_TU104_OUT_OP_IMM1:
384 return inst->imm[1];
385 case MME_TU104_OUT_OP_IMMHIGH0:
386 return inst->imm[0] >> 12;
387 case MME_TU104_OUT_OP_IMMHIGH1:
388 return inst->imm[1] >> 12;
389 case MME_TU104_OUT_OP_IMM32:
390 return ((uint32_t)inst->imm[0] << 16) | inst->imm[1];
391 default:
392 unreachable("Unhandled output op");
393 }
394 }
395
396 static void
eval_out(struct mme_tu104_sim * sim,const struct mme_tu104_inst * inst,uint32_t out_idx)397 eval_out(struct mme_tu104_sim *sim,
398 const struct mme_tu104_inst *inst,
399 uint32_t out_idx)
400 {
401 if (inst->out[out_idx].mthd != MME_TU104_OUT_OP_NONE) {
402 uint32_t data = load_out(sim, inst, inst->out[out_idx].mthd);
403
404 flush_mthd(sim);
405 sim->mthd.mthd = (data & 0xfff) << 2;
406 sim->mthd.inc = (data >> 12) & 0xf;
407 sim->mthd.has_mthd = true;
408 sim->mthd.data_len = 0;
409 }
410
411 if (inst->out[out_idx].emit != MME_TU104_OUT_OP_NONE) {
412 uint32_t data = load_out(sim, inst, inst->out[out_idx].emit);
413
414 assert(sim->mthd.data_len < ARRAY_SIZE(sim->mthd.data));
415 sim->mthd.data[sim->mthd.data_len++] = data;
416 }
417 }
418
419 void
mme_tu104_sim_core(uint32_t inst_count,const struct mme_tu104_inst * insts,const struct mme_sim_state_ops * state_ops,void * state_handler)420 mme_tu104_sim_core(uint32_t inst_count, const struct mme_tu104_inst *insts,
421 const struct mme_sim_state_ops *state_ops,
422 void *state_handler)
423 {
424 struct mme_tu104_sim sim = {
425 .state_ops = state_ops,
426 .state_handler = state_handler,
427 };
428
429 bool end_next = false;
430 while (true) {
431 assert(sim.ip < inst_count);
432 const struct mme_tu104_inst *inst = &insts[sim.ip];
433 sim.next_ip = sim.ip + 1;
434
435 load_params(&sim, inst);
436
437 uint8_t pred = load_pred(&sim, inst);
438
439 /* No idea why the HW has this rule but it does */
440 assert(inst->alu[0].op != MME_TU104_ALU_OP_STATE ||
441 inst->alu[1].op != MME_TU104_ALU_OP_STATE);
442
443 if (pred & BITFIELD_BIT(0))
444 eval_alu(&sim, inst, 0);
445 if (pred & BITFIELD_BIT(1))
446 eval_alu(&sim, inst, 1);
447 if (pred & BITFIELD_BIT(2))
448 eval_out(&sim, inst, 0);
449 if (pred & BITFIELD_BIT(3))
450 eval_out(&sim, inst, 1);
451
452 if (end_next || sim.stop)
453 break;
454
455 end_next = inst->end_next;
456
457 if (sim.loop_count > 0 && sim.ip == sim.loop_end) {
458 sim.loop_count--;
459 sim.next_ip = sim.loop_start + 1;
460 }
461
462 sim.ip = sim.next_ip;
463 }
464
465 flush_mthd(&sim);
466 }
467
/* Host-memory backing for the default state-ops implementation.
 *
 * Provides the macro with an input parameter stream, a set of bound
 * memory ranges, DATA_RAM storage, and shadow scratch registers.
 */
struct mme_tu104_state_sim {
   /* Remaining input parameters, consumed by LOAD0/LOAD1 */
   uint32_t param_count;
   const uint32_t *params;

   /* Bound memory ranges */
   uint32_t mem_count;
   struct mme_tu104_sim_mem *mems;

   /* SET_MME_MEM_ADDRESS_A/B */
   uint64_t mem_addr_lo;
   uint64_t mem_addr_hi;

   /* RAM, accessed by DREAD/DWRITE */
   struct {
      uint32_t data[MME_TU104_DRAM_COUNT];

      /* SET_MME_MEM_RAM_ADDRESS */
      uint32_t addr;
   } ram;

   struct {
      /* Dwords staged by MME_DMA_READ_FIFOED at the barrier and then
       * replayed as the parameter stream.
       */
      struct {
         uint32_t data[1024];
         uint32_t count;
      } read_fifo;
   } dma;

   /* NVC597_SET_MME_SHADOW_SCRATCH(i) */
   uint32_t scratch[MME_TU104_SCRATCH_COUNT];

   /* Accumulates SET_REPORT_SEMAPHORE_A..D */
   struct {
      uint32_t addr_hi;
      uint32_t addr_lo;
      uint32_t data;
   } report_sem;
};
504
505 static uint32_t *
find_mem(struct mme_tu104_state_sim * sim,uint64_t addr,const char * op_desc)506 find_mem(struct mme_tu104_state_sim *sim, uint64_t addr, const char *op_desc)
507 {
508 for (uint32_t i = 0; i < sim->mem_count; i++) {
509 if (addr < sim->mems[i].addr)
510 continue;
511
512 uint64_t offset = addr - sim->mems[i].addr;
513 if (offset >= sim->mems[i].size)
514 continue;
515
516 assert(sim->mems[i].data != NULL);
517 return (uint32_t *)((char *)sim->mems[i].data + offset);
518 }
519
520 fprintf(stderr, "FAULT in %s at address 0x%"PRIx64"\n", op_desc, addr);
521 abort();
522 }
523
524 static uint32_t
mme_tu104_state_sim_load(void * _sim)525 mme_tu104_state_sim_load(void *_sim)
526 {
527 struct mme_tu104_state_sim *sim = _sim;
528
529 assert(sim->param_count > 0);
530 uint32_t data = *sim->params;
531 sim->params++;
532 sim->param_count--;
533
534 return data;
535 }
536
537 static uint32_t
mme_tu104_state_sim_state(void * _sim,uint16_t addr)538 mme_tu104_state_sim_state(void *_sim, uint16_t addr)
539 {
540 struct mme_tu104_state_sim *sim = _sim;
541 assert(addr % 4 == 0);
542
543 if (NVC597_SET_MME_SHADOW_SCRATCH(0) <= addr &&
544 addr < NVC597_CALL_MME_MACRO(0)) {
545 uint32_t i = (addr - NVC597_SET_MME_SHADOW_SCRATCH(0)) / 4;
546 assert(i <= ARRAY_SIZE(sim->scratch));
547 return sim->scratch[i];
548 }
549
550 return 0;
551 }
552
553 static void
mme_tu104_state_sim_mthd(void * _sim,uint16_t addr,uint32_t data)554 mme_tu104_state_sim_mthd(void *_sim, uint16_t addr, uint32_t data)
555 {
556 struct mme_tu104_state_sim *sim = _sim;
557 assert(addr % 4 == 0);
558
559 switch (addr) {
560 case NVC597_SET_REPORT_SEMAPHORE_A:
561 sim->report_sem.addr_hi = data;
562 break;
563 case NVC597_SET_REPORT_SEMAPHORE_B:
564 sim->report_sem.addr_lo = data;
565 break;
566 case NVC597_SET_REPORT_SEMAPHORE_C:
567 sim->report_sem.data = data;
568 break;
569 case NVC597_SET_REPORT_SEMAPHORE_D: {
570 assert(data == 0x10000000);
571 uint64_t sem_report_addr =
572 ((uint64_t)sim->report_sem.addr_hi << 32) | sim->report_sem.addr_lo;
573 uint32_t *mem = find_mem(sim, sem_report_addr, "SET_REPORT_SEMAPHORE");
574 *mem = sim->report_sem.data;
575 break;
576 }
577 case NVC597_SET_MME_DATA_RAM_ADDRESS:
578 sim->ram.addr = data;
579 break;
580 case NVC597_SET_MME_MEM_ADDRESS_A:
581 sim->mem_addr_hi = data;
582 break;
583 case NVC597_SET_MME_MEM_ADDRESS_B:
584 sim->mem_addr_lo = data;
585 break;
586 case NVC597_MME_DMA_READ_FIFOED:
587 sim->dma.read_fifo.count = data;
588 break;
589 default:
590 if (NVC597_SET_MME_SHADOW_SCRATCH(0) <= addr &&
591 addr < NVC597_CALL_MME_MACRO(0)) {
592 uint32_t i = (addr - NVC597_SET_MME_SHADOW_SCRATCH(0)) / 4;
593 assert(i <= ARRAY_SIZE(sim->scratch));
594 sim->scratch[i] = data;
595 } else {
596 fprintf(stdout, "%s:\n", P_PARSE_NVC597_MTHD(addr));
597 P_DUMP_NVC597_MTHD_DATA(stdout, addr, data, " ");
598 }
599 break;
600 }
601 }
602
603 static void
mme_tu104_state_sim_barrier(void * _sim)604 mme_tu104_state_sim_barrier(void *_sim)
605 {
606 struct mme_tu104_state_sim *sim = _sim;
607
608 if (sim->dma.read_fifo.count == 0)
609 return;
610
611 const uint64_t mem_addr =
612 ((uint64_t)sim->mem_addr_hi << 32) | sim->mem_addr_lo;
613
614 for (uint32_t i = 0; i < sim->dma.read_fifo.count; i++) {
615 uint32_t *src = find_mem(sim, mem_addr + i * 4,
616 "MME_DMA_READ_FIFOED");
617 assert(src != NULL);
618 sim->dma.read_fifo.data[i] = *src;
619 }
620
621 sim->param_count = sim->dma.read_fifo.count;
622 sim->params = sim->dma.read_fifo.data;
623 }
624
625 static uint32_t *
mme_tu104_state_sim_map_dram(void * _sim,uint32_t idx)626 mme_tu104_state_sim_map_dram(void *_sim, uint32_t idx)
627 {
628 struct mme_tu104_state_sim *sim = _sim;
629
630 assert(idx < ARRAY_SIZE(sim->ram.data));
631 return &sim->ram.data[idx];
632 }
633
/* Dispatch table wiring the generic simulator core to the host-memory
 * state simulator above.
 */
static const struct mme_sim_state_ops mme_tu104_state_sim_ops = {
   .load = mme_tu104_state_sim_load,
   .state = mme_tu104_state_sim_state,
   .mthd = mme_tu104_state_sim_mthd,
   .barrier = mme_tu104_state_sim_barrier,
   .map_dram = mme_tu104_state_sim_map_dram,
};
641
642 void
mme_tu104_sim(uint32_t inst_count,const struct mme_tu104_inst * insts,uint32_t param_count,const uint32_t * params,uint32_t mem_count,struct mme_tu104_sim_mem * mems)643 mme_tu104_sim(uint32_t inst_count, const struct mme_tu104_inst *insts,
644 uint32_t param_count, const uint32_t *params,
645 uint32_t mem_count, struct mme_tu104_sim_mem *mems)
646 {
647 struct mme_tu104_state_sim state_sim = {
648 .param_count = param_count,
649 .params = params,
650 .mem_count = mem_count,
651 .mems = mems,
652 };
653
654 mme_tu104_sim_core(inst_count, insts, &mme_tu104_state_sim_ops, &state_sim);
655 }
656