/*
 * Copyright © 2022 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */
#include "mme_tu104_sim.h"

#include <inttypes.h>

#include "mme_tu104.h"
#include "util/u_math.h"

#include "nv_push_clc597.h"

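/* Execution state for a single simulated macro.  state_ops/state_handler
 * provide the environment (parameter loads, state reads, method output,
 * DRAM access), load[] holds the current LOAD0/LOAD1 parameters, mthd
 * buffers the method currently being assembled, and the remaining fields
 * track the GPR file, ALU results and carry, the instruction pointer, and
 * LOOP bookkeeping.
 */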
struct mme_tu104_sim {
   const struct mme_sim_state_ops *state_ops;
   void *state_handler;

   uint32_t load[2];

   struct {
      unsigned mthd:16;
      unsigned inc:4;
      bool has_mthd:1;
      unsigned _pad:5;
      unsigned data_len:8;
      uint32_t data[8];
   } mthd;

   uint32_t set_regs;
   uint32_t regs[23];
   uint32_t alu_res[2];
   uint32_t alu_carry;

   uint16_t ip;
   uint16_t next_ip;
   bool stop;

   uint32_t loop_count;
   uint16_t loop_start;
   uint16_t loop_end;
};

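/* Returns true if the instruction reads the given register, either as the
 * predicate or as one of the four ALU sources.
 */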
static bool
inst_loads_reg(const struct mme_tu104_inst *inst,
               enum mme_tu104_reg reg)
{
   return inst->pred == reg ||
          inst->alu[0].src[0] == reg ||
          inst->alu[0].src[1] == reg ||
          inst->alu[1].src[0] == reg ||
          inst->alu[1].src[1] == reg;
}

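/* Returns true if either output slot sources the given output op. */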
static bool
inst_loads_out(const struct mme_tu104_inst *inst,
               enum mme_tu104_out_op out)
{
   return inst->out[0].mthd == out ||
          inst->out[0].emit == out ||
          inst->out[1].mthd == out ||
          inst->out[1].emit == out;
}

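/* Fetches up to two parameters from the environment into LOAD0/LOAD1, but
 * only if the instruction actually consumes them.  LOAD1 is never expected
 * without LOAD0 (see the assert below).
 */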
static void
load_params(struct mme_tu104_sim *sim,
            const struct mme_tu104_inst *inst)
{
   const bool has_load0 = inst_loads_reg(inst, MME_TU104_REG_LOAD0) ||
                          inst_loads_out(inst, MME_TU104_OUT_OP_LOAD0);
   const bool has_load1 = inst_loads_reg(inst, MME_TU104_REG_LOAD1) ||
                          inst_loads_out(inst, MME_TU104_OUT_OP_LOAD1);
   assert(has_load0 || !has_load1);

   if (has_load0)
      sim->load[0] = sim->state_ops->load(sim->state_handler);

   if (has_load1)
      sim->load[1] = sim->state_ops->load(sim->state_handler);
}

static uint32_t
load_state(struct mme_tu104_sim *sim, uint16_t state)
{
   return sim->state_ops->state(sim->state_handler, state);
}

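/* Emits any buffered method data to the environment.  Each dword is sent to
 * the current method address, which then advances by inc * 4 bytes.
 */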
static void
flush_mthd(struct mme_tu104_sim *sim)
{
   if (!sim->mthd.has_mthd)
      return;

   for (uint32_t i = 0; i < sim->mthd.data_len; i++) {
      sim->state_ops->mthd(sim->state_handler,
                           sim->mthd.mthd,
                           sim->mthd.data[i]);
      sim->mthd.mthd += sim->mthd.inc * 4;
   }

   sim->mthd.has_mthd = false;
}

static void
eval_extended(struct mme_tu104_sim *sim,
              uint32_t x, uint32_t y)
{
   /* The only extended method we know about appears to be some sort of
    * barrier required when using READ_FIFOED.
    */
   assert(x == 0x1000);
   assert(y == 1);
   flush_mthd(sim);
   if (sim->state_ops->barrier)
      sim->state_ops->barrier(sim->state_handler);
}

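/* Reads an ALU source: a GPR (which must have been written first), zero,
 * one of the instruction immediates (sign-extended), the packed 32-bit
 * immediate, or one of the LOAD0/LOAD1 parameters.
 */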
static uint32_t
load_reg(struct mme_tu104_sim *sim,
         const struct mme_tu104_inst *inst,
         uint32_t imm_idx, enum mme_tu104_reg reg)
{
   if (reg <= MME_TU104_REG_R23) {
      assert(sim->set_regs & BITFIELD_BIT(reg));
      return sim->regs[reg];
   }

   switch (reg) {
   case MME_TU104_REG_ZERO:
      return 0;
   case MME_TU104_REG_IMM:
      assert(imm_idx < 2);
      /* Immediates are treated as signed for ALU ops */
      return (int16_t)inst->imm[imm_idx];
   case MME_TU104_REG_IMMPAIR:
      assert(imm_idx < 2);
      /* Immediates are treated as signed for ALU ops */
      return (int16_t)inst->imm[1 - imm_idx];
   case MME_TU104_REG_IMM32:
      return ((uint32_t)inst->imm[0] << 16) | inst->imm[1];
   case MME_TU104_REG_LOAD0:
      return sim->load[0];
   case MME_TU104_REG_LOAD1:
      return sim->load[1];
   default:
      unreachable("Unhandled register type");
   }
}

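/* Computes a 4-bit execute mask (ALU0, ALU1, OUT0, OUT1) from the
 * instruction's predicate mode string and the value of the predicate
 * register.  'U' slots always execute; the others depend on whether the
 * predicate value is non-zero.
 */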
static uint8_t
load_pred(struct mme_tu104_sim *sim,
          const struct mme_tu104_inst *inst)
{
   if (inst->pred_mode == MME_TU104_PRED_UUUU)
      return 0xf;

   uint32_t val = load_reg(sim, inst, -1, inst->pred);
   const char *pred = mme_tu104_pred_to_str(inst->pred_mode);

   uint8_t mask = 0;
   for (unsigned i = 0; i < 4; i++) {
      if (pred[i] != (val ? 'T' : 'F'))
         mask |= BITFIELD_BIT(i);
   }

   return mask;
}

static void
store_reg(struct mme_tu104_sim *sim,
          enum mme_tu104_reg reg,
          uint32_t val)
{
   if (reg <= MME_TU104_REG_R23) {
      sim->set_regs |= BITFIELD_BIT(reg);
      sim->regs[reg] = val;
   } else if (reg <= MME_TU104_REG_ZERO) {
      /* Do nothing */
   } else {
      unreachable("Unhandled register type");
   }
}

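/* Comparison helper shared by the branch (Bxx) and set (Sxx) ALU ops. */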
static bool
eval_cond(enum mme_tu104_alu_op op, uint32_t x, uint32_t y)
{
   switch (op) {
   case MME_TU104_ALU_OP_BLT:
   case MME_TU104_ALU_OP_SLT:
      return (int32_t)x < (int32_t)y;
   case MME_TU104_ALU_OP_BLTU:
   case MME_TU104_ALU_OP_SLTU:
      return (uint32_t)x < (uint32_t)y;
   case MME_TU104_ALU_OP_BLE:
   case MME_TU104_ALU_OP_SLE:
      return (int32_t)x <= (int32_t)y;
   case MME_TU104_ALU_OP_BLEU:
   case MME_TU104_ALU_OP_SLEU:
      return (uint32_t)x <= (uint32_t)y;
   case MME_TU104_ALU_OP_BEQ:
   case MME_TU104_ALU_OP_SEQ:
      return x == y;
   default:
      unreachable("Not a comparison op");
   }
}

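/* Evaluates one of the instruction's two ALU ops: arithmetic with a shared
 * carry, bit ops, state/DRAM access, and control flow (LOOP, JAL, branches).
 * The result is recorded for the output stage and written to the
 * destination register.
 */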
static void
eval_alu(struct mme_tu104_sim *sim,
         const struct mme_tu104_inst *inst,
         uint32_t alu_idx)
{
   const struct mme_tu104_alu *alu = &inst->alu[alu_idx];
   const uint32_t x = load_reg(sim, inst, alu_idx, alu->src[0]);
   const uint32_t y = load_reg(sim, inst, alu_idx, alu->src[1]);

   uint32_t res = 0;
   switch (inst->alu[alu_idx].op) {
   case MME_TU104_ALU_OP_ADD:
      res = x + y;
      sim->alu_carry = res < x;
      break;
   case MME_TU104_ALU_OP_ADDC:
      assert(alu_idx == 1);
      assert(inst->alu[0].op == MME_TU104_ALU_OP_ADD);
      res = x + y + sim->alu_carry;
      break;
   case MME_TU104_ALU_OP_SUB:
      res = x - y;
      sim->alu_carry = res > x;
      break;
   case MME_TU104_ALU_OP_SUBB:
      assert(alu_idx == 1);
      assert(inst->alu[0].op == MME_TU104_ALU_OP_SUB);
      res = x - y - sim->alu_carry;
      break;
   case MME_TU104_ALU_OP_MUL: {
      /* Sign extend but use uint64_t for the multiply so that we avoid
       * undefined behavior from possible signed multiply roll-over.
       */
      const uint64_t x_u64 = (int64_t)(int32_t)x;
      const uint64_t y_u64 = (int64_t)(int32_t)y;
      const uint64_t xy_u64 = x_u64 * y_u64;
      res = xy_u64;
      sim->alu_carry = xy_u64 >> 32;
      break;
   }
   case MME_TU104_ALU_OP_MULH:
      assert(inst->alu[alu_idx].src[0] == MME_TU104_REG_ZERO);
      assert(inst->alu[alu_idx].src[1] == MME_TU104_REG_ZERO);
      res = sim->alu_carry;
      break;
   case MME_TU104_ALU_OP_MULU: {
      const uint64_t x_u64 = x;
      const uint64_t y_u64 = y;
      const uint64_t xy_u64 = x_u64 * y_u64;
      res = xy_u64;
      sim->alu_carry = xy_u64 >> 32;
      break;
   }
   case MME_TU104_ALU_OP_EXTENDED:
      eval_extended(sim, x, y);
      break;
   case MME_TU104_ALU_OP_CLZ:
      /* __builtin_clz(0) is undefined behavior; assume the HW returns the
       * full width (32) for a zero input.
       */
      res = x == 0 ? 32 : __builtin_clz(x);
      break;
   case MME_TU104_ALU_OP_SLL:
      res = x << (y & 31);
      break;
   case MME_TU104_ALU_OP_SRL:
      res = x >> (y & 31);
      break;
   case MME_TU104_ALU_OP_SRA:
      res = (int32_t)x >> (y & 31);
      break;
   case MME_TU104_ALU_OP_AND:
      res = x & y;
      break;
   case MME_TU104_ALU_OP_NAND:
      res = ~(x & y);
      break;
   case MME_TU104_ALU_OP_OR:
      res = x | y;
      break;
   case MME_TU104_ALU_OP_XOR:
      res = x ^ y;
      break;
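   /* MERGE inserts a bit field from y into x.  The 16-bit immediate packs
    * the field description: bits [15:10] give the destination position in x,
    * bits [9:5] the field width, and bits [4:0] the source position in y.
    * For example, an immediate of (8 << 10) | (4 << 5) | 0 copies the low
    * 4 bits of y into bits [11:8] of x.
    */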
   case MME_TU104_ALU_OP_MERGE: {
      uint16_t immed = inst->imm[alu_idx];
      uint32_t dst_pos  = (immed >> 10) & 0x3f;
      uint32_t bits     = (immed >> 5)  & 0x1f;
      uint32_t src_pos  = (immed >> 0)  & 0x1f;
      res = (x & ~(BITFIELD_MASK(bits) << dst_pos)) |
            (((y >> src_pos) & BITFIELD_MASK(bits)) << dst_pos);
      break;
   }
   case MME_TU104_ALU_OP_SLT:
   case MME_TU104_ALU_OP_SLTU:
   case MME_TU104_ALU_OP_SLE:
   case MME_TU104_ALU_OP_SLEU:
   case MME_TU104_ALU_OP_SEQ:
      res = eval_cond(inst->alu[alu_idx].op, x, y) ? ~0u : 0u;
      break;
   case MME_TU104_ALU_OP_STATE:
      flush_mthd(sim);
      res = load_state(sim, (uint16_t)(x + y) * 4);
      break;
   case MME_TU104_ALU_OP_LOOP:
      assert(sim->loop_count == 0);
      assert(inst->alu[alu_idx].dst == MME_TU104_REG_ZERO);
      assert(inst->alu[alu_idx].src[1] == MME_TU104_REG_ZERO);
      sim->loop_count = MAX2(1, x) - 1;
      sim->loop_start = sim->ip;
      sim->loop_end = sim->ip + inst->imm[alu_idx] - 1;
      assert(sim->loop_end > sim->ip);
      break;
   case MME_TU104_ALU_OP_JAL: {
      assert(inst->alu[alu_idx].dst == MME_TU104_REG_ZERO);
      assert(inst->alu[alu_idx].src[0] == MME_TU104_REG_ZERO);
      assert(inst->alu[alu_idx].src[1] == MME_TU104_REG_ZERO);
      /* No idea what bit 15 does.  The NVIDIA blob always sets it. */
      assert(inst->imm[alu_idx] & BITFIELD_BIT(15));
      uint16_t offset = (inst->imm[alu_idx] & BITFIELD_MASK(15));
      sim->next_ip = sim->ip + offset;
      res = 0;
      break;
   }
   case MME_TU104_ALU_OP_BLT:
   case MME_TU104_ALU_OP_BLTU:
   case MME_TU104_ALU_OP_BLE:
   case MME_TU104_ALU_OP_BLEU:
   case MME_TU104_ALU_OP_BEQ: {
      assert(inst->alu[alu_idx].dst == MME_TU104_REG_ZERO);
      bool expect = (inst->imm[alu_idx] & BITFIELD_BIT(15)) != 0;
      if (eval_cond(inst->alu[alu_idx].op, x, y) == expect) {
         int16_t offset = util_mask_sign_extend(inst->imm[alu_idx], 13);
         if ((uint16_t)offset == 0xf000) {
            sim->stop = true;
            break;
         }

         assert((int)sim->ip + offset >= 0);
         assert((int)sim->ip + offset < 0x1000);
         sim->next_ip = sim->ip + offset;
      }
      break;
   }
   case MME_TU104_ALU_OP_DREAD: {
      assert(inst->alu[alu_idx].src[1] == MME_TU104_REG_ZERO);
      uint32_t *dram = sim->state_ops->map_dram(sim->state_handler, x);
      res = *dram;
      break;
   }
   case MME_TU104_ALU_OP_DWRITE: {
      assert(inst->alu[alu_idx].dst == MME_TU104_REG_ZERO);
      uint32_t *dram = sim->state_ops->map_dram(sim->state_handler, x);
      *dram = y;
      break;
   }
   default:
      unreachable("Unhandled ALU op");
   }

   sim->alu_res[alu_idx] = res;
   store_reg(sim, inst->alu[alu_idx].dst, res);
}

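/* Resolves an output op to the 32-bit value it emits: an ALU result, a
 * LOAD parameter, or one of the immediate encodings.
 */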
static uint32_t
load_out(struct mme_tu104_sim *sim,
         const struct mme_tu104_inst *inst,
         enum mme_tu104_out_op op)
{
   switch (op) {
   case MME_TU104_OUT_OP_ALU0:
      return sim->alu_res[0];
   case MME_TU104_OUT_OP_ALU1:
      return sim->alu_res[1];
   case MME_TU104_OUT_OP_LOAD0:
      return sim->load[0];
   case MME_TU104_OUT_OP_LOAD1:
      return sim->load[1];
   case MME_TU104_OUT_OP_IMM0:
      return inst->imm[0];
   case MME_TU104_OUT_OP_IMM1:
      return inst->imm[1];
   case MME_TU104_OUT_OP_IMMHIGH0:
      return inst->imm[0] >> 12;
   case MME_TU104_OUT_OP_IMMHIGH1:
      return inst->imm[1] >> 12;
   case MME_TU104_OUT_OP_IMM32:
      return ((uint32_t)inst->imm[0] << 16) | inst->imm[1];
   default:
      unreachable("Unhandled output op");
   }
}

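/* Processes one of the instruction's two output slots.  A mthd op flushes
 * any pending method and starts a new one; the data word encodes the method
 * address in bits [11:0] (in dwords) and the increment in bits [15:12].  An
 * emit op appends one dword of data to the pending method.
 */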
static void
eval_out(struct mme_tu104_sim *sim,
         const struct mme_tu104_inst *inst,
         uint32_t out_idx)
{
   if (inst->out[out_idx].mthd != MME_TU104_OUT_OP_NONE) {
      uint32_t data = load_out(sim, inst, inst->out[out_idx].mthd);

      flush_mthd(sim);
      sim->mthd.mthd = (data & 0xfff) << 2;
      sim->mthd.inc = (data >> 12) & 0xf;
      sim->mthd.has_mthd = true;
      sim->mthd.data_len = 0;
   }

   if (inst->out[out_idx].emit != MME_TU104_OUT_OP_NONE) {
      uint32_t data = load_out(sim, inst, inst->out[out_idx].emit);

      assert(sim->mthd.data_len < ARRAY_SIZE(sim->mthd.data));
      sim->mthd.data[sim->mthd.data_len++] = data;
   }
}

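/* Core interpreter loop.  Each iteration fetches one instruction, pulls any
 * needed parameters, evaluates the predicate mask, and then runs the two
 * ALUs and two output slots it enables.  Execution ends after the
 * instruction following one with end_next set, or when a branch requests a
 * stop; any pending method is flushed on exit.
 */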
void
mme_tu104_sim_core(uint32_t inst_count, const struct mme_tu104_inst *insts,
                   const struct mme_sim_state_ops *state_ops,
                   void *state_handler)
{
   struct mme_tu104_sim sim = {
      .state_ops = state_ops,
      .state_handler = state_handler,
   };

   bool end_next = false;
   while (true) {
      assert(sim.ip < inst_count);
      const struct mme_tu104_inst *inst = &insts[sim.ip];
      sim.next_ip = sim.ip + 1;

      load_params(&sim, inst);

      uint8_t pred = load_pred(&sim, inst);

      /* No idea why the HW has this rule but it does */
      assert(inst->alu[0].op != MME_TU104_ALU_OP_STATE ||
             inst->alu[1].op != MME_TU104_ALU_OP_STATE);

      if (pred & BITFIELD_BIT(0))
         eval_alu(&sim, inst, 0);
      if (pred & BITFIELD_BIT(1))
         eval_alu(&sim, inst, 1);
      if (pred & BITFIELD_BIT(2))
         eval_out(&sim, inst, 0);
      if (pred & BITFIELD_BIT(3))
         eval_out(&sim, inst, 1);

      if (end_next || sim.stop)
         break;

      end_next = inst->end_next;

      if (sim.loop_count > 0 && sim.ip == sim.loop_end) {
         sim.loop_count--;
         sim.next_ip = sim.loop_start + 1;
      }

      sim.ip = sim.next_ip;
   }

   flush_mthd(&sim);
}

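/* Default environment used by mme_tu104_sim().  Parameters come from a
 * caller-provided array, memory accesses are resolved against a set of
 * bound address ranges, DREAD/DWRITE hit a local data RAM, and shadow
 * scratch registers are tracked.  Unrecognized methods are simply dumped
 * to stdout.
 */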
struct mme_tu104_state_sim {
   uint32_t param_count;
   const uint32_t *params;

   /* Bound memory ranges */
   uint32_t mem_count;
   struct mme_tu104_sim_mem *mems;

   /* SET_MME_MEM_ADDRESS_A/B */
   uint64_t mem_addr_lo;
   uint64_t mem_addr_hi;

   /* RAM, accessed by DREAD/DWRITE */
   struct {
      uint32_t data[MME_TU104_DRAM_COUNT];

      /* SET_MME_DATA_RAM_ADDRESS */
      uint32_t addr;
   } ram;

   struct {
      struct {
         uint32_t data[1024];
         uint32_t count;
      } read_fifo;
   } dma;

   /* NVC597_SET_MME_SHADOW_SCRATCH(i) */
   uint32_t scratch[MME_TU104_SCRATCH_COUNT];

   struct {
      uint32_t addr_hi;
      uint32_t addr_lo;
      uint32_t data;
   } report_sem;
};

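/* Translates a GPU virtual address to a CPU pointer using the bound memory
 * ranges.  Anything outside a bound range is treated as a fault and aborts
 * the simulation.
 */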
static uint32_t *
find_mem(struct mme_tu104_state_sim *sim, uint64_t addr, const char *op_desc)
{
   for (uint32_t i = 0; i < sim->mem_count; i++) {
      if (addr < sim->mems[i].addr)
         continue;

      uint64_t offset = addr - sim->mems[i].addr;
      if (offset >= sim->mems[i].size)
         continue;

      assert(sim->mems[i].data != NULL);
      return (uint32_t *)((char *)sim->mems[i].data + offset);
   }

   fprintf(stderr, "FAULT in %s at address 0x%"PRIx64"\n", op_desc, addr);
   abort();
}

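/* mme_sim_state_ops::load: pops the next macro parameter. */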
static uint32_t
mme_tu104_state_sim_load(void *_sim)
{
   struct mme_tu104_state_sim *sim = _sim;

   assert(sim->param_count > 0);
   uint32_t data = *sim->params;
   sim->params++;
   sim->param_count--;

   return data;
}

static uint32_t
mme_tu104_state_sim_state(void *_sim, uint16_t addr)
{
   struct mme_tu104_state_sim *sim = _sim;
   assert(addr % 4 == 0);

   if (NVC597_SET_MME_SHADOW_SCRATCH(0) <= addr &&
       addr < NVC597_CALL_MME_MACRO(0)) {
      uint32_t i = (addr - NVC597_SET_MME_SHADOW_SCRATCH(0)) / 4;
      assert(i < ARRAY_SIZE(sim->scratch));
      return sim->scratch[i];
   }

   return 0;
}

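/* mme_sim_state_ops::mthd: interprets the handful of 3D class methods the
 * simulator cares about (report semaphores, MME data RAM and DMA setup,
 * shadow scratch) and dumps everything else.
 */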
static void
mme_tu104_state_sim_mthd(void *_sim, uint16_t addr, uint32_t data)
{
   struct mme_tu104_state_sim *sim = _sim;
   assert(addr % 4 == 0);

   switch (addr) {
   case NVC597_SET_REPORT_SEMAPHORE_A:
      sim->report_sem.addr_hi = data;
      break;
   case NVC597_SET_REPORT_SEMAPHORE_B:
      sim->report_sem.addr_lo = data;
      break;
   case NVC597_SET_REPORT_SEMAPHORE_C:
      sim->report_sem.data = data;
      break;
   case NVC597_SET_REPORT_SEMAPHORE_D: {
      assert(data == 0x10000000);
      uint64_t sem_report_addr =
         ((uint64_t)sim->report_sem.addr_hi << 32) | sim->report_sem.addr_lo;
      uint32_t *mem = find_mem(sim, sem_report_addr, "SET_REPORT_SEMAPHORE");
      *mem = sim->report_sem.data;
      break;
   }
   case NVC597_SET_MME_DATA_RAM_ADDRESS:
      sim->ram.addr = data;
      break;
   case NVC597_SET_MME_MEM_ADDRESS_A:
      sim->mem_addr_hi = data;
      break;
   case NVC597_SET_MME_MEM_ADDRESS_B:
      sim->mem_addr_lo = data;
      break;
   case NVC597_MME_DMA_READ_FIFOED:
      sim->dma.read_fifo.count = data;
      break;
   default:
      if (NVC597_SET_MME_SHADOW_SCRATCH(0) <= addr &&
          addr < NVC597_CALL_MME_MACRO(0)) {
         uint32_t i = (addr - NVC597_SET_MME_SHADOW_SCRATCH(0)) / 4;
         assert(i < ARRAY_SIZE(sim->scratch));
         sim->scratch[i] = data;
      } else {
         fprintf(stdout, "%s:\n", P_PARSE_NVC597_MTHD(addr));
         P_DUMP_NVC597_MTHD_DATA(stdout, addr, data, "    ");
      }
      break;
   }
}

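/* mme_sim_state_ops::barrier: completes a pending MME_DMA_READ_FIFOED by
 * copying the requested dwords from bound memory into the read FIFO and
 * exposing them as the new parameter stream.
 */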
static void
mme_tu104_state_sim_barrier(void *_sim)
{
   struct mme_tu104_state_sim *sim = _sim;

   if (sim->dma.read_fifo.count == 0)
      return;

   const uint64_t mem_addr =
      ((uint64_t)sim->mem_addr_hi << 32) | sim->mem_addr_lo;

   for (uint32_t i = 0; i < sim->dma.read_fifo.count; i++) {
      uint32_t *src = find_mem(sim, mem_addr + i * 4,
                               "MME_DMA_READ_FIFOED");
      assert(src != NULL);
      sim->dma.read_fifo.data[i] = *src;
   }

   sim->param_count = sim->dma.read_fifo.count;
   sim->params = sim->dma.read_fifo.data;
}

static uint32_t *
mme_tu104_state_sim_map_dram(void *_sim, uint32_t idx)
{
   struct mme_tu104_state_sim *sim = _sim;

   assert(idx < ARRAY_SIZE(sim->ram.data));
   return &sim->ram.data[idx];
}

static const struct mme_sim_state_ops mme_tu104_state_sim_ops = {
   .load = mme_tu104_state_sim_load,
   .state = mme_tu104_state_sim_state,
   .mthd = mme_tu104_state_sim_mthd,
   .barrier = mme_tu104_state_sim_barrier,
   .map_dram = mme_tu104_state_sim_map_dram,
};

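/* Convenience entry point that runs a macro against the default state
 * simulator above.  As an illustrative sketch (not taken from this file),
 * a test that expects the macro to write through a bound buffer might be
 * driven roughly like this, assuming mme_tu104_sim_mem has the addr/size/
 * data fields used by find_mem():
 *
 *    uint32_t buf[64] = {0};
 *    struct mme_tu104_sim_mem mem = {
 *       .addr = 0xfd000000, .size = sizeof(buf), .data = buf,
 *    };
 *    uint32_t params[] = { 3, 7 };
 *    mme_tu104_sim(inst_count, insts,
 *                  ARRAY_SIZE(params), params, 1, &mem);
 */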
void
mme_tu104_sim(uint32_t inst_count, const struct mme_tu104_inst *insts,
              uint32_t param_count, const uint32_t *params,
              uint32_t mem_count, struct mme_tu104_sim_mem *mems)
{
   struct mme_tu104_state_sim state_sim = {
      .param_count = param_count,
      .params = params,
      .mem_count = mem_count,
      .mems = mems,
   };

   mme_tu104_sim_core(inst_count, insts, &mme_tu104_state_sim_ops, &state_sim);
}