1 /*
2 * Copyright © 2022 Mary Guillemard
3 * SPDX-License-Identifier: MIT
4 */
5 #include "mme_runner.h"
6
7 #include "mme_fermi_sim.h"
8 /* for VOLTA_A */
9 #include "nv_push_clc397.h"
10
11 class mme_fermi_sim_test : public ::testing::Test, public mme_hw_runner {
12 public:
13 mme_fermi_sim_test();
14 ~mme_fermi_sim_test();
15
16 void SetUp();
17 void test_macro(const mme_builder *b,
18 const std::vector<uint32_t>& macro,
19 const std::vector<uint32_t>& params);
20 };
21
mme_fermi_sim_test()22 mme_fermi_sim_test::mme_fermi_sim_test() :
23 ::testing::Test(),
24 mme_hw_runner()
25 { }
26
~mme_fermi_sim_test()27 mme_fermi_sim_test::~mme_fermi_sim_test()
28 { }
29
30 void
SetUp()31 mme_fermi_sim_test::SetUp()
32 {
33 ASSERT_TRUE(set_up_hw(FERMI_A, VOLTA_A));
34 }
35
36 void
test_macro(const mme_builder * b,const std::vector<uint32_t> & macro,const std::vector<uint32_t> & params)37 mme_fermi_sim_test::test_macro(const mme_builder *b,
38 const std::vector<uint32_t>& macro,
39 const std::vector<uint32_t>& params)
40 {
41 const uint32_t data_dwords = DATA_BO_SIZE / sizeof(uint32_t);
42
43 std::vector<mme_fermi_inst> insts(macro.size());
44 mme_fermi_decode(&insts[0], ¯o[0], macro.size());
45
46 /* First, make a copy of the data and simulate the macro */
47 std::vector<uint32_t> sim_data(data, data + (DATA_BO_SIZE / 4));
48 mme_fermi_sim_mem sim_mem = {
49 .addr = data_addr,
50 .data = &sim_data[0],
51 .size = DATA_BO_SIZE,
52 };
53 mme_fermi_sim(insts.size(), &insts[0],
54 params.size(), params.size() ? ¶ms[0] : NULL,
55 1, &sim_mem);
56
57 run_macro(macro, params);
58
59 /* Check the results */
60 for (uint32_t i = 0; i < data_dwords; i++)
61 ASSERT_EQ(data[i], sim_data[i]);
62 }
63
64 static mme_fermi_reg
mme_fermi_value_as_reg(mme_value val)65 mme_fermi_value_as_reg(mme_value val)
66 {
67 assert(val.type == MME_VALUE_TYPE_REG);
68 return (mme_fermi_reg)(MME_FERMI_REG_ZERO + val.reg);
69 }
70
/* Stores a single immediate canary dword at data_addr; no parameters. */
TEST_F(mme_fermi_sim_test, sanity)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   const uint32_t canary = 0xc0ffee01;
   mme_store_imm_addr(&b, data_addr, mme_imm(canary), false);

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> no_params;
   test_macro(&b, macro, no_params);
}
85
/* Adds two loaded parameters with mme_add() and stores the result. */
TEST_F(mme_fermi_sim_test, add)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value lhs = mme_load(&b);
   mme_value rhs = mme_load(&b);
   mme_value total = mme_add(&b, lhs, rhs);
   mme_store_imm_addr(&b, data_addr, total, true);

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = { 25, 138 };

   test_macro(&b, macro, params);
}
104
/*
 * Exercises mme_add() with immediates in three shapes: reg + imm, imm + reg
 * and zero + imm.  The nine results land in consecutive dwords, and the
 * macro is run once per input value.
 */
TEST_F(mme_fermi_sim_test, add_imm)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value x = mme_load(&b);

   const uint32_t imms[] = { 0x00000001, 0xffffffff, 0xffff8000 };
   uint32_t offset = 0;

   /* x + imm */
   for (const uint32_t imm : imms) {
      mme_value res = mme_add(&b, x, mme_imm(imm));
      mme_store_imm_addr(&b, data_addr + offset, res, true);
      offset += 4;
   }

   /* imm + x */
   for (const uint32_t imm : imms) {
      mme_value res = mme_add(&b, mme_imm(imm), x);
      mme_store_imm_addr(&b, data_addr + offset, res, true);
      offset += 4;
   }

   /* zero + imm */
   for (const uint32_t imm : imms) {
      mme_value res = mme_add(&b, mme_zero(), mme_imm(imm));
      mme_store_imm_addr(&b, data_addr + offset, res, true);
      offset += 4;
   }

   const auto macro = mme_builder_finish_vec(&b);

   const uint32_t vals[] = {
      0x0000ffff,
      0x00008000,
      0x0001ffff,
      0xffffffff,
   };

   for (const uint32_t val : vals) {
      reset_push();

      const std::vector<uint32_t> params = { val };

      test_macro(&b, macro, params);
   }
}
157
/*
 * Emits raw ADD_IMM instructions on the low and high halves of a 64-bit
 * input independently, for four (lo_imm, hi_imm) pairs, and stores each
 * lo/hi result pair in consecutive dwords.
 *
 * Each pair writes the registers it has just allocated.  The previous
 * version wrote v2_lo/v2_hi for the third and fourth pairs after those
 * registers had been freed, then stored registers it never wrote.
 */
TEST_F(mme_fermi_sim_test, add_imm_no_carry)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value x_lo = mme_load(&b);
   mme_value x_hi = mme_load(&b);

   /* Emits one raw ADD_IMM instruction with the given dst, src and imm. */
   auto emit_add_imm = [&b](mme_value dst, mme_value src, int imm) {
      mme_fermi_asm(&b, i) {
         i.op = MME_FERMI_OP_ADD_IMM;
         i.assign_op = MME_FERMI_ASSIGN_OP_MOVE;
         i.dst = mme_fermi_value_as_reg(dst);
         i.src[0] = mme_fermi_value_as_reg(src);
         i.imm = imm;
      }
   };

   static const struct {
      int lo_imm;
      int hi_imm;
   } cases[] = {
      { 0x0001, 0x0000 },
      { 0x0000, 0x0001 },
      { 0x0000, 0xffff },
      { 0x0000, 0x8000 },
   };

   uint32_t offset = 0;
   for (const auto &c : cases) {
      mme_value res_lo = mme_alloc_reg(&b);
      mme_value res_hi = mme_alloc_reg(&b);

      emit_add_imm(res_lo, x_lo, c.lo_imm);
      emit_add_imm(res_hi, x_hi, c.hi_imm);

      /* Storing with free_reg = true releases both result registers. */
      mme_store_imm_addr(&b, data_addr + offset + 0, res_lo, true);
      mme_store_imm_addr(&b, data_addr + offset + 4, res_hi, true);
      offset += 8;
   }

   auto macro = mme_builder_finish_vec(&b);

   uint64_t vals[] = {
      0x0000ffffffffffffull,
      0x0000ffffffff8000ull,
      0x0000ffff00000000ull,
      0x0000800000000000ull,
      0x00008000ffffffffull,
      0x0001ffff00000000ull,
      0xffffffff00000000ull,
      0xffffffffffffffffull,
   };

   for (uint32_t i = 0; i < ARRAY_SIZE(vals); i++) {
      reset_push();

      std::vector<uint32_t> params;
      params.push_back(low32(vals[i]));
      params.push_back(high32(vals[i]));

      test_macro(&b, macro, params);
   }
}
269
/* 64-bit add via mme_add64(); the lo and hi halves are stored separately. */
TEST_F(mme_fermi_sim_test, addc)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   struct mme_value64 lhs = { mme_load(&b), mme_load(&b) };
   struct mme_value64 rhs = { mme_load(&b), mme_load(&b) };

   struct mme_value64 total = mme_add64(&b, lhs, rhs);

   mme_store_imm_addr(&b, data_addr + 0, total.lo, true);
   mme_store_imm_addr(&b, data_addr + 4, total.hi, true);

   const auto macro = mme_builder_finish_vec(&b);

   /* Parameter order matches the loads: lhs.lo, lhs.hi, rhs.lo, rhs.hi. */
   const std::vector<uint32_t> params = {
      0x80008650,
      0x596,
      0x8000a8f6,
      0x836,
   };

   test_macro(&b, macro, params);
}
293
/*
 * Checks how ADD_IMM interacts with the carry.  Two rounds are emitted: an
 * ADD, then an ADD_IMM, then an ADDC of zero registers to read the carry
 * back.  The first round starts from a zero + zero ADD, the second from
 * max + max.
 */
TEST_F(mme_fermi_sim_test, add_imm_carry)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value max = mme_load(&b);

   mme_value add_res = mme_alloc_reg(&b);
   mme_value add_imm_res = mme_alloc_reg(&b);
   mme_value carry = mme_alloc_reg(&b);

   /* Emits one raw ALU_REG instruction. */
   auto emit_alu = [&b](mme_value dst, auto src0, auto src1, auto alu_op) {
      mme_fermi_asm(&b, inst) {
         inst.op = MME_FERMI_OP_ALU_REG;
         inst.assign_op = MME_FERMI_ASSIGN_OP_MOVE;
         inst.dst = mme_fermi_value_as_reg(dst);
         inst.src[0] = src0;
         inst.src[1] = src1;
         inst.alu_op = alu_op;
      }
   };

   /* Emits one raw ADD_IMM instruction. */
   auto emit_add_imm = [&b](mme_value dst, auto src0, int imm) {
      mme_fermi_asm(&b, inst) {
         inst.op = MME_FERMI_OP_ADD_IMM;
         inst.assign_op = MME_FERMI_ASSIGN_OP_MOVE;
         inst.dst = mme_fermi_value_as_reg(dst);
         inst.src[0] = src0;
         inst.imm = imm;
      }
   };

   /* Dummy add clears the carry register */
   emit_alu(add_res, MME_FERMI_REG_ZERO, MME_FERMI_REG_ZERO,
            MME_FERMI_ALU_OP_ADD);

   /* ADD_IMM should not touch carry */
   emit_add_imm(add_imm_res, mme_fermi_value_as_reg(max), 1);

   /* Grab the carry */
   emit_alu(carry, MME_FERMI_REG_ZERO, MME_FERMI_REG_ZERO,
            MME_FERMI_ALU_OP_ADDC);

   /* Store everything after all that ALU so none of the stores mess up the
    * carry behind our back.  The registers are kept (free_reg = false)
    * because the second round reuses them.
    */
   mme_store_imm_addr(&b, data_addr + 0, add_res, false);
   mme_store_imm_addr(&b, data_addr + 4, add_imm_res, false);
   mme_store_imm_addr(&b, data_addr + 8, carry, false);

   /* Set carry to 1 */
   emit_alu(add_res, mme_fermi_value_as_reg(max),
            mme_fermi_value_as_reg(max), MME_FERMI_ALU_OP_ADD);

   /* ADD_IMM should not touch carry */
   emit_add_imm(add_imm_res, MME_FERMI_REG_ZERO, 1);

   /* Grab the carry */
   emit_alu(carry, MME_FERMI_REG_ZERO, MME_FERMI_REG_ZERO,
            MME_FERMI_ALU_OP_ADDC);

   mme_store_imm_addr(&b, data_addr + 12, add_res, true);
   mme_store_imm_addr(&b, data_addr + 16, add_imm_res, true);
   mme_store_imm_addr(&b, data_addr + 20, carry, true);

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = { UINT32_MAX };

   test_macro(&b, macro, params);
}
381
/* Subtracts the second loaded parameter from the first with mme_sub(). */
TEST_F(mme_fermi_sim_test, sub)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value lhs = mme_load(&b);
   mme_value rhs = mme_load(&b);
   mme_value delta = mme_sub(&b, lhs, rhs);
   mme_store_imm_addr(&b, data_addr, delta, true);

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = { 25, 138 };

   test_macro(&b, macro, params);
}
400
/* 64-bit subtract via mme_sub64(); lo and hi halves are stored separately. */
TEST_F(mme_fermi_sim_test, subb)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   struct mme_value64 lhs = { mme_load(&b), mme_load(&b) };
   struct mme_value64 rhs = { mme_load(&b), mme_load(&b) };

   struct mme_value64 diff = mme_sub64(&b, lhs, rhs);

   mme_store_imm_addr(&b, data_addr + 0, diff.lo, true);
   mme_store_imm_addr(&b, data_addr + 4, diff.hi, true);

   const auto macro = mme_builder_finish_vec(&b);

   /* Parameter order matches the loads: lhs.lo, lhs.hi, rhs.lo, rhs.hi. */
   const std::vector<uint32_t> params = {
      0x80008650,
      0x596,
      0x8000a8f6,
      0x836,
   };

   test_macro(&b, macro, params);
}
424
425 #define SHIFT_TEST(op) \
426 TEST_F(mme_fermi_sim_test, op) \
427 { \
428 mme_builder b; \
429 mme_builder_init(&b, devinfo); \
430 \
431 mme_value val = mme_load(&b); \
432 mme_value shift1 = mme_load(&b); \
433 mme_value shift2 = mme_load(&b); \
434 mme_store_imm_addr(&b, data_addr + 0, mme_##op(&b, val, shift1), true); \
435 mme_store_imm_addr(&b, data_addr + 4, mme_##op(&b, val, shift2), true); \
436 \
437 auto macro = mme_builder_finish_vec(&b); \
438 \
439 std::vector<uint32_t> params; \
440 params.push_back(0x0c406fe0); \
441 params.push_back(5); \
442 params.push_back(51); \
443 \
444 test_macro(&b, macro, params); \
445 }
446
447 SHIFT_TEST(sll)
SHIFT_TEST(srl)448 SHIFT_TEST(srl)
449
450 #undef SHIFT_TEST
451
452 TEST_F(mme_fermi_sim_test, bfe)
453 {
454 const uint32_t canary = 0xc0ffee01;
455
456 mme_builder b;
457 mme_builder_init(&b, devinfo);
458
459 mme_value val = mme_load(&b);
460 mme_value pos = mme_load(&b);
461
462 mme_store_imm_addr(&b, data_addr + 0, mme_bfe(&b, val, pos, 1), true);
463 mme_store_imm_addr(&b, data_addr + 4, mme_bfe(&b, val, pos, 2), true);
464 mme_store_imm_addr(&b, data_addr + 8, mme_bfe(&b, val, pos, 5), true);
465
466 auto macro = mme_builder_finish_vec(&b);
467
468 for (unsigned i = 0; i < 31; i++) {
469 std::vector<uint32_t> params;
470 params.push_back(canary);
471 params.push_back(i);
472
473 test_macro(&b, macro, params);
474
475 ASSERT_EQ(data[0], (canary >> i) & 0x1);
476 ASSERT_EQ(data[1], (canary >> i) & 0x3);
477 ASSERT_EQ(data[2], (canary >> i) & 0x1f);
478 }
479 }
480
/* Applies mme_not() to one loaded parameter and stores the result. */
TEST_F(mme_fermi_sim_test, not)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value input = mme_load(&b);
   mme_value inverted = mme_not(&b, input);
   mme_store_imm_addr(&b, data_addr + 0, inverted, true);

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = { 0x0c406fe0 };

   test_macro(&b, macro, params);
}
497
498 #define BITOP_TEST(op) \
499 TEST_F(mme_fermi_sim_test, op) \
500 { \
501 mme_builder b; \
502 mme_builder_init(&b, devinfo); \
503 \
504 mme_value x = mme_load(&b); \
505 mme_value y = mme_load(&b); \
506 mme_value v1 = mme_##op(&b, x, y); \
507 mme_value v2 = mme_##op(&b, x, mme_imm(0xffff8000)); \
508 mme_value v3 = mme_##op(&b, x, mme_imm(0xffffffff)); \
509 mme_store_imm_addr(&b, data_addr + 0, v1, true); \
510 mme_store_imm_addr(&b, data_addr + 4, v2, true); \
511 mme_store_imm_addr(&b, data_addr + 8, v3, true); \
512 \
513 auto macro = mme_builder_finish_vec(&b); \
514 \
515 std::vector<uint32_t> params; \
516 params.push_back(0x0c406fe0); \
517 params.push_back(0x00fff0c0); \
518 \
519 test_macro(&b, macro, params); \
520 }
521
522 BITOP_TEST(and)
BITOP_TEST(and_not)523 BITOP_TEST(and_not)
524 BITOP_TEST(nand)
525 BITOP_TEST(or)
526 BITOP_TEST(xor)
527
528 #undef BITOP_TEST
529
/* Host-side reference for the "ine" comparison: true when x and y differ. */
static bool
c_ine(int32_t x, int32_t y)
{
   return !(x == y);
}
/* Host-side reference for the "ieq" comparison: true when x equals y. */
static bool
c_ieq(int32_t x, int32_t y)
{
   return !(x != y);
}
532
533 #define IF_TEST(op) \
534 TEST_F(mme_fermi_sim_test, if_##op) \
535 { \
536 mme_builder b; \
537 mme_builder_init(&b, devinfo); \
538 \
539 mme_value x = mme_load(&b); \
540 mme_value y = mme_load(&b); \
541 mme_value i = mme_mov(&b, mme_zero()); \
542 \
543 mme_start_if_##op(&b, x, y); \
544 { \
545 mme_add_to(&b, i, i, mme_imm(1)); \
546 mme_add_to(&b, i, i, mme_imm(1)); \
547 } \
548 mme_end_if(&b); \
549 mme_add_to(&b, i, i, mme_imm(1)); \
550 mme_add_to(&b, i, i, mme_imm(1)); \
551 mme_add_to(&b, i, i, mme_imm(1)); \
552 \
553 mme_store_imm_addr(&b, data_addr + 0, i, true); \
554 \
555 auto macro = mme_builder_finish_vec(&b); \
556 \
557 uint32_t vals[] = {23, 56, (uint32_t)-5, (uint32_t)-10, 56, 14}; \
558 \
559 for (uint32_t i = 0; i < ARRAY_SIZE(vals) - 1; i++) { \
560 reset_push(); \
561 \
562 std::vector<uint32_t> params; \
563 params.push_back(vals[i + 0]); \
564 params.push_back(vals[i + 1]); \
565 \
566 test_macro(&b, macro, params); \
567 \
568 ASSERT_EQ(data[0], c_##op(params[0], params[1]) ? 5 : 3); \
569 } \
570 }
571
572 IF_TEST(ieq)
IF_TEST(ine)573 IF_TEST(ine)
574
575 #undef IF_TEST
576
577 static inline void
578 mme_fermi_inc_whole_inst(mme_builder *b, mme_value val)
579 {
580 mme_fermi_asm(b, i) {
581 i.op = MME_FERMI_OP_ADD_IMM;
582 i.assign_op = MME_FERMI_ASSIGN_OP_MOVE;
583 i.dst = mme_fermi_value_as_reg(val);
584 i.src[0] = mme_fermi_value_as_reg(val);
585 i.imm = 1;
586 }
587 }
588
589 #define WHILE_TEST(op, start, step, bound) \
590 TEST_F(mme_fermi_sim_test, while_##op) \
591 { \
592 mme_builder b; \
593 mme_builder_init(&b, devinfo); \
594 \
595 mme_value x = mme_mov(&b, mme_zero()); \
596 mme_value y = mme_mov(&b, mme_zero()); \
597 mme_value z = mme_mov(&b, mme_imm(start)); \
598 mme_value w = mme_mov(&b, mme_zero()); \
599 mme_value v = mme_mov(&b, mme_zero()); \
600 \
601 for (uint32_t j = 0; j < 5; j++) \
602 mme_fermi_inc_whole_inst(&b, x); \
603 mme_store_imm_addr(&b, data_addr + 0, x, true); \
604 \
605 mme_while(&b, op, z, mme_imm(bound)) { \
606 for (uint32_t j = 0; j < 5; j++) \
607 mme_fermi_inc_whole_inst(&b, y); \
608 \
609 mme_add_to(&b, z, z, mme_imm(step)); \
610 \
611 for (uint32_t j = 0; j < 5; j++) \
612 mme_fermi_inc_whole_inst(&b, w); \
613 } \
614 mme_store_imm_addr(&b, data_addr + 4, y, true); \
615 mme_store_imm_addr(&b, data_addr + 8, z, true); \
616 mme_store_imm_addr(&b, data_addr + 12, w, true); \
617 \
618 for (uint32_t j = 0; j < 5; j++) \
619 mme_fermi_inc_whole_inst(&b, v); \
620 \
621 mme_store_imm_addr(&b, data_addr + 16, v, true); \
622 \
623 auto macro = mme_builder_finish_vec(&b); \
624 \
625 uint32_t end = (uint32_t)(start), count = 0; \
626 while (c_##op(end, (bound))) { \
627 end += (uint32_t)(step); \
628 count++; \
629 } \
630 \
631 std::vector<uint32_t> params; \
632 test_macro(&b, macro, params); \
633 ASSERT_EQ(data[0], 5); \
634 ASSERT_EQ(data[1], 5 * count); \
635 ASSERT_EQ(data[2], end); \
636 ASSERT_EQ(data[3], 5 * count); \
637 ASSERT_EQ(data[4], 5); \
638 }
639
640 WHILE_TEST(ieq, 0, 5, 0)
641 WHILE_TEST(ine, 0, 1, 7)
642
643 #undef WHILE_TWST
644
645
/*
 * Exercises mme_loop(): the body adds `count` to x on each iteration, so x
 * is expected to end at count * count.  y is incremented once after the
 * loop.  Empty mme_fermi_asm blocks are emitted as no-ops inside the loop
 * and after it.
 */
TEST_F(mme_fermi_sim_test, loop)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value count = mme_load(&b);

   mme_value x = mme_mov(&b, mme_zero());
   mme_value y = mme_mov(&b, mme_zero());

   mme_loop(&b, count) {
      mme_fermi_asm(&b, nop) { } /* noop */
      mme_add_to(&b, x, x, count);
   }
   mme_add_to(&b, y, y, mme_imm(1));

   /* Three trailing no-ops */
   for (unsigned n = 0; n < 3; n++) {
      mme_fermi_asm(&b, nop) { }
   }

   mme_store_imm_addr(&b, data_addr + 0, count, true);
   mme_store_imm_addr(&b, data_addr + 4, x, true);
   mme_store_imm_addr(&b, data_addr + 8, y, true);

   const auto macro = mme_builder_finish_vec(&b);

   const uint32_t counts[] = {0, 1, 5, 9};

   for (uint32_t i = 0; i < ARRAY_SIZE(counts); i++) {
      reset_push();

      const std::vector<uint32_t> params = { counts[i] };

      test_macro(&b, macro, params);
      ASSERT_EQ(data[0], counts[i]);
      ASSERT_EQ(data[1], counts[i] * counts[i]);
      ASSERT_EQ(data[2], 1);
   }
}
685
/*
 * Runs mme_merge() on the same two loaded values with five different sets
 * of its three trailing arguments, storing each result in the next dword.
 */
TEST_F(mme_fermi_sim_test, merge)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value x = mme_load(&b);
   mme_value y = mme_load(&b);

   /* The three trailing mme_merge() arguments, in call order. */
   static const struct {
      uint16_t arg0;
      uint16_t arg1;
      uint16_t arg2;
   } cases[] = {
      { 12, 12, 20 },
      { 12,  8, 20 },
      {  8, 12, 20 },
      { 12, 16,  8 },
      { 24, 12,  8 },
   };

   uint32_t offset = 0;
   for (const auto &c : cases) {
      mme_value merged = mme_merge(&b, x, y, c.arg0, c.arg1, c.arg2);
      mme_store_imm_addr(&b, data_addr + offset, merged, true);
      offset += 4;
   }

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = { 0x0c406fe0, 0x76543210u };

   test_macro(&b, macro, params);
}
717
/*
 * Emits a raw BRANCH on the zero register (imm = 2, no_delay = false,
 * not_zero = false), followed by an add of the two parameters and a store.
 * The test expects the sum (3 + 1 = 4) to reach data[0].
 */
TEST_F(mme_fermi_sim_test, branch_delay_slot)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value lhs = mme_load(&b);
   mme_value rhs = mme_load(&b);

   mme_fermi_asm(&b, inst) {
      inst.op = MME_FERMI_OP_BRANCH;
      inst.src[0] = MME_FERMI_REG_ZERO;
      inst.imm = 2;
      inst.branch.no_delay = false;
      inst.branch.not_zero = false;
   }

   mme_value total = mme_add(&b, lhs, rhs);

   mme_store_imm_addr(&b, data_addr + 0, total, true);

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = { 3, 1 };

   test_macro(&b, macro, params);
   ASSERT_EQ(data[0], 4);
}
747
/*
 * Writes the two parameters to shadow scratch slots 5 and 8 through
 * mme_mthd()/mme_emit(), reads both slots back with mme_state() and stores
 * the values read (slot 8 first, then slot 5).
 */
TEST_F(mme_fermi_sim_test, state)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value first = mme_load(&b);
   mme_value second = mme_load(&b);

   mme_mthd(&b, NV9097_SET_MME_SHADOW_SCRATCH(5));
   mme_emit(&b, first);

   mme_mthd(&b, NV9097_SET_MME_SHADOW_SCRATCH(8));
   mme_emit(&b, second);

   mme_value slot8 = mme_state(&b, NV9097_SET_MME_SHADOW_SCRATCH(8));
   mme_value slot5 = mme_state(&b, NV9097_SET_MME_SHADOW_SCRATCH(5));

   mme_store_imm_addr(&b, data_addr + 0, slot8, true);
   mme_store_imm_addr(&b, data_addr + 4, slot5, true);

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = { static_cast<uint32_t>(-10), 5 };

   test_macro(&b, macro, params);
}
776
/*
 * Walks the shadow scratch array in chunks of chunk_size entries.  For each
 * chunk the macro writes every index into its own scratch slot, reads the
 * slots back and stores them to the data buffer.  The host then checks that
 * data[j] == i + j.  This test pushes and submits the macro itself rather
 * than going through test_macro().
 */
TEST_F(mme_fermi_sim_test, scratch_limit)
{
   static constexpr uint32_t chunk_size = 32;

   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value start = mme_load(&b);
   mme_value count = mme_load(&b);

   /* Write pass: scratch[idx] = idx for `count` consecutive indices. */
   mme_value wr_idx = mme_mov(&b, start);
   mme_loop(&b, count) {
      mme_mthd_arr(&b, NV9097_SET_MME_SHADOW_SCRATCH(0), wr_idx);
      mme_emit(&b, wr_idx);
      mme_add_to(&b, wr_idx, wr_idx, mme_imm(1));
   }
   mme_free_reg(&b, wr_idx);

   /* Read pass: copy the same slots out to consecutive dwords. */
   mme_value rd_idx = mme_mov(&b, start);
   mme_free_reg(&b, start);
   struct mme_value64 addr = mme_mov64(&b, mme_imm64(data_addr));

   mme_loop(&b, count) {
      mme_value slot =
         mme_state_arr(&b, NV9097_SET_MME_SHADOW_SCRATCH(0), rd_idx);
      mme_store(&b, addr, slot, true);
      mme_add_to(&b, rd_idx, rd_idx, mme_imm(1));
      mme_add64_to(&b, addr, addr, mme_imm64(4));
   }
   mme_free_reg(&b, rd_idx);
   mme_free_reg(&b, count);

   const auto macro = mme_builder_finish_vec(&b);

   for (uint32_t i = 0; i < MME_FERMI_SCRATCH_COUNT; i += chunk_size) {
      reset_push();

      push_macro(0, macro);

      /* Call macro 0 with (start, count) = (i, chunk_size). */
      P_1INC(p, NV9097, CALL_MME_MACRO(0));
      P_INLINE_DATA(p, i);
      P_INLINE_DATA(p, chunk_size);

      submit_push();

      for (uint32_t j = 0; j < chunk_size; j++)
         ASSERT_EQ(data[j], i + j);
   }
}
825
/*
 * Stores a list of immediate values to consecutive dwords and checks that
 * each one reads back unchanged, in addition to the simulator comparison
 * done by test_macro().
 */
TEST_F(mme_fermi_sim_test, load_imm_to_reg)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   const uint32_t vals[] = {
      0x0001ffff,
      0x1ffff000,
      0x0007ffff,
      0x00080000,
      0x7fffffff,
      0x80000000,
      0xffffffff,
   };

   uint32_t offset = 0;
   for (const uint32_t val : vals) {
      mme_store_imm_addr(&b, data_addr + offset, mme_imm(val), false);
      offset += 4;
   }

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> no_params;

   test_macro(&b, macro, no_params);

   for (uint32_t i = 0; i < ARRAY_SIZE(vals); i++)
      ASSERT_EQ(data[i], vals[i]);
}
853