/*
 * Copyright © 2022 Mary Guillemard
 * SPDX-License-Identifier: MIT
 */
#include "mme_builder.h"

#include <stdio.h>
#include <stdlib.h>

#include "util/u_math.h"

void
mme_fermi_builder_init(struct mme_builder *b)
{
   /* R0 is reserved for the zero register */
   mme_reg_alloc_init(&b->reg_alloc, 0xfe);

   /* Pre-allocate R1 for the first parameter value */
   ASSERTED struct mme_value r1 = mme_reg_alloc_alloc(&b->reg_alloc);
   assert(r1.reg == 1);
}

static inline bool
mme_fermi_is_zero_or_reg(struct mme_value x)
{
   switch (x.type) {
   case MME_VALUE_TYPE_ZERO:  return true;
   case MME_VALUE_TYPE_IMM:   return x.imm == 0;
   case MME_VALUE_TYPE_REG:   return true;
   default: unreachable("Invalid MME value type");
   }
}

static inline bool
mme_fermi_is_zero_or_imm(struct mme_value x)
{
   switch (x.type) {
   case MME_VALUE_TYPE_ZERO:  return true;
   case MME_VALUE_TYPE_IMM:   return true;
   case MME_VALUE_TYPE_REG:   return false;
   default: unreachable("Invalid MME value type");
   }
}
44
45 static inline enum mme_fermi_reg
mme_value_alu_reg(struct mme_value val)46 mme_value_alu_reg(struct mme_value val)
47 {
48 assert(mme_fermi_is_zero_or_reg(val));
49
50 switch (val.type) {
51 case MME_VALUE_TYPE_ZERO:
52 return MME_FERMI_REG_ZERO;
53 case MME_VALUE_TYPE_REG:
54 assert(val.reg > 0 && val.reg <= 7);
55 return MME_FERMI_REG_ZERO + val.reg;
56 case MME_VALUE_TYPE_IMM:
57 return MME_FERMI_REG_ZERO;
58 }
59 unreachable("Invalid value type");
60 }
61
62 static inline uint32_t
mme_value_alu_imm(struct mme_value val)63 mme_value_alu_imm(struct mme_value val)
64 {
65 assert(mme_fermi_is_zero_or_imm(val));
66
67 switch (val.type) {
68 case MME_VALUE_TYPE_ZERO:
69 return 0;
70 case MME_VALUE_TYPE_IMM:
71 return val.imm;
72 case MME_VALUE_TYPE_REG:
73 return 0;
74 }
75 unreachable("Invalid value type");
76 }
77
78 static inline void
mme_free_reg_if_tmp(struct mme_builder * b,struct mme_value data,struct mme_value maybe_tmp)79 mme_free_reg_if_tmp(struct mme_builder *b,
80 struct mme_value data,
81 struct mme_value maybe_tmp)
82 {
83 if (!mme_is_zero(data) &&
84 !mme_is_zero(maybe_tmp) &&
85 data.type != maybe_tmp.type)
86 mme_free_reg(b, maybe_tmp);
87 }
88
89 static void
mme_fermi_new_inst(struct mme_fermi_builder * b)90 mme_fermi_new_inst(struct mme_fermi_builder *b)
91 {
92 struct mme_fermi_inst noop = { MME_FERMI_INST_DEFAULTS };
93 assert(b->inst_count < ARRAY_SIZE(b->insts));
94 b->insts[b->inst_count] = noop;
95 b->inst_count++;
96 b->inst_parts = 0;
97 }
98
99 static struct mme_fermi_inst *
mme_fermi_cur_inst(struct mme_fermi_builder * b)100 mme_fermi_cur_inst(struct mme_fermi_builder *b)
101 {
102 assert(b->inst_count > 0 && b->inst_count < ARRAY_SIZE(b->insts));
103 return &b->insts[b->inst_count - 1];
104 }

void
mme_fermi_add_inst(struct mme_builder *b,
                   const struct mme_fermi_inst *inst)
{
   struct mme_fermi_builder *fb = &b->fermi;

   if (fb->inst_parts || fb->inst_count == 0)
      mme_fermi_new_inst(fb);

   *mme_fermi_cur_inst(fb) = *inst;
   mme_fermi_new_inst(fb);
}

static inline void
mme_fermi_set_inst_parts(struct mme_fermi_builder *b,
                         enum mme_fermi_instr_parts parts)
{
   assert(!(b->inst_parts & parts));
   b->inst_parts |= parts;
}

static inline bool
mme_fermi_next_inst_can_fit_a_full_inst(struct mme_fermi_builder *b)
{
   return !mme_fermi_is_empty(b) && b->inst_parts == 0;
}

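/* Sets the current method address (MADDR) that subsequent emits will write
 * to.  The low 12 bits hold the method address in dwords (mthd >> 2); the
 * field starting at bit 12 presumably encodes the per-emit auto-increment,
 * set to 1 here.  A register index is added through the ALU via src[0],
 * while an immediate index is folded directly into the instruction's
 * immediate.
 */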
void
mme_fermi_mthd_arr(struct mme_builder *b,
                   uint16_t mthd, struct mme_value index)
{
   struct mme_fermi_builder *fb = &b->fermi;
   struct mme_value src_reg = mme_zero();

   if (!mme_fermi_next_inst_can_fit_a_full_inst(fb))
      mme_fermi_new_inst(fb);

   struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);

   uint32_t mthd_imm = (1 << 12) | (mthd >> 2);

   if (index.type == MME_VALUE_TYPE_REG) {
      src_reg = index;
   } else if (index.type == MME_VALUE_TYPE_IMM) {
      mthd_imm += index.imm;
   }

   inst->op = MME_FERMI_OP_ADD_IMM;
   inst->src[0] = mme_value_alu_reg(src_reg);
   inst->imm = mthd_imm;
   inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR;
   inst->dst = MME_FERMI_REG_ZERO;

   mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
                                MME_FERMI_INSTR_PART_ASSIGN);
}

static inline bool
mme_fermi_prev_inst_can_emit(struct mme_fermi_builder *b,
                             struct mme_value data)
{
   if (mme_fermi_is_empty(b))
      return false;

   if (b->inst_parts & MME_FERMI_INSTR_PART_ASSIGN) {
      struct mme_fermi_inst *inst = mme_fermi_cur_inst(b);

      if (inst->assign_op == MME_FERMI_ASSIGN_OP_MOVE &&
          data.type == MME_VALUE_TYPE_REG &&
          mme_value_alu_reg(data) == inst->dst)
         return true;
   }

   return false;
}

static inline bool
mme_fermi_next_inst_can_emit(struct mme_fermi_builder *fb,
                             struct mme_value data)
{
   if (mme_fermi_is_empty(fb))
      return false;

   if (fb->inst_parts == 0)
      return true;

   return mme_fermi_prev_inst_can_emit(fb, data);
}

static inline struct mme_value
mme_fermi_reg(uint32_t reg)
{
   struct mme_value val = {
      .type = MME_VALUE_TYPE_REG,
      .reg = reg,
   };
   return val;
}

static bool
is_int18(uint32_t i)
{
   return i == (uint32_t)util_mask_sign_extend(i, 18);
}

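/* Emits dst = src + sign_extend(imm, 18) using ADD_IMM.  The caller must
 * have checked that the immediate fits in the 18-bit signed field.
 */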
static inline void
mme_fermi_add_imm18(struct mme_fermi_builder *fb,
                    struct mme_value dst,
                    struct mme_value src,
                    uint32_t imm)
{
   assert(dst.type == MME_VALUE_TYPE_REG &&
          mme_fermi_is_zero_or_reg(src) && is_int18(imm));

   if (!mme_fermi_next_inst_can_fit_a_full_inst(fb)) {
      mme_fermi_new_inst(fb);
   }

   struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);

   inst->op = MME_FERMI_OP_ADD_IMM;
   inst->src[0] = mme_value_alu_reg(src);
   inst->imm = imm;
   inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE;
   inst->dst = mme_value_alu_reg(dst);

   mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
                                MME_FERMI_INSTR_PART_ASSIGN);
}

static bool
mme_fermi_bfe_lsl_can_use_imm(struct mme_fermi_builder *b,
                              struct mme_value src_bits,
                              struct mme_value dst_bits)
{
   return (mme_fermi_is_zero_or_reg(src_bits) &&
           mme_fermi_is_zero_or_imm(dst_bits) &&
           mme_value_alu_imm(dst_bits) <= 31);
}

static bool
mme_fermi_bfe_lsl_can_use_reg(struct mme_fermi_builder *b,
                              struct mme_value src_bits,
                              struct mme_value dst_bits)
{
   return (mme_fermi_is_zero_or_imm(src_bits) &&
           mme_fermi_is_zero_or_reg(dst_bits) &&
           mme_value_alu_imm(src_bits) <= 31);
}

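/* Emits a bitfield-extract-and-shift: take `size` bits of src_reg starting
 * at bit src_bits and place them at bit dst_bits of dst_reg.  Only one of
 * the two bit offsets can be an immediate (and must be <= 31); the other has
 * to be a register, which selects between BFE_LSL_IMM and BFE_LSL_REG.
 */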
static void
mme_fermi_bfe(struct mme_fermi_builder *fb,
              struct mme_value dst_reg,
              struct mme_value src_bits,
              struct mme_value src_reg,
              struct mme_value dst_bits,
              uint32_t size)
{
   assert(dst_reg.type == MME_VALUE_TYPE_REG &&
          mme_fermi_is_zero_or_reg(src_reg) &&
          (mme_fermi_bfe_lsl_can_use_imm(fb, src_bits, dst_bits) ||
           mme_fermi_bfe_lsl_can_use_reg(fb, src_bits, dst_bits)));

   if (!mme_fermi_next_inst_can_fit_a_full_inst(fb))
      mme_fermi_new_inst(fb);

   struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);

   if (mme_fermi_bfe_lsl_can_use_imm(fb, src_bits, dst_bits)) {
      inst->op = MME_FERMI_OP_BFE_LSL_IMM;
      inst->src[0] = mme_value_alu_reg(src_bits);
      inst->src[1] = mme_value_alu_reg(src_reg);
      inst->bitfield.dst_bit = mme_value_alu_imm(dst_bits);
      inst->bitfield.size = size;
   } else if (mme_fermi_bfe_lsl_can_use_reg(fb, src_bits, dst_bits)) {
      inst->op = MME_FERMI_OP_BFE_LSL_REG;
      inst->src[0] = mme_value_alu_reg(dst_bits);
      inst->src[1] = mme_value_alu_reg(src_reg);
      inst->bitfield.src_bit = mme_value_alu_imm(src_bits);
      inst->bitfield.size = size;
   }

   inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE;
   inst->dst = mme_value_alu_reg(dst_reg);

   mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
                                MME_FERMI_INSTR_PART_ASSIGN);
}

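/* Shifts are built on top of the BFE_LSL forms above: for a left shift the
 * shift amount becomes the destination bit offset, for a right shift it
 * becomes the source bit offset.  When the value to shift is an immediate it
 * is first moved into dst, which is why the shift amount must not alias dst.
 */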
static void
mme_fermi_sll_to(struct mme_builder *b,
                 struct mme_value dst,
                 struct mme_value x,
                 struct mme_value y)
{
   struct mme_fermi_builder *fb = &b->fermi;
   assert(mme_fermi_is_zero_or_reg(dst));

   if (x.type == MME_VALUE_TYPE_REG) {
      mme_fermi_bfe(fb, dst, mme_zero(), x, y, 31);
   } else {
      assert(y.type != MME_VALUE_TYPE_REG || y.reg != dst.reg);
      mme_mov_to(b, dst, x);
      mme_fermi_bfe(fb, dst, mme_zero(), dst, y, 31);
   }
}

static void
mme_fermi_srl_to(struct mme_builder *b,
                 struct mme_value dst,
                 struct mme_value x,
                 struct mme_value y)
{
   struct mme_fermi_builder *fb = &b->fermi;
   assert(mme_fermi_is_zero_or_reg(dst));

   if (x.type == MME_VALUE_TYPE_REG) {
      mme_fermi_bfe(fb, dst, y, x, mme_zero(), 31);
   } else {
      assert(y.type != MME_VALUE_TYPE_REG || y.reg != dst.reg);
      mme_mov_to(b, dst, x);
      mme_fermi_bfe(fb, dst, y, dst, mme_zero(), 31);
   }
}

void
mme_fermi_bfe_to(struct mme_builder *b, struct mme_value dst,
                 struct mme_value x, struct mme_value pos, uint8_t bits)
{
   struct mme_fermi_builder *fb = &b->fermi;
   assert(mme_fermi_is_zero_or_reg(dst));

   mme_fermi_bfe(fb, dst, pos, x, mme_zero(), bits);
}

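/* The Fermi macro ALU has no multiply op (see mme_to_fermi_alu_op below), so
 * unsigned multiplies are lowered to a shift-and-add loop: for every set low
 * bit of x, y is accumulated into dst, then x is shifted right and y shifted
 * left.  Both x and y are clobbered and freed.
 */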
void
mme_fermi_umul_32x32_32_to_free_srcs(struct mme_builder *b,
                                     struct mme_value dst,
                                     struct mme_value x,
                                     struct mme_value y)
{
   mme_while (b, ine, x, mme_zero()) {
      struct mme_value lsb = mme_and(b, x, mme_imm(1));
      mme_if (b, ine, lsb, mme_zero()) {
         mme_add_to(b, dst, dst, y);
      }
      mme_free_reg(b, lsb);
      mme_srl_to(b, x, x, mme_imm(1u));
      mme_sll_to(b, y, y, mme_imm(1u));
   }
   mme_free_reg(b, x);
   mme_free_reg(b, y);
}

void
mme_fermi_umul_32x64_64_to_free_srcs(struct mme_builder *b,
                                     struct mme_value64 dst,
                                     struct mme_value x,
                                     struct mme_value64 y)
{
   mme_while (b, ine, x, mme_zero()) {
      struct mme_value lsb = mme_and(b, x, mme_imm(1));
      mme_if (b, ine, lsb, mme_zero()) {
         mme_add64_to(b, dst, dst, y);
      }
      mme_free_reg(b, lsb);
      mme_srl_to(b, x, x, mme_imm(1u));
      /* y = y << 1 */
      mme_add64_to(b, y, y, y);
   }
   mme_free_reg(b, x);
   mme_free_reg64(b, y);
}

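/* Materializes an immediate into a freshly allocated register.  Values that
 * fit in the 18-bit signed ADD_IMM field take a single instruction; anything
 * wider is built as (high 16 bits << 16) + low 16 bits.
 */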
static struct mme_value
mme_fermi_load_imm_to_reg(struct mme_builder *b, struct mme_value data)
{
   struct mme_fermi_builder *fb = &b->fermi;

   assert(data.type == MME_VALUE_TYPE_IMM ||
          data.type == MME_VALUE_TYPE_ZERO);

   /* If the immediate is zero, we can simplify this */
   if (mme_is_zero(data)) {
      return mme_zero();
   } else {
      uint32_t imm = data.imm;

      struct mme_value dst = mme_alloc_reg(b);

      if (is_int18(imm)) {
         mme_fermi_add_imm18(fb, dst, mme_zero(), imm);
      } else {
         /* TODO: a possible optimisation would be to find the lowest set bit
          * and check whether the shifted-down value fits in 16 bits.
          */
         uint32_t high_bits = imm >> 16;
         uint32_t low_bits = imm & UINT16_MAX;

         mme_fermi_add_imm18(fb, dst, mme_zero(), high_bits);
         mme_fermi_sll_to(b, dst, dst, mme_imm(16));
         mme_fermi_add_imm18(fb, dst, dst, low_bits);
      }

      return dst;
   }
}

static inline struct mme_value
mme_fermi_value_as_reg(struct mme_builder *b,
                       struct mme_value data)
{
   if (data.type == MME_VALUE_TYPE_REG || mme_is_zero(data)) {
      return data;
   }

   return mme_fermi_load_imm_to_reg(b, data);
}

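/* Emits `data` to the current method.  If the previous instruction already
 * assigned the same register with a plain MOVE, its assign op is upgraded to
 * MOVE_EMIT instead of spending an extra ADD with the zero register.
 */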
void
mme_fermi_emit(struct mme_builder *b,
               struct mme_value data)
{
   struct mme_fermi_builder *fb = &b->fermi;
   struct mme_fermi_inst *inst;

   /* Check if previous assign was to the same dst register and modify assign
    * mode if needed
    */
   if (mme_fermi_prev_inst_can_emit(fb, data)) {
      inst = mme_fermi_cur_inst(fb);
      inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE_EMIT;
   } else {
      struct mme_value data_reg = mme_fermi_value_as_reg(b, data);

      /* Because of mme_fermi_value_as_reg, a new instruction may just have
       * been emitted whose assignment we can reuse for the emit.
       */
      if (mme_fermi_prev_inst_can_emit(fb, data_reg)) {
         inst = mme_fermi_cur_inst(fb);
         inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE_EMIT;
      } else {
         if (!mme_fermi_next_inst_can_emit(fb, data))
            mme_fermi_new_inst(fb);

         inst = mme_fermi_cur_inst(fb);
         inst->op = MME_FERMI_OP_ALU_REG;
         inst->alu_op = MME_FERMI_ALU_OP_ADD;
         inst->src[0] = mme_value_alu_reg(data_reg);
         inst->src[1] = MME_FERMI_REG_ZERO;
         inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE_EMIT;
         inst->dst = MME_FERMI_REG_ZERO;

         mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
                                      MME_FERMI_INSTR_PART_ASSIGN);
      }

      mme_free_reg_if_tmp(b, data, data_reg);
   }
}

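/* Emits a relative BRANCH that tests `src` against zero.  `offset` is a
 * signed instruction delta; forward branches are emitted with offset 0 and
 * patched once the end of the block is known (see mme_fermi_end_cf).
 */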
static void
mme_fermi_branch(struct mme_fermi_builder *fb,
                 enum mme_fermi_reg src, int32_t offset, bool if_zero)
{
   if (fb->inst_parts || mme_fermi_is_empty(fb))
      mme_fermi_new_inst(fb);

   struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);

   inst->op = MME_FERMI_OP_BRANCH;
   inst->src[0] = src;
   inst->imm = offset;
   inst->branch.no_delay = true;
   inst->branch.not_zero = if_zero;

   mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
                                MME_FERMI_INSTR_PART_ASSIGN);
}

static void
mme_fermi_start_cf(struct mme_builder *b,
                   enum mme_cf_type type,
                   struct mme_value cond,
                   bool is_zero)
{
   struct mme_fermi_builder *fb = &b->fermi;

   /* The condition here is inverted because we want to branch and skip the
    * block when the condition fails.
    */
   assert(mme_fermi_is_zero_or_reg(cond));
   mme_fermi_branch(fb, mme_value_alu_reg(cond), 0, is_zero);

   uint16_t ip = fb->inst_count - 1;
   assert(fb->insts[ip].op == MME_FERMI_OP_BRANCH);

   assert(fb->cf_depth < ARRAY_SIZE(fb->cf_stack));
   fb->cf_stack[fb->cf_depth++] = (struct mme_cf) {
      .type = type,
      .start_ip = ip,
   };

   /* The inside of control-flow needs to start with a new instruction */
   mme_fermi_new_inst(fb);
}

static struct mme_cf
mme_fermi_end_cf(struct mme_builder *b, enum mme_cf_type type)
{
   struct mme_fermi_builder *fb = &b->fermi;

   if (fb->inst_parts)
      mme_fermi_new_inst(fb);

   assert(fb->cf_depth > 0);
   struct mme_cf cf = fb->cf_stack[--fb->cf_depth];
   assert(cf.type == type);

   assert(fb->insts[cf.start_ip].op == MME_FERMI_OP_BRANCH);
   fb->insts[cf.start_ip].imm = fb->inst_count - cf.start_ip - 1;

   return cf;
}

static struct mme_value
mme_fermi_neq(struct mme_builder *b, struct mme_value x, struct mme_value y)
{
   struct mme_fermi_builder *fb = &b->fermi;

   /* Generate some value that's non-zero if x != y */
   struct mme_value res = mme_alloc_reg(b);
   if (x.type == MME_VALUE_TYPE_IMM && is_int18(-x.imm)) {
      mme_fermi_add_imm18(fb, res, y, -x.imm);
   } else if (y.type == MME_VALUE_TYPE_IMM && is_int18(-y.imm)) {
      mme_fermi_add_imm18(fb, res, x, -y.imm);
   } else {
      mme_xor_to(b, res, x, y);
   }
   return res;
}

void
mme_fermi_start_if(struct mme_builder *b,
                   enum mme_cmp_op op,
                   bool if_true,
                   struct mme_value x,
                   struct mme_value y)
{
   assert(op == MME_CMP_OP_EQ);

   if (mme_is_zero(x)) {
      mme_fermi_start_cf(b, MME_CF_TYPE_IF, y, if_true);
   } else if (mme_is_zero(y)) {
      mme_fermi_start_cf(b, MME_CF_TYPE_IF, x, if_true);
   } else {
      struct mme_value tmp = mme_fermi_neq(b, x, y);
      mme_fermi_start_cf(b, MME_CF_TYPE_IF, tmp, if_true);
      mme_free_reg(b, tmp);
   }
}

void
mme_fermi_end_if(struct mme_builder *b)
{
   mme_fermi_end_cf(b, MME_CF_TYPE_IF);
}

void
mme_fermi_start_while(struct mme_builder *b)
{
   mme_fermi_start_cf(b, MME_CF_TYPE_WHILE, mme_zero(), false);
}

static void
mme_fermi_end_while_zero(struct mme_builder *b,
                         struct mme_cf cf,
                         struct mme_value cond,
                         bool is_zero)
{
   struct mme_fermi_builder *fb = &b->fermi;

   if (fb->inst_parts)
      mme_fermi_new_inst(fb);

   int delta = fb->inst_count - cf.start_ip - 2;
   mme_fermi_branch(fb, mme_value_alu_reg(cond), -delta, !is_zero);
}

void
mme_fermi_end_while(struct mme_builder *b,
                    enum mme_cmp_op op,
                    bool if_true,
                    struct mme_value x,
                    struct mme_value y)
{
   assert(op == MME_CMP_OP_EQ);

   struct mme_cf cf = mme_fermi_end_cf(b, MME_CF_TYPE_WHILE);

   if (mme_is_zero(x)) {
      mme_fermi_end_while_zero(b, cf, y, if_true);
   } else if (mme_is_zero(y)) {
      mme_fermi_end_while_zero(b, cf, x, if_true);
   } else {
      struct mme_value tmp = mme_fermi_neq(b, x, y);
      mme_fermi_end_while_zero(b, cf, tmp, if_true);
      mme_free_reg(b, tmp);
   }
}

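/* Counted loops are lowered to a while loop on a private counter register:
 * start_loop copies `count` into fb->loop_counter and opens a while block,
 * end_loop decrements the counter and closes the block with a
 * "not equal to zero" back-branch.
 */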
void
mme_fermi_start_loop(struct mme_builder *b,
                     struct mme_value count)
{
   struct mme_fermi_builder *fb = &b->fermi;

   assert(mme_is_zero(fb->loop_counter));
   fb->loop_counter = mme_mov(b, count);

   mme_start_while(b);
}

void
mme_fermi_end_loop(struct mme_builder *b)
{
   struct mme_fermi_builder *fb = &b->fermi;

   mme_sub_to(b, fb->loop_counter, fb->loop_counter, mme_imm(1));
   mme_fermi_end_while(b, MME_CMP_OP_EQ, false, fb->loop_counter, mme_zero());

   mme_free_reg(b, fb->loop_counter);
   fb->loop_counter = mme_zero();
}

static inline bool
mme_fermi_next_inst_can_load_to(struct mme_fermi_builder *b)
{
   return !mme_fermi_is_empty(b) &&
          !(b->inst_parts & MME_FERMI_INSTR_PART_ASSIGN);
}

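/* Loads the next macro parameter.  The first parameter is special: it is
 * handed to the macro in R1 (see mme_fermi_builder_init), so the first load
 * is just a move from R1.  Subsequent parameters are fetched with an
 * ASSIGN_OP_LOAD on whatever instruction still has a free assignment slot.
 */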
void
mme_fermi_load_to(struct mme_builder *b,
                  struct mme_value dst)
{
   struct mme_fermi_builder *fb = &b->fermi;

   assert(dst.type == MME_VALUE_TYPE_REG ||
          dst.type == MME_VALUE_TYPE_ZERO);

   if (!fb->first_loaded) {
      struct mme_value r1 = {
         .type = MME_VALUE_TYPE_REG,
         .reg = 1,
      };
      mme_mov_to(b, dst, r1);
      mme_free_reg(b, r1);
      fb->first_loaded = true;
      return;
   }

   if (!mme_fermi_next_inst_can_load_to(fb))
      mme_fermi_new_inst(fb);

   struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);

   inst->assign_op = MME_FERMI_ASSIGN_OP_LOAD;
   inst->dst = mme_value_alu_reg(dst);

   mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_ASSIGN);
}

struct mme_value
mme_fermi_load(struct mme_builder *b)
{
   struct mme_fermi_builder *fb = &b->fermi;

   if (!fb->first_loaded) {
      struct mme_value r1 = {
         .type = MME_VALUE_TYPE_REG,
         .reg = 1,
      };
      fb->first_loaded = true;
      return r1;
   }

   struct mme_value dst = mme_alloc_reg(b);
   mme_fermi_load_to(b, dst);

   return dst;
}

static enum mme_fermi_alu_op
mme_to_fermi_alu_op(enum mme_alu_op op)
{
   switch (op) {
#define ALU_CASE(op) case MME_ALU_OP_##op: return MME_FERMI_ALU_OP_##op;
   ALU_CASE(ADD)
   ALU_CASE(ADDC)
   ALU_CASE(SUB)
   ALU_CASE(SUBB)
   ALU_CASE(AND)
   ALU_CASE(AND_NOT)
   ALU_CASE(NAND)
   ALU_CASE(OR)
   ALU_CASE(XOR)
#undef ALU_CASE
   default:
      unreachable("Unsupported MME ALU op");
   }
}

static bool
is_imm18_nonzero(struct mme_value x)
{
   return x.type == MME_VALUE_TYPE_IMM && x.imm != 0 && is_int18(x.imm);
}

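/* Lowers a generic ALU op to Fermi instructions.  ADD/SUB with a small
 * immediate take the ADD_IMM fast path unless carry has to be preserved for
 * 64-bit lowering; MUL/SLL/SRL/NOT are lowered in software; everything else
 * maps onto a single ALU_REG instruction.
 */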
static void
mme_fermi_build_alu(struct mme_builder *b,
                    struct mme_value dst,
                    enum mme_alu_op op,
                    struct mme_value x,
                    struct mme_value y,
                    bool need_carry)
{
   struct mme_fermi_builder *fb = &b->fermi;

   switch (op) {
   case MME_ALU_OP_ADD:
      if (is_imm18_nonzero(x) && !need_carry) {
         mme_fermi_add_imm18(fb, dst, y, x.imm);
         return;
      }
      if (is_imm18_nonzero(y) && !need_carry) {
         mme_fermi_add_imm18(fb, dst, x, y.imm);
         return;
      }
      break;
   case MME_ALU_OP_SUB:
      if (y.type == MME_VALUE_TYPE_IMM && is_int18(-y.imm) && !need_carry) {
         mme_fermi_add_imm18(fb, dst, x, -y.imm);
         return;
      }
      break;
   case MME_ALU_OP_MUL:
      x = mme_mov(b, x);
      y = mme_mov(b, y);
      mme_fermi_umul_32x32_32_to_free_srcs(b, dst, x, y);
      return;
   case MME_ALU_OP_SLL:
      mme_fermi_sll_to(b, dst, x, y);
      return;
   case MME_ALU_OP_SRL:
      mme_fermi_srl_to(b, dst, x, y);
      return;
   case MME_ALU_OP_NOT:
      mme_and_not_to(b, dst, mme_imm(~(uint32_t)0), x);
      return;
   default:
      break;
   }

   assert(mme_fermi_is_zero_or_reg(dst));

   struct mme_value x_reg = mme_fermi_value_as_reg(b, x);
   struct mme_value y_reg = mme_fermi_value_as_reg(b, y);

   if (!mme_fermi_next_inst_can_fit_a_full_inst(fb))
      mme_fermi_new_inst(fb);

   struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
   inst->op = MME_FERMI_OP_ALU_REG;
   inst->alu_op = mme_to_fermi_alu_op(op);
   inst->src[0] = mme_value_alu_reg(x_reg);
   inst->src[1] = mme_value_alu_reg(y_reg);
   inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE;
   inst->dst = mme_value_alu_reg(dst);

   mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
                                MME_FERMI_INSTR_PART_ASSIGN);

   mme_free_reg_if_tmp(b, x, x_reg);
   mme_free_reg_if_tmp(b, y, y_reg);
}

void
mme_fermi_alu_to(struct mme_builder *b,
                 struct mme_value dst,
                 enum mme_alu_op op,
                 struct mme_value x,
                 struct mme_value y)
{
   mme_fermi_build_alu(b, dst, op, x, y, false);
}

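/* 64-bit ALU ops are lowered to two 32-bit ops: op_lo on the low halves and
 * op_hi on the high halves (e.g. ADD/ADDC or SUB/SUBB), with need_carry set
 * so the immediate fast paths are not taken between the two halves,
 * presumably because ADD_IMM does not interact with the carry flag.
 */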
void
mme_fermi_alu64_to(struct mme_builder *b,
                   struct mme_value64 dst,
                   enum mme_alu_op op_lo,
                   enum mme_alu_op op_hi,
                   struct mme_value64 x,
                   struct mme_value64 y)
{
   assert(dst.lo.type == MME_VALUE_TYPE_REG);
   assert(dst.hi.type == MME_VALUE_TYPE_REG);

   mme_fermi_build_alu(b, dst.lo, op_lo, x.lo, y.lo, true);
   mme_fermi_build_alu(b, dst.hi, op_hi, x.hi, y.hi, true);
}

void
mme_fermi_state_arr_to(struct mme_builder *b,
                       struct mme_value dst,
                       uint16_t state,
                       struct mme_value index)
{
   struct mme_fermi_builder *fb = &b->fermi;

   assert(mme_fermi_is_zero_or_reg(dst));
   assert(state % 4 == 0);

   struct mme_value index_reg = mme_fermi_value_as_reg(b, index);

   if (!mme_fermi_next_inst_can_fit_a_full_inst(fb))
      mme_fermi_new_inst(fb);

   struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
   inst->op = MME_FERMI_OP_STATE;
   inst->src[0] = mme_value_alu_reg(index_reg);
   inst->src[1] = MME_FERMI_REG_ZERO;
   inst->imm = state >> 2;
   inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE;
   inst->dst = mme_value_alu_reg(dst);

   mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
                                MME_FERMI_INSTR_PART_ASSIGN);

   mme_free_reg_if_tmp(b, index, index_reg);
}

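/* Bitfield merge: the MERGE op takes the `bits`-wide field of y starting at
 * src_pos and inserts it into x at dst_pos, writing the result to dst.
 */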
void
mme_fermi_merge_to(struct mme_builder *b, struct mme_value dst,
                   struct mme_value x, struct mme_value y,
                   uint16_t dst_pos, uint16_t bits, uint16_t src_pos)
{
   struct mme_fermi_builder *fb = &b->fermi;

   assert(mme_fermi_is_zero_or_reg(dst));
   assert(dst_pos < 32);
   assert(bits < 32);
   assert(src_pos < 32);

   struct mme_value x_reg = mme_fermi_value_as_reg(b, x);
   struct mme_value y_reg = mme_fermi_value_as_reg(b, y);

   if (!mme_fermi_next_inst_can_fit_a_full_inst(fb))
      mme_fermi_new_inst(fb);

   struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);

   inst->op = MME_FERMI_OP_MERGE;
   inst->src[0] = mme_value_alu_reg(x_reg);
   inst->src[1] = mme_value_alu_reg(y_reg);
   inst->bitfield.dst_bit = dst_pos;
   inst->bitfield.src_bit = src_pos;
   inst->bitfield.size = bits;

   inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE;
   inst->dst = mme_value_alu_reg(dst);

   mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
                                MME_FERMI_INSTR_PART_ASSIGN);

   mme_free_reg_if_tmp(b, x, x_reg);
   mme_free_reg_if_tmp(b, y, y_reg);
}

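/* Encodes the finished macro into a freshly malloc()ed dword buffer.  On
 * allocation failure NULL is returned and *size_out is left untouched.
 */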
uint32_t *
mme_fermi_builder_finish(struct mme_fermi_builder *b, size_t *size_out)
{
   assert(b->cf_depth == 0);

   /* TODO: If there are at least two instructions and we can guarantee the
    * last two instructions get executed (i.e. they are not inside
    * control-flow), we don't need to add a pair of NOPs.
    */
   mme_fermi_new_inst(b);
   mme_fermi_new_inst(b);

   b->insts[b->inst_count - 2].end_next = true;

   size_t enc_size = b->inst_count * sizeof(uint32_t);
   uint32_t *enc = malloc(enc_size);
   if (enc != NULL) {
      mme_fermi_encode(enc, b->inst_count, b->insts);
      *size_out = enc_size;
   }
   return enc;
}

void
mme_fermi_builder_dump(struct mme_builder *b, FILE *fp)
{
   struct mme_fermi_builder *fb = &b->fermi;

   mme_fermi_print(fp, fb->insts, fb->inst_count);
}