1 /*
2 * Copyright © 2022 Collabora Ltd.
3 * SPDX-License-Identifier: MIT
4 */
5 #ifndef MME_BUILDER_H
6 #define MME_BUILDER_H
7
8 #include "mme_value.h"
9 #include "mme_tu104.h"
10 #include "nv_device_info.h"
11
12 #include "util/bitscan.h"
13 #include "util/enum_operators.h"
14
15 #ifdef __cplusplus
16 extern "C" {
17 #endif
18
/* ALU opcodes understood by the generation-independent MME builder.
 *
 * They are lowered to the hardware encoding by the Fermi or TU104
 * back-end.  Not every op exists on every generation (MULU, for
 * instance, is asserted Turing+ by the umul helpers below).
 */
enum mme_alu_op {
   MME_ALU_OP_ADD,      /* 32-bit add */
   MME_ALU_OP_ADDC,     /* add with carry-in; high half of 64-bit adds */
   MME_ALU_OP_SUB,      /* 32-bit subtract */
   MME_ALU_OP_SUBB,     /* subtract with borrow-in; high half of 64-bit subs */
   MME_ALU_OP_MUL,      /* multiply, low 32 bits of the product */
   MME_ALU_OP_MULH,     /* multiply, high 32 bits of the product */
   MME_ALU_OP_MULU,     /* unsigned multiply (see mme_umul_* helpers) */
   MME_ALU_OP_CLZ,      /* count leading zeros */
   MME_ALU_OP_SLL,      /* shift left logical */
   MME_ALU_OP_SRL,      /* shift right logical */
   MME_ALU_OP_SRA,      /* shift right arithmetic */
   MME_ALU_OP_NOT,      /* bitwise not */
   MME_ALU_OP_AND,      /* bitwise and */
   MME_ALU_OP_AND_NOT,  /* x & ~y */
   MME_ALU_OP_NAND,     /* ~(x & y) */
   MME_ALU_OP_OR,       /* bitwise or */
   MME_ALU_OP_XOR,      /* bitwise xor */
   MME_ALU_OP_SLT,      /* set if x < y, signed (see mme_slt) */
   MME_ALU_OP_SLTU,     /* set if x < y, unsigned */
   MME_ALU_OP_SLE,      /* set if x <= y, signed */
   MME_ALU_OP_SLEU,     /* set if x <= y, unsigned */
   MME_ALU_OP_SEQ,      /* set if x == y */
   MME_ALU_OP_DREAD,    /* indexed scratch-RAM read (see mme_dread) */
   MME_ALU_OP_DWRITE,   /* indexed scratch-RAM write (see mme_dwrite) */
};
45
/* Comparison predicates used by the control-flow helpers (if/while/exit
 * below).  The "U" variants compare unsigned; the i/u prefix of the
 * generated helpers (mme_start_if_ilt vs. mme_start_if_ult) selects
 * between them. */
enum mme_cmp_op {
   MME_CMP_OP_LT,    /* x < y, signed */
   MME_CMP_OP_LTU,   /* x < y, unsigned */
   MME_CMP_OP_LE,    /* x <= y, signed */
   MME_CMP_OP_LEU,   /* x <= y, unsigned */
   MME_CMP_OP_EQ,    /* x == y */
};
53
/* Kinds of open control-flow constructs tracked while building a macro. */
enum mme_cf_type {
   MME_CF_TYPE_IF,
   MME_CF_TYPE_LOOP,
   MME_CF_TYPE_WHILE,
};
59
/* One entry of the back-ends' control-flow stack: what construct was
 * opened and at which instruction pointer, so the matching end can patch
 * branch targets. */
struct mme_cf {
   enum mme_cf_type type;
   uint16_t start_ip;   /* IP of the instruction that opened the construct */
};
64
65 struct mme_builder;
66
67 #include "mme_tu104_builder.h"
68 #include "mme_fermi_builder.h"
69
/* 3D class numbers used to pick a back-end: classes in [FERMI, TURING)
 * use the Fermi-style MME, TURING and up use the TU104-style MME. */
#define MME_CLS_FERMI 0x9000
#define MME_CLS_TURING 0xc500

struct mme_builder {
   const struct nv_device_info *devinfo; /* device we are building for */
   struct mme_reg_alloc reg_alloc;       /* shared MME register allocator */
   /* Back-end state; which member is live is decided once in
    * mme_builder_init() from devinfo->cls_eng3d. */
   union {
      struct mme_tu104_builder tu104;
      struct mme_fermi_builder fermi;
   };
};
81
82 static inline void
mme_builder_init(struct mme_builder * b,const struct nv_device_info * dev)83 mme_builder_init(struct mme_builder *b, const struct nv_device_info *dev)
84 {
85 memset(b, 0, sizeof(*b));
86 b->devinfo = dev;
87
88 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
89 mme_tu104_builder_init(b);
90 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
91 mme_fermi_builder_init(b);
92 else
93 unreachable("Unsupported GPU class");
94 }
95
96 static inline uint32_t *
mme_builder_finish(struct mme_builder * b,size_t * size_out)97 mme_builder_finish(struct mme_builder *b, size_t *size_out)
98 {
99 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
100 return mme_tu104_builder_finish(&b->tu104, size_out);
101 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
102 return mme_fermi_builder_finish(&b->fermi, size_out);
103 else
104 unreachable("Unsupported GPU class");
105 }
106
/* Dump a human-readable form of the macro built so far to @fp. */
static inline void
mme_builder_dump(struct mme_builder *b, FILE *fp)
{
   /* NOTE(review): unlike mme_builder_finish(), these pass the top-level
    * builder rather than &b->tu104 / &b->fermi — presumably the dump
    * entry points take a struct mme_builder *; confirm against the
    * back-end headers. */
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_builder_dump(b, fp);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_builder_dump(b, fp);
   else
      unreachable("Unsupported GPU class");
}
117
/* Allocate a scratch MME register from the builder's register allocator.
 * Pair with mme_free_reg() when the value is no longer needed. */
static inline struct mme_value
mme_alloc_reg(struct mme_builder *b)
{
   return mme_reg_alloc_alloc(&b->reg_alloc);
}
123
124 static inline void
mme_realloc_reg(struct mme_builder * b,struct mme_value value)125 mme_realloc_reg(struct mme_builder *b, struct mme_value value)
126 {
127 return mme_reg_alloc_realloc(&b->reg_alloc, value);
128 }
129
/* Return the register backing @val to the allocator.  @val must have
 * come from mme_alloc_reg(). */
static inline void
mme_free_reg(struct mme_builder *b, struct mme_value val)
{
   mme_reg_alloc_free(&b->reg_alloc, val);
}
135
136 static inline struct mme_value64
mme_alloc_reg64(struct mme_builder * b)137 mme_alloc_reg64(struct mme_builder *b)
138 {
139 struct mme_value lo = mme_alloc_reg(b);
140 struct mme_value hi = mme_alloc_reg(b);
141 return mme_value64(lo, hi);
142 }
143
144 static inline void
mme_free_reg64(struct mme_builder * b,struct mme_value64 val)145 mme_free_reg64(struct mme_builder *b, struct mme_value64 val)
146 {
147 mme_reg_alloc_free(&b->reg_alloc, val.lo);
148 mme_reg_alloc_free(&b->reg_alloc, val.hi);
149 }
150
/* Emit dst = x <op> y, dispatching to the generation-specific ALU
 * encoder.  Sources may be registers, immediates, or mme_zero()
 * depending on back-end support. */
static inline void
mme_alu_to(struct mme_builder *b,
           struct mme_value dst,
           enum mme_alu_op op,
           struct mme_value x,
           struct mme_value y)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_alu_to(b, dst, op, x, y);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_alu_to(b, dst, op, x, y);
   else
      unreachable("Unsupported GPU class");
}
165
166 static inline struct mme_value
mme_alu(struct mme_builder * b,enum mme_alu_op op,struct mme_value x,struct mme_value y)167 mme_alu(struct mme_builder *b,
168 enum mme_alu_op op,
169 struct mme_value x,
170 struct mme_value y)
171 {
172 struct mme_value dst = mme_alloc_reg(b);
173 mme_alu_to(b, dst, op, x, y);
174 return dst;
175 }
176
/* Emit an ALU op purely for its side effects (e.g. DWRITE); the result
 * is discarded by targeting the zero register. */
static inline void
mme_alu_no_dst(struct mme_builder *b,
               enum mme_alu_op op,
               struct mme_value x,
               struct mme_value y)
{
   mme_alu_to(b, mme_zero(), op, x, y);
}
185
/* Emit a 64-bit ALU op as a lo/hi pair: op_lo operates on the low
 * halves, op_hi on the high halves.  Choosing a carrying pair
 * (ADD/ADDC, SUB/SUBB, MUL/MULH — see the helpers below) is the
 * caller's responsibility. */
static inline void
mme_alu64_to(struct mme_builder *b,
             struct mme_value64 dst,
             enum mme_alu_op op_lo,
             enum mme_alu_op op_hi,
             struct mme_value64 x,
             struct mme_value64 y)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_alu64_to(b, dst, op_lo, op_hi, x, y);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_alu64_to(b, dst, op_lo, op_hi, x, y);
   else
      unreachable("Unsupported GPU class");
}
201
202 static inline struct mme_value64
mme_alu64(struct mme_builder * b,enum mme_alu_op op_lo,enum mme_alu_op op_hi,struct mme_value64 x,struct mme_value64 y)203 mme_alu64(struct mme_builder *b,
204 enum mme_alu_op op_lo, enum mme_alu_op op_hi,
205 struct mme_value64 x, struct mme_value64 y)
206 {
207 struct mme_value64 dst = {
208 mme_alloc_reg(b),
209 mme_alloc_reg(b),
210 };
211 mme_alu64_to(b, dst, op_lo, op_hi, x, y);
212 return dst;
213 }
214
/* MME_DEF_ALU1(op, OP) defines mme_<op>_to(b, dst, x) and mme_<op>(b, x)
 * for a unary ALU op; the unused second source is tied to zero. */
#define MME_DEF_ALU1(op, OP)                                         \
static inline void                                                   \
mme_##op##_to(struct mme_builder *b, struct mme_value dst,           \
              struct mme_value x)                                    \
{                                                                    \
   mme_alu_to(b, dst, MME_ALU_OP_##OP, x, mme_zero());               \
}                                                                    \
                                                                     \
static inline struct mme_value                                       \
mme_##op(struct mme_builder *b,                                      \
         struct mme_value x)                                         \
{                                                                    \
   return mme_alu(b, MME_ALU_OP_##OP, x, mme_zero());                \
}

/* MME_DEF_ALU2(op, OP) defines mme_<op>_to(b, dst, x, y) and
 * mme_<op>(b, x, y) for a binary ALU op. */
#define MME_DEF_ALU2(op, OP)                                         \
static inline void                                                   \
mme_##op##_to(struct mme_builder *b, struct mme_value dst,           \
              struct mme_value x, struct mme_value y)                \
{                                                                    \
   mme_alu_to(b, dst, MME_ALU_OP_##OP, x, y);                        \
}                                                                    \
                                                                     \
static inline struct mme_value                                       \
mme_##op(struct mme_builder *b,                                      \
         struct mme_value x, struct mme_value y)                     \
{                                                                    \
   return mme_alu(b, MME_ALU_OP_##OP, x, y);                         \
}

/* mov is implemented as x + 0. */
MME_DEF_ALU1(mov,    ADD);
MME_DEF_ALU2(add,    ADD);
MME_DEF_ALU2(sub,    SUB);
MME_DEF_ALU2(mul,    MUL);
MME_DEF_ALU1(clz,    CLZ);
MME_DEF_ALU2(sll,    SLL);
MME_DEF_ALU2(srl,    SRL);
MME_DEF_ALU2(sra,    SRA);
MME_DEF_ALU1(not,    NOT);
MME_DEF_ALU2(and,    AND);
MME_DEF_ALU2(and_not,AND_NOT);
MME_DEF_ALU2(nand,   NAND);
MME_DEF_ALU2(or,     OR);
MME_DEF_ALU2(xor,    XOR);
MME_DEF_ALU2(slt,    SLT);
MME_DEF_ALU2(sltu,   SLTU);
MME_DEF_ALU2(sle,    SLE);
MME_DEF_ALU2(sleu,   SLEU);
MME_DEF_ALU2(seq,    SEQ);
/* dread's x operand is the scratch-RAM index to read. */
MME_DEF_ALU1(dread,  DREAD);

#undef MME_DEF_ALU1
#undef MME_DEF_ALU2
268
/* dst = x (64-bit move), implemented as x + 0 on both halves. */
static inline void
mme_mov64_to(struct mme_builder *b, struct mme_value64 dst,
             struct mme_value64 x)
{
   mme_alu64_to(b, dst, MME_ALU_OP_ADD, MME_ALU_OP_ADD, x, mme_imm64(0));
}
275
/* Copy the 64-bit value x into a freshly allocated register pair. */
static inline struct mme_value64
mme_mov64(struct mme_builder *b, struct mme_value64 x)
{
   return mme_alu64(b, MME_ALU_OP_ADD, MME_ALU_OP_ADD, x, mme_imm64(0));
}
281
/* dst = x + y (64-bit): ADD on the low halves, ADDC (add-with-carry) on
 * the high halves. */
static inline void
mme_add64_to(struct mme_builder *b, struct mme_value64 dst,
             struct mme_value64 x, struct mme_value64 y)
{
   mme_alu64_to(b, dst, MME_ALU_OP_ADD, MME_ALU_OP_ADDC, x, y);
}
288
/* 64-bit add into a freshly allocated register pair. */
static inline struct mme_value64
mme_add64(struct mme_builder *b,
          struct mme_value64 x, struct mme_value64 y)
{
   return mme_alu64(b, MME_ALU_OP_ADD, MME_ALU_OP_ADDC, x, y);
}
295
/* dst = x - y (64-bit): SUB on the low halves, SUBB (subtract-with-
 * borrow) on the high halves. */
static inline void
mme_sub64_to(struct mme_builder *b, struct mme_value64 dst,
             struct mme_value64 x, struct mme_value64 y)
{
   mme_alu64_to(b, dst, MME_ALU_OP_SUB, MME_ALU_OP_SUBB, x, y);
}
302
/* 64-bit subtract into a freshly allocated register pair. */
static inline struct mme_value64
mme_sub64(struct mme_builder *b,
          struct mme_value64 x, struct mme_value64 y)
{
   return mme_alu64(b, MME_ALU_OP_SUB, MME_ALU_OP_SUBB, x, y);
}
309
/* Return the low 32 bits of x * y, freeing both source registers.
 *
 * The low 32 bits of a product are identical for signed and unsigned
 * multiplication, which is why the Fermi path can use the unsigned
 * helper.  Both sources must be registers (not immediates). */
static inline struct mme_value
mme_mul_32x32_32_free_srcs(struct mme_builder *b,
                           struct mme_value x, struct mme_value y)
{
   assert(x.type == MME_VALUE_TYPE_REG);
   assert(y.type == MME_VALUE_TYPE_REG);
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {
      struct mme_value dst = mme_mul(b, x, y);
      mme_free_reg(b, x);
      mme_free_reg(b, y);
      return dst;
   } else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) {
      struct mme_value dst = mme_alloc_reg(b);
      /* Per its name, the Fermi helper frees x and y itself. */
      mme_fermi_umul_32x32_32_to_free_srcs(b, dst, x, y);
      return dst;
   } else {
      unreachable("Unsupported GPU class");
   }
}
329
/* dst = (int64_t)x * (int64_t)y: signed 32x32 -> 64 multiply via the
 * MUL (low) / MULH (high) pair with zero high source halves. */
static inline void
mme_imul_32x32_64_to(struct mme_builder *b, struct mme_value64 dst,
                     struct mme_value x, struct mme_value y)
{
   mme_alu64_to(b, dst, MME_ALU_OP_MUL, MME_ALU_OP_MULH,
                mme_value64(x, mme_zero()),
                mme_value64(y, mme_zero()));
}
338
339 static inline struct mme_value64
mme_imul_32x32_64(struct mme_builder * b,struct mme_value x,struct mme_value y)340 mme_imul_32x32_64(struct mme_builder *b,
341 struct mme_value x, struct mme_value y)
342 {
343 return mme_alu64(b, MME_ALU_OP_MUL, MME_ALU_OP_MULH,
344 mme_value64(x, mme_zero()),
345 mme_value64(y, mme_zero()));
346 }
347
/* dst = (uint64_t)x * (uint64_t)y: unsigned 32x32 -> 64 multiply.
 * MULU is only available on Turing+ (hence the assert). */
static inline void
mme_umul_32x32_64_to(struct mme_builder *b, struct mme_value64 dst,
                     struct mme_value x, struct mme_value y)
{
   assert(b->devinfo->cls_eng3d >= MME_CLS_TURING);
   mme_alu64_to(b, dst, MME_ALU_OP_MULU, MME_ALU_OP_MULH,
                mme_value64(x, mme_zero()),
                mme_value64(y, mme_zero()));
}
357
358 static inline struct mme_value64
mme_umul_32x32_64(struct mme_builder * b,struct mme_value x,struct mme_value y)359 mme_umul_32x32_64(struct mme_builder *b,
360 struct mme_value x, struct mme_value y)
361 {
362 assert(b->devinfo->cls_eng3d >= MME_CLS_TURING);
363 return mme_alu64(b, MME_ALU_OP_MULU, MME_ALU_OP_MULH,
364 mme_value64(x, mme_zero()),
365 mme_value64(y, mme_zero()));
366 }
367
/* Unsigned 32x32 -> 64 multiply that also frees both source registers.
 * Both sources must be registers. */
static inline struct mme_value64
mme_umul_32x32_64_free_srcs(struct mme_builder *b,
                            struct mme_value x, struct mme_value y)
{
   assert(x.type == MME_VALUE_TYPE_REG);
   assert(y.type == MME_VALUE_TYPE_REG);
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {
      struct mme_value64 dst = mme_umul_32x32_64(b, x, y);
      mme_free_reg(b, x);
      mme_free_reg(b, y);
      return dst;
   } else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) {
      /* Fermi has no 32x32->64 helper; widen y to 64 bits with a zero
       * high half and use the 32x64 helper, which (per its name) frees
       * x, y, and y_hi itself. */
      struct mme_value y_hi = mme_mov(b, mme_zero());
      struct mme_value64 dst = mme_alloc_reg64(b);
      mme_fermi_umul_32x64_64_to_free_srcs(b, dst, x, mme_value64(y, y_hi));
      return dst;
   } else {
      unreachable("Unsupported GPU class");
   }
}
388
/* Unsigned 32x64 -> 64 multiply (low 64 bits), freeing x and both
 * halves of y.  All sources must be registers. */
static inline struct mme_value64
mme_umul_32x64_64_free_srcs(struct mme_builder *b,
                            struct mme_value x, struct mme_value64 y)
{
   assert(x.type == MME_VALUE_TYPE_REG);
   assert(y.lo.type == MME_VALUE_TYPE_REG);
   assert(y.hi.type == MME_VALUE_TYPE_REG);
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {
      /* x * y = x*y.lo + ((x*y.hi) << 32); the term above bit 63 is
       * dropped. */
      struct mme_value64 dst = mme_umul_32x32_64(b, x, y.lo);
      struct mme_value tmp = mme_mul(b, x, y.hi);
      mme_add64_to(b, dst, dst, mme_value64(mme_zero(), tmp));
      /* NOTE(review): tmp is not freed here — looks like a leaked
       * scratch register; confirm whether mme_add64_to consumes it. */
      mme_free_reg(b, x);
      mme_free_reg64(b, y);
      return dst;
   } else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) {
      struct mme_value64 dst = mme_alloc_reg64(b);
      mme_fermi_umul_32x64_64_to_free_srcs(b, dst, x, y);
      return dst;
   } else {
      unreachable("Unsupported GPU class");
   }
}
411
/* Full 64x64 -> low-64 multiply:
 *
 *    dst = x.lo*y.lo + ((x.lo*y.hi + x.hi*y.lo) << 32)
 *
 * (the x.hi*y.hi term only affects bits >= 64 and is dropped).
 * NOTE(review): both paths rely on mme_umul_32x32_64(), which asserts
 * Turing+ — this helper is effectively Turing-only; confirm callers. */
static inline struct mme_value64
mme_mul64(struct mme_builder *b,
          struct mme_value64 x, struct mme_value64 y)
{
   /* Fast path: both high halves are statically zero. */
   if (mme_is_zero(x.hi) && mme_is_zero(y.hi))
      return mme_umul_32x32_64(b, x.lo, y.lo);

   struct mme_value64 dst = mme_umul_32x32_64(b, x.lo, y.lo);
   struct mme_value tmp = mme_alloc_reg(b);

   /* Add the two cross terms into the high half (shifted by 32 via the
    * {zero, tmp} pair). */
   mme_mul_to(b, tmp, x.lo, y.hi);
   mme_add64_to(b, dst, dst, mme_value64(mme_zero(), tmp));

   mme_mul_to(b, tmp, x.hi, y.lo);
   mme_add64_to(b, dst, dst, mme_value64(mme_zero(), tmp));

   mme_free_reg(b, tmp);

   return dst;
}
432
/* Bit-field extract: dst = (x >> pos) & ((1 << bits) - 1).
 * `pos` is a runtime MME value; `bits` is a build-time constant. */
static inline void
mme_bfe_to(struct mme_builder *b, struct mme_value dst,
           struct mme_value x, struct mme_value pos, uint8_t bits)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {
      /* Composed from SRL + AND with an immediate mask. */
      mme_srl_to(b, dst, x, pos);
      mme_and_to(b, dst, dst, mme_imm(BITFIELD_MASK(bits)));
   } else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) {
      mme_fermi_bfe_to(b, dst, x, pos, bits);
   } else {
      unreachable("Unsupported GPU class");
   }
}
446
447 static inline struct mme_value
mme_bfe(struct mme_builder * b,struct mme_value x,struct mme_value pos,uint8_t bits)448 mme_bfe(struct mme_builder *b,
449 struct mme_value x, struct mme_value pos, uint8_t bits)
450 {
451 struct mme_value dst = mme_alloc_reg(b);
452 mme_bfe_to(b, dst, x, pos, bits);
453 return dst;
454 }
455
/* Bit-field merge: insert a `bits`-wide field taken from y at src_pos
 * into x at dst_pos, writing the result to dst.  NOTE(review): exact
 * masking semantics are defined by the per-backend merge_to
 * implementations — confirm there before relying on edge cases. */
static inline void
mme_merge_to(struct mme_builder *b, struct mme_value dst,
             struct mme_value x, struct mme_value y,
             uint16_t dst_pos, uint16_t bits, uint16_t src_pos)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
   else
      unreachable("Unsupported GPU class");
}
468
469 static inline struct mme_value
mme_merge(struct mme_builder * b,struct mme_value x,struct mme_value y,uint16_t dst_pos,uint16_t bits,uint16_t src_pos)470 mme_merge(struct mme_builder *b,
471 struct mme_value x, struct mme_value y,
472 uint16_t dst_pos, uint16_t bits, uint16_t src_pos)
473 {
474 struct mme_value dst = mme_alloc_reg(b);
475 mme_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
476 return dst;
477 }
478
/* Overwrite bit-field FIELD of the register value x with val, using the
 * DRF_LO()/DRF_BITS() field descriptors from the class headers. */
#define mme_set_field(b, x, FIELD, val) \
   mme_merge_to(b, x, x, val, DRF_LO(FIELD), DRF_BITS(FIELD), 0)

/* Same, taking the value from the enumerant FIELD##_##ENUM.  (A stray
 * trailing backslash used to continue this macro onto the following
 * blank line; removed.) */
#define mme_set_field_enum(b, x, FIELD, ENUM) \
   mme_set_field(b, x, FIELD, mme_imm(FIELD##_##ENUM))
484
/* Read the current value of hardware method `state` at array element
 * `index` into dst (via the MME state interface; exact semantics are
 * back-end specific). */
static inline void
mme_state_arr_to(struct mme_builder *b, struct mme_value dst,
                 uint16_t state, struct mme_value index)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_state_arr_to(b, dst, state, index);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_state_arr_to(b, dst, state, index);
   else
      unreachable("Unsupported GPU class");
}
496
/* Read non-arrayed method `state` into dst (index 0). */
static inline void
mme_state_to(struct mme_builder *b, struct mme_value dst,
             uint16_t state)
{
   mme_state_arr_to(b, dst, state, mme_zero());
}
503
504 static inline struct mme_value
mme_state_arr(struct mme_builder * b,uint16_t state,struct mme_value index)505 mme_state_arr(struct mme_builder *b,
506 uint16_t state, struct mme_value index)
507 {
508 struct mme_value dst = mme_alloc_reg(b);
509 mme_state_arr_to(b, dst, state, index);
510 return dst;
511 }
512
513 static inline struct mme_value
mme_state(struct mme_builder * b,uint16_t state)514 mme_state(struct mme_builder *b,
515 uint16_t state)
516 {
517 struct mme_value dst = mme_alloc_reg(b);
518 mme_state_to(b, dst, state);
519 return dst;
520 }
521
/* Write val to the MME scratch RAM at index idx (DWRITE op; read back
 * with mme_dread).  No destination register is produced. */
static inline void
mme_dwrite(struct mme_builder *b,
           struct mme_value idx, struct mme_value val)
{
   mme_alu_no_dst(b, MME_ALU_OP_DWRITE, idx, val);
}
528
/* Load the next macro input parameter into dst. */
static inline void
mme_load_to(struct mme_builder *b, struct mme_value dst)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_load_to(b, dst);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_load_to(b, dst);
   else
      unreachable("Unsupported GPU class");
}
539
/* TU104 back-end: load the next macro input parameter into a freshly
 * allocated register.  Prefer the generation-independent mme_load(). */
static inline struct mme_value
mme_tu104_load(struct mme_builder *b)
{
   struct mme_value dst = mme_alloc_reg(b);
   mme_tu104_load_to(b, dst);
   return dst;
}
547
548 static inline struct mme_value
mme_load(struct mme_builder * b)549 mme_load(struct mme_builder *b)
550 {
551 if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
552 return mme_tu104_load(b);
553 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
554 return mme_fermi_load(b);
555 else
556 unreachable("Unsupported GPU class");
557 }
558
/* Load a 64-bit address passed as two consecutive macro parameters.
 * The high dword comes first, mirroring mme_emit_addr64(). */
static inline struct mme_value64
mme_load_addr64(struct mme_builder *b)
{
   struct mme_value hi = mme_load(b);
   struct mme_value lo = mme_load(b);
   return mme_value64(lo, hi);
}
566
/* Select the hardware method that subsequent mme_emit() data will be
 * written to; `index` selects the element for arrayed methods. */
static inline void
mme_mthd_arr(struct mme_builder *b, uint16_t mthd,
             struct mme_value index)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_mthd(b, mthd, index);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_mthd_arr(b, mthd, index);
   else
      unreachable("Unsupported GPU class");
}
578
/* Select a non-arrayed hardware method (index 0) for mme_emit(). */
static inline void
mme_mthd(struct mme_builder *b, uint16_t mthd)
{
   mme_mthd_arr(b, mthd, mme_zero());
}
584
/* Emit `data` as the next dword to the method selected by mme_mthd(). */
static inline void
mme_emit(struct mme_builder *b,
         struct mme_value data)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_emit(b, data);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_emit(b, data);
   else
      unreachable("Unsupported GPU class");
}
596
/* Emit a 64-bit address as two dwords, high half first (matches the
 * hardware's ADDRESS_A/ADDRESS_B method pairs and mme_load_addr64). */
static inline void
mme_emit_addr64(struct mme_builder *b, struct mme_value64 addr)
{
   mme_emit(b, addr.hi);
   mme_emit(b, addr.lo);
}
603
/* Turing+: DMA `count` dwords from memory at `addr` into the MME data
 * FIFO so they can be consumed with subsequent mme_load() calls.  The
 * trailing load barrier orders those loads against the DMA (see
 * mme_tu104_load_barrier). */
static inline void
mme_tu104_read_fifoed(struct mme_builder *b,
                      struct mme_value64 addr,
                      struct mme_value count)
{
   mme_mthd(b, 0x0550 /* NVC597_SET_MME_MEM_ADDRESS_A */);
   mme_emit_addr64(b, addr);

   mme_mthd(b, 0x0560 /* NVC597_MME_DMA_READ_FIFOED */);
   mme_emit(b, count);

   mme_tu104_load_barrier(b);
}
617
/* Open a loop that executes `count` times; close it with mme_end_loop().
 * Prefer the mme_loop() wrapper macro below. */
static inline void
mme_start_loop(struct mme_builder *b, struct mme_value count)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_start_loop(b, count);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_start_loop(b, count);
   else
      unreachable("Unsupported GPU class");
}
628
/* Close the innermost loop opened with mme_start_loop(). */
static inline void
mme_end_loop(struct mme_builder *b)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_end_loop(b);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_end_loop(b);
   else
      unreachable("Unsupported GPU class");
}
639
/* for-style sugar: mme_loop(b, count) { ... } emits a loop running
 * `count` times.  The for-trick runs the C body exactly once at build
 * time, bracketed by start/end emission. */
#define mme_loop(b, count) \
   for (bool run = (mme_start_loop((b), count), true); run; \
        run = false, mme_end_loop(b))
643
/* Defines mme_start_if_<op>(b, x, y): open a conditional block that
 * executes when (x OP y) == if_true.  Signed predicates use the i
 * prefix, unsigned the u prefix; the inverted forms (ige, ugt, ine, ...)
 * reuse the base comparison with if_true = false.  Close with
 * mme_end_if(); prefer the mme_if() wrapper macro. */
#define MME_DEF_START_IF(op, OP, if_true)                            \
static inline void                                                   \
mme_start_if_##op(struct mme_builder *b,                             \
                  struct mme_value x, struct mme_value y)            \
{                                                                    \
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)                      \
      mme_tu104_start_if(b, MME_CMP_OP_##OP, if_true, x, y);         \
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)                  \
      mme_fermi_start_if(b, MME_CMP_OP_##OP, if_true, x, y);         \
   else                                                              \
      unreachable("Unsupported GPU class");                          \
}

MME_DEF_START_IF(ilt, LT,  true)   /* signed   x <  y */
MME_DEF_START_IF(ult, LTU, true)   /* unsigned x <  y */
MME_DEF_START_IF(ile, LE,  true)   /* signed   x <= y */
MME_DEF_START_IF(ule, LEU, true)   /* unsigned x <= y */
MME_DEF_START_IF(ieq, EQ,  true)   /*          x == y */
MME_DEF_START_IF(ige, LT,  false)  /* signed   x >= y */
MME_DEF_START_IF(uge, LTU, false)  /* unsigned x >= y */
MME_DEF_START_IF(igt, LE,  false)  /* signed   x >  y */
MME_DEF_START_IF(ugt, LEU, false)  /* unsigned x >  y */
MME_DEF_START_IF(ine, EQ,  false)  /*          x != y */

#undef MME_DEF_START_IF
669
/* Close the innermost conditional opened with mme_start_if_*(). */
static inline void
mme_end_if(struct mme_builder *b)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_end_if(b);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_end_if(b);
   else
      unreachable("Unsupported GPU class");
}
680
/* for-style sugar: mme_if(b, ilt, x, y) { ... } emits a conditional
 * block; `cmp` is one of the mme_start_if_* suffixes (ilt, ult, ieq...). */
#define mme_if(b, cmp, x, y) \
   for (bool run = (mme_start_if_##cmp((b), x, y), true); run; \
        run = false, mme_end_if(b))
684
/* Open a while loop; the condition is supplied when closing it with
 * mme_end_while_*().  Prefer the mme_while() wrapper macro. */
static inline void
mme_start_while(struct mme_builder *b)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_start_while(b);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_start_while(b);
   else
      unreachable("Unsupported GPU class");
}
695
696 #define MME_DEF_END_WHILE(op, OP, if_true) \
697 static inline void \
698 mme_end_while_##op(struct mme_builder *b, \
699 struct mme_value x, struct mme_value y) \
700 { \
701 if (b->devinfo->cls_eng3d >= MME_CLS_TURING) \
702 mme_tu104_end_while(b, MME_CMP_OP_##OP, if_true, x, y); \
703 else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) \
704 mme_fermi_end_while(b, MME_CMP_OP_##OP, if_true, x, y); \
705 else \
706 unreachable("Unsupported GPU class"); \
707 }
708
MME_DEF_END_WHILE(ilt,LT,true)709 MME_DEF_END_WHILE(ilt, LT, true)
710 MME_DEF_END_WHILE(ult, LTU, true)
711 MME_DEF_END_WHILE(ile, LE, true)
712 MME_DEF_END_WHILE(ule, LEU, true)
713 MME_DEF_END_WHILE(ieq, EQ, true)
714 MME_DEF_END_WHILE(ige, LT, false)
715 MME_DEF_END_WHILE(uge, LTU, false)
716 MME_DEF_END_WHILE(igt, LE, false)
717 MME_DEF_END_WHILE(ugt, LEU, false)
718 MME_DEF_END_WHILE(ine, EQ, false)
719
/* for-style sugar: mme_while(b, ine, x, y) { ... } emits a while loop;
 * the condition is evaluated by the closing mme_end_while_<cmp>(). */
#define mme_while(b, cmp, x, y) \
   for (bool run = (mme_start_while(b), true); run; \
        run = false, mme_end_while_##cmp((b), x, y))
723
/* Defines mme_exit_if_<op>(b, x, y): exit the macro early when
 * (x OP y) == if_true.  Turing+ only — there is no Fermi fallback, so
 * pre-Turing hits unreachable(). */
#define MME_DEF_EXIT(op, OP, if_true)                                \
static inline void                                                   \
mme_exit_if_##op(struct mme_builder *b,                              \
                 struct mme_value x, struct mme_value y)             \
{                                                                    \
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)                      \
      mme_tu104_exit_if(b, MME_CMP_OP_##OP, if_true, x, y);          \
   else                                                              \
      unreachable("Unsupported GPU class");                          \
}

MME_DEF_EXIT(ilt, LT,  true)
MME_DEF_EXIT(ult, LTU, true)
MME_DEF_EXIT(ile, LE,  true)
MME_DEF_EXIT(ule, LEU, true)
MME_DEF_EXIT(ieq, EQ,  true)
MME_DEF_EXIT(ige, LT,  false)
MME_DEF_EXIT(uge, LTU, false)
MME_DEF_EXIT(igt, LE,  false)
MME_DEF_EXIT(ugt, LEU, false)
MME_DEF_EXIT(ine, EQ,  false)

#undef MME_DEF_EXIT
747
/* Convenience: mme_exit_if(b, ieq, x, y) -> mme_exit_if_ieq(b, x, y). */
#define mme_exit_if(b, cmp, x, y) \
   mme_exit_if_##cmp(b, x, y)
750
/* Unconditionally exit the macro, implemented as "exit if 0 == 0".
 * Turing+ only, like the rest of the exit helpers. */
static inline void
mme_exit(struct mme_builder *b)
{
   mme_exit_if_ieq(b, mme_zero(), mme_zero());
}
756
757 #ifdef __cplusplus
758 }
759 #endif
760
761 #endif /* MME_BUILDER_H */
762
763