/*
 * Copyright © 2022 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */
#ifndef MME_BUILDER_H
#define MME_BUILDER_H

#include "mme_value.h"
#include "mme_tu104.h"
#include "nv_device_info.h"

#include "util/bitscan.h"
#include "util/enum_operators.h"

#ifdef __cplusplus
extern "C" {
#endif

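/* ALU opcodes understood by the generation-independent builder.  Each
 * back-end (Fermi or TU104/Turing) lowers these to its native encoding.
 * ADDC/SUBB are the carry/borrow halves used by the 64-bit helpers below.
 */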
enum mme_alu_op {
   MME_ALU_OP_ADD,
   MME_ALU_OP_ADDC,
   MME_ALU_OP_SUB,
   MME_ALU_OP_SUBB,
   MME_ALU_OP_MUL,
   MME_ALU_OP_MULH,
   MME_ALU_OP_MULU,
   MME_ALU_OP_CLZ,
   MME_ALU_OP_SLL,
   MME_ALU_OP_SRL,
   MME_ALU_OP_SRA,
   MME_ALU_OP_NOT,
   MME_ALU_OP_AND,
   MME_ALU_OP_AND_NOT,
   MME_ALU_OP_NAND,
   MME_ALU_OP_OR,
   MME_ALU_OP_XOR,
   MME_ALU_OP_SLT,
   MME_ALU_OP_SLTU,
   MME_ALU_OP_SLE,
   MME_ALU_OP_SLEU,
   MME_ALU_OP_SEQ,
   MME_ALU_OP_DREAD,
   MME_ALU_OP_DWRITE,
};

enum mme_cmp_op {
   MME_CMP_OP_LT,
   MME_CMP_OP_LTU,
   MME_CMP_OP_LE,
   MME_CMP_OP_LEU,
   MME_CMP_OP_EQ,
};

enum mme_cf_type {
   MME_CF_TYPE_IF,
   MME_CF_TYPE_LOOP,
   MME_CF_TYPE_WHILE,
};

struct mme_cf {
   enum mme_cf_type type;
   uint16_t start_ip;
};

struct mme_builder;

#include "mme_tu104_builder.h"
#include "mme_fermi_builder.h"

#define MME_CLS_FERMI 0x9000
#define MME_CLS_TURING 0xc500

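/* Generation-independent MME macro builder.  Which back-end is active is
 * decided at runtime from devinfo->cls_eng3d; the union holds the state of
 * whichever back-end (Fermi or TU104) is in use.
 */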
struct mme_builder {
   const struct nv_device_info *devinfo;
   struct mme_reg_alloc reg_alloc;
   union {
      struct mme_tu104_builder tu104;
      struct mme_fermi_builder fermi;
   };
};

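/* Initialize a builder for the device's 3D class, finish it to get the
 * encoded macro (size returned in *size_out), or dump it for debugging.
 */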
static inline void
mme_builder_init(struct mme_builder *b, const struct nv_device_info *dev)
{
   memset(b, 0, sizeof(*b));
   b->devinfo = dev;

   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_builder_init(b);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_builder_init(b);
   else
      unreachable("Unsupported GPU class");
}

static inline uint32_t *
mme_builder_finish(struct mme_builder *b, size_t *size_out)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      return mme_tu104_builder_finish(&b->tu104, size_out);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      return mme_fermi_builder_finish(&b->fermi, size_out);
   else
      unreachable("Unsupported GPU class");
}

static inline void
mme_builder_dump(struct mme_builder *b, FILE *fp)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_builder_dump(b, fp);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_builder_dump(b, fp);
   else
      unreachable("Unsupported GPU class");
}

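/* Scratch register management.  Values live in MME registers handed out by
 * reg_alloc; a 64-bit value is simply a (lo, hi) pair of 32-bit registers.
 */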
static inline struct mme_value
mme_alloc_reg(struct mme_builder *b)
{
   return mme_reg_alloc_alloc(&b->reg_alloc);
}

static inline void
mme_realloc_reg(struct mme_builder *b, struct mme_value value)
{
   return mme_reg_alloc_realloc(&b->reg_alloc, value);
}

static inline void
mme_free_reg(struct mme_builder *b, struct mme_value val)
{
   mme_reg_alloc_free(&b->reg_alloc, val);
}

static inline struct mme_value64
mme_alloc_reg64(struct mme_builder *b)
{
   struct mme_value lo = mme_alloc_reg(b);
   struct mme_value hi = mme_alloc_reg(b);
   return mme_value64(lo, hi);
}

static inline void
mme_free_reg64(struct mme_builder *b, struct mme_value64 val)
{
   mme_reg_alloc_free(&b->reg_alloc, val.lo);
   mme_reg_alloc_free(&b->reg_alloc, val.hi);
}

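/* Emit a single ALU operation, dispatching to the active back-end.  The
 * *_to variants write into an existing destination register; the plain
 * variants allocate and return a fresh destination.
 */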
static inline void
mme_alu_to(struct mme_builder *b,
           struct mme_value dst,
           enum mme_alu_op op,
           struct mme_value x,
           struct mme_value y)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_alu_to(b, dst, op, x, y);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_alu_to(b, dst, op, x, y);
   else
      unreachable("Unsupported GPU class");
}

static inline struct mme_value
mme_alu(struct mme_builder *b,
        enum mme_alu_op op,
        struct mme_value x,
        struct mme_value y)
{
   struct mme_value dst = mme_alloc_reg(b);
   mme_alu_to(b, dst, op, x, y);
   return dst;
}

static inline void
mme_alu_no_dst(struct mme_builder *b,
               enum mme_alu_op op,
               struct mme_value x,
               struct mme_value y)
{
   mme_alu_to(b, mme_zero(), op, x, y);
}

static inline void
mme_alu64_to(struct mme_builder *b,
             struct mme_value64 dst,
             enum mme_alu_op op_lo,
             enum mme_alu_op op_hi,
             struct mme_value64 x,
             struct mme_value64 y)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_alu64_to(b, dst, op_lo, op_hi, x, y);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_alu64_to(b, dst, op_lo, op_hi, x, y);
   else
      unreachable("Unsupported GPU class");
}

static inline struct mme_value64
mme_alu64(struct mme_builder *b,
          enum mme_alu_op op_lo, enum mme_alu_op op_hi,
          struct mme_value64 x, struct mme_value64 y)
{
   struct mme_value64 dst = {
      mme_alloc_reg(b),
      mme_alloc_reg(b),
   };
   mme_alu64_to(b, dst, op_lo, op_hi, x, y);
   return dst;
}

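/* MME_DEF_ALU1/MME_DEF_ALU2 stamp out mme_<op>_to()/mme_<op>() wrappers for
 * unary and binary ALU ops; unary ops pass mme_zero() as the second source.
 */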
#define MME_DEF_ALU1(op, OP)                                \
static inline void                                          \
mme_##op##_to(struct mme_builder *b, struct mme_value dst,  \
              struct mme_value x)                           \
{                                                           \
   mme_alu_to(b, dst, MME_ALU_OP_##OP, x, mme_zero());      \
}                                                           \
                                                            \
static inline struct mme_value                              \
mme_##op(struct mme_builder *b,                             \
         struct mme_value x)                                \
{                                                           \
   return mme_alu(b, MME_ALU_OP_##OP, x, mme_zero());       \
}

#define MME_DEF_ALU2(op, OP)                                \
static inline void                                          \
mme_##op##_to(struct mme_builder *b, struct mme_value dst,  \
              struct mme_value x, struct mme_value y)       \
{                                                           \
   mme_alu_to(b, dst, MME_ALU_OP_##OP, x, y);               \
}                                                           \
                                                            \
static inline struct mme_value                              \
mme_##op(struct mme_builder *b,                             \
         struct mme_value x, struct mme_value y)            \
{                                                           \
   return mme_alu(b, MME_ALU_OP_##OP, x, y);                \
}

MME_DEF_ALU1(mov,    ADD);
MME_DEF_ALU2(add,    ADD);
MME_DEF_ALU2(sub,    SUB);
MME_DEF_ALU2(mul,    MUL);
MME_DEF_ALU1(clz,    CLZ);
MME_DEF_ALU2(sll,    SLL);
MME_DEF_ALU2(srl,    SRL);
MME_DEF_ALU2(sra,    SRA);
MME_DEF_ALU1(not,    NOT);
MME_DEF_ALU2(and,    AND);
MME_DEF_ALU2(and_not,AND_NOT);
MME_DEF_ALU2(nand,   NAND);
MME_DEF_ALU2(or,     OR);
MME_DEF_ALU2(xor,    XOR);
MME_DEF_ALU2(slt,    SLT);
MME_DEF_ALU2(sltu,   SLTU);
MME_DEF_ALU2(sle,    SLE);
MME_DEF_ALU2(sleu,   SLEU);
MME_DEF_ALU2(seq,    SEQ);
MME_DEF_ALU1(dread,  DREAD);

#undef MME_DEF_ALU1
#undef MME_DEF_ALU2

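/* 64-bit mov/add/sub are built from pairs of 32-bit ops: ADD/ADDC and
 * SUB/SUBB propagate the carry/borrow from the low half into the high half.
 */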
static inline void
mme_mov64_to(struct mme_builder *b, struct mme_value64 dst,
             struct mme_value64 x)
{
   mme_alu64_to(b, dst, MME_ALU_OP_ADD, MME_ALU_OP_ADD, x, mme_imm64(0));
}

static inline struct mme_value64
mme_mov64(struct mme_builder *b, struct mme_value64 x)
{
   return mme_alu64(b, MME_ALU_OP_ADD, MME_ALU_OP_ADD, x, mme_imm64(0));
}

static inline void
mme_add64_to(struct mme_builder *b, struct mme_value64 dst,
             struct mme_value64 x, struct mme_value64 y)
{
   mme_alu64_to(b, dst, MME_ALU_OP_ADD, MME_ALU_OP_ADDC, x, y);
}

static inline struct mme_value64
mme_add64(struct mme_builder *b,
          struct mme_value64 x, struct mme_value64 y)
{
   return mme_alu64(b, MME_ALU_OP_ADD, MME_ALU_OP_ADDC, x, y);
}

static inline void
mme_sub64_to(struct mme_builder *b, struct mme_value64 dst,
             struct mme_value64 x, struct mme_value64 y)
{
   mme_alu64_to(b, dst, MME_ALU_OP_SUB, MME_ALU_OP_SUBB, x, y);
}

static inline struct mme_value64
mme_sub64(struct mme_builder *b,
          struct mme_value64 x, struct mme_value64 y)
{
   return mme_alu64(b, MME_ALU_OP_SUB, MME_ALU_OP_SUBB, x, y);
}

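/* Multiply helpers.  The _free_srcs variants require register sources and
 * free them once the product has been computed, which lets the Fermi
 * back-end, which provides its own multiply lowering, reuse them as scratch.
 */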
static inline struct mme_value
mme_mul_32x32_32_free_srcs(struct mme_builder *b,
                           struct mme_value x, struct mme_value y)
{
   assert(x.type == MME_VALUE_TYPE_REG);
   assert(y.type == MME_VALUE_TYPE_REG);
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {
      struct mme_value dst = mme_mul(b, x, y);
      mme_free_reg(b, x);
      mme_free_reg(b, y);
      return dst;
   } else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) {
      struct mme_value dst = mme_alloc_reg(b);
      mme_fermi_umul_32x32_32_to_free_srcs(b, dst, x, y);
      return dst;
   } else {
      unreachable("Unsupported GPU class");
   }
}

static inline void
mme_imul_32x32_64_to(struct mme_builder *b, struct mme_value64 dst,
                     struct mme_value x, struct mme_value y)
{
   mme_alu64_to(b, dst, MME_ALU_OP_MUL, MME_ALU_OP_MULH,
                mme_value64(x, mme_zero()),
                mme_value64(y, mme_zero()));
}

static inline struct mme_value64
mme_imul_32x32_64(struct mme_builder *b,
                  struct mme_value x, struct mme_value y)
{
   return mme_alu64(b, MME_ALU_OP_MUL, MME_ALU_OP_MULH,
                    mme_value64(x, mme_zero()),
                    mme_value64(y, mme_zero()));
}

static inline void
mme_umul_32x32_64_to(struct mme_builder *b, struct mme_value64 dst,
                     struct mme_value x, struct mme_value y)
{
   assert(b->devinfo->cls_eng3d >= MME_CLS_TURING);
   mme_alu64_to(b, dst, MME_ALU_OP_MULU, MME_ALU_OP_MULH,
                mme_value64(x, mme_zero()),
                mme_value64(y, mme_zero()));
}

static inline struct mme_value64
mme_umul_32x32_64(struct mme_builder *b,
                  struct mme_value x, struct mme_value y)
{
   assert(b->devinfo->cls_eng3d >= MME_CLS_TURING);
   return mme_alu64(b, MME_ALU_OP_MULU, MME_ALU_OP_MULH,
                    mme_value64(x, mme_zero()),
                    mme_value64(y, mme_zero()));
}

static inline struct mme_value64
mme_umul_32x32_64_free_srcs(struct mme_builder *b,
                            struct mme_value x, struct mme_value y)
{
   assert(x.type == MME_VALUE_TYPE_REG);
   assert(y.type == MME_VALUE_TYPE_REG);
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {
      struct mme_value64 dst = mme_umul_32x32_64(b, x, y);
      mme_free_reg(b, x);
      mme_free_reg(b, y);
      return dst;
   } else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) {
      struct mme_value y_hi = mme_mov(b, mme_zero());
      struct mme_value64 dst = mme_alloc_reg64(b);
      mme_fermi_umul_32x64_64_to_free_srcs(b, dst, x, mme_value64(y, y_hi));
      return dst;
   } else {
      unreachable("Unsupported GPU class");
   }
}

static inline struct mme_value64
mme_umul_32x64_64_free_srcs(struct mme_builder *b,
                            struct mme_value x, struct mme_value64 y)
{
   assert(x.type == MME_VALUE_TYPE_REG);
   assert(y.lo.type == MME_VALUE_TYPE_REG);
   assert(y.hi.type == MME_VALUE_TYPE_REG);
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {
      struct mme_value64 dst = mme_umul_32x32_64(b, x, y.lo);
      struct mme_value tmp = mme_mul(b, x, y.hi);
      mme_add64_to(b, dst, dst, mme_value64(mme_zero(), tmp));
      mme_free_reg(b, x);
      mme_free_reg64(b, y);
      return dst;
   } else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) {
      struct mme_value64 dst = mme_alloc_reg64(b);
      mme_fermi_umul_32x64_64_to_free_srcs(b, dst, x, y);
      return dst;
   } else {
      unreachable("Unsupported GPU class");
   }
}

static inline struct mme_value64
mme_mul64(struct mme_builder *b,
          struct mme_value64 x, struct mme_value64 y)
{
   if (mme_is_zero(x.hi) && mme_is_zero(y.hi))
      return mme_umul_32x32_64(b, x.lo, y.lo);

   struct mme_value64 dst = mme_umul_32x32_64(b, x.lo, y.lo);
   struct mme_value tmp = mme_alloc_reg(b);

   mme_mul_to(b, tmp, x.lo, y.hi);
   mme_add64_to(b, dst, dst, mme_value64(mme_zero(), tmp));

   mme_mul_to(b, tmp, x.hi, y.lo);
   mme_add64_to(b, dst, dst, mme_value64(mme_zero(), tmp));

   mme_free_reg(b, tmp);

   return dst;
}

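/* Bit-field extract: dst = (x >> pos) & BITFIELD_MASK(bits).  Turing emits
 * a shift plus mask; Fermi uses its native extract operation.
 */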
static inline void
mme_bfe_to(struct mme_builder *b, struct mme_value dst,
           struct mme_value x, struct mme_value pos, uint8_t bits)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {
      mme_srl_to(b, dst, x, pos);
      mme_and_to(b, dst, dst, mme_imm(BITFIELD_MASK(bits)));
   } else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) {
      mme_fermi_bfe_to(b, dst, x, pos, bits);
   } else {
      unreachable("Unsupported GPU class");
   }
}

static inline struct mme_value
mme_bfe(struct mme_builder *b,
        struct mme_value x, struct mme_value pos, uint8_t bits)
{
   struct mme_value dst = mme_alloc_reg(b);
   mme_bfe_to(b, dst, x, pos, bits);
   return dst;
}

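/* Bit-field merge (insert): take `bits` bits of y starting at src_pos and
 * insert them into x at dst_pos, writing the result to dst.  The
 * mme_set_field() helpers below use it to overwrite one DRF-described field
 * of x in place.
 */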
static inline void
mme_merge_to(struct mme_builder *b, struct mme_value dst,
             struct mme_value x, struct mme_value y,
             uint16_t dst_pos, uint16_t bits, uint16_t src_pos)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
   else
      unreachable("Unsupported GPU class");
}

static inline struct mme_value
mme_merge(struct mme_builder *b,
          struct mme_value x, struct mme_value y,
          uint16_t dst_pos, uint16_t bits, uint16_t src_pos)
{
   struct mme_value dst = mme_alloc_reg(b);
   mme_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
   return dst;
}

#define mme_set_field(b, x, FIELD, val) \
   mme_merge_to(b, x, x, val, DRF_LO(FIELD), DRF_BITS(FIELD), 0)

#define mme_set_field_enum(b, x, FIELD, ENUM) \
   mme_set_field(b, x, FIELD, mme_imm(FIELD##_##ENUM)) \

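/* Read back 3D-class state: loads the current (shadowed) value of the given
 * method into dst; the _arr variants add `index` for array methods.
 */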
static inline void
mme_state_arr_to(struct mme_builder *b, struct mme_value dst,
                 uint16_t state, struct mme_value index)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_state_arr_to(b, dst, state, index);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_state_arr_to(b, dst, state, index);
   else
      unreachable("Unsupported GPU class");
}

static inline void
mme_state_to(struct mme_builder *b, struct mme_value dst,
             uint16_t state)
{
   mme_state_arr_to(b, dst, state, mme_zero());
}

static inline struct mme_value
mme_state_arr(struct mme_builder *b,
              uint16_t state, struct mme_value index)
{
   struct mme_value dst = mme_alloc_reg(b);
   mme_state_arr_to(b, dst, state, index);
   return dst;
}

static inline struct mme_value
mme_state(struct mme_builder *b,
          uint16_t state)
{
   struct mme_value dst = mme_alloc_reg(b);
   mme_state_to(b, dst, state);
   return dst;
}

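/* Write val to the MME's data storage at index idx; the matching read is
 * mme_dread(), generated by MME_DEF_ALU1 above.
 */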
static inline void
mme_dwrite(struct mme_builder *b,
           struct mme_value idx, struct mme_value val)
{
   mme_alu_no_dst(b, MME_ALU_OP_DWRITE, idx, val);
}

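/* mme_load() returns the next dword of the macro's parameter stream.
 * mme_load_addr64() consumes two dwords, high word first, matching the
 * order used by mme_emit_addr64().
 */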
static inline void
mme_load_to(struct mme_builder *b, struct mme_value dst)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_load_to(b, dst);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_load_to(b, dst);
   else
      unreachable("Unsupported GPU class");
}

static inline struct mme_value
mme_tu104_load(struct mme_builder *b)
{
   struct mme_value dst = mme_alloc_reg(b);
   mme_tu104_load_to(b, dst);
   return dst;
}

static inline struct mme_value
mme_load(struct mme_builder *b)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      return mme_tu104_load(b);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      return mme_fermi_load(b);
   else
      unreachable("Unsupported GPU class");
}

static inline struct mme_value64
mme_load_addr64(struct mme_builder *b)
{
   struct mme_value hi = mme_load(b);
   struct mme_value lo = mme_load(b);
   return mme_value64(lo, hi);
}

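/* Method emission: mme_mthd() selects the current 3D-class method and each
 * mme_emit() sends one data dword to it.  mme_mthd_arr() adds `index` for
 * array methods; mme_emit_addr64() emits the high dword then the low dword.
 */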
static inline void
mme_mthd_arr(struct mme_builder *b, uint16_t mthd,
             struct mme_value index)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_mthd(b, mthd, index);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_mthd_arr(b, mthd, index);
   else
      unreachable("Unsupported GPU class");
}

static inline void
mme_mthd(struct mme_builder *b, uint16_t mthd)
{
   mme_mthd_arr(b, mthd, mme_zero());
}

static inline void
mme_emit(struct mme_builder *b,
         struct mme_value data)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_emit(b, data);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_emit(b, data);
   else
      unreachable("Unsupported GPU class");
}

static inline void
mme_emit_addr64(struct mme_builder *b, struct mme_value64 addr)
{
   mme_emit(b, addr.hi);
   mme_emit(b, addr.lo);
}

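/* Turing-only: point NVC597_SET_MME_MEM_ADDRESS_A/B at addr, kick
 * MME_DMA_READ_FIFOED for `count` dwords, and insert a load barrier so the
 * DMA'd data can be consumed by subsequent mme_load() calls.
 */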
static inline void
mme_tu104_read_fifoed(struct mme_builder *b,
                      struct mme_value64 addr,
                      struct mme_value count)
{
   mme_mthd(b, 0x0550 /* NVC597_SET_MME_MEM_ADDRESS_A */);
   mme_emit_addr64(b, addr);

   mme_mthd(b, 0x0560 /* NVC597_MME_DMA_READ_FIFOED */);
   mme_emit(b, count);

   mme_tu104_load_barrier(b);
}

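/* Structured control flow.  The start/end pairs below must nest properly;
 * prefer the mme_loop()/mme_if()/mme_while() convenience macros, which
 * guarantee matching end calls.
 */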
static inline void
mme_start_loop(struct mme_builder *b, struct mme_value count)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_start_loop(b, count);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_start_loop(b, count);
   else
      unreachable("Unsupported GPU class");
}

static inline void
mme_end_loop(struct mme_builder *b)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_end_loop(b);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_end_loop(b);
   else
      unreachable("Unsupported GPU class");
}

#define mme_loop(b, count) \
   for (bool run = (mme_start_loop((b), count), true); run; \
        run = false, mme_end_loop(b))

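/* Illustrative usage sketch (SOME_METHOD is a hypothetical method offset;
 * everything else is the builder API from this header):
 *
 *    struct mme_value count = mme_load(b);
 *    mme_loop(b, count) {
 *       struct mme_value data = mme_load(b);
 *       mme_if(b, ine, data, mme_zero()) {
 *          mme_mthd(b, SOME_METHOD);
 *          mme_emit(b, data);
 *       }
 *       mme_free_reg(b, data);
 *    }
 *    mme_free_reg(b, count);
 */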
#define MME_DEF_START_IF(op, OP, if_true)                         \
static inline void                                                \
mme_start_if_##op(struct mme_builder *b,                          \
                  struct mme_value x, struct mme_value y)         \
{                                                                 \
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)                   \
      mme_tu104_start_if(b, MME_CMP_OP_##OP, if_true, x, y);      \
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)               \
      mme_fermi_start_if(b, MME_CMP_OP_##OP, if_true, x, y);      \
   else                                                           \
      unreachable("Unsupported GPU class");                       \
}

MME_DEF_START_IF(ilt,   LT,  true)
MME_DEF_START_IF(ult,   LTU, true)
MME_DEF_START_IF(ile,   LE,  true)
MME_DEF_START_IF(ule,   LEU, true)
MME_DEF_START_IF(ieq,   EQ,  true)
MME_DEF_START_IF(ige,   LT,  false)
MME_DEF_START_IF(uge,   LTU, false)
MME_DEF_START_IF(igt,   LE,  false)
MME_DEF_START_IF(ugt,   LEU, false)
MME_DEF_START_IF(ine,   EQ,  false)

#undef MME_DEF_START_IF

static inline void
mme_end_if(struct mme_builder *b)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_end_if(b);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_end_if(b);
   else
      unreachable("Unsupported GPU class");
}

#define mme_if(b, cmp, x, y) \
   for (bool run = (mme_start_if_##cmp((b), x, y), true); run; \
        run = false, mme_end_if(b))

static inline void
mme_start_while(struct mme_builder *b)
{
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
      mme_tu104_start_while(b);
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
      mme_fermi_start_while(b);
   else
      unreachable("Unsupported GPU class");
}

#define MME_DEF_END_WHILE(op, OP, if_true)                        \
static inline void                                                \
mme_end_while_##op(struct mme_builder *b,                         \
                   struct mme_value x, struct mme_value y)        \
{                                                                 \
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)                   \
      mme_tu104_end_while(b, MME_CMP_OP_##OP, if_true, x, y);     \
   else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)               \
      mme_fermi_end_while(b, MME_CMP_OP_##OP, if_true, x, y);     \
   else                                                           \
      unreachable("Unsupported GPU class");                       \
}

MME_DEF_END_WHILE(ilt,   LT,  true)
MME_DEF_END_WHILE(ult,   LTU, true)
MME_DEF_END_WHILE(ile,   LE,  true)
MME_DEF_END_WHILE(ule,   LEU, true)
MME_DEF_END_WHILE(ieq,   EQ,  true)
MME_DEF_END_WHILE(ige,   LT,  false)
MME_DEF_END_WHILE(uge,   LTU, false)
MME_DEF_END_WHILE(igt,   LE,  false)
MME_DEF_END_WHILE(ugt,   LEU, false)
MME_DEF_END_WHILE(ine,   EQ,  false)

#define mme_while(b, cmp, x, y) \
   for (bool run = (mme_start_while(b), true); run; \
        run = false, mme_end_while_##cmp((b), x, y))

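/* Early macro exit.  mme_exit_if_<cmp>() ends macro execution when the
 * comparison holds and mme_exit() exits unconditionally; only the Turing
 * back-end supports this.
 */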
#define MME_DEF_EXIT(op, OP, if_true)                             \
static inline void                                                \
mme_exit_if_##op(struct mme_builder *b,                           \
                 struct mme_value x, struct mme_value y)          \
{                                                                 \
   if (b->devinfo->cls_eng3d >= MME_CLS_TURING)                   \
      mme_tu104_exit_if(b, MME_CMP_OP_##OP, if_true, x, y);       \
   else                                                           \
      unreachable("Unsupported GPU class");                       \
}

MME_DEF_EXIT(ilt,   LT,  true)
MME_DEF_EXIT(ult,   LTU, true)
MME_DEF_EXIT(ile,   LE,  true)
MME_DEF_EXIT(ule,   LEU, true)
MME_DEF_EXIT(ieq,   EQ,  true)
MME_DEF_EXIT(ige,   LT,  false)
MME_DEF_EXIT(uge,   LTU, false)
MME_DEF_EXIT(igt,   LE,  false)
MME_DEF_EXIT(ugt,   LEU, false)
MME_DEF_EXIT(ine,   EQ,  false)

#undef MME_DEF_EXIT

#define mme_exit_if(b, cmp, x, y) \
   mme_exit_if_##cmp(b, x, y)

static inline void
mme_exit(struct mme_builder *b)
{
   mme_exit_if_ieq(b, mme_zero(), mme_zero());
}

#ifdef __cplusplus
}
#endif

#endif /* MME_BUILDER_H */