xref: /aosp_15_r20/external/mesa3d/src/broadcom/compiler/vir_to_qpu.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/v3d_compiler.h"
#include "qpu/qpu_instr.h"
#include "qpu/qpu_disasm.h"

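/* Builds a qpu_reg that refers to a physical register file entry. */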
static inline struct qpu_reg
qpu_reg(int index)
{
        struct qpu_reg reg = {
                .magic = false,
                .index = index,
        };
        return reg;
}

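/* Builds a qpu_reg that refers to a magic waddr rather than the register
 * file.
 */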
static inline struct qpu_reg
qpu_magic(enum v3d_qpu_waddr waddr)
{
        struct qpu_reg reg = {
                .magic = true,
                .index = waddr,
        };
        return reg;
}

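/* Returns a QPU instruction with both the add and mul ALU ops set to NOP. */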
struct v3d_qpu_instr
v3d_qpu_nop(void)
{
        struct v3d_qpu_instr instr = {
                .type = V3D_QPU_INSTR_TYPE_ALU,
                .alu = {
                        .add = {
                                .op = V3D_QPU_A_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                        .mul = {
                                .op = V3D_QPU_M_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                }
        };

        return instr;
}

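/* Allocates a VIR NOP instruction (an add-ALU NOP with undefined operands). */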
static struct qinst *
vir_nop(void)
{
        struct qreg undef = vir_nop_reg();
        struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);

        return qinst;
}

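/* Inserts a fresh NOP into the instruction list right before @inst. */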
static struct qinst *
new_qpu_nop_before(struct qinst *inst)
{
        struct qinst *q = vir_nop();

        list_addtail(&q->link, &inst->link);

        return q;
}

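/* V3D 7.x version of set_src(): sources are plain raddr fields, so we just
 * store the register file index (or reuse raddr_b for small immediates).
 */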
static void
v3d71_set_src(struct v3d_qpu_instr *instr, uint8_t *raddr, struct qpu_reg src)
{
        /* If we have a small immediate, move it from instr->raddr_b to the
         * corresponding raddr.
         */
        if (src.smimm) {
                assert(instr->sig.small_imm_a || instr->sig.small_imm_b ||
                       instr->sig.small_imm_c || instr->sig.small_imm_d);
                *raddr = instr->raddr_b;
                return;
        }

        assert(!src.magic);
        *raddr = src.index;
}

/**
 * Allocates the src register (accumulator or register file) into the RADDR
 * fields of the instruction.
 */
static void
v3d42_set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
{
        if (src.smimm) {
                assert(instr->sig.small_imm_b);
                *mux = V3D_QPU_MUX_B;
                return;
        }

        if (src.magic) {
                assert(src.index >= V3D_QPU_WADDR_R0 &&
                       src.index <= V3D_QPU_WADDR_R5);
                *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
                return;
        }

        if (instr->alu.add.a.mux != V3D_QPU_MUX_A &&
            instr->alu.add.b.mux != V3D_QPU_MUX_A &&
            instr->alu.mul.a.mux != V3D_QPU_MUX_A &&
            instr->alu.mul.b.mux != V3D_QPU_MUX_A) {
                instr->raddr_a = src.index;
                *mux = V3D_QPU_MUX_A;
        } else {
                if (instr->raddr_a == src.index) {
                        *mux = V3D_QPU_MUX_A;
                } else {
                        assert(!(instr->alu.add.a.mux == V3D_QPU_MUX_B &&
                                 instr->alu.add.b.mux == V3D_QPU_MUX_B &&
                                 instr->alu.mul.a.mux == V3D_QPU_MUX_B &&
                                 instr->alu.mul.b.mux == V3D_QPU_MUX_B) ||
                               src.index == instr->raddr_b);

                        instr->raddr_b = src.index;
                        *mux = V3D_QPU_MUX_B;
                }
        }
}

/*
 * This wrapper exists to make calling set_src cleaner: it receives both the
 * mux and raddr pointers, and only the one that applies to the device
 * version gets filled in.
 */
static void
set_src(struct v3d_qpu_instr *instr,
        enum v3d_qpu_mux *mux,
        uint8_t *raddr,
        struct qpu_reg src,
        const struct v3d_device_info *devinfo)
{
        if (devinfo->ver < 71)
                return v3d42_set_src(instr, mux, src);
        else
                return v3d71_set_src(instr, raddr, src);
}

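/* Checks whether a MUL MOV reads and writes the same location on V3D 4.x:
 * either an accumulator written through a magic waddr and read back through
 * the matching mux, or a register file entry whose raddr matches the waddr.
 */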
static bool
v3d42_mov_src_and_dst_equal(struct qinst *qinst)
{
        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
        if (qinst->qpu.alu.mul.magic_write) {
                if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
                        return false;

                if (qinst->qpu.alu.mul.a.mux !=
                    V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
                        return false;
                }
        } else {
                int raddr;

                switch (qinst->qpu.alu.mul.a.mux) {
                case V3D_QPU_MUX_A:
                        raddr = qinst->qpu.raddr_a;
                        break;
                case V3D_QPU_MUX_B:
                        raddr = qinst->qpu.raddr_b;
                        break;
                default:
                        return false;
                }
                if (raddr != waddr)
                        return false;
        }

        return true;
}

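/* V3D 7.x variant: with no accumulators, a MOV has src == dst only when it
 * is a register file write whose raddr matches the waddr.
 */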
static bool
v3d71_mov_src_and_dst_equal(struct qinst *qinst)
{
        if (qinst->qpu.alu.mul.magic_write)
                return false;

        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
        int raddr;

        raddr = qinst->qpu.alu.mul.a.raddr;
        if (raddr != waddr)
                return false;

        return true;
}

static bool
mov_src_and_dst_equal(struct qinst *qinst,
                      const struct v3d_device_info *devinfo)
{
        if (devinfo->ver < 71)
                return v3d42_mov_src_and_dst_equal(qinst);
        else
                return v3d71_mov_src_and_dst_equal(qinst);
}


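/* Returns true if the instruction is a lone MOV whose source and destination
 * are the same register and which has no signal, packing, condition or flag
 * side effects, so it can be dropped entirely.
 */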
static bool
is_no_op_mov(struct qinst *qinst,
             const struct v3d_device_info *devinfo)
{
        static const struct v3d_qpu_sig no_sig = {0};

        /* Make sure it's just a lone MOV. We only check for M_MOV: although
         * V3D 7.x also has A_MOV, we don't need to check for it because we
         * always emit using M_MOV. We could switch to A_MOV later, during
         * scheduling, to improve performance.
         */
        if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
            qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
            memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
                return false;
        }

        if (!mov_src_and_dst_equal(qinst, devinfo))
                return false;

        /* There must be no packing or flag updates either; otherwise the
         * instruction has a visible effect and needs to be executed.
         */
        if (qinst->qpu.alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
            qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
            qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
            qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
            qinst->qpu.flags.muf != V3D_QPU_UF_NONE) {
                return false;
        }

        return true;
}

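/* Rewrites every VIR instruction in the block in place into its QPU
 * encoding: register-allocated temporaries are mapped through
 * temp_registers, sources and destination are packed into the raddr/mux and
 * waddr fields, and no-op MOVs are removed.
 */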
static void
v3d_generate_code_block(struct v3d_compile *c,
                        struct qblock *block,
                        struct qpu_reg *temp_registers)
{
        vir_for_each_inst_safe(qinst, block) {
#if 0
                fprintf(stderr, "translating qinst to qpu: ");
                vir_dump_inst(c, qinst);
                fprintf(stderr, "\n");
#endif

                if (vir_has_uniform(qinst))
                        c->num_uniforms++;

                int nsrc = vir_get_nsrc(qinst);
                struct qpu_reg src[ARRAY_SIZE(qinst->src)];
                for (int i = 0; i < nsrc; i++) {
                        int index = qinst->src[i].index;
                        switch (qinst->src[i].file) {
                        case QFILE_REG:
                                src[i] = qpu_reg(qinst->src[i].index);
                                break;
                        case QFILE_MAGIC:
                                src[i] = qpu_magic(qinst->src[i].index);
                                break;
                        case QFILE_NULL:
                                /* QFILE_NULL is an undef, so we can load
                                 * anything. Use a register that doesn't have
                                 * scheduling restrictions.
                                 */
                                src[i] = qpu_reg(5);
                                break;
                        case QFILE_LOAD_IMM:
                                assert(!"not reached");
                                break;
                        case QFILE_TEMP:
                                src[i] = temp_registers[index];
                                break;
                        case QFILE_SMALL_IMM:
                                src[i].smimm = true;
                                break;
                        }
                }

                struct qpu_reg dst;
                switch (qinst->dst.file) {
                case QFILE_NULL:
                        dst = qpu_magic(V3D_QPU_WADDR_NOP);
                        break;

                case QFILE_REG:
                        dst = qpu_reg(qinst->dst.index);
                        break;

                case QFILE_MAGIC:
                        dst = qpu_magic(qinst->dst.index);
                        break;

                case QFILE_TEMP:
                        dst = temp_registers[qinst->dst.index];
                        break;

                case QFILE_SMALL_IMM:
                case QFILE_LOAD_IMM:
                        assert(!"not reached");
                        break;
                }

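                /* For ALU instructions, fill in the encoded source and
                 * destination fields. ldunif/ldunifa results that do not go
                 * to the implicit default destination (r5 with accumulators,
                 * rf0 without) are rewritten to the ldunifrf/ldunifarf
                 * signals, which carry an explicit destination address.
                 */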
                if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                        if (qinst->qpu.sig.ldunif || qinst->qpu.sig.ldunifa) {
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                bool use_rf;
                                if (c->devinfo->has_accumulators) {
                                        use_rf = !dst.magic ||
                                                 dst.index != V3D_QPU_WADDR_R5;
                                } else {
                                        use_rf = dst.magic || dst.index != 0;
                                }

                                if (use_rf) {
                                        if (qinst->qpu.sig.ldunif) {
                                           qinst->qpu.sig.ldunif = false;
                                           qinst->qpu.sig.ldunifrf = true;
                                        } else {
                                           qinst->qpu.sig.ldunifa = false;
                                           qinst->qpu.sig.ldunifarf = true;
                                        }
                                        qinst->qpu.sig_addr = dst.index;
                                        qinst->qpu.sig_magic = dst.magic;
                                }
                        } else if (v3d_qpu_sig_writes_address(c->devinfo,
                                                       &qinst->qpu.sig)) {
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                qinst->qpu.sig_addr = dst.index;
                                qinst->qpu.sig_magic = dst.magic;
                        } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.a.mux,
                                                &qinst->qpu.alu.add.a.raddr,
                                                src[0], c->devinfo);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.b.mux,
                                                &qinst->qpu.alu.add.b.raddr,
                                                src[1], c->devinfo);
                                }

                                qinst->qpu.alu.add.waddr = dst.index;
                                qinst->qpu.alu.add.magic_write = dst.magic;
                        } else {
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.a.mux,
                                                &qinst->qpu.alu.mul.a.raddr,
                                                src[0], c->devinfo);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.b.mux,
                                                &qinst->qpu.alu.mul.b.raddr,
                                                src[1], c->devinfo);
                                }

                                qinst->qpu.alu.mul.waddr = dst.index;
                                qinst->qpu.alu.mul.magic_write = dst.magic;

                                if (is_no_op_mov(qinst, c->devinfo)) {
                                        vir_remove_instruction(c, qinst);
                                        continue;
                                }
                        }
                } else {
                        assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
                }
        }
}

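/* Returns whether a packed QPU instruction consumes a uniform from the
 * uniform stream: through a signal, because it is a branch, or through a
 * magic waddr write that implicitly loads one.
 */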
static bool
reads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction)
{
        struct v3d_qpu_instr qpu;
        ASSERTED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu);
        assert(ok);

        if (qpu.sig.ldunif ||
            qpu.sig.ldunifrf ||
            qpu.sig.ldtlbu ||
            qpu.sig.wrtmuc) {
                return true;
        }

        if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
                return true;

        if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                if (qpu.alu.add.magic_write &&
                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) {
                        return true;
                }

                if (qpu.alu.mul.magic_write &&
                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) {
                        return true;
                }
        }

        return false;
}

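/* Disassembles the packed QPU program to stderr, annotating each
 * uniform-reading instruction with the uniform it will consume.
 */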
static void
v3d_dump_qpu(struct v3d_compile *c)
{
        fprintf(stderr, "%s prog %d/%d QPU:\n",
                vir_get_stage_name(c),
                c->program_id, c->variant_id);

        int next_uniform = 0;
        for (int i = 0; i < c->qpu_inst_count; i++) {
                const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
                fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);

                if (reads_uniform(c->devinfo, c->qpu_insts[i])) {
                        fprintf(stderr, " (");
                        vir_dump_uniform(c->uniform_contents[next_uniform],
                                         c->uniform_data[next_uniform]);
                        fprintf(stderr, ")");
                        next_uniform++;
                }
                fprintf(stderr, "\n");
                ralloc_free((void *)str);
        }

        /* Make sure our dumping lined up. */
        assert(next_uniform == c->num_uniforms);

        fprintf(stderr, "\n");
}

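/* Final VIR-to-QPU lowering: encodes each block, schedules the result, packs
 * the instructions into 64-bit QPU words, and validates the program.
 */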
void
v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
{
        /* Reset the uniform count to how many will actually be loaded by the
         * generated QPU code.
         */
        c->num_uniforms = 0;

        vir_for_each_block(block, c)
                v3d_generate_code_block(c, block, temp_registers);

        v3d_qpu_schedule_instructions(c);

        c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
        int i = 0;
        vir_for_each_inst_inorder(inst, c) {
                bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
                                             &c->qpu_insts[i++]);
                if (!ok) {
                        fprintf(stderr, "Failed to pack instruction %d:\n", i);
                        vir_dump_inst(c, inst);
                        fprintf(stderr, "\n");
                        c->compilation_result = V3D_COMPILATION_FAILED;
                        return;
                }

                if (v3d_qpu_is_nop(&inst->qpu))
                        c->nop_count++;
        }
        assert(i == c->qpu_inst_count);

        if (V3D_DBG(QPU) ||
            v3d_debug_flag_for_shader_stage(c->s->info.stage)) {
                v3d_dump_qpu(c);
        }

        qpu_validate(c);

        free(temp_registers);
}
507