/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/v3d_compiler.h"
#include "qpu/qpu_instr.h"
#include "qpu/qpu_disasm.h"

static inline struct qpu_reg
qpu_reg(int index)
{
        struct qpu_reg reg = {
                .magic = false,
                .index = index,
        };
        return reg;
}

static inline struct qpu_reg
qpu_magic(enum v3d_qpu_waddr waddr)
{
        struct qpu_reg reg = {
                .magic = true,
                .index = waddr,
        };
        return reg;
}

struct v3d_qpu_instr
v3d_qpu_nop(void)
{
        struct v3d_qpu_instr instr = {
                .type = V3D_QPU_INSTR_TYPE_ALU,
                .alu = {
                        .add = {
                                .op = V3D_QPU_A_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                        .mul = {
                                .op = V3D_QPU_M_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                }
        };

        return instr;
}

static struct qinst *
vir_nop(void)
{
        struct qreg undef = vir_nop_reg();
        struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);

        return qinst;
}

static struct qinst *
new_qpu_nop_before(struct qinst *inst)
{
        struct qinst *q = vir_nop();

        list_addtail(&q->link, &inst->link);

        return q;
}

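/**
 * Allocates the src register into the per-operand RADDR field of the
 * instruction (on V3D 7.x sources are plain register-file addresses rather
 * than mux selections).
 */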
static void
v3d71_set_src(struct v3d_qpu_instr *instr, uint8_t *raddr, struct qpu_reg src)
{
        /* If we have a small immediate, move it from instr->raddr_b to the
         * corresponding raddr.
         */
        if (src.smimm) {
                assert(instr->sig.small_imm_a || instr->sig.small_imm_b ||
                       instr->sig.small_imm_c || instr->sig.small_imm_d);
                *raddr = instr->raddr_b;
                return;
        }

        assert(!src.magic);
        *raddr = src.index;
}

/**
 * Allocates the src register (accumulator or register file) into the RADDR
 * fields of the instruction.
 */
static void
v3d42_set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
{
        if (src.smimm) {
                assert(instr->sig.small_imm_b);
                *mux = V3D_QPU_MUX_B;
                return;
        }

        if (src.magic) {
                assert(src.index >= V3D_QPU_WADDR_R0 &&
                       src.index <= V3D_QPU_WADDR_R5);
                *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
                return;
        }

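        /* Register-file source: prefer raddr_a if no operand is using mux A
         * yet, reuse raddr_a when it already holds this index, and otherwise
         * fall back to raddr_b.
         */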
        if (instr->alu.add.a.mux != V3D_QPU_MUX_A &&
            instr->alu.add.b.mux != V3D_QPU_MUX_A &&
            instr->alu.mul.a.mux != V3D_QPU_MUX_A &&
            instr->alu.mul.b.mux != V3D_QPU_MUX_A) {
                instr->raddr_a = src.index;
                *mux = V3D_QPU_MUX_A;
        } else {
                if (instr->raddr_a == src.index) {
                        *mux = V3D_QPU_MUX_A;
                } else {
                        assert(!(instr->alu.add.a.mux == V3D_QPU_MUX_B &&
                                 instr->alu.add.b.mux == V3D_QPU_MUX_B &&
                                 instr->alu.mul.a.mux == V3D_QPU_MUX_B &&
                                 instr->alu.mul.b.mux == V3D_QPU_MUX_B) ||
                               src.index == instr->raddr_b);

                        instr->raddr_b = src.index;
                        *mux = V3D_QPU_MUX_B;
                }
        }
}

/*
 * This wrapper mainly exists to make calls to set_src cleaner: it receives
 * both the mux and raddr pointers, and fills in one or the other depending
 * on the device version.
 */
static void
set_src(struct v3d_qpu_instr *instr,
        enum v3d_qpu_mux *mux,
        uint8_t *raddr,
        struct qpu_reg src,
        const struct v3d_device_info *devinfo)
{
        if (devinfo->ver < 71)
                return v3d42_set_src(instr, mux, src);
        else
                return v3d71_set_src(instr, raddr, src);
}

static bool
v3d42_mov_src_and_dst_equal(struct qinst *qinst)
{
        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
        if (qinst->qpu.alu.mul.magic_write) {
                if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
                        return false;

                if (qinst->qpu.alu.mul.a.mux !=
                    V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
                        return false;
                }
        } else {
                int raddr;

                switch (qinst->qpu.alu.mul.a.mux) {
                case V3D_QPU_MUX_A:
                        raddr = qinst->qpu.raddr_a;
                        break;
                case V3D_QPU_MUX_B:
                        raddr = qinst->qpu.raddr_b;
                        break;
                default:
                        return false;
                }
                if (raddr != waddr)
                        return false;
        }

        return true;
}

static bool
v3d71_mov_src_and_dst_equal(struct qinst *qinst)
{
        if (qinst->qpu.alu.mul.magic_write)
                return false;

        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
        int raddr;

        raddr = qinst->qpu.alu.mul.a.raddr;
        if (raddr != waddr)
                return false;

        return true;
}

static bool
mov_src_and_dst_equal(struct qinst *qinst,
                      const struct v3d_device_info *devinfo)
{
        if (devinfo->ver < 71)
                return v3d42_mov_src_and_dst_equal(qinst);
        else
                return v3d71_mov_src_and_dst_equal(qinst);
}

static bool
is_no_op_mov(struct qinst *qinst,
             const struct v3d_device_info *devinfo)
{
        static const struct v3d_qpu_sig no_sig = {0};

        /* Make sure it's just a lone MOV. We only check for M_MOV: although
         * V3D 7.x also has A_MOV, we don't need to check for it because we
         * always emit MOVs using M_MOV. We could use A_MOV later in the
         * scheduler to improve performance.
         */
        if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
            qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
            memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
                return false;
        }

        if (!mov_src_and_dst_equal(qinst, devinfo))
                return false;

        /* No packing or flags updates, or we need to execute the
         * instruction.
         */
        if (qinst->qpu.alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
            qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
            qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
            qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
            qinst->qpu.flags.muf != V3D_QPU_UF_NONE) {
                return false;
        }

        return true;
}

static void
v3d_generate_code_block(struct v3d_compile *c,
                        struct qblock *block,
                        struct qpu_reg *temp_registers)
{
        vir_for_each_inst_safe(qinst, block) {
#if 0
                fprintf(stderr, "translating qinst to qpu: ");
                vir_dump_inst(c, qinst);
                fprintf(stderr, "\n");
#endif

                if (vir_has_uniform(qinst))
                        c->num_uniforms++;

                int nsrc = vir_get_nsrc(qinst);
                struct qpu_reg src[ARRAY_SIZE(qinst->src)];
                for (int i = 0; i < nsrc; i++) {
                        int index = qinst->src[i].index;
                        switch (qinst->src[i].file) {
                        case QFILE_REG:
                                src[i] = qpu_reg(qinst->src[i].index);
                                break;
                        case QFILE_MAGIC:
                                src[i] = qpu_magic(qinst->src[i].index);
                                break;
                        case QFILE_NULL:
                                /* QFILE_NULL is an undef, so we can load
                                 * anything. Using a reg that doesn't have
                                 * sched. restrictions.
                                 */
                                src[i] = qpu_reg(5);
                                break;
                        case QFILE_LOAD_IMM:
                                assert(!"not reached");
                                break;
                        case QFILE_TEMP:
                                src[i] = temp_registers[index];
                                break;
                        case QFILE_SMALL_IMM:
                                src[i].smimm = true;
                                break;
                        }
                }

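                /* Translate the VIR destination into a QPU register or magic
                 * waddr.
                 */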
                struct qpu_reg dst;
                switch (qinst->dst.file) {
                case QFILE_NULL:
                        dst = qpu_magic(V3D_QPU_WADDR_NOP);
                        break;

                case QFILE_REG:
                        dst = qpu_reg(qinst->dst.index);
                        break;

                case QFILE_MAGIC:
                        dst = qpu_magic(qinst->dst.index);
                        break;

                case QFILE_TEMP:
                        dst = temp_registers[qinst->dst.index];
                        break;

                case QFILE_SMALL_IMM:
                case QFILE_LOAD_IMM:
                        assert(!"not reached");
                        break;
                }

                if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                        if (qinst->qpu.sig.ldunif || qinst->qpu.sig.ldunifa) {
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

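                                /* Plain ldunif/ldunifa lands in the default
                                 * destination (r5 with accumulators, rf0
                                 * without); for any other destination, switch
                                 * to the ldunif(a)rf variant and encode it in
                                 * sig_addr/sig_magic.
                                 */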
                                bool use_rf;
                                if (c->devinfo->has_accumulators) {
                                        use_rf = !dst.magic ||
                                                 dst.index != V3D_QPU_WADDR_R5;
                                } else {
                                        use_rf = dst.magic || dst.index != 0;
                                }

                                if (use_rf) {
                                        if (qinst->qpu.sig.ldunif) {
                                                qinst->qpu.sig.ldunif = false;
                                                qinst->qpu.sig.ldunifrf = true;
                                        } else {
                                                qinst->qpu.sig.ldunifa = false;
                                                qinst->qpu.sig.ldunifarf = true;
                                        }
                                        qinst->qpu.sig_addr = dst.index;
                                        qinst->qpu.sig_magic = dst.magic;
                                }
                        } else if (v3d_qpu_sig_writes_address(c->devinfo,
                                                              &qinst->qpu.sig)) {
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                qinst->qpu.sig_addr = dst.index;
                                qinst->qpu.sig_magic = dst.magic;
                        } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.a.mux,
                                                &qinst->qpu.alu.add.a.raddr,
                                                src[0], c->devinfo);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.b.mux,
                                                &qinst->qpu.alu.add.b.raddr,
                                                src[1], c->devinfo);
                                }

                                qinst->qpu.alu.add.waddr = dst.index;
                                qinst->qpu.alu.add.magic_write = dst.magic;
                        } else {
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.a.mux,
                                                &qinst->qpu.alu.mul.a.raddr,
                                                src[0], c->devinfo);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.b.mux,
                                                &qinst->qpu.alu.mul.b.raddr,
                                                src[1], c->devinfo);
                                }

                                qinst->qpu.alu.mul.waddr = dst.index;
                                qinst->qpu.alu.mul.magic_write = dst.magic;

                                if (is_no_op_mov(qinst, c->devinfo)) {
                                        vir_remove_instruction(c, qinst);
                                        continue;
                                }
                        }
                } else {
                        assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
                }
        }
}

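/* Returns whether the packed instruction consumes an entry from the uniform
 * stream; used below to print the matching uniform next to each instruction
 * in the QPU dump.
 */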
static bool
reads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction)
{
        struct v3d_qpu_instr qpu;
        ASSERTED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu);
        assert(ok);

        if (qpu.sig.ldunif ||
            qpu.sig.ldunifrf ||
            qpu.sig.ldtlbu ||
            qpu.sig.wrtmuc) {
                return true;
        }

        if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
                return true;

        if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                if (qpu.alu.add.magic_write &&
                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) {
                        return true;
                }

                if (qpu.alu.mul.magic_write &&
                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) {
                        return true;
                }
        }

        return false;
}

static void
v3d_dump_qpu(struct v3d_compile *c)
{
        fprintf(stderr, "%s prog %d/%d QPU:\n",
                vir_get_stage_name(c),
                c->program_id, c->variant_id);

        int next_uniform = 0;
        for (int i = 0; i < c->qpu_inst_count; i++) {
                const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
                fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);

                if (reads_uniform(c->devinfo, c->qpu_insts[i])) {
                        fprintf(stderr, " (");
                        vir_dump_uniform(c->uniform_contents[next_uniform],
                                         c->uniform_data[next_uniform]);
                        fprintf(stderr, ")");
                        next_uniform++;
                }
                fprintf(stderr, "\n");
                ralloc_free((void *)str);
        }

        /* Make sure our dumping lined up. */
        assert(next_uniform == c->num_uniforms);

        fprintf(stderr, "\n");
}

void
v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
{
        /* Reset the uniform count to how many will actually be loaded by the
         * generated QPU code.
         */
        c->num_uniforms = 0;

        vir_for_each_block(block, c)
                v3d_generate_code_block(c, block, temp_registers);

        v3d_qpu_schedule_instructions(c);

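        /* Pack each scheduled instruction into its final 64-bit QPU
         * encoding.
         */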
        c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
        int i = 0;
        vir_for_each_inst_inorder(inst, c) {
                bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
                                             &c->qpu_insts[i++]);
                if (!ok) {
                        fprintf(stderr, "Failed to pack instruction %d:\n", i);
                        vir_dump_inst(c, inst);
                        fprintf(stderr, "\n");
                        c->compilation_result = V3D_COMPILATION_FAILED;
                        return;
                }

                if (v3d_qpu_is_nop(&inst->qpu))
                        c->nop_count++;
        }
        assert(i == c->qpu_inst_count);

        if (V3D_DBG(QPU) ||
            v3d_debug_flag_for_shader_stage(c->s->info.stage)) {
                v3d_dump_qpu(c);
        }

        qpu_validate(c);

        free(temp_registers);
}