xref: /aosp_15_r20/external/mesa3d/src/broadcom/compiler/qpu_validate.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2014 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * @file
26  *
27  * Validates the QPU instruction sequence after register allocation and
28  * scheduling.
29  */
30 
31 #include <assert.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include "v3d_compiler.h"
35 #include "qpu/qpu_disasm.h"
36 
/**
 * Running state carried from one instruction to the next while the
 * validator walks the program in order.
 */
struct v3d_qpu_validate_state {
        /* Compile context whose instructions are being validated. */
        struct v3d_compile *c;

        /* Previously validated instruction, or NULL before the first one. */
        const struct v3d_qpu_instr *last;

        /* Index of the instruction currently being validated. */
        int ip;

        /* ip of the most recent instruction that wrote an SFU magic
         * register.
         */
        int last_sfu_write;

        /* ip of the most recent branch instruction. */
        int last_branch_ip;

        /* ip of the most recent THRSW that was not merely the second of a
         * back-to-back pair.
         */
        int last_thrsw_ip;

        /* Lowest ip seen so far of an instruction flagged as a TLB Z
         * write.
         */
        int first_tlb_z_write;

        /* Set when we've found the last-THRSW signal, or if we were started
         * in single-segment mode.
         */
        bool last_thrsw_found;

        /* Set when we've found the THRSW after the last THRSW */
        bool thrend_found;

        /* Number of THRSWs counted (the second of a back-to-back pair is
         * not counted).
         */
        int thrsw_count;

        /* Set when a THRSW went by while rtop_valid was set; a following
         * UMUL24 is then reported as an error.
         */
        bool rtop_hazard;

        /* Set by a MULTOP and cleared by UMUL24 or when the hazard above is
         * raised.
         */
        bool rtop_valid;
};
59 
60 static void
fail_instr(struct v3d_qpu_validate_state * state,const char * msg)61 fail_instr(struct v3d_qpu_validate_state *state, const char *msg)
62 {
63         struct v3d_compile *c = state->c;
64 
65         fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg);
66 
67         int dump_ip = 0;
68         vir_for_each_inst_inorder(inst, c) {
69                 v3d_qpu_dump(c->devinfo, &inst->qpu);
70 
71                 if (dump_ip++ == state->ip)
72                         fprintf(stderr, " *** ERROR ***");
73 
74                 fprintf(stderr, "\n");
75         }
76 
77         fprintf(stderr, "\n");
78         abort();
79 }
80 
81 static bool
in_branch_delay_slots(struct v3d_qpu_validate_state * state)82 in_branch_delay_slots(struct v3d_qpu_validate_state *state)
83 {
84         return (state->ip - state->last_branch_ip) < 3;
85 }
86 
87 static bool
in_thrsw_delay_slots(struct v3d_qpu_validate_state * state)88 in_thrsw_delay_slots(struct v3d_qpu_validate_state *state)
89 {
90         return (state->ip - state->last_thrsw_ip) < 3;
91 }
92 
93 static bool
qpu_magic_waddr_matches(const struct v3d_qpu_instr * inst,bool (* predicate)(enum v3d_qpu_waddr waddr))94 qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
95                         bool (*predicate)(enum v3d_qpu_waddr waddr))
96 {
97         if (inst->type == V3D_QPU_INSTR_TYPE_ALU)
98                 return false;
99 
100         if (inst->alu.add.op != V3D_QPU_A_NOP &&
101             inst->alu.add.magic_write &&
102             predicate(inst->alu.add.waddr))
103                 return true;
104 
105         if (inst->alu.mul.op != V3D_QPU_M_NOP &&
106             inst->alu.mul.magic_write &&
107             predicate(inst->alu.mul.waddr))
108                 return true;
109 
110         return false;
111 }
112 
113 static void
qpu_validate_inst(struct v3d_qpu_validate_state * state,struct qinst * qinst)114 qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
115 {
116         const struct v3d_device_info *devinfo = state->c->devinfo;
117 
118         if (qinst->is_tlb_z_write && state->ip < state->first_tlb_z_write)
119                 state->first_tlb_z_write = state->ip;
120 
121         const struct v3d_qpu_instr *inst = &qinst->qpu;
122 
123         if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH &&
124             state->first_tlb_z_write >= 0 &&
125             state->ip > state->first_tlb_z_write &&
126             inst->branch.msfign != V3D_QPU_MSFIGN_NONE &&
127             inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS &&
128             inst->branch.cond != V3D_QPU_BRANCH_COND_A0 &&
129             inst->branch.cond != V3D_QPU_BRANCH_COND_NA0) {
130                 fail_instr(state, "Implicit branch MSF read after TLB Z write");
131         }
132 
133         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
134                 return;
135 
136         if (inst->alu.mul.op == V3D_QPU_M_MULTOP)
137             state->rtop_valid = true;
138 
139         if (inst->alu.mul.op == V3D_QPU_M_UMUL24) {
140             if (state->rtop_hazard)
141                 fail_instr(state, "UMUL24 reads rtop from MULTOP but it got cleared by a previous THRSW");
142             state->rtop_valid = false;
143             state->rtop_hazard = false;
144         }
145 
146         if (inst->alu.add.op == V3D_QPU_A_SETMSF &&
147             state->first_tlb_z_write >= 0 &&
148             state->ip > state->first_tlb_z_write) {
149                 fail_instr(state, "SETMSF after TLB Z write");
150         }
151 
152         if (state->first_tlb_z_write >= 0 &&
153             state->ip > state->first_tlb_z_write &&
154             inst->alu.add.op == V3D_QPU_A_MSF) {
155                 fail_instr(state, "MSF read after TLB Z write");
156         }
157 
158         if (devinfo->ver < 71) {
159                 if (inst->sig.small_imm_a || inst->sig.small_imm_c ||
160                     inst->sig.small_imm_d) {
161                         fail_instr(state, "small imm a/c/d added after V3D 7.1");
162                 }
163         } else {
164                 if ((inst->sig.small_imm_a || inst->sig.small_imm_b) &&
165                     !vir_is_add(qinst)) {
166                         fail_instr(state, "small imm a/b used but no ADD inst");
167                 }
168                 if ((inst->sig.small_imm_c || inst->sig.small_imm_d) &&
169                     !vir_is_mul(qinst)) {
170                         fail_instr(state, "small imm c/d used but no MUL inst");
171                 }
172                 if (inst->sig.small_imm_a + inst->sig.small_imm_b +
173                     inst->sig.small_imm_c + inst->sig.small_imm_d > 1) {
174                         fail_instr(state, "only one small immediate can be "
175                                    "enabled per instruction");
176                 }
177         }
178 
179         /* LDVARY writes r5 two instructions later and LDUNIF writes
180          * r5 one instruction later, which is illegal to have
181          * together.
182          */
183         if (state->last && state->last->sig.ldvary &&
184             (inst->sig.ldunif || inst->sig.ldunifa)) {
185                 fail_instr(state, "LDUNIF after a LDVARY");
186         }
187 
188         /* GFXH-1633 (fixed since V3D 4.2.14, which is Rpi4)
189          *
190          * FIXME: This would not check correctly for V3D 4.2 versions lower
191          * than V3D 4.2.14, but that is not a real issue because the simulator
192          * will still catch this, and we are not really targeting any such
193          * versions anyway.
194          */
195         if (state->c->devinfo->ver < 42) {
196                 bool last_reads_ldunif = (state->last && (state->last->sig.ldunif ||
197                                                           state->last->sig.ldunifrf));
198                 bool last_reads_ldunifa = (state->last && (state->last->sig.ldunifa ||
199                                                            state->last->sig.ldunifarf));
200                 bool reads_ldunif = inst->sig.ldunif || inst->sig.ldunifrf;
201                 bool reads_ldunifa = inst->sig.ldunifa || inst->sig.ldunifarf;
202                 if ((last_reads_ldunif && reads_ldunifa) ||
203                     (last_reads_ldunifa && reads_ldunif)) {
204                         fail_instr(state,
205                                    "LDUNIF and LDUNIFA can't be next to each other");
206                 }
207         }
208 
209         int tmu_writes = 0;
210         int sfu_writes = 0;
211         int vpm_writes = 0;
212         int tlb_writes = 0;
213         int tsy_writes = 0;
214 
215         if (inst->alu.add.op != V3D_QPU_A_NOP) {
216                 if (inst->alu.add.magic_write) {
217                         if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
218                                                        inst->alu.add.waddr)) {
219                                 tmu_writes++;
220                         }
221                         if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))
222                                 sfu_writes++;
223                         if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr))
224                                 vpm_writes++;
225                         if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr))
226                                 tlb_writes++;
227                         if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr))
228                                 tsy_writes++;
229                 }
230         }
231 
232         if (inst->alu.mul.op != V3D_QPU_M_NOP) {
233                 if (inst->alu.mul.magic_write) {
234                         if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
235                                                        inst->alu.mul.waddr)) {
236                                 tmu_writes++;
237                         }
238                         if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))
239                                 sfu_writes++;
240                         if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr))
241                                 vpm_writes++;
242                         if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr))
243                                 tlb_writes++;
244                         if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr))
245                                 tsy_writes++;
246                 }
247         }
248 
249         if (in_thrsw_delay_slots(state)) {
250                 /* There's no way you want to start SFU during the THRSW delay
251                  * slots, since the result would land in the other thread.
252                  */
253                 if (sfu_writes) {
254                         fail_instr(state,
255                                    "SFU write started during THRSW delay slots ");
256                 }
257 
258                 if (inst->sig.ldvary) {
259                         if (devinfo->ver == 42)
260                                 fail_instr(state, "LDVARY during THRSW delay slots");
261                         if (devinfo->ver >= 71 &&
262                             state->ip - state->last_thrsw_ip == 2) {
263                                 fail_instr(state, "LDVARY in 2nd THRSW delay slot");
264                         }
265                 }
266         }
267 
268         (void)qpu_magic_waddr_matches; /* XXX */
269 
270         /* SFU r4 results come back two instructions later.  No doing
271          * r4 read/writes or other SFU lookups until it's done.
272          */
273         if (state->ip - state->last_sfu_write < 2) {
274                 if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
275                         fail_instr(state, "R4 read too soon after SFU");
276 
277                 if (v3d_qpu_writes_r4(devinfo, inst))
278                         fail_instr(state, "R4 write too soon after SFU");
279 
280                 if (sfu_writes)
281                         fail_instr(state, "SFU write too soon after SFU");
282         }
283 
284         /* XXX: The docs say VPM can happen with the others, but the simulator
285          * disagrees.
286          */
287         if (tmu_writes +
288             sfu_writes +
289             vpm_writes +
290             tlb_writes +
291             tsy_writes +
292             (devinfo->ver == 42 ? inst->sig.ldtmu : 0) +
293             inst->sig.ldtlb +
294             inst->sig.ldvpm +
295             inst->sig.ldtlbu > 1) {
296                 fail_instr(state,
297                            "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed");
298         }
299 
300         if (sfu_writes)
301                 state->last_sfu_write = state->ip;
302 
303         if (inst->sig.thrsw) {
304                 if (in_branch_delay_slots(state))
305                         fail_instr(state, "THRSW in a branch delay slot.");
306 
307                 if (state->last_thrsw_found)
308                         state->thrend_found = true;
309 
310                 if (state->last_thrsw_ip == state->ip - 1) {
311                         /* If it's the second THRSW in a row, then it's just a
312                          * last-thrsw signal.
313                          */
314                         if (state->last_thrsw_found)
315                                 fail_instr(state, "Two last-THRSW signals");
316                         state->last_thrsw_found = true;
317                 } else {
318                         if (in_thrsw_delay_slots(state)) {
319                                 fail_instr(state,
320                                            "THRSW too close to another THRSW.");
321                         }
322                         state->thrsw_count++;
323                         state->last_thrsw_ip = state->ip;
324                 }
325         }
326 
327         if (state->thrend_found &&
328             state->last_thrsw_ip - state->ip <= 2 &&
329             inst->type == V3D_QPU_INSTR_TYPE_ALU) {
330                 if ((inst->alu.add.op != V3D_QPU_A_NOP &&
331                      !inst->alu.add.magic_write)) {
332                         if (devinfo->ver == 42) {
333                                 fail_instr(state, "RF write after THREND");
334                         } else if (devinfo->ver >= 71) {
335                                 if (state->last_thrsw_ip - state->ip == 0) {
336                                         fail_instr(state,
337                                                    "ADD RF write at THREND");
338                                 }
339                                 if (inst->alu.add.waddr == 2 ||
340                                     inst->alu.add.waddr == 3) {
341                                         fail_instr(state,
342                                                    "RF2-3 write after THREND");
343                                 }
344                         }
345                 }
346 
347                 if ((inst->alu.mul.op != V3D_QPU_M_NOP &&
348                      !inst->alu.mul.magic_write)) {
349                         if (devinfo->ver == 42) {
350                                 fail_instr(state, "RF write after THREND");
351                         } else if (devinfo->ver >= 71) {
352                                 if (state->last_thrsw_ip - state->ip == 0) {
353                                         fail_instr(state,
354                                                    "MUL RF write at THREND");
355                                 }
356 
357                                 if (inst->alu.mul.waddr == 2 ||
358                                     inst->alu.mul.waddr == 3) {
359                                         fail_instr(state,
360                                                    "RF2-3 write after THREND");
361                                 }
362                         }
363                 }
364 
365                 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
366                     !inst->sig_magic) {
367                         if (devinfo->ver == 42) {
368                                 fail_instr(state, "RF write after THREND");
369                         } else if (devinfo->ver >= 71 &&
370                                    (inst->sig_addr == 2 ||
371                                     inst->sig_addr == 3)) {
372                                 fail_instr(state, "RF2-3 write after THREND");
373                         }
374                 }
375 
376                 /* GFXH-1625: No TMUWT in the last instruction */
377                 if (state->last_thrsw_ip - state->ip == 2 &&
378                     inst->alu.add.op == V3D_QPU_A_TMUWT)
379                         fail_instr(state, "TMUWT in last instruction");
380         }
381 
382         if (state->rtop_valid && state->ip == state->last_thrsw_ip + 2) {
383                 state->rtop_hazard = true;
384                 state->rtop_valid = false;
385         }
386 
387         if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
388                 if (in_branch_delay_slots(state))
389                         fail_instr(state, "branch in a branch delay slot.");
390                 if (in_thrsw_delay_slots(state))
391                         fail_instr(state, "branch in a THRSW delay slot.");
392                 state->last_branch_ip = state->ip;
393         }
394 }
395 
396 static void
qpu_validate_block(struct v3d_qpu_validate_state * state,struct qblock * block)397 qpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block)
398 {
399         vir_for_each_inst(qinst, block) {
400                 qpu_validate_inst(state, qinst);
401 
402                 state->last = &qinst->qpu;
403                 state->ip++;
404         }
405 }
406 
407 /**
408  * Checks for the instruction restrictions from page 37 ("Summary of
409  * Instruction Restrictions").
410  */
411 void
qpu_validate(struct v3d_compile * c)412 qpu_validate(struct v3d_compile *c)
413 {
414         /* We don't want to do validation in release builds, but we want to
415          * keep compiling the validation code to make sure it doesn't get
416          * broken.
417          */
418 #if !MESA_DEBUG
419         return;
420 #endif
421 
422         struct v3d_qpu_validate_state state = {
423                 .c = c,
424                 .last_sfu_write = -10,
425                 .last_thrsw_ip = -10,
426                 .last_branch_ip = -10,
427                 .first_tlb_z_write = INT_MAX,
428                 .ip = 0,
429 
430                 .last_thrsw_found = !c->last_thrsw,
431                 .rtop_hazard = false,
432                 .rtop_valid = false,
433         };
434 
435         vir_for_each_block(block, c) {
436                 qpu_validate_block(&state, block);
437         }
438 
439         if (state.thrsw_count > 1 && !state.last_thrsw_found) {
440                 fail_instr(&state,
441                            "thread switch found without last-THRSW in program");
442         }
443 
444         if (!state.thrend_found)
445                 fail_instr(&state, "No program-end THRSW found");
446 }
447