xref: /aosp_15_r20/external/mesa3d/src/broadcom/qpu/qpu_instr.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2016 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <stdlib.h>
25 #include <string.h>
26 #include "util/macros.h"
27 #include "broadcom/common/v3d_device_info.h"
28 #include "qpu_instr.h"
29 
30 const char *
v3d_qpu_magic_waddr_name(const struct v3d_device_info * devinfo,enum v3d_qpu_waddr waddr)31 v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo,
32                          enum v3d_qpu_waddr waddr)
33 {
34         /* V3D 4.x UNIFA aliases TMU in V3D 3.x in the table below */
35         if (devinfo->ver < 40 && waddr == V3D_QPU_WADDR_TMU)
36                 return "tmu";
37 
38         /* V3D 7.x QUAD and REP aliases R5 and R5REPT in the table below
39          */
40         if (devinfo->ver >= 71 && waddr == V3D_QPU_WADDR_QUAD)
41                 return "quad";
42 
43         if (devinfo->ver >= 71 && waddr == V3D_QPU_WADDR_REP)
44                 return "rep";
45 
46         static const char *waddr_magic[] = {
47                 [V3D_QPU_WADDR_R0] = "r0",
48                 [V3D_QPU_WADDR_R1] = "r1",
49                 [V3D_QPU_WADDR_R2] = "r2",
50                 [V3D_QPU_WADDR_R3] = "r3",
51                 [V3D_QPU_WADDR_R4] = "r4",
52                 [V3D_QPU_WADDR_R5] = "r5",
53                 [V3D_QPU_WADDR_NOP] = "-",
54                 [V3D_QPU_WADDR_TLB] = "tlb",
55                 [V3D_QPU_WADDR_TLBU] = "tlbu",
56                 [V3D_QPU_WADDR_UNIFA] = "unifa",
57                 [V3D_QPU_WADDR_TMUL] = "tmul",
58                 [V3D_QPU_WADDR_TMUD] = "tmud",
59                 [V3D_QPU_WADDR_TMUA] = "tmua",
60                 [V3D_QPU_WADDR_TMUAU] = "tmuau",
61                 [V3D_QPU_WADDR_VPM] = "vpm",
62                 [V3D_QPU_WADDR_VPMU] = "vpmu",
63                 [V3D_QPU_WADDR_SYNC] = "sync",
64                 [V3D_QPU_WADDR_SYNCU] = "syncu",
65                 [V3D_QPU_WADDR_SYNCB] = "syncb",
66                 [V3D_QPU_WADDR_RECIP] = "recip",
67                 [V3D_QPU_WADDR_RSQRT] = "rsqrt",
68                 [V3D_QPU_WADDR_EXP] = "exp",
69                 [V3D_QPU_WADDR_LOG] = "log",
70                 [V3D_QPU_WADDR_SIN] = "sin",
71                 [V3D_QPU_WADDR_RSQRT2] = "rsqrt2",
72                 [V3D_QPU_WADDR_TMUC] = "tmuc",
73                 [V3D_QPU_WADDR_TMUS] = "tmus",
74                 [V3D_QPU_WADDR_TMUT] = "tmut",
75                 [V3D_QPU_WADDR_TMUR] = "tmur",
76                 [V3D_QPU_WADDR_TMUI] = "tmui",
77                 [V3D_QPU_WADDR_TMUB] = "tmub",
78                 [V3D_QPU_WADDR_TMUDREF] = "tmudref",
79                 [V3D_QPU_WADDR_TMUOFF] = "tmuoff",
80                 [V3D_QPU_WADDR_TMUSCM] = "tmuscm",
81                 [V3D_QPU_WADDR_TMUSF] = "tmusf",
82                 [V3D_QPU_WADDR_TMUSLOD] = "tmuslod",
83                 [V3D_QPU_WADDR_TMUHS] = "tmuhs",
84                 [V3D_QPU_WADDR_TMUHSCM] = "tmuscm",
85                 [V3D_QPU_WADDR_TMUHSF] = "tmuhsf",
86                 [V3D_QPU_WADDR_TMUHSLOD] = "tmuhslod",
87                 [V3D_QPU_WADDR_R5REP] = "r5rep",
88         };
89 
90         return waddr_magic[waddr];
91 }
92 
93 const char *
v3d_qpu_add_op_name(enum v3d_qpu_add_op op)94 v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
95 {
96         static const char *op_names[] = {
97                 [V3D_QPU_A_FADD] = "fadd",
98                 [V3D_QPU_A_FADDNF] = "faddnf",
99                 [V3D_QPU_A_VFPACK] = "vfpack",
100                 [V3D_QPU_A_ADD] = "add",
101                 [V3D_QPU_A_SUB] = "sub",
102                 [V3D_QPU_A_FSUB] = "fsub",
103                 [V3D_QPU_A_MIN] = "min",
104                 [V3D_QPU_A_MAX] = "max",
105                 [V3D_QPU_A_UMIN] = "umin",
106                 [V3D_QPU_A_UMAX] = "umax",
107                 [V3D_QPU_A_SHL] = "shl",
108                 [V3D_QPU_A_SHR] = "shr",
109                 [V3D_QPU_A_ASR] = "asr",
110                 [V3D_QPU_A_ROR] = "ror",
111                 [V3D_QPU_A_FMIN] = "fmin",
112                 [V3D_QPU_A_FMAX] = "fmax",
113                 [V3D_QPU_A_VFMIN] = "vfmin",
114                 [V3D_QPU_A_AND] = "and",
115                 [V3D_QPU_A_OR] = "or",
116                 [V3D_QPU_A_XOR] = "xor",
117                 [V3D_QPU_A_VADD] = "vadd",
118                 [V3D_QPU_A_VSUB] = "vsub",
119                 [V3D_QPU_A_NOT] = "not",
120                 [V3D_QPU_A_NEG] = "neg",
121                 [V3D_QPU_A_FLAPUSH] = "flapush",
122                 [V3D_QPU_A_FLBPUSH] = "flbpush",
123                 [V3D_QPU_A_FLPOP] = "flpop",
124                 [V3D_QPU_A_RECIP] = "recip",
125                 [V3D_QPU_A_SETMSF] = "setmsf",
126                 [V3D_QPU_A_SETREVF] = "setrevf",
127                 [V3D_QPU_A_NOP] = "nop",
128                 [V3D_QPU_A_TIDX] = "tidx",
129                 [V3D_QPU_A_EIDX] = "eidx",
130                 [V3D_QPU_A_LR] = "lr",
131                 [V3D_QPU_A_VFLA] = "vfla",
132                 [V3D_QPU_A_VFLNA] = "vflna",
133                 [V3D_QPU_A_VFLB] = "vflb",
134                 [V3D_QPU_A_VFLNB] = "vflnb",
135                 [V3D_QPU_A_FXCD] = "fxcd",
136                 [V3D_QPU_A_XCD] = "xcd",
137                 [V3D_QPU_A_FYCD] = "fycd",
138                 [V3D_QPU_A_YCD] = "ycd",
139                 [V3D_QPU_A_MSF] = "msf",
140                 [V3D_QPU_A_REVF] = "revf",
141                 [V3D_QPU_A_VDWWT] = "vdwwt",
142                 [V3D_QPU_A_IID] = "iid",
143                 [V3D_QPU_A_SAMPID] = "sampid",
144                 [V3D_QPU_A_BARRIERID] = "barrierid",
145                 [V3D_QPU_A_TMUWT] = "tmuwt",
146                 [V3D_QPU_A_VPMSETUP] = "vpmsetup",
147                 [V3D_QPU_A_VPMWT] = "vpmwt",
148                 [V3D_QPU_A_FLAFIRST] = "flafirst",
149                 [V3D_QPU_A_FLNAFIRST] = "flnafirst",
150                 [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in",
151                 [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out",
152                 [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",
153                 [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out",
154                 [V3D_QPU_A_LDVPMP] = "ldvpmp",
155                 [V3D_QPU_A_RSQRT] = "rsqrt",
156                 [V3D_QPU_A_EXP] = "exp",
157                 [V3D_QPU_A_LOG] = "log",
158                 [V3D_QPU_A_SIN] = "sin",
159                 [V3D_QPU_A_RSQRT2] = "rsqrt2",
160                 [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in",
161                 [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out",
162                 [V3D_QPU_A_FCMP] = "fcmp",
163                 [V3D_QPU_A_VFMAX] = "vfmax",
164                 [V3D_QPU_A_FROUND] = "fround",
165                 [V3D_QPU_A_FTOIN] = "ftoin",
166                 [V3D_QPU_A_FTRUNC] = "ftrunc",
167                 [V3D_QPU_A_FTOIZ] = "ftoiz",
168                 [V3D_QPU_A_FFLOOR] = "ffloor",
169                 [V3D_QPU_A_FTOUZ] = "ftouz",
170                 [V3D_QPU_A_FCEIL] = "fceil",
171                 [V3D_QPU_A_FTOC] = "ftoc",
172                 [V3D_QPU_A_FDX] = "fdx",
173                 [V3D_QPU_A_FDY] = "fdy",
174                 [V3D_QPU_A_STVPMV] = "stvpmv",
175                 [V3D_QPU_A_STVPMD] = "stvpmd",
176                 [V3D_QPU_A_STVPMP] = "stvpmp",
177                 [V3D_QPU_A_ITOF] = "itof",
178                 [V3D_QPU_A_CLZ] = "clz",
179                 [V3D_QPU_A_UTOF] = "utof",
180                 [V3D_QPU_A_MOV] = "mov",
181                 [V3D_QPU_A_FMOV] = "fmov",
182                 [V3D_QPU_A_VPACK] = "vpack",
183                 [V3D_QPU_A_V8PACK] = "v8pack",
184                 [V3D_QPU_A_V10PACK] = "v10pack",
185                 [V3D_QPU_A_V11FPACK] = "v11fpack",
186                 [V3D_QPU_A_BALLOT] = "ballot",
187                 [V3D_QPU_A_BCASTF] = "bcastf",
188                 [V3D_QPU_A_ALLEQ] = "alleq",
189                 [V3D_QPU_A_ALLFEQ] = "allfeq",
190                 [V3D_QPU_A_ROTQ] = "rotq",
191                 [V3D_QPU_A_ROT] = "rot",
192                 [V3D_QPU_A_SHUFFLE] = "shuffle",
193         };
194 
195         if (op >= ARRAY_SIZE(op_names))
196                 return NULL;
197 
198         return op_names[op];
199 }
200 
201 const char *
v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)202 v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)
203 {
204         static const char *op_names[] = {
205                 [V3D_QPU_M_ADD] = "add",
206                 [V3D_QPU_M_SUB] = "sub",
207                 [V3D_QPU_M_UMUL24] = "umul24",
208                 [V3D_QPU_M_VFMUL] = "vfmul",
209                 [V3D_QPU_M_SMUL24] = "smul24",
210                 [V3D_QPU_M_MULTOP] = "multop",
211                 [V3D_QPU_M_FMOV] = "fmov",
212                 [V3D_QPU_M_MOV] = "mov",
213                 [V3D_QPU_M_NOP] = "nop",
214                 [V3D_QPU_M_FMUL] = "fmul",
215                 [V3D_QPU_M_FTOUNORM16] = "ftounorm16",
216                 [V3D_QPU_M_FTOSNORM16] = "ftosnorm16",
217                 [V3D_QPU_M_VFTOUNORM8] = "vftounorm8",
218                 [V3D_QPU_M_VFTOSNORM8] = "vftosnorm8",
219                 [V3D_QPU_M_VFTOUNORM10LO] = "vftounorm10lo",
220                 [V3D_QPU_M_VFTOUNORM10HI] = "vftounorm10hi",
221         };
222 
223         if (op >= ARRAY_SIZE(op_names))
224                 return NULL;
225 
226         return op_names[op];
227 }
228 
229 const char *
v3d_qpu_cond_name(enum v3d_qpu_cond cond)230 v3d_qpu_cond_name(enum v3d_qpu_cond cond)
231 {
232         switch (cond) {
233         case V3D_QPU_COND_NONE:
234                 return "";
235         case V3D_QPU_COND_IFA:
236                 return ".ifa";
237         case V3D_QPU_COND_IFB:
238                 return ".ifb";
239         case V3D_QPU_COND_IFNA:
240                 return ".ifna";
241         case V3D_QPU_COND_IFNB:
242                 return ".ifnb";
243         default:
244                 unreachable("bad cond value");
245         }
246 }
247 
248 const char *
v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)249 v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)
250 {
251         switch (cond) {
252         case V3D_QPU_BRANCH_COND_ALWAYS:
253                 return "";
254         case V3D_QPU_BRANCH_COND_A0:
255                 return ".a0";
256         case V3D_QPU_BRANCH_COND_NA0:
257                 return ".na0";
258         case V3D_QPU_BRANCH_COND_ALLA:
259                 return ".alla";
260         case V3D_QPU_BRANCH_COND_ANYNA:
261                 return ".anyna";
262         case V3D_QPU_BRANCH_COND_ANYA:
263                 return ".anya";
264         case V3D_QPU_BRANCH_COND_ALLNA:
265                 return ".allna";
266         default:
267                 unreachable("bad branch cond value");
268         }
269 }
270 
271 const char *
v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)272 v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)
273 {
274         switch (msfign) {
275         case V3D_QPU_MSFIGN_NONE:
276                 return "";
277         case V3D_QPU_MSFIGN_P:
278                 return "p";
279         case V3D_QPU_MSFIGN_Q:
280                 return "q";
281         default:
282                 unreachable("bad branch cond value");
283         }
284 }
285 
286 const char *
v3d_qpu_pf_name(enum v3d_qpu_pf pf)287 v3d_qpu_pf_name(enum v3d_qpu_pf pf)
288 {
289         switch (pf) {
290         case V3D_QPU_PF_NONE:
291                 return "";
292         case V3D_QPU_PF_PUSHZ:
293                 return ".pushz";
294         case V3D_QPU_PF_PUSHN:
295                 return ".pushn";
296         case V3D_QPU_PF_PUSHC:
297                 return ".pushc";
298         default:
299                 unreachable("bad pf value");
300         }
301 }
302 
303 const char *
v3d_qpu_uf_name(enum v3d_qpu_uf uf)304 v3d_qpu_uf_name(enum v3d_qpu_uf uf)
305 {
306         switch (uf) {
307         case V3D_QPU_UF_NONE:
308                 return "";
309         case V3D_QPU_UF_ANDZ:
310                 return ".andz";
311         case V3D_QPU_UF_ANDNZ:
312                 return ".andnz";
313         case V3D_QPU_UF_NORZ:
314                 return ".norz";
315         case V3D_QPU_UF_NORNZ:
316                 return ".nornz";
317         case V3D_QPU_UF_ANDN:
318                 return ".andn";
319         case V3D_QPU_UF_ANDNN:
320                 return ".andnn";
321         case V3D_QPU_UF_NORN:
322                 return ".norn";
323         case V3D_QPU_UF_NORNN:
324                 return ".nornn";
325         case V3D_QPU_UF_ANDC:
326                 return ".andc";
327         case V3D_QPU_UF_ANDNC:
328                 return ".andnc";
329         case V3D_QPU_UF_NORC:
330                 return ".norc";
331         case V3D_QPU_UF_NORNC:
332                 return ".nornc";
333         default:
334                 unreachable("bad pf value");
335         }
336 }
337 
338 const char *
v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)339 v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)
340 {
341         switch (pack) {
342         case V3D_QPU_PACK_NONE:
343                 return "";
344         case V3D_QPU_PACK_L:
345                 return ".l";
346         case V3D_QPU_PACK_H:
347                 return ".h";
348         default:
349                 unreachable("bad pack value");
350         }
351 }
352 
353 const char *
v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)354 v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)
355 {
356         switch (unpack) {
357         case V3D_QPU_UNPACK_NONE:
358                 return "";
359         case V3D_QPU_UNPACK_L:
360                 return ".l";
361         case V3D_QPU_UNPACK_H:
362                 return ".h";
363         case V3D_QPU_UNPACK_ABS:
364                 return ".abs";
365         case V3D_QPU_UNPACK_REPLICATE_32F_16:
366                 return ".ff";
367         case V3D_QPU_UNPACK_REPLICATE_L_16:
368                 return ".ll";
369         case V3D_QPU_UNPACK_REPLICATE_H_16:
370                 return ".hh";
371         case V3D_QPU_UNPACK_SWAP_16:
372                 return ".swp";
373         case V3D71_QPU_UNPACK_SAT:
374                 return ".sat";
375         case V3D71_QPU_UNPACK_NSAT:
376                 return ".nsat";
377         case V3D71_QPU_UNPACK_MAX0:
378                 return ".max0";
379         default:
380                 unreachable("bad unpack value");
381         }
382 }
383 
384 #define D	1
385 #define A	2
386 #define B	4
387 static const uint8_t add_op_args[] = {
388         [V3D_QPU_A_FADD] = D | A | B,
389         [V3D_QPU_A_FADDNF] = D | A | B,
390         [V3D_QPU_A_VFPACK] = D | A | B,
391         [V3D_QPU_A_ADD] = D | A | B,
392         [V3D_QPU_A_VFPACK] = D | A | B,
393         [V3D_QPU_A_SUB] = D | A | B,
394         [V3D_QPU_A_VFPACK] = D | A | B,
395         [V3D_QPU_A_FSUB] = D | A | B,
396         [V3D_QPU_A_MIN] = D | A | B,
397         [V3D_QPU_A_MAX] = D | A | B,
398         [V3D_QPU_A_UMIN] = D | A | B,
399         [V3D_QPU_A_UMAX] = D | A | B,
400         [V3D_QPU_A_SHL] = D | A | B,
401         [V3D_QPU_A_SHR] = D | A | B,
402         [V3D_QPU_A_ASR] = D | A | B,
403         [V3D_QPU_A_ROR] = D | A | B,
404         [V3D_QPU_A_FMIN] = D | A | B,
405         [V3D_QPU_A_FMAX] = D | A | B,
406         [V3D_QPU_A_VFMIN] = D | A | B,
407 
408         [V3D_QPU_A_AND] = D | A | B,
409         [V3D_QPU_A_OR] = D | A | B,
410         [V3D_QPU_A_XOR] = D | A | B,
411 
412         [V3D_QPU_A_VADD] = D | A | B,
413         [V3D_QPU_A_VSUB] = D | A | B,
414         [V3D_QPU_A_NOT] = D | A,
415         [V3D_QPU_A_NEG] = D | A,
416         [V3D_QPU_A_FLAPUSH] = D | A,
417         [V3D_QPU_A_FLBPUSH] = D | A,
418         [V3D_QPU_A_FLPOP] = D | A,
419         [V3D_QPU_A_RECIP] = D | A,
420         [V3D_QPU_A_SETMSF] = D | A,
421         [V3D_QPU_A_SETREVF] = D | A,
422         [V3D_QPU_A_NOP] = 0,
423         [V3D_QPU_A_TIDX] = D,
424         [V3D_QPU_A_EIDX] = D,
425         [V3D_QPU_A_LR] = D,
426         [V3D_QPU_A_VFLA] = D,
427         [V3D_QPU_A_VFLNA] = D,
428         [V3D_QPU_A_VFLB] = D,
429         [V3D_QPU_A_VFLNB] = D,
430 
431         [V3D_QPU_A_FXCD] = D,
432         [V3D_QPU_A_XCD] = D,
433         [V3D_QPU_A_FYCD] = D,
434         [V3D_QPU_A_YCD] = D,
435 
436         [V3D_QPU_A_MSF] = D,
437         [V3D_QPU_A_REVF] = D,
438         [V3D_QPU_A_VDWWT] = D,
439         [V3D_QPU_A_IID] = D,
440         [V3D_QPU_A_SAMPID] = D,
441         [V3D_QPU_A_BARRIERID] = D,
442         [V3D_QPU_A_TMUWT] = D,
443         [V3D_QPU_A_VPMWT] = D,
444         [V3D_QPU_A_FLAFIRST] = D,
445         [V3D_QPU_A_FLNAFIRST] = D,
446 
447         [V3D_QPU_A_VPMSETUP] = D | A,
448 
449         [V3D_QPU_A_LDVPMV_IN] = D | A,
450         [V3D_QPU_A_LDVPMV_OUT] = D | A,
451         [V3D_QPU_A_LDVPMD_IN] = D | A,
452         [V3D_QPU_A_LDVPMD_OUT] = D | A,
453         [V3D_QPU_A_LDVPMP] = D | A,
454         [V3D_QPU_A_RSQRT] = D | A,
455         [V3D_QPU_A_EXP] = D | A,
456         [V3D_QPU_A_LOG] = D | A,
457         [V3D_QPU_A_SIN] = D | A,
458         [V3D_QPU_A_RSQRT2] = D | A,
459         [V3D_QPU_A_LDVPMG_IN] = D | A | B,
460         [V3D_QPU_A_LDVPMG_OUT] = D | A | B,
461 
462         /* FIXME: MOVABSNEG */
463 
464         [V3D_QPU_A_FCMP] = D | A | B,
465         [V3D_QPU_A_VFMAX] = D | A | B,
466 
467         [V3D_QPU_A_FROUND] = D | A,
468         [V3D_QPU_A_FTOIN] = D | A,
469         [V3D_QPU_A_FTRUNC] = D | A,
470         [V3D_QPU_A_FTOIZ] = D | A,
471         [V3D_QPU_A_FFLOOR] = D | A,
472         [V3D_QPU_A_FTOUZ] = D | A,
473         [V3D_QPU_A_FCEIL] = D | A,
474         [V3D_QPU_A_FTOC] = D | A,
475 
476         [V3D_QPU_A_FDX] = D | A,
477         [V3D_QPU_A_FDY] = D | A,
478 
479         [V3D_QPU_A_STVPMV] = A | B,
480         [V3D_QPU_A_STVPMD] = A | B,
481         [V3D_QPU_A_STVPMP] = A | B,
482 
483         [V3D_QPU_A_ITOF] = D | A,
484         [V3D_QPU_A_CLZ] = D | A,
485         [V3D_QPU_A_UTOF] = D | A,
486 
487         [V3D_QPU_A_MOV] = D | A,
488         [V3D_QPU_A_FMOV] = D | A,
489         [V3D_QPU_A_VPACK] = D | A | B,
490         [V3D_QPU_A_V8PACK] = D | A | B,
491         [V3D_QPU_A_V10PACK] = D | A | B,
492         [V3D_QPU_A_V11FPACK] = D | A | B,
493 
494         [V3D_QPU_A_BALLOT] = D | A,
495         [V3D_QPU_A_BCASTF] = D | A,
496         [V3D_QPU_A_ALLEQ] = D | A,
497         [V3D_QPU_A_ALLFEQ] = D | A,
498         [V3D_QPU_A_ROTQ] = D | A | B,
499         [V3D_QPU_A_ROT] = D | A | B,
500         [V3D_QPU_A_SHUFFLE] = D | A | B,
501 };
502 
503 static const uint8_t mul_op_args[] = {
504         [V3D_QPU_M_ADD] = D | A | B,
505         [V3D_QPU_M_SUB] = D | A | B,
506         [V3D_QPU_M_UMUL24] = D | A | B,
507         [V3D_QPU_M_VFMUL] = D | A | B,
508         [V3D_QPU_M_SMUL24] = D | A | B,
509         [V3D_QPU_M_MULTOP] = D | A | B,
510         [V3D_QPU_M_FMOV] = D | A,
511         [V3D_QPU_M_NOP] = 0,
512         [V3D_QPU_M_MOV] = D | A,
513         [V3D_QPU_M_FMUL] = D | A | B,
514         [V3D_QPU_M_FTOUNORM16] = D | A,
515         [V3D_QPU_M_FTOSNORM16] = D | A,
516         [V3D_QPU_M_VFTOUNORM8] = D | A,
517         [V3D_QPU_M_VFTOSNORM8] = D | A,
518         [V3D_QPU_M_VFTOUNORM10LO] = D | A,
519         [V3D_QPU_M_VFTOUNORM10HI] = D | A,
520 };
521 
522 bool
v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)523 v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)
524 {
525         assert(op < ARRAY_SIZE(add_op_args));
526 
527         return add_op_args[op] & D;
528 }
529 
530 bool
v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)531 v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)
532 {
533         assert(op < ARRAY_SIZE(mul_op_args));
534 
535         return mul_op_args[op] & D;
536 }
537 
538 int
v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)539 v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)
540 {
541         assert(op < ARRAY_SIZE(add_op_args));
542 
543         uint8_t args = add_op_args[op];
544         if (args & B)
545                 return 2;
546         else if (args & A)
547                 return 1;
548         else
549                 return 0;
550 }
551 
552 int
v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)553 v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)
554 {
555         assert(op < ARRAY_SIZE(mul_op_args));
556 
557         uint8_t args = mul_op_args[op];
558         if (args & B)
559                 return 2;
560         else if (args & A)
561                 return 1;
562         else
563                 return 0;
564 }
565 
566 enum v3d_qpu_cond
v3d_qpu_cond_invert(enum v3d_qpu_cond cond)567 v3d_qpu_cond_invert(enum v3d_qpu_cond cond)
568 {
569         switch (cond) {
570         case V3D_QPU_COND_IFA:
571                 return V3D_QPU_COND_IFNA;
572         case V3D_QPU_COND_IFNA:
573                 return V3D_QPU_COND_IFA;
574         case V3D_QPU_COND_IFB:
575                 return V3D_QPU_COND_IFNB;
576         case V3D_QPU_COND_IFNB:
577                 return V3D_QPU_COND_IFB;
578         default:
579                 unreachable("Non-invertible cond");
580         }
581 }
582 
583 bool
v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)584 v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)
585 {
586         switch (waddr) {
587         case V3D_QPU_WADDR_RECIP:
588         case V3D_QPU_WADDR_RSQRT:
589         case V3D_QPU_WADDR_EXP:
590         case V3D_QPU_WADDR_LOG:
591         case V3D_QPU_WADDR_SIN:
592         case V3D_QPU_WADDR_RSQRT2:
593                 return true;
594         default:
595                 return false;
596         }
597 }
598 
599 bool
v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info * devinfo,enum v3d_qpu_waddr waddr)600 v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info *devinfo,
601                            enum v3d_qpu_waddr waddr)
602 {
603         if (devinfo->ver >= 40) {
604                 return ((waddr >= V3D_QPU_WADDR_TMUD &&
605                          waddr <= V3D_QPU_WADDR_TMUAU) ||
606                        (waddr >= V3D_QPU_WADDR_TMUC &&
607                         waddr <= V3D_QPU_WADDR_TMUHSLOD));
608         } else {
609                 return ((waddr >= V3D_QPU_WADDR_TMU &&
610                          waddr <= V3D_QPU_WADDR_TMUAU) ||
611                        (waddr >= V3D_QPU_WADDR_TMUC &&
612                         waddr <= V3D_QPU_WADDR_TMUHSLOD));
613         }
614 }
615 
616 bool
v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr * inst)617 v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst)
618 {
619         return (inst->sig.ldtmu ||
620                 (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
621                  inst->alu.add.op == V3D_QPU_A_TMUWT));
622 }
623 
624 bool
v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)625 v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)
626 {
627         return (waddr == V3D_QPU_WADDR_TLB ||
628                 waddr == V3D_QPU_WADDR_TLBU);
629 }
630 
631 bool
v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)632 v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)
633 {
634         return (waddr == V3D_QPU_WADDR_VPM ||
635                 waddr == V3D_QPU_WADDR_VPMU);
636 }
637 
638 bool
v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)639 v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
640 {
641         return (waddr == V3D_QPU_WADDR_SYNC ||
642                 waddr == V3D_QPU_WADDR_SYNCB ||
643                 waddr == V3D_QPU_WADDR_SYNCU);
644 }
645 
646 bool
v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr)647 v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr)
648 {
649         switch (waddr) {
650         case V3D_QPU_WADDR_VPMU:
651         case V3D_QPU_WADDR_TLBU:
652         case V3D_QPU_WADDR_TMUAU:
653         case V3D_QPU_WADDR_SYNCU:
654                 return true;
655         default:
656                 return false;
657         }
658 }
659 
660 static bool
v3d_qpu_add_op_reads_vpm(enum v3d_qpu_add_op op)661 v3d_qpu_add_op_reads_vpm(enum  v3d_qpu_add_op op)
662 {
663         switch (op) {
664         case V3D_QPU_A_VPMSETUP:
665         case V3D_QPU_A_LDVPMV_IN:
666         case V3D_QPU_A_LDVPMV_OUT:
667         case V3D_QPU_A_LDVPMD_IN:
668         case V3D_QPU_A_LDVPMD_OUT:
669         case V3D_QPU_A_LDVPMP:
670         case V3D_QPU_A_LDVPMG_IN:
671         case V3D_QPU_A_LDVPMG_OUT:
672                 return true;
673         default:
674                 return false;
675         }
676 }
677 
678 static bool
v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op)679 v3d_qpu_add_op_writes_vpm(enum  v3d_qpu_add_op op)
680 {
681         switch (op) {
682         case V3D_QPU_A_VPMSETUP:
683         case V3D_QPU_A_STVPMV:
684         case V3D_QPU_A_STVPMD:
685         case V3D_QPU_A_STVPMP:
686                 return true;
687         default:
688                 return false;
689         }
690 }
691 
692 bool
v3d_qpu_reads_tlb(const struct v3d_qpu_instr * inst)693 v3d_qpu_reads_tlb(const struct v3d_qpu_instr *inst)
694 {
695         return inst->sig.ldtlb || inst->sig.ldtlbu;
696 }
697 
698 bool
v3d_qpu_writes_tlb(const struct v3d_qpu_instr * inst)699 v3d_qpu_writes_tlb(const struct v3d_qpu_instr *inst)
700 {
701         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
702                 if (inst->alu.add.op != V3D_QPU_A_NOP &&
703                     inst->alu.add.magic_write &&
704                     v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) {
705                         return true;
706                 }
707 
708                 if (inst->alu.mul.op != V3D_QPU_M_NOP &&
709                     inst->alu.mul.magic_write &&
710                     v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) {
711                         return true;
712                 }
713         }
714 
715         return false;
716 }
717 
/* True when the instruction writes or reads the TLB. */
bool
v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_writes_tlb(inst))
                return true;

        return v3d_qpu_reads_tlb(inst);
}
723 
/* True when the instruction is an SFU instruction in either form: SFU add
 * opcode or legacy magic-register write.
 */
bool
v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_instr_is_sfu(inst))
                return true;

        return v3d_qpu_instr_is_legacy_sfu(inst);
}
729 
730 /* Checks whether the instruction implements a SFU operation by the writing
731  * to specific magic register addresses instead of using SFU ALU opcodes.
732  */
733 bool
v3d_qpu_instr_is_legacy_sfu(const struct v3d_qpu_instr * inst)734 v3d_qpu_instr_is_legacy_sfu(const struct v3d_qpu_instr *inst)
735 {
736         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
737                 if (inst->alu.add.op != V3D_QPU_A_NOP &&
738                     inst->alu.add.magic_write &&
739                     v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) {
740                         return true;
741                 }
742 
743                 if (inst->alu.mul.op != V3D_QPU_M_NOP &&
744                     inst->alu.mul.magic_write &&
745                     v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) {
746                         return true;
747                 }
748         }
749 
750         return false;
751 }
752 
753 bool
v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr * inst)754 v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst)
755 {
756         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
757                 switch (inst->alu.add.op) {
758                 case V3D_QPU_A_RECIP:
759                 case V3D_QPU_A_RSQRT:
760                 case V3D_QPU_A_EXP:
761                 case V3D_QPU_A_LOG:
762                 case V3D_QPU_A_SIN:
763                 case V3D_QPU_A_RSQRT2:
764                 case V3D_QPU_A_BALLOT:
765                 case V3D_QPU_A_BCASTF:
766                 case V3D_QPU_A_ALLEQ:
767                 case V3D_QPU_A_ALLFEQ:
768                 case V3D_QPU_A_ROTQ:
769                 case V3D_QPU_A_ROT:
770                 case V3D_QPU_A_SHUFFLE:
771                         return true;
772                 default:
773                         return false;
774                 }
775         }
776         return false;
777 }
778 
779 bool
v3d_qpu_writes_tmu(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)780 v3d_qpu_writes_tmu(const struct v3d_device_info *devinfo,
781                    const struct v3d_qpu_instr *inst)
782 {
783         return (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
784                 ((inst->alu.add.op != V3D_QPU_A_NOP &&
785                   inst->alu.add.magic_write &&
786                   v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.add.waddr)) ||
787                  (inst->alu.mul.op != V3D_QPU_M_NOP &&
788                   inst->alu.mul.magic_write &&
789                   v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.mul.waddr))));
790 }
791 
792 bool
v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)793 v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info *devinfo,
794                             const struct v3d_qpu_instr *inst)
795 {
796         return v3d_qpu_writes_tmu(devinfo, inst) &&
797                (!inst->alu.add.magic_write ||
798                 inst->alu.add.waddr != V3D_QPU_WADDR_TMUC) &&
799                (!inst->alu.mul.magic_write ||
800                 inst->alu.mul.waddr != V3D_QPU_WADDR_TMUC);
801 }
802 
803 bool
v3d_qpu_reads_vpm(const struct v3d_qpu_instr * inst)804 v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst)
805 {
806         if (inst->sig.ldvpm)
807                 return true;
808 
809         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
810                 if (v3d_qpu_add_op_reads_vpm(inst->alu.add.op))
811                         return true;
812         }
813 
814         return false;
815 }
816 
817 bool
v3d_qpu_writes_vpm(const struct v3d_qpu_instr * inst)818 v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst)
819 {
820         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
821                 if (v3d_qpu_add_op_writes_vpm(inst->alu.add.op))
822                         return true;
823 
824                 if (inst->alu.add.op != V3D_QPU_A_NOP &&
825                     inst->alu.add.magic_write &&
826                     v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) {
827                         return true;
828                 }
829 
830                 if (inst->alu.mul.op != V3D_QPU_M_NOP &&
831                     inst->alu.mul.magic_write &&
832                     v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) {
833                         return true;
834                 }
835         }
836 
837         return false;
838 }
839 
840 bool
v3d_qpu_writes_unifa(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)841 v3d_qpu_writes_unifa(const struct v3d_device_info *devinfo,
842                      const struct v3d_qpu_instr *inst)
843 {
844         if (devinfo->ver < 40)
845                 return false;
846 
847         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
848                 if (inst->alu.add.op != V3D_QPU_A_NOP &&
849                     inst->alu.add.magic_write &&
850                     inst->alu.add.waddr == V3D_QPU_WADDR_UNIFA) {
851                         return true;
852                 }
853 
854                 if (inst->alu.mul.op != V3D_QPU_M_NOP &&
855                     inst->alu.mul.magic_write &&
856                     inst->alu.mul.waddr == V3D_QPU_WADDR_UNIFA) {
857                         return true;
858                 }
859 
860                 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
861                     inst->sig_magic &&
862                     inst->sig_addr == V3D_QPU_WADDR_UNIFA) {
863                         return true;
864                 }
865         }
866 
867         return false;
868 }
869 
870 bool
v3d_qpu_waits_vpm(const struct v3d_qpu_instr * inst)871 v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst)
872 {
873         return inst->type == V3D_QPU_INSTR_TYPE_ALU &&
874                inst->alu.add.op == V3D_QPU_A_VPMWT;
875 }
876 
/**
 * Returns true when the instruction either reads or writes the VPM, as
 * reported by v3d_qpu_reads_vpm() and v3d_qpu_writes_vpm().
 */
bool
v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_reads_vpm(inst))
                return true;

        return v3d_qpu_writes_vpm(inst);
}
882 
/**
 * Returns true when the instruction touches the VPM in any way covered by
 * this file's helpers: a read, a write, or a VPM wait.
 */
bool
v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_reads_vpm(inst))
                return true;

        if (v3d_qpu_writes_vpm(inst))
                return true;

        return v3d_qpu_waits_vpm(inst);
}
890 
891 static bool
qpu_writes_magic_waddr_explicitly(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst,uint32_t waddr)892 qpu_writes_magic_waddr_explicitly(const struct v3d_device_info *devinfo,
893                                   const struct v3d_qpu_instr *inst,
894                                   uint32_t waddr)
895 {
896         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
897                 if (inst->alu.add.op != V3D_QPU_A_NOP &&
898                     inst->alu.add.magic_write && inst->alu.add.waddr == waddr)
899                         return true;
900 
901                 if (inst->alu.mul.op != V3D_QPU_M_NOP &&
902                     inst->alu.mul.magic_write && inst->alu.mul.waddr == waddr)
903                         return true;
904         }
905 
906         if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
907             inst->sig_magic && inst->sig_addr == waddr) {
908                 return true;
909         }
910 
911         return false;
912 }
913 
914 bool
v3d_qpu_writes_r3(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)915 v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
916                   const struct v3d_qpu_instr *inst)
917 {
918         if(!devinfo->has_accumulators)
919                 return false;
920 
921         if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R3))
922                 return true;
923 
924         return (devinfo->ver < 41 && inst->sig.ldvary) || inst->sig.ldvpm;
925 }
926 
927 bool
v3d_qpu_writes_r4(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)928 v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
929                   const struct v3d_qpu_instr *inst)
930 {
931         if (!devinfo->has_accumulators)
932                 return false;
933 
934         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
935                 if (inst->alu.add.op != V3D_QPU_A_NOP &&
936                     inst->alu.add.magic_write &&
937                     (inst->alu.add.waddr == V3D_QPU_WADDR_R4 ||
938                      v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))) {
939                         return true;
940                 }
941 
942                 if (inst->alu.mul.op != V3D_QPU_M_NOP &&
943                     inst->alu.mul.magic_write &&
944                     (inst->alu.mul.waddr == V3D_QPU_WADDR_R4 ||
945                      v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))) {
946                         return true;
947                 }
948         }
949 
950         if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
951                 if (inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4)
952                         return true;
953         } else if (inst->sig.ldtmu) {
954                 return true;
955         }
956 
957         return false;
958 }
959 
960 bool
v3d_qpu_writes_r5(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)961 v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
962                   const struct v3d_qpu_instr *inst)
963 {
964         if (!devinfo->has_accumulators)
965                 return false;
966 
967         if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R5))
968                 return true;
969 
970         return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa;
971 }
972 
973 bool
v3d_qpu_writes_accum(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)974 v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
975                      const struct v3d_qpu_instr *inst)
976 {
977         if (!devinfo->has_accumulators)
978                 return false;
979 
980         if (v3d_qpu_writes_r5(devinfo, inst))
981                 return true;
982         if (v3d_qpu_writes_r4(devinfo, inst))
983                 return true;
984         if (v3d_qpu_writes_r3(devinfo, inst))
985                 return true;
986         if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R2))
987                 return true;
988         if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R1))
989                 return true;
990         if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R0))
991                 return true;
992 
993         return false;
994 }
995 
996 bool
v3d_qpu_writes_rf0_implicitly(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)997 v3d_qpu_writes_rf0_implicitly(const struct v3d_device_info *devinfo,
998                               const struct v3d_qpu_instr *inst)
999 {
1000         if (devinfo->ver >= 71 &&
1001             (inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa)) {
1002                 return true;
1003         }
1004 
1005         return false;
1006 }
1007 
1008 bool
v3d_qpu_uses_mux(const struct v3d_qpu_instr * inst,enum v3d_qpu_mux mux)1009 v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
1010 {
1011         int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
1012         int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
1013 
1014         return ((add_nsrc > 0 && inst->alu.add.a.mux == mux) ||
1015                 (add_nsrc > 1 && inst->alu.add.b.mux == mux) ||
1016                 (mul_nsrc > 0 && inst->alu.mul.a.mux == mux) ||
1017                 (mul_nsrc > 1 && inst->alu.mul.b.mux == mux));
1018 }
1019 
1020 bool
v3d71_qpu_reads_raddr(const struct v3d_qpu_instr * inst,uint8_t raddr)1021 v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr)
1022 {
1023         int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
1024         int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
1025 
1026         return (add_nsrc > 0 && !inst->sig.small_imm_a && inst->alu.add.a.raddr == raddr) ||
1027                (add_nsrc > 1 && !inst->sig.small_imm_b && inst->alu.add.b.raddr == raddr) ||
1028                (mul_nsrc > 0 && !inst->sig.small_imm_c && inst->alu.mul.a.raddr == raddr) ||
1029                (mul_nsrc > 1 && !inst->sig.small_imm_d && inst->alu.mul.b.raddr == raddr);
1030 }
1031 
1032 bool
v3d71_qpu_writes_waddr_explicitly(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst,uint8_t waddr)1033 v3d71_qpu_writes_waddr_explicitly(const struct v3d_device_info *devinfo,
1034                                   const struct v3d_qpu_instr *inst,
1035                                   uint8_t waddr)
1036 {
1037         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
1038                 return false;
1039 
1040         if (v3d_qpu_add_op_has_dst(inst->alu.add.op) &&
1041             !inst->alu.add.magic_write &&
1042             inst->alu.add.waddr == waddr) {
1043                 return true;
1044         }
1045 
1046         if (v3d_qpu_mul_op_has_dst(inst->alu.mul.op) &&
1047             !inst->alu.mul.magic_write &&
1048             inst->alu.mul.waddr == waddr) {
1049                 return true;
1050         }
1051 
1052         if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
1053             !inst->sig_magic && inst->sig_addr == waddr) {
1054                 return true;
1055         }
1056 
1057         return false;
1058 }
1059 
1060 bool
v3d_qpu_sig_writes_address(const struct v3d_device_info * devinfo,const struct v3d_qpu_sig * sig)1061 v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
1062                            const struct v3d_qpu_sig *sig)
1063 {
1064         if (devinfo->ver < 41)
1065                 return false;
1066 
1067         return (sig->ldunifrf ||
1068                 sig->ldunifarf ||
1069                 sig->ldvary ||
1070                 sig->ldtmu ||
1071                 sig->ldtlb ||
1072                 sig->ldtlbu);
1073 }
1074 
1075 bool
v3d_qpu_reads_flags(const struct v3d_qpu_instr * inst)1076 v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst)
1077 {
1078         if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
1079                 return inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS;
1080         } else if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
1081                 if (inst->flags.ac != V3D_QPU_COND_NONE ||
1082                     inst->flags.mc != V3D_QPU_COND_NONE ||
1083                     inst->flags.auf != V3D_QPU_UF_NONE ||
1084                     inst->flags.muf != V3D_QPU_UF_NONE)
1085                         return true;
1086 
1087                 switch (inst->alu.add.op) {
1088                 case V3D_QPU_A_VFLA:
1089                 case V3D_QPU_A_VFLNA:
1090                 case V3D_QPU_A_VFLB:
1091                 case V3D_QPU_A_VFLNB:
1092                 case V3D_QPU_A_FLAPUSH:
1093                 case V3D_QPU_A_FLBPUSH:
1094                 case V3D_QPU_A_FLAFIRST:
1095                 case V3D_QPU_A_FLNAFIRST:
1096                         return true;
1097                 default:
1098                         break;
1099                 }
1100         }
1101 
1102         return false;
1103 }
1104 
1105 bool
v3d_qpu_writes_flags(const struct v3d_qpu_instr * inst)1106 v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst)
1107 {
1108         if (inst->flags.apf != V3D_QPU_PF_NONE ||
1109             inst->flags.mpf != V3D_QPU_PF_NONE ||
1110             inst->flags.auf != V3D_QPU_UF_NONE ||
1111             inst->flags.muf != V3D_QPU_UF_NONE) {
1112                 return true;
1113         }
1114 
1115         return false;
1116 }
1117 
1118 bool
v3d_qpu_unpacks_f32(const struct v3d_qpu_instr * inst)1119 v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst)
1120 {
1121         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
1122                 return false;
1123 
1124         switch (inst->alu.add.op) {
1125         case V3D_QPU_A_FADD:
1126         case V3D_QPU_A_FADDNF:
1127         case V3D_QPU_A_FSUB:
1128         case V3D_QPU_A_FMIN:
1129         case V3D_QPU_A_FMAX:
1130         case V3D_QPU_A_FCMP:
1131         case V3D_QPU_A_FROUND:
1132         case V3D_QPU_A_FTRUNC:
1133         case V3D_QPU_A_FFLOOR:
1134         case V3D_QPU_A_FCEIL:
1135         case V3D_QPU_A_FDX:
1136         case V3D_QPU_A_FDY:
1137         case V3D_QPU_A_FTOIN:
1138         case V3D_QPU_A_FTOIZ:
1139         case V3D_QPU_A_FTOUZ:
1140         case V3D_QPU_A_FTOC:
1141         case V3D_QPU_A_VFPACK:
1142                 return true;
1143                 break;
1144         default:
1145                 break;
1146         }
1147 
1148         switch (inst->alu.mul.op) {
1149         case V3D_QPU_M_FMOV:
1150         case V3D_QPU_M_FMUL:
1151                 return true;
1152                 break;
1153         default:
1154                 break;
1155         }
1156 
1157         return false;
1158 }
1159 bool
v3d_qpu_unpacks_f16(const struct v3d_qpu_instr * inst)1160 v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst)
1161 {
1162         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
1163                 return false;
1164 
1165         switch (inst->alu.add.op) {
1166         case V3D_QPU_A_VFMIN:
1167         case V3D_QPU_A_VFMAX:
1168                 return true;
1169                 break;
1170         default:
1171                 break;
1172         }
1173 
1174         switch (inst->alu.mul.op) {
1175         case V3D_QPU_M_VFMUL:
1176                 return true;
1177                 break;
1178         default:
1179                 break;
1180         }
1181 
1182         return false;
1183 }
1184 
1185 bool
v3d_qpu_is_nop(struct v3d_qpu_instr * inst)1186 v3d_qpu_is_nop(struct v3d_qpu_instr *inst)
1187 {
1188         static const struct v3d_qpu_sig nosig = { 0 };
1189 
1190         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
1191                 return false;
1192         if (inst->alu.add.op != V3D_QPU_A_NOP)
1193                 return false;
1194         if (inst->alu.mul.op != V3D_QPU_M_NOP)
1195                 return false;
1196         if (memcmp(&inst->sig, &nosig, sizeof(nosig)))
1197                 return false;
1198         return true;
1199 }
1200