/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdlib.h>
#include <string.h>
#include "util/macros.h"
#include "broadcom/common/v3d_device_info.h"
#include "qpu_instr.h"

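/* Returns the assembler name for a magic write address. A few encodings are
 * shared between hardware generations, so the V3D version is checked first
 * before falling back to the common table.
 */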
const char *
v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo,
                         enum v3d_qpu_waddr waddr)
{
        /* The V3D 4.x UNIFA entry aliases the V3D 3.x TMU in the table below. */
        if (devinfo->ver < 40 && waddr == V3D_QPU_WADDR_TMU)
                return "tmu";

        /* V3D 7.x QUAD and REP alias R5 and R5REP in the table below. */
        if (devinfo->ver >= 71 && waddr == V3D_QPU_WADDR_QUAD)
                return "quad";

        if (devinfo->ver >= 71 && waddr == V3D_QPU_WADDR_REP)
                return "rep";

        static const char *waddr_magic[] = {
                [V3D_QPU_WADDR_R0] = "r0",
                [V3D_QPU_WADDR_R1] = "r1",
                [V3D_QPU_WADDR_R2] = "r2",
                [V3D_QPU_WADDR_R3] = "r3",
                [V3D_QPU_WADDR_R4] = "r4",
                [V3D_QPU_WADDR_R5] = "r5",
                [V3D_QPU_WADDR_NOP] = "-",
                [V3D_QPU_WADDR_TLB] = "tlb",
                [V3D_QPU_WADDR_TLBU] = "tlbu",
                [V3D_QPU_WADDR_UNIFA] = "unifa",
                [V3D_QPU_WADDR_TMUL] = "tmul",
                [V3D_QPU_WADDR_TMUD] = "tmud",
                [V3D_QPU_WADDR_TMUA] = "tmua",
                [V3D_QPU_WADDR_TMUAU] = "tmuau",
                [V3D_QPU_WADDR_VPM] = "vpm",
                [V3D_QPU_WADDR_VPMU] = "vpmu",
                [V3D_QPU_WADDR_SYNC] = "sync",
                [V3D_QPU_WADDR_SYNCU] = "syncu",
                [V3D_QPU_WADDR_SYNCB] = "syncb",
                [V3D_QPU_WADDR_RECIP] = "recip",
                [V3D_QPU_WADDR_RSQRT] = "rsqrt",
                [V3D_QPU_WADDR_EXP] = "exp",
                [V3D_QPU_WADDR_LOG] = "log",
                [V3D_QPU_WADDR_SIN] = "sin",
                [V3D_QPU_WADDR_RSQRT2] = "rsqrt2",
                [V3D_QPU_WADDR_TMUC] = "tmuc",
                [V3D_QPU_WADDR_TMUS] = "tmus",
                [V3D_QPU_WADDR_TMUT] = "tmut",
                [V3D_QPU_WADDR_TMUR] = "tmur",
                [V3D_QPU_WADDR_TMUI] = "tmui",
                [V3D_QPU_WADDR_TMUB] = "tmub",
                [V3D_QPU_WADDR_TMUDREF] = "tmudref",
                [V3D_QPU_WADDR_TMUOFF] = "tmuoff",
                [V3D_QPU_WADDR_TMUSCM] = "tmuscm",
                [V3D_QPU_WADDR_TMUSF] = "tmusf",
                [V3D_QPU_WADDR_TMUSLOD] = "tmuslod",
                [V3D_QPU_WADDR_TMUHS] = "tmuhs",
                [V3D_QPU_WADDR_TMUHSCM] = "tmuhscm",
                [V3D_QPU_WADDR_TMUHSF] = "tmuhsf",
                [V3D_QPU_WADDR_TMUHSLOD] = "tmuhslod",
                [V3D_QPU_WADDR_R5REP] = "r5rep",
        };

        return waddr_magic[waddr];
}

const char *
v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
{
        static const char *op_names[] = {
                [V3D_QPU_A_FADD] = "fadd",
                [V3D_QPU_A_FADDNF] = "faddnf",
                [V3D_QPU_A_VFPACK] = "vfpack",
                [V3D_QPU_A_ADD] = "add",
                [V3D_QPU_A_SUB] = "sub",
                [V3D_QPU_A_FSUB] = "fsub",
                [V3D_QPU_A_MIN] = "min",
                [V3D_QPU_A_MAX] = "max",
                [V3D_QPU_A_UMIN] = "umin",
                [V3D_QPU_A_UMAX] = "umax",
                [V3D_QPU_A_SHL] = "shl",
                [V3D_QPU_A_SHR] = "shr",
                [V3D_QPU_A_ASR] = "asr",
                [V3D_QPU_A_ROR] = "ror",
                [V3D_QPU_A_FMIN] = "fmin",
                [V3D_QPU_A_FMAX] = "fmax",
                [V3D_QPU_A_VFMIN] = "vfmin",
                [V3D_QPU_A_AND] = "and",
                [V3D_QPU_A_OR] = "or",
                [V3D_QPU_A_XOR] = "xor",
                [V3D_QPU_A_VADD] = "vadd",
                [V3D_QPU_A_VSUB] = "vsub",
                [V3D_QPU_A_NOT] = "not",
                [V3D_QPU_A_NEG] = "neg",
                [V3D_QPU_A_FLAPUSH] = "flapush",
                [V3D_QPU_A_FLBPUSH] = "flbpush",
                [V3D_QPU_A_FLPOP] = "flpop",
                [V3D_QPU_A_RECIP] = "recip",
                [V3D_QPU_A_SETMSF] = "setmsf",
                [V3D_QPU_A_SETREVF] = "setrevf",
                [V3D_QPU_A_NOP] = "nop",
                [V3D_QPU_A_TIDX] = "tidx",
                [V3D_QPU_A_EIDX] = "eidx",
                [V3D_QPU_A_LR] = "lr",
                [V3D_QPU_A_VFLA] = "vfla",
                [V3D_QPU_A_VFLNA] = "vflna",
                [V3D_QPU_A_VFLB] = "vflb",
                [V3D_QPU_A_VFLNB] = "vflnb",
                [V3D_QPU_A_FXCD] = "fxcd",
                [V3D_QPU_A_XCD] = "xcd",
                [V3D_QPU_A_FYCD] = "fycd",
                [V3D_QPU_A_YCD] = "ycd",
                [V3D_QPU_A_MSF] = "msf",
                [V3D_QPU_A_REVF] = "revf",
                [V3D_QPU_A_VDWWT] = "vdwwt",
                [V3D_QPU_A_IID] = "iid",
                [V3D_QPU_A_SAMPID] = "sampid",
                [V3D_QPU_A_BARRIERID] = "barrierid",
                [V3D_QPU_A_TMUWT] = "tmuwt",
                [V3D_QPU_A_VPMSETUP] = "vpmsetup",
                [V3D_QPU_A_VPMWT] = "vpmwt",
                [V3D_QPU_A_FLAFIRST] = "flafirst",
                [V3D_QPU_A_FLNAFIRST] = "flnafirst",
                [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in",
                [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out",
                [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",
                [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out",
                [V3D_QPU_A_LDVPMP] = "ldvpmp",
                [V3D_QPU_A_RSQRT] = "rsqrt",
                [V3D_QPU_A_EXP] = "exp",
                [V3D_QPU_A_LOG] = "log",
                [V3D_QPU_A_SIN] = "sin",
                [V3D_QPU_A_RSQRT2] = "rsqrt2",
                [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in",
                [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out",
                [V3D_QPU_A_FCMP] = "fcmp",
                [V3D_QPU_A_VFMAX] = "vfmax",
                [V3D_QPU_A_FROUND] = "fround",
                [V3D_QPU_A_FTOIN] = "ftoin",
                [V3D_QPU_A_FTRUNC] = "ftrunc",
                [V3D_QPU_A_FTOIZ] = "ftoiz",
                [V3D_QPU_A_FFLOOR] = "ffloor",
                [V3D_QPU_A_FTOUZ] = "ftouz",
                [V3D_QPU_A_FCEIL] = "fceil",
                [V3D_QPU_A_FTOC] = "ftoc",
                [V3D_QPU_A_FDX] = "fdx",
                [V3D_QPU_A_FDY] = "fdy",
                [V3D_QPU_A_STVPMV] = "stvpmv",
                [V3D_QPU_A_STVPMD] = "stvpmd",
                [V3D_QPU_A_STVPMP] = "stvpmp",
                [V3D_QPU_A_ITOF] = "itof",
                [V3D_QPU_A_CLZ] = "clz",
                [V3D_QPU_A_UTOF] = "utof",
                [V3D_QPU_A_MOV] = "mov",
                [V3D_QPU_A_FMOV] = "fmov",
                [V3D_QPU_A_VPACK] = "vpack",
                [V3D_QPU_A_V8PACK] = "v8pack",
                [V3D_QPU_A_V10PACK] = "v10pack",
                [V3D_QPU_A_V11FPACK] = "v11fpack",
                [V3D_QPU_A_BALLOT] = "ballot",
                [V3D_QPU_A_BCASTF] = "bcastf",
                [V3D_QPU_A_ALLEQ] = "alleq",
                [V3D_QPU_A_ALLFEQ] = "allfeq",
                [V3D_QPU_A_ROTQ] = "rotq",
                [V3D_QPU_A_ROT] = "rot",
                [V3D_QPU_A_SHUFFLE] = "shuffle",
        };

        if (op >= ARRAY_SIZE(op_names))
                return NULL;

        return op_names[op];
}

const char *
v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)
{
        static const char *op_names[] = {
                [V3D_QPU_M_ADD] = "add",
                [V3D_QPU_M_SUB] = "sub",
                [V3D_QPU_M_UMUL24] = "umul24",
                [V3D_QPU_M_VFMUL] = "vfmul",
                [V3D_QPU_M_SMUL24] = "smul24",
                [V3D_QPU_M_MULTOP] = "multop",
                [V3D_QPU_M_FMOV] = "fmov",
                [V3D_QPU_M_MOV] = "mov",
                [V3D_QPU_M_NOP] = "nop",
                [V3D_QPU_M_FMUL] = "fmul",
                [V3D_QPU_M_FTOUNORM16] = "ftounorm16",
                [V3D_QPU_M_FTOSNORM16] = "ftosnorm16",
                [V3D_QPU_M_VFTOUNORM8] = "vftounorm8",
                [V3D_QPU_M_VFTOSNORM8] = "vftosnorm8",
                [V3D_QPU_M_VFTOUNORM10LO] = "vftounorm10lo",
                [V3D_QPU_M_VFTOUNORM10HI] = "vftounorm10hi",
        };

        if (op >= ARRAY_SIZE(op_names))
                return NULL;

        return op_names[op];
}

const char *
v3d_qpu_cond_name(enum v3d_qpu_cond cond)
{
        switch (cond) {
        case V3D_QPU_COND_NONE:
                return "";
        case V3D_QPU_COND_IFA:
                return ".ifa";
        case V3D_QPU_COND_IFB:
                return ".ifb";
        case V3D_QPU_COND_IFNA:
                return ".ifna";
        case V3D_QPU_COND_IFNB:
                return ".ifnb";
        default:
                unreachable("bad cond value");
        }
}

const char *
v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)
{
        switch (cond) {
        case V3D_QPU_BRANCH_COND_ALWAYS:
                return "";
        case V3D_QPU_BRANCH_COND_A0:
                return ".a0";
        case V3D_QPU_BRANCH_COND_NA0:
                return ".na0";
        case V3D_QPU_BRANCH_COND_ALLA:
                return ".alla";
        case V3D_QPU_BRANCH_COND_ANYNA:
                return ".anyna";
        case V3D_QPU_BRANCH_COND_ANYA:
                return ".anya";
        case V3D_QPU_BRANCH_COND_ALLNA:
                return ".allna";
        default:
                unreachable("bad branch cond value");
        }
}

const char *
v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)
{
        switch (msfign) {
        case V3D_QPU_MSFIGN_NONE:
                return "";
        case V3D_QPU_MSFIGN_P:
                return "p";
        case V3D_QPU_MSFIGN_Q:
                return "q";
        default:
                unreachable("bad msfign value");
        }
}

const char *
v3d_qpu_pf_name(enum v3d_qpu_pf pf)
{
        switch (pf) {
        case V3D_QPU_PF_NONE:
                return "";
        case V3D_QPU_PF_PUSHZ:
                return ".pushz";
        case V3D_QPU_PF_PUSHN:
                return ".pushn";
        case V3D_QPU_PF_PUSHC:
                return ".pushc";
        default:
                unreachable("bad pf value");
        }
}

const char *
v3d_qpu_uf_name(enum v3d_qpu_uf uf)
{
        switch (uf) {
        case V3D_QPU_UF_NONE:
                return "";
        case V3D_QPU_UF_ANDZ:
                return ".andz";
        case V3D_QPU_UF_ANDNZ:
                return ".andnz";
        case V3D_QPU_UF_NORZ:
                return ".norz";
        case V3D_QPU_UF_NORNZ:
                return ".nornz";
        case V3D_QPU_UF_ANDN:
                return ".andn";
        case V3D_QPU_UF_ANDNN:
                return ".andnn";
        case V3D_QPU_UF_NORN:
                return ".norn";
        case V3D_QPU_UF_NORNN:
                return ".nornn";
        case V3D_QPU_UF_ANDC:
                return ".andc";
        case V3D_QPU_UF_ANDNC:
                return ".andnc";
        case V3D_QPU_UF_NORC:
                return ".norc";
        case V3D_QPU_UF_NORNC:
                return ".nornc";
        default:
                unreachable("bad uf value");
        }
}

const char *
v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)
{
        switch (pack) {
        case V3D_QPU_PACK_NONE:
                return "";
        case V3D_QPU_PACK_L:
                return ".l";
        case V3D_QPU_PACK_H:
                return ".h";
        default:
                unreachable("bad pack value");
        }
}

const char *
v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)
{
        switch (unpack) {
        case V3D_QPU_UNPACK_NONE:
                return "";
        case V3D_QPU_UNPACK_L:
                return ".l";
        case V3D_QPU_UNPACK_H:
                return ".h";
        case V3D_QPU_UNPACK_ABS:
                return ".abs";
        case V3D_QPU_UNPACK_REPLICATE_32F_16:
                return ".ff";
        case V3D_QPU_UNPACK_REPLICATE_L_16:
                return ".ll";
        case V3D_QPU_UNPACK_REPLICATE_H_16:
                return ".hh";
        case V3D_QPU_UNPACK_SWAP_16:
                return ".swp";
        case V3D71_QPU_UNPACK_SAT:
                return ".sat";
        case V3D71_QPU_UNPACK_NSAT:
                return ".nsat";
        case V3D71_QPU_UNPACK_MAX0:
                return ".max0";
        default:
                unreachable("bad unpack value");
        }
}

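/* Per-opcode argument masks: D means the op writes a destination, A and B
 * mean it reads the first and second source, respectively. These tables
 * drive the *_has_dst() and *_num_src() helpers below.
 */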
#define D 1
#define A 2
#define B 4
static const uint8_t add_op_args[] = {
        [V3D_QPU_A_FADD] = D | A | B,
        [V3D_QPU_A_FADDNF] = D | A | B,
        [V3D_QPU_A_VFPACK] = D | A | B,
        [V3D_QPU_A_ADD] = D | A | B,
        [V3D_QPU_A_SUB] = D | A | B,
        [V3D_QPU_A_FSUB] = D | A | B,
        [V3D_QPU_A_MIN] = D | A | B,
        [V3D_QPU_A_MAX] = D | A | B,
        [V3D_QPU_A_UMIN] = D | A | B,
        [V3D_QPU_A_UMAX] = D | A | B,
        [V3D_QPU_A_SHL] = D | A | B,
        [V3D_QPU_A_SHR] = D | A | B,
        [V3D_QPU_A_ASR] = D | A | B,
        [V3D_QPU_A_ROR] = D | A | B,
        [V3D_QPU_A_FMIN] = D | A | B,
        [V3D_QPU_A_FMAX] = D | A | B,
        [V3D_QPU_A_VFMIN] = D | A | B,

        [V3D_QPU_A_AND] = D | A | B,
        [V3D_QPU_A_OR] = D | A | B,
        [V3D_QPU_A_XOR] = D | A | B,

        [V3D_QPU_A_VADD] = D | A | B,
        [V3D_QPU_A_VSUB] = D | A | B,
        [V3D_QPU_A_NOT] = D | A,
        [V3D_QPU_A_NEG] = D | A,
        [V3D_QPU_A_FLAPUSH] = D | A,
        [V3D_QPU_A_FLBPUSH] = D | A,
        [V3D_QPU_A_FLPOP] = D | A,
        [V3D_QPU_A_RECIP] = D | A,
        [V3D_QPU_A_SETMSF] = D | A,
        [V3D_QPU_A_SETREVF] = D | A,
        [V3D_QPU_A_NOP] = 0,
        [V3D_QPU_A_TIDX] = D,
        [V3D_QPU_A_EIDX] = D,
        [V3D_QPU_A_LR] = D,
        [V3D_QPU_A_VFLA] = D,
        [V3D_QPU_A_VFLNA] = D,
        [V3D_QPU_A_VFLB] = D,
        [V3D_QPU_A_VFLNB] = D,

        [V3D_QPU_A_FXCD] = D,
        [V3D_QPU_A_XCD] = D,
        [V3D_QPU_A_FYCD] = D,
        [V3D_QPU_A_YCD] = D,

        [V3D_QPU_A_MSF] = D,
        [V3D_QPU_A_REVF] = D,
        [V3D_QPU_A_VDWWT] = D,
        [V3D_QPU_A_IID] = D,
        [V3D_QPU_A_SAMPID] = D,
        [V3D_QPU_A_BARRIERID] = D,
        [V3D_QPU_A_TMUWT] = D,
        [V3D_QPU_A_VPMWT] = D,
        [V3D_QPU_A_FLAFIRST] = D,
        [V3D_QPU_A_FLNAFIRST] = D,

        [V3D_QPU_A_VPMSETUP] = D | A,

        [V3D_QPU_A_LDVPMV_IN] = D | A,
        [V3D_QPU_A_LDVPMV_OUT] = D | A,
        [V3D_QPU_A_LDVPMD_IN] = D | A,
        [V3D_QPU_A_LDVPMD_OUT] = D | A,
        [V3D_QPU_A_LDVPMP] = D | A,
        [V3D_QPU_A_RSQRT] = D | A,
        [V3D_QPU_A_EXP] = D | A,
        [V3D_QPU_A_LOG] = D | A,
        [V3D_QPU_A_SIN] = D | A,
        [V3D_QPU_A_RSQRT2] = D | A,
        [V3D_QPU_A_LDVPMG_IN] = D | A | B,
        [V3D_QPU_A_LDVPMG_OUT] = D | A | B,

        /* FIXME: MOVABSNEG */

        [V3D_QPU_A_FCMP] = D | A | B,
        [V3D_QPU_A_VFMAX] = D | A | B,

        [V3D_QPU_A_FROUND] = D | A,
        [V3D_QPU_A_FTOIN] = D | A,
        [V3D_QPU_A_FTRUNC] = D | A,
        [V3D_QPU_A_FTOIZ] = D | A,
        [V3D_QPU_A_FFLOOR] = D | A,
        [V3D_QPU_A_FTOUZ] = D | A,
        [V3D_QPU_A_FCEIL] = D | A,
        [V3D_QPU_A_FTOC] = D | A,

        [V3D_QPU_A_FDX] = D | A,
        [V3D_QPU_A_FDY] = D | A,

        [V3D_QPU_A_STVPMV] = A | B,
        [V3D_QPU_A_STVPMD] = A | B,
        [V3D_QPU_A_STVPMP] = A | B,

        [V3D_QPU_A_ITOF] = D | A,
        [V3D_QPU_A_CLZ] = D | A,
        [V3D_QPU_A_UTOF] = D | A,

        [V3D_QPU_A_MOV] = D | A,
        [V3D_QPU_A_FMOV] = D | A,
        [V3D_QPU_A_VPACK] = D | A | B,
        [V3D_QPU_A_V8PACK] = D | A | B,
        [V3D_QPU_A_V10PACK] = D | A | B,
        [V3D_QPU_A_V11FPACK] = D | A | B,

        [V3D_QPU_A_BALLOT] = D | A,
        [V3D_QPU_A_BCASTF] = D | A,
        [V3D_QPU_A_ALLEQ] = D | A,
        [V3D_QPU_A_ALLFEQ] = D | A,
        [V3D_QPU_A_ROTQ] = D | A | B,
        [V3D_QPU_A_ROT] = D | A | B,
        [V3D_QPU_A_SHUFFLE] = D | A | B,
};

static const uint8_t mul_op_args[] = {
        [V3D_QPU_M_ADD] = D | A | B,
        [V3D_QPU_M_SUB] = D | A | B,
        [V3D_QPU_M_UMUL24] = D | A | B,
        [V3D_QPU_M_VFMUL] = D | A | B,
        [V3D_QPU_M_SMUL24] = D | A | B,
        [V3D_QPU_M_MULTOP] = D | A | B,
        [V3D_QPU_M_FMOV] = D | A,
        [V3D_QPU_M_NOP] = 0,
        [V3D_QPU_M_MOV] = D | A,
        [V3D_QPU_M_FMUL] = D | A | B,
        [V3D_QPU_M_FTOUNORM16] = D | A,
        [V3D_QPU_M_FTOSNORM16] = D | A,
        [V3D_QPU_M_VFTOUNORM8] = D | A,
        [V3D_QPU_M_VFTOSNORM8] = D | A,
        [V3D_QPU_M_VFTOUNORM10LO] = D | A,
        [V3D_QPU_M_VFTOUNORM10HI] = D | A,
};

bool
v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)
{
        assert(op < ARRAY_SIZE(add_op_args));

        return add_op_args[op] & D;
}

bool
v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)
{
        assert(op < ARRAY_SIZE(mul_op_args));

        return mul_op_args[op] & D;
}

int
v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)
{
        assert(op < ARRAY_SIZE(add_op_args));

        uint8_t args = add_op_args[op];
        if (args & B)
                return 2;
        else if (args & A)
                return 1;
        else
                return 0;
}

int
v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)
{
        assert(op < ARRAY_SIZE(mul_op_args));

        uint8_t args = mul_op_args[op];
        if (args & B)
                return 2;
        else if (args & A)
                return 1;
        else
                return 0;
}

enum v3d_qpu_cond
v3d_qpu_cond_invert(enum v3d_qpu_cond cond)
{
        switch (cond) {
        case V3D_QPU_COND_IFA:
                return V3D_QPU_COND_IFNA;
        case V3D_QPU_COND_IFNA:
                return V3D_QPU_COND_IFA;
        case V3D_QPU_COND_IFB:
                return V3D_QPU_COND_IFNB;
        case V3D_QPU_COND_IFNB:
                return V3D_QPU_COND_IFB;
        default:
                unreachable("Non-invertible cond");
        }
}

bool
v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)
{
        switch (waddr) {
        case V3D_QPU_WADDR_RECIP:
        case V3D_QPU_WADDR_RSQRT:
        case V3D_QPU_WADDR_EXP:
        case V3D_QPU_WADDR_LOG:
        case V3D_QPU_WADDR_SIN:
        case V3D_QPU_WADDR_RSQRT2:
                return true;
        default:
                return false;
        }
}

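/* Whether a magic write address targets the TMU. On V3D 4.x the range starts
 * at TMUD (the 3.x TMU encoding is reused for UNIFA there), while on 3.x it
 * starts at TMU itself.
 */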
bool
v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info *devinfo,
                           enum v3d_qpu_waddr waddr)
{
        if (devinfo->ver >= 40) {
                return ((waddr >= V3D_QPU_WADDR_TMUD &&
                         waddr <= V3D_QPU_WADDR_TMUAU) ||
                        (waddr >= V3D_QPU_WADDR_TMUC &&
                         waddr <= V3D_QPU_WADDR_TMUHSLOD));
        } else {
                return ((waddr >= V3D_QPU_WADDR_TMU &&
                         waddr <= V3D_QPU_WADDR_TMUAU) ||
                        (waddr >= V3D_QPU_WADDR_TMUC &&
                         waddr <= V3D_QPU_WADDR_TMUHSLOD));
        }
}

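/* An instruction waits on outstanding TMU operations if it reads a TMU
 * result (ldtmu) or explicitly executes a TMUWT.
 */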
bool
v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst)
{
        return (inst->sig.ldtmu ||
                (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
                 inst->alu.add.op == V3D_QPU_A_TMUWT));
}

bool
v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)
{
        return (waddr == V3D_QPU_WADDR_TLB ||
                waddr == V3D_QPU_WADDR_TLBU);
}

bool
v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)
{
        return (waddr == V3D_QPU_WADDR_VPM ||
                waddr == V3D_QPU_WADDR_VPMU);
}

bool
v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
{
        return (waddr == V3D_QPU_WADDR_SYNC ||
                waddr == V3D_QPU_WADDR_SYNCB ||
                waddr == V3D_QPU_WADDR_SYNCU);
}

bool
v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr)
{
        switch (waddr) {
        case V3D_QPU_WADDR_VPMU:
        case V3D_QPU_WADDR_TLBU:
        case V3D_QPU_WADDR_TMUAU:
        case V3D_QPU_WADDR_SYNCU:
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_add_op_reads_vpm(enum v3d_qpu_add_op op)
{
        switch (op) {
        case V3D_QPU_A_VPMSETUP:
        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
        case V3D_QPU_A_LDVPMG_OUT:
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op)
{
        switch (op) {
        case V3D_QPU_A_VPMSETUP:
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                return true;
        default:
                return false;
        }
}

bool
v3d_qpu_reads_tlb(const struct v3d_qpu_instr *inst)
{
        return inst->sig.ldtlb || inst->sig.ldtlbu;
}

bool
v3d_qpu_writes_tlb(const struct v3d_qpu_instr *inst)
{
        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (inst->alu.add.op != V3D_QPU_A_NOP &&
                    inst->alu.add.magic_write &&
                    v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) {
                        return true;
                }

                if (inst->alu.mul.op != V3D_QPU_M_NOP &&
                    inst->alu.mul.magic_write &&
                    v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) {
                        return true;
                }
        }

        return false;
}

bool
v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
{
        return v3d_qpu_writes_tlb(inst) || v3d_qpu_reads_tlb(inst);
}

bool
v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
{
        return v3d_qpu_instr_is_sfu(inst) || v3d_qpu_instr_is_legacy_sfu(inst);
}

/* Checks whether the instruction implements an SFU operation by writing to
 * specific magic register addresses instead of using SFU ALU opcodes.
 */
bool
v3d_qpu_instr_is_legacy_sfu(const struct v3d_qpu_instr *inst)
{
        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (inst->alu.add.op != V3D_QPU_A_NOP &&
                    inst->alu.add.magic_write &&
                    v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) {
                        return true;
                }

                if (inst->alu.mul.op != V3D_QPU_M_NOP &&
                    inst->alu.mul.magic_write &&
                    v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) {
                        return true;
                }
        }

        return false;
}

bool
v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst)
{
        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                switch (inst->alu.add.op) {
                case V3D_QPU_A_RECIP:
                case V3D_QPU_A_RSQRT:
                case V3D_QPU_A_EXP:
                case V3D_QPU_A_LOG:
                case V3D_QPU_A_SIN:
                case V3D_QPU_A_RSQRT2:
                case V3D_QPU_A_BALLOT:
                case V3D_QPU_A_BCASTF:
                case V3D_QPU_A_ALLEQ:
                case V3D_QPU_A_ALLFEQ:
                case V3D_QPU_A_ROTQ:
                case V3D_QPU_A_ROT:
                case V3D_QPU_A_SHUFFLE:
                        return true;
                default:
                        return false;
                }
        }
        return false;
}

bool
v3d_qpu_writes_tmu(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *inst)
{
        return (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
                ((inst->alu.add.op != V3D_QPU_A_NOP &&
                  inst->alu.add.magic_write &&
                  v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.add.waddr)) ||
                 (inst->alu.mul.op != V3D_QPU_M_NOP &&
                  inst->alu.mul.magic_write &&
                  v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.mul.waddr))));
}

bool
v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info *devinfo,
                            const struct v3d_qpu_instr *inst)
{
        return v3d_qpu_writes_tmu(devinfo, inst) &&
               (!inst->alu.add.magic_write ||
                inst->alu.add.waddr != V3D_QPU_WADDR_TMUC) &&
               (!inst->alu.mul.magic_write ||
                inst->alu.mul.waddr != V3D_QPU_WADDR_TMUC);
}

bool
v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst)
{
        if (inst->sig.ldvpm)
                return true;

        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (v3d_qpu_add_op_reads_vpm(inst->alu.add.op))
                        return true;
        }

        return false;
}

bool
v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst)
{
        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (v3d_qpu_add_op_writes_vpm(inst->alu.add.op))
                        return true;

                if (inst->alu.add.op != V3D_QPU_A_NOP &&
                    inst->alu.add.magic_write &&
                    v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) {
                        return true;
                }

                if (inst->alu.mul.op != V3D_QPU_M_NOP &&
                    inst->alu.mul.magic_write &&
                    v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) {
                        return true;
                }
        }

        return false;
}

bool
v3d_qpu_writes_unifa(const struct v3d_device_info *devinfo,
                     const struct v3d_qpu_instr *inst)
{
        if (devinfo->ver < 40)
                return false;

        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (inst->alu.add.op != V3D_QPU_A_NOP &&
                    inst->alu.add.magic_write &&
                    inst->alu.add.waddr == V3D_QPU_WADDR_UNIFA) {
                        return true;
                }

                if (inst->alu.mul.op != V3D_QPU_M_NOP &&
                    inst->alu.mul.magic_write &&
                    inst->alu.mul.waddr == V3D_QPU_WADDR_UNIFA) {
                        return true;
                }

                if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
                    inst->sig_magic &&
                    inst->sig_addr == V3D_QPU_WADDR_UNIFA) {
                        return true;
                }
        }

        return false;
}

bool
v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst)
{
        return inst->type == V3D_QPU_INSTR_TYPE_ALU &&
               inst->alu.add.op == V3D_QPU_A_VPMWT;
}

bool
v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst)
{
        return v3d_qpu_reads_vpm(inst) || v3d_qpu_writes_vpm(inst);
}

bool
v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)
{
        return v3d_qpu_reads_vpm(inst) ||
               v3d_qpu_writes_vpm(inst) ||
               v3d_qpu_waits_vpm(inst);
}

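/* Checks whether either ALU op, or a signal that can write an address,
 * targets the given magic write address.
 */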
static bool
qpu_writes_magic_waddr_explicitly(const struct v3d_device_info *devinfo,
                                  const struct v3d_qpu_instr *inst,
                                  uint32_t waddr)
{
        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (inst->alu.add.op != V3D_QPU_A_NOP &&
                    inst->alu.add.magic_write && inst->alu.add.waddr == waddr)
                        return true;

                if (inst->alu.mul.op != V3D_QPU_M_NOP &&
                    inst->alu.mul.magic_write && inst->alu.mul.waddr == waddr)
                        return true;
        }

        if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
            inst->sig_magic && inst->sig_addr == waddr) {
                return true;
        }

        return false;
}

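/* r3 is written explicitly through a magic waddr, or implicitly by ldvary
 * (before V3D 4.1) and ldvpm.
 */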
bool
v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
                  const struct v3d_qpu_instr *inst)
{
        if (!devinfo->has_accumulators)
                return false;

        if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R3))
                return true;

        return (devinfo->ver < 41 && inst->sig.ldvary) || inst->sig.ldvpm;
}

bool
v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
                  const struct v3d_qpu_instr *inst)
{
        if (!devinfo->has_accumulators)
                return false;

        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (inst->alu.add.op != V3D_QPU_A_NOP &&
                    inst->alu.add.magic_write &&
                    (inst->alu.add.waddr == V3D_QPU_WADDR_R4 ||
                     v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))) {
                        return true;
                }

                if (inst->alu.mul.op != V3D_QPU_M_NOP &&
                    inst->alu.mul.magic_write &&
                    (inst->alu.mul.waddr == V3D_QPU_WADDR_R4 ||
                     v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))) {
                        return true;
                }
        }

        if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
                if (inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4)
                        return true;
        } else if (inst->sig.ldtmu) {
                return true;
        }

        return false;
}

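/* r5 is written explicitly through a magic waddr, or implicitly by ldvary,
 * ldunif and ldunifa.
 */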
bool
v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
                  const struct v3d_qpu_instr *inst)
{
        if (!devinfo->has_accumulators)
                return false;

        if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R5))
                return true;

        return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa;
}

bool
v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
                     const struct v3d_qpu_instr *inst)
{
        if (!devinfo->has_accumulators)
                return false;

        if (v3d_qpu_writes_r5(devinfo, inst))
                return true;
        if (v3d_qpu_writes_r4(devinfo, inst))
                return true;
        if (v3d_qpu_writes_r3(devinfo, inst))
                return true;
        if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R2))
                return true;
        if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R1))
                return true;
        if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R0))
                return true;

        return false;
}

bool
v3d_qpu_writes_rf0_implicitly(const struct v3d_device_info *devinfo,
                              const struct v3d_qpu_instr *inst)
{
        if (devinfo->ver >= 71 &&
            (inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa)) {
                return true;
        }

        return false;
}

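/* Source read helpers: v3d_qpu_uses_mux() checks the mux-based source
 * encoding, while v3d71_qpu_reads_raddr() below checks the register file
 * addresses used by V3D 7.1.
 */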
bool
v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
{
        int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
        int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);

        return ((add_nsrc > 0 && inst->alu.add.a.mux == mux) ||
                (add_nsrc > 1 && inst->alu.add.b.mux == mux) ||
                (mul_nsrc > 0 && inst->alu.mul.a.mux == mux) ||
                (mul_nsrc > 1 && inst->alu.mul.b.mux == mux));
}

bool
v3d71_qpu_reads_raddr(const struct v3d_qpu_instr *inst, uint8_t raddr)
{
        int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
        int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);

        return (add_nsrc > 0 && !inst->sig.small_imm_a && inst->alu.add.a.raddr == raddr) ||
               (add_nsrc > 1 && !inst->sig.small_imm_b && inst->alu.add.b.raddr == raddr) ||
               (mul_nsrc > 0 && !inst->sig.small_imm_c && inst->alu.mul.a.raddr == raddr) ||
               (mul_nsrc > 1 && !inst->sig.small_imm_d && inst->alu.mul.b.raddr == raddr);
}

bool
v3d71_qpu_writes_waddr_explicitly(const struct v3d_device_info *devinfo,
                                  const struct v3d_qpu_instr *inst,
                                  uint8_t waddr)
{
        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
                return false;

        if (v3d_qpu_add_op_has_dst(inst->alu.add.op) &&
            !inst->alu.add.magic_write &&
            inst->alu.add.waddr == waddr) {
                return true;
        }

        if (v3d_qpu_mul_op_has_dst(inst->alu.mul.op) &&
            !inst->alu.mul.magic_write &&
            inst->alu.mul.waddr == waddr) {
                return true;
        }

        if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
            !inst->sig_magic && inst->sig_addr == waddr) {
                return true;
        }

        return false;
}

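/* Whether the instruction's signal can take an explicit destination address
 * (the *rf variants, ldvary, ldtmu, ldtlb(u)). Only supported on V3D 4.1 and
 * later.
 */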
bool
v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
                           const struct v3d_qpu_sig *sig)
{
        if (devinfo->ver < 41)
                return false;

        return (sig->ldunifrf ||
                sig->ldunifarf ||
                sig->ldvary ||
                sig->ldtmu ||
                sig->ldtlb ||
                sig->ldtlbu);
}

bool
v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst)
{
        if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
                return inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS;
        } else if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (inst->flags.ac != V3D_QPU_COND_NONE ||
                    inst->flags.mc != V3D_QPU_COND_NONE ||
                    inst->flags.auf != V3D_QPU_UF_NONE ||
                    inst->flags.muf != V3D_QPU_UF_NONE)
                        return true;

                switch (inst->alu.add.op) {
                case V3D_QPU_A_VFLA:
                case V3D_QPU_A_VFLNA:
                case V3D_QPU_A_VFLB:
                case V3D_QPU_A_VFLNB:
                case V3D_QPU_A_FLAPUSH:
                case V3D_QPU_A_FLBPUSH:
                case V3D_QPU_A_FLAFIRST:
                case V3D_QPU_A_FLNAFIRST:
                        return true;
                default:
                        break;
                }
        }

        return false;
}

bool
v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst)
{
        if (inst->flags.apf != V3D_QPU_PF_NONE ||
            inst->flags.mpf != V3D_QPU_PF_NONE ||
            inst->flags.auf != V3D_QPU_UF_NONE ||
            inst->flags.muf != V3D_QPU_UF_NONE) {
                return true;
        }

        return false;
}

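/* The ALU ops listed here operate on 32-bit float sources, which is what the
 * f32 input unpack modifiers apply to.
 */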
bool
v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst)
{
        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
                return false;

        switch (inst->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
        case V3D_QPU_A_VFPACK:
                return true;
        default:
                break;
        }

        switch (inst->alu.mul.op) {
        case V3D_QPU_M_FMOV:
        case V3D_QPU_M_FMUL:
                return true;
        default:
                break;
        }

        return false;
}

bool
v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst)
{
        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
                return false;

        switch (inst->alu.add.op) {
        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                return true;
        default:
                break;
        }

        switch (inst->alu.mul.op) {
        case V3D_QPU_M_VFMUL:
                return true;
        default:
                break;
        }

        return false;
}

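/* A full NOP requires both ALU ops to be NOP with no signals set. */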
bool
v3d_qpu_is_nop(struct v3d_qpu_instr *inst)
{
        static const struct v3d_qpu_sig nosig = { 0 };

        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
                return false;
        if (inst->alu.add.op != V3D_QPU_A_NOP)
                return false;
        if (inst->alu.mul.op != V3D_QPU_M_NOP)
                return false;
        if (memcmp(&inst->sig, &nosig, sizeof(nosig)))
                return false;
        return true;
}