1 /*
2 * Copyright © 2017 Rob Clark <[email protected]>
3 * SPDX-License-Identifier: MIT
4 */
5
6 #ifndef _AFUC_H_
7 #define _AFUC_H_
8
9 #include <stdbool.h>
10
11 #include "util/macros.h"
12
/*
 * TODO: kernel debugfs to inject a packet into the RB for easier
 * experimentation. It should trigger reloading pfp/me and resetting the GPU.
 *
 * Actually, maybe it should be a flag on the submit ioctl, to be able to deal
 * w/ relocs. It should be restricted to CAP_SYS_ADMIN, and probably be a
 * compile option too (default=n). If the flag is set, copy the cmdstream bo
 * contents into the RB instead of IB'ing to it from the RB.
 */
22
/* afuc opcodes.
 *
 * No enumerator has an explicit value: each one is numbered by its position
 * in this list, starting from OPC_NOP == 0, so inserting or reordering
 * entries changes the value of everything that follows.
 */
typedef enum {
   OPC_NOP,

   /* ALU operations. Each one is declared as a pair of opcodes: OPC_<op>
    * immediately followed by OPC_<op>I.
    */
   OPC_ADD,   OPC_ADDI,   /* add immediate */
   OPC_ADDHI, OPC_ADDHII, /* add immediate (hi 32b of 64b) */
   OPC_SUB,   OPC_SUBI,   /* subtract immediate */
   OPC_SUBHI, OPC_SUBHII, /* subtract immediate (hi 32b of 64b) */
   OPC_AND,   OPC_ANDI,   /* AND immediate */
   OPC_OR,    OPC_ORI,    /* OR immediate */
   OPC_XOR,   OPC_XORI,   /* XOR immediate */
   OPC_NOT,   OPC_NOTI,   /* bitwise not of immed (src1 ignored) */
   OPC_SHL,   OPC_SHLI,   /* shift-left immediate */
   OPC_USHR,  OPC_USHRI,  /* unsigned shift right by immediate */
   OPC_ISHR,  OPC_ISHRI,  /* signed shift right by immediate */
   OPC_ROT,   OPC_ROTI,   /* rotate left (left shift with wrap-around) */
   OPC_MUL8,  OPC_MUL8I,  /* 8bit multiply by immediate */
   OPC_MIN,   OPC_MINI,
   OPC_MAX,   OPC_MAXI,
   OPC_CMP,   OPC_CMPI,   /* compare src to immed */
   OPC_BIC,   OPC_BICI,   /* AND with second source negated */

   /* Operations that are not declared as a pair */
   OPC_SETBIT,  /* Set or clear a bit dynamically */
   OPC_MOVI,    /* move immediate */
   OPC_SETBITI, /* Set a bit */
   OPC_CLRBIT,  /* Clear a bit */
   OPC_UBFX,    /* Unsigned BitField eXtract */
   OPC_BFI,     /* BitField Insert */

   /* Return the most-significant bit of src2, or 0 if src2 == 0 (the
    * same as if src2 == 1). src1 is ignored. Note that this overlaps
    * with STORE, so it can only be used with the two-source encoding.
    */
   OPC_MSB,

   /* These seem to have something to do with setting some external state.
    * They don't seem to map *directly* to registers, but I guess that is
    * where things end up. For example, this sequence in the
    * CP_INDIRECT_BUFFER handler:
    *
    *    mov $02, $data   ; low 32b of IB target address
    *    mov $03, $data   ; high 32b of IB target
    *    mov $04, $data   ; IB size in dwords
    *    breq $04, 0x0, #l23 (#69, 04a2)
    *    and $05, $18, 0x0003
    *    shl $05, $05, 0x0002
    *    cwrite $02, [$05 + 0x0b0], 0x8
    *    cwrite $03, [$05 + 0x0b1], 0x8
    *    cwrite $04, [$05 + 0x0b2], 0x8
    *
    * Note that CP_IB1/2_BASE_LO/HI/BUFSZ are in 0x0b1f->0x0b21 (IB1) and
    * 0x0b22->0x0b24 (IB2). Presumably $05 ends up w/ a different value
    * for RB->IB1 vs IB1->IB2.
    */
   OPC_CWRITE,
   OPC_CREAD,

   /* A6xx added new opcodes that let you read/write directly to memory (and
    * bypass the IOMMU?).
    */
   OPC_STORE,
   OPC_LOAD,

   /* A6xx added new opcodes that let you read/write the state of the
    * SQE processor itself, like the call stack. This is mostly used by
    * preemption but is also used to set the preempt routine entrypoint.
    */
   OPC_SREAD,
   OPC_SWRITE,

   /* Control flow */
   OPC_BRNEI,     /* relative branch (if $src != immed) */
   OPC_BREQI,     /* relative branch (if $src == immed) */
   OPC_BRNEB,     /* relative branch (if bit not set) */
   OPC_BREQB,     /* relative branch (if bit is set) */
   OPC_RET,       /* return */
   OPC_IRET,      /* return from preemption interrupt handler */
   OPC_CALL,      /* "function" call */
   OPC_WAITIN,    /* wait for input (ie. wait for WPTR to advance) */
   OPC_BL,        /* Branch and Link (same as the MIPS/ARM instruction) */
   OPC_SETSECURE, /* switch secure mode on/off */
   OPC_JUMPR,     /* indirect jump with a register offset */
   OPC_SRET,      /* Return instruction to use with "bl" */
   OPC_JUMPA,     /* Absolute jump instruction */

   /* pseudo-opcodes without an actual encoding */
   OPC_BREQ,
   OPC_BRNE,
   OPC_JUMP,
   OPC_RAW_LITERAL,
   OPC_JUMPTBL,
} afuc_opc;
116
/**
 * Special GPR registers:
 *
 * Notes: (applicable to a6xx, double check a5xx)
 *
 *   0x1a:
 *      $sp
 *   0x1b:
 *      $lr: written by bl
 *   0x1d: (different mnemonic for src vs dst)
 *      $addr:    writes configure GPU reg address to read/write
 *                (does not respect CP_PROTECT)
 *      $memdata: reads from FIFO filled based on MEM_READ_DWORDS/
 *                MEM_READ_ADDR
 *   0x1e: (different mnemonic for src vs dst)
 *      $usraddr: writes configure GPU reg address to read/write,
 *                respecting CP_PROTECT
 *      $regdata: reads from FIFO filled based on REG_READ_DWORDS/
 *                REG_READ_ADDR
 *   0x1f:
 *      $data:    reads from the pm4 input stream
 *      $data:    writes to stream configured by write to $addr
 *                or $usraddr
 */
typedef enum {
   REG_SP      = 0x1a,
   REG_LR      = 0x1b,
   REG_REM     = 0x1c,

   /* 0x1d and 0x1e each have two names; which one applies depends on
    * whether the register is used as a source or as a destination.
    */
   REG_MEMDATA = 0x1d,        /* when used as src */
   REG_ADDR    = REG_MEMDATA, /* when used as dst */
   REG_REGDATA = 0x1e,        /* when used as src */
   REG_USRADDR = REG_REGDATA, /* when used as dst */

   REG_DATA    = 0x1f,
} afuc_reg;
151
152 struct afuc_instr {
153 afuc_opc opc;
154
155 uint8_t dst;
156 uint8_t src1;
157 uint8_t src2;
158 uint32_t immed;
159 uint8_t shift;
160 uint8_t bit;
161 uint8_t xmov;
162 uint8_t sds;
163 uint32_t literal;
164 int offset;
165 const char *label;
166
167 bool has_immed : 1;
168 bool has_shift : 1;
169 bool has_bit : 1;
170 bool is_literal : 1;
171 bool rep : 1;
172 bool preincrement : 1;
173 bool peek : 1;
174 };
175
/* Literal offsets are sometimes encoded as NOP instructions, which on a6xx+
 * must have a high 8 bits of 0x01.
 *
 * x:      the literal; it must fit in the low 24 bits (asserted).
 * gpuver: GPU generation; values below 6 leave the literal untouched.
 *
 * Returns x unchanged for gpuver < 6, otherwise x with bit 24 set.
 */
static inline uint32_t
afuc_nop_literal(uint32_t x, unsigned gpuver)
{
   /* Bit 24 is the lowest bit of the top byte, i.e. a top byte of 0x01. */
   const uint32_t nop_tag = 1 << 24;

   assert((x >> 24) == 0);

   if (gpuver >= 6)
      return x | nop_tag;

   return x;
}
185
/* Register printing helpers; only the declarations live in this header.
 * Each one takes a numeric register id.
 *
 * NOTE(review): judging by the names, these handle ids in the control, SQE
 * and pipe register spaces respectively -- confirm against the
 * implementations.
 */
void print_control_reg(uint32_t id);
void print_sqe_reg(uint32_t id);
void print_pipe_reg(uint32_t id);
189
190 #endif /* _AFUC_H_ */
191