/*
 * Copyright © 2022 Google, Inc.
 * Copyright © 2022 Valve Corporation
 * SPDX-License-Identifier: MIT
 */
6*61046927SAndroid Build Coastguard Worker
#include <assert.h>
#include <stdint.h>
#include <string.h>

#include "util/macros.h"
#include "crashdec.h"
#include "cffdec.h"
10*61046927SAndroid Build Coastguard Worker
/* Number of CP_INDIRECT_BUFFER targets remembered while scanning.  Presumably
 * sized to hold the IB being executed plus the (up to 3) subsequent IBs that
 * the CP may have started prefetching.
 */
#define MAX_PREFETCH_IBS 4
12*61046927SAndroid Build Coastguard Worker
/* CP_INDIRECT_BUFFER contains an optimization to read ahead and start
 * fetching up to 3 subsequent CP_INDIRECT_BUFFER contents into the ROQ before
 * starting to execute the current IB. This effectively combines them into one
 * CP_INDIRECT_BUFFER. The result is that if the ROQ is fast enough and
 * prefetches some of the extra IBs before the first IB finishes, the ROQ may
 * be in a different IB than the CP is processing. That is, normally we'd have
 * a situation like this:
 *
 *    CP_INDIRECT_BUFFER
 *       ...
 *       CP_FOO <- PFP/SQE is reading from here
 *       ...
 *       CP_BAR <- ROQ has prefetched up to here
 *
 * where CP_IB*_BASE and CP_IB*_REM_SIZE point to CP_BAR and the difference
 * between CP_FOO and CP_BAR is given by CP_ROQ_AVAIL_IBn::REM, but instead we
 * may get a situation like this:
 *
 *    CP_INDIRECT_BUFFER
 *       ...
 *       CP_FOO <- PFP/SQE is reading here
 *       ...
 *    CP_INDIRECT_BUFFER
 *       ...
 *       CP_BAR <- ROQ has prefetched up to here
 *
 * in this case, the "rem" we get with CP_ROQ_AVAIL_IBn::REM added will be
 * larger than the size of the second IB, indicating that we need to back up
 * to the IB before it. This can theoretically even happen recursively with
 * IB2:
 *
 *    CP_INDIRECT_BUFFER:
 *       ...
 *       CP_INDIRECT_BUFFER:
 *          ...
 *          CP_FOO <- PFP/SQE IB2 is reading here
 *          ...
 *    CP_INDIRECT_BUFFER:
 *       CP_INDIRECT_BUFFER:
 *          ...
 *          CP_BAR <- ROQ IB2 has prefetched up to here
 *       ...
 *       CP_BAZ <- PFP/SQE IB1 is reading here
 *
 * Here the ROQ has prefetched the second IB1, then when processing the IB2 at
 * the end of the first IB1 it peeks ahead in ROQ and sees another IB2 right
 * afterward in the second IB1 and starts prefetching that too, so that the
 * ROQ is in a different IB1 *and* IB2 from the CP.
 *
 * To account for this when locating the position that the SQE was at in the
 * cmdstream at the time of the crash, we do a pre-pass scanning the
 * CP_INDIRECT_BUFFER packets, keeping a history of previous IB's so that we
 * can backtrack (because CP_IBn_BASE can be several IB's ahead of SQE). Once
 * we find the IB1 position that is being read into ROQ, we backtrack until
 * we find the IB1 position that SQE is at, and (roughly) repeat the process
 * in IB2. This has one complication, in that we need to start scanning for
 * the CP_INDIRECT_BUFFER to IB2 from before the detected IB1 position.
 */
71*61046927SAndroid Build Coastguard Worker
/* One CP_INDIRECT_BUFFER target, as recorded while scanning the cmdstream.
 *
 * Field order matters: entries are built with a positional compound literal.
 */
struct ib {
   uint64_t ibaddr; /* address of the IB; matched against CP_IBn_BASE */
   uint32_t ibsize; /* size of the IB, in dwords */
};
76*61046927SAndroid Build Coastguard Worker
77*61046927SAndroid Build Coastguard Worker struct prefetch_state {
78*61046927SAndroid Build Coastguard Worker struct ib history[MAX_PREFETCH_IBS];
79*61046927SAndroid Build Coastguard Worker unsigned num, next;
80*61046927SAndroid Build Coastguard Worker };
81*61046927SAndroid Build Coastguard Worker
82*61046927SAndroid Build Coastguard Worker static void
push_ib(struct prefetch_state * s,struct ib * ib)83*61046927SAndroid Build Coastguard Worker push_ib(struct prefetch_state *s, struct ib *ib)
84*61046927SAndroid Build Coastguard Worker {
85*61046927SAndroid Build Coastguard Worker s->history[s->next++ % ARRAY_SIZE(s->history)] = *ib;
86*61046927SAndroid Build Coastguard Worker s->num = MIN2(s->num + 1, ARRAY_SIZE(s->history));
87*61046927SAndroid Build Coastguard Worker }
88*61046927SAndroid Build Coastguard Worker
89*61046927SAndroid Build Coastguard Worker static struct ib *
get_ib(struct prefetch_state * s,int n)90*61046927SAndroid Build Coastguard Worker get_ib(struct prefetch_state *s, int n)
91*61046927SAndroid Build Coastguard Worker {
92*61046927SAndroid Build Coastguard Worker if ((n >= s->num) || (n < 0))
93*61046927SAndroid Build Coastguard Worker return NULL;
94*61046927SAndroid Build Coastguard Worker int idx = s->next - (s->num - n);
95*61046927SAndroid Build Coastguard Worker return &s->history[idx % ARRAY_SIZE(s->history)];
96*61046927SAndroid Build Coastguard Worker }
97*61046927SAndroid Build Coastguard Worker
98*61046927SAndroid Build Coastguard Worker static void
reset_state(struct prefetch_state * s)99*61046927SAndroid Build Coastguard Worker reset_state(struct prefetch_state *s)
100*61046927SAndroid Build Coastguard Worker {
101*61046927SAndroid Build Coastguard Worker s->num = s->next = 0;
102*61046927SAndroid Build Coastguard Worker }
103*61046927SAndroid Build Coastguard Worker
104*61046927SAndroid Build Coastguard Worker /**
105*61046927SAndroid Build Coastguard Worker * Once we find the ROQ prefetch position, work backwards to find the SQE
106*61046927SAndroid Build Coastguard Worker * position.
107*61046927SAndroid Build Coastguard Worker */
108*61046927SAndroid Build Coastguard Worker static struct ib *
reverse_prefetch(struct prefetch_state * s,int lvl)109*61046927SAndroid Build Coastguard Worker reverse_prefetch(struct prefetch_state *s, int lvl)
110*61046927SAndroid Build Coastguard Worker {
111*61046927SAndroid Build Coastguard Worker unsigned rem = options.ibs[lvl].rem;
112*61046927SAndroid Build Coastguard Worker
113*61046927SAndroid Build Coastguard Worker for (int n = s->num - 1; n >= 0; n--) {
114*61046927SAndroid Build Coastguard Worker struct ib *ib = get_ib(s, n);
115*61046927SAndroid Build Coastguard Worker if (ib->ibsize > rem) {
116*61046927SAndroid Build Coastguard Worker options.ibs[lvl].crash_found = 1;
117*61046927SAndroid Build Coastguard Worker options.ibs[lvl].base = ib->ibaddr;
118*61046927SAndroid Build Coastguard Worker options.ibs[lvl].rem = rem;
119*61046927SAndroid Build Coastguard Worker
120*61046927SAndroid Build Coastguard Worker return ib;
121*61046927SAndroid Build Coastguard Worker }
122*61046927SAndroid Build Coastguard Worker rem -= ib->ibsize;
123*61046927SAndroid Build Coastguard Worker }
124*61046927SAndroid Build Coastguard Worker
125*61046927SAndroid Build Coastguard Worker return NULL;
126*61046927SAndroid Build Coastguard Worker }
127*61046927SAndroid Build Coastguard Worker
128*61046927SAndroid Build Coastguard Worker /**
129*61046927SAndroid Build Coastguard Worker * Scan cmdstream looking for CP_INDIRECT_BUFFER packets, tracking history
130*61046927SAndroid Build Coastguard Worker * of consecutive CP_INDIRECT_BUFFER packets, until we find the one that
131*61046927SAndroid Build Coastguard Worker * matches CP_IBn_BASE.
132*61046927SAndroid Build Coastguard Worker */
133*61046927SAndroid Build Coastguard Worker static struct ib *
scan_cmdstream(struct prefetch_state * s,int lvl,uint32_t * dwords,uint32_t sizedwords)134*61046927SAndroid Build Coastguard Worker scan_cmdstream(struct prefetch_state *s, int lvl, uint32_t *dwords, uint32_t sizedwords)
135*61046927SAndroid Build Coastguard Worker {
136*61046927SAndroid Build Coastguard Worker int dwords_left = sizedwords;
137*61046927SAndroid Build Coastguard Worker uint32_t count = 0; /* dword count including packet header */
138*61046927SAndroid Build Coastguard Worker uint32_t val;
139*61046927SAndroid Build Coastguard Worker
140*61046927SAndroid Build Coastguard Worker while (dwords_left > 0) {
141*61046927SAndroid Build Coastguard Worker if (pkt_is_opcode(dwords[0], &val, &count)) {
142*61046927SAndroid Build Coastguard Worker if (!strcmp(pktname(val), "CP_INDIRECT_BUFFER")) {
143*61046927SAndroid Build Coastguard Worker uint64_t ibaddr;
144*61046927SAndroid Build Coastguard Worker uint32_t ibsize;
145*61046927SAndroid Build Coastguard Worker
146*61046927SAndroid Build Coastguard Worker parse_cp_indirect(&dwords[1], count - 1, &ibaddr, &ibsize);
147*61046927SAndroid Build Coastguard Worker push_ib(s, &(struct ib){ ibaddr, ibsize });
148*61046927SAndroid Build Coastguard Worker
149*61046927SAndroid Build Coastguard Worker /* If we've found the IB indicated by CP_IBn_BASE, then we can
150*61046927SAndroid Build Coastguard Worker * search backwards from here to find the SQE position:
151*61046927SAndroid Build Coastguard Worker */
152*61046927SAndroid Build Coastguard Worker if (ibaddr == options.ibs[lvl].base)
153*61046927SAndroid Build Coastguard Worker return reverse_prefetch(s, lvl);
154*61046927SAndroid Build Coastguard Worker
155*61046927SAndroid Build Coastguard Worker goto next_pkt;
156*61046927SAndroid Build Coastguard Worker }
157*61046927SAndroid Build Coastguard Worker } else if (pkt_is_regwrite(dwords[0], &val, &count)) {
158*61046927SAndroid Build Coastguard Worker } else {
159*61046927SAndroid Build Coastguard Worker count = find_next_packet(dwords, dwords_left);
160*61046927SAndroid Build Coastguard Worker }
161*61046927SAndroid Build Coastguard Worker
162*61046927SAndroid Build Coastguard Worker /* prefetch only happens across consecutive CP_INDIRECT_BUFFER, so
163*61046927SAndroid Build Coastguard Worker * any other packet resets the state:
164*61046927SAndroid Build Coastguard Worker */
165*61046927SAndroid Build Coastguard Worker reset_state(s);
166*61046927SAndroid Build Coastguard Worker
167*61046927SAndroid Build Coastguard Worker next_pkt:
168*61046927SAndroid Build Coastguard Worker dwords += count;
169*61046927SAndroid Build Coastguard Worker dwords_left -= count;
170*61046927SAndroid Build Coastguard Worker }
171*61046927SAndroid Build Coastguard Worker
172*61046927SAndroid Build Coastguard Worker return NULL;
173*61046927SAndroid Build Coastguard Worker }
174*61046927SAndroid Build Coastguard Worker
175*61046927SAndroid Build Coastguard Worker void
handle_prefetch(uint32_t * dwords,uint32_t sizedwords)176*61046927SAndroid Build Coastguard Worker handle_prefetch(uint32_t *dwords, uint32_t sizedwords)
177*61046927SAndroid Build Coastguard Worker {
178*61046927SAndroid Build Coastguard Worker struct prefetch_state rb_state = {};
179*61046927SAndroid Build Coastguard Worker struct ib *ib1 = scan_cmdstream(&rb_state, 1, dwords, sizedwords);
180*61046927SAndroid Build Coastguard Worker
181*61046927SAndroid Build Coastguard Worker if (!ib1)
182*61046927SAndroid Build Coastguard Worker return;
183*61046927SAndroid Build Coastguard Worker
184*61046927SAndroid Build Coastguard Worker /* If the gpu crashed in IB1, we can skip the rest: */
185*61046927SAndroid Build Coastguard Worker if (!options.ibs[2].rem)
186*61046927SAndroid Build Coastguard Worker return;
187*61046927SAndroid Build Coastguard Worker
188*61046927SAndroid Build Coastguard Worker struct prefetch_state ib1_state = {};
189*61046927SAndroid Build Coastguard Worker
190*61046927SAndroid Build Coastguard Worker /* Once we find the actual IB1 position, we need to find the IB2 position.
191*61046927SAndroid Build Coastguard Worker * But because IB2 prefetch can span IB1 CP_INDIRECT_BUFFER targets. But
192*61046927SAndroid Build Coastguard Worker * there are a limited # of buffers that can be prefetched, and we already
193*61046927SAndroid Build Coastguard Worker * have a history of enough RB->IB1 IB's, so we can simply scan forward
194*61046927SAndroid Build Coastguard Worker * from our oldest history entry until we find the IB2 match..
195*61046927SAndroid Build Coastguard Worker */
196*61046927SAndroid Build Coastguard Worker for (int n = 0; n < rb_state.num; n++) {
197*61046927SAndroid Build Coastguard Worker struct ib *ib = get_ib(&rb_state, n);
198*61046927SAndroid Build Coastguard Worker uint32_t *ibaddr = hostptr(ib->ibaddr);
199*61046927SAndroid Build Coastguard Worker if (!ibaddr)
200*61046927SAndroid Build Coastguard Worker break;
201*61046927SAndroid Build Coastguard Worker struct ib *ib2 = scan_cmdstream(&ib1_state, 2, ibaddr, ib->ibsize);
202*61046927SAndroid Build Coastguard Worker
203*61046927SAndroid Build Coastguard Worker /* If the crash happens in IB2, but IB1 has a sequence of CP_INDIRECT_BUFFER's
204*61046927SAndroid Build Coastguard Worker * then IB1 could actually be further ahead than IB2, ie:
205*61046927SAndroid Build Coastguard Worker *
206*61046927SAndroid Build Coastguard Worker * IB1:CP_INDIRECT_BUFFER
207*61046927SAndroid Build Coastguard Worker * IB2: .. crash somewhere in here ..
208*61046927SAndroid Build Coastguard Worker * IB1:CP_INDIRECT_BUFFER
209*61046927SAndroid Build Coastguard Worker * IB1:CP_INDIRECT_BUFFER <-- detected IB1 position
210*61046927SAndroid Build Coastguard Worker *
211*61046927SAndroid Build Coastguard Worker * Our logic for detecting the correct IB1 position is not incorrect.
212*61046927SAndroid Build Coastguard Worker * It is just that SQE has already consumed some additional IB's. So
213*61046927SAndroid Build Coastguard Worker * reset the IB1 crash position back to the oldest RB->IB1 IB that we
214*61046927SAndroid Build Coastguard Worker * remember.
215*61046927SAndroid Build Coastguard Worker *
216*61046927SAndroid Build Coastguard Worker * This isn't *quite* correct, but cffdec will only mark the crash when
217*61046927SAndroid Build Coastguard Worker * it finds the location in IB2 if we've determined that the crash is
218*61046927SAndroid Build Coastguard Worker * in IB2, but will only consider the address in IB2 if it has seen the
219*61046927SAndroid Build Coastguard Worker * IB1 base.
220*61046927SAndroid Build Coastguard Worker *
221*61046927SAndroid Build Coastguard Worker * The main case we are trying to account for here is GMEM mode crash in
222*61046927SAndroid Build Coastguard Worker * IB2 which *isn't* the first bin/tile. Ie. the crash happens later
223*61046927SAndroid Build Coastguard Worker * than the first time we encounter the IB2 crash address.
224*61046927SAndroid Build Coastguard Worker *
225*61046927SAndroid Build Coastguard Worker * This approach works in practice because there will be some other pkts
226*61046927SAndroid Build Coastguard Worker * in IB1 to setup for the next tile, breaking up prefetch.
227*61046927SAndroid Build Coastguard Worker */
228*61046927SAndroid Build Coastguard Worker if (ib2) {
229*61046927SAndroid Build Coastguard Worker assert(options.ibs[2].crash_found);
230*61046927SAndroid Build Coastguard Worker struct ib *first_rb_ib = get_ib(&rb_state, 0);
231*61046927SAndroid Build Coastguard Worker
232*61046927SAndroid Build Coastguard Worker options.ibs[1].base = first_rb_ib->ibaddr;
233*61046927SAndroid Build Coastguard Worker options.ibs[1].rem = first_rb_ib->ibsize;
234*61046927SAndroid Build Coastguard Worker
235*61046927SAndroid Build Coastguard Worker break;
236*61046927SAndroid Build Coastguard Worker }
237*61046927SAndroid Build Coastguard Worker
238*61046927SAndroid Build Coastguard Worker if (ib == ib1)
239*61046927SAndroid Build Coastguard Worker break;
240*61046927SAndroid Build Coastguard Worker }
241*61046927SAndroid Build Coastguard Worker }
242