1 /*
2 * Copyright © 2022 Igalia S.L.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "tu_cs.h"
7
8 #include <arpa/inet.h>
9 #include <netinet/in.h>
10 #include <sys/socket.h>
11
12 #include "tu_device.h"
13
/* A simple implementation of breadcrumb tracking of GPU progress,
 * intended as a last resort when debugging unrecoverable hangs.
 *
 * This implementation aims to handle cases where we cannot do anything
 * after the hang, which is achieved by:
 * - On the GPU, after each breadcrumb we wait until the CPU acks it and
 *   sends a UDP packet to the remote host;
 * - At the specified breadcrumb, explicit user input is required to
 *   continue execution up to the next breadcrumb.
 *
 * For usage see freedreno.rst
 */
26
/* Per-device breadcrumb state, shared between the code that emits
 * breadcrumbs into command streams and the CPU thread that acknowledges
 * them.
 */
struct breadcrumbs_context
{
   /* Destination of the UDP progress packets, parsed from TU_BREADCRUMBS. */
   char remote_host[64];
   int remote_port;

   /* Breadcrumb value from which user confirmation is requested, and the
    * number of times that value must already have been seen before the
    * first prompt.
    */
   uint32_t breadcrumb_breakpoint;
   uint32_t breadcrumb_breakpoint_hits;

   /* Set to request that the acknowledgement thread exits. */
   bool thread_stop;
   pthread_t breadcrumbs_thread;

   /* Device whose global BO carries the GPU/CPU sync sequence numbers. */
   struct tu_device *device;

   /* Counter from which breadcrumb values are allocated. */
   uint32_t breadcrumb_idx;
};
41
42 static void *
sync_gpu_with_cpu(void * _job)43 sync_gpu_with_cpu(void *_job)
44 {
45 struct breadcrumbs_context *ctx = (struct breadcrumbs_context *) _job;
46 struct tu6_global *global = ctx->device->global_bo_map;
47 uint32_t last_breadcrumb = 0;
48 uint32_t breakpoint_hits = 0;
49
50 int s = socket(AF_INET, SOCK_DGRAM, 0);
51
52 if (s < 0) {
53 mesa_loge("TU_BREADCRUMBS: Error while creating socket");
54 return NULL;
55 }
56
57 struct sockaddr_in to_addr;
58 to_addr.sin_family = AF_INET;
59 to_addr.sin_port = htons(ctx->remote_port);
60 to_addr.sin_addr.s_addr = inet_addr(ctx->remote_host);
61
62 /* Run until we know that no more work would be submitted,
63 * because each breadcrumb requires an ack from cpu side and without
64 * the ack GPU would timeout.
65 */
66 while (!ctx->thread_stop) {
67 uint32_t current_breadcrumb = global->breadcrumb_gpu_sync_seqno;
68
69 if (current_breadcrumb != last_breadcrumb) {
70 last_breadcrumb = current_breadcrumb;
71
72 uint32_t data = htonl(last_breadcrumb);
73 if (sendto(s, &data, sizeof(data), 0, (struct sockaddr *) &to_addr,
74 sizeof(to_addr)) < 0) {
75 mesa_loge("TU_BREADCRUMBS: sendto failed");
76 goto fail;
77 }
78
79 if (last_breadcrumb >= ctx->breadcrumb_breakpoint &&
80 breakpoint_hits >= ctx->breadcrumb_breakpoint_hits) {
81 printf("GPU is on breadcrumb %d, continue?", last_breadcrumb);
82 while (getchar() != 'y')
83 ;
84 }
85
86 if (ctx->breadcrumb_breakpoint == last_breadcrumb)
87 breakpoint_hits++;
88
89 /* ack that we received the value */
90 global->breadcrumb_cpu_sync_seqno = last_breadcrumb;
91 }
92 }
93
94 fail:
95 close(s);
96
97 return NULL;
98 }
99
/* Emits the header of a type-7 packet for opcode with cnt payload dwords,
 * after reserving cnt + 1 entries in cs.
 *
 * Does the same as tu_cs_emit_pkt7, minus the instrumentation.
 */
static inline void
emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
{
   tu_cs_reserve(cs, cnt + 1);

   const uint32_t header = pm4_pkt7_hdr(opcode, cnt);
   tu_cs_emit(cs, header);
}
107
108 void
tu_breadcrumbs_init(struct tu_device * device)109 tu_breadcrumbs_init(struct tu_device *device)
110 {
111 const char *breadcrumbs_opt = NULL;
112 #ifdef TU_BREADCRUMBS_ENABLED
113 breadcrumbs_opt = os_get_option("TU_BREADCRUMBS");
114 #endif
115
116 device->breadcrumbs_ctx = NULL;
117 if (!breadcrumbs_opt) {
118 return;
119 }
120
121 struct breadcrumbs_context *ctx = (struct breadcrumbs_context *) malloc(
122 sizeof(struct breadcrumbs_context));
123 ctx->device = device;
124 ctx->breadcrumb_idx = 0;
125 ctx->thread_stop = false;
126
127 if (sscanf(breadcrumbs_opt, "%[^:]:%d,break=%u:%u", ctx->remote_host,
128 &ctx->remote_port, &ctx->breadcrumb_breakpoint,
129 &ctx->breadcrumb_breakpoint_hits) != 4) {
130 free(ctx);
131 mesa_loge("Wrong TU_BREADCRUMBS value");
132 return;
133 }
134
135 device->breadcrumbs_ctx = ctx;
136
137 struct tu6_global *global = device->global_bo_map;
138 global->breadcrumb_cpu_sync_seqno = 0;
139 global->breadcrumb_gpu_sync_seqno = 0;
140
141 pthread_create(&ctx->breadcrumbs_thread, NULL, sync_gpu_with_cpu, ctx);
142 }
143
144 void
tu_breadcrumbs_finish(struct tu_device * device)145 tu_breadcrumbs_finish(struct tu_device *device)
146 {
147 struct breadcrumbs_context *ctx = device->breadcrumbs_ctx;
148 if (!ctx || ctx->thread_stop)
149 return;
150
151 ctx->thread_stop = true;
152 pthread_join(ctx->breadcrumbs_thread, NULL);
153
154 free(ctx);
155 }
156
157 void
tu_cs_emit_sync_breadcrumb(struct tu_cs * cs,uint8_t opcode,uint16_t cnt)158 tu_cs_emit_sync_breadcrumb(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
159 {
160 /* TODO: we may run out of space if we add breadcrumbs
161 * to non-growable CS.
162 */
163 if (cs->mode != TU_CS_MODE_GROW)
164 return;
165
166 struct tu_device *device = cs->device;
167 struct breadcrumbs_context *ctx = device->breadcrumbs_ctx;
168 if (!ctx || ctx->thread_stop)
169 return;
170
171 bool before_packet = (cnt != 0);
172
173 if (before_packet) {
174 switch (opcode) {
175 case CP_EXEC_CS_INDIRECT:
176 case CP_EXEC_CS:
177 case CP_DRAW_INDX:
178 case CP_DRAW_INDX_OFFSET:
179 case CP_DRAW_INDIRECT:
180 case CP_DRAW_INDX_INDIRECT:
181 case CP_DRAW_INDIRECT_MULTI:
182 case CP_DRAW_AUTO:
183 case CP_BLIT:
184 // case CP_SET_DRAW_STATE:
185 // case CP_LOAD_STATE6_FRAG:
186 // case CP_LOAD_STATE6_GEOM:
187 break;
188 default:
189 return;
190 };
191 } else {
192 assert(cs->breadcrumb_emit_after == 0);
193 }
194
195 uint32_t current_breadcrumb = p_atomic_inc_return(&ctx->breadcrumb_idx);
196
197 if (ctx->breadcrumb_breakpoint != -1 &&
198 current_breadcrumb < ctx->breadcrumb_breakpoint)
199 return;
200
201 emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
202 emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
203 emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
204
205 emit_pkt7(cs, CP_MEM_WRITE, 3);
206 tu_cs_emit_qw(
207 cs, device->global_bo->iova + gb_offset(breadcrumb_gpu_sync_seqno));
208 tu_cs_emit(cs, current_breadcrumb);
209
210 /* Wait until CPU acknowledges the value written by GPU */
211 emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
212 tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
213 CP_WAIT_REG_MEM_0_POLL(POLL_MEMORY));
214 tu_cs_emit_qw(
215 cs, device->global_bo->iova + gb_offset(breadcrumb_cpu_sync_seqno));
216 tu_cs_emit(cs, CP_WAIT_REG_MEM_3_REF(current_breadcrumb));
217 tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0));
218 tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
219
220 if (before_packet)
221 cs->breadcrumb_emit_after = cnt;
222 }
223