1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <[email protected]>
4 * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <[email protected]>
5 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6 */
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <sys/types.h>
10 #include <dirent.h>
11 #include <limits.h>
12 #include <unistd.h>
13 #include <errno.h>
14
15 #include "trace-local.h"
16 #include "trace-msg.h"
17
/* Growable array holding every guest registered through add_guest(). */
static struct trace_guest *guests = NULL;
/* Number of valid entries currently stored in the guests array. */
static size_t guests_len = 0;
20
get_guest_by_cid(unsigned int guest_cid)21 static struct trace_guest *get_guest_by_cid(unsigned int guest_cid)
22 {
23 int i;
24
25 if (!guests)
26 return NULL;
27
28 for (i = 0; i < guests_len; i++)
29 if (guest_cid == guests[i].cid)
30 return guests + i;
31 return NULL;
32 }
33
get_guest_by_name(const char * name)34 static struct trace_guest *get_guest_by_name(const char *name)
35 {
36 int i;
37
38 if (!guests)
39 return NULL;
40
41 for (i = 0; i < guests_len; i++)
42 if (strcmp(name, guests[i].name) == 0)
43 return guests + i;
44 return NULL;
45 }
46
trace_have_guests_pid(void)47 bool trace_have_guests_pid(void)
48 {
49 for (int i = 0; i < guests_len; i++) {
50 if (guests[i].pid < 0)
51 return false;
52 }
53
54 return true;
55 }
56
add_guest(unsigned int cid,const char * name)57 static struct trace_guest *add_guest(unsigned int cid, const char *name)
58 {
59 guests = realloc(guests, (guests_len + 1) * sizeof(*guests));
60 if (!guests)
61 die("allocating new guest");
62 memset(&guests[guests_len], 0, sizeof(struct trace_guest));
63 guests[guests_len].name = strdup(name);
64 if (!guests[guests_len].name)
65 die("allocating guest name");
66 guests[guests_len].cid = cid;
67 guests[guests_len].pid = -1;
68 guests_len++;
69
70 return &guests[guests_len - 1];
71 }
72
/*
 * Create the "vsock_find_pid" tracing instance and start tracing in it.
 *
 * The sched:sched_waking and kvm:kvm_exit events are enabled in the new
 * instance and tracing is switched on. The results of enabling the
 * events and of switching tracing on are not checked.
 *
 * Returns the instance, or NULL if it could not be created.
 */
static struct tracefs_instance *start_trace_connect(void)
{
	struct tracefs_instance *instance;

	instance = tracefs_instance_create("vsock_find_pid");
	if (instance) {
		tracefs_event_enable(instance, "sched", "sched_waking");
		tracefs_event_enable(instance, "kvm", "kvm_exit");
		tracefs_trace_on(instance);
	}

	return instance;
}
86
/* One node of a singly linked list of task PIDs. */
struct pids {
	struct pids	*next;	/* following node, NULL for the last one */
	int		pid;	/* PID recorded in this node */
};
91
/* State shared between stop_trace_connect() and its per-event callback. */
struct trace_fields {
	struct tep_event	*sched_waking;	/* the sched:sched_waking event */
	struct tep_event	*kvm_exit;	/* the kvm:kvm_exit event */
	struct tep_format_field	*common_pid;	/* "common_pid" field of the events */
	struct tep_format_field	*sched_next;	/* "pid" field of sched_waking */
	struct pids		*pids;		/* PIDs whose events are followed */
	int			found_pid;	/* PID that hit kvm_exit, -1 if none */
};
100
free_pids(struct pids * pids)101 static void free_pids(struct pids *pids)
102 {
103 struct pids *next;
104
105 while (pids) {
106 next = pids;
107 pids = pids->next;
108 free(next);
109 }
110 }
111
add_pid(struct pids ** pids,int pid)112 static void add_pid(struct pids **pids, int pid)
113 {
114 struct pids *new_pid;
115
116 new_pid = malloc(sizeof(*new_pid));
117 if (!new_pid)
118 return;
119
120 new_pid->pid = pid;
121 new_pid->next = *pids;
122 *pids = new_pid;
123 }
124
match_pid(struct pids * pids,int pid)125 static bool match_pid(struct pids *pids, int pid)
126 {
127 while (pids) {
128 if (pids->pid == pid)
129 return true;
130 pids = pids->next;
131 }
132 return false;
133 }
134
callback(struct tep_event * event,struct tep_record * record,int cpu,void * data)135 static int callback(struct tep_event *event, struct tep_record *record, int cpu,
136 void *data)
137 {
138 struct trace_fields *fields = data;
139 struct tep_handle *tep = event->tep;
140 unsigned long long val;
141 int flags;
142 int type;
143 int pid;
144 int ret;
145
146 ret = tep_read_number_field(fields->common_pid, record->data, &val);
147 if (ret < 0)
148 return 0;
149
150 flags = tep_data_flags(tep, record);
151
152 /* Ignore events in interrupts */
153 if (flags & (TRACE_FLAG_HARDIRQ | TRACE_FLAG_SOFTIRQ))
154 return 0;
155
156 /*
157 * First make sure that this event comes from a PID from
158 * this task (or a task woken by this task)
159 */
160 pid = val;
161 if (!match_pid(fields->pids, pid))
162 return 0;
163
164 type = tep_data_type(tep, record);
165
166 /*
167 * If this event is a kvm_exit, we have our PID
168 * and we can stop processing.
169 */
170 if (type == fields->kvm_exit->id) {
171 fields->found_pid = pid;
172 return -1;
173 }
174
175 if (type != fields->sched_waking->id)
176 return 0;
177
178 ret = tep_read_number_field(fields->sched_next, record->data, &val);
179 if (ret < 0)
180 return 0;
181
182 /* This is a task woken by our task or a chain of wake ups */
183 add_pid(&fields->pids, (int)val);
184 return 0;
185 }
186
/*
 * Look up the thread group id of a task.
 *
 * @pid: PID of the task to look up
 *
 * Reads /proc/<pid>/status and parses the value of its "Tgid:" line.
 *
 * Returns the thread group id, or -1 if the file cannot be opened, has
 * no "Tgid:" line, or the value on that line is not a non-negative
 * decimal number that fits in an int.
 */
static int find_tgid(int pid)
{
	static const char tag[] = "Tgid:";
	const size_t tag_len = sizeof(tag) - 1;
	char path[64];
	char line[256];
	bool at_line_start = true;
	int tgid = -1;
	FILE *fp;
	int len;

	len = snprintf(path, sizeof(path), "/proc/%d/status", pid);
	if (len < 0 || (size_t)len >= sizeof(path))
		return -1;

	fp = fopen(path, "r");
	if (!fp)
		return -1;

	while (fgets(line, sizeof(line), fp)) {
		bool candidate = at_line_start;
		char *end;
		long val;

		/*
		 * A chunk without a newline is only part of an over-long
		 * line; whatever is read next continues that line and must
		 * not be mistaken for the start of a new one.
		 */
		at_line_start = strchr(line, '\n') != NULL;

		if (!candidate || strncmp(line, tag, tag_len) != 0)
			continue;

		/* strtol() skips the white space that follows the tag */
		errno = 0;
		val = strtol(line + tag_len, &end, 10);
		if (end != line + tag_len && errno != ERANGE &&
		    val >= 0 && val <= INT_MAX)
			tgid = (int)val;
		break;
	}
	fclose(fp);

	return tgid;
}
225
stop_trace_connect(struct tracefs_instance * open_instance)226 static int stop_trace_connect(struct tracefs_instance *open_instance)
227 {
228 const char *systems[] = { "kvm", "sched", NULL};
229 struct tep_handle *tep;
230 struct trace_fields trace_fields;
231 int tgid = -1;
232
233 if (!open_instance)
234 return -1;
235
236 /* The connection is finished, stop tracing, we have what we want */
237 tracefs_trace_off(open_instance);
238 tracefs_event_disable(open_instance, NULL, NULL);
239
240 tep = tracefs_local_events_system(NULL, systems);
241
242 trace_fields.sched_waking = tep_find_event_by_name(tep, "sched", "sched_waking");
243 if (!trace_fields.sched_waking)
244 goto out;
245 trace_fields.kvm_exit = tep_find_event_by_name(tep, "kvm", "kvm_exit");
246 if (!trace_fields.kvm_exit)
247 goto out;
248 trace_fields.common_pid = tep_find_common_field(trace_fields.sched_waking,
249 "common_pid");
250 if (!trace_fields.common_pid)
251 goto out;
252 trace_fields.sched_next = tep_find_any_field(trace_fields.sched_waking,
253 "pid");
254 if (!trace_fields.sched_next)
255 goto out;
256
257 trace_fields.found_pid = -1;
258 trace_fields.pids = NULL;
259 add_pid(&trace_fields.pids, getpid());
260 tracefs_iterate_raw_events(tep, open_instance, NULL, 0, callback, &trace_fields);
261 free_pids(trace_fields.pids);
262 out:
263 tracefs_instance_destroy(open_instance);
264 tracefs_instance_free(open_instance);
265
266 if (trace_fields.found_pid > 0)
267 tgid = find_tgid(trace_fields.found_pid);
268
269 return tgid;
270 }
271
272 /*
273 * In order to find the guest that is associated to the given cid,
274 * trace the sched_waking and kvm_exit events, connect to the cid
275 * (doesn't matter what port, use -1 to not connect to anything)
276 * and find what task gets woken up from this code and calls kvm_exit,
277 * then that is the task that is running the guest.
278 * Then look at the /proc/<guest-pid>/status file to find the task group
279 * id (Tgid), and this is the PID of the task running all the threads.
280 */
find_pid_by_cid(struct trace_guest * guest)281 static void find_pid_by_cid(struct trace_guest *guest)
282 {
283 struct tracefs_instance *instance;
284 int fd;
285
286 instance = start_trace_connect();
287 fd = trace_vsock_open(guest->cid, -1);
288 guest->pid = stop_trace_connect(instance);
289 /* Just in case! */
290 if (fd >= 0)
291 close(fd);
292 }
293
/*
 * Look up a guest, registering it first if it is not known yet.
 *
 * @cid:  cid of the guest, 0 if not known
 * @name: name of the guest, NULL if not known
 *
 * The guest is searched by @name first and then, when @cid is not zero,
 * by @cid. When neither search succeeds and both @cid and @name were
 * given, a new guest is added and its PID is looked up.
 *
 * Returns the guest, or NULL if it is not known and could not be added.
 */
struct trace_guest *trace_get_guest(unsigned int cid, const char *name)
{
	struct trace_guest *found;

	if (name) {
		found = get_guest_by_name(name);
		if (found)
			return found;
	}

	/* Without a cid there is nothing left to search for or to add */
	if (cid == 0)
		return NULL;

	found = get_guest_by_cid(cid);
	if (found || !name)
		return found;

	found = add_guest(cid, name);
	if (found)
		find_pid_by_cid(found);

	return found;
}
314
/* Command printing the XML description of a guest, and what to find in it */
#define VM_CID_CMD "virsh dumpxml"
#define VM_CID_LINE "<cid auto="
#define VM_CID_ID "address='"
/*
 * Register the guest called @name, using the cid found in the output of
 * "virsh dumpxml <name>".
 *
 * The first output line that contains both VM_CID_LINE and a valid
 * number after VM_CID_ID provides the cid. The guest is then added and
 * its PID is looked up. Nothing is registered when the command cannot
 * be run or no such line is found.
 */
static void read_guest_cid(char *name)
{
	struct trace_guest *guest;
	char line[512];
	char *cmd;
	FILE *f;

	/*
	 * NOTE(review): @name is passed to the shell unquoted. A guest name
	 * holding white space or shell metacharacters will not be looked up
	 * as intended - confirm that the names handed in here are safe.
	 */
	if (asprintf(&cmd, "%s %s", VM_CID_CMD, name) < 0)
		return;
	f = popen(cmd, "r");
	free(cmd);
	if (f == NULL)
		return;

	while (fgets(line, sizeof(line), f) != NULL) {
		const char *addr;
		char *end;
		long val;

		if (!strstr(line, VM_CID_LINE))
			continue;
		addr = strstr(line, VM_CID_ID);
		if (!addr)
			continue;
		addr += strlen(VM_CID_ID);

		/*
		 * errno has to be cleared first, as strtol() only sets it
		 * on failure. Lines without any digit, and values that do
		 * not fit the unsigned cid, are skipped.
		 */
		errno = 0;
		val = strtol(addr, &end, 10);
		if (end == addr || errno == ERANGE ||
		    val < 0 || (unsigned long)val > UINT_MAX)
			continue;

		guest = add_guest((unsigned int)val, name);
		if (guest)
			find_pid_by_cid(guest);
		break;
	}

	/* close */
	pclose(f);
}
351
/* Command printing the names of the guests, one per line */
#define VM_NAME_CMD "virsh list --name"
/*
 * Register every guest listed by "virsh list --name".
 *
 * Each non-empty output line, stripped of its trailing newline, is
 * handed to read_guest_cid(). Nothing is done if the command cannot be
 * run.
 */
void read_qemu_guests(void)
{
	char name[256];
	FILE *virsh = popen(VM_NAME_CMD, "r");

	if (!virsh)
		return;

	while (fgets(name, sizeof(name), virsh)) {
		size_t last;

		/* Skip empty lines */
		if (name[0] == '\n')
			continue;

		last = strlen(name) - 1;
		if (name[last] == '\n')
			name[last] = '\0';

		read_guest_cid(name);
	}

	/* close */
	pclose(virsh);
}
373
get_guest_vcpu_pid(unsigned int guest_cid,unsigned int guest_vcpu)374 int get_guest_vcpu_pid(unsigned int guest_cid, unsigned int guest_vcpu)
375 {
376 int i;
377
378 if (!guests)
379 return -1;
380
381 for (i = 0; i < guests_len; i++) {
382 if (guests[i].cpu_pid < 0 || guest_vcpu >= guests[i].cpu_max)
383 continue;
384 if (guest_cid == guests[i].cid)
385 return guests[i].cpu_pid[guest_vcpu];
386 }
387 return -1;
388 }
389