xref: /aosp_15_r20/external/trace-cmd/lib/trace-cmd/trace-timesync-kvm.c (revision 58e6ee5f017f6a8912852c892d18457e4bafb554)
// SPDX-License-Identifier: LGPL-2.1
/*
 * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <[email protected]>
 *
 */

#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/stat.h>
#include <dirent.h>
#include <ctype.h>

#include "trace-cmd.h"
#include "trace-cmd-private.h"
#include "tracefs.h"
#include "trace-tsync-local.h"

#define KVM_DEBUG_FS "/sys/kernel/debug/kvm"
#define KVM_DEBUG_OFFSET_FILE	"tsc-offset"
#define KVM_DEBUG_SCALING_FILE	"tsc-scaling-ratio"
#define KVM_DEBUG_FRACTION_FILE	"tsc-scaling-ratio-frac-bits"
#define KVM_DEBUG_VCPU_DIR	"vcpu"
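/*
 * The code below expects per-VM directories under /sys/kernel/debug/kvm
 * whose names start with the owning process PID followed by '-', each
 * containing one "vcpu<N>" subdirectory per virtual CPU that holds the
 * TSC offset and (optionally) scaling files named above.
 */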

/* default KVM scaling values, taken from the Linux kernel */
#define KVM_SCALING_AMD_DEFAULT		(1ULL<<32)
#define KVM_SCALING_INTEL_DEFAULT	(1ULL<<48)

#define KVM_SYNC_PKT_REQUEST	1
#define KVM_SYNC_PKT_RESPONSE	2

typedef __s64 s64;

#define KVM_ACCURACY	0
#define KVM_NAME	"kvm"

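/*
 * Per-connection state: on the host side, paths to the per-vCPU debugfs
 * files; on the guest side, a tep handle, the id of the "raw_data" event
 * and a trace_marker_raw file descriptor used to generate the sync marker.
 */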
struct kvm_clock_sync {
	int vcpu_count;
	char **vcpu_offsets;
	char **vcpu_scalings;
	char **vcpu_frac;
	int marker_fd;
	struct tep_handle *tep;
	int raw_id;
	unsigned long long ts;
};

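/*
 * Packet exchanged between guest (request) and host (response): the guest
 * timestamp of the sync marker, plus the TSC offset, scaling ratio and
 * fractional bits read by the host.
 */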
struct kvm_clock_offset_msg {
	s64	ts;
	s64	offset;
	s64	scaling;
	s64	frac;
};

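/* Read a single long long value from a (debugfs) file; returns 0 on success. */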
static int read_ll_from_file(char *file, long long *res)
{
	char buf[32];
	int ret;
	int fd;

	if (!file)
		return -1;
	fd = open(file, O_RDONLY | O_NONBLOCK);
	if (fd < 0)
		return -1;
	ret = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (ret <= 0)
		return -1;
	/* NUL-terminate before parsing; read() does not do it for us */
	buf[ret] = 0;

	*res = strtoll(buf, NULL, 0);

	return 0;
}

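/*
 * Check that the scaling-ratio and fraction-bits files of one vCPU are
 * consistent: either both readable or both absent.
 */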
static bool kvm_scaling_check_vm_cpu(char *vname, char *cpu)
{
	long long scaling, frac;
	bool has_scaling = false;
	bool has_frac = false;
	char *path;
	int ret;

	if (asprintf(&path, "%s/%s/%s", vname, cpu, KVM_DEBUG_SCALING_FILE) < 0)
		return false;
	ret = read_ll_from_file(path, &scaling);
	free(path);
	if (!ret)
		has_scaling = true;

	if (asprintf(&path, "%s/%s/%s", vname, cpu, KVM_DEBUG_FRACTION_FILE) < 0)
		return false;
	ret = read_ll_from_file(path, &frac);
	free(path);
	if (!ret)
		has_frac = true;

	if (has_scaling != has_frac)
		return false;

	return true;
}

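/* Walk all vcpu* directories of one VM and verify their scaling files. */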
static bool kvm_scaling_check_vm(char *name)
{
	struct dirent *entry;
	char *vdir;
	DIR *dir;

	if (asprintf(&vdir, "%s/%s", KVM_DEBUG_FS, name) < 0)
		return true;

	dir = opendir(vdir);
	if (!dir) {
		free(vdir);
		return true;
	}
	while ((entry = readdir(dir))) {
		if (entry->d_type == DT_DIR && !strncmp(entry->d_name, "vcpu", 4) &&
		    !kvm_scaling_check_vm_cpu(vdir, entry->d_name))
			break;
	}

	closedir(dir);
	free(vdir);
	return entry == NULL;
}
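
/* Verify TSC scaling consistency for every VM directory in KVM debugfs. */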
static bool kvm_scaling_check(void)
{
	struct dirent *entry;
	DIR *dir;

	dir = opendir(KVM_DEBUG_FS);
	if (!dir)
		return true;

	while ((entry = readdir(dir))) {
		if (entry->d_type == DT_DIR && isdigit(entry->d_name[0]) &&
		    !kvm_scaling_check_vm(entry->d_name))
			break;
	}
	closedir(dir);
	return entry == NULL;
}

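/*
 * On the guest there is nothing to probe; on the host, require the KVM
 * debugfs directory and consistent scaling information.
 */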
static bool kvm_support_check(bool guest)
{
	struct stat st;
	int ret;

	if (guest)
		return true;

	ret = stat(KVM_DEBUG_FS, &st);
	if (ret < 0)
		return false;

	if (!S_ISDIR(st.st_mode))
		return false;

	return kvm_scaling_check();
}

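/*
 * Record the paths of the tsc-offset (mandatory), tsc-scaling-ratio and
 * tsc-scaling-ratio-frac-bits (optional) files for the given vCPU.
 */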
static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int cpu, char *dir_str)
{
	struct dirent *entry;
	char path[PATH_MAX];
	DIR *dir;

	dir = opendir(dir_str);
	if (!dir)
		goto error;
	while ((entry = readdir(dir))) {
		if (entry->d_type != DT_DIR) {
			if (!strcmp(entry->d_name, KVM_DEBUG_OFFSET_FILE)) {
				snprintf(path, sizeof(path), "%s/%s",
					 dir_str, entry->d_name);
				kvm->vcpu_offsets[cpu] = strdup(path);
			}
			if (!strcmp(entry->d_name, KVM_DEBUG_SCALING_FILE)) {
				snprintf(path, sizeof(path), "%s/%s",
					 dir_str, entry->d_name);
				kvm->vcpu_scalings[cpu] = strdup(path);
			}
			if (!strcmp(entry->d_name, KVM_DEBUG_FRACTION_FILE)) {
				snprintf(path, sizeof(path), "%s/%s",
					 dir_str, entry->d_name);
				kvm->vcpu_frac[cpu] = strdup(path);
			}
		}
	}
	if (!kvm->vcpu_offsets[cpu])
		goto error;
	closedir(dir);
	return 0;

error:
	if (dir)
		closedir(dir);
	free(kvm->vcpu_offsets[cpu]);
	kvm->vcpu_offsets[cpu] = NULL;
	free(kvm->vcpu_scalings[cpu]);
	kvm->vcpu_scalings[cpu] = NULL;
	free(kvm->vcpu_frac[cpu]);
	kvm->vcpu_frac[cpu] = NULL;
	return -1;
}

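/*
 * Locate the debugfs directory of the VM owned by @pid and record the
 * paths of the per-vCPU debug files for all of its vCPUs.
 */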
static int kvm_open_debug_files(struct kvm_clock_sync *kvm, int pid)
{
	char *vm_dir_str = NULL;
	struct dirent *entry;
	char *pid_str = NULL;
	char path[PATH_MAX];
	long vcpu;
	DIR *dir;
	int i;

	dir = opendir(KVM_DEBUG_FS);
	if (!dir)
		goto error;
	if (asprintf(&pid_str, "%d-", pid) <= 0)
		goto error;
	while ((entry = readdir(dir))) {
		if (!(entry->d_type == DT_DIR &&
		    !strncmp(entry->d_name, pid_str, strlen(pid_str))))
			continue;
		asprintf(&vm_dir_str, "%s/%s", KVM_DEBUG_FS, entry->d_name);
		break;
	}
	closedir(dir);
	dir = NULL;
	if (!vm_dir_str)
		goto error;
	dir = opendir(vm_dir_str);
	if (!dir)
		goto error;
	while ((entry = readdir(dir))) {
		if (!(entry->d_type == DT_DIR &&
		    !strncmp(entry->d_name, KVM_DEBUG_VCPU_DIR, strlen(KVM_DEBUG_VCPU_DIR))))
			continue;
		vcpu = strtol(entry->d_name + strlen(KVM_DEBUG_VCPU_DIR), NULL, 10);
		if (vcpu < 0 || vcpu >= kvm->vcpu_count)
			continue;
		snprintf(path, sizeof(path), "%s/%s", vm_dir_str, entry->d_name);
		if (kvm_open_vcpu_dir(kvm, vcpu, path) < 0)
			goto error;
	}
	for (i = 0; i < kvm->vcpu_count; i++) {
		if (!kvm->vcpu_offsets[i])
			goto error;
	}
	closedir(dir);
	free(pid_str);
	free(vm_dir_str);
	return 0;
error:
	free(pid_str);
	free(vm_dir_str);
	if (dir)
		closedir(dir);
	return -1;
}

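/* Host side initialization: allocate the per-vCPU path arrays and resolve the debug files. */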
static int kvm_clock_sync_init_host(struct tracecmd_time_sync *tsync,
				    struct kvm_clock_sync *kvm)
{
	kvm->vcpu_count = tsync->vcpu_count;
	kvm->vcpu_offsets = calloc(kvm->vcpu_count, sizeof(char *));
	kvm->vcpu_scalings = calloc(kvm->vcpu_count, sizeof(char *));
	kvm->vcpu_frac = calloc(kvm->vcpu_count, sizeof(char *));
	if (!kvm->vcpu_offsets || !kvm->vcpu_scalings || !kvm->vcpu_frac)
		goto error;
	if (kvm_open_debug_files(kvm, tsync->guest_pid) < 0)
		goto error;
	return 0;

error:
	free(kvm->vcpu_offsets);
	free(kvm->vcpu_scalings);
	free(kvm->vcpu_frac);
	return -1;
}

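/*
 * Guest side initialization: load the ftrace events of the tracing
 * instance, look up the "raw_data" event used for the sync marker and
 * open trace_marker_raw for writing.
 */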
static int kvm_clock_sync_init_guest(struct tracecmd_time_sync *tsync,
				     struct kvm_clock_sync *kvm)
{
	const char *systems[] = {"ftrace", NULL};
	struct clock_sync_context *clock_context;
	struct tep_event *raw;
	char *path;

	clock_context = (struct clock_sync_context *)tsync->context;
	path = tracefs_instance_get_dir(clock_context->instance);
	if (!path)
		goto error;
	kvm->tep = tracefs_local_events_system(path, systems);
	tracefs_put_tracing_file(path);
	if (!kvm->tep)
		goto error;
	raw = tep_find_event_by_name(kvm->tep, "ftrace", "raw_data");
	if (!raw)
		goto error;

	kvm->raw_id = raw->id;
	tep_set_file_bigendian(kvm->tep, tracecmd_host_bigendian());
	tep_set_local_bigendian(kvm->tep, tracecmd_host_bigendian());

	path = tracefs_instance_get_file(clock_context->instance, "trace_marker_raw");
	if (!path)
		goto error;
	kvm->marker_fd = open(path, O_WRONLY);
	tracefs_put_tracing_file(path);

	return 0;

error:
	if (kvm->tep)
		tep_free(kvm->tep);
	if (kvm->marker_fd >= 0)
		close(kvm->marker_fd);

	return -1;
}

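/* Common entry point: dispatch to guest or host initialization. */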
static int kvm_clock_sync_init(struct tracecmd_time_sync *tsync)
{
	struct clock_sync_context *clock_context;
	struct kvm_clock_sync *kvm;
	int ret;

	if (!tsync || !tsync->context)
		return -1;
	clock_context = (struct clock_sync_context *)tsync->context;

	if (!kvm_support_check(clock_context->is_guest))
		return -1;
	kvm = calloc(1, sizeof(struct kvm_clock_sync));
	if (!kvm)
		return -1;
	kvm->marker_fd = -1;
	if (clock_context->is_guest)
		ret = kvm_clock_sync_init_guest(tsync, kvm);
	else
		ret = kvm_clock_sync_init_host(tsync, kvm);
	if (ret < 0)
		goto error;

	clock_context->proto_data = kvm;
	return 0;

error:
	free(kvm);
	return -1;
}

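/* Free the per-vCPU path strings, the tep handle and the marker file descriptor. */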
static int kvm_clock_sync_free(struct tracecmd_time_sync *tsync)
{
	struct clock_sync_context *clock_context;
	struct kvm_clock_sync *kvm = NULL;
	int i;

	clock_context = (struct clock_sync_context *)tsync->context;
	if (clock_context)
		kvm = (struct kvm_clock_sync *)clock_context->proto_data;
	if (kvm) {
		for (i = 0; i < kvm->vcpu_count; i++) {
			free(kvm->vcpu_offsets[i]);
			kvm->vcpu_offsets[i] = NULL;
			free(kvm->vcpu_scalings[i]);
			kvm->vcpu_scalings[i] = NULL;
			free(kvm->vcpu_frac[i]);
			kvm->vcpu_frac[i] = NULL;
		}
		if (kvm->tep)
			tep_free(kvm->tep);
		if (kvm->marker_fd >= 0)
			close(kvm->marker_fd);
		free(kvm);
	}
	return -1;
}

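/*
 * Host side of one synchronization probe: read the vCPU's TSC offset,
 * scaling ratio and fraction bits from debugfs, wait for the guest's
 * request packet and send the values back in the response.
 */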
static int kvm_clock_host(struct tracecmd_time_sync *tsync,
			  long long *offset, long long *scaling, long long *frac,
			  long long *timestamp, unsigned int cpu)
{
	char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH];
	struct clock_sync_context *clock_context;
	struct kvm_clock_offset_msg packet;
	struct kvm_clock_sync *kvm = NULL;
	long long kvm_scaling = 1;
	unsigned int sync_msg;
	long long kvm_offset;
	long long kvm_frac = 0;
	unsigned int size;
	char *msg;
	int ret;

	clock_context = (struct clock_sync_context *)tsync->context;
	if (clock_context)
		kvm = (struct kvm_clock_sync *)clock_context->proto_data;
	if (!kvm || !kvm->vcpu_offsets || !kvm->vcpu_offsets[0])
		return -1;
	if (cpu >= kvm->vcpu_count)
		return -1;
	ret = read_ll_from_file(kvm->vcpu_offsets[cpu], &kvm_offset);
	if (ret < 0)
		return -1;

	if (kvm->vcpu_scalings && kvm->vcpu_scalings[cpu]) {
		read_ll_from_file(kvm->vcpu_scalings[cpu], &kvm_scaling);
		if (kvm_scaling == KVM_SCALING_AMD_DEFAULT ||
		    kvm_scaling == KVM_SCALING_INTEL_DEFAULT)
			kvm_scaling = 1;
	}

	if (kvm->vcpu_frac && kvm->vcpu_frac[cpu] && kvm_scaling != 1)
		ret = read_ll_from_file(kvm->vcpu_frac[cpu], &kvm_frac);
	msg = (char *)&packet;
	size = sizeof(packet);
	ret = tracecmd_msg_recv_time_sync(tsync->msg_handle,
					  sync_proto, &sync_msg,
					  &size, &msg);
	if (ret || strncmp(sync_proto, KVM_NAME, TRACECMD_TSYNC_PNAME_LENGTH) ||
	    sync_msg != KVM_SYNC_PKT_REQUEST)
		return -1;

	packet.offset = -kvm_offset;
	packet.scaling = kvm_scaling;
	packet.frac = kvm_frac;
	ret = tracecmd_msg_send_time_sync(tsync->msg_handle, KVM_NAME,
					  KVM_SYNC_PKT_RESPONSE, sizeof(packet),
					  (char *)&packet);
	if (ret)
		return -1;

	*scaling = packet.scaling;
	*offset = packet.offset;
	*frac = kvm_frac;
	*timestamp = packet.ts;

	return 0;
}

#define KVM_EVENT_MARKER	"kvm sync event"
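/*
 * tracefs_iterate_raw_events() callback: look for the raw_data event
 * carrying KVM_EVENT_MARKER and remember its timestamp.
 */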
static int kvm_marker_find(struct tep_event *event, struct tep_record *record,
			   int cpu, void *context)
{
	struct kvm_clock_sync *kvm = (struct kvm_clock_sync *)context;
	struct tep_format_field *field;
	struct tep_format_field *id;
	char *marker;

	/* Make sure this is our event */
	if (event->id != kvm->raw_id)
		return 0;
	id = tep_find_field(event, "id");
	field = tep_find_field(event, "buf");
	if (field && id &&
	    record->size >= (id->offset + strlen(KVM_EVENT_MARKER) + 1)) {
		marker = (char *)(record->data + id->offset);
		if (!strcmp(marker, KVM_EVENT_MARKER)) {
			kvm->ts = record->ts;
			return 1;
		}
	}

	return 0;
}

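/*
 * Guest side of one synchronization probe: write the marker into
 * trace_marker_raw, find its timestamp in the trace buffer, send it to
 * the host and read back the offset, scaling and fraction values.
 */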
static int kvm_clock_guest(struct tracecmd_time_sync *tsync,
			   long long *offset,
			   long long *scaling,
			   long long *frac,
			   long long *timestamp)
{
	char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH];
	struct clock_sync_context *clock_context;
	struct kvm_clock_offset_msg packet;
	struct kvm_clock_sync *kvm = NULL;
	unsigned int sync_msg;
	unsigned int size;
	char *msg;
	int ret;

	clock_context = (struct clock_sync_context *)tsync->context;
	if (clock_context)
		kvm = (struct kvm_clock_sync *)clock_context->proto_data;
	if (!kvm)
		return -1;
	kvm->ts = 0;
	memset(&packet, 0, sizeof(packet));
	tracefs_instance_file_write(clock_context->instance, "trace", "\0");
	write(kvm->marker_fd, KVM_EVENT_MARKER, strlen(KVM_EVENT_MARKER) + 1);
	kvm->ts = 0;
	tracefs_iterate_raw_events(kvm->tep, clock_context->instance,
				   NULL, 0, kvm_marker_find, kvm);
	packet.ts = kvm->ts;
	ret = tracecmd_msg_send_time_sync(tsync->msg_handle, KVM_NAME,
					  KVM_SYNC_PKT_REQUEST, sizeof(packet),
					  (char *)&packet);
	if (ret)
		return -1;
	msg = (char *)&packet;
	size = sizeof(packet);
	ret = tracecmd_msg_recv_time_sync(tsync->msg_handle,
					  sync_proto, &sync_msg,
					  &size, &msg);
	if (ret || strncmp(sync_proto, KVM_NAME, TRACECMD_TSYNC_PNAME_LENGTH) ||
	    sync_msg != KVM_SYNC_PKT_RESPONSE)
		return -1;

	*scaling = packet.scaling;
	*offset = packet.offset;
	*frac = packet.frac;
	*timestamp = packet.ts;
	return 0;
}

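/* Per-sample callback of the "kvm" protocol: guest or host path, depending on the role. */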
static int kvm_clock_sync_calc(struct tracecmd_time_sync *tsync,
			       long long *offset, long long *scaling, long long *frac,
			       long long *timestamp, unsigned int cpu)
{
	struct clock_sync_context *clock_context;
	int ret;

	if (!tsync || !tsync->context)
		return -1;

	clock_context = (struct clock_sync_context *)tsync->context;

	if (clock_context->is_guest)
		ret = kvm_clock_guest(tsync, offset, scaling, frac, timestamp);
	else
		ret = kvm_clock_host(tsync, offset, scaling, frac, timestamp, cpu);
	return ret;
}

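/*
 * Register the "kvm" time sync protocol. The guest role is always
 * offered; the host role (and the x86-tsc clock) only if the KVM
 * debugfs files are usable on this system.
 */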
int kvm_clock_sync_register(void)
{
	int role = TRACECMD_TIME_SYNC_ROLE_GUEST;
	int clock = 0;

	if (kvm_support_check(false)) {
		role |= TRACECMD_TIME_SYNC_ROLE_HOST;
		clock = TRACECMD_CLOCK_X86_TSC;
	}
	return tracecmd_tsync_proto_register(KVM_NAME, KVM_ACCURACY,
					     role, clock, 0,
					     kvm_clock_sync_init,
					     kvm_clock_sync_free,
					     kvm_clock_sync_calc);
}

int kvm_clock_sync_unregister(void)
{
	return tracecmd_tsync_proto_unregister(KVM_NAME);
}