1#!/usr/bin/env python 2# 3# trace Trace a function and print a trace message based on its 4# parameters, with an optional filter. 5# 6# usage: trace [-h] [-p PID] [-L TID] [-v] [-Z STRING_SIZE] [-S] [-c cgroup_path] 7# [-M MAX_EVENTS] [-s SYMBOLFILES] [-T] [-t] [-K] [-U] [-a] [-I header] 8# [-A] 9# probe [probe ...] 10# 11# Licensed under the Apache License, Version 2.0 (the "License") 12# Copyright (C) 2016 Sasha Goldshtein. 13 14from __future__ import print_function 15from bcc import BPF, USDT, StrcmpRewrite 16from functools import partial 17from time import strftime 18import time 19import argparse 20import re 21import ctypes as ct 22import os 23import traceback 24import sys 25 26class Probe(object): 27 probe_count = 0 28 streq_index = 0 29 max_events = None 30 event_count = 0 31 first_ts = 0 32 first_ts_real = None 33 print_time = False 34 print_unix_timestamp = False 35 use_localtime = True 36 time_field = False 37 print_cpu = False 38 print_address = False 39 tgid = -1 40 pid = -1 41 uid = -1 42 page_cnt = None 43 build_id_enabled = False 44 aggregate = False 45 symcount = {} 46 47 @classmethod 48 def configure(cls, args): 49 cls.max_events = args.max_events 50 cls.print_time = args.timestamp or args.time 51 cls.print_unix_timestamp = args.unix_timestamp 52 cls.use_localtime = not args.timestamp 53 cls.time_field = cls.print_time and (not cls.use_localtime) 54 cls.print_cpu = args.print_cpu 55 cls.print_address = args.address 56 cls.first_ts = BPF.monotonic_time() 57 cls.first_ts_real = time.time() 58 cls.tgid = args.tgid or -1 59 cls.pid = args.pid or -1 60 cls.uid = args.uid or -1 61 cls.page_cnt = args.buffer_pages 62 cls.bin_cmp = args.bin_cmp 63 cls.build_id_enabled = args.sym_file_list is not None 64 cls.aggregate = args.aggregate 65 if cls.aggregate and cls.max_events is None: 66 raise ValueError("-M/--max-events should be specified" 67 " with -A/--aggregate") 68 69 def __init__(self, probe, string_size, kernel_stack, user_stack, 70 cgroup_map_name, name, msg_filter): 71 self.usdt = None 72 self.streq_functions = "" 73 self.raw_probe = probe 74 self.string_size = string_size 75 self.kernel_stack = kernel_stack 76 self.user_stack = user_stack 77 self.probe_user_list = set() 78 Probe.probe_count += 1 79 self._parse_probe() 80 self.probe_num = Probe.probe_count 81 self.probe_name = "probe_%s_%d" % \ 82 (self._display_function(), self.probe_num) 83 self.probe_name = re.sub(r'[^A-Za-z0-9_]', '_', 84 self.probe_name) 85 self.cgroup_map_name = cgroup_map_name 86 if name is None: 87 # An empty bytestring is always contained in the command 88 # name so this will always succeed. 89 self.name = b'' 90 else: 91 self.name = name.encode('ascii') 92 self.msg_filter = msg_filter 93 # compiler can generate proper codes for function 94 # signatures with "syscall__" prefix 95 if self.is_syscall_kprobe: 96 self.probe_name = "syscall__" + self.probe_name[6:] 97 98 def __str__(self): 99 return "%s:%s:%s FLT=%s ACT=%s/%s" % (self.probe_type, 100 self.library, self._display_function(), self.filter, 101 self.types, self.values) 102 103 def is_default_action(self): 104 return self.python_format == "" 105 106 def _bail(self, error): 107 raise ValueError("error in probe '%s': %s" % 108 (self.raw_probe, error)) 109 110 def _parse_probe(self): 111 text = self.raw_probe 112 113 # There might be a function signature preceding the actual 114 # filter/print part, or not. Find the probe specifier first -- 115 # it ends with either a space or an open paren ( for the 116 # function signature part. 117 # opt. signature 118 # probespec | rest 119 # --------- ---------- -- 120 (spec, sig, rest) = re.match(r'([^ \t\(]+)(\([^\(]*\))?(.*)', 121 text).groups() 122 123 self._parse_spec(spec) 124 # Remove the parens 125 self.signature = sig[1:-1] if sig else None 126 if self.signature and self.probe_type in ['u', 't']: 127 self._bail("USDT and tracepoint probes can't have " + 128 "a function signature; use arg1, arg2, " + 129 "... instead") 130 131 text = rest.lstrip() 132 # If we now have a (, wait for the balanced closing ) and that 133 # will be the predicate 134 self.filter = None 135 if len(text) > 0 and text[0] == "(": 136 balance = 1 137 for i in range(1, len(text)): 138 if text[i] == "(": 139 balance += 1 140 if text[i] == ")": 141 balance -= 1 142 if balance == 0: 143 self._parse_filter(text[:i + 1]) 144 text = text[i + 1:] 145 break 146 if self.filter is None: 147 self._bail("unmatched end of predicate") 148 149 if self.filter is None: 150 self.filter = "1" 151 152 # The remainder of the text is the printf action 153 self._parse_action(text.lstrip()) 154 155 def _parse_offset(self, func_and_offset): 156 func, offset_str = func_and_offset.split("+") 157 try: 158 if "x" in offset_str or "X" in offset_str: 159 offset = int(offset_str, 16) 160 else: 161 offset = int(offset_str) 162 except ValueError: 163 self._bail("invalid offset format " + 164 " '%s', must be decimal or hexadecimal" % offset_str) 165 166 return func, offset 167 168 def _parse_spec(self, spec): 169 parts = spec.split(":") 170 # Two special cases: 'func' means 'p::func', 'lib:func' means 171 # 'p:lib:func'. Other combinations need to provide an empty 172 # value between delimiters, e.g. 'r::func' for a kretprobe on 173 # the function func. 174 if len(parts) == 1: 175 parts = ["p", "", parts[0]] 176 elif len(parts) == 2: 177 parts = ["p", parts[0], parts[1]] 178 if len(parts[0]) == 0: 179 self.probe_type = "p" 180 elif parts[0] in ["p", "r", "t", "u"]: 181 self.probe_type = parts[0] 182 else: 183 self._bail("probe type must be '', 'p', 't', 'r', " + 184 "or 'u', but got '%s'" % parts[0]) 185 self.offset = 0 186 if "+" in parts[-1]: 187 parts[-1], self.offset = self._parse_offset(parts[-1]) 188 189 if self.probe_type == "t": 190 self.tp_category = parts[1] 191 self.tp_event = parts[2] 192 self.library = "" # kernel 193 self.function = "" # from TRACEPOINT_PROBE 194 elif self.probe_type == "u": 195 # u:<library>[:<provider>]:<probe> where :<provider> is optional 196 self.library = parts[1] 197 self.usdt_name = ":".join(parts[2:]) 198 self.function = "" # no function, just address 199 # We will discover the USDT provider by matching on 200 # the USDT name in the specified library 201 self._find_usdt_probe() 202 else: 203 self.library = ':'.join(parts[1:-1]) 204 self.function = parts[-1] 205 206 # only x64 syscalls needs checking, no other syscall wrapper yet. 207 self.is_syscall_kprobe = False 208 if self.probe_type == "p" and len(self.library) == 0 and \ 209 self.function[:10] == "__x64_sys_": 210 self.is_syscall_kprobe = True 211 212 def _find_usdt_probe(self): 213 target = Probe.pid if Probe.pid and Probe.pid != -1 \ 214 else Probe.tgid 215 self.usdt = USDT(path=self.library, pid=target) 216 217 parts = self.usdt_name.split(":") 218 if len(parts) == 1: 219 provider_name = None 220 usdt_name = parts[0].encode("ascii") 221 else: 222 provider_name = parts[0].encode("ascii") 223 usdt_name = parts[1].encode("ascii") 224 for probe in self.usdt.enumerate_probes(): 225 if ((not provider_name or probe.provider == provider_name) 226 and probe.name == usdt_name): 227 return # Found it, will enable later 228 self._bail("unrecognized USDT probe %s" % self.usdt_name) 229 230 def _parse_filter(self, filt): 231 self.filter = self._rewrite_expr(filt) 232 233 def _parse_types(self, fmt): 234 for match in re.finditer( 235 r'[^%]%(s|u|d|lu|llu|ld|lld|hu|hd|x|lx|llx|c|K|U)', fmt): 236 self.types.append(match.group(1)) 237 fmt = re.sub(r'([^%]%)(u|d|lu|llu|ld|lld|hu|hd)', r'\1d', fmt) 238 fmt = re.sub(r'([^%]%)(x|lx|llx)', r'\1x', fmt) 239 fmt = re.sub('%K|%U', '%s', fmt) 240 self.python_format = fmt.strip('"') 241 242 def _parse_action(self, action): 243 self.values = [] 244 self.types = [] 245 self.python_format = "" 246 if len(action) == 0: 247 return 248 249 action = action.strip() 250 match = re.search(r'(\".*?\"),?(.*)', action) 251 if match is None: 252 self._bail("expected format string in \"s") 253 254 self.raw_format = match.group(1) 255 self._parse_types(self.raw_format) 256 for part in re.split('(?<!"),', match.group(2)): 257 part = self._rewrite_expr(part) 258 if len(part) > 0: 259 self.values.append(part) 260 261 aliases_arg = { 262 "arg1": "PT_REGS_PARM1(ctx)", 263 "arg2": "PT_REGS_PARM2(ctx)", 264 "arg3": "PT_REGS_PARM3(ctx)", 265 "arg4": "PT_REGS_PARM4(ctx)", 266 "arg5": "PT_REGS_PARM5(ctx)", 267 "arg6": "PT_REGS_PARM6(ctx)", 268 } 269 270 aliases_indarg = { 271 "arg1": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);" 272 " bpf_probe_read_kernel(&_val, sizeof(_val), &(PT_REGS_PARM1(_ctx))); _val;})", 273 "arg2": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);" 274 " bpf_probe_read_kernel(&_val, sizeof(_val), &(PT_REGS_PARM2(_ctx))); _val;})", 275 "arg3": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);" 276 " bpf_probe_read_kernel(&_val, sizeof(_val), &(PT_REGS_PARM3(_ctx))); _val;})", 277 "arg4": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);" 278 " bpf_probe_read_kernel(&_val, sizeof(_val), &(PT_REGS_PARM4(_ctx))); _val;})", 279 "arg5": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);" 280 " bpf_probe_read_kernel(&_val, sizeof(_val), &(PT_REGS_PARM5(_ctx))); _val;})", 281 "arg6": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);" 282 " bpf_probe_read_kernel(&_val, sizeof(_val), &(PT_REGS_PARM6(_ctx))); _val;})", 283 } 284 285 aliases_common = { 286 "retval": "PT_REGS_RC(ctx)", 287 "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)", 288 "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)", 289 "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)", 290 "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)", 291 "$cpu": "bpf_get_smp_processor_id()", 292 "$task": "((struct task_struct *)bpf_get_current_task())" 293 } 294 295 def _rewrite_expr(self, expr): 296 # Find the occurances of any arg[1-6]@user. Use it later to 297 # identify bpf_probe_read_user 298 for matches in re.finditer(r'(arg[1-6])(@user)', expr): 299 if matches.group(1).strip() not in self.probe_user_list: 300 self.probe_user_list.add(matches.group(1).strip()) 301 # Remove @user occurrences from arg before resolving to its 302 # corresponding aliases. 303 expr = re.sub(r'(arg[1-6])@user', r'\1', expr) 304 rdict = StrcmpRewrite.rewrite_expr(expr, 305 self.bin_cmp, self.library, 306 self.probe_user_list, self.streq_functions, 307 Probe.streq_index) 308 expr = rdict["expr"] 309 self.streq_functions = rdict["streq_functions"] 310 Probe.streq_index = rdict["probeid"] 311 alias_to_check = Probe.aliases_indarg \ 312 if self.is_syscall_kprobe \ 313 else Probe.aliases_arg 314 # For USDT probes, we replace argN values with the 315 # actual arguments for that probe obtained using 316 # bpf_readarg_N macros emitted at BPF construction. 317 if not self.probe_type == "u": 318 for alias, replacement in alias_to_check.items(): 319 expr = expr.replace(alias, replacement) 320 for alias, replacement in Probe.aliases_common.items(): 321 expr = expr.replace(alias, replacement) 322 return expr 323 324 c_type = {"u": "unsigned int", "d": "int", 325 "lu": "unsigned long", "ld": "long", 326 "llu": "unsigned long long", "lld": "long long", 327 "hu": "unsigned short", "hd": "short", 328 "x": "unsigned int", "lx": "unsigned long", 329 "llx": "unsigned long long", 330 "c": "char", "K": "unsigned long long", 331 "U": "unsigned long long"} 332 fmt_types = c_type.keys() 333 334 def _generate_field_decl(self, idx): 335 field_type = self.types[idx] 336 if field_type == "s": 337 return "char v%d[%d];\n" % (idx, self.string_size) 338 if field_type in Probe.fmt_types: 339 return "%s v%d;\n" % (Probe.c_type[field_type], idx) 340 self._bail("unrecognized format specifier %s" % field_type) 341 342 def _generate_data_decl(self): 343 # The BPF program will populate values into the struct 344 # according to the format string, and the Python program will 345 # construct the final display string. 346 self.events_name = "%s_events" % self.probe_name 347 self.struct_name = "%s_data_t" % self.probe_name 348 self.stacks_name = "%s_stacks" % self.probe_name 349 stack_type = "BPF_STACK_TRACE" if self.build_id_enabled is False \ 350 else "BPF_STACK_TRACE_BUILDID" 351 stack_table = "%s(%s, 1024);" % (stack_type, self.stacks_name) \ 352 if (self.kernel_stack or self.user_stack) else "" 353 data_fields = "" 354 for i, field_type in enumerate(self.types): 355 data_fields += " " + \ 356 self._generate_field_decl(i) 357 time_str = "u64 timestamp_ns;" if self.time_field else "" 358 cpu_str = "int cpu;" if self.print_cpu else "" 359 kernel_stack_str = " int kernel_stack_id;" \ 360 if self.kernel_stack else "" 361 user_stack_str = " int user_stack_id;" \ 362 if self.user_stack else "" 363 364 text = """ 365struct %s 366{ 367%s 368%s 369 u32 tgid; 370 u32 pid; 371 char comm[TASK_COMM_LEN]; 372%s 373%s 374%s 375 u32 uid; 376}; 377 378BPF_PERF_OUTPUT(%s); 379%s 380""" 381 return text % (self.struct_name, time_str, cpu_str, data_fields, 382 kernel_stack_str, user_stack_str, 383 self.events_name, stack_table) 384 385 def _generate_field_assign(self, idx): 386 field_type = self.types[idx] 387 expr = self.values[idx].strip() 388 text = "" 389 if self.probe_type == "u" and expr[0:3] == "arg": 390 arg_index = int(expr[3]) 391 arg_ctype = self.usdt.get_probe_arg_ctype( 392 self.usdt_name, arg_index - 1) 393 text = (" %s %s = 0;\n" + 394 " bpf_usdt_readarg(%s, ctx, &%s);\n") \ 395 % (arg_ctype, expr, expr[3], expr) 396 probe_read_func = "bpf_probe_read_kernel" 397 if field_type == "s": 398 if self.library: 399 probe_read_func = "bpf_probe_read_user" 400 else: 401 alias_to_check = Probe.aliases_indarg \ 402 if self.is_syscall_kprobe \ 403 else Probe.aliases_arg 404 for arg, alias in alias_to_check.items(): 405 if alias == expr and arg in self.probe_user_list: 406 probe_read_func = "bpf_probe_read_user" 407 break 408 return text + """ 409 if (%s != 0) { 410 void *__tmp = (void *)%s; 411 %s(&__data.v%d, sizeof(__data.v%d), __tmp); 412 } 413 """ % (expr, expr, probe_read_func, idx, idx) 414 if field_type in Probe.fmt_types: 415 return text + " __data.v%d = (%s)%s;\n" % \ 416 (idx, Probe.c_type[field_type], expr) 417 self._bail("unrecognized field type %s" % field_type) 418 419 def _generate_usdt_filter_read(self): 420 text = "" 421 if self.probe_type != "u": 422 return text 423 for arg, _ in Probe.aliases_arg.items(): 424 if not (arg in self.filter): 425 continue 426 arg_index = int(arg.replace("arg", "")) 427 arg_ctype = self.usdt.get_probe_arg_ctype( 428 self.usdt_name, arg_index - 1) 429 if not arg_ctype: 430 self._bail("Unable to determine type of {} " 431 "in the filter".format(arg)) 432 text += """ 433 {} {}_filter; 434 bpf_usdt_readarg({}, ctx, &{}_filter); 435 """.format(arg_ctype, arg, arg_index, arg) 436 self.filter = self.filter.replace( 437 arg, "{}_filter".format(arg)) 438 return text 439 440 def generate_program(self, include_self): 441 data_decl = self._generate_data_decl() 442 if Probe.pid != -1: 443 pid_filter = """ 444 if (__pid != %d) { return 0; } 445 """ % Probe.pid 446 # uprobes can have a built-in tgid filter passed to 447 # attach_uprobe, hence the check here -- for kprobes, we 448 # need to do the tgid test by hand: 449 elif len(self.library) == 0 and Probe.tgid != -1: 450 pid_filter = """ 451 if (__tgid != %d) { return 0; } 452 """ % Probe.tgid 453 elif not include_self: 454 pid_filter = """ 455 if (__tgid == %d) { return 0; } 456 """ % os.getpid() 457 else: 458 pid_filter = "" 459 460 if Probe.uid != -1: 461 uid_filter = """ 462 if (__uid != %d) { return 0; } 463 """ % Probe.uid 464 else: 465 uid_filter = "" 466 467 if self.cgroup_map_name is not None: 468 cgroup_filter = """ 469 if (%s.check_current_task(0) <= 0) { return 0; } 470 """ % self.cgroup_map_name 471 else: 472 cgroup_filter = "" 473 474 prefix = "" 475 signature = "struct pt_regs *ctx" 476 if self.signature: 477 signature += ", " + self.signature 478 479 data_fields = "" 480 for i, expr in enumerate(self.values): 481 data_fields += self._generate_field_assign(i) 482 483 if self.probe_type == "t": 484 heading = "TRACEPOINT_PROBE(%s, %s)" % \ 485 (self.tp_category, self.tp_event) 486 ctx_name = "args" 487 else: 488 heading = "int %s(%s)" % (self.probe_name, signature) 489 ctx_name = "ctx" 490 491 time_str = """ 492 __data.timestamp_ns = bpf_ktime_get_ns();""" if self.time_field else "" 493 cpu_str = """ 494 __data.cpu = bpf_get_smp_processor_id();""" if self.print_cpu else "" 495 stack_trace = "" 496 if self.user_stack: 497 stack_trace += """ 498 __data.user_stack_id = %s.get_stackid( 499 %s, BPF_F_USER_STACK 500 );""" % (self.stacks_name, ctx_name) 501 if self.kernel_stack: 502 stack_trace += """ 503 __data.kernel_stack_id = %s.get_stackid( 504 %s, 0 505 );""" % (self.stacks_name, ctx_name) 506 507 text = heading + """ 508{ 509 u64 __pid_tgid = bpf_get_current_pid_tgid(); 510 u32 __tgid = __pid_tgid >> 32; 511 u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half 512 u32 __uid = bpf_get_current_uid_gid(); 513 %s 514 %s 515 %s 516 %s 517 %s 518 if (!(%s)) return 0; 519 520 struct %s __data = {0}; 521 %s 522 %s 523 __data.tgid = __tgid; 524 __data.pid = __pid; 525 __data.uid = __uid; 526 bpf_get_current_comm(&__data.comm, sizeof(__data.comm)); 527%s 528%s 529 %s.perf_submit(%s, &__data, sizeof(__data)); 530 return 0; 531} 532""" 533 text = text % (pid_filter, uid_filter, cgroup_filter, prefix, 534 self._generate_usdt_filter_read(), self.filter, 535 self.struct_name, time_str, cpu_str, data_fields, 536 stack_trace, self.events_name, ctx_name) 537 538 return self.streq_functions + data_decl + "\n" + text 539 540 @classmethod 541 def _time_off_str(cls, timestamp_ns): 542 offset = 1e-9 * (timestamp_ns - cls.first_ts) 543 if cls.print_unix_timestamp: 544 return "%.6f" % (offset + cls.first_ts_real) 545 else: 546 return "%.6f" % offset 547 548 def _display_function(self): 549 if self.probe_type == 'p' or self.probe_type == 'r': 550 return self.function 551 elif self.probe_type == 'u': 552 return self.usdt_name 553 else: # self.probe_type == 't' 554 return self.tp_event 555 556 def _stack_to_string(self, bpf, stack_id, tgid): 557 if stack_id < 0: 558 return (" %d" % stack_id) 559 560 stackstr = '' 561 stack = list(bpf.get_table(self.stacks_name).walk(stack_id)) 562 for addr in stack: 563 stackstr += ' ' 564 if Probe.print_address: 565 stackstr += ("%16x " % addr) 566 symstr = bpf.sym(addr, tgid, show_module=True, show_offset=True) 567 stackstr += ('%s\n' % (symstr.decode('utf-8'))) 568 569 return stackstr 570 571 def _format_message(self, bpf, tgid, values): 572 # Replace each %K with kernel sym and %U with user sym in tgid 573 kernel_placeholders = [i for i, t in enumerate(self.types) 574 if t == 'K'] 575 user_placeholders = [i for i, t in enumerate(self.types) 576 if t == 'U'] 577 string_placeholders = [i for i, t in enumerate(self.types) 578 if t == 's'] 579 for kp in kernel_placeholders: 580 values[kp] = bpf.ksym(values[kp], show_offset=True) 581 for up in user_placeholders: 582 values[up] = bpf.sym(values[up], tgid, 583 show_module=True, show_offset=True) 584 for sp in string_placeholders: 585 values[sp] = values[sp].decode('utf-8', 'replace') 586 return self.python_format % tuple(values) 587 588 def print_aggregate_events(self): 589 for k, v in sorted(self.symcount.items(), key=lambda item: \ 590 item[1], reverse=True): 591 print("%s-->COUNT %d\n\n" % (k, v), end="") 592 593 def print_event(self, bpf, cpu, data, size): 594 event = bpf[self.events_name].event(data) 595 if self.name not in event.comm: 596 return 597 values = list(map(lambda i: getattr(event, "v%d" % i), 598 range(0, len(self.values)))) 599 msg = self._format_message(bpf, event.tgid, values) 600 if self.msg_filter and self.msg_filter not in msg: 601 return 602 eventstr = '' 603 if Probe.print_time: 604 time = strftime("%H:%M:%S") if Probe.use_localtime else \ 605 Probe._time_off_str(event.timestamp_ns) 606 if Probe.print_unix_timestamp: 607 eventstr += ("%-17s " % time[:17]) 608 else: 609 eventstr += ("%-8s " % time[:8]) 610 if Probe.print_cpu: 611 eventstr += ("%-3s " % event.cpu) 612 eventstr += ("%-7d %-7d %-15s %-16s %s\n" % 613 (event.tgid, event.pid, 614 event.comm.decode('utf-8', 'replace'), 615 self._display_function(), msg)) 616 617 if self.kernel_stack: 618 eventstr += self._stack_to_string(bpf, event.kernel_stack_id, -1) 619 if self.user_stack: 620 eventstr += self._stack_to_string(bpf, event.user_stack_id, event.tgid) 621 622 if self.aggregate is False: 623 print(eventstr, end="") 624 if self.kernel_stack or self.user_stack: 625 print("") 626 else: 627 if eventstr in self.symcount: 628 self.symcount[eventstr] += 1 629 else: 630 self.symcount[eventstr] = 1 631 632 Probe.event_count += 1 633 if Probe.max_events is not None and \ 634 Probe.event_count >= Probe.max_events: 635 if self.aggregate: 636 self.print_aggregate_events() 637 sys.stdout.flush() 638 exit() 639 640 def attach(self, bpf, verbose): 641 if len(self.library) == 0: 642 self._attach_k(bpf) 643 else: 644 self._attach_u(bpf) 645 callback = partial(self.print_event, bpf) 646 bpf[self.events_name].open_perf_buffer(callback, 647 page_cnt=self.page_cnt) 648 649 def _attach_k(self, bpf): 650 if self.probe_type == "r": 651 bpf.attach_kretprobe(event=self.function, 652 fn_name=self.probe_name) 653 elif self.probe_type == "p": 654 bpf.attach_kprobe(event=self.function, 655 fn_name=self.probe_name, 656 event_off=self.offset) 657 # Note that tracepoints don't need an explicit attach 658 659 def _attach_u(self, bpf): 660 libpath = BPF.find_library(self.library) 661 if libpath is None: 662 # This might be an executable (e.g. 'bash') 663 libpath = BPF.find_exe(self.library) 664 if libpath is None or len(libpath) == 0: 665 self._bail("unable to find library %s" % self.library) 666 667 if self.probe_type == "u": 668 pass # Was already enabled by the BPF constructor 669 elif self.probe_type == "r": 670 bpf.attach_uretprobe(name=libpath, 671 sym=self.function, 672 fn_name=self.probe_name, 673 pid=Probe.tgid) 674 else: 675 bpf.attach_uprobe(name=libpath, 676 sym=self.function, 677 fn_name=self.probe_name, 678 pid=Probe.tgid, 679 sym_off=self.offset) 680 681class Tool(object): 682 DEFAULT_PERF_BUFFER_PAGES = 64 683 examples = """ 684EXAMPLES: 685 686trace do_sys_open 687 Trace the open syscall and print a default trace message when entered 688trace kfree_skb+0x12 689 Trace the kfree_skb kernel function after the instruction on the 0x12 offset 690trace 'do_sys_open "%s", arg2@user' 691 Trace the open syscall and print the filename being opened @user is 692 added to arg2 in kprobes to ensure that char * should be copied from 693 the userspace stack to the bpf stack. If not specified, previous 694 behaviour is expected. 695 696trace 'do_sys_open "%s", arg2@user' -n main 697 Trace the open syscall and only print event that process names containing "main" 698trace 'do_sys_open "%s", arg2@user' --uid 1001 699 Trace the open syscall and only print event that processes with user ID 1001 700trace 'do_sys_open "%s", arg2@user' -f config 701 Trace the open syscall and print the filename being opened filtered by "config" 702trace 'sys_read (arg3 > 20000) "read %d bytes", arg3' 703 Trace the read syscall and print a message for reads >20000 bytes 704trace 'r::do_sys_open "%llx", retval' 705 Trace the return from the open syscall and print the return value 706trace 'c:open (arg2 == 42) "%s %d", arg1, arg2' 707 Trace the open() call from libc only if the flags (arg2) argument is 42 708trace 'c:malloc "size = %d", arg1' 709 Trace malloc calls and print the size being allocated 710trace 'p:c:write (arg1 == 1) "writing %d bytes to STDOUT", arg3' 711 Trace the write() call from libc to monitor writes to STDOUT 712trace 'r::__kmalloc (retval == 0) "kmalloc failed!"' 713 Trace returns from __kmalloc which returned a null pointer 714trace 'r:c:malloc (retval) "allocated = %x", retval' 715 Trace returns from malloc and print non-NULL allocated buffers 716trace 't:block:block_rq_complete "sectors=%d", args->nr_sector' 717 Trace the block_rq_complete kernel tracepoint and print # of tx sectors 718trace 'u:pthread:pthread_create (arg4 != 0)' 719 Trace the USDT probe pthread_create when its 4th argument is non-zero 720trace 'u:pthread:libpthread:pthread_create (arg4 != 0)' 721 Ditto, but the provider name "libpthread" is specified. 722trace 'p::SyS_nanosleep(struct timespec *ts) "sleep for %lld ns", ts->tv_nsec' 723 Trace the nanosleep syscall and print the sleep duration in ns 724trace -c /sys/fs/cgroup/system.slice/workload.service '__x64_sys_nanosleep' '__x64_sys_clone' 725 Trace nanosleep/clone syscall calls only under workload.service 726 cgroup hierarchy. 727trace -I 'linux/fs.h' \\ 728 'p::uprobe_register(struct inode *inode) "a_ops = %llx", inode->i_mapping->a_ops' 729 Trace the uprobe_register inode mapping ops, and the symbol can be found 730 in /proc/kallsyms 731trace -I 'kernel/sched/sched.h' \\ 732 'p::__account_cfs_rq_runtime(struct cfs_rq *cfs_rq) "%d", cfs_rq->runtime_remaining' 733 Trace the cfs scheduling runqueue remaining runtime. The struct cfs_rq is defined 734 in kernel/sched/sched.h which is in kernel source tree and not in kernel-devel 735 package. So this command needs to run at the kernel source tree root directory 736 so that the added header file can be found by the compiler. 737trace -I 'net/sock.h' \\ 738 'udpv6_sendmsg(struct sock *sk) (sk->sk_dport == 13568)' 739 Trace udpv6 sendmsg calls only if socket's destination port is equal 740 to 53 (DNS; 13568 in big endian order) 741trace -I 'linux/fs_struct.h' 'mntns_install "users = %d", $task->fs->users' 742 Trace the number of users accessing the file system of the current task 743trace -s /lib/x86_64-linux-gnu/libc.so.6,/bin/ping 'p:c:inet_pton' -U 744 Trace inet_pton system call and use the specified libraries/executables for 745 symbol resolution. 746""" 747 748 def __init__(self): 749 parser = argparse.ArgumentParser(description="Attach to " + 750 "functions and print trace messages.", 751 formatter_class=argparse.RawDescriptionHelpFormatter, 752 epilog=Tool.examples) 753 parser.add_argument("-b", "--buffer-pages", type=int, 754 default=Tool.DEFAULT_PERF_BUFFER_PAGES, 755 help="number of pages to use for perf_events ring buffer " 756 "(default: %(default)d)") 757 # we'll refer to the userspace concepts of "pid" and "tid" by 758 # their kernel names -- tgid and pid -- inside the script 759 parser.add_argument("-p", "--pid", type=int, metavar="PID", 760 dest="tgid", help="id of the process to trace (optional)") 761 parser.add_argument("-L", "--tid", type=int, metavar="TID", 762 dest="pid", help="id of the thread to trace (optional)") 763 parser.add_argument("--uid", type=int, metavar="UID", 764 dest="uid", help="id of the user to trace (optional)") 765 parser.add_argument("-v", "--verbose", action="store_true", 766 help="print resulting BPF program code before executing") 767 parser.add_argument("-Z", "--string-size", type=int, 768 default=80, help="maximum size to read from strings") 769 parser.add_argument("-S", "--include-self", 770 action="store_true", 771 help="do not filter trace's own pid from the trace") 772 parser.add_argument("-M", "--max-events", type=int, 773 help="number of events to print before quitting") 774 parser.add_argument("-t", "--timestamp", action="store_true", 775 help="print timestamp column (offset from trace start)") 776 parser.add_argument("-u", "--unix-timestamp", action="store_true", 777 help="print UNIX timestamp instead of offset from trace start, requires -t") 778 parser.add_argument("-T", "--time", action="store_true", 779 help="print time column") 780 parser.add_argument("-C", "--print_cpu", action="store_true", 781 help="print CPU id") 782 parser.add_argument("-c", "--cgroup-path", type=str, 783 metavar="CGROUP_PATH", dest="cgroup_path", 784 help="cgroup path") 785 parser.add_argument("-n", "--name", type=str, 786 help="only print process names containing this name") 787 parser.add_argument("-f", "--msg-filter", type=str, dest="msg_filter", 788 help="only print the msg of event containing this string") 789 parser.add_argument("-B", "--bin_cmp", action="store_true", 790 help="allow to use STRCMP with binary values") 791 parser.add_argument('-s', "--sym_file_list", type=str, 792 metavar="SYM_FILE_LIST", dest="sym_file_list", 793 help="comma separated list of symbol files to use \ 794 for symbol resolution") 795 parser.add_argument("-K", "--kernel-stack", 796 action="store_true", help="output kernel stack trace") 797 parser.add_argument("-U", "--user-stack", 798 action="store_true", help="output user stack trace") 799 parser.add_argument("-a", "--address", action="store_true", 800 help="print virtual address in stacks") 801 parser.add_argument(metavar="probe", dest="probes", nargs="+", 802 help="probe specifier (see examples)") 803 parser.add_argument("-I", "--include", action="append", 804 metavar="header", 805 help="additional header files to include in the BPF program " 806 "as either full path, " 807 "or relative to current working directory, " 808 "or relative to default kernel header search path") 809 parser.add_argument("-A", "--aggregate", action="store_true", 810 help="aggregate amount of each trace") 811 parser.add_argument("--ebpf", action="store_true", 812 help=argparse.SUPPRESS) 813 self.args = parser.parse_args() 814 if self.args.tgid and self.args.pid: 815 parser.error("only one of -p and -L may be specified") 816 if self.args.cgroup_path is not None: 817 self.cgroup_map_name = "__cgroup" 818 else: 819 self.cgroup_map_name = None 820 821 def _create_probes(self): 822 Probe.configure(self.args) 823 self.probes = [] 824 for probe_spec in self.args.probes: 825 self.probes.append(Probe( 826 probe_spec, self.args.string_size, 827 self.args.kernel_stack, self.args.user_stack, 828 self.cgroup_map_name, self.args.name, self.args.msg_filter)) 829 830 def _generate_program(self): 831 self.program = """ 832#include <linux/ptrace.h> 833#include <linux/sched.h> /* For TASK_COMM_LEN */ 834 835""" 836 for include in (self.args.include or []): 837 if include.startswith((".", "/")): 838 include = os.path.abspath(include) 839 self.program += "#include \"%s\"\n" % include 840 else: 841 self.program += "#include <%s>\n" % include 842 self.program += BPF.generate_auto_includes( 843 map(lambda p: p.raw_probe, self.probes)) 844 if self.cgroup_map_name is not None: 845 self.program += "BPF_CGROUP_ARRAY(%s, 1);\n" % \ 846 self.cgroup_map_name 847 for probe in self.probes: 848 self.program += probe.generate_program( 849 self.args.include_self) 850 851 if self.args.verbose or self.args.ebpf: 852 print(self.program) 853 if self.args.ebpf: 854 exit() 855 856 def _attach_probes(self): 857 usdt_contexts = [] 858 for probe in self.probes: 859 if probe.usdt: 860 # USDT probes must be enabled before the BPF object 861 # is initialized, because that's where the actual 862 # uprobe is being attached. 863 probe.usdt.enable_probe( 864 probe.usdt_name, probe.probe_name) 865 if self.args.verbose: 866 print(probe.usdt.get_text()) 867 usdt_contexts.append(probe.usdt) 868 self.bpf = BPF(text=self.program, usdt_contexts=usdt_contexts) 869 if self.args.sym_file_list is not None: 870 print("Note: Kernel bpf will report stack map with ip/build_id") 871 map(lambda x: self.bpf.add_module(x), self.args.sym_file_list.split(',')) 872 873 # if cgroup filter is requested, update the cgroup array map 874 if self.cgroup_map_name is not None: 875 cgroup_array = self.bpf.get_table(self.cgroup_map_name) 876 cgroup_array[0] = self.args.cgroup_path 877 878 for probe in self.probes: 879 if self.args.verbose: 880 print(probe) 881 probe.attach(self.bpf, self.args.verbose) 882 883 def _main_loop(self): 884 all_probes_trivial = all(map(Probe.is_default_action, 885 self.probes)) 886 887 # Print header 888 if self.args.timestamp or self.args.time: 889 col_fmt = "%-17s " if self.args.unix_timestamp else "%-8s " 890 print(col_fmt % "TIME", end="") 891 if self.args.print_cpu: 892 print("%-3s " % "CPU", end="") 893 print("%-7s %-7s %-15s %-16s %s" % 894 ("PID", "TID", "COMM", "FUNC", 895 "-" if not all_probes_trivial else "")) 896 sys.stdout.flush() 897 898 while True: 899 self.bpf.perf_buffer_poll() 900 901 def run(self): 902 try: 903 self._create_probes() 904 self._generate_program() 905 self._attach_probes() 906 self._main_loop() 907 except: 908 exc_info = sys.exc_info() 909 sys_exit = exc_info[0] is SystemExit 910 if self.args.verbose: 911 traceback.print_exc() 912 elif not sys_exit: 913 print(exc_info[1]) 914 exit(0 if sys_exit else 1) 915 916if __name__ == "__main__": 917 Tool().run() 918