xref: /aosp_15_r20/external/bcc/tools/trace.py (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1#!/usr/bin/env python
2#
3# trace         Trace a function and print a trace message based on its
4#               parameters, with an optional filter.
5#
6# usage: trace [-h] [-p PID] [-L TID] [-v] [-Z STRING_SIZE] [-S] [-c cgroup_path]
7#              [-M MAX_EVENTS] [-s SYMBOLFILES] [-T] [-t] [-K] [-U] [-a] [-I header]
8#              [-A]
9#              probe [probe ...]
10#
11# Licensed under the Apache License, Version 2.0 (the "License")
12# Copyright (C) 2016 Sasha Goldshtein.
13
14from __future__ import print_function
15from bcc import BPF, USDT, StrcmpRewrite
16from functools import partial
17from time import strftime
18import time
19import argparse
20import re
21import ctypes as ct
22import os
23import traceback
24import sys
25
26class Probe(object):
27        probe_count = 0
28        streq_index = 0
29        max_events = None
30        event_count = 0
31        first_ts = 0
32        first_ts_real = None
33        print_time = False
34        print_unix_timestamp = False
35        use_localtime = True
36        time_field = False
37        print_cpu = False
38        print_address = False
39        tgid = -1
40        pid = -1
41        uid = -1
42        page_cnt = None
43        build_id_enabled = False
44        aggregate = False
45        symcount = {}
46
47        @classmethod
48        def configure(cls, args):
49                cls.max_events = args.max_events
50                cls.print_time = args.timestamp or args.time
51                cls.print_unix_timestamp = args.unix_timestamp
52                cls.use_localtime = not args.timestamp
53                cls.time_field = cls.print_time and (not cls.use_localtime)
54                cls.print_cpu = args.print_cpu
55                cls.print_address = args.address
56                cls.first_ts = BPF.monotonic_time()
57                cls.first_ts_real = time.time()
58                cls.tgid = args.tgid or -1
59                cls.pid = args.pid or -1
60                cls.uid = args.uid or -1
61                cls.page_cnt = args.buffer_pages
62                cls.bin_cmp = args.bin_cmp
63                cls.build_id_enabled = args.sym_file_list is not None
64                cls.aggregate = args.aggregate
65                if cls.aggregate and cls.max_events is None:
66                        raise ValueError("-M/--max-events should be specified"
67                                         " with -A/--aggregate")
68
69        def __init__(self, probe, string_size, kernel_stack, user_stack,
70                     cgroup_map_name, name, msg_filter):
71                self.usdt = None
72                self.streq_functions = ""
73                self.raw_probe = probe
74                self.string_size = string_size
75                self.kernel_stack = kernel_stack
76                self.user_stack = user_stack
77                self.probe_user_list = set()
78                Probe.probe_count += 1
79                self._parse_probe()
80                self.probe_num = Probe.probe_count
81                self.probe_name = "probe_%s_%d" % \
82                                (self._display_function(), self.probe_num)
83                self.probe_name = re.sub(r'[^A-Za-z0-9_]', '_',
84                                         self.probe_name)
85                self.cgroup_map_name = cgroup_map_name
86                if name is None:
87                    # An empty bytestring is always contained in the command
88                    # name so this will always succeed.
89                    self.name = b''
90                else:
91                    self.name = name.encode('ascii')
92                self.msg_filter = msg_filter
93                # compiler can generate proper codes for function
94                # signatures with "syscall__" prefix
95                if self.is_syscall_kprobe:
96                        self.probe_name = "syscall__" + self.probe_name[6:]
97
98        def __str__(self):
99                return "%s:%s:%s FLT=%s ACT=%s/%s" % (self.probe_type,
100                        self.library, self._display_function(), self.filter,
101                        self.types, self.values)
102
103        def is_default_action(self):
104                return self.python_format == ""
105
106        def _bail(self, error):
107                raise ValueError("error in probe '%s': %s" %
108                                 (self.raw_probe, error))
109
110        def _parse_probe(self):
111                text = self.raw_probe
112
113                # There might be a function signature preceding the actual
114                # filter/print part, or not. Find the probe specifier first --
115                # it ends with either a space or an open paren ( for the
116                # function signature part.
117                #                                          opt. signature
118                #                               probespec       |      rest
119                #                               ---------  ----------   --
120                (spec, sig, rest) = re.match(r'([^ \t\(]+)(\([^\(]*\))?(.*)',
121                                             text).groups()
122
123                self._parse_spec(spec)
124                # Remove the parens
125                self.signature = sig[1:-1] if sig else None
126                if self.signature and self.probe_type in ['u', 't']:
127                        self._bail("USDT and tracepoint probes can't have " +
128                                   "a function signature; use arg1, arg2, " +
129                                   "... instead")
130
131                text = rest.lstrip()
132                # If we now have a (, wait for the balanced closing ) and that
133                # will be the predicate
134                self.filter = None
135                if len(text) > 0 and text[0] == "(":
136                        balance = 1
137                        for i in range(1, len(text)):
138                                if text[i] == "(":
139                                        balance += 1
140                                if text[i] == ")":
141                                        balance -= 1
142                                if balance == 0:
143                                        self._parse_filter(text[:i + 1])
144                                        text = text[i + 1:]
145                                        break
146                        if self.filter is None:
147                                self._bail("unmatched end of predicate")
148
149                if self.filter is None:
150                        self.filter = "1"
151
152                # The remainder of the text is the printf action
153                self._parse_action(text.lstrip())
154
155        def _parse_offset(self, func_and_offset):
156                func, offset_str = func_and_offset.split("+")
157                try:
158                        if "x" in offset_str or "X" in offset_str:
159                                offset = int(offset_str, 16)
160                        else:
161                                offset = int(offset_str)
162                except ValueError:
163                        self._bail("invalid offset format " +
164                                   " '%s', must be decimal or hexadecimal" % offset_str)
165
166                return func, offset
167
168        def _parse_spec(self, spec):
169                parts = spec.split(":")
170                # Two special cases: 'func' means 'p::func', 'lib:func' means
171                # 'p:lib:func'. Other combinations need to provide an empty
172                # value between delimiters, e.g. 'r::func' for a kretprobe on
173                # the function func.
174                if len(parts) == 1:
175                        parts = ["p", "", parts[0]]
176                elif len(parts) == 2:
177                        parts = ["p", parts[0], parts[1]]
178                if len(parts[0]) == 0:
179                        self.probe_type = "p"
180                elif parts[0] in ["p", "r", "t", "u"]:
181                        self.probe_type = parts[0]
182                else:
183                        self._bail("probe type must be '', 'p', 't', 'r', " +
184                                   "or 'u', but got '%s'" % parts[0])
185                self.offset = 0
186                if "+" in parts[-1]:
187                        parts[-1], self.offset = self._parse_offset(parts[-1])
188
189                if self.probe_type == "t":
190                        self.tp_category = parts[1]
191                        self.tp_event = parts[2]
192                        self.library = ""       # kernel
193                        self.function = ""      # from TRACEPOINT_PROBE
194                elif self.probe_type == "u":
195                        # u:<library>[:<provider>]:<probe> where :<provider> is optional
196                        self.library = parts[1]
197                        self.usdt_name = ":".join(parts[2:])
198                        self.function = ""      # no function, just address
199                        # We will discover the USDT provider by matching on
200                        # the USDT name in the specified library
201                        self._find_usdt_probe()
202                else:
203                        self.library = ':'.join(parts[1:-1])
204                        self.function = parts[-1]
205
206                # only x64 syscalls needs checking, no other syscall wrapper yet.
207                self.is_syscall_kprobe = False
208                if self.probe_type == "p" and len(self.library) == 0 and \
209                   self.function[:10] == "__x64_sys_":
210                        self.is_syscall_kprobe = True
211
212        def _find_usdt_probe(self):
213                target = Probe.pid if Probe.pid and Probe.pid != -1 \
214                                   else Probe.tgid
215                self.usdt = USDT(path=self.library, pid=target)
216
217                parts = self.usdt_name.split(":")
218                if len(parts) == 1:
219                        provider_name = None
220                        usdt_name = parts[0].encode("ascii")
221                else:
222                        provider_name = parts[0].encode("ascii")
223                        usdt_name = parts[1].encode("ascii")
224                for probe in self.usdt.enumerate_probes():
225                        if ((not provider_name or probe.provider == provider_name)
226                                        and probe.name == usdt_name):
227                                return  # Found it, will enable later
228                self._bail("unrecognized USDT probe %s" % self.usdt_name)
229
230        def _parse_filter(self, filt):
231                self.filter = self._rewrite_expr(filt)
232
233        def _parse_types(self, fmt):
234                for match in re.finditer(
235                            r'[^%]%(s|u|d|lu|llu|ld|lld|hu|hd|x|lx|llx|c|K|U)', fmt):
236                        self.types.append(match.group(1))
237                fmt = re.sub(r'([^%]%)(u|d|lu|llu|ld|lld|hu|hd)', r'\1d', fmt)
238                fmt = re.sub(r'([^%]%)(x|lx|llx)', r'\1x', fmt)
239                fmt = re.sub('%K|%U', '%s', fmt)
240                self.python_format = fmt.strip('"')
241
242        def _parse_action(self, action):
243                self.values = []
244                self.types = []
245                self.python_format = ""
246                if len(action) == 0:
247                        return
248
249                action = action.strip()
250                match = re.search(r'(\".*?\"),?(.*)', action)
251                if match is None:
252                        self._bail("expected format string in \"s")
253
254                self.raw_format = match.group(1)
255                self._parse_types(self.raw_format)
256                for part in re.split('(?<!"),', match.group(2)):
257                        part = self._rewrite_expr(part)
258                        if len(part) > 0:
259                                self.values.append(part)
260
261        aliases_arg = {
262                "arg1": "PT_REGS_PARM1(ctx)",
263                "arg2": "PT_REGS_PARM2(ctx)",
264                "arg3": "PT_REGS_PARM3(ctx)",
265                "arg4": "PT_REGS_PARM4(ctx)",
266                "arg5": "PT_REGS_PARM5(ctx)",
267                "arg6": "PT_REGS_PARM6(ctx)",
268        }
269
270        aliases_indarg = {
271                "arg1": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
272                        "  bpf_probe_read_kernel(&_val, sizeof(_val), &(PT_REGS_PARM1(_ctx))); _val;})",
273                "arg2": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
274                        "  bpf_probe_read_kernel(&_val, sizeof(_val), &(PT_REGS_PARM2(_ctx))); _val;})",
275                "arg3": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
276                        "  bpf_probe_read_kernel(&_val, sizeof(_val), &(PT_REGS_PARM3(_ctx))); _val;})",
277                "arg4": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
278                        "  bpf_probe_read_kernel(&_val, sizeof(_val), &(PT_REGS_PARM4(_ctx))); _val;})",
279                "arg5": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
280                        "  bpf_probe_read_kernel(&_val, sizeof(_val), &(PT_REGS_PARM5(_ctx))); _val;})",
281                "arg6": "({u64 _val; struct pt_regs *_ctx = (struct pt_regs *)PT_REGS_PARM1(ctx);"
282                        "  bpf_probe_read_kernel(&_val, sizeof(_val), &(PT_REGS_PARM6(_ctx))); _val;})",
283        }
284
285        aliases_common = {
286                "retval": "PT_REGS_RC(ctx)",
287                "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
288                "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
289                "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
290                "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
291                "$cpu": "bpf_get_smp_processor_id()",
292                "$task": "((struct task_struct *)bpf_get_current_task())"
293        }
294
295        def _rewrite_expr(self, expr):
296                # Find the occurances of any arg[1-6]@user. Use it later to
297                # identify bpf_probe_read_user
298                for matches in re.finditer(r'(arg[1-6])(@user)', expr):
299                    if matches.group(1).strip() not in self.probe_user_list:
300                        self.probe_user_list.add(matches.group(1).strip())
301                # Remove @user occurrences from arg before resolving to its
302                # corresponding aliases.
303                expr = re.sub(r'(arg[1-6])@user', r'\1', expr)
304                rdict = StrcmpRewrite.rewrite_expr(expr,
305                            self.bin_cmp, self.library,
306                            self.probe_user_list, self.streq_functions,
307                            Probe.streq_index)
308                expr = rdict["expr"]
309                self.streq_functions = rdict["streq_functions"]
310                Probe.streq_index = rdict["probeid"]
311                alias_to_check = Probe.aliases_indarg \
312                                    if self.is_syscall_kprobe \
313                                    else Probe.aliases_arg
314                # For USDT probes, we replace argN values with the
315                # actual arguments for that probe obtained using
316                # bpf_readarg_N macros emitted at BPF construction.
317                if not self.probe_type == "u":
318                    for alias, replacement in alias_to_check.items():
319                        expr = expr.replace(alias, replacement)
320                for alias, replacement in Probe.aliases_common.items():
321                    expr = expr.replace(alias, replacement)
322                return expr
323
324        c_type = {"u": "unsigned int", "d": "int",
325                  "lu": "unsigned long", "ld": "long",
326                  "llu": "unsigned long long", "lld": "long long",
327                  "hu": "unsigned short", "hd": "short",
328                  "x": "unsigned int", "lx": "unsigned long",
329                  "llx": "unsigned long long",
330                  "c": "char", "K": "unsigned long long",
331                  "U": "unsigned long long"}
332        fmt_types = c_type.keys()
333
334        def _generate_field_decl(self, idx):
335                field_type = self.types[idx]
336                if field_type == "s":
337                        return "char v%d[%d];\n" % (idx, self.string_size)
338                if field_type in Probe.fmt_types:
339                        return "%s v%d;\n" % (Probe.c_type[field_type], idx)
340                self._bail("unrecognized format specifier %s" % field_type)
341
342        def _generate_data_decl(self):
343                # The BPF program will populate values into the struct
344                # according to the format string, and the Python program will
345                # construct the final display string.
346                self.events_name = "%s_events" % self.probe_name
347                self.struct_name = "%s_data_t" % self.probe_name
348                self.stacks_name = "%s_stacks" % self.probe_name
349                stack_type = "BPF_STACK_TRACE" if self.build_id_enabled is False \
350                             else "BPF_STACK_TRACE_BUILDID"
351                stack_table = "%s(%s, 1024);" % (stack_type, self.stacks_name) \
352                              if (self.kernel_stack or self.user_stack) else ""
353                data_fields = ""
354                for i, field_type in enumerate(self.types):
355                        data_fields += "        " + \
356                                       self._generate_field_decl(i)
357                time_str = "u64 timestamp_ns;" if self.time_field else ""
358                cpu_str = "int cpu;" if self.print_cpu else ""
359                kernel_stack_str = "       int kernel_stack_id;" \
360                                   if self.kernel_stack else ""
361                user_stack_str = "       int user_stack_id;" \
362                                 if self.user_stack else ""
363
364                text = """
365struct %s
366{
367%s
368%s
369        u32 tgid;
370        u32 pid;
371        char comm[TASK_COMM_LEN];
372%s
373%s
374%s
375        u32 uid;
376};
377
378BPF_PERF_OUTPUT(%s);
379%s
380"""
381                return text % (self.struct_name, time_str, cpu_str, data_fields,
382                               kernel_stack_str, user_stack_str,
383                               self.events_name, stack_table)
384
385        def _generate_field_assign(self, idx):
386                field_type = self.types[idx]
387                expr = self.values[idx].strip()
388                text = ""
389                if self.probe_type == "u" and expr[0:3] == "arg":
390                        arg_index = int(expr[3])
391                        arg_ctype = self.usdt.get_probe_arg_ctype(
392                                self.usdt_name, arg_index - 1)
393                        text = ("        %s %s = 0;\n" +
394                                "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
395                                % (arg_ctype, expr, expr[3], expr)
396                probe_read_func = "bpf_probe_read_kernel"
397                if field_type == "s":
398                        if self.library:
399                            probe_read_func = "bpf_probe_read_user"
400                        else:
401                            alias_to_check = Probe.aliases_indarg \
402                                                if self.is_syscall_kprobe \
403                                                else Probe.aliases_arg
404                            for arg, alias in alias_to_check.items():
405                                if alias == expr and arg in self.probe_user_list:
406                                    probe_read_func = "bpf_probe_read_user"
407                                    break
408                        return text + """
409        if (%s != 0) {
410                void *__tmp = (void *)%s;
411                %s(&__data.v%d, sizeof(__data.v%d), __tmp);
412        }
413                """ % (expr, expr, probe_read_func, idx, idx)
414                if field_type in Probe.fmt_types:
415                        return text + "        __data.v%d = (%s)%s;\n" % \
416                                        (idx, Probe.c_type[field_type], expr)
417                self._bail("unrecognized field type %s" % field_type)
418
419        def _generate_usdt_filter_read(self):
420            text = ""
421            if self.probe_type != "u":
422                    return text
423            for arg, _ in Probe.aliases_arg.items():
424                    if not (arg in self.filter):
425                            continue
426                    arg_index = int(arg.replace("arg", ""))
427                    arg_ctype = self.usdt.get_probe_arg_ctype(
428                            self.usdt_name, arg_index - 1)
429                    if not arg_ctype:
430                            self._bail("Unable to determine type of {} "
431                                       "in the filter".format(arg))
432                    text += """
433        {} {}_filter;
434        bpf_usdt_readarg({}, ctx, &{}_filter);
435                    """.format(arg_ctype, arg, arg_index, arg)
436                    self.filter = self.filter.replace(
437                            arg, "{}_filter".format(arg))
438            return text
439
440        def generate_program(self, include_self):
441                data_decl = self._generate_data_decl()
442                if Probe.pid != -1:
443                        pid_filter = """
444        if (__pid != %d) { return 0; }
445                """ % Probe.pid
446                # uprobes can have a built-in tgid filter passed to
447                # attach_uprobe, hence the check here -- for kprobes, we
448                # need to do the tgid test by hand:
449                elif len(self.library) == 0 and Probe.tgid != -1:
450                        pid_filter = """
451        if (__tgid != %d) { return 0; }
452                """ % Probe.tgid
453                elif not include_self:
454                        pid_filter = """
455        if (__tgid == %d) { return 0; }
456                """ % os.getpid()
457                else:
458                        pid_filter = ""
459
460                if Probe.uid != -1:
461                        uid_filter = """
462        if (__uid != %d) { return 0; }
463                """ % Probe.uid
464                else:
465                        uid_filter = ""
466
467                if self.cgroup_map_name is not None:
468                        cgroup_filter = """
469        if (%s.check_current_task(0) <= 0) { return 0; }
470                """ % self.cgroup_map_name
471                else:
472                        cgroup_filter = ""
473
474                prefix = ""
475                signature = "struct pt_regs *ctx"
476                if self.signature:
477                        signature += ", " + self.signature
478
479                data_fields = ""
480                for i, expr in enumerate(self.values):
481                        data_fields += self._generate_field_assign(i)
482
483                if self.probe_type == "t":
484                        heading = "TRACEPOINT_PROBE(%s, %s)" % \
485                                  (self.tp_category, self.tp_event)
486                        ctx_name = "args"
487                else:
488                        heading = "int %s(%s)" % (self.probe_name, signature)
489                        ctx_name = "ctx"
490
491                time_str = """
492        __data.timestamp_ns = bpf_ktime_get_ns();""" if self.time_field else ""
493                cpu_str = """
494        __data.cpu = bpf_get_smp_processor_id();""" if self.print_cpu else ""
495                stack_trace = ""
496                if self.user_stack:
497                        stack_trace += """
498        __data.user_stack_id = %s.get_stackid(
499          %s, BPF_F_USER_STACK
500        );""" % (self.stacks_name, ctx_name)
501                if self.kernel_stack:
502                        stack_trace += """
503        __data.kernel_stack_id = %s.get_stackid(
504          %s, 0
505        );""" % (self.stacks_name, ctx_name)
506
507                text = heading + """
508{
509        u64 __pid_tgid = bpf_get_current_pid_tgid();
510        u32 __tgid = __pid_tgid >> 32;
511        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
512        u32 __uid = bpf_get_current_uid_gid();
513        %s
514        %s
515        %s
516        %s
517        %s
518        if (!(%s)) return 0;
519
520        struct %s __data = {0};
521        %s
522        %s
523        __data.tgid = __tgid;
524        __data.pid = __pid;
525        __data.uid = __uid;
526        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
527%s
528%s
529        %s.perf_submit(%s, &__data, sizeof(__data));
530        return 0;
531}
532"""
533                text = text % (pid_filter, uid_filter, cgroup_filter, prefix,
534                               self._generate_usdt_filter_read(), self.filter,
535                               self.struct_name, time_str, cpu_str, data_fields,
536                               stack_trace, self.events_name, ctx_name)
537
538                return self.streq_functions + data_decl + "\n" + text
539
540        @classmethod
541        def _time_off_str(cls, timestamp_ns):
542            offset = 1e-9 * (timestamp_ns - cls.first_ts)
543            if cls.print_unix_timestamp:
544                return "%.6f" % (offset + cls.first_ts_real)
545            else:
546                return "%.6f" % offset
547
548        def _display_function(self):
549                if self.probe_type == 'p' or self.probe_type == 'r':
550                        return self.function
551                elif self.probe_type == 'u':
552                        return self.usdt_name
553                else:   # self.probe_type == 't'
554                        return self.tp_event
555
556        def _stack_to_string(self, bpf, stack_id, tgid):
557            if stack_id < 0:
558                return ("        %d" % stack_id)
559
560            stackstr = ''
561            stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
562            for addr in stack:
563                stackstr += '        '
564                if Probe.print_address:
565                    stackstr += ("%16x " % addr)
566                symstr = bpf.sym(addr, tgid, show_module=True, show_offset=True)
567                stackstr += ('%s\n' % (symstr.decode('utf-8')))
568
569            return stackstr
570
571        def _format_message(self, bpf, tgid, values):
572                # Replace each %K with kernel sym and %U with user sym in tgid
573                kernel_placeholders = [i for i, t in enumerate(self.types)
574                                       if t == 'K']
575                user_placeholders = [i for i, t in enumerate(self.types)
576                                     if t == 'U']
577                string_placeholders = [i for i, t in enumerate(self.types)
578                                       if t == 's']
579                for kp in kernel_placeholders:
580                    values[kp] = bpf.ksym(values[kp], show_offset=True)
581                for up in user_placeholders:
582                    values[up] = bpf.sym(values[up], tgid,
583                                         show_module=True, show_offset=True)
584                for sp in string_placeholders:
585                    values[sp] = values[sp].decode('utf-8', 'replace')
586                return self.python_format % tuple(values)
587
588        def print_aggregate_events(self):
589                for k, v in sorted(self.symcount.items(), key=lambda item: \
590                                   item[1], reverse=True):
591                    print("%s-->COUNT %d\n\n" % (k, v), end="")
592
593        def print_event(self, bpf, cpu, data, size):
594                event = bpf[self.events_name].event(data)
595                if self.name not in event.comm:
596                    return
597                values = list(map(lambda i: getattr(event, "v%d" % i),
598                             range(0, len(self.values))))
599                msg = self._format_message(bpf, event.tgid, values)
600                if self.msg_filter and self.msg_filter not in msg:
601                    return
602                eventstr = ''
603                if Probe.print_time:
604                    time = strftime("%H:%M:%S") if Probe.use_localtime else \
605                           Probe._time_off_str(event.timestamp_ns)
606                    if Probe.print_unix_timestamp:
607                        eventstr += ("%-17s " % time[:17])
608                    else:
609                        eventstr += ("%-8s " % time[:8])
610                if Probe.print_cpu:
611                    eventstr += ("%-3s " % event.cpu)
612                eventstr += ("%-7d %-7d %-15s %-16s %s\n" %
613                      (event.tgid, event.pid,
614                       event.comm.decode('utf-8', 'replace'),
615                       self._display_function(), msg))
616
617                if self.kernel_stack:
618                        eventstr += self._stack_to_string(bpf, event.kernel_stack_id, -1)
619                if self.user_stack:
620                        eventstr += self._stack_to_string(bpf, event.user_stack_id, event.tgid)
621
622                if self.aggregate is False:
623                    print(eventstr, end="")
624                    if self.kernel_stack or self.user_stack:
625                        print("")
626                else:
627                    if eventstr in self.symcount:
628                        self.symcount[eventstr] += 1
629                    else:
630                        self.symcount[eventstr] = 1
631
632                Probe.event_count += 1
633                if Probe.max_events is not None and \
634                   Probe.event_count >= Probe.max_events:
635                    if self.aggregate:
636                        self.print_aggregate_events()
637                    sys.stdout.flush()
638                    exit()
639
640        def attach(self, bpf, verbose):
641                if len(self.library) == 0:
642                        self._attach_k(bpf)
643                else:
644                        self._attach_u(bpf)
645                callback = partial(self.print_event, bpf)
646                bpf[self.events_name].open_perf_buffer(callback,
647                        page_cnt=self.page_cnt)
648
649        def _attach_k(self, bpf):
650                if self.probe_type == "r":
651                        bpf.attach_kretprobe(event=self.function,
652                                             fn_name=self.probe_name)
653                elif self.probe_type == "p":
654                        bpf.attach_kprobe(event=self.function,
655                                          fn_name=self.probe_name,
656                                          event_off=self.offset)
657                # Note that tracepoints don't need an explicit attach
658
659        def _attach_u(self, bpf):
660                libpath = BPF.find_library(self.library)
661                if libpath is None:
662                        # This might be an executable (e.g. 'bash')
663                        libpath = BPF.find_exe(self.library)
664                if libpath is None or len(libpath) == 0:
665                        self._bail("unable to find library %s" % self.library)
666
667                if self.probe_type == "u":
668                        pass    # Was already enabled by the BPF constructor
669                elif self.probe_type == "r":
670                        bpf.attach_uretprobe(name=libpath,
671                                             sym=self.function,
672                                             fn_name=self.probe_name,
673                                             pid=Probe.tgid)
674                else:
675                        bpf.attach_uprobe(name=libpath,
676                                          sym=self.function,
677                                          fn_name=self.probe_name,
678                                          pid=Probe.tgid,
679                                          sym_off=self.offset)
680
681class Tool(object):
682        DEFAULT_PERF_BUFFER_PAGES = 64
683        examples = """
684EXAMPLES:
685
686trace do_sys_open
687        Trace the open syscall and print a default trace message when entered
688trace kfree_skb+0x12
689        Trace the kfree_skb kernel function after the instruction on the 0x12 offset
690trace 'do_sys_open "%s", arg2@user'
691        Trace the open syscall and print the filename being opened @user is
692        added to arg2 in kprobes to ensure that char * should be copied from
693        the userspace stack to the bpf stack. If not specified, previous
694        behaviour is expected.
695
696trace 'do_sys_open "%s", arg2@user' -n main
697        Trace the open syscall and only print event that process names containing "main"
698trace 'do_sys_open "%s", arg2@user' --uid 1001
699        Trace the open syscall and only print event that processes with user ID 1001
700trace 'do_sys_open "%s", arg2@user' -f config
701        Trace the open syscall and print the filename being opened filtered by "config"
702trace 'sys_read (arg3 > 20000) "read %d bytes", arg3'
703        Trace the read syscall and print a message for reads >20000 bytes
704trace 'r::do_sys_open "%llx", retval'
705        Trace the return from the open syscall and print the return value
706trace 'c:open (arg2 == 42) "%s %d", arg1, arg2'
707        Trace the open() call from libc only if the flags (arg2) argument is 42
708trace 'c:malloc "size = %d", arg1'
709        Trace malloc calls and print the size being allocated
710trace 'p:c:write (arg1 == 1) "writing %d bytes to STDOUT", arg3'
711        Trace the write() call from libc to monitor writes to STDOUT
712trace 'r::__kmalloc (retval == 0) "kmalloc failed!"'
713        Trace returns from __kmalloc which returned a null pointer
714trace 'r:c:malloc (retval) "allocated = %x", retval'
715        Trace returns from malloc and print non-NULL allocated buffers
716trace 't:block:block_rq_complete "sectors=%d", args->nr_sector'
717        Trace the block_rq_complete kernel tracepoint and print # of tx sectors
718trace 'u:pthread:pthread_create (arg4 != 0)'
719        Trace the USDT probe pthread_create when its 4th argument is non-zero
720trace 'u:pthread:libpthread:pthread_create (arg4 != 0)'
721        Ditto, but the provider name "libpthread" is specified.
722trace 'p::SyS_nanosleep(struct timespec *ts) "sleep for %lld ns", ts->tv_nsec'
723        Trace the nanosleep syscall and print the sleep duration in ns
724trace -c /sys/fs/cgroup/system.slice/workload.service '__x64_sys_nanosleep' '__x64_sys_clone'
725        Trace nanosleep/clone syscall calls only under workload.service
726        cgroup hierarchy.
727trace -I 'linux/fs.h' \\
728      'p::uprobe_register(struct inode *inode) "a_ops = %llx", inode->i_mapping->a_ops'
729        Trace the uprobe_register inode mapping ops, and the symbol can be found
730        in /proc/kallsyms
731trace -I 'kernel/sched/sched.h' \\
732      'p::__account_cfs_rq_runtime(struct cfs_rq *cfs_rq) "%d", cfs_rq->runtime_remaining'
733        Trace the cfs scheduling runqueue remaining runtime. The struct cfs_rq is defined
734        in kernel/sched/sched.h which is in kernel source tree and not in kernel-devel
735        package.  So this command needs to run at the kernel source tree root directory
736        so that the added header file can be found by the compiler.
737trace -I 'net/sock.h' \\
738      'udpv6_sendmsg(struct sock *sk) (sk->sk_dport == 13568)'
739        Trace udpv6 sendmsg calls only if socket's destination port is equal
740        to 53 (DNS; 13568 in big endian order)
741trace -I 'linux/fs_struct.h' 'mntns_install "users = %d", $task->fs->users'
742        Trace the number of users accessing the file system of the current task
743trace -s /lib/x86_64-linux-gnu/libc.so.6,/bin/ping 'p:c:inet_pton' -U
744        Trace inet_pton system call and use the specified libraries/executables for
745        symbol resolution.
746"""
747
748        def __init__(self):
749                parser = argparse.ArgumentParser(description="Attach to " +
750                  "functions and print trace messages.",
751                  formatter_class=argparse.RawDescriptionHelpFormatter,
752                  epilog=Tool.examples)
753                parser.add_argument("-b", "--buffer-pages", type=int,
754                  default=Tool.DEFAULT_PERF_BUFFER_PAGES,
755                  help="number of pages to use for perf_events ring buffer "
756                       "(default: %(default)d)")
757                # we'll refer to the userspace concepts of "pid" and "tid" by
758                # their kernel names -- tgid and pid -- inside the script
759                parser.add_argument("-p", "--pid", type=int, metavar="PID",
760                  dest="tgid", help="id of the process to trace (optional)")
761                parser.add_argument("-L", "--tid", type=int, metavar="TID",
762                  dest="pid", help="id of the thread to trace (optional)")
763                parser.add_argument("--uid", type=int, metavar="UID",
764                  dest="uid", help="id of the user to trace (optional)")
765                parser.add_argument("-v", "--verbose", action="store_true",
766                  help="print resulting BPF program code before executing")
767                parser.add_argument("-Z", "--string-size", type=int,
768                  default=80, help="maximum size to read from strings")
769                parser.add_argument("-S", "--include-self",
770                  action="store_true",
771                  help="do not filter trace's own pid from the trace")
772                parser.add_argument("-M", "--max-events", type=int,
773                  help="number of events to print before quitting")
774                parser.add_argument("-t", "--timestamp", action="store_true",
775                  help="print timestamp column (offset from trace start)")
776                parser.add_argument("-u", "--unix-timestamp", action="store_true",
777                  help="print UNIX timestamp instead of offset from trace start, requires -t")
778                parser.add_argument("-T", "--time", action="store_true",
779                  help="print time column")
780                parser.add_argument("-C", "--print_cpu", action="store_true",
781                  help="print CPU id")
782                parser.add_argument("-c", "--cgroup-path", type=str,
783                  metavar="CGROUP_PATH", dest="cgroup_path",
784                  help="cgroup path")
785                parser.add_argument("-n", "--name", type=str,
786                                    help="only print process names containing this name")
787                parser.add_argument("-f", "--msg-filter", type=str, dest="msg_filter",
788                                    help="only print the msg of event containing this string")
789                parser.add_argument("-B", "--bin_cmp", action="store_true",
790                  help="allow to use STRCMP with binary values")
791                parser.add_argument('-s', "--sym_file_list", type=str,
792                  metavar="SYM_FILE_LIST", dest="sym_file_list",
793                  help="comma separated list of symbol files to use \
794                  for symbol resolution")
795                parser.add_argument("-K", "--kernel-stack",
796                  action="store_true", help="output kernel stack trace")
797                parser.add_argument("-U", "--user-stack",
798                  action="store_true", help="output user stack trace")
799                parser.add_argument("-a", "--address", action="store_true",
800                  help="print virtual address in stacks")
801                parser.add_argument(metavar="probe", dest="probes", nargs="+",
802                  help="probe specifier (see examples)")
803                parser.add_argument("-I", "--include", action="append",
804                  metavar="header",
805                  help="additional header files to include in the BPF program "
806                       "as either full path, "
807                       "or relative to current working directory, "
808                       "or relative to default kernel header search path")
809                parser.add_argument("-A", "--aggregate", action="store_true",
810                  help="aggregate amount of each trace")
811                parser.add_argument("--ebpf", action="store_true",
812                  help=argparse.SUPPRESS)
813                self.args = parser.parse_args()
814                if self.args.tgid and self.args.pid:
815                        parser.error("only one of -p and -L may be specified")
816                if self.args.cgroup_path is not None:
817                        self.cgroup_map_name = "__cgroup"
818                else:
819                        self.cgroup_map_name = None
820
821        def _create_probes(self):
822                Probe.configure(self.args)
823                self.probes = []
824                for probe_spec in self.args.probes:
825                        self.probes.append(Probe(
826                                probe_spec, self.args.string_size,
827                                self.args.kernel_stack, self.args.user_stack,
828                                self.cgroup_map_name, self.args.name, self.args.msg_filter))
829
830        def _generate_program(self):
831                self.program = """
832#include <linux/ptrace.h>
833#include <linux/sched.h>        /* For TASK_COMM_LEN */
834
835"""
836                for include in (self.args.include or []):
837                        if include.startswith((".", "/")):
838                                include = os.path.abspath(include)
839                                self.program += "#include \"%s\"\n" % include
840                        else:
841                                self.program += "#include <%s>\n" % include
842                self.program += BPF.generate_auto_includes(
843                        map(lambda p: p.raw_probe, self.probes))
844                if self.cgroup_map_name is not None:
845                        self.program += "BPF_CGROUP_ARRAY(%s, 1);\n" % \
846                                        self.cgroup_map_name
847                for probe in self.probes:
848                        self.program += probe.generate_program(
849                                        self.args.include_self)
850
851                if self.args.verbose or self.args.ebpf:
852                        print(self.program)
853                        if self.args.ebpf:
854                                exit()
855
856        def _attach_probes(self):
857                usdt_contexts = []
858                for probe in self.probes:
859                    if probe.usdt:
860                        # USDT probes must be enabled before the BPF object
861                        # is initialized, because that's where the actual
862                        # uprobe is being attached.
863                        probe.usdt.enable_probe(
864                                probe.usdt_name, probe.probe_name)
865                        if self.args.verbose:
866                                print(probe.usdt.get_text())
867                        usdt_contexts.append(probe.usdt)
868                self.bpf = BPF(text=self.program, usdt_contexts=usdt_contexts)
869                if self.args.sym_file_list is not None:
870                  print("Note: Kernel bpf will report stack map with ip/build_id")
871                  map(lambda x: self.bpf.add_module(x), self.args.sym_file_list.split(','))
872
873                # if cgroup filter is requested, update the cgroup array map
874                if self.cgroup_map_name is not None:
875                        cgroup_array = self.bpf.get_table(self.cgroup_map_name)
876                        cgroup_array[0] = self.args.cgroup_path
877
878                for probe in self.probes:
879                        if self.args.verbose:
880                                print(probe)
881                        probe.attach(self.bpf, self.args.verbose)
882
883        def _main_loop(self):
884                all_probes_trivial = all(map(Probe.is_default_action,
885                                             self.probes))
886
887                # Print header
888                if self.args.timestamp or self.args.time:
889                    col_fmt = "%-17s " if self.args.unix_timestamp else "%-8s "
890                    print(col_fmt % "TIME", end="")
891                if self.args.print_cpu:
892                    print("%-3s " % "CPU", end="")
893                print("%-7s %-7s %-15s %-16s %s" %
894                      ("PID", "TID", "COMM", "FUNC",
895                      "-" if not all_probes_trivial else ""))
896                sys.stdout.flush()
897
898                while True:
899                        self.bpf.perf_buffer_poll()
900
901        def run(self):
902                try:
903                        self._create_probes()
904                        self._generate_program()
905                        self._attach_probes()
906                        self._main_loop()
907                except:
908                        exc_info = sys.exc_info()
909                        sys_exit = exc_info[0] is SystemExit
910                        if self.args.verbose:
911                                traceback.print_exc()
912                        elif not sys_exit:
913                                print(exc_info[1])
914                        exit(0 if sys_exit else 1)
915
916if __name__ == "__main__":
917        Tool().run()
918