#!/usr/bin/env python
# @lint-avoid-python-3-compatibility-imports
#
# shmsnoop Trace shm*() syscalls.
#          For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: shmsnoop [-h] [-T] [-p PID] [-t TID] [-d DURATION] [-n NAME]
#
# Copyright (c) 2018 Jiri Olsa.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 08-Oct-2018   Jiri Olsa   Created this.

from __future__ import print_function
from bcc import ArgString, BPF
import argparse
from datetime import datetime, timedelta

# arguments
examples = """examples:
    ./shmsnoop           # trace all shm*() syscalls
    ./shmsnoop -T        # include timestamps
    ./shmsnoop -p 181    # only trace PID 181
    ./shmsnoop -t 123    # only trace TID 123
    ./shmsnoop -d 10     # trace for 10 seconds only
    ./shmsnoop -n main   # only print process names containing "main"
"""
parser = argparse.ArgumentParser(
    description="Trace shm*() syscalls",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-T", "--timestamp", action="store_true",
    help="include timestamp on output")
# PID/TID are substituted into the C source below, so they are parsed as
# integers here: a malformed value is rejected by argparse instead of
# ending up inside the generated program.
parser.add_argument("-p", "--pid", type=int,
    help="trace this PID only")
parser.add_argument("-t", "--tid", type=int,
    help="trace this TID only")
parser.add_argument("-d", "--duration", type=int,
    help="total duration of trace in seconds")
parser.add_argument("-n", "--name",
    type=ArgString,
    help="only print process names containing this name")
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
debug = 0
if args.duration:
    args.duration = timedelta(seconds=args.duration)

# define BPF program
#
# Each syscall__shm*() entry probe stores its arguments in the infotmp hash,
# keyed by the value of bpf_get_current_pid_tgid(). trace_return() looks the
# entry up again, adds the comm, a timestamp in microseconds and the return
# value, submits it through the "events" perf buffer and deletes the entry.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <uapi/linux/limits.h>
#include <linux/sched.h>

struct val_t {
    u64 id;
    u64 ts;
    int sys;
    unsigned long key;
    unsigned long size;
    unsigned long shmflg;
    unsigned long shmid;
    unsigned long cmd;
    unsigned long buf;
    unsigned long shmaddr;
    unsigned long ret;
    char comm[TASK_COMM_LEN];
};

BPF_HASH(infotmp, u64, struct val_t);
BPF_PERF_OUTPUT(events);

enum {
    SYS_SHMGET,
    SYS_SHMAT,
    SYS_SHMDT,
    SYS_SHMCTL,
};

static int enter(struct val_t *val)
{
    u64 id = bpf_get_current_pid_tgid();
    u32 pid = id >> 32; // PID is higher part
    u32 tid = id;       // Cast and get the lower part

    FILTER

    val->id = id;
    infotmp.update(&id, val);
    return 0;
}

int trace_return(struct pt_regs *ctx)
{
    u64 id = bpf_get_current_pid_tgid();
    u64 tsp = bpf_ktime_get_ns();
    struct val_t *val;

    val = infotmp.lookup(&id);
    if (val == 0)
        return 0;

    if (bpf_get_current_comm(&val->comm, sizeof(val->comm)) != 0)
        goto out;

    val->ts = tsp / 1000;
    val->ret = PT_REGS_RC(ctx);
    events.perf_submit(ctx, val, sizeof(*val));

out:
    infotmp.delete(&id);
    return 0;
}

int syscall__shmget(struct pt_regs *ctx, u64 key, u64 size, u64 shmflg)
{
    struct val_t val = {
        .sys = SYS_SHMGET,
    };

    val.key = key;
    val.size = size;
    val.shmflg = shmflg;
    return enter(&val);
};

int syscall__shmat(struct pt_regs *ctx, u64 shmid, u64 shmaddr, u64 shmflg)
{
    struct val_t val = {
        .sys = SYS_SHMAT,
    };

    val.shmid = shmid;
    val.shmaddr = shmaddr;
    val.shmflg = shmflg;
    return enter(&val);
};

int syscall__shmdt(struct pt_regs *ctx, u64 shmaddr)
{
    struct val_t val = {
        .sys = SYS_SHMDT,
    };

    val.shmaddr = shmaddr;
    return enter(&val);
};

int syscall__shmctl(struct pt_regs *ctx, u64 shmid, u64 cmd, u64 buf)
{
    struct val_t val = {
        .sys = SYS_SHMCTL,
    };

    val.shmid = shmid;
    val.cmd = cmd;
    val.buf = buf;
    return enter(&val);
};

"""
# Substitute the placeholder in enter() with the requested filter. The
# checks use "is not None" so that an explicit value of 0 still selects a
# filter now that the options are integers.
if args.tid is not None:  # TID trumps PID
    bpf_text = bpf_text.replace('FILTER',
        'if (tid != %d) { return 0; }' % args.tid)
elif args.pid is not None:
    bpf_text = bpf_text.replace('FILTER',
        'if (pid != %d) { return 0; }' % args.pid)
else:
    bpf_text = bpf_text.replace('FILTER', '')

if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# initialize BPF
b = BPF(text=bpf_text)

# Attach the entry probe syscall__<name> and the shared return probe to each
# traced syscall. A syscall is skipped when BPF.ksymname() returns -1 for
# its function name.
for syscall in ("shmget", "shmat", "shmdt", "shmctl"):
    syscall_fnname = b.get_syscall_fnname(syscall)
    if BPF.ksymname(syscall_fnname) != -1:
        b.attach_kprobe(event=syscall_fnname, fn_name="syscall__" + syscall)
        b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return")

TASK_COMM_LEN = 16  # linux/sched.h

# Must stay in the same order as the anonymous enum in the BPF program.
SYS_SHMGET = 0
SYS_SHMAT = 1
SYS_SHMDT = 2
SYS_SHMCTL = 3

# Display names for the "sys" field of an event.
SYS_NAMES = {
    SYS_SHMGET: "SHMGET",
    SYS_SHMAT: "SHMAT",
    SYS_SHMDT: "SHMDT",
    SYS_SHMCTL: "SHMCTL",
}

# Timestamp of the first event seen; later timestamps are printed relative
# to it.
initial_ts = 0

# header
if args.timestamp:
    print("%-14s" % ("TIME(s)"), end="")
print("%-6s %-16s %6s %16s ARGs" %
    ("TID" if args.tid is not None else "PID", "COMM", "SYS", "RET"))

def sys_name(sys):
    """Return the display name for syscall id `sys`, or "N/A" if unknown."""
    return SYS_NAMES.get(sys, "N/A")

shmget_flags = [
    {'name': 'IPC_CREAT', 'value': 0o1000},
    {'name': 'IPC_EXCL', 'value': 0o2000},
    {'name': 'SHM_HUGETLB', 'value': 0o4000},
    {'name': 'SHM_HUGE_2MB', 'value': 21 << 26},
    {'name': 'SHM_HUGE_1GB', 'value': 30 << 26},
    {'name': 'SHM_NORESERVE', 'value': 0o10000},
    {'name': 'SHM_EXEC', 'value': 0o100000},
]

shmat_flags = [
    {'name': 'SHM_RDONLY', 'value': 0o10000},
    {'name': 'SHM_RND', 'value': 0o20000},
    {'name': 'SHM_REMAP', 'value': 0o40000},
    {'name': 'SHM_EXEC', 'value': 0o100000},
]

def shmflg_str(val, flags):
    """Format a shmflg value as hex followed by its decoded flag names.

    val   -- integer flag word
    flags -- list of {'name': ..., 'value': ...} dicts, tried in order

    Returns "0x0" for a zero value, otherwise "0x<hex> (A|B|0<octal>)" where
    the trailing octal term holds whatever bits no listed flag accounted
    for. It is omitted when nothing is left over.
    """
    text = "0x%x" % val
    if not val:
        return text

    parts = []
    for flag in flags:
        mask = flag['value']
        # Require every bit of the flag to be present. Multi-bit values
        # such as SHM_HUGE_2MB and SHM_HUGE_1GB share bits, so a plain
        # "any bit set" test would report both for a 1GB request.
        if (val & mask) == mask:
            parts.append(flag['name'])
            val &= ~mask

    # Show unexplained bits in octal.
    if val or not parts:
        parts.append("0%o" % val)

    return "%s (%s)" % (text, "|".join(parts))

# process event
def print_event(cpu, data, size):
    """Perf buffer callback: print one line for a completed shm*() call.

    cpu and size are unused; data is decoded into the val_t layout through
    the "events" table.
    """
    event = b["events"].event(data)
    global initial_ts

    if not initial_ts:
        initial_ts = event.ts

    if args.name and bytes(args.name) not in event.comm:
        return

    if args.timestamp:
        # event.ts is in microseconds (the BPF side divides ns by 1000).
        delta = event.ts - initial_ts
        print("%-14.9f" % (float(delta) / 1000000), end="")

    # comm is raw bytes from the kernel and need not be valid UTF-8, so
    # undecodable bytes are replaced rather than aborting the trace.
    comm = event.comm.decode('utf-8', 'replace')
    # id carries the PID in its upper 32 bits and the TID in the lower 32.
    if args.tid is not None:
        task_id = event.id & 0xffffffff
    else:
        task_id = event.id >> 32

    print("%-6d %-16s %6s %16lx " %
        (task_id, comm, sys_name(event.sys), event.ret), end='')

    if event.sys == SYS_SHMGET:
        print("key: 0x%lx, size: %lu, shmflg: %s" %
            (event.key, event.size,
             shmflg_str(event.shmflg, shmget_flags)))
    elif event.sys == SYS_SHMAT:
        print("shmid: 0x%lx, shmaddr: 0x%lx, shmflg: %s" %
            (event.shmid, event.shmaddr,
             shmflg_str(event.shmflg, shmat_flags)))
    elif event.sys == SYS_SHMDT:
        print("shmaddr: 0x%lx" % (event.shmaddr))
    elif event.sys == SYS_SHMCTL:
        print("shmid: 0x%lx, cmd: %lu, buf: 0x%x" %
            (event.shmid, event.cmd, event.buf))

# loop with callback to print_event
b["events"].open_perf_buffer(print_event, page_cnt=64)
start_time = datetime.now()
# Without -d the loop runs until interrupted; with -d it stops once the
# requested duration has elapsed.
while not args.duration or datetime.now() - start_time < args.duration:
    try:
        b.perf_buffer_poll(timeout=1000)
    except KeyboardInterrupt:
        exit()