#!/usr/bin/env python
#
# memleak   Trace and display outstanding allocations to detect
#           memory leaks in user-mode processes and the kernel.
#
# USAGE: memleak [-h] [-p PID] [-t] [-a] [-o OLDER] [-c COMMAND]
#                [--combined-only] [--wa-missing-free] [-s SAMPLE_RATE]
#                [-T TOP] [-z MIN_SIZE] [-Z MAX_SIZE] [-O OBJ]
#                [interval] [count]
#
# Licensed under the Apache License, Version 2.0 (the "License")
# Copyright (C) 2016 Sasha Goldshtein.

from bcc import BPF
from time import sleep
from datetime import datetime
import resource
import argparse
import subprocess
import os
import sys

class Allocation(object):
        def __init__(self, stack, size):
                self.stack = stack
                self.count = 1
                self.size = size

        def update(self, size):
                self.count += 1
                self.size += size
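
# Subprocess helpers: stream a command's combined stdout/stderr line by line,
# or launch a command and return its PID (used by -c/--command to trace a
# freshly started process).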
def run_command_get_output(command):
        p = subprocess.Popen(command.split(),
                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        return iter(p.stdout.readline, b'')

def run_command_get_pid(command):
        p = subprocess.Popen(command.split())
        return p.pid

examples = """
EXAMPLES:

./memleak -p $(pidof allocs)
        Trace allocations and display a summary of "leaked" (outstanding)
        allocations every 5 seconds
./memleak -p $(pidof allocs) -t
        Trace allocations and display each individual allocator function call
./memleak -ap $(pidof allocs) 10
        Trace allocations and display allocated addresses, sizes, and stacks
        every 10 seconds for outstanding allocations
./memleak -c "./allocs"
        Run the specified command and trace its allocations
./memleak
        Trace allocations in kernel mode and display a summary of outstanding
        allocations every 5 seconds
./memleak -o 60000
        Trace allocations in kernel mode and display a summary of outstanding
        allocations that are at least one minute (60 seconds) old
./memleak -s 5
        Trace roughly every 5th allocation, to reduce overhead
"""

description = """
Trace outstanding memory allocations that weren't freed.
Supports both user-mode allocations made with libc functions and kernel-mode
allocations made with kmalloc/kmem_cache_alloc/get_free_pages and corresponding
memory release functions.
"""

parser = argparse.ArgumentParser(description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=examples)
parser.add_argument("-p", "--pid", type=int, default=-1,
        help="the PID to trace; if not specified, trace kernel allocs")
parser.add_argument("-t", "--trace", action="store_true",
        help="print trace messages for each alloc/free call")
parser.add_argument("interval", nargs="?", default=5, type=int,
        help="interval in seconds to print outstanding allocations")
parser.add_argument("count", nargs="?", type=int,
        help="number of times to print the report before exiting")
parser.add_argument("-a", "--show-allocs", default=False, action="store_true",
        help="show allocation addresses and sizes as well as call stacks")
parser.add_argument("-o", "--older", default=500, type=int,
        help="prune allocations younger than this age in milliseconds")
parser.add_argument("-c", "--command",
        help="execute and trace the specified command")
parser.add_argument("--combined-only", default=False, action="store_true",
        help="show combined allocation statistics only")
parser.add_argument("--wa-missing-free", default=False, action="store_true",
        help="workaround to alleviate misjudgments when free is missing")
parser.add_argument("-s", "--sample-rate", default=1, type=int,
        help="sample every N-th allocation to decrease the overhead")
parser.add_argument("-T", "--top", type=int, default=10,
        help="display only this many top allocating stacks (by size)")
parser.add_argument("-z", "--min-size", type=int,
        help="capture only allocations larger than this size")
parser.add_argument("-Z", "--max-size", type=int,
        help="capture only allocations smaller than this size")
parser.add_argument("-O", "--obj", type=str, default="c",
        help="attach to allocator functions in the specified object")
parser.add_argument("--ebpf", action="store_true",
        help=argparse.SUPPRESS)
parser.add_argument("--percpu", default=False, action="store_true",
        help="trace percpu allocations")

args = parser.parse_args()

pid = args.pid
command = args.command
kernel_trace = (pid == -1 and command is None)
trace_all = args.trace
interval = args.interval
min_age_ns = 1e6 * args.older
sample_every_n = args.sample_rate
num_prints = args.count
top_stacks = args.top
min_size = args.min_size
max_size = args.max_size
obj = args.obj

if min_size is not None and max_size is not None and min_size > max_size:
        print("min_size (-z) can't be greater than max_size (-Z)")
        exit(1)

if command is not None:
        print("Executing '%s' and tracing the resulting process."
              % command)
        pid = run_command_get_pid(command)
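
# The BPF C program below stores, keyed by pid/tgid, the size requested at an
# allocator's entry point; on return it records the allocation (address, size,
# timestamp, stack id) in "allocs" and per-stack totals in "combined_allocs".
# The ALL-CAPS placeholders (SHOULD_PRINT, SAMPLE_EVERY_N, SIZE_FILTER,
# STACK_FLAGS, WORKAROUND_MISSING_FREE, PAGE_SIZE) are substituted in Python
# further down.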
bpf_source = """
#include <uapi/linux/ptrace.h>

struct alloc_info_t {
        u64 size;
        u64 timestamp_ns;
        int stack_id;
};

struct combined_alloc_info_t {
        u64 total_size;
        u64 number_of_allocs;
};

BPF_HASH(sizes, u64);
BPF_HASH(allocs, u64, struct alloc_info_t, 1000000);
BPF_HASH(memptrs, u64, u64);
BPF_STACK_TRACE(stack_traces, 10240);
BPF_HASH(combined_allocs, u64, struct combined_alloc_info_t, 10240);

static inline void update_statistics_add(u64 stack_id, u64 sz) {
        struct combined_alloc_info_t *existing_cinfo;
        struct combined_alloc_info_t cinfo = {0};

        existing_cinfo = combined_allocs.lookup(&stack_id);
        if (existing_cinfo != 0)
                cinfo = *existing_cinfo;

        cinfo.total_size += sz;
        cinfo.number_of_allocs += 1;

        combined_allocs.update(&stack_id, &cinfo);
}

static inline void update_statistics_del(u64 stack_id, u64 sz) {
        struct combined_alloc_info_t *existing_cinfo;
        struct combined_alloc_info_t cinfo = {0};

        existing_cinfo = combined_allocs.lookup(&stack_id);
        if (existing_cinfo != 0)
                cinfo = *existing_cinfo;

        if (sz >= cinfo.total_size)
                cinfo.total_size = 0;
        else
                cinfo.total_size -= sz;

        if (cinfo.number_of_allocs > 0)
                cinfo.number_of_allocs -= 1;

        combined_allocs.update(&stack_id, &cinfo);
}

static inline int gen_alloc_enter(struct pt_regs *ctx, size_t size) {
        SIZE_FILTER
        if (SAMPLE_EVERY_N > 1) {
                u64 ts = bpf_ktime_get_ns();
                if (ts % SAMPLE_EVERY_N != 0)
                        return 0;
        }

        u64 pid = bpf_get_current_pid_tgid();
        u64 size64 = size;
        sizes.update(&pid, &size64);

        if (SHOULD_PRINT)
                bpf_trace_printk("alloc entered, size = %u\\n", size);
        return 0;
}

static inline int gen_alloc_exit2(struct pt_regs *ctx, u64 address) {
        u64 pid = bpf_get_current_pid_tgid();
        u64 *size64 = sizes.lookup(&pid);
        struct alloc_info_t info = {0};

        if (size64 == 0)
                return 0; // missed alloc entry

        info.size = *size64;
        sizes.delete(&pid);

        if (address != 0) {
                info.timestamp_ns = bpf_ktime_get_ns();
                info.stack_id = stack_traces.get_stackid(ctx, STACK_FLAGS);
                allocs.update(&address, &info);
                update_statistics_add(info.stack_id, info.size);
        }

        if (SHOULD_PRINT) {
                bpf_trace_printk("alloc exited, size = %lu, result = %lx\\n",
                                 info.size, address);
        }
        return 0;
}

static inline int gen_alloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit2(ctx, PT_REGS_RC(ctx));
}

static inline int gen_free_enter(struct pt_regs *ctx, void *address) {
        u64 addr = (u64)address;
        struct alloc_info_t *info = allocs.lookup(&addr);
        if (info == 0)
                return 0;

        allocs.delete(&addr);
        update_statistics_del(info->stack_id, info->size);

        if (SHOULD_PRINT) {
                bpf_trace_printk("free entered, address = %lx, size = %lu\\n",
                                 address, info->size);
        }
        return 0;
}

int malloc_enter(struct pt_regs *ctx, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int malloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int free_enter(struct pt_regs *ctx, void *address) {
        return gen_free_enter(ctx, address);
}

int calloc_enter(struct pt_regs *ctx, size_t nmemb, size_t size) {
        return gen_alloc_enter(ctx, nmemb * size);
}

int calloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}
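
// realloc is treated as a free of the old pointer followed by a new
// allocation of the requested size; the returned address is recorded on exit.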
int realloc_enter(struct pt_regs *ctx, void *ptr, size_t size) {
        gen_free_enter(ctx, ptr);
        return gen_alloc_enter(ctx, size);
}

int realloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int mmap_enter(struct pt_regs *ctx) {
        size_t size = (size_t)PT_REGS_PARM2(ctx);
        return gen_alloc_enter(ctx, size);
}

int mmap_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int munmap_enter(struct pt_regs *ctx, void *address) {
        return gen_free_enter(ctx, address);
}

int posix_memalign_enter(struct pt_regs *ctx, void **memptr, size_t alignment,
                         size_t size) {
        u64 memptr64 = (u64)(size_t)memptr;
        u64 pid = bpf_get_current_pid_tgid();

        memptrs.update(&pid, &memptr64);
        return gen_alloc_enter(ctx, size);
}

int posix_memalign_exit(struct pt_regs *ctx) {
        u64 pid = bpf_get_current_pid_tgid();
        u64 *memptr64 = memptrs.lookup(&pid);
        void *addr;

        if (memptr64 == 0)
                return 0;

        memptrs.delete(&pid);

        if (bpf_probe_read_user(&addr, sizeof(void*), (void*)(size_t)*memptr64))
                return 0;

        u64 addr64 = (u64)(size_t)addr;
        return gen_alloc_exit2(ctx, addr64);
}

int aligned_alloc_enter(struct pt_regs *ctx, size_t alignment, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int aligned_alloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int valloc_enter(struct pt_regs *ctx, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int valloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int memalign_enter(struct pt_regs *ctx, size_t alignment, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int memalign_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int pvalloc_enter(struct pt_regs *ctx, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int pvalloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}
"""
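
# Kernel-mode tracing attaches to kmem/percpu tracepoints instead of uprobes.
# With --wa-missing-free, any outstanding entry recorded at the same pointer
# is dropped before a new allocation is recorded, so missed frees do not
# accumulate as false leaks.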
bpf_source_kernel_node = """

TRACEPOINT_PROBE(kmem, kmalloc_node) {
        if (WORKAROUND_MISSING_FREE)
                gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
        gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmem_cache_alloc_node) {
        if (WORKAROUND_MISSING_FREE)
                gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
        gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}
"""

bpf_source_kernel = """

TRACEPOINT_PROBE(kmem, kmalloc) {
        if (WORKAROUND_MISSING_FREE)
                gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
        gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kfree) {
        return gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmem_cache_alloc) {
        if (WORKAROUND_MISSING_FREE)
                gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
        gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmem_cache_free) {
        return gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
}

TRACEPOINT_PROBE(kmem, mm_page_alloc) {
        gen_alloc_enter((struct pt_regs *)args, PAGE_SIZE << args->order);
        return gen_alloc_exit2((struct pt_regs *)args, args->pfn);
}

TRACEPOINT_PROBE(kmem, mm_page_free) {
        return gen_free_enter((struct pt_regs *)args, (void *)args->pfn);
}
"""

bpf_source_percpu = """

TRACEPOINT_PROBE(percpu, percpu_alloc_percpu) {
        gen_alloc_enter((struct pt_regs *)args, args->size);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(percpu, percpu_free_percpu) {
        return gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
}
"""

if kernel_trace:
        if args.percpu:
                bpf_source += bpf_source_percpu
        else:
                bpf_source += bpf_source_kernel
                if BPF.tracepoint_exists("kmem", "kmalloc_node"):
                        bpf_source += bpf_source_kernel_node

if kernel_trace:
        bpf_source = bpf_source.replace("WORKAROUND_MISSING_FREE", "1"
                                        if args.wa_missing_free else "0")

bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0")
bpf_source = bpf_source.replace("SAMPLE_EVERY_N", str(sample_every_n))
bpf_source = bpf_source.replace("PAGE_SIZE", str(resource.getpagesize()))

size_filter = ""
if min_size is not None and max_size is not None:
        size_filter = "if (size < %d || size > %d) return 0;" % \
                      (min_size, max_size)
elif min_size is not None:
        size_filter = "if (size < %d) return 0;" % min_size
elif max_size is not None:
        size_filter = "if (size > %d) return 0;" % max_size
bpf_source = bpf_source.replace("SIZE_FILTER", size_filter)

stack_flags = "0"
if not kernel_trace:
        stack_flags += "|BPF_F_USER_STACK"
bpf_source = bpf_source.replace("STACK_FLAGS", stack_flags)

if args.ebpf:
        print(bpf_source)
        exit()

bpf = BPF(text=bpf_source)

if not kernel_trace:
        print("Attaching to pid %d, Ctrl+C to quit." % pid)

        def attach_probes(sym, fn_prefix=None, can_fail=False):
                if fn_prefix is None:
                        fn_prefix = sym

                try:
                        bpf.attach_uprobe(name=obj, sym=sym,
                                          fn_name=fn_prefix + "_enter",
                                          pid=pid)
                        bpf.attach_uretprobe(name=obj, sym=sym,
                                             fn_name=fn_prefix + "_exit",
                                             pid=pid)
                except Exception:
                        if can_fail:
                                return
                        else:
                                raise

        attach_probes("malloc")
        attach_probes("calloc")
        attach_probes("realloc")
        attach_probes("mmap")
        attach_probes("posix_memalign")
        attach_probes("valloc", can_fail=True)  # deprecated; missing from bionic libc on Android
        attach_probes("memalign")
        attach_probes("pvalloc", can_fail=True)  # deprecated; missing from bionic libc on Android
        attach_probes("aligned_alloc", can_fail=True)  # added in C11
        bpf.attach_uprobe(name=obj, sym="free", fn_name="free_enter",
                          pid=pid)
        bpf.attach_uprobe(name=obj, sym="munmap", fn_name="munmap_enter",
                          pid=pid)

else:
        print("Attaching to kernel allocators, Ctrl+C to quit.")

        # No probe attaching here. Allocations are counted by attaching to
        # tracepoints.
        #
        # Memory allocations in the Linux kernel are not limited to malloc/free
        # equivalents; it is also common to allocate one or more memory pages.
        # The page allocator has two interfaces, one working with page frame
        # numbers (PFNs) and the other with page addresses. It is possible to
        # allocate pages with one kind of function and free them with the
        # other. Kernel code can easily convert PFNs to addresses and back, but
        # doing the same from an eBPF kprobe is hard without fragile hacks.
        #
        # Fortunately, Linux exposes tracepoints for memory allocations, which
        # can be instrumented by eBPF programs. The tracepoint for page
        # allocations gives access to PFNs for both allocator interfaces, so
        # there is no need to guess which allocation corresponds to which free.
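
# Walk the allocs map, skip entries younger than min_age_ns or with lost
# stacks, group the rest by stack id, and print the top_stacks largest groups
# (and, with -a, each outstanding address and size).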
def print_outstanding():
        print("[%s] Top %d stacks with outstanding allocations:" %
              (datetime.now().strftime("%H:%M:%S"), top_stacks))
        alloc_info = {}
        allocs = bpf["allocs"]
        stack_traces = bpf["stack_traces"]
        for address, info in sorted(allocs.items(), key=lambda a: a[1].size):
                if BPF.monotonic_time() - min_age_ns < info.timestamp_ns:
                        continue
                if info.stack_id < 0:
                        continue
                if info.stack_id in alloc_info:
                        alloc_info[info.stack_id].update(info.size)
                else:
                        stack = list(stack_traces.walk(info.stack_id))
                        combined = []
                        for addr in stack:
                                combined.append(('0x' + format(addr, '016x') + '\t').encode('utf-8') +
                                                bpf.sym(addr, pid,
                                                        show_module=True,
                                                        show_offset=True))
                        alloc_info[info.stack_id] = Allocation(combined,
                                                               info.size)
                if args.show_allocs:
                        print("\taddr = %x size = %s" %
                              (address.value, info.size))
        to_show = sorted(alloc_info.values(),
                         key=lambda a: a.size)[-top_stacks:]
        for alloc in to_show:
                print("\t%d bytes in %d allocations from stack\n\t\t%s" %
                      (alloc.size, alloc.count,
                       b"\n\t\t".join(alloc.stack).decode("ascii")))

def print_outstanding_combined():
        stack_traces = bpf["stack_traces"]
        stacks = sorted(bpf["combined_allocs"].items(),
                        key=lambda a: -a[1].total_size)
        cnt = 1
        entries = []
        for stack_id, info in stacks:
                try:
                        trace = []
                        for addr in stack_traces.walk(stack_id.value):
                                sym = bpf.sym(addr, pid,
                                              show_module=True,
                                              show_offset=True)
                                trace.append(sym.decode())
                        trace = "\n\t\t".join(trace)
                except KeyError:
                        trace = "stack information lost"

                entry = ("\t%d bytes in %d allocations from stack\n\t\t%s" %
                         (info.total_size, info.number_of_allocs, trace))
                entries.append(entry)

                cnt += 1
                if cnt > top_stacks:
                        break

        print("[%s] Top %d stacks with outstanding allocations:" %
              (datetime.now().strftime("%H:%M:%S"), top_stacks))

        print('\n'.join(reversed(entries)))

count_so_far = 0
while True:
        if trace_all:
                print(bpf.trace_fields())
        else:
                try:
                        sleep(interval)
                except KeyboardInterrupt:
                        exit()
                if args.combined_only:
                        print_outstanding_combined()
                else:
                        print_outstanding()
                sys.stdout.flush()
                count_so_far += 1
                if num_prints is not None and count_so_far >= num_prints:
                        exit()