1#!/usr/bin/env python 2# @lint-avoid-python-3-compatibility-imports 3# 4# cachetop Count cache kernel function calls per processes 5# For Linux, uses BCC, eBPF. 6# 7# USAGE: cachetop 8# Taken from cachestat by Brendan Gregg 9# 10# Copyright (c) 2016-present, Facebook, Inc. 11# Licensed under the Apache License, Version 2.0 (the "License") 12# 13# 13-Jul-2016 Emmanuel Bretelle first version 14# 17-Mar-2022 Rocky Xing Added PID filter support. 15# 15-Feb-2023 Rong Tao Add writeback_dirty_{folio,page} tracepoints 16 17from __future__ import absolute_import 18from __future__ import division 19# Do not import unicode_literals until #623 is fixed 20# from __future__ import unicode_literals 21from __future__ import print_function 22 23from bcc import BPF 24from collections import defaultdict 25from time import strftime 26 27import argparse 28import curses 29import pwd 30import re 31import signal 32from time import sleep 33 34FIELDS = ( 35 "PID", 36 "UID", 37 "CMD", 38 "HITS", 39 "MISSES", 40 "DIRTIES", 41 "READ_HIT%", 42 "WRITE_HIT%" 43) 44DEFAULT_FIELD = "HITS" 45DEFAULT_SORT_FIELD = FIELDS.index(DEFAULT_FIELD) 46 47# signal handler 48def signal_ignore(signal, frame): 49 print() 50 51 52# Function to gather data from /proc/meminfo 53# return dictionary for quicker lookup of both values 54def get_meminfo(): 55 result = {} 56 57 for line in open('/proc/meminfo'): 58 k = line.split(':', 3) 59 v = k[1].split() 60 result[k[0]] = int(v[0]) 61 return result 62 63 64def get_processes_stats( 65 bpf, 66 sort_field=DEFAULT_SORT_FIELD, 67 sort_reverse=False): 68 ''' 69 Return a tuple containing: 70 buffer 71 cached 72 list of tuple with per process cache stats 73 ''' 74 counts = bpf.get_table("counts") 75 stats = defaultdict(lambda: defaultdict(int)) 76 for k, v in counts.items(): 77 stats["%d-%d-%s" % (k.pid, k.uid, k.comm.decode('utf-8', 'replace'))][k.nf] = v.value 78 stats_list = [] 79 80 for pid, count in sorted(stats.items(), key=lambda stat: stat[0]): 81 rtaccess = 0 82 wtaccess = 0 83 mpa = 0 84 mbd = 0 85 apcl = 0 86 apd = 0 87 access = 0 88 misses = 0 89 rhits = 0 90 whits = 0 91 92 for k, v in count.items(): 93 if k == 0: # NF_APCL 94 apcl = max(0, v) 95 96 if k == 1: # NF_MPA 97 mpa = max(0, v) 98 99 if k == 2: # NF_MBD 100 mbd = max(0, v) 101 102 if k == 3: # NF_APD 103 apd = max(0, v) 104 105 # access = total cache access incl. reads(mpa) and writes(mbd) 106 # misses = total of add to lru which we do when we write(mbd) 107 # and also the mark the page dirty(same as mbd) 108 access = (mpa + mbd) 109 misses = (apcl + apd) 110 111 # rtaccess is the read hit % during the sample period. 112 # wtaccess is the write hit % during the sample period. 113 if mpa > 0: 114 rtaccess = float(mpa) / (access + misses) 115 if apcl > 0: 116 wtaccess = float(apcl) / (access + misses) 117 118 if wtaccess != 0: 119 whits = 100 * wtaccess 120 if rtaccess != 0: 121 rhits = 100 * rtaccess 122 123 _pid, uid, comm = pid.split('-', 2) 124 stats_list.append( 125 (int(_pid), uid, comm, 126 access, misses, mbd, 127 rhits, whits)) 128 129 stats_list = sorted( 130 stats_list, key=lambda stat: stat[sort_field], reverse=sort_reverse 131 ) 132 counts.clear() 133 return stats_list 134 135 136def handle_loop(stdscr, args): 137 # don't wait on key press 138 stdscr.nodelay(1) 139 # set default sorting field 140 sort_field = FIELDS.index(DEFAULT_FIELD) 141 sort_reverse = True 142 143 # load BPF program 144 bpf_text = """ 145 146 #include <uapi/linux/ptrace.h> 147 struct key_t { 148 // NF_{APCL,MPA,MBD,APD} 149 u64 nf; 150 u32 pid; 151 u32 uid; 152 char comm[16]; 153 }; 154 enum { 155 NF_APCL, 156 NF_MPA, 157 NF_MBD, 158 NF_APD, 159 }; 160 161 BPF_HASH(counts, struct key_t); 162 163 static int __do_count(void *ctx, u64 nf) { 164 u32 pid = bpf_get_current_pid_tgid() >> 32; 165 if (FILTER_PID) 166 return 0; 167 168 struct key_t key = {}; 169 u32 uid = bpf_get_current_uid_gid(); 170 171 key.nf = nf; 172 key.pid = pid; 173 key.uid = uid; 174 bpf_get_current_comm(&(key.comm), 16); 175 176 counts.increment(key); 177 return 0; 178 } 179 int do_count_apcl(struct pt_regs *ctx) { 180 return __do_count(ctx, NF_APCL); 181 } 182 int do_count_mpa(struct pt_regs *ctx) { 183 return __do_count(ctx, NF_MPA); 184 } 185 int do_count_mbd(struct pt_regs *ctx) { 186 return __do_count(ctx, NF_MBD); 187 } 188 int do_count_apd(struct pt_regs *ctx) { 189 return __do_count(ctx, NF_APD); 190 } 191 int do_count_apd_tp(void *ctx) { 192 return __do_count(ctx, NF_APD); 193 } 194 195 """ 196 197 if args.pid: 198 bpf_text = bpf_text.replace('FILTER_PID', 'pid != %d' % args.pid) 199 else: 200 bpf_text = bpf_text.replace('FILTER_PID', '0') 201 202 b = BPF(text=bpf_text) 203 b.attach_kprobe(event="add_to_page_cache_lru", fn_name="do_count_apcl") 204 b.attach_kprobe(event="mark_page_accessed", fn_name="do_count_mpa") 205 b.attach_kprobe(event="mark_buffer_dirty", fn_name="do_count_mbd") 206 207 # Function account_page_dirtied() is changed to folio_account_dirtied() in 5.15. 208 # Introduce tracepoint writeback_dirty_{page,folio} 209 if BPF.get_kprobe_functions(b'folio_account_dirtied'): 210 b.attach_kprobe(event="folio_account_dirtied", fn_name="do_count_apd") 211 elif BPF.get_kprobe_functions(b'account_page_dirtied'): 212 b.attach_kprobe(event="account_page_dirtied", fn_name="do_count_apd") 213 elif BPF.tracepoint_exists("writeback", "writeback_dirty_folio"): 214 b.attach_tracepoint(tp="writeback:writeback_dirty_folio", fn_name="do_count_apd_tp") 215 elif BPF.tracepoint_exists("writeback", "writeback_dirty_page"): 216 b.attach_tracepoint(tp="writeback:writeback_dirty_page", fn_name="do_count_apd_tp") 217 else: 218 raise Exception("Failed to attach kprobe %s or %s and any tracepoint" % 219 ("folio_account_dirtied", "account_page_dirtied")) 220 221 exiting = 0 222 223 while 1: 224 s = stdscr.getch() 225 if s == ord('q'): 226 exiting = 1 227 elif s == ord('r'): 228 sort_reverse = not sort_reverse 229 elif s == ord('<'): 230 sort_field = max(0, sort_field - 1) 231 elif s == ord('>'): 232 sort_field = min(len(FIELDS) - 1, sort_field + 1) 233 try: 234 sleep(args.interval) 235 except KeyboardInterrupt: 236 exiting = 1 237 # as cleanup can take many seconds, trap Ctrl-C: 238 signal.signal(signal.SIGINT, signal_ignore) 239 240 # Get memory info 241 mem = get_meminfo() 242 cached = int(mem["Cached"]) / 1024 243 buff = int(mem["Buffers"]) / 1024 244 245 process_stats = get_processes_stats( 246 b, 247 sort_field=sort_field, 248 sort_reverse=sort_reverse) 249 stdscr.clear() 250 stdscr.addstr( 251 0, 0, 252 "%-8s Buffers MB: %.0f / Cached MB: %.0f " 253 "/ Sort: %s / Order: %s" % ( 254 strftime("%H:%M:%S"), buff, cached, FIELDS[sort_field], 255 sort_reverse and "descending" or "ascending" 256 ) 257 ) 258 259 # header 260 stdscr.addstr( 261 1, 0, 262 "{0:8} {1:8} {2:16} {3:8} {4:8} {5:8} {6:10} {7:10}".format( 263 *FIELDS 264 ), 265 curses.A_REVERSE 266 ) 267 (height, width) = stdscr.getmaxyx() 268 for i, stat in enumerate(process_stats): 269 uid = int(stat[1]) 270 try: 271 username = pwd.getpwuid(uid)[0] 272 except KeyError: 273 # `pwd` throws a KeyError if the user cannot be found. This can 274 # happen e.g. when the process is running in a cgroup that has 275 # different users from the host. 276 username = 'UNKNOWN({})'.format(uid) 277 278 stdscr.addstr( 279 i + 2, 0, 280 "{0:8} {username:8.8} {2:16} {3:8} {4:8} " 281 "{5:8} {6:9.1f}% {7:9.1f}%".format( 282 *stat, username=username 283 ) 284 ) 285 if i > height - 4: 286 break 287 stdscr.refresh() 288 if exiting: 289 print("Detaching...") 290 return 291 292 293def parse_arguments(): 294 parser = argparse.ArgumentParser( 295 description='Show Linux page cache hit/miss statistics including read ' 296 'and write hit % per processes in a UI like top.' 297 ) 298 parser.add_argument("-p", "--pid", type=int, metavar="PID", 299 help="trace this PID only") 300 parser.add_argument( 301 'interval', type=int, default=5, nargs='?', 302 help='Interval between probes.' 303 ) 304 305 args = parser.parse_args() 306 return args 307 308args = parse_arguments() 309curses.wrapper(handle_loop, args) 310