// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2021 Wenbo Zhang
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
#include "tcprtt.h"
#include "bits.bpf.h"
#include "maps.bpf.h"

/* Taken from kernel include/linux/socket.h. */
#define AF_INET		2	/* IP version 4 */
#define AF_INET6	10	/* IP version 6 */

const volatile bool targ_laddr_hist = false;
const volatile bool targ_raddr_hist = false;
const volatile bool targ_show_ext = false;
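/*
 * Ports are matched in network byte order: inet_sport and skc_dport are
 * stored big-endian in the kernel, so userspace is expected to pass
 * htons()-converted values here.
 */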
const volatile __u16 targ_sport = 0;
const volatile __u16 targ_dport = 0;
const volatile __u32 targ_saddr = 0;
const volatile __u32 targ_daddr = 0;
const volatile __u8 targ_saddr_v6[IPV6_LEN] = {};
const volatile __u8 targ_daddr_v6[IPV6_LEN] = {};
const volatile bool targ_ms = false;

#define MAX_ENTRIES	10240

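/*
 * One log2 histogram per key. The key carries the address family and the
 * local or remote address when per-address grouping is requested;
 * otherwise a single all-zero key aggregates everything.
 */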
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, struct hist_key);
	__type(value, struct hist);
} hists SEC(".maps");

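/* All-zero template used as the initial value by bpf_map_lookup_or_try_init(). */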
static struct hist zero;

/*
 * We cannot use the following:
 * __builtin_memcmp(targ_*addr_v6, *, sizeof(targ_*addr_v6));
 * Indeed, by using the builtin, we would discard the volatile qualifier of
 * targ_*addr_v6, so the compiler would optimize the comparison away and
 * replace the call with a constant 0.
 * So, using the volatile qualifier ensures this function is called at runtime.
 */
static inline bool ipv6_is_not_zero(const volatile __u8 addr[IPV6_LEN])
{
	for (int i = 0; i < IPV6_LEN; i++)
		if (addr[i])
			return true;
	return false;
}

static inline bool ipv6_are_different(const volatile __u8 a[IPV6_LEN], const __u8 b[IPV6_LEN])
{
	for (int i = 0; i < IPV6_LEN; i++)
		if (a[i] != b[i])
			return true;
	return false;
}

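/*
 * Shared handler for both attach points: filter the socket against the
 * configured address/port targets, then record its smoothed RTT in the
 * matching histogram.
 */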
static int handle_tcp_rcv_established(struct sock *sk)
{
	const struct inet_sock *inet = (struct inet_sock *)(sk);
	struct tcp_sock *ts;
	struct hist *histp;
	struct hist_key key = {};
	u64 slot;
	u32 srtt;

	if (targ_sport && targ_sport != BPF_CORE_READ(inet, inet_sport))
		return 0;
	if (targ_dport && targ_dport != BPF_CORE_READ(sk, __sk_common.skc_dport))
		return 0;

	key.family = BPF_CORE_READ(sk, __sk_common.skc_family);
	switch (key.family) {
	case AF_INET:
		/* If any IPv6 address is set, we do not care about IPv4 ones. */
		if (ipv6_is_not_zero(targ_saddr_v6) || ipv6_is_not_zero(targ_daddr_v6))
			return 0;

		if (targ_saddr && targ_saddr != BPF_CORE_READ(inet, inet_saddr))
			return 0;

		if (targ_daddr && targ_daddr != BPF_CORE_READ(sk, __sk_common.skc_daddr))
			return 0;

		break;
	case AF_INET6:
		/*
		 * Conversely, if any IPv4 address is set, we do not care
		 * about IPv6 ones.
		 */
		if (targ_saddr || targ_daddr)
			return 0;

		if (ipv6_is_not_zero(targ_saddr_v6) &&
		    ipv6_are_different(targ_saddr_v6, BPF_CORE_READ(inet, pinet6, saddr.in6_u.u6_addr8)))
			return 0;

		if (ipv6_is_not_zero(targ_daddr_v6) &&
		    ipv6_are_different(targ_daddr_v6, BPF_CORE_READ(sk, __sk_common.skc_v6_daddr.in6_u.u6_addr8)))
			return 0;

		break;
	default:
		return 0;
	}

	if (targ_laddr_hist) {
		if (key.family == AF_INET6)
			bpf_probe_read_kernel(&key.addr, sizeof(key.addr), BPF_CORE_READ(inet, pinet6, saddr.in6_u.u6_addr8));
		else
			bpf_probe_read_kernel(&key.addr, sizeof(inet->inet_saddr), &inet->inet_saddr);
	} else if (targ_raddr_hist) {
		if (key.family == AF_INET6)
			bpf_probe_read_kernel(&key.addr, sizeof(key.addr), BPF_CORE_READ(sk, __sk_common.skc_v6_daddr.in6_u.u6_addr8));
		else
			bpf_probe_read_kernel(&key.addr, sizeof(inet->sk.__sk_common.skc_daddr), &inet->sk.__sk_common.skc_daddr);
	} else {
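		/* No per-address grouping: fold everything into a single all-zero key. */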
		key.family = 0;
	}

	histp = bpf_map_lookup_or_try_init(&hists, &key, &zero);
	if (!histp)
		return 0;
	ts = (struct tcp_sock *)(sk);
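	/*
	 * tcp_sock->srtt_us stores the smoothed RTT in microseconds,
	 * left-shifted by 3 (an EWMA with 1/8 gain); >> 3 recovers usecs.
	 */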
	srtt = BPF_CORE_READ(ts, srtt_us) >> 3;
	if (targ_ms)
		srtt /= 1000U;
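	/* Index into the log2 histogram, clamping to the last slot. */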
	slot = log2l(srtt);
	if (slot >= MAX_SLOTS)
		slot = MAX_SLOTS - 1;
	__sync_fetch_and_add(&histp->slots[slot], 1);
	if (targ_show_ext) {
		__sync_fetch_and_add(&histp->latency, srtt);
		__sync_fetch_and_add(&histp->cnt, 1);
	}
	return 0;
}

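/*
 * Two flavors of the same probe: the userspace loader is expected to attach
 * only one, preferring fentry when the kernel supports BPF trampolines and
 * falling back to the kprobe otherwise.
 */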
SEC("fentry/tcp_rcv_established")
int BPF_PROG(tcp_rcv, struct sock *sk)
{
	return handle_tcp_rcv_established(sk);
}

SEC("kprobe/tcp_rcv_established")
int BPF_KPROBE(tcp_rcv_kprobe, struct sock *sk)
{
	return handle_tcp_rcv_established(sk);
}

char LICENSE[] SEC("license") = "Dual BSD/GPL";