1 // SPDX-License-Identifier: GPL-2.0
2 #include <test_progs.h>
3 #include <network_helpers.h>
4 #include "xdp_metadata.skel.h"
5 #include "xdp_metadata2.skel.h"
6 #include "xdp_metadata.h"
7 #include "xsk.h"
8
9 #include <bpf/btf.h>
10 #include <linux/errqueue.h>
11 #include <linux/if_link.h>
12 #include <linux/net_tstamp.h>
13 #include <netinet/udp.h>
14 #include <sys/mman.h>
15 #include <net/if.h>
16 #include <poll.h>
17
/* Interface names of the two ends of the veth pair created by the test. */
#define TX_NAME "veTX"
#define RX_NAME "veRX"

/* Number of payload bytes carried by every UDP test packet. */
#define UDP_PAYLOAD_BYTES 4

/* UDP source port of the AF_XDP-generated packet and destination port of all test packets. */
#define UDP_SOURCE_PORT 1234
#define AF_XDP_CONSUMER_PORT 8080

/* UMEM geometry: UMEM_NUM frames of UMEM_FRAME_SIZE bytes, UMEM_SIZE bytes in total. */
#define UMEM_NUM 16
#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM)
/* Flags handed to bpf_xdp_attach(). */
#define XDP_FLAGS XDP_FLAGS_DRV_MODE
/* Queue the AF_XDP sockets are created on; also used as key into the xsk map. */
#define QUEUE_ID 0

/* IPv4 addressing of the TX (VLAN) and RX interfaces. */
#define TX_ADDR "10.0.0.1"
#define RX_ADDR "10.0.0.2"
#define PREFIX_LEN "8"
#define FAMILY AF_INET
/* Network namespaces holding the TX and RX side of the veth pair. */
#define TX_NETNS_NAME "xdp_metadata_tx"
#define RX_NETNS_NAME "xdp_metadata_rx"
/* MAC addresses assigned to TX_NAME and RX_NAME. */
#define TX_MAC "00:00:00:00:00:01"
#define RX_MAC "00:00:00:00:00:02"

/* VLAN device stacked on TX_NAME; VLAN_PID is the value expected in rx_vlan_proto. */
#define VLAN_ID 59
#define VLAN_PROTO "802.1Q"
#define VLAN_PID htons(ETH_P_8021Q)
#define TX_NAME_VLAN TX_NAME "." TO_STR(VLAN_ID)

/* Bit tested in the rx_hash_type metadata field.
 * NOTE(review): presumably mirrors the kernel's L4 RSS hash type bit - confirm.
 */
#define XDP_RSS_TYPE_L4 BIT(3)
/* Mask applied to rx_vlan_tci before it is compared with VLAN_ID. */
#define VLAN_VID_MASK 0xfff
48
/* State of one AF_XDP socket together with the UMEM that backs it. */
struct xsk {
	void *umem_area;		/* UMEM_SIZE bytes obtained from mmap() in open_xsk() */
	struct xsk_umem *umem;		/* handle filled in by xsk_umem__create() */
	struct xsk_ring_prod fill;	/* fill ring, set up by xsk_umem__create() */
	struct xsk_ring_cons comp;	/* completion ring, set up by xsk_umem__create() */
	struct xsk_ring_prod tx;	/* TX ring, set up by xsk_socket__create() */
	struct xsk_ring_cons rx;	/* RX ring, set up by xsk_socket__create() */
	struct xsk_socket *socket;	/* handle filled in by xsk_socket__create() */
};
58
open_xsk(int ifindex,struct xsk * xsk)59 static int open_xsk(int ifindex, struct xsk *xsk)
60 {
61 int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
62 const struct xsk_socket_config socket_config = {
63 .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
64 .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
65 .bind_flags = XDP_COPY,
66 };
67 const struct xsk_umem_config umem_config = {
68 .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
69 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
70 .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
71 .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG | XDP_UMEM_TX_SW_CSUM |
72 XDP_UMEM_TX_METADATA_LEN,
73 .tx_metadata_len = sizeof(struct xsk_tx_metadata),
74 };
75 __u32 idx;
76 u64 addr;
77 int ret;
78 int i;
79
80 xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
81 if (!ASSERT_NEQ(xsk->umem_area, MAP_FAILED, "mmap"))
82 return -1;
83
84 ret = xsk_umem__create(&xsk->umem,
85 xsk->umem_area, UMEM_SIZE,
86 &xsk->fill,
87 &xsk->comp,
88 &umem_config);
89 if (!ASSERT_OK(ret, "xsk_umem__create"))
90 return ret;
91
92 ret = xsk_socket__create(&xsk->socket, ifindex, QUEUE_ID,
93 xsk->umem,
94 &xsk->rx,
95 &xsk->tx,
96 &socket_config);
97 if (!ASSERT_OK(ret, "xsk_socket__create"))
98 return ret;
99
100 /* First half of umem is for TX. This way address matches 1-to-1
101 * to the completion queue index.
102 */
103
104 for (i = 0; i < UMEM_NUM / 2; i++) {
105 addr = i * UMEM_FRAME_SIZE;
106 printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
107 }
108
109 /* Second half of umem is for RX. */
110
111 ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
112 if (!ASSERT_EQ(UMEM_NUM / 2, ret, "xsk_ring_prod__reserve"))
113 return ret;
114 if (!ASSERT_EQ(idx, 0, "fill idx != 0"))
115 return -1;
116
117 for (i = 0; i < UMEM_NUM / 2; i++) {
118 addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
119 printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
120 *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
121 }
122 xsk_ring_prod__submit(&xsk->fill, ret);
123
124 return 0;
125 }
126
close_xsk(struct xsk * xsk)127 static void close_xsk(struct xsk *xsk)
128 {
129 if (xsk->umem)
130 xsk_umem__delete(xsk->umem);
131 if (xsk->socket)
132 xsk_socket__delete(xsk->socket);
133 munmap(xsk->umem_area, UMEM_SIZE);
134 }
135
generate_packet(struct xsk * xsk,__u16 dst_port)136 static int generate_packet(struct xsk *xsk, __u16 dst_port)
137 {
138 struct xsk_tx_metadata *meta;
139 struct xdp_desc *tx_desc;
140 struct udphdr *udph;
141 struct ethhdr *eth;
142 struct iphdr *iph;
143 void *data;
144 __u32 idx;
145 int ret;
146
147 ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx);
148 if (!ASSERT_EQ(ret, 1, "xsk_ring_prod__reserve"))
149 return -1;
150
151 tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
152 tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata);
153 printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr);
154 data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
155
156 meta = data - sizeof(struct xsk_tx_metadata);
157 memset(meta, 0, sizeof(*meta));
158 meta->flags = XDP_TXMD_FLAGS_TIMESTAMP;
159
160 eth = data;
161 iph = (void *)(eth + 1);
162 udph = (void *)(iph + 1);
163
164 memcpy(eth->h_dest, "\x00\x00\x00\x00\x00\x02", ETH_ALEN);
165 memcpy(eth->h_source, "\x00\x00\x00\x00\x00\x01", ETH_ALEN);
166 eth->h_proto = htons(ETH_P_IP);
167
168 iph->version = 0x4;
169 iph->ihl = 0x5;
170 iph->tos = 0x9;
171 iph->tot_len = htons(sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES);
172 iph->id = 0;
173 iph->frag_off = 0;
174 iph->ttl = 0;
175 iph->protocol = IPPROTO_UDP;
176 ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)");
177 ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)");
178 iph->check = build_ip_csum(iph);
179
180 udph->source = htons(UDP_SOURCE_PORT);
181 udph->dest = htons(dst_port);
182 udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES);
183 udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
184 ntohs(udph->len), IPPROTO_UDP, 0);
185
186 memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES);
187
188 meta->flags |= XDP_TXMD_FLAGS_CHECKSUM;
189 meta->request.csum_start = sizeof(*eth) + sizeof(*iph);
190 meta->request.csum_offset = offsetof(struct udphdr, check);
191
192 tx_desc->len = sizeof(*eth) + sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES;
193 tx_desc->options |= XDP_TX_METADATA;
194 xsk_ring_prod__submit(&xsk->tx, 1);
195
196 ret = sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0);
197 if (!ASSERT_GE(ret, 0, "sendto"))
198 return ret;
199
200 return 0;
201 }
202
generate_packet_inet(void)203 static int generate_packet_inet(void)
204 {
205 char udp_payload[UDP_PAYLOAD_BYTES];
206 struct sockaddr_in rx_addr;
207 int sock_fd, err = 0;
208
209 /* Build a packet */
210 memset(udp_payload, 0xAA, UDP_PAYLOAD_BYTES);
211 rx_addr.sin_addr.s_addr = inet_addr(RX_ADDR);
212 rx_addr.sin_family = AF_INET;
213 rx_addr.sin_port = htons(AF_XDP_CONSUMER_PORT);
214
215 sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
216 if (!ASSERT_GE(sock_fd, 0, "socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)"))
217 return sock_fd;
218
219 err = sendto(sock_fd, udp_payload, UDP_PAYLOAD_BYTES, MSG_DONTWAIT,
220 (void *)&rx_addr, sizeof(rx_addr));
221 ASSERT_GE(err, 0, "sendto");
222
223 close(sock_fd);
224 return err;
225 }
226
complete_tx(struct xsk * xsk)227 static void complete_tx(struct xsk *xsk)
228 {
229 struct xsk_tx_metadata *meta;
230 __u64 addr;
231 void *data;
232 __u32 idx;
233
234 if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) {
235 addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
236
237 printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
238
239 data = xsk_umem__get_data(xsk->umem_area, addr);
240 meta = data - sizeof(struct xsk_tx_metadata);
241
242 ASSERT_NEQ(meta->completion.tx_timestamp, 0, "tx_timestamp");
243
244 xsk_ring_cons__release(&xsk->comp, 1);
245 }
246 }
247
refill_rx(struct xsk * xsk,__u64 addr)248 static void refill_rx(struct xsk *xsk, __u64 addr)
249 {
250 __u32 idx;
251
252 if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) {
253 printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
254 *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
255 xsk_ring_prod__submit(&xsk->fill, 1);
256 }
257 }
258
verify_xsk_metadata(struct xsk * xsk,bool sent_from_af_xdp)259 static int verify_xsk_metadata(struct xsk *xsk, bool sent_from_af_xdp)
260 {
261 const struct xdp_desc *rx_desc;
262 struct pollfd fds = {};
263 struct xdp_meta *meta;
264 struct udphdr *udph;
265 struct ethhdr *eth;
266 struct iphdr *iph;
267 __u64 comp_addr;
268 void *data;
269 __u64 addr;
270 __u32 idx = 0;
271 int ret;
272
273 ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL);
274 if (!ASSERT_EQ(ret, 0, "recvfrom"))
275 return -1;
276
277 fds.fd = xsk_socket__fd(xsk->socket);
278 fds.events = POLLIN;
279
280 ret = poll(&fds, 1, 1000);
281 if (!ASSERT_GT(ret, 0, "poll"))
282 return -1;
283
284 ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
285 if (!ASSERT_EQ(ret, 1, "xsk_ring_cons__peek"))
286 return -2;
287
288 rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
289 comp_addr = xsk_umem__extract_addr(rx_desc->addr);
290 addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
291 printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
292 xsk, idx, rx_desc->addr, addr, comp_addr);
293 data = xsk_umem__get_data(xsk->umem_area, addr);
294
295 /* Make sure we got the packet offset correctly. */
296
297 eth = data;
298 ASSERT_EQ(eth->h_proto, htons(ETH_P_IP), "eth->h_proto");
299 iph = (void *)(eth + 1);
300 ASSERT_EQ((int)iph->version, 4, "iph->version");
301 udph = (void *)(iph + 1);
302
303 /* custom metadata */
304
305 meta = data - sizeof(struct xdp_meta);
306
307 if (!ASSERT_NEQ(meta->rx_timestamp, 0, "rx_timestamp"))
308 return -1;
309
310 if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash"))
311 return -1;
312
313 if (!sent_from_af_xdp) {
314 if (!ASSERT_NEQ(meta->rx_hash_type & XDP_RSS_TYPE_L4, 0, "rx_hash_type"))
315 return -1;
316
317 if (!ASSERT_EQ(meta->rx_vlan_tci & VLAN_VID_MASK, VLAN_ID, "rx_vlan_tci"))
318 return -1;
319
320 if (!ASSERT_EQ(meta->rx_vlan_proto, VLAN_PID, "rx_vlan_proto"))
321 return -1;
322 goto done;
323 }
324
325 ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type");
326
327 /* checksum offload */
328 ASSERT_EQ(udph->check, htons(0x721c), "csum");
329
330 done:
331 xsk_ring_cons__release(&xsk->rx, 1);
332 refill_rx(xsk, comp_addr);
333
334 return 0;
335 }
336
switch_ns_to_rx(struct nstoken ** tok)337 static void switch_ns_to_rx(struct nstoken **tok)
338 {
339 close_netns(*tok);
340 *tok = open_netns(RX_NETNS_NAME);
341 }
342
switch_ns_to_tx(struct nstoken ** tok)343 static void switch_ns_to_tx(struct nstoken **tok)
344 {
345 close_netns(*tok);
346 *tok = open_netns(TX_NETNS_NAME);
347 }
348
test_xdp_metadata(void)349 void test_xdp_metadata(void)
350 {
351 struct xdp_metadata2 *bpf_obj2 = NULL;
352 struct xdp_metadata *bpf_obj = NULL;
353 struct bpf_program *new_prog, *prog;
354 struct nstoken *tok = NULL;
355 __u32 queue_id = QUEUE_ID;
356 struct bpf_map *prog_arr;
357 struct xsk tx_xsk = {};
358 struct xsk rx_xsk = {};
359 __u32 val, key = 0;
360 int retries = 10;
361 int rx_ifindex;
362 int tx_ifindex;
363 int sock_fd;
364 int ret;
365
366 /* Setup new networking namespaces, with a veth pair. */
367 SYS(out, "ip netns add " TX_NETNS_NAME);
368 SYS(out, "ip netns add " RX_NETNS_NAME);
369
370 tok = open_netns(TX_NETNS_NAME);
371 if (!ASSERT_OK_PTR(tok, "setns"))
372 goto out;
373 SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME
374 " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1");
375 SYS(out, "ip link set " RX_NAME " netns " RX_NETNS_NAME);
376
377 SYS(out, "ip link set dev " TX_NAME " address " TX_MAC);
378 SYS(out, "ip link set dev " TX_NAME " up");
379
380 SYS(out, "ip link add link " TX_NAME " " TX_NAME_VLAN
381 " type vlan proto " VLAN_PROTO " id " TO_STR(VLAN_ID));
382 SYS(out, "ip link set dev " TX_NAME_VLAN " up");
383 SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME_VLAN);
384
385 /* Avoid ARP calls */
386 SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME_VLAN);
387
388 switch_ns_to_rx(&tok);
389 if (!ASSERT_OK_PTR(tok, "setns rx"))
390 goto out;
391
392 SYS(out, "ip link set dev " RX_NAME " address " RX_MAC);
393 SYS(out, "ip link set dev " RX_NAME " up");
394 SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);
395
396 rx_ifindex = if_nametoindex(RX_NAME);
397
398 /* Setup separate AF_XDP for RX interface. */
399
400 ret = open_xsk(rx_ifindex, &rx_xsk);
401 if (!ASSERT_OK(ret, "open_xsk(RX_NAME)"))
402 goto out;
403
404 bpf_obj = xdp_metadata__open();
405 if (!ASSERT_OK_PTR(bpf_obj, "open skeleton"))
406 goto out;
407
408 prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
409 bpf_program__set_ifindex(prog, rx_ifindex);
410 bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
411
412 if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton"))
413 goto out;
414
415 /* Make sure we can't add dev-bound programs to prog maps. */
416 prog_arr = bpf_object__find_map_by_name(bpf_obj->obj, "prog_arr");
417 if (!ASSERT_OK_PTR(prog_arr, "no prog_arr map"))
418 goto out;
419
420 val = bpf_program__fd(prog);
421 if (!ASSERT_ERR(bpf_map__update_elem(prog_arr, &key, sizeof(key),
422 &val, sizeof(val), BPF_ANY),
423 "update prog_arr"))
424 goto out;
425
426 /* Attach BPF program to RX interface. */
427
428 ret = bpf_xdp_attach(rx_ifindex,
429 bpf_program__fd(bpf_obj->progs.rx),
430 XDP_FLAGS, NULL);
431 if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
432 goto out;
433
434 sock_fd = xsk_socket__fd(rx_xsk.socket);
435 ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
436 if (!ASSERT_GE(ret, 0, "bpf_map_update_elem"))
437 goto out;
438
439 switch_ns_to_tx(&tok);
440 if (!ASSERT_OK_PTR(tok, "setns tx"))
441 goto out;
442
443 /* Setup separate AF_XDP for TX interface nad send packet to the RX socket. */
444 tx_ifindex = if_nametoindex(TX_NAME);
445 ret = open_xsk(tx_ifindex, &tx_xsk);
446 if (!ASSERT_OK(ret, "open_xsk(TX_NAME)"))
447 goto out;
448
449 if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
450 "generate AF_XDP_CONSUMER_PORT"))
451 goto out;
452
453 switch_ns_to_rx(&tok);
454 if (!ASSERT_OK_PTR(tok, "setns rx"))
455 goto out;
456
457 /* Verify packet sent from AF_XDP has proper metadata. */
458 if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, true), 0,
459 "verify_xsk_metadata"))
460 goto out;
461
462 switch_ns_to_tx(&tok);
463 if (!ASSERT_OK_PTR(tok, "setns tx"))
464 goto out;
465 complete_tx(&tx_xsk);
466
467 /* Now check metadata of packet, generated with network stack */
468 if (!ASSERT_GE(generate_packet_inet(), 0, "generate UDP packet"))
469 goto out;
470
471 switch_ns_to_rx(&tok);
472 if (!ASSERT_OK_PTR(tok, "setns rx"))
473 goto out;
474
475 if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, false), 0,
476 "verify_xsk_metadata"))
477 goto out;
478
479 /* Make sure freplace correctly picks up original bound device
480 * and doesn't crash.
481 */
482
483 bpf_obj2 = xdp_metadata2__open();
484 if (!ASSERT_OK_PTR(bpf_obj2, "open skeleton"))
485 goto out;
486
487 new_prog = bpf_object__find_program_by_name(bpf_obj2->obj, "freplace_rx");
488 bpf_program__set_attach_target(new_prog, bpf_program__fd(prog), "rx");
489
490 if (!ASSERT_OK(xdp_metadata2__load(bpf_obj2), "load freplace skeleton"))
491 goto out;
492
493 if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace"))
494 goto out;
495
496 switch_ns_to_tx(&tok);
497 if (!ASSERT_OK_PTR(tok, "setns tx"))
498 goto out;
499
500 /* Send packet to trigger . */
501 if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
502 "generate freplace packet"))
503 goto out;
504
505 switch_ns_to_rx(&tok);
506 if (!ASSERT_OK_PTR(tok, "setns rx"))
507 goto out;
508
509 while (!retries--) {
510 if (bpf_obj2->bss->called)
511 break;
512 usleep(10);
513 }
514 ASSERT_GT(bpf_obj2->bss->called, 0, "not called");
515
516 out:
517 close_xsk(&rx_xsk);
518 close_xsk(&tx_xsk);
519 xdp_metadata2__destroy(bpf_obj2);
520 xdp_metadata__destroy(bpf_obj);
521 if (tok)
522 close_netns(tok);
523 SYS_NOFAIL("ip netns del " RX_NETNS_NAME);
524 SYS_NOFAIL("ip netns del " TX_NETNS_NAME);
525 }
526