1 // SPDX-License-Identifier: GPL-2.0
2 #include <assert.h>
3 #include <errno.h>
4 #include <error.h>
5 #include <fcntl.h>
6 #include <inttypes.h>
7 #include <limits.h>
8 #include <stdlib.h>
9 #include <stdio.h>
10 #include <string.h>
11 #include <unistd.h>
12 #include <ynl.h>
13
14 #include <arpa/inet.h>
15 #include <netinet/in.h>
16
17 #include <sys/epoll.h>
18 #include <sys/ioctl.h>
19 #include <sys/socket.h>
20 #include <sys/types.h>
21
22 #include <linux/genetlink.h>
23 #include <linux/netlink.h>
24
25 #include "netdev-user.h"
26
27 /* The below ifdef blob is required because:
28 *
29 * - sys/epoll.h does not (yet) have the ioctl definitions included. So,
30 * systems with older glibcs will not have them available. However,
31 * sys/epoll.h does include the type definition for epoll_data, which is
32 * needed by the user program (e.g. epoll_event.data.fd)
33 *
34 * - linux/eventpoll.h does not define the epoll_data type, it is simply an
35 * opaque __u64. It does, however, include the ioctl definition.
36 *
37 * Including both headers is impossible (types would be redefined), so I've
38 * opted instead to take sys/epoll.h, and include the blob below.
39 *
40 * Someday, when glibc is globally up to date, the blob below can be removed.
41 */
/* Fallback definitions, compiled only when the included headers did not
 * already provide EPOLL_IOC_TYPE (see the explanation above).
 */
#if !defined(EPOLL_IOC_TYPE)
/* Argument of the EPIOCSPARAMS / EPIOCGPARAMS ioctls. The four members add
 * up to 8 bytes; run_poller() fills them from the cfg_busy_poll_* and
 * cfg_prefer_busy_poll options.
 */
struct epoll_params {
	uint32_t busy_poll_usecs;
	uint16_t busy_poll_budget;
	uint8_t prefer_busy_poll;

	/* pad the struct to a multiple of 64bits */
	uint8_t __pad;
};

#define EPOLL_IOC_TYPE 0x8A
/* set (_IOW) and get (_IOR) the parameters on an epoll file descriptor */
#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
#endif
56
/* Run-time configuration, filled in by parse_opts() */

/* -p: TCP port the poller binds to */
static uint16_t cfg_port = 8000;
/* -b: IPv4 address the poller binds to */
static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY };
/* -o: path of the file received data is written to (strdup'd, freed in main) */
static char *cfg_outfile;
/* -m: maximum number of events requested per epoll_wait() call */
static int cfg_max_events = 8;
/* -i: interface index whose NAPI is configured; must be non-zero */
static uint32_t cfg_ifindex;

/* busy poll params (-u, -g, -P), applied to the epoll fd via EPIOCSPARAMS */
static uint32_t cfg_busy_poll_usecs;
static uint16_t cfg_busy_poll_budget;
static uint8_t cfg_prefer_busy_poll;

/* IRQ params (-d, -r, -s), applied to the NAPI in setup_queue() */
static uint32_t cfg_defer_hard_irqs;
static uint64_t cfg_gro_flush_timeout;
static uint64_t cfg_irq_suspend_timeout;
72
/* Print the command line synopsis, naming the program as @filepath, and
 * terminate: error() is called with a non-zero status, so this function does
 * not return.
 */
static void usage(const char *filepath)
{
	const int exit_status = 1;

	error(exit_status, 0, "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>", filepath);
}
79
parse_opts(int argc,char ** argv)80 static void parse_opts(int argc, char **argv)
81 {
82 unsigned long long tmp;
83 int ret;
84 int c;
85
86 if (argc <= 1)
87 usage(argv[0]);
88
89 while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) {
90 /* most options take integer values, except o and b, so reduce
91 * code duplication a bit for the common case by calling
92 * strtoull here and leave bounds checking and casting per
93 * option below.
94 */
95 if (c != 'o' && c != 'b')
96 tmp = strtoull(optarg, NULL, 0);
97
98 switch (c) {
99 case 'u':
100 if (tmp == ULLONG_MAX || tmp > UINT32_MAX)
101 error(1, ERANGE, "busy_poll_usecs too large");
102
103 cfg_busy_poll_usecs = (uint32_t)tmp;
104 break;
105 case 'P':
106 if (tmp == ULLONG_MAX || tmp > 1)
107 error(1, ERANGE,
108 "prefer busy poll should be 0 or 1");
109
110 cfg_prefer_busy_poll = (uint8_t)tmp;
111 break;
112 case 'g':
113 if (tmp == ULLONG_MAX || tmp > UINT16_MAX)
114 error(1, ERANGE,
115 "busy poll budget must be [0, UINT16_MAX]");
116
117 cfg_busy_poll_budget = (uint16_t)tmp;
118 break;
119 case 'p':
120 if (tmp == ULLONG_MAX || tmp > UINT16_MAX)
121 error(1, ERANGE, "port must be <= 65535");
122
123 cfg_port = (uint16_t)tmp;
124 break;
125 case 'b':
126 ret = inet_aton(optarg, &cfg_bind_addr);
127 if (ret == 0)
128 error(1, errno,
129 "bind address %s invalid", optarg);
130 break;
131 case 'o':
132 cfg_outfile = strdup(optarg);
133 if (!cfg_outfile)
134 error(1, 0, "outfile invalid");
135 break;
136 case 'm':
137 if (tmp == ULLONG_MAX || tmp > INT_MAX)
138 error(1, ERANGE,
139 "max events must be > 0 and <= INT_MAX");
140
141 cfg_max_events = (int)tmp;
142 break;
143 case 'd':
144 if (tmp == ULLONG_MAX || tmp > INT32_MAX)
145 error(1, ERANGE,
146 "defer_hard_irqs must be <= INT32_MAX");
147
148 cfg_defer_hard_irqs = (uint32_t)tmp;
149 break;
150 case 'r':
151 if (tmp == ULLONG_MAX || tmp > UINT64_MAX)
152 error(1, ERANGE,
153 "gro_flush_timeout must be < UINT64_MAX");
154
155 cfg_gro_flush_timeout = (uint64_t)tmp;
156 break;
157 case 's':
158 if (tmp == ULLONG_MAX || tmp > UINT64_MAX)
159 error(1, ERANGE,
160 "irq_suspend_timeout must be < ULLONG_MAX");
161
162 cfg_irq_suspend_timeout = (uint64_t)tmp;
163 break;
164 case 'i':
165 if (tmp == ULLONG_MAX || tmp > INT_MAX)
166 error(1, ERANGE,
167 "ifindex must be <= INT_MAX");
168
169 cfg_ifindex = (int)tmp;
170 break;
171 }
172 }
173
174 if (!cfg_ifindex)
175 usage(argv[0]);
176
177 if (optind != argc)
178 usage(argv[0]);
179 }
180
/* Register @fd with the epoll instance @epfd for the event mask @events.
 *
 * @fd is also stored as the user data of the registration, so it comes back
 * in epoll_event.data.fd when an event is reported. A failing epoll_ctl()
 * terminates the program through error().
 */
static void epoll_ctl_add(int epfd, int fd, uint32_t events)
{
	/* data is a union whose 64-bit member is wider than the fd kept in
	 * it; clear all of it so no uninitialized stack bytes reach the kernel
	 */
	struct epoll_event ev = {
		.events = events,
		.data = { .u64 = 0 },
	};

	ev.data.fd = fd;
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1)
		error(1, errno, "epoll_ctl add fd: %d", fd);
}
190
/* Switch the descriptor @sockfd to non-blocking mode while keeping its other
 * file status flags. Either fcntl() call failing terminates the program
 * through error().
 */
static void setnonblock(int sockfd)
{
	int flags;

	flags = fcntl(sockfd, F_GETFL, 0);
	/* without this check a failed F_GETFL (-1) would be OR-ed with
	 * O_NONBLOCK and handed to F_SETFL as a flag set
	 */
	if (flags == -1)
		error(1, errno, "unable to read socket flags");

	if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1)
		error(1, errno, "unable to set socket to nonblocking mode");
}
200
/* Write all @buflen bytes starting at @buf to @fd.
 *
 * write() may accept fewer bytes than requested, so it is called again until
 * nothing is left. An interrupted call (EINTR) is retried; any other failure
 * terminates the program through error(). Nothing is written when @buflen is
 * zero or negative.
 */
static void write_chunk(int fd, char *buf, ssize_t buflen)
{
	ssize_t remaining = buflen;
	ssize_t written;

	while (remaining > 0) {
		written = write(fd, buf, remaining);
		if (written == -1) {
			/* a signal arrived before any byte was written */
			if (errno == EINTR)
				continue;
			error(1, errno, "unable to write data to outfile");
		}

		buf += written;
		remaining -= written;
	}
}
218
setup_queue(void)219 static void setup_queue(void)
220 {
221 struct netdev_napi_get_list *napi_list = NULL;
222 struct netdev_napi_get_req_dump *req = NULL;
223 struct netdev_napi_set_req *set_req = NULL;
224 struct ynl_sock *ys;
225 struct ynl_error yerr;
226 uint32_t napi_id = 0;
227
228 ys = ynl_sock_create(&ynl_netdev_family, &yerr);
229 if (!ys)
230 error(1, 0, "YNL: %s", yerr.msg);
231
232 req = netdev_napi_get_req_dump_alloc();
233 netdev_napi_get_req_dump_set_ifindex(req, cfg_ifindex);
234 napi_list = netdev_napi_get_dump(ys, req);
235
236 /* assume there is 1 NAPI configured and take the first */
237 if (napi_list->obj._present.id)
238 napi_id = napi_list->obj.id;
239 else
240 error(1, 0, "napi ID not present?");
241
242 set_req = netdev_napi_set_req_alloc();
243 netdev_napi_set_req_set_id(set_req, napi_id);
244 netdev_napi_set_req_set_defer_hard_irqs(set_req, cfg_defer_hard_irqs);
245 netdev_napi_set_req_set_gro_flush_timeout(set_req,
246 cfg_gro_flush_timeout);
247 netdev_napi_set_req_set_irq_suspend_timeout(set_req,
248 cfg_irq_suspend_timeout);
249
250 if (netdev_napi_set(ys, set_req))
251 error(1, 0, "can't set NAPI params: %s\n", yerr.msg);
252
253 netdev_napi_get_list_free(napi_list);
254 netdev_napi_get_req_dump_free(req);
255 netdev_napi_set_req_free(set_req);
256 ynl_sock_destroy(ys);
257 }
258
run_poller(void)259 static void run_poller(void)
260 {
261 struct epoll_event events[cfg_max_events];
262 struct epoll_params epoll_params = {0};
263 struct sockaddr_in server_addr;
264 int i, epfd, nfds;
265 ssize_t readlen;
266 int outfile_fd;
267 char buf[1024];
268 int sockfd;
269 int conn;
270 int val;
271
272 outfile_fd = open(cfg_outfile, O_WRONLY | O_CREAT, 0644);
273 if (outfile_fd == -1)
274 error(1, errno, "unable to open outfile: %s", cfg_outfile);
275
276 sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
277 if (sockfd == -1)
278 error(1, errno, "unable to create listen socket");
279
280 server_addr.sin_family = AF_INET;
281 server_addr.sin_port = htons(cfg_port);
282 server_addr.sin_addr = cfg_bind_addr;
283
284 /* these values are range checked during parse_opts, so casting is safe
285 * here
286 */
287 epoll_params.busy_poll_usecs = cfg_busy_poll_usecs;
288 epoll_params.busy_poll_budget = cfg_busy_poll_budget;
289 epoll_params.prefer_busy_poll = cfg_prefer_busy_poll;
290 epoll_params.__pad = 0;
291
292 val = 1;
293 if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)))
294 error(1, errno, "poller setsockopt reuseaddr");
295
296 setnonblock(sockfd);
297
298 if (bind(sockfd, (struct sockaddr *)&server_addr,
299 sizeof(struct sockaddr_in)))
300 error(0, errno, "poller bind to port: %d\n", cfg_port);
301
302 if (listen(sockfd, 1))
303 error(1, errno, "poller listen");
304
305 epfd = epoll_create1(0);
306 if (ioctl(epfd, EPIOCSPARAMS, &epoll_params) == -1)
307 error(1, errno, "unable to set busy poll params");
308
309 epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLOUT | EPOLLET);
310
311 for (;;) {
312 nfds = epoll_wait(epfd, events, cfg_max_events, -1);
313 for (i = 0; i < nfds; i++) {
314 if (events[i].data.fd == sockfd) {
315 conn = accept(sockfd, NULL, NULL);
316 if (conn == -1)
317 error(1, errno,
318 "accepting incoming connection failed");
319
320 setnonblock(conn);
321 epoll_ctl_add(epfd, conn,
322 EPOLLIN | EPOLLET | EPOLLRDHUP |
323 EPOLLHUP);
324 } else if (events[i].events & EPOLLIN) {
325 for (;;) {
326 readlen = read(events[i].data.fd, buf,
327 sizeof(buf));
328 if (readlen > 0)
329 write_chunk(outfile_fd, buf,
330 readlen);
331 else
332 break;
333 }
334 } else {
335 /* spurious event ? */
336 }
337 if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) {
338 epoll_ctl(epfd, EPOLL_CTL_DEL,
339 events[i].data.fd, NULL);
340 close(events[i].data.fd);
341 close(outfile_fd);
342 return;
343 }
344 }
345 }
346 }
347
main(int argc,char * argv[])348 int main(int argc, char *argv[])
349 {
350 parse_opts(argc, argv);
351 setup_queue();
352 run_poller();
353
354 if (cfg_outfile)
355 free(cfg_outfile);
356
357 return 0;
358 }
359