1 /* $OpenBSD: kqueue.c,v 1.5 2002/07/10 14:41:31 art Exp $ */
2
3 /*
4 * Copyright 2000-2002 Niels Provos <[email protected]>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29 #ifdef HAVE_CONFIG_H
30 #include "config.h"
31 #endif
32
33 #define _GNU_SOURCE 1
34
35 #include <sys/types.h>
36 #ifdef HAVE_SYS_TIME_H
37 #include <sys/time.h>
38 #else
39 #include <sys/_libevent_time.h>
40 #endif
41 #include <sys/queue.h>
42 #include <sys/event.h>
43 #include <signal.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <errno.h>
49 #include <assert.h>
50 #ifdef HAVE_INTTYPES_H
51 #include <inttypes.h>
52 #endif
53
/*
 * The udata member of struct kevent is a void * on some platforms and an
 * intptr_t on others.  Autoconf offers no easy way to tell the two apart,
 * so the decision is made from OS macros: the pointer is passed through
 * untouched on OpenBSD, FreeBSD and Darwin/Apple systems (and whenever
 * <inttypes.h> is unavailable), and cast to intptr_t everywhere else.
 */
#if !defined(HAVE_INTTYPES_H) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__darwin__) || defined(__APPLE__)
#define PTR_TO_UDATA(x)	(x)
#else
#define PTR_TO_UDATA(x)	((intptr_t)(x))
#endif
62
63 #include "event.h"
64 #include "event-internal.h"
65 #include "log.h"
66 #include "evsignal.h"
67
68 #define EVLIST_X_KQINKERNEL 0x1000
69
70 #define NEVENT 64
71
72 struct kqop {
73 struct kevent *changes;
74 int nchanges;
75 struct kevent *events;
76 struct event_list evsigevents[NSIG];
77 int nevents;
78 int kq;
79 pid_t pid;
80 };
81
/* Forward declarations of the backend entry points defined in this file. */
static void	*kq_init(struct event_base *base);
static int	 kq_add(void *arg, struct event *ev);
static int	 kq_del(void *arg, struct event *ev);
static int	 kq_dispatch(struct event_base *base, void *arg,
		    struct timeval *tv);
static int	 kq_insert(struct kqop *kqop, struct kevent *kev);
static void	 kq_dealloc(struct event_base *base, void *arg);
88
89 const struct eventop kqops = {
90 "kqueue",
91 kq_init,
92 kq_add,
93 kq_del,
94 kq_dispatch,
95 kq_dealloc,
96 1 /* need reinit */
97 };
98
99 static void *
kq_init(struct event_base * base)100 kq_init(struct event_base *base)
101 {
102 int i, kq;
103 struct kqop *kqueueop;
104
105 /* Disable kqueue when this environment variable is set */
106 if (evutil_getenv("EVENT_NOKQUEUE"))
107 return (NULL);
108
109 if (!(kqueueop = calloc(1, sizeof(struct kqop))))
110 return (NULL);
111
112 /* Initalize the kernel queue */
113
114 if ((kq = kqueue()) == -1) {
115 event_warn("kqueue");
116 free (kqueueop);
117 return (NULL);
118 }
119
120 kqueueop->kq = kq;
121
122 kqueueop->pid = getpid();
123
124 /* Initalize fields */
125 kqueueop->changes = malloc(NEVENT * sizeof(struct kevent));
126 if (kqueueop->changes == NULL) {
127 free (kqueueop);
128 return (NULL);
129 }
130 kqueueop->events = malloc(NEVENT * sizeof(struct kevent));
131 if (kqueueop->events == NULL) {
132 free (kqueueop->changes);
133 free (kqueueop);
134 return (NULL);
135 }
136 kqueueop->nevents = NEVENT;
137
138 /* we need to keep track of multiple events per signal */
139 for (i = 0; i < NSIG; ++i) {
140 TAILQ_INIT(&kqueueop->evsigevents[i]);
141 }
142
143 return (kqueueop);
144 }
145
146 static int
kq_insert(struct kqop * kqop,struct kevent * kev)147 kq_insert(struct kqop *kqop, struct kevent *kev)
148 {
149 int nevents = kqop->nevents;
150
151 if (kqop->nchanges == nevents) {
152 struct kevent *newchange;
153 struct kevent *newresult;
154
155 nevents *= 2;
156
157 newchange = realloc(kqop->changes,
158 nevents * sizeof(struct kevent));
159 if (newchange == NULL) {
160 event_warn("%s: malloc", __func__);
161 return (-1);
162 }
163 kqop->changes = newchange;
164
165 newresult = realloc(kqop->events,
166 nevents * sizeof(struct kevent));
167
168 /*
169 * If we fail, we don't have to worry about freeing,
170 * the next realloc will pick it up.
171 */
172 if (newresult == NULL) {
173 event_warn("%s: malloc", __func__);
174 return (-1);
175 }
176 kqop->events = newresult;
177
178 kqop->nevents = nevents;
179 }
180
181 memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent));
182
183 event_debug(("%s: fd %d %s%s",
184 __func__, (int)kev->ident,
185 kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE",
186 kev->flags == EV_DELETE ? " (del)" : ""));
187
188 return (0);
189 }
190
/*
 * Signal handler that kq_add() installs through _evsignal_set_handler()
 * for signals watched via kqueue.  It deliberately does nothing: the
 * signal is reported through the EVFILT_SIGNAL filter instead.
 */
static void
kq_sighandler(int sig)
{
	(void)sig;	/* intentionally unused */
}
196
197 static int
kq_dispatch(struct event_base * base,void * arg,struct timeval * tv)198 kq_dispatch(struct event_base *base, void *arg, struct timeval *tv)
199 {
200 struct kqop *kqop = arg;
201 struct kevent *changes = kqop->changes;
202 struct kevent *events = kqop->events;
203 struct event *ev;
204 struct timespec ts, *ts_p = NULL;
205 int i, res;
206
207 if (tv != NULL) {
208 TIMEVAL_TO_TIMESPEC(tv, &ts);
209 ts_p = &ts;
210 }
211
212 res = kevent(kqop->kq, changes, kqop->nchanges,
213 events, kqop->nevents, ts_p);
214 kqop->nchanges = 0;
215 if (res == -1) {
216 if (errno != EINTR) {
217 event_warn("kevent");
218 return (-1);
219 }
220
221 return (0);
222 }
223
224 event_debug(("%s: kevent reports %d", __func__, res));
225
226 for (i = 0; i < res; i++) {
227 int which = 0;
228
229 if (events[i].flags & EV_ERROR) {
230 /*
231 * Error messages that can happen, when a delete fails.
232 * EBADF happens when the file discriptor has been
233 * closed,
234 * ENOENT when the file discriptor was closed and
235 * then reopened.
236 * EINVAL for some reasons not understood; EINVAL
237 * should not be returned ever; but FreeBSD does :-\
238 * An error is also indicated when a callback deletes
239 * an event we are still processing. In that case
240 * the data field is set to ENOENT.
241 */
242 if (events[i].data == EBADF ||
243 events[i].data == EINVAL ||
244 events[i].data == ENOENT)
245 continue;
246 errno = events[i].data;
247 return (-1);
248 }
249
250 if (events[i].filter == EVFILT_READ) {
251 which |= EV_READ;
252 } else if (events[i].filter == EVFILT_WRITE) {
253 which |= EV_WRITE;
254 } else if (events[i].filter == EVFILT_SIGNAL) {
255 which |= EV_SIGNAL;
256 }
257
258 if (!which)
259 continue;
260
261 if (events[i].filter == EVFILT_SIGNAL) {
262 struct event_list *head =
263 (struct event_list *)events[i].udata;
264 TAILQ_FOREACH(ev, head, ev_signal_next) {
265 event_active(ev, which, events[i].data);
266 }
267 } else {
268 ev = (struct event *)events[i].udata;
269
270 if (!(ev->ev_events & EV_PERSIST))
271 ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
272
273 event_active(ev, which, 1);
274 }
275 }
276
277 return (0);
278 }
279
280
281 static int
kq_add(void * arg,struct event * ev)282 kq_add(void *arg, struct event *ev)
283 {
284 struct kqop *kqop = arg;
285 struct kevent kev;
286
287 if (ev->ev_events & EV_SIGNAL) {
288 int nsignal = EVENT_SIGNAL(ev);
289
290 assert(nsignal >= 0 && nsignal < NSIG);
291 if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) {
292 struct timespec timeout = { 0, 0 };
293
294 memset(&kev, 0, sizeof(kev));
295 kev.ident = nsignal;
296 kev.filter = EVFILT_SIGNAL;
297 kev.flags = EV_ADD;
298 kev.udata = PTR_TO_UDATA(&kqop->evsigevents[nsignal]);
299
300 /* Be ready for the signal if it is sent any
301 * time between now and the next call to
302 * kq_dispatch. */
303 if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
304 return (-1);
305
306 if (_evsignal_set_handler(ev->ev_base, nsignal,
307 kq_sighandler) == -1)
308 return (-1);
309 }
310
311 TAILQ_INSERT_TAIL(&kqop->evsigevents[nsignal], ev,
312 ev_signal_next);
313 ev->ev_flags |= EVLIST_X_KQINKERNEL;
314 return (0);
315 }
316
317 if (ev->ev_events & EV_READ) {
318 memset(&kev, 0, sizeof(kev));
319 kev.ident = ev->ev_fd;
320 kev.filter = EVFILT_READ;
321 #ifdef NOTE_EOF
322 /* Make it behave like select() and poll() */
323 kev.fflags = NOTE_EOF;
324 #endif
325 kev.flags = EV_ADD;
326 if (!(ev->ev_events & EV_PERSIST))
327 kev.flags |= EV_ONESHOT;
328 kev.udata = PTR_TO_UDATA(ev);
329
330 if (kq_insert(kqop, &kev) == -1)
331 return (-1);
332
333 ev->ev_flags |= EVLIST_X_KQINKERNEL;
334 }
335
336 if (ev->ev_events & EV_WRITE) {
337 memset(&kev, 0, sizeof(kev));
338 kev.ident = ev->ev_fd;
339 kev.filter = EVFILT_WRITE;
340 kev.flags = EV_ADD;
341 if (!(ev->ev_events & EV_PERSIST))
342 kev.flags |= EV_ONESHOT;
343 kev.udata = PTR_TO_UDATA(ev);
344
345 if (kq_insert(kqop, &kev) == -1)
346 return (-1);
347
348 ev->ev_flags |= EVLIST_X_KQINKERNEL;
349 }
350
351 return (0);
352 }
353
354 static int
kq_del(void * arg,struct event * ev)355 kq_del(void *arg, struct event *ev)
356 {
357 struct kqop *kqop = arg;
358 struct kevent kev;
359
360 if (!(ev->ev_flags & EVLIST_X_KQINKERNEL))
361 return (0);
362
363 if (ev->ev_events & EV_SIGNAL) {
364 int nsignal = EVENT_SIGNAL(ev);
365 struct timespec timeout = { 0, 0 };
366
367 assert(nsignal >= 0 && nsignal < NSIG);
368 TAILQ_REMOVE(&kqop->evsigevents[nsignal], ev, ev_signal_next);
369 if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) {
370 memset(&kev, 0, sizeof(kev));
371 kev.ident = nsignal;
372 kev.filter = EVFILT_SIGNAL;
373 kev.flags = EV_DELETE;
374
375 /* Because we insert signal events
376 * immediately, we need to delete them
377 * immediately, too */
378 if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
379 return (-1);
380
381 if (_evsignal_restore_handler(ev->ev_base,
382 nsignal) == -1)
383 return (-1);
384 }
385
386 ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
387 return (0);
388 }
389
390 if (ev->ev_events & EV_READ) {
391 memset(&kev, 0, sizeof(kev));
392 kev.ident = ev->ev_fd;
393 kev.filter = EVFILT_READ;
394 kev.flags = EV_DELETE;
395
396 if (kq_insert(kqop, &kev) == -1)
397 return (-1);
398
399 ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
400 }
401
402 if (ev->ev_events & EV_WRITE) {
403 memset(&kev, 0, sizeof(kev));
404 kev.ident = ev->ev_fd;
405 kev.filter = EVFILT_WRITE;
406 kev.flags = EV_DELETE;
407
408 if (kq_insert(kqop, &kev) == -1)
409 return (-1);
410
411 ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
412 }
413
414 return (0);
415 }
416
417 static void
kq_dealloc(struct event_base * base,void * arg)418 kq_dealloc(struct event_base *base, void *arg)
419 {
420 struct kqop *kqop = arg;
421
422 evsignal_dealloc(base);
423
424 if (kqop->changes)
425 free(kqop->changes);
426 if (kqop->events)
427 free(kqop->events);
428 if (kqop->kq >= 0 && kqop->pid == getpid())
429 close(kqop->kq);
430
431 memset(kqop, 0, sizeof(struct kqop));
432 free(kqop);
433 }
434