1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2015 Joyent, Inc.
14 */
15
16 /*
17 * Support for the signalfd facility, a Linux-borne facility for
18 * file descriptor-based synchronous signal consumption.
19 *
20 * As described on the signalfd(3C) man page, the general idea behind these
21 * file descriptors is that they can be used to synchronously consume signals
22 * via the read(2) syscall. That capability already exists with the
23 * sigwaitinfo(3C) function, but the key advantage of signalfd is that, because
24 * it is file descriptor based, poll(2) can be used to determine when signals
25 * are available to be consumed.
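*
* A minimal userland sketch of that pattern (assuming the signalfd(3C)
* wrapper; error handling omitted):
*
*	sigset_t mask;
*	signalfd_siginfo_t ssi;
*	struct pollfd pfd;
*	int fd;
*
*	sigemptyset(&mask);
*	sigaddset(&mask, SIGUSR1);
*	(void) sigprocmask(SIG_BLOCK, &mask, NULL);	(defer normal delivery)
*	fd = signalfd(-1, &mask, 0);
*	pfd.fd = fd;
*	pfd.events = POLLIN;
*	(void) poll(&pfd, 1, -1);		(wait until a signal is consumable)
*	(void) read(fd, &ssi, sizeof (ssi));	(synchronously consume it)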
26 *
27 * The general implementation uses signalfd_state to hold both the signal set
28 * and poll head for an open file descriptor. Because a process can be using
29 * different sigfds with different signal sets, each signalfd_state poll head
30 * can be thought of as an independent signal stream and the thread(s) waiting
31 * on that stream will get poll notification when any signal in the
32 * corresponding set is received.
33 *
34 * The sigfd_proc_state_t struct lives on the proc_t and maintains the
35 * per-proc callback and data state used when the proc must issue pollwakeups
36 * during signal delivery.
37 *
38 * The read side of the implementation is straightforward and mimics the
39 * kernel behavior for sigtimedwait(). Signals continue to live on either
40 * the proc's p_sig, or thread's t_sig, member. Read consumes the signal so
41 * that it is no longer pending.
42 *
43 * The poll side is more complex since all of the sigfds on the process need
44 * to be examined every time a signal is delivered to the process in order to
45 * pollwake any thread waiting in poll for that signal.
46 *
47 * Because it is likely that a process will only be using one, or a few, sigfds,
48 * but many total file descriptors, we maintain a list of sigfds which need
49 * pollwakeup. The list lives on the proc's p_sigfd struct. In this way only
50 * zero, or a few, of the state structs will need to be examined every time a
51 * signal is delivered to the process, instead of having to examine all of the
52 * file descriptors to find the state structs. When a state struct with a
53 * matching signal set is found then pollwakeup is called.
54 *
55 * The sigfd_list is self-cleaning; as signalfd_pollwake_cb is called, the list
56 * will clear out on its own. There is an exit helper (signalfd_exit_helper)
57 * which cleans up any remaining per-proc state when the process exits.
58 *
59 * The main complexity with signalfd is the interaction of forking and polling.
60 * This interaction is complex because now two processes each hold an fd that
61 * references the same dev_t (and its associated signalfd_state), but signals
62 * go to only one of those processes. Also, we don't know when one of the
63 * processes closes its fd because our 'close' entry point is only called when
64 * the last fd is closed (which could be by either process).
65 *
66 * Because the state struct is referenced by both file descriptors, and the
67 * state struct represents a signal stream needing a pollwakeup, if both
68 * processes were polling then both processes would get a pollwakeup when a
69 * signal arrives for either process (that is, the pollhead is associated with
70 * our dev_t so when a signal arrives the pollwakeup wakes up all waiters).
71 *
72 * Fortunately this is not a common problem in practice, but the implementation
73 * attempts to mitigate unexpected behavior. The typical behavior is that the
74 * parent has been polling the signalfd (which is why it was open in the first
75 * place) and the parent might have a pending signalfd_state (with the
76 * pollhead) on its per-process sigfd_list. After the fork the child will
77 * simply close that fd (among others) as part of the typical fork/close/exec
78 * pattern. Because the child will never poll that fd, it will never get any
79 * state onto its own sigfd_list (the child starts with a null list). The
80 * intention is that the child sees no pollwakeup activity for signals unless
81 * it explicitly reinvokes poll on the sigfd.
82 *
83 * As background, there are two primary polling cases to consider when the
84 * parent process forks:
85 * 1) If any thread is blocked in poll(2) then both the parent and child will
86 * return from the poll syscall with EINTR. This means that if either
87 * process wants to re-poll on a sigfd then it needs to re-run poll and
88 * would come back into the signalfd_poll entry point. The parent would
89 * already have the dev_t's state on its sigfd_list and the child would not
90 * have anything there unless it called poll again on its fd.
91 * 2) If the process is using /dev/poll(7D) then the polling info is being
92 * cached by the poll device and the process might not currently be blocked
93 * on anything polling related. A subsequent DP_POLL ioctl will not invoke
94 * our signalfd_poll entry point again. Because the parent still has its
95 * sigfd_list setup, an incoming signal will hit our signalfd_pollwake_cb
96 * entry point, which in turn calls pollwakeup, and /dev/poll will do the
97 * right thing on DP_POLL. The child will not have a sigfd_list yet so the
98 * signal will not cause a pollwakeup. The dp code does its own handling for
99 * cleaning up its cache.
100 *
101 * This leaves only one odd corner case. If the parent and child both use
102 * the dup-ed sigfd to poll then when a signal is delivered to either process
103 * there is no way to determine which one should get the pollwakeup (since
104 * both processes will be queued on the same signal stream poll head). What
105 * happens in this case is that both processes will return from poll, but only
106 * one of them will actually have a signal to read. The other will return
107 * from read with EAGAIN, or block. This case is similar to the situation
108 * within a single process that opened two different sigfds with the same
109 * mask (or that polls two fds which are dups of each other). Both would
110 * return from poll when a signal arrives, but only one read would consume
111 * the signal; the other read would fail or block. Applications which poll
112 * on shared fds cannot assume that a subsequent read will actually obtain data.
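*
* For example, if two processes share a dup-ed, non-blocking sigfd and both
* return from poll for the same signal, the loser of the read race would
* see (sketch):
*
*	n = read(fd, &ssi, sizeof (ssi));
*	if (n == -1 && errno == EAGAIN) {
*		(the other reader already consumed the signal)
*	}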
113 */
114
115 #include <sys/ddi.h>
116 #include <sys/sunddi.h>
117 #include <sys/signalfd.h>
118 #include <sys/conf.h>
119 #include <sys/sysmacros.h>
120 #include <sys/filio.h>
121 #include <sys/stat.h>
122 #include <sys/file.h>
123 #include <sys/schedctl.h>
124 #include <sys/id_space.h>
125 #include <sys/sdt.h>
126
127 typedef struct signalfd_state signalfd_state_t;
128
129 struct signalfd_state {
130 kmutex_t sfd_lock; /* lock protecting state */
131 pollhead_t sfd_pollhd; /* poll head */
132 k_sigset_t sfd_set; /* signals for this fd */
133 signalfd_state_t *sfd_next; /* next state on global list */
134 };
135
136 /*
137 * Internal global variables.
138 */
139 static kmutex_t signalfd_lock; /* lock protecting state */
140 static dev_info_t *signalfd_devi; /* device info */
141 static id_space_t *signalfd_minor; /* minor number arena */
142 static void *signalfd_softstate; /* softstate pointer */
143 static signalfd_state_t *signalfd_state; /* global list of state */
144
145 /*
146 * If we don't already have an entry in the proc's list for this state, add one.
147 */
148 static void
149 signalfd_wake_list_add(signalfd_state_t *state)
150 {
151 proc_t *p = curproc;
152 list_t *lst;
153 sigfd_wake_list_t *wlp;
154
155 ASSERT(MUTEX_HELD(&p->p_lock));
156 ASSERT(p->p_sigfd != NULL);
157
158 lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
159 for (wlp = list_head(lst); wlp != NULL; wlp = list_next(lst, wlp)) {
160 if (wlp->sigfd_wl_state == state)
161 break;
162 }
163
164 if (wlp == NULL) {
165 wlp = kmem_zalloc(sizeof (sigfd_wake_list_t), KM_SLEEP);
166 wlp->sigfd_wl_state = state;
167 list_insert_head(lst, wlp);
168 }
169 }
170
171 static void
172 signalfd_wake_rm(list_t *lst, sigfd_wake_list_t *wlp)
173 {
174 list_remove(lst, wlp);
175 kmem_free(wlp, sizeof (sigfd_wake_list_t));
176 }
177
178 static void
179 signalfd_wake_list_rm(proc_t *p, signalfd_state_t *state)
180 {
181 sigfd_wake_list_t *wlp;
182 list_t *lst;
183
184 ASSERT(MUTEX_HELD(&p->p_lock));
185
186 if (p->p_sigfd == NULL)
187 return;
188
189 lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
190 for (wlp = list_head(lst); wlp != NULL; wlp = list_next(lst, wlp)) {
191 if (wlp->sigfd_wl_state == state) {
192 signalfd_wake_rm(lst, wlp);
193 break;
194 }
195 }
196
197 if (list_is_empty(lst)) {
198 ((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb = NULL;
199 list_destroy(lst);
200 kmem_free(p->p_sigfd, sizeof (sigfd_proc_state_t));
201 p->p_sigfd = NULL;
202 }
203 }
204
205 static void
206 signalfd_wake_list_cleanup(proc_t *p)
207 {
208 sigfd_wake_list_t *wlp;
209 list_t *lst;
210
211 ASSERT(MUTEX_HELD(&p->p_lock));
212
213 ((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb = NULL;
214
215 lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
216 while (!list_is_empty(lst)) {
217 wlp = (sigfd_wake_list_t *)list_remove_head(lst);
218 kmem_free(wlp, sizeof (sigfd_wake_list_t));
219 }
220 }
221
222 static void
223 signalfd_exit_helper(void)
224 {
225 proc_t *p = curproc;
226 list_t *lst;
227
228 /* This being non-null is the only way we can get here */
229 ASSERT(p->p_sigfd != NULL);
230
231 mutex_enter(&p->p_lock);
232 lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
233
234 signalfd_wake_list_cleanup(p);
235 list_destroy(lst);
236 kmem_free(p->p_sigfd, sizeof (sigfd_proc_state_t));
237 p->p_sigfd = NULL;
238 mutex_exit(&p->p_lock);
239 }
240
241 /*
242 * Called every time a signal is delivered to the process so that we can
243 * see if any signal stream needs a pollwakeup. We maintain a list of
244 * signal state elements so that we don't have to look at every file descriptor
245 * on the process. If necessary, a further optimization would be to maintain a
246 * signal set mask that is a union of all of the sets in the list so that
247 * we don't even traverse the list if the signal is not in one of the elements.
248 * However, since the list is likely to be very short, this is not currently
249 * being done. A more complex data structure might also be used, but it is
250 * unclear what that would be since each signal set needs to be checked for a
251 * match.
252 */
253 static void
254 signalfd_pollwake_cb(void *arg0, int sig)
255 {
256 proc_t *p = (proc_t *)arg0;
257 list_t *lst;
258 sigfd_wake_list_t *wlp;
259
260 ASSERT(MUTEX_HELD(&p->p_lock));
261
262 if (p->p_sigfd == NULL)
263 return;
264
265 lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
266 wlp = list_head(lst);
267 while (wlp != NULL) {
268 signalfd_state_t *state = wlp->sigfd_wl_state;
269
270 mutex_enter(&state->sfd_lock);
271
272 if (sigismember(&state->sfd_set, sig) &&
273 state->sfd_pollhd.ph_list != NULL) {
274 sigfd_wake_list_t *tmp = wlp;
275
276 /* remove it from the list */
277 wlp = list_next(lst, wlp);
278 signalfd_wake_rm(lst, tmp);
279
280 mutex_exit(&state->sfd_lock);
281 pollwakeup(&state->sfd_pollhd, POLLRDNORM | POLLIN);
282 } else {
283 mutex_exit(&state->sfd_lock);
284 wlp = list_next(lst, wlp);
285 }
286 }
287 }
288
289 _NOTE(ARGSUSED(1))
290 static int
291 signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
292 {
293 signalfd_state_t *state;
294 major_t major = getemajor(*devp);
295 minor_t minor = getminor(*devp);
296
297 if (minor != SIGNALFDMNRN_SIGNALFD)
298 return (ENXIO);
299
300 mutex_enter(&signalfd_lock);
301
302 minor = (minor_t)id_allocff(signalfd_minor);
303
304 if (ddi_soft_state_zalloc(signalfd_softstate, minor) != DDI_SUCCESS) {
305 id_free(signalfd_minor, minor);
306 mutex_exit(&signalfd_lock);
307 return (ENODEV);
308 }
309
310 state = ddi_get_soft_state(signalfd_softstate, minor);
311 *devp = makedevice(major, minor);
312
313 state->sfd_next = signalfd_state;
314 signalfd_state = state;
315
316 mutex_exit(&signalfd_lock);
317
318 return (0);
319 }
320
321 /*
322 * Consume one signal from our set in a manner similar to sigtimedwait().
323 * The block parameter is used to control whether we wait for a signal or
324 * return immediately if no signal is pending. We use the thread's t_sigwait
325 * member in the same way that it is used by sigtimedwait.
326 *
327 * Return 0 if we successfully consumed a signal or an errno if not.
328 */
329 static int
330 consume_signal(k_sigset_t set, uio_t *uio, boolean_t block)
331 {
332 k_sigset_t oldmask;
333 kthread_t *t = curthread;
334 klwp_t *lwp = ttolwp(t);
335 proc_t *p = ttoproc(t);
388 sigemptyset(&t->t_sigwait);
389 return (EINTR);
390 }
391
392 if (lwp->lwp_curinfo) {
393 infop = &lwp->lwp_curinfo->sq_info;
394 } else {
395 infop = &info;
396 bzero(infop, sizeof (info));
397 infop->si_signo = lwp->lwp_cursig;
398 infop->si_code = SI_NOINFO;
399 }
400
401 lwp->lwp_ru.nsignals++;
402
403 DTRACE_PROC2(signal__clear, int, ret, ksiginfo_t *, infop);
404 lwp->lwp_cursig = 0;
405 lwp->lwp_extsig = 0;
406 mutex_exit(&p->p_lock);
407
408 /* Convert the k_siginfo into the external, datamodel-independent struct. */
409 bzero(ssp, sizeof (*ssp));
410 ssp->ssi_signo = infop->si_signo;
411 ssp->ssi_errno = infop->si_errno;
412 ssp->ssi_code = infop->si_code;
413 ssp->ssi_pid = infop->si_pid;
414 ssp->ssi_uid = infop->si_uid;
415 ssp->ssi_fd = infop->si_fd;
416 ssp->ssi_band = infop->si_band;
417 ssp->ssi_trapno = infop->si_trapno;
418 ssp->ssi_status = infop->si_status;
419 ssp->ssi_utime = infop->si_utime;
420 ssp->ssi_stime = infop->si_stime;
421 ssp->ssi_addr = (uint64_t)(intptr_t)infop->si_addr;
422
423 ret = uiomove(ssp, sizeof (*ssp), UIO_READ, uio);
424
425 if (lwp->lwp_curinfo) {
426 siginfofree(lwp->lwp_curinfo);
427 lwp->lwp_curinfo = NULL;
428 }
429 sigemptyset(&t->t_sigwait);
430 return (ret);
431 }
432
433 /*
434 * This is similar to sigtimedwait. Based on the fd mode we may wait until a
435 * signal within our specified set is posted. We consume as many available
436 * signals within our set as we can.
437 */
438 _NOTE(ARGSUSED(2))
439 static int
440 signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
441 {
442 signalfd_state_t *state;
443 minor_t minor = getminor(dev);
444 boolean_t block = B_TRUE;
445 k_sigset_t set;
446 boolean_t got_one = B_FALSE;
447 int res;
448
449 if (uio->uio_resid < sizeof (signalfd_siginfo_t))
450 return (EINVAL);
451
452 state = ddi_get_soft_state(signalfd_softstate, minor);
453
454 if (uio->uio_fmode & (FNDELAY|FNONBLOCK))
455 block = B_FALSE;
456
457 mutex_enter(&state->sfd_lock);
458 set = state->sfd_set;
459 mutex_exit(&state->sfd_lock);
460
461 if (sigisempty(&set))
462 return (set_errno(EINVAL));
463
464 do {
465 res = consume_signal(state->sfd_set, uio, block);
466 if (res == 0)
467 got_one = B_TRUE;
468
469 /*
470 * After consuming one signal we won't block trying to consume
471 * further signals.
472 */
473 block = B_FALSE;
474 } while (res == 0 && uio->uio_resid >= sizeof (signalfd_siginfo_t));
475
476 if (got_one)
477 res = 0;
478
479 return (res);
480 }
481
482 /*
483 * If ksigset_t's were a single word, we would do:
484 * return (((p->p_sig | t->t_sig) & set) & fillset);
485 */
486 static int
487 signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set)
488 {
489 return (((p->p_sig.__sigbits[0] | t->t_sig.__sigbits[0]) &
490 set.__sigbits[0]) |
491 ((p->p_sig.__sigbits[1] | t->t_sig.__sigbits[1]) &
492 set.__sigbits[1]) |
493 (((p->p_sig.__sigbits[2] | t->t_sig.__sigbits[2]) &
494 set.__sigbits[2]) & FILLSET2));
495 }
496
497 _NOTE(ARGSUSED(4))
498 static int
499 signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
500 struct pollhead **phpp)
501 {
502 signalfd_state_t *state;
503 minor_t minor = getminor(dev);
504 kthread_t *t = curthread;
505 proc_t *p = ttoproc(t);
506 short revents = 0;
507
508 state = ddi_get_soft_state(signalfd_softstate, minor);
509
510 mutex_enter(&state->sfd_lock);
511
512 if (signalfd_sig_pending(p, t, state->sfd_set) != 0)
513 revents |= POLLRDNORM | POLLIN;
514
515 mutex_exit(&state->sfd_lock);
516
517 if (!(*reventsp = revents & events) && !anyyet) {
518 *phpp = &state->sfd_pollhd;
519
520 /*
521 * Enable pollwakeup handling.
522 */
523 if (p->p_sigfd == NULL) {
524 sigfd_proc_state_t *pstate;
525
526 pstate = kmem_zalloc(sizeof (sigfd_proc_state_t),
527 KM_SLEEP);
528 list_create(&pstate->sigfd_list,
529 sizeof (sigfd_wake_list_t),
530 offsetof(sigfd_wake_list_t, sigfd_wl_lst));
531
532 mutex_enter(&p->p_lock);
533 /* check again now that we're locked */
534 if (p->p_sigfd == NULL) {
535 p->p_sigfd = pstate;
536 } else {
537 /* someone beat us to it */
538 list_destroy(&pstate->sigfd_list);
539 kmem_free(pstate, sizeof (sigfd_proc_state_t));
540 }
541 mutex_exit(&p->p_lock);
542 }
543
544 mutex_enter(&p->p_lock);
545 if (((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb ==
546 NULL) {
547 ((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb =
548 signalfd_pollwake_cb;
549 }
550 signalfd_wake_list_add(state);
551 mutex_exit(&p->p_lock);
552 }
553
554 return (0);
555 }
556
557 _NOTE(ARGSUSED(4))
558 static int
559 signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
560 {
561 signalfd_state_t *state;
562 minor_t minor = getminor(dev);
563 sigset_t mask;
564
565 state = ddi_get_soft_state(signalfd_softstate, minor);
566
567 switch (cmd) {
568 case SIGNALFDIOC_MASK:
569 if (ddi_copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t),
570 md) != 0)
571 return (set_errno(EFAULT));
572
573 mutex_enter(&state->sfd_lock);
574 sigutok(&mask, &state->sfd_set);
575 mutex_exit(&state->sfd_lock);
576
577 return (0);
578
579 default:
580 break;
581 }
582
583 return (ENOTTY);
584 }
585
586 _NOTE(ARGSUSED(1))
587 static int
588 signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
589 {
590 signalfd_state_t *state, **sp;
591 minor_t minor = getminor(dev);
592 proc_t *p = curproc;
593
594 state = ddi_get_soft_state(signalfd_softstate, minor);
595
596 if (state->sfd_pollhd.ph_list != NULL) {
597 pollwakeup(&state->sfd_pollhd, POLLERR);
598 pollhead_clean(&state->sfd_pollhd);
599 }
600
601 /* Make sure our state is removed from our proc's pollwake list. */
602 mutex_enter(&p->p_lock);
603 signalfd_wake_list_rm(p, state);
604 mutex_exit(&p->p_lock);
605
606 mutex_enter(&signalfd_lock);
607
608 /* Remove our state from our global list. */
609 for (sp = &signalfd_state; *sp != state; sp = &((*sp)->sfd_next))
610 VERIFY(*sp != NULL);
611
612 *sp = (*sp)->sfd_next;
613
614 ddi_soft_state_free(signalfd_softstate, minor);
615 id_free(signalfd_minor, minor);
616
617 mutex_exit(&signalfd_lock);
618
619 return (0);
620 }
621
622 static int
623 signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
624 {
625 if (cmd != DDI_ATTACH || signalfd_devi != NULL)
626 return (DDI_FAILURE);
627
628 mutex_enter(&signalfd_lock);
629
630 signalfd_minor = id_space_create("signalfd_minor", 1, L_MAXMIN32 + 1);
631 if (signalfd_minor == NULL) {
632 cmn_err(CE_WARN, "signalfd couldn't create id space");
633 mutex_exit(&signalfd_lock);
634 return (DDI_FAILURE);
635 }
636
637 if (ddi_soft_state_init(&signalfd_softstate,
638 sizeof (signalfd_state_t), 0) != 0) {
639 cmn_err(CE_WARN, "signalfd failed to create soft state");
640 id_space_destroy(signalfd_minor);
641 mutex_exit(&signalfd_lock);
642 return (DDI_FAILURE);
643 }
644
645 if (ddi_create_minor_node(devi, "signalfd", S_IFCHR,
646 SIGNALFDMNRN_SIGNALFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
647 cmn_err(CE_NOTE, "/dev/signalfd couldn't create minor node");
648 ddi_soft_state_fini(&signalfd_softstate);
649 id_space_destroy(signalfd_minor);
650 mutex_exit(&signalfd_lock);
651 return (DDI_FAILURE);
652 }
653
654 ddi_report_dev(devi);
655 signalfd_devi = devi;
656
657 sigfd_exit_helper = signalfd_exit_helper;
658
659 mutex_exit(&signalfd_lock);
660
661 return (DDI_SUCCESS);
662 }
663
664 _NOTE(ARGSUSED(0))
665 static int
666 signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
667 {
668 switch (cmd) {
669 case DDI_DETACH:
670 break;
671
672 default:
673 return (DDI_FAILURE);
674 }
675
676 /* list should be empty */
677 VERIFY(signalfd_state == NULL);
678
679 mutex_enter(&signalfd_lock);
680 id_space_destroy(signalfd_minor);
681
682 ddi_remove_minor_node(signalfd_devi, NULL);
683 signalfd_devi = NULL;
684 sigfd_exit_helper = NULL;
685
686 ddi_soft_state_fini(&signalfd_softstate);
687 mutex_exit(&signalfd_lock);
688
689 return (DDI_SUCCESS);
690 }
691
692 _NOTE(ARGSUSED(0))
693 static int
694 signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
695 {
696 int error;
697
698 switch (infocmd) {
699 case DDI_INFO_DEVT2DEVINFO:
1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2016 Joyent, Inc.
14 */
15
16 /*
17 * Support for the signalfd facility, a Linux-borne facility for
18 * file descriptor-based synchronous signal consumption.
19 *
20 * As described on the signalfd(3C) man page, the general idea behind these
21 * file descriptors is that they can be used to synchronously consume signals
22 * via the read(2) syscall. While that capability already exists with the
23 * sigwaitinfo(3C) function, signalfd holds an advantage since it is file
24 * descriptor based: it is able to use the event facilities (poll(2), /dev/poll,
25 * event ports) to notify interested parties when consumable signals arrive.
26 *
27 * The signalfd lifecycle begins when a process opens /dev/signalfd. A minor
28 * number is allocated for it, along with an associated signalfd_state_t
29 * struct, which is where the mask of desired signals resides.
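*
* A sketch of the raw device protocol this sets up (a signalfd(3C) wrapper
* would normally hide these steps; the userland details are assumptions):
*
*	int fd = open("/dev/signalfd", O_RDWR);
*	sigset_t mask;
*	signalfd_siginfo_t ssi;
*
*	sigemptyset(&mask);
*	sigaddset(&mask, SIGINT);
*	(void) ioctl(fd, SIGNALFDIOC_MASK, &mask);	(install the mask)
*	(void) read(fd, &ssi, sizeof (ssi));		(consume a signal)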
30 *
31 * Reading from the signalfd is straightforward and mimics the kernel behavior
32 * for sigtimedwait(). Signals continue to live on either the proc's p_sig, or
33 * thread's t_sig, member. During a read operation, those which match the mask
34 * are consumed so they are no longer pending.
35 *
36 * The poll side is more complex. Every time a signal is delivered, all of the
37 * signalfds on the process need to be examined in order to pollwake threads
38 * waiting for signal arrival.
39 *
40 * When a thread polling on a signalfd requires a pollhead, several steps must
41 * be taken to safely ensure the proper result. A sigfd_proc_state_t is
42 * created for the calling process if it does not yet exist. It holds a list
43 * of sigfd_poll_waiter_t structures which associate pollheads with
44 * signalfd_state_t entries. The sigfd_proc_state_t list is walked to find a
45 * sigfd_poll_waiter_t matching the signalfd_state_t which corresponds to the
46 * polled resource. If one is found, it is reused. Otherwise a new one is
47 * created, incrementing the refcount on the signalfd_state_t, and it is added
48 * to the sigfd_poll_waiter_t list.
49 *
50 * The complications imposed by fork(2) are why the pollhead is stored in the
51 * associated sigfd_poll_waiter_t instead of directly in the signalfd_state_t.
52 * More than one process can hold a reference to the signalfd at a time but
53 * arriving signals should wake only process-local pollers. Additionally,
54 * signalfd_close is called only when the last referencing fd is closed, hiding
55 * occurrences of preceding threads which released their references. This
56 * necessitates reference counting on the signalfd_state_t so it is able to
57 * persist after close until all poll references have been released. Doing so
58 * ensures that blocked pollers which hold references to the signalfd_state_t
59 * will be able to do clean-up after the descriptor itself has been closed.
60 *
61 * When a signal arrives in a process polling on signalfd, signalfd_pollwake_cb
62 * is called via the pointer in sigfd_proc_state_t. It will walk over the
63 * sigfd_poll_waiter_t entries present in the list, searching for any
64 * associated with a signalfd_state_t with a matching signal mask. The
65 * approach of keeping the poller list in p_sigfd was chosen because a process
66 * is likely to use few signalfds relative to its total file descriptors. It
67 * reduces the work required for each received signal.
68 *
69 * When matching sigfd_poll_waiter_t entries are encountered in the poller list
70 * during signalfd_pollwake_cb, they are dispatched into signalfd_wakeq to
71 * perform the pollwake. This is due to a lock ordering conflict between
72 * signalfd_poll and signalfd_pollwake_cb. The former acquires
73 * pollcache_t`pc_lock before proc_t`p_lock. The latter (via sigtoproc)
74 * reverses the order. Deferring the pollwake into a taskq means it can be
75 * performed without proc_t`p_lock held, avoiding the deadlock.
76 *
77 * The sigfd_list is self-cleaning; as signalfd_pollwake_cb is called, the list
78 * will clear out on its own. Any per-process state which remains at process
79 * exit is cleaned up by the exit helper (signalfd_exit_helper).
80 *
81 * The structures associated with signalfd state are designed to operate
82 * correctly across fork, but there is one caveat that applies. Using
83 * fork-shared signalfd descriptors in conjunction with fork-shared caching poll
84 * descriptors (such as /dev/poll or event ports) will result in missed poll
85 * wake-ups. This is caused by the pollhead identity of signalfd descriptors
86 * being dependent on the process they are polled from. Because it has a
87 * thread-local cache, poll(2) is unaffected by this limitation.
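*
* A sketch of the problematic pattern (hypothetical descriptors; the
* /dev/poll registration step is elided):
*
*	int sfd = signalfd(-1, &mask, 0);
*	int dpfd = open("/dev/poll", O_RDWR);
*	... register sfd with dpfd by writing a pollfd to it ...
*	if (fork() == 0) {
*		(child: DP_POLL wake-ups for sfd may be missed, since the
*		cached pollhead identity belongs to the parent's poll)
*	}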
88 *
89 * Lock ordering:
90 *
91 * 1. signalfd_lock
92 * 2. signalfd_state_t`sfd_lock
93 *
94 * 1. proc_t`p_lock (to walk p_sigfd)
95 * 2. signalfd_state_t`sfd_lock
96 * 2a. signalfd_lock (after sfd_lock is dropped, when sfd_count falls to 0)
97 */
98
99 #include <sys/ddi.h>
100 #include <sys/sunddi.h>
101 #include <sys/signalfd.h>
102 #include <sys/conf.h>
103 #include <sys/sysmacros.h>
104 #include <sys/filio.h>
105 #include <sys/stat.h>
106 #include <sys/file.h>
107 #include <sys/schedctl.h>
108 #include <sys/id_space.h>
109 #include <sys/sdt.h>
110 #include <sys/brand.h>
111 #include <sys/disp.h>
112 #include <sys/taskq_impl.h>
113
114 typedef struct signalfd_state signalfd_state_t;
115
116 struct signalfd_state {
117 list_node_t sfd_list; /* node in global list */
118 kmutex_t sfd_lock; /* protects fields below */
119 uint_t sfd_count; /* ref count */
120 boolean_t sfd_valid; /* valid while open */
121 k_sigset_t sfd_set; /* signals for this fd */
122 };
123
124 typedef struct sigfd_poll_waiter {
125 list_node_t spw_list;
126 signalfd_state_t *spw_state;
127 pollhead_t spw_pollhd;
128 taskq_ent_t spw_taskent;
129 short spw_pollev;
130 } sigfd_poll_waiter_t;
131
132 /*
133 * Protects global state in signalfd_devi, signalfd_minor, signalfd_softstate,
134 * and signalfd_state (including sfd_list field of members)
135 */
136 static kmutex_t signalfd_lock;
137 static dev_info_t *signalfd_devi; /* device info */
138 static id_space_t *signalfd_minor; /* minor number arena */
139 static void *signalfd_softstate; /* softstate pointer */
140 static list_t signalfd_state; /* global list of state */
141 static taskq_t *signalfd_wakeq; /* pollwake event taskq */
142
143
144 static void
145 signalfd_state_enter_locked(signalfd_state_t *state)
146 {
147 ASSERT(MUTEX_HELD(&state->sfd_lock));
148 ASSERT(state->sfd_count > 0);
149 VERIFY(state->sfd_valid == B_TRUE);
150
151 state->sfd_count++;
152 }
153
154 static void
155 signalfd_state_release(signalfd_state_t *state, boolean_t force_invalidate)
156 {
157 mutex_enter(&state->sfd_lock);
158
159 if (force_invalidate) {
160 state->sfd_valid = B_FALSE;
161 }
162
163 ASSERT(state->sfd_count > 0);
164 if (state->sfd_count == 1) {
165 VERIFY(state->sfd_valid == B_FALSE);
166 mutex_exit(&state->sfd_lock);
167 if (force_invalidate) {
168 /*
169 * The invalidation performed in signalfd_close is done
170 * while signalfd_lock is held.
171 */
172 ASSERT(MUTEX_HELD(&signalfd_lock));
173 list_remove(&signalfd_state, state);
174 } else {
175 ASSERT(MUTEX_NOT_HELD(&signalfd_lock));
176 mutex_enter(&signalfd_lock);
177 list_remove(&signalfd_state, state);
178 mutex_exit(&signalfd_lock);
179 }
180 kmem_free(state, sizeof (*state));
181 return;
182 }
183 state->sfd_count--;
184 mutex_exit(&state->sfd_lock);
185 }
186
187 static sigfd_poll_waiter_t *
188 signalfd_wake_list_add(sigfd_proc_state_t *pstate, signalfd_state_t *state)
189 {
190 list_t *lst = &pstate->sigfd_list;
191 sigfd_poll_waiter_t *pw;
192
193 for (pw = list_head(lst); pw != NULL; pw = list_next(lst, pw)) {
194 if (pw->spw_state == state)
195 break;
196 }
197
198 if (pw == NULL) {
199 pw = kmem_zalloc(sizeof (*pw), KM_SLEEP);
200
201 mutex_enter(&state->sfd_lock);
202 signalfd_state_enter_locked(state);
203 pw->spw_state = state;
204 mutex_exit(&state->sfd_lock);
205 list_insert_head(lst, pw);
206 }
207 return (pw);
208 }
209
210 static sigfd_poll_waiter_t *
211 signalfd_wake_list_rm(sigfd_proc_state_t *pstate, signalfd_state_t *state)
212 {
213 list_t *lst = &pstate->sigfd_list;
214 sigfd_poll_waiter_t *pw;
215
216 for (pw = list_head(lst); pw != NULL; pw = list_next(lst, pw)) {
217 if (pw->spw_state == state) {
218 break;
219 }
220 }
221
222 if (pw != NULL) {
223 list_remove(lst, pw);
224 pw->spw_state = NULL;
225 signalfd_state_release(state, B_FALSE);
226 }
227
228 return (pw);
229 }
230
231 static void
232 signalfd_wake_list_cleanup(proc_t *p)
233 {
234 sigfd_proc_state_t *pstate = p->p_sigfd;
235 sigfd_poll_waiter_t *pw;
236 list_t *lst;
237
238 ASSERT(MUTEX_HELD(&p->p_lock));
239 ASSERT(pstate != NULL);
240
241 lst = &pstate->sigfd_list;
242 while ((pw = list_remove_head(lst)) != NULL) {
243 signalfd_state_t *state = pw->spw_state;
244
245 pw->spw_state = NULL;
246 signalfd_state_release(state, B_FALSE);
247
248 pollwakeup(&pw->spw_pollhd, POLLERR);
249 pollhead_clean(&pw->spw_pollhd);
250 kmem_free(pw, sizeof (*pw));
251 }
252 list_destroy(lst);
253
254 p->p_sigfd = NULL;
255 kmem_free(pstate, sizeof (*pstate));
256 }
257
258 static void
259 signalfd_exit_helper(void)
260 {
261 proc_t *p = curproc;
262
263 mutex_enter(&p->p_lock);
264 signalfd_wake_list_cleanup(p);
265 mutex_exit(&p->p_lock);
266 }
267
268 /*
269 * Perform pollwake for a sigfd_poll_waiter_t entry.
270 * Thanks to the strict and conflicting lock orders required for signalfd_poll
271 * (pc_lock before p_lock) and signalfd_pollwake_cb (p_lock before pc_lock),
272 * this is relegated to a taskq to avoid deadlock.
273 */
274 static void
275 signalfd_wake_task(void *arg)
276 {
277 sigfd_poll_waiter_t *pw = arg;
278 signalfd_state_t *state = pw->spw_state;
279
280 pw->spw_state = NULL;
281 signalfd_state_release(state, B_FALSE);
282 pollwakeup(&pw->spw_pollhd, pw->spw_pollev);
283 pollhead_clean(&pw->spw_pollhd);
284 kmem_free(pw, sizeof (*pw));
285 }
286
287 /*
288 * Called every time a signal is delivered to the process so that we can
289 * see if any signal stream needs a pollwakeup. We maintain a list of
290 * signal state elements so that we don't have to look at every file descriptor
291 * on the process. If necessary, a further optimization would be to maintain a
292 * signal set mask that is a union of all of the sets in the list so that
293 * we don't even traverse the list if the signal is not in one of the elements.
294 * However, since the list is likely to be very short, this is not currently
295 * being done. A more complex data structure might also be used, but it is
296 * unclear what that would be since each signal set needs to be checked for a
297 * match.
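*
* A sketch of that shortcut, assuming a hypothetical sigfd_union set kept
* up to date in sigfd_proc_state_t as waiters come and go:
*
*	if (!sigismember(&pstate->sigfd_union, sig))
*		return;		(no waiter's set contains sig; skip the walk)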
298 */
299 static void
300 signalfd_pollwake_cb(void *arg0, int sig)
301 {
302 proc_t *p = (proc_t *)arg0;
303 sigfd_proc_state_t *pstate = (sigfd_proc_state_t *)p->p_sigfd;
304 list_t *lst;
305 sigfd_poll_waiter_t *pw;
306
307 ASSERT(MUTEX_HELD(&p->p_lock));
308 ASSERT(pstate != NULL);
309
310 lst = &pstate->sigfd_list;
311 pw = list_head(lst);
312 while (pw != NULL) {
313 signalfd_state_t *state = pw->spw_state;
314 sigfd_poll_waiter_t *next;
315
316 mutex_enter(&state->sfd_lock);
317 if (!state->sfd_valid) {
318 pw->spw_pollev = POLLERR;
319 } else if (sigismember(&state->sfd_set, sig)) {
320 pw->spw_pollev = POLLRDNORM | POLLIN;
321 } else {
322 mutex_exit(&state->sfd_lock);
323 pw = list_next(lst, pw);
324 continue;
325 }
326 mutex_exit(&state->sfd_lock);
327
328 /*
329 * Pull the sigfd_poll_waiter_t out of the list and dispatch it
330 * to perform a pollwake. This cannot be done synchronously
331 * since signalfd_poll and signalfd_pollwake_cb have
332 * conflicting lock orders which can deadlock.
333 */
334 next = list_next(lst, pw);
335 list_remove(lst, pw);
336 taskq_dispatch_ent(signalfd_wakeq, signalfd_wake_task, pw, 0,
337 &pw->spw_taskent);
338 pw = next;
339 }
340 }
341
342 _NOTE(ARGSUSED(1))
343 static int
344 signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
345 {
346 signalfd_state_t *state, **sstate;
347 major_t major = getemajor(*devp);
348 minor_t minor = getminor(*devp);
349
350 if (minor != SIGNALFDMNRN_SIGNALFD)
351 return (ENXIO);
352
353 mutex_enter(&signalfd_lock);
354
355 minor = (minor_t)id_allocff(signalfd_minor);
356 if (ddi_soft_state_zalloc(signalfd_softstate, minor) != DDI_SUCCESS) {
357 id_free(signalfd_minor, minor);
358 mutex_exit(&signalfd_lock);
359 return (ENODEV);
360 }
361
362 state = kmem_zalloc(sizeof (*state), KM_SLEEP);
363 state->sfd_valid = B_TRUE;
364 state->sfd_count = 1;
365 list_insert_head(&signalfd_state, (void *)state);
366
367 sstate = ddi_get_soft_state(signalfd_softstate, minor);
368 *sstate = state;
369 *devp = makedevice(major, minor);
370
371 mutex_exit(&signalfd_lock);
372
373 return (0);
374 }
375
376 /*
377 * Consume one signal from our set in a manner similar to sigtimedwait().
378 * The block parameter is used to control whether we wait for a signal or
379 * return immediately if no signal is pending. We use the thread's t_sigwait
380 * member in the same way that it is used by sigtimedwait.
381 *
382 * Return 0 if we successfully consumed a signal or an errno if not.
383 */
384 static int
385 consume_signal(k_sigset_t set, uio_t *uio, boolean_t block)
386 {
387 k_sigset_t oldmask;
388 kthread_t *t = curthread;
389 klwp_t *lwp = ttolwp(t);
390 proc_t *p = ttoproc(t);
443 sigemptyset(&t->t_sigwait);
444 return (EINTR);
445 }
446
447 if (lwp->lwp_curinfo) {
448 infop = &lwp->lwp_curinfo->sq_info;
449 } else {
450 infop = &info;
451 bzero(infop, sizeof (info));
452 infop->si_signo = lwp->lwp_cursig;
453 infop->si_code = SI_NOINFO;
454 }
455
456 lwp->lwp_ru.nsignals++;
457
458 DTRACE_PROC2(signal__clear, int, ret, ksiginfo_t *, infop);
459 lwp->lwp_cursig = 0;
460 lwp->lwp_extsig = 0;
461 mutex_exit(&p->p_lock);
462
463 if (PROC_IS_BRANDED(p) && BROP(p)->b_sigfd_translate)
464 BROP(p)->b_sigfd_translate(infop);
465
466 /* Convert the k_siginfo into the external, datamodel-independent struct. */
467 bzero(ssp, sizeof (*ssp));
468 ssp->ssi_signo = infop->si_signo;
469 ssp->ssi_errno = infop->si_errno;
470 ssp->ssi_code = infop->si_code;
471 ssp->ssi_pid = infop->si_pid;
472 ssp->ssi_uid = infop->si_uid;
473 ssp->ssi_fd = infop->si_fd;
474 ssp->ssi_band = infop->si_band;
475 ssp->ssi_trapno = infop->si_trapno;
476 ssp->ssi_status = infop->si_status;
477 ssp->ssi_utime = infop->si_utime;
478 ssp->ssi_stime = infop->si_stime;
479 ssp->ssi_addr = (uint64_t)(intptr_t)infop->si_addr;
480
481 ret = uiomove(ssp, sizeof (*ssp), UIO_READ, uio);
482
483 if (lwp->lwp_curinfo) {
484 siginfofree(lwp->lwp_curinfo);
485 lwp->lwp_curinfo = NULL;
486 }
487 sigemptyset(&t->t_sigwait);
488 return (ret);
489 }
490
491 /*
492 * This is similar to sigtimedwait. Based on the fd mode we may wait until a
493 * signal within our specified set is posted. We consume as many available
494 * signals within our set as we can.
495 */
496 _NOTE(ARGSUSED(2))
497 static int
498 signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
499 {
500 signalfd_state_t *state, **sstate;
501 minor_t minor = getminor(dev);
502 boolean_t block = B_TRUE;
503 k_sigset_t set;
504 boolean_t got_one = B_FALSE;
505 int res;
506
507 if (uio->uio_resid < sizeof (signalfd_siginfo_t))
508 return (EINVAL);
509
510 sstate = ddi_get_soft_state(signalfd_softstate, minor);
511 state = *sstate;
512
513 if (uio->uio_fmode & (FNDELAY|FNONBLOCK))
514 block = B_FALSE;
515
516 mutex_enter(&state->sfd_lock);
517 set = state->sfd_set;
518 mutex_exit(&state->sfd_lock);
519
520 if (sigisempty(&set))
521 return (set_errno(EINVAL));
522
523 do {
524 res = consume_signal(set, uio, block);
525
526 if (res == 0) {
527 /*
528 * After consuming one signal, do not block while
529 * trying to consume more.
530 */
531 got_one = B_TRUE;
532 block = B_FALSE;
533
534 /*
535 * Refresh the matching signal set in case it was
536 * updated during the wait.
537 */
538 mutex_enter(&state->sfd_lock);
539 set = state->sfd_set;
540 mutex_exit(&state->sfd_lock);
541 if (sigisempty(&set))
542 break;
543 }
544 } while (res == 0 && uio->uio_resid >= sizeof (signalfd_siginfo_t));
545
546 if (got_one)
547 res = 0;
548
549 return (res);
550 }
551
552 /*
553 * If ksigset_t's were a single word, we would do:
554 * return (((p->p_sig | t->t_sig) & set) & fillset);
555 */
556 static int
557 signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set)
558 {
559 return (((p->p_sig.__sigbits[0] | t->t_sig.__sigbits[0]) &
560 set.__sigbits[0]) |
561 ((p->p_sig.__sigbits[1] | t->t_sig.__sigbits[1]) &
562 set.__sigbits[1]) |
563 (((p->p_sig.__sigbits[2] | t->t_sig.__sigbits[2]) &
564 set.__sigbits[2]) & FILLSET2));
565 }
566
567 _NOTE(ARGSUSED(4))
568 static int
569 signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
570 struct pollhead **phpp)
571 {
572 signalfd_state_t *state, **sstate;
573 minor_t minor = getminor(dev);
574 kthread_t *t = curthread;
575 proc_t *p = ttoproc(t);
576 short revents = 0;
577
578 sstate = ddi_get_soft_state(signalfd_softstate, minor);
579 state = *sstate;
580
581 mutex_enter(&state->sfd_lock);
582
583 if (signalfd_sig_pending(p, t, state->sfd_set) != 0)
584 revents |= POLLRDNORM | POLLIN;
585
586 mutex_exit(&state->sfd_lock);
587
588 if (!(*reventsp = revents & events) && !anyyet) {
589 sigfd_proc_state_t *pstate;
590 sigfd_poll_waiter_t *pw;
591
592 /*
593 * Enable pollwakeup handling.
594 */
595 mutex_enter(&p->p_lock);
596 if ((pstate = (sigfd_proc_state_t *)p->p_sigfd) == NULL) {
597
598 mutex_exit(&p->p_lock);
599 pstate = kmem_zalloc(sizeof (*pstate), KM_SLEEP);
600 list_create(&pstate->sigfd_list,
601 sizeof (sigfd_poll_waiter_t),
602 offsetof(sigfd_poll_waiter_t, spw_list));
603 pstate->sigfd_pollwake_cb = signalfd_pollwake_cb;
604
605 /* Check again, after blocking for the alloc. */
606 mutex_enter(&p->p_lock);
607 if (p->p_sigfd == NULL) {
608 p->p_sigfd = pstate;
609 } else {
610 /* someone beat us to it */
611 list_destroy(&pstate->sigfd_list);
612 kmem_free(pstate, sizeof (*pstate));
613 pstate = p->p_sigfd;
614 }
615 }
616
617 pw = signalfd_wake_list_add(pstate, state);
618 *phpp = &pw->spw_pollhd;
619 mutex_exit(&p->p_lock);
620 }
621
622 return (0);
623 }
624
625 _NOTE(ARGSUSED(4))
626 static int
627 signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
628 {
629 signalfd_state_t *state, **sstate;
630 minor_t minor = getminor(dev);
631 sigset_t mask;
632
633 sstate = ddi_get_soft_state(signalfd_softstate, minor);
634 state = *sstate;
635
636 switch (cmd) {
637 case SIGNALFDIOC_MASK:
638 if (ddi_copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t),
639 md) != 0)
640 return (set_errno(EFAULT));
641
642 mutex_enter(&state->sfd_lock);
643 sigutok(&mask, &state->sfd_set);
644 mutex_exit(&state->sfd_lock);
645
646 return (0);
647
648 default:
649 break;
650 }
651
652 return (ENOTTY);
653 }
654
655 _NOTE(ARGSUSED(1))
656 static int
657 signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
658 {
659 signalfd_state_t *state, **sstate;
660 sigfd_poll_waiter_t *pw = NULL;
661 minor_t minor = getminor(dev);
662 proc_t *p = curproc;
663
664 sstate = ddi_get_soft_state(signalfd_softstate, minor);
665 state = *sstate;
666
667 /* Make sure state is removed from this proc's pollwake list. */
668 mutex_enter(&p->p_lock);
669 if (p->p_sigfd != NULL) {
670 sigfd_proc_state_t *pstate = p->p_sigfd;
671
672 pw = signalfd_wake_list_rm(pstate, state);
673 if (list_is_empty(&pstate->sigfd_list)) {
674 signalfd_wake_list_cleanup(p);
675 }
676 }
677 mutex_exit(&p->p_lock);
678
679 if (pw != NULL) {
680 pollwakeup(&pw->spw_pollhd, POLLERR);
681 pollhead_clean(&pw->spw_pollhd);
682 kmem_free(pw, sizeof (*pw));
683 }
684
685 mutex_enter(&signalfd_lock);
686
687 *sstate = NULL;
688 ddi_soft_state_free(signalfd_softstate, minor);
689 id_free(signalfd_minor, minor);
690
691 signalfd_state_release(state, B_TRUE);
692
693 mutex_exit(&signalfd_lock);
694
695 return (0);
696 }
697
698 static int
699 signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
700 {
701 if (cmd != DDI_ATTACH || signalfd_devi != NULL)
702 return (DDI_FAILURE);
703
704 mutex_enter(&signalfd_lock);
705
706 signalfd_minor = id_space_create("signalfd_minor", 1, L_MAXMIN32 + 1);
707 if (signalfd_minor == NULL) {
708 cmn_err(CE_WARN, "signalfd couldn't create id space");
709 mutex_exit(&signalfd_lock);
710 return (DDI_FAILURE);
711 }
712
713 if (ddi_soft_state_init(&signalfd_softstate,
714 sizeof (signalfd_state_t *), 0) != 0) {
715 cmn_err(CE_WARN, "signalfd failed to create soft state");
716 id_space_destroy(signalfd_minor);
717 mutex_exit(&signalfd_lock);
718 return (DDI_FAILURE);
719 }
720
721 if (ddi_create_minor_node(devi, "signalfd", S_IFCHR,
722 SIGNALFDMNRN_SIGNALFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
723 cmn_err(CE_NOTE, "/dev/signalfd couldn't create minor node");
724 ddi_soft_state_fini(&signalfd_softstate);
725 id_space_destroy(signalfd_minor);
726 mutex_exit(&signalfd_lock);
727 return (DDI_FAILURE);
728 }
729
730 ddi_report_dev(devi);
731 signalfd_devi = devi;
732
733 sigfd_exit_helper = signalfd_exit_helper;
734
735 list_create(&signalfd_state, sizeof (signalfd_state_t),
736 offsetof(signalfd_state_t, sfd_list));
737
738 signalfd_wakeq = taskq_create("signalfd_wake", 1, minclsyspri,
739 0, INT_MAX, TASKQ_PREPOPULATE);
740
741 mutex_exit(&signalfd_lock);
742
743 return (DDI_SUCCESS);
744 }
745
746 _NOTE(ARGSUSED(0))
747 static int
748 signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
749 {
750 switch (cmd) {
751 case DDI_DETACH:
752 break;
753
754 default:
755 return (DDI_FAILURE);
756 }
757
758 mutex_enter(&signalfd_lock);
759
760 if (!list_is_empty(&signalfd_state)) {
761 /*
762 * There are dangling poll waiters holding signalfd_state_t
763 * entries on the global list. Detach is not possible until
764 * they purge themselves.
765 */
766 mutex_exit(&signalfd_lock);
767 return (DDI_FAILURE);
768 }
769 list_destroy(&signalfd_state);
770
771 /*
772 * With no remaining entries in the signalfd_state list, the wake taskq
773 * should be empty with no possibility for new entries.
774 */
775 taskq_destroy(signalfd_wakeq);
776
777 id_space_destroy(signalfd_minor);
778
779 ddi_remove_minor_node(signalfd_devi, NULL);
780 signalfd_devi = NULL;
781 sigfd_exit_helper = NULL;
782
783 ddi_soft_state_fini(&signalfd_softstate);
784 mutex_exit(&signalfd_lock);
785
786 return (DDI_SUCCESS);
787 }
788
789 _NOTE(ARGSUSED(0))
790 static int
791 signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
792 {
793 int error;
794
795 switch (infocmd) {
796 case DDI_INFO_DEVT2DEVINFO: