/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2016 Joyent, Inc.
 */

/*
 * Support for the signalfd facility, a Linux-borne facility for
 * file descriptor-based synchronous signal consumption.
 *
 * As described on the signalfd(3C) man page, the general idea behind these
 * file descriptors is that they can be used to synchronously consume signals
 * via the read(2) syscall. While that capability already exists with the
 * sigwaitinfo(3C) function, signalfd holds an advantage since it is file
 * descriptor based: It is able to use the event facilities (poll(2),
 * /dev/poll, event ports) to notify interested parties when consumable
 * signals arrive.
 *
 * The signalfd lifecycle begins when a process opens /dev/signalfd. A minor
 * will be allocated for it along with an associated signalfd_state_t struct.
 * That structure holds the mask of desired signals.
 *
 * Reading from the signalfd is straightforward and mimics the kernel
 * behavior for sigtimedwait(). Signals continue to live on either the proc's
 * p_sig or the thread's t_sig member. During a read operation, those which
 * match the mask are consumed so they are no longer pending.
 *
 * The poll side is more complex. Every time a signal is delivered, all of the
 * signalfds on the process need to be examined in order to pollwake threads
 * waiting for signal arrival.
 *
 * When a thread polling on a signalfd requires a pollhead, several steps must
 * be taken to safely ensure the proper result. A sigfd_proc_state_t is
 * created for the calling process if it does not yet exist. That structure
 * holds a list of sigfd_poll_waiter_t structures which associate pollheads
 * with signalfd_state_t entries. The sigfd_proc_state_t list is walked to
 * find a sigfd_poll_waiter_t matching the signalfd_state_t which corresponds
 * to the polled resource. If one is found, it is reused. Otherwise a new one
 * is created, incrementing the refcount on the signalfd_state_t, and it is
 * added to the list.
 *
 * The complications imposed by fork(2) are why the pollhead is stored in the
 * associated sigfd_poll_waiter_t instead of directly in the signalfd_state_t.
 * More than one process can hold a reference to the signalfd at a time but
 * arriving signals should wake only process-local pollers. Additionally,
 * signalfd_close is called only when the last referencing fd is closed,
 * hiding occurrences of preceding threads which released their references.
 * This necessitates reference counting on the signalfd_state_t so it is able
 * to persist after close until all poll references have been cleansed. Doing
 * so ensures that blocked pollers which hold references to the
 * signalfd_state_t will be able to do clean-up after the descriptor itself
 * has been closed.
 *
 * When a signal arrives in a process polling on signalfd, signalfd_pollwake_cb
 * is called via the pointer in sigfd_proc_state_t. It will walk over the
 * sigfd_poll_waiter_t entries present in the list, searching for any
 * associated with a signalfd_state_t with a matching signal mask. The
 * approach of keeping the poller list in p_sigfd was chosen because a process
 * is likely to use few signalfds relative to its total file descriptors. It
 * reduces the work required for each received signal.
 *
 * When matching sigfd_poll_waiter_t entries are encountered in the poller
 * list during signalfd_pollwake_cb, they are dispatched into signalfd_wakeq
 * to perform the pollwake. This is due to a lock ordering conflict between
 * signalfd_poll and signalfd_pollwake_cb. The former acquires
 * pollcache_t`pc_lock before proc_t`p_lock. The latter (via sigtoproc)
 * reverses the order. Deferring the pollwake into a taskq means it can be
 * performed without proc_t`p_lock held, avoiding the deadlock.
 *
 * The sigfd_list is self-cleaning; as signalfd_pollwake_cb is called, the
 * list will clear out on its own. Any per-process state which remains will be
 * cleaned up by the exit helper (signalfd_exit_helper).
 *
 * The structures associated with signalfd state are designed to operate
 * correctly across fork, but there is one caveat that applies. Using
 * fork-shared signalfd descriptors in conjunction with fork-shared caching
 * poll descriptors (such as /dev/poll or event ports) will result in missed
 * poll wake-ups. This is caused by the pollhead identity of signalfd
 * descriptors being dependent on the process they are polled from. Because it
 * has a thread-local cache, poll(2) is unaffected by this limitation.
 *
 * Lock ordering:
 *
 * 1. signalfd_lock
 * 2. signalfd_state_t`sfd_lock
 *
 * 1. proc_t`p_lock (to walk p_sigfd)
 * 2. signalfd_state_t`sfd_lock
 * 2a. signalfd_lock (after sfd_lock is dropped, when sfd_count falls to 0)
 */
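
/*
 * To make the flow above concrete, below is a minimal userland sketch. It is
 * illustrative only and not part of this driver; it assumes direct use of
 * the device node and the SIGNALFDIOC_MASK ioctl from <sys/signalfd.h>.
 * Portable consumers should instead use signalfd(3C), which wraps this
 * device:
 *
 *	#include <sys/signalfd.h>
 *	#include <sys/ioctl.h>
 *	#include <signal.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	sigset_t mask;
 *	signalfd_siginfo_t ssi;
 *	int fd = open("/dev/signalfd", O_RDONLY);
 *
 *	sigemptyset(&mask);
 *	sigaddset(&mask, SIGUSR1);
 *	sigprocmask(SIG_BLOCK, &mask, NULL);	// suppress default delivery
 *	ioctl(fd, SIGNALFDIOC_MASK, &mask);	// set this fd's signal set
 *
 *	// Blocks (absent O_NONBLOCK) until a signal in the mask is pending,
 *	// then consumes it, as implemented by signalfd_read() below.
 *	(void) read(fd, &ssi, sizeof (ssi));
 */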

#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/signalfd.h>
#include <sys/conf.h>
#include <sys/sysmacros.h>
#include <sys/filio.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/schedctl.h>
#include <sys/id_space.h>
#include <sys/sdt.h>
#include <sys/brand.h>
#include <sys/disp.h>
#include <sys/taskq_impl.h>

typedef struct signalfd_state signalfd_state_t;

struct signalfd_state {
	list_node_t	sfd_list;	/* node in global list */
	kmutex_t	sfd_lock;	/* protects fields below */
	uint_t		sfd_count;	/* ref count */
	boolean_t	sfd_valid;	/* valid while open */
	k_sigset_t	sfd_set;	/* signals for this fd */
};

typedef struct sigfd_poll_waiter {
	list_node_t		spw_list;
	signalfd_state_t	*spw_state;
	pollhead_t		spw_pollhd;
	taskq_ent_t		spw_taskent;
	short			spw_pollev;
} sigfd_poll_waiter_t;

/*
 * Protects global state in signalfd_devi, signalfd_minor, signalfd_softstate,
 * and signalfd_state (including sfd_list field of members)
 */
static kmutex_t		signalfd_lock;
static dev_info_t	*signalfd_devi;		/* device info */
static id_space_t	*signalfd_minor;	/* minor number arena */
static void		*signalfd_softstate;	/* softstate pointer */
static list_t		signalfd_state;		/* global list of state */
static taskq_t		*signalfd_wakeq;	/* pollwake event taskq */

static void
signalfd_state_enter_locked(signalfd_state_t *state)
{
	ASSERT(MUTEX_HELD(&state->sfd_lock));
	ASSERT(state->sfd_count > 0);
	VERIFY(state->sfd_valid == B_TRUE);

	state->sfd_count++;
}

static void
signalfd_state_release(signalfd_state_t *state, boolean_t force_invalidate)
{
	mutex_enter(&state->sfd_lock);

	if (force_invalidate) {
		state->sfd_valid = B_FALSE;
	}

	ASSERT(state->sfd_count > 0);
	if (state->sfd_count == 1) {
		VERIFY(state->sfd_valid == B_FALSE);
		mutex_exit(&state->sfd_lock);
		if (force_invalidate) {
			/*
			 * The invalidation performed in signalfd_close is done
			 * while signalfd_lock is held.
			 */
			ASSERT(MUTEX_HELD(&signalfd_lock));
			list_remove(&signalfd_state, state);
		} else {
			ASSERT(MUTEX_NOT_HELD(&signalfd_lock));
			mutex_enter(&signalfd_lock);
			list_remove(&signalfd_state, state);
			mutex_exit(&signalfd_lock);
		}
		kmem_free(state, sizeof (*state));
		return;
	}
	state->sfd_count--;
	mutex_exit(&state->sfd_lock);
}

static sigfd_poll_waiter_t *
signalfd_wake_list_add(sigfd_proc_state_t *pstate, signalfd_state_t *state)
{
	list_t *lst = &pstate->sigfd_list;
	sigfd_poll_waiter_t *pw;

	for (pw = list_head(lst); pw != NULL; pw = list_next(lst, pw)) {
		if (pw->spw_state == state)
			break;
	}

	if (pw == NULL) {
		pw = kmem_zalloc(sizeof (*pw), KM_SLEEP);

		mutex_enter(&state->sfd_lock);
		signalfd_state_enter_locked(state);
		pw->spw_state = state;
		mutex_exit(&state->sfd_lock);
		list_insert_head(lst, pw);
	}
	return (pw);
}

static sigfd_poll_waiter_t *
signalfd_wake_list_rm(sigfd_proc_state_t *pstate, signalfd_state_t *state)
{
	list_t *lst = &pstate->sigfd_list;
	sigfd_poll_waiter_t *pw;

	for (pw = list_head(lst); pw != NULL; pw = list_next(lst, pw)) {
		if (pw->spw_state == state) {
			break;
		}
	}

	if (pw != NULL) {
		list_remove(lst, pw);
		pw->spw_state = NULL;
		signalfd_state_release(state, B_FALSE);
	}

	return (pw);
}

static void
signalfd_wake_list_cleanup(proc_t *p)
{
	sigfd_proc_state_t *pstate = p->p_sigfd;
	sigfd_poll_waiter_t *pw;
	list_t *lst;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(pstate != NULL);

	lst = &pstate->sigfd_list;
	while ((pw = list_remove_head(lst)) != NULL) {
		signalfd_state_t *state = pw->spw_state;

		pw->spw_state = NULL;
		signalfd_state_release(state, B_FALSE);

		pollwakeup(&pw->spw_pollhd, POLLERR);
		pollhead_clean(&pw->spw_pollhd);
		kmem_free(pw, sizeof (*pw));
	}
	list_destroy(lst);

	p->p_sigfd = NULL;
	kmem_free(pstate, sizeof (*pstate));
}

static void
signalfd_exit_helper(void)
{
	proc_t *p = curproc;

	mutex_enter(&p->p_lock);
	signalfd_wake_list_cleanup(p);
	mutex_exit(&p->p_lock);
}

/*
 * Perform pollwake for a sigfd_poll_waiter_t entry.
 * Thanks to the strict and conflicting lock orders required for signalfd_poll
 * (pc_lock before p_lock) and signalfd_pollwake_cb (p_lock before pc_lock),
 * this is relegated to a taskq to avoid deadlock.
 */
static void
signalfd_wake_task(void *arg)
{
	sigfd_poll_waiter_t *pw = arg;
	signalfd_state_t *state = pw->spw_state;

	pw->spw_state = NULL;
	signalfd_state_release(state, B_FALSE);
	pollwakeup(&pw->spw_pollhd, pw->spw_pollev);
	pollhead_clean(&pw->spw_pollhd);
	kmem_free(pw, sizeof (*pw));
}

/*
 * Called every time a signal is delivered to the process so that we can
 * see if any signal stream needs a pollwakeup. We maintain a list of
 * signal state elements so that we don't have to look at every file
 * descriptor on the process. If necessary, a further optimization would be
 * to maintain a signal set mask that is a union of all of the sets in the
 * list so that we don't even traverse the list if the signal is not in one
 * of the elements. However, since the list is likely to be very short, this
 * is not currently being done. A more complex data structure might also be
 * used, but it is unclear what that would be since each signal set needs to
 * be checked for a match. (A sketch of the union-mask idea follows this
 * function.)
 */
static void
signalfd_pollwake_cb(void *arg0, int sig)
{
	proc_t *p = (proc_t *)arg0;
	sigfd_proc_state_t *pstate = (sigfd_proc_state_t *)p->p_sigfd;
	list_t *lst;
	sigfd_poll_waiter_t *pw;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(pstate != NULL);

	lst = &pstate->sigfd_list;
	pw = list_head(lst);
	while (pw != NULL) {
		signalfd_state_t *state = pw->spw_state;
		sigfd_poll_waiter_t *next;

		mutex_enter(&state->sfd_lock);
		if (!state->sfd_valid) {
			pw->spw_pollev = POLLERR;
		} else if (sigismember(&state->sfd_set, sig)) {
			pw->spw_pollev = POLLRDNORM | POLLIN;
		} else {
			mutex_exit(&state->sfd_lock);
			pw = list_next(lst, pw);
			continue;
		}
		mutex_exit(&state->sfd_lock);

		/*
		 * Pull the sigfd_poll_waiter_t out of the list and dispatch it
		 * to perform a pollwake. This cannot be done synchronously
		 * since signalfd_poll and signalfd_pollwake_cb have
		 * conflicting lock orders which can deadlock.
		 */
		next = list_next(lst, pw);
		list_remove(lst, pw);
		taskq_dispatch_ent(signalfd_wakeq, signalfd_wake_task, pw, 0,
		    &pw->spw_taskent);
		pw = next;
	}
}
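
/*
 * A sketch of the union-mask optimization mentioned above, illustrative only
 * and not implemented here. It assumes a hypothetical sigfd_union field in
 * sigfd_proc_state_t, recomputed under p_lock whenever the waiter list or a
 * waiter's sfd_set changes, using the kernel's k_sigset_t helpers
 * (sigemptyset() is already used in this file; sigorset() is assumed):
 *
 *	static void
 *	sigfd_union_recompute(sigfd_proc_state_t *pstate)
 *	{
 *		sigfd_poll_waiter_t *pw;
 *		list_t *lst = &pstate->sigfd_list;
 *
 *		sigemptyset(&pstate->sigfd_union);
 *		for (pw = list_head(lst); pw != NULL;
 *		    pw = list_next(lst, pw)) {
 *			sigorset(&pstate->sigfd_union,
 *			    &pw->spw_state->sfd_set);
 *		}
 *	}
 *
 * signalfd_pollwake_cb() could then return immediately when
 * sigismember(&pstate->sigfd_union, sig) is false, skipping the walk.
 */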

_NOTE(ARGSUSED(1))
static int
signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
	signalfd_state_t *state, **sstate;
	major_t major = getemajor(*devp);
	minor_t minor = getminor(*devp);

	if (minor != SIGNALFDMNRN_SIGNALFD)
		return (ENXIO);

	mutex_enter(&signalfd_lock);

	minor = (minor_t)id_allocff(signalfd_minor);
	if (ddi_soft_state_zalloc(signalfd_softstate, minor) != DDI_SUCCESS) {
		id_free(signalfd_minor, minor);
		mutex_exit(&signalfd_lock);
		return (ENODEV);
	}

	state = kmem_zalloc(sizeof (*state), KM_SLEEP);
	state->sfd_valid = B_TRUE;
	state->sfd_count = 1;
	list_insert_head(&signalfd_state, (void *)state);

	sstate = ddi_get_soft_state(signalfd_softstate, minor);
	*sstate = state;
	*devp = makedevice(major, minor);

	mutex_exit(&signalfd_lock);

	return (0);
}

/*
 * Consume one signal from our set in a manner similar to sigtimedwait().
 * The block parameter is used to control whether we wait for a signal or
 * return immediately if no signal is pending. We use the thread's t_sigwait
 * member in the same way that it is used by sigtimedwait.
 *
 * Return 0 if we successfully consumed a signal or an errno if not.
 */
static int
consume_signal(k_sigset_t set, uio_t *uio, boolean_t block)
{
	k_sigset_t oldmask;
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	timespec_t now;
	timespec_t *rqtp = NULL;	/* null means blocking */
	int timecheck = 0;
	int ret = 0;
	k_siginfo_t info, *infop;
	signalfd_siginfo_t ssi, *ssp = &ssi;

	if (block == B_FALSE) {
		timecheck = timechanged;
		gethrestime(&now);
		rqtp = &now;	/* non-blocking check for pending signals */
	}

	t->t_sigwait = set;

	mutex_enter(&p->p_lock);
	/*
	 * Set the thread's signal mask to unmask those signals in the
	 * specified set.
	 */
	schedctl_finish_sigblock(t);
	oldmask = t->t_hold;
	sigdiffset(&t->t_hold, &t->t_sigwait);

	/*
	 * Based on rqtp, wait indefinitely until we take a signal in our set
	 * or return immediately if there are no signals pending from our set.
	 */
	while ((ret = cv_waituntil_sig(&t->t_delay_cv, &p->p_lock, rqtp,
	    timecheck)) > 0)
		continue;

	/* Restore thread's signal mask to its previous value. */
	t->t_hold = oldmask;
	t->t_sig_check = 1;	/* so post_syscall sees new t_hold mask */

	if (ret == -1) {
		/* no signals pending */
		mutex_exit(&p->p_lock);
		sigemptyset(&t->t_sigwait);
		return (EAGAIN);
	}

	/* Don't bother with signal if it is not in request set. */
	if (lwp->lwp_cursig == 0 ||
	    !sigismember(&t->t_sigwait, lwp->lwp_cursig)) {
		mutex_exit(&p->p_lock);
		/*
		 * lwp_cursig is zero if pokelwps() awakened cv_wait_sig().
		 * This happens if some other thread in this process called
		 * forkall() or exit().
		 */
		sigemptyset(&t->t_sigwait);
		return (EINTR);
	}

	if (lwp->lwp_curinfo) {
		infop = &lwp->lwp_curinfo->sq_info;
	} else {
		infop = &info;
		bzero(infop, sizeof (info));
		infop->si_signo = lwp->lwp_cursig;
		infop->si_code = SI_NOINFO;
	}

	lwp->lwp_ru.nsignals++;

	DTRACE_PROC2(signal__clear, int, ret, ksiginfo_t *, infop);
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	mutex_exit(&p->p_lock);

	if (PROC_IS_BRANDED(p) && BROP(p)->b_sigfd_translate)
		BROP(p)->b_sigfd_translate(infop);

	/* Convert k_siginfo into external, datamodel independent, struct. */
	bzero(ssp, sizeof (*ssp));
	ssp->ssi_signo = infop->si_signo;
	ssp->ssi_errno = infop->si_errno;
	ssp->ssi_code = infop->si_code;
	ssp->ssi_pid = infop->si_pid;
	ssp->ssi_uid = infop->si_uid;
	ssp->ssi_fd = infop->si_fd;
	ssp->ssi_band = infop->si_band;
	ssp->ssi_trapno = infop->si_trapno;
	ssp->ssi_status = infop->si_status;
	ssp->ssi_utime = infop->si_utime;
	ssp->ssi_stime = infop->si_stime;
	ssp->ssi_addr = (uint64_t)(intptr_t)infop->si_addr;

	ret = uiomove(ssp, sizeof (*ssp), UIO_READ, uio);

	if (lwp->lwp_curinfo) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}
	sigemptyset(&t->t_sigwait);
	return (ret);
}

/*
 * This is similar to sigtimedwait. Based on the fd mode we may wait until a
 * signal within our specified set is posted. We consume as many available
 * signals within our set as we can.
 */
_NOTE(ARGSUSED(2))
static int
signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
{
	signalfd_state_t *state, **sstate;
	minor_t minor = getminor(dev);
	boolean_t block = B_TRUE;
	k_sigset_t set;
	boolean_t got_one = B_FALSE;
	int res;

	if (uio->uio_resid < sizeof (signalfd_siginfo_t))
		return (EINVAL);

	sstate = ddi_get_soft_state(signalfd_softstate, minor);
	state = *sstate;

	if (uio->uio_fmode & (FNDELAY|FNONBLOCK))
		block = B_FALSE;

	mutex_enter(&state->sfd_lock);
	set = state->sfd_set;
	mutex_exit(&state->sfd_lock);

	if (sigisempty(&set))
		return (set_errno(EINVAL));

	do {
		res = consume_signal(set, uio, block);

		if (res == 0) {
			/*
			 * After consuming one signal, do not block while
			 * trying to consume more.
			 */
			got_one = B_TRUE;
			block = B_FALSE;

			/*
			 * Refresh the matching signal set in case it was
			 * updated during the wait.
			 */
			mutex_enter(&state->sfd_lock);
			set = state->sfd_set;
			mutex_exit(&state->sfd_lock);
			if (sigisempty(&set))
				break;
		}
	} while (res == 0 && uio->uio_resid >= sizeof (signalfd_siginfo_t));

	if (got_one)
		res = 0;

	return (res);
}

/*
 * If k_sigset_t's were a single word, we would do:
 *	return (((p->p_sig | t->t_sig) & set) & fillset);
 */
static int
signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set)
{
	return (((p->p_sig.__sigbits[0] | t->t_sig.__sigbits[0]) &
	    set.__sigbits[0]) |
	    ((p->p_sig.__sigbits[1] | t->t_sig.__sigbits[1]) &
	    set.__sigbits[1]) |
	    (((p->p_sig.__sigbits[2] | t->t_sig.__sigbits[2]) &
	    set.__sigbits[2]) & FILLSET2));
}
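
/*
 * Equivalently, as an illustrative sketch (assuming the three-word
 * k_sigset_t layout above and that FILLSET0/FILLSET1 are all-ones, which is
 * why only word 2 needs masking in the unrolled form):
 *
 *	uint_t i, pend = 0;
 *	const uint_t fill[3] = { FILLSET0, FILLSET1, FILLSET2 };
 *
 *	for (i = 0; i < 3; i++) {
 *		pend |= (p->p_sig.__sigbits[i] | t->t_sig.__sigbits[i]) &
 *		    set.__sigbits[i] & fill[i];
 *	}
 *	return (pend);
 */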

_NOTE(ARGSUSED(4))
static int
signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	signalfd_state_t *state, **sstate;
	minor_t minor = getminor(dev);
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	short revents = 0;

	sstate = ddi_get_soft_state(signalfd_softstate, minor);
	state = *sstate;

	mutex_enter(&state->sfd_lock);

	if (signalfd_sig_pending(p, t, state->sfd_set) != 0)
		revents |= POLLRDNORM | POLLIN;

	mutex_exit(&state->sfd_lock);

	if (!(*reventsp = revents & events) && !anyyet) {
		sigfd_proc_state_t *pstate;
		sigfd_poll_waiter_t *pw;

		/*
		 * Enable pollwakeup handling.
		 */
		mutex_enter(&p->p_lock);
		if ((pstate = (sigfd_proc_state_t *)p->p_sigfd) == NULL) {
			mutex_exit(&p->p_lock);
			pstate = kmem_zalloc(sizeof (*pstate), KM_SLEEP);
			list_create(&pstate->sigfd_list,
			    sizeof (sigfd_poll_waiter_t),
			    offsetof(sigfd_poll_waiter_t, spw_list));
			pstate->sigfd_pollwake_cb = signalfd_pollwake_cb;

			/* Check again, after blocking for the alloc. */
			mutex_enter(&p->p_lock);
			if (p->p_sigfd == NULL) {
				p->p_sigfd = pstate;
			} else {
				/* someone beat us to it */
				list_destroy(&pstate->sigfd_list);
				kmem_free(pstate, sizeof (*pstate));
				pstate = p->p_sigfd;
			}
		}

		pw = signalfd_wake_list_add(pstate, state);
		*phpp = &pw->spw_pollhd;
		mutex_exit(&p->p_lock);
	}

	return (0);
}

_NOTE(ARGSUSED(4))
static int
signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
	signalfd_state_t *state, **sstate;
	minor_t minor = getminor(dev);
	sigset_t mask;

	sstate = ddi_get_soft_state(signalfd_softstate, minor);
	state = *sstate;

	switch (cmd) {
	case SIGNALFDIOC_MASK:
		if (ddi_copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t),
		    md) != 0)
			return (set_errno(EFAULT));

		mutex_enter(&state->sfd_lock);
		sigutok(&mask, &state->sfd_set);
		mutex_exit(&state->sfd_lock);

		return (0);

	default:
		break;
	}

	return (ENOTTY);
}

_NOTE(ARGSUSED(1))
static int
signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
	signalfd_state_t *state, **sstate;
	sigfd_poll_waiter_t *pw = NULL;
	minor_t minor = getminor(dev);
	proc_t *p = curproc;

	sstate = ddi_get_soft_state(signalfd_softstate, minor);
	state = *sstate;

	/* Make sure state is removed from this proc's pollwake list. */
	mutex_enter(&p->p_lock);
	if (p->p_sigfd != NULL) {
		sigfd_proc_state_t *pstate = p->p_sigfd;

		pw = signalfd_wake_list_rm(pstate, state);
		if (list_is_empty(&pstate->sigfd_list)) {
			signalfd_wake_list_cleanup(p);
		}
	}
	mutex_exit(&p->p_lock);

	if (pw != NULL) {
		pollwakeup(&pw->spw_pollhd, POLLERR);
		pollhead_clean(&pw->spw_pollhd);
		kmem_free(pw, sizeof (*pw));
	}

	mutex_enter(&signalfd_lock);

	*sstate = NULL;
	ddi_soft_state_free(signalfd_softstate, minor);
	id_free(signalfd_minor, minor);

	signalfd_state_release(state, B_TRUE);

	mutex_exit(&signalfd_lock);

	return (0);
}

static int
signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	if (cmd != DDI_ATTACH || signalfd_devi != NULL)
		return (DDI_FAILURE);

	mutex_enter(&signalfd_lock);

	signalfd_minor = id_space_create("signalfd_minor", 1, L_MAXMIN32 + 1);
	if (signalfd_minor == NULL) {
		cmn_err(CE_WARN, "signalfd couldn't create id space");
		mutex_exit(&signalfd_lock);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_init(&signalfd_softstate,
	    sizeof (signalfd_state_t *), 0) != 0) {
		cmn_err(CE_WARN, "signalfd failed to create soft state");
		id_space_destroy(signalfd_minor);
		mutex_exit(&signalfd_lock);
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(devi, "signalfd", S_IFCHR,
	    SIGNALFDMNRN_SIGNALFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
		cmn_err(CE_NOTE, "/dev/signalfd couldn't create minor node");
		ddi_soft_state_fini(&signalfd_softstate);
		id_space_destroy(signalfd_minor);
		mutex_exit(&signalfd_lock);
		return (DDI_FAILURE);
	}

	ddi_report_dev(devi);
	signalfd_devi = devi;

	sigfd_exit_helper = signalfd_exit_helper;

	list_create(&signalfd_state, sizeof (signalfd_state_t),
	    offsetof(signalfd_state_t, sfd_list));

	signalfd_wakeq = taskq_create("signalfd_wake", 1, minclsyspri,
	    0, INT_MAX, TASKQ_PREPOPULATE);

	mutex_exit(&signalfd_lock);

	return (DDI_SUCCESS);
}

_NOTE(ARGSUSED(0))
static int
signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	switch (cmd) {
	case DDI_DETACH:
		break;

	default:
		return (DDI_FAILURE);
	}

	mutex_enter(&signalfd_lock);

	if (!list_is_empty(&signalfd_state)) {
		/*
		 * There are dangling poll waiters holding signalfd_state_t
		 * entries on the global list. Detach is not possible until
		 * they purge themselves.
		 */
		mutex_exit(&signalfd_lock);
		return (DDI_FAILURE);
	}
	list_destroy(&signalfd_state);

	/*
	 * With no remaining entries in the signalfd_state list, the wake taskq
	 * should be empty with no possibility for new entries.
	 */
	taskq_destroy(signalfd_wakeq);

	id_space_destroy(signalfd_minor);

	ddi_remove_minor_node(signalfd_devi, NULL);
	signalfd_devi = NULL;
	sigfd_exit_helper = NULL;

	ddi_soft_state_fini(&signalfd_softstate);
	mutex_exit(&signalfd_lock);

	return (DDI_SUCCESS);
}

_NOTE(ARGSUSED(0))
static int
signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)signalfd_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

static struct cb_ops signalfd_cb_ops = {
	signalfd_open,		/* open */
	signalfd_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	signalfd_read,		/* read */
	nodev,			/* write */
	signalfd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	signalfd_poll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops signalfd_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	signalfd_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	signalfd_attach,	/* attach */
	signalfd_detach,	/* detach */
	nodev,			/* reset */
	&signalfd_cb_ops,	/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"signalfd support",	/* name of module */
	&signalfd_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}