Print this page
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/io/signalfd.c
+++ new/usr/src/uts/common/io/signalfd.c
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11
12 12 /*
13 13 * Copyright 2016 Joyent, Inc.
14 14 */
15 15
16 16 /*
17 17 * Support for the signalfd facility, a Linux-borne facility for
18 18 * file descriptor-based synchronous signal consumption.
19 19 *
20 20 * As described on the signalfd(3C) man page, the general idea behind these
21 21 * file descriptors is that they can be used to synchronously consume signals
22 22 * via the read(2) syscall. While that capability already exists with the
23 23 * sigwaitinfo(3C) function, signalfd holds an advantage since it is file
24 24  * descriptor based: It is able to use the event facilities (poll(2), /dev/poll,
25 25 * event ports) to notify interested parties when consumable signals arrive.
26 26 *
27 27  * The signalfd lifecycle begins when a process opens /dev/signalfd. A minor
28 28 * will be allocated for them along with an associated signalfd_state_t struct.
29 29 * It is there where the mask of desired signals resides.
30 30 *
31 31 * Reading from the signalfd is straightforward and mimics the kernel behavior
32 32 * for sigtimedwait(). Signals continue to live on either the proc's p_sig, or
33 33 * thread's t_sig, member. During a read operation, those which match the mask
34 34 * are consumed so they are no longer pending.
35 35 *
36 36 * The poll side is more complex. Every time a signal is delivered, all of the
37 37 * signalfds on the process need to be examined in order to pollwake threads
38 38 * waiting for signal arrival.
39 39 *
40 40 * When a thread polling on a signalfd requires a pollhead, several steps must
41 41 * be taken to safely ensure the proper result. A sigfd_proc_state_t is
42 42 * created for the calling process if it does not yet exist. It is there where
43 43 * a list of sigfd_poll_waiter_t structures reside which associate pollheads to
44 44 * signalfd_state_t entries. The sigfd_proc_state_t list is walked to find a
45 45 * sigfd_poll_waiter_t matching the signalfd_state_t which corresponds to the
46 46 * polled resource. If one is found, it is reused. Otherwise a new one is
47 47 * created, incrementing the refcount on the signalfd_state_t, and it is added
48 48 * to the sigfd_poll_waiter_t list.
49 49 *
50 50 * The complications imposed by fork(2) are why the pollhead is stored in the
51 51 * associated sigfd_poll_waiter_t instead of directly in the signalfd_state_t.
52 52 * More than one process can hold a reference to the signalfd at a time but
53 53 * arriving signals should wake only process-local pollers. Additionally,
54 54 * signalfd_close is called only when the last referencing fd is closed, hiding
55 55  * occurrences of preceding threads which released their references. This
56 56 * necessitates reference counting on the signalfd_state_t so it is able to
57 57 * persist after close until all poll references have been cleansed. Doing so
58 58 * ensures that blocked pollers which hold references to the signalfd_state_t
59 59 * will be able to do clean-up after the descriptor itself has been closed.
60 60 *
61 61 * When a signal arrives in a process polling on signalfd, signalfd_pollwake_cb
62 62 * is called via the pointer in sigfd_proc_state_t. It will walk over the
63 63 * sigfd_poll_waiter_t entries present in the list, searching for any
64 64 * associated with a signalfd_state_t with a matching signal mask. The
65 65 * approach of keeping the poller list in p_sigfd was chosen because a process
66 66 * is likely to use few signalfds relative to its total file descriptors. It
67 67 * reduces the work required for each received signal.
68 68 *
69 69 * When matching sigfd_poll_waiter_t entries are encountered in the poller list
70 70 * during signalfd_pollwake_cb, they are dispatched into signalfd_wakeq to
71 71 * perform the pollwake. This is due to a lock ordering conflict between
72 72 * signalfd_poll and signalfd_pollwake_cb. The former acquires
73 73 * pollcache_t`pc_lock before proc_t`p_lock. The latter (via sigtoproc)
74 74  * reverses the order. Deferring the pollwake into a taskq means it can be
75 75 * performed without proc_t`p_lock held, avoiding the deadlock.
76 76 *
77 77 * The sigfd_list is self-cleaning; as signalfd_pollwake_cb is called, the list
78 78 * will clear out on its own. Any remaining per-process state which remains
79 79 * will be cleaned up by the exit helper (signalfd_exit_helper).
80 80 *
81 81 * The structures associated with signalfd state are designed to operate
82 82 * correctly across fork, but there is one caveat that applies. Using
83 83  * fork-shared signalfd descriptors in conjunction with fork-shared caching poll
84 84 * descriptors (such as /dev/poll or event ports) will result in missed poll
85 85 * wake-ups. This is caused by the pollhead identity of signalfd descriptors
86 86 * being dependent on the process they are polled from. Because it has a
87 87 * thread-local cache, poll(2) is unaffected by this limitation.
88 88 *
89 89 * Lock ordering:
90 90 *
91 91 * 1. signalfd_lock
92 92 * 2. signalfd_state_t`sfd_lock
93 93 *
94 94 * 1. proc_t`p_lock (to walk p_sigfd)
95 95 * 2. signalfd_state_t`sfd_lock
96 96 * 2a. signalfd_lock (after sfd_lock is dropped, when sfd_count falls to 0)
97 97 */
98 98
99 99 #include <sys/ddi.h>
100 100 #include <sys/sunddi.h>
101 101 #include <sys/signalfd.h>
102 102 #include <sys/conf.h>
103 103 #include <sys/sysmacros.h>
104 104 #include <sys/filio.h>
105 105 #include <sys/stat.h>
106 106 #include <sys/file.h>
107 107 #include <sys/schedctl.h>
108 108 #include <sys/id_space.h>
109 109 #include <sys/sdt.h>
110 110 #include <sys/brand.h>
111 111 #include <sys/disp.h>
112 112 #include <sys/taskq_impl.h>
113 113
typedef struct signalfd_state signalfd_state_t;

/*
 * Per-open state for a signalfd descriptor.  Allocated in signalfd_open and
 * freed by signalfd_state_release once the last reference (open fd plus any
 * outstanding poll waiters) is dropped.
 */
struct signalfd_state {
	list_node_t sfd_list;		/* node in global list */
	kmutex_t sfd_lock;		/* protects fields below */
	uint_t sfd_count;		/* ref count */
	boolean_t sfd_valid;		/* valid while open */
	k_sigset_t sfd_set;		/* signals for this fd */
};

/*
 * Per-process poll waiter, linking a pollhead to a signalfd_state_t.  Lives
 * on the process's sigfd_proc_state_t list and holds a reference on
 * spw_state.  The embedded taskq_ent_t allows dispatch without allocation.
 */
typedef struct sigfd_poll_waiter {
	list_node_t spw_list;
	signalfd_state_t *spw_state;
	pollhead_t spw_pollhd;
	taskq_ent_t spw_taskent;
	short spw_pollev;		/* pending pollwakeup event(s) */
} sigfd_poll_waiter_t;

/*
 * Protects global state in signalfd_devi, signalfd_minor, signalfd_softstate,
 * and signalfd_state (including sfd_list field of members)
 */
static kmutex_t signalfd_lock;
static dev_info_t *signalfd_devi;	/* device info */
static id_space_t *signalfd_minor;	/* minor number arena */
static void *signalfd_softstate;	/* softstate pointer */
static list_t signalfd_state;		/* global list of state */
static taskq_t *signalfd_wakeq;		/* pollwake event taskq */
142 142
143 143
/*
 * Take an additional hold on a signalfd_state_t.  The caller must hold
 * sfd_lock and an existing reference (sfd_count > 0), and the state must
 * still be valid, i.e. the descriptor has not yet been closed.
 */
static void
signalfd_state_enter_locked(signalfd_state_t *state)
{
	ASSERT(MUTEX_HELD(&state->sfd_lock));
	ASSERT(state->sfd_count > 0);
	VERIFY(state->sfd_valid == B_TRUE);

	state->sfd_count++;
}
153 153
/*
 * Release one hold on a signalfd_state_t, freeing it (and unlinking it from
 * the global signalfd_state list) when the last reference is dropped.  If
 * force_invalidate is set (done only from signalfd_close, with signalfd_lock
 * held), the state is first marked invalid so remaining pollers see POLLERR.
 */
static void
signalfd_state_release(signalfd_state_t *state, boolean_t force_invalidate)
{
	mutex_enter(&state->sfd_lock);

	if (force_invalidate) {
		state->sfd_valid = B_FALSE;
	}

	ASSERT(state->sfd_count > 0);
	if (state->sfd_count == 1) {
		/* Last reference: the state must already be invalidated. */
		VERIFY(state->sfd_valid == B_FALSE);
		mutex_exit(&state->sfd_lock);
		if (force_invalidate) {
			/*
			 * The invalidation performed in signalfd_close is done
			 * while signalfd_lock is held.
			 */
			ASSERT(MUTEX_HELD(&signalfd_lock));
			list_remove(&signalfd_state, state);
		} else {
			/*
			 * Per the lock-order rules in the big theory comment,
			 * signalfd_lock may be taken only after sfd_lock has
			 * been dropped.
			 */
			ASSERT(MUTEX_NOT_HELD(&signalfd_lock));
			mutex_enter(&signalfd_lock);
			list_remove(&signalfd_state, state);
			mutex_exit(&signalfd_lock);
		}
		kmem_free(state, sizeof (*state));
		return;
	}
	state->sfd_count--;
	mutex_exit(&state->sfd_lock);
}
186 186
187 187 static sigfd_poll_waiter_t *
188 188 signalfd_wake_list_add(sigfd_proc_state_t *pstate, signalfd_state_t *state)
189 189 {
190 190 list_t *lst = &pstate->sigfd_list;
191 191 sigfd_poll_waiter_t *pw;
192 192
193 193 for (pw = list_head(lst); pw != NULL; pw = list_next(lst, pw)) {
194 194 if (pw->spw_state == state)
195 195 break;
196 196 }
197 197
198 198 if (pw == NULL) {
199 199 pw = kmem_zalloc(sizeof (*pw), KM_SLEEP);
200 200
201 201 mutex_enter(&state->sfd_lock);
202 202 signalfd_state_enter_locked(state);
203 203 pw->spw_state = state;
204 204 mutex_exit(&state->sfd_lock);
205 205 list_insert_head(lst, pw);
206 206 }
207 207 return (pw);
208 208 }
209 209
210 210 static sigfd_poll_waiter_t *
211 211 signalfd_wake_list_rm(sigfd_proc_state_t *pstate, signalfd_state_t *state)
212 212 {
213 213 list_t *lst = &pstate->sigfd_list;
214 214 sigfd_poll_waiter_t *pw;
215 215
216 216 for (pw = list_head(lst); pw != NULL; pw = list_next(lst, pw)) {
217 217 if (pw->spw_state == state) {
218 218 break;
219 219 }
220 220 }
221 221
222 222 if (pw != NULL) {
223 223 list_remove(lst, pw);
224 224 pw->spw_state = NULL;
225 225 signalfd_state_release(state, B_FALSE);
226 226 }
227 227
228 228 return (pw);
229 229 }
230 230
/*
 * Tear down all per-process signalfd poll state for process 'p'.  Every
 * remaining poll waiter is woken with POLLERR, its pollhead cleaned, its
 * reference on the associated signalfd_state_t released, and the waiter
 * freed.  Finally the sigfd_proc_state_t itself is freed and p_sigfd
 * cleared.  Caller must hold p->p_lock.
 */
static void
signalfd_wake_list_cleanup(proc_t *p)
{
	sigfd_proc_state_t *pstate = p->p_sigfd;
	sigfd_poll_waiter_t *pw;
	list_t *lst;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(pstate != NULL);

	lst = &pstate->sigfd_list;
	while ((pw = list_remove_head(lst)) != NULL) {
		signalfd_state_t *state = pw->spw_state;

		pw->spw_state = NULL;
		signalfd_state_release(state, B_FALSE);

		/* Wake any blocked poller so it notices the teardown. */
		pollwakeup(&pw->spw_pollhd, POLLERR);
		pollhead_clean(&pw->spw_pollhd);
		kmem_free(pw, sizeof (*pw));
	}
	list_destroy(lst);

	p->p_sigfd = NULL;
	kmem_free(pstate, sizeof (*pstate));
}
257 257
/*
 * Installed as sigfd_exit_helper during attach; invoked on process exit so
 * any remaining per-process signalfd poll state is cleaned up.
 */
static void
signalfd_exit_helper(void)
{
	proc_t *p = curproc;

	mutex_enter(&p->p_lock);
	signalfd_wake_list_cleanup(p);
	mutex_exit(&p->p_lock);
}
267 267
/*
 * Perform pollwake for a sigfd_poll_waiter_t entry.
 * Thanks to the strict and conflicting lock orders required for signalfd_poll
 * (pc_lock before p_lock) and signalfd_pollwake_cb (p_lock before pc_lock),
 * this is relegated to a taskq to avoid deadlock.
 *
 * The waiter was unlinked from the process list by signalfd_pollwake_cb
 * before dispatch; this task consumes it: the state reference is dropped,
 * the poller woken with the recorded event(s), and the waiter freed.
 */
static void
signalfd_wake_task(void *arg)
{
	sigfd_poll_waiter_t *pw = arg;
	signalfd_state_t *state = pw->spw_state;

	pw->spw_state = NULL;
	signalfd_state_release(state, B_FALSE);
	pollwakeup(&pw->spw_pollhd, pw->spw_pollev);
	pollhead_clean(&pw->spw_pollhd);
	kmem_free(pw, sizeof (*pw));
}
286 286
287 287 /*
288 288 * Called every time a signal is delivered to the process so that we can
289 289 * see if any signal stream needs a pollwakeup. We maintain a list of
290 290 * signal state elements so that we don't have to look at every file descriptor
291 291 * on the process. If necessary, a further optimization would be to maintain a
292 292 * signal set mask that is a union of all of the sets in the list so that
293 293 * we don't even traverse the list if the signal is not in one of the elements.
294 294 * However, since the list is likely to be very short, this is not currently
295 295 * being done. A more complex data structure might also be used, but it is
296 296 * unclear what that would be since each signal set needs to be checked for a
297 297 * match.
298 298 */
/*
 * Signal-delivery callback (invoked via sigfd_proc_state_t with p_lock
 * held).  Scans this process's poll waiters; any waiter whose signalfd is
 * closed or whose mask matches 'sig' is unlinked and dispatched to
 * signalfd_wakeq to be woken asynchronously.
 */
static void
signalfd_pollwake_cb(void *arg0, int sig)
{
	proc_t *p = (proc_t *)arg0;
	sigfd_proc_state_t *pstate = (sigfd_proc_state_t *)p->p_sigfd;
	list_t *lst;
	sigfd_poll_waiter_t *pw;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(pstate != NULL);

	lst = &pstate->sigfd_list;
	pw = list_head(lst);
	while (pw != NULL) {
		signalfd_state_t *state = pw->spw_state;
		sigfd_poll_waiter_t *next;

		mutex_enter(&state->sfd_lock);
		if (!state->sfd_valid) {
			/* Descriptor was closed: report an error event. */
			pw->spw_pollev = POLLERR;
		} else if (sigismember(&state->sfd_set, sig)) {
			/* Signal is in this fd's mask: report readability. */
			pw->spw_pollev = POLLRDNORM | POLLIN;
		} else {
			/* No match; leave this waiter in place. */
			mutex_exit(&state->sfd_lock);
			pw = list_next(lst, pw);
			continue;
		}
		mutex_exit(&state->sfd_lock);

		/*
		 * Pull the sigfd_poll_waiter_t out of the list and dispatch it
		 * to perform a pollwake.  This cannot be done synchronously
		 * since signalfd_poll and signalfd_pollwake_cb have
		 * conflicting lock orders which can deadlock.
		 */
		next = list_next(lst, pw);
		list_remove(lst, pw);
		taskq_dispatch_ent(signalfd_wakeq, signalfd_wake_task, pw, 0,
		    &pw->spw_taskent);
		pw = next;
	}
}
341 341
_NOTE(ARGSUSED(1))
/*
 * Open entry point for /dev/signalfd.  Clone-style open: allocates a fresh
 * minor, its soft-state slot, and a signalfd_state_t (valid, with the one
 * reference held by the new fd), then rewrites *devp to the new minor.
 */
static int
signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
	signalfd_state_t *state, **sstate;
	major_t major = getemajor(*devp);
	minor_t minor = getminor(*devp);

	/* Only the designated clone-from minor may be opened directly. */
	if (minor != SIGNALFDMNRN_SIGNALFD)
		return (ENXIO);

	mutex_enter(&signalfd_lock);

	minor = (minor_t)id_allocff(signalfd_minor);
	if (ddi_soft_state_zalloc(signalfd_softstate, minor) != DDI_SUCCESS) {
		id_free(signalfd_minor, minor);
		mutex_exit(&signalfd_lock);
		return (ENODEV);
	}

	state = kmem_zalloc(sizeof (*state), KM_SLEEP);
	state->sfd_valid = B_TRUE;
	state->sfd_count = 1;	/* reference held by the open fd */
	list_insert_head(&signalfd_state, (void *)state);

	sstate = ddi_get_soft_state(signalfd_softstate, minor);
	*sstate = state;
	*devp = makedevice(major, minor);

	mutex_exit(&signalfd_lock);

	return (0);
}
375 375
/*
 * Consume one signal from our set in a manner similar to sigtimedwait().
 * The block parameter is used to control whether we wait for a signal or
 * return immediately if no signal is pending. We use the thread's t_sigwait
 * member in the same way that it is used by sigtimedwait.
 *
 * Return 0 if we successfully consumed a signal or an errno if not.
 */
static int
consume_signal(k_sigset_t set, uio_t *uio, boolean_t block)
{
	k_sigset_t oldmask;
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	timespec_t now;
	timespec_t *rqtp = NULL;	/* null means blocking */
	int timecheck = 0;
	int ret = 0;
	k_siginfo_t info, *infop;
	signalfd_siginfo_t ssi, *ssp = &ssi;

	if (block == B_FALSE) {
		timecheck = timechanged;
		gethrestime(&now);
		rqtp = &now;	/* non-blocking check for pending signals */
	}

	t->t_sigwait = set;

	mutex_enter(&p->p_lock);
	/*
	 * set the thread's signal mask to unmask those signals in the
	 * specified set.
	 */
	schedctl_finish_sigblock(t);
	oldmask = t->t_hold;
	sigdiffset(&t->t_hold, &t->t_sigwait);

	/*
	 * Based on rqtp, wait indefinitely until we take a signal in our set
	 * or return immediately if there are no signals pending from our set.
	 */
	while ((ret = cv_waituntil_sig(&t->t_delay_cv, &p->p_lock, rqtp,
	    timecheck)) > 0)
		continue;

	/* Restore thread's signal mask to its previous value. */
	t->t_hold = oldmask;
	t->t_sig_check = 1;	/* so post_syscall sees new t_hold mask */

	if (ret == -1) {
		/* no signals pending */
		mutex_exit(&p->p_lock);
		sigemptyset(&t->t_sigwait);
		return (EAGAIN);	/* no signals pending */
	}

	/* Don't bother with signal if it is not in request set. */
	if (lwp->lwp_cursig == 0 ||
	    !sigismember(&t->t_sigwait, lwp->lwp_cursig)) {
		mutex_exit(&p->p_lock);
		/*
		 * lwp_cursig is zero if pokelwps() awakened cv_wait_sig().
		 * This happens if some other thread in this process called
		 * forkall() or exit().
		 */
		sigemptyset(&t->t_sigwait);
		return (EINTR);
	}

	if (lwp->lwp_curinfo) {
		/* Use the siginfo queued with the signal, if any. */
		infop = &lwp->lwp_curinfo->sq_info;
	} else {
		/* Otherwise synthesize a minimal siginfo. */
		infop = &info;
		bzero(infop, sizeof (info));
		infop->si_signo = lwp->lwp_cursig;
		infop->si_code = SI_NOINFO;
	}

	lwp->lwp_ru.nsignals++;

	DTRACE_PROC2(signal__clear, int, ret, ksiginfo_t *, infop);
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	mutex_exit(&p->p_lock);

	/* Allow a branded process to translate the siginfo if it wishes. */
	if (PROC_IS_BRANDED(p) && BROP(p)->b_sigfd_translate)
		BROP(p)->b_sigfd_translate(infop);

	/* Convert k_siginfo into external, datamodel independent, struct. */
	bzero(ssp, sizeof (*ssp));
	ssp->ssi_signo = infop->si_signo;
	ssp->ssi_errno = infop->si_errno;
	ssp->ssi_code = infop->si_code;
	ssp->ssi_pid = infop->si_pid;
	ssp->ssi_uid = infop->si_uid;
	ssp->ssi_fd = infop->si_fd;
	ssp->ssi_band = infop->si_band;
	ssp->ssi_trapno = infop->si_trapno;
	ssp->ssi_status = infop->si_status;
	ssp->ssi_utime = infop->si_utime;
	ssp->ssi_stime = infop->si_stime;
	ssp->ssi_addr = (uint64_t)(intptr_t)infop->si_addr;

	/* Copy the record out to the caller's buffer. */
	ret = uiomove(ssp, sizeof (*ssp), UIO_READ, uio);

	if (lwp->lwp_curinfo) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}
	sigemptyset(&t->t_sigwait);
	return (ret);
}
490 490
/*
 * This is similar to sigtimedwait. Based on the fd mode we may wait until a
 * signal within our specified set is posted. We consume as many available
 * signals within our set as we can.
 */
_NOTE(ARGSUSED(2))
static int
signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
{
	signalfd_state_t *state, **sstate;
	minor_t minor = getminor(dev);
	boolean_t block = B_TRUE;
	k_sigset_t set;
	boolean_t got_one = B_FALSE;
	int res;

	/* Caller must supply room for at least one full siginfo record. */
	if (uio->uio_resid < sizeof (signalfd_siginfo_t))
		return (EINVAL);

	sstate = ddi_get_soft_state(signalfd_softstate, minor);
	state = *sstate;

	if (uio->uio_fmode & (FNDELAY|FNONBLOCK))
		block = B_FALSE;

	/* Snapshot the mask; consume_signal runs without sfd_lock held. */
	mutex_enter(&state->sfd_lock);
	set = state->sfd_set;
	mutex_exit(&state->sfd_lock);

	if (sigisempty(&set))
		return (set_errno(EINVAL));

	do {
		res = consume_signal(set, uio, block);

		if (res == 0) {
			/*
			 * After consuming one signal, do not block while
			 * trying to consume more.
			 */
			got_one = B_TRUE;
			block = B_FALSE;

			/*
			 * Refresh the matching signal set in case it was
			 * updated during the wait.
			 */
			mutex_enter(&state->sfd_lock);
			set = state->sfd_set;
			mutex_exit(&state->sfd_lock);
			if (sigisempty(&set))
				break;
		}
	} while (res == 0 && uio->uio_resid >= sizeof (signalfd_siginfo_t));

	/* If at least one signal was consumed, the read succeeded overall. */
	if (got_one)
		res = 0;

	return (res);
}
551 551
552 552 /*
553 553 * If ksigset_t's were a single word, we would do:
554 554 * return (((p->p_sig | t->t_sig) & set) & fillset);
555 555 */
556 556 static int
557 557 signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set)
558 558 {
559 559 return (((p->p_sig.__sigbits[0] | t->t_sig.__sigbits[0]) &
560 560 set.__sigbits[0]) |
561 561 ((p->p_sig.__sigbits[1] | t->t_sig.__sigbits[1]) &
562 562 set.__sigbits[1]) |
563 563 (((p->p_sig.__sigbits[2] | t->t_sig.__sigbits[2]) &
564 564 set.__sigbits[2]) & FILLSET2));
565 565 }
566 566
_NOTE(ARGSUSED(4))
/*
 * Poll entry point.  Reports POLLRDNORM|POLLIN when a signal from this fd's
 * mask is already pending; otherwise (when !anyyet) registers a per-process
 * poll waiter and hands back its pollhead so the caller can block.
 */
static int
signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	signalfd_state_t *state, **sstate;
	minor_t minor = getminor(dev);
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	short revents = 0;

	sstate = ddi_get_soft_state(signalfd_softstate, minor);
	state = *sstate;

	mutex_enter(&state->sfd_lock);

	/* Readable if any signal in our set is already pending. */
	if (signalfd_sig_pending(p, t, state->sfd_set) != 0)
		revents |= POLLRDNORM | POLLIN;

	mutex_exit(&state->sfd_lock);

	if (!(*reventsp = revents & events) && !anyyet) {
		sigfd_proc_state_t *pstate;
		sigfd_poll_waiter_t *pw;

		/*
		 * Enable pollwakeup handling.
		 */
		mutex_enter(&p->p_lock);
		if ((pstate = (sigfd_proc_state_t *)p->p_sigfd) == NULL) {

			/*
			 * Drop p_lock across the (potentially sleeping)
			 * allocation of the per-process state.
			 */
			mutex_exit(&p->p_lock);
			pstate = kmem_zalloc(sizeof (*pstate), KM_SLEEP);
			list_create(&pstate->sigfd_list,
			    sizeof (sigfd_poll_waiter_t),
			    offsetof(sigfd_poll_waiter_t, spw_list));
			pstate->sigfd_pollwake_cb = signalfd_pollwake_cb;

			/* Check again, after blocking for the alloc. */
			mutex_enter(&p->p_lock);
			if (p->p_sigfd == NULL) {
				p->p_sigfd = pstate;
			} else {
				/* someone beat us to it */
				list_destroy(&pstate->sigfd_list);
				kmem_free(pstate, sizeof (*pstate));
				pstate = p->p_sigfd;
			}
		}

		/* Hand back this process's pollhead for the signalfd. */
		pw = signalfd_wake_list_add(pstate, state);
		*phpp = &pw->spw_pollhd;
		mutex_exit(&p->p_lock);
	}

	return (0);
}
624 624
625 625 _NOTE(ARGSUSED(4))
626 626 static int
627 627 signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
628 628 {
629 629 signalfd_state_t *state, **sstate;
630 630 minor_t minor = getminor(dev);
631 631 sigset_t mask;
632 632
633 633 sstate = ddi_get_soft_state(signalfd_softstate, minor);
634 634 state = *sstate;
635 635
636 636 switch (cmd) {
637 637 case SIGNALFDIOC_MASK:
638 638 if (ddi_copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t),
639 639 md) != 0)
640 640 return (set_errno(EFAULT));
641 641
642 642 mutex_enter(&state->sfd_lock);
643 643 sigutok(&mask, &state->sfd_set);
644 644 mutex_exit(&state->sfd_lock);
645 645
646 646 return (0);
647 647
648 648 default:
649 649 break;
650 650 }
651 651
652 652 return (ENOTTY);
653 653 }
654 654
_NOTE(ARGSUSED(1))
/*
 * Close entry point, invoked when the last fd referencing this minor is
 * closed.  Removes this process's poll waiter (if any), frees the soft-state
 * slot and minor, and drops the open()-time reference on the state with
 * forced invalidation so any remaining pollers (in other processes) see
 * POLLERR.
 */
static int
signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
	signalfd_state_t *state, **sstate;
	sigfd_poll_waiter_t *pw = NULL;
	minor_t minor = getminor(dev);
	proc_t *p = curproc;

	sstate = ddi_get_soft_state(signalfd_softstate, minor);
	state = *sstate;

	/* Make sure state is removed from this proc's pollwake list. */
	mutex_enter(&p->p_lock);
	if (p->p_sigfd != NULL) {
		sigfd_proc_state_t *pstate = p->p_sigfd;

		pw = signalfd_wake_list_rm(pstate, state);
		if (list_is_empty(&pstate->sigfd_list)) {
			signalfd_wake_list_cleanup(p);
		}
	}
	mutex_exit(&p->p_lock);

	if (pw != NULL) {
		/* Wake any straggling poller with POLLERR before freeing. */
		pollwakeup(&pw->spw_pollhd, POLLERR);
		pollhead_clean(&pw->spw_pollhd);
		kmem_free(pw, sizeof (*pw));
	}

	mutex_enter(&signalfd_lock);

	*sstate = NULL;
	ddi_soft_state_free(signalfd_softstate, minor);
	id_free(signalfd_minor, minor);

	/* Invalidation during release requires signalfd_lock to be held. */
	signalfd_state_release(state, B_TRUE);

	mutex_exit(&signalfd_lock);

	return (0);
}
697 697
/*
 * Attach: create the minor-number id space, soft-state table, the clone-from
 * minor node, the global state list, and the pollwake taskq, and install the
 * process-exit helper.  Each failure path unwinds what was set up before it.
 */
static int
signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	if (cmd != DDI_ATTACH || signalfd_devi != NULL)
		return (DDI_FAILURE);

	mutex_enter(&signalfd_lock);

	signalfd_minor = id_space_create("signalfd_minor", 1, L_MAXMIN32 + 1);
	if (signalfd_minor == NULL) {
		cmn_err(CE_WARN, "signalfd couldn't create id space");
		mutex_exit(&signalfd_lock);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_init(&signalfd_softstate,
	    sizeof (signalfd_state_t *), 0) != 0) {
		cmn_err(CE_WARN, "signalfd failed to create soft state");
		id_space_destroy(signalfd_minor);
		mutex_exit(&signalfd_lock);
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(devi, "signalfd", S_IFCHR,
	    SIGNALFDMNRN_SIGNALFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
		cmn_err(CE_NOTE, "/dev/signalfd couldn't create minor node");
		ddi_soft_state_fini(&signalfd_softstate);
		id_space_destroy(signalfd_minor);
		mutex_exit(&signalfd_lock);
		return (DDI_FAILURE);
	}

	ddi_report_dev(devi);
	signalfd_devi = devi;

	sigfd_exit_helper = signalfd_exit_helper;

	list_create(&signalfd_state, sizeof (signalfd_state_t),
	    offsetof(signalfd_state_t, sfd_list));

	/* Single-threaded taskq used to defer pollwakeups (see theory). */
	signalfd_wakeq = taskq_create("signalfd_wake", 1, minclsyspri,
	    0, INT_MAX, TASKQ_PREPOPULATE);

	mutex_exit(&signalfd_lock);

	return (DDI_SUCCESS);
}
745 745
_NOTE(ARGSUSED(0))
/*
 * Detach: refuse while any signalfd_state_t remains on the global list
 * (dangling poll waiters still hold references); otherwise tear down the
 * taskq, id space, minor node, soft state, and exit helper.
 */
static int
signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	switch (cmd) {
	case DDI_DETACH:
		break;

	default:
		return (DDI_FAILURE);
	}

	mutex_enter(&signalfd_lock);

	if (!list_is_empty(&signalfd_state)) {
		/*
		 * There are dangling poll waiters holding signalfd_state_t
		 * entries on the global list. Detach is not possible until
		 * they purge themselves.
		 */
		mutex_exit(&signalfd_lock);
		return (DDI_FAILURE);
	}
	list_destroy(&signalfd_state);

	/*
	 * With no remaining entries in the signalfd_state list, the wake taskq
	 * should be empty with no possibility for new entries.
	 */
	taskq_destroy(signalfd_wakeq);

	id_space_destroy(signalfd_minor);

	ddi_remove_minor_node(signalfd_devi, NULL);
	signalfd_devi = NULL;
	sigfd_exit_helper = NULL;

	ddi_soft_state_fini(&signalfd_softstate);
	mutex_exit(&signalfd_lock);

	return (DDI_SUCCESS);
}
788 788
789 789 _NOTE(ARGSUSED(0))
790 790 static int
791 791 signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
792 792 {
793 793 int error;
794 794
795 795 switch (infocmd) {
796 796 case DDI_INFO_DEVT2DEVINFO:
797 797 *result = (void *)signalfd_devi;
798 798 error = DDI_SUCCESS;
799 799 break;
800 800 case DDI_INFO_DEVT2INSTANCE:
801 801 *result = (void *)0;
802 802 error = DDI_SUCCESS;
803 803 break;
804 804 default:
805 805 error = DDI_FAILURE;
806 806 }
807 807 return (error);
808 808 }
809 809
/* Character-device entry points (cb_ops(9S)). */
static struct cb_ops signalfd_cb_ops = {
	signalfd_open,		/* open */
	signalfd_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	signalfd_read,		/* read */
	nodev,			/* write */
	signalfd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	signalfd_poll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

/* Driver autoconfiguration entry points (dev_ops(9S)). */
static struct dev_ops signalfd_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	signalfd_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	signalfd_attach,	/* attach */
	signalfd_detach,	/* detach */
	nodev,			/* reset */
	&signalfd_cb_ops,	/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed,	/* quiesce */
};

/* Loadable-module linkage (modctl(9S)). */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"signalfd support",	/* name of module */
	&signalfd_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
854 854
/* Loadable-module entry point: install the driver. */
int
_init(void)
{
	return (mod_install(&modlinkage));
}
860 860
/* Loadable-module entry point: report module information. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
866 866
/* Loadable-module entry point: remove the driver. */
int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
|
↓ open down ↓ |
871 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX