8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2012 by Delphix. All rights reserved.
28 * Copyright 2016 Joyent, Inc.
29 */
30
31 #include <sys/types.h>
32 #include <sys/devops.h>
33 #include <sys/conf.h>
34 #include <sys/modctl.h>
35 #include <sys/sunddi.h>
36 #include <sys/stat.h>
37 #include <sys/poll_impl.h>
38 #include <sys/errno.h>
39 #include <sys/kmem.h>
40 #include <sys/mkdev.h>
41 #include <sys/debug.h>
42 #include <sys/file.h>
43 #include <sys/sysmacros.h>
44 #include <sys/systm.h>
45 #include <sys/bitmap.h>
46 #include <sys/devpoll.h>
47 #include <sys/rctl.h>
48 #include <sys/resource.h>
349 * perform the implicit removal to remain
350 * closer to the epoll semantics.
351 */
352 if (is_epoll) {
353 pdp->pd_fp = NULL;
354 pdp->pd_events = 0;
355
356 if (pdp->pd_php != NULL) {
357 pollhead_delete(pdp->pd_php,
358 pdp);
359 pdp->pd_php = NULL;
360 }
361
362 BT_CLEAR(pcp->pc_bitmap, fd);
363 continue;
364 }
365 }
366
367 if (fp != pdp->pd_fp) {
368 /*
369 * user is polling on a cached fd which was
370 * closed and then reused. Unfortunately
371 * there is no good way to inform user.
372 * If the file struct is also reused, we
373 * may not be able to detect the fd reuse
374 * at all. As long as this does not
375 * cause system failure and/or memory leak,
376 * we will play along. Man page states if
377 * user does not clean up closed fds, polling
378 * results will be indeterministic.
379 *
380 * XXX - perhaps log the detection of fd
381 * reuse?
382 */
383 pdp->pd_fp = fp;
384 }
385 /*
386 * XXX - pollrelock() logic needs to know
387 * which pollcache lock to grab. It'd be a
388 * cleaner solution if we could pass pcp as
389 * an argument in VOP_POLL interface instead
390 * of implicitly passing it using thread_t
391 * struct. On the other hand, changing VOP_POLL
392 * interface will require all driver/file system
393 * poll routines to change. May want to revisit
394 * the tradeoff later.
395 */
396 curthread->t_pollcache = pcp;
397 error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0,
398 &revent, &php, NULL);
399 curthread->t_pollcache = NULL;
400 releasef(fd);
401 if (error != 0) {
402 break;
403 }
404
405 /*
406 * layered devices (e.g. console driver)
407 * may change the vnode and thus the pollhead
408 * pointer out from underneath us.
409 */
410 if (php != NULL && pdp->pd_php != NULL &&
411 php != pdp->pd_php) {
412 pollhead_delete(pdp->pd_php, pdp);
413 pdp->pd_php = php;
414 pollhead_insert(php, pdp);
415 /*
416 * The bit should still be set.
417 */
418 ASSERT(BT_TEST(pcp->pc_bitmap, fd));
419 goto retry;
420 }
421
422 if (revent != 0) {
423 if (pfdp != NULL) {
424 pfdp[fdcnt].fd = fd;
425 pfdp[fdcnt].events = pdp->pd_events;
426 pfdp[fdcnt].revents = revent;
427 } else if (epoll != NULL) {
428 epoll_event_t *ep = &epoll[fdcnt];
429
430 ASSERT(epoll != NULL);
431 ep->data.u64 = pdp->pd_epolldata;
432
433 /*
434 * If any of the event bits are set for
435 * which poll and epoll representations
436 * differ, swizzle in the native epoll
437 * values.
438 */
439 if (revent & mask) {
440 ep->events = (revent & ~mask) |
441 ((revent & POLLRDHUP) ?
442 EPOLLRDHUP : 0) |
443 ((revent & POLLWRBAND) ?
444 EPOLLWRBAND : 0);
445 } else {
446 ep->events = revent;
447 }
448
449 /*
450 * We define POLLWRNORM to be POLLOUT,
451 * but epoll has separate definitions
452 * for them; if POLLOUT is set and the
453 * user has asked for EPOLLWRNORM, set
471
472 /*
473 * If a call to pollunlock() fails
474 * during VOP_POLL, skip over the fd
475 * and continue polling.
476 *
477 * Otherwise, report that there is an
478 * event pending.
479 */
480 if ((ps->ps_flags & POLLSTATE_ULFAIL)
481 != 0) {
482 ps->ps_flags &=
483 ~POLLSTATE_ULFAIL;
484 continue;
485 } else {
486 fdcnt++;
487 break;
488 }
489 }
490
491 /*
492 * If POLLET is set, clear the bit in the
493 * bitmap -- which effectively latches the
494 * edge on a pollwakeup() from the driver.
495 */
496 if (pdp->pd_events & POLLET)
497 BT_CLEAR(pcp->pc_bitmap, fd);
498
499 /*
500 * If POLLONESHOT is set, perform the implicit
501 * POLLREMOVE.
502 */
503 if (pdp->pd_events & POLLONESHOT) {
504 pdp->pd_fp = NULL;
505 pdp->pd_events = 0;
506
507 if (pdp->pd_php != NULL) {
508 pollhead_delete(pdp->pd_php,
509 pdp);
510 pdp->pd_php = NULL;
511 }
512
513 BT_CLEAR(pcp->pc_bitmap, fd);
514 }
515
516 fdcnt++;
517 } else if (php != NULL) {
518 /*
519 * We clear a bit or cache a poll fd if
520 * the driver returns a poll head ptr,
521 * which is expected in the case of 0
522 * revents. Some buggy drivers may return a
523 * NULL php pointer with 0 revents. In
524 * this case, we just treat the driver as
525 * "noncachable" and do not clear the bit
526 * in bitmap.
527 */
528 if ((pdp->pd_php != NULL) &&
529 ((pcp->pc_flag & PC_POLLWAKE) == 0)) {
530 BT_CLEAR(pcp->pc_bitmap, fd);
531 }
532 if (pdp->pd_php == NULL) {
533 pollhead_insert(php, pdp);
534 pdp->pd_php = php;
535 /*
900 continue;
901 }
902
903 /*
904 * do VOP_POLL and cache this poll fd.
905 */
906 /*
907 * XXX - pollrelock() logic needs to know
908 * which pollcache lock to grab. It'd be a
909 * cleaner solution if we could pass pcp as
910 * an argument in VOP_POLL interface instead
911 * of implicitly passing it using thread_t
912 * struct. On the other hand, changing VOP_POLL
913 * interface will require all driver/file system
914 * poll routines to change. May want to revisit
915 * the tradeoff later.
916 */
917 curthread->t_pollcache = pcp;
918 error = VOP_POLL(fp->f_vnode, pfdp->events, 0,
919 &pfdp->revents, &php, NULL);
920 curthread->t_pollcache = NULL;
921 /*
922 * We always set the bit when this fd is cached;
923 * this forces the first DP_POLL to poll this fd.
924 * Real performance gain comes from subsequent
925 * DP_POLL. We also attempt a pollhead_insert();
926 * if it's not possible, we'll do it in dpioctl().
927 */
928 BT_SET(pcp->pc_bitmap, fd);
929 if (error != 0) {
930 releasef(fd);
931 break;
932 }
933 pdp->pd_fp = fp;
934 pdp->pd_events |= pfdp->events;
935 if (php != NULL) {
936 if (pdp->pd_php == NULL) {
937 pollhead_insert(php, pdp);
938 pdp->pd_php = php;
939 } else {
1441 mutex_exit(&devpoll_lock);
1442
1443 mutex_enter(&dpep->dpe_lock);
1444 if ((dpep->dpe_flag & DP_ISEPOLLCOMPAT) == 0) {
1445 /* Poll recursion is not yet supported for non-epoll handles */
1446 *reventsp = POLLERR;
1447 mutex_exit(&dpep->dpe_lock);
1448 return (0);
1449 } else {
1450 dpep->dpe_refcnt++;
1451 pcp = dpep->dpe_pcache;
1452 mutex_exit(&dpep->dpe_lock);
1453 }
1454
1455 res = pollstate_enter(pcp);
1456 if (res == PSE_SUCCESS) {
1457 nfds_t nfds = 1;
1458 int fdcnt = 0;
1459 pollstate_t *ps = curthread->t_pollstate;
1460
1461 rc = dp_pcache_poll(dpep, NULL, pcp, nfds, &fdcnt);
1462 if (rc == 0) {
1463 *reventsp = (fdcnt > 0) ? POLLIN : 0;
1464 }
1465 pcachelink_assoc(pcp, ps->ps_pc_stack[0]);
1466 pollstate_exit(pcp);
1467 } else {
1468 switch (res) {
1469 case PSE_FAIL_DEPTH:
1470 rc = EINVAL;
1471 break;
1472 case PSE_FAIL_LOOP:
1473 case PSE_FAIL_DEADLOCK:
1474 rc = ELOOP;
1475 break;
1476 default:
1477 /*
1478 * If anything else has gone awry, such as being polled
1479 * from an unexpected context, fall back to the
1480 * recursion-intolerant response.
1481 */
1482 *reventsp = POLLERR;
1483 rc = 0;
1484 break;
|
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2012 by Delphix. All rights reserved.
28 * Copyright 2017 Joyent, Inc.
29 */
30
31 #include <sys/types.h>
32 #include <sys/devops.h>
33 #include <sys/conf.h>
34 #include <sys/modctl.h>
35 #include <sys/sunddi.h>
36 #include <sys/stat.h>
37 #include <sys/poll_impl.h>
38 #include <sys/errno.h>
39 #include <sys/kmem.h>
40 #include <sys/mkdev.h>
41 #include <sys/debug.h>
42 #include <sys/file.h>
43 #include <sys/sysmacros.h>
44 #include <sys/systm.h>
45 #include <sys/bitmap.h>
46 #include <sys/devpoll.h>
47 #include <sys/rctl.h>
48 #include <sys/resource.h>
349 * perform the implicit removal to remain
350 * closer to the epoll semantics.
351 */
352 if (is_epoll) {
353 pdp->pd_fp = NULL;
354 pdp->pd_events = 0;
355
356 if (pdp->pd_php != NULL) {
357 pollhead_delete(pdp->pd_php,
358 pdp);
359 pdp->pd_php = NULL;
360 }
361
362 BT_CLEAR(pcp->pc_bitmap, fd);
363 continue;
364 }
365 }
366
367 if (fp != pdp->pd_fp) {
368 /*
369 * The user is polling on a cached fd which was
370 * closed and then reused. Unfortunately there
371 * is no good way to communicate this fact to
372 * the consumer.
373 *
374 * If the file struct is also reused, we may
375 * not be able to detect the fd reuse at all.
376 * As long as this does not cause system
377 * failure and/or memory leaks, we will play
378 * along. The man page states that if the user
379 * does not clean up closed fds, polling
380 * results will be indeterministic.
381 *
382 * XXX: perhaps log the detection of fd reuse?
383 */
384 pdp->pd_fp = fp;
385
386 /*
387 * When this situation has been detected, it's
388 * likely that any existing pollhead is
389 * ill-suited to perform proper wake-ups.
390 *
391 * Clean up the old entry under the expectation
392 * that a valid one will be provided as part of
393 * the later VOP_POLL.
394 */
395 if (pdp->pd_php != NULL) {
396 pollhead_delete(pdp->pd_php, pdp);
397 pdp->pd_php = NULL;
398 }
399 }
400 /*
401 * XXX - pollrelock() logic needs to know
402 * which pollcache lock to grab. It'd be a
403 * cleaner solution if we could pass pcp as
404 * an argument in VOP_POLL interface instead
405 * of implicitly passing it using thread_t
406 * struct. On the other hand, changing VOP_POLL
407 * interface will require all driver/file system
408 * poll routines to change. May want to revisit
409 * the tradeoff later.
410 */
411 curthread->t_pollcache = pcp;
412 error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0,
413 &revent, &php, NULL);
414
415 /*
416 * Recheck edge-triggered descriptors which lack a
417 * pollhead. While this check is performed when an fd
418 * is added to the pollcache in dpwrite(), subsequent
419 * descriptor manipulation could cause a different
420 * resource to be present now.
421 */
422 if ((pdp->pd_events & POLLET) && error == 0 &&
423 pdp->pd_php == NULL && php == NULL && revent != 0) {
424 short levent = 0;
425
426 /*
427 * The same POLLET-only VOP_POLL is used in an
428 * attempt to coax a pollhead from older
429 * driver logic.
430 */
431 error = VOP_POLL(fp->f_vnode, POLLET,
432 0, &levent, &php, NULL);
433 }
434
435 curthread->t_pollcache = NULL;
436 releasef(fd);
437 if (error != 0) {
438 break;
439 }
440
441 /*
442 * layered devices (e.g. console driver)
443 * may change the vnode and thus the pollhead
444 * pointer out from underneath us.
445 */
446 if (php != NULL && pdp->pd_php != NULL &&
447 php != pdp->pd_php) {
448 pollhead_delete(pdp->pd_php, pdp);
449 pdp->pd_php = php;
450 pollhead_insert(php, pdp);
451 /*
452 * The bit should still be set.
453 */
454 ASSERT(BT_TEST(pcp->pc_bitmap, fd));
455 goto retry;
456 }
457
458 if (revent != 0) {
459 if (pfdp != NULL) {
460 pfdp[fdcnt].fd = fd;
461 pfdp[fdcnt].events = pdp->pd_events;
462 pfdp[fdcnt].revents = revent;
463 } else if (epoll != NULL) {
464 epoll_event_t *ep = &epoll[fdcnt];
465
466 ASSERT(epoll != NULL);
467 ep->data.u64 = pdp->pd_epolldata;
468
469 /*
470 * Since POLLNVAL is a legal event for
471 * VOP_POLL handlers to emit, it must
472 * be translated to an epoll-legal event (POLLERR).
473 */
474 if (revent & POLLNVAL) {
475 revent &= ~POLLNVAL;
476 revent |= POLLERR;
477 }
478
479 /*
480 * If any of the event bits are set for
481 * which poll and epoll representations
482 * differ, swizzle in the native epoll
483 * values.
484 */
485 if (revent & mask) {
486 ep->events = (revent & ~mask) |
487 ((revent & POLLRDHUP) ?
488 EPOLLRDHUP : 0) |
489 ((revent & POLLWRBAND) ?
490 EPOLLWRBAND : 0);
491 } else {
492 ep->events = revent;
493 }
494
495 /*
496 * We define POLLWRNORM to be POLLOUT,
497 * but epoll has separate definitions
498 * for them; if POLLOUT is set and the
499 * user has asked for EPOLLWRNORM, set
517
518 /*
519 * If a call to pollunlock() fails
520 * during VOP_POLL, skip over the fd
521 * and continue polling.
522 *
523 * Otherwise, report that there is an
524 * event pending.
525 */
526 if ((ps->ps_flags & POLLSTATE_ULFAIL)
527 != 0) {
528 ps->ps_flags &=
529 ~POLLSTATE_ULFAIL;
530 continue;
531 } else {
532 fdcnt++;
533 break;
534 }
535 }
536
537 /* Handle special polling modes. */
538 if (pdp->pd_events & POLLONESHOT) {
539 /*
540 * If POLLONESHOT is set, perform the
541 * implicit POLLREMOVE.
542 */
543 pdp->pd_fp = NULL;
544 pdp->pd_events = 0;
545
546 if (pdp->pd_php != NULL) {
547 pollhead_delete(pdp->pd_php,
548 pdp);
549 pdp->pd_php = NULL;
550 }
551
552 BT_CLEAR(pcp->pc_bitmap, fd);
553 } else if (pdp->pd_events & POLLET) {
554 /*
555 * Wire up the pollhead which should
556 * have been provided. Edge-triggered
557 * polling cannot function properly
558 * with drivers which do not emit one.
559 */
560 if (php != NULL &&
561 pdp->pd_php == NULL) {
562 pollhead_insert(php, pdp);
563 pdp->pd_php = php;
564 }
565
566 /*
567 * If the driver has emitted a pollhead,
568 * clear the bit in the bitmap which
569 * effectively latches the edge on a
570 * pollwakeup() from the driver.
571 */
572 if (pdp->pd_php != NULL) {
573 BT_CLEAR(pcp->pc_bitmap, fd);
574 }
575 }
576
577 fdcnt++;
578 } else if (php != NULL) {
579 /*
580 * We clear a bit or cache a poll fd if
581 * the driver returns a poll head ptr,
582 * which is expected in the case of 0
583 * revents. Some buggy drivers may return a
584 * NULL php pointer with 0 revents. In
585 * this case, we just treat the driver as
586 * "noncachable" and do not clear the bit
587 * in bitmap.
588 */
589 if ((pdp->pd_php != NULL) &&
590 ((pcp->pc_flag & PC_POLLWAKE) == 0)) {
591 BT_CLEAR(pcp->pc_bitmap, fd);
592 }
593 if (pdp->pd_php == NULL) {
594 pollhead_insert(php, pdp);
595 pdp->pd_php = php;
596 /*
961 continue;
962 }
963
964 /*
965 * do VOP_POLL and cache this poll fd.
966 */
967 /*
968 * XXX - pollrelock() logic needs to know
969 * which pollcache lock to grab. It'd be a
970 * cleaner solution if we could pass pcp as
971 * an argument in VOP_POLL interface instead
972 * of implicitly passing it using thread_t
973 * struct. On the other hand, changing VOP_POLL
974 * interface will require all driver/file system
975 * poll routines to change. May want to revisit
976 * the tradeoff later.
977 */
978 curthread->t_pollcache = pcp;
979 error = VOP_POLL(fp->f_vnode, pfdp->events, 0,
980 &pfdp->revents, &php, NULL);
981
982 /*
983 * Edge-triggered polling requires a pollhead in order
984 * to initiate wake-ups properly. Drivers which are
985 * savvy to POLLET presence, which should include
986 * everything in-gate, will always emit one, regardless
987 * of revent status. Older drivers which only emit a
988 * pollhead if 'revents == 0' are given a second chance
989 * here via a second VOP_POLL, with only POLLET set in
990 * the events of interest. These circumstances should
991 * induce any cacheable drivers to emit a pollhead for
992 * wake-ups.
993 *
994 * Drivers which never emit a pollhead will simply
995 * disobey the expectation of edge-triggered behavior.
996 * This includes recursive epoll which, even on Linux,
997 * yields its events in a level-triggered fashion only.
998 */
999 if ((pdp->pd_events & POLLET) && error == 0 &&
1000 php == NULL) {
1001 short levent = 0;
1002
1003 error = VOP_POLL(fp->f_vnode, POLLET, 0,
1004 &levent, &php, NULL);
1005 }
1006
1007 curthread->t_pollcache = NULL;
1008 /*
1009 * We always set the bit when this fd is cached;
1010 * this forces the first DP_POLL to poll this fd.
1011 * Real performance gain comes from subsequent
1012 * DP_POLL. We also attempt a pollhead_insert();
1013 * if it's not possible, we'll do it in dpioctl().
1014 */
1015 BT_SET(pcp->pc_bitmap, fd);
1016 if (error != 0) {
1017 releasef(fd);
1018 break;
1019 }
1020 pdp->pd_fp = fp;
1021 pdp->pd_events |= pfdp->events;
1022 if (php != NULL) {
1023 if (pdp->pd_php == NULL) {
1024 pollhead_insert(php, pdp);
1025 pdp->pd_php = php;
1026 } else {
1528 mutex_exit(&devpoll_lock);
1529
1530 mutex_enter(&dpep->dpe_lock);
1531 if ((dpep->dpe_flag & DP_ISEPOLLCOMPAT) == 0) {
1532 /* Poll recursion is not yet supported for non-epoll handles */
1533 *reventsp = POLLERR;
1534 mutex_exit(&dpep->dpe_lock);
1535 return (0);
1536 } else {
1537 dpep->dpe_refcnt++;
1538 pcp = dpep->dpe_pcache;
1539 mutex_exit(&dpep->dpe_lock);
1540 }
1541
1542 res = pollstate_enter(pcp);
1543 if (res == PSE_SUCCESS) {
1544 nfds_t nfds = 1;
1545 int fdcnt = 0;
1546 pollstate_t *ps = curthread->t_pollstate;
1547
1548 /*
1549 * Recursive polling will only emit certain events. Skip a
1550 * scan of the pollcache if those events are not of interest.
1551 */
1552 if (events & (POLLIN|POLLRDNORM)) {
1553 rc = dp_pcache_poll(dpep, NULL, pcp, nfds, &fdcnt);
1554 } else {
1555 rc = 0;
1556 fdcnt = 0;
1557 }
1558
1559 if (rc == 0 && fdcnt > 0) {
1560 *reventsp = POLLIN|POLLRDNORM;
1561 } else {
1562 *reventsp = 0;
1563 }
1564 pcachelink_assoc(pcp, ps->ps_pc_stack[0]);
1565 pollstate_exit(pcp);
1566 } else {
1567 switch (res) {
1568 case PSE_FAIL_DEPTH:
1569 rc = EINVAL;
1570 break;
1571 case PSE_FAIL_LOOP:
1572 case PSE_FAIL_DEADLOCK:
1573 rc = ELOOP;
1574 break;
1575 default:
1576 /*
1577 * If anything else has gone awry, such as being polled
1578 * from an unexpected context, fall back to the
1579 * recursion-intolerant response.
1580 */
1581 *reventsp = POLLERR;
1582 rc = 0;
1583 break;
|