336 * POLLNVAL. This is consistent with poll(2)
337 * polling a closed fd. Hopefully this will
338 * remind the user to do a POLLREMOVE.
339 */
340 if (!is_epoll && pfdp != NULL) {
341 pfdp[fdcnt].fd = fd;
342 pfdp[fdcnt].revents = POLLNVAL;
343 fdcnt++;
344 continue;
345 }
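/*
 * Illustrative sketch (added; 'dpfd' and 'stale_fd' are assumed names
 * for the /dev/poll handle and the closed descriptor): on seeing
 * POLLNVAL, a consumer would typically drop the stale entry with
 * POLLREMOVE, e.g.:
 *
 *    pollfd_t pfd;
 *    pfd.fd = stale_fd;
 *    pfd.events = POLLREMOVE;
 *    (void) write(dpfd, &pfd, sizeof (pfd));
 */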
346
347 /*
348 * In the epoll compatibility case, we actually
349 * perform the implicit removal to remain
350 * closer to the epoll semantics.
351 */
352 if (is_epoll) {
353 pdp->pd_fp = NULL;
354 pdp->pd_events = 0;
355
356 if (php != NULL) {
357 pollhead_delete(php, pdp);
358 pdp->pd_php = NULL;
359 }
360
361 BT_CLEAR(pcp->pc_bitmap, fd);
362 continue;
363 }
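/*
 * Descriptive note (added): this mirrors Linux epoll behavior, where
 * a closed fd quietly drops out of the interest set instead of being
 * reported as POLLNVAL on later waits.
 */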
364 }
365
366 if (fp != pdp->pd_fp) {
367 /*
368 * The user is polling on a cached fd which
369 * was closed and then reused. Unfortunately
370 * there is no good way to inform the user.
371 * If the file struct is also reused, we may
372 * not be able to detect the fd reuse at all.
373 * As long as this does not cause a system
374 * failure and/or memory leak, we will play
375 * along. The man page states that if the
376 * user does not clean up closed fds, polling
377 * results will be nondeterministic.
486 break;
487 }
488 }
489
490 /*
491 * If POLLET is set, clear the bit in the
492 * bitmap -- which effectively latches the
493 * edge on a pollwakeup() from the driver.
494 */
495 if (pdp->pd_events & POLLET)
496 BT_CLEAR(pcp->pc_bitmap, fd);
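/*
 * Descriptive note (added): once the bit is cleared, this fd will not
 * be rescanned until a later pollwakeup() from the driver latches it
 * again, which is what makes POLLET behave edge-triggered.
 */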
497
498 /*
499 * If POLLONESHOT is set, perform the implicit
500 * POLLREMOVE.
501 */
502 if (pdp->pd_events & POLLONESHOT) {
503 pdp->pd_fp = NULL;
504 pdp->pd_events = 0;
505
506 if (php != NULL) {
507 pollhead_delete(php, pdp);
508 pdp->pd_php = NULL;
509 }
510
511 BT_CLEAR(pcp->pc_bitmap, fd);
512 }
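/*
 * Descriptive note (added): after this implicit POLLREMOVE the fd is
 * no longer watched; a consumer that wants further events must re-add
 * the fd with another write to the /dev/poll handle.
 */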
513
514 fdcnt++;
515 } else if (php != NULL) {
516 /*
517 * We clear a bit or cache a poll fd if
518 * the driver returns a poll head ptr,
519 * which is expected in the case of 0
520 * revents. Some buggy drivers may return
521 * a NULL php pointer with 0 revents. In
522 * this case, we just treat the driver as
523 * "noncacheable" and do not clear the bit
524 * in the bitmap.
525 */
526 if ((pdp->pd_php != NULL) &&
527 ((pcp->pc_flag & PC_POLLWAKE) == 0)) {
622 mutex_exit(&devpoll_lock);
623 return (0);
624 }
625
626 /*
627 * A write to /dev/poll adds or removes fds to/from the cached poll fd
628 * set, or changes the poll events for a watched fd.
629 */
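/*
 * Illustrative sketch (added; 'dpfd' and 'sock_fd' are assumed names):
 * caching an fd is an ordinary write of pollfd structures, e.g.:
 *
 *    int dpfd = open("/dev/poll", O_RDWR);
 *    pollfd_t pfd;
 *    pfd.fd = sock_fd;
 *    pfd.events = POLLIN;
 *    if (write(dpfd, &pfd, sizeof (pfd)) != sizeof (pfd))
 *        err(1, "failed to cache fd");
 */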
630 /*ARGSUSED*/
631 static int
632 dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
633 {
634 minor_t minor;
635 dp_entry_t *dpep;
636 pollcache_t *pcp;
637 pollfd_t *pollfdp, *pfdp;
638 dvpoll_epollfd_t *epfdp;
639 uintptr_t limit;
640 int error, size;
641 ssize_t uiosize;
642 nfds_t pollfdnum;
643 struct pollhead *php = NULL;
644 polldat_t *pdp;
645 int fd;
646 file_t *fp;
647 boolean_t is_epoll, fds_added = B_FALSE;
648
649 minor = getminor(dev);
650
651 mutex_enter(&devpoll_lock);
652 ASSERT(minor < dptblsize);
653 dpep = devpolltbl[minor];
654 ASSERT(dpep != NULL);
655 mutex_exit(&devpoll_lock);
656
657 mutex_enter(&dpep->dpe_lock);
658 pcp = dpep->dpe_pcache;
659 is_epoll = (dpep->dpe_flag & DP_ISEPOLLCOMPAT) != 0;
660 size = (is_epoll) ? sizeof (dvpoll_epollfd_t) : sizeof (pollfd_t);
661 mutex_exit(&dpep->dpe_lock);
687 mutex_exit(&curproc->p_lock);
688 return (EINVAL);
689 }
690 mutex_exit(&curproc->p_lock);
691 }
692
693 /*
694 * Copy in the pollfd array. Walk through the array and add
695 * each polled fd to the cached set.
696 */
697 pollfdp = kmem_alloc(uiosize, KM_SLEEP);
698 limit = (uintptr_t)pollfdp + (pollfdnum * size);
699
700 /*
701 * Although /dev/poll uses the write(2) interface to cache fds, it's
702 * not supposed to function as a seekable device. To prevent the
703 * offset from growing and eventually exceeding the maximum, reset
704 * the offset here for every call.
705 */
706 uiop->uio_loffset = 0;
707 if ((error = uiomove((caddr_t)pollfdp, uiosize, UIO_WRITE, uiop))
708 != 0) {
709 kmem_free(pollfdp, uiosize);
710 return (error);
711 }
712 /*
713 * We are about to enter the core portion of dpwrite(). Make sure this
714 * write has exclusive access to this portion of the code, i.e.,
715 * that there are no other writers in it.
716 *
717 * Waiting for all readers to drop their references to the dpe is
718 * unnecessary since the pollcache itself is protected by pc_lock.
719 */
720 mutex_enter(&dpep->dpe_lock);
721 dpep->dpe_writerwait++;
722 while ((dpep->dpe_flag & DP_WRITER_PRESENT) != 0) {
723 ASSERT(dpep->dpe_refcnt != 0);
724
725 if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) {
726 dpep->dpe_writerwait--;
727 mutex_exit(&dpep->dpe_lock);
728 kmem_free(pollfdp, uiosize);
729 return (EINTR);
730 }
731 }
964 /*
965 * Wake any pollcache waiters so they can check the new descriptors.
966 *
967 * Any fds added to a recursive-capable pollcache could themselves be
968 * /dev/poll handles. To ensure that proper event propagation occurs,
969 * parent pollcaches are woken too, so that they can create any needed
970 * pollcache links.
971 */
972 if (fds_added) {
973 cv_broadcast(&pcp->pc_cv);
974 pcache_wake_parents(pcp);
975 }
976 pollstate_exit(pcp);
977 mutex_enter(&dpep->dpe_lock);
978 bypass:
979 dpep->dpe_flag &= ~DP_WRITER_PRESENT;
980 dpep->dpe_refcnt--;
981 cv_broadcast(&dpep->dpe_cv);
982 mutex_exit(&dpep->dpe_lock);
983 kmem_free(pollfdp, uiosize);
984 return (error);
985 }
986
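/*
 * Descriptive note (added): DP_SIGMASK_RESTORE undoes the temporary
 * signal mask installed for DP_PPOLL. If a signal is pending
 * (lwp_cursig != 0) the caller-supplied mask is deliberately left in
 * place, so the signal is taken under that mask; T_TOMASK then has
 * the mask restored from lwp_sigoldmask as part of signal delivery.
 */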
987 #define DP_SIGMASK_RESTORE(ksetp) { \
988 if (ksetp != NULL) { \
989 mutex_enter(&p->p_lock); \
990 if (lwp->lwp_cursig == 0) { \
991 t->t_hold = lwp->lwp_sigoldmask; \
992 t->t_flag &= ~T_TOMASK; \
993 } \
994 mutex_exit(&p->p_lock); \
995 } \
996 }
997
998 /*ARGSUSED*/
999 static int
1000 dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
1001 {
1002 minor_t minor;
1003 dp_entry_t *dpep;
1106 DP_REFRELE(dpep);
1107 return (EFAULT);
1108 }
1109
1110 deadline = STRUCT_FGET(dvpoll, dp_timeout);
1111 if (deadline > 0) {
1112 /*
1113 * Convert the deadline from relative milliseconds
1114 * to absolute nanoseconds. The caller must wait
1115 * for at least one tick.
1116 */
1117 deadline = MSEC2NSEC(deadline);
1118 deadline = MAX(deadline, nsec_per_tick);
1119 deadline += now;
1120 }
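/*
 * Worked example (added): with dp_timeout == 10, MSEC2NSEC yields
 * 10 * 1000 * 1000 = 10000000ns; this is clamped to no less than
 * one tick (nsec_per_tick) and then made absolute by adding 'now'.
 */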
1121
1122 if (cmd == DP_PPOLL) {
1123 void *setp = STRUCT_FGETP(dvpoll, dp_setp);
1124
1125 if (setp != NULL) {
1126 if (copyin(setp, &set, sizeof (set))) {
1127 DP_REFRELE(dpep);
1128 return (EFAULT);
1129 }
1130
1131 sigutok(&set, &kset);
1132 ksetp = &kset;
1133
1134 mutex_enter(&p->p_lock);
1135 schedctl_finish_sigblock(t);
1136 lwp->lwp_sigoldmask = t->t_hold;
1137 t->t_hold = *ksetp;
1138 t->t_flag |= T_TOMASK;
1139
1140 /*
1141 * Like ppoll() with a non-NULL sigset, we'll
1142 * call cv_reltimedwait_sig() just to check for
1143 * signals. This call will return immediately
1144 * with either 0 (signalled) or -1 (no signal).
1145 * There are some conditions whereby we can
1146 * get 0 from cv_reltimedwait_sig() without
1147 * a true signal (e.g., a directed stop), so
1148 * we restore our signal mask in the unlikely
1149 * event that lwp_cursig is 0.
1150 */
1151 if (!cv_reltimedwait_sig(&t->t_delay_cv,
1152 &p->p_lock, 0, TR_CLOCK_TICK)) {
1262
1263 error = cv_timedwait_sig_hrtime(&pcp->pc_cv,
1264 &pcp->pc_lock, deadline);
1265
1266 /*
1267 * Break out on a signal (0, mapped to EINTR) or a
1268 * timeout (negative); on any other wakeup, poll again.
1269 */
1270 if (error <= 0) {
1271 error = (error == 0) ? EINTR : 0;
1272 break;
1273 } else {
1274 error = 0;
1275 }
1276 }
1277 pollstate_exit(pcp);
1278
1279 DP_SIGMASK_RESTORE(ksetp);
1280
1281 if (error == 0 && fdcnt > 0) {
1282 if (copyout(ps->ps_dpbuf,
1283 STRUCT_FGETP(dvpoll, dp_fds), fdcnt * fdsize)) {
1284 DP_REFRELE(dpep);
1285 return (EFAULT);
1286 }
1287 *rvalp = fdcnt;
1288 }
1289 break;
1290 }
1291
1292 case DP_ISPOLLED:
1293 {
1294 pollfd_t pollfd;
1295 polldat_t *pdp;
1296
1297 STRUCT_INIT(dvpoll, mode);
1298 error = copyin((caddr_t)arg, &pollfd, sizeof (pollfd_t));
1299 if (error) {
1300 DP_REFRELE(dpep);
1301 return (EFAULT);
|
336 * POLLNVAL. This is consistent with poll(2)
337 * polling a closed fd. Hopefully this will
338 * remind the user to do a POLLREMOVE.
339 */
340 if (!is_epoll && pfdp != NULL) {
341 pfdp[fdcnt].fd = fd;
342 pfdp[fdcnt].revents = POLLNVAL;
343 fdcnt++;
344 continue;
345 }
346
347 /*
348 * In the epoll compatibility case, we actually
349 * perform the implicit removal to remain
350 * closer to the epoll semantics.
351 */
352 if (is_epoll) {
353 pdp->pd_fp = NULL;
354 pdp->pd_events = 0;
355
356 if (pdp->pd_php != NULL) {
357 pollhead_delete(pdp->pd_php,
358 pdp);
359 pdp->pd_php = NULL;
360 }
361
362 BT_CLEAR(pcp->pc_bitmap, fd);
363 continue;
364 }
365 }
366
367 if (fp != pdp->pd_fp) {
368 /*
369 * The user is polling on a cached fd which
370 * was closed and then reused. Unfortunately
371 * there is no good way to inform the user.
372 * If the file struct is also reused, we may
373 * not be able to detect the fd reuse at all.
374 * As long as this does not cause a system
375 * failure and/or memory leak, we will play
376 * along. The man page states that if the
377 * user does not clean up closed fds, polling
378 * results will be nondeterministic.
487 break;
488 }
489 }
490
491 /*
492 * If POLLET is set, clear the bit in the
493 * bitmap -- which effectively latches the
494 * edge on a pollwakeup() from the driver.
495 */
496 if (pdp->pd_events & POLLET)
497 BT_CLEAR(pcp->pc_bitmap, fd);
498
499 /*
500 * If POLLONESHOT is set, perform the implicit
501 * POLLREMOVE.
502 */
503 if (pdp->pd_events & POLLONESHOT) {
504 pdp->pd_fp = NULL;
505 pdp->pd_events = 0;
506
507 if (pdp->pd_php != NULL) {
508 pollhead_delete(pdp->pd_php,
509 pdp);
510 pdp->pd_php = NULL;
511 }
512
513 BT_CLEAR(pcp->pc_bitmap, fd);
514 }
515
516 fdcnt++;
517 } else if (php != NULL) {
518 /*
519 * We clear a bit or cache a poll fd if
520 * the driver returns a poll head ptr,
521 * which is expected in the case of 0
522 * revents. Some buggy drivers may return
523 * a NULL php pointer with 0 revents. In
524 * this case, we just treat the driver as
525 * "noncacheable" and do not clear the bit
526 * in the bitmap.
527 */
528 if ((pdp->pd_php != NULL) &&
529 ((pcp->pc_flag & PC_POLLWAKE) == 0)) {
624 mutex_exit(&devpoll_lock);
625 return (0);
626 }
627
628 /*
629 * A write to /dev/poll adds or removes fds to/from the cached poll fd
630 * set, or changes the poll events for a watched fd.
631 */
632 /*ARGSUSED*/
633 static int
634 dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
635 {
636 minor_t minor;
637 dp_entry_t *dpep;
638 pollcache_t *pcp;
639 pollfd_t *pollfdp, *pfdp;
640 dvpoll_epollfd_t *epfdp;
641 uintptr_t limit;
642 int error, size;
643 ssize_t uiosize;
644 size_t copysize;
645 nfds_t pollfdnum;
646 struct pollhead *php = NULL;
647 polldat_t *pdp;
648 int fd;
649 file_t *fp;
650 boolean_t is_epoll, fds_added = B_FALSE;
651
652 minor = getminor(dev);
653
654 mutex_enter(&devpoll_lock);
655 ASSERT(minor < dptblsize);
656 dpep = devpolltbl[minor];
657 ASSERT(dpep != NULL);
658 mutex_exit(&devpoll_lock);
659
660 mutex_enter(&dpep->dpe_lock);
661 pcp = dpep->dpe_pcache;
662 is_epoll = (dpep->dpe_flag & DP_ISEPOLLCOMPAT) != 0;
663 size = (is_epoll) ? sizeof (dvpoll_epollfd_t) : sizeof (pollfd_t);
664 mutex_exit(&dpep->dpe_lock);
690 mutex_exit(&curproc->p_lock);
691 return (EINVAL);
692 }
693 mutex_exit(&curproc->p_lock);
694 }
695
696 /*
697 * Copy in the pollfd array. Walk through the array and add
698 * each polled fd to the cached set.
699 */
700 pollfdp = kmem_alloc(uiosize, KM_SLEEP);
701 limit = (uintptr_t)pollfdp + (pollfdnum * size);
702
703 /*
704 * Although /dev/poll uses the write(2) interface to cache fds, it's
705 * not supposed to function as a seekable device. To prevent the
706 * offset from growing and eventually exceeding the maximum, reset
707 * the offset here for every call.
708 */
709 uiop->uio_loffset = 0;
710
711 /*
712 * Use uiocopy instead of uiomove when populating pollfdp, keeping
713 * uio_resid untouched for now. Write syscalls will translate EINTR
714 * into a success if they detect "successfully transferred" data via an
715 * updated uio_resid. Falsely suppressing such errors is disastrous.
716 */
717 if ((error = uiocopy((caddr_t)pollfdp, uiosize, UIO_WRITE, uiop,
718 &copysize)) != 0) {
719 kmem_free(pollfdp, uiosize);
720 return (error);
721 }
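/*
 * Descriptive note (added): had uiomove() been used above, a later
 * EINTR in dpwrite() could be masked -- write(2) would see a reduced
 * uio_resid, report a short but "successful" transfer, and the caller
 * would never learn that no fds were cached. uioskip() is therefore
 * only applied after the pollcache has been successfully updated.
 */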
722
723 /*
724 * We are about to enter the core portion of dpwrite(). Make sure this
725 * write has exclusive access to this portion of the code, i.e.,
726 * that there are no other writers in it.
727 *
728 * Waiting for all readers to drop their references to the dpe is
729 * unnecessary since the pollcache itself is protected by pc_lock.
730 */
731 mutex_enter(&dpep->dpe_lock);
732 dpep->dpe_writerwait++;
733 while ((dpep->dpe_flag & DP_WRITER_PRESENT) != 0) {
734 ASSERT(dpep->dpe_refcnt != 0);
735
736 if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) {
737 dpep->dpe_writerwait--;
738 mutex_exit(&dpep->dpe_lock);
739 kmem_free(pollfdp, uiosize);
740 return (EINTR);
741 }
742 }
975 /*
976 * Wake any pollcache waiters so they can check the new descriptors.
977 *
978 * Any fds added to a recursive-capable pollcache could themselves be
979 * /dev/poll handles. To ensure that proper event propagation occurs,
980 * parent pollcaches are woken too, so that they can create any needed
981 * pollcache links.
982 */
983 if (fds_added) {
984 cv_broadcast(&pcp->pc_cv);
985 pcache_wake_parents(pcp);
986 }
987 pollstate_exit(pcp);
988 mutex_enter(&dpep->dpe_lock);
989 bypass:
990 dpep->dpe_flag &= ~DP_WRITER_PRESENT;
991 dpep->dpe_refcnt--;
992 cv_broadcast(&dpep->dpe_cv);
993 mutex_exit(&dpep->dpe_lock);
994 kmem_free(pollfdp, uiosize);
995 if (error == 0) {
996 /*
997 * The state of uio_resid is updated only after the pollcache
998 * is successfully modified.
999 */
1000 uioskip(uiop, copysize);
1001 }
1002 return (error);
1003 }
1004
1005 #define DP_SIGMASK_RESTORE(ksetp) { \
1006 if (ksetp != NULL) { \
1007 mutex_enter(&p->p_lock); \
1008 if (lwp->lwp_cursig == 0) { \
1009 t->t_hold = lwp->lwp_sigoldmask; \
1010 t->t_flag &= ~T_TOMASK; \
1011 } \
1012 mutex_exit(&p->p_lock); \
1013 } \
1014 }
1015
1016 /*ARGSUSED*/
1017 static int
1018 dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
1019 {
1020 minor_t minor;
1021 dp_entry_t *dpep;
1124 DP_REFRELE(dpep);
1125 return (EFAULT);
1126 }
1127
1128 deadline = STRUCT_FGET(dvpoll, dp_timeout);
1129 if (deadline > 0) {
1130 /*
1131 * Convert the deadline from relative milliseconds
1132 * to absolute nanoseconds. The caller must wait
1133 * for at least one tick.
1134 */
1135 deadline = MSEC2NSEC(deadline);
1136 deadline = MAX(deadline, nsec_per_tick);
1137 deadline += now;
1138 }
1139
1140 if (cmd == DP_PPOLL) {
1141 void *setp = STRUCT_FGETP(dvpoll, dp_setp);
1142
1143 if (setp != NULL) {
1144 if ((mode & FKIOCTL) != 0) {
1145 /* Use the signal set directly */
1146 ksetp = (k_sigset_t *)setp;
1147 } else {
1148 if (copyin(setp, &set, sizeof (set))) {
1149 DP_REFRELE(dpep);
1150 return (EFAULT);
1151 }
1152 sigutok(&set, &kset);
1153 ksetp = &kset;
1154 }
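/*
 * Descriptive note (added): with FKIOCTL the caller is in-kernel, so
 * dp_setp already points at a native k_sigset_t and the copyin() and
 * sigutok() conversion of a user-level sigset_t can be skipped.
 */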
1155
1156 mutex_enter(&p->p_lock);
1157 schedctl_finish_sigblock(t);
1158 lwp->lwp_sigoldmask = t->t_hold;
1159 t->t_hold = *ksetp;
1160 t->t_flag |= T_TOMASK;
1161
1162 /*
1163 * Like ppoll() with a non-NULL sigset, we'll
1164 * call cv_reltimedwait_sig() just to check for
1165 * signals. This call will return immediately
1166 * with either 0 (signalled) or -1 (no signal).
1167 * There are some conditions whereby we can
1168 * get 0 from cv_reltimedwait_sig() without
1169 * a true signal (e.g., a directed stop), so
1170 * we restore our signal mask in the unlikely
1171 * event that lwp_cursig is 0.
1172 */
1173 if (!cv_reltimedwait_sig(&t->t_delay_cv,
1174 &p->p_lock, 0, TR_CLOCK_TICK)) {
1284
1285 error = cv_timedwait_sig_hrtime(&pcp->pc_cv,
1286 &pcp->pc_lock, deadline);
1287
1288 /*
1289 * Break out on a signal (0, mapped to EINTR) or a
1290 * timeout (negative); on any other wakeup, poll again.
1291 */
1292 if (error <= 0) {
1293 error = (error == 0) ? EINTR : 0;
1294 break;
1295 } else {
1296 error = 0;
1297 }
1298 }
1299 pollstate_exit(pcp);
1300
1301 DP_SIGMASK_RESTORE(ksetp);
1302
1303 if (error == 0 && fdcnt > 0) {
1304 /*
1305 * It should be noted that FKIOCTL does not influence
1306 * the copyout (vs bcopy) of dp_fds at this time.
1307 */
1308 if (copyout(ps->ps_dpbuf,
1309 STRUCT_FGETP(dvpoll, dp_fds), fdcnt * fdsize)) {
1310 DP_REFRELE(dpep);
1311 return (EFAULT);
1312 }
1313 *rvalp = fdcnt;
1314 }
1315 break;
1316 }
1317
1318 case DP_ISPOLLED:
1319 {
1320 pollfd_t pollfd;
1321 polldat_t *pdp;
1322
1323 STRUCT_INIT(dvpoll, mode);
1324 error = copyin((caddr_t)arg, &pollfd, sizeof (pollfd_t));
1325 if (error) {
1326 DP_REFRELE(dpep);
1327 return (EFAULT);
|