Print this page
OS-5518 devpoll write feigns success in the face of EINTR
OS-5520 epoll_ctl not allowed to emit EINTR
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
OS-5516 vmxnet3s declares wrong sdu
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Approved by: Bryan Cantrill <bryan@joyent.com>
OS-5511 epoll should not leave dangling polldat_t entries
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
OS-5260 lxbrand epoll_pwait needs sigset translation
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/io/devpoll.c
          +++ new/usr/src/uts/common/io/devpoll.c
↓ open down ↓ 345 lines elided ↑ open up ↑
 346  346  
 347  347                                  /*
 348  348                                   * In the epoll compatibility case, we actually
 349  349                                   * perform the implicit removal to remain
 350  350                                   * closer to the epoll semantics.
 351  351                                   */
 352  352                                  if (is_epoll) {
 353  353                                          pdp->pd_fp = NULL;
 354  354                                          pdp->pd_events = 0;
 355  355  
 356      -                                        if (php != NULL) {
 357      -                                                pollhead_delete(php, pdp);
      356 +                                        if (pdp->pd_php != NULL) {
      357 +                                                pollhead_delete(pdp->pd_php,
      358 +                                                    pdp);
 358  359                                                  pdp->pd_php = NULL;
 359  360                                          }
 360  361  
 361  362                                          BT_CLEAR(pcp->pc_bitmap, fd);
 362  363                                          continue;
 363  364                                  }
 364  365                          }
 365  366  
 366  367                          if (fp != pdp->pd_fp) {
 367  368                                  /*
↓ open down ↓ 128 lines elided ↑ open up ↑
 496  497                                          BT_CLEAR(pcp->pc_bitmap, fd);
 497  498  
 498  499                                  /*
 499  500                                   * If POLLONESHOT is set, perform the implicit
 500  501                                   * POLLREMOVE.
 501  502                                   */
 502  503                                  if (pdp->pd_events & POLLONESHOT) {
 503  504                                          pdp->pd_fp = NULL;
 504  505                                          pdp->pd_events = 0;
 505  506  
 506      -                                        if (php != NULL) {
 507      -                                                pollhead_delete(php, pdp);
      507 +                                        if (pdp->pd_php != NULL) {
      508 +                                                pollhead_delete(pdp->pd_php,
      509 +                                                    pdp);
 508  510                                                  pdp->pd_php = NULL;
 509  511                                          }
 510  512  
 511  513                                          BT_CLEAR(pcp->pc_bitmap, fd);
 512  514                                  }
 513  515  
 514  516                                  fdcnt++;
 515  517                          } else if (php != NULL) {
 516  518                                  /*
 517  519                                   * We clear a bit or cache a poll fd if
↓ open down ↓ 114 lines elided ↑ open up ↑
 632  634  dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
 633  635  {
 634  636          minor_t         minor;
 635  637          dp_entry_t      *dpep;
 636  638          pollcache_t     *pcp;
 637  639          pollfd_t        *pollfdp, *pfdp;
 638  640          dvpoll_epollfd_t *epfdp;
 639  641          uintptr_t       limit;
 640  642          int             error, size;
 641  643          ssize_t         uiosize;
      644 +        size_t          copysize;
 642  645          nfds_t          pollfdnum;
 643  646          struct pollhead *php = NULL;
 644  647          polldat_t       *pdp;
 645  648          int             fd;
 646  649          file_t          *fp;
 647  650          boolean_t       is_epoll, fds_added = B_FALSE;
 648  651  
 649  652          minor = getminor(dev);
 650  653  
 651  654          mutex_enter(&devpoll_lock);
↓ open down ↓ 45 lines elided ↑ open up ↑
 697  700          pollfdp = kmem_alloc(uiosize, KM_SLEEP);
 698  701          limit = (uintptr_t)pollfdp + (pollfdnum * size);
 699  702  
 700  703          /*
 701  704           * Although /dev/poll uses the write(2) interface to cache fds, it's
 702  705           * not supposed to function as a seekable device. To prevent offset
 703  706           * from growing and eventually exceed the maximum, reset the offset
 704  707           * here for every call.
 705  708           */
 706  709          uiop->uio_loffset = 0;
 707      -        if ((error = uiomove((caddr_t)pollfdp, uiosize, UIO_WRITE, uiop))
 708      -            != 0) {
      710 +
      711 +        /*
      712 +         * Use uiocopy instead of uiomove when populating pollfdp, keeping
      713 +         * uio_resid untouched for now.  Write syscalls will translate EINTR
      714 +         * into a success if they detect "successfully transferred" data via an
      715 +         * updated uio_resid.  Falsely suppressing such errors is disastrous.
      716 +         */
      717 +        if ((error = uiocopy((caddr_t)pollfdp, uiosize, UIO_WRITE, uiop,
      718 +            &copysize)) != 0) {
 709  719                  kmem_free(pollfdp, uiosize);
 710  720                  return (error);
 711  721          }
      722 +
 712  723          /*
 713  724           * We are about to enter the core portion of dpwrite(). Make sure this
 714  725           * write has exclusive access in this portion of the code, i.e., no
 715  726           * other writers in this code.
 716  727           *
 717  728           * Waiting for all readers to drop their references to the dpe is
 718  729           * unnecessary since the pollcache itself is protected by pc_lock.
 719  730           */
 720  731          mutex_enter(&dpep->dpe_lock);
 721  732          dpep->dpe_writerwait++;
↓ open down ↓ 252 lines elided ↑ open up ↑
 974  985                  pcache_wake_parents(pcp);
 975  986          }
 976  987          pollstate_exit(pcp);
 977  988          mutex_enter(&dpep->dpe_lock);
 978  989  bypass:
 979  990          dpep->dpe_flag &= ~DP_WRITER_PRESENT;
 980  991          dpep->dpe_refcnt--;
 981  992          cv_broadcast(&dpep->dpe_cv);
 982  993          mutex_exit(&dpep->dpe_lock);
 983  994          kmem_free(pollfdp, uiosize);
      995 +        if (error == 0) {
      996 +                /*
      997 +                 * The state of uio_resid is updated only after the pollcache
      998 +                 * is successfully modified.
      999 +                 */
     1000 +                uioskip(uiop, copysize);
     1001 +        }
 984 1002          return (error);
 985 1003  }
 986 1004  
 987 1005  #define DP_SIGMASK_RESTORE(ksetp) {                                     \
 988 1006          if (ksetp != NULL) {                                            \
 989 1007                  mutex_enter(&p->p_lock);                                \
 990 1008                  if (lwp->lwp_cursig == 0) {                             \
 991 1009                          t->t_hold = lwp->lwp_sigoldmask;                \
 992 1010                          t->t_flag &= ~T_TOMASK;                         \
 993 1011                  }                                                       \
↓ open down ↓ 122 lines elided ↑ open up ↑
1116 1134                           */
1117 1135                          deadline = MSEC2NSEC(deadline);
1118 1136                          deadline = MAX(deadline, nsec_per_tick);
1119 1137                          deadline += now;
1120 1138                  }
1121 1139  
1122 1140                  if (cmd == DP_PPOLL) {
1123 1141                          void *setp = STRUCT_FGETP(dvpoll, dp_setp);
1124 1142  
1125 1143                          if (setp != NULL) {
1126      -                                if (copyin(setp, &set, sizeof (set))) {
1127      -                                        DP_REFRELE(dpep);
1128      -                                        return (EFAULT);
     1144 +                                if ((mode & FKIOCTL) != 0) {
     1145 +                                        /* Use the signal set directly */
     1146 +                                        ksetp = (k_sigset_t *)setp;
     1147 +                                } else {
     1148 +                                        if (copyin(setp, &set, sizeof (set))) {
     1149 +                                                DP_REFRELE(dpep);
     1150 +                                                return (EFAULT);
     1151 +                                        }
     1152 +                                        sigutok(&set, &kset);
     1153 +                                        ksetp = &kset;
1129 1154                                  }
1130 1155  
1131      -                                sigutok(&set, &kset);
1132      -                                ksetp = &kset;
1133      -
1134 1156                                  mutex_enter(&p->p_lock);
1135 1157                                  schedctl_finish_sigblock(t);
1136 1158                                  lwp->lwp_sigoldmask = t->t_hold;
1137 1159                                  t->t_hold = *ksetp;
1138 1160                                  t->t_flag |= T_TOMASK;
1139 1161  
1140 1162                                  /*
1141 1163                                   * Like ppoll() with a non-NULL sigset, we'll
1142 1164                                   * call cv_reltimedwait_sig() just to check for
1143 1165                                   * signals.  This call will return immediately
↓ open down ↓ 128 lines elided ↑ open up ↑
1272 1294                                  break;
1273 1295                          } else {
1274 1296                                  error = 0;
1275 1297                          }
1276 1298                  }
1277 1299                  pollstate_exit(pcp);
1278 1300  
1279 1301                  DP_SIGMASK_RESTORE(ksetp);
1280 1302  
1281 1303                  if (error == 0 && fdcnt > 0) {
     1304 +                        /*
     1305 +                         * It should be noted that FKIOCTL does not influence
     1306 +                         * the copyout (vs bcopy) of dp_fds at this time.
     1307 +                         */
1282 1308                          if (copyout(ps->ps_dpbuf,
1283 1309                              STRUCT_FGETP(dvpoll, dp_fds), fdcnt * fdsize)) {
1284 1310                                  DP_REFRELE(dpep);
1285 1311                                  return (EFAULT);
1286 1312                          }
1287 1313                          *rvalp = fdcnt;
1288 1314                  }
1289 1315                  break;
1290 1316          }
1291 1317  
↓ open down ↓ 411 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX