Print this page
8634 epoll fails to wake on certain edge-triggered conditions
8635 epoll should not emit POLLNVAL
8636 recursive epoll should emit EPOLLRDNORM
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Igor Kozhukhov <igor@dilos.org>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/fs_subr.c
+++ new/usr/src/uts/common/fs/fs_subr.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
|
↓ open down ↓ |
17 lines elided |
↑ open up ↑ |
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
22 22 /* All Rights Reserved */
23 23
24 24
25 25 /*
26 26 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
27 27 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
28 - * Copyright 2015 Joyent, Inc.
28 + * Copyright 2017 Joyent, Inc.
29 29 */
30 30
31 31 /*
32 32 * Generic vnode operations.
33 33 */
34 34 #include <sys/types.h>
35 35 #include <sys/param.h>
36 36 #include <sys/systm.h>
37 37 #include <sys/errno.h>
38 38 #include <sys/fcntl.h>
39 39 #include <sys/flock.h>
40 40 #include <sys/statvfs.h>
41 41 #include <sys/vfs.h>
42 42 #include <sys/vnode.h>
43 43 #include <sys/proc.h>
44 44 #include <sys/user.h>
45 45 #include <sys/unistd.h>
46 46 #include <sys/cred.h>
47 47 #include <sys/poll.h>
48 48 #include <sys/debug.h>
49 49 #include <sys/cmn_err.h>
50 50 #include <sys/stream.h>
51 51 #include <fs/fs_subr.h>
52 52 #include <fs/fs_reparse.h>
53 53 #include <sys/door.h>
54 54 #include <sys/acl.h>
55 55 #include <sys/share.h>
56 56 #include <sys/file.h>
57 57 #include <sys/kmem.h>
58 58 #include <sys/file.h>
59 59 #include <sys/nbmlock.h>
60 60 #include <acl/acl_common.h>
61 61 #include <sys/pathname.h>
62 62
63 63 static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *);
64 64
65 65 /*
66 66 * Tunable to limit the number of retry to recover from STALE error.
67 67 */
68 68 int fs_estale_retry = 5;
69 69
70 70 /*
 71  71   * support for the reparse point door upcall
72 72 */
73 73 static door_handle_t reparsed_door;
74 74 static kmutex_t reparsed_door_lock;
75 75
76 76 /*
77 77 * The associated operation is not supported by the file system.
78 78 */
79 79 int
80 80 fs_nosys()
81 81 {
82 82 return (ENOSYS);
83 83 }
84 84
85 85 /*
86 86 * The associated operation is invalid (on this vnode).
87 87 */
88 88 int
89 89 fs_inval()
90 90 {
91 91 return (EINVAL);
92 92 }
93 93
94 94 /*
95 95 * The associated operation is valid only for directories.
96 96 */
97 97 int
98 98 fs_notdir()
99 99 {
100 100 return (ENOTDIR);
101 101 }
102 102
103 103 /*
104 104   * Free the file-system-specific resources. For file systems that
105 105   * do not support forced unmount, this is a no-op function.
106 106 */
107 107
108 108 /*ARGSUSED*/
109 109 void
110 110 fs_freevfs(vfs_t *vfsp)
111 111 {
112 112 }
113 113
114 114 /* ARGSUSED */
115 115 int
116 116 fs_nosys_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
117 117 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
118 118 caller_context_t *ct)
119 119 {
120 120 return (ENOSYS);
121 121 }
122 122
123 123 /* ARGSUSED */
124 124 int
125 125 fs_nosys_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
126 126 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
127 127 caller_context_t *ct)
128 128 {
129 129 return (ENOSYS);
130 130 }
131 131
132 132 /* ARGSUSED */
133 133 int
134 134 fs_nosys_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
135 135 struct pollhead **phpp, caller_context_t *ct)
136 136 {
137 137 return (ENOSYS);
138 138 }
139 139
140 140
141 141 /*
142 142 * The file system has nothing to sync to disk. However, the
143 143 * VFS_SYNC operation must not fail.
144 144 */
145 145 /* ARGSUSED */
146 146 int
147 147 fs_sync(struct vfs *vfspp, short flag, cred_t *cr)
148 148 {
149 149 return (0);
150 150 }
151 151
152 152 /*
153 153 * Does nothing but VOP_FSYNC must not fail.
154 154 */
155 155 /* ARGSUSED */
156 156 int
157 157 fs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
158 158 {
159 159 return (0);
160 160 }
161 161
162 162 /*
163 163 * Does nothing but VOP_PUTPAGE must not fail.
164 164 */
165 165 /* ARGSUSED */
166 166 int
167 167 fs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
168 168 caller_context_t *ctp)
169 169 {
170 170 return (0);
171 171 }
172 172
173 173 /*
174 174 * Does nothing but VOP_IOCTL must not fail.
175 175 */
176 176 /* ARGSUSED */
177 177 int
178 178 fs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
179 179 int *rvalp)
180 180 {
181 181 return (0);
182 182 }
183 183
184 184 /*
185 185 * Read/write lock/unlock. Does nothing.
186 186 */
187 187 /* ARGSUSED */
188 188 int
189 189 fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
190 190 {
191 191 return (-1);
192 192 }
193 193
194 194 /* ARGSUSED */
195 195 void
196 196 fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
197 197 {
198 198 }
199 199
200 200 /*
201 201 * Compare two vnodes.
202 202 */
203 203 /*ARGSUSED2*/
204 204 int
205 205 fs_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
206 206 {
207 207 return (vp1 == vp2);
208 208 }
209 209
210 210 /*
211 211 * No-op seek operation.
212 212 */
213 213 /* ARGSUSED */
214 214 int
215 215 fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
216 216 {
217 217 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
218 218 }
219 219
220 220 /*
221 221 * File and record locking.
222 222 */
223 223 /* ARGSUSED */
224 224 int
225 225 fs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, offset_t offset,
226 226 flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct)
227 227 {
228 228 int frcmd;
229 229 int nlmid;
230 230 int error = 0;
231 231 boolean_t skip_lock = B_FALSE;
232 232 flk_callback_t serialize_callback;
233 233 int serialize = 0;
234 234 v_mode_t mode;
235 235
236 236 switch (cmd) {
237 237
238 238 case F_GETLK:
239 239 case F_O_GETLK:
240 240 if (flag & F_REMOTELOCK) {
241 241 frcmd = RCMDLCK;
242 242 } else if (flag & F_PXFSLOCK) {
243 243 frcmd = PCMDLCK;
244 244 } else {
245 245 frcmd = 0;
246 246 bfp->l_pid = ttoproc(curthread)->p_pid;
247 247 bfp->l_sysid = 0;
248 248 }
249 249 break;
250 250
251 251 case F_OFD_GETLK:
252 252 /*
253 253 * TBD we do not support remote OFD locks at this time.
254 254 */
255 255 if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
256 256 error = EINVAL;
257 257 goto done;
258 258 }
259 259 skip_lock = B_TRUE;
260 260 break;
261 261
262 262 case F_SETLK_NBMAND:
263 263 /*
264 264 * Are NBMAND locks allowed on this file?
265 265 */
266 266 if (!vp->v_vfsp ||
267 267 !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
268 268 error = EINVAL;
269 269 goto done;
270 270 }
271 271 if (vp->v_type != VREG) {
272 272 error = EINVAL;
273 273 goto done;
274 274 }
275 275 /*FALLTHROUGH*/
276 276
277 277 case F_SETLK:
278 278 if (flag & F_REMOTELOCK) {
279 279 frcmd = SETFLCK|RCMDLCK;
280 280 } else if (flag & F_PXFSLOCK) {
281 281 frcmd = SETFLCK|PCMDLCK;
282 282 } else {
283 283 frcmd = SETFLCK;
284 284 bfp->l_pid = ttoproc(curthread)->p_pid;
285 285 bfp->l_sysid = 0;
286 286 }
287 287 if (cmd == F_SETLK_NBMAND &&
288 288 (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) {
289 289 frcmd |= NBMLCK;
290 290 }
291 291
292 292 if (nbl_need_check(vp)) {
293 293 nbl_start_crit(vp, RW_WRITER);
294 294 serialize = 1;
295 295 if (frcmd & NBMLCK) {
296 296 mode = (bfp->l_type == F_RDLCK) ?
297 297 V_READ : V_RDANDWR;
298 298 if (vn_is_mapped(vp, mode)) {
299 299 error = EAGAIN;
300 300 goto done;
301 301 }
302 302 }
303 303 }
304 304 break;
305 305
306 306 case F_SETLKW:
307 307 if (flag & F_REMOTELOCK) {
308 308 frcmd = SETFLCK|SLPFLCK|RCMDLCK;
309 309 } else if (flag & F_PXFSLOCK) {
310 310 frcmd = SETFLCK|SLPFLCK|PCMDLCK;
311 311 } else {
312 312 frcmd = SETFLCK|SLPFLCK;
313 313 bfp->l_pid = ttoproc(curthread)->p_pid;
314 314 bfp->l_sysid = 0;
315 315 }
316 316
317 317 if (nbl_need_check(vp)) {
318 318 nbl_start_crit(vp, RW_WRITER);
319 319 serialize = 1;
320 320 }
321 321 break;
322 322
323 323 case F_OFD_SETLK:
324 324 case F_OFD_SETLKW:
325 325 case F_FLOCK:
326 326 case F_FLOCKW:
327 327 /*
328 328 * TBD we do not support remote OFD locks at this time.
329 329 */
330 330 if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
331 331 error = EINVAL;
332 332 goto done;
333 333 }
334 334 skip_lock = B_TRUE;
335 335 break;
336 336
337 337 case F_HASREMOTELOCKS:
338 338 nlmid = GETNLMID(bfp->l_sysid);
339 339 if (nlmid != 0) { /* booted as a cluster */
340 340 l_has_rmt(bfp) =
341 341 cl_flk_has_remote_locks_for_nlmid(vp, nlmid);
342 342 } else { /* not booted as a cluster */
343 343 l_has_rmt(bfp) = flk_has_remote_locks(vp);
344 344 }
345 345
346 346 goto done;
347 347
348 348 default:
349 349 error = EINVAL;
350 350 goto done;
351 351 }
352 352
353 353 /*
354 354 * If this is a blocking lock request and we're serializing lock
355 355 * requests, modify the callback list to leave the critical region
356 356 * while we're waiting for the lock.
357 357 */
358 358
359 359 if (serialize && (frcmd & SLPFLCK) != 0) {
360 360 flk_add_callback(&serialize_callback,
361 361 frlock_serialize_blocked, vp, flk_cbp);
362 362 flk_cbp = &serialize_callback;
363 363 }
364 364
365 365 if (!skip_lock)
366 366 error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp);
367 367
368 368 if (serialize && (frcmd & SLPFLCK) != 0)
369 369 flk_del_callback(&serialize_callback);
370 370
371 371 done:
372 372 if (serialize)
373 373 nbl_end_crit(vp);
374 374
375 375 return (error);
376 376 }
377 377
378 378 /*
379 379 * Callback when a lock request blocks and we are serializing requests. If
380 380 * before sleeping, leave the critical region. If after wakeup, reenter
381 381 * the critical region.
382 382 */
383 383
384 384 static callb_cpr_t *
385 385 frlock_serialize_blocked(flk_cb_when_t when, void *infop)
386 386 {
387 387 vnode_t *vp = (vnode_t *)infop;
388 388
389 389 if (when == FLK_BEFORE_SLEEP)
390 390 nbl_end_crit(vp);
391 391 else {
392 392 nbl_start_crit(vp, RW_WRITER);
393 393 }
394 394
395 395 return (NULL);
396 396 }
397 397
398 398 /*
399 399 * Allow any flags.
400 400 */
401 401 /* ARGSUSED */
402 402 int
403 403 fs_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
404 404 {
405 405 return (0);
406 406 }
407 407
408 408 /*
|
↓ open down ↓ |
370 lines elided |
↑ open up ↑ |
409 409 * Return the answer requested to poll() for non-device files.
410 410   * Only POLLIN, POLLRDNORM, POLLRDBAND, POLLOUT, and POLLWRBAND
411 411 */
412 412 struct pollhead fs_pollhd;
413 413
414 414 /* ARGSUSED */
415 415 int
416 416 fs_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
417 417 struct pollhead **phpp, caller_context_t *ct)
418 418 {
419 + /*
420 + * Reject all attempts for edge-triggered polling. These should only
421 + * occur when regular files are added to a /dev/poll handle which is in
422 + * epoll mode. The Linux epoll does not allow epoll-ing on regular
423 + * files at all, so rejecting EPOLLET requests is congruent with those
424 + * expectations.
425 + */
426 + if (events & POLLET) {
427 + return (EPERM);
428 + }
429 +
419 430 *reventsp = 0;
420 431 if (events & POLLIN)
421 432 *reventsp |= POLLIN;
422 433 if (events & POLLRDNORM)
423 434 *reventsp |= POLLRDNORM;
424 435 if (events & POLLRDBAND)
425 436 *reventsp |= POLLRDBAND;
426 437 if (events & POLLOUT)
427 438 *reventsp |= POLLOUT;
428 439 if (events & POLLWRBAND)
429 440 *reventsp |= POLLWRBAND;
430 - *phpp = !anyyet && !*reventsp ? &fs_pollhd : (struct pollhead *)NULL;
441 + /*
442 + * Emitting a pollhead without the intention of issuing pollwakeup()
443 + * calls against it is a recipe for trouble. It's only acceptable in
444 + * this case since the above logic matches practically all useful
445 + * events.
446 + */
447 + if (*reventsp == 0 && !anyyet) {
448 + *phpp = &fs_pollhd;
449 + }
431 450 return (0);
432 451 }
433 452
434 453 /*
435 454 * POSIX pathconf() support.
436 455 */
437 456 /* ARGSUSED */
438 457 int
439 458 fs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
440 459 caller_context_t *ct)
441 460 {
442 461 ulong_t val;
443 462 int error = 0;
444 463 struct statvfs64 vfsbuf;
445 464
446 465 switch (cmd) {
447 466
448 467 case _PC_LINK_MAX:
449 468 val = MAXLINK;
450 469 break;
451 470
452 471 case _PC_MAX_CANON:
453 472 val = MAX_CANON;
454 473 break;
455 474
456 475 case _PC_MAX_INPUT:
457 476 val = MAX_INPUT;
458 477 break;
459 478
460 479 case _PC_NAME_MAX:
461 480 bzero(&vfsbuf, sizeof (vfsbuf));
462 481 if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
463 482 break;
464 483 val = vfsbuf.f_namemax;
465 484 break;
466 485
467 486 case _PC_PATH_MAX:
468 487 case _PC_SYMLINK_MAX:
469 488 val = MAXPATHLEN;
470 489 break;
471 490
472 491 case _PC_PIPE_BUF:
473 492 val = PIPE_BUF;
474 493 break;
475 494
476 495 case _PC_NO_TRUNC:
477 496 if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC)
478 497 val = 1; /* NOTRUNC is enabled for vp */
479 498 else
480 499 val = (ulong_t)-1;
481 500 break;
482 501
483 502 case _PC_VDISABLE:
484 503 val = _POSIX_VDISABLE;
485 504 break;
486 505
487 506 case _PC_CHOWN_RESTRICTED:
488 507 if (rstchown)
489 508 val = rstchown; /* chown restricted enabled */
490 509 else
491 510 val = (ulong_t)-1;
492 511 break;
493 512
494 513 case _PC_FILESIZEBITS:
495 514
496 515 /*
497 516 * If ever we come here it means that underlying file system
498 517 * does not recognise the command and therefore this
499 518 * configurable limit cannot be determined. We return -1
500 519 * and don't change errno.
501 520 */
502 521
503 522 val = (ulong_t)-1; /* large file support */
504 523 break;
505 524
506 525 case _PC_ACL_ENABLED:
507 526 val = 0;
508 527 break;
509 528
510 529 case _PC_CASE_BEHAVIOR:
511 530 val = _CASE_SENSITIVE;
512 531 if (vfs_has_feature(vp->v_vfsp, VFSFT_CASEINSENSITIVE) == 1)
513 532 val |= _CASE_INSENSITIVE;
514 533 if (vfs_has_feature(vp->v_vfsp, VFSFT_NOCASESENSITIVE) == 1)
515 534 val &= ~_CASE_SENSITIVE;
516 535 break;
517 536
518 537 case _PC_SATTR_ENABLED:
519 538 case _PC_SATTR_EXISTS:
520 539 val = 0;
521 540 break;
522 541
523 542 case _PC_ACCESS_FILTERING:
524 543 val = 0;
525 544 break;
526 545
527 546 default:
528 547 error = EINVAL;
529 548 break;
530 549 }
531 550
532 551 if (error == 0)
533 552 *valp = val;
534 553 return (error);
535 554 }
536 555
537 556 /*
538 557 * Dispose of a page.
539 558 */
540 559 /* ARGSUSED */
541 560 void
542 561 fs_dispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
543 562 caller_context_t *ct)
544 563 {
545 564
546 565 ASSERT(fl == B_FREE || fl == B_INVAL);
547 566
548 567 if (fl == B_FREE)
549 568 page_free(pp, dn);
550 569 else
551 570 page_destroy(pp, dn);
552 571 }
553 572
554 573 /* ARGSUSED */
555 574 void
556 575 fs_nodispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
557 576 caller_context_t *ct)
558 577 {
559 578 cmn_err(CE_PANIC, "fs_nodispose invoked");
560 579 }
561 580
562 581 /*
563 582 * fabricate acls for file systems that do not support acls.
564 583 */
565 584 /* ARGSUSED */
566 585 int
567 586 fs_fab_acl(vnode_t *vp, vsecattr_t *vsecattr, int flag, cred_t *cr,
568 587 caller_context_t *ct)
569 588 {
570 589 aclent_t *aclentp;
571 590 struct vattr vattr;
572 591 int error;
573 592 size_t aclsize;
574 593
575 594 vsecattr->vsa_aclcnt = 0;
576 595 vsecattr->vsa_aclentsz = 0;
577 596 vsecattr->vsa_aclentp = NULL;
578 597 vsecattr->vsa_dfaclcnt = 0; /* Default ACLs are not fabricated */
579 598 vsecattr->vsa_dfaclentp = NULL;
580 599
581 600 vattr.va_mask = AT_MODE | AT_UID | AT_GID;
582 601 if (error = VOP_GETATTR(vp, &vattr, 0, cr, ct))
583 602 return (error);
584 603
585 604 if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) {
586 605 aclsize = 4 * sizeof (aclent_t);
587 606 vsecattr->vsa_aclcnt = 4; /* USER, GROUP, OTHER, and CLASS */
588 607 vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP);
589 608 aclentp = vsecattr->vsa_aclentp;
590 609
591 610 aclentp->a_type = USER_OBJ; /* Owner */
592 611 aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6;
593 612 aclentp->a_id = vattr.va_uid; /* Really undefined */
594 613 aclentp++;
595 614
596 615 aclentp->a_type = GROUP_OBJ; /* Group */
597 616 aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3;
598 617 aclentp->a_id = vattr.va_gid; /* Really undefined */
599 618 aclentp++;
600 619
601 620 aclentp->a_type = OTHER_OBJ; /* Other */
602 621 aclentp->a_perm = vattr.va_mode & 0007;
603 622 aclentp->a_id = (gid_t)-1; /* Really undefined */
604 623 aclentp++;
605 624
606 625 aclentp->a_type = CLASS_OBJ; /* Class */
607 626 aclentp->a_perm = (ushort_t)(0007);
608 627 aclentp->a_id = (gid_t)-1; /* Really undefined */
609 628 } else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) {
610 629 VERIFY(0 == acl_trivial_create(vattr.va_mode,
611 630 (vp->v_type == VDIR), (ace_t **)&vsecattr->vsa_aclentp,
612 631 &vsecattr->vsa_aclcnt));
613 632 vsecattr->vsa_aclentsz = vsecattr->vsa_aclcnt * sizeof (ace_t);
614 633 }
615 634
616 635 return (error);
617 636 }
618 637
619 638 /*
620 639 * Common code for implementing DOS share reservations
621 640 */
622 641 /* ARGSUSED4 */
623 642 int
624 643 fs_shrlock(struct vnode *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
625 644 caller_context_t *ct)
626 645 {
627 646 int error;
628 647
629 648 /*
630 649 * Make sure that the file was opened with permissions appropriate
631 650 * for the request, and make sure the caller isn't trying to sneak
632 651 * in an NBMAND request.
633 652 */
634 653 if (cmd == F_SHARE) {
635 654 if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) ||
636 655 ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0))
637 656 return (EBADF);
638 657 if (shr->s_access & (F_RMACC | F_MDACC))
639 658 return (EINVAL);
640 659 if (shr->s_deny & (F_MANDDNY | F_RMDNY))
641 660 return (EINVAL);
642 661 }
643 662 if (cmd == F_SHARE_NBMAND) {
644 663 /* make sure nbmand is allowed on the file */
645 664 if (!vp->v_vfsp ||
646 665 !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
647 666 return (EINVAL);
648 667 }
649 668 if (vp->v_type != VREG) {
650 669 return (EINVAL);
651 670 }
652 671 }
653 672
654 673 nbl_start_crit(vp, RW_WRITER);
655 674
656 675 switch (cmd) {
657 676
658 677 case F_SHARE_NBMAND:
659 678 shr->s_deny |= F_MANDDNY;
660 679 /*FALLTHROUGH*/
661 680 case F_SHARE:
662 681 error = add_share(vp, shr);
663 682 break;
664 683
665 684 case F_UNSHARE:
666 685 error = del_share(vp, shr);
667 686 break;
668 687
669 688 case F_HASREMOTELOCKS:
670 689 /*
671 690 * We are overloading this command to refer to remote
672 691 * shares as well as remote locks, despite its name.
673 692 */
674 693 shr->s_access = shr_has_remote_shares(vp, shr->s_sysid);
675 694 error = 0;
676 695 break;
677 696
678 697 default:
679 698 error = EINVAL;
680 699 break;
681 700 }
682 701
683 702 nbl_end_crit(vp);
684 703 return (error);
685 704 }
686 705
687 706 /*ARGSUSED1*/
688 707 int
689 708 fs_vnevent_nosupport(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
690 709 caller_context_t *ct)
691 710 {
692 711 ASSERT(vp != NULL);
693 712 return (ENOTSUP);
694 713 }
695 714
696 715 /*ARGSUSED1*/
697 716 int
698 717 fs_vnevent_support(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
699 718 caller_context_t *ct)
700 719 {
701 720 ASSERT(vp != NULL);
702 721 return (0);
703 722 }
704 723
705 724 /*
706 725 * return 1 for non-trivial ACL.
707 726 *
708 727 * NB: It is not necessary for the caller to VOP_RWLOCK since
709 728 * we only issue VOP_GETSECATTR.
710 729 *
711 730 * Returns 0 == trivial
712 731 * 1 == NOT Trivial
713 732 * <0 could not determine.
714 733 */
715 734 int
716 735 fs_acl_nontrivial(vnode_t *vp, cred_t *cr)
717 736 {
718 737 ulong_t acl_styles;
719 738 ulong_t acl_flavor;
720 739 vsecattr_t vsecattr;
721 740 int error;
722 741 int isnontrivial;
723 742
724 743 /* determine the forms of ACLs maintained */
725 744 error = VOP_PATHCONF(vp, _PC_ACL_ENABLED, &acl_styles, cr, NULL);
726 745
727 746 /* clear bits we don't understand and establish default acl_style */
728 747 acl_styles &= (_ACL_ACLENT_ENABLED | _ACL_ACE_ENABLED);
729 748 if (error || (acl_styles == 0))
730 749 acl_styles = _ACL_ACLENT_ENABLED;
731 750
732 751 vsecattr.vsa_aclentp = NULL;
733 752 vsecattr.vsa_dfaclentp = NULL;
734 753 vsecattr.vsa_aclcnt = 0;
735 754 vsecattr.vsa_dfaclcnt = 0;
736 755
737 756 while (acl_styles) {
738 757 /* select one of the styles as current flavor */
739 758 acl_flavor = 0;
740 759 if (acl_styles & _ACL_ACLENT_ENABLED) {
741 760 acl_flavor = _ACL_ACLENT_ENABLED;
742 761 vsecattr.vsa_mask = VSA_ACLCNT | VSA_DFACLCNT;
743 762 } else if (acl_styles & _ACL_ACE_ENABLED) {
744 763 acl_flavor = _ACL_ACE_ENABLED;
745 764 vsecattr.vsa_mask = VSA_ACECNT | VSA_ACE;
746 765 }
747 766
748 767 ASSERT(vsecattr.vsa_mask && acl_flavor);
749 768 error = VOP_GETSECATTR(vp, &vsecattr, 0, cr, NULL);
750 769 if (error == 0)
751 770 break;
752 771
753 772 /* that flavor failed */
754 773 acl_styles &= ~acl_flavor;
755 774 }
756 775
757 776 /* if all styles fail then assume trivial */
758 777 if (acl_styles == 0)
759 778 return (0);
760 779
761 780 /* process the flavor that worked */
762 781 isnontrivial = 0;
763 782 if (acl_flavor & _ACL_ACLENT_ENABLED) {
764 783 if (vsecattr.vsa_aclcnt > MIN_ACL_ENTRIES)
765 784 isnontrivial = 1;
766 785 if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
767 786 kmem_free(vsecattr.vsa_aclentp,
768 787 vsecattr.vsa_aclcnt * sizeof (aclent_t));
769 788 if (vsecattr.vsa_dfaclcnt && vsecattr.vsa_dfaclentp != NULL)
770 789 kmem_free(vsecattr.vsa_dfaclentp,
771 790 vsecattr.vsa_dfaclcnt * sizeof (aclent_t));
772 791 }
773 792 if (acl_flavor & _ACL_ACE_ENABLED) {
774 793 isnontrivial = ace_trivial(vsecattr.vsa_aclentp,
775 794 vsecattr.vsa_aclcnt);
776 795
777 796 if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
778 797 kmem_free(vsecattr.vsa_aclentp,
779 798 vsecattr.vsa_aclcnt * sizeof (ace_t));
780 799 /* ACE has no vsecattr.vsa_dfaclcnt */
781 800 }
782 801 return (isnontrivial);
783 802 }
784 803
785 804 /*
786 805 * Check whether we need a retry to recover from STALE error.
787 806 */
788 807 int
789 808 fs_need_estale_retry(int retry_count)
790 809 {
791 810 if (retry_count < fs_estale_retry)
792 811 return (1);
793 812 else
794 813 return (0);
795 814 }
796 815
797 816
798 817 static int (*fs_av_scan)(vnode_t *, cred_t *, int) = NULL;
799 818
800 819 /*
801 820 * Routine for anti-virus scanner to call to register its scanning routine.
802 821 */
803 822 void
804 823 fs_vscan_register(int (*av_scan)(vnode_t *, cred_t *, int))
805 824 {
806 825 fs_av_scan = av_scan;
807 826 }
808 827
809 828 /*
810 829 * Routine for file systems to call to initiate anti-virus scanning.
811 830 * Scanning will only be done on REGular files (currently).
812 831 */
813 832 int
814 833 fs_vscan(vnode_t *vp, cred_t *cr, int async)
815 834 {
816 835 int ret = 0;
817 836
818 837 if (fs_av_scan && vp->v_type == VREG)
819 838 ret = (*fs_av_scan)(vp, cr, async);
820 839
821 840 return (ret);
822 841 }
823 842
824 843 /*
825 844 * support functions for reparse point
826 845 */
827 846 /*
828 847 * reparse_vnode_parse
829 848 *
830 849 * Read the symlink data of a reparse point specified by the vnode
831 850 * and return the reparse data as name-value pair in the nvlist.
832 851 */
833 852 int
834 853 reparse_vnode_parse(vnode_t *vp, nvlist_t *nvl)
835 854 {
836 855 int err;
837 856 char *lkdata;
838 857 struct uio uio;
839 858 struct iovec iov;
840 859
841 860 if (vp == NULL || nvl == NULL)
842 861 return (EINVAL);
843 862
844 863 lkdata = kmem_alloc(MAXREPARSELEN, KM_SLEEP);
845 864
846 865 /*
847 866 * Set up io vector to read sym link data
848 867 */
849 868 iov.iov_base = lkdata;
850 869 iov.iov_len = MAXREPARSELEN;
851 870 uio.uio_iov = &iov;
852 871 uio.uio_iovcnt = 1;
853 872 uio.uio_segflg = UIO_SYSSPACE;
854 873 uio.uio_extflg = UIO_COPY_CACHED;
855 874 uio.uio_loffset = (offset_t)0;
856 875 uio.uio_resid = MAXREPARSELEN;
857 876
858 877 if ((err = VOP_READLINK(vp, &uio, kcred, NULL)) == 0) {
859 878 *(lkdata + MAXREPARSELEN - uio.uio_resid) = '\0';
860 879 err = reparse_parse(lkdata, nvl);
861 880 }
862 881 kmem_free(lkdata, MAXREPARSELEN); /* done with lkdata */
863 882
864 883 return (err);
865 884 }
866 885
867 886 void
868 887 reparse_point_init()
869 888 {
870 889 mutex_init(&reparsed_door_lock, NULL, MUTEX_DEFAULT, NULL);
871 890 }
872 891
873 892 static door_handle_t
874 893 reparse_door_get_handle()
875 894 {
876 895 door_handle_t dh;
877 896
878 897 mutex_enter(&reparsed_door_lock);
879 898 if ((dh = reparsed_door) == NULL) {
880 899 if (door_ki_open(REPARSED_DOOR, &reparsed_door) != 0) {
881 900 reparsed_door = NULL;
882 901 dh = NULL;
883 902 } else
884 903 dh = reparsed_door;
885 904 }
886 905 mutex_exit(&reparsed_door_lock);
887 906 return (dh);
888 907 }
889 908
890 909 static void
891 910 reparse_door_reset_handle()
892 911 {
893 912 mutex_enter(&reparsed_door_lock);
894 913 reparsed_door = NULL;
895 914 mutex_exit(&reparsed_door_lock);
896 915 }
897 916
898 917 /*
899 918 * reparse_kderef
900 919 *
901 920 * Accepts the service-specific item from the reparse point and returns
902 921 * the service-specific data requested. The caller specifies the size of
903 922   * the buffer provided via *bufsize; the routine will fail with EOVERFLOW
904 923   * if the results will not fit in the buffer, in which case, *bufsize will
905 924 * contain the number of bytes needed to hold the results.
906 925 *
907 926   * If successful, return 0 and update *bufsize with the length of the
908 927   * actual result; otherwise, return an error code.
909 928 */
910 929 int
911 930 reparse_kderef(const char *svc_type, const char *svc_data, char *buf,
912 931 size_t *bufsize)
913 932 {
914 933 int err, retries, need_free, retried_doorhd;
915 934 size_t dlen, res_len;
916 935 char *darg;
917 936 door_arg_t door_args;
918 937 reparsed_door_res_t *resp;
919 938 door_handle_t rp_door;
920 939
921 940 if (svc_type == NULL || svc_data == NULL || buf == NULL ||
922 941 bufsize == NULL)
923 942 return (EINVAL);
924 943
925 944 /* get reparsed's door handle */
926 945 if ((rp_door = reparse_door_get_handle()) == NULL)
927 946 return (EBADF);
928 947
929 948 /* setup buffer for door_call args and results */
930 949 dlen = strlen(svc_type) + strlen(svc_data) + 2;
931 950 if (*bufsize < dlen) {
932 951 darg = kmem_alloc(dlen, KM_SLEEP);
933 952 need_free = 1;
934 953 } else {
935 954 darg = buf; /* use same buffer for door's args & results */
936 955 need_free = 0;
937 956 }
938 957
939 958 /* build argument string of door call */
940 959 (void) snprintf(darg, dlen, "%s:%s", svc_type, svc_data);
941 960
942 961 /* setup args for door call */
943 962 door_args.data_ptr = darg;
944 963 door_args.data_size = dlen;
945 964 door_args.desc_ptr = NULL;
946 965 door_args.desc_num = 0;
947 966 door_args.rbuf = buf;
948 967 door_args.rsize = *bufsize;
949 968
950 969 /* do the door_call */
951 970 retried_doorhd = 0;
952 971 retries = 0;
953 972 door_ki_hold(rp_door);
954 973 while ((err = door_ki_upcall_limited(rp_door, &door_args,
955 974 NULL, SIZE_MAX, 0)) != 0) {
956 975 if (err == EAGAIN || err == EINTR) {
957 976 if (++retries < REPARSED_DOORCALL_MAX_RETRY) {
958 977 delay(SEC_TO_TICK(1));
959 978 continue;
960 979 }
961 980 } else if (err == EBADF) {
962 981 /* door server goes away... */
963 982 reparse_door_reset_handle();
964 983
965 984 if (retried_doorhd == 0) {
966 985 door_ki_rele(rp_door);
967 986 retried_doorhd++;
968 987 rp_door = reparse_door_get_handle();
969 988 if (rp_door != NULL) {
970 989 door_ki_hold(rp_door);
971 990 continue;
972 991 }
973 992 }
974 993 }
975 994 break;
976 995 }
977 996
978 997 if (rp_door)
979 998 door_ki_rele(rp_door);
980 999
981 1000 if (need_free)
982 1001 kmem_free(darg, dlen); /* done with args buffer */
983 1002
984 1003 if (err != 0)
985 1004 return (err);
986 1005
987 1006 resp = (reparsed_door_res_t *)door_args.rbuf;
988 1007 if ((err = resp->res_status) == 0) {
989 1008 /*
990 1009 * have to save the length of the results before the
991 1010  * bcopy below since it can be an overlapping copy that
992 1011 * overwrites the reparsed_door_res_t structure at
993 1012 * the beginning of the buffer.
994 1013 */
995 1014 res_len = (size_t)resp->res_len;
996 1015
997 1016 /* deref call is ok */
998 1017 if (res_len > *bufsize)
999 1018 err = EOVERFLOW;
1000 1019 else
1001 1020 bcopy(resp->res_data, buf, res_len);
1002 1021 *bufsize = res_len;
1003 1022 }
1004 1023 if (door_args.rbuf != buf)
1005 1024 kmem_free(door_args.rbuf, door_args.rsize);
1006 1025
1007 1026 return (err);
1008 1027 }
|
↓ open down ↓ |
568 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX