1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  22 /*        All Rights Reserved   */
  23 
  24 
  25 /*
  26  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  27  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  28  * Copyright 2015 Joyent, Inc.
  29  */
  30 
  31 /*
  32  * Generic vnode operations.
  33  */
  34 #include <sys/types.h>
  35 #include <sys/param.h>
  36 #include <sys/systm.h>
  37 #include <sys/errno.h>
  38 #include <sys/fcntl.h>
  39 #include <sys/flock.h>
  40 #include <sys/statvfs.h>
  41 #include <sys/vfs.h>
  42 #include <sys/vnode.h>
  43 #include <sys/proc.h>
  44 #include <sys/user.h>
  45 #include <sys/unistd.h>
  46 #include <sys/cred.h>
  47 #include <sys/poll.h>
  48 #include <sys/debug.h>
  49 #include <sys/cmn_err.h>
  50 #include <sys/stream.h>
  51 #include <fs/fs_subr.h>
  52 #include <fs/fs_reparse.h>
  53 #include <sys/door.h>
  54 #include <sys/acl.h>
  55 #include <sys/share.h>
  56 #include <sys/file.h>
  57 #include <sys/kmem.h>
  58 #include <sys/file.h>
  59 #include <sys/nbmlock.h>
  60 #include <acl/acl_common.h>
  61 #include <sys/pathname.h>
  62 
  63 static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *);
  64 
  65 /*
  66  * Tunable to limit the number of retry to recover from STALE error.
  67  */
  68 int fs_estale_retry = 5;
  69 
  70 /*
  71  * supports for reparse point door upcall
  72  */
  73 static door_handle_t reparsed_door;
  74 static kmutex_t reparsed_door_lock;
  75 
  76 /*
  77  * The associated operation is not supported by the file system.
  78  */
  79 int
  80 fs_nosys()
  81 {
  82         return (ENOSYS);
  83 }
  84 
  85 /*
  86  * The associated operation is invalid (on this vnode).
  87  */
  88 int
  89 fs_inval()
  90 {
  91         return (EINVAL);
  92 }
  93 
  94 /*
  95  * The associated operation is valid only for directories.
  96  */
  97 int
  98 fs_notdir()
  99 {
 100         return (ENOTDIR);
 101 }
 102 
 103 /*
 104  * Free the file system specific resources. For the file systems that
 105  * do not support the forced unmount, it will be a nop function.
 106  */
 107 
 108 /*ARGSUSED*/
 109 void
 110 fs_freevfs(vfs_t *vfsp)
 111 {
 112 }
 113 
 114 /* ARGSUSED */
 115 int
 116 fs_nosys_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
 117     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
 118     caller_context_t *ct)
 119 {
 120         return (ENOSYS);
 121 }
 122 
 123 /* ARGSUSED */
 124 int
 125 fs_nosys_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
 126     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
 127     caller_context_t *ct)
 128 {
 129         return (ENOSYS);
 130 }
 131 
 132 /* ARGSUSED */
 133 int
 134 fs_nosys_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
 135     struct pollhead **phpp, caller_context_t *ct)
 136 {
 137         return (ENOSYS);
 138 }
 139 
 140 
 141 /*
 142  * The file system has nothing to sync to disk.  However, the
 143  * VFS_SYNC operation must not fail.
 144  */
 145 /* ARGSUSED */
 146 int
 147 fs_sync(struct vfs *vfspp, short flag, cred_t *cr)
 148 {
 149         return (0);
 150 }
 151 
 152 /*
 153  * Does nothing but VOP_FSYNC must not fail.
 154  */
 155 /* ARGSUSED */
 156 int
 157 fs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
 158 {
 159         return (0);
 160 }
 161 
 162 /*
 163  * Does nothing but VOP_PUTPAGE must not fail.
 164  */
 165 /* ARGSUSED */
 166 int
 167 fs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
 168     caller_context_t *ctp)
 169 {
 170         return (0);
 171 }
 172 
 173 /*
 174  * Does nothing but VOP_IOCTL must not fail.
 175  */
 176 /* ARGSUSED */
 177 int
 178 fs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
 179     int *rvalp)
 180 {
 181         return (0);
 182 }
 183 
 184 /*
 185  * Read/write lock/unlock.  Does nothing.
 186  */
 187 /* ARGSUSED */
 188 int
 189 fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
 190 {
 191         return (-1);
 192 }
 193 
 194 /* ARGSUSED */
 195 void
 196 fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
 197 {
 198 }
 199 
 200 /*
 201  * Compare two vnodes.
 202  */
 203 /*ARGSUSED2*/
 204 int
 205 fs_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
 206 {
 207         return (vp1 == vp2);
 208 }
 209 
 210 /*
 211  * No-op seek operation.
 212  */
 213 /* ARGSUSED */
 214 int
 215 fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
 216 {
 217         return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
 218 }
 219 
 220 /*
 221  * File and record locking.
 222  */
 223 /* ARGSUSED */
 224 int
 225 fs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, offset_t offset,
 226     flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct)
 227 {
 228         int frcmd;
 229         int nlmid;
 230         int error = 0;
 231         boolean_t skip_lock = B_FALSE;
 232         flk_callback_t serialize_callback;
 233         int serialize = 0;
 234         v_mode_t mode;
 235 
 236         switch (cmd) {
 237 
 238         case F_GETLK:
 239         case F_O_GETLK:
 240                 if (flag & F_REMOTELOCK) {
 241                         frcmd = RCMDLCK;
 242                 } else if (flag & F_PXFSLOCK) {
 243                         frcmd = PCMDLCK;
 244                 } else {
 245                         frcmd = 0;
 246                         bfp->l_pid = ttoproc(curthread)->p_pid;
 247                         bfp->l_sysid = 0;
 248                 }
 249                 break;
 250 
 251         case F_OFD_GETLK:
 252                 /*
 253                  * TBD we do not support remote OFD locks at this time.
 254                  */
 255                 if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
 256                         error = EINVAL;
 257                         goto done;
 258                 }
 259                 skip_lock = B_TRUE;
 260                 break;
 261 
 262         case F_SETLK_NBMAND:
 263                 /*
 264                  * Are NBMAND locks allowed on this file?
 265                  */
 266                 if (!vp->v_vfsp ||
 267                     !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
 268                         error = EINVAL;
 269                         goto done;
 270                 }
 271                 if (vp->v_type != VREG) {
 272                         error = EINVAL;
 273                         goto done;
 274                 }
 275                 /*FALLTHROUGH*/
 276 
 277         case F_SETLK:
 278                 if (flag & F_REMOTELOCK) {
 279                         frcmd = SETFLCK|RCMDLCK;
 280                 } else if (flag & F_PXFSLOCK) {
 281                         frcmd = SETFLCK|PCMDLCK;
 282                 } else {
 283                         frcmd = SETFLCK;
 284                         bfp->l_pid = ttoproc(curthread)->p_pid;
 285                         bfp->l_sysid = 0;
 286                 }
 287                 if (cmd == F_SETLK_NBMAND &&
 288                     (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) {
 289                         frcmd |= NBMLCK;
 290                 }
 291 
 292                 if (nbl_need_check(vp)) {
 293                         nbl_start_crit(vp, RW_WRITER);
 294                         serialize = 1;
 295                         if (frcmd & NBMLCK) {
 296                                 mode = (bfp->l_type == F_RDLCK) ?
 297                                     V_READ : V_RDANDWR;
 298                                 if (vn_is_mapped(vp, mode)) {
 299                                         error = EAGAIN;
 300                                         goto done;
 301                                 }
 302                         }
 303                 }
 304                 break;
 305 
 306         case F_SETLKW:
 307                 if (flag & F_REMOTELOCK) {
 308                         frcmd = SETFLCK|SLPFLCK|RCMDLCK;
 309                 } else if (flag & F_PXFSLOCK) {
 310                         frcmd = SETFLCK|SLPFLCK|PCMDLCK;
 311                 } else {
 312                         frcmd = SETFLCK|SLPFLCK;
 313                         bfp->l_pid = ttoproc(curthread)->p_pid;
 314                         bfp->l_sysid = 0;
 315                 }
 316 
 317                 if (nbl_need_check(vp)) {
 318                         nbl_start_crit(vp, RW_WRITER);
 319                         serialize = 1;
 320                 }
 321                 break;
 322 
 323         case F_OFD_SETLK:
 324         case F_OFD_SETLKW:
 325         case F_FLOCK:
 326         case F_FLOCKW:
 327                 /*
 328                  * TBD we do not support remote OFD locks at this time.
 329                  */
 330                 if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
 331                         error = EINVAL;
 332                         goto done;
 333                 }
 334                 skip_lock = B_TRUE;
 335                 break;
 336 
 337         case F_HASREMOTELOCKS:
 338                 nlmid = GETNLMID(bfp->l_sysid);
 339                 if (nlmid != 0) {       /* booted as a cluster */
 340                         l_has_rmt(bfp) =
 341                             cl_flk_has_remote_locks_for_nlmid(vp, nlmid);
 342                 } else {                /* not booted as a cluster */
 343                         l_has_rmt(bfp) = flk_has_remote_locks(vp);
 344                 }
 345 
 346                 goto done;
 347 
 348         default:
 349                 error = EINVAL;
 350                 goto done;
 351         }
 352 
 353         /*
 354          * If this is a blocking lock request and we're serializing lock
 355          * requests, modify the callback list to leave the critical region
 356          * while we're waiting for the lock.
 357          */
 358 
 359         if (serialize && (frcmd & SLPFLCK) != 0) {
 360                 flk_add_callback(&serialize_callback,
 361                     frlock_serialize_blocked, vp, flk_cbp);
 362                 flk_cbp = &serialize_callback;
 363         }
 364 
 365         if (!skip_lock)
 366                 error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp);
 367 
 368         if (serialize && (frcmd & SLPFLCK) != 0)
 369                 flk_del_callback(&serialize_callback);
 370 
 371 done:
 372         if (serialize)
 373                 nbl_end_crit(vp);
 374 
 375         return (error);
 376 }
 377 
 378 /*
 379  * Callback when a lock request blocks and we are serializing requests.  If
 380  * before sleeping, leave the critical region.  If after wakeup, reenter
 381  * the critical region.
 382  */
 383 
 384 static callb_cpr_t *
 385 frlock_serialize_blocked(flk_cb_when_t when, void *infop)
 386 {
 387         vnode_t *vp = (vnode_t *)infop;
 388 
 389         if (when == FLK_BEFORE_SLEEP)
 390                 nbl_end_crit(vp);
 391         else {
 392                 nbl_start_crit(vp, RW_WRITER);
 393         }
 394 
 395         return (NULL);
 396 }
 397 
 398 /*
 399  * Allow any flags.
 400  */
 401 /* ARGSUSED */
 402 int
 403 fs_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
 404 {
 405         return (0);
 406 }
 407 
 408 /*
 409  * Return the answer requested to poll() for non-device files.
 410  * Only POLLIN, POLLRDNORM, and POLLOUT are recognized.
 411  */
 412 struct pollhead fs_pollhd;
 413 
 414 /* ARGSUSED */
 415 int
 416 fs_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
 417     struct pollhead **phpp, caller_context_t *ct)
 418 {
 419         *reventsp = 0;
 420         if (events & POLLIN)
 421                 *reventsp |= POLLIN;
 422         if (events & POLLRDNORM)
 423                 *reventsp |= POLLRDNORM;
 424         if (events & POLLRDBAND)
 425                 *reventsp |= POLLRDBAND;
 426         if (events & POLLOUT)
 427                 *reventsp |= POLLOUT;
 428         if (events & POLLWRBAND)
 429                 *reventsp |= POLLWRBAND;
 430         *phpp = !anyyet && !*reventsp ? &fs_pollhd : (struct pollhead *)NULL;
 431         return (0);
 432 }
 433 
 434 /*
 435  * POSIX pathconf() support.
 436  */
 437 /* ARGSUSED */
 438 int
 439 fs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
 440     caller_context_t *ct)
 441 {
 442         ulong_t val;
 443         int error = 0;
 444         struct statvfs64 vfsbuf;
 445 
 446         switch (cmd) {
 447 
 448         case _PC_LINK_MAX:
 449                 val = MAXLINK;
 450                 break;
 451 
 452         case _PC_MAX_CANON:
 453                 val = MAX_CANON;
 454                 break;
 455 
 456         case _PC_MAX_INPUT:
 457                 val = MAX_INPUT;
 458                 break;
 459 
 460         case _PC_NAME_MAX:
 461                 bzero(&vfsbuf, sizeof (vfsbuf));
 462                 if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
 463                         break;
 464                 val = vfsbuf.f_namemax;
 465                 break;
 466 
 467         case _PC_PATH_MAX:
 468         case _PC_SYMLINK_MAX:
 469                 val = MAXPATHLEN;
 470                 break;
 471 
 472         case _PC_PIPE_BUF:
 473                 val = PIPE_BUF;
 474                 break;
 475 
 476         case _PC_NO_TRUNC:
 477                 if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC)
 478                         val = 1;        /* NOTRUNC is enabled for vp */
 479                 else
 480                         val = (ulong_t)-1;
 481                 break;
 482 
 483         case _PC_VDISABLE:
 484                 val = _POSIX_VDISABLE;
 485                 break;
 486 
 487         case _PC_CHOWN_RESTRICTED:
 488                 if (rstchown)
 489                         val = rstchown; /* chown restricted enabled */
 490                 else
 491                         val = (ulong_t)-1;
 492                 break;
 493 
 494         case _PC_FILESIZEBITS:
 495 
 496                 /*
 497                  * If ever we come here it means that underlying file system
 498                  * does not recognise the command and therefore this
 499                  * configurable limit cannot be determined. We return -1
 500                  * and don't change errno.
 501                  */
 502 
 503                 val = (ulong_t)-1;    /* large file support */
 504                 break;
 505 
 506         case _PC_ACL_ENABLED:
 507                 val = 0;
 508                 break;
 509 
 510         case _PC_CASE_BEHAVIOR:
 511                 val = _CASE_SENSITIVE;
 512                 if (vfs_has_feature(vp->v_vfsp, VFSFT_CASEINSENSITIVE) == 1)
 513                         val |= _CASE_INSENSITIVE;
 514                 if (vfs_has_feature(vp->v_vfsp, VFSFT_NOCASESENSITIVE) == 1)
 515                         val &= ~_CASE_SENSITIVE;
 516                 break;
 517 
 518         case _PC_SATTR_ENABLED:
 519         case _PC_SATTR_EXISTS:
 520                 val = 0;
 521                 break;
 522 
 523         case _PC_ACCESS_FILTERING:
 524                 val = 0;
 525                 break;
 526 
 527         default:
 528                 error = EINVAL;
 529                 break;
 530         }
 531 
 532         if (error == 0)
 533                 *valp = val;
 534         return (error);
 535 }
 536 
 537 /*
 538  * Dispose of a page.
 539  */
 540 /* ARGSUSED */
 541 void
 542 fs_dispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
 543     caller_context_t *ct)
 544 {
 545 
 546         ASSERT(fl == B_FREE || fl == B_INVAL);
 547 
 548         if (fl == B_FREE)
 549                 page_free(pp, dn);
 550         else
 551                 page_destroy(pp, dn);
 552 }
 553 
 554 /* ARGSUSED */
 555 void
 556 fs_nodispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
 557     caller_context_t *ct)
 558 {
 559         cmn_err(CE_PANIC, "fs_nodispose invoked");
 560 }
 561 
 562 /*
 563  * fabricate acls for file systems that do not support acls.
 564  */
 565 /* ARGSUSED */
 566 int
 567 fs_fab_acl(vnode_t *vp, vsecattr_t *vsecattr, int flag, cred_t *cr,
 568     caller_context_t *ct)
 569 {
 570         aclent_t        *aclentp;
 571         struct vattr    vattr;
 572         int             error;
 573         size_t          aclsize;
 574 
 575         vsecattr->vsa_aclcnt = 0;
 576         vsecattr->vsa_aclentsz       = 0;
 577         vsecattr->vsa_aclentp        = NULL;
 578         vsecattr->vsa_dfaclcnt       = 0;    /* Default ACLs are not fabricated */
 579         vsecattr->vsa_dfaclentp      = NULL;
 580 
 581         vattr.va_mask = AT_MODE | AT_UID | AT_GID;
 582         if (error = VOP_GETATTR(vp, &vattr, 0, cr, ct))
 583                 return (error);
 584 
 585         if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) {
 586                 aclsize = 4 * sizeof (aclent_t);
 587                 vsecattr->vsa_aclcnt = 4; /* USER, GROUP, OTHER, and CLASS */
 588                 vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP);
 589                 aclentp = vsecattr->vsa_aclentp;
 590 
 591                 aclentp->a_type = USER_OBJ;  /* Owner */
 592                 aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6;
 593                 aclentp->a_id = vattr.va_uid;   /* Really undefined */
 594                 aclentp++;
 595 
 596                 aclentp->a_type = GROUP_OBJ;    /* Group */
 597                 aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3;
 598                 aclentp->a_id = vattr.va_gid;   /* Really undefined */
 599                 aclentp++;
 600 
 601                 aclentp->a_type = OTHER_OBJ;    /* Other */
 602                 aclentp->a_perm = vattr.va_mode & 0007;
 603                 aclentp->a_id = (gid_t)-1;   /* Really undefined */
 604                 aclentp++;
 605 
 606                 aclentp->a_type = CLASS_OBJ;    /* Class */
 607                 aclentp->a_perm = (ushort_t)(0007);
 608                 aclentp->a_id = (gid_t)-1;   /* Really undefined */
 609         } else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) {
 610                 VERIFY(0 == acl_trivial_create(vattr.va_mode,
 611                     (vp->v_type == VDIR), (ace_t **)&vsecattr->vsa_aclentp,
 612                     &vsecattr->vsa_aclcnt));
 613                 vsecattr->vsa_aclentsz = vsecattr->vsa_aclcnt * sizeof (ace_t);
 614         }
 615 
 616         return (error);
 617 }
 618 
 619 /*
 620  * Common code for implementing DOS share reservations
 621  */
 622 /* ARGSUSED4 */
 623 int
 624 fs_shrlock(struct vnode *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
 625     caller_context_t *ct)
 626 {
 627         int error;
 628 
 629         /*
 630          * Make sure that the file was opened with permissions appropriate
 631          * for the request, and make sure the caller isn't trying to sneak
 632          * in an NBMAND request.
 633          */
 634         if (cmd == F_SHARE) {
 635                 if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) ||
 636                     ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0))
 637                         return (EBADF);
 638                 if (shr->s_access & (F_RMACC | F_MDACC))
 639                         return (EINVAL);
 640                 if (shr->s_deny & (F_MANDDNY | F_RMDNY))
 641                         return (EINVAL);
 642         }
 643         if (cmd == F_SHARE_NBMAND) {
 644                 /* make sure nbmand is allowed on the file */
 645                 if (!vp->v_vfsp ||
 646                     !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
 647                         return (EINVAL);
 648                 }
 649                 if (vp->v_type != VREG) {
 650                         return (EINVAL);
 651                 }
 652         }
 653 
 654         nbl_start_crit(vp, RW_WRITER);
 655 
 656         switch (cmd) {
 657 
 658         case F_SHARE_NBMAND:
 659                 shr->s_deny |= F_MANDDNY;
 660                 /*FALLTHROUGH*/
 661         case F_SHARE:
 662                 error = add_share(vp, shr);
 663                 break;
 664 
 665         case F_UNSHARE:
 666                 error = del_share(vp, shr);
 667                 break;
 668 
 669         case F_HASREMOTELOCKS:
 670                 /*
 671                  * We are overloading this command to refer to remote
 672                  * shares as well as remote locks, despite its name.
 673                  */
 674                 shr->s_access = shr_has_remote_shares(vp, shr->s_sysid);
 675                 error = 0;
 676                 break;
 677 
 678         default:
 679                 error = EINVAL;
 680                 break;
 681         }
 682 
 683         nbl_end_crit(vp);
 684         return (error);
 685 }
 686 
 687 /*ARGSUSED1*/
 688 int
 689 fs_vnevent_nosupport(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
 690     caller_context_t *ct)
 691 {
 692         ASSERT(vp != NULL);
 693         return (ENOTSUP);
 694 }
 695 
 696 /*ARGSUSED1*/
 697 int
 698 fs_vnevent_support(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
 699     caller_context_t *ct)
 700 {
 701         ASSERT(vp != NULL);
 702         return (0);
 703 }
 704 
 705 /*
 706  * return 1 for non-trivial ACL.
 707  *
 708  * NB: It is not necessary for the caller to VOP_RWLOCK since
 709  *      we only issue VOP_GETSECATTR.
 710  *
 711  * Returns 0 == trivial
 712  *         1 == NOT Trivial
 713  *         <0 could not determine.
 714  */
 715 int
 716 fs_acl_nontrivial(vnode_t *vp, cred_t *cr)
 717 {
 718         ulong_t         acl_styles;
 719         ulong_t         acl_flavor;
 720         vsecattr_t      vsecattr;
 721         int             error;
 722         int             isnontrivial;
 723 
 724         /* determine the forms of ACLs maintained */
 725         error = VOP_PATHCONF(vp, _PC_ACL_ENABLED, &acl_styles, cr, NULL);
 726 
 727         /* clear bits we don't understand and establish default acl_style */
 728         acl_styles &= (_ACL_ACLENT_ENABLED | _ACL_ACE_ENABLED);
 729         if (error || (acl_styles == 0))
 730                 acl_styles = _ACL_ACLENT_ENABLED;
 731 
 732         vsecattr.vsa_aclentp = NULL;
 733         vsecattr.vsa_dfaclentp = NULL;
 734         vsecattr.vsa_aclcnt = 0;
 735         vsecattr.vsa_dfaclcnt = 0;
 736 
 737         while (acl_styles) {
 738                 /* select one of the styles as current flavor */
 739                 acl_flavor = 0;
 740                 if (acl_styles & _ACL_ACLENT_ENABLED) {
 741                         acl_flavor = _ACL_ACLENT_ENABLED;
 742                         vsecattr.vsa_mask = VSA_ACLCNT | VSA_DFACLCNT;
 743                 } else if (acl_styles & _ACL_ACE_ENABLED) {
 744                         acl_flavor = _ACL_ACE_ENABLED;
 745                         vsecattr.vsa_mask = VSA_ACECNT | VSA_ACE;
 746                 }
 747 
 748                 ASSERT(vsecattr.vsa_mask && acl_flavor);
 749                 error = VOP_GETSECATTR(vp, &vsecattr, 0, cr, NULL);
 750                 if (error == 0)
 751                         break;
 752 
 753                 /* that flavor failed */
 754                 acl_styles &= ~acl_flavor;
 755         }
 756 
 757         /* if all styles fail then assume trivial */
 758         if (acl_styles == 0)
 759                 return (0);
 760 
 761         /* process the flavor that worked */
 762         isnontrivial = 0;
 763         if (acl_flavor & _ACL_ACLENT_ENABLED) {
 764                 if (vsecattr.vsa_aclcnt > MIN_ACL_ENTRIES)
 765                         isnontrivial = 1;
 766                 if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
 767                         kmem_free(vsecattr.vsa_aclentp,
 768                             vsecattr.vsa_aclcnt * sizeof (aclent_t));
 769                 if (vsecattr.vsa_dfaclcnt && vsecattr.vsa_dfaclentp != NULL)
 770                         kmem_free(vsecattr.vsa_dfaclentp,
 771                             vsecattr.vsa_dfaclcnt * sizeof (aclent_t));
 772         }
 773         if (acl_flavor & _ACL_ACE_ENABLED) {
 774                 isnontrivial = ace_trivial(vsecattr.vsa_aclentp,
 775                     vsecattr.vsa_aclcnt);
 776 
 777                 if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
 778                         kmem_free(vsecattr.vsa_aclentp,
 779                             vsecattr.vsa_aclcnt * sizeof (ace_t));
 780                 /* ACE has no vsecattr.vsa_dfaclcnt */
 781         }
 782         return (isnontrivial);
 783 }
 784 
 785 /*
 786  * Check whether we need a retry to recover from STALE error.
 787  */
 788 int
 789 fs_need_estale_retry(int retry_count)
 790 {
 791         if (retry_count < fs_estale_retry)
 792                 return (1);
 793         else
 794                 return (0);
 795 }
 796 
 797 
 798 static int (*fs_av_scan)(vnode_t *, cred_t *, int) = NULL;
 799 
 800 /*
 801  * Routine for anti-virus scanner to call to register its scanning routine.
 802  */
 803 void
 804 fs_vscan_register(int (*av_scan)(vnode_t *, cred_t *, int))
 805 {
 806         fs_av_scan = av_scan;
 807 }
 808 
 809 /*
 810  * Routine for file systems to call to initiate anti-virus scanning.
 811  * Scanning will only be done on REGular files (currently).
 812  */
 813 int
 814 fs_vscan(vnode_t *vp, cred_t *cr, int async)
 815 {
 816         int ret = 0;
 817 
 818         if (fs_av_scan && vp->v_type == VREG)
 819                 ret = (*fs_av_scan)(vp, cr, async);
 820 
 821         return (ret);
 822 }
 823 
 824 /*
 825  * support functions for reparse point
 826  */
 827 /*
 828  * reparse_vnode_parse
 829  *
 830  * Read the symlink data of a reparse point specified by the vnode
 831  * and return the reparse data as name-value pair in the nvlist.
 832  */
 833 int
 834 reparse_vnode_parse(vnode_t *vp, nvlist_t *nvl)
 835 {
 836         int err;
 837         char *lkdata;
 838         struct uio uio;
 839         struct iovec iov;
 840 
 841         if (vp == NULL || nvl == NULL)
 842                 return (EINVAL);
 843 
 844         lkdata = kmem_alloc(MAXREPARSELEN, KM_SLEEP);
 845 
 846         /*
 847          * Set up io vector to read sym link data
 848          */
 849         iov.iov_base = lkdata;
 850         iov.iov_len = MAXREPARSELEN;
 851         uio.uio_iov = &iov;
 852         uio.uio_iovcnt = 1;
 853         uio.uio_segflg = UIO_SYSSPACE;
 854         uio.uio_extflg = UIO_COPY_CACHED;
 855         uio.uio_loffset = (offset_t)0;
 856         uio.uio_resid = MAXREPARSELEN;
 857 
 858         if ((err = VOP_READLINK(vp, &uio, kcred, NULL)) == 0) {
 859                 *(lkdata + MAXREPARSELEN - uio.uio_resid) = '\0';
 860                 err = reparse_parse(lkdata, nvl);
 861         }
 862         kmem_free(lkdata, MAXREPARSELEN);       /* done with lkdata */
 863 
 864         return (err);
 865 }
 866 
 867 void
 868 reparse_point_init()
 869 {
 870         mutex_init(&reparsed_door_lock, NULL, MUTEX_DEFAULT, NULL);
 871 }
 872 
 873 static door_handle_t
 874 reparse_door_get_handle()
 875 {
 876         door_handle_t dh;
 877 
 878         mutex_enter(&reparsed_door_lock);
 879         if ((dh = reparsed_door) == NULL) {
 880                 if (door_ki_open(REPARSED_DOOR, &reparsed_door) != 0) {
 881                         reparsed_door = NULL;
 882                         dh = NULL;
 883                 } else
 884                         dh = reparsed_door;
 885         }
 886         mutex_exit(&reparsed_door_lock);
 887         return (dh);
 888 }
 889 
 890 static void
 891 reparse_door_reset_handle()
 892 {
 893         mutex_enter(&reparsed_door_lock);
 894         reparsed_door = NULL;
 895         mutex_exit(&reparsed_door_lock);
 896 }
 897 
 898 /*
 899  * reparse_kderef
 900  *
 901  * Accepts the service-specific item from the reparse point and returns
 902  * the service-specific data requested.  The caller specifies the size of
 903  * the buffer provided via *bufsz; the routine will fail with EOVERFLOW
 904  * if the results will not fit in the buffer, in which case, *bufsz will
 905  * contain the number of bytes needed to hold the results.
 906  *
 907  * if ok return 0 and update *bufsize with length of actual result
 908  * else return error code.
 909  */
 910 int
 911 reparse_kderef(const char *svc_type, const char *svc_data, char *buf,
 912     size_t *bufsize)
 913 {
 914         int err, retries, need_free, retried_doorhd;
 915         size_t dlen, res_len;
 916         char *darg;
 917         door_arg_t door_args;
 918         reparsed_door_res_t *resp;
 919         door_handle_t rp_door;
 920 
 921         if (svc_type == NULL || svc_data == NULL || buf == NULL ||
 922             bufsize == NULL)
 923                 return (EINVAL);
 924 
 925         /* get reparsed's door handle */
 926         if ((rp_door = reparse_door_get_handle()) == NULL)
 927                 return (EBADF);
 928 
 929         /* setup buffer for door_call args and results */
 930         dlen = strlen(svc_type) + strlen(svc_data) + 2;
 931         if (*bufsize < dlen) {
 932                 darg = kmem_alloc(dlen, KM_SLEEP);
 933                 need_free = 1;
 934         } else {
 935                 darg = buf;     /* use same buffer for door's args & results */
 936                 need_free = 0;
 937         }
 938 
 939         /* build argument string of door call */
 940         (void) snprintf(darg, dlen, "%s:%s", svc_type, svc_data);
 941 
 942         /* setup args for door call */
 943         door_args.data_ptr = darg;
 944         door_args.data_size = dlen;
 945         door_args.desc_ptr = NULL;
 946         door_args.desc_num = 0;
 947         door_args.rbuf = buf;
 948         door_args.rsize = *bufsize;
 949 
 950         /* do the door_call */
 951         retried_doorhd = 0;
 952         retries = 0;
 953         door_ki_hold(rp_door);
 954         while ((err = door_ki_upcall_limited(rp_door, &door_args,
 955             NULL, SIZE_MAX, 0)) != 0) {
 956                 if (err == EAGAIN || err == EINTR) {
 957                         if (++retries < REPARSED_DOORCALL_MAX_RETRY) {
 958                                 delay(SEC_TO_TICK(1));
 959                                 continue;
 960                         }
 961                 } else if (err == EBADF) {
 962                         /* door server goes away... */
 963                         reparse_door_reset_handle();
 964 
 965                         if (retried_doorhd == 0) {
 966                                 door_ki_rele(rp_door);
 967                                 retried_doorhd++;
 968                                 rp_door = reparse_door_get_handle();
 969                                 if (rp_door != NULL) {
 970                                         door_ki_hold(rp_door);
 971                                         continue;
 972                                 }
 973                         }
 974                 }
 975                 break;
 976         }
 977 
 978         if (rp_door)
 979                 door_ki_rele(rp_door);
 980 
 981         if (need_free)
 982                 kmem_free(darg, dlen);          /* done with args buffer */
 983 
 984         if (err != 0)
 985                 return (err);
 986 
 987         resp = (reparsed_door_res_t *)door_args.rbuf;
 988         if ((err = resp->res_status) == 0) {
 989                 /*
 990                  * have to save the length of the results before the
 991                  * bcopy below since it's can be an overlap copy that
 992                  * overwrites the reparsed_door_res_t structure at
 993                  * the beginning of the buffer.
 994                  */
 995                 res_len = (size_t)resp->res_len;
 996 
 997                 /* deref call is ok */
 998                 if (res_len > *bufsize)
 999                         err = EOVERFLOW;
1000                 else
1001                         bcopy(resp->res_data, buf, res_len);
1002                 *bufsize = res_len;
1003         }
1004         if (door_args.rbuf != buf)
1005                 kmem_free(door_args.rbuf, door_args.rsize);
1006 
1007         return (err);
1008 }