1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
  14  */
  15 
  16 #include <sys/types.h>
  17 #include <sys/param.h>
  18 #include <sys/systm.h>
  19 #include <sys/t_lock.h>
  20 #include <sys/errno.h>
  21 #include <sys/cred.h>
  22 #include <sys/user.h>
  23 #include <sys/uio.h>
  24 #include <sys/file.h>
  25 #include <sys/pathname.h>
  26 #include <sys/vfs.h>
  27 #include <sys/vnode.h>
  28 #include <sys/stat.h>
  29 #include <sys/mode.h>
  30 #include <sys/kmem.h>
  31 #include <sys/cmn_err.h>
  32 #include <sys/debug.h>
  33 #include <sys/atomic.h>
  34 #include <sys/acl.h>
  35 #include <sys/filio.h>
  36 #include <sys/flock.h>
  37 #include <sys/nbmlock.h>
  38 #include <sys/fcntl.h>
  39 #include <sys/poll.h>
  40 #include <sys/time.h>
  41 
  42 #include <errno.h>
  43 #include <fcntl.h>
  44 #include <unistd.h>
  45 
  46 #include "vncache.h"
  47 
/* Mask covering the write-capable access-mode bits of open(2) flags. */
#define O_RWMASK        (O_WRONLY | O_RDWR) /* == 3 */

/*
 * NOTE(review): not referenced in the visible part of this file;
 * presumably a switch consulted by a share-lock operation elsewhere.
 */
int fop_shrlock_enable = 0;

/* Helpers called below; their definitions are not in this part of the file. */
int stat_to_vattr(const struct stat *, vattr_t *);
int fop__getxvattr(vnode_t *, xvattr_t *);
int fop__setxvattr(vnode_t *, xvattr_t *);

/* Forward declaration: used by fake_lookup_xattrdir() and fop_inactive(). */
static void fake_inactive_xattrdir(vnode_t *);
  57 
  58 /* ARGSUSED */
  59 int
  60 fop_open(
  61         vnode_t **vpp,
  62         int mode,
  63         cred_t *cr,
  64         caller_context_t *ct)
  65 {
  66 
  67         if ((*vpp)->v_type == VREG) {
  68                 if (mode & FREAD)
  69                         atomic_add_32(&((*vpp)->v_rdcnt), 1);
  70                 if (mode & FWRITE)
  71                         atomic_add_32(&((*vpp)->v_wrcnt), 1);
  72         }
  73 
  74         /* call to ->vop_open was here */
  75 
  76         return (0);
  77 }
  78 
  79 /* ARGSUSED */
  80 int
  81 fop_close(
  82         vnode_t *vp,
  83         int flag,
  84         int count,
  85         offset_t offset,
  86         cred_t *cr,
  87         caller_context_t *ct)
  88 {
  89 
  90         /* call to ->vop_close was here */
  91 
  92         /*
  93          * Check passed in count to handle possible dups. Vnode counts are only
  94          * kept on regular files
  95          */
  96         if ((vp->v_type == VREG) && (count == 1))  {
  97                 if (flag & FREAD) {
  98                         ASSERT(vp->v_rdcnt > 0);
  99                         atomic_add_32(&(vp->v_rdcnt), -1);
 100                 }
 101                 if (flag & FWRITE) {
 102                         ASSERT(vp->v_wrcnt > 0);
 103                         atomic_add_32(&(vp->v_wrcnt), -1);
 104                 }
 105         }
 106         return (0);
 107 }
 108 
 109 /* ARGSUSED */
 110 int
 111 fop_read(
 112         vnode_t *vp,
 113         uio_t *uio,
 114         int ioflag,
 115         cred_t *cr,
 116         caller_context_t *ct)
 117 {
 118         struct stat st;
 119         struct iovec *iov;
 120         ssize_t resid;
 121         size_t cnt;
 122         int n;
 123 
 124         /*
 125          * If that caller asks for read beyond end of file,
 126          * that causes the pread call to block.  (Ugh!)
 127          * Get the file size and return what we can.
 128          */
 129         (void) fstat(vp->v_fd, &st);
 130         resid = uio->uio_resid;
 131         if ((uio->uio_loffset + resid) > st.st_size)
 132                 resid = st.st_size - uio->uio_loffset;
 133 
 134         while (resid > 0) {
 135 
 136                 ASSERT(uio->uio_iovcnt > 0);
 137                 iov = uio->uio_iov;
 138 
 139                 if (iov->iov_len == 0) {
 140                         uio->uio_iov++;
 141                         uio->uio_iovcnt--;
 142                         continue;
 143                 }
 144                 cnt = iov->iov_len;
 145                 if (cnt > resid)
 146                         cnt = resid;
 147 
 148                 n = pread(vp->v_fd, iov->iov_base, cnt, uio->uio_loffset);
 149                 if (n < 0)
 150                         return (errno);
 151 
 152                 iov->iov_base += n;
 153                 iov->iov_len -= n;
 154 
 155                 uio->uio_resid -= n;
 156                 uio->uio_loffset += n;
 157 
 158                 resid -= n;
 159         }
 160 
 161         return (0);
 162 }
 163 
 164 /* ARGSUSED */
 165 int
 166 fop_write(
 167         vnode_t *vp,
 168         uio_t *uio,
 169         int ioflag,
 170         cred_t *cr,
 171         caller_context_t *ct)
 172 {
 173         struct iovec *iov;
 174         size_t cnt;
 175         int n;
 176 
 177         while (uio->uio_resid > 0) {
 178 
 179                 ASSERT(uio->uio_iovcnt > 0);
 180                 iov = uio->uio_iov;
 181 
 182                 if (iov->iov_len == 0) {
 183                         uio->uio_iov++;
 184                         uio->uio_iovcnt--;
 185                         continue;
 186                 }
 187                 cnt = iov->iov_len;
 188                 if (cnt > uio->uio_resid)
 189                         cnt = uio->uio_resid;
 190 
 191                 n = pwrite(vp->v_fd, iov->iov_base, iov->iov_len,
 192                     uio->uio_loffset);
 193                 if (n < 0)
 194                         return (errno);
 195 
 196                 iov->iov_base += n;
 197                 iov->iov_len -= n;
 198 
 199                 uio->uio_resid -= n;
 200                 uio->uio_loffset += n;
 201         }
 202 
 203         if (ioflag == FSYNC) {
 204                 (void) fsync(vp->v_fd);
 205         }
 206 
 207         return (0);
 208 }
 209 
 210 /* ARGSUSED */
 211 int
 212 fop_ioctl(
 213         vnode_t *vp,
 214         int cmd,
 215         intptr_t arg,
 216         int flag,
 217         cred_t *cr,
 218         int *rvalp,
 219         caller_context_t *ct)
 220 {
 221         off64_t off;
 222         int rv, whence;
 223 
 224         switch (cmd) {
 225         case _FIO_SEEK_DATA:
 226         case _FIO_SEEK_HOLE:
 227                 whence = (cmd == _FIO_SEEK_DATA) ? SEEK_DATA : SEEK_HOLE;
 228                 bcopy((void *)arg, &off, sizeof (off));
 229                 off = lseek(vp->v_fd, off, whence);
 230                 if (off == (off64_t)-1) {
 231                         rv = errno;
 232                 } else {
 233                         bcopy(&off, (void *)arg, sizeof (off));
 234                         rv = 0;
 235                 }
 236                 break;
 237 
 238         default:
 239                 rv = ENOTTY;
 240                 break;
 241         }
 242 
 243         return (rv);
 244 }
 245 
 246 /* ARGSUSED */
 247 int
 248 fop_setfl(
 249         vnode_t *vp,
 250         int oflags,
 251         int nflags,
 252         cred_t *cr,
 253         caller_context_t *ct)
 254 {
 255         /* allow any flags? See fs_setfl */
 256         return (0);
 257 }
 258 
 259 /* ARGSUSED */
 260 int
 261 fop_getattr(
 262         vnode_t *vp,
 263         vattr_t *vap,
 264         int flags,
 265         cred_t *cr,
 266         caller_context_t *ct)
 267 {
 268         int error;
 269         struct stat st;
 270 
 271         if (fstat(vp->v_fd, &st) == -1)
 272                 return (errno);
 273         error = stat_to_vattr(&st, vap);
 274 
 275         if (vap->va_mask & AT_XVATTR)
 276                 (void) fop__getxvattr(vp, (xvattr_t *)vap);
 277 
 278         return (error);
 279 }
 280 
 281 /* ARGSUSED */
 282 int
 283 fop_setattr(
 284         vnode_t *vp,
 285         vattr_t *vap,
 286         int flags,
 287         cred_t *cr,
 288         caller_context_t *ct)
 289 {
 290         timespec_t times[2];
 291         int err;
 292 
 293         if (vap->va_mask & AT_SIZE) {
 294                 if (ftruncate(vp->v_fd, vap->va_size) == -1) {
 295                         err = errno;
 296                         if (err == EBADF)
 297                                 err = EACCES;
 298                         return (err);
 299                 }
 300         }
 301 
 302         /* AT_MODE or anything else? */
 303 
 304         if (vap->va_mask & AT_XVATTR)
 305                 (void) fop__setxvattr(vp, (xvattr_t *)vap);
 306 
 307         if (vap->va_mask & (AT_ATIME | AT_MTIME)) {
 308                 if (vap->va_mask & AT_ATIME) {
 309                         times[0] = vap->va_atime;
 310                 } else {
 311                         times[0].tv_sec = 0;
 312                         times[0].tv_nsec = UTIME_OMIT;
 313                 }
 314                 if (vap->va_mask & AT_MTIME) {
 315                         times[1] = vap->va_mtime;
 316                 } else {
 317                         times[1].tv_sec = 0;
 318                         times[1].tv_nsec = UTIME_OMIT;
 319                 }
 320 
 321                 (void) futimens(vp->v_fd, times);
 322         }
 323 
 324         return (0);
 325 }
 326 
 327 /* ARGSUSED */
 328 int
 329 fop_access(
 330         vnode_t *vp,
 331         int mode,
 332         int flags,
 333         cred_t *cr,
 334         caller_context_t *ct)
 335 {
 336         return (0);
 337 }
 338 
 339 /*
 340  * Conceptually like xattr_dir_lookup()
 341  */
 342 static int
 343 fake_lookup_xattrdir(
 344         vnode_t *dvp,
 345         vnode_t **vpp)
 346 {
 347         int len, fd;
 348         int omode = O_RDWR | O_NOFOLLOW;
 349         vnode_t *vp;
 350 
 351         *vpp = NULL;
 352 
 353         if (dvp->v_type != VDIR && dvp->v_type != VREG)
 354                 return (EINVAL);
 355 
 356         /*
 357          * If we're already in sysattr space, don't allow creation
 358          * of another level of sysattrs.
 359          */
 360         if (dvp->v_flag & V_SYSATTR)
 361                 return (EINVAL);
 362 
 363         mutex_enter(&dvp->v_lock);
 364         if (dvp->v_xattrdir != NULL) {
 365                 *vpp = dvp->v_xattrdir;
 366                 VN_HOLD(*vpp);
 367                 mutex_exit(&dvp->v_lock);
 368                 return (0);
 369         }
 370         mutex_exit(&dvp->v_lock);
 371 
 372         omode = O_RDONLY|O_XATTR;
 373         fd = openat(dvp->v_fd, ".", omode);
 374         if (fd < 0)
 375                 return (errno);
 376 
 377         vp = vn_alloc(KM_SLEEP);
 378         vp->v_fd = fd;
 379         vp->v_flag = V_XATTRDIR|V_SYSATTR;
 380         vp->v_type = VDIR;
 381         vp->v_vfsp = dvp->v_vfsp;
 382 
 383         /* Set v_path to parent path + "/@" (like NFS) */
 384         len = strlen(dvp->v_path) + 3;
 385         vp->v_path = kmem_alloc(len, KM_SLEEP);
 386         (void) snprintf(vp->v_path, len, "%s/@", dvp->v_path);
 387 
 388         /*
 389          * Keep a pointer to the parent and a hold on it.
 390          * Both are cleaned up in fake_inactive_xattrdir
 391          */
 392         vp->v_data = dvp;
 393         vn_hold(dvp);
 394 
 395         mutex_enter(&dvp->v_lock);
 396         if (dvp->v_xattrdir == NULL) {
 397                 *vpp = dvp->v_xattrdir = vp;
 398                 mutex_exit(&dvp->v_lock);
 399         } else {
 400                 *vpp = dvp->v_xattrdir;
 401                 mutex_exit(&dvp->v_lock);
 402                 fake_inactive_xattrdir(vp);
 403         }
 404 
 405         return (0);
 406 }
 407 
 408 /* ARGSUSED */
 409 int
 410 fop_lookup(
 411         vnode_t *dvp,
 412         char *name,
 413         vnode_t **vpp,
 414         pathname_t *pnp,
 415         int flags,
 416         vnode_t *rdir,
 417         cred_t *cr,
 418         caller_context_t *ct,
 419         int *deflags,           /* Returned per-dirent flags */
 420         pathname_t *ppnp)       /* Returned case-preserved name in directory */
 421 {
 422         int fd;
 423         int omode = O_RDWR | O_NOFOLLOW;
 424         vnode_t *vp;
 425         struct stat st;
 426 
 427         if (flags & LOOKUP_XATTR)
 428                 return (fake_lookup_xattrdir(dvp, vpp));
 429 
 430         /*
 431          * If lookup is for "", just return dvp.
 432          */
 433         if (name[0] == '\0') {
 434                 vn_hold(dvp);
 435                 *vpp = dvp;
 436                 return (0);
 437         }
 438 
 439         if (fstatat(dvp->v_fd, name, &st, AT_SYMLINK_NOFOLLOW) == -1)
 440                 return (errno);
 441 
 442         vp = vncache_lookup(&st);
 443         if (vp != NULL) {
 444                 /* lookup gave us a hold */
 445                 *vpp = vp;
 446                 return (0);
 447         }
 448 
 449         if (S_ISDIR(st.st_mode))
 450                 omode = O_RDONLY | O_NOFOLLOW;
 451 
 452 again:
 453         fd = openat(dvp->v_fd, name, omode, 0);
 454         if (fd < 0) {
 455                 if ((omode & O_RWMASK) == O_RDWR) {
 456                         omode &= ~O_RWMASK;
 457                         omode |= O_RDONLY;
 458                         goto again;
 459                 }
 460                 return (errno);
 461         }
 462 
 463         if (fstat(fd, &st) == -1) {
 464                 (void) close(fd);
 465                 return (errno);
 466         }
 467 
 468         vp = vncache_enter(&st, dvp, name, fd);
 469 
 470         *vpp = vp;
 471         return (0);
 472 }
 473 
 474 /* ARGSUSED */
 475 int
 476 fop_create(
 477         vnode_t *dvp,
 478         char *name,
 479         vattr_t *vap,
 480         vcexcl_t excl,
 481         int mode,
 482         vnode_t **vpp,
 483         cred_t *cr,
 484         int flags,
 485         caller_context_t *ct,
 486         vsecattr_t *vsecp)      /* ACL to set during create */
 487 {
 488         struct stat st;
 489         vnode_t *vp;
 490         int err, fd, omode;
 491 
 492         /*
 493          * If creating "", just return dvp.
 494          */
 495         if (name[0] == '\0') {
 496                 vn_hold(dvp);
 497                 *vpp = dvp;
 498                 return (0);
 499         }
 500 
 501         err = fstatat(dvp->v_fd, name, &st, AT_SYMLINK_NOFOLLOW);
 502         if (err != 0)
 503                 err = errno;
 504 
 505         vp = NULL;
 506         if (err == 0) {
 507                 /* The file already exists. */
 508                 if (excl == EXCL)
 509                         return (EEXIST);
 510 
 511                 vp = vncache_lookup(&st);
 512                 /* vp gained a hold */
 513         }
 514 
 515         if (vp == NULL) {
 516                 /*
 517                  * Open it. (may or may not exist)
 518                  */
 519                 omode = O_RDWR | O_CREAT | O_NOFOLLOW;
 520                 if (excl == EXCL)
 521                         omode |= O_EXCL;
 522         open_again:
 523                 fd = openat(dvp->v_fd, name, omode, mode);
 524                 if (fd < 0) {
 525                         if ((omode & O_RWMASK) == O_RDWR) {
 526                                 omode &= ~O_RWMASK;
 527                                 omode |= O_RDONLY;
 528                                 goto open_again;
 529                         }
 530                         return (errno);
 531                 }
 532                 (void) fstat(fd, &st);
 533 
 534                 vp = vncache_enter(&st, dvp, name, fd);
 535                 /* vp has its initial hold */
 536         }
 537 
 538         /* Should have the vp now. */
 539         if (vp == NULL)
 540                 return (EFAULT);
 541 
 542         if (vp->v_type == VDIR && vap->va_type != VDIR) {
 543                 vn_rele(vp);
 544                 return (EISDIR);
 545         }
 546         if (vp->v_type != VDIR && vap->va_type == VDIR) {
 547                 vn_rele(vp);
 548                 return (ENOTDIR);
 549         }
 550 
 551         /*
 552          * Might need to set attributes.
 553          */
 554         (void) fop_setattr(vp, vap, 0, cr, ct);
 555 
 556         *vpp = vp;
 557         return (0);
 558 }
 559 
 560 /* ARGSUSED */
 561 int
 562 fop_remove(
 563         vnode_t *dvp,
 564         char *name,
 565         cred_t *cr,
 566         caller_context_t *ct,
 567         int flags)
 568 {
 569 
 570         if (unlinkat(dvp->v_fd, name, 0))
 571                 return (errno);
 572 
 573         return (0);
 574 }
 575 
 576 /* ARGSUSED */
 577 int
 578 fop_link(
 579         vnode_t *to_dvp,
 580         vnode_t *fr_vp,
 581         char *to_name,
 582         cred_t *cr,
 583         caller_context_t *ct,
 584         int flags)
 585 {
 586         int err;
 587 
 588         /*
 589          * Would prefer to specify "from" as the combination:
 590          * (fr_vp->v_fd, NULL) but linkat does not permit it.
 591          */
 592         err = linkat(AT_FDCWD, fr_vp->v_path, to_dvp->v_fd, to_name,
 593             AT_SYMLINK_FOLLOW);
 594         if (err == -1)
 595                 err = errno;
 596 
 597         return (err);
 598 }
 599 
 600 /* ARGSUSED */
 601 int
 602 fop_rename(
 603         vnode_t *from_dvp,
 604         char *from_name,
 605         vnode_t *to_dvp,
 606         char *to_name,
 607         cred_t *cr,
 608         caller_context_t *ct,
 609         int flags)
 610 {
 611         struct stat st;
 612         vnode_t *vp;
 613         int err;
 614 
 615         if (fstatat(from_dvp->v_fd, from_name, &st,
 616             AT_SYMLINK_NOFOLLOW) == -1)
 617                 return (errno);
 618 
 619         vp = vncache_lookup(&st);
 620         if (vp == NULL)
 621                 return (ENOENT);
 622 
 623         err = renameat(from_dvp->v_fd, from_name, to_dvp->v_fd, to_name);
 624         if (err == -1)
 625                 err = errno;
 626         else
 627                 vncache_renamed(vp, to_dvp, to_name);
 628 
 629         vn_rele(vp);
 630 
 631         return (err);
 632 }
 633 
 634 /* ARGSUSED */
 635 int
 636 fop_mkdir(
 637         vnode_t *dvp,
 638         char *name,
 639         vattr_t *vap,
 640         vnode_t **vpp,
 641         cred_t *cr,
 642         caller_context_t *ct,
 643         int flags,
 644         vsecattr_t *vsecp)      /* ACL to set during create */
 645 {
 646         struct stat st;
 647         int err, fd;
 648 
 649         mode_t mode = vap->va_mode & 0777;
 650 
 651         if (mkdirat(dvp->v_fd, name, mode) == -1)
 652                 return (errno);
 653 
 654         if ((fd = openat(dvp->v_fd, name, O_RDONLY)) == -1)
 655                 return (errno);
 656         if (fstat(fd, &st) == -1) {
 657                 err = errno;
 658                 (void) close(fd);
 659                 return (err);
 660         }
 661 
 662         *vpp = vncache_enter(&st, dvp, name, fd);
 663 
 664         /*
 665          * Might need to set attributes.
 666          */
 667         (void) fop_setattr(*vpp, vap, 0, cr, ct);
 668 
 669         return (0);
 670 }
 671 
 672 /* ARGSUSED */
 673 int
 674 fop_rmdir(
 675         vnode_t *dvp,
 676         char *name,
 677         vnode_t *cdir,
 678         cred_t *cr,
 679         caller_context_t *ct,
 680         int flags)
 681 {
 682 
 683         if (unlinkat(dvp->v_fd, name, AT_REMOVEDIR) == -1)
 684                 return (errno);
 685 
 686         return (0);
 687 }
 688 
 689 /* ARGSUSED */
 690 int
 691 fop_readdir(
 692         vnode_t *vp,
 693         uio_t *uiop,
 694         cred_t *cr,
 695         int *eofp,
 696         caller_context_t *ct,
 697         int flags)
 698 {
 699         struct iovec *iov;
 700         int cnt;
 701         int error = 0;
 702         int fd = vp->v_fd;
 703 
 704         if (eofp) {
 705                 *eofp = 0;
 706         }
 707 
 708         error = lseek(fd, uiop->uio_loffset, SEEK_SET);
 709         if (error == -1)
 710                 return (errno);
 711 
 712         ASSERT(uiop->uio_iovcnt > 0);
 713         iov = uiop->uio_iov;
 714         if (iov->iov_len < sizeof (struct dirent))
 715                 return (EINVAL);
 716 
 717         /* LINTED E_BAD_PTR_CAST_ALIGN */
 718         cnt = getdents(fd, (struct dirent *)(uiop->uio_iov->iov_base),
 719             uiop->uio_resid);
 720         if (cnt == -1)
 721                 return (errno);
 722         if (cnt == 0) {
 723                 if (eofp) {
 724                         *eofp = 1;
 725                 }
 726                 return (ENOENT);
 727         }
 728 
 729         iov->iov_base += cnt;
 730         iov->iov_len  -= cnt;
 731         uiop->uio_resid -= cnt;
 732         uiop->uio_loffset = lseek(fd, 0LL, SEEK_CUR);
 733 
 734         return (0);
 735 }
 736 
 737 /* ARGSUSED */
 738 int
 739 fop_symlink(
 740         vnode_t *dvp,
 741         char *linkname,
 742         vattr_t *vap,
 743         char *target,
 744         cred_t *cr,
 745         caller_context_t *ct,
 746         int flags)
 747 {
 748         return (ENOSYS);
 749 }
 750 
 751 /* ARGSUSED */
 752 int
 753 fop_readlink(
 754         vnode_t *vp,
 755         uio_t *uiop,
 756         cred_t *cr,
 757         caller_context_t *ct)
 758 {
 759         return (ENOSYS);
 760 }
 761 
 762 /* ARGSUSED */
 763 int
 764 fop_fsync(
 765         vnode_t *vp,
 766         int syncflag,
 767         cred_t *cr,
 768         caller_context_t *ct)
 769 {
 770 
 771         if (fsync(vp->v_fd) == -1)
 772                 return (errno);
 773 
 774         return (0);
 775 }
 776 
 777 /* ARGSUSED */
 778 void
 779 fop_inactive(
 780         vnode_t *vp,
 781         cred_t *cr,
 782         caller_context_t *ct)
 783 {
 784         if (vp->v_flag & V_XATTRDIR) {
 785                 fake_inactive_xattrdir(vp);
 786         } else {
 787                 vncache_inactive(vp);
 788         }
 789 }
 790 
 791 /*
 792  * The special xattr directories are not in the vncache AVL, but
 793  * hang off the parent's v_xattrdir field.  When vn_rele finds
 794  * an xattr dir at v_count == 1 it calls here, but until we
 795  * take locks on both the parent and the xattrdir, we don't
 796  * know if we're really at the last reference.  So in here we
 797  * take both locks, re-check the count, and either bail out
 798  * or proceed with "inactive" vnode cleanup.  Part of that
 799  * cleanup includes releasing the hold on the parent and
 800  * clearing the parent's v_xattrdir field, which were
 801  * setup in fake_lookup_xattrdir()
 802  */
 803 static void
 804 fake_inactive_xattrdir(vnode_t *vp)
 805 {
 806         vnode_t *dvp = vp->v_data; /* parent */
 807         mutex_enter(&dvp->v_lock);
 808         mutex_enter(&vp->v_lock);
 809         if (vp->v_count > 1) {
 810                 /* new ref. via v_xattrdir */
 811                 mutex_exit(&vp->v_lock);
 812                 mutex_exit(&dvp->v_lock);
 813                 return;
 814         }
 815         ASSERT(dvp->v_xattrdir == vp);
 816         dvp->v_xattrdir = NULL;
 817         mutex_exit(&vp->v_lock);
 818         mutex_exit(&dvp->v_lock);
 819         vn_rele(dvp);
 820         vn_free(vp);
 821 }
 822 
 823 /* ARGSUSED */
 824 int
 825 fop_fid(
 826         vnode_t *vp,
 827         fid_t *fidp,
 828         caller_context_t *ct)
 829 {
 830         return (ENOSYS);
 831 }
 832 
 833 /* ARGSUSED */
 834 int
 835 fop_rwlock(
 836         vnode_t *vp,
 837         int write_lock,
 838         caller_context_t *ct)
 839 {
 840         /* See: fs_rwlock */
 841         return (-1);
 842 }
 843 
 844 /* ARGSUSED */
 845 void
 846 fop_rwunlock(
 847         vnode_t *vp,
 848         int write_lock,
 849         caller_context_t *ct)
 850 {
 851         /* See: fs_rwunlock */
 852 }
 853 
 854 /* ARGSUSED */
 855 int
 856 fop_seek(
 857         vnode_t *vp,
 858         offset_t ooff,
 859         offset_t *noffp,
 860         caller_context_t *ct)
 861 {
 862         return (ENOSYS);
 863 }
 864 
 865 /* ARGSUSED */
 866 int
 867 fop_cmp(
 868         vnode_t *vp1,
 869         vnode_t *vp2,
 870         caller_context_t *ct)
 871 {
 872         /* See fs_cmp */
 873         return (vncache_cmp(vp1, vp2));
 874 }
 875 
 876 /* ARGSUSED */
 877 int
 878 fop_frlock(
 879         vnode_t *vp,
 880         int cmd,
 881         flock64_t *bfp,
 882         int flag,
 883         offset_t offset,
 884         struct flk_callback *flk_cbp,
 885         cred_t *cr,
 886         caller_context_t *ct)
 887 {
 888 #if defined(_LP64)
 889         offset_t maxoffset = INT64_MAX;
 890 #elif defined(_ILP32)
 891         /*
 892          * Sadly, the fcntl API enforces 32-bit offsets,
 893          * even though we have _FILE_OFFSET_BITS=64
 894          */
 895         offset_t maxoffset = INT32_MAX;
 896 #else
 897 #error "unsupported env."
 898 #endif
 899 
 900         /* See fs_frlock */
 901 
 902         switch (cmd) {
 903         case F_GETLK:
 904         case F_SETLK_NBMAND:
 905         case F_SETLK:
 906         case F_SETLKW:
 907                 break;
 908         default:
 909                 return (EINVAL);
 910         }
 911 
 912         /* We only get SEEK_SET ranges here. */
 913         if (bfp->l_whence != 0)
 914                 return (EINVAL);
 915 
 916         /*
 917          * One limitation of using fcntl(2) F_SETLK etc is that
 918          * the real kernel limits the offsets we can use.
 919          * (Maybe the fcntl API should loosen that up?)
 920          * See syscall/fcntl.c:flock_check()
 921          *
 922          * Here in libfksmbsrv we can just ignore such locks,
 923          * or ignore the part that extends beyond maxoffset.
 924          * The SMB layer still keeps track of such locks for
 925          * conflict detection, so not reflecting such locks
 926          * into the real FS layer is OK.  Note: this may
 927          * modify the pased bfp->l_len.
 928          */
 929         if (bfp->l_start < 0 || bfp->l_start > maxoffset)
 930                 return (0);
 931         if (bfp->l_len < 0 || bfp->l_len > maxoffset)
 932                 return (0);
 933         if (bfp->l_len > (maxoffset - bfp->l_start + 1))
 934                 bfp->l_len = (maxoffset - bfp->l_start + 1);
 935 
 936         if (fcntl(vp->v_fd, cmd, bfp) == -1)
 937                 return (errno);
 938 
 939         return (0);
 940 }
 941 
 942 /* ARGSUSED */
 943 int
 944 fop_space(
 945         vnode_t *vp,
 946         int cmd,
 947         flock64_t *bfp,
 948         int flag,
 949         offset_t offset,
 950         cred_t *cr,
 951         caller_context_t *ct)
 952 {
 953         /* See fs_frlock */
 954 
 955         switch (cmd) {
 956         case F_ALLOCSP:
 957         case F_FREESP:
 958                 break;
 959         default:
 960                 return (EINVAL);
 961         }
 962 
 963         if (fcntl(vp->v_fd, cmd, bfp) == -1)
 964                 return (errno);
 965 
 966         return (0);
 967 }
 968 
 969 /* ARGSUSED */
 970 int
 971 fop_realvp(
 972         vnode_t *vp,
 973         vnode_t **vpp,
 974         caller_context_t *ct)
 975 {
 976         return (ENOSYS);
 977 }
 978 
 979 /* ARGSUSED */
 980 int
 981 fop_getpage(
 982         vnode_t *vp,
 983         offset_t off,
 984         size_t len,
 985         uint_t *protp,
 986         struct page **plarr,
 987         size_t plsz,
 988         struct seg *seg,
 989         caddr_t addr,
 990         enum seg_rw rw,
 991         cred_t *cr,
 992         caller_context_t *ct)
 993 {
 994         return (ENOSYS);
 995 }
 996 
 997 /* ARGSUSED */
 998 int
 999 fop_putpage(
1000         vnode_t *vp,
1001         offset_t off,
1002         size_t len,
1003         int flags,
1004         cred_t *cr,
1005         caller_context_t *ct)
1006 {
1007         return (ENOSYS);
1008 }
1009 
1010 /* ARGSUSED */
1011 int
1012 fop_map(
1013         vnode_t *vp,
1014         offset_t off,
1015         struct as *as,
1016         caddr_t *addrp,
1017         size_t len,
1018         uchar_t prot,
1019         uchar_t maxprot,
1020         uint_t flags,
1021         cred_t *cr,
1022         caller_context_t *ct)
1023 {
1024         return (ENOSYS);
1025 }
1026 
1027 /* ARGSUSED */
1028 int
1029 fop_addmap(
1030         vnode_t *vp,
1031         offset_t off,
1032         struct as *as,
1033         caddr_t addr,
1034         size_t len,
1035         uchar_t prot,
1036         uchar_t maxprot,
1037         uint_t flags,
1038         cred_t *cr,
1039         caller_context_t *ct)
1040 {
1041         return (ENOSYS);
1042 }
1043 
1044 /* ARGSUSED */
1045 int
1046 fop_delmap(
1047         vnode_t *vp,
1048         offset_t off,
1049         struct as *as,
1050         caddr_t addr,
1051         size_t len,
1052         uint_t prot,
1053         uint_t maxprot,
1054         uint_t flags,
1055         cred_t *cr,
1056         caller_context_t *ct)
1057 {
1058         return (ENOSYS);
1059 }
1060 
1061 /* ARGSUSED */
1062 int
1063 fop_poll(
1064         vnode_t *vp,
1065         short events,
1066         int anyyet,
1067         short *reventsp,
1068         struct pollhead **phpp,
1069         caller_context_t *ct)
1070 {
1071         *reventsp = 0;
1072         if (events & POLLIN)
1073                 *reventsp |= POLLIN;
1074         if (events & POLLRDNORM)
1075                 *reventsp |= POLLRDNORM;
1076         if (events & POLLRDBAND)
1077                 *reventsp |= POLLRDBAND;
1078         if (events & POLLOUT)
1079                 *reventsp |= POLLOUT;
1080         if (events & POLLWRBAND)
1081                 *reventsp |= POLLWRBAND;
1082         *phpp = NULL; /* or fake_pollhead? */
1083 
1084         return (0);
1085 }
1086 
1087 /* ARGSUSED */
1088 int
1089 fop_dump(
1090         vnode_t *vp,
1091         caddr_t addr,
1092         offset_t lbdn,
1093         offset_t dblks,
1094         caller_context_t *ct)
1095 {
1096         return (ENOSYS);
1097 }
1098 
1099 /*
1100  * See fs_pathconf
1101  */
1102 /* ARGSUSED */
1103 int
1104 fop_pathconf(
1105         vnode_t *vp,
1106         int cmd,
1107         ulong_t *valp,
1108         cred_t *cr,
1109         caller_context_t *ct)
1110 {
1111         register ulong_t val;
1112         register int error = 0;
1113 
1114         switch (cmd) {
1115 
1116         case _PC_LINK_MAX:
1117                 val = MAXLINK;
1118                 break;
1119 
1120         case _PC_MAX_CANON:
1121                 val = MAX_CANON;
1122                 break;
1123 
1124         case _PC_MAX_INPUT:
1125                 val = MAX_INPUT;
1126                 break;
1127 
1128         case _PC_NAME_MAX:
1129                 val = MAXNAMELEN;
1130                 break;
1131 
1132         case _PC_PATH_MAX:
1133         case _PC_SYMLINK_MAX:
1134                 val = MAXPATHLEN;
1135                 break;
1136 
1137         case _PC_PIPE_BUF:
1138                 val = PIPE_BUF;
1139                 break;
1140 
1141         case _PC_NO_TRUNC:
1142                 val = (ulong_t)-1;
1143                 break;
1144 
1145         case _PC_VDISABLE:
1146                 val = _POSIX_VDISABLE;
1147                 break;
1148 
1149         case _PC_CHOWN_RESTRICTED:
1150                 val = 1; /* chown restricted enabled */
1151                 break;
1152 
1153         case _PC_FILESIZEBITS:
1154                 val = (ulong_t)-1;    /* large file support */
1155                 break;
1156 
1157         case _PC_ACL_ENABLED:
1158                 val = _ACL_ACE_ENABLED;
1159                 break;
1160 
1161         case _PC_CASE_BEHAVIOR:
1162                 val = _CASE_SENSITIVE;
1163                 break;
1164 
1165         case _PC_SATTR_ENABLED:
1166         case _PC_SATTR_EXISTS:
1167                 val = 0;
1168                 break;
1169 
1170         case _PC_ACCESS_FILTERING:
1171                 val = 0;
1172                 break;
1173 
1174         default:
1175                 error = EINVAL;
1176                 break;
1177         }
1178 
1179         if (error == 0)
1180                 *valp = val;
1181         return (error);
1182 }
1183 
1184 /* ARGSUSED */
1185 int
1186 fop_pageio(
1187         vnode_t *vp,
1188         struct page *pp,
1189         u_offset_t io_off,
1190         size_t io_len,
1191         int flags,
1192         cred_t *cr,
1193         caller_context_t *ct)
1194 {
1195         return (ENOSYS);
1196 }
1197 
1198 /* ARGSUSED */
1199 int
1200 fop_dumpctl(
1201         vnode_t *vp,
1202         int action,
1203         offset_t *blkp,
1204         caller_context_t *ct)
1205 {
1206         return (ENOSYS);
1207 }
1208 
1209 /* ARGSUSED */
1210 void
1211 fop_dispose(
1212         vnode_t *vp,
1213         struct page *pp,
1214         int flag,
1215         int dn,
1216         cred_t *cr,
1217         caller_context_t *ct)
1218 {
1219 }
1220 
1221 /* ARGSUSED */
1222 int
1223 fop_setsecattr(
1224         vnode_t *vp,
1225         vsecattr_t *vsap,
1226         int flag,
1227         cred_t *cr,
1228         caller_context_t *ct)
1229 {
1230         return (0);
1231 }
1232 
1233 /*
1234  * Fake up just enough of this so we can test get/set SDs.
1235  */
1236 /* ARGSUSED */
1237 int
1238 fop_getsecattr(
1239         vnode_t *vp,
1240         vsecattr_t *vsecattr,
1241         int flag,
1242         cred_t *cr,
1243         caller_context_t *ct)
1244 {
1245 
1246         vsecattr->vsa_aclcnt = 0;
1247         vsecattr->vsa_aclentsz       = 0;
1248         vsecattr->vsa_aclentp        = NULL;
1249         vsecattr->vsa_dfaclcnt       = 0;    /* Default ACLs are not fabricated */
1250         vsecattr->vsa_dfaclentp      = NULL;
1251 
1252         if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) {
1253                 aclent_t *aclentp;
1254                 size_t aclsize;
1255 
1256                 aclsize = sizeof (aclent_t);
1257                 vsecattr->vsa_aclcnt = 1;
1258                 vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP);
1259                 aclentp = vsecattr->vsa_aclentp;
1260 
1261                 aclentp->a_type = OTHER_OBJ;
1262                 aclentp->a_perm = 0777;
1263                 aclentp->a_id = (gid_t)-1;
1264                 aclentp++;
1265         } else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) {
1266                 ace_t *acl;
1267 
1268                 acl = kmem_alloc(sizeof (ace_t), KM_SLEEP);
1269                 acl->a_who = (uint32_t)-1;
1270                 acl->a_type = ACE_ACCESS_ALLOWED_ACE_TYPE;
1271                 acl->a_flags = ACE_EVERYONE;
1272                 acl->a_access_mask  = ACE_MODIFY_PERMS;
1273 
1274                 vsecattr->vsa_aclentp = (void *)acl;
1275                 vsecattr->vsa_aclcnt = 1;
1276                 vsecattr->vsa_aclentsz = sizeof (ace_t);
1277         }
1278 
1279         return (0);
1280 }
1281 
1282 /* ARGSUSED */
1283 int
1284 fop_shrlock(
1285         vnode_t *vp,
1286         int cmd,
1287         struct shrlock *shr,
1288         int flag,
1289         cred_t *cr,
1290         caller_context_t *ct)
1291 {
1292 
1293         switch (cmd) {
1294         case F_SHARE:
1295         case F_SHARE_NBMAND:
1296         case F_UNSHARE:
1297                 break;
1298         default:
1299                 return (EINVAL);
1300         }
1301 
1302         if (!fop_shrlock_enable)
1303                 return (0);
1304 
1305         if (fcntl(vp->v_fd, cmd, shr) == -1)
1306                 return (errno);
1307 
1308         return (0);
1309 }
1310 
1311 /* ARGSUSED */
1312 int
1313 fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm,
1314     caller_context_t *ct)
1315 {
1316         return (ENOSYS);
1317 }
1318 
1319 /* ARGSUSED */
1320 int
1321 fop_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *uiop, cred_t *cr,
1322     caller_context_t *ct)
1323 {
1324         return (ENOSYS);
1325 }
1326 
1327 /* ARGSUSED */
1328 int
1329 fop_retzcbuf(vnode_t *vp, xuio_t *uiop, cred_t *cr, caller_context_t *ct)
1330 {
1331         return (ENOSYS);
1332 }
1333 
1334 
1335 /*
1336  * ***************************************************************
1337  * other VOP support
1338  */
1339 
1340 /*
1341  * Convert stat(2) formats to vnode types and vice versa.  (Knows about
1342  * numerical order of S_IFMT and vnode types.)
1343  */
1344 enum vtype iftovt_tab[] = {
1345         VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
1346         VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
1347 };
1348 
1349 ushort_t vttoif_tab[] = {
1350         0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
1351         S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
1352 };
1353 
1354 /*
1355  * stat_to_vattr()
1356  *
1357  * Convert from a stat structure to an vattr structure
1358  * Note: only set fields according to va_mask
1359  */
1360 
1361 int
1362 stat_to_vattr(const struct stat *st, vattr_t *vap)
1363 {
1364 
1365         if (vap->va_mask & AT_TYPE)
1366                 vap->va_type = IFTOVT(st->st_mode);
1367 
1368         if (vap->va_mask & AT_MODE)
1369                 vap->va_mode = st->st_mode;
1370 
1371         if (vap->va_mask & AT_UID)
1372                 vap->va_uid = st->st_uid;
1373 
1374         if (vap->va_mask & AT_GID)
1375                 vap->va_gid = st->st_gid;
1376 
1377         if (vap->va_mask & AT_FSID)
1378                 vap->va_fsid = st->st_dev;
1379 
1380         if (vap->va_mask & AT_NODEID)
1381                 vap->va_nodeid = st->st_ino;
1382 
1383         if (vap->va_mask & AT_NLINK)
1384                 vap->va_nlink = st->st_nlink;
1385 
1386         if (vap->va_mask & AT_SIZE)
1387                 vap->va_size = (u_offset_t)st->st_size;
1388 
1389         if (vap->va_mask & AT_ATIME) {
1390                 vap->va_atime.tv_sec  = st->st_atim.tv_sec;
1391                 vap->va_atime.tv_nsec = st->st_atim.tv_nsec;
1392         }
1393 
1394         if (vap->va_mask & AT_MTIME) {
1395                 vap->va_mtime.tv_sec  = st->st_mtim.tv_sec;
1396                 vap->va_mtime.tv_nsec = st->st_mtim.tv_nsec;
1397         }
1398 
1399         if (vap->va_mask & AT_CTIME) {
1400                 vap->va_ctime.tv_sec  = st->st_ctim.tv_sec;
1401                 vap->va_ctime.tv_nsec = st->st_ctim.tv_nsec;
1402         }
1403 
1404         if (vap->va_mask & AT_RDEV)
1405                 vap->va_rdev = st->st_rdev;
1406 
1407         if (vap->va_mask & AT_BLKSIZE)
1408                 vap->va_blksize = (uint_t)st->st_blksize;
1409 
1410 
1411         if (vap->va_mask & AT_NBLOCKS)
1412                 vap->va_nblocks = (u_longlong_t)st->st_blocks;
1413 
1414         if (vap->va_mask & AT_SEQ)
1415                 vap->va_seq = 0;
1416 
1417         return (0);
1418 }
1419 
1420 /* ARGSUSED */
1421 void
1422 flk_init_callback(flk_callback_t *flk_cb,
1423         callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata)
1424 {
1425 }
1426 
1427 void
1428 vn_hold(vnode_t *vp)
1429 {
1430         mutex_enter(&vp->v_lock);
1431         vp->v_count++;
1432         mutex_exit(&vp->v_lock);
1433 }
1434 
1435 void
1436 vn_rele(vnode_t *vp)
1437 {
1438         VERIFY3U(vp->v_count, !=, 0);
1439         mutex_enter(&vp->v_lock);
1440         if (vp->v_count == 1) {
1441                 mutex_exit(&vp->v_lock);
1442                 fop_inactive(vp, NULL, NULL);
1443         } else {
1444                 vp->v_count--;
1445                 mutex_exit(&vp->v_lock);
1446         }
1447 }
1448 
1449 int
1450 vn_has_other_opens(
1451         vnode_t *vp,
1452         v_mode_t mode)
1453 {
1454 
1455         switch (mode) {
1456         case V_WRITE:
1457                 if (vp->v_wrcnt > 1)
1458                         return (V_TRUE);
1459                 break;
1460         case V_RDORWR:
1461                 if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1))
1462                         return (V_TRUE);
1463                 break;
1464         case V_RDANDWR:
1465                 if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1))
1466                         return (V_TRUE);
1467                 break;
1468         case V_READ:
1469                 if (vp->v_rdcnt > 1)
1470                         return (V_TRUE);
1471                 break;
1472         }
1473 
1474         return (V_FALSE);
1475 }
1476 
1477 /*
1478  * vn_is_opened() checks whether a particular file is opened and
1479  * whether the open is for read and/or write.
1480  *
1481  * Vnode counts are only kept on regular files (v_type=VREG).
1482  */
1483 int
1484 vn_is_opened(
1485         vnode_t *vp,
1486         v_mode_t mode)
1487 {
1488 
1489         ASSERT(vp != NULL);
1490 
1491         switch (mode) {
1492         case V_WRITE:
1493                 if (vp->v_wrcnt)
1494                         return (V_TRUE);
1495                 break;
1496         case V_RDANDWR:
1497                 if (vp->v_rdcnt && vp->v_wrcnt)
1498                         return (V_TRUE);
1499                 break;
1500         case V_RDORWR:
1501                 if (vp->v_rdcnt || vp->v_wrcnt)
1502                         return (V_TRUE);
1503                 break;
1504         case V_READ:
1505                 if (vp->v_rdcnt)
1506                         return (V_TRUE);
1507                 break;
1508         }
1509 
1510         return (V_FALSE);
1511 }
1512 
1513 /*
1514  * vn_is_mapped() checks whether a particular file is mapped and whether
1515  * the file is mapped read and/or write.
1516  */
1517 /* ARGSUSED */
1518 int
1519 vn_is_mapped(
1520         vnode_t *vp,
1521         v_mode_t mode)
1522 {
1523         return (V_FALSE);
1524 }