1 /*
   2  * Copyright (c) 2000-2001 Boris Popov
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. All advertising materials mentioning features or use of this software
  14  *    must display the following acknowledgement:
  15  *    This product includes software developed by Boris Popov.
  16  * 4. Neither the name of the author nor the names of any co-contributors
  17  *    may be used to endorse or promote products derived from this software
  18  *    without specific prior written permission.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  30  * SUCH DAMAGE.
  31  *
  32  * $Id: smbfs_vnops.c,v 1.128.36.1 2005/05/27 02:35:28 lindak Exp $
  33  */
  34 
  35 /*
  36  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  37  * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  38  */
  39 
  40 /*
  41  * Vnode operations
  42  *
  43  * This file is similar to nfs3_vnops.c
  44  */
  45 
  46 #include <sys/param.h>
  47 #include <sys/systm.h>
  48 #include <sys/cred.h>
  49 #include <sys/vnode.h>
  50 #include <sys/vfs.h>
  51 #include <sys/filio.h>
  52 #include <sys/uio.h>
  53 #include <sys/dirent.h>
  54 #include <sys/errno.h>
  55 #include <sys/sunddi.h>
  56 #include <sys/sysmacros.h>
  57 #include <sys/kmem.h>
  58 #include <sys/cmn_err.h>
  59 #include <sys/vfs_opreg.h>
  60 #include <sys/policy.h>
  61 #include <sys/sdt.h>
  62 #include <sys/taskq_impl.h>
  63 #include <sys/zone.h>
  64 
  65 #ifdef  _KERNEL
  66 #include <sys/vmsystm.h>  // for desfree
  67 #include <vm/hat.h>
  68 #include <vm/as.h>
  69 #include <vm/page.h>
  70 #include <vm/pvn.h>
  71 #include <vm/seg.h>
  72 #include <vm/seg_map.h>
  73 #include <vm/seg_kpm.h>
  74 #include <vm/seg_vn.h>
  75 #endif  // _KERNEL
  76 
  77 #include <netsmb/smb_osdep.h>
  78 #include <netsmb/smb.h>
  79 #include <netsmb/smb_conn.h>
  80 #include <netsmb/smb_subr.h>
  81 
  82 #include <smbfs/smbfs.h>
  83 #include <smbfs/smbfs_node.h>
  84 #include <smbfs/smbfs_subr.h>
  85 
  86 #include <sys/fs/smbfs_ioctl.h>
  87 #include <fs/fs_subr.h>
  88 
  89 #ifndef MAXOFF32_T
  90 #define MAXOFF32_T      0x7fffffff
  91 #endif
  92 
  93 /*
  94  * We assign directory offsets like the NFS client, where the
  95  * offset increments by _one_ after each directory entry.
  96  * Further, the entries "." and ".." are always at offsets
  97  * zero and one (respectively) and the "real" entries from
  98  * the server appear at offsets starting with two.  This
  99  * macro is used to initialize the n_dirofs field after
 100  * setting n_dirseq with a _findopen call.
 101  */
 102 #define FIRST_DIROFS    2
 103 
 104 /*
 105  * These characters are illegal in NTFS file names.
 106  * ref: http://support.microsoft.com/kb/147438
 107  *
 108  * Careful!  The check in the XATTR case skips the
 109  * first character to allow colon in XATTR names.
 110  */
 111 static const char illegal_chars[] = {
 112         ':',    /* colon - keep this first! */
 113         '\\',   /* back slash */
 114         '/',    /* slash */
 115         '*',    /* asterisk */
 116         '?',    /* question mark */
 117         '"',    /* double quote */
 118         '<', /* less than sign */
 119         '>', /* greater than sign */
 120         '|',    /* vertical bar */
 121         0
 122 };
 123 
 124 /*
 125  * Turning this on causes nodes to be created in the cache
 126  * during directory listings, normally avoiding a second
 127  * OtW attribute fetch just after a readdir.
 128  */
 129 int smbfs_fastlookup = 1;
 130 
 131 struct vnodeops *smbfs_vnodeops = NULL;
 132 
 133 /* local static function defines */
 134 
 135 static int      smbfslookup_cache(vnode_t *, char *, int, vnode_t **,
 136                         cred_t *);
 137 static int      smbfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr,
 138                         int cache_ok, caller_context_t *);
 139 static int      smbfsremove(vnode_t *dvp, vnode_t *vp, struct smb_cred *scred,
 140                         int flags);
 141 static int      smbfsrename(vnode_t *odvp, vnode_t *ovp, vnode_t *ndvp,
 142                         char *nnm, struct smb_cred *scred, int flags);
 143 static int      smbfssetattr(vnode_t *, struct vattr *, int, cred_t *);
 144 static int      smbfs_accessx(void *, int, cred_t *);
 145 static int      smbfs_readvdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
 146                         caller_context_t *);
 147 static int      smbfsflush(smbnode_t *, struct smb_cred *);
 148 static void     smbfs_rele_fid(smbnode_t *, struct smb_cred *);
 149 static uint32_t xvattr_to_dosattr(smbnode_t *, struct vattr *);
 150 
 151 static int      smbfs_fsync(vnode_t *, int, cred_t *, caller_context_t *);
 152 
 153 static int      smbfs_putpage(vnode_t *, offset_t, size_t, int, cred_t *,
 154                         caller_context_t *);
 155 #ifdef  _KERNEL
 156 static int      smbfs_getapage(vnode_t *, u_offset_t, size_t, uint_t *,
 157                         page_t *[], size_t, struct seg *, caddr_t,
 158                         enum seg_rw, cred_t *);
 159 static int      smbfs_putapage(vnode_t *, page_t *, u_offset_t *, size_t *,
 160                         int, cred_t *);
 161 static void     smbfs_delmap_async(void *);
 162 
 163 static int      smbfs_rdwrlbn(vnode_t *, page_t *, u_offset_t, size_t, int,
 164                         cred_t *);
 165 static int      smbfs_bio(struct buf *, int, cred_t *);
 166 static int      smbfs_writenp(smbnode_t *np, caddr_t base, int tcount,
 167                         struct uio *uiop, int pgcreated);
 168 #endif  // _KERNEL
 169 
 170 /*
 171  * Error flags used to pass information about certain special errors
 172  * which need to be handled specially.
 173  */
 174 #define SMBFS_EOF                       -98
 175 
 176 /* When implementing OtW locks, make this a real function. */
 177 #define smbfs_lm_has_sleep(vp) 0
 178 
 179 /*
 180  * These are the vnode ops routines which implement the vnode interface to
 181  * the networked file system.  These routines just take their parameters,
 182  * make them look networkish by putting the right info into interface structs,
 183  * and then calling the appropriate remote routine(s) to do the work.
 184  *
 185  * Note on directory name lookup cacheing:  If we detect a stale fhandle,
 186  * we purge the directory cache relative to that vnode.  This way, the
 187  * user won't get burned by the cache repeatedly.  See <smbfs/smbnode.h> for
 188  * more details on smbnode locking.
 189  */
 190 
 191 
 192 /*
 193  * XXX
 194  * When new and relevant functionality is enabled, we should be
 195  * calling vfs_set_feature() to inform callers that pieces of
 196  * functionality are available, per PSARC 2007/227.
 197  */
 198 /* ARGSUSED */
/*
 * smbfs_open: VOP_OPEN entry point.
 *
 * Opens (or reuses) the SMB-level file ID (FID) for a file, or the
 * find context (n_dirseq) for a directory.  Serializes on r_lkserlock
 * as writer so only one thread manipulates n_fid/n_dirseq state.
 *
 * Returns 0 on success; EIO for wrong zone / dead mount, EACCES for
 * unsupported vnode types or a type mismatch with a prior open,
 * EINTR if the lock wait was interrupted, or an error from the
 * over-the-wire open.
 */
static int
smbfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
	smbnode_t	*np;
	vnode_t		*vp;
	smbfattr_t	fa;
	smb_fh_t	*fid = NULL;
	smb_fh_t	*oldfid;
	uint32_t	rights;
	struct smb_cred scred;
	smbmntinfo_t	*smi;
	smb_share_t	*ssp;
	cred_t		*oldcr;
	int		error = 0;

	vp = *vpp;
	np = VTOSMB(vp);
	smi = VTOSMI(vp);
	ssp = smi->smi_share;

	/* Reject calls from a zone other than the one that mounted us. */
	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
		return (EIO);

	if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	if (vp->v_type != VREG && vp->v_type != VDIR) { /* XXX VLNK? */
		SMBVDEBUG("open eacces vtype=%d\n", vp->v_type);
		return (EACCES);
	}

	/*
	 * Get exclusive access to n_fid and related stuff.
	 * No returns after this until out.
	 */
	if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp)))
		return (EINTR);
	smb_credinit(&scred, cr);

	/*
	 * Keep track of the vnode type at first open.
	 * It may change later, and we need close to do
	 * cleanup for the type we opened.  Also deny
	 * open of new types until old type is closed.
	 */
	if (np->n_ovtype == VNON) {
		ASSERT(np->n_dirrefs == 0);
		ASSERT(np->n_fidrefs == 0);
	} else if (np->n_ovtype != vp->v_type) {
		SMBVDEBUG("open n_ovtype=%d v_type=%d\n",
		    np->n_ovtype, vp->v_type);
		error = EACCES;
		goto out;
	}

	/*
	 * Directory open.  See smbfs_readvdir()
	 */
	if (vp->v_type == VDIR) {
		if (np->n_dirseq == NULL) {
			/* first open */
			error = smbfs_smb_findopen(np, "*", 1,
			    SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR,
			    &scred, &np->n_dirseq);
			if (error != 0)
				goto out;
		}
		/* Offsets 0,1 are "." and ".."; real entries start at 2. */
		np->n_dirofs = FIRST_DIROFS;
		np->n_dirrefs++;
		goto have_fid;
	}

	/*
	 * If caller specified O_TRUNC/FTRUNC, then be sure to set
	 * FWRITE (to drive successful setattr(size=0) after open)
	 */
	if (flag & FTRUNC)
		flag |= FWRITE;

	/*
	 * If we already have it open, and the FID is still valid
	 * (i.e. the virtual circuit generation has not changed),
	 * check whether the rights are sufficient for FID reuse.
	 */
	if (np->n_fidrefs > 0 &&
	    (fid = np->n_fid) != NULL &&
	    fid->fh_vcgenid == ssp->ss_vcgenid) {
		int upgrade = 0;

		if ((flag & FWRITE) &&
		    !(fid->fh_rights & SA_RIGHT_FILE_WRITE_DATA))
			upgrade = 1;
		if ((flag & FREAD) &&
		    !(fid->fh_rights & SA_RIGHT_FILE_READ_DATA))
			upgrade = 1;
		if (!upgrade) {
			/*
			 *  the existing open is good enough
			 */
			np->n_fidrefs++;
			goto have_fid;
		}
		fid = NULL;
	}
	/* If upgrading, keep the old rights and add the new ones. */
	rights = (fid != NULL) ? fid->fh_rights : 0;

	/*
	 * we always ask for READ_CONTROL so we can always get the
	 * owner/group IDs to satisfy a stat.  Ditto attributes.
	 */
	rights |= (STD_RIGHT_READ_CONTROL_ACCESS |
	    SA_RIGHT_FILE_READ_ATTRIBUTES);
	if ((flag & FREAD))
		rights |= SA_RIGHT_FILE_READ_DATA;
	if ((flag & FWRITE))
		rights |= SA_RIGHT_FILE_WRITE_DATA |
		    SA_RIGHT_FILE_APPEND_DATA |
		    SA_RIGHT_FILE_WRITE_ATTRIBUTES;

	bzero(&fa, sizeof (fa));
	error = smbfs_smb_open(np,
	    NULL, 0, 0,	/* name nmlen xattr */
	    rights, &scred,
	    &fid, &fa);
	if (error)
		goto out;
	/* The open returned fresh attributes; cache them. */
	smbfs_attrcache_fa(vp, &fa);

	/*
	 * We have a new FID and access rights.
	 * Swap it in and release any prior FID.
	 */
	oldfid = np->n_fid;
	np->n_fid = fid;
	np->n_fidrefs++;
	if (oldfid != NULL)
		smb_fh_rele(oldfid);

	/*
	 * This thread did the open.
	 * Save our credentials too.
	 */
	mutex_enter(&np->r_statelock);
	oldcr = np->r_cred;
	np->r_cred = cr;
	crhold(cr);
	if (oldcr)
		crfree(oldcr);
	mutex_exit(&np->r_statelock);

have_fid:
	/*
	 * Keep track of the vnode type at first open.
	 * (see comments above)
	 */
	if (np->n_ovtype == VNON)
		np->n_ovtype = vp->v_type;

out:
	smb_credrele(&scred);
	smbfs_rw_exit(&np->r_lkserlock);
	return (error);
}
 360 
 361 /*ARGSUSED*/
/*
 * smbfs_close: VOP_CLOSE entry point.
 *
 * Releases local/network locks for this process, flushes dirty pages
 * on the last close of a writable file, then drops one reference on
 * the SMB-level FID (possibly doing the over-the-wire close) via
 * smbfs_rele_fid().
 *
 * Note: deliberately returns 0 even if the putpage flush failed;
 * the error only gates the RSTALE/r_error reset below.
 */
static int
smbfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
	caller_context_t *ct)
{
	smbnode_t	*np;
	smbmntinfo_t	*smi;
	struct smb_cred scred;
	int error = 0;

	np = VTOSMB(vp);
	smi = VTOSMI(vp);

	/*
	 * Don't "bail out" for VFS_UNMOUNTED here,
	 * as we want to do cleanup, etc.
	 */

	/*
	 * zone_enter(2) prevents processes from changing zones with SMBFS files
	 * open; if we happen to get here from the wrong zone we can't do
	 * anything over the wire.
	 */
	if (smi->smi_zone_ref.zref_zone != curproc->p_zone) {
		/*
		 * We could attempt to clean up locks, except we're sure
		 * that the current process didn't acquire any locks on
		 * the file: any attempt to lock a file belonging to another
		 * zone will fail, and one can't lock an SMBFS file and then
		 * change zones, as that fails too.
		 *
		 * Returning an error here is the sane thing to do.  A
		 * subsequent call to VN_RELE() which translates to a
		 * smbfs_inactive() will clean up state: if the zone of the
		 * vnode's origin is still alive and kicking, an async worker
		 * thread will handle the request (from the correct zone), and
		 * everything (minus the final smbfs_getattr_otw() call) should
		 * be OK. If the zone is going away smbfs_async_inactive() will
		 * throw away cached pages inline.
		 */
		return (EIO);
	}

	/*
	 * If we are using local locking for this filesystem, then
	 * release all of the SYSV style record locks.  Otherwise,
	 * we are doing network locking and we need to release all
	 * of the network locks.  All of the locks held by this
	 * process on this file are released no matter what the
	 * incoming reference count is.
	 */
	if (smi->smi_flags & SMI_LLOCK) {
		pid_t pid = ddi_get_pid();
		cleanlocks(vp, pid, 0);
		cleanshares(vp, pid);
	}
	/*
	 * else doing OtW locking.  SMB servers drop all locks
	 * on the file ID we close here, so no _lockrelease()
	 */

	/*
	 * This (passed in) count is the ref. count from the
	 * user's file_t before the closef call (fio.c).
	 * The rest happens only on last close.
	 */
	if (count > 1)
		return (0);

	/* NFS has DNLC purge here. */

	/*
	 * If the file was open for write and there are pages,
	 * then make sure dirty pages written back.
	 *
	 * NFS does this async when "close-to-open" is off
	 * (MI_NOCTO flag is set) to avoid blocking the caller.
	 * For now, always do this synchronously (no B_ASYNC).
	 */
	if ((flag & FWRITE) && vn_has_cached_data(vp)) {
		error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
		if (error == EAGAIN)
			error = 0;
	}
	/* Only clear the stale state if the flush succeeded. */
	if (error == 0) {
		mutex_enter(&np->r_statelock);
		np->r_flags &= ~RSTALE;
		np->r_error = 0;
		mutex_exit(&np->r_statelock);
	}

	/*
	 * Decrement the reference count for the FID
	 * and possibly do the OtW close.
	 *
	 * Exclusive lock for modifying n_fid stuff.
	 * Don't want this one ever interruptible.
	 */
	(void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
	smb_credinit(&scred, cr);

	smbfs_rele_fid(np, &scred);

	smb_credrele(&scred);
	smbfs_rw_exit(&np->r_lkserlock);

	return (0);
}
 469 
 470 /*
 471  * Helper for smbfs_close.  Decrement the reference count
 472  * for an SMB-level file or directory ID, and when the last
 473  * reference for the fid goes away, do the OtW close.
 474  * Also called in smbfs_inactive (defensive cleanup).
 475  */
 476 static void
 477 smbfs_rele_fid(smbnode_t *np, struct smb_cred *scred)
 478 {
 479         cred_t          *oldcr;
 480         struct smbfs_fctx *fctx;
 481         int             error;
 482         smb_fh_t        *ofid;
 483 
 484         error = 0;
 485 
 486         /* Make sure we serialize for n_dirseq use. */
 487         ASSERT(smbfs_rw_lock_held(&np->r_lkserlock, RW_WRITER));
 488 
 489         /*
 490          * Note that vp->v_type may change if a remote node
 491          * is deleted and recreated as a different type, and
 492          * our getattr may change v_type accordingly.
 493          * Now use n_ovtype to keep track of the v_type
 494          * we had during open (see comments above).
 495          */
 496         switch (np->n_ovtype) {
 497         case VDIR:
 498                 ASSERT(np->n_dirrefs > 0);
 499                 if (--np->n_dirrefs)
 500                         return;
 501                 if ((fctx = np->n_dirseq) != NULL) {
 502                         np->n_dirseq = NULL;
 503                         np->n_dirofs = 0;
 504                         error = smbfs_smb_findclose(fctx, scred);
 505                 }
 506                 break;
 507 
 508         case VREG:
 509                 ASSERT(np->n_fidrefs > 0);
 510                 if (--np->n_fidrefs)
 511                         return;
 512                 if ((ofid = np->n_fid) != NULL) {
 513                         np->n_fid = NULL;
 514                         smb_fh_rele(ofid);
 515                 }
 516                 break;
 517 
 518         default:
 519                 SMBVDEBUG("bad n_ovtype %d\n", np->n_ovtype);
 520                 break;
 521         }
 522         if (error) {
 523                 SMBVDEBUG("error %d closing %s\n",
 524                     error, np->n_rpath);
 525         }
 526 
 527         /* Allow next open to use any v_type. */
 528         np->n_ovtype = VNON;
 529 
 530         /*
 531          * Other "last close" stuff.
 532          */
 533         mutex_enter(&np->r_statelock);
 534         if (np->n_flag & NATTRCHANGED)
 535                 smbfs_attrcache_rm_locked(np);
 536         oldcr = np->r_cred;
 537         np->r_cred = NULL;
 538         mutex_exit(&np->r_statelock);
 539         if (oldcr != NULL)
 540                 crfree(oldcr);
 541 }
 542 
 543 /* ARGSUSED */
/*
 * smbfs_read: VOP_READ entry point.
 *
 * Reads from a regular file, either directly over the wire
 * (when caching is disabled or direct I/O applies) or through
 * the VM layer (segmap/vpm).  The read is clipped to the file
 * size obtained from a fresh getattr; uio_resid is temporarily
 * reduced by the portion past EOF and restored before return.
 *
 * Caller holds r_rwlock as reader (asserted below).
 */
static int
smbfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
	caller_context_t *ct)
{
	struct smb_cred scred;
	struct vattr	va;
	smbnode_t	*np;
	smbmntinfo_t	*smi;
	offset_t	endoff;
	ssize_t		past_eof;
	int		error;

	np = VTOSMB(vp);
	smi = VTOSMI(vp);

	/* Reject calls from a zone other than the one that mounted us. */
	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
		return (EIO);

	if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_READER));

	if (vp->v_type != VREG)
		return (EISDIR);

	if (uiop->uio_resid == 0)
		return (0);

	/*
	 * Like NFS3, just check for 63-bit overflow.
	 * Our SMB layer takes care to return EFBIG
	 * when it has to fallback to a 32-bit call.
	 */
	endoff = uiop->uio_loffset + uiop->uio_resid;
	if (uiop->uio_loffset < 0 || endoff < 0)
		return (EINVAL);

	/* get vnode attributes from server */
	va.va_mask = AT_SIZE | AT_MTIME;
	if (error = smbfsgetattr(vp, &va, cr))
		return (error);

	/* Update mtime with mtime from server here? */

	/* if offset is beyond EOF, read nothing */
	if (uiop->uio_loffset >= va.va_size)
		return (0);

	/*
	 * Limit the read to the remaining file size.
	 * Do this by temporarily reducing uio_resid
	 * by the amount that lies beyond the EOF.
	 */
	if (endoff > va.va_size) {
		past_eof = (ssize_t)(endoff - va.va_size);
		uiop->uio_resid -= past_eof;
	} else
		past_eof = 0;

	/*
	 * Bypass VM if caching has been disabled (e.g., locking) or if
	 * using client-side direct I/O and the file is not mmap'd and
	 * there are no cached pages.
	 */
	if ((vp->v_flag & VNOCACHE) ||
	    (((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO)) &&
	    np->r_mapcnt == 0 && np->r_inmap == 0 &&
	    !vn_has_cached_data(vp))) {

		/* Shared lock for n_fid use in smb_rwuio */
		if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
			return (EINTR);
		smb_credinit(&scred, cr);

		error = smb_rwuio(np->n_fid, UIO_READ,
		    uiop, &scred, smb_timo_read);

		smb_credrele(&scred);
		smbfs_rw_exit(&np->r_lkserlock);

		/* undo adjustment of resid */
		uiop->uio_resid += past_eof;

		return (error);
	}

#ifdef	_KERNEL
	/* (else) Do I/O through segmap. */
	do {
		caddr_t		base;
		u_offset_t	off;
		size_t		n;
		int		on;
		uint_t		flags;

		off = uiop->uio_loffset & MAXBMASK; /* mapping offset */
		on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */
		n = MIN(MAXBSIZE - on, uiop->uio_resid);

		error = smbfs_validate_caches(vp, cr);
		if (error)
			break;

		/* NFS waits for RINCACHEPURGE here. */

		if (vpm_enable) {
			/*
			 * Copy data.
			 */
			error = vpm_data_copy(vp, off + on, n, uiop,
			    1, NULL, 0, S_READ);
		} else {
			base = segmap_getmapflt(segkmap, vp, off + on, n, 1,
			    S_READ);

			error = uiomove(base + on, n, UIO_READ, uiop);
		}

		if (!error) {
			/*
			 * If read a whole block or read to eof,
			 * won't need this buffer again soon.
			 */
			mutex_enter(&np->r_statelock);
			if (n + on == MAXBSIZE ||
			    uiop->uio_loffset == np->r_size)
				flags = SM_DONTNEED;
			else
				flags = 0;
			mutex_exit(&np->r_statelock);
			if (vpm_enable) {
				error = vpm_sync_pages(vp, off, n, flags);
			} else {
				error = segmap_release(segkmap, base, flags);
			}
		} else {
			/* Release the mapping; discard sync status. */
			if (vpm_enable) {
				(void) vpm_sync_pages(vp, off, n, 0);
			} else {
				(void) segmap_release(segkmap, base, 0);
			}
		}
	} while (!error && uiop->uio_resid > 0);
#else	// _KERNEL
	error = ENOSYS;
#endif	// _KERNEL

	/* undo adjustment of resid */
	uiop->uio_resid += past_eof;

	return (error);
}
 697 
 698 
 699 /* ARGSUSED */
 700 static int
 701 smbfs_write(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
 702         caller_context_t *ct)
 703 {
 704         struct smb_cred scred;
 705         struct vattr    va;
 706         smbnode_t       *np;
 707         smbmntinfo_t    *smi;
 708         offset_t        endoff, limit;
 709         ssize_t         past_limit;
 710         int             error, timo;
 711         u_offset_t      last_off;
 712         size_t          last_resid;
 713 #ifdef  _KERNEL
 714         uint_t          bsize;
 715 #endif
 716 
 717         np = VTOSMB(vp);
 718         smi = VTOSMI(vp);
 719 
 720         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
 721                 return (EIO);
 722 
 723         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
 724                 return (EIO);
 725 
 726         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_WRITER));
 727 
 728         if (vp->v_type != VREG)
 729                 return (EISDIR);
 730 
 731         if (uiop->uio_resid == 0)
 732                 return (0);
 733 
 734         /*
 735          * Handle ioflag bits: (FAPPEND|FSYNC|FDSYNC)
 736          */
 737         if (ioflag & (FAPPEND | FSYNC)) {
 738                 if (np->n_flag & NMODIFIED) {
 739                         smbfs_attrcache_remove(np);
 740                 }
 741         }
 742         if (ioflag & FAPPEND) {
 743                 /*
 744                  * File size can be changed by another client
 745                  *
 746                  * Todo: Consider redesigning this to use a
 747                  * handle opened for append instead.
 748                  */
 749                 va.va_mask = AT_SIZE;
 750                 if (error = smbfsgetattr(vp, &va, cr))
 751                         return (error);
 752                 uiop->uio_loffset = va.va_size;
 753         }
 754 
 755         /*
 756          * Like NFS3, just check for 63-bit overflow.
 757          */
 758         endoff = uiop->uio_loffset + uiop->uio_resid;
 759         if (uiop->uio_loffset < 0 || endoff < 0)
 760                 return (EINVAL);
 761 
 762         /*
 763          * Check to make sure that the process will not exceed
 764          * its limit on file size.  It is okay to write up to
 765          * the limit, but not beyond.  Thus, the write which
 766          * reaches the limit will be short and the next write
 767          * will return an error.
 768          *
 769          * So if we're starting at or beyond the limit, EFBIG.
 770          * Otherwise, temporarily reduce resid to the amount
 771          * that is after the limit.
 772          */
 773         limit = uiop->uio_llimit;
 774         if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
 775                 limit = MAXOFFSET_T;
 776         if (uiop->uio_loffset >= limit) {
 777 #ifdef  _KERNEL
 778                 proc_t *p = ttoproc(curthread);
 779 
 780                 mutex_enter(&p->p_lock);
 781                 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
 782                     p->p_rctls, p, RCA_UNSAFE_SIGINFO);
 783                 mutex_exit(&p->p_lock);
 784 #endif  // _KERNEL
 785                 return (EFBIG);
 786         }
 787         if (endoff > limit) {
 788                 past_limit = (ssize_t)(endoff - limit);
 789                 uiop->uio_resid -= past_limit;
 790         } else
 791                 past_limit = 0;
 792 
 793         /*
 794          * Bypass VM if caching has been disabled (e.g., locking) or if
 795          * using client-side direct I/O and the file is not mmap'd and
 796          * there are no cached pages.
 797          */
 798         if ((vp->v_flag & VNOCACHE) ||
 799             (((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO)) &&
 800             np->r_mapcnt == 0 && np->r_inmap == 0 &&
 801             !vn_has_cached_data(vp))) {
 802 
 803 #ifdef  _KERNEL
 804 smbfs_fwrite:
 805 #endif  // _KERNEL
 806                 if (np->r_flags & RSTALE) {
 807                         last_resid = uiop->uio_resid;
 808                         last_off = uiop->uio_loffset;
 809                         error = np->r_error;
 810                         /*
 811                          * A close may have cleared r_error, if so,
 812                          * propagate ESTALE error return properly
 813                          */
 814                         if (error == 0)
 815                                 error = ESTALE;
 816                         goto bottom;
 817                 }
 818 
 819                 /* Timeout: longer for append. */
 820                 timo = smb_timo_write;
 821                 if (endoff > np->r_size)
 822                         timo = smb_timo_append;
 823 
 824                 /* Shared lock for n_fid use in smb_rwuio */
 825                 if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
 826                         return (EINTR);
 827                 smb_credinit(&scred, cr);
 828 
 829                 error = smb_rwuio(np->n_fid, UIO_WRITE,
 830                     uiop, &scred, timo);
 831 
 832                 if (error == 0) {
 833                         mutex_enter(&np->r_statelock);
 834                         np->n_flag |= (NFLUSHWIRE | NATTRCHANGED);
 835                         if (uiop->uio_loffset > (offset_t)np->r_size)
 836                                 np->r_size = (len_t)uiop->uio_loffset;
 837                         mutex_exit(&np->r_statelock);
 838                         if (ioflag & (FSYNC | FDSYNC)) {
 839                                 /* Don't error the I/O if this fails. */
 840                                 (void) smbfsflush(np, &scred);
 841                         }
 842                 }
 843 
 844                 smb_credrele(&scred);
 845                 smbfs_rw_exit(&np->r_lkserlock);
 846 
 847                 /* undo adjustment of resid */
 848                 uiop->uio_resid += past_limit;
 849 
 850                 return (error);
 851         }
 852 
 853 #ifdef  _KERNEL
 854         /* (else) Do I/O through segmap. */
 855         bsize = vp->v_vfsp->vfs_bsize;
 856 
 857         do {
 858                 caddr_t         base;
 859                 u_offset_t      off;
 860                 size_t          n;
 861                 int             on;
 862                 uint_t          flags;
 863 
 864                 off = uiop->uio_loffset & MAXBMASK; /* mapping offset */
 865                 on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */
 866                 n = MIN(MAXBSIZE - on, uiop->uio_resid);
 867 
 868                 last_resid = uiop->uio_resid;
 869                 last_off = uiop->uio_loffset;
 870 
 871                 if (np->r_flags & RSTALE) {
 872                         error = np->r_error;
 873                         /*
 874                          * A close may have cleared r_error, if so,
 875                          * propagate ESTALE error return properly
 876                          */
 877                         if (error == 0)
 878                                 error = ESTALE;
 879                         break;
 880                 }
 881 
 882                 /*
 883                  * From NFS: Don't create dirty pages faster than they
 884                  * can be cleaned.
 885                  *
 886                  * Here NFS also checks for async writes (np->r_awcount)
 887                  */
 888                 mutex_enter(&np->r_statelock);
 889                 while (np->r_gcount > 0) {
 890                         if (SMBINTR(vp)) {
 891                                 klwp_t *lwp = ttolwp(curthread);
 892 
 893                                 if (lwp != NULL)
 894                                         lwp->lwp_nostop++;
 895                                 if (!cv_wait_sig(&np->r_cv, &np->r_statelock)) {
 896                                         mutex_exit(&np->r_statelock);
 897                                         if (lwp != NULL)
 898                                                 lwp->lwp_nostop--;
 899                                         error = EINTR;
 900                                         goto bottom;
 901                                 }
 902                                 if (lwp != NULL)
 903                                         lwp->lwp_nostop--;
 904                         } else
 905                                 cv_wait(&np->r_cv, &np->r_statelock);
 906                 }
 907                 mutex_exit(&np->r_statelock);
 908 
 909                 /*
 910                  * Touch the page and fault it in if it is not in core
 911                  * before segmap_getmapflt or vpm_data_copy can lock it.
 912                  * This is to avoid the deadlock if the buffer is mapped
 913                  * to the same file through mmap which we want to write.
 914                  */
 915                 uio_prefaultpages((long)n, uiop);
 916 
 917                 if (vpm_enable) {
 918                         /*
 919                          * It will use kpm mappings, so no need to
 920                          * pass an address.
 921                          */
 922                         error = smbfs_writenp(np, NULL, n, uiop, 0);
 923                 } else {
 924                         if (segmap_kpm) {
 925                                 int pon = uiop->uio_loffset & PAGEOFFSET;
 926                                 size_t pn = MIN(PAGESIZE - pon,
 927                                     uiop->uio_resid);
 928                                 int pagecreate;
 929 
 930                                 mutex_enter(&np->r_statelock);
 931                                 pagecreate = (pon == 0) && (pn == PAGESIZE ||
 932                                     uiop->uio_loffset + pn >= np->r_size);
 933                                 mutex_exit(&np->r_statelock);
 934 
 935                                 base = segmap_getmapflt(segkmap, vp, off + on,
 936                                     pn, !pagecreate, S_WRITE);
 937 
 938                                 error = smbfs_writenp(np, base + pon, n, uiop,
 939                                     pagecreate);
 940 
 941                         } else {
 942                                 base = segmap_getmapflt(segkmap, vp, off + on,
 943                                     n, 0, S_READ);
 944                                 error = smbfs_writenp(np, base + on, n, uiop, 0);
 945                         }
 946                 }
 947 
 948                 if (!error) {
 949                         if (smi->smi_flags & SMI_NOAC)
 950                                 flags = SM_WRITE;
 951                         else if ((uiop->uio_loffset % bsize) == 0 ||
 952                             IS_SWAPVP(vp)) {
 953                                 /*
 954                                  * Have written a whole block.
 955                                  * Start an asynchronous write
 956                                  * and mark the buffer to
 957                                  * indicate that it won't be
 958                                  * needed again soon.
 959                                  */
 960                                 flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
 961                         } else
 962                                 flags = 0;
 963                         if ((ioflag & (FSYNC|FDSYNC)) ||
 964                             (np->r_flags & ROUTOFSPACE)) {
 965                                 flags &= ~SM_ASYNC;
 966                                 flags |= SM_WRITE;
 967                         }
 968                         if (vpm_enable) {
 969                                 error = vpm_sync_pages(vp, off, n, flags);
 970                         } else {
 971                                 error = segmap_release(segkmap, base, flags);
 972                         }
 973                 } else {
 974                         if (vpm_enable) {
 975                                 (void) vpm_sync_pages(vp, off, n, 0);
 976                         } else {
 977                                 (void) segmap_release(segkmap, base, 0);
 978                         }
 979                         /*
 980                          * In the event that we got an access error while
 981                          * faulting in a page for a write-only file just
 982                          * force a write.
 983                          */
 984                         if (error == EACCES)
 985                                 goto smbfs_fwrite;
 986                 }
 987         } while (!error && uiop->uio_resid > 0);
 988 #else   // _KERNEL
 989         last_resid = uiop->uio_resid;
 990         last_off = uiop->uio_loffset;
 991         error = ENOSYS;
 992 #endif  // _KERNEL
 993 
 994 bottom:
 995         /* undo adjustment of resid */
 996         if (error) {
 997                 uiop->uio_resid = last_resid + past_limit;
 998                 uiop->uio_loffset = last_off;
 999         } else {
1000                 uiop->uio_resid += past_limit;
1001         }
1002 
1003         return (error);
1004 }
1005 
1006 #ifdef  _KERNEL
1007 
1008 /*
1009  * Like nfs_client.c: writerp()
1010  *
1011  * Write by creating pages and uiomove data onto them.
1012  */
1013 
1014 int
1015 smbfs_writenp(smbnode_t *np, caddr_t base, int tcount, struct uio *uio,
1016     int pgcreated)
1017 {
1018         int             pagecreate;
1019         int             n;
1020         int             saved_n;
1021         caddr_t         saved_base;
1022         u_offset_t      offset;
1023         int             error;
1024         int             sm_error;
1025         vnode_t         *vp = SMBTOV(np);
1026 
1027         ASSERT(tcount <= MAXBSIZE && tcount <= uio->uio_resid);
1028         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_WRITER));
1029         if (!vpm_enable) {
1030                 ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
1031         }
1032 
1033         /*
1034          * Move bytes in at most PAGESIZE chunks. We must avoid
1035          * spanning pages in uiomove() because page faults may cause
1036          * the cache to be invalidated out from under us. The r_size is not
1037          * updated until after the uiomove. If we push the last page of a
1038          * file before r_size is correct, we will lose the data written past
1039          * the current (and invalid) r_size.
1040          */
1041         do {
1042                 offset = uio->uio_loffset;
1043                 pagecreate = 0;
1044 
1045                 /*
1046                  * n is the number of bytes required to satisfy the request
1047                  *   or the number of bytes to fill out the page.
1048                  */
1049                 n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount);
1050 
1051                 /*
1052                  * Check to see if we can skip reading in the page
1053                  * and just allocate the memory.  We can do this
1054                  * if we are going to rewrite the entire mapping
1055                  * or if we are going to write to or beyond the current
1056                  * end of file from the beginning of the mapping.
1057                  *
1058                  * The read of r_size is now protected by r_statelock.
1059                  */
1060                 mutex_enter(&np->r_statelock);
1061                 /*
1062                  * When pgcreated is nonzero the caller has already done
1063                  * a segmap_getmapflt with forcefault 0 and S_WRITE. With
1064                  * segkpm this means we already have at least one page
1065                  * created and mapped at base.
1066                  */
1067                 pagecreate = pgcreated ||
1068                     ((offset & PAGEOFFSET) == 0 &&
1069                     (n == PAGESIZE || ((offset + n) >= np->r_size)));
1070 
1071                 mutex_exit(&np->r_statelock);
1072                 if (!vpm_enable && pagecreate) {
1073                         /*
1074                          * The last argument tells segmap_pagecreate() to
1075                          * always lock the page, as opposed to sometimes
1076                          * returning with the page locked. This way we avoid a
1077                          * fault on the ensuing uiomove(), but also
1078                          * more importantly (to fix bug 1094402) we can
1079                          * call segmap_fault() to unlock the page in all
1080                          * cases. An alternative would be to modify
1081                          * segmap_pagecreate() to tell us when it is
1082                          * locking a page, but that's a fairly major
1083                          * interface change.
1084                          */
1085                         if (pgcreated == 0)
1086                                 (void) segmap_pagecreate(segkmap, base,
1087                                     (uint_t)n, 1);
1088                         saved_base = base;
1089                         saved_n = n;
1090                 }
1091 
1092                 /*
1093                  * The number of bytes of data in the last page can not
1094                  * be accurately be determined while page is being
1095                  * uiomove'd to and the size of the file being updated.
1096                  * Thus, inform threads which need to know accurately
1097                  * how much data is in the last page of the file.  They
1098                  * will not do the i/o immediately, but will arrange for
1099                  * the i/o to happen later when this modify operation
1100                  * will have finished.
1101                  */
1102                 ASSERT(!(np->r_flags & RMODINPROGRESS));
1103                 mutex_enter(&np->r_statelock);
1104                 np->r_flags |= RMODINPROGRESS;
1105                 np->r_modaddr = (offset & MAXBMASK);
1106                 mutex_exit(&np->r_statelock);
1107 
1108                 if (vpm_enable) {
1109                         /*
1110                          * Copy data. If new pages are created, part of
1111                          * the page that is not written will be initizliazed
1112                          * with zeros.
1113                          */
1114                         error = vpm_data_copy(vp, offset, n, uio,
1115                             !pagecreate, NULL, 0, S_WRITE);
1116                 } else {
1117                         error = uiomove(base, n, UIO_WRITE, uio);
1118                 }
1119 
1120                 /*
1121                  * r_size is the maximum number of
1122                  * bytes known to be in the file.
1123                  * Make sure it is at least as high as the
1124                  * first unwritten byte pointed to by uio_loffset.
1125                  */
1126                 mutex_enter(&np->r_statelock);
1127                 if (np->r_size < uio->uio_loffset)
1128                         np->r_size = uio->uio_loffset;
1129                 np->r_flags &= ~RMODINPROGRESS;
1130                 np->r_flags |= RDIRTY;
1131                 mutex_exit(&np->r_statelock);
1132 
1133                 /* n = # of bytes written */
1134                 n = (int)(uio->uio_loffset - offset);
1135 
1136                 if (!vpm_enable) {
1137                         base += n;
1138                 }
1139                 tcount -= n;
1140                 /*
1141                  * If we created pages w/o initializing them completely,
1142                  * we need to zero the part that wasn't set up.
1143                  * This happens on a most EOF write cases and if
1144                  * we had some sort of error during the uiomove.
1145                  */
1146                 if (!vpm_enable && pagecreate) {
1147                         if ((uio->uio_loffset & PAGEOFFSET) || n == 0)
1148                                 (void) kzero(base, PAGESIZE - n);
1149 
1150                         if (pgcreated) {
1151                                 /*
1152                                  * Caller is responsible for this page,
1153                                  * it was not created in this loop.
1154                                  */
1155                                 pgcreated = 0;
1156                         } else {
1157                                 /*
1158                                  * For bug 1094402: segmap_pagecreate locks
1159                                  * page. Unlock it. This also unlocks the
1160                                  * pages allocated by page_create_va() in
1161                                  * segmap_pagecreate().
1162                                  */
1163                                 sm_error = segmap_fault(kas.a_hat, segkmap,
1164                                     saved_base, saved_n,
1165                                     F_SOFTUNLOCK, S_WRITE);
1166                                 if (error == 0)
1167                                         error = sm_error;
1168                         }
1169                 }
1170         } while (tcount > 0 && error == 0);
1171 
1172         return (error);
1173 }
1174 
1175 /*
1176  * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED}
1177  * Like nfs3_rdwrlbn()
1178  */
1179 static int
1180 smbfs_rdwrlbn(vnode_t *vp, page_t *pp, u_offset_t off, size_t len,
1181         int flags, cred_t *cr)
1182 {
1183         smbmntinfo_t    *smi = VTOSMI(vp);
1184         struct buf *bp;
1185         int error;
1186         int sync;
1187 
1188         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1189                 return (EIO);
1190 
1191         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1192                 return (EIO);
1193 
1194         bp = pageio_setup(pp, len, vp, flags);
1195         ASSERT(bp != NULL);
1196 
1197         /*
1198          * pageio_setup should have set b_addr to 0.  This
1199          * is correct since we want to do I/O on a page
1200          * boundary.  bp_mapin will use this addr to calculate
1201          * an offset, and then set b_addr to the kernel virtual
1202          * address it allocated for us.
1203          */
1204         ASSERT(bp->b_un.b_addr == 0);
1205 
1206         bp->b_edev = 0;
1207         bp->b_dev = 0;
1208         bp->b_lblkno = lbtodb(off);
1209         bp->b_file = vp;
1210         bp->b_offset = (offset_t)off;
1211         bp_mapin(bp);
1212 
1213         /*
1214          * Calculate the desired level of stability to write data.
1215          */
1216         if ((flags & (B_WRITE|B_ASYNC)) == (B_WRITE|B_ASYNC) &&
1217             freemem > desfree) {
1218                 sync = 0;
1219         } else {
1220                 sync = 1;
1221         }
1222 
1223         error = smbfs_bio(bp, sync, cr);
1224 
1225         bp_mapout(bp);
1226         pageio_done(bp);
1227 
1228         return (error);
1229 }
1230 
1231 
1232 /*
1233  * Corresponds to nfs3_vnopc.c : nfs3_bio(), though the NFS code
1234  * uses nfs3read()/nfs3write() where we use smb_rwuio().  Also,
1235  * NFS has this later in the file.  Move it up here closer to
1236  * the one call site just above.
1237  */
1238 
1239 static int
1240 smbfs_bio(struct buf *bp, int sync, cred_t *cr)
1241 {
1242         struct iovec aiov[1];
1243         struct uio  auio;
1244         struct smb_cred scred;
1245         smbnode_t *np = VTOSMB(bp->b_vp);
1246         smbmntinfo_t *smi = np->n_mount;
1247         offset_t offset;
1248         offset_t endoff;
1249         size_t count;
1250         size_t past_eof;
1251         int error;
1252 
1253         ASSERT(curproc->p_zone == smi->smi_zone_ref.zref_zone);
1254 
1255         offset = ldbtob(bp->b_lblkno);
1256         count = bp->b_bcount;
1257         endoff = offset + count;
1258         if (offset < 0 || endoff < 0)
1259                 return (EINVAL);
1260 
1261         /*
1262          * Limit file I/O to the remaining file size, but see
1263          * the notes in smbfs_getpage about SMBFS_EOF.
1264          */
1265         mutex_enter(&np->r_statelock);
1266         if (offset >= np->r_size) {
1267                 mutex_exit(&np->r_statelock);
1268                 if (bp->b_flags & B_READ) {
1269                         return (SMBFS_EOF);
1270                 } else {
1271                         return (EINVAL);
1272                 }
1273         }
1274         if (endoff > np->r_size) {
1275                 past_eof = (size_t)(endoff - np->r_size);
1276                 count -= past_eof;
1277         } else
1278                 past_eof = 0;
1279         mutex_exit(&np->r_statelock);
1280         ASSERT(count > 0);
1281 
1282         /* Caller did bpmapin().  Mapped address is... */
1283         aiov[0].iov_base = bp->b_un.b_addr;
1284         aiov[0].iov_len = count;
1285         auio.uio_iov = aiov;
1286         auio.uio_iovcnt = 1;
1287         auio.uio_loffset = offset;
1288         auio.uio_segflg = UIO_SYSSPACE;
1289         auio.uio_fmode = 0;
1290         auio.uio_resid = count;
1291 
1292         /* Shared lock for n_fid use in smb_rwuio */
1293         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER,
1294             smi->smi_flags & SMI_INT))
1295                 return (EINTR);
1296         smb_credinit(&scred, cr);
1297 
1298         DTRACE_IO1(start, struct buf *, bp);
1299 
1300         if (bp->b_flags & B_READ) {
1301 
1302                 error = smb_rwuio(np->n_fid, UIO_READ,
1303                     &auio, &scred, smb_timo_read);
1304 
1305                 /* Like NFS, only set b_error here. */
1306                 bp->b_error = error;
1307                 bp->b_resid = auio.uio_resid;
1308 
1309                 if (!error && auio.uio_resid != 0)
1310                         error = EIO;
1311                 if (!error && past_eof != 0) {
1312                         /* Zero the memory beyond EOF. */
1313                         bzero(bp->b_un.b_addr + count, past_eof);
1314                 }
1315         } else {
1316 
1317                 error = smb_rwuio(np->n_fid, UIO_WRITE,
1318                     &auio, &scred, smb_timo_write);
1319 
1320                 /* Like NFS, only set b_error here. */
1321                 bp->b_error = error;
1322                 bp->b_resid = auio.uio_resid;
1323 
1324                 if (!error && auio.uio_resid != 0)
1325                         error = EIO;
1326                 if (!error && sync) {
1327                         (void) smbfsflush(np, &scred);
1328                 }
1329         }
1330 
1331         /*
1332          * This comes from nfs3_commit()
1333          */
1334         if (error != 0) {
1335                 mutex_enter(&np->r_statelock);
1336                 if (error == ESTALE)
1337                         np->r_flags |= RSTALE;
1338                 if (!np->r_error)
1339                         np->r_error = error;
1340                 mutex_exit(&np->r_statelock);
1341                 bp->b_flags |= B_ERROR;
1342         }
1343 
1344         DTRACE_IO1(done, struct buf *, bp);
1345 
1346         smb_credrele(&scred);
1347         smbfs_rw_exit(&np->r_lkserlock);
1348 
1349         if (error == ESTALE)
1350                 smbfs_attrcache_remove(np);
1351 
1352         return (error);
1353 }
1354 #endif  // _KERNEL
1355 
1356 /*
1357  * Here NFS has: nfs3write, nfs3read
1358  * We use smb_rwuio instead.
1359  */
1360 
1361 /* ARGSUSED */
1362 static int
1363 smbfs_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag,
1364         cred_t *cr, int *rvalp, caller_context_t *ct)
1365 {
1366         int             error;
1367         smbmntinfo_t    *smi;
1368 
1369         smi = VTOSMI(vp);
1370 
1371         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1372                 return (EIO);
1373 
1374         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1375                 return (EIO);
1376 
1377         switch (cmd) {
1378 
1379         case _FIOFFS:
1380                 error = smbfs_fsync(vp, 0, cr, ct);
1381                 break;
1382 
1383                 /*
1384                  * The following two ioctls are used by bfu.
1385                  * Silently ignore to avoid bfu errors.
1386                  */
1387         case _FIOGDIO:
1388         case _FIOSDIO:
1389                 error = 0;
1390                 break;
1391 
1392 #if 0   /* Todo - SMB ioctl query regions */
1393         case _FIO_SEEK_DATA:
1394         case _FIO_SEEK_HOLE:
1395 #endif
1396 
1397         case _FIODIRECTIO:
1398                 error = smbfs_directio(vp, (int)arg, cr);
1399                 break;
1400 
1401                 /*
1402                  * Allow get/set with "raw" security descriptor (SD) data.
1403                  * Useful for testing, diagnosing idmap problems, etc.
1404                  */
1405         case SMBFSIO_GETSD:
1406                 error = smbfs_acl_iocget(vp, arg, flag, cr);
1407                 break;
1408 
1409         case SMBFSIO_SETSD:
1410                 error = smbfs_acl_iocset(vp, arg, flag, cr);
1411                 break;
1412 
1413         default:
1414                 error = ENOTTY;
1415                 break;
1416         }
1417 
1418         return (error);
1419 }
1420 
1421 
1422 /*
1423  * Return either cached or remote attributes. If get remote attr
1424  * use them to check and invalidate caches, then cache the new attributes.
1425  */
1426 /* ARGSUSED */
1427 static int
1428 smbfs_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
1429         caller_context_t *ct)
1430 {
1431         smbnode_t *np;
1432         smbmntinfo_t *smi;
1433         int error;
1434 
1435         smi = VTOSMI(vp);
1436 
1437         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1438                 return (EIO);
1439 
1440         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1441                 return (EIO);
1442 
1443         /*
1444          * If it has been specified that the return value will
1445          * just be used as a hint, and we are only being asked
1446          * for size, fsid or rdevid, then return the client's
1447          * notion of these values without checking to make sure
1448          * that the attribute cache is up to date.
1449          * The whole point is to avoid an over the wire GETATTR
1450          * call.
1451          */
1452         np = VTOSMB(vp);
1453         if (flags & ATTR_HINT) {
1454                 if (vap->va_mask ==
1455                     (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
1456                         mutex_enter(&np->r_statelock);
1457                         if (vap->va_mask | AT_SIZE)
1458                                 vap->va_size = np->r_size;
1459                         if (vap->va_mask | AT_FSID)
1460                                 vap->va_fsid = vp->v_vfsp->vfs_dev;
1461                         if (vap->va_mask | AT_RDEV)
1462                                 vap->va_rdev = vp->v_rdev;
1463                         mutex_exit(&np->r_statelock);
1464                         return (0);
1465                 }
1466         }
1467 
1468         /*
1469          * Only need to flush pages if asking for the mtime
1470          * and if there any dirty pages.
1471          *
1472          * Here NFS also checks for async writes (np->r_awcount)
1473          */
1474         if (vap->va_mask & AT_MTIME) {
1475                 if (vn_has_cached_data(vp) &&
1476                     ((np->r_flags & RDIRTY) != 0)) {
1477                         mutex_enter(&np->r_statelock);
1478                         np->r_gcount++;
1479                         mutex_exit(&np->r_statelock);
1480                         error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
1481                         mutex_enter(&np->r_statelock);
1482                         if (error && (error == ENOSPC || error == EDQUOT)) {
1483                                 if (!np->r_error)
1484                                         np->r_error = error;
1485                         }
1486                         if (--np->r_gcount == 0)
1487                                 cv_broadcast(&np->r_cv);
1488                         mutex_exit(&np->r_statelock);
1489                 }
1490         }
1491 
1492         return (smbfsgetattr(vp, vap, cr));
1493 }
1494 
1495 /* smbfsgetattr() in smbfs_client.c */
1496 
1497 /*ARGSUSED4*/
1498 static int
1499 smbfs_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
1500                 caller_context_t *ct)
1501 {
1502         vfs_t           *vfsp;
1503         smbmntinfo_t    *smi;
1504         int             error;
1505         uint_t          mask;
1506         struct vattr    oldva;
1507 
1508         vfsp = vp->v_vfsp;
1509         smi = VFTOSMI(vfsp);
1510 
1511         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1512                 return (EIO);
1513 
1514         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
1515                 return (EIO);
1516 
1517         mask = vap->va_mask;
1518         if (mask & AT_NOSET)
1519                 return (EINVAL);
1520 
1521         if (vfsp->vfs_flag & VFS_RDONLY)
1522                 return (EROFS);
1523 
1524         /*
1525          * This is a _local_ access check so that only the owner of
1526          * this mount can set attributes.  With ACLs enabled, the
1527          * file owner can be different from the mount owner, and we
1528          * need to check the _mount_ owner here.  See _access_rwx
1529          */
1530         bzero(&oldva, sizeof (oldva));
1531         oldva.va_mask = AT_TYPE | AT_MODE;
1532         error = smbfsgetattr(vp, &oldva, cr);
1533         if (error)
1534                 return (error);
1535         oldva.va_mask |= AT_UID | AT_GID;
1536         oldva.va_uid = smi->smi_uid;
1537         oldva.va_gid = smi->smi_gid;
1538 
1539         error = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
1540             smbfs_accessx, vp);
1541         if (error)
1542                 return (error);
1543 
1544         if (mask & (AT_UID | AT_GID)) {
1545                 if (smi->smi_flags & SMI_ACL)
1546                         error = smbfs_acl_setids(vp, vap, cr);
1547                 else
1548                         error = ENOSYS;
1549                 if (error != 0) {
1550                         SMBVDEBUG("error %d seting UID/GID on %s",
1551                             error, VTOSMB(vp)->n_rpath);
1552                         /*
1553                          * It might be more correct to return the
1554                          * error here, but that causes complaints
1555                          * when root extracts a cpio archive, etc.
1556                          * So ignore this error, and go ahead with
1557                          * the rest of the setattr work.
1558                          */
1559                 }
1560         }
1561 
1562         error = smbfssetattr(vp, vap, flags, cr);
1563 
1564 #ifdef  SMBFS_VNEVENT
1565         if (error == 0 && (vap->va_mask & AT_SIZE) && vap->va_size == 0)
1566                 vnevent_truncate(vp, ct);
1567 #endif
1568 
1569         return (error);
1570 }
1571 
1572 /*
1573  * Mostly from Darwin smbfs_setattr()
1574  * but then modified a lot.
1575  */
/*
 * Do the actual attribute-setting work for smbfs_setattr (above):
 * size, mtime/atime, and DOS attributes derived from extensible
 * system attributes.  Opens a temporary server handle when one is
 * needed, and invalidates cached attributes/pages on success.
 * Returns an errno value (0 on success).
 */
/* ARGSUSED */
static int
smbfssetattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr)
{
        int             error = 0;
        smbnode_t       *np = VTOSMB(vp);
        smbmntinfo_t    *smi = np->n_mount;
        uint_t          mask = vap->va_mask;
        struct timespec *mtime, *atime;
        struct smb_cred scred;
        /* modified != 0 means some attribute change actually took hold */
        int             modified = 0;
        smb_fh_t        *fid = NULL;
        uint32_t rights = 0;
        uint32_t dosattr = 0;

        ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone_ref.zref_zone);

        /*
         * There are no settable attributes on the XATTR dir,
         * so just silently ignore these.  On XATTR files,
         * you can set the size but nothing else.
         */
        if (vp->v_flag & V_XATTRDIR)
                return (0);
        if (np->n_flag & N_XATTR) {
                if (mask & AT_TIMES)
                        SMBVDEBUG("ignore set time on xattr\n");
                mask &= AT_SIZE;
        }

        /*
         * Only need to flush pages if there are any pages and
         * if the file is marked as dirty in some fashion.  The
         * file must be flushed so that we can accurately
         * determine the size of the file and the cached data
         * after the SETATTR returns.  A file is considered to
         * be dirty if it is either marked with RDIRTY, has
         * outstanding i/o's active, or is mmap'd.  In this
         * last case, we can't tell whether there are dirty
         * pages, so we flush just to be sure.
         */
        if (vn_has_cached_data(vp) &&
            ((np->r_flags & RDIRTY) ||
            np->r_count > 0 ||
            np->r_mapcnt > 0)) {
                ASSERT(vp->v_type != VCHR);
                error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, NULL);
                /* Remember out-of-space errors; other errors are ignored. */
                if (error && (error == ENOSPC || error == EDQUOT)) {
                        mutex_enter(&np->r_statelock);
                        if (!np->r_error)
                                np->r_error = error;
                        mutex_exit(&np->r_statelock);
                }
        }

        /*
         * If our caller is trying to set multiple attributes, they
         * can make no assumption about what order they are done in.
         * Here we try to do them in order of decreasing likelihood
         * of failure, just to minimize the chance we'll wind up
         * with a partially complete request.
         */

        smb_credinit(&scred, cr);

        /*
         * If the caller has provided extensible attributes,
         * map those into DOS attributes supported by SMB.
         * Note: zero means "no change".
         */
        if (mask & AT_XVATTR)
                dosattr = xvattr_to_dosattr(np, vap);

        /*
         * Will we need an open handle for this setattr?
         * If so, what rights will we need?
         */
        if (dosattr || (mask & (AT_ATIME | AT_MTIME))) {
                rights |=
                    SA_RIGHT_FILE_WRITE_ATTRIBUTES;
        }
        if (mask & AT_SIZE) {
                rights |=
                    SA_RIGHT_FILE_WRITE_DATA |
                    SA_RIGHT_FILE_APPEND_DATA;
        }

        /*
         * Only SIZE really requires a handle, but it's
         * simpler and more reliable to set via a handle.
         * Some servers like NT4 won't set times by path.
         * Also, we're usually setting everything anyway.
         */
        if (rights != 0) {
                error = smbfs_smb_tmpopen(np, rights, &scred, &fid);
                if (error) {
                        SMBVDEBUG("error %d opening %s\n",
                            error, np->n_rpath);
                        goto out;
                }
                ASSERT(fid != NULL);
        }

        /*
         * If the server supports the UNIX extensions, right here is where
         * we'd support changes to uid, gid, mode, and possibly va_flags.
         * For now we claim to have made any such changes.
         */

        if (mask & AT_SIZE) {
                /*
                 * If the new file size is less than what the client sees as
                 * the file size, then just change the size and invalidate
                 * the pages.
                 */

                /*
                 * Set the file size to vap->va_size.
                 */
                ASSERT(fid != NULL);
                error = smbfs_smb_setfsize(smi->smi_share, fid,
                    vap->va_size, &scred);
                if (error) {
                        SMBVDEBUG("setsize error %d file %s\n",
                            error, np->n_rpath);
                } else {
                        /*
                         * Darwin had code here to zero-extend.
                         * Tests indicate the server will zero-fill,
                         * so looks like we don't need to do that.
                         */
                        mutex_enter(&np->r_statelock);
                        np->r_size = vap->va_size;
                        np->n_flag |= (NFLUSHWIRE | NATTRCHANGED);
                        mutex_exit(&np->r_statelock);
                        modified = 1;
                }
        }

        /*
         * Todo: Implement setting create_time (which is
         * different from ctime).
         */
        mtime = ((mask & AT_MTIME) ? &vap->va_mtime : 0);
        atime = ((mask & AT_ATIME) ? &vap->va_atime : 0);

        if (dosattr || mtime || atime) {
                /*
                 * Always use the handle-based set attr call now.
                 */
                ASSERT(fid != NULL);
                error = smbfs_smb_setfattr(smi->smi_share, fid,
                    dosattr, mtime, atime, &scred);
                if (error) {
                        SMBVDEBUG("set times error %d file %s\n",
                            error, np->n_rpath);
                } else {
                        modified = 1;
                }
        }

/* Common cleanup: close the temporary handle and release credentials. */
out:
        if (fid != NULL)
                smbfs_smb_tmpclose(np, fid);

        smb_credrele(&scred);

        if (modified) {
                /*
                 * Invalidate attribute cache in case the server
                 * doesn't set exactly the attributes we asked.
                 */
                smbfs_attrcache_remove(np);

                /*
                 * If changing the size of the file, invalidate
                 * any local cached data which is no longer part
                 * of the file.  We also possibly invalidate the
                 * last page in the file.  We could use
                 * pvn_vpzero(), but this would mark the page as
                 * modified and require it to be written back to
                 * the server for no particularly good reason.
                 * This way, if we access it, then we bring it
                 * back in.  A read should be cheaper than a
                 * write.
                 */
                if (mask & AT_SIZE) {
                        smbfs_invalidate_pages(vp,
                            (vap->va_size & PAGEMASK), cr);
                }
        }

        return (error);
}
1770 
1771 /*
1772  * Helper function for extensible system attributes (PSARC 2007/315)
1773  * Compute the DOS attribute word to pass to _setfattr (see above).
1774  * This returns zero IFF no change is being made to attributes.
1775  * Otherwise return the new attributes or SMB_EFA_NORMAL.
1776  */
1777 static uint32_t
1778 xvattr_to_dosattr(smbnode_t *np, struct vattr *vap)
1779 {
1780         xvattr_t *xvap = (xvattr_t *)vap;
1781         xoptattr_t *xoap = NULL;
1782         uint32_t attr = np->r_attr.fa_attr;
1783         boolean_t anyset = B_FALSE;
1784 
1785         if ((xoap = xva_getxoptattr(xvap)) == NULL)
1786                 return (0);
1787 
1788         if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
1789                 if (xoap->xoa_archive)
1790                         attr |= SMB_FA_ARCHIVE;
1791                 else
1792                         attr &= ~SMB_FA_ARCHIVE;
1793                 XVA_SET_RTN(xvap, XAT_ARCHIVE);
1794                 anyset = B_TRUE;
1795         }
1796         if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
1797                 if (xoap->xoa_system)
1798                         attr |= SMB_FA_SYSTEM;
1799                 else
1800                         attr &= ~SMB_FA_SYSTEM;
1801                 XVA_SET_RTN(xvap, XAT_SYSTEM);
1802                 anyset = B_TRUE;
1803         }
1804         if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
1805                 if (xoap->xoa_readonly)
1806                         attr |= SMB_FA_RDONLY;
1807                 else
1808                         attr &= ~SMB_FA_RDONLY;
1809                 XVA_SET_RTN(xvap, XAT_READONLY);
1810                 anyset = B_TRUE;
1811         }
1812         if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
1813                 if (xoap->xoa_hidden)
1814                         attr |= SMB_FA_HIDDEN;
1815                 else
1816                         attr &= ~SMB_FA_HIDDEN;
1817                 XVA_SET_RTN(xvap, XAT_HIDDEN);
1818                 anyset = B_TRUE;
1819         }
1820 
1821         if (anyset == B_FALSE)
1822                 return (0);     /* no change */
1823         if (attr == 0)
1824                 attr = SMB_EFA_NORMAL;
1825 
1826         return (attr);
1827 }
1828 
1829 /*
1830  * smbfs_access_rwx()
1831  * Common function for smbfs_access, etc.
1832  *
1833  * The security model implemented by the FS is unusual
1834  * due to the current "single user mounts" restriction:
1835  * All access under a given mount point uses the CIFS
1836  * credentials established by the owner of the mount.
1837  *
1838  * Most access checking is handled by the CIFS server,
1839  * but we need sufficient Unix access checks here to
1840  * prevent other local Unix users from having access
1841  * to objects under this mount that the uid/gid/mode
1842  * settings in the mount would not allow.
1843  *
1844  * With this model, there is a case where we need the
1845  * ability to do an access check before we have the
1846  * vnode for an object.  This function takes advantage
1847  * of the fact that the uid/gid/mode is per mount, and
1848  * avoids the need for a vnode.
1849  *
1850  * We still (sort of) need a vnode when we call
1851  * secpolicy_vnode_access, but that only uses
1852  * the vtype field, so we can use a pair of fake
1853  * vnodes that have only v_type filled in.
1854  */
1855 static int
1856 smbfs_access_rwx(vfs_t *vfsp, int vtype, int mode, cred_t *cr)
1857 {
1858         /* See the secpolicy call below. */
1859         static const vnode_t tmpl_vdir = { .v_type = VDIR };
1860         static const vnode_t tmpl_vreg = { .v_type = VREG };
1861         vattr_t         va;
1862         vnode_t         *tvp;
1863         struct smbmntinfo *smi = VFTOSMI(vfsp);
1864         int shift = 0;
1865 
1866         /*
1867          * Build our (fabricated) vnode attributes.
1868          */
1869         bzero(&va, sizeof (va));
1870         va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
1871         va.va_type = vtype;
1872         va.va_mode = (vtype == VDIR) ?
1873             smi->smi_dmode : smi->smi_fmode;
1874         va.va_uid = smi->smi_uid;
1875         va.va_gid = smi->smi_gid;
1876 
1877         /*
1878          * Disallow write attempts on read-only file systems,
1879          * unless the file is a device or fifo node.  Note:
1880          * Inline vn_is_readonly and IS_DEVVP here because
1881          * we may not have a vnode ptr.  Original expr. was:
1882          * (mode & VWRITE) && vn_is_readonly(vp) && !IS_DEVVP(vp))
1883          */
1884         if ((mode & VWRITE) &&
1885             (vfsp->vfs_flag & VFS_RDONLY) &&
1886             !(vtype == VCHR || vtype == VBLK || vtype == VFIFO))
1887                 return (EROFS);
1888 
1889         /*
1890          * Disallow attempts to access mandatory lock files.
1891          * Similarly, expand MANDLOCK here.
1892          */
1893         if ((mode & (VWRITE | VREAD | VEXEC)) &&
1894             va.va_type == VREG && MANDMODE(va.va_mode))
1895                 return (EACCES);
1896 
1897         /*
1898          * Access check is based on only
1899          * one of owner, group, public.
1900          * If not owner, then check group.
1901          * If not a member of the group,
1902          * then check public access.
1903          */
1904         if (crgetuid(cr) != va.va_uid) {
1905                 shift += 3;
1906                 if (!groupmember(va.va_gid, cr))
1907                         shift += 3;
1908         }
1909 
1910         /*
1911          * We need a vnode for secpolicy_vnode_access,
1912          * but the only thing it looks at is v_type,
1913          * so pass one of the templates above.
1914          */
1915         tvp = (va.va_type == VDIR) ?
1916             (vnode_t *)&tmpl_vdir :
1917             (vnode_t *)&tmpl_vreg;
1918 
1919         return (secpolicy_vnode_access2(cr, tvp, va.va_uid,
1920             va.va_mode << shift, mode));
1921 }
1922 
1923 /*
1924  * See smbfs_setattr
1925  */
1926 static int
1927 smbfs_accessx(void *arg, int mode, cred_t *cr)
1928 {
1929         vnode_t *vp = arg;
1930         /*
1931          * Note: The caller has checked the current zone,
1932          * the SMI_DEAD and VFS_UNMOUNTED flags, etc.
1933          */
1934         return (smbfs_access_rwx(vp->v_vfsp, vp->v_type, mode, cr));
1935 }
1936 
1937 /*
1938  * XXX
1939  * This op should support PSARC 2007/403, Modified Access Checks for CIFS
1940  */
1941 /* ARGSUSED */
1942 static int
1943 smbfs_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
1944 {
1945         vfs_t           *vfsp;
1946         smbmntinfo_t    *smi;
1947 
1948         vfsp = vp->v_vfsp;
1949         smi = VFTOSMI(vfsp);
1950 
1951         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1952                 return (EIO);
1953 
1954         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
1955                 return (EIO);
1956 
1957         return (smbfs_access_rwx(vfsp, vp->v_type, mode, cr));
1958 }
1959 
1960 
1961 /* ARGSUSED */
1962 static int
1963 smbfs_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr, caller_context_t *ct)
1964 {
1965         /* Not yet... */
1966         return (ENOSYS);
1967 }
1968 
1969 
1970 /*
1971  * Flush local dirty pages to stable storage on the server.
1972  *
1973  * If FNODSYNC is specified, then there is nothing to do because
1974  * metadata changes are not cached on the client before being
1975  * sent to the server.
1976  */
1977 /* ARGSUSED */
1978 static int
1979 smbfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
1980 {
1981         int             error = 0;
1982         smbmntinfo_t    *smi;
1983         smbnode_t       *np;
1984         struct smb_cred scred;
1985 
1986         np = VTOSMB(vp);
1987         smi = VTOSMI(vp);
1988 
1989         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
1990                 return (EIO);
1991 
1992         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1993                 return (EIO);
1994 
1995         if ((syncflag & FNODSYNC) || IS_SWAPVP(vp))
1996                 return (0);
1997 
1998         if ((syncflag & (FSYNC|FDSYNC)) == 0)
1999                 return (0);
2000 
2001         error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
2002         if (error)
2003                 return (error);
2004 
2005         /* Shared lock for n_fid use in _flush */
2006         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
2007                 return (EINTR);
2008         smb_credinit(&scred, cr);
2009 
2010         error = smbfsflush(np, &scred);
2011 
2012         smb_credrele(&scred);
2013         smbfs_rw_exit(&np->r_lkserlock);
2014 
2015         return (error);
2016 }
2017 
2018 static int
2019 smbfsflush(smbnode_t *np, struct smb_cred *scrp)
2020 {
2021         struct smb_share *ssp = np->n_mount->smi_share;
2022         smb_fh_t *fhp;
2023         int error;
2024 
2025         /* Shared lock for n_fid use below. */
2026         ASSERT(smbfs_rw_lock_held(&np->r_lkserlock, RW_READER));
2027 
2028         if (!(np->n_flag & NFLUSHWIRE))
2029                 return (0);
2030         if (np->n_fidrefs == 0)
2031                 return (0); /* not open */
2032         if ((fhp = np->n_fid) == NULL)
2033                 return (0);
2034 
2035         /* After reconnect, n_fid is invalid */
2036         if (fhp->fh_vcgenid != ssp->ss_vcgenid)
2037                 return (ESTALE);
2038 
2039         error = smbfs_smb_flush(ssp, fhp, scrp);
2040 
2041         if (!error) {
2042                 mutex_enter(&np->r_statelock);
2043                 np->n_flag &= ~NFLUSHWIRE;
2044                 mutex_exit(&np->r_statelock);
2045         }
2046         return (error);
2047 }
2048 
2049 /*
2050  * Last reference to vnode went away.
2051  */
/* ARGSUSED */
static void
smbfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
        struct smb_cred scred;
        smbnode_t       *np = VTOSMB(vp);
        int error;

        /*
         * Don't "bail out" for VFS_UNMOUNTED here,
         * as we want to do cleanup, etc.
         * See also pcfs_inactive
         */

        /*
         * If this is coming from the wrong zone, we let someone in the right
         * zone take care of it asynchronously.  We can get here due to
         * VN_RELE() being called from pageout() or fsflush().  This call may
         * potentially turn into an expensive no-op if, for instance, v_count
         * gets incremented in the meantime, but it's still correct.
         */

        /*
         * From NFS:rinactive()
         *
         * Before freeing anything, wait until all asynchronous
         * activity is done on this rnode.  This will allow all
         * asynchronous read ahead and write behind i/o's to
         * finish.
         */
        mutex_enter(&np->r_statelock);
        while (np->r_count > 0)
                cv_wait(&np->r_cv, &np->r_statelock);
        mutex_exit(&np->r_statelock);

        /*
         * Flush and invalidate all pages associated with the vnode.
         */
        if (vn_has_cached_data(vp)) {
                if ((np->r_flags & RDIRTY) && !np->r_error) {
                        error = smbfs_putpage(vp, (u_offset_t)0, 0, 0, cr, ct);
                        /* Remember out-of-space errors; others are dropped. */
                        if (error && (error == ENOSPC || error == EDQUOT)) {
                                mutex_enter(&np->r_statelock);
                                if (!np->r_error)
                                        np->r_error = error;
                                mutex_exit(&np->r_statelock);
                        }
                }
                smbfs_invalidate_pages(vp, (u_offset_t)0, cr);
        }
        /*
         * This vnode should have lost all cached data.
         */
        ASSERT(vn_has_cached_data(vp) == 0);

        /*
         * Defend against the possibility that higher-level callers
         * might not correctly balance open and close calls.  If we
         * get here with open references remaining, it means there
         * was a missing VOP_CLOSE somewhere.  If that happens, do
         * the close here so we don't "leak" FIDs on the server.
         *
         * Exclusive lock for modifying n_fid stuff.
         * Don't want this one ever interruptible.
         */
        (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
        smb_credinit(&scred, cr);

        /* n_ovtype records what kind of open (if any) is outstanding. */
        switch (np->n_ovtype) {
        case VNON:
                /* not open (OK) */
                break;

        case VDIR:
                if (np->n_dirrefs == 0)
                        break;
                SMBVDEBUG("open dir: refs %d path %s\n",
                    np->n_dirrefs, np->n_rpath);
                /* Force last close. */
                np->n_dirrefs = 1;
                smbfs_rele_fid(np, &scred);
                break;

        case VREG:
                if (np->n_fidrefs == 0)
                        break;
                SMBVDEBUG("open file: refs %d path %s\n",
                    np->n_fidrefs, np->n_rpath);
                /* Force last close. */
                np->n_fidrefs = 1;
                smbfs_rele_fid(np, &scred);
                break;

        default:
                SMBVDEBUG("bad n_ovtype %d\n", np->n_ovtype);
                np->n_ovtype = VNON;
                break;
        }

        smb_credrele(&scred);
        smbfs_rw_exit(&np->r_lkserlock);

        /*
         * XATTR directories (and the files under them) have
         * little value for reclaim, so just remove them from
         * the "hash" (AVL) as soon as they go inactive.
         * Note that the node may already have been removed
         * from the hash by smbfsremove.
         */
        if ((np->n_flag & N_XATTR) != 0 &&
            (np->r_flags & RHASHED) != 0)
                smbfs_rmhash(np);

        /* smbfs_addfree() handles the rest of the node teardown. */
        smbfs_addfree(np);
}
2167 
2168 /*
2169  * Remote file system operations having to do with directory manipulation.
2170  */
2171 /* ARGSUSED */
2172 static int
2173 smbfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
2174         int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
2175         int *direntflags, pathname_t *realpnp)
2176 {
2177         vfs_t           *vfs;
2178         smbmntinfo_t    *smi;
2179         smbnode_t       *dnp;
2180         int             error;
2181 
2182         vfs = dvp->v_vfsp;
2183         smi = VFTOSMI(vfs);
2184 
2185         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
2186                 return (EPERM);
2187 
2188         if (smi->smi_flags & SMI_DEAD || vfs->vfs_flag & VFS_UNMOUNTED)
2189                 return (EIO);
2190 
2191         dnp = VTOSMB(dvp);
2192 
2193         /*
2194          * Are we looking up extended attributes?  If so, "dvp" is
2195          * the file or directory for which we want attributes, and
2196          * we need a lookup of the (faked up) attribute directory
2197          * before we lookup the rest of the path.
2198          */
2199         if (flags & LOOKUP_XATTR) {
2200                 /*
2201                  * Require the xattr mount option.
2202                  */
2203                 if ((vfs->vfs_flag & VFS_XATTR) == 0)
2204                         return (EINVAL);
2205 
2206                 error = smbfs_get_xattrdir(dvp, vpp, cr, flags);
2207                 return (error);
2208         }
2209 
2210         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_READER, SMBINTR(dvp)))
2211                 return (EINTR);
2212 
2213         error = smbfslookup(dvp, nm, vpp, cr, 1, ct);
2214 
2215         smbfs_rw_exit(&dnp->r_rwlock);
2216 
2217         /*
2218          * If the caller passes an invalid name here, we'll have
2219          * error == EINVAL but want to return ENOENT.  This is
2220          * common with things like "ls foo*" with no matches.
2221          */
2222         if (error == EINVAL)
2223                 error = ENOENT;
2224 
2225         return (error);
2226 }
2227 
/*
 * smbfslookup(dvp, nm, vpp, cr, cache_ok, ct)
 *
 * Look up name "nm" in directory "dvp", returning a held vnode
 * via "vpp".  The names "", ".", and ".." are handled specially
 * without going to the server.  When cache_ok is non-zero, a
 * still-valid node from the smbfs node cache may be returned
 * without an over-the-wire lookup.  Returns an errno value.
 */
/* ARGSUSED */
static int
smbfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr,
        int cache_ok, caller_context_t *ct)
{
        int             error;
        int             supplen; /* supported length */
        vnode_t         *vp;
        smbnode_t       *np;
        smbnode_t       *dnp;
        smbmntinfo_t    *smi;
        /* struct smb_vc        *vcp; */
        const char      *ill;
        const char      *name = (const char *)nm;
        int             nmlen = strlen(nm);
        int             rplen;
        struct smb_cred scred;
        struct smbfattr fa;

        smi = VTOSMI(dvp);
        dnp = VTOSMB(dvp);

        ASSERT(curproc->p_zone == smi->smi_zone_ref.zref_zone);

        /* Maximum name component length we accept. */
        supplen = 255;

        /*
         * RWlock must be held, either reader or writer.
         */
        ASSERT(dnp->r_rwlock.count != 0);

        /*
         * If lookup is for "", just return dvp.
         * No need to perform any access checks.
         */
        if (nmlen == 0) {
                VN_HOLD(dvp);
                *vpp = dvp;
                return (0);
        }

        /*
         * Can't do lookups in non-directories.
         */
        if (dvp->v_type != VDIR)
                return (ENOTDIR);

        /*
         * Need search permission in the directory.
         */
        error = smbfs_access(dvp, VEXEC, 0, cr, ct);
        if (error)
                return (error);

        /*
         * If lookup is for ".", just return dvp.
         * Access check was done above.
         */
        if (nmlen == 1 && name[0] == '.') {
                VN_HOLD(dvp);
                *vpp = dvp;
                return (0);
        }

        /*
         * Now some sanity checks on the name.
         * First check the length.
         */
        if (nmlen > supplen)
                return (ENAMETOOLONG);

        /*
         * Avoid surprises with characters that are
         * illegal in Windows file names.
         * Todo: CATIA mappings?
         */
        ill = illegal_chars;
        if (dnp->n_flag & N_XATTR)
                ill++; /* allow colon */
        if (strpbrk(nm, ill))
                return (EINVAL);

        /*
         * Special handling for lookup of ".."
         *
         * We keep full pathnames (as seen on the server)
         * so we can just trim off the last component to
         * get the full pathname of the parent.  Note:
         * We don't actually copy and modify, but just
         * compute the trimmed length and pass that with
         * the current dir path (not null terminated).
         *
         * We don't go over-the-wire to get attributes
         * for ".." because we know it's a directory,
         * and we can just leave the rest "stale"
         * until someone does a getattr.
         */
        if (nmlen == 2 && name[0] == '.' && name[1] == '.') {
                if (dvp->v_flag & VROOT) {
                        /*
                         * Already at the root.  This can happen
                         * with directory listings at the root,
                         * which lookup "." and ".." to get the
                         * inode numbers.  Let ".." be the same
                         * as "." in the FS root.
                         */
                        VN_HOLD(dvp);
                        *vpp = dvp;
                        return (0);
                }

                /*
                 * Special case for XATTR directory
                 */
                if (dvp->v_flag & V_XATTRDIR) {
                        error = smbfs_xa_parent(dvp, vpp);
                        return (error);
                }

                /*
                 * Find the parent path length.
                 */
                rplen = dnp->n_rplen;
                ASSERT(rplen > 0);
                while (--rplen >= 0) {
                        if (dnp->n_rpath[rplen] == '\\')
                                break;
                }
                if (rplen <= 0) {
                        /* Found our way to the root. */
                        vp = SMBTOV(smi->smi_root);
                        VN_HOLD(vp);
                        *vpp = vp;
                        return (0);
                }
                np = smbfs_node_findcreate(smi,
                    dnp->n_rpath, rplen, NULL, 0, 0,
                    &smbfs_fattr0); /* force create */
                ASSERT(np != NULL);
                vp = SMBTOV(np);
                vp->v_type = VDIR;

                /* Success! */
                *vpp = vp;
                return (0);
        }

        /*
         * Normal lookup of a name under this directory.
         * Note we handled "", ".", ".." above.
         */
        if (cache_ok) {
                /*
                 * The caller indicated that it's OK to use a
                 * cached result for this lookup, so try to
                 * reclaim a node from the smbfs node cache.
                 */
                error = smbfslookup_cache(dvp, nm, nmlen, &vp, cr);
                if (error)
                        return (error);
                if (vp != NULL) {
                        /* hold taken in lookup_cache */
                        *vpp = vp;
                        return (0);
                }
        }

        /*
         * OK, go over-the-wire to get the attributes,
         * then create the node.
         */
        smb_credinit(&scred, cr);
        /* Note: this can allocate a new "name" */
        error = smbfs_smb_lookup(dnp, &name, &nmlen, &fa, &scred);
        smb_credrele(&scred);
        if (error == ENOTDIR) {
                /*
                 * Lookup failed because this directory was
                 * removed or renamed by another client.
                 * Remove any cached attributes under it.
                 */
                smbfs_attrcache_remove(dnp);
                smbfs_attrcache_prune(dnp);
        }
        if (error)
                goto out;

        error = smbfs_nget(dvp, name, nmlen, &fa, &vp);
        if (error)
                goto out;

        /* Success! */
        *vpp = vp;

out:
        /* smbfs_smb_lookup may have allocated name. */
        if (name != nm)
                smbfs_name_free(name, nmlen);

        return (error);
}
2429 
2430 /*
2431  * smbfslookup_cache
2432  *
2433  * Try to reclaim a node from the smbfs node cache.
2434  * Some statistics for DEBUG.
2435  *
2436  * This mechanism lets us avoid many of the five (or more)
2437  * OtW lookup calls per file seen with "ls -l" if we search
2438  * the smbfs node cache for recently inactive(ated) nodes.
2439  */
#ifdef DEBUG
int smbfs_lookup_cache_calls = 0;       /* total cache lookup attempts */
int smbfs_lookup_cache_error = 0;       /* dir getattr failed */
int smbfs_lookup_cache_miss = 0;        /* not found in node cache */
int smbfs_lookup_cache_stale = 0;       /* found but attrs expired */
int smbfs_lookup_cache_hits = 0;        /* found and still valid */
#endif /* DEBUG */
2447 
2448 /* ARGSUSED */
2449 static int
2450 smbfslookup_cache(vnode_t *dvp, char *nm, int nmlen,
2451         vnode_t **vpp, cred_t *cr)
2452 {
2453         struct vattr va;
2454         smbnode_t *dnp;
2455         smbnode_t *np;
2456         vnode_t *vp;
2457         int error;
2458         char sep;
2459 
2460         dnp = VTOSMB(dvp);
2461         *vpp = NULL;
2462 
2463 #ifdef DEBUG
2464         smbfs_lookup_cache_calls++;
2465 #endif
2466 
2467         /*
2468          * First make sure we can get attributes for the
2469          * directory.  Cached attributes are OK here.
2470          * If we removed or renamed the directory, this
2471          * will return ENOENT.  If someone else removed
2472          * this directory or file, we'll find out when we
2473          * try to open or get attributes.
2474          */
2475         va.va_mask = AT_TYPE | AT_MODE;
2476         error = smbfsgetattr(dvp, &va, cr);
2477         if (error) {
2478 #ifdef DEBUG
2479                 smbfs_lookup_cache_error++;
2480 #endif
2481                 return (error);
2482         }
2483 
2484         /*
2485          * Passing NULL smbfattr here so we will
2486          * just look, not create.
2487          */
2488         sep = SMBFS_DNP_SEP(dnp);
2489         np = smbfs_node_findcreate(dnp->n_mount,
2490             dnp->n_rpath, dnp->n_rplen,
2491             nm, nmlen, sep, NULL);
2492         if (np == NULL) {
2493 #ifdef DEBUG
2494                 smbfs_lookup_cache_miss++;
2495 #endif
2496                 return (0);
2497         }
2498 
2499         /*
2500          * Found it.  Attributes still valid?
2501          */
2502         vp = SMBTOV(np);
2503         if (np->r_attrtime <= gethrtime()) {
2504                 /* stale */
2505 #ifdef DEBUG
2506                 smbfs_lookup_cache_stale++;
2507 #endif
2508                 VN_RELE(vp);
2509                 return (0);
2510         }
2511 
2512         /*
2513          * Success!
2514          * Caller gets hold from smbfs_node_findcreate
2515          */
2516 #ifdef DEBUG
2517         smbfs_lookup_cache_hits++;
2518 #endif
2519         *vpp = vp;
2520         return (0);
2521 }
2522 
2523 
2524 /*
2525  * XXX
2526  * vsecattr_t is new to build 77, and we need to eventually support
2527  * it in order to create an ACL when an object is created.
2528  *
2529  * This op should support the new FIGNORECASE flag for case-insensitive
2530  * lookups, per PSARC 2007/244.
2531  */
/*
 * Create (or open) the file "nm" in directory dvp.
 *
 * Only VREG is handled here (no mknod of special files; VDIR is
 * rejected too -- see the note below about redirecting to _mkdir).
 * If the file already exists and exclusive != EXCL, this behaves
 * like an open: access is re-checked against "mode" and the file
 * is truncated when AT_SIZE was requested.  On success, *vpp gets
 * the vnode with a hold that the caller must release.
 *
 * Returns zero or an errno value.
 */
/* ARGSUSED */
static int
smbfs_create(vnode_t *dvp, char *nm, struct vattr *va, enum vcexcl exclusive,
	int mode, vnode_t **vpp, cred_t *cr, int lfaware, caller_context_t *ct,
	vsecattr_t *vsecp)
{
	int		error;
	vfs_t		*vfsp;
	vnode_t		*vp;
	smbnode_t	*np;
	smbnode_t	*dnp;
	smbmntinfo_t	*smi;
	struct vattr	vattr;
	struct smbfattr fattr;
	struct smb_cred scred;
	const char *name = (const char *)nm;
	int		nmlen = strlen(nm);
	uint32_t	disp;
	smb_fh_t	*fid = NULL;
	int		xattr;

	vfsp = dvp->v_vfsp;
	smi = VFTOSMI(vfsp);
	dnp = VTOSMB(dvp);
	vp = NULL;

	/* The caller must be in the zone that mounted this FS. */
	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
		return (EPERM);

	if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	/*
	 * Note: this may break mknod(2) calls to create a directory,
	 * but that's obscure use.  Some other filesystems do this.
	 * Todo: redirect VDIR type here to _mkdir.
	 */
	if (va->va_type != VREG)
		return (EINVAL);

	/*
	 * If the pathname is "", just use dvp, no checks.
	 * Do this outside of the rwlock (like zfs).
	 */
	if (nmlen == 0) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	/* Don't allow "." or ".." through here. */
	if ((nmlen == 1 && name[0] == '.') ||
	    (nmlen == 2 && name[0] == '.' && name[1] == '.'))
		return (EISDIR);

	/*
	 * We make a copy of the attributes because the caller does not
	 * expect us to change what va points to.
	 */
	vattr = *va;

	if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
		return (EINTR);
	smb_credinit(&scred, cr);

	/*
	 * NFS needs to go over the wire, just to be sure whether the
	 * file exists or not.  Using a cached result is dangerous in
	 * this case when making a decision regarding existence.
	 *
	 * The SMB protocol does NOT really need to go OTW here
	 * thanks to the expressive NTCREATE disposition values.
	 * Unfortunately, to do Unix access checks correctly,
	 * we need to know if the object already exists.
	 * When the object does not exist, we need VWRITE on
	 * the directory.  Note: smbfslookup() checks VEXEC.
	 */
	error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
	if (error == 0) {
		/*
		 * The file already exists.  Error?
		 * NB: have a hold from smbfslookup
		 */
		if (exclusive == EXCL) {
			error = EEXIST;
			VN_RELE(vp);
			goto out;
		}
		/*
		 * Verify requested access.
		 */
		error = smbfs_access(vp, mode, 0, cr, ct);
		if (error) {
			VN_RELE(vp);
			goto out;
		}

		/*
		 * Truncate (if requested).
		 */
		if ((vattr.va_mask & AT_SIZE) && vp->v_type == VREG) {
			np = VTOSMB(vp);
			/*
			 * Check here for large file truncation by
			 * LF-unaware process, like ufs_create().
			 */
			if (!(lfaware & FOFFMAX)) {
				mutex_enter(&np->r_statelock);
				if (np->r_size > MAXOFF32_T)
					error = EOVERFLOW;
				mutex_exit(&np->r_statelock);
			}
			if (error) {
				VN_RELE(vp);
				goto out;
			}
			vattr.va_mask = AT_SIZE;
			error = smbfssetattr(vp, &vattr, 0, cr);
			if (error) {
				VN_RELE(vp);
				goto out;
			}
#ifdef	SMBFS_VNEVENT
			/* Existing file was truncated */
			vnevent_create(vp, ct);
#endif
			/* invalidate pages done in smbfssetattr() */
		}
		/* Success! */
		*vpp = vp;
		goto out;
	}

	/*
	 * The file did not exist.  Need VWRITE in the directory.
	 */
	error = smbfs_access(dvp, VWRITE, 0, cr, ct);
	if (error)
		goto out;

	/*
	 * Now things get tricky.  We also need to check the
	 * requested open mode against the file we may create.
	 * See comments at smbfs_access_rwx
	 */
	error = smbfs_access_rwx(vfsp, VREG, mode, cr);
	if (error)
		goto out;

	/*
	 * Now the code derived from Darwin,
	 * but with greater use of NT_CREATE
	 * disposition options.  Much changed.
	 *
	 * Create (or open) a new child node.
	 * Note we handled "." and ".." above.
	 */

	if (exclusive == EXCL)
		disp = NTCREATEX_DISP_CREATE;
	else {
		/* Truncate regular files if requested. */
		if ((va->va_type == VREG) &&
		    (va->va_mask & AT_SIZE) &&
		    (va->va_size == 0))
			disp = NTCREATEX_DISP_OVERWRITE_IF;
		else
			disp = NTCREATEX_DISP_OPEN_IF;
	}
	/* Creating inside an extended-attribute directory? */
	xattr = (dnp->n_flag & N_XATTR) ? 1 : 0;
	error = smbfs_smb_create(dnp,
	    name, nmlen, xattr,
	    disp, &scred, &fid);
	if (error)
		goto out;

	/*
	 * Should use the fid to get/set the size
	 * while we have it opened here.  See above.
	 */
	smbfs_smb_close(fid);

	/*
	 * In the open case, the name may differ a little
	 * from what we passed to create (case, etc.)
	 * so call lookup to get the (opened) name.
	 *
	 * XXX: Could avoid this extra lookup if the
	 * "createact" result from NT_CREATE says we
	 * created the object.
	 */
	error = smbfs_smb_lookup(dnp, &name, &nmlen, &fattr, &scred);
	if (error)
		goto out;

	/* update attr and directory cache */
	smbfs_attr_touchdir(dnp);

	error = smbfs_nget(dvp, name, nmlen, &fattr, &vp);
	if (error)
		goto out;

	/* Success! */
	*vpp = vp;
	error = 0;

out:
	smb_credrele(&scred);
	smbfs_rw_exit(&dnp->r_rwlock);
	/* smbfs_smb_lookup may have allocated a new name. */
	if (name != nm)
		smbfs_name_free(name, nmlen);
	return (error);
}
2745 
2746 /*
2747  * XXX
2748  * This op should support the new FIGNORECASE flag for case-insensitive
2749  * lookups, per PSARC 2007/244.
2750  */
2751 /* ARGSUSED */
2752 static int
2753 smbfs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
2754         int flags)
2755 {
2756         struct smb_cred scred;
2757         vnode_t         *vp = NULL;
2758         smbnode_t       *dnp = VTOSMB(dvp);
2759         smbmntinfo_t    *smi = VTOSMI(dvp);
2760         int             error;
2761 
2762         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
2763                 return (EPERM);
2764 
2765         if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
2766                 return (EIO);
2767 
2768         /*
2769          * Verify access to the dirctory.
2770          */
2771         error = smbfs_access(dvp, VWRITE|VEXEC, 0, cr, ct);
2772         if (error)
2773                 return (error);
2774 
2775         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
2776                 return (EINTR);
2777         smb_credinit(&scred, cr);
2778 
2779         /* Lookup the file to remove. */
2780         error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
2781         if (error != 0)
2782                 goto out;
2783 
2784         /* Don't allow unlink of a directory. */
2785         if (vp->v_type == VDIR) {
2786                 error = EPERM;
2787                 goto out;
2788         }
2789 
2790         /*
2791          * Do the real remove work
2792          */
2793         error = smbfsremove(dvp, vp, &scred, flags);
2794         if (error != 0)
2795                 goto out;
2796 
2797 #ifdef  SMBFS_VNEVENT
2798         vnevent_remove(vp, dvp, nm, ct);
2799 #endif
2800 
2801 out:
2802         if (vp != NULL)
2803                 VN_RELE(vp);
2804 
2805         smb_credrele(&scred);
2806         smbfs_rw_exit(&dnp->r_rwlock);
2807 
2808         return (error);
2809 }
2810 
2811 /*
2812  * smbfsremove does the real work of removing in SMBFS
2813  * Caller has done dir access checks etc.
2814  *
2815  * The normal way to delete a file over SMB is open it (with DELETE access),
2816  * set the "delete-on-close" flag, and close the file.  The problem for Unix
2817  * applications is that they expect the file name to be gone once the unlink
2818  * completes, and the SMB server does not actually delete the file until ALL
2819  * opens of that file are closed.  We can't assume our open handles are the
2820  * only open handles on a file we're deleting, so to be safe we'll try to
2821  * rename the file to a temporary name and then set delete-on-close.  If we
2822  * fail to set delete-on-close (i.e. because other opens prevent it) then
2823  * undo the changes we made and give up with EBUSY.  Note that we might have
2824  * permission to delete a file but lack permission to rename, so we want to
2825  * continue in cases where rename fails.  As an optimization, only do the
2826  * rename when we have the file open.
2827  *
2828  * This is similar to what NFS does when deleting a file that has local opens,
2829  * but thanks to SMB delete-on-close, we don't need to keep track of when the
2830  * last local open goes away and send a delete.  The server does that for us.
2831  */
/* ARGSUSED */
static int
smbfsremove(vnode_t *dvp, vnode_t *vp, struct smb_cred *scred,
    int flags)
{
	smbnode_t	*dnp = VTOSMB(dvp);
	smbnode_t	*np = VTOSMB(vp);
	smbmntinfo_t	*smi = np->n_mount;
	char		*tmpname = NULL;
	int		tnlen;
	int		error;
	smb_fh_t	*fid = NULL;
	boolean_t	renamed = B_FALSE;

	/*
	 * The dvp RWlock must be held as writer.
	 */
	ASSERT(dnp->r_rwlock.owner == curthread);

	/*
	 * We need to flush any dirty pages which happen to
	 * be hanging around before removing the file.  This
	 * shouldn't happen very often and mostly on file
	 * systems mounted "nocto".
	 */
	if (vn_has_cached_data(vp) &&
	    ((np->r_flags & RDIRTY) || np->r_count > 0)) {
		error = smbfs_putpage(vp, (offset_t)0, 0, 0,
		    scred->scr_cred, NULL);
		if (error && (error == ENOSPC || error == EDQUOT)) {
			/* Remember the I/O error, but keep removing. */
			mutex_enter(&np->r_statelock);
			if (!np->r_error)
				np->r_error = error;
			mutex_exit(&np->r_statelock);
		}
	}

	/*
	 * Get a file handle with delete access.
	 * Close this FID before return.
	 */
	error = smbfs_smb_tmpopen(np, STD_RIGHT_DELETE_ACCESS,
	    scred, &fid);
	if (error) {
		SMBVDEBUG("error %d opening %s\n",
		    error, np->n_rpath);
		goto out;
	}
	ASSERT(fid != NULL);

	/*
	 * If we have the file open, try to rename it to a temporary name.
	 * If we can't rename, continue on and try setting DoC anyway.
	 * Unnecessary for directories.
	 *
	 * v_count > 1 means someone besides us holds the vnode, and
	 * n_fidrefs > 0 means we have it open -- the "only rename
	 * when we have the file open" optimization described above.
	 */
	if (vp->v_type != VDIR && vp->v_count > 1 && np->n_fidrefs > 0) {
		tmpname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
		tnlen = smbfs_newname(tmpname, MAXNAMELEN);
		error = smbfs_smb_rename(dnp, np, dnp, tmpname, tnlen,
		    fid, scred);
		if (error != 0) {
			SMBVDEBUG("error %d renaming %s -> %s\n",
			    error, np->n_rpath, tmpname);
			/* Keep going without the rename. */
		} else {
			renamed = B_TRUE;
		}
	}

	/*
	 * Mark the file as delete-on-close.  If we can't,
	 * undo what we did and err out.
	 */
	error = smbfs_smb_setdisp(smi->smi_share, fid, 1, scred);
	if (error != 0) {
		SMBVDEBUG("error %d setting DoC on %s\n",
		    error, np->n_rpath);
		/*
		 * Failed to set DoC. If we renamed, undo that.
		 * Need np->n_rpath relative to parent (dnp).
		 * Use parent path name length plus one for
		 * the separator ('/' or ':')
		 */
		if (renamed) {
			char *oldname;
			int oldnlen;
			int err2;

			oldname = np->n_rpath + (dnp->n_rplen + 1);
			oldnlen = np->n_rplen - (dnp->n_rplen + 1);
			err2 = smbfs_smb_rename(dnp, np, dnp, oldname, oldnlen,
			    fid, scred);
			SMBVDEBUG("error %d un-renaming %s -> %s\n",
			    err2, tmpname, np->n_rpath);
		}
		/* Other opens prevented the delete (see comment above). */
		error = EBUSY;
		goto out;
	}
	/* Done! */
	smbfs_attrcache_remove(np);
	smbfs_attrcache_prune(np);

out:
	if (tmpname != NULL)
		kmem_free(tmpname, MAXNAMELEN);
	if (fid != NULL)
		smbfs_smb_tmpclose(np, fid);

	if (error == 0) {
		/* Keep lookup from finding this node anymore. */
		smbfs_rmhash(np);
	}

	return (error);
}
2947 
2948 
2949 /* ARGSUSED */
2950 static int
2951 smbfs_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr,
2952         caller_context_t *ct, int flags)
2953 {
2954         /* Not yet... */
2955         return (ENOSYS);
2956 }
2957 
2958 
2959 /*
2960  * XXX
2961  * This op should support the new FIGNORECASE flag for case-insensitive
2962  * lookups, per PSARC 2007/244.
2963  */
/*
 * Rename "onm" (in directory odvp) to "nnm" (in directory ndvp).
 * Both directories must be in the same smbfs mount.  This function
 * does the zone, access, and lock-ordering work; the actual OtW
 * rename is done by smbfsrename().
 *
 * Returns zero or an errno value.
 */
/* ARGSUSED */
static int
smbfs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr,
	caller_context_t *ct, int flags)
{
	struct smb_cred scred;
	smbnode_t	*odnp = VTOSMB(odvp);
	smbnode_t	*ndnp = VTOSMB(ndvp);
	vnode_t		*ovp;
	int error;

	if (curproc->p_zone != VTOSMI(odvp)->smi_zone_ref.zref_zone ||
	    curproc->p_zone != VTOSMI(ndvp)->smi_zone_ref.zref_zone)
		return (EPERM);

	if (VTOSMI(odvp)->smi_flags & SMI_DEAD ||
	    VTOSMI(ndvp)->smi_flags & SMI_DEAD ||
	    odvp->v_vfsp->vfs_flag & VFS_UNMOUNTED ||
	    ndvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	/* Never rename "." or ".." in either directory. */
	if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
	    strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0)
		return (EINVAL);

	/*
	 * Check that everything is on the same filesystem.
	 * vn_rename checks the fsid's, but in case we don't
	 * fill those in correctly, check here too.
	 */
	if (odvp->v_vfsp != ndvp->v_vfsp)
		return (EXDEV);

	/*
	 * Need write access on source and target.
	 * Server takes care of most checks.
	 */
	error = smbfs_access(odvp, VWRITE|VEXEC, 0, cr, ct);
	if (error)
		return (error);
	if (odvp != ndvp) {
		error = smbfs_access(ndvp, VWRITE, 0, cr, ct);
		if (error)
			return (error);
	}

	/*
	 * Need to lock both old/new dirs as writer.
	 *
	 * Avoid deadlock here on old vs new directory nodes
	 * by always taking the locks in order of address.
	 * The order is arbitrary, but must be consistent.
	 */
	if (odnp < ndnp) {
		if (smbfs_rw_enter_sig(&odnp->r_rwlock, RW_WRITER,
		    SMBINTR(odvp)))
			return (EINTR);
		if (smbfs_rw_enter_sig(&ndnp->r_rwlock, RW_WRITER,
		    SMBINTR(ndvp))) {
			smbfs_rw_exit(&odnp->r_rwlock);
			return (EINTR);
		}
	} else {
		if (smbfs_rw_enter_sig(&ndnp->r_rwlock, RW_WRITER,
		    SMBINTR(ndvp)))
			return (EINTR);
		if (smbfs_rw_enter_sig(&odnp->r_rwlock, RW_WRITER,
		    SMBINTR(odvp))) {
			smbfs_rw_exit(&ndnp->r_rwlock);
			return (EINTR);
		}
	}
	smb_credinit(&scred, cr);

	/* Lookup the "old" name */
	error = smbfslookup(odvp, onm, &ovp, cr, 0, ct);
	if (error == 0) {
		/*
		 * Do the real rename work
		 */
		error = smbfsrename(odvp, ovp, ndvp, nnm, &scred, flags);
		VN_RELE(ovp);
	}

	smb_credrele(&scred);
	smbfs_rw_exit(&odnp->r_rwlock);
	smbfs_rw_exit(&ndnp->r_rwlock);

	return (error);
}
3054 
3055 /*
3056  * smbfsrename does the real work of renaming in SMBFS
3057  * Caller has done dir access checks etc.
3058  */
/* ARGSUSED */
static int
smbfsrename(vnode_t *odvp, vnode_t *ovp, vnode_t *ndvp, char *nnm,
    struct smb_cred *scred, int flags)
{
	smbnode_t	*odnp = VTOSMB(odvp);
	smbnode_t	*onp = VTOSMB(ovp);
	smbnode_t	*ndnp = VTOSMB(ndvp);
	vnode_t		*nvp = NULL;
	int		error;
	int		nvp_locked = 0;
	smb_fh_t	*fid = NULL;

	/* Things our caller should have checked. */
	ASSERT(curproc->p_zone == VTOSMI(odvp)->smi_zone_ref.zref_zone);
	ASSERT(odvp->v_vfsp == ndvp->v_vfsp);
	ASSERT(odnp->r_rwlock.owner == curthread);
	ASSERT(ndnp->r_rwlock.owner == curthread);

	/*
	 * Lookup the target file.  If it exists, it needs to be
	 * checked to see whether it is a mount point and whether
	 * it is active (open).
	 */
	error = smbfslookup(ndvp, nnm, &nvp, scred->scr_cred, 0, NULL);
	if (!error) {
		/*
		 * Target (nvp) already exists.  Check that it
		 * has the same type as the source.  The server
		 * will check this also, (and more reliably) but
		 * this lets us return the correct error codes.
		 */
		if (ovp->v_type == VDIR) {
			if (nvp->v_type != VDIR) {
				error = ENOTDIR;
				goto out;
			}
		} else {
			if (nvp->v_type == VDIR) {
				error = EISDIR;
				goto out;
			}
		}

		/*
		 * POSIX dictates that when the source and target
		 * entries refer to the same file object, rename
		 * must do nothing and exit without error.
		 */
		if (ovp == nvp) {
			error = 0;
			goto out;
		}

		/*
		 * Also must ensure the target is not a mount point,
		 * and keep mount/umount away until we're done.
		 */
		if (vn_vfsrlock(nvp)) {
			error = EBUSY;
			goto out;
		}
		nvp_locked = 1;
		if (vn_mountedvfs(nvp) != NULL) {
			error = EBUSY;
			goto out;
		}

		/*
		 * CIFS may give a SHARING_VIOLATION error when
		 * trying to rename onto an existing object,
		 * so try to remove the target first.
		 * (Only for files, not directories.)
		 */
		if (nvp->v_type == VDIR) {
			error = EEXIST;
			goto out;
		}
		error = smbfsremove(ndvp, nvp, scred, flags);
		if (error != 0)
			goto out;

		/*
		 * OK, removed the target file.  Continue as if
		 * lookup target had failed (nvp == NULL).
		 */
		vn_vfsunlock(nvp);
		nvp_locked = 0;
		VN_RELE(nvp);
		nvp = NULL;
	} /* nvp */

	/*
	 * Get a file handle with delete access.
	 * Close this FID before return.
	 */
	error = smbfs_smb_tmpopen(onp, STD_RIGHT_DELETE_ACCESS,
	    scred, &fid);
	if (error) {
		SMBVDEBUG("error %d opening %s\n",
		    error, onp->n_rpath);
		goto out;
	}

	/* Old attributes are invalid whether the rename works or not. */
	smbfs_attrcache_remove(onp);
	error = smbfs_smb_rename(odnp, onp, ndnp, nnm, strlen(nnm),
	    fid, scred);

	smbfs_smb_tmpclose(onp, fid);

	/*
	 * If the old name should no longer exist,
	 * discard any cached attributes under it.
	 */
	if (error == 0) {
		smbfs_attrcache_prune(onp);
		/* SMBFS_VNEVENT... */
	}

out:
	if (nvp) {
		if (nvp_locked)
			vn_vfsunlock(nvp);
		VN_RELE(nvp);
	}

	return (error);
}
3187 
3188 /*
3189  * XXX
3190  * vsecattr_t is new to build 77, and we need to eventually support
3191  * it in order to create an ACL when an object is created.
3192  *
3193  * This op should support the new FIGNORECASE flag for case-insensitive
3194  * lookups, per PSARC 2007/244.
3195  */
3196 /* ARGSUSED */
3197 static int
3198 smbfs_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp,
3199         cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
3200 {
3201         vnode_t         *vp;
3202         struct smbnode  *dnp = VTOSMB(dvp);
3203         struct smbmntinfo *smi = VTOSMI(dvp);
3204         struct smb_cred scred;
3205         struct smbfattr fattr;
3206         const char              *name = (const char *) nm;
3207         int             nmlen = strlen(name);
3208         int             error;
3209 
3210         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3211                 return (EPERM);
3212 
3213         if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3214                 return (EIO);
3215 
3216         if ((nmlen == 1 && name[0] == '.') ||
3217             (nmlen == 2 && name[0] == '.' && name[1] == '.'))
3218                 return (EEXIST);
3219 
3220         /* Only plain files are allowed in V_XATTRDIR. */
3221         if (dvp->v_flag & V_XATTRDIR)
3222                 return (EINVAL);
3223 
3224         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
3225                 return (EINTR);
3226         smb_credinit(&scred, cr);
3227 
3228         /*
3229          * Require write access in the containing directory.
3230          */
3231         error = smbfs_access(dvp, VWRITE, 0, cr, ct);
3232         if (error)
3233                 goto out;
3234 
3235         error = smbfs_smb_mkdir(dnp, name, nmlen, &scred);
3236         if (error)
3237                 goto out;
3238 
3239         error = smbfs_smb_lookup(dnp, &name, &nmlen, &fattr, &scred);
3240         if (error)
3241                 goto out;
3242 
3243         smbfs_attr_touchdir(dnp);
3244 
3245         error = smbfs_nget(dvp, name, nmlen, &fattr, &vp);
3246         if (error)
3247                 goto out;
3248 
3249         /* Success! */
3250         *vpp = vp;
3251         error = 0;
3252 out:
3253         smb_credrele(&scred);
3254         smbfs_rw_exit(&dnp->r_rwlock);
3255 
3256         if (name != nm)
3257                 smbfs_name_free(name, nmlen);
3258 
3259         return (error);
3260 }
3261 
3262 /*
3263  * XXX
3264  * This op should support the new FIGNORECASE flag for case-insensitive
3265  * lookups, per PSARC 2007/244.
3266  */
3267 /* ARGSUSED */
3268 static int
3269 smbfs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
3270         caller_context_t *ct, int flags)
3271 {
3272         struct smb_cred scred;
3273         vnode_t         *vp = NULL;
3274         int             vp_locked = 0;
3275         struct smbmntinfo *smi = VTOSMI(dvp);
3276         struct smbnode  *dnp = VTOSMB(dvp);
3277         struct smbnode  *np;
3278         int             error;
3279 
3280         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3281                 return (EPERM);
3282 
3283         if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3284                 return (EIO);
3285 
3286         /*
3287          * Verify access to the dirctory.
3288          */
3289         error = smbfs_access(dvp, VWRITE|VEXEC, 0, cr, ct);
3290         if (error)
3291                 return (error);
3292 
3293         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
3294                 return (EINTR);
3295         smb_credinit(&scred, cr);
3296 
3297         /*
3298          * First lookup the entry to be removed.
3299          */
3300         error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
3301         if (error)
3302                 goto out;
3303         np = VTOSMB(vp);
3304 
3305         /*
3306          * Disallow rmdir of "." or current dir, or the FS root.
3307          * Also make sure it's a directory, not a mount point,
3308          * and lock to keep mount/umount away until we're done.
3309          */
3310         if ((vp == dvp) || (vp == cdir) || (vp->v_flag & VROOT)) {
3311                 error = EINVAL;
3312                 goto out;
3313         }
3314         if (vp->v_type != VDIR) {
3315                 error = ENOTDIR;
3316                 goto out;
3317         }
3318         if (vn_vfsrlock(vp)) {
3319                 error = EBUSY;
3320                 goto out;
3321         }
3322         vp_locked = 1;
3323         if (vn_mountedvfs(vp) != NULL) {
3324                 error = EBUSY;
3325                 goto out;
3326         }
3327 
3328         /*
3329          * Do the real rmdir work
3330          */
3331         error = smbfsremove(dvp, vp, &scred, flags);
3332         if (error)
3333                 goto out;
3334 
3335 #ifdef  SMBFS_VNEVENT
3336         vnevent_rmdir(vp, dvp, nm, ct);
3337 #endif
3338 
3339         mutex_enter(&np->r_statelock);
3340         dnp->n_flag |= NMODIFIED;
3341         mutex_exit(&np->r_statelock);
3342         smbfs_attr_touchdir(dnp);
3343         smbfs_rmhash(np);
3344 
3345 out:
3346         if (vp) {
3347                 if (vp_locked)
3348                         vn_vfsunlock(vp);
3349                 VN_RELE(vp);
3350         }
3351         smb_credrele(&scred);
3352         smbfs_rw_exit(&dnp->r_rwlock);
3353 
3354         return (error);
3355 }
3356 
3357 
/*
 * Create a symbolic link (VOP_SYMLINK).
 * Symbolic links are not implemented for smbfs; always ENOSYS.
 */
/* ARGSUSED */
static int
smbfs_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm, cred_t *cr,
	caller_context_t *ct, int flags)
{
	/* Not yet... */
	return (ENOSYS);
}
3366 
3367 
/*
 * Read directory entries (VOP_READDIR).
 * Thin wrapper around smbfs_readvdir: checks zone and mount
 * liveness, requires read access on the directory, and serializes
 * the whole operation under r_lkserlock because the underlying
 * OtW search handle (n_dirseq) supports only one reader at a time.
 */
/* ARGSUSED */
static int
smbfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
	caller_context_t *ct, int flags)
{
	struct smbnode	*np = VTOSMB(vp);
	int		error = 0;
	smbmntinfo_t	*smi;

	smi = VTOSMI(vp);

	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
		return (EIO);

	if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	/*
	 * Require read access in the directory.
	 */
	error = smbfs_access(vp, VREAD, 0, cr, ct);
	if (error)
		return (error);

	/* Caller (via VOP_RWLOCK) already holds r_rwlock. */
	ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_READER));

	/*
	 * Todo readdir cache here
	 *
	 * I am serializing the entire readdir operation
	 * now since we have not yet implemented readdir
	 * cache. This fix needs to be revisited once
	 * we implement readdir cache.
	 */
	if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp)))
		return (EINTR);

	error = smbfs_readvdir(vp, uiop, cr, eofp, ct);

	smbfs_rw_exit(&np->r_lkserlock);

	return (error);
}
3411 
/*
 * Do the real work of readdir: emit "." and ".." locally, then
 * translate entries from the SMB-level FindFirst/FindNext search
 * (np->n_dirseq) into dirent64 records copied out through uio.
 *
 * The uio offset here is a dirent-count cookie, not a byte offset:
 * cookie 0 is ".", 1 is "..", and FIRST_DIROFS onward are real
 * entries.  np->n_dirofs tracks how far the server-side search has
 * advanced; a backward seek forces reopening the search.
 * Caller must hold r_lkserlock as writer (see smbfs_readdir).
 */
/* ARGSUSED */
static int
smbfs_readvdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
	caller_context_t *ct)
{
	/*
	 * Note: "limit" tells the SMB-level FindFirst/FindNext
	 * functions how many directory entries to request in
	 * each OtW call.  It needs to be large enough so that
	 * we don't make lots of tiny OtW requests, but there's
	 * no point making it larger than the maximum number of
	 * OtW entries that would fit in a maximum sized trans2
	 * response (64k / 48).  Beyond that, it's just tuning.
	 * WinNT used 512, Win2k used 1366.  We use 1000.
	 */
	static const int limit = 1000;
	/* Largest possible dirent size. */
	static const size_t dbufsiz = DIRENT64_RECLEN(SMB_MAXFNAMELEN);
	struct smb_cred scred;
	vnode_t		*newvp;
	struct smbnode	*np = VTOSMB(vp);
	struct smbfs_fctx *ctx;
	struct dirent64 *dp;
	ssize_t		save_resid;
	offset_t	save_offset; /* 64 bits */
	int		offset; /* yes, 32 bits */
	int		nmlen, error;
	ushort_t	reclen;

	ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone_ref.zref_zone);

	/* Make sure we serialize for n_dirseq use. */
	ASSERT(smbfs_rw_lock_held(&np->r_lkserlock, RW_WRITER));

	/*
	 * Make sure smbfs_open filled in n_dirseq
	 */
	if (np->n_dirseq == NULL)
		return (EBADF);

	/* Check for overflow of (32-bit) directory offset. */
	if (uio->uio_loffset < 0 || uio->uio_loffset > INT32_MAX ||
	    (uio->uio_loffset + uio->uio_resid) > INT32_MAX)
		return (EINVAL);

	/* Require space for at least one dirent. */
	if (uio->uio_resid < dbufsiz)
		return (EINVAL);

	SMBVDEBUG("dirname='%s'\n", np->n_rpath);
	smb_credinit(&scred, cr);
	dp = kmem_alloc(dbufsiz, KM_SLEEP);

	/* Save entry state so we can rewind on early failure (see out:). */
	save_resid = uio->uio_resid;
	save_offset = uio->uio_loffset;
	offset = uio->uio_offset;
	SMBVDEBUG("in: offset=%d, resid=%d\n",
	    (int)uio->uio_offset, (int)uio->uio_resid);
	error = 0;

	/*
	 * Generate the "." and ".." entries here so we can
	 * (1) make sure they appear (but only once), and
	 * (2) deal with getting their I numbers which the
	 * findnext below does only for normal names.
	 */
	while (offset < FIRST_DIROFS) {
		/*
		 * Tricky bit filling in the first two:
		 * offset 0 is ".", offset 1 is ".."
		 * so strlen of these is offset+1.
		 */
		reclen = DIRENT64_RECLEN(offset + 1);
		if (uio->uio_resid < reclen)
			goto out;
		bzero(dp, reclen);
		dp->d_reclen = reclen;
		dp->d_name[0] = '.';
		dp->d_name[1] = '.';
		dp->d_name[offset + 1] = '\0';
		/*
		 * Want the real I-numbers for the "." and ".."
		 * entries.  For these two names, we know that
		 * smbfslookup can get the nodes efficiently.
		 */
		error = smbfslookup(vp, dp->d_name, &newvp, cr, 1, ct);
		if (error) {
			dp->d_ino = np->n_ino + offset; /* fiction */
		} else {
			dp->d_ino = VTOSMB(newvp)->n_ino;
			VN_RELE(newvp);
		}
		/*
		 * Note: d_off is the offset that a user-level program
		 * should seek to for reading the NEXT directory entry.
		 * See libc: readdir, telldir, seekdir
		 */
		dp->d_off = offset + 1;
		error = uiomove(dp, reclen, UIO_READ, uio);
		if (error)
			goto out;
		/*
		 * Note: uiomove updates uio->uio_offset,
		 * but we want it to be our "cookie" value,
		 * which just counts dirents ignoring size.
		 */
		uio->uio_offset = ++offset;
	}

	/*
	 * If there was a backward seek, we have to reopen.
	 */
	if (offset < np->n_dirofs) {
		SMBVDEBUG("Reopening search %d:%d\n",
		    offset, np->n_dirofs);
		error = smbfs_smb_findopen(np, "*", 1,
		    SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR,
		    &scred, &ctx);
		if (error) {
			SMBVDEBUG("can not open search, error = %d", error);
			goto out;
		}
		/* free the old one */
		(void) smbfs_smb_findclose(np->n_dirseq, &scred);
		/* save the new one */
		np->n_dirseq = ctx;
		np->n_dirofs = FIRST_DIROFS;
	} else {
		ctx = np->n_dirseq;
	}

	/*
	 * Skip entries before the requested offset.
	 */
	while (np->n_dirofs < offset) {
		error = smbfs_smb_findnext(ctx, limit, &scred);
		if (error != 0)
			goto out;
		np->n_dirofs++;
	}

	/*
	 * While there's room in the caller's buffer:
	 *	get a directory entry from SMB,
	 *	convert to a dirent, copyout.
	 * We stop when there is no longer room for a
	 * maximum sized dirent because we must decide
	 * before we know anything about the next entry.
	 */
	while (uio->uio_resid >= dbufsiz) {
		error = smbfs_smb_findnext(ctx, limit, &scred);
		if (error != 0)
			goto out;
		np->n_dirofs++;

		/* Sanity check the name length. */
		nmlen = ctx->f_nmlen;
		if (nmlen > SMB_MAXFNAMELEN) {
			nmlen = SMB_MAXFNAMELEN;
			SMBVDEBUG("Truncating name: %s\n", ctx->f_name);
		}
		if (smbfs_fastlookup) {
			/* See comment at smbfs_fastlookup above. */
			if (smbfs_nget(vp, ctx->f_name, nmlen,
			    &ctx->f_attr, &newvp) == 0)
				VN_RELE(newvp);
		}

		reclen = DIRENT64_RECLEN(nmlen);
		bzero(dp, reclen);
		dp->d_reclen = reclen;
		bcopy(ctx->f_name, dp->d_name, nmlen);
		dp->d_name[nmlen] = '\0';
		dp->d_ino = ctx->f_inum;
		dp->d_off = offset + 1;	/* See d_off comment above */
		error = uiomove(dp, reclen, UIO_READ, uio);
		if (error)
			goto out;
		/* See comment re. uio_offset above. */
		uio->uio_offset = ++offset;
	}

out:
	/*
	 * When we come to the end of a directory, the
	 * SMB-level functions return ENOENT, but the
	 * caller is not expecting an error return.
	 *
	 * Also note that we must delay the call to
	 * smbfs_smb_findclose(np->n_dirseq, ...)
	 * until smbfs_close so that all reads at the
	 * end of the directory will return no data.
	 */
	if (error == ENOENT) {
		error = 0;
		if (eofp)
			*eofp = 1;
	}
	/*
	 * If we encountered an error (i.e. "access denied")
	 * from the FindFirst call, we will have copied out
	 * the "." and ".." entries leaving offset == 2.
	 * In that case, restore the original offset/resid
	 * so the caller gets no data with the error.
	 */
	if (error != 0 && offset == FIRST_DIROFS) {
		uio->uio_loffset = save_offset;
		uio->uio_resid = save_resid;
	}
	SMBVDEBUG("out: offset=%d, resid=%d\n",
	    (int)uio->uio_offset, (int)uio->uio_resid);

	kmem_free(dp, dbufsiz);
	smb_credrele(&scred);
	return (error);
}
3628 
3629 /*
3630  * Here NFS has: nfs3_bio
3631  * See smbfs_bio above.
3632  */
3633 
/*
 * Return a file identifier (VOP_FID), used e.g. by NFS export.
 * Not supported on smbfs; always ENOSYS.
 */
/* ARGSUSED */
static int
smbfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
{
	return (ENOSYS);
}
3640 
3641 
3642 /*
3643  * The pair of functions VOP_RWLOCK, VOP_RWUNLOCK
3644  * are optional functions that are called by:
3645  *    getdents, before/after VOP_READDIR
3646  *    pread, before/after ... VOP_READ
3647  *    pwrite, before/after ... VOP_WRITE
3648  *    (other places)
3649  *
3650  * Careful here: None of the above check for any
3651  * error returns from VOP_RWLOCK / VOP_RWUNLOCK!
3652  * In fact, the return value from _rwlock is NOT
3653  * an error code, but V_WRITELOCK_TRUE / _FALSE.
3654  *
3655  * Therefore, it's up to _this_ code to make sure
3656  * the lock state remains balanced, which means
3657  * we can't "bail out" on interrupts, etc.
3658  */
3659 
3660 /* ARGSUSED2 */
3661 static int
3662 smbfs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
3663 {
3664         smbnode_t       *np = VTOSMB(vp);
3665 
3666         if (!write_lock) {
3667                 (void) smbfs_rw_enter_sig(&np->r_rwlock, RW_READER, FALSE);
3668                 return (V_WRITELOCK_FALSE);
3669         }
3670 
3671 
3672         (void) smbfs_rw_enter_sig(&np->r_rwlock, RW_WRITER, FALSE);
3673         return (V_WRITELOCK_TRUE);
3674 }
3675 
3676 /* ARGSUSED */
3677 static void
3678 smbfs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
3679 {
3680         smbnode_t       *np = VTOSMB(vp);
3681 
3682         smbfs_rw_exit(&np->r_rwlock);
3683 }
3684 
3685 
3686 /* ARGSUSED */
3687 static int
3688 smbfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
3689 {
3690         smbmntinfo_t    *smi;
3691 
3692         smi = VTOSMI(vp);
3693 
3694         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
3695                 return (EPERM);
3696 
3697         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
3698                 return (EIO);
3699 
3700         /*
3701          * Because we stuff the readdir cookie into the offset field
3702          * someone may attempt to do an lseek with the cookie which
3703          * we want to succeed.
3704          */
3705         if (vp->v_type == VDIR)
3706                 return (0);
3707 
3708         /* Like NFS3, just check for 63-bit overflow. */
3709         if (*noffp < 0)
3710                 return (EINVAL);
3711 
3712         return (0);
3713 }
3714 
3715 /* mmap support ******************************************************** */
3716 
3717 #ifdef  _KERNEL
3718 
3719 #ifdef DEBUG
3720 static int smbfs_lostpage = 0;  /* number of times we lost original page */
3721 #endif
3722 
3723 /*
3724  * Return all the pages from [off..off+len) in file
3725  * Like nfs3_getpage
3726  */
/* ARGSUSED */
static int
smbfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
	page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
	enum seg_rw rw, cred_t *cr, caller_context_t *ct)
{
	smbnode_t	*np;
	smbmntinfo_t	*smi;
	int		error;

	np = VTOSMB(vp);
	smi = VTOSMI(vp);

	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
		return (EIO);

	if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	if (protp != NULL)
		*protp = PROT_ALL;

	/*
	 * Now validate that the caches are up to date.
	 */
	error = smbfs_validate_caches(vp, cr);
	if (error)
		return (error);

retry:
	mutex_enter(&np->r_statelock);

	/*
	 * Don't create dirty pages faster than they
	 * can be cleaned ... (etc. see nfs)
	 *
	 * Here NFS also tests:
	 *  (mi->mi_max_threads != 0 &&
	 *  rp->r_awcount > 2 * mi->mi_max_threads)
	 */
	if (rw == S_CREATE) {
		while (np->r_gcount > 0)
			cv_wait(&np->r_cv, &np->r_statelock);
	}

	/*
	 * If we are getting called as a side effect of a write
	 * operation the local file size might not be extended yet.
	 * In this case we want to be able to return pages of zeroes.
	 */
	if (off + len > np->r_size + PAGEOFFSET && seg != segkmap) {
		mutex_exit(&np->r_statelock);
		return (EFAULT);		/* beyond EOF */
	}

	mutex_exit(&np->r_statelock);

	error = pvn_getpages(smbfs_getapage, vp, off, len, protp,
	    pl, plsz, seg, addr, rw, cr);

	switch (error) {
	case SMBFS_EOF:
		/* Hit EOF during the read: purge caches and retry. */
		smbfs_purge_caches(vp, cr);
		goto retry;
	case ESTALE:
		/*
		 * Here NFS has: PURGE_STALE_FH(error, vp, cr);
		 * In-line here as we only use it once.
		 */
		mutex_enter(&np->r_statelock);
		np->r_flags |= RSTALE;
		if (!np->r_error)
			np->r_error = (error);
		mutex_exit(&np->r_statelock);
		if (vn_has_cached_data(vp))
			smbfs_invalidate_pages(vp, (u_offset_t)0, cr);
		smbfs_purge_caches(vp, cr);
		break;
	default:
		break;
	}

	return (error);
}
3814 
3815 /*
3816  * Called from pvn_getpages to get a particular page.
3817  * Like nfs3_getapage
3818  */
/* ARGSUSED */
static int
smbfs_getapage(vnode_t *vp, u_offset_t off, size_t len, uint_t *protp,
	page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
	enum seg_rw rw, cred_t *cr)
{
	smbnode_t	*np;
	smbmntinfo_t   *smi;

	uint_t		bsize;
	struct buf	*bp;
	page_t		*pp;
	u_offset_t	lbn;
	u_offset_t	io_off;
	u_offset_t	blkoff;
	size_t		io_len;
	uint_t blksize;
	int error;
	/* int readahead; */
	int readahead_issued = 0;
	/* int ra_window; * readahead window */
	page_t *pagefound;

	np = VTOSMB(vp);
	smi = VTOSMI(vp);

	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
		return (EIO);

	if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);

reread:
	bp = NULL;
	pp = NULL;
	pagefound = NULL;

	if (pl != NULL)
		pl[0] = NULL;

	error = 0;
	lbn = off / bsize;
	blkoff = lbn * bsize;

	/*
	 * NFS queues up readahead work here.
	 */

again:
	if ((pagefound = page_exists(vp, off)) == NULL) {
		if (pl == NULL) {
			(void) 0; /* Todo: smbfs_async_readahead(); */
		} else if (rw == S_CREATE) {
			/*
			 * Block for this page is not allocated, or the offset
			 * is beyond the current allocation size, or we're
			 * allocating a swap slot and the page was not found,
			 * so allocate it and return a zero page.
			 */
			if ((pp = page_create_va(vp, off,
			    PAGESIZE, PG_WAIT, seg, addr)) == NULL)
				cmn_err(CE_PANIC, "smbfs_getapage: page_create");
			io_len = PAGESIZE;
			mutex_enter(&np->r_statelock);
			np->r_nextr = off + PAGESIZE;
			mutex_exit(&np->r_statelock);
		} else {
			/*
			 * Need to go to server to get a BLOCK, exception to
			 * that being while reading at offset = 0 or doing
			 * random i/o, in that case read only a PAGE.
			 */
			mutex_enter(&np->r_statelock);
			if (blkoff < np->r_size &&
			    blkoff + bsize >= np->r_size) {
				/*
				 * If only a block or less is left in
				 * the file, read all that is remaining.
				 */
				if (np->r_size <= off) {
					/*
					 * Trying to access beyond EOF,
					 * set up to get at least one page.
					 */
					blksize = off + PAGESIZE - blkoff;
				} else
					blksize = np->r_size - blkoff;
			} else if ((off == 0) ||
			    (off != np->r_nextr && !readahead_issued)) {
				blksize = PAGESIZE;
				blkoff = off; /* block = page here */
			} else
				blksize = bsize;
			mutex_exit(&np->r_statelock);

			pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
			    &io_len, blkoff, blksize, 0);

			/*
			 * Some other thread has entered the page,
			 * so just use it.
			 */
			if (pp == NULL)
				goto again;

			/*
			 * Now round the request size up to page boundaries.
			 * This ensures that the entire page will be
			 * initialized to zeroes if EOF is encountered.
			 */
			io_len = ptob(btopr(io_len));

			bp = pageio_setup(pp, io_len, vp, B_READ);
			ASSERT(bp != NULL);

			/*
			 * pageio_setup should have set b_addr to 0.  This
			 * is correct since we want to do I/O on a page
			 * boundary.  bp_mapin will use this addr to calculate
			 * an offset, and then set b_addr to the kernel virtual
			 * address it allocated for us.
			 */
			ASSERT(bp->b_un.b_addr == 0);

			bp->b_edev = 0;
			bp->b_dev = 0;
			bp->b_lblkno = lbtodb(io_off);
			bp->b_file = vp;
			bp->b_offset = (offset_t)off;
			bp_mapin(bp);

			/*
			 * If doing a write beyond what we believe is EOF,
			 * don't bother trying to read the pages from the
			 * server, we'll just zero the pages here.  We
			 * don't check that the rw flag is S_WRITE here
			 * because some implementations may attempt a
			 * read access to the buffer before copying data.
			 */
			mutex_enter(&np->r_statelock);
			if (io_off >= np->r_size && seg == segkmap) {
				mutex_exit(&np->r_statelock);
				bzero(bp->b_un.b_addr, io_len);
			} else {
				mutex_exit(&np->r_statelock);
				error = smbfs_bio(bp, 0, cr);
			}

			/*
			 * Unmap the buffer before freeing it.
			 */
			bp_mapout(bp);
			pageio_done(bp);

			/* Here NFS3 updates all pp->p_fsdata */

			if (error == SMBFS_EOF) {
				/*
				 * If doing a write system call just return
				 * zeroed pages, else user tried to get pages
				 * beyond EOF, return error.  We don't check
				 * that the rw flag is S_WRITE here because
				 * some implementations may attempt a read
				 * access to the buffer before copying data.
				 */
				if (seg == segkmap)
					error = 0;
				else
					error = EFAULT;
			}

			if (!readahead_issued && !error) {
				mutex_enter(&np->r_statelock);
				np->r_nextr = io_off + io_len;
				mutex_exit(&np->r_statelock);
			}
		}
	}

	if (pl == NULL)
		return (error);

	if (error) {
		if (pp != NULL)
			pvn_read_done(pp, B_ERROR);
		return (error);
	}

	if (pagefound) {
		se_t se = (rw == S_CREATE ? SE_EXCL : SE_SHARED);

		/*
		 * Page exists in the cache, acquire the appropriate lock.
		 * If this fails, start all over again.
		 */
		if ((pp = page_lookup(vp, off, se)) == NULL) {
#ifdef DEBUG
			smbfs_lostpage++;
#endif
			goto reread;
		}
		pl[0] = pp;
		pl[1] = NULL;
		return (0);
	}

	if (pp != NULL)
		pvn_plist_init(pp, pl, plsz, off, io_len, rw);

	return (error);
}
4032 
4033 /*
4034  * Here NFS has: nfs3_readahead
4035  * No read-ahead in smbfs yet.
4036  */
4037 
4038 #endif  // _KERNEL
4039 
4040 /*
4041  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
4042  * If len == 0, do from off to EOF.
4043  *
4044  * The normal cases should be len == 0 && off == 0 (entire vp list),
4045  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
4046  * (from pageout).
4047  *
4048  * Like nfs3_putpage + nfs_putpages
4049  */
/* ARGSUSED */
static int
smbfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
	caller_context_t *ct)
{
#ifdef	_KERNEL
	smbnode_t *np;
	smbmntinfo_t *smi;
	page_t *pp;
	u_offset_t eoff;
	u_offset_t io_off;
	size_t io_len;
	int error;
	int rdirty;
	int err;

	np = VTOSMB(vp);
	smi = VTOSMI(vp);

	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
		return (EIO);

	if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	/* Here NFS does rp->r_count (++/--) stuff. */

	/* Beginning of code from nfs_putpages. */

	/* Nothing cached: nothing to flush. */
	if (!vn_has_cached_data(vp))
		return (0);

	/*
	 * If ROUTOFSPACE is set, then all writes turn into B_INVAL
	 * writes.  B_FORCE is set to force the VM system to actually
	 * invalidate the pages, even if the i/o failed.  The pages
	 * need to get invalidated because they can't be written out
	 * because there isn't any space left on either the server's
	 * file system or in the user's disk quota.  The B_FREE bit
	 * is cleared to avoid confusion as to whether this is a
	 * request to place the page on the freelist or to destroy
	 * it.
	 */
	if ((np->r_flags & ROUTOFSPACE) ||
	    (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED))
		flags = (flags & ~B_FREE) | B_INVAL | B_FORCE;

	if (len == 0) {
		/*
		 * If doing a full file synchronous operation, then clear
		 * the RDIRTY bit.  If a page gets dirtied while the flush
		 * is happening, then RDIRTY will get set again.  The
		 * RDIRTY bit must get cleared before the flush so that
		 * we don't lose this information.
		 *
		 * NFS has B_ASYNC vs sync stuff here.
		 */
		if (off == (u_offset_t)0 &&
		    (np->r_flags & RDIRTY)) {
			mutex_enter(&np->r_statelock);
			rdirty = (np->r_flags & RDIRTY);
			np->r_flags &= ~RDIRTY;
			mutex_exit(&np->r_statelock);
		} else
			rdirty = 0;

		/*
		 * Search the entire vp list for pages >= off, and flush
		 * the dirty pages.
		 */
		error = pvn_vplist_dirty(vp, off, smbfs_putapage,
		    flags, cr);

		/*
		 * If an error occurred and the file was marked as dirty
		 * before and we aren't forcibly invalidating pages, then
		 * reset the RDIRTY flag.
		 */
		if (error && rdirty &&
		    (flags & (B_INVAL | B_FORCE)) != (B_INVAL | B_FORCE)) {
			mutex_enter(&np->r_statelock);
			np->r_flags |= RDIRTY;
			mutex_exit(&np->r_statelock);
		}
	} else {
		/*
		 * Do a range from [off...off + len) looking for pages
		 * to deal with.
		 */
		error = 0;
		io_len = 1; /* quiet warnings */
		eoff = off + len;

		for (io_off = off; io_off < eoff; io_off += io_len) {
			/* Stop at (local) EOF. */
			mutex_enter(&np->r_statelock);
			if (io_off >= np->r_size) {
				mutex_exit(&np->r_statelock);
				break;
			}
			mutex_exit(&np->r_statelock);
			/*
			 * If we are not invalidating, synchronously
			 * freeing or writing pages use the routine
			 * page_lookup_nowait() to prevent reclaiming
			 * them from the free list.
			 */
			if ((flags & B_INVAL) || !(flags & B_ASYNC)) {
				pp = page_lookup(vp, io_off,
				    (flags & (B_INVAL | B_FREE)) ?
				    SE_EXCL : SE_SHARED);
			} else {
				pp = page_lookup_nowait(vp, io_off,
				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
			}

			if (pp == NULL || !pvn_getdirty(pp, flags))
				io_len = PAGESIZE;
			else {
				err = smbfs_putapage(vp, pp, &io_off,
				    &io_len, flags, cr);
				if (!error)
					error = err;
				/*
				 * "io_off" and "io_len" are returned as
				 * the range of pages we actually wrote.
				 * This allows us to skip ahead more quickly
				 * since several pages may've been dealt
				 * with by this iteration of the loop.
				 */
			}
		}
	}

	return (error);

#else	// _KERNEL
	return (ENOSYS);
#endif	// _KERNEL
}
4192 
4193 #ifdef  _KERNEL
4194 
4195 /*
4196  * Write out a single page, possibly klustering adjacent dirty pages.
4197  *
4198  * Like nfs3_putapage / nfs3_sync_putapage
4199  */
4200 static int
4201 smbfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
4202         int flags, cred_t *cr)
4203 {
4204         smbnode_t *np;
4205         u_offset_t io_off;
4206         u_offset_t lbn_off;
4207         u_offset_t lbn;
4208         size_t io_len;
4209         uint_t bsize;
4210         int error;
4211 
4212         np = VTOSMB(vp);
4213 
4214         ASSERT(!vn_is_readonly(vp));
4215 
4216         bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
4217         lbn = pp->p_offset / bsize;
4218         lbn_off = lbn * bsize;
4219 
4220         /*
4221          * Find a kluster that fits in one block, or in
4222          * one page if pages are bigger than blocks.  If
4223          * there is less file space allocated than a whole
4224          * page, we'll shorten the i/o request below.
4225          */
4226         pp = pvn_write_kluster(vp, pp, &io_off, &io_len, lbn_off,
4227             roundup(bsize, PAGESIZE), flags);
4228 
4229         /*
4230          * pvn_write_kluster shouldn't have returned a page with offset
4231          * behind the original page we were given.  Verify that.
4232          */
4233         ASSERT((pp->p_offset / bsize) >= lbn);
4234 
4235         /*
4236          * Now pp will have the list of kept dirty pages marked for
4237          * write back.  It will also handle invalidation and freeing
4238          * of pages that are not dirty.  Check for page length rounding
4239          * problems.
4240          */
4241         if (io_off + io_len > lbn_off + bsize) {
4242                 ASSERT((io_off + io_len) - (lbn_off + bsize) < PAGESIZE);
4243                 io_len = lbn_off + bsize - io_off;
4244         }
4245         /*
4246          * The RMODINPROGRESS flag makes sure that smbfs_bio() sees a
4247          * consistent value of r_size. RMODINPROGRESS is set in writerp().
4248          * When RMODINPROGRESS is set it indicates that a uiomove() is in
4249          * progress and the r_size has not been made consistent with the
4250          * new size of the file. When the uiomove() completes the r_size is
4251          * updated and the RMODINPROGRESS flag is cleared.
4252          *
4253          * The RMODINPROGRESS flag makes sure that smbfs_bio() sees a
4254          * consistent value of r_size. Without this handshaking, it is
4255          * possible that smbfs_bio() picks  up the old value of r_size
4256          * before the uiomove() in writerp() completes. This will result
4257          * in the write through smbfs_bio() being dropped.
4258          *
4259          * More precisely, there is a window between the time the uiomove()
4260          * completes and the time the r_size is updated. If a VOP_PUTPAGE()
4261          * operation intervenes in this window, the page will be picked up,
4262          * because it is dirty (it will be unlocked, unless it was
4263          * pagecreate'd). When the page is picked up as dirty, the dirty
4264          * bit is reset (pvn_getdirty()). In smbfs_write(), r_size is
4265          * checked. This will still be the old size. Therefore the page will
4266          * not be written out. When segmap_release() calls VOP_PUTPAGE(),
4267          * the page will be found to be clean and the write will be dropped.
4268          */
4269         if (np->r_flags & RMODINPROGRESS) {
4270                 mutex_enter(&np->r_statelock);
4271                 if ((np->r_flags & RMODINPROGRESS) &&
4272                     np->r_modaddr + MAXBSIZE > io_off &&
4273                     np->r_modaddr < io_off + io_len) {
4274                         page_t *plist;
4275                         /*
4276                          * A write is in progress for this region of the file.
4277                          * If we did not detect RMODINPROGRESS here then this
4278                          * path through smbfs_putapage() would eventually go to
4279                          * smbfs_bio() and may not write out all of the data
4280                          * in the pages. We end up losing data. So we decide
4281                          * to set the modified bit on each page in the page
4282                          * list and mark the rnode with RDIRTY. This write
4283                          * will be restarted at some later time.
4284                          */
4285                         plist = pp;
4286                         while (plist != NULL) {
4287                                 pp = plist;
4288                                 page_sub(&plist, pp);
4289                                 hat_setmod(pp);
4290                                 page_io_unlock(pp);
4291                                 page_unlock(pp);
4292                         }
4293                         np->r_flags |= RDIRTY;
4294                         mutex_exit(&np->r_statelock);
4295                         if (offp)
4296                                 *offp = io_off;
4297                         if (lenp)
4298                                 *lenp = io_len;
4299                         return (0);
4300                 }
4301                 mutex_exit(&np->r_statelock);
4302         }
4303 
4304         /*
4305          * NFS handles (flags & B_ASYNC) here...
4306          * (See nfs_async_putapage())
4307          *
4308          * This code section from: nfs3_sync_putapage()
4309          */
4310 
4311         flags |= B_WRITE;
4312 
4313         error = smbfs_rdwrlbn(vp, pp, io_off, io_len, flags, cr);
4314 
4315         if ((error == ENOSPC || error == EDQUOT || error == EFBIG ||
4316             error == EACCES) &&
4317             (flags & (B_INVAL|B_FORCE)) != (B_INVAL|B_FORCE)) {
4318                 if (!(np->r_flags & ROUTOFSPACE)) {
4319                         mutex_enter(&np->r_statelock);
4320                         np->r_flags |= ROUTOFSPACE;
4321                         mutex_exit(&np->r_statelock);
4322                 }
4323                 flags |= B_ERROR;
4324                 pvn_write_done(pp, flags);
4325                 /*
4326                  * If this was not an async thread, then try again to
4327                  * write out the pages, but this time, also destroy
4328                  * them whether or not the write is successful.  This
4329                  * will prevent memory from filling up with these
4330                  * pages and destroying them is the only alternative
4331                  * if they can't be written out.
4332                  *
4333                  * Don't do this if this is an async thread because
4334                  * when the pages are unlocked in pvn_write_done,
4335                  * some other thread could have come along, locked
4336                  * them, and queued for an async thread.  It would be
4337                  * possible for all of the async threads to be tied
4338                  * up waiting to lock the pages again and they would
4339                  * all already be locked and waiting for an async
4340                  * thread to handle them.  Deadlock.
4341                  */
4342                 if (!(flags & B_ASYNC)) {
4343                         error = smbfs_putpage(vp, io_off, io_len,
4344                             B_INVAL | B_FORCE, cr, NULL);
4345                 }
4346         } else {
4347                 if (error)
4348                         flags |= B_ERROR;
4349                 else if (np->r_flags & ROUTOFSPACE) {
4350                         mutex_enter(&np->r_statelock);
4351                         np->r_flags &= ~ROUTOFSPACE;
4352                         mutex_exit(&np->r_statelock);
4353                 }
4354                 pvn_write_done(pp, flags);
4355         }
4356 
4357         /* Now more code from: nfs3_putapage */
4358 
4359         if (offp)
4360                 *offp = io_off;
4361         if (lenp)
4362                 *lenp = io_len;
4363 
4364         return (error);
4365 }
4366 
4367 #endif  // _KERNEL
4368 
4369 
4370 /*
4371  * NFS has this in nfs_client.c (shared by v2,v3,...)
4372  * We have it here so smbfs_putapage can be file scope.
4373  */
4374 void
4375 smbfs_invalidate_pages(vnode_t *vp, u_offset_t off, cred_t *cr)
4376 {
4377         smbnode_t *np;
4378 
4379         np = VTOSMB(vp);
4380 
4381         mutex_enter(&np->r_statelock);
4382         while (np->r_flags & RTRUNCATE)
4383                 cv_wait(&np->r_cv, &np->r_statelock);
4384         np->r_flags |= RTRUNCATE;
4385 
4386         if (off == (u_offset_t)0) {
4387                 np->r_flags &= ~RDIRTY;
4388                 if (!(np->r_flags & RSTALE))
4389                         np->r_error = 0;
4390         }
4391         /* Here NFSv3 has np->r_truncaddr = off; */
4392         mutex_exit(&np->r_statelock);
4393 
4394 #ifdef  _KERNEL
4395         (void) pvn_vplist_dirty(vp, off, smbfs_putapage,
4396             B_INVAL | B_TRUNC, cr);
4397 #endif  // _KERNEL
4398 
4399         mutex_enter(&np->r_statelock);
4400         np->r_flags &= ~RTRUNCATE;
4401         cv_broadcast(&np->r_cv);
4402         mutex_exit(&np->r_statelock);
4403 }
4404 
4405 #ifdef  _KERNEL
4406 
/*
 * VOP_MAP: validate the request and create a segvn mapping of vp.
 * Returns 0 or an errno (EIO, ENOSYS, ENXIO, ENODEV, EAGAIN, EINTR).
 *
 * Like nfs3_map
 */

/* ARGSUSED */
static int
smbfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
	size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
	cred_t *cr, caller_context_t *ct)
{
	segvn_crargs_t	vn_a;
	struct vattr	va;
	smbnode_t	*np;
	smbmntinfo_t	*smi;
	int		error;

	np = VTOSMB(vp);
	smi = VTOSMI(vp);

	/* Refuse cross-zone access and dead/unmounted file systems. */
	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
		return (EIO);

	if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
		return (EIO);

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	/* Reject negative offsets and ranges that wrap. */
	if (off < 0 || off + (ssize_t)len < 0)
		return (ENXIO);

	if (vp->v_type != VREG)
		return (ENODEV);

	/*
	 * NFS does close-to-open consistency stuff here.
	 * Just get (possibly cached) attributes.
	 */
	va.va_mask = AT_ALL;
	if ((error = smbfsgetattr(vp, &va, cr)) != 0)
		return (error);

	/*
	 * Check to see if the vnode is currently marked as not cachable.
	 * This means portions of the file are locked (through VOP_FRLOCK).
	 * In this case the map request must be refused.  We use
	 * rp->r_lkserlock to avoid a race with concurrent lock requests.
	 */
	/*
	 * Atomically increment r_inmap after acquiring r_rwlock. The
	 * idea here is to acquire r_rwlock to block read/write and
	 * not to protect r_inmap. r_inmap will inform smbfs_read/write()
	 * that we are in smbfs_map(). Now, r_rwlock is acquired in order
	 * and we can prevent the deadlock that would have occurred
	 * when smbfs_addmap() would have acquired it out of order.
	 *
	 * Since we are not protecting r_inmap by any lock, we do not
	 * hold any lock when we decrement it. We atomically decrement
	 * r_inmap after we release r_lkserlock.  Note that rwlock is
	 * re-entered as writer in smbfs_addmap (called via as_map).
	 */

	if (smbfs_rw_enter_sig(&np->r_rwlock, RW_WRITER, SMBINTR(vp)))
		return (EINTR);
	atomic_inc_uint(&np->r_inmap);
	smbfs_rw_exit(&np->r_rwlock);

	if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp))) {
		atomic_dec_uint(&np->r_inmap);
		return (EINTR);
	}

	/* File (partially) locked via VOP_FRLOCK; can't map it now. */
	if (vp->v_flag & VNOCACHE) {
		error = EAGAIN;
		goto done;
	}

	/*
	 * Don't allow concurrent locks and mapping if mandatory locking is
	 * enabled.
	 */
	if ((flk_has_remote_locks(vp) || smbfs_lm_has_sleep(vp)) &&
	    MANDLOCK(vp, va.va_mode)) {
		error = EAGAIN;
		goto done;
	}

	/* Pick (or validate) the address range, then create the seg. */
	as_rangelock(as);
	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
	if (error != 0) {
		as_rangeunlock(as);
		goto done;
	}

	vn_a.vp = vp;
	vn_a.offset = off;
	vn_a.type = (flags & MAP_TYPE);
	vn_a.prot = (uchar_t)prot;
	vn_a.maxprot = (uchar_t)maxprot;
	vn_a.flags = (flags & ~MAP_TYPE);
	vn_a.cred = cr;
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	error = as_map(as, *addrp, len, segvn_create, &vn_a);
	as_rangeunlock(as);

done:
	/* See ordering comment above: drop lkserlock, then r_inmap. */
	smbfs_rw_exit(&np->r_lkserlock);
	atomic_dec_uint(&np->r_inmap);
	return (error);
}
4518 
4519 /*
4520  * This uses addmap/delmap functions to hold the SMB FID open as long as
4521  * there are pages mapped in this as/seg.  Increment the FID refs. when
4522  * the maping count goes from zero to non-zero, and release the FID ref
4523  * when the maping count goes from non-zero to zero.
4524  */
4525 
4526 /* ARGSUSED */
4527 static int
4528 smbfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
4529         size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
4530         cred_t *cr, caller_context_t *ct)
4531 {
4532         smbnode_t *np = VTOSMB(vp);
4533         boolean_t inc_fidrefs = B_FALSE;
4534 
4535         /*
4536          * When r_mapcnt goes from zero to non-zero,
4537          * increment n_fidrefs
4538          */
4539         mutex_enter(&np->r_statelock);
4540         if (np->r_mapcnt == 0)
4541                 inc_fidrefs = B_TRUE;
4542         np->r_mapcnt += btopr(len);
4543         mutex_exit(&np->r_statelock);
4544 
4545         if (inc_fidrefs) {
4546                 (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
4547                 np->n_fidrefs++;
4548                 smbfs_rw_exit(&np->r_lkserlock);
4549         }
4550 
4551         return (0);
4552 }
4553 
4554 /*
4555  * Args passed to smbfs_delmap_async
4556  */
4557 typedef struct smbfs_delmap_args {
4558         taskq_ent_t             dm_tqent;
4559         cred_t                  *dm_cr;
4560         vnode_t                 *dm_vp;
4561         offset_t                dm_off;
4562         caddr_t                 dm_addr;
4563         size_t                  dm_len;
4564         uint_t                  dm_prot;
4565         uint_t                  dm_maxprot;
4566         uint_t                  dm_flags;
4567         boolean_t               dm_rele_fid;
4568 } smbfs_delmap_args_t;
4569 
4570 /*
4571  * Using delmap not only to release the SMB FID (as described above)
4572  * but to flush dirty pages as needed.  Both of those do the actual
4573  * work in an async taskq job to avoid interfering with locks held
4574  * in the VM layer when this is called.
4575  */
4576 
4577 /* ARGSUSED */
4578 static int
4579 smbfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
4580         size_t len, uint_t prot, uint_t maxprot, uint_t flags,
4581         cred_t *cr, caller_context_t *ct)
4582 {
4583         smbnode_t               *np = VTOSMB(vp);
4584         smbmntinfo_t            *smi = VTOSMI(vp);
4585         smbfs_delmap_args_t     *dmapp;
4586 
4587         dmapp = kmem_zalloc(sizeof (*dmapp), KM_SLEEP);
4588 
4589         /*
4590          * The VM layer may segvn_free the seg holding this vnode
4591          * before our callback has a chance run, so take a hold on
4592          * the vnode here and release it in the callback.
4593          * (same for the cred)
4594          */
4595         crhold(cr);
4596         VN_HOLD(vp);
4597 
4598         dmapp->dm_vp = vp;
4599         dmapp->dm_cr = cr;
4600         dmapp->dm_off = off;
4601         dmapp->dm_addr = addr;
4602         dmapp->dm_len = len;
4603         dmapp->dm_prot = prot;
4604         dmapp->dm_maxprot = maxprot;
4605         dmapp->dm_flags = flags;
4606         dmapp->dm_rele_fid = B_FALSE;
4607 
4608         /*
4609          * Go ahead and decrement r_mapcount now, which is
4610          * the primary purpose of this function.
4611          *
4612          * When r_mapcnt goes to zero, we need to call
4613          * smbfs_rele_fid, but can't do that here, so
4614          * set a flag telling the async task to do it.
4615          */
4616         mutex_enter(&np->r_statelock);
4617         np->r_mapcnt -= btopr(len);
4618         ASSERT(np->r_mapcnt >= 0);
4619         if (np->r_mapcnt == 0)
4620                 dmapp->dm_rele_fid = B_TRUE;
4621         mutex_exit(&np->r_statelock);
4622 
4623         taskq_dispatch_ent(smi->smi_taskq, smbfs_delmap_async, dmapp, 0,
4624             &dmapp->dm_tqent);
4625 
4626         return (0);
4627 }
4628 
4629 /*
4630  * Remove some pages from an mmap'd vnode.  Flush any
4631  * dirty pages in the unmapped range.
4632  */
4633 /* ARGSUSED */
4634 static void
4635 smbfs_delmap_async(void *varg)
4636 {
4637         smbfs_delmap_args_t     *dmapp = varg;
4638         cred_t                  *cr;
4639         vnode_t                 *vp;
4640         smbnode_t               *np;
4641         smbmntinfo_t            *smi;
4642 
4643         cr = dmapp->dm_cr;
4644         vp = dmapp->dm_vp;
4645         np = VTOSMB(vp);
4646         smi = VTOSMI(vp);
4647 
4648         /* Decremented r_mapcnt in smbfs_delmap */
4649 
4650         /*
4651          * Initiate a page flush and potential commit if there are
4652          * pages, the file system was not mounted readonly, the segment
4653          * was mapped shared, and the pages themselves were writeable.
4654          *
4655          * mark RDIRTY here, will be used to check if a file is dirty when
4656          * unmount smbfs
4657          */
4658         if (vn_has_cached_data(vp) && !vn_is_readonly(vp) &&
4659             dmapp->dm_flags == MAP_SHARED &&
4660             (dmapp->dm_maxprot & PROT_WRITE) != 0) {
4661                 mutex_enter(&np->r_statelock);
4662                 np->r_flags |= RDIRTY;
4663                 mutex_exit(&np->r_statelock);
4664 
4665                 /*
4666                  * Need to finish the putpage before we
4667                  * close the OtW FID needed for I/O.
4668                  */
4669                 (void) smbfs_putpage(vp, dmapp->dm_off, dmapp->dm_len, 0,
4670                     dmapp->dm_cr, NULL);
4671         }
4672 
4673         if ((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO))
4674                 (void) smbfs_putpage(vp, dmapp->dm_off, dmapp->dm_len,
4675                     B_INVAL, dmapp->dm_cr, NULL);
4676 
4677         /*
4678          * If r_mapcnt went to zero, drop our FID ref now.
4679          * On the last fidref, this does an OtW close.
4680          */
4681         if (dmapp->dm_rele_fid) {
4682                 struct smb_cred scred;
4683 
4684                 (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
4685                 smb_credinit(&scred, dmapp->dm_cr);
4686 
4687                 smbfs_rele_fid(np, &scred);
4688 
4689                 smb_credrele(&scred);
4690                 smbfs_rw_exit(&np->r_lkserlock);
4691         }
4692 
4693         /* Release holds taken in smbfs_delmap */
4694         VN_RELE(vp);
4695         crfree(cr);
4696 
4697         kmem_free(dmapp, sizeof (*dmapp));
4698 }
4699 
4700 /* No smbfs_pageio() or smbfs_dispose() ops. */
4701 
4702 #endif  // _KERNEL
4703 
4704 /* misc. ******************************************************** */
4705 
4706 
4707 /*
4708  * XXX
4709  * This op may need to support PSARC 2007/440, nbmand changes for CIFS Service.
4710  */
4711 static int
4712 smbfs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
4713         offset_t offset, struct flk_callback *flk_cbp, cred_t *cr,
4714         caller_context_t *ct)
4715 {
4716         if (curproc->p_zone != VTOSMI(vp)->smi_zone_ref.zref_zone)
4717                 return (EIO);
4718 
4719         if (VTOSMI(vp)->smi_flags & SMI_LLOCK)
4720                 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
4721         else
4722                 return (ENOSYS);
4723 }
4724 
4725 /*
4726  * Free storage space associated with the specified vnode.  The portion
4727  * to be freed is specified by bfp->l_start and bfp->l_len (already
4728  * normalized to a "whence" of 0).
4729  *
4730  * Called by fcntl(fd, F_FREESP, lkp) for libc:ftruncate, etc.
4731  */
4732 /* ARGSUSED */
4733 static int
4734 smbfs_space(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
4735         offset_t offset, cred_t *cr, caller_context_t *ct)
4736 {
4737         int             error;
4738         smbmntinfo_t    *smi;
4739 
4740         smi = VTOSMI(vp);
4741 
4742         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4743                 return (EIO);
4744 
4745         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
4746                 return (EIO);
4747 
4748         /* Caller (fcntl) has checked v_type */
4749         ASSERT(vp->v_type == VREG);
4750         if (cmd != F_FREESP)
4751                 return (EINVAL);
4752 
4753         /*
4754          * Like NFS3, no 32-bit offset checks here.
4755          * Our SMB layer takes care to return EFBIG
4756          * when it has to fallback to a 32-bit call.
4757          */
4758 
4759         error = convoff(vp, bfp, 0, offset);
4760         if (!error) {
4761                 ASSERT(bfp->l_start >= 0);
4762                 if (bfp->l_len == 0) {
4763                         struct vattr va;
4764 
4765                         /*
4766                          * ftruncate should not change the ctime and
4767                          * mtime if we truncate the file to its
4768                          * previous size.
4769                          */
4770                         va.va_mask = AT_SIZE;
4771                         error = smbfsgetattr(vp, &va, cr);
4772                         if (error || va.va_size == bfp->l_start)
4773                                 return (error);
4774                         va.va_mask = AT_SIZE;
4775                         va.va_size = bfp->l_start;
4776                         error = smbfssetattr(vp, &va, 0, cr);
4777                         /* SMBFS_VNEVENT... */
4778                 } else
4779                         error = EINVAL;
4780         }
4781 
4782         return (error);
4783 }
4784 
4785 
4786 /* ARGSUSED */
4787 static int
4788 smbfs_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
4789 {
4790 
4791         return (ENOSYS);
4792 }
4793 
4794 
4795 /* ARGSUSED */
4796 static int
4797 smbfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
4798         caller_context_t *ct)
4799 {
4800         vfs_t *vfs;
4801         smbmntinfo_t *smi;
4802         struct smb_share *ssp;
4803 
4804         vfs = vp->v_vfsp;
4805         smi = VFTOSMI(vfs);
4806 
4807         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4808                 return (EIO);
4809 
4810         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
4811                 return (EIO);
4812 
4813         switch (cmd) {
4814         case _PC_FILESIZEBITS:
4815                 ssp = smi->smi_share;
4816                 if (SSTOVC(ssp)->vc_sopt.sv_caps & SMB_CAP_LARGE_FILES)
4817                         *valp = 64;
4818                 else
4819                         *valp = 32;
4820                 break;
4821 
4822         case _PC_LINK_MAX:
4823                 /* We only ever report one link to an object */
4824                 *valp = 1;
4825                 break;
4826 
4827         case _PC_ACL_ENABLED:
4828                 /*
4829                  * Always indicate that ACLs are enabled and
4830                  * that we support ACE_T format, otherwise
4831                  * libsec will ask for ACLENT_T format data
4832                  * which we don't support.
4833                  */
4834                 *valp = _ACL_ACE_ENABLED;
4835                 break;
4836 
4837         case _PC_SYMLINK_MAX:   /* No symlinks until we do Unix extensions */
4838                 *valp = 0;
4839                 break;
4840 
4841         case _PC_XATTR_EXISTS:
4842                 if (vfs->vfs_flag & VFS_XATTR) {
4843                         *valp = smbfs_xa_exists(vp, cr);
4844                         break;
4845                 }
4846                 return (EINVAL);
4847 
4848         case _PC_SATTR_ENABLED:
4849         case _PC_SATTR_EXISTS:
4850                 *valp = 1;
4851                 break;
4852 
4853         case _PC_TIMESTAMP_RESOLUTION:
4854                 /*
4855                  * Windows times are tenths of microseconds
4856                  * (multiples of 100 nanoseconds).
4857                  */
4858                 *valp = 100L;
4859                 break;
4860 
4861         default:
4862                 return (fs_pathconf(vp, cmd, valp, cr, ct));
4863         }
4864         return (0);
4865 }
4866 
4867 /* ARGSUSED */
4868 static int
4869 smbfs_getsecattr(vnode_t *vp, vsecattr_t *vsa, int flag, cred_t *cr,
4870         caller_context_t *ct)
4871 {
4872         vfs_t *vfsp;
4873         smbmntinfo_t *smi;
4874         int     error;
4875         uint_t  mask;
4876 
4877         vfsp = vp->v_vfsp;
4878         smi = VFTOSMI(vfsp);
4879 
4880         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4881                 return (EIO);
4882 
4883         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
4884                 return (EIO);
4885 
4886         /*
4887          * Our _pathconf indicates _ACL_ACE_ENABLED,
4888          * so we should only see VSA_ACE, etc here.
4889          * Note: vn_create asks for VSA_DFACLCNT,
4890          * and it expects ENOSYS and empty data.
4891          */
4892         mask = vsa->vsa_mask & (VSA_ACE | VSA_ACECNT |
4893             VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);
4894         if (mask == 0)
4895                 return (ENOSYS);
4896 
4897         if (smi->smi_flags & SMI_ACL)
4898                 error = smbfs_acl_getvsa(vp, vsa, flag, cr);
4899         else
4900                 error = ENOSYS;
4901 
4902         if (error == ENOSYS)
4903                 error = fs_fab_acl(vp, vsa, flag, cr, ct);
4904 
4905         return (error);
4906 }
4907 
4908 /* ARGSUSED */
4909 static int
4910 smbfs_setsecattr(vnode_t *vp, vsecattr_t *vsa, int flag, cred_t *cr,
4911         caller_context_t *ct)
4912 {
4913         vfs_t *vfsp;
4914         smbmntinfo_t *smi;
4915         int     error;
4916         uint_t  mask;
4917 
4918         vfsp = vp->v_vfsp;
4919         smi = VFTOSMI(vfsp);
4920 
4921         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
4922                 return (EIO);
4923 
4924         if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
4925                 return (EIO);
4926 
4927         /*
4928          * Our _pathconf indicates _ACL_ACE_ENABLED,
4929          * so we should only see VSA_ACE, etc here.
4930          */
4931         mask = vsa->vsa_mask & (VSA_ACE | VSA_ACECNT);
4932         if (mask == 0)
4933                 return (ENOSYS);
4934 
4935         if (vfsp->vfs_flag & VFS_RDONLY)
4936                 return (EROFS);
4937 
4938         /*
4939          * Allow only the mount owner to do this.
4940          * See comments at smbfs_access_rwx.
4941          */
4942         error = secpolicy_vnode_setdac(cr, smi->smi_uid);
4943         if (error != 0)
4944                 return (error);
4945 
4946         if (smi->smi_flags & SMI_ACL)
4947                 error = smbfs_acl_setvsa(vp, vsa, flag, cr);
4948         else
4949                 error = ENOSYS;
4950 
4951         return (error);
4952 }
4953 
4954 
4955 /*
4956  * XXX
4957  * This op should eventually support PSARC 2007/268.
4958  */
4959 static int
4960 smbfs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
4961         caller_context_t *ct)
4962 {
4963         if (curproc->p_zone != VTOSMI(vp)->smi_zone_ref.zref_zone)
4964                 return (EIO);
4965 
4966         if (VTOSMI(vp)->smi_flags & SMI_LLOCK)
4967                 return (fs_shrlock(vp, cmd, shr, flag, cr, ct));
4968         else
4969                 return (ENOSYS);
4970 }
4971 
4972 
4973 /*
4974  * Most unimplemented ops will return ENOSYS because of fs_nosys().
4975  * The only ops where that won't work are ACCESS (due to open(2)
4976  * failures) and ... (anything else left?)
4977  */
4978 const fs_operation_def_t smbfs_vnodeops_template[] = {
4979         VOPNAME_OPEN,           { .vop_open = smbfs_open },
4980         VOPNAME_CLOSE,          { .vop_close = smbfs_close },
4981         VOPNAME_READ,           { .vop_read = smbfs_read },
4982         VOPNAME_WRITE,          { .vop_write = smbfs_write },
4983         VOPNAME_IOCTL,          { .vop_ioctl = smbfs_ioctl },
4984         VOPNAME_GETATTR,        { .vop_getattr = smbfs_getattr },
4985         VOPNAME_SETATTR,        { .vop_setattr = smbfs_setattr },
4986         VOPNAME_ACCESS,         { .vop_access = smbfs_access },
4987         VOPNAME_LOOKUP,         { .vop_lookup = smbfs_lookup },
4988         VOPNAME_CREATE,         { .vop_create = smbfs_create },
4989         VOPNAME_REMOVE,         { .vop_remove = smbfs_remove },
4990         VOPNAME_LINK,           { .vop_link = smbfs_link },
4991         VOPNAME_RENAME,         { .vop_rename = smbfs_rename },
4992         VOPNAME_MKDIR,          { .vop_mkdir = smbfs_mkdir },
4993         VOPNAME_RMDIR,          { .vop_rmdir = smbfs_rmdir },
4994         VOPNAME_READDIR,        { .vop_readdir = smbfs_readdir },
4995         VOPNAME_SYMLINK,        { .vop_symlink = smbfs_symlink },
4996         VOPNAME_READLINK,       { .vop_readlink = smbfs_readlink },
4997         VOPNAME_FSYNC,          { .vop_fsync = smbfs_fsync },
4998         VOPNAME_INACTIVE,       { .vop_inactive = smbfs_inactive },
4999         VOPNAME_FID,            { .vop_fid = smbfs_fid },
5000         VOPNAME_RWLOCK,         { .vop_rwlock = smbfs_rwlock },
5001         VOPNAME_RWUNLOCK,       { .vop_rwunlock = smbfs_rwunlock },
5002         VOPNAME_SEEK,           { .vop_seek = smbfs_seek },
5003         VOPNAME_FRLOCK,         { .vop_frlock = smbfs_frlock },
5004         VOPNAME_SPACE,          { .vop_space = smbfs_space },
5005         VOPNAME_REALVP,         { .vop_realvp = smbfs_realvp },
5006 #ifdef  _KERNEL
5007         VOPNAME_GETPAGE,        { .vop_getpage = smbfs_getpage },
5008         VOPNAME_PUTPAGE,        { .vop_putpage = smbfs_putpage },
5009         VOPNAME_MAP,            { .vop_map = smbfs_map },
5010         VOPNAME_ADDMAP,         { .vop_addmap = smbfs_addmap },
5011         VOPNAME_DELMAP,         { .vop_delmap = smbfs_delmap },
5012 #endif  // _KERNEL
5013         VOPNAME_PATHCONF,       { .vop_pathconf = smbfs_pathconf },
5014         VOPNAME_SETSECATTR,     { .vop_setsecattr = smbfs_setsecattr },
5015         VOPNAME_GETSECATTR,     { .vop_getsecattr = smbfs_getsecattr },
5016         VOPNAME_SHRLOCK,        { .vop_shrlock = smbfs_shrlock },
5017 #ifdef  SMBFS_VNEVENT
5018         VOPNAME_VNEVENT,        { .vop_vnevent = fs_vnevent_support },
5019 #endif
5020         { NULL, NULL }
5021 };