NEX-14666 Need to provide SMB 2.1 Client
NEX-17187 panic in smbfs_acl_store
NEX-17231 smbfs create xattr files finds wrong file
NEX-17224 smbfs lookup EINVAL should be ENOENT
NEX-17260 SMB1 client fails to list directory after NEX-14666
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Matt Barden <matt.barden@nexenta.com>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Joyce McIntosh <joyce.mcintosh@nexenta.com>
and: (cleanup)
NEX-16818 Add fksmbcl development tool
NEX-17264 SMB client test tp_smbutil_013 fails after NEX-14666
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Matt Barden <matt.barden@nexenta.com>
and: (fix ref leaks)
NEX-16783 Panic in smbfs_delmap_callback (fix leak)
Reviewed by: Evan Layton <evan.layton@nexenta.com>
Reviewed by: Dan Fields <dan.fields@nexenta.com>
NEX-16783 Panic in smbfs_delmap_callback
Reviewed by: Jean McCormack <jean.mccormack@nexenta.com>
Reviewed by: Dan Fields <dan.fields@nexenta.com>
5404 smbfs needs mmap support
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Reviewed by: C Fraire <cfraire@me.com>
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Jason King <jason.brian.king@gmail.com>
Reviewed by: Andrew Stormont <andyjstormont@gmail.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
2552 smbfs: add support for NFS-like remove
Portions contributed by: Gordon Ross <gordon.w.ross@gmail.com>
Reviewed by: Yuri Pankov <yuripv@yuripv.net>
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: C Fraire <cfraire@me.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
        
@@ -32,12 +32,20 @@
  * $Id: smbfs_vnops.c,v 1.128.36.1 2005/05/27 02:35:28 lindak Exp $
  */
 
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
  */
 
+/*
+ * Vnode operations
+ *
+ * This file is similar to nfs3_vnops.c
+ */
+
+#include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/cred.h>
 #include <sys/vnode.h>
 #include <sys/vfs.h>
 #include <sys/filio.h>
@@ -48,11 +56,26 @@
 #include <sys/sysmacros.h>
 #include <sys/kmem.h>
 #include <sys/cmn_err.h>
 #include <sys/vfs_opreg.h>
 #include <sys/policy.h>
+#include <sys/sdt.h>
+#include <sys/taskq_impl.h>
+#include <sys/zone.h>
 
+#ifdef  _KERNEL
+#include <sys/vmsystm.h>        // for desfree
+#include <vm/hat.h>
+#include <vm/as.h>
+#include <vm/page.h>
+#include <vm/pvn.h>
+#include <vm/seg.h>
+#include <vm/seg_map.h>
+#include <vm/seg_kpm.h>
+#include <vm/seg_vn.h>
+#endif  // _KERNEL
+
 #include <netsmb/smb_osdep.h>
 #include <netsmb/smb.h>
 #include <netsmb/smb_conn.h>
 #include <netsmb/smb_subr.h>
 
@@ -61,10 +84,14 @@
 #include <smbfs/smbfs_subr.h>
 
 #include <sys/fs/smbfs_ioctl.h>
 #include <fs/fs_subr.h>
 
+#ifndef MAXOFF32_T
+#define MAXOFF32_T      0x7fffffff
+#endif
+
 /*
  * We assign directory offsets like the NFS client, where the
  * offset increments by _one_ after each directory entry.
  * Further, the entries "." and ".." are always at offsets
  * zero and one (respectively) and the "real" entries from
@@ -99,26 +126,59 @@
  * during directory listings, normally avoiding a second
  * OtW attribute fetch just after a readdir.
  */
 int smbfs_fastlookup = 1;
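As a sketch of the offset scheme described above (a hypothetical helper, not part of this change; it assumes the "real" entries begin at offset 2, following the NFS client convention):

    /* Hypothetical illustration only; not in this change. */
    static offset_t
    smbfs_dirent_offset(int nth_real_entry)
    {
            /* "." is 0, ".." is 1; real entries are 2, 3, 4, ... */
            return ((offset_t)(nth_real_entry + 2));
    }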
 
+struct vnodeops *smbfs_vnodeops = NULL;
+
 /* local static function defines */
 
 static int      smbfslookup_cache(vnode_t *, char *, int, vnode_t **,
                         cred_t *);
 static int      smbfslookup(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr,
                         int cache_ok, caller_context_t *);
-static int      smbfsrename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm,
-                        cred_t *cr, caller_context_t *);
+static int      smbfsremove(vnode_t *dvp, vnode_t *vp, struct smb_cred *scred,
+                        int flags);
+static int      smbfsrename(vnode_t *odvp, vnode_t *ovp, vnode_t *ndvp,
+                        char *nnm, struct smb_cred *scred, int flags);
 static int      smbfssetattr(vnode_t *, struct vattr *, int, cred_t *);
 static int      smbfs_accessx(void *, int, cred_t *);
 static int      smbfs_readvdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
                         caller_context_t *);
+static int      smbfsflush(smbnode_t *, struct smb_cred *);
 static void     smbfs_rele_fid(smbnode_t *, struct smb_cred *);
 static uint32_t xvattr_to_dosattr(smbnode_t *, struct vattr *);
 
+static int      smbfs_fsync(vnode_t *, int, cred_t *, caller_context_t *);
+
+static int      smbfs_putpage(vnode_t *, offset_t, size_t, int, cred_t *,
+                        caller_context_t *);
+#ifdef  _KERNEL
+static int      smbfs_getapage(vnode_t *, u_offset_t, size_t, uint_t *,
+                        page_t *[], size_t, struct seg *, caddr_t,
+                        enum seg_rw, cred_t *);
+static int      smbfs_putapage(vnode_t *, page_t *, u_offset_t *, size_t *,
+                        int, cred_t *);
+static void     smbfs_delmap_async(void *);
+
+static int      smbfs_rdwrlbn(vnode_t *, page_t *, u_offset_t, size_t, int,
+                        cred_t *);
+static int      smbfs_bio(struct buf *, int, cred_t *);
+static int      smbfs_writenp(smbnode_t *np, caddr_t base, int tcount,
+                        struct uio *uiop, int pgcreated);
+#endif  // _KERNEL
+
 /*
+ * Error flags used to pass information about certain special
+ * errors which need special handling.
+ */
+#define SMBFS_EOF                       -98
+
+/* When implementing OtW locks, make this a real function. */
+#define smbfs_lm_has_sleep(vp) 0
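If OtW (over-the-wire) byte-range locks are implemented later, one plausible shape for the real function, sketched here on the assumption that it would consult the common file-locking registry the way the NFS client does:

    /* Sketch only: a possible real smbfs_lm_has_sleep(). */
    static int
    smbfs_lm_has_sleep(vnode_t *vp)
    {
            /* Registered remote (OtW) locks imply possible sleepers. */
            return (flk_has_remote_locks(vp));
    }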
+
+/*
  * These are the vnode ops routines which implement the vnode interface to
  * the networked file system.  These routines just take their parameters,
  * make them look networkish by putting the right info into interface structs,
  * and then calling the appropriate remote routine(s) to do the work.
  *
@@ -126,113 +186,12 @@
  * we purge the directory cache relative to that vnode.  This way, the
  * user won't get burned by the cache repeatedly.  See <smbfs/smbnode.h> for
  * more details on smbnode locking.
  */
 
-static int      smbfs_open(vnode_t **, int, cred_t *, caller_context_t *);
-static int      smbfs_close(vnode_t *, int, int, offset_t, cred_t *,
-                        caller_context_t *);
-static int      smbfs_read(vnode_t *, struct uio *, int, cred_t *,
-                        caller_context_t *);
-static int      smbfs_write(vnode_t *, struct uio *, int, cred_t *,
-                        caller_context_t *);
-static int      smbfs_ioctl(vnode_t *, int, intptr_t, int, cred_t *, int *,
-                        caller_context_t *);
-static int      smbfs_getattr(vnode_t *, struct vattr *, int, cred_t *,
-                        caller_context_t *);
-static int      smbfs_setattr(vnode_t *, struct vattr *, int, cred_t *,
-                        caller_context_t *);
-static int      smbfs_access(vnode_t *, int, int, cred_t *, caller_context_t *);
-static int      smbfs_fsync(vnode_t *, int, cred_t *, caller_context_t *);
-static void     smbfs_inactive(vnode_t *, cred_t *, caller_context_t *);
-static int      smbfs_lookup(vnode_t *, char *, vnode_t **, struct pathname *,
-                        int, vnode_t *, cred_t *, caller_context_t *,
-                        int *, pathname_t *);
-static int      smbfs_create(vnode_t *, char *, struct vattr *, enum vcexcl,
-                        int, vnode_t **, cred_t *, int, caller_context_t *,
-                        vsecattr_t *);
-static int      smbfs_remove(vnode_t *, char *, cred_t *, caller_context_t *,
-                        int);
-static int      smbfs_rename(vnode_t *, char *, vnode_t *, char *, cred_t *,
-                        caller_context_t *, int);
-static int      smbfs_mkdir(vnode_t *, char *, struct vattr *, vnode_t **,
-                        cred_t *, caller_context_t *, int, vsecattr_t *);
-static int      smbfs_rmdir(vnode_t *, char *, vnode_t *, cred_t *,
-                        caller_context_t *, int);
-static int      smbfs_readdir(vnode_t *, struct uio *, cred_t *, int *,
-                        caller_context_t *, int);
-static int      smbfs_rwlock(vnode_t *, int, caller_context_t *);
-static void     smbfs_rwunlock(vnode_t *, int, caller_context_t *);
-static int      smbfs_seek(vnode_t *, offset_t, offset_t *, caller_context_t *);
-static int      smbfs_frlock(vnode_t *, int, struct flock64 *, int, offset_t,
-                        struct flk_callback *, cred_t *, caller_context_t *);
-static int      smbfs_space(vnode_t *, int, struct flock64 *, int, offset_t,
-                        cred_t *, caller_context_t *);
-static int      smbfs_pathconf(vnode_t *, int, ulong_t *, cred_t *,
-                        caller_context_t *);
-static int      smbfs_setsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
-                        caller_context_t *);
-static int      smbfs_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
-                        caller_context_t *);
-static int      smbfs_shrlock(vnode_t *, int, struct shrlock *, int, cred_t *,
-                        caller_context_t *);
 
-/* Dummy function to use until correct function is ported in */
-int noop_vnodeop() {
-        return (0);
-}
-
-struct vnodeops *smbfs_vnodeops = NULL;
-
 /*
- * Most unimplemented ops will return ENOSYS because of fs_nosys().
- * The only ops where that won't work are ACCESS (due to open(2)
- * failures) and ... (anything else left?)
- */
-const fs_operation_def_t smbfs_vnodeops_template[] = {
-        { VOPNAME_OPEN,         { .vop_open = smbfs_open } },
-        { VOPNAME_CLOSE,        { .vop_close = smbfs_close } },
-        { VOPNAME_READ,         { .vop_read = smbfs_read } },
-        { VOPNAME_WRITE,        { .vop_write = smbfs_write } },
-        { VOPNAME_IOCTL,        { .vop_ioctl = smbfs_ioctl } },
-        { VOPNAME_GETATTR,      { .vop_getattr = smbfs_getattr } },
-        { VOPNAME_SETATTR,      { .vop_setattr = smbfs_setattr } },
-        { VOPNAME_ACCESS,       { .vop_access = smbfs_access } },
-        { VOPNAME_LOOKUP,       { .vop_lookup = smbfs_lookup } },
-        { VOPNAME_CREATE,       { .vop_create = smbfs_create } },
-        { VOPNAME_REMOVE,       { .vop_remove = smbfs_remove } },
-        { VOPNAME_LINK,         { .error = fs_nosys } }, /* smbfs_link, */
-        { VOPNAME_RENAME,       { .vop_rename = smbfs_rename } },
-        { VOPNAME_MKDIR,        { .vop_mkdir = smbfs_mkdir } },
-        { VOPNAME_RMDIR,        { .vop_rmdir = smbfs_rmdir } },
-        { VOPNAME_READDIR,      { .vop_readdir = smbfs_readdir } },
-        { VOPNAME_SYMLINK,      { .error = fs_nosys } }, /* smbfs_symlink, */
-        { VOPNAME_READLINK,     { .error = fs_nosys } }, /* smbfs_readlink, */
-        { VOPNAME_FSYNC,        { .vop_fsync = smbfs_fsync } },
-        { VOPNAME_INACTIVE,     { .vop_inactive = smbfs_inactive } },
-        { VOPNAME_FID,          { .error = fs_nosys } }, /* smbfs_fid, */
-        { VOPNAME_RWLOCK,       { .vop_rwlock = smbfs_rwlock } },
-        { VOPNAME_RWUNLOCK,     { .vop_rwunlock = smbfs_rwunlock } },
-        { VOPNAME_SEEK,         { .vop_seek = smbfs_seek } },
-        { VOPNAME_FRLOCK,       { .vop_frlock = smbfs_frlock } },
-        { VOPNAME_SPACE,        { .vop_space = smbfs_space } },
-        { VOPNAME_REALVP,       { .error = fs_nosys } }, /* smbfs_realvp, */
-        { VOPNAME_GETPAGE,      { .error = fs_nosys } }, /* smbfs_getpage, */
-        { VOPNAME_PUTPAGE,      { .error = fs_nosys } }, /* smbfs_putpage, */
-        { VOPNAME_MAP,          { .error = fs_nosys } }, /* smbfs_map, */
-        { VOPNAME_ADDMAP,       { .error = fs_nosys } }, /* smbfs_addmap, */
-        { VOPNAME_DELMAP,       { .error = fs_nosys } }, /* smbfs_delmap, */
-        { VOPNAME_DUMP,         { .error = fs_nosys } }, /* smbfs_dump, */
-        { VOPNAME_PATHCONF,     { .vop_pathconf = smbfs_pathconf } },
-        { VOPNAME_PAGEIO,       { .error = fs_nosys } }, /* smbfs_pageio, */
-        { VOPNAME_SETSECATTR,   { .vop_setsecattr = smbfs_setsecattr } },
-        { VOPNAME_GETSECATTR,   { .vop_getsecattr = smbfs_getsecattr } },
-        { VOPNAME_SHRLOCK,      { .vop_shrlock = smbfs_shrlock } },
-        { NULL, NULL }
-};
-
-/*
  * XXX
  * When new and relevant functionality is enabled, we should be
  * calling vfs_set_feature() to inform callers that pieces of
  * functionality are available, per PSARC 2007/227.
  */
@@ -241,18 +200,17 @@
 smbfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
 {
         smbnode_t       *np;
         vnode_t         *vp;
         smbfattr_t      fa;
-        u_int32_t       rights, rightsrcvd;
-        u_int16_t       fid, oldfid;
-        int             oldgenid;
+        smb_fh_t        *fid = NULL;
+        smb_fh_t        *oldfid;
+        uint32_t        rights;
         struct smb_cred scred;
         smbmntinfo_t    *smi;
         smb_share_t     *ssp;
         cred_t          *oldcr;
-        int             tmperror;
         int             error = 0;
 
         vp = *vpp;
         np = VTOSMB(vp);
         smi = VTOSMI(vp);
@@ -280,11 +238,10 @@
         /*
          * Keep track of the vnode type at first open.
          * It may change later, and we need close to do
          * cleanup for the type we opened.  Also deny
          * open of new types until old type is closed.
-         * XXX: Per-open instance nodes whould help.
          */
         if (np->n_ovtype == VNON) {
                 ASSERT(np->n_dirrefs == 0);
                 ASSERT(np->n_fidrefs == 0);
         } else if (np->n_ovtype != vp->v_type) {
@@ -321,28 +278,30 @@
         /*
          * If we already have it open, and the FID is still valid,
          * check whether the rights are sufficient for FID reuse.
          */
         if (np->n_fidrefs > 0 &&
-            np->n_vcgenid == ssp->ss_vcgenid) {
+            (fid = np->n_fid) != NULL &&
+            fid->fh_vcgenid == ssp->ss_vcgenid) {
                 int upgrade = 0;
 
                 if ((flag & FWRITE) &&
-                    !(np->n_rights & SA_RIGHT_FILE_WRITE_DATA))
+                    !(fid->fh_rights & SA_RIGHT_FILE_WRITE_DATA))
                         upgrade = 1;
                 if ((flag & FREAD) &&
-                    !(np->n_rights & SA_RIGHT_FILE_READ_DATA))
+                    !(fid->fh_rights & SA_RIGHT_FILE_READ_DATA))
                         upgrade = 1;
                 if (!upgrade) {
                         /*
                          *  the existing open is good enough
                          */
                         np->n_fidrefs++;
                         goto have_fid;
                 }
+                fid = NULL;
         }
-        rights = np->n_fidrefs ? np->n_rights : 0;
+        rights = (fid != NULL) ? fid->fh_rights : 0;
 
         /*
          * we always ask for READ_CONTROL so we can always get the
          * owner/group IDs to satisfy a stat.  Ditto attributes.
          */
@@ -357,37 +316,23 @@
 
         bzero(&fa, sizeof (fa));
         error = smbfs_smb_open(np,
             NULL, 0, 0, /* name nmlen xattr */
             rights, &scred,
-            &fid, &rightsrcvd, &fa);
+            &fid, &fa);
         if (error)
                 goto out;
         smbfs_attrcache_fa(vp, &fa);
 
         /*
          * We have a new FID and access rights.
          */
         oldfid = np->n_fid;
-        oldgenid = np->n_vcgenid;
         np->n_fid = fid;
-        np->n_vcgenid = ssp->ss_vcgenid;
-        np->n_rights = rightsrcvd;
         np->n_fidrefs++;
-        if (np->n_fidrefs > 1 &&
-            oldgenid == ssp->ss_vcgenid) {
-                /*
-                 * We already had it open (presumably because
-                 * it was open with insufficient rights.)
-                 * Close old wire-open.
-                 */
-                tmperror = smbfs_smb_close(ssp,
-                    oldfid, NULL, &scred);
-                if (tmperror)
-                        SMBVDEBUG("error %d closing %s\n",
-                            tmperror, np->n_rpath);
-        }
+        if (oldfid != NULL)
+                smb_fh_rele(oldfid);
 
         /*
          * This thread did the open.
          * Save our credentials too.
          */
@@ -419,10 +364,11 @@
         caller_context_t *ct)
 {
         smbnode_t       *np;
         smbmntinfo_t    *smi;
         struct smb_cred scred;
+        int error = 0;
 
         np = VTOSMB(vp);
         smi = VTOSMI(vp);
 
         /*
@@ -466,20 +412,46 @@
         if (smi->smi_flags & SMI_LLOCK) {
                 pid_t pid = ddi_get_pid();
                 cleanlocks(vp, pid, 0);
                 cleanshares(vp, pid);
         }
+        /*
+         * else doing OtW locking.  SMB servers drop all locks
+         * on the file ID we close here, so no _lockrelease()
+         */
 
         /*
          * This (passed in) count is the ref. count from the
          * user's file_t before the closef call (fio.c).
-         * We only care when the reference goes away.
+         * The rest happens only on last close.
          */
         if (count > 1)
                 return (0);
 
+        /* NFS has DNLC purge here. */
+
         /*
+         * If the file was open for write and there are pages,
+         * then make sure dirty pages written back.
+         *
+         * NFS does this async when "close-to-open" is off
+         * (MI_NOCTO flag is set) to avoid blocking the caller.
+         * For now, always do this synchronously (no B_ASYNC).
+         */
+        if ((flag & FWRITE) && vn_has_cached_data(vp)) {
+                error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
+                if (error == EAGAIN)
+                        error = 0;
+        }
+        if (error == 0) {
+                mutex_enter(&np->r_statelock);
+                np->r_flags &= ~RSTALE;
+                np->r_error = 0;
+                mutex_exit(&np->r_statelock);
+        }
+
+        /*
          * Decrement the reference count for the FID
          * and possibly do the OtW close.
          *
          * Exclusive lock for modifying n_fid stuff.
          * Don't want this one ever interruptible.
@@ -502,17 +474,15 @@
  * Also called in smbfs_inactive (defensive cleanup).
  */
 static void
 smbfs_rele_fid(smbnode_t *np, struct smb_cred *scred)
 {
-        smb_share_t     *ssp;
         cred_t          *oldcr;
         struct smbfs_fctx *fctx;
         int             error;
-        uint16_t ofid;
+        smb_fh_t        *ofid;
 
-        ssp = np->n_mount->smi_share;
         error = 0;
 
         /* Make sure we serialize for n_dirseq use. */
         ASSERT(smbfs_rw_lock_held(&np->r_lkserlock, RW_WRITER));
 
@@ -537,18 +507,14 @@
 
         case VREG:
                 ASSERT(np->n_fidrefs > 0);
                 if (--np->n_fidrefs)
                         return;
-                if ((ofid = np->n_fid) != SMB_FID_UNUSED) {
-                        np->n_fid = SMB_FID_UNUSED;
-                        /* After reconnect, n_fid is invalid */
-                        if (np->n_vcgenid == ssp->ss_vcgenid) {
-                                error = smbfs_smb_close(
-                                    ssp, ofid, NULL, scred);
+                if ((ofid = np->n_fid) != NULL) {
+                        np->n_fid = NULL;
+                        smb_fh_rele(ofid);
                         }
-                }
                 break;
 
         default:
                 SMBVDEBUG("bad n_ovtype %d\n", np->n_ovtype);
                 break;
@@ -581,18 +547,16 @@
 {
         struct smb_cred scred;
         struct vattr    va;
         smbnode_t       *np;
         smbmntinfo_t    *smi;
-        smb_share_t     *ssp;
         offset_t        endoff;
         ssize_t         past_eof;
         int             error;
 
         np = VTOSMB(vp);
         smi = VTOSMI(vp);
-        ssp = smi->smi_share;
 
         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
                 return (EIO);
 
         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
@@ -635,29 +599,102 @@
                 past_eof = (ssize_t)(endoff - va.va_size);
                 uiop->uio_resid -= past_eof;
         } else
                 past_eof = 0;
 
+        /*
+         * Bypass VM if caching has been disabled (e.g., locking) or if
+         * using client-side direct I/O and the file is not mmap'd and
+         * there are no cached pages.
+         */
+        if ((vp->v_flag & VNOCACHE) ||
+            (((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO)) &&
+            np->r_mapcnt == 0 && np->r_inmap == 0 &&
+            !vn_has_cached_data(vp))) {
+
         /* Shared lock for n_fid use in smb_rwuio */
         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
                 return (EINTR);
         smb_credinit(&scred, cr);
 
-        /* After reconnect, n_fid is invalid */
-        if (np->n_vcgenid != ssp->ss_vcgenid)
-                error = ESTALE;
-        else
-                error = smb_rwuio(ssp, np->n_fid, UIO_READ,
+                error = smb_rwuio(np->n_fid, UIO_READ,
                     uiop, &scred, smb_timo_read);
 
         smb_credrele(&scred);
         smbfs_rw_exit(&np->r_lkserlock);
 
         /* undo adjustment of resid */
         uiop->uio_resid += past_eof;
 
         return (error);
+        }
+
+#ifdef  _KERNEL
+        /* (else) Do I/O through segmap. */
+        do {
+                caddr_t         base;
+                u_offset_t      off;
+                size_t          n;
+                int             on;
+                uint_t          flags;
+
+                off = uiop->uio_loffset & MAXBMASK; /* mapping offset */
+                on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */
+                n = MIN(MAXBSIZE - on, uiop->uio_resid);
+
+                error = smbfs_validate_caches(vp, cr);
+                if (error)
+                        break;
+
+                /* NFS waits for RINCACHEPURGE here. */
+
+                if (vpm_enable) {
+                        /*
+                         * Copy data.
+                         */
+                        error = vpm_data_copy(vp, off + on, n, uiop,
+                            1, NULL, 0, S_READ);
+                } else {
+                        base = segmap_getmapflt(segkmap, vp, off + on, n, 1,
+                            S_READ);
+
+                        error = uiomove(base + on, n, UIO_READ, uiop);
+                }
+
+                if (!error) {
+                        /*
+                         * If read a whole block or read to eof,
+                         * won't need this buffer again soon.
+                         */
+                        mutex_enter(&np->r_statelock);
+                        if (n + on == MAXBSIZE ||
+                            uiop->uio_loffset == np->r_size)
+                                flags = SM_DONTNEED;
+                        else
+                                flags = 0;
+                        mutex_exit(&np->r_statelock);
+                        if (vpm_enable) {
+                                error = vpm_sync_pages(vp, off, n, flags);
+                        } else {
+                                error = segmap_release(segkmap, base, flags);
+                        }
+                } else {
+                        if (vpm_enable) {
+                                (void) vpm_sync_pages(vp, off, n, 0);
+                        } else {
+                                (void) segmap_release(segkmap, base, 0);
+                        }
+                }
+        } while (!error && uiop->uio_resid > 0);
+#else   // _KERNEL
+        error = ENOSYS;
+#endif  // _KERNEL
+
+        /* undo adjustment of resid */
+        uiop->uio_resid += past_eof;
+
+        return (error);
 }
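The windowing arithmetic in the segmap loop above is easier to follow with concrete numbers (illustrative only; on illumos MAXBSIZE is 8192, MAXBMASK is ~(MAXBSIZE - 1), and MAXBOFFSET is MAXBSIZE - 1):

    /*
     * Example: uio_loffset = 12345, uio_resid = 100000
     *   off = 12345 & MAXBMASK   = 8192  (segmap window base)
     *   on  = 12345 & MAXBOFFSET = 4153  (offset within window)
     *   n   = MIN(8192 - 4153, 100000)   = 4039 bytes this pass
     */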
 
 
 /* ARGSUSED */
 static int
@@ -666,18 +703,21 @@
 {
         struct smb_cred scred;
         struct vattr    va;
         smbnode_t       *np;
         smbmntinfo_t    *smi;
-        smb_share_t     *ssp;
         offset_t        endoff, limit;
         ssize_t         past_limit;
         int             error, timo;
+        u_offset_t      last_off;
+        size_t          last_resid;
+#ifdef  _KERNEL
+        uint_t          bsize;
+#endif
 
         np = VTOSMB(vp);
         smi = VTOSMI(vp);
-        ssp = smi->smi_share;
 
         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
                 return (EIO);
 
         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
@@ -695,16 +735,18 @@
          * Handle ioflag bits: (FAPPEND|FSYNC|FDSYNC)
          */
         if (ioflag & (FAPPEND | FSYNC)) {
                 if (np->n_flag & NMODIFIED) {
                         smbfs_attrcache_remove(np);
-                        /* XXX: smbfs_vinvalbuf? */
                 }
         }
         if (ioflag & FAPPEND) {
                 /*
                  * File size can be changed by another client
+                 *
+                 * Todo: Consider redesigning this to use a
+                 * handle opened for append instead.
                  */
                 va.va_mask = AT_SIZE;
                 if (error = smbfsgetattr(vp, &va, cr))
                         return (error);
                 uiop->uio_loffset = va.va_size;
@@ -724,23 +766,58 @@
          * reaches the limit will be short and the next write
          * will return an error.
          *
          * So if we're starting at or beyond the limit, EFBIG.
          * Otherwise, temporarily reduce resid to the amount
-         * the falls after the limit.
+         * that is after the limit.
          */
         limit = uiop->uio_llimit;
         if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
                 limit = MAXOFFSET_T;
-        if (uiop->uio_loffset >= limit)
+        if (uiop->uio_loffset >= limit) {
+#ifdef  _KERNEL
+                proc_t *p = ttoproc(curthread);
+
+                mutex_enter(&p->p_lock);
+                (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
+                    p->p_rctls, p, RCA_UNSAFE_SIGINFO);
+                mutex_exit(&p->p_lock);
+#endif  // _KERNEL
                 return (EFBIG);
+        }
         if (endoff > limit) {
                 past_limit = (ssize_t)(endoff - limit);
                 uiop->uio_resid -= past_limit;
         } else
                 past_limit = 0;
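The limit handling above mirrors generic write(2) semantics: a write that starts at or past RLIMIT_FSIZE draws SIGXFSZ (posted by the rctl_action() call) and then EFBIG, while one that merely crosses the limit is silently shortened. A minimal user-land illustration (ordinary POSIX calls, nothing smbfs-specific):

    #include <sys/resource.h>
    #include <signal.h>
    #include <unistd.h>
    #include <errno.h>

    void
    fsize_limit_demo(int fd)
    {
            struct rlimit rl = { 4096, 4096 };

            (void) setrlimit(RLIMIT_FSIZE, &rl);
            /* Ignore SIGXFSZ so the write fails with EFBIG instead. */
            (void) signal(SIGXFSZ, SIG_IGN);
            if (pwrite(fd, "x", 1, 4096) < 0 && errno == EFBIG) {
                    /* Offset 4096 is at the limit, as in the check above. */
            }
    }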
 
+        /*
+         * Bypass VM if caching has been disabled (e.g., locking) or if
+         * using client-side direct I/O and the file is not mmap'd and
+         * there are no cached pages.
+         */
+        if ((vp->v_flag & VNOCACHE) ||
+            (((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO)) &&
+            np->r_mapcnt == 0 && np->r_inmap == 0 &&
+            !vn_has_cached_data(vp))) {
+
+#ifdef  _KERNEL
+smbfs_fwrite:
+#endif  // _KERNEL
+                if (np->r_flags & RSTALE) {
+                        last_resid = uiop->uio_resid;
+                        last_off = uiop->uio_loffset;
+                        error = np->r_error;
+                        /*
+                         * A close may have cleared r_error, if so,
+                         * propagate ESTALE error return properly
+                         */
+                        if (error == 0)
+                                error = ESTALE;
+                        goto bottom;
+                }
+
         /* Timeout: longer for append. */
         timo = smb_timo_write;
         if (endoff > np->r_size)
                 timo = smb_timo_append;
 
@@ -747,26 +824,22 @@
         /* Shared lock for n_fid use in smb_rwuio */
         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
                 return (EINTR);
         smb_credinit(&scred, cr);
 
-        /* After reconnect, n_fid is invalid */
-        if (np->n_vcgenid != ssp->ss_vcgenid)
-                error = ESTALE;
-        else
-                error = smb_rwuio(ssp, np->n_fid, UIO_WRITE,
+                error = smb_rwuio(np->n_fid, UIO_WRITE,
                     uiop, &scred, timo);
 
         if (error == 0) {
                 mutex_enter(&np->r_statelock);
                 np->n_flag |= (NFLUSHWIRE | NATTRCHANGED);
                 if (uiop->uio_loffset > (offset_t)np->r_size)
                         np->r_size = (len_t)uiop->uio_loffset;
                 mutex_exit(&np->r_statelock);
-                if (ioflag & (FSYNC|FDSYNC)) {
+                        if (ioflag & (FSYNC | FDSYNC)) {
                         /* Don't error the I/O if this fails. */
-                        (void) smbfs_smb_flush(np, &scred);
+                                (void) smbfsflush(np, &scred);
                 }
         }
 
         smb_credrele(&scred);
         smbfs_rw_exit(&np->r_lkserlock);
@@ -773,13 +846,520 @@
 
         /* undo adjustment of resid */
         uiop->uio_resid += past_limit;
 
         return (error);
+        }
+
+#ifdef  _KERNEL
+        /* (else) Do I/O through segmap. */
+        bsize = vp->v_vfsp->vfs_bsize;
+
+        do {
+                caddr_t         base;
+                u_offset_t      off;
+                size_t          n;
+                int             on;
+                uint_t          flags;
+
+                off = uiop->uio_loffset & MAXBMASK; /* mapping offset */
+                on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */
+                n = MIN(MAXBSIZE - on, uiop->uio_resid);
+
+                last_resid = uiop->uio_resid;
+                last_off = uiop->uio_loffset;
+
+                if (np->r_flags & RSTALE) {
+                        error = np->r_error;
+                        /*
+                         * A close may have cleared r_error, if so,
+                         * propagate ESTALE error return properly
+                         */
+                        if (error == 0)
+                                error = ESTALE;
+                        break;
+                }
+
+                /*
+                 * From NFS: Don't create dirty pages faster than they
+                 * can be cleaned.
+                 *
+                 * Here NFS also checks for async writes (np->r_awcount)
+                 */
+                mutex_enter(&np->r_statelock);
+                while (np->r_gcount > 0) {
+                        if (SMBINTR(vp)) {
+                                klwp_t *lwp = ttolwp(curthread);
+
+                                if (lwp != NULL)
+                                        lwp->lwp_nostop++;
+                                if (!cv_wait_sig(&np->r_cv, &np->r_statelock)) {
+                                        mutex_exit(&np->r_statelock);
+                                        if (lwp != NULL)
+                                                lwp->lwp_nostop--;
+                                        error = EINTR;
+                                        goto bottom;
+                                }
+                                if (lwp != NULL)
+                                        lwp->lwp_nostop--;
+                        } else
+                                cv_wait(&np->r_cv, &np->r_statelock);
+                }
+                mutex_exit(&np->r_statelock);
+
+                /*
+                 * Touch the page and fault it in if it is not in core
+                 * before segmap_getmapflt or vpm_data_copy can lock it.
+                 * This is to avoid the deadlock if the buffer is mapped
+                 * to the same file through mmap which we want to write.
+                 */
+                uio_prefaultpages((long)n, uiop);
+
+                if (vpm_enable) {
+                        /*
+                         * It will use kpm mappings, so no need to
+                         * pass an address.
+                         */
+                        error = smbfs_writenp(np, NULL, n, uiop, 0);
+                } else {
+                        if (segmap_kpm) {
+                                int pon = uiop->uio_loffset & PAGEOFFSET;
+                                size_t pn = MIN(PAGESIZE - pon,
+                                    uiop->uio_resid);
+                                int pagecreate;
+
+                                mutex_enter(&np->r_statelock);
+                                pagecreate = (pon == 0) && (pn == PAGESIZE ||
+                                    uiop->uio_loffset + pn >= np->r_size);
+                                mutex_exit(&np->r_statelock);
+
+                                base = segmap_getmapflt(segkmap, vp, off + on,
+                                    pn, !pagecreate, S_WRITE);
+
+                                error = smbfs_writenp(np, base + pon, n, uiop,
+                                    pagecreate);
+
+                        } else {
+                                base = segmap_getmapflt(segkmap, vp, off + on,
+                                    n, 0, S_READ);
+                                error = smbfs_writenp(np, base + on, n, uiop, 0);
+                        }
+                }
+
+                if (!error) {
+                        if (smi->smi_flags & SMI_NOAC)
+                                flags = SM_WRITE;
+                        else if ((uiop->uio_loffset % bsize) == 0 ||
+                            IS_SWAPVP(vp)) {
+                                /*
+                                 * Have written a whole block.
+                                 * Start an asynchronous write
+                                 * and mark the buffer to
+                                 * indicate that it won't be
+                                 * needed again soon.
+                                 */
+                                flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
+                        } else
+                                flags = 0;
+                        if ((ioflag & (FSYNC|FDSYNC)) ||
+                            (np->r_flags & ROUTOFSPACE)) {
+                                flags &= ~SM_ASYNC;
+                                flags |= SM_WRITE;
+                        }
+                        if (vpm_enable) {
+                                error = vpm_sync_pages(vp, off, n, flags);
+                        } else {
+                                error = segmap_release(segkmap, base, flags);
+                        }
+                } else {
+                        if (vpm_enable) {
+                                (void) vpm_sync_pages(vp, off, n, 0);
+                        } else {
+                                (void) segmap_release(segkmap, base, 0);
+                        }
+                        /*
+                         * In the event that we got an access error while
+                         * faulting in a page for a write-only file just
+                         * force a write.
+                         */
+                        if (error == EACCES)
+                                goto smbfs_fwrite;
+                }
+        } while (!error && uiop->uio_resid > 0);
+#else   // _KERNEL
+        last_resid = uiop->uio_resid;
+        last_off = uiop->uio_loffset;
+        error = ENOSYS;
+#endif  // _KERNEL
+
+bottom:
+        /* undo adjustment of resid */
+        if (error) {
+                uiop->uio_resid = last_resid + past_limit;
+                uiop->uio_loffset = last_off;
+        } else {
+                uiop->uio_resid += past_limit;
+        }
+
+        return (error);
 }
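A note on the error path: "bottom" restores uio_loffset and uio_resid to the values captured at the top of the failing pass, so work completed in earlier passes still counts as written. With made-up numbers:

    /*
     * Illustration: write 12288 bytes at offset 0 (MAXBSIZE 8192).
     *   pass 1: off 0,    n 8192 -- succeeds
     *   pass 2: off 8192, n 4096 -- fails
     * bottom: uio_loffset = 8192, uio_resid = 4096 + past_limit,
     * so the caller sees 8192 bytes written plus the error.
     */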
 
+#ifdef  _KERNEL
 
+/*
+ * Like nfs_client.c: writerp()
+ *
+ * Write by creating pages and uiomove data onto them.
+ */
+
+int
+smbfs_writenp(smbnode_t *np, caddr_t base, int tcount, struct uio *uio,
+    int pgcreated)
+{
+        int             pagecreate;
+        int             n;
+        int             saved_n;
+        caddr_t         saved_base;
+        u_offset_t      offset;
+        int             error;
+        int             sm_error;
+        vnode_t         *vp = SMBTOV(np);
+
+        ASSERT(tcount <= MAXBSIZE && tcount <= uio->uio_resid);
+        ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_WRITER));
+        if (!vpm_enable) {
+                ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
+        }
+
+        /*
+         * Move bytes in at most PAGESIZE chunks. We must avoid
+         * spanning pages in uiomove() because page faults may cause
+         * the cache to be invalidated out from under us. The r_size is not
+         * updated until after the uiomove. If we push the last page of a
+         * file before r_size is correct, we will lose the data written past
+         * the current (and invalid) r_size.
+         */
+        do {
+                offset = uio->uio_loffset;
+                pagecreate = 0;
+
+                /*
+                 * n is the number of bytes required to satisfy the request
+                 *   or the number of bytes to fill out the page.
+                 */
+                n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount);
+
+                /*
+                 * Check to see if we can skip reading in the page
+                 * and just allocate the memory.  We can do this
+                 * if we are going to rewrite the entire mapping
+                 * or if we are going to write to or beyond the current
+                 * end of file from the beginning of the mapping.
+                 *
+                 * The read of r_size is now protected by r_statelock.
+                 */
+                mutex_enter(&np->r_statelock);
+                /*
+                 * When pgcreated is nonzero the caller has already done
+                 * a segmap_getmapflt with forcefault 0 and S_WRITE. With
+                 * segkpm this means we already have at least one page
+                 * created and mapped at base.
+                 */
+                pagecreate = pgcreated ||
+                    ((offset & PAGEOFFSET) == 0 &&
+                    (n == PAGESIZE || ((offset + n) >= np->r_size)));
+
+                mutex_exit(&np->r_statelock);
+                if (!vpm_enable && pagecreate) {
+                        /*
+                         * The last argument tells segmap_pagecreate() to
+                         * always lock the page, as opposed to sometimes
+                         * returning with the page locked. This way we avoid a
+                         * fault on the ensuing uiomove(), but also
+                         * more importantly (to fix bug 1094402) we can
+                         * call segmap_fault() to unlock the page in all
+                         * cases. An alternative would be to modify
+                         * segmap_pagecreate() to tell us when it is
+                         * locking a page, but that's a fairly major
+                         * interface change.
+                         */
+                        if (pgcreated == 0)
+                                (void) segmap_pagecreate(segkmap, base,
+                                    (uint_t)n, 1);
+                        saved_base = base;
+                        saved_n = n;
+                }
+
+                /*
+                 * The number of bytes of data in the last page cannot
+                 * be accurately determined while the page is being
+                 * uiomove'd to and the size of the file is being updated.
+                 * Thus, inform threads which need to know accurately
+                 * how much data is in the last page of the file.  They
+                 * will not do the i/o immediately, but will arrange for
+                 * the i/o to happen later when this modify operation
+                 * will have finished.
+                 */
+                ASSERT(!(np->r_flags & RMODINPROGRESS));
+                mutex_enter(&np->r_statelock);
+                np->r_flags |= RMODINPROGRESS;
+                np->r_modaddr = (offset & MAXBMASK);
+                mutex_exit(&np->r_statelock);
+
+                if (vpm_enable) {
+                        /*
+                         * Copy data. If new pages are created, the part of
+                         * the page that is not written will be initialized
+                         * with zeros.
+                         */
+                        error = vpm_data_copy(vp, offset, n, uio,
+                            !pagecreate, NULL, 0, S_WRITE);
+                } else {
+                        error = uiomove(base, n, UIO_WRITE, uio);
+                }
+
+                /*
+                 * r_size is the maximum number of
+                 * bytes known to be in the file.
+                 * Make sure it is at least as high as the
+                 * first unwritten byte pointed to by uio_loffset.
+                 */
+                mutex_enter(&np->r_statelock);
+                if (np->r_size < uio->uio_loffset)
+                        np->r_size = uio->uio_loffset;
+                np->r_flags &= ~RMODINPROGRESS;
+                np->r_flags |= RDIRTY;
+                mutex_exit(&np->r_statelock);
+
+                /* n = # of bytes written */
+                n = (int)(uio->uio_loffset - offset);
+
+                if (!vpm_enable) {
+                        base += n;
+                }
+                tcount -= n;
+                /*
+                 * If we created pages w/o initializing them completely,
+                 * we need to zero the part that wasn't set up.
+                 * This happens in most EOF write cases and if
+                 * we had some sort of error during the uiomove.
+                 */
+                if (!vpm_enable && pagecreate) {
+                        if ((uio->uio_loffset & PAGEOFFSET) || n == 0)
+                                (void) kzero(base, PAGESIZE - n);
+
+                        if (pgcreated) {
+                                /*
+                                 * Caller is responsible for this page,
+                                 * it was not created in this loop.
+                                 */
+                                pgcreated = 0;
+                        } else {
+                                /*
+                                 * For bug 1094402: segmap_pagecreate locks
+                                 * page. Unlock it. This also unlocks the
+                                 * pages allocated by page_create_va() in
+                                 * segmap_pagecreate().
+                                 */
+                                sm_error = segmap_fault(kas.a_hat, segkmap,
+                                    saved_base, saved_n,
+                                    F_SOFTUNLOCK, S_WRITE);
+                                if (error == 0)
+                                        error = sm_error;
+                        }
+                }
+        } while (tcount > 0 && error == 0);
+
+        return (error);
+}
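The pagecreate decision in smbfs_writenp() controls whether a page can be created without first reading it over the wire. A few worked cases (a sketch, assuming 4 KB pages):

    /*
     * pagecreate = pgcreated ||
     *     ((offset & PAGEOFFSET) == 0 &&
     *     (n == PAGESIZE || offset + n >= r_size))
     *
     *   offset 8192,  n 4096, r_size 20000 -> 1 (full-page rewrite)
     *   offset 8192,  n 1000, r_size 20000 -> 0 (partial, must read)
     *   offset 20480, n 100,  r_size 20000 -> 1 (write at/past EOF)
     */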
+
+/*
+ * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED}
+ * Like nfs3_rdwrlbn()
+ */
+static int
+smbfs_rdwrlbn(vnode_t *vp, page_t *pp, u_offset_t off, size_t len,
+        int flags, cred_t *cr)
+{
+        smbmntinfo_t    *smi = VTOSMI(vp);
+        struct buf *bp;
+        int error;
+        int sync;
+
+        if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
+                return (EIO);
+
+        if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+                return (EIO);
+
+        bp = pageio_setup(pp, len, vp, flags);
+        ASSERT(bp != NULL);
+
+        /*
+         * pageio_setup should have set b_addr to 0.  This
+         * is correct since we want to do I/O on a page
+         * boundary.  bp_mapin will use this addr to calculate
+         * an offset, and then set b_addr to the kernel virtual
+         * address it allocated for us.
+         */
+        ASSERT(bp->b_un.b_addr == 0);
+
+        bp->b_edev = 0;
+        bp->b_dev = 0;
+        bp->b_lblkno = lbtodb(off);
+        bp->b_file = vp;
+        bp->b_offset = (offset_t)off;
+        bp_mapin(bp);
+
+        /*
+         * Calculate the desired level of stability to write data.
+         */
+        if ((flags & (B_WRITE|B_ASYNC)) == (B_WRITE|B_ASYNC) &&
+            freemem > desfree) {
+                sync = 0;
+        } else {
+                sync = 1;
+        }
+
+        error = smbfs_bio(bp, sync, cr);
+
+        bp_mapout(bp);
+        pageio_done(bp);
+
+        return (error);
+}
+
+
+/*
+ * Corresponds to nfs3_vnops.c : nfs3_bio(), though the NFS code
+ * uses nfs3read()/nfs3write() where we use smb_rwuio().  Also,
+ * NFS has this later in the file; it is kept up here, closer to
+ * its one call site just above.
+ */
+
+static int
+smbfs_bio(struct buf *bp, int sync, cred_t *cr)
+{
+        struct iovec aiov[1];
+        struct uio  auio;
+        struct smb_cred scred;
+        smbnode_t *np = VTOSMB(bp->b_vp);
+        smbmntinfo_t *smi = np->n_mount;
+        offset_t offset;
+        offset_t endoff;
+        size_t count;
+        size_t past_eof;
+        int error;
+
+        ASSERT(curproc->p_zone == smi->smi_zone_ref.zref_zone);
+
+        offset = ldbtob(bp->b_lblkno);
+        count = bp->b_bcount;
+        endoff = offset + count;
+        if (offset < 0 || endoff < 0)
+                return (EINVAL);
+
+        /*
+         * Limit file I/O to the remaining file size, but see
+         * the notes in smbfs_getpage about SMBFS_EOF.
+         */
+        mutex_enter(&np->r_statelock);
+        if (offset >= np->r_size) {
+                mutex_exit(&np->r_statelock);
+                if (bp->b_flags & B_READ) {
+                        return (SMBFS_EOF);
+                } else {
+                        return (EINVAL);
+                }
+        }
+        if (endoff > np->r_size) {
+                past_eof = (size_t)(endoff - np->r_size);
+                count -= past_eof;
+        } else
+                past_eof = 0;
+        mutex_exit(&np->r_statelock);
+        ASSERT(count > 0);
+
+        /* Caller did bp_mapin().  Mapped address is... */
+        aiov[0].iov_base = bp->b_un.b_addr;
+        aiov[0].iov_len = count;
+        auio.uio_iov = aiov;
+        auio.uio_iovcnt = 1;
+        auio.uio_loffset = offset;
+        auio.uio_segflg = UIO_SYSSPACE;
+        auio.uio_fmode = 0;
+        auio.uio_resid = count;
+
+        /* Shared lock for n_fid use in smb_rwuio */
+        if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER,
+            smi->smi_flags & SMI_INT))
+                return (EINTR);
+        smb_credinit(&scred, cr);
+
+        DTRACE_IO1(start, struct buf *, bp);
+
+        if (bp->b_flags & B_READ) {
+
+                error = smb_rwuio(np->n_fid, UIO_READ,
+                    &auio, &scred, smb_timo_read);
+
+                /* Like NFS, only set b_error here. */
+                bp->b_error = error;
+                bp->b_resid = auio.uio_resid;
+
+                if (!error && auio.uio_resid != 0)
+                        error = EIO;
+                if (!error && past_eof != 0) {
+                        /* Zero the memory beyond EOF. */
+                        bzero(bp->b_un.b_addr + count, past_eof);
+                }
+        } else {
+
+                error = smb_rwuio(np->n_fid, UIO_WRITE,
+                    &auio, &scred, smb_timo_write);
+
+                /* Like NFS, only set b_error here. */
+                bp->b_error = error;
+                bp->b_resid = auio.uio_resid;
+
+                if (!error && auio.uio_resid != 0)
+                        error = EIO;
+                if (!error && sync) {
+                        (void) smbfsflush(np, &scred);
+                }
+        }
+
+        /*
+         * This comes from nfs3_commit()
+         */
+        if (error != 0) {
+                mutex_enter(&np->r_statelock);
+                if (error == ESTALE)
+                        np->r_flags |= RSTALE;
+                if (!np->r_error)
+                        np->r_error = error;
+                mutex_exit(&np->r_statelock);
+                bp->b_flags |= B_ERROR;
+        }
+
+        DTRACE_IO1(done, struct buf *, bp);
+
+        smb_credrele(&scred);
+        smbfs_rw_exit(&np->r_lkserlock);
+
+        if (error == ESTALE)
+                smbfs_attrcache_remove(np);
+
+        return (error);
+}
+#endif  // _KERNEL
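smbfs_bio() returns the private SMBFS_EOF value for reads that begin at or beyond EOF; per the comment above, the paging path is expected to turn that into a zero-filled page rather than a hard error. A sketch of the caller-side pattern (hypothetical fragment; the real handling lives in smbfs_getpage, which is not shown in this hunk):

    error = smbfs_bio(bp, 0, cr);
    if (error == SMBFS_EOF) {
            /* Entirely past EOF: supply zeros instead of failing. */
            pagezero(pp, 0, PAGESIZE);
            error = 0;
    }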
+
+/*
+ * Here NFS has: nfs3write, nfs3read
+ * We use smb_rwuio instead.
+ */
+
 /* ARGSUSED */
 static int
 smbfs_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag,
         cred_t *cr, int *rvalp, caller_context_t *ct)
 {
@@ -793,11 +1373,10 @@
 
         if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
                 return (EIO);
 
         switch (cmd) {
-                /* First three from ZFS. XXX - need these? */
 
         case _FIOFFS:
                 error = smbfs_fsync(vp, 0, cr, ct);
                 break;
 
@@ -808,14 +1387,18 @@
         case _FIOGDIO:
         case _FIOSDIO:
                 error = 0;
                 break;
 
-#ifdef NOT_YET  /* XXX - from the NFS code. */
+#if 0   /* Todo - SMB ioctl query regions */
+        case _FIO_SEEK_DATA:
+        case _FIO_SEEK_HOLE:
+#endif
+
         case _FIODIRECTIO:
                 error = smbfs_directio(vp, (int)arg, cr);
-#endif
+                break;
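With _FIODIRECTIO wired to smbfs_directio(), applications can toggle client-side direct I/O as they do on UFS and NFS, via directio(3C), which issues this ioctl. A small user-land example (note the read/write paths above still require no cached pages and no mappings before they bypass VM):

    #include <sys/types.h>
    #include <sys/fcntl.h>
    #include <stdio.h>

    void
    enable_directio(int fd)
    {
            /* Advisory: bypass the client page cache on this file. */
            if (directio(fd, DIRECTIO_ON) < 0)
                    perror("directio");
    }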
 
                 /*
                  * Allow get/set with "raw" security descriptor (SD) data.
                  * Useful for testing, diagnosing idmap problems, etc.
                  */
@@ -845,10 +1428,11 @@
 smbfs_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
         caller_context_t *ct)
 {
         smbnode_t *np;
         smbmntinfo_t *smi;
+        int error;
 
         smi = VTOSMI(vp);
 
         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
                 return (EIO);
@@ -879,10 +1463,34 @@
                         mutex_exit(&np->r_statelock);
                         return (0);
                 }
         }
 
+        /*
+         * Only need to flush pages if asking for the mtime
+         * and if there are any dirty pages.
+         *
+         * Here NFS also checks for async writes (np->r_awcount)
+         */
+        if (vap->va_mask & AT_MTIME) {
+                if (vn_has_cached_data(vp) &&
+                    ((np->r_flags & RDIRTY) != 0)) {
+                        mutex_enter(&np->r_statelock);
+                        np->r_gcount++;
+                        mutex_exit(&np->r_statelock);
+                        error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
+                        mutex_enter(&np->r_statelock);
+                        if (error && (error == ENOSPC || error == EDQUOT)) {
+                                if (!np->r_error)
+                                        np->r_error = error;
+                        }
+                        if (--np->r_gcount == 0)
+                                cv_broadcast(&np->r_cv);
+                        mutex_exit(&np->r_statelock);
+                }
+        }
+
         return (smbfsgetattr(vp, vap, cr));
 }
 
 /* smbfsgetattr() in smbfs_client.c */
 
@@ -949,11 +1557,18 @@
                          * the rest of the setattr work.
                          */
                 }
         }
 
-        return (smbfssetattr(vp, vap, flags, cr));
+        error = smbfssetattr(vp, vap, flags, cr);
+
+#ifdef  SMBFS_VNEVENT
+        if (error == 0 && (vap->va_mask & AT_SIZE) && vap->va_size == 0)
+                vnevent_truncate(vp, ct);
+#endif
+
+        return (error);
 }
 
 /*
  * Mostly from Darwin smbfs_setattr()
  * but then modified a lot.
@@ -962,16 +1577,16 @@
 static int
 smbfssetattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr)
 {
         int             error = 0;
         smbnode_t       *np = VTOSMB(vp);
+        smbmntinfo_t    *smi = np->n_mount;
         uint_t          mask = vap->va_mask;
         struct timespec *mtime, *atime;
         struct smb_cred scred;
-        int             cerror, modified = 0;
-        unsigned short  fid;
-        int have_fid = 0;
+        int             modified = 0;
+        smb_fh_t        *fid = NULL;
         uint32_t rights = 0;
         uint32_t dosattr = 0;
 
         ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone_ref.zref_zone);
 
@@ -987,20 +1602,42 @@
                         SMBVDEBUG("ignore set time on xattr\n");
                 mask &= AT_SIZE;
         }
 
         /*
+         * Only need to flush pages if there are any pages and
+         * if the file is marked as dirty in some fashion.  The
+         * file must be flushed so that we can accurately
+         * determine the size of the file and the cached data
+         * after the SETATTR returns.  A file is considered to
+         * be dirty if it is either marked with RDIRTY, has
+         * outstanding i/o's active, or is mmap'd.  In this
+         * last case, we can't tell whether there are dirty
+         * pages, so we flush just to be sure.
+         */
+        if (vn_has_cached_data(vp) &&
+            ((np->r_flags & RDIRTY) ||
+            np->r_count > 0 ||
+            np->r_mapcnt > 0)) {
+                ASSERT(vp->v_type != VCHR);
+                error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, NULL);
+                if (error && (error == ENOSPC || error == EDQUOT)) {
+                        mutex_enter(&np->r_statelock);
+                        if (!np->r_error)
+                                np->r_error = error;
+                        mutex_exit(&np->r_statelock);
+                }
+        }
+
+        /*
          * If our caller is trying to set multiple attributes, they
          * can make no assumption about what order they are done in.
          * Here we try to do them in order of decreasing likelihood
          * of failure, just to minimize the chance we'll wind up
          * with a partially complete request.
          */
 
-        /* Shared lock for (possible) n_fid use. */
-        if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
-                return (EINTR);
         smb_credinit(&scred, cr);
 
         /*
          * If the caller has provided extensible attributes,
          * map those into DOS attributes supported by SMB.
@@ -1034,11 +1671,11 @@
                 if (error) {
                         SMBVDEBUG("error %d opening %s\n",
                             error, np->n_rpath);
                         goto out;
                 }
-                have_fid = 1;
+                ASSERT(fid != NULL);
         }
 
         /*
          * If the server supports the UNIX extensions, right here is where
          * we'd support changes to uid, gid, mode, and possibly va_flags.
@@ -1048,52 +1685,48 @@
         if (mask & AT_SIZE) {
                 /*
                  * If the new file size is less than what the client sees as
                  * the file size, then just change the size and invalidate
                  * the pages.
-                 * I am commenting this code at present because the function
-                 * smbfs_putapage() is not yet implemented.
                  */
 
                 /*
                  * Set the file size to vap->va_size.
                  */
-                ASSERT(have_fid);
-                error = smbfs_smb_setfsize(np, fid, vap->va_size, &scred);
+                ASSERT(fid != NULL);
+                error = smbfs_smb_setfsize(smi->smi_share, fid,
+                    vap->va_size, &scred);
                 if (error) {
                         SMBVDEBUG("setsize error %d file %s\n",
                             error, np->n_rpath);
                 } else {
                         /*
                          * Darwin had code here to zero-extend.
                          * Tests indicate the server will zero-fill,
-                         * so looks like we don't need to do this.
-                         * Good thing, as this could take forever.
-                         *
-                         * XXX: Reportedly, writing one byte of zero
-                         * at the end offset avoids problems here.
+                         * so looks like we don't need to do that.
                          */
                         mutex_enter(&np->r_statelock);
                         np->r_size = vap->va_size;
+                        np->n_flag |= (NFLUSHWIRE | NATTRCHANGED);
                         mutex_exit(&np->r_statelock);
                         modified = 1;
                 }
         }
 
         /*
-         * XXX: When Solaris has create_time, set that too.
-         * Note: create_time is different from ctime.
+         * Todo: Implement setting create_time (which is
+         * different from ctime).
          */
         mtime = ((mask & AT_MTIME) ? &vap->va_mtime : 0);
         atime = ((mask & AT_ATIME) ? &vap->va_atime : 0);
 
         if (dosattr || mtime || atime) {
                 /*
                  * Always use the handle-based set attr call now.
                  */
-                ASSERT(have_fid);
-                error = smbfs_smb_setfattr(np, fid,
+                ASSERT(fid != NULL);
+                error = smbfs_smb_setfattr(smi->smi_share, fid,
                     dosattr, mtime, atime, &scred);
                 if (error) {
                         SMBVDEBUG("set times error %d file %s\n",
                             error, np->n_rpath);
                 } else {
@@ -1100,28 +1733,40 @@
                         modified = 1;
                 }
         }
 
 out:
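+        /* Release the FID we got from smbfs_smb_tmpopen (if any). */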
+        if (fid != NULL)
+                smbfs_smb_tmpclose(np, fid);
+
+        smb_credrele(&scred);
+
         if (modified) {
                 /*
                  * Invalidate attribute cache in case the server
                  * doesn't set exactly the attributes we asked.
                  */
                 smbfs_attrcache_remove(np);
-        }
 
-        if (have_fid) {
-                cerror = smbfs_smb_tmpclose(np, fid, &scred);
-                if (cerror)
-                        SMBVDEBUG("error %d closing %s\n",
-                            cerror, np->n_rpath);
+                /*
+                 * If changing the size of the file, invalidate
+                 * any local cached data which is no longer part
+                 * of the file.  We also possibly invalidate the
+                 * last page in the file.  We could use
+                 * pvn_vpzero(), but this would mark the page as
+                 * modified and require it to be written back to
+                 * the server for no particularly good reason.
+                 * This way, if we access it, then we bring it
+                 * back in.  A read should be cheaper than a
+                 * write.
+                 */
+                if (mask & AT_SIZE) {
+                        smbfs_invalidate_pages(vp,
+                            (vap->va_size & PAGEMASK), cr);
         }
+        }
 
-        smb_credrele(&scred);
-        smbfs_rw_exit(&np->r_lkserlock);
-
         return (error);
 }
 
 /*
  * Helper function for extensible system attributes (PSARC 2007/315)
@@ -1204,14 +1849,10 @@
  *
  * We still (sort of) need a vnode when we call
  * secpolicy_vnode_access, but that only uses
  * the vtype field, so we can use a pair of fake
  * vnodes that have only v_type filled in.
- *
- * XXX: Later, add a new secpolicy_vtype_access()
- * that takes the vtype instead of a vnode, and
- * get rid of the tmpl_vxxx fake vnodes below.
  */
 static int
 smbfs_access_rwx(vfs_t *vfsp, int vtype, int mode, cred_t *cr)
 {
         /* See the secpolicy call below. */
@@ -1222,12 +1863,10 @@
         struct smbmntinfo *smi = VFTOSMI(vfsp);
         int shift = 0;
 
         /*
          * Build our (fabricated) vnode attributes.
-         * XXX: Could make these templates in the
-         * per-mount struct and use them here.
          */
         bzero(&va, sizeof (va));
         va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
         va.va_type = vtype;
         va.va_mode = (vtype == VDIR) ?
@@ -1248,11 +1887,10 @@
                 return (EROFS);
 
         /*
          * Disallow attempts to access mandatory lock files.
          * Similarly, expand MANDLOCK here.
-         * XXX: not sure we need this.
          */
         if ((mode & (VWRITE | VREAD | VEXEC)) &&
             va.va_type == VREG && MANDMODE(va.va_mode))
                 return (EACCES);
 
@@ -1318,10 +1956,19 @@
 
         return (smbfs_access_rwx(vfsp, vp->v_type, mode, cr));
 }
 
 
+/* ARGSUSED */
+static int
+smbfs_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr, caller_context_t *ct)
+{
+        /* Not yet... */
+        return (ENOSYS);
+}
+
+
 /*
  * Flush local dirty pages to stable storage on the server.
  *
  * If FNODSYNC is specified, then there is nothing to do because
  * metadata changes are not cached on the client before being
@@ -1349,50 +1996,117 @@
                 return (0);
 
         if ((syncflag & (FSYNC|FDSYNC)) == 0)
                 return (0);
 
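+        /* Push any dirty pages to the server before the OTW flush below. */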
+        error = smbfs_putpage(vp, (offset_t)0, 0, 0, cr, ct);
+        if (error)
+                return (error);
+
         /* Shared lock for n_fid use in _flush */
         if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_READER, SMBINTR(vp)))
                 return (EINTR);
         smb_credinit(&scred, cr);
 
-        error = smbfs_smb_flush(np, &scred);
+        error = smbfsflush(np, &scred);
 
         smb_credrele(&scred);
         smbfs_rw_exit(&np->r_lkserlock);
 
         return (error);
 }
 
+static int
+smbfsflush(smbnode_t *np, struct smb_cred *scrp)
+{
+        struct smb_share *ssp = np->n_mount->smi_share;
+        smb_fh_t *fhp;
+        int error;
+
+        /* Shared lock for n_fid use below. */
+        ASSERT(smbfs_rw_lock_held(&np->r_lkserlock, RW_READER));
+
+        if (!(np->n_flag & NFLUSHWIRE))
+                return (0);
+        if (np->n_fidrefs == 0)
+                return (0); /* not open */
+        if ((fhp = np->n_fid) == NULL)
+                return (0);
+
+        /* After reconnect, n_fid is invalid */
+        if (fhp->fh_vcgenid != ssp->ss_vcgenid)
+                return (ESTALE);
+
+        error = smbfs_smb_flush(ssp, fhp, scrp);
+
+        if (!error) {
+                mutex_enter(&np->r_statelock);
+                np->n_flag &= ~NFLUSHWIRE;
+                mutex_exit(&np->r_statelock);
+        }
+        return (error);
+}
+
 /*
  * Last reference to vnode went away.
  */
 /* ARGSUSED */
 static void
 smbfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
 {
-        smbnode_t       *np;
         struct smb_cred scred;
+        smbnode_t       *np = VTOSMB(vp);
+        int error;
 
         /*
          * Don't "bail out" for VFS_UNMOUNTED here,
          * as we want to do cleanup, etc.
          * See also pcfs_inactive
          */
 
-        np = VTOSMB(vp);
-
         /*
          * If this is coming from the wrong zone, we let someone in the right
          * zone take care of it asynchronously.  We can get here due to
          * VN_RELE() being called from pageout() or fsflush().  This call may
          * potentially turn into an expensive no-op if, for instance, v_count
          * gets incremented in the meantime, but it's still correct.
          */
 
         /*
+         * From NFS:rinactive()
+         *
+         * Before freeing anything, wait until all asynchronous
+         * activity is done on this rnode.  This will allow all
+         * asynchronous read ahead and write behind i/o's to
+         * finish.
+         */
+        mutex_enter(&np->r_statelock);
+        while (np->r_count > 0)
+                cv_wait(&np->r_cv, &np->r_statelock);
+        mutex_exit(&np->r_statelock);
+
+        /*
+         * Flush and invalidate all pages associated with the vnode.
+         */
+        if (vn_has_cached_data(vp)) {
+                if ((np->r_flags & RDIRTY) && !np->r_error) {
+                        error = smbfs_putpage(vp, (u_offset_t)0, 0, 0, cr, ct);
+                        if (error && (error == ENOSPC || error == EDQUOT)) {
+                                mutex_enter(&np->r_statelock);
+                                if (!np->r_error)
+                                        np->r_error = error;
+                                mutex_exit(&np->r_statelock);
+                        }
+                }
+                smbfs_invalidate_pages(vp, (u_offset_t)0, cr);
+        }
+        /*
+         * This vnode should have lost all cached data.
+         */
+        ASSERT(vn_has_cached_data(vp) == 0);
+
+        /*
          * Defend against the possibility that higher-level callers
          * might not correctly balance open and close calls.  If we
          * get here with open references remaining, it means there
          * was a missing VOP_CLOSE somewhere.  If that happens, do
          * the close here so we don't "leak" FIDs on the server.
@@ -1419,12 +2133,12 @@
                 break;
 
         case VREG:
                 if (np->n_fidrefs == 0)
                         break;
-                SMBVDEBUG("open file: refs %d id 0x%x path %s\n",
-                    np->n_fidrefs, np->n_fid, np->n_rpath);
+                SMBVDEBUG("open file: refs %d path %s\n",
+                    np->n_fidrefs, np->n_rpath);
                 /* Force last close. */
                 np->n_fidrefs = 1;
                 smbfs_rele_fid(np, &scred);
                 break;
 
@@ -1435,10 +2149,21 @@
         }
 
         smb_credrele(&scred);
         smbfs_rw_exit(&np->r_lkserlock);
 
+        /*
+         * XATTR directories (and the files under them) have
+         * little value for reclaim, so just remove them from
+         * the "hash" (AVL) as soon as they go inactive.
+         * Note that the node may already have been removed
+         * from the hash by smbfsremove.
+         */
+        if ((np->n_flag & N_XATTR) != 0 &&
+            (np->r_flags & RHASHED) != 0)
+                smbfs_rmhash(np);
+
         smbfs_addfree(np);
 }
 
 /*
  * Remote file system operations having to do with directory manipulation.
@@ -1487,10 +2212,18 @@
 
         error = smbfslookup(dvp, nm, vpp, cr, 1, ct);
 
         smbfs_rw_exit(&dnp->r_rwlock);
 
+        /*
+         * If the caller passes an invalid name here, we'll have
+         * error == EINVAL but want to return ENOENT.  This is
+         * common with things like "ls foo*" with no matches.
+         */
+        if (error == EINVAL)
+                error = ENOENT;
+
         return (error);
 }
 
 /* ARGSUSED */
 static int
@@ -1514,23 +2247,14 @@
         smi = VTOSMI(dvp);
         dnp = VTOSMB(dvp);
 
         ASSERT(curproc->p_zone == smi->smi_zone_ref.zref_zone);
 
-#ifdef NOT_YET
-        vcp = SSTOVC(smi->smi_share);
-
-        /* XXX: Should compute this once and store it in smbmntinfo_t */
-        supplen = (SMB_DIALECT(vcp) >= SMB_DIALECT_LANMAN2_0) ? 255 : 12;
-#else
         supplen = 255;
-#endif
 
         /*
          * RWlock must be held, either reader or writer.
-         * XXX: Can we check without looking directly
-         * inside the struct smbfs_rwlock_t?
          */
         ASSERT(dnp->r_rwlock.count != 0);
 
         /*
          * If lookup is for "", just return dvp.
@@ -1573,11 +2297,11 @@
                 return (ENAMETOOLONG);
 
         /*
          * Avoid surprises with characters that are
          * illegal in Windows file names.
-         * Todo: CATIA mappings  XXX
+         * Todo: CATIA mappings?
          */
         ill = illegal_chars;
         if (dnp->n_flag & N_XATTR)
                 ill++; /* allow colon */
         if (strpbrk(nm, ill))
@@ -1794,10 +2518,11 @@
 #endif
         *vpp = vp;
         return (0);
 }
 
+
 /*
  * XXX
  * vsecattr_t is new to build 77, and we need to eventually support
  * it in order to create an ACL when an object is created.
  *
@@ -1809,25 +2534,22 @@
 smbfs_create(vnode_t *dvp, char *nm, struct vattr *va, enum vcexcl exclusive,
         int mode, vnode_t **vpp, cred_t *cr, int lfaware, caller_context_t *ct,
         vsecattr_t *vsecp)
 {
         int             error;
-        int             cerror;
         vfs_t           *vfsp;
         vnode_t         *vp;
-#ifdef NOT_YET
         smbnode_t       *np;
-#endif
         smbnode_t       *dnp;
         smbmntinfo_t    *smi;
         struct vattr    vattr;
         struct smbfattr fattr;
         struct smb_cred scred;
         const char *name = (const char *)nm;
         int             nmlen = strlen(nm);
         uint32_t        disp;
-        uint16_t        fid;
+        smb_fh_t        *fid = NULL;
         int             xattr;
 
         vfsp = dvp->v_vfsp;
         smi = VFTOSMI(vfsp);
         dnp = VTOSMB(dvp);
@@ -1840,11 +2562,11 @@
                 return (EIO);
 
         /*
          * Note: this may break mknod(2) calls to create a directory,
          * but that's obscure use.  Some other filesystems do this.
-         * XXX: Later, redirect VDIR type here to _mkdir.
+         * Todo: redirect VDIR type here to _mkdir.
          */
         if (va->va_type != VREG)
                 return (EINVAL);
 
         /*
@@ -1871,15 +2593,10 @@
         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
                 return (EINTR);
         smb_credinit(&scred, cr);
 
         /*
-         * XXX: Do we need r_lkserlock too?
-         * No use of any shared fid or fctx...
-         */
-
-        /*
          * NFS needs to go over the wire, just to be sure whether the
          * file exists or not.  Using a cached result is dangerous in
          * this case when making a decision regarding existence.
          *
          * The SMB protocol does NOT really need to go OTW here
@@ -1910,22 +2627,39 @@
                 }
 
                 /*
                  * Truncate (if requested).
                  */
-                if ((vattr.va_mask & AT_SIZE) && vattr.va_size == 0) {
+                if ((vattr.va_mask & AT_SIZE) && vp->v_type == VREG) {
+                        np = VTOSMB(vp);
+                        /*
+                         * Check here for large file truncation by
+                         * LF-unaware process, like ufs_create().
+                         */
+                        if (!(lfaware & FOFFMAX)) {
+                                mutex_enter(&np->r_statelock);
+                                if (np->r_size > MAXOFF32_T)
+                                        error = EOVERFLOW;
+                                mutex_exit(&np->r_statelock);
+                        }
+                        if (error) {
+                                VN_RELE(vp);
+                                goto out;
+                        }
                         vattr.va_mask = AT_SIZE;
                         error = smbfssetattr(vp, &vattr, 0, cr);
                         if (error) {
                                 VN_RELE(vp);
                                 goto out;
                         }
-                }
-                /* Success! */
-#ifdef NOT_YET
+#ifdef  SMBFS_VNEVENT
+                        /* Existing file was truncated */
                 vnevent_create(vp, ct);
 #endif
+                        /* Page invalidation was done in smbfssetattr() */
+                }
+                /* Success! */
                 *vpp = vp;
                 goto out;
         }
 
         /*
@@ -1970,49 +2704,15 @@
             disp, &scred, &fid);
         if (error)
                 goto out;
 
         /*
-         * XXX: Missing some code here to deal with
-         * the case where we opened an existing file,
-         * it's size is larger than 32-bits, and we're
-         * setting the size from a process that's not
-         * aware of large file offsets.  i.e.
-         * from the NFS3 code:
-         */
-#if NOT_YET /* XXX */
-        if ((vattr.va_mask & AT_SIZE) &&
-            vp->v_type == VREG) {
-                np = VTOSMB(vp);
-                /*
-                 * Check here for large file handled
-                 * by LF-unaware process (as
-                 * ufs_create() does)
-                 */
-                if (!(lfaware & FOFFMAX)) {
-                        mutex_enter(&np->r_statelock);
-                        if (np->r_size > MAXOFF32_T)
-                                error = EOVERFLOW;
-                        mutex_exit(&np->r_statelock);
-                }
-                if (!error) {
-                        vattr.va_mask = AT_SIZE;
-                        error = smbfssetattr(vp,
-                            &vattr, 0, cr);
-                }
-        }
-#endif /* XXX */
-        /*
          * Should use the fid to get/set the size
          * while we have it opened here.  See above.
          */
+        smbfs_smb_close(fid);
 
-        cerror = smbfs_smb_close(smi->smi_share, fid, NULL, &scred);
-        if (cerror)
-                SMBVDEBUG("error %d closing %s\\%s\n",
-                    cerror, dnp->n_rpath, name);
-
         /*
          * In the open case, the name may differ a little
          * from what we passed to create (case, etc.)
          * so call lookup to get the (opened) name.
          *
@@ -2029,12 +2729,10 @@
 
         error = smbfs_nget(dvp, name, nmlen, &fattr, &vp);
         if (error)
                 goto out;
 
-        /* XXX invalidate pages if we truncated? */
-
         /* Success! */
         *vpp = vp;
         error = 0;
 
 out:
@@ -2053,102 +2751,213 @@
 /* ARGSUSED */
 static int
 smbfs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
         int flags)
 {
-        int             error;
-        vnode_t         *vp;
-        smbnode_t       *np;
-        smbnode_t       *dnp;
         struct smb_cred scred;
-        /* enum smbfsstat status; */
-        smbmntinfo_t    *smi;
+        vnode_t         *vp = NULL;
+        smbnode_t       *dnp = VTOSMB(dvp);
+        smbmntinfo_t    *smi = VTOSMI(dvp);
+        int             error;
 
-        smi = VTOSMI(dvp);
-
         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
                 return (EPERM);
 
         if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
                 return (EIO);
 
-        dnp = VTOSMB(dvp);
-        if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
-                return (EINTR);
-        smb_credinit(&scred, cr);
-
         /*
          * Verify access to the directory.
          */
         error = smbfs_access(dvp, VWRITE|VEXEC, 0, cr, ct);
         if (error)
-                goto out;
+                return (error);
 
-        /*
-         * NOTE:  the darwin code gets the "vp" passed in so it looks
-         * like the "vp" has probably been "lookup"ed by the VFS layer.
-         * It looks like we will need to lookup the vp to check the
-         * caches and check if the object being deleted is a directory.
-         */
+        if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
+                return (EINTR);
+        smb_credinit(&scred, cr);
+
+        /* Lookup the file to remove. */
         error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
-        if (error)
+        if (error != 0)
                 goto out;
 
-        /* Never allow link/unlink directories on CIFS. */
+        /* Don't allow unlink of a directory. */
         if (vp->v_type == VDIR) {
-                VN_RELE(vp);
                 error = EPERM;
                 goto out;
         }
 
         /*
-         * Now we have the real reference count on the vnode
-         * Do we have the file open?
+         * Do the real remove work
          */
-        np = VTOSMB(vp);
-        mutex_enter(&np->r_statelock);
-        if ((vp->v_count > 1) && (np->n_fidrefs > 0)) {
-                /*
-                 * NFS does a rename on remove here.
-                 * Probably not applicable for SMB.
-                 * Like Darwin, just return EBUSY.
+        error = smbfsremove(dvp, vp, &scred, flags);
+        if (error != 0)
+                goto out;
+
+#ifdef  SMBFS_VNEVENT
+        vnevent_remove(vp, dvp, nm, ct);
+#endif
+
+out:
+        if (vp != NULL)
+                VN_RELE(vp);
+
+        smb_credrele(&scred);
+        smbfs_rw_exit(&dnp->r_rwlock);
+
+        return (error);
+}
+
+/*
+ * smbfsremove does the real work of removing in SMBFS
+ * Caller has done dir access checks etc.
                  *
-                 * XXX: Todo - Use Trans2rename, and
-                 * if that fails, ask the server to
-                 * set the delete-on-close flag.
+ * The normal way to delete a file over SMB is to open it (with DELETE access),
+ * set the "delete-on-close" flag, and close the file.  The problem for Unix
+ * applications is that they expect the file name to be gone once the unlink
+ * completes, and the SMB server does not actually delete the file until ALL
+ * opens of that file are closed.  We can't assume our open handles are the
+ * only open handles on a file we're deleting, so to be safe we'll try to
+ * rename the file to a temporary name and then set delete-on-close.  If we
+ * fail to set delete-on-close (i.e. because other opens prevent it) then
+ * undo the changes we made and give up with EBUSY.  Note that we might have
+ * permission to delete a file but lack permission to rename, so we want to
+ * continue in cases where rename fails.  As an optimization, only do the
+ * rename when we have the file open.
+ *
+ * This is similar to what NFS does when deleting a file that has local opens,
+ * but thanks to SMB delete-on-close, we don't need to keep track of when the
+ * last local open goes away and send a delete.  The server does that for us.
                  */
+/* ARGSUSED */
+static int
+smbfsremove(vnode_t *dvp, vnode_t *vp, struct smb_cred *scred,
+    int flags)
+{
+        smbnode_t       *dnp = VTOSMB(dvp);
+        smbnode_t       *np = VTOSMB(vp);
+        smbmntinfo_t    *smi = np->n_mount;
+        char            *tmpname = NULL;
+        int             tnlen;
+        int             error;
+        smb_fh_t        *fid = NULL;
+        boolean_t       renamed = B_FALSE;
+
+        /*
+         * The dvp RWlock must be held as writer.
+         */
+        ASSERT(dnp->r_rwlock.owner == curthread);
+
+        /*
+         * We need to flush any dirty pages which happen to
+         * be hanging around before removing the file.  This
+         * shouldn't happen very often, and mostly affects
+         * file systems mounted "nocto".
+         */
+        if (vn_has_cached_data(vp) &&
+            ((np->r_flags & RDIRTY) || np->r_count > 0)) {
+                error = smbfs_putpage(vp, (offset_t)0, 0, 0,
+                    scred->scr_cred, NULL);
+                if (error && (error == ENOSPC || error == EDQUOT)) {
+                        mutex_enter(&np->r_statelock);
+                        if (!np->r_error)
+                                np->r_error = error;
                 mutex_exit(&np->r_statelock);
-                error = EBUSY;
-        } else {
-                smbfs_attrcache_rm_locked(np);
-                mutex_exit(&np->r_statelock);
+                }
+        }
 
-                error = smbfs_smb_delete(np, &scred, NULL, 0, 0);
+        /*
+         * Get a file handle with delete access.
+         * Close this FID before return.
+         */
+        error = smbfs_smb_tmpopen(np, STD_RIGHT_DELETE_ACCESS,
+            scred, &fid);
+        if (error) {
+                SMBVDEBUG("error %d opening %s\n",
+                    error, np->n_rpath);
+                goto out;
+        }
+        ASSERT(fid != NULL);
 
                 /*
-                 * If the file should no longer exist, discard
-                 * any cached attributes under this node.
+         * If we have the file open, try to rename it to a temporary name.
+         * If we can't rename, continue on and try setting DoC anyway.
+         * Unnecessary for directories.
                  */
-                switch (error) {
-                case 0:
-                case ENOENT:
-                case ENOTDIR:
-                        smbfs_attrcache_prune(np);
-                        break;
+        if (vp->v_type != VDIR && vp->v_count > 1 && np->n_fidrefs > 0) {
+                tmpname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
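+                /* Generate a temporary name for the rename. */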
+                tnlen = smbfs_newname(tmpname, MAXNAMELEN);
+                error = smbfs_smb_rename(dnp, np, dnp, tmpname, tnlen,
+                    fid, scred);
+                if (error != 0) {
+                        SMBVDEBUG("error %d renaming %s -> %s\n",
+                            error, np->n_rpath, tmpname);
+                        /* Keep going without the rename. */
+                } else {
+                        renamed = B_TRUE;
                 }
         }
 
-        VN_RELE(vp);
+        /*
+         * Mark the file as delete-on-close.  If we can't,
+         * undo what we did and err out.
+         */
+        error = smbfs_smb_setdisp(smi->smi_share, fid, 1, scred);
+        if (error != 0) {
+                SMBVDEBUG("error %d setting DoC on %s\n",
+                    error, np->n_rpath);
+                /*
+                 * Failed to set DoC. If we renamed, undo that.
+                 * Need np->n_rpath relative to parent (dnp).
+                 * Use parent path name length plus one for
+                 * the separator ('/' or ':')
+                 */
+                if (renamed) {
+                        char *oldname;
+                        int oldnlen;
+                        int err2;
 
+                        oldname = np->n_rpath + (dnp->n_rplen + 1);
+                        oldnlen = np->n_rplen - (dnp->n_rplen + 1);
+                        err2 = smbfs_smb_rename(dnp, np, dnp, oldname, oldnlen,
+                            fid, scred);
+                        SMBVDEBUG("error %d un-renaming %s -> %s\n",
+                            err2, tmpname, np->n_rpath);
+                }
+                error = EBUSY;
+                goto out;
+        }
+        /* Done! */
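+        /* The file is gone; discard cached attributes under this node. */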
+        smbfs_attrcache_remove(np);
+        smbfs_attrcache_prune(np);
+
 out:
-        smb_credrele(&scred);
-        smbfs_rw_exit(&dnp->r_rwlock);
+        if (tmpname != NULL)
+                kmem_free(tmpname, MAXNAMELEN);
+        if (fid != NULL)
+                smbfs_smb_tmpclose(np, fid);
 
+        if (error == 0) {
+                /* Keep lookup from finding this node anymore. */
+                smbfs_rmhash(np);
+        }
+
         return (error);
 }
 
 
+/* ARGSUSED */
+static int
+smbfs_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr,
+        caller_context_t *ct, int flags)
+{
+        /* Not yet... */
+        return (ENOSYS);
+}
+
+
 /*
  * XXX
  * This op should support the new FIGNORECASE flag for case-insensitive
  * lookups, per PSARC 2007/244.
  */
@@ -2155,11 +2964,15 @@
 /* ARGSUSED */
 static int
 smbfs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr,
         caller_context_t *ct, int flags)
 {
-        /* vnode_t              *realvp; */
+        struct smb_cred scred;
+        smbnode_t       *odnp = VTOSMB(odvp);
+        smbnode_t       *ndnp = VTOSMB(ndvp);
+        vnode_t         *ovp;
+        int error;
 
         if (curproc->p_zone != VTOSMI(odvp)->smi_zone_ref.zref_zone ||
             curproc->p_zone != VTOSMI(ndvp)->smi_zone_ref.zref_zone)
                 return (EPERM);
 
@@ -2167,34 +2980,10 @@
             VTOSMI(ndvp)->smi_flags & SMI_DEAD ||
             odvp->v_vfsp->vfs_flag & VFS_UNMOUNTED ||
             ndvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
                 return (EIO);
 
-        return (smbfsrename(odvp, onm, ndvp, nnm, cr, ct));
-}
-
-/*
- * smbfsrename does the real work of renaming in SMBFS
- */
-/* ARGSUSED */
-static int
-smbfsrename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr,
-        caller_context_t *ct)
-{
-        int             error;
-        int             nvp_locked = 0;
-        vnode_t         *nvp = NULL;
-        vnode_t         *ovp = NULL;
-        smbnode_t       *onp;
-        smbnode_t       *nnp;
-        smbnode_t       *odnp;
-        smbnode_t       *ndnp;
-        struct smb_cred scred;
-        /* enum smbfsstat       status; */
-
-        ASSERT(curproc->p_zone == VTOSMI(odvp)->smi_zone_ref.zref_zone);
-
         if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
             strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0)
                 return (EINVAL);
 
         /*
@@ -2203,14 +2992,26 @@
          * fill those in correctly, check here too.
          */
         if (odvp->v_vfsp != ndvp->v_vfsp)
                 return (EXDEV);
 
-        odnp = VTOSMB(odvp);
-        ndnp = VTOSMB(ndvp);
+        /*
+         * Need write access on source and target.
+         * Server takes care of most checks.
+         */
+        error = smbfs_access(odvp, VWRITE|VEXEC, 0, cr, ct);
+        if (error)
+                return (error);
+        if (odvp != ndvp) {
+                error = smbfs_access(ndvp, VWRITE, 0, cr, ct);
+                if (error)
+                        return (error);
+        }
 
         /*
+         * Need to lock both old/new dirs as writer.
+         *
          * Avoid deadlock here on old vs new directory nodes
          * by always taking the locks in order of address.
          * The order is arbitrary, but must be consistent.
          */
         if (odnp < ndnp) {
@@ -2231,40 +3032,57 @@
                         smbfs_rw_exit(&ndnp->r_rwlock);
                         return (EINTR);
                 }
         }
         smb_credinit(&scred, cr);
-        /*
-         * No returns after this point (goto out)
-         */
 
+        /* Lookup the "old" name */
+        error = smbfslookup(odvp, onm, &ovp, cr, 0, ct);
+        if (error == 0) {
         /*
-         * Need write access on source and target.
-         * Server takes care of most checks.
+                 * Do the real rename work
          */
-        error = smbfs_access(odvp, VWRITE|VEXEC, 0, cr, ct);
-        if (error)
-                goto out;
-        if (odvp != ndvp) {
-                error = smbfs_access(ndvp, VWRITE, 0, cr, ct);
-                if (error)
-                        goto out;
+                error = smbfsrename(odvp, ovp, ndvp, nnm, &scred, flags);
+                VN_RELE(ovp);
         }
 
-        /*
-         * Lookup the source name.  Must already exist.
+        smb_credrele(&scred);
+        smbfs_rw_exit(&odnp->r_rwlock);
+        smbfs_rw_exit(&ndnp->r_rwlock);
+
+        return (error);
+}
+
+/*
+ * smbfsrename does the real work of renaming in SMBFS
+ * Caller has done dir access checks etc.
          */
-        error = smbfslookup(odvp, onm, &ovp, cr, 0, ct);
-        if (error)
-                goto out;
+/* ARGSUSED */
+static int
+smbfsrename(vnode_t *odvp, vnode_t *ovp, vnode_t *ndvp, char *nnm,
+    struct smb_cred *scred, int flags)
+{
+        smbnode_t       *odnp = VTOSMB(odvp);
+        smbnode_t       *onp = VTOSMB(ovp);
+        smbnode_t       *ndnp = VTOSMB(ndvp);
+        vnode_t         *nvp = NULL;
+        int             error;
+        int             nvp_locked = 0;
+        smb_fh_t        *fid = NULL;
 
+        /* Things our caller should have checked. */
+        ASSERT(curproc->p_zone == VTOSMI(odvp)->smi_zone_ref.zref_zone);
+        ASSERT(odvp->v_vfsp == ndvp->v_vfsp);
+        ASSERT(odnp->r_rwlock.owner == curthread);
+        ASSERT(ndnp->r_rwlock.owner == curthread);
+
         /*
          * Lookup the target file.  If it exists, it needs to be
          * checked to see whether it is a mount point and whether
          * it is active (open).
          */
-        error = smbfslookup(ndvp, nnm, &nvp, cr, 0, ct);
+        error = smbfslookup(ndvp, nnm, &nvp, scred->scr_cred, 0, NULL);
         if (!error) {
                 /*
                  * Target (nvp) already exists.  Check that it
                  * has the same type as the source.  The server
                  * will check this also, (and more reliably) but
@@ -2305,97 +3123,67 @@
                         error = EBUSY;
                         goto out;
                 }
 
                 /*
-                 * CIFS gives a SHARING_VIOLATION error when
+                 * CIFS may give a SHARING_VIOLATION error when
                  * trying to rename onto an existing object,
                  * so try to remove the target first.
                  * (Only for files, not directories.)
                  */
                 if (nvp->v_type == VDIR) {
                         error = EEXIST;
                         goto out;
                 }
-
-                /*
-                 * Nodes that are "not active" here have v_count=2
-                 * because vn_renameat (our caller) did a lookup on
-                 * both the source and target before this call.
-                 * Otherwise this similar to smbfs_remove.
-                 */
-                nnp = VTOSMB(nvp);
-                mutex_enter(&nnp->r_statelock);
-                if ((nvp->v_count > 2) && (nnp->n_fidrefs > 0)) {
-                        /*
-                         * The target file exists, is not the same as
-                         * the source file, and is active.  Other FS
-                         * implementations unlink the target here.
-                         * For SMB, we don't assume we can remove an
-                         * open file.  Return an error instead.
-                         */
-                        mutex_exit(&nnp->r_statelock);
-                        error = EBUSY;
+                error = smbfsremove(ndvp, nvp, scred, flags);
+                if (error != 0)
                         goto out;
-                }
 
                 /*
-                 * Target file is not active. Try to remove it.
-                 */
-                smbfs_attrcache_rm_locked(nnp);
-                mutex_exit(&nnp->r_statelock);
-
-                error = smbfs_smb_delete(nnp, &scred, NULL, 0, 0);
-
-                /*
-                 * Similar to smbfs_remove
-                 */
-                switch (error) {
-                case 0:
-                case ENOENT:
-                case ENOTDIR:
-                        smbfs_attrcache_prune(nnp);
-                        break;
-                }
-
-                if (error)
-                        goto out;
-                /*
                  * OK, removed the target file.  Continue as if
                  * lookup target had failed (nvp == NULL).
                  */
                 vn_vfsunlock(nvp);
                 nvp_locked = 0;
                 VN_RELE(nvp);
                 nvp = NULL;
         } /* nvp */
 
-        onp = VTOSMB(ovp);
+        /*
+         * Get a file handle with delete access.
+         * Close this FID before return.
+         */
+        error = smbfs_smb_tmpopen(onp, STD_RIGHT_DELETE_ACCESS,
+            scred, &fid);
+        if (error) {
+                SMBVDEBUG("error %d opening %s\n",
+                    error, onp->n_rpath);
+                goto out;
+        }
+
         smbfs_attrcache_remove(onp);
+        error = smbfs_smb_rename(odnp, onp, ndnp, nnm, strlen(nnm),
+            fid, scred);
 
-        error = smbfs_smb_rename(onp, ndnp, nnm, strlen(nnm), &scred);
+        smbfs_smb_tmpclose(onp, fid);
 
         /*
          * If the old name should no longer exist,
          * discard any cached attributes under it.
          */
-        if (error == 0)
+        if (error == 0) {
                 smbfs_attrcache_prune(onp);
+                /* SMBFS_VNEVENT... */
+        }
 
 out:
         if (nvp) {
                 if (nvp_locked)
                         vn_vfsunlock(nvp);
                 VN_RELE(nvp);
         }
-        if (ovp)
-                VN_RELE(ovp);
 
-        smb_credrele(&scred);
-        smbfs_rw_exit(&odnp->r_rwlock);
-        smbfs_rw_exit(&ndnp->r_rwlock);
-
         return (error);
 }
 
 /*
  * XXX
@@ -2415,11 +3203,11 @@
         struct smbmntinfo *smi = VTOSMI(dvp);
         struct smb_cred scred;
         struct smbfattr fattr;
         const char              *name = (const char *) nm;
         int             nmlen = strlen(name);
-        int             error, hiderr;
+        int             error;
 
         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
                 return (EPERM);
 
         if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
@@ -2436,15 +3224,10 @@
         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
                 return (EINTR);
         smb_credinit(&scred, cr);
 
         /*
-         * XXX: Do we need r_lkserlock too?
-         * No use of any shared fid or fctx...
-         */
-
-        /*
          * Require write access in the containing directory.
          */
         error = smbfs_access(dvp, VWRITE, 0, cr, ct);
         if (error)
                 goto out;
@@ -2461,14 +3244,10 @@
 
         error = smbfs_nget(dvp, name, nmlen, &fattr, &vp);
         if (error)
                 goto out;
 
-        if (name[0] == '.')
-                if ((hiderr = smbfs_smb_hideit(VTOSMB(vp), NULL, 0, &scred)))
-                        SMBVDEBUG("hide failure %d\n", hiderr);
-
         /* Success! */
         *vpp = vp;
         error = 0;
 out:
         smb_credrele(&scred);
@@ -2488,37 +3267,36 @@
 /* ARGSUSED */
 static int
 smbfs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
         caller_context_t *ct, int flags)
 {
+        struct smb_cred scred;
         vnode_t         *vp = NULL;
         int             vp_locked = 0;
         struct smbmntinfo *smi = VTOSMI(dvp);
         struct smbnode  *dnp = VTOSMB(dvp);
         struct smbnode  *np;
-        struct smb_cred scred;
         int             error;
 
         if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
                 return (EPERM);
 
         if (smi->smi_flags & SMI_DEAD || dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
                 return (EIO);
 
+        /*
+         * Verify access to the directory.
+         */
+        error = smbfs_access(dvp, VWRITE|VEXEC, 0, cr, ct);
+        if (error)
+                return (error);
+
         if (smbfs_rw_enter_sig(&dnp->r_rwlock, RW_WRITER, SMBINTR(dvp)))
                 return (EINTR);
         smb_credinit(&scred, cr);
 
         /*
-         * Require w/x access in the containing directory.
-         * Server handles all other access checks.
-         */
-        error = smbfs_access(dvp, VEXEC|VWRITE, 0, cr, ct);
-        if (error)
-                goto out;
-
-        /*
          * First lookup the entry to be removed.
          */
         error = smbfslookup(dvp, nm, &vp, cr, 0, ct);
         if (error)
                 goto out;
@@ -2545,27 +3323,21 @@
         if (vn_mountedvfs(vp) != NULL) {
                 error = EBUSY;
                 goto out;
         }
 
-        smbfs_attrcache_remove(np);
-        error = smbfs_smb_rmdir(np, &scred);
-
         /*
-         * Similar to smbfs_remove
+         * Do the real rmdir work
          */
-        switch (error) {
-        case 0:
-        case ENOENT:
-        case ENOTDIR:
-                smbfs_attrcache_prune(np);
-                break;
-        }
-
+        error = smbfsremove(dvp, vp, &scred, flags);
         if (error)
                 goto out;
 
+#ifdef  SMBFS_VNEVENT
+        vnevent_rmdir(vp, dvp, nm, ct);
+#endif
+
         mutex_enter(&np->r_statelock);
         dnp->n_flag |= NMODIFIED;
         mutex_exit(&np->r_statelock);
         smbfs_attr_touchdir(dnp);
         smbfs_rmhash(np);
@@ -2583,10 +3355,20 @@
 }
 
 
 /* ARGSUSED */
 static int
+smbfs_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm, cred_t *cr,
+        caller_context_t *ct, int flags)
+{
+        /* Not yet... */
+        return (ENOSYS);
+}
+
+
+/* ARGSUSED */
+static int
 smbfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
         caller_context_t *ct, int flags)
 {
         struct smbnode  *np = VTOSMB(vp);
         int             error = 0;
@@ -2608,12 +3390,11 @@
                 return (error);
 
         ASSERT(smbfs_rw_lock_held(&np->r_rwlock, RW_READER));
 
         /*
-         * XXX: Todo readdir cache here
-         * Note: NFS code is just below this.
+         * Todo: readdir cache here
          *
          * I am serializing the entire readdir operation
          * now since we have not yet implemented a readdir
          * cache. This fix needs to be revisited once
          * we implement the readdir cache.
@@ -2843,11 +3624,23 @@
         kmem_free(dp, dbufsiz);
         smb_credrele(&scred);
         return (error);
 }
 
+/*
+ * Here NFS has: nfs3_bio
+ * See smbfs_bio above.
+ */
 
+/* ARGSUSED */
+static int
+smbfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
+{
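+        /* Not yet... */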
+        return (ENOSYS);
+}
+
+
 /*
  * The pair of functions VOP_RWLOCK, VOP_RWUNLOCK
  * are optional functions that are called by:
  *    getdents, before/after VOP_READDIR
  *    pread, before/after ... VOP_READ
@@ -2917,12 +3710,1003 @@
                 return (EINVAL);
 
         return (0);
 }
 
+/* mmap support ******************************************************** */
 
+#ifdef  _KERNEL
+
+#ifdef DEBUG
+static int smbfs_lostpage = 0;  /* number of times we lost original page */
+#endif
+
 /*
+ * Return all the pages from [off..off+len) in file
+ * Like nfs3_getpage
+ */
+/* ARGSUSED */
+static int
+smbfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
+        page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
+        enum seg_rw rw, cred_t *cr, caller_context_t *ct)
+{
+        smbnode_t       *np;
+        smbmntinfo_t    *smi;
+        int             error;
+
+        np = VTOSMB(vp);
+        smi = VTOSMI(vp);
+
+        if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
+                return (EIO);
+
+        if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+                return (EIO);
+
+        if (vp->v_flag & VNOMAP)
+                return (ENOSYS);
+
+        if (protp != NULL)
+                *protp = PROT_ALL;
+
+        /*
+         * Now validate that the caches are up to date.
+         */
+        error = smbfs_validate_caches(vp, cr);
+        if (error)
+                return (error);
+
+retry:
+        mutex_enter(&np->r_statelock);
+
+        /*
+         * Don't create dirty pages faster than they
+         * can be cleaned ... (etc. see nfs)
+         *
+         * Here NFS also tests:
+         *  (mi->mi_max_threads != 0 &&
+         *  rp->r_awcount > 2 * mi->mi_max_threads)
+         */
+        if (rw == S_CREATE) {
+                while (np->r_gcount > 0)
+                        cv_wait(&np->r_cv, &np->r_statelock);
+        }
+
+        /*
+         * If we are getting called as a side effect of a write
+         * operation the local file size might not be extended yet.
+         * In this case we want to be able to return pages of zeroes.
+         */
+        if (off + len > np->r_size + PAGEOFFSET && seg != segkmap) {
+                mutex_exit(&np->r_statelock);
+                return (EFAULT);                /* beyond EOF */
+        }
+
+        mutex_exit(&np->r_statelock);
+
+        error = pvn_getpages(smbfs_getapage, vp, off, len, protp,
+            pl, plsz, seg, addr, rw, cr);
+
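+        /*
+         * SMBFS_EOF here means our cached file size is stale
+         * (like the NFS_EOF case in nfs3_getpage), so purge
+         * caches and retry with fresh attributes.
+         */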
+        switch (error) {
+        case SMBFS_EOF:
+                smbfs_purge_caches(vp, cr);
+                goto retry;
+        case ESTALE:
+                /*
+                 * Here NFS has: PURGE_STALE_FH(error, vp, cr);
+                 * In-line here as we only use it once.
+                 */
+                mutex_enter(&np->r_statelock);
+                np->r_flags |= RSTALE;
+                if (!np->r_error)
+                        np->r_error = (error);
+                mutex_exit(&np->r_statelock);
+                if (vn_has_cached_data(vp))
+                        smbfs_invalidate_pages(vp, (u_offset_t)0, cr);
+                smbfs_purge_caches(vp, cr);
+                break;
+        default:
+                break;
+        }
+
+        return (error);
+}
+
+/*
+ * Called from pvn_getpages to get a particular page.
+ * Like nfs3_getapage
+ */
+/* ARGSUSED */
+static int
+smbfs_getapage(vnode_t *vp, u_offset_t off, size_t len, uint_t *protp,
+        page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
+        enum seg_rw rw, cred_t *cr)
+{
+        smbnode_t       *np;
+        smbmntinfo_t   *smi;
+
+        uint_t          bsize;
+        struct buf      *bp;
+        page_t          *pp;
+        u_offset_t      lbn;
+        u_offset_t      io_off;
+        u_offset_t      blkoff;
+        size_t          io_len;
+        uint_t blksize;
+        int error;
+        /* int readahead; */
+        int readahead_issued = 0;
+        /* int ra_window; * readahead window */
+        page_t *pagefound;
+
+        np = VTOSMB(vp);
+        smi = VTOSMI(vp);
+
+        if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
+                return (EIO);
+
+        if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+                return (EIO);
+
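+        /* Kluster i/o in the larger of the fs block size or page size. */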
+        bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
+
+reread:
+        bp = NULL;
+        pp = NULL;
+        pagefound = NULL;
+
+        if (pl != NULL)
+                pl[0] = NULL;
+
+        error = 0;
+        lbn = off / bsize;
+        blkoff = lbn * bsize;
+
+        /*
+         * NFS queues up readahead work here.
+         */
+
+again:
+        if ((pagefound = page_exists(vp, off)) == NULL) {
+                if (pl == NULL) {
+                        (void) 0; /* Todo: smbfs_async_readahead(); */
+                } else if (rw == S_CREATE) {
+                        /*
+                         * Block for this page is not allocated, or the offset
+                         * is beyond the current allocation size, or we're
+                         * allocating a swap slot and the page was not found,
+                         * so allocate it and return a zero page.
+                         */
+                        if ((pp = page_create_va(vp, off,
+                            PAGESIZE, PG_WAIT, seg, addr)) == NULL)
+                                cmn_err(CE_PANIC, "smbfs_getapage: page_create");
+                        io_len = PAGESIZE;
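+                        /* r_nextr tracks the expected next read offset. */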
+                        mutex_enter(&np->r_statelock);
+                        np->r_nextr = off + PAGESIZE;
+                        mutex_exit(&np->r_statelock);
+                } else {
+                        /*
+                         * Need to go to the server to get a BLOCK; the
+                         * exceptions are reading at offset = 0 or doing
+                         * random i/o, in which case we read only a PAGE.
+                         */
+                        mutex_enter(&np->r_statelock);
+                        if (blkoff < np->r_size &&
+                            blkoff + bsize >= np->r_size) {
+                                /*
+                                 * If only a block or less is left in
+                                 * the file, read all that is remaining.
+                                 */
+                                if (np->r_size <= off) {
+                                        /*
+                                         * Trying to access beyond EOF,
+                                         * set up to get at least one page.
+                                         */
+                                        blksize = off + PAGESIZE - blkoff;
+                                } else
+                                        blksize = np->r_size - blkoff;
+                        } else if ((off == 0) ||
+                            (off != np->r_nextr && !readahead_issued)) {
+                                blksize = PAGESIZE;
+                                blkoff = off; /* block = page here */
+                        } else
+                                blksize = bsize;
+                        mutex_exit(&np->r_statelock);
+
+                        pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
+                            &io_len, blkoff, blksize, 0);
+
+                        /*
+                         * Some other thread has entered the page,
+                         * so just use it.
+                         */
+                        if (pp == NULL)
+                                goto again;
+
+                        /*
+                         * Now round the request size up to page boundaries.
+                         * This ensures that the entire page will be
+                         * initialized to zeroes if EOF is encountered.
+                         */
+                        io_len = ptob(btopr(io_len));
+
+                        bp = pageio_setup(pp, io_len, vp, B_READ);
+                        ASSERT(bp != NULL);
+
+                        /*
+                         * pageio_setup should have set b_addr to 0.  This
+                         * is correct since we want to do I/O on a page
+                         * boundary.  bp_mapin will use this addr to calculate
+                         * an offset, and then set b_addr to the kernel virtual
+                         * address it allocated for us.
+                         */
+                        ASSERT(bp->b_un.b_addr == 0);
+
+                        bp->b_edev = 0;
+                        bp->b_dev = 0;
+                        bp->b_lblkno = lbtodb(io_off);
+                        bp->b_file = vp;
+                        bp->b_offset = (offset_t)off;
+                        bp_mapin(bp);
+
+                        /*
+                         * If doing a write beyond what we believe is EOF,
+                         * don't bother trying to read the pages from the
+                         * server, we'll just zero the pages here.  We
+                         * don't check that the rw flag is S_WRITE here
+                         * because some implementations may attempt a
+                         * read access to the buffer before copying data.
+                         */
+                        mutex_enter(&np->r_statelock);
+                        if (io_off >= np->r_size && seg == segkmap) {
+                                mutex_exit(&np->r_statelock);
+                                bzero(bp->b_un.b_addr, io_len);
+                        } else {
+                                mutex_exit(&np->r_statelock);
+                                error = smbfs_bio(bp, 0, cr);
+                        }
+
+                        /*
+                         * Unmap the buffer before freeing it.
+                         */
+                        bp_mapout(bp);
+                        pageio_done(bp);
+
+                        /* Here NFS3 updates all pp->p_fsdata */
+
+                        if (error == SMBFS_EOF) {
+                                /*
+                                 * If doing a write system call just return
+                                 * zeroed pages, else user tried to get pages
+                                 * beyond EOF, return error.  We don't check
+                                 * that the rw flag is S_WRITE here because
+                                 * some implementations may attempt a read
+                                 * access to the buffer before copying data.
+                                 */
+                                if (seg == segkmap)
+                                        error = 0;
+                                else
+                                        error = EFAULT;
+                        }
+
+                        if (!readahead_issued && !error) {
+                                mutex_enter(&np->r_statelock);
+                                np->r_nextr = io_off + io_len;
+                                mutex_exit(&np->r_statelock);
+                        }
+                }
+        }
+
+        if (pl == NULL)
+                return (error);
+
+        if (error) {
+                if (pp != NULL)
+                        pvn_read_done(pp, B_ERROR);
+                return (error);
+        }
+
+        if (pagefound) {
+                se_t se = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
+
+                /*
+                 * Page exists in the cache, acquire the appropriate lock.
+                 * If this fails, start all over again.
+                 */
+                if ((pp = page_lookup(vp, off, se)) == NULL) {
+#ifdef DEBUG
+                        smbfs_lostpage++;
+#endif
+                        goto reread;
+                }
+                pl[0] = pp;
+                pl[1] = NULL;
+                return (0);
+        }
+
+        if (pp != NULL)
+                pvn_plist_init(pp, pl, plsz, off, io_len, rw);
+
+        return (error);
+}
+
+/*
+ * Here NFS has: nfs3_readahead
+ * No read-ahead in smbfs yet.
+ */
+
+#endif  // _KERNEL
+
+/*
+ * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
+ * If len == 0, do from off to EOF.
+ *
+ * The normal cases should be len == 0 && off == 0 (entire vp list),
+ * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
+ * (from pageout).
+ *
+ * Like nfs3_putpage + nfs_putpages
+ */
+/* ARGSUSED */
+static int
+smbfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
+        caller_context_t *ct)
+{
+#ifdef  _KERNEL
+        smbnode_t *np;
+        smbmntinfo_t *smi;
+        page_t *pp;
+        u_offset_t eoff;
+        u_offset_t io_off;
+        size_t io_len;
+        int error;
+        int rdirty;
+        int err;
+
+        np = VTOSMB(vp);
+        smi = VTOSMI(vp);
+
+        if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
+                return (EIO);
+
+        if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+                return (EIO);
+
+        if (vp->v_flag & VNOMAP)
+                return (ENOSYS);
+
+        /* Here NFS does rp->r_count (++/--) stuff. */
+
+        /* Beginning of code from nfs_putpages. */
+
+        if (!vn_has_cached_data(vp))
+                return (0);
+
+        /*
+         * If ROUTOFSPACE is set, then all writes turn into B_INVAL
+         * writes.  B_FORCE is set to force the VM system to actually
+         * invalidate the pages, even if the i/o failed.  The pages
+         * need to get invalidated because they can't be written out
+         * because there isn't any space left on either the server's
+         * file system or in the user's disk quota.  The B_FREE bit
+         * is cleared to avoid confusion as to whether this is a
+         * request to place the page on the freelist or to destroy
+         * it.
+         */
+        if ((np->r_flags & ROUTOFSPACE) ||
+            (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED))
+                flags = (flags & ~B_FREE) | B_INVAL | B_FORCE;
+
+        if (len == 0) {
+                /*
+                 * If doing a full file synchronous operation, then clear
+                 * the RDIRTY bit.  If a page gets dirtied while the flush
+                 * is happening, then RDIRTY will get set again.  The
+                 * RDIRTY bit must get cleared before the flush so that
+                 * we don't lose this information.
+                 *
+                 * NFS has B_ASYNC vs sync stuff here.
+                 */
+                if (off == (u_offset_t)0 &&
+                    (np->r_flags & RDIRTY)) {
+                        mutex_enter(&np->r_statelock);
+                        rdirty = (np->r_flags & RDIRTY);
+                        np->r_flags &= ~RDIRTY;
+                        mutex_exit(&np->r_statelock);
+                } else
+                        rdirty = 0;
+
+                /*
+                 * Search the entire vp list for pages >= off, and flush
+                 * the dirty pages.
+                 */
+                error = pvn_vplist_dirty(vp, off, smbfs_putapage,
+                    flags, cr);
+
+                /*
+                 * If an error occurred and the file was marked as dirty
+                 * before and we aren't forcibly invalidating pages, then
+                 * reset the RDIRTY flag.
+                 */
+                if (error && rdirty &&
+                    (flags & (B_INVAL | B_FORCE)) != (B_INVAL | B_FORCE)) {
+                        mutex_enter(&np->r_statelock);
+                        np->r_flags |= RDIRTY;
+                        mutex_exit(&np->r_statelock);
+                }
+        } else {
+                /*
+                 * Do a range from [off...off + len) looking for pages
+                 * to deal with.
+                 */
+                error = 0;
+                io_len = 1; /* quiet warnings */
+                eoff = off + len;
+
+                for (io_off = off; io_off < eoff; io_off += io_len) {
+                        mutex_enter(&np->r_statelock);
+                        if (io_off >= np->r_size) {
+                                mutex_exit(&np->r_statelock);
+                                break;
+                        }
+                        mutex_exit(&np->r_statelock);
+                        /*
+                         * If we are not invalidating, synchronously
+                         * freeing, or writing pages, use
+                         * page_lookup_nowait() to prevent them from
+                         * being reclaimed from the free list.
+                         */
+                        if ((flags & B_INVAL) || !(flags & B_ASYNC)) {
+                                pp = page_lookup(vp, io_off,
+                                    (flags & (B_INVAL | B_FREE)) ?
+                                    SE_EXCL : SE_SHARED);
+                        } else {
+                                pp = page_lookup_nowait(vp, io_off,
+                                    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
+                        }
+
+                        if (pp == NULL || !pvn_getdirty(pp, flags))
+                                io_len = PAGESIZE;
+                        else {
+                                err = smbfs_putapage(vp, pp, &io_off,
+                                    &io_len, flags, cr);
+                                if (!error)
+                                        error = err;
+                                /*
+                                 * "io_off" and "io_len" are returned as
+                                 * the range of pages we actually wrote.
+                                 * This allows us to skip ahead more quickly,
+                                 * since several pages may have been dealt
+                                 * with by this iteration of the loop.
+                                 */
+                        }
+                }
+        }
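+
+        /*
+         * In both branches above, "error" holds the first failure
+         * seen while pushing pages; later errors do not overwrite it.
+         */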
+
+        return (error);
+
+#else   // _KERNEL
+        return (ENOSYS);
+#endif  // _KERNEL
+}
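+
+/*
+ * (The ENOSYS stub above covers non-_KERNEL builds, where the VM
+ * paging interfaces included under #ifdef _KERNEL are not available.)
+ */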
+
+#ifdef  _KERNEL
+
+/*
+ * Write out a single page, possibly klustering adjacent dirty pages.
+ *
+ * Like nfs3_putapage / nfs3_sync_putapage
+ */
+static int
+smbfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
+        int flags, cred_t *cr)
+{
+        smbnode_t *np;
+        u_offset_t io_off;
+        u_offset_t lbn_off;
+        u_offset_t lbn;
+        size_t io_len;
+        uint_t bsize;
+        int error;
+
+        np = VTOSMB(vp);
+
+        ASSERT(!vn_is_readonly(vp));
+
+        bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
+        lbn = pp->p_offset / bsize;
+        lbn_off = lbn * bsize;
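+
+        /*
+         * Example (illustrative): with vfs_bsize = 32K and 4K pages, a
+         * page at p_offset 0x11000 yields lbn = 2 and lbn_off = 0x10000,
+         * so the kluster below may gather dirty pages in the range
+         * [0x10000, 0x18000).
+         */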
+
+        /*
+         * Find a kluster that fits in one block, or in
+         * one page if pages are bigger than blocks.  If
+         * there is less file space allocated than a whole
+         * page, we'll shorten the i/o request below.
+         */
+        pp = pvn_write_kluster(vp, pp, &io_off, &io_len, lbn_off,
+            roundup(bsize, PAGESIZE), flags);
+
+        /*
+         * pvn_write_kluster shouldn't have returned a page with offset
+         * behind the original page we were given.  Verify that.
+         */
+        ASSERT((pp->p_offset / bsize) >= lbn);
+
+        /*
+         * Now pp will have the list of kept dirty pages marked for
+         * write back.  It will also handle invalidation and freeing
+         * of pages that are not dirty.  Check for page length rounding
+         * problems.
+         */
+        if (io_off + io_len > lbn_off + bsize) {
+                ASSERT((io_off + io_len) - (lbn_off + bsize) < PAGESIZE);
+                io_len = lbn_off + bsize - io_off;
+        }
+        /*
+         * The RMODINPROGRESS flag makes sure that smbfs_bio() sees a
+         * consistent value of r_size. RMODINPROGRESS is set in writerp().
+         * When RMODINPROGRESS is set it indicates that a uiomove() is in
+         * progress and the r_size has not been made consistent with the
+         * new size of the file. When the uiomove() completes the r_size is
+         * updated and the RMODINPROGRESS flag is cleared.
+         *
+         * Without this handshaking, it is possible that smbfs_bio()
+         * picks up the old value of r_size
+         * before the uiomove() in writerp() completes. This will result
+         * in the write through smbfs_bio() being dropped.
+         *
+         * More precisely, there is a window between the time the uiomove()
+         * completes and the time the r_size is updated. If a VOP_PUTPAGE()
+         * operation intervenes in this window, the page will be picked up,
+         * because it is dirty (it will be unlocked, unless it was
+         * pagecreate'd). When the page is picked up as dirty, the dirty
+         * bit is reset (pvn_getdirty()). In smbfs_write(), r_size is
+         * checked. This will still be the old size. Therefore the page will
+         * not be written out. When segmap_release() calls VOP_PUTPAGE(),
+         * the page will be found to be clean and the write will be dropped.
+         */
+        if (np->r_flags & RMODINPROGRESS) {
+                mutex_enter(&np->r_statelock);
+                if ((np->r_flags & RMODINPROGRESS) &&
+                    np->r_modaddr + MAXBSIZE > io_off &&
+                    np->r_modaddr < io_off + io_len) {
+                        page_t *plist;
+                        /*
+                         * A write is in progress for this region of the file.
+                         * If we did not detect RMODINPROGRESS here then this
+                         * path through smbfs_putapage() would eventually go to
+                         * smbfs_bio() and may not write out all of the data
+                         * in the pages. We end up losing data. So we decide
+                         * to set the modified bit on each page in the page
+                         * list and mark the smbnode with RDIRTY. This write
+                         * will be restarted at some later time.
+                         */
+                        plist = pp;
+                        while (plist != NULL) {
+                                pp = plist;
+                                page_sub(&plist, pp);
+                                hat_setmod(pp);
+                                page_io_unlock(pp);
+                                page_unlock(pp);
+                        }
+                        np->r_flags |= RDIRTY;
+                        mutex_exit(&np->r_statelock);
+                        if (offp)
+                                *offp = io_off;
+                        if (lenp)
+                                *lenp = io_len;
+                        return (0);
+                }
+                mutex_exit(&np->r_statelock);
+        }
+
+        /*
+         * NFS handles (flags & B_ASYNC) here...
+         * (See nfs_async_putapage())
+         *
+         * This code section from: nfs3_sync_putapage()
+         */
+
+        flags |= B_WRITE;
+
+        error = smbfs_rdwrlbn(vp, pp, io_off, io_len, flags, cr);
+
+        if ((error == ENOSPC || error == EDQUOT || error == EFBIG ||
+            error == EACCES) &&
+            (flags & (B_INVAL|B_FORCE)) != (B_INVAL|B_FORCE)) {
+                if (!(np->r_flags & ROUTOFSPACE)) {
+                        mutex_enter(&np->r_statelock);
+                        np->r_flags |= ROUTOFSPACE;
+                        mutex_exit(&np->r_statelock);
+                }
+                flags |= B_ERROR;
+                pvn_write_done(pp, flags);
+                /*
+                 * If this was not an async thread, then try again to
+                 * write out the pages, but this time, also destroy
+                 * them whether or not the write is successful.  This
+                 * will prevent memory from filling up with these
+                 * pages and destroying them is the only alternative
+                 * if they can't be written out.
+                 *
+                 * Don't do this if this is an async thread because
+                 * when the pages are unlocked in pvn_write_done,
+                 * some other thread could have come along, locked
+                 * them, and queued for an async thread.  It would be
+                 * possible for all of the async threads to be tied
+                 * up waiting to lock the pages again and they would
+                 * all already be locked and waiting for an async
+                 * thread to handle them.  Deadlock.
+                 */
+                if (!(flags & B_ASYNC)) {
+                        error = smbfs_putpage(vp, io_off, io_len,
+                            B_INVAL | B_FORCE, cr, NULL);
+                }
+        } else {
+                if (error)
+                        flags |= B_ERROR;
+                else if (np->r_flags & ROUTOFSPACE) {
+                        mutex_enter(&np->r_statelock);
+                        np->r_flags &= ~ROUTOFSPACE;
+                        mutex_exit(&np->r_statelock);
+                }
+                pvn_write_done(pp, flags);
+        }
+
+        /* Now more code from: nfs3_putapage */
+
+        if (offp)
+                *offp = io_off;
+        if (lenp)
+                *lenp = io_len;
+
+        return (error);
+}
+
+#endif  // _KERNEL
+
+
+/*
+ * NFS has this in nfs_client.c (shared by v2,v3,...)
+ * We have it here so smbfs_putapage can be file scope.
+ */
+void
+smbfs_invalidate_pages(vnode_t *vp, u_offset_t off, cred_t *cr)
+{
+        smbnode_t *np;
+
+        np = VTOSMB(vp);
+
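+        /*
+         * RTRUNCATE acts as a single-flusher gate: only one thread at
+         * a time runs the flush/invalidate below; others wait on r_cv
+         * until the flag is cleared.
+         */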
+        mutex_enter(&np->r_statelock);
+        while (np->r_flags & RTRUNCATE)
+                cv_wait(&np->r_cv, &np->r_statelock);
+        np->r_flags |= RTRUNCATE;
+
+        if (off == (u_offset_t)0) {
+                np->r_flags &= ~RDIRTY;
+                if (!(np->r_flags & RSTALE))
+                        np->r_error = 0;
+        }
+        /* Here NFSv3 has np->r_truncaddr = off; */
+        mutex_exit(&np->r_statelock);
+
+#ifdef  _KERNEL
+        (void) pvn_vplist_dirty(vp, off, smbfs_putapage,
+            B_INVAL | B_TRUNC, cr);
+#endif  // _KERNEL
+
+        mutex_enter(&np->r_statelock);
+        np->r_flags &= ~RTRUNCATE;
+        cv_broadcast(&np->r_cv);
+        mutex_exit(&np->r_statelock);
+}
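+
+/*
+ * (Illustrative) a typical caller is a path that shrinks the file,
+ * e.g. a truncating setattr, which might do:
+ *
+ *      smbfs_invalidate_pages(vp, (u_offset_t)va.va_size, cr);
+ *
+ * See the setattr/attribute-cache code for the actual call sites.
+ */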
+
+#ifdef  _KERNEL
+
+/* Like nfs3_map */
+
+/* ARGSUSED */
+static int
+smbfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
+        size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
+        cred_t *cr, caller_context_t *ct)
+{
+        segvn_crargs_t  vn_a;
+        struct vattr    va;
+        smbnode_t       *np;
+        smbmntinfo_t    *smi;
+        int             error;
+
+        np = VTOSMB(vp);
+        smi = VTOSMI(vp);
+
+        if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
+                return (EIO);
+
+        if (smi->smi_flags & SMI_DEAD || vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+                return (EIO);
+
+        if (vp->v_flag & VNOMAP)
+                return (ENOSYS);
+
+        if (off < 0 || off + (ssize_t)len < 0)
+                return (ENXIO);
+
+        if (vp->v_type != VREG)
+                return (ENODEV);
+
+        /*
+         * NFS does close-to-open consistency stuff here.
+         * Just get (possibly cached) attributes.
+         */
+        va.va_mask = AT_ALL;
+        if ((error = smbfsgetattr(vp, &va, cr)) != 0)
+                return (error);
+
+        /*
+         * Check to see if the vnode is currently marked as not cachable.
+         * This means portions of the file are locked (through VOP_FRLOCK).
+         * In this case the map request must be refused.  We use
+         * np->r_lkserlock to avoid a race with concurrent lock requests.
+         */
+        /*
+         * Atomically increment r_inmap after acquiring r_rwlock. The
+         * idea here is to acquire r_rwlock to block read/write and
+         * not to protect r_inmap. r_inmap will inform smbfs_read/write()
+         * that we are in smbfs_map(). Now, r_rwlock is acquired in order
+         * and we can prevent the deadlock that would have occurred
+         * when smbfs_addmap() would have acquired it out of order.
+         *
+         * Since we are not protecting r_inmap by any lock, we do not
+         * hold any lock when we decrement it. We atomically decrement
+         * r_inmap after we release r_lkserlock.  Note that rwlock is
+         * re-entered as writer in smbfs_addmap (called via as_map).
+         */
+
+        if (smbfs_rw_enter_sig(&np->r_rwlock, RW_WRITER, SMBINTR(vp)))
+                return (EINTR);
+        atomic_inc_uint(&np->r_inmap);
+        smbfs_rw_exit(&np->r_rwlock);
+
+        if (smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, SMBINTR(vp))) {
+                atomic_dec_uint(&np->r_inmap);
+                return (EINTR);
+        }
+
+        if (vp->v_flag & VNOCACHE) {
+                error = EAGAIN;
+                goto done;
+        }
+
+        /*
+         * Don't allow concurrent locks and mapping if mandatory locking is
+         * enabled.
+         */
+        if ((flk_has_remote_locks(vp) || smbfs_lm_has_sleep(vp)) &&
+            MANDLOCK(vp, va.va_mode)) {
+                error = EAGAIN;
+                goto done;
+        }
+
+        as_rangelock(as);
+        error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
+        if (error != 0) {
+                as_rangeunlock(as);
+                goto done;
+        }
+
+        vn_a.vp = vp;
+        vn_a.offset = off;
+        vn_a.type = (flags & MAP_TYPE);
+        vn_a.prot = (uchar_t)prot;
+        vn_a.maxprot = (uchar_t)maxprot;
+        vn_a.flags = (flags & ~MAP_TYPE);
+        vn_a.cred = cr;
+        vn_a.amp = NULL;
+        vn_a.szc = 0;
+        vn_a.lgrp_mem_policy_flags = 0;
+
+        error = as_map(as, *addrp, len, segvn_create, &vn_a);
+        as_rangeunlock(as);
+
+done:
+        smbfs_rw_exit(&np->r_lkserlock);
+        atomic_dec_uint(&np->r_inmap);
+        return (error);
+}
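+
+/*
+ * (For reference) smbfs_map is entered from the mmap(2) path via
+ * VOP_MAP; the as_map() call above invokes segvn_create(), which
+ * calls VOP_ADDMAP and re-enters this file at smbfs_addmap() below.
+ */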
+
+/*
+ * This uses the addmap/delmap functions to hold the SMB FID open as
+ * long as there are pages mapped in this as/seg.  Increment the FID
+ * refs when the mapping count goes from zero to non-zero, and release
+ * the FID ref when the mapping count goes from non-zero to zero.
+ */
+
+/* ARGSUSED */
+static int
+smbfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
+        size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
+        cred_t *cr, caller_context_t *ct)
+{
+        smbnode_t *np = VTOSMB(vp);
+        boolean_t inc_fidrefs = B_FALSE;
+
+        /*
+         * When r_mapcnt goes from zero to non-zero,
+         * increment n_fidrefs
+         */
+        mutex_enter(&np->r_statelock);
+        if (np->r_mapcnt == 0)
+                inc_fidrefs = B_TRUE;
+        np->r_mapcnt += btopr(len);
+        mutex_exit(&np->r_statelock);
+
+        if (inc_fidrefs) {
+                (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
+                np->n_fidrefs++;
+                smbfs_rw_exit(&np->r_lkserlock);
+        }
+
+        return (0);
+}
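+
+/*
+ * Note: the VM layer pairs addmap/delmap calls for a segment, so
+ * every btopr(len) added to r_mapcnt here is eventually subtracted
+ * in smbfs_delmap below.
+ */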
+
+/*
+ * Args passed to smbfs_delmap_async
+ */
+typedef struct smbfs_delmap_args {
+        taskq_ent_t             dm_tqent;
+        cred_t                  *dm_cr;
+        vnode_t                 *dm_vp;
+        offset_t                dm_off;
+        caddr_t                 dm_addr;
+        size_t                  dm_len;
+        uint_t                  dm_prot;
+        uint_t                  dm_maxprot;
+        uint_t                  dm_flags;
+        boolean_t               dm_rele_fid;
+} smbfs_delmap_args_t;
+
+/*
+ * Delmap is used not only to release the SMB FID (as described above)
+ * but also to flush dirty pages as needed.  Both of those do the actual
+ * work in an async taskq job, to avoid interfering with locks held
+ * in the VM layer when this is called.
+ */
+
+/* ARGSUSED */
+static int
+smbfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
+        size_t len, uint_t prot, uint_t maxprot, uint_t flags,
+        cred_t *cr, caller_context_t *ct)
+{
+        smbnode_t               *np = VTOSMB(vp);
+        smbmntinfo_t            *smi = VTOSMI(vp);
+        smbfs_delmap_args_t     *dmapp;
+
+        dmapp = kmem_zalloc(sizeof (*dmapp), KM_SLEEP);
+
+        /*
+         * The VM layer may segvn_free the seg holding this vnode
+         * before our callback has a chance to run, so take a hold on
+         * the vnode here and release it in the callback.
+         * (same for the cred)
+         */
+        crhold(cr);
+        VN_HOLD(vp);
+
+        dmapp->dm_vp = vp;
+        dmapp->dm_cr = cr;
+        dmapp->dm_off = off;
+        dmapp->dm_addr = addr;
+        dmapp->dm_len = len;
+        dmapp->dm_prot = prot;
+        dmapp->dm_maxprot = maxprot;
+        dmapp->dm_flags = flags;
+        dmapp->dm_rele_fid = B_FALSE;
+
+        /*
+         * Go ahead and decrement r_mapcnt now, which is
+         * the primary purpose of this function.
+         *
+         * When r_mapcnt goes to zero, we need to call
+         * smbfs_rele_fid, but can't do that here, so
+         * set a flag telling the async task to do it.
+         */
+        mutex_enter(&np->r_statelock);
+        np->r_mapcnt -= btopr(len);
+        ASSERT(np->r_mapcnt >= 0);
+        if (np->r_mapcnt == 0)
+                dmapp->dm_rele_fid = B_TRUE;
+        mutex_exit(&np->r_statelock);
+
+        taskq_dispatch_ent(smi->smi_taskq, smbfs_delmap_async, dmapp, 0,
+            &dmapp->dm_tqent);
+
+        return (0);
+}
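+
+/*
+ * Note that smbfs_delmap itself always returns zero; any error from
+ * the deferred page flush is discarded by smbfs_delmap_async below.
+ */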
+
+/*
+ * Remove some pages from an mmap'd vnode.  Flush any
+ * dirty pages in the unmapped range.
+ */
+/* ARGSUSED */
+static void
+smbfs_delmap_async(void *varg)
+{
+        smbfs_delmap_args_t     *dmapp = varg;
+        cred_t                  *cr;
+        vnode_t                 *vp;
+        smbnode_t               *np;
+        smbmntinfo_t            *smi;
+
+        cr = dmapp->dm_cr;
+        vp = dmapp->dm_vp;
+        np = VTOSMB(vp);
+        smi = VTOSMI(vp);
+
+        /* Decremented r_mapcnt in smbfs_delmap */
+
+        /*
+         * Initiate a page flush and potential commit if there are
+         * pages, the file system was not mounted readonly, the segment
+         * was mapped shared, and the pages themselves were writeable.
+         *
+         * Mark RDIRTY here; it is used to check whether the file has
+         * dirty pages when smbfs is unmounted.
+         */
+        if (vn_has_cached_data(vp) && !vn_is_readonly(vp) &&
+            dmapp->dm_flags == MAP_SHARED &&
+            (dmapp->dm_maxprot & PROT_WRITE) != 0) {
+                mutex_enter(&np->r_statelock);
+                np->r_flags |= RDIRTY;
+                mutex_exit(&np->r_statelock);
+
+                /*
+                 * Need to finish the putpage before we
+                 * close the OtW FID needed for I/O.
+                 */
+                (void) smbfs_putpage(vp, dmapp->dm_off, dmapp->dm_len, 0,
+                    dmapp->dm_cr, NULL);
+        }
+
+        if ((np->r_flags & RDIRECTIO) || (smi->smi_flags & SMI_DIRECTIO))
+                (void) smbfs_putpage(vp, dmapp->dm_off, dmapp->dm_len,
+                    B_INVAL, dmapp->dm_cr, NULL);
+
+        /*
+         * If r_mapcnt went to zero, drop our FID ref now.
+         * On the last fidref, this does an OtW close.
+         */
+        if (dmapp->dm_rele_fid) {
+                struct smb_cred scred;
+
+                (void) smbfs_rw_enter_sig(&np->r_lkserlock, RW_WRITER, 0);
+                smb_credinit(&scred, dmapp->dm_cr);
+
+                smbfs_rele_fid(np, &scred);
+
+                smb_credrele(&scred);
+                smbfs_rw_exit(&np->r_lkserlock);
+        }
+
+        /* Release holds taken in smbfs_delmap */
+        VN_RELE(vp);
+        crfree(cr);
+
+        kmem_free(dmapp, sizeof (*dmapp));
+}
+
+/* No smbfs_pageio() or smbfs_dispose() ops. */
+
+#endif  // _KERNEL
+
+/* misc. ******************************************************** */
+
+
+/*
  * XXX
  * This op may need to support PSARC 2007/440, nbmand changes for CIFS Service.
  */
 static int
 smbfs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
@@ -2988,19 +4772,30 @@
                         if (error || va.va_size == bfp->l_start)
                                 return (error);
                         va.va_mask = AT_SIZE;
                         va.va_size = bfp->l_start;
                         error = smbfssetattr(vp, &va, 0, cr);
+                        /* SMBFS_VNEVENT... */
                 } else
                         error = EINVAL;
         }
 
         return (error);
 }
 
+
 /* ARGSUSED */
 static int
+smbfs_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
+{
+        return (ENOSYS);
+}
+
+
+/* ARGSUSED */
+static int
 smbfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
         caller_context_t *ct)
 {
         vfs_t *vfs;
         smbmntinfo_t *smi;
@@ -3171,5 +4966,56 @@
         if (VTOSMI(vp)->smi_flags & SMI_LLOCK)
                 return (fs_shrlock(vp, cmd, shr, flag, cr, ct));
         else
                 return (ENOSYS);
 }
+
+
+/*
+ * Most unimplemented ops will return ENOSYS because of fs_nosys().
+ * The only ops where that won't work are ACCESS (due to open(2)
+ * failures) and ... (anything else left?)
+ */
+const fs_operation_def_t smbfs_vnodeops_template[] = {
+        VOPNAME_OPEN,           { .vop_open = smbfs_open },
+        VOPNAME_CLOSE,          { .vop_close = smbfs_close },
+        VOPNAME_READ,           { .vop_read = smbfs_read },
+        VOPNAME_WRITE,          { .vop_write = smbfs_write },
+        VOPNAME_IOCTL,          { .vop_ioctl = smbfs_ioctl },
+        VOPNAME_GETATTR,        { .vop_getattr = smbfs_getattr },
+        VOPNAME_SETATTR,        { .vop_setattr = smbfs_setattr },
+        VOPNAME_ACCESS,         { .vop_access = smbfs_access },
+        VOPNAME_LOOKUP,         { .vop_lookup = smbfs_lookup },
+        VOPNAME_CREATE,         { .vop_create = smbfs_create },
+        VOPNAME_REMOVE,         { .vop_remove = smbfs_remove },
+        VOPNAME_LINK,           { .vop_link = smbfs_link },
+        VOPNAME_RENAME,         { .vop_rename = smbfs_rename },
+        VOPNAME_MKDIR,          { .vop_mkdir = smbfs_mkdir },
+        VOPNAME_RMDIR,          { .vop_rmdir = smbfs_rmdir },
+        VOPNAME_READDIR,        { .vop_readdir = smbfs_readdir },
+        VOPNAME_SYMLINK,        { .vop_symlink = smbfs_symlink },
+        VOPNAME_READLINK,       { .vop_readlink = smbfs_readlink },
+        VOPNAME_FSYNC,          { .vop_fsync = smbfs_fsync },
+        VOPNAME_INACTIVE,       { .vop_inactive = smbfs_inactive },
+        VOPNAME_FID,            { .vop_fid = smbfs_fid },
+        VOPNAME_RWLOCK,         { .vop_rwlock = smbfs_rwlock },
+        VOPNAME_RWUNLOCK,       { .vop_rwunlock = smbfs_rwunlock },
+        VOPNAME_SEEK,           { .vop_seek = smbfs_seek },
+        VOPNAME_FRLOCK,         { .vop_frlock = smbfs_frlock },
+        VOPNAME_SPACE,          { .vop_space = smbfs_space },
+        VOPNAME_REALVP,         { .vop_realvp = smbfs_realvp },
+#ifdef  _KERNEL
+        VOPNAME_GETPAGE,        { .vop_getpage = smbfs_getpage },
+        VOPNAME_PUTPAGE,        { .vop_putpage = smbfs_putpage },
+        VOPNAME_MAP,            { .vop_map = smbfs_map },
+        VOPNAME_ADDMAP,         { .vop_addmap = smbfs_addmap },
+        VOPNAME_DELMAP,         { .vop_delmap = smbfs_delmap },
+#endif  // _KERNEL
+        VOPNAME_PATHCONF,       { .vop_pathconf = smbfs_pathconf },
+        VOPNAME_SETSECATTR,     { .vop_setsecattr = smbfs_setsecattr },
+        VOPNAME_GETSECATTR,     { .vop_getsecattr = smbfs_getsecattr },
+        VOPNAME_SHRLOCK,        { .vop_shrlock = smbfs_shrlock },
+#ifdef  SMBFS_VNEVENT
+        VOPNAME_VNEVENT,        { .vop_vnevent = fs_vnevent_support },
+#endif
+        { NULL, NULL }
+};
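+
+/*
+ * (Reference sketch) the template above becomes the live vnodeops at
+ * module init time, roughly:
+ *
+ *      error = vn_make_ops("smbfs", smbfs_vnodeops_template,
+ *          &smbfs_vnodeops);
+ *
+ * See the fs init code (smbfs_vfsops.c) for the actual call site.
+ */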