Print this page
        
@@ -19,11 +19,11 @@
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2016, Joyent, Inc.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  */
 
 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
 /*        All Rights Reserved   */
 
@@ -64,12 +64,10 @@
 #include <sys/nbmlock.h>
 #include <sys/fcntl.h>
 #include <fs/fs_subr.h>
 #include <sys/taskq.h>
 #include <fs/fs_reparse.h>
-#include <sys/time.h>
-#include <sys/sdt.h>
 
 /* Determine if this vnode is a file that is read-only */
 #define ISROFILE(vp)    \
         ((vp)->v_type != VCHR && (vp)->v_type != VBLK && \
             (vp)->v_type != VFIFO && vn_is_readonly(vp))
@@ -102,13 +100,10 @@
 kmutex_t        vskstat_tree_lock;
 
 /* Global variable which enables/disables the vopstats collection */
 int vopstats_enabled = 1;
 
-/* Global used for empty/invalid v_path */
-char *vn_vpath_empty = "";
-
 /*
  * forward declarations for internal vnode specific data (vsd)
  */
 static void *vsd_realloc(void *, size_t, size_t);
 
@@ -206,11 +201,10 @@
         }
 
 #define VOP_LATENCY_10MS        10000000
 #define VOP_LATENCY_100MS       100000000
 #define VOP_LATENCY_1S          1000000000
-#define VOP_LATENCY_10S         10000000000
 
 /*
  * Convert stat(2) formats to vnode types and vice versa.  (Knows about
  * numerical order of S_IFMT and vnode types.)
  */
@@ -2292,12 +2286,11 @@
         mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&vp->v_vsd_lock, NULL, MUTEX_DEFAULT, NULL);
         cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL);
         rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL);
         vp->v_femhead = NULL;   /* Must be done before vn_reinit() */
-        vp->v_path = vn_vpath_empty;
-        vp->v_path_stamp = 0;
+        vp->v_path = NULL;
         vp->v_mpssdata = NULL;
         vp->v_vsd = NULL;
         vp->v_fopdata = NULL;
 
         return (0);
@@ -2340,11 +2333,10 @@
  */
 void
 vn_recycle(vnode_t *vp)
 {
         ASSERT(vp->v_pages == NULL);
-        VERIFY(vp->v_path != NULL);
 
         /*
          * XXX - This really belongs in vn_reinit(), but we have some issues
          * with the counts.  Best to have it here for clean initialization.
          */
@@ -2363,15 +2355,14 @@
                 ASSERT(vp->v_femhead->femh_list == NULL);
                 mutex_destroy(&vp->v_femhead->femh_lock);
                 kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
                 vp->v_femhead = NULL;
         }
-        if (vp->v_path != vn_vpath_empty) {
+        if (vp->v_path) {
                 kmem_free(vp->v_path, strlen(vp->v_path) + 1);
-                vp->v_path = vn_vpath_empty;
+                vp->v_path = NULL;
         }
-        vp->v_path_stamp = 0;
 
         if (vp->v_fopdata != NULL) {
                 free_fopdata(vp);
         }
         vp->v_mpssdata = NULL;
@@ -2438,14 +2429,13 @@
          * some with v_count of 1.  In any case, the value should
          * never be anything else.
          */
         ASSERT((vp->v_count == 0) || (vp->v_count == 1));
         ASSERT(vp->v_count_dnlc == 0);
-        VERIFY(vp->v_path != NULL);
-        if (vp->v_path != vn_vpath_empty) {
+        if (vp->v_path != NULL) {
                 kmem_free(vp->v_path, strlen(vp->v_path) + 1);
-                vp->v_path = vn_vpath_empty;
+                vp->v_path = NULL;
         }
 
         /* If FEM was in use, make sure everything gets cleaned up */
         if (vp->v_femhead) {
                 /* XXX - There should be a free_femhead() that does all this */
@@ -2975,243 +2965,129 @@
 
         return ((u_longlong_t)atomic_inc_64_nv(&next_caller_id));
 }
 
 /*
- * The value stored in v_path is relative to rootdir, located in the global
- * zone.  Zones or chroot environments which reside deeper inside the VFS
- * hierarchy will have a relative view of MAXPATHLEN since they are unaware of
- * what lies below their perceived root.  In order to keep v_path usable for
- * these child environments, its allocations are allowed to exceed MAXPATHLEN.
- *
- * An upper bound of max_vnode_path is placed upon v_path allocations to
- * prevent the system from going too wild at the behest of pathological
- * behavior from the operator.
+ * Given a starting vnode and a path, updates the path in the target vnode in
+ * a safe manner.  If the vnode already has path information embedded, then the
+ * cached path is left untouched.
  */
+
 size_t max_vnode_path = 4 * MAXPATHLEN;
 
-
 void
-vn_clearpath(vnode_t *vp, hrtime_t compare_stamp)
+vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
+    const char *path, size_t plen)
 {
-        char *buf;
+        char    *rpath;
+        vnode_t *base;
+        size_t  rpathlen, rpathalloc;
+        int     doslash = 1;
 
-        mutex_enter(&vp->v_lock);
-        /*
-         * If the snapshot of v_path_stamp passed in via compare_stamp does not
-         * match the present value on the vnode, it indicates that subsequent
-         * changes have occurred.  The v_path value is not cleared in this case
-         * since the new value may be valid.
-         */
-        if (compare_stamp != 0 && vp->v_path_stamp != compare_stamp) {
-                mutex_exit(&vp->v_lock);
-                return;
+        if (*path == '/') {
+                base = rootvp;
+                path++;
+                plen--;
+        } else {
+                base = startvp;
         }
-        buf = vp->v_path;
-        vp->v_path = vn_vpath_empty;
-        vp->v_path_stamp = 0;
-        mutex_exit(&vp->v_lock);
-        if (buf != vn_vpath_empty) {
-                kmem_free(buf, strlen(buf) + 1);
-        }
-}
 
-static void
-vn_setpath_common(vnode_t *pvp, vnode_t *vp, const char *name, size_t len,
-    boolean_t is_rename)
-{
-        char *buf, *oldbuf;
-        hrtime_t pstamp;
-        size_t baselen, buflen = 0;
-
-        /* Handle the vn_setpath_str case. */
-        if (pvp == NULL) {
-                if (len + 1 > max_vnode_path) {
-                        DTRACE_PROBE4(vn__setpath__too__long, vnode_t *, pvp,
-                            vnode_t *, vp, char *, name, size_t, len + 1);
-                        return;
-                }
-                buf = kmem_alloc(len + 1, KM_SLEEP);
-                bcopy(name, buf, len);
-                buf[len] = '\0';
-
-                mutex_enter(&vp->v_lock);
-                oldbuf = vp->v_path;
-                vp->v_path = buf;
-                vp->v_path_stamp = gethrtime();
-                mutex_exit(&vp->v_lock);
-                if (oldbuf != vn_vpath_empty) {
-                        kmem_free(oldbuf, strlen(oldbuf) + 1);
-                }
-                return;
-        }
-
-        /* Take snapshot of parent dir */
-        mutex_enter(&pvp->v_lock);
-retrybuf:
-        if (pvp->v_path == vn_vpath_empty) {
                 /*
-                 * Without v_path from the parent directory, generating a child
-                 * path from the name is impossible.
+         * We cannot grab base->v_lock while we hold vp->v_lock because of
+         * the potential for deadlock.
                  */
-                if (len > 0) {
-                        pstamp = pvp->v_path_stamp;
-                        mutex_exit(&pvp->v_lock);
-                        vn_clearpath(vp, pstamp);
+        mutex_enter(&base->v_lock);
+        if (base->v_path == NULL) {
+                mutex_exit(&base->v_lock);
                         return;
                 }
 
+        rpathlen = strlen(base->v_path);
+        rpathalloc = rpathlen + plen + 1;
+        /* Avoid adding a slash if there's already one there */
+        if (base->v_path[rpathlen-1] == '/')
+                doslash = 0;
+        else
+                rpathalloc++;
+
                 /*
-                 * The only feasible case here is where a NUL lookup is being
-                 * performed on rootdir prior to its v_path being populated.
+         * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held,
+         * so we must do this dance.  If, by chance, something changes the path,
+         * just give up since there is no real harm.
                  */
-                ASSERT(pvp->v_path_stamp = 0);
-                baselen = 0;
-                pstamp = 0;
-        } else {
-                pstamp = pvp->v_path_stamp;
-                baselen = strlen(pvp->v_path);
-                /* ignore a trailing slash if present */
-                if (pvp->v_path[baselen - 1] == '/') {
-                        /* This should only be the case for rootdir */
-                        ASSERT(baselen == 1 && pvp == rootdir);
-                        baselen--;
-                }
-        }
-        mutex_exit(&pvp->v_lock);
+        mutex_exit(&base->v_lock);
 
-        if (buflen != 0) {
-                /* Free the existing (mis-sized) buffer in case of retry */
-                kmem_free(buf, buflen);
-        }
-        /* base, '/', name and trailing NUL */
-        buflen = baselen + len + 2;
-        if (buflen > max_vnode_path) {
-                DTRACE_PROBE4(vn__setpath_too__long, vnode_t *, pvp,
-                    vnode_t *, vp, char *, name, size_t, buflen);
+        /* Paths should stay within reason */
+        if (rpathalloc > max_vnode_path)
                 return;
-        }
-        buf = kmem_alloc(buflen, KM_SLEEP);
 
-        mutex_enter(&pvp->v_lock);
-        if (pvp->v_path_stamp != pstamp) {
-                size_t vlen;
+        rpath = kmem_alloc(rpathalloc, KM_SLEEP);
 
-                /*
-                 * Since v_path_stamp changed on the parent, it is likely that
-                 * v_path has been altered as well.  If the length does not
-                 * exactly match what was previously measured, the buffer
-                 * allocation must be repeated for proper sizing.
-                 */
-                if (pvp->v_path == vn_vpath_empty) {
-                        /* Give up if parent lacks v_path */
-                        mutex_exit(&pvp->v_lock);
-                        kmem_free(buf, buflen);
+        mutex_enter(&base->v_lock);
+        if (base->v_path == NULL || strlen(base->v_path) != rpathlen) {
+                mutex_exit(&base->v_lock);
+                kmem_free(rpath, rpathalloc);
                         return;
                 }
-                vlen = strlen(pvp->v_path);
-                if (pvp->v_path[vlen - 1] == '/') {
-                        vlen--;
-                }
-                if (vlen != baselen) {
-                        goto retrybuf;
-                }
-        }
-        bcopy(pvp->v_path, buf, baselen);
-        mutex_exit(&pvp->v_lock);
+        bcopy(base->v_path, rpath, rpathlen);
+        mutex_exit(&base->v_lock);
 
-        buf[baselen] = '/';
-        baselen++;
-        bcopy(name, &buf[baselen], len + 1);
+        if (doslash)
+                rpath[rpathlen++] = '/';
+        bcopy(path, rpath + rpathlen, plen);
+        rpath[rpathlen + plen] = '\0';
 
         mutex_enter(&vp->v_lock);
-        if (vp->v_path_stamp == 0) {
-                /* never-visited vnode can inherit stamp from parent */
-                ASSERT(vp->v_path == vn_vpath_empty);
-                vp->v_path_stamp = pstamp;
-                vp->v_path = buf;
+        if (vp->v_path != NULL) {
                 mutex_exit(&vp->v_lock);
-        } else if (vp->v_path_stamp < pstamp || is_rename) {
-                /*
-                 * Install the updated path and stamp, ensuring that the v_path
-                 * pointer is valid at all times for dtrace.
-                 */
-                oldbuf = vp->v_path;
-                vp->v_path = buf;
-                vp->v_path_stamp = gethrtime();
-                mutex_exit(&vp->v_lock);
-                kmem_free(oldbuf, strlen(oldbuf) + 1);
+                kmem_free(rpath, rpathalloc);
         } else {
-                /*
-                 * If the timestamp matches or is greater, it means another
-                 * thread performed the update first while locks were dropped
-                 * here to make the allocation.  We defer to the newer value.
-                 */
+                vp->v_path = rpath;
                 mutex_exit(&vp->v_lock);
-                kmem_free(buf, buflen);
         }
-        ASSERT(MUTEX_NOT_HELD(&vp->v_lock));
 }
 
+/*
+ * Sets the path of the vnode to the given string, regardless of current
+ * context, unless a path is already cached, in which case the existing v_path
+ * is left untouched.  The string must be a complete path from rootdir.  This
+ * is only used by fsop_root() for setting the path based on the mountpoint.
+ */
 void
-vn_updatepath(vnode_t *pvp, vnode_t *vp, const char *name)
+vn_setpath_str(struct vnode *vp, const char *str, size_t len)
 {
-        size_t len;
+        char *buf = kmem_alloc(len + 1, KM_SLEEP);
 
-        /*
-         * If the parent is older or empty, there's nothing further to do.
-         */
-        if (pvp->v_path == vn_vpath_empty ||
-            pvp->v_path_stamp <= vp->v_path_stamp) {
+        mutex_enter(&vp->v_lock);
+        if (vp->v_path != NULL) {
+                mutex_exit(&vp->v_lock);
+                kmem_free(buf, len + 1);
                 return;
         }
 
-        /*
-         * Given the lack of appropriate context, meaningful updates to v_path
-         * cannot be made during lookups for the '.' or '..' entries.
-         */
-        len = strlen(name);
-        if (len == 0 || (len == 1 && name[0] == '.') ||
-            (len == 2 && name[0] == '.' && name[1] == '.')) {
-                return;
-        }
+        vp->v_path = buf;
+        bcopy(str, vp->v_path, len);
+        vp->v_path[len] = '\0';
 
-        vn_setpath_common(pvp, vp, name, len, B_FALSE);
+        mutex_exit(&vp->v_lock);
 }
 
 /*
- * Given a starting vnode and a path, updates the path in the target vnode in
- * a safe manner.  If the vnode already has path information embedded, then the
- * cached path is left untouched.
- */
-/* ARGSUSED */
-void
-vn_setpath(vnode_t *rootvp, vnode_t *pvp, vnode_t *vp, const char *name,
-    size_t len)
-{
-        vn_setpath_common(pvp, vp, name, len, B_FALSE);
-}
-
-/*
- * Sets the path to the vnode to be the given string, regardless of current
- * context.  The string must be a complete path from rootdir.  This is only used
- * by fsop_root() for setting the path based on the mountpoint.
- */
-void
-vn_setpath_str(vnode_t *vp, const char *str, size_t len)
-{
-        vn_setpath_common(NULL, vp, str, len, B_FALSE);
-}
-
-/*
  * Called from within filesystem's vop_rename() to handle renames once the
  * target vnode is available.
  */
 void
-vn_renamepath(vnode_t *pvp, vnode_t *vp, const char *name, size_t len)
+vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len)
 {
-        vn_setpath_common(pvp, vp, name, len, B_TRUE);
+        char *tmp;
+
+        mutex_enter(&vp->v_lock);
+        tmp = vp->v_path;
+        vp->v_path = NULL;
+        mutex_exit(&vp->v_lock);
+        vn_setpath(rootdir, dvp, vp, nm, len);
+        if (tmp != NULL)
+                kmem_free(tmp, strlen(tmp) + 1);
 }
 
 /*
  * Similar to vn_setpath_str(), this function sets the path of the destination
  * vnode to be the same as the source vnode.
@@ -3218,46 +3094,41 @@
  */
 void
 vn_copypath(struct vnode *src, struct vnode *dst)
 {
         char *buf;
-        hrtime_t stamp;
-        size_t buflen;
+        int alloc;
 
         mutex_enter(&src->v_lock);
-        if (src->v_path == vn_vpath_empty) {
+        if (src->v_path == NULL) {
                 mutex_exit(&src->v_lock);
                 return;
         }
-        buflen = strlen(src->v_path) + 1;
-        mutex_exit(&src->v_lock);
+        alloc = strlen(src->v_path) + 1;
 
-        buf = kmem_alloc(buflen, KM_SLEEP);
-
+        /* avoid kmem_alloc() with lock held */
+        mutex_exit(&src->v_lock);
+        buf = kmem_alloc(alloc, KM_SLEEP);
         mutex_enter(&src->v_lock);
-        if (src->v_path == vn_vpath_empty ||
-            strlen(src->v_path) + 1 != buflen) {
+        if (src->v_path == NULL || strlen(src->v_path) + 1 != alloc) {
                 mutex_exit(&src->v_lock);
-                kmem_free(buf, buflen);
+                kmem_free(buf, alloc);
                 return;
         }
-        bcopy(src->v_path, buf, buflen);
-        stamp = src->v_path_stamp;
+        bcopy(src->v_path, buf, alloc);
         mutex_exit(&src->v_lock);
 
         mutex_enter(&dst->v_lock);
-        if (dst->v_path != vn_vpath_empty) {
+        if (dst->v_path != NULL) {
                 mutex_exit(&dst->v_lock);
-                kmem_free(buf, buflen);
+                kmem_free(buf, alloc);
                 return;
         }
         dst->v_path = buf;
-        dst->v_path_stamp = stamp;
         mutex_exit(&dst->v_lock);
 }
 
-
 /*
  * XXX Private interface for segvn routines that handle vnode
  * large page segments.
  *
  * return 1 if vp's file system VOP_PAGEIO() implementation
@@ -3442,19 +3313,14 @@
                         if (lat < VOP_LATENCY_100MS)
                                 atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
                         else if (lat < VOP_LATENCY_1S) {
                                 atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
                                 atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
-                        } else if (lat < VOP_LATENCY_10S) {
-                                atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
-                                atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
-                                atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
                         } else {
                                 atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
                                 atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
                                 atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
-                                atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
                         }
                 }
         }
 
         return (err);
@@ -3510,19 +3376,14 @@
                         if (lat < VOP_LATENCY_100MS)
                                 atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
                         else if (lat < VOP_LATENCY_1S) {
                                 atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
                                 atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
-                        } else if (lat < VOP_LATENCY_10S) {
-                                atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
-                                atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
-                                atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
                         } else {
                                 atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
                                 atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
                                 atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
-                                atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
                         }
                 }
         }
 
         return (err);
@@ -3686,12 +3547,14 @@
                 ret = (*(dvp)->v_op->vop_lookup)
                     (dvp, nm, vpp, pnp, flags, rdir, cr, ct, deflags, ppnp);
         }
         if (ret == 0 && *vpp) {
                 VOPSTATS_UPDATE(*vpp, lookup);
-                vn_updatepath(dvp, *vpp, nm);
+                if ((*vpp)->v_path == NULL) {
+                        vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm));
         }
+        }
 
         return (ret);
 }
 
 int
@@ -3726,12 +3589,14 @@
 
         ret = (*(dvp)->v_op->vop_create)
             (dvp, name, vap, excl, mode, vpp, cr, flags, ct, vsecp);
         if (ret == 0 && *vpp) {
                 VOPSTATS_UPDATE(*vpp, create);
-                vn_updatepath(dvp, *vpp, name);
+                if ((*vpp)->v_path == NULL) {
+                        vn_setpath(rootdir, dvp, *vpp, name, strlen(name));
         }
+        }
 
         return (ret);
 }
 
 int
@@ -3846,12 +3711,15 @@
 
         ret = (*(dvp)->v_op->vop_mkdir)
             (dvp, dirname, vap, vpp, cr, ct, flags, vsecp);
         if (ret == 0 && *vpp) {
                 VOPSTATS_UPDATE(*vpp, mkdir);
-                vn_updatepath(dvp, *vpp, dirname);
+                if ((*vpp)->v_path == NULL) {
+                        vn_setpath(rootdir, dvp, *vpp, dirname,
+                            strlen(dirname));
         }
+        }
 
         return (ret);
 }
 
 int