Print this page
@@ -19,11 +19,11 @@
* CDDL HEADER END
*/
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2016, Joyent, Inc.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
@@ -64,12 +64,10 @@
#include <sys/nbmlock.h>
#include <sys/fcntl.h>
#include <fs/fs_subr.h>
#include <sys/taskq.h>
#include <fs/fs_reparse.h>
-#include <sys/time.h>
-#include <sys/sdt.h>
/* Determine if this vnode is a file that is read-only */
#define ISROFILE(vp) \
((vp)->v_type != VCHR && (vp)->v_type != VBLK && \
(vp)->v_type != VFIFO && vn_is_readonly(vp))
@@ -102,13 +100,10 @@
kmutex_t vskstat_tree_lock;
/* Global variable which enables/disables the vopstats collection */
int vopstats_enabled = 1;
-/* Global used for empty/invalid v_path */
-char *vn_vpath_empty = "";
-
/*
* forward declarations for internal vnode specific data (vsd)
*/
static void *vsd_realloc(void *, size_t, size_t);
@@ -206,11 +201,10 @@
}
#define VOP_LATENCY_10MS 10000000
#define VOP_LATENCY_100MS 100000000
#define VOP_LATENCY_1S 1000000000
-#define VOP_LATENCY_10S 10000000000
/*
* Convert stat(2) formats to vnode types and vice versa. (Knows about
* numerical order of S_IFMT and vnode types.)
*/
@@ -2292,12 +2286,11 @@
mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&vp->v_vsd_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL);
rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL);
vp->v_femhead = NULL; /* Must be done before vn_reinit() */
- vp->v_path = vn_vpath_empty;
- vp->v_path_stamp = 0;
+ vp->v_path = NULL;
vp->v_mpssdata = NULL;
vp->v_vsd = NULL;
vp->v_fopdata = NULL;
return (0);
@@ -2340,11 +2333,10 @@
*/
void
vn_recycle(vnode_t *vp)
{
ASSERT(vp->v_pages == NULL);
- VERIFY(vp->v_path != NULL);
/*
* XXX - This really belongs in vn_reinit(), but we have some issues
* with the counts. Best to have it here for clean initialization.
*/
@@ -2363,15 +2355,14 @@
ASSERT(vp->v_femhead->femh_list == NULL);
mutex_destroy(&vp->v_femhead->femh_lock);
kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
vp->v_femhead = NULL;
}
- if (vp->v_path != vn_vpath_empty) {
+ if (vp->v_path) {
kmem_free(vp->v_path, strlen(vp->v_path) + 1);
- vp->v_path = vn_vpath_empty;
+ vp->v_path = NULL;
}
- vp->v_path_stamp = 0;
if (vp->v_fopdata != NULL) {
free_fopdata(vp);
}
vp->v_mpssdata = NULL;
@@ -2438,14 +2429,13 @@
* some with v_count of 1. In any case, the value should
* never be anything else.
*/
ASSERT((vp->v_count == 0) || (vp->v_count == 1));
ASSERT(vp->v_count_dnlc == 0);
- VERIFY(vp->v_path != NULL);
- if (vp->v_path != vn_vpath_empty) {
+ if (vp->v_path != NULL) {
kmem_free(vp->v_path, strlen(vp->v_path) + 1);
- vp->v_path = vn_vpath_empty;
+ vp->v_path = NULL;
}
/* If FEM was in use, make sure everything gets cleaned up */
if (vp->v_femhead) {
/* XXX - There should be a free_femhead() that does all this */
@@ -2975,243 +2965,129 @@
return ((u_longlong_t)atomic_inc_64_nv(&next_caller_id));
}
/*
- * The value stored in v_path is relative to rootdir, located in the global
- * zone. Zones or chroot environments which reside deeper inside the VFS
- * hierarchy will have a relative view of MAXPATHLEN since they are unaware of
- * what lies below their perceived root. In order to keep v_path usable for
- * these child environments, its allocations are allowed to exceed MAXPATHLEN.
- *
- * An upper bound of max_vnode_path is placed upon v_path allocations to
- * prevent the system from going too wild at the behest of pathological
- * behavior from the operator.
+ * Given a starting vnode and a path, updates the path in the target vnode in
+ * a safe manner. If the vnode already has path information embedded, then the
+ * cached path is left untouched.
*/
+
size_t max_vnode_path = 4 * MAXPATHLEN;
-
void
-vn_clearpath(vnode_t *vp, hrtime_t compare_stamp)
+vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
+ const char *path, size_t plen)
{
- char *buf;
+ char *rpath;
+ vnode_t *base;
+ size_t rpathlen, rpathalloc;
+ int doslash = 1;
- mutex_enter(&vp->v_lock);
- /*
- * If the snapshot of v_path_stamp passed in via compare_stamp does not
- * match the present value on the vnode, it indicates that subsequent
- * changes have occurred. The v_path value is not cleared in this case
- * since the new value may be valid.
- */
- if (compare_stamp != 0 && vp->v_path_stamp != compare_stamp) {
- mutex_exit(&vp->v_lock);
- return;
+ if (*path == '/') {
+ base = rootvp;
+ path++;
+ plen--;
+ } else {
+ base = startvp;
}
- buf = vp->v_path;
- vp->v_path = vn_vpath_empty;
- vp->v_path_stamp = 0;
- mutex_exit(&vp->v_lock);
- if (buf != vn_vpath_empty) {
- kmem_free(buf, strlen(buf) + 1);
- }
-}
-static void
-vn_setpath_common(vnode_t *pvp, vnode_t *vp, const char *name, size_t len,
- boolean_t is_rename)
-{
- char *buf, *oldbuf;
- hrtime_t pstamp;
- size_t baselen, buflen = 0;
-
- /* Handle the vn_setpath_str case. */
- if (pvp == NULL) {
- if (len + 1 > max_vnode_path) {
- DTRACE_PROBE4(vn__setpath__too__long, vnode_t *, pvp,
- vnode_t *, vp, char *, name, size_t, len + 1);
- return;
- }
- buf = kmem_alloc(len + 1, KM_SLEEP);
- bcopy(name, buf, len);
- buf[len] = '\0';
-
- mutex_enter(&vp->v_lock);
- oldbuf = vp->v_path;
- vp->v_path = buf;
- vp->v_path_stamp = gethrtime();
- mutex_exit(&vp->v_lock);
- if (oldbuf != vn_vpath_empty) {
- kmem_free(oldbuf, strlen(oldbuf) + 1);
- }
- return;
- }
-
- /* Take snapshot of parent dir */
- mutex_enter(&pvp->v_lock);
-retrybuf:
- if (pvp->v_path == vn_vpath_empty) {
/*
- * Without v_path from the parent directory, generating a child
- * path from the name is impossible.
+ * We cannot grab base->v_lock while we hold vp->v_lock because of
+ * the potential for deadlock.
*/
- if (len > 0) {
- pstamp = pvp->v_path_stamp;
- mutex_exit(&pvp->v_lock);
- vn_clearpath(vp, pstamp);
+ mutex_enter(&base->v_lock);
+ if (base->v_path == NULL) {
+ mutex_exit(&base->v_lock);
return;
}
+ rpathlen = strlen(base->v_path);
+ rpathalloc = rpathlen + plen + 1;
+ /* Avoid adding a slash if there's already one there */
+ if (base->v_path[rpathlen-1] == '/')
+ doslash = 0;
+ else
+ rpathalloc++;
+
/*
- * The only feasible case here is where a NUL lookup is being
- * performed on rootdir prior to its v_path being populated.
+ * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held,
+ * so we must do this dance. If, by chance, something changes the path,
+ * just give up since there is no real harm.
*/
- ASSERT(pvp->v_path_stamp = 0);
- baselen = 0;
- pstamp = 0;
- } else {
- pstamp = pvp->v_path_stamp;
- baselen = strlen(pvp->v_path);
- /* ignore a trailing slash if present */
- if (pvp->v_path[baselen - 1] == '/') {
- /* This should only the be case for rootdir */
- ASSERT(baselen == 1 && pvp == rootdir);
- baselen--;
- }
- }
- mutex_exit(&pvp->v_lock);
+ mutex_exit(&base->v_lock);
- if (buflen != 0) {
- /* Free the existing (mis-sized) buffer in case of retry */
- kmem_free(buf, buflen);
- }
- /* base, '/', name and trailing NUL */
- buflen = baselen + len + 2;
- if (buflen > max_vnode_path) {
- DTRACE_PROBE4(vn__setpath_too__long, vnode_t *, pvp,
- vnode_t *, vp, char *, name, size_t, buflen);
+ /* Paths should stay within reason */
+ if (rpathalloc > max_vnode_path)
return;
- }
- buf = kmem_alloc(buflen, KM_SLEEP);
- mutex_enter(&pvp->v_lock);
- if (pvp->v_path_stamp != pstamp) {
- size_t vlen;
+ rpath = kmem_alloc(rpathalloc, KM_SLEEP);
- /*
- * Since v_path_stamp changed on the parent, it is likely that
- * v_path has been altered as well. If the length does not
- * exactly match what was previously measured, the buffer
- * allocation must be repeated for proper sizing.
- */
- if (pvp->v_path == vn_vpath_empty) {
- /* Give up if parent lack v_path */
- mutex_exit(&pvp->v_lock);
- kmem_free(buf, buflen);
+ mutex_enter(&base->v_lock);
+ if (base->v_path == NULL || strlen(base->v_path) != rpathlen) {
+ mutex_exit(&base->v_lock);
+ kmem_free(rpath, rpathalloc);
return;
}
- vlen = strlen(pvp->v_path);
- if (pvp->v_path[vlen - 1] == '/') {
- vlen--;
- }
- if (vlen != baselen) {
- goto retrybuf;
- }
- }
- bcopy(pvp->v_path, buf, baselen);
- mutex_exit(&pvp->v_lock);
+ bcopy(base->v_path, rpath, rpathlen);
+ mutex_exit(&base->v_lock);
- buf[baselen] = '/';
- baselen++;
- bcopy(name, &buf[baselen], len + 1);
+ if (doslash)
+ rpath[rpathlen++] = '/';
+ bcopy(path, rpath + rpathlen, plen);
+ rpath[rpathlen + plen] = '\0';
mutex_enter(&vp->v_lock);
- if (vp->v_path_stamp == 0) {
- /* never-visited vnode can inherit stamp from parent */
- ASSERT(vp->v_path == vn_vpath_empty);
- vp->v_path_stamp = pstamp;
- vp->v_path = buf;
+ if (vp->v_path != NULL) {
mutex_exit(&vp->v_lock);
- } else if (vp->v_path_stamp < pstamp || is_rename) {
- /*
- * Install the updated path and stamp, ensuring that the v_path
- * pointer is valid at all times for dtrace.
- */
- oldbuf = vp->v_path;
- vp->v_path = buf;
- vp->v_path_stamp = gethrtime();
- mutex_exit(&vp->v_lock);
- kmem_free(oldbuf, strlen(oldbuf) + 1);
+ kmem_free(rpath, rpathalloc);
} else {
- /*
- * If the timestamp matches or is greater, it means another
- * thread performed the update first while locks were dropped
- * here to make the allocation. We defer to the newer value.
- */
+ vp->v_path = rpath;
mutex_exit(&vp->v_lock);
- kmem_free(buf, buflen);
}
- ASSERT(MUTEX_NOT_HELD(&vp->v_lock));
}
+/*
+ * Sets the path to the vnode to be the given string, regardless of current
+ * context. The string must be a complete path from rootdir. This is only used
+ * by fsop_root() for setting the path based on the mountpoint.
+ */
void
-vn_updatepath(vnode_t *pvp, vnode_t *vp, const char *name)
+vn_setpath_str(struct vnode *vp, const char *str, size_t len)
{
- size_t len;
+ char *buf = kmem_alloc(len + 1, KM_SLEEP);
- /*
- * If the parent is older or empty, there's nothing further to do.
- */
- if (pvp->v_path == vn_vpath_empty ||
- pvp->v_path_stamp <= vp->v_path_stamp) {
+ mutex_enter(&vp->v_lock);
+ if (vp->v_path != NULL) {
+ mutex_exit(&vp->v_lock);
+ kmem_free(buf, len + 1);
return;
}
- /*
- * Given the lack of appropriate context, meaningful updates to v_path
- * cannot be made for during lookups for the '.' or '..' entries.
- */
- len = strlen(name);
- if (len == 0 || (len == 1 && name[0] == '.') ||
- (len == 2 && name[0] == '.' && name[1] == '.')) {
- return;
- }
+ vp->v_path = buf;
+ bcopy(str, vp->v_path, len);
+ vp->v_path[len] = '\0';
- vn_setpath_common(pvp, vp, name, len, B_FALSE);
+ mutex_exit(&vp->v_lock);
}
/*
- * Given a starting vnode and a path, updates the path in the target vnode in
- * a safe manner. If the vnode already has path information embedded, then the
- * cached path is left untouched.
- */
-/* ARGSUSED */
-void
-vn_setpath(vnode_t *rootvp, vnode_t *pvp, vnode_t *vp, const char *name,
- size_t len)
-{
- vn_setpath_common(pvp, vp, name, len, B_FALSE);
-}
-
-/*
- * Sets the path to the vnode to be the given string, regardless of current
- * context. The string must be a complete path from rootdir. This is only used
- * by fsop_root() for setting the path based on the mountpoint.
- */
-void
-vn_setpath_str(vnode_t *vp, const char *str, size_t len)
-{
- vn_setpath_common(NULL, vp, str, len, B_FALSE);
-}
-
-/*
* Called from within filesystem's vop_rename() to handle renames once the
* target vnode is available.
*/
void
-vn_renamepath(vnode_t *pvp, vnode_t *vp, const char *name, size_t len)
+vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len)
{
- vn_setpath_common(pvp, vp, name, len, B_TRUE);
+ char *tmp;
+
+ mutex_enter(&vp->v_lock);
+ tmp = vp->v_path;
+ vp->v_path = NULL;
+ mutex_exit(&vp->v_lock);
+ vn_setpath(rootdir, dvp, vp, nm, len);
+ if (tmp != NULL)
+ kmem_free(tmp, strlen(tmp) + 1);
}
/*
* Similar to vn_setpath_str(), this function sets the path of the destination
* vnode to the be the same as the source vnode.
@@ -3218,46 +3094,41 @@
*/
void
vn_copypath(struct vnode *src, struct vnode *dst)
{
char *buf;
- hrtime_t stamp;
- size_t buflen;
+ int alloc;
mutex_enter(&src->v_lock);
- if (src->v_path == vn_vpath_empty) {
+ if (src->v_path == NULL) {
mutex_exit(&src->v_lock);
return;
}
- buflen = strlen(src->v_path) + 1;
- mutex_exit(&src->v_lock);
+ alloc = strlen(src->v_path) + 1;
- buf = kmem_alloc(buflen, KM_SLEEP);
-
+ /* avoid kmem_alloc() with lock held */
+ mutex_exit(&src->v_lock);
+ buf = kmem_alloc(alloc, KM_SLEEP);
mutex_enter(&src->v_lock);
- if (src->v_path == vn_vpath_empty ||
- strlen(src->v_path) + 1 != buflen) {
+ if (src->v_path == NULL || strlen(src->v_path) + 1 != alloc) {
mutex_exit(&src->v_lock);
- kmem_free(buf, buflen);
+ kmem_free(buf, alloc);
return;
}
- bcopy(src->v_path, buf, buflen);
- stamp = src->v_path_stamp;
+ bcopy(src->v_path, buf, alloc);
mutex_exit(&src->v_lock);
mutex_enter(&dst->v_lock);
- if (dst->v_path != vn_vpath_empty) {
+ if (dst->v_path != NULL) {
mutex_exit(&dst->v_lock);
- kmem_free(buf, buflen);
+ kmem_free(buf, alloc);
return;
}
dst->v_path = buf;
- dst->v_path_stamp = stamp;
mutex_exit(&dst->v_lock);
}
-
/*
* XXX Private interface for segvn routines that handle vnode
* large page segments.
*
* return 1 if vp's file system VOP_PAGEIO() implementation
@@ -3442,19 +3313,14 @@
if (lat < VOP_LATENCY_100MS)
atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
else if (lat < VOP_LATENCY_1S) {
atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
- } else if (lat < VOP_LATENCY_10S) {
- atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
- atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
- atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
} else {
atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
- atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
}
}
}
return (err);
@@ -3510,19 +3376,14 @@
if (lat < VOP_LATENCY_100MS)
atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
else if (lat < VOP_LATENCY_1S) {
atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
- } else if (lat < VOP_LATENCY_10S) {
- atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
- atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
- atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
} else {
atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
- atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
}
}
}
return (err);
@@ -3686,12 +3547,14 @@
ret = (*(dvp)->v_op->vop_lookup)
(dvp, nm, vpp, pnp, flags, rdir, cr, ct, deflags, ppnp);
}
if (ret == 0 && *vpp) {
VOPSTATS_UPDATE(*vpp, lookup);
- vn_updatepath(dvp, *vpp, nm);
+ if ((*vpp)->v_path == NULL) {
+ vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm));
}
+ }
return (ret);
}
int
@@ -3726,12 +3589,14 @@
ret = (*(dvp)->v_op->vop_create)
(dvp, name, vap, excl, mode, vpp, cr, flags, ct, vsecp);
if (ret == 0 && *vpp) {
VOPSTATS_UPDATE(*vpp, create);
- vn_updatepath(dvp, *vpp, name);
+ if ((*vpp)->v_path == NULL) {
+ vn_setpath(rootdir, dvp, *vpp, name, strlen(name));
}
+ }
return (ret);
}
int
@@ -3846,12 +3711,15 @@
ret = (*(dvp)->v_op->vop_mkdir)
(dvp, dirname, vap, vpp, cr, ct, flags, vsecp);
if (ret == 0 && *vpp) {
VOPSTATS_UPDATE(*vpp, mkdir);
- vn_updatepath(dvp, *vpp, dirname);
+ if ((*vpp)->v_path == NULL) {
+ vn_setpath(rootdir, dvp, *vpp, dirname,
+ strlen(dirname));
}
+ }
return (ret);
}
int