OS-5483 iostat -x shows around 100% utilization for idle zone
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
OS-5148 ftruncate at offset should emit proper events
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-338 Kstat counters to show "slow" VFS operations
OS-3294 add support for inotify
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
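
OS-338 adds per-zone counters for VFS operations that take at least 10 ms, 100 ms,
or 1 s (see the zv_10ms_ops/zv_100ms_ops/zv_1s_ops updates in the fop_read() and
fop_write() hunks below).  As a minimal sketch of how a userland consumer might
read those counters with libkstat: the module name "zone_vfs" and the stat names
"10ms_ops"/"100ms_ops"/"1s_ops" are assumptions inferred from the zone_vfs_kstat_t
fields and are not shown in this diff, so verify them against the kstat creation
code.

/*
 * Illustrative reader for the per-zone "slow op" counters (not part of this
 * commit).  Assumes kstats under module "zone_vfs" with named stats
 * "10ms_ops", "100ms_ops" and "1s_ops".  Build with: cc slowops.c -lkstat
 */
#include <stdio.h>
#include <string.h>
#include <kstat.h>

int
main(void)
{
        kstat_ctl_t *kc = kstat_open();
        kstat_t *ksp;
        kstat_named_t *kn;

        if (kc == NULL)
                return (1);

        /* One zone_vfs kstat instance is expected per zone. */
        for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
                if (strcmp(ksp->ks_module, "zone_vfs") != 0)
                        continue;
                if (kstat_read(kc, ksp, NULL) == -1)
                        continue;
                if ((kn = kstat_data_lookup(ksp, "1s_ops")) != NULL)
                        (void) printf("%s: %llu ops >= 1s\n", ksp->ks_name,
                            (u_longlong_t)kn->value.ui64);
        }

        (void) kstat_close(kc);
        return (0);
}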

@@ -19,11 +19,11 @@
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  */
 
 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
 /*        All Rights Reserved   */
 

@@ -198,10 +198,14 @@
         vfs_t *vfsp = (vp)->v_vfsp;                                     \
         if (vfsp != NULL && (vfsp->vfs_flag & VFS_XID) == 0)            \
                 cr = crgetmapped(cr);                                   \
         }
 
+#define VOP_LATENCY_10MS        10000000
+#define VOP_LATENCY_100MS       100000000
+#define VOP_LATENCY_1S          1000000000
+
 /*
  * Convert stat(2) formats to vnode types and vice versa.  (Knows about
  * numerical order of S_IFMT and vnode types.)
  */
 enum vtype iftovt_tab[] = {

@@ -2514,10 +2518,11 @@
 vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
 {
         if (vp == NULL || vp->v_femhead == NULL) {
                 return;
         }
+        (void) VOP_VNEVENT(dvp, VE_RENAME_SRC_DIR, vp, name, ct);
         (void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name, ct);
 }
 
 void
 vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,

@@ -2528,16 +2533,17 @@
         }
         (void) VOP_VNEVENT(vp, VE_RENAME_DEST, dvp, name, ct);
 }
 
 void
-vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct)
+vnevent_rename_dest_dir(vnode_t *vp, vnode_t *nvp, char *name,
+    caller_context_t *ct)
 {
         if (vp == NULL || vp->v_femhead == NULL) {
                 return;
         }
-        (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct);
+        (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, nvp, name, ct);
 }
 
 void
 vnevent_remove(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
 {

@@ -2620,10 +2626,19 @@
                 return;
         }
         (void) VOP_VNEVENT(vp, VE_TRUNCATE, NULL, NULL, ct);
 }
 
+void
+vnevent_resize(vnode_t *vp, caller_context_t *ct)
+{
+        if (vp == NULL || vp->v_femhead == NULL) {
+                return;
+        }
+        (void) VOP_VNEVENT(vp, VE_RESIZE, NULL, NULL, ct);
+}
+
 /*
  * Vnode accessors.
  */
 
 int

@@ -3259,18 +3274,57 @@
         uio_t *uiop,
         int ioflag,
         cred_t *cr,
         caller_context_t *ct)
 {
-        int     err;
         ssize_t resid_start = uiop->uio_resid;
+        zone_t  *zonep = curzone;
+        zone_vfs_kstat_t *zvp = zonep->zone_vfs_stats;
 
+        hrtime_t start = 0, lat;
+        ssize_t len;
+        int err;
+
+        if ((vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) &&
+            vp->v_vfsp != NULL && (vp->v_vfsp->vfs_flag & VFS_STATS)) {
+                start = gethrtime();
+
+                mutex_enter(&zonep->zone_vfs_lock);
+                kstat_runq_enter(&zonep->zone_vfs_rwstats);
+                mutex_exit(&zonep->zone_vfs_lock);
+        }
+
         VOPXID_MAP_CR(vp, cr);
 
         err = (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
-        VOPSTATS_UPDATE_IO(vp, read,
-            read_bytes, (resid_start - uiop->uio_resid));
+        len = resid_start - uiop->uio_resid;
+
+        VOPSTATS_UPDATE_IO(vp, read, read_bytes, len);
+
+        if (start != 0) {
+                mutex_enter(&zonep->zone_vfs_lock);
+                zonep->zone_vfs_rwstats.reads++;
+                zonep->zone_vfs_rwstats.nread += len;
+                kstat_runq_exit(&zonep->zone_vfs_rwstats);
+                mutex_exit(&zonep->zone_vfs_lock);
+
+                lat = gethrtime() - start;
+
+                if (lat >= VOP_LATENCY_10MS) {
+                        if (lat < VOP_LATENCY_100MS)
+                                atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+                        else if (lat < VOP_LATENCY_1S) {
+                                atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+                                atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+                        } else {
+                                atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+                                atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+                                atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+                        }
+                }
+        }
+
         return (err);
 }
 
 int
 fop_write(
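
A note on the latency accounting in fop_read() above (and repeated in fop_write()
below): the three counters are cumulative, so a single 1.5 s operation increments
zv_10ms_ops, zv_100ms_ops and zv_1s_ops.  A consumer that wants exclusive
histogram bins has to subtract; a small sketch, with hypothetical names:

#include <stdint.h>

/*
 * Derive exclusive latency buckets from the cumulative counters maintained
 * by fop_read()/fop_write().  ops_10ms/ops_100ms/ops_1s stand for sampled
 * values of zv_10ms_ops/zv_100ms_ops/zv_1s_ops (names are illustrative).
 */
static void
slow_op_buckets(uint64_t ops_10ms, uint64_t ops_100ms, uint64_t ops_1s,
    uint64_t *b10_100, uint64_t *b100_1s, uint64_t *b1s_up)
{
        *b10_100 = ops_10ms - ops_100ms;        /* [10ms, 100ms) */
        *b100_1s = ops_100ms - ops_1s;          /* [100ms, 1s)   */
        *b1s_up = ops_1s;                       /* [1s, ...)     */
}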

@@ -3278,18 +3332,62 @@
         uio_t *uiop,
         int ioflag,
         cred_t *cr,
         caller_context_t *ct)
 {
-        int     err;
         ssize_t resid_start = uiop->uio_resid;
+        zone_t  *zonep = curzone;
+        zone_vfs_kstat_t *zvp = zonep->zone_vfs_stats;
 
+        hrtime_t start = 0, lat;
+        ssize_t len;
+        int     err;
+
+        /*
+         * For the purposes of VFS kstat consumers, the "waitq" calculation is
+         * repurposed as the active queue for VFS write operations.  There's no
+         * actual wait queue for VFS operations.
+         */
+        if ((vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) &&
+            vp->v_vfsp != NULL && (vp->v_vfsp->vfs_flag & VFS_STATS)) {
+                start = gethrtime();
+
+                mutex_enter(&zonep->zone_vfs_lock);
+                kstat_waitq_enter(&zonep->zone_vfs_rwstats);
+                mutex_exit(&zonep->zone_vfs_lock);
+        }
+
         VOPXID_MAP_CR(vp, cr);
 
         err = (*(vp)->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
-        VOPSTATS_UPDATE_IO(vp, write,
-            write_bytes, (resid_start - uiop->uio_resid));
+        len = resid_start - uiop->uio_resid;
+
+        VOPSTATS_UPDATE_IO(vp, write, write_bytes, len);
+
+        if (start != 0) {
+                mutex_enter(&zonep->zone_vfs_lock);
+                zonep->zone_vfs_rwstats.writes++;
+                zonep->zone_vfs_rwstats.nwritten += len;
+                kstat_waitq_exit(&zonep->zone_vfs_rwstats);
+                mutex_exit(&zonep->zone_vfs_lock);
+
+                lat = gethrtime() - start;
+
+                if (lat >= VOP_LATENCY_10MS) {
+                        if (lat < VOP_LATENCY_100MS)
+                                atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+                        else if (lat < VOP_LATENCY_1S) {
+                                atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+                                atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+                        } else {
+                                atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+                                atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+                                atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+                        }
+                }
+        }
+
         return (err);
 }
 
 int
 fop_ioctl(
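
For OS-5483, the hunks above let reads occupy the kstat run queue and writes the
(repurposed) wait queue, so the two sides no longer share one active-queue
accounting.  As a hedged sketch, not code from this commit: an iostat-style
consumer could turn two snapshots of the zone's kstat_io_t into a busy
percentage for the read side like this, with the write side using wtime the
same way.

#include <kstat.h>

/*
 * Percent of a sampling interval during which at least one read was in
 * flight in this zone.  rtime is the cumulative time (in ns) that the run
 * queue was non-empty, maintained by kstat_runq_enter()/_exit() in
 * fop_read() above.  Names and usage are illustrative only.
 */
static double
zone_read_busy_pct(const kstat_io_t *prev, const kstat_io_t *cur,
    hrtime_t interval_ns)
{
        if (interval_ns <= 0)
                return (0.0);
        return (100.0 * (double)(cur->rtime - prev->rtime) /
            (double)interval_ns);
}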