Print this page
OS-5483 iostat -x shows around 100% utilization for idle zone
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
OS-5148 ftruncate at offset should emit proper events
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-338 Kstat counters to show "slow" VFS operations
OS-3294 add support for inotify
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/vnode.c
          +++ new/usr/src/uts/common/fs/vnode.c
↓ open down ↓ 13 lines elided ↑ open up ↑
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  24      - * Copyright (c) 2013, Joyent, Inc. All rights reserved.
       24 + * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  25   25   */
  26   26  
  27   27  /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
  28   28  /*        All Rights Reserved   */
  29   29  
  30   30  /*
  31   31   * University Copyright- Copyright (c) 1982, 1986, 1988
  32   32   * The Regents of the University of California
  33   33   * All Rights Reserved
  34   34   *
↓ open down ↓ 158 lines elided ↑ open up ↑
 193  193  /*
 194  194   * If the filesystem does not support XIDs, map the credential.
 195  195   * If the vfsp is NULL, perhaps we should also map?
 196  196   */
/*
 * The macro expands to a braced block that declares a local named vfsp and,
 * when the vfs is non-NULL and lacks VFS_XID, assigns the result of
 * crgetmapped(cr) back to the cr variable named at the expansion site.
 * That variable must therefore be a modifiable lvalue, and the expansion
 * site must not rely on its own identifier called vfsp inside the macro
 * arguments.
 */
 197  197  #define VOPXID_MAP_CR(vp, cr)   {                                       \
 198  198          vfs_t *vfsp = (vp)->v_vfsp;                                     \
 199  199          if (vfsp != NULL && (vfsp->vfs_flag & VFS_XID) == 0)            \
 200  200                  cr = crgetmapped(cr);                                   \
 201  201          }
 202  202  
      /*
       * Latency thresholds used by fop_read() and fop_write(), which compare
       * them against the difference of two gethrtime() readings.  The values
       * correspond to 10ms, 100ms and 1s expressed in nanoseconds, the unit
       * gethrtime() reports.
       */
      203 +#define VOP_LATENCY_10MS        10000000
      204 +#define VOP_LATENCY_100MS       100000000
      205 +#define VOP_LATENCY_1S          1000000000
      206 +
 203  207  /*
 204  208   * Convert stat(2) formats to vnode types and vice versa.  (Knows about
 205  209   * numerical order of S_IFMT and vnode types.)
 206  210   */
/*
 * The table has 16 entries, one vnode type per slot; slots with no
 * corresponding vnode type hold VNON.
 * NOTE(review): presumably indexed by the S_IFMT bits of a mode shifted
 * down to the range 0..15 -- confirm against the IFTOVT macro, which is
 * not visible in this excerpt.
 */
 207  211  enum vtype iftovt_tab[] = {
 208  212          VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
 209  213          VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
 210  214  };
 211  215  
 212  216  ushort_t vttoif_tab[] = {
↓ open down ↓ 2296 lines elided ↑ open up ↑
2509 2513  
2510 2514          return (VOP_VNEVENT(vp, VE_SUPPORT, NULL, NULL, ct));
2511 2515  }
2512 2516  
/*
 * vnevent_rename_src: post the vnode events for the source side of a rename.
 *
 * Does nothing when vp is NULL or vp has no v_femhead.  Otherwise it first
 * sends VE_RENAME_SRC_DIR to the directory dvp (passing vp and name), then
 * sends VE_RENAME_SRC to vp (passing dvp and name).  The results of both
 * VOP_VNEVENT calls are deliberately discarded.
 *
 * NOTE(review): the directory event added by this change is gated on
 * vp->v_femhead, not on dvp, and dvp itself is not checked for NULL before
 * it is used -- confirm that both are intended and that every caller passes
 * a valid dvp.
 */
2513 2517  void
2514 2518  vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2515 2519  {
2516 2520          if (vp == NULL || vp->v_femhead == NULL) {
2517 2521                  return;
2518 2522          }
     2523 +        (void) VOP_VNEVENT(dvp, VE_RENAME_SRC_DIR, vp, name, ct);
2519 2524          (void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name, ct);
2520 2525  }
2521 2526  
/*
 * vnevent_rename_dest: post VE_RENAME_DEST to vp, passing the directory dvp
 * and the entry name through to VOP_VNEVENT.
 *
 * Does nothing when vp is NULL or vp has no v_femhead.  The VOP_VNEVENT
 * result is discarded.
 */
2522 2527  void
2523 2528  vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
2524 2529      caller_context_t *ct)
2525 2530  {
2526 2531          if (vp == NULL || vp->v_femhead == NULL) {
2527 2532                  return;
2528 2533          }
2529 2534          (void) VOP_VNEVENT(vp, VE_RENAME_DEST, dvp, name, ct);
2530 2535  }
2531 2536  
/*
 * vnevent_rename_dest_dir: post VE_RENAME_DEST_DIR to the directory vp.
 *
 * Does nothing when vp is NULL or vp has no v_femhead.  The VOP_VNEVENT
 * result is discarded.
 *
 * This change widens the signature: the old form took only (vp, ct) and
 * passed NULL, NULL as the event arguments; the new form takes nvp and name
 * and forwards them to VOP_VNEVENT instead.
 * NOTE(review): the prototype and every caller must be updated to match;
 * neither is visible in this excerpt.
 */
2532 2537  void
2533      -vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct)
     2538 +vnevent_rename_dest_dir(vnode_t *vp, vnode_t *nvp, char *name,
     2539 +    caller_context_t *ct)
2534 2540  {
2535 2541          if (vp == NULL || vp->v_femhead == NULL) {
2536 2542                  return;
2537 2543          }
2538      -        (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct);
     2544 +        (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, nvp, name, ct);
2539 2545  }
2540 2546  
/*
 * vnevent_remove: post VE_REMOVE to vp, passing the directory dvp and the
 * entry name through to VOP_VNEVENT.
 *
 * Does nothing when vp is NULL or vp has no v_femhead.  The VOP_VNEVENT
 * result is discarded.
 */
2541 2547  void
2542 2548  vnevent_remove(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2543 2549  {
2544 2550          if (vp == NULL || vp->v_femhead == NULL) {
2545 2551                  return;
2546 2552          }
2547 2553          (void) VOP_VNEVENT(vp, VE_REMOVE, dvp, name, ct);
2548 2554  }
↓ open down ↓ 66 lines elided ↑ open up ↑
2615 2621  
/*
 * vnevent_truncate: post VE_TRUNCATE to vp with no directory and no name
 * (both passed as NULL).
 *
 * Does nothing when vp is NULL or vp has no v_femhead.  The VOP_VNEVENT
 * result is discarded.
 */
2616 2622  void
2617 2623  vnevent_truncate(vnode_t *vp, caller_context_t *ct)
2618 2624  {
2619 2625          if (vp == NULL || vp->v_femhead == NULL) {
2620 2626                  return;
2621 2627          }
2622 2628          (void) VOP_VNEVENT(vp, VE_TRUNCATE, NULL, NULL, ct);
2623 2629  }
     /*
      * vnevent_resize: new in this change.  Posts VE_RESIZE to vp with no
      * directory and no name (both passed as NULL), mirroring the shape of
      * vnevent_truncate().
      *
      * Does nothing when vp is NULL or vp has no v_femhead.  The VOP_VNEVENT
      * result is discarded.
      */
     2631 +void
     2632 +vnevent_resize(vnode_t *vp, caller_context_t *ct)
     2633 +{
     2634 +        if (vp == NULL || vp->v_femhead == NULL) {
     2635 +                return;
     2636 +        }
     2637 +        (void) VOP_VNEVENT(vp, VE_RESIZE, NULL, NULL, ct);
     2638 +}
     2639 +
2625 2640  /*
2626 2641   * Vnode accessors.
2627 2642   */
2628 2643  
/*
 * vn_is_readonly: report whether the vfs that vp belongs to has VFS_RDONLY
 * set.
 *
 * Returns the masked flag bits themselves, so the result is zero when the
 * flag is clear and non-zero (not necessarily 1) when it is set.
 * vp and vp->v_vfsp are dereferenced without a NULL check.
 */
2629 2644  int
2630 2645  vn_is_readonly(vnode_t *vp)
2631 2646  {
2632 2647          return (vp->v_vfsp->vfs_flag & VFS_RDONLY);
2633 2648  }
2634 2649  
↓ open down ↓ 619 lines elided ↑ open up ↑
3254 3269  }
3255 3270  
/*
 * fop_read: invoke the vnode's vop_read entry point and account for the
 * operation.  Returns whatever vop_read returned.
 *
 * The byte count len is the drop in uiop->uio_resid across the call; it is
 * computed and fed to VOPSTATS_UPDATE_IO regardless of the error value.
 *
 * Per-zone accounting (new in this change) is done only when the vnode type
 * is VREG, VDIR or VBLK and its vfs is non-NULL with VFS_STATS set.  The
 * local start doubles as the enabled flag: it stays 0 unless that test
 * passes.  When enabled:
 *   - before the call, the operation is entered on the run queue of
 *     curzone's zone_vfs_rwstats, under zone_vfs_lock;
 *   - after the call, reads and nread are updated and the run queue is
 *     exited, again under zone_vfs_lock;
 *   - latency is then taken as gethrtime() minus start, so it includes the
 *     time spent acquiring zone_vfs_lock on the exit path.
 *
 * The slow-operation counters are cumulative: an operation of at least 1s
 * increments the 10ms, 100ms and 1s counters; one of at least 100ms
 * increments the 10ms and 100ms counters; one of at least 10ms increments
 * only the 10ms counter.
 *
 * The zone pointer is sampled from curzone once at entry, and
 * zone_vfs_stats is read from it unconditionally.
 */
3256 3271  int
3257 3272  fop_read(
3258 3273          vnode_t *vp,
3259 3274          uio_t *uiop,
3260 3275          int ioflag,
3261 3276          cred_t *cr,
3262 3277          caller_context_t *ct)
3263 3278  {
3264      -        int     err;
3265 3279          ssize_t resid_start = uiop->uio_resid;
     3280 +        zone_t  *zonep = curzone;
     3281 +        zone_vfs_kstat_t *zvp = zonep->zone_vfs_stats;
3266 3282  
     3283 +        hrtime_t start = 0, lat;
     3284 +        ssize_t len;
     3285 +        int err;
     3286 +
     3287 +        if ((vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) &&
     3288 +            vp->v_vfsp != NULL && (vp->v_vfsp->vfs_flag & VFS_STATS)) {
     3289 +                start = gethrtime();
     3290 +
     3291 +                mutex_enter(&zonep->zone_vfs_lock);
     3292 +                kstat_runq_enter(&zonep->zone_vfs_rwstats);
     3293 +                mutex_exit(&zonep->zone_vfs_lock);
     3294 +        }
     3295 +
3267 3296          VOPXID_MAP_CR(vp, cr);
3268 3297  
3269 3298          err = (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
3270      -        VOPSTATS_UPDATE_IO(vp, read,
3271      -            read_bytes, (resid_start - uiop->uio_resid));
     3299 +        len = resid_start - uiop->uio_resid;
     3300 +
     3301 +        VOPSTATS_UPDATE_IO(vp, read, read_bytes, len);
     3302 +
     3303 +        if (start != 0) {
     3304 +                mutex_enter(&zonep->zone_vfs_lock);
     3305 +                zonep->zone_vfs_rwstats.reads++;
     3306 +                zonep->zone_vfs_rwstats.nread += len;
     3307 +                kstat_runq_exit(&zonep->zone_vfs_rwstats);
     3308 +                mutex_exit(&zonep->zone_vfs_lock);
     3309 +
     3310 +                lat = gethrtime() - start;
     3311 +
     3312 +                if (lat >= VOP_LATENCY_10MS) {
     3313 +                        if (lat < VOP_LATENCY_100MS)
     3314 +                                atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
     3315 +                        else if (lat < VOP_LATENCY_1S) {
     3316 +                                atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
     3317 +                                atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
     3318 +                        } else {
     3319 +                                atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
     3320 +                                atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
     3321 +                                atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
     3322 +                        }
     3323 +                }
     3324 +        }
     3325 +
3272 3326          return (err);
3273 3327  }
3274 3328  
/*
 * fop_write: invoke the vnode's vop_write entry point and account for the
 * operation.  Returns whatever vop_write returned.
 *
 * The byte count len is the drop in uiop->uio_resid across the call; it is
 * computed and fed to VOPSTATS_UPDATE_IO regardless of the error value.
 *
 * Per-zone accounting (new in this change) is done only when the vnode type
 * is VREG, VDIR or VBLK and its vfs is non-NULL with VFS_STATS set.  The
 * local start doubles as the enabled flag: it stays 0 unless that test
 * passes.  When enabled:
 *   - before the call, the operation is entered via kstat_waitq_enter on
 *     curzone's zone_vfs_rwstats, under zone_vfs_lock (reads use the run
 *     queue calls instead);
 *   - after the call, writes and nwritten are updated and kstat_waitq_exit
 *     is called, again under zone_vfs_lock;
 *   - latency is then taken as gethrtime() minus start, so it includes the
 *     time spent acquiring zone_vfs_lock on the exit path.
 *
 * The slow-operation counters are shared with fop_read() and are
 * cumulative: an operation of at least 1s increments the 10ms, 100ms and 1s
 * counters; one of at least 100ms increments the 10ms and 100ms counters;
 * one of at least 10ms increments only the 10ms counter.
 */
3275 3329  int
3276 3330  fop_write(
3277 3331          vnode_t *vp,
3278 3332          uio_t *uiop,
3279 3333          int ioflag,
3280 3334          cred_t *cr,
3281 3335          caller_context_t *ct)
3282 3336  {
3283      -        int     err;
3284 3337          ssize_t resid_start = uiop->uio_resid;
     3338 +        zone_t  *zonep = curzone;
     3339 +        zone_vfs_kstat_t *zvp = zonep->zone_vfs_stats;
3285 3340  
     3341 +        hrtime_t start = 0, lat;
     3342 +        ssize_t len;
     3343 +        int     err;
     3344 +
     3345 +        /*
     3346 +         * For the purposes of VFS kstat consumers, the "waitq" calculation is
     3347 +         * repurposed as the active queue for VFS write operations.  There's no
     3348 +         * actual wait queue for VFS operations.
     3349 +         */
     3350 +        if ((vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) &&
     3351 +            vp->v_vfsp != NULL && (vp->v_vfsp->vfs_flag & VFS_STATS)) {
     3352 +                start = gethrtime();
     3353 +
     3354 +                mutex_enter(&zonep->zone_vfs_lock);
     3355 +                kstat_waitq_enter(&zonep->zone_vfs_rwstats);
     3356 +                mutex_exit(&zonep->zone_vfs_lock);
     3357 +        }
     3358 +
3286 3359          VOPXID_MAP_CR(vp, cr);
3287 3360  
3288 3361          err = (*(vp)->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
3289      -        VOPSTATS_UPDATE_IO(vp, write,
3290      -            write_bytes, (resid_start - uiop->uio_resid));
     3362 +        len = resid_start - uiop->uio_resid;
     3363 +
     3364 +        VOPSTATS_UPDATE_IO(vp, write, write_bytes, len);
     3365 +
     3366 +        if (start != 0) {
     3367 +                mutex_enter(&zonep->zone_vfs_lock);
     3368 +                zonep->zone_vfs_rwstats.writes++;
     3369 +                zonep->zone_vfs_rwstats.nwritten += len;
     3370 +                kstat_waitq_exit(&zonep->zone_vfs_rwstats);
     3371 +                mutex_exit(&zonep->zone_vfs_lock);
     3372 +
     3373 +                lat = gethrtime() - start;
     3374 +
     3375 +                if (lat >= VOP_LATENCY_10MS) {
     3376 +                        if (lat < VOP_LATENCY_100MS)
     3377 +                                atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
     3378 +                        else if (lat < VOP_LATENCY_1S) {
     3379 +                                atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
     3380 +                                atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
     3381 +                        } else {
     3382 +                                atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
     3383 +                                atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
     3384 +                                atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
     3385 +                        }
     3386 +                }
     3387 +        }
     3388 +
3291 3389          return (err);
3292 3390  }
3293 3391  
3294 3392  int
3295 3393  fop_ioctl(
3296 3394          vnode_t *vp,
3297 3395          int cmd,
3298 3396          intptr_t arg,
3299 3397          int flag,
3300 3398          cred_t *cr,
↓ open down ↓ 1277 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX