1 #! /usr/bin/ksh -p
   2 # CDDL HEADER START
   3 #
   4 # The contents of this file are subject to the terms of the
   5 # Common Development and Distribution License (the "License").
   6 # You may not use this file except in compliance with the License.
   7 #
   8 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9 # or http://www.opensolaris.org/os/licensing.
  10 # See the License for the specific language governing permissions
  11 # and limitations under the License.
  12 #
  13 # When distributing Covered Code, include this CDDL HEADER in each
  14 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15 # If applicable, add the following below this CDDL HEADER, with the
  16 # fields enclosed by brackets "[]" replaced with your own identifying
  17 # information: Portions Copyright [yyyy] [name of copyright owner]
  18 #
  19 # CDDL HEADER END
  20 # Portions Copyright 2009 Sun Microsystems, Inc.
  21 # Portions Copyright 2009 Richard Elling
  22 #
  23 # File:   zilstat.ksh
  24 # Author: Richard.Elling@RichardElling.com
  25 # Online information: 
  26 #   http://www.RichardElling.com/Home/scripts-and-programs-1/zilstat-intro
  27 #
  28 # This dtrace program will help identify the ZIL activity by sampling
  29 # writes sent to the ZIL.
  30 # output:
  31 #     [TIME]
  32 #     Bytes - total bytes written to ZIL over the interval
  33 #     Bytes/S - bytes/s written to ZIL over ther interval
  34 #     Max-Rate - maximum rate during any 1-second sample
  35 # this output is listed for both the size of the data and the
  36 # size of the buffer containing the data.
  37 # In an attempt to reconcile writes > zfs_immediate_write_sz
  38 # or to otherwise make better decisions, the size of the buffer
  39 # is also represented in bins: <=4kBytes, 4-32kBytes, and > 32kBytes.
  40 # This should help you determine if the workload contains a bunch of
  41 # itty-bitty synchronous writes or just a few, large writes. This may
  42 # be important because if the pool does not have a log device and the
  43 # writes are > 32kBytes (zfs_immediate_write_sz) then they are not written
  44 # to the ZIL, but are instead written directly to the pool. OTOH, if there
  45 # is a separate log, then the writes are always sent to the log with the
  46 # expectation that the log device is always faster (lower latency) than
  47 # the pool. However, until a per-pool view is generated, the stats collected
  48 # here are for all pools.
  49 #
  50 # TODO: add per-pool option which also knows about zfs_immediate_write_sz
  51 #       logic.
  52 #
  53 ##############################
  54 # --- Process Arguments ---
  55 #
  56 
  57 ### default variables
  58 opt_mega=0
  59 opt_pool=0
  60 opt_time=0
  61 opt_txg=0
  62 filter=0
  63 pool=
  64 lines=-1
  65 interval=1
  66 count=-1
  67 
  68 ### process options
  69 while getopts hl:Mp:t name
  70 do
  71         case $name in
  72         l)  lines=$OPTARG ;;
  73         M)  opt_mega=1 ;;
  74         p)  opt_pool=1; pool=$OPTARG ;;
  75         t)  opt_time=1 ;;
  76         h|?)    ME=$(basename $0)
  77                 cat <<-END >&2
  78                 Usage: $ME [gMt][-l linecount] [-p poolname] [interval [count]]
  79     -M  # print numbers as megabytes (base 10)
  80     -t  # print timestamp
  81     -p poolname      # only look at poolname
  82     -l linecount    # print header every linecount lines (default=only once)
  83     interval in seconds or "txg" for transaction group commit intervals
  84              note: "txg" only appropriate when -p poolname is used
  85     count will limit the number of intervals reported
  86 
  87     examples:
  88         $ME # default output, 1 second samples
  89         $ME 10  # 10 second samples
  90         $ME 10 6    # print 6 x 10 second samples
  91         $ME -p rpool    # show ZIL stats for rpool only
  92 
  93     output:
  94         [TIME]
  95         N-Bytes    - data bytes written to ZIL over the interval
  96         N-Bytes/s  - data bytes/s written to ZIL over ther interval
  97         N-Max-Rate - maximum data rate during any 1-second sample
  98         B-Bytes    - buffer bytes written to ZIL over the interval
  99         B-Bytes/s  - buffer bytes/s written to ZIL over ther interval
 100         B-Max-Rate - maximum buffer rate during any 1-second sample
 101         ops        - number of synchronous iops per interval
 102         <=4kB      - number of synchronous iops <= 4kBytes per interval
 103         4-32kB     - number of synchronous iops 4-32kBytes per interval
 104         >=32kB     - number of synchronous iops >= 32kBytes per interval
 105     note: data bytes are actual data, total bytes counts buffer size
 106                 END
 107                 exit 1
 108         esac
 109 done
 110 
 111 shift $(( $OPTIND - 1 ))
 112 
 113 ### option logic
 114 if [[ "$1" > 0 ]]; then
 115         interval=$1; shift
 116 fi
 117 if [[ "$1" > 0 ]]; then
 118         count=$1; shift
 119 fi
 120 if (( opt_pool )); then
 121         filter=1
 122 fi
 123 
 124 case "$interval" in
 125     "txg")
 126         if [[ $opt_pool != 1 ]]; then
 127             echo "error: -p poolname option must be used for txg intervals"
 128             exit 1
 129         fi
 130         opt_txg=1
 131         interval=0
 132         ;;
 133     [0-9]*)
 134         ;;
 135     *)
 136         echo "error: interval must be a number or \"txg\""
 137         exit 1
 138         ;;
 139 esac
 140 
 141 ##############################
 142 # --- Main Program, DTrace ---
 143 
 144 /usr/sbin/dtrace -n '
 145 #pragma D option quiet
 146  inline int OPT_time = '$opt_time';
 147  inline int OPT_txg = '$opt_txg';
 148  inline int OPT_pool = '$opt_pool';
 149  inline int OPT_mega = '$opt_mega';
 150  inline int INTERVAL = '$interval';
 151  inline int LINES = '$lines';
 152  inline int COUNTER = '$count';
 153  inline int FILTER = '$filter';
 154  inline string POOL = "'$pool'";
 155  dtrace:::BEGIN
 156  {
 157     /* starting values */
 158     MEGA = 1000000;
 159     counts = COUNTER;
 160     secs = INTERVAL;
 161     interval = INTERVAL;
 162     interval == 0 ? interval++ : 1;
 163     line = 0;
 164     last_event[""] = 0;
 165     nused=0;
 166     nused_max_per_sec=0;
 167     nused_per_sec=0;
 168     size=0;
 169     size_max_per_sec=0;
 170     size_per_sec=0;
 171     syncops=0;
 172     size_4k=0;
 173     size_4k_32k=0;
 174     size_32k=0;
 175     OPT_txg ? printf("waiting for txg commit...\n") : 1;
 176  }
 177 
 178  /*
 179   * collect info when zil_lwb_write_start fires
 180   */
 181 fbt::zil_lwb_write_issue:entry
 182 /OPT_pool == 0 || POOL == args[0]->zl_dmu_pool->dp_spa->spa_name/
 183 {
 184      nused += args[1]->lwb_nused;
 185      nused_per_sec += args[1]->lwb_nused;
 186      size += args[1]->lwb_sz;
 187      size_per_sec += args[1]->lwb_sz;
 188      syncops++;
 189      args[1]->lwb_sz <= 4096 ? size_4k++ : 1;
 190      args[1]->lwb_sz > 4096 && args[1]->lwb_sz < 32768 ? size_4k_32k++ : 1;
 191      args[1]->lwb_sz >= 32768 ? size_32k++ : 1;
 192 }
 193 
 194 /*
 195  * Timer
 196  */
 197 profile:::tick-1sec
 198 {
 199         OPT_txg ? secs++ : secs--;
 200         nused_per_sec > nused_max_per_sec ? nused_max_per_sec = nused_per_sec : 1;
 201         nused_per_sec = 0;
 202         size_per_sec > size_max_per_sec ? size_max_per_sec = size_per_sec : 1;
 203         size_per_sec = 0;
 204 }
 205 
 206 /*
 207  * Print header
 208  */
 209 profile:::tick-1sec
 210 /OPT_txg == 0 && line == 0/
 211 {
 212         /* print optional headers */
 213         OPT_time   ? printf("%-20s ", "TIME")  : 1;
 214 
 215         /* print header */
 216         OPT_mega  ? printf("%10s %10s %10s %10s %10s %10s",
 217                 "N-MB", "N-MB/s", "N-Max-Rate",
 218                 "B-MB", "B-MB/s", "B-Max-Rate") :
 219             printf("%10s %10s %10s %10s %10s %10s",
 220                 "N-Bytes", "N-Bytes/s", "N-Max-Rate",
 221                 "B-Bytes", "B-Bytes/s", "B-Max-Rate");
 222         printf(" %6s %6s %6s %6s\n",
 223                 "ops", "<=4kB", "4-32kB", ">=32kB");
 224         line = LINES;
 225 }
 226 
 227  fbt::txg_quiesce:entry
 228  /OPT_txg == 1 && POOL == args[0]->dp_spa->spa_name && line == 0/
 229  {
 230         OPT_time  ? printf("%-20s ", "TIME")  : 1;
 231 
 232         OPT_mega  ? printf("%10s %10s %10s %10s %10s %10s %10s",
 233                 "txg", "N-MB", "N-MB/s", "N-Max-Rate",
 234                 "B-MB", "B-MB/s", "B-Max-Rate") :
 235             printf("%10s %10s %10s %10s %10s %10s %10s",
 236                 "txg", "N-Bytes", "N-Bytes/s", "N-Max-Rate",
 237                 "B-Bytes", "B-Bytes/s", "B-Max-Rate");
 238         printf(" %6s %6s %6s %6s\n",
 239                 "ops", "<=4kB", "4-32kB", ">=32kB");
 240         line = LINES;
 241 }
 242 
 243  /*
 244   * Print Output
 245   */
 246  profile:::tick-1sec
 247  /OPT_txg == 0 && secs == 0/
 248  {
 249         OPT_time  ? printf("%-20Y ", walltimestamp) : 1;
 250         OPT_mega  ?
 251             printf("%10d %10d %10d %10d %10d %10d",
 252                 nused/MEGA, nused/(interval*MEGA), nused_max_per_sec/MEGA,
 253                 size/MEGA, size/(interval*MEGA), size_max_per_sec/MEGA) :
 254             printf("%10d %10d %10d %10d %10d %10d",
 255                 nused, nused/interval, nused_max_per_sec,
 256                 size, size/interval, size_max_per_sec);
 257         printf(" %6d %6d %6d %6d\n",
 258                 syncops, size_4k, size_4k_32k, size_32k);
 259         nused = 0;
 260         nused_per_sec = 0;
 261         nused_max_per_sec = 0;
 262         size=0;
 263         size_max_per_sec=0;
 264         size_per_sec=0;
 265         syncops=0;
 266         size_4k=0;
 267         size_4k_32k=0;
 268         size_32k=0;
 269         secs = INTERVAL;
 270         counts--;
 271         line--;
 272  }
 273 
 274 fbt::txg_quiesce:entry
 275 /OPT_txg == 1 && POOL == args[0]->dp_spa->spa_name/
 276 {
 277         secs <= 0 ? secs=1 : 1;
 278         OPT_time ? printf("%-20Y ", walltimestamp) : 1;
 279         OPT_mega ?
 280             printf("%10d %10d %10d %10d %10d %10d %10d", args[1],
 281                 nused/MEGA, nused/(secs*MEGA), nused_max_per_sec/MEGA,
 282                 size/MEGA, size/(secs*MEGA), size_max_per_sec/MEGA) :
 283             printf("%10d %10d %10d %10d %10d %10d %10d", args[1],
 284                 nused, nused/secs, nused_max_per_sec,
 285                 size, size/secs, size_max_per_sec);
 286         printf(" %6d %6d %6d %6d\n",
 287                 syncops, size_4k, size_4k_32k, size_32k);
 288         nused = 0;
 289         nused_per_sec = 0;
 290         nused_max_per_sec = 0;
 291         size=0;
 292         size_max_per_sec=0;
 293         size_per_sec=0;
 294         syncops=0;
 295         size_4k=0;
 296         size_4k_32k=0;
 297         size_32k=0;
 298         secs = 0;
 299         counts--;
 300         line--;
 301  }
 302 
 303  /*
 304   * End of program
 305   */
 306  profile:::tick-1sec
 307  /OPT_txg == 0 && counts == 0/
 308  {
 309         exit(0);
 310  }
 311  fbt::txg_quiesce:entry
 312  /OPT_txg == 1 && counts == 0/
 313  {
 314     exit(0);
 315  }
 316 '