1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  *
  26  * rewritten from UCB 4.13 83/09/25
  27  * rewritten from SunOS 4.1 SID 1.18 89/10/06
  28  */
  29 /*
  30  * Copyright (c) 2012 by Delphix. All rights reserved.
  31  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
  32  * Copyright 2016 James S. Blachly, MD. All rights reserved.
  33  */
  34 
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <stdarg.h>
  38 #include <ctype.h>
  39 #include <unistd.h>
  40 #include <memory.h>
  41 #include <errno.h>
  42 #include <string.h>
  43 #include <signal.h>
  44 #include <sys/types.h>
  45 #include <time.h>
  46 #include <sys/time.h>
  47 #include <sys/sysinfo.h>
  48 #include <inttypes.h>
  49 #include <strings.h>
  50 #include <sys/systeminfo.h>
  51 #include <kstat.h>
  52 #include <locale.h>
  53 
  54 #include "dsr.h"
  55 #include "statcommon.h"
  56 
/*
 * Bits kept in do_disk that select which disk report format(s) to produce.
 */
#define DISK_OLD                0x0001
#define DISK_NEW                0x0002
#define DISK_EXTENDED           0x0004
#define DISK_ERRORS             0x0008
#define DISK_EXTENDED_ERRORS    0x0010
#define DISK_IOPATH_LI          0x0020  /* LunInitiator */
#define DISK_IOPATH_LTI         0x0040  /* LunTargetInitiator */

/* Formats whose disk columns appear in the two-line header; see printhdr() */
#define DISK_NORMAL             (DISK_OLD | DISK_NEW)
/* Formats for which show_disk() computes and prints I/O rates */
#define DISK_IO_MASK            (DISK_OLD | DISK_NEW | DISK_EXTENDED)
/* Formats for which main() asks the snapshot code for error kstats */
#define DISK_ERROR_MASK         (DISK_ERRORS | DISK_EXTENDED_ERRORS)
/*
 * Formats that use the extended headers; main() installs the SIGCONT
 * header handler only when none of these bits is set.
 */
#define PRINT_VERTICAL          (DISK_ERROR_MASK | DISK_EXTENDED)

/* Value tohdr is reset to each time printhdr() emits the basic header */
#define REPRINT 19

/* Number of leading named error kstats that show_disk() reads and totals */
#define NUMBER_OF_ERR_COUNTERS  3
  73 
  74 /*
  75  * It's really a pseudo-gigabyte. We use 1000000000 bytes so that the disk
  76  * labels don't look bad. 1GB is really 1073741824 bytes.
  77  */
  78 #define DISK_GIGABYTE   1000000000.0
  79 
  80 /*
 * Function descriptor to be called when extended
  82  * headers are used.
  83  */
  84 typedef struct formatter {
  85         void (*nfunc)(void);
  86         struct formatter *next;
  87 } format_t;
  88 
  89 /*
  90  * Used to get formatting right when printing tty/cpu
  91  * data to the right of disk data
  92  */
  93 enum show_disk_mode {
  94         SHOW_FIRST_ONLY,
  95         SHOW_SECOND_ONWARDS,
  96         SHOW_ALL
  97 };
  98 
  99 enum show_disk_mode show_disk_mode = SHOW_ALL;
 100 
 101 char *cmdname = "iostat";
 102 int caught_cont = 0;
 103 
 104 static char one_blank[] = " ";
 105 static char two_blanks[] = "  ";
 106 
 107 /*
 108  * count for number of lines to be emitted before a header is
 109  * shown again. Only used for the basic format.
 110  */
 111 static  uint_t  tohdr = 1;
 112 
 113 /*
 114  * If we're in raw format, have we printed a header? We only do it
 115  * once for raw but we emit it every REPRINT lines in non-raw format.
 116  * This applies only for the basic header. The extended header is
 117  * done only once in both formats.
 118  */
 119 static  uint_t  hdr_out;
 120 
 121 /*
 122  * Flags representing arguments from command line
 123  */
static  uint_t  do_tty;                 /* show tty info (-t) */
static  uint_t  do_disk;                /* show disk info per selected */
                                        /* format (-d, -D, -e, -E, -x -X -Y) */
static  uint_t  do_cpu;                 /* show cpu info (-c) */
static  uint_t  do_interval;            /* do intervals (-I) */
static  int     do_partitions;          /* per-partition stats (-p) */
static  int     do_partitions_only;     /* per-partition stats only (-P) */
                                        /* no per-device stats for disks */
/*
 * Either of the next two makes main() add IODEV_ZFS to the allowed device
 * types; do_zfs_only additionally keeps main() from adding IODEV_DISK.
 * NOTE(review): the option letters that set these are not visible here.
 */
static  int     do_zfs;
static  int     do_zfs_only;
static  uint_t  do_conversions;         /* display disks as cXtYdZ (-n) */
static  uint_t  do_megabytes;           /* display data in MB/sec (-M) */
static  uint_t  do_controller;          /* display controller info (-C) */
static  uint_t  do_raw;                 /* emit raw format (-r) */
static  uint_t  timestamp_fmt = NODATE; /* timestamp  each display (-T) */
static  uint_t  do_devid;               /* -E should show devid */
 140 
 141 /*
 142  * Default number of disk drives to be displayed in basic format
 143  */
 144 #define DEFAULT_LIMIT   4
 145 
 146 struct iodev_filter df;
 147 
 148 static  uint_t  suppress_state;         /* skip state change messages */
 149 static  uint_t  suppress_zero;          /* skip zero valued lines */
 150 static  uint_t  show_mountpts;          /* show mount points */
 151 static  int     interval;               /* interval (seconds) to output */
 152 static  int     iter;                   /* iterations from command line */
 153 
 154 #define SMALL_SCRATCH_BUFLEN    MAXNAMELEN
 155 
 156 static int      iodevs_nl;              /* name field width */
 157 #define IODEVS_NL_MIN           6       /* not too thin for "device" */
 158 #define IODEVS_NL_MAX           64      /* but keep full width under 80 */
 159 
 160 static  char    disk_header[132];
 161 static  uint_t  dh_len;                 /* disk header length for centering */
 162 static  int     lineout;                /* data waiting to be printed? */
 163 
 164 static struct snapshot *newss;
 165 static struct snapshot *oldss;
 166 static  double  getime;                 /* elapsed time */
 167 static  double  percent;                /* 100 / etime */
 168 
 169 /*
 170  * List of functions to be called which will construct the desired output
 171  */
 172 static format_t *formatter_list;
 173 static format_t *formatter_end;
 174 
 175 static u_longlong_t     ull_delta(u_longlong_t, u_longlong_t);
 176 static uint_t   u32_delta(uint_t, uint_t);
 177 static void setup(void (*nfunc)(void));
 178 static void print_tty_hdr1(void);
 179 static void print_tty_hdr2(void);
 180 static void print_cpu_hdr1(void);
 181 static void print_cpu_hdr2(void);
 182 static void print_tty_data(void);
 183 static void print_cpu_data(void);
 184 static void print_err_hdr(void);
 185 static void print_disk_header(void);
 186 static void hdrout(void);
 187 static void disk_errors(void);
 188 static void do_newline(void);
 189 static void push_out(const char *, ...);
 190 static void printhdr(int);
 191 static void printxhdr(void);
 192 static void usage(void);
 193 static void do_args(int, char **);
 194 static void do_format(void);
 195 static void show_all_disks(void);
 196 static void show_first_disk(void);
 197 static void show_other_disks(void);
 198 static void show_disk_errors(void *, void *, void *);
 199 static void write_core_header(void);
 200 static int  fzero(double value);
 201 static int  safe_strtoi(char const *val, char *errmsg);
 202 
 203 int
 204 main(int argc, char **argv)
 205 {
 206         enum snapshot_types types = SNAP_SYSTEM;
 207         kstat_ctl_t *kc;
 208         long hz;
 209         int forever;
 210         hrtime_t start_n;
 211         hrtime_t period_n;
 212 
 213         (void) setlocale(LC_ALL, "");
 214 #if !defined(TEXT_DOMAIN)               /* Should be defined by cc -D */
 215 #define TEXT_DOMAIN "SYS_TEST"          /* Use this only if it weren't */
 216 #endif
 217         (void) textdomain(TEXT_DOMAIN);
 218 
 219         do_args(argc, argv);
 220 
 221         /*
 222          * iostat historically showed CPU changes, even though
 223          * it doesn't provide much useful information
 224          */
 225         types |= SNAP_CPUS;
 226 
 227         if (do_disk)
 228                 types |= SNAP_IODEVS;
 229 
 230         if (do_disk && !do_partitions_only && !do_zfs_only)
 231                 df.if_allowed_types |= IODEV_DISK;
 232         if (do_disk & DISK_IOPATH_LI) {
 233                 df.if_allowed_types |= IODEV_IOPATH_LTI;
 234                 types |= SNAP_IOPATHS_LI;
 235         }
 236         if (do_disk & DISK_IOPATH_LTI) {
 237                 df.if_allowed_types |= IODEV_IOPATH_LTI;
 238                 types |= SNAP_IOPATHS_LTI;
 239         }
 240         if (do_disk & DISK_ERROR_MASK)
 241                 types |= SNAP_IODEV_ERRORS;
 242         if (do_partitions || do_partitions_only)
 243                 df.if_allowed_types |= IODEV_PARTITION;
 244         if (do_zfs || do_zfs_only)
 245                 df.if_allowed_types |= IODEV_ZFS;
 246         if (do_conversions)
 247                 types |= SNAP_IODEV_PRETTY;
 248         if (do_devid)
 249                 types |= SNAP_IODEV_DEVID;
 250         if (do_controller) {
 251                 if (!(do_disk & PRINT_VERTICAL) ||
 252                     (do_disk & DISK_EXTENDED_ERRORS))
 253                         fail(0, "-C can only be used with -e or -x.");
 254                 types |= SNAP_CONTROLLERS;
 255                 df.if_allowed_types |= IODEV_CONTROLLER;
 256         }
 257 
 258         hz = sysconf(_SC_CLK_TCK);
 259 
 260         /*
 261          * Undocumented behavior - sending a SIGCONT will result
 262          * in a new header being emitted. Used only if we're not
 263          * doing extended headers. This is a historical
 264          * artifact.
 265          */
 266         if (!(do_disk & PRINT_VERTICAL))
 267                 (void) signal(SIGCONT, printhdr);
 268 
 269         if (interval)
 270                 period_n = (hrtime_t)interval * NANOSEC;
 271 
 272         kc = open_kstat();
 273         if (interval)
 274                 start_n = gethrtime();
 275         newss = acquire_snapshot(kc, types, &df);
 276 
 277         /* compute width of "device" field */
 278         iodevs_nl = newss->s_iodevs_is_name_maxlen;
 279         iodevs_nl = (iodevs_nl < IODEVS_NL_MIN) ?
 280             IODEVS_NL_MIN : iodevs_nl;
 281         iodevs_nl = (iodevs_nl > IODEVS_NL_MAX) ?
 282             IODEVS_NL_MAX : iodevs_nl;
 283 
 284         do_format();
 285 
 286         forever = (iter == 0);
 287         do {
 288                 if (do_conversions && show_mountpts)
 289                         do_mnttab();
 290 
 291                 if (do_tty || do_cpu) {
 292                         kstat_t *oldks;
 293                         oldks = oldss ? &oldss->s_sys.ss_agg_sys : NULL;
 294                         getime = cpu_ticks_delta(oldks,
 295                             &newss->s_sys.ss_agg_sys);
 296                         percent = (getime > 0.0) ? 100.0 / getime : 0.0;
 297                         getime = (getime / nr_active_cpus(newss)) / hz;
 298                         if (getime == 0.0)
 299                                 getime = (double)interval;
 300                         if (getime == 0.0 || do_interval)
 301                                 getime = 1.0;
 302                 }
 303 
 304                 if (formatter_list) {
 305                         format_t *tmp;
 306                         tmp = formatter_list;
 307 
 308                         if (timestamp_fmt != NODATE)
 309                                 print_timestamp(timestamp_fmt);
 310 
 311                         while (tmp) {
 312                                 (tmp->nfunc)();
 313                                 tmp = tmp->next;
 314                         }
 315                         (void) fflush(stdout);
 316                 }
 317 
 318                 /* only remaining/doing a single iteration, we are done */
 319                 if (iter == 1)
 320                         continue;
 321 
 322                 if (interval > 0)
 323                         /* Have a kip */
 324                         sleep_until(&start_n, period_n, forever, &caught_cont);
 325 
 326                 free_snapshot(oldss);
 327                 oldss = newss;
 328                 newss = acquire_snapshot(kc, types, &df);
 329                 iodevs_nl = (newss->s_iodevs_is_name_maxlen > iodevs_nl) ?
 330                     newss->s_iodevs_is_name_maxlen : iodevs_nl;
 331                 iodevs_nl = (iodevs_nl < IODEVS_NL_MIN) ?
 332                     IODEVS_NL_MIN : iodevs_nl;
 333                 iodevs_nl = (iodevs_nl > IODEVS_NL_MAX) ?
 334                     IODEVS_NL_MAX : iodevs_nl;
 335 
 336                 if (!suppress_state)
 337                         snapshot_report_changes(oldss, newss);
 338 
 339                 /* if config changed, show stats from boot */
 340                 if (snapshot_has_changed(oldss, newss)) {
 341                         free_snapshot(oldss);
 342                         oldss = NULL;
 343                 }
 344 
 345         } while (--iter);
 346 
 347         free_snapshot(oldss);
 348         free_snapshot(newss);
 349         (void) kstat_close(kc);
 350         free(df.if_names);
 351         return (0);
 352 }
 353 
 354 /*
 355  * Some magic numbers used in header formatting.
 356  *
 357  * DISK_LEN = length of either "kps tps serv" or "wps rps util"
 358  *            using 0 as the first position
 359  *
 360  * DISK_ERROR_LEN = length of "s/w h/w trn tot" with one space on
 361  *              either side. Does not use zero as first pos.
 362  *
 363  * DEVICE_LEN = length of "device" + 1 character.
 364  */
 365 
 366 #define DISK_LEN        11
 367 #define DISK_ERROR_LEN  16
 368 #define DEVICE_LEN      7
 369 
 370 /*ARGSUSED*/
 371 static void
 372 show_disk_name(void *v1, void *v2, void *data)
 373 {
 374         struct iodev_snapshot *dev = (struct iodev_snapshot *)v2;
 375         size_t slen;
 376         char *name;
 377         char fbuf[SMALL_SCRATCH_BUFLEN];
 378 
 379         if (dev == NULL)
 380                 return;
 381 
 382         name = do_conversions ? dev->is_pretty : dev->is_name;
 383         name = name ? name : dev->is_name;
 384 
 385         if (!do_raw) {
 386                 uint_t width;
 387 
 388                 slen = strlen(name);
 389                 /*
 390                  * The length is less
 391                  * than the section
 392                  * which will be displayed
 393                  * on the next line.
 394                  * Center the entry.
 395                  */
 396 
 397                 width = (DISK_LEN + 1)/2 + (slen / 2);
 398                 (void) snprintf(fbuf, sizeof (fbuf),
 399                     "%*s", width, name);
 400                 name = fbuf;
 401                 push_out("%-13.13s ", name);
 402         } else {
 403                 push_out(name);
 404         }
 405 }
 406 
 407 /*ARGSUSED*/
 408 static void
 409 show_disk_header(void *v1, void *v2, void *data)
 410 {
 411         push_out(disk_header);
 412 }
 413 
 414 /*
 415  * Write out a two line header. What is written out depends on the flags
 416  * selected but in the worst case consists of a tty header, a disk header
 417  * providing information for 4 disks and a cpu header.
 418  *
 419  * The tty header consists of the word "tty" on the first line above the
 420  * words "tin tout" on the next line. If present the tty portion consumes
 421  * the first 10 characters of each line since "tin tout" is surrounded
 422  * by single spaces.
 423  *
 424  * Each of the disk sections is a 14 character "block" in which the name of
 425  * the disk is centered in the first 12 characters of the first line.
 426  *
 427  * The cpu section is an 11 character block with "cpu" centered over the
 428  * section.
 429  *
 430  * The worst case should look as follows:
 431  *
 432  * 0---------1--------2---------3---------4---------5---------6---------7-------
 433  *    tty        sd0           sd1           sd2           sd3           cpu
 434  *  tin tout kps tps serv  kps tps serv  kps tps serv  kps tps serv  us sy dt id
 435  *  NNN NNNN NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN  NN NN NN NN
 436  *
 437  * When -D is specified, the disk header looks as follows (worst case):
 438  *
 439  * 0---------1--------2---------3---------4---------5---------6---------7-------
 440  *     tty        sd0           sd1             sd2          sd3          cpu
 441  *   tin tout rps wps util  rps wps util  rps wps util  rps wps util us sy dt id
 442  *   NNN NNNN NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN  NNN NNN NNNN NN NN NN NN
 443  */
 444 static void
 445 printhdr(int sig)
 446 {
 447         /*
 448          * If we're here because a signal fired, reenable the
 449          * signal.
 450          */
 451         if (sig)
 452                 (void) signal(SIGCONT, printhdr);
 453         if (sig == SIGCONT)
 454                 caught_cont = 1;
 455         /*
 456          * Horizontal mode headers
 457          *
 458          * First line
 459          */
 460         if (do_tty)
 461                 print_tty_hdr1();
 462 
 463         if (do_disk & DISK_NORMAL) {
 464                 (void) snapshot_walk(SNAP_IODEVS, NULL, newss,
 465                     show_disk_name, NULL);
 466         }
 467 
 468         if (do_cpu)
 469                 print_cpu_hdr1();
 470         do_newline();
 471 
 472         /*
 473          * Second line
 474          */
 475         if (do_tty)
 476                 print_tty_hdr2();
 477 
 478         if (do_disk & DISK_NORMAL) {
 479                 (void) snapshot_walk(SNAP_IODEVS, NULL, newss,
 480                     show_disk_header, NULL);
 481         }
 482 
 483         if (do_cpu)
 484                 print_cpu_hdr2();
 485         do_newline();
 486 
 487         tohdr = REPRINT;
 488 }
 489 
 490 /*
 491  * Write out the extended header centered over the core information.
 492  */
 493 static void
 494 write_core_header(void)
 495 {
 496         char *edev = "extended device statistics";
 497         uint_t lead_space_ct;
 498         uint_t follow_space_ct;
 499         size_t edevlen;
 500 
 501         if (do_raw == 0) {
 502                 /*
 503                  * The things we do to look nice...
 504                  *
 505                  * Center the core output header. Make sure we have the
 506                  * right number of trailing spaces for follow-on headers
 507                  * (i.e., cpu and/or tty and/or errors).
 508                  */
 509                 edevlen = strlen(edev);
 510                 lead_space_ct = dh_len - edevlen;
 511                 lead_space_ct /= 2;
 512                 if (lead_space_ct > 0) {
 513                         follow_space_ct = dh_len - (lead_space_ct + edevlen);
 514                         if (do_disk & DISK_ERRORS)
 515                                 follow_space_ct -= DISK_ERROR_LEN;
 516                         if ((do_disk & DISK_EXTENDED) && do_conversions)
 517                                 follow_space_ct -= DEVICE_LEN;
 518 
 519                         push_out("%1$*2$.*2$s%3$s%4$*5$.*5$s", one_blank,
 520                             lead_space_ct, edev, one_blank, follow_space_ct);
 521                 } else
 522                         push_out("%56s", edev);
 523         } else
 524                 push_out(edev);
 525 }
 526 
 527 /*
 528  * In extended mode headers, we don't want to reprint the header on
 529  * signals as they are printed every time anyways.
 530  */
 531 static void
 532 printxhdr(void)
 533 {
 534 
 535         /*
 536          * Vertical mode headers
 537          */
 538         if (do_disk & DISK_EXTENDED)
 539                 setup(write_core_header);
 540         if (do_disk & DISK_ERRORS)
 541                 setup(print_err_hdr);
 542 
 543         if (do_conversions) {
 544                 setup(do_newline);
 545                 if (do_disk & (DISK_EXTENDED | DISK_ERRORS))
 546                         setup(print_disk_header);
 547                 setup(do_newline);
 548         } else {
 549                 if (do_tty)
 550                         setup(print_tty_hdr1);
 551                 if (do_cpu)
 552                         setup(print_cpu_hdr1);
 553                 setup(do_newline);
 554 
 555                 if (do_disk & (DISK_EXTENDED | DISK_ERRORS))
 556                         setup(print_disk_header);
 557                 if (do_tty)
 558                         setup(print_tty_hdr2);
 559                 if (do_cpu)
 560                         setup(print_cpu_hdr2);
 561                 setup(do_newline);
 562         }
 563 }
 564 
 565 /*
 566  * Write out a line for this disk - note that show_disk writes out
 567  * full lines or blocks for each selected disk.
 568  */
 569 static void
 570 show_disk(void *v1, void *v2, void *data)
 571 {
 572         uint32_t err_counters[NUMBER_OF_ERR_COUNTERS];
 573         boolean_t display_err_counters = do_disk & DISK_ERRORS;
 574         struct iodev_snapshot *old = (struct iodev_snapshot *)v1;
 575         struct iodev_snapshot *new = (struct iodev_snapshot *)v2;
 576         int *count = (int *)data;
 577         double rps, wps, tps, mtps, krps, kwps, kps, avw, avr, w_pct, r_pct;
 578         double wserv, rserv, serv;
 579         double iosize;  /* kb/sec or MB/sec */
 580         double etime, hr_etime;
 581         char *disk_name;
 582         u_longlong_t ldeltas;
 583         uint_t udeltas;
 584         uint64_t t_delta;
 585         uint64_t w_delta;
 586         uint64_t r_delta;
 587         int doit = 1;
 588         uint_t toterrs;
 589         char *fstr;
 590 
 591         if (new == NULL)
 592                 return;
 593 
 594         switch (show_disk_mode) {
 595         case SHOW_FIRST_ONLY:
 596                 if (count != NULL && *count)
 597                         return;
 598                 break;
 599 
 600         case SHOW_SECOND_ONWARDS:
 601                 if (count != NULL && !*count) {
 602                         (*count)++;
 603                         return;
 604                 }
 605                 break;
 606 
 607         default:
 608                 break;
 609         }
 610 
 611         disk_name = do_conversions ? new->is_pretty : new->is_name;
 612         disk_name = disk_name ? disk_name : new->is_name;
 613 
 614         /*
 615          * Only do if we want IO stats - Avoids errors traveling this
 616          * section if that's all we want to see.
 617          */
 618         if (do_disk & DISK_IO_MASK) {
 619                 if (old) {
 620                         t_delta = hrtime_delta(old->is_snaptime,
 621                             new->is_snaptime);
 622                 } else {
 623                         t_delta = hrtime_delta(new->is_crtime,
 624                             new->is_snaptime);
 625                 }
 626 
 627                 if (new->is_nr_children) {
 628                         if (new->is_type == IODEV_CONTROLLER) {
 629                                 t_delta /= new->is_nr_children;
 630                         } else if ((new->is_type == IODEV_IOPATH_LT) ||
 631                             (new->is_type == IODEV_IOPATH_LI)) {
 632                                 /* synthetic path */
 633                                 if (!old) {
 634                                         t_delta = new->is_crtime;
 635                                 }
 636                                 t_delta /= new->is_nr_children;
 637                         }
 638                 }
 639 
 640                 hr_etime = (double)t_delta;
 641                 if (hr_etime == 0.0)
 642                         hr_etime = (double)NANOSEC;
 643                 etime = hr_etime / (double)NANOSEC;
 644 
 645                 /* reads per second */
 646                 udeltas = u32_delta(old ? old->is_stats.reads : 0,
 647                     new->is_stats.reads);
 648                 rps = (double)udeltas;
 649                 rps /= etime;
 650 
 651                 /* writes per second */
 652                 udeltas = u32_delta(old ? old->is_stats.writes : 0,
 653                     new->is_stats.writes);
 654                 wps = (double)udeltas;
 655                 wps /= etime;
 656 
 657                 tps = rps + wps;
 658                         /* transactions per second */
 659 
 660                 /*
 661                  * report throughput as either kb/sec or MB/sec
 662                  */
 663 
 664                 if (!do_megabytes)
 665                         iosize = 1024.0;
 666                 else
 667                         iosize = 1048576.0;
 668 
 669                 ldeltas = ull_delta(old ? old->is_stats.nread : 0,
 670                     new->is_stats.nread);
 671                 if (ldeltas) {
 672                         krps = (double)ldeltas;
 673                         krps /= etime;
 674                         krps /= iosize;
 675                 } else
 676                         krps = 0.0;
 677 
 678                 ldeltas = ull_delta(old ? old->is_stats.nwritten : 0,
 679                     new->is_stats.nwritten);
 680                 if (ldeltas) {
 681                         kwps = (double)ldeltas;
 682                         kwps /= etime;
 683                         kwps /= iosize;
 684                 } else
 685                         kwps = 0.0;
 686 
 687                 /*
 688                  * Blocks transferred per second
 689                  */
 690                 kps = krps + kwps;
 691 
 692                 /*
 693                  * Average number of wait transactions waiting
 694                  */
 695                 w_delta = hrtime_delta((u_longlong_t)
 696                     (old ? old->is_stats.wlentime : 0),
 697                     new->is_stats.wlentime);
 698                 if (w_delta) {
 699                         avw = (double)w_delta;
 700                         avw /= hr_etime;
 701                 } else
 702                         avw = 0.0;
 703 
 704                 /*
 705                  * Average number of run transactions waiting
 706                  */
 707                 r_delta = hrtime_delta(old ? old->is_stats.rlentime : 0,
 708                     new->is_stats.rlentime);
 709                 if (r_delta) {
 710                         avr = (double)r_delta;
 711                         avr /= hr_etime;
 712                 } else
 713                         avr = 0.0;
 714 
 715                 /*
 716                  * Average wait service time in milliseconds
 717                  */
 718                 if (tps > 0.0 && (avw != 0.0 || avr != 0.0)) {
 719                         mtps = 1000.0 / tps;
 720                         if (avw != 0.0)
 721                                 wserv = avw * mtps;
 722                         else
 723                                 wserv = 0.0;
 724 
 725                         if (avr != 0.0)
 726                                 rserv = avr * mtps;
 727                         else
 728                                 rserv = 0.0;
 729                         serv = rserv + wserv;
 730                 } else {
 731                         rserv = 0.0;
 732                         wserv = 0.0;
 733                         serv = 0.0;
 734                 }
 735 
 736                 /* % of time there is a transaction waiting for service */
 737                 t_delta = hrtime_delta(old ? old->is_stats.wtime : 0,
 738                     new->is_stats.wtime);
 739                 if (t_delta) {
 740                         w_pct = (double)t_delta;
 741                         w_pct /= hr_etime;
 742                         w_pct *= 100.0;
 743 
 744                         /*
 745                          * Average the wait queue utilization over the
 746                          * the controller's devices, if this is a controller.
 747                          */
 748                         if (new->is_type == IODEV_CONTROLLER)
 749                                 w_pct /= new->is_nr_children;
 750                 } else
 751                         w_pct = 0.0;
 752 
 753                 /* % of time there is a transaction running */
 754                 t_delta = hrtime_delta(old ? old->is_stats.rtime : 0,
 755                     new->is_stats.rtime);
 756                 if (t_delta) {
 757                         r_pct = (double)t_delta;
 758                         r_pct /= hr_etime;
 759                         r_pct *= 100.0;
 760 
 761                         /*
 762                          * Average the percent busy over the controller's
 763                          * devices, if this is a controller.
 764                          */
 765                         if (new->is_type == IODEV_CONTROLLER)
 766                                 w_pct /= new->is_nr_children;
 767                 } else {
 768                         r_pct = 0.0;
 769                 }
 770 
                /* with do_interval, report totals for the interval, not rates */
 772                 if (do_interval) {
 773                         rps     *= etime;
 774                         wps     *= etime;
 775                         tps     *= etime;
 776                         krps    *= etime;
 777                         kwps    *= etime;
 778                         kps     *= etime;
 779                 }
 780         }
 781 
 782         if (do_disk & (DISK_EXTENDED | DISK_ERRORS)) {
 783                 if ((!do_conversions) && ((suppress_zero == 0) ||
 784                     ((do_disk & DISK_EXTENDED) == 0))) {
 785                         if (do_raw == 0) {
 786                                 push_out("%-*.*s",
 787                                     iodevs_nl, iodevs_nl, disk_name);
 788                         } else {
 789                                 push_out(disk_name);
 790                         }
 791                 }
 792         }
 793 
 794         /*
 795          * The error counters are read first (if asked for and if they are
 796          * available).
 797          */
 798         bzero(err_counters, sizeof (err_counters));
 799         toterrs = 0;
 800         if (display_err_counters && (new->is_errors.ks_data != NULL)) {
 801                 kstat_named_t   *knp;
 802                 int             i;
 803 
 804                 knp = KSTAT_NAMED_PTR(&new->is_errors);
 805                 for (i = 0; i < NUMBER_OF_ERR_COUNTERS; i++) {
 806                         switch (knp[i].data_type) {
 807                                 case KSTAT_DATA_ULONG:
 808                                 case KSTAT_DATA_ULONGLONG:
 809                                         err_counters[i] = knp[i].value.ui32;
 810                                         toterrs += knp[i].value.ui32;
 811                                         break;
 812                                 default:
 813                                         break;
 814                         }
 815                 }
 816         }
 817 
 818         switch (do_disk & DISK_IO_MASK) {
 819         case DISK_OLD:
 820                 if (do_raw == 0)
 821                         fstr = "%3.0f %3.0f %4.0f  ";
 822                 else
 823                         fstr = "%.0f,%.0f,%.0f";
 824                 push_out(fstr, kps, tps, serv);
 825                 break;
 826         case DISK_NEW:
 827                 if (do_raw == 0)
 828                         fstr = "%3.0f %3.0f %4.1f  ";
 829                 else
 830                         fstr = "%.0f,%.0f,%.1f";
 831                 push_out(fstr, rps, wps, r_pct);
 832                 break;
 833         case DISK_EXTENDED:
 834                 if (suppress_zero) {
 835                         if (fzero(rps) && fzero(wps) && fzero(krps) &&
 836                             fzero(kwps) && fzero(avw) && fzero(avr) &&
 837                             fzero(serv) && fzero(w_pct) && fzero(r_pct) &&
 838                             (toterrs == 0)) {
 839                                 doit = 0;
 840                                 display_err_counters = B_FALSE;
 841                         } else if (do_conversions == 0) {
 842                                 if (do_raw == 0) {
 843                                         push_out("%-*.*s",
 844                                             iodevs_nl, iodevs_nl, disk_name);
 845                                 } else {
 846                                         push_out(disk_name);
 847                                 }
 848                         }
 849                 }
 850                 if (doit) {
 851                         if (!do_conversions) {
 852                                 if (do_raw == 0) {
 853                                         fstr = " %6.1f %6.1f %6.1f %6.1f "
 854                                             "%4.1f %4.1f %6.1f %3.0f "
 855                                             "%3.0f ";
 856                                 } else {
 857                                         fstr = "%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,"
 858                                             "%.1f,%.0f,%.0f";
 859                                 }
 860                                 push_out(fstr, rps, wps, krps, kwps, avw, avr,
 861                                     serv, w_pct, r_pct);
 862                         } else {
 863                                 if (do_raw == 0) {
 864                                         fstr = " %6.1f %6.1f %6.1f %6.1f "
 865                                             "%4.1f %4.1f %6.1f %6.1f "
 866                                             "%3.0f %3.0f ";
 867                                 } else {
 868                                         fstr = "%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,"
 869                                             "%.1f,%.1f,%.0f,%.0f";
 870                                 }
 871                                 push_out(fstr, rps, wps, krps, kwps, avw, avr,
 872                                     wserv, rserv, w_pct, r_pct);
 873                         }
 874                 }
 875                 break;
 876         }
 877 
 878         if (display_err_counters) {
 879                 char    *efstr;
 880                 int     i;
 881 
 882                 if (do_raw == 0) {
 883                         if (do_disk == DISK_ERRORS)
 884                                 push_out(two_blanks);
 885                         efstr = "%3u ";
 886                 } else {
 887                         efstr = "%u";
 888                 }
 889 
 890                 for (i = 0; i < NUMBER_OF_ERR_COUNTERS; i++)
 891                         push_out(efstr, err_counters[i]);
 892 
 893                 push_out(efstr, toterrs);
 894         }
 895 
 896         if (suppress_zero == 0 || doit == 1) {
 897                 if ((do_disk & (DISK_EXTENDED | DISK_ERRORS)) &&
 898                     do_conversions) {
 899                         push_out("%s", disk_name);
 900                         if (show_mountpts && new->is_dname) {
 901                                 mnt_t *mount_pt;
 902                                 char *lu;
 903                                 char *dnlu;
 904                                 char lub[SMALL_SCRATCH_BUFLEN];
 905 
 906                                 lu = strrchr(new->is_dname, '/');
 907                                 if (lu) {
 908                                         /* only the part after a possible '/' */
 909                                         dnlu = strrchr(disk_name, '/');
 910                                         if (dnlu != NULL &&
 911                                             strcmp(dnlu, lu) == 0)
 912                                                 lu = new->is_dname;
 913                                         else {
 914                                                 *lu = 0;
 915                                                 (void) strcpy(lub,
 916                                                     new->is_dname);
 917                                                 *lu = '/';
 918                                                 (void) strcat(lub, "/");
 919                                                 (void) strcat(lub,
 920                                                     disk_name);
 921                                                 lu = lub;
 922                                         }
 923                                 } else
 924                                         lu = disk_name;
 925                                 mount_pt = lookup_mntent_byname(lu);
 926                                 if (mount_pt) {
 927                                         if (do_raw == 0)
 928                                                 push_out(" (%s)",
 929                                                     mount_pt->mount_point);
 930                                         else
 931                                                 push_out("(%s)",
 932                                                     mount_pt->mount_point);
 933                                 }
 934                         }
 935                 }
 936         }
 937 
 938         if ((do_disk & PRINT_VERTICAL) && show_disk_mode != SHOW_FIRST_ONLY)
 939                 do_newline();
 940 
 941         if (count != NULL)
 942                 (*count)++;
 943 }
 944 
 945 static void
 946 usage(void)
 947 {
 948         (void) fprintf(stderr,
 949             "Usage: iostat [-cCdDeEiImMnpPrstxXYz] "
 950             " [-l n] [-T d|u] [disk ...] [interval [count]]\n"
 951             "\t\t-c:    report percentage of time system has spent\n"
 952             "\t\t\tin user/system/dtrace/idle mode\n"
 953             "\t\t-C:    report disk statistics by controller\n"
 954             "\t\t-d:    display disk Kb/sec, transfers/sec, avg. \n"
 955             "\t\t\tservice time in milliseconds  \n"
 956             "\t\t-D:    display disk reads/sec, writes/sec, \n"
 957             "\t\t\tpercentage disk utilization \n"
 958             "\t\t-e:    report device error summary statistics\n"
 959             "\t\t-E:    report extended device error statistics\n"
 960             "\t\t-i:    show device IDs for -E output\n"
 961             "\t\t-I:    report the counts in each interval,\n"
 962             "\t\t\tinstead of rates, where applicable\n"
 963             "\t\t-l n:  Limit the number of disks to n\n"
 964             "\t\t-m:    Display mount points (most useful with -p)\n"
 965             "\t\t-M:    Display data throughput in MB/sec "
 966             "instead of Kb/sec\n"
 967             "\t\t-n:    convert device names to cXdYtZ format\n"
 968             "\t\t-p:    report per-partition disk statistics\n"
 969             "\t\t-P:    report per-partition disk statistics only,\n"
 970             "\t\t\tno per-device disk statistics\n"
 971             "\t\t-f:    report ZFS-level statistics for ZFS pool and\n"
 972             "\t\t\tindividual vdevs\n"
 973             "\t\t-F:    report ZFS pool and individual physical vdevs\n"
 974             "\t\t\tstatistics only, no per-device statistics\n"
 975             "\t\t-r:    Display data in comma separated format\n"
 976             "\t\t-s:    Suppress state change messages\n"
 977             "\t\t-T d|u Display a timestamp in date (d) or unix "
 978             "time_t (u)\n"
 979             "\t\t-t:    display chars read/written to terminals\n"
 980             "\t\t-x:    display extended disk statistics\n"
 981             "\t\t-X:    display I/O path statistics\n"
 982             "\t\t-Y:    display I/O path (I/T/L) statistics\n"
 983             "\t\t-z:    Suppress entries with all zero values\n");
 984         exit(1);
 985 }
 986 
 987 /*ARGSUSED*/
 988 static void
 989 show_disk_errors(void *v1, void *v2, void *d)
 990 {
 991         struct iodev_snapshot *disk = (struct iodev_snapshot *)v2;
 992         kstat_named_t *knp;
 993         size_t  col;
 994         int     i, len;
 995         char    *dev_name;
 996 
 997         if (disk->is_errors.ks_ndata == 0)
 998                 return;
 999         if (disk->is_type == IODEV_CONTROLLER)
1000                 return;
1001 
1002         dev_name = do_conversions ? disk->is_pretty : disk->is_name;
1003         dev_name = dev_name ? dev_name : disk->is_name;
1004 
1005         len = strlen(dev_name);
1006         if (len > 20)
1007                 push_out("%s ", dev_name);
1008         else if (len > 16)
1009                 push_out("%-20.20s ", dev_name);
1010         else {
1011                 if (do_conversions)
1012                         push_out("%-16.16s ", dev_name);
1013                 else
1014                         push_out("%-9.9s ", dev_name);
1015         }
1016         col = 0;
1017 
1018         knp = KSTAT_NAMED_PTR(&disk->is_errors);
1019         for (i = 0; i < disk->is_errors.ks_ndata; i++) {
1020                 /* skip kstats that the driver did not kstat_named_init */
1021                 if (knp[i].name[0] == 0)
1022                         continue;
1023 
1024                 col += strlen(knp[i].name);
1025 
1026                 switch (knp[i].data_type) {
1027                         case KSTAT_DATA_CHAR:
1028                         case KSTAT_DATA_STRING:
1029                                 if ((strcmp(knp[i].name, "Serial No") == 0) &&
1030                                     do_devid) {
1031                                         if (disk->is_devid) {
1032                                                 push_out("Device Id: %s ",
1033                                                     disk->is_devid);
1034                                                 col += strlen(disk->is_devid);
1035                                         } else {
1036                                                 push_out("Device Id: ");
1037                                         }
1038 
1039                                         break;
1040                                 }
1041                                 if (knp[i].data_type == KSTAT_DATA_CHAR) {
1042                                         push_out("%s: %-.16s ", knp[i].name,
1043                                             &knp[i].value.c[0]);
1044                                         col += strnlen(&knp[i].value.c[0], 16);
1045                                 } else {
1046                                         push_out("%s: %s ", knp[i].name,
1047                                             KSTAT_NAMED_STR_PTR(&knp[i]));
1048                                         col +=
1049                                             KSTAT_NAMED_STR_BUFLEN(&knp[i]) - 1;
1050                                 }
1051                                 break;
1052                         case KSTAT_DATA_ULONG:
1053                                 push_out("%s: %u ", knp[i].name,
1054                                     knp[i].value.ui32);
1055                                 col += 4;
1056                                 break;
1057                         case KSTAT_DATA_ULONGLONG:
1058                                 if (strcmp(knp[i].name, "Size") == 0) {
1059                                         do_newline();
1060                                         push_out("%s: %2.2fGB <%llu bytes>",
1061                                             knp[i].name,
1062                                             (float)knp[i].value.ui64 /
1063                                             DISK_GIGABYTE,
1064                                             knp[i].value.ui64);
1065                                         do_newline();
1066                                         col = 0;
1067                                         break;
1068                                 }
1069                                 push_out("%s: %u ", knp[i].name,
1070                                     knp[i].value.ui32);
1071                                 col += 4;
1072                                 break;
1073                         }
1074                 if ((col >= 62) || (i == 2)) {
1075                         do_newline();
1076                         col = 0;
1077                 }
1078         }
1079         if (col > 0) {
1080                 do_newline();
1081         }
1082         do_newline();
1083 }
1084 
1085 void
1086 do_args(int argc, char **argv)
1087 {
1088         int             c;
1089         int             errflg = 0;
1090         extern char     *optarg;
1091         extern int      optind;
1092 
1093         while ((c = getopt(argc, argv, "tdDxXYCciIpPfFnmMeEszrT:l:")) != EOF)
1094                 switch (c) {
1095                 case 't':
1096                         do_tty++;
1097                         break;
1098                 case 'd':
1099                         do_disk |= DISK_OLD;
1100                         break;
1101                 case 'D':
1102                         do_disk |= DISK_NEW;
1103                         break;
1104                 case 'x':
1105                         do_disk |= DISK_EXTENDED;
1106                         break;
1107                 case 'X':
1108                         if (do_disk & DISK_IOPATH_LTI)
1109                                 errflg++;       /* -Y already used */
1110                         else
1111                                 do_disk |= DISK_IOPATH_LI;
1112                         break;
1113                 case 'Y':
1114                         if (do_disk & DISK_IOPATH_LI)
1115                                 errflg++;       /* -X already used */
1116                         else
1117                                 do_disk |= DISK_IOPATH_LTI;
1118                         break;
1119                 case 'C':
1120                         do_controller++;
1121                         break;
1122                 case 'c':
1123                         do_cpu++;
1124                         break;
1125                 case 'I':
1126                         do_interval++;
1127                         break;
1128                 case 'p':
1129                         do_partitions++;
1130                         break;
1131                 case 'P':
1132                         do_partitions_only++;
1133                         break;
1134                 case 'f':
1135                         do_zfs++;
1136                         break;
1137                 case 'F':
1138                         do_zfs_only++;
1139                         break;
1140                 case 'n':
1141                         do_conversions++;
1142                         break;
1143                 case 'M':
1144                         do_megabytes++;
1145                         break;
1146                 case 'e':
1147                         do_disk |= DISK_ERRORS;
1148                         break;
1149                 case 'E':
1150                         do_disk |= DISK_EXTENDED_ERRORS;
1151                         break;
1152                 case 'i':
1153                         do_devid = 1;
1154                         break;
1155                 case 's':
1156                         suppress_state = 1;
1157                         break;
1158                 case 'z':
1159                         suppress_zero = 1;
1160                         break;
1161                 case 'm':
1162                         show_mountpts = 1;
1163                         break;
1164                 case 'T':
1165                         if (optarg) {
1166                                 if (*optarg == 'u')
1167                                         timestamp_fmt = UDATE;
1168                                 else if (*optarg == 'd')
1169                                         timestamp_fmt = DDATE;
1170                                 else
1171                                         errflg++;
1172                         } else {
1173                                 errflg++;
1174                         }
1175                         break;
1176                 case 'r':
1177                         do_raw = 1;
1178                         break;
1179                 case 'l':
1180                         df.if_max_iodevs = safe_strtoi(optarg, "invalid limit");
1181                         if (df.if_max_iodevs < 1)
1182                                 usage();
1183                         break;
1184                 case '?':
1185                         errflg++;
1186         }
1187 
1188         if ((do_disk & DISK_OLD) && (do_disk & DISK_NEW)) {
1189                 (void) fprintf(stderr, "-d and -D are incompatible.\n");
1190                 usage();
1191         }
1192 
1193         if (errflg) {
1194                 usage();
1195         }
1196 
1197         /* if no output classes explicity specified, use defaults */
1198         if (do_tty == 0 && do_disk == 0 && do_cpu == 0)
1199                 do_tty = do_cpu = 1, do_disk = DISK_OLD;
1200 
1201         /*
1202          * multi-path options (-X, -Y) without a specific vertical
1203          * output format (-x, -e, -E) imply extended -x format
1204          */
1205         if ((do_disk & (DISK_IOPATH_LI | DISK_IOPATH_LTI)) &&
1206             !(do_disk & PRINT_VERTICAL))
1207                 do_disk |= DISK_EXTENDED;
1208 
1209         /*
1210          * If conflicting options take the preferred
1211          * -D and -x result in -x
1212          * -d or -D and -e or -E gives only whatever -d or -D was specified
1213          */
1214         if ((do_disk & DISK_EXTENDED) && (do_disk & DISK_NORMAL))
1215                 do_disk &= ~DISK_NORMAL;
1216         if ((do_disk & DISK_NORMAL) && (do_disk & DISK_ERROR_MASK))
1217                 do_disk &= ~DISK_ERROR_MASK;
1218 
1219         /* nfs, tape, always shown */
1220         df.if_allowed_types = IODEV_NFS | IODEV_TAPE;
1221 
1222         /*
1223          * If limit == 0 then no command line limit was set, else if any of
1224          * the flags that cause unlimited disks were not set,
1225          * use the default of 4
1226          */
1227         if (df.if_max_iodevs == 0) {
1228                 df.if_max_iodevs = DEFAULT_LIMIT;
1229                 df.if_skip_floppy = 1;
1230                 if (do_disk & (DISK_EXTENDED | DISK_ERRORS |
1231                     DISK_EXTENDED_ERRORS)) {
1232                         df.if_max_iodevs = UNLIMITED_IODEVS;
1233                         df.if_skip_floppy = 0;
1234                 }
1235         }
1236         if (do_disk) {
1237                 size_t count = 0;
1238                 size_t i = optind;
1239 
1240                 while (i < argc && !isdigit(argv[i][0])) {
1241                         count++;
1242                         i++;
1243                 }
1244 
1245                 /*
1246                  * "Note:  disks  explicitly  requested
1247                  * are not subject to this disk limit"
1248                  */
1249                 if ((count > df.if_max_iodevs) ||
1250                     (count && (df.if_max_iodevs == UNLIMITED_IODEVS)))
1251                         df.if_max_iodevs = count;
1252 
1253                 df.if_names = safe_alloc(count * sizeof (char *));
1254                 (void) memset(df.if_names, 0, count * sizeof (char *));
1255 
1256                 df.if_nr_names = 0;
1257                 while (optind < argc && !isdigit(argv[optind][0]))
1258                         df.if_names[df.if_nr_names++] = argv[optind++];
1259         }
1260         if (optind < argc) {
1261                 interval = safe_strtoi(argv[optind], "invalid interval");
1262                 if (interval < 1)
1263                         fail(0, "invalid interval");
1264                 optind++;
1265 
1266                 if (optind < argc) {
1267                         iter = safe_strtoi(argv[optind], "invalid count");
1268                         if (iter < 1)
1269                                 fail(0, "invalid count");
1270                         optind++;
1271                 }
1272         }
1273         if (interval == 0)
1274                 iter = 1;
1275         if (optind < argc)
1276                 usage();
1277 }
1278 
1279 /*
1280  * Driver for doing the extended header formatting. Will produce
1281  * the function stack needed to output an extended header based
1282  * on the options selected.
1283  */
1284 
1285 void
1286 do_format(void)
1287 {
1288         char    header[SMALL_SCRATCH_BUFLEN] = {0};
1289         char    ch;
1290         char    iosz;
1291         const char    *fstr;
1292 
1293         disk_header[0] = 0;
1294         ch = (do_interval ? 'i' : 's');
1295         iosz = (do_megabytes ? 'M' : 'k');
1296         if (do_disk & DISK_ERRORS) {
1297                 if (do_raw == 0) {
1298                         (void) sprintf(header, "s/w h/w trn tot ");
1299                 } else
1300                         (void) sprintf(header, "s/w,h/w,trn,tot");
1301         }
1302         switch (do_disk & DISK_IO_MASK) {
1303                 case DISK_OLD:
1304                         if (do_raw == 0)
1305                                 fstr = "%cp%c tp%c serv  ";
1306                         else
1307                                 fstr = "%cp%c,tp%c,serv";
1308                         (void) snprintf(disk_header, sizeof (disk_header),
1309                             fstr, iosz, ch, ch);
1310                         break;
1311                 case DISK_NEW:
1312                         if (do_raw == 0)
1313                                 fstr = "rp%c wp%c util  ";
1314                         else
1315                                 fstr = "%rp%c,wp%c,util";
1316                         (void) snprintf(disk_header, sizeof (disk_header),
1317                             fstr, ch, ch);
1318                         break;
1319                 case DISK_EXTENDED:
1320                         /* This is -x option */
1321                         if (!do_conversions) {
1322                                 /* without -n option */
1323                                 if (do_raw == 0) {
1324                                         /* without -r option */
1325                                         (void) snprintf(disk_header,
1326                                             sizeof (disk_header),
1327                                             "%-*.*s    r/%c    w/%c   "
1328                                             "%cr/%c   %cw/%c wait actv  "
1329                                             "svc_t  %%%%w  %%%%b %s",
1330                                             iodevs_nl, iodevs_nl, "device",
1331                                             ch, ch, iosz, ch, iosz, ch, header);
1332                                 } else {
1333                                         /* with -r option */
1334                                         (void) snprintf(disk_header,
1335                                             sizeof (disk_header),
1336                                             "device,r/%c,w/%c,%cr/%c,%cw/%c,"
1337                                             "wait,actv,svc_t,%%%%w,"
1338                                             "%%%%b%s%s",
1339                                             ch, ch, iosz, ch, iosz, ch,
1340                                             *header == '\0' ? "" : ",",
1341                                             header);
1342                                         /*
1343                                          * if no -e flag, header == '\0...'
1344                                          * Ternary operator above is to prevent
1345                                          * trailing comma in full disk_header
1346                                          */
1347                                 }
1348                         } else {
1349                                 /* with -n option */
1350                                 if (do_raw == 0) {
1351                                         fstr = "    r/%c    w/%c   %cr/%c   "
1352                                             "%cw/%c wait actv wsvc_t asvc_t  "
1353                                             "%%%%w  %%%%b %sdevice";
1354                                 } else {
1355                                         fstr = "r/%c,w/%c,%cr/%c,%cw/%c,"
1356                                             "wait,actv,wsvc_t,asvc_t,"
1357                                             "%%%%w,%%%%b,%sdevice";
1358                                         /*
1359                                          * if -rnxe, "tot" (from -e) and
1360                                          * "device" are run together
1361                                          * due to lack of trailing comma
1362                                          * in 'header'. However, adding
1363                                          * trailing comma to header at
1364                                          * its definition leads to prob-
1365                                          * lems elsewhere so it's added
1366                                          * here in this edge case -rnxe
1367                                          */
1368                                         if (*header != '\0')
1369                                                 (void) strcat(header, ",");
1370                                 }
1371                                 (void) snprintf(disk_header,
1372                                     sizeof (disk_header),
1373                                     fstr, ch, ch, iosz, ch, iosz,
1374                                     ch, header);
1375                         }
1376                         break;
1377                 default:
1378                         break;
1379         }
1380 
1381         /* do DISK_ERRORS header (already added above for DISK_EXTENDED) */
1382         if ((do_disk & DISK_ERRORS) &&
1383             ((do_disk & DISK_IO_MASK) != DISK_EXTENDED)) {
1384                 if (!do_conversions) {
1385                         if (do_raw == 0)
1386                                 (void) snprintf(disk_header,
1387                                     sizeof (disk_header), "%-*.*s  %s",
1388                                     iodevs_nl, iodevs_nl, "device", header);
1389                         else
1390                                 (void) snprintf(disk_header,
1391                                     sizeof (disk_header), "device,%s", header);
1392                 } else {
1393                         if (do_raw == 0) {
1394                                 (void) snprintf(disk_header,
1395                                     sizeof (disk_header),
1396                                     "  %sdevice", header);
1397                         } else {
1398                                 (void) snprintf(disk_header,
1399                                     sizeof (disk_header),
1400                                     "%s,device", header);
1401                         }
1402                 }
1403         } else {
1404                 /*
1405                  * Need to subtract two characters for the % escape in
1406                  * the string.
1407                  */
1408                 dh_len = strlen(disk_header) - 2;
1409         }
1410 
1411         /*
1412          * -n *and* (-E *or* -e *or* -x)
1413          */
1414         if (do_conversions && (do_disk & PRINT_VERTICAL)) {
1415                 if (do_tty)
1416                         setup(print_tty_hdr1);
1417                 if (do_cpu)
1418                         setup(print_cpu_hdr1);
1419                 if (do_tty || do_cpu)
1420                         setup(do_newline);
1421                 if (do_tty)
1422                         setup(print_tty_hdr2);
1423                 if (do_cpu)
1424                         setup(print_cpu_hdr2);
1425                 if (do_tty || do_cpu)
1426                         setup(do_newline);
1427                 if (do_tty)
1428                         setup(print_tty_data);
1429                 if (do_cpu)
1430                         setup(print_cpu_data);
1431                 if (do_tty || do_cpu)
1432                         setup(do_newline);
1433                 printxhdr();
1434 
1435                 setup(show_all_disks);
1436         } else {
1437                 /*
1438                  * These unholy gymnastics are necessary to place CPU/tty
1439                  * data to the right of the disks/errors for the first
1440                  * line in vertical mode.
1441                  */
1442                 if (do_disk & PRINT_VERTICAL) {
1443                         printxhdr();
1444 
1445                         setup(show_first_disk);
1446                         if (do_tty)
1447                                 setup(print_tty_data);
1448                         if (do_cpu)
1449                                 setup(print_cpu_data);
1450                         setup(do_newline);
1451 
1452                         setup(show_other_disks);
1453                 } else {
1454                         setup(hdrout);
1455                         if (do_tty)
1456                                 setup(print_tty_data);
1457                         setup(show_all_disks);
1458                         if (do_cpu)
1459                                 setup(print_cpu_data);
1460                 }
1461 
1462                 setup(do_newline);
1463         }
1464         if (do_disk & DISK_EXTENDED_ERRORS)
1465                 setup(disk_errors);
1466 }
1467 
1468 /*
1469  * Add a new function to the list of functions
1470  * for this invocation. Once on the stack the
1471  * function is never removed nor does its place
1472  * change.
1473  */
1474 void
1475 setup(void (*nfunc)(void))
1476 {
1477         format_t *tmp;
1478 
1479         tmp = safe_alloc(sizeof (format_t));
1480         tmp->nfunc = nfunc;
1481         tmp->next = 0;
1482         if (formatter_end)
1483                 formatter_end->next = tmp;
1484         else
1485                 formatter_list = tmp;
1486         formatter_end = tmp;
1487 
1488 }
1489 
1490 /*
1491  * The functions after this comment are devoted to printing
1492  * various parts of the header. They are selected based on the
1493  * options provided when the program was invoked. The functions
1494  * are either directly invoked in printhdr() or are indirectly
1495  * invoked by being placed on the list of functions used when
1496  * extended headers are used.
1497  */
1498 void
1499 print_tty_hdr1(void)
1500 {
1501         char *fstr;
1502         char *dstr;
1503 
1504         if (do_raw == 0) {
1505                 fstr = "%10.10s";
1506                 dstr = "tty    ";
1507         } else {
1508                 fstr = "%s";
1509                 dstr = "tty";
1510         }
1511         push_out(fstr, dstr);
1512 }
1513 
1514 void
1515 print_tty_hdr2(void)
1516 {
1517         if (do_raw == 0)
1518                 push_out("%-10.10s", " tin tout");
1519         else
1520                 push_out("tin,tout");
1521 }
1522 
1523 void
1524 print_cpu_hdr1(void)
1525 {
1526         char *dstr;
1527 
1528         if (do_raw == 0)
1529                 dstr = "     cpu";
1530         else
1531                 dstr = "cpu";
1532         push_out(dstr);
1533 }
1534 
1535 void
1536 print_cpu_hdr2(void)
1537 {
1538         char *dstr;
1539 
1540         if (do_raw == 0)
1541                 dstr = " us sy dt id";
1542         else
1543                 dstr = "us,sy,dt,id";
1544         push_out(dstr);
1545 }
1546 
1547 /*
1548  * Assumption is that tty data is always first - no need for raw mode leading
1549  * comma.
1550  */
1551 void
1552 print_tty_data(void)
1553 {
1554         char *fstr;
1555         uint64_t deltas;
1556         double raw;
1557         double outch;
1558         kstat_t *oldks = NULL;
1559 
1560         if (oldss)
1561                 oldks = &oldss->s_sys.ss_agg_sys;
1562 
1563         if (do_raw == 0)
1564                 fstr = " %3.0f %4.0f ";
1565         else
1566                 fstr = "%.0f,%.0f";
1567         deltas = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "rawch");
1568         raw = deltas;
1569         raw /= getime;
1570         deltas = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "outch");
1571         outch = deltas;
1572         outch /= getime;
1573         push_out(fstr, raw, outch);
1574 }
1575 
1576 /*
1577  * Write out CPU data
1578  */
1579 void
1580 print_cpu_data(void)
1581 {
1582         char *fstr;
1583         uint64_t idle;
1584         uint64_t user;
1585         uint64_t kern;
1586         uint64_t dtrace;
1587         uint64_t nsec_elapsed;
1588         kstat_t *oldks = NULL;
1589 
1590         if (oldss)
1591                 oldks = &oldss->s_sys.ss_agg_sys;
1592 
1593         if (do_raw == 0)
1594                 fstr = " %2.0f %2.0f %2.0f %2.0f";
1595         else
1596                 fstr = "%.0f,%.0f,%.0f,%.0f";
1597 
1598         idle = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "cpu_ticks_idle");
1599         user = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "cpu_ticks_user");
1600         kern = kstat_delta(oldks, &newss->s_sys.ss_agg_sys, "cpu_ticks_kernel");
1601         dtrace = kstat_delta(oldks, &newss->s_sys.ss_agg_sys,
1602             "cpu_nsec_dtrace");
1603         nsec_elapsed = newss->s_sys.ss_agg_sys.ks_snaptime -
1604             (oldks == NULL ? 0 : oldks->ks_snaptime);
1605         push_out(fstr, user * percent, kern * percent,
1606             dtrace * 100.0 / nsec_elapsed / newss->s_nr_active_cpus,
1607             idle * percent);
1608 }
1609 
1610 /*
1611  * Emit the appropriate header.
1612  */
1613 void
1614 hdrout(void)
1615 {
1616         if (do_raw == 0) {
1617                 if (--tohdr == 0)
1618                         printhdr(0);
1619         } else if (hdr_out == 0) {
1620                 printhdr(0);
1621                 hdr_out = 1;
1622         }
1623 }
1624 
1625 /*
1626  * Write out disk errors when -E is specified.
1627  */
1628 void
1629 disk_errors(void)
1630 {
1631         (void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk_errors, NULL);
1632 }
1633 
1634 void
1635 show_first_disk(void)
1636 {
1637         int count = 0;
1638 
1639         show_disk_mode = SHOW_FIRST_ONLY;
1640 
1641         (void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk, &count);
1642 }
1643 
1644 void
1645 show_other_disks(void)
1646 {
1647         int count = 0;
1648 
1649         show_disk_mode = SHOW_SECOND_ONWARDS;
1650 
1651         (void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk, &count);
1652 }
1653 
1654 void
1655 show_all_disks(void)
1656 {
1657         int count = 0;
1658 
1659         show_disk_mode = SHOW_ALL;
1660 
1661         (void) snapshot_walk(SNAP_IODEVS, oldss, newss, show_disk, &count);
1662 }
1663 
1664 /*
1665  * Write a newline out and clear the lineout flag.
1666  */
1667 static void
1668 do_newline(void)
1669 {
1670         if (lineout) {
1671                 (void) putchar('\n');
1672                 lineout = 0;
1673         }
1674 }
1675 
1676 /*
1677  * Generalized printf function that determines what extra
1678  * to print out if we're in raw mode. At this time we
1679  * don't care about errors.
1680  */
1681 static void
1682 push_out(const char *message, ...)
1683 {
1684         va_list args;
1685 
1686         va_start(args, message);
1687         if (do_raw && lineout == 1)
1688                 (void) putchar(',');
1689         (void) vprintf(message, args);
1690         va_end(args);
1691         lineout = 1;
1692 }
1693 
1694 /*
1695  * Emit the header string when -e is specified.
1696  */
1697 static void
1698 print_err_hdr(void)
1699 {
1700         char obuf[SMALL_SCRATCH_BUFLEN];
1701 
1702         if (do_raw) {
1703                 push_out("errors");
1704                 return;
1705         }
1706 
1707         if (do_conversions == 0) {
1708                 if (!(do_disk & DISK_EXTENDED)) {
1709                         (void) snprintf(obuf, sizeof (obuf),
1710                             "%11s", one_blank);
1711                         push_out(obuf);
1712                 }
1713         } else if (do_disk == DISK_ERRORS)
1714                 push_out(two_blanks);
1715         else
1716                 push_out(one_blank);
1717         push_out("---- errors --- ");
1718 }
1719 
1720 /*
1721  * Emit the header string when -e is specified.
1722  */
1723 static void
1724 print_disk_header(void)
1725 {
1726         push_out(disk_header);
1727 }
1728 
/*
 * Return how far an unsigned counter advanced from old to new,
 * allowing for the counter having wrapped around once in between.
 *
 * Unsigned arithmetic in C is carried out modulo 2^N, where N is the
 * width of the type, so a plain subtraction already produces the
 * wrapped distance when new < old.  No MAX constant is needed, and the
 * result stays correct in the odd future case of u_longlong_t being
 * more than 64 bits wide, which a formula built on UINT64_MAX would
 * not.
 */

u_longlong_t
ull_delta(u_longlong_t old, u_longlong_t new)
{
        return (new - old);
}
1748 
/*
 * Difference between two readings of an unsigned 32 bit counter.
 * When the new reading is smaller than the old one the counter is
 * taken to have wrapped once past UINT32_MAX.
 */
uint_t
u32_delta(uint_t old, uint_t new)
{
        return ((new < old) ? (UINT32_MAX - old) + new + 1 : new - old);
}
1762 
/*
 * Treat a value as zero when it lies in [0, EPSILON).  Negative values
 * are never considered zero.
 *
 * This is exactly what standard iostat output needs; do not use it for
 * anything else.
 */
#define EPSILON (0.1)
static int
fzero(double value)
{
        if (value < 0.0)
                return (0);

        return (value < EPSILON);
}
1773 
/*
 * Convert the decimal string val to an int.
 *
 *   val     the text to convert; all of it must be a base-10 integer
 *   errmsg  text placed ahead of the offending string in the failure
 *           message
 *
 * fail() is called when val contains no digits at all (for example the
 * empty string), has characters left over after the number, makes
 * strtol() set errno, or holds a value that does not fit in an int.
 *
 * NOTE(review): fail() is presumed not to return - confirm.  Should it
 * return, the (possibly truncated) value is handed back as before.
 */
static int
safe_strtoi(char const *val, char *errmsg)
{
        char *end;
        long tmp;

        errno = 0;
        tmp = strtol(val, &end, 10);

        /*
         * end == val catches input with nothing to convert, which
         * strtol() reports only by leaving end at the start and
         * returning 0.  The round trip through int catches values
         * that a long can hold but an int cannot, which the final
         * cast would otherwise truncate silently.
         */
        if (end == val || *end != '\0' || errno != 0 ||
            (long)(int)tmp != tmp)
                fail(0, "%s %s", errmsg, val);

        return ((int)tmp);
}