Print this page
    
NEX-13937 Improve kstat performance
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-4425 support KSTAT_DATA_STRING in non-virtual named kstats
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
SMB-50 User-mode SMB server
 Includes work by these authors:
 Thomas Keiser <thomas.keiser@nexenta.com>
 Albert Lee <trisk@nexenta.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/sys/kstat.h
          +++ new/usr/src/uts/common/sys/kstat.h
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   *
  25   25   * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  26   26   */
  27   27  
  28   28  #ifndef _SYS_KSTAT_H
  29   29  #define _SYS_KSTAT_H
  30   30  
  31   31  /*
  32   32   * Definition of general kernel statistics structures and /dev/kstat ioctls
  33   33   */
  34   34  
  35   35  #include <sys/types.h>
  36   36  #include <sys/time.h>
  37   37  
  38   38  #ifdef  __cplusplus
  39   39  extern "C" {
  40   40  #endif
  41   41  
  42   42  typedef int     kid_t;          /* unique kstat id */
  43   43  
  44   44  /*
  45   45   * Kernel statistics driver (/dev/kstat) ioctls
  46   46   */
  47   47  
  48   48  #define KSTAT_IOC_BASE          ('K' << 8)
  49   49  
  50   50  #define KSTAT_IOC_CHAIN_ID      (KSTAT_IOC_BASE | 0x01) /* arg: NULL */
  51   51  #define KSTAT_IOC_READ          (KSTAT_IOC_BASE | 0x02) /* arg: kstat_t * */
  52   52  #define KSTAT_IOC_WRITE         (KSTAT_IOC_BASE | 0x03) /* arg: kstat_t * */
  53   53  
  54   54  /*
  55   55   * /dev/kstat ioctl usage (kd denotes /dev/kstat descriptor):
  56   56   *
  57   57   *      kcid = ioctl(kd, KSTAT_IOC_CHAIN_ID, NULL);
  58   58   *      kcid = ioctl(kd, KSTAT_IOC_READ, kstat_t *);
  59   59   *      kcid = ioctl(kd, KSTAT_IOC_WRITE, kstat_t *);
  60   60   */
  61   61  
  62   62  #define KSTAT_STRLEN    31      /* 30 chars + NULL; must be 16 * n - 1 */
  63   63  
  64   64  /*
  65   65   * The generic kstat header
  66   66   */
  67   67  
  68   68  typedef struct kstat {
  69   69          /*
  70   70           * Fields relevant to both kernel and user
  71   71           */
  72   72          hrtime_t        ks_crtime;      /* creation time (from gethrtime()) */
  73   73          struct kstat    *ks_next;       /* kstat chain linkage */
  74   74          kid_t           ks_kid;         /* unique kstat ID */
  75   75          char            ks_module[KSTAT_STRLEN]; /* provider module name */
  76   76          uchar_t         ks_resv;        /* reserved, currently just padding */
  77   77          int             ks_instance;    /* provider module's instance */
  78   78          char            ks_name[KSTAT_STRLEN]; /* kstat name */
  79   79          uchar_t         ks_type;        /* kstat data type (KSTAT_TYPE_*) */
  80   80          char            ks_class[KSTAT_STRLEN]; /* kstat class */
  81   81          uchar_t         ks_flags;       /* kstat flags (KSTAT_FLAG_*) */
  82   82          void            *ks_data;       /* kstat type-specific data */
  83   83          uint_t          ks_ndata;       /* # of type-specific data records */
  84   84          size_t          ks_data_size;   /* total size of kstat data section */
  85   85          hrtime_t        ks_snaptime;    /* time of last data snapshot */
  86   86          /*
  87   87           * Fields relevant to kernel only
  88   88           */
  89   89          int             (*ks_update)(struct kstat *, int); /* dynamic update */
  90   90          void            *ks_private;    /* arbitrary provider-private data */
  91   91          int             (*ks_snapshot)(struct kstat *, void *, int);
  92   92          void            *ks_lock;       /* really kmutex_t *; protects data */
  93   93  } kstat_t;
  94   94  
  95   95  #ifdef _SYSCALL32
  96   96  
  97   97  typedef int32_t kid32_t;
  98   98  
  99   99  typedef struct kstat32 {
 100  100          /*
 101  101           * Fields relevant to both kernel and user
 102  102           */
 103  103          hrtime_t        ks_crtime;
 104  104          caddr32_t       ks_next;                /* struct kstat pointer */
 105  105          kid32_t         ks_kid;
 106  106          char            ks_module[KSTAT_STRLEN];
 107  107          uint8_t         ks_resv;
 108  108          int32_t         ks_instance;
 109  109          char            ks_name[KSTAT_STRLEN];
 110  110          uint8_t         ks_type;
 111  111          char            ks_class[KSTAT_STRLEN];
 112  112          uint8_t         ks_flags;
 113  113          caddr32_t       ks_data;                /* type-specific data */
 114  114          uint32_t        ks_ndata;
 115  115          size32_t        ks_data_size;
 116  116          hrtime_t        ks_snaptime;
 117  117          /*
 118  118           * Fields relevant to kernel only (only needed here for padding)
 119  119           */
 120  120          int32_t         _ks_update;
 121  121          caddr32_t       _ks_private;
 122  122          int32_t         _ks_snapshot;
 123  123          caddr32_t       _ks_lock;
 124  124  } kstat32_t;
 125  125  
 126  126  #endif  /* _SYSCALL32 */
 127  127  
 128  128  /*
 129  129   * kstat structure and locking strategy
 130  130   *
 131  131   * Each kstat consists of a header section (a kstat_t) and a data section.
 132  132   * The system maintains a set of kstats, protected by kstat_chain_lock.
 133  133   * kstat_chain_lock protects all additions to/deletions from this set,
 134  134   * as well as all changes to kstat headers.  kstat data sections are
 135  135   * *optionally* protected by the per-kstat ks_lock.  If ks_lock is non-NULL,
 136  136   * kstat clients (e.g. /dev/kstat) will acquire this lock for all of their
 137  137   * operations on that kstat.  It is up to the kstat provider to decide whether
 138  138   * guaranteeing consistent data to kstat clients is sufficiently important
 139  139   * to justify the locking cost.  Note, however, that most statistic updates
 140  140   * already occur under one of the provider's mutexes, so if the provider sets
 141  141   * ks_lock to point to that mutex, then kstat data locking is free.
 142  142   *
 143  143   * NOTE: variable-size kstats MUST employ kstat data locking, to prevent
 144  144   * data-size races with kstat clients.
 145  145   *
 146  146   * NOTE: ks_lock is really of type (kmutex_t *); it is declared as (void *)
 147  147   * in the kstat header so that users don't have to be exposed to all of the
 148  148   * kernel's lock-related data structures.
 149  149   */
 150  150  
 151  151  #if     defined(_KERNEL)
 152  152  
 153  153  #define KSTAT_ENTER(k)  \
 154  154          { kmutex_t *lp = (k)->ks_lock; if (lp) mutex_enter(lp); }
 155  155  
 156  156  #define KSTAT_EXIT(k)   \
 157  157          { kmutex_t *lp = (k)->ks_lock; if (lp) mutex_exit(lp); }
 158  158  
 159  159  #define KSTAT_UPDATE(k, rw)             (*(k)->ks_update)((k), (rw))
 160  160  
 161  161  #define KSTAT_SNAPSHOT(k, buf, rw)      (*(k)->ks_snapshot)((k), (buf), (rw))
 162  162  
 163  163  #endif  /* defined(_KERNEL) */
 164  164  
 165  165  /*
 166  166   * kstat time
 167  167   *
 168  168   * All times associated with kstats (e.g. creation time, snapshot time,
 169  169   * kstat_timer_t and kstat_io_t timestamps, etc.) are 64-bit nanosecond values,
 170  170   * as returned by gethrtime().  The accuracy of these timestamps is machine
 171  171   * dependent, but the precision (units) is the same across all platforms.
 172  172   */
 173  173  
 174  174  /*
 175  175   * kstat identity (KID)
 176  176   *
 177  177   * Each kstat is assigned a unique KID (kstat ID) when it is added to the
 178  178   * global kstat chain.  The KID is used as a cookie by /dev/kstat to
 179  179   * request information about the corresponding kstat.  There is also
 180  180   * an identity associated with the entire kstat chain, kstat_chain_id,
 181  181   * which is bumped each time a kstat is added or deleted.  /dev/kstat uses
 182  182   * the chain ID to detect changes in the kstat chain (e.g., a new disk
 183  183   * coming online) between ioctl()s.
 184  184   */
 185  185  
 186  186  /*
 187  187   * kstat module, kstat instance
 188  188   *
 189  189   * ks_module and ks_instance contain the name and instance of the module
 190  190   * that created the kstat.  In cases where there can only be one instance,
 191  191   * ks_instance is 0.  The kernel proper (/kernel/unix) uses "unix" as its
 192  192   * module name.
 193  193   */
 194  194  
 195  195  /*
 196  196   * kstat name
 197  197   *
 198  198   * ks_name gives a meaningful name to a kstat.  The full kstat namespace
 199  199   * is module.instance.name, so the name only need be unique within a
 200  200   * module.  kstat_create() will fail if you try to create a kstat with
 201  201   * an already-used (ks_module, ks_instance, ks_name) triplet.  Spaces are
 202  202   * allowed in kstat names, but strongly discouraged, since they hinder
 203  203   * awk-style processing at user level.
 204  204   */
 205  205  
 206  206  /*
 207  207   * kstat type
 208  208   *
 209  209   * The kstat mechanism provides several flavors of kstat data, defined
 210  210   * below.  The "raw" kstat type is just treated as an array of bytes; you
 211  211   * can use this to export any kind of data you want.
 212  212   *
 213  213   * Some kstat types allow multiple data structures per kstat, e.g.
 214  214   * KSTAT_TYPE_NAMED; others do not.  This is part of the spec for each
 215  215   * kstat data type.
 216  216   *
 217  217   * User-level tools should *not* rely on the #define KSTAT_NUM_TYPES.  To
 218  218   * get this information, read out the standard system kstat "kstat_types".
 219  219   */
 220  220  
 221  221  #define KSTAT_TYPE_RAW          0       /* can be anything */
 222  222                                          /* ks_ndata >= 1 */
 223  223  #define KSTAT_TYPE_NAMED        1       /* name/value pair */
 224  224                                          /* ks_ndata >= 1 */
 225  225  #define KSTAT_TYPE_INTR         2       /* interrupt statistics */
 226  226                                          /* ks_ndata == 1 */
 227  227  #define KSTAT_TYPE_IO           3       /* I/O statistics */
 228  228                                          /* ks_ndata == 1 */
 229  229  #define KSTAT_TYPE_TIMER        4       /* event timer */
 230  230                                          /* ks_ndata >= 1 */
 231  231  
 232  232  #define KSTAT_NUM_TYPES         5
 233  233  
 234  234  /*
 235  235   * kstat class
 236  236   *
 237  237   * Each kstat can be characterized as belonging to some broad class
 238  238   * of statistics, e.g. disk, tape, net, vm, streams, etc.  This field
 239  239   * can be used as a filter to extract related kstats.  The following
 240  240   * values are currently in use: disk, tape, net, controller, vm, kvm,
 241  241   * hat, streams, kstat, and misc.  (The kstat class encompasses things
 242  242   * like kstat_types.)
 243  243   */
 244  244  
 245  245  /*
 246  246   * kstat flags
 247  247   *
 248  248   * Any of the following flags may be passed to kstat_create().  They are
 249  249   * all zero by default.
 250  250   *
 251  251   *      KSTAT_FLAG_VIRTUAL:
 252  252   *
 253  253   *              Tells kstat_create() not to allocate memory for the
 254  254   *              kstat data section; instead, you will set the ks_data
 255  255   *              field to point to the data you wish to export.  This
 256  256   *              provides a convenient way to export existing data
 257  257   *              structures.
 258  258   *
 259  259   *      KSTAT_FLAG_VAR_SIZE:
 260  260   *
 261  261   *              The size of the kstat you are creating will vary over time.
 262  262   *              For example, you may want to use the kstat mechanism to
 263  263   *              export a linked list.  NOTE: The kstat framework does not
 264  264   *              manage the data section, so all variable-size kstats must be
 265  265   *              virtual kstats.  Moreover, variable-size kstats MUST employ
 266  266   *              kstat data locking to prevent data-size races with kstat
 267  267   *              clients.  See the section on "kstat snapshot" for details.
 268  268   *
 269  269   *      KSTAT_FLAG_WRITABLE:
 270  270   *
 271  271   *              Makes the kstat's data section writable by root.
 272  272   *              The ks_snapshot routine (see below) does not need to check for
 273  273   *              this; permission checking is handled in the kstat driver.
 274  274   *
 275  275   *      KSTAT_FLAG_PERSISTENT:
 276  276   *
 277  277   *              Indicates that this kstat is to be persistent over time.
 278  278   *              For persistent kstats, kstat_delete() simply marks the
 279  279   *              kstat as dormant; a subsequent kstat_create() reactivates
 280  280   *              the kstat.  This feature is provided so that statistics
 281  281   *              are not lost across driver close/open (e.g., raw disk I/O
 282  282   *              on a disk with no mounted partitions.)
 283  283   *              NOTE: Persistent kstats cannot be virtual, since ks_data
 284  284   *              points to garbage as soon as the driver goes away.
 285  285   *
 286  286   * The following flags are maintained by the kstat framework:
 287  287   *
 288  288   *      KSTAT_FLAG_DORMANT:
 289  289   *
 290  290   *              For persistent kstats, indicates that the kstat is in the
 291  291   *              dormant state (e.g., the corresponding device is closed).
 292  292   *
 293  293   *      KSTAT_FLAG_INVALID:
 294  294   *
 295  295   *              This flag is set when a kstat is in a transitional state,
 296  296   *              e.g. between kstat_create() and kstat_install().
 297  297   *              kstat clients must not attempt to access the kstat's data
 298  298   *              if this flag is set.
 299  299   *
 300  300   *      KSTAT_FLAG_LONGSTRINGS:
 301  301   *
 302  302   *              Indicates that this kstat contains long strings (which
 303  303   *              are stored outside of the kstat data section). When copied
 304  304   *              out to user space the string data will be held in the data
 305  305   *              section provided by the user.
 306  306   */
 307  307  
 308  308  #define KSTAT_FLAG_VIRTUAL              0x01
 309  309  #define KSTAT_FLAG_VAR_SIZE             0x02
 310  310  #define KSTAT_FLAG_WRITABLE             0x04
 311  311  #define KSTAT_FLAG_PERSISTENT           0x08
 312  312  #define KSTAT_FLAG_DORMANT              0x10
 313  313  #define KSTAT_FLAG_INVALID              0x20
 314  314  #define KSTAT_FLAG_LONGSTRINGS          0x40
 315  315  
 316  316  /*
 317  317   * Dynamic update support
 318  318   *
 319  319   * The kstat mechanism allows for an optional ks_update function to update
 320  320   * kstat data.  This is useful for drivers where the underlying device
 321  321   * keeps cheap hardware stats, but extraction is expensive.  Instead of
 322  322   * constantly keeping the kstat data section up to date, you can supply a
 323  323   * ks_update function which updates the kstat's data section on demand.
 324  324   * To take advantage of this feature, simply set the ks_update field before
 325  325   * calling kstat_install().
 326  326   *
 327  327   * The ks_update function, if supplied, must have the following structure:
 328  328   *
 329  329   *      int
 330  330   *      foo_kstat_update(kstat_t *ksp, int rw)
 331  331   *      {
 332  332   *              if (rw == KSTAT_WRITE) {
 333  333   *                      ... update the native stats from ksp->ks_data;
 334  334   *                              return EACCES if you don't support this
 335  335   *              } else {
 336  336   *                      ... update ksp->ks_data from the native stats
 337  337   *              }
 338  338   *      }
 339  339   *
 340  340   * The ks_update return codes are: 0 for success, EACCES if you don't allow
 341  341   * KSTAT_WRITE, and EIO for any other type of error.
 342  342   *
 343  343   * In general, the ks_update function may need to refer to provider-private
 344  344   * data; for example, it may need a pointer to the provider's raw statistics.
 345  345   * The ks_private field is available for this purpose.  Its use is entirely
 346  346   * at the provider's discretion.
 347  347   *
 348  348   * All variable-size kstats MUST supply a ks_update routine, which computes
 349  349   * and sets ks_data_size (and ks_ndata if that is meaningful), since these
 350  350   * are needed to perform kstat snapshots (see below).
 351  351   *
 352  352   * No kstat locking should be done inside the ks_update routine.  The caller
 353  353   * will already be holding the kstat's ks_lock (to ensure consistent data).
 354  354   */
 355  355  
 356  356  #define KSTAT_READ      0
 357  357  #define KSTAT_WRITE     1
 358  358  
 359  359  /*
 360  360   * Kstat snapshot
 361  361   *
 362  362   * In order to get a consistent view of a kstat's data, clients must obey
 363  363   * the kstat's locking strategy.  However, these clients may need to perform
 364  364   * operations on the data which could cause a fault (e.g. copyout()), or
 365  365   * operations which are simply expensive.  Doing so could cause deadlock
 366  366   * (e.g. if you're holding a disk's kstat lock which is ultimately required
 367  367   * to resolve a copyout() fault), performance degradation (since the providers'
 368  368   * activity is serialized at the kstat lock), device timing problems, etc.
 369  369   *
 370  370   * To avoid these problems, kstat data is provided via snapshots.  Taking
 371  371   * a snapshot is a simple process: allocate a wired-down kernel buffer,
 372  372   * acquire the kstat's data lock, copy the data into the buffer ("take the
 373  373   * snapshot"), and release the lock.  This ensures that the kstat's data lock
 374  374   * will be held as briefly as possible, and that no faults will occur while
 375  375   * the lock is held.
 376  376   *
 377  377   * Normally, the snapshot is taken by default_kstat_snapshot(), which
 378  378   * timestamps the data (sets ks_snaptime), copies it, and does a little
 379  379   * massaging to deal with incomplete transactions on i/o kstats.  However,
 380  380   * this routine only works for kstats with contiguous data (the typical case).
 381  381   * If you create a kstat whose data is, say, a linked list, you must provide
 382  382   * your own ks_snapshot routine.  The routine you supply must have the
 383  383   * following prototype (replace "foo" with something appropriate):
 384  384   *
 385  385   *      int foo_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
 386  386   *
 387  387   * The minimal snapshot routine -- one which copies contiguous data that
 388  388   * doesn't need any massaging -- would be this:
 389  389   *
 390  390   *      ksp->ks_snaptime = gethrtime();
 391  391   *      if (rw == KSTAT_WRITE)
 392  392   *              bcopy(buf, ksp->ks_data, ksp->ks_data_size);
 393  393   *      else
 394  394   *              bcopy(ksp->ks_data, buf, ksp->ks_data_size);
 395  395   *      return (0);
 396  396   *
 397  397   * A more illuminating example is taking a snapshot of a linked list:
 398  398   *
 399  399   *      ksp->ks_snaptime = gethrtime();
 400  400   *      if (rw == KSTAT_WRITE)
 401  401   *              return (EACCES);                ... See below ...
 402  402   *      for (foo = first_foo; foo; foo = foo->next) {
 403  403   *              bcopy((char *) foo, (char *) buf, sizeof (struct foo));
 404  404   *              buf = ((struct foo *) buf) + 1;
 405  405   *      }
 406  406   *      return (0);
 407  407   *
 408  408   * In the example above, we have decided that we don't want to allow
 409  409   * KSTAT_WRITE access, so we return EACCES if this is attempted.
 410  410   *
 411  411   * The key points are:
 412  412   *
 413  413   *      (1) ks_snaptime must be set (via gethrtime()) to timestamp the data.
 414  414   *      (2) Data gets copied from the kstat to the buffer on KSTAT_READ,
 415  415   *              and from the buffer to the kstat on KSTAT_WRITE.
 416  416   *      (3) ks_snapshot return values are: 0 for success, EACCES if you
 417  417   *              don't allow KSTAT_WRITE, and EIO for any other type of error.
 418  418   *
 419  419   * Named kstats (see section on "Named statistics" below) containing long
 420  420   * strings (KSTAT_DATA_STRING) need special handling.  The kstat driver
 421  421   * assumes that all strings are copied into the buffer after the array of
 422  422   * named kstats, and the pointers (KSTAT_NAMED_STR_PTR()) are updated to point
 423  423   * into the copy within the buffer. The default snapshot routine does this,
 424  424   * but overriding routines should contain at least the following:
 425  425   *
 426  426   * if (rw == KSTAT_READ) {
 427  427   *      kstat_named_t *knp = buf;
 428  428   *      char *end = (char *)(knp + ksp->ks_ndata);
 429  429   *      uint_t i;
 430  430   *
 431  431   *      ... Do the regular copy ...
 432  432   *      bcopy(ksp->ks_data, buf, sizeof (kstat_named_t) * ksp->ks_ndata);
 433  433   *
 434  434   *      for (i = 0; i < ksp->ks_ndata; i++, knp++) {
 435  435   *              if (knp->data_type == KSTAT_DATA_STRING &&
 436  436   *                  KSTAT_NAMED_STR_PTR(knp) != NULL) {
 437  437   *                      bcopy(KSTAT_NAMED_STR_PTR(knp), end,
 438  438   *                          KSTAT_NAMED_STR_BUFLEN(knp));
 439  439   *                      KSTAT_NAMED_STR_PTR(knp) = end;
 440  440   *                      end += KSTAT_NAMED_STR_BUFLEN(knp);
 441  441   *              }
 442  442   *      }
 443  443   */
 444  444  
 445  445  /*
 446  446   * Named statistics.
 447  447   *
 448  448   * List of arbitrary name=value statistics.
 449  449   */
 450  450  
 451  451  typedef struct kstat_named {
 452  452          char    name[KSTAT_STRLEN];     /* name of counter */
 453  453          uchar_t data_type;              /* data type */
 454  454          union {
 455  455                  char            c[16];  /* enough for 128-bit ints */
 456  456                  int32_t         i32;
 457  457                  uint32_t        ui32;
 458  458                  struct {
 459  459                          union {
 460  460                                  char            *ptr;   /* NULL-term string */
 461  461  #if defined(_KERNEL) && defined(_MULTI_DATAMODEL)
 462  462                                  caddr32_t       ptr32;
 463  463  #endif
 464  464                                  char            __pad[8]; /* 64-bit padding */
 465  465                          } addr;
 466  466                          uint32_t        len;    /* # bytes for strlen + '\0' */
 467  467                  } str;
 468  468  /*
 469  469   * The int64_t and uint64_t types are not valid for a maximally conformant
 470  470   * 32-bit compilation environment (cc -Xc) using compilers prior to the
 471  471   * introduction of C99 conforming compiler (reference ISO/IEC 9899:1990).
 472  472   * In these cases, the visibility of i64 and ui64 is only permitted for
 473  473   * 64-bit compilation environments or 32-bit non-maximally conformant
 474  474   * C89 or C90 ANSI C compilation environments (cc -Xt and cc -Xa). In the
 475  475   * C99 ANSI C compilation environment, the long long type is supported.
 476  476   * The _INT64_TYPE is defined by the implementation (see sys/int_types.h).
 477  477   */
 478  478  #if defined(_INT64_TYPE)
 479  479                  int64_t         i64;
 480  480                  uint64_t        ui64;
 481  481  #endif
 482  482                  long            l;
 483  483                  ulong_t         ul;
 484  484  
 485  485                  /* These structure members are obsolete */
 486  486  
 487  487                  longlong_t      ll;
 488  488                  u_longlong_t    ull;
 489  489                  float           f;
 490  490                  double          d;
 491  491          } value;                        /* value of counter */
 492  492  } kstat_named_t;
 493  493  
 494  494  #define KSTAT_DATA_CHAR         0
 495  495  #define KSTAT_DATA_INT32        1
 496  496  #define KSTAT_DATA_UINT32       2
 497  497  #define KSTAT_DATA_INT64        3
 498  498  #define KSTAT_DATA_UINT64       4
 499  499  
 500  500  #if !defined(_LP64)
 501  501  #define KSTAT_DATA_LONG         KSTAT_DATA_INT32
 502  502  #define KSTAT_DATA_ULONG        KSTAT_DATA_UINT32
 503  503  #else
 504  504  #if !defined(_KERNEL)
 505  505  #define KSTAT_DATA_LONG         KSTAT_DATA_INT64
 506  506  #define KSTAT_DATA_ULONG        KSTAT_DATA_UINT64
 507  507  #else
 508  508  #define KSTAT_DATA_LONG         7       /* only visible to the kernel */
 509  509  #define KSTAT_DATA_ULONG        8       /* only visible to the kernel */
 510  510  #endif  /* !_KERNEL */
 511  511  #endif  /* !_LP64 */
 512  512  
 513  513  /*
 514  514   * Statistics exporting named kstats with long strings (KSTAT_DATA_STRING)
 515  515   * may not make the assumption that ks_data_size is equal to (ks_ndata * sizeof
 516  516   * (kstat_named_t)).  ks_data_size in these cases is equal to the sum of the
 517  517   * amount of space required to store the strings (ie, the sum of
 518  518   * KSTAT_NAMED_STR_BUFLEN() for all KSTAT_DATA_STRING statistics) plus the
 519  519   * space required to store the kstat_named_t's.
 520  520   *
 521  521   * The default update routine will update ks_data_size automatically for
 522  522   * variable-length kstats containing long strings (using the default update
 523  523   * routine only makes sense if the string is the only thing that is changing
 524  524   * in size, and ks_ndata is constant).  Fixed-length kstats containing long
 525  525   * strings must explicitly change ks_data_size (after creation but before
 526  526   * initialization) to reflect the correct amount of space required for the
 527  527   * long strings and the kstat_named_t's.
 528  528   */
 529  529  #define KSTAT_DATA_STRING       9
 530  530  
 531  531  /* These types are obsolete */
 532  532  
 533  533  #define KSTAT_DATA_LONGLONG     KSTAT_DATA_INT64
 534  534  #define KSTAT_DATA_ULONGLONG    KSTAT_DATA_UINT64
 535  535  #define KSTAT_DATA_FLOAT        5
 536  536  #define KSTAT_DATA_DOUBLE       6
 537  537  
 538  538  #define KSTAT_NAMED_PTR(kptr)   ((kstat_named_t *)(kptr)->ks_data)
 539  539  
 540  540  /*
 541  541   * Retrieve the pointer of the string contained in the given named kstat.
 542  542   */
 543  543  #define KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.str.addr.ptr)
 544  544  
 545  545  /*
 546  546   * Retrieve the length of the buffer required to store the string in the given
 547  547   * named kstat.
 548  548   */
 549  549  #define KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.str.len)
 550  550  
 551  551  /*
 552  552   * Interrupt statistics.
 553  553   *
 554  554   * An interrupt is a hard interrupt (sourced from the hardware device
 555  555   * itself), a soft interrupt (induced by the system via the use of
 556  556   * some system interrupt source), a watchdog interrupt (induced by
 557  557   * a periodic timer call), spurious (an interrupt entry point was
 558  558   * entered but there was no interrupt condition to service),
 559  559   * or multiple service (an interrupt condition was detected and
 560  560   * serviced just prior to returning from any of the other types).
 561  561   *
 562  562   * Measurement of the spurious class of interrupts is useful for
 563  563   * autovectored devices in order to pinpoint any interrupt latency
 564  564   * problems in a particular system configuration.
 565  565   *
 566  566   * Devices that have more than one interrupt of the same
 567  567   * type should use multiple structures.
 568  568   */
 569  569  
 570  570  #define KSTAT_INTR_HARD                 0
 571  571  #define KSTAT_INTR_SOFT                 1
 572  572  #define KSTAT_INTR_WATCHDOG             2
 573  573  #define KSTAT_INTR_SPURIOUS             3
 574  574  #define KSTAT_INTR_MULTSVC              4
 575  575  
 576  576  #define KSTAT_NUM_INTRS                 5
 577  577  
 578  578  typedef struct kstat_intr {
 579  579          uint_t  intrs[KSTAT_NUM_INTRS]; /* interrupt counters */
 580  580  } kstat_intr_t;
 581  581  
 582  582  #define KSTAT_INTR_PTR(kptr)    ((kstat_intr_t *)(kptr)->ks_data)
 583  583  
 584  584  /*
 585  585   * I/O statistics.
 586  586   */
 587  587  
 588  588  typedef struct kstat_io {
 589  589  
 590  590          /*
 591  591           * Basic counters.
 592  592           *
 593  593           * The counters should be updated at the end of service
 594  594           * (e.g., just prior to calling biodone()).
 595  595           */
 596  596  
 597  597          u_longlong_t    nread;          /* number of bytes read */
 598  598          u_longlong_t    nwritten;       /* number of bytes written */
 599  599          uint_t          reads;          /* number of read operations */
 600  600          uint_t          writes;         /* number of write operations */
 601  601  
 602  602          /*
 603  603           * Accumulated time and queue length statistics.
 604  604           *
 605  605           * Accumulated time statistics are kept as a running sum
 606  606           * of "active" time.  Queue length statistics are kept as a
 607  607           * running sum of the product of queue length and elapsed time
 608  608           * at that length -- i.e., a Riemann sum for queue length
 609  609           * integrated against time.  (You can also think of the active time
 610  610           * as a Riemann sum, for the boolean function (queue_length > 0)
 611  611           * integrated against time, or you can think of it as the
 612  612           * Lebesgue measure of the set on which queue_length > 0.)
 613  613           *
 614  614           *              ^
 615  615           *              |                       _________
 616  616           *              8                       | i4    |
 617  617           *              |                       |       |
 618  618           *      Queue   6                       |       |
 619  619           *      Length  |       _________       |       |
 620  620           *              4       | i2    |_______|       |
 621  621           *              |       |           i3          |
 622  622           *              2_______|                       |
 623  623           *              |    i1                         |
 624  624           *              |_______________________________|
 625  625           *              Time->  t1      t2      t3      t4
 626  626           *
 627  627           * At each change of state (entry or exit from the queue),
 628  628           * we add the elapsed time (since the previous state change)
 629  629           * to the active time if the queue length was non-zero during
 630  630           * that interval; and we add the product of the elapsed time
 631  631           * times the queue length to the running length*time sum.
 632  632           *
 633  633           * This method is generalizable to measuring residency
 634  634           * in any defined system: instead of queue lengths, think
 635  635           * of "outstanding RPC calls to server X".
 636  636           *
 637  637           * A large number of I/O subsystems have at least two basic
 638  638           * "lists" of transactions they manage: one for transactions
 639  639           * that have been accepted for processing but for which processing
 640  640           * has yet to begin, and one for transactions which are actively
 641  641           * being processed (but not done). For this reason, two cumulative
 642  642           * time statistics are defined here: wait (pre-service) time,
 643  643           * and run (service) time.
 644  644           *
 645  645           * All times are 64-bit nanoseconds (hrtime_t), as returned by
 646  646           * gethrtime().
 647  647           *
 648  648           * The units of cumulative busy time are accumulated nanoseconds.
 649  649           * The units of cumulative length*time products are elapsed time
 650  650           * times queue length.
 651  651           *
 652  652           * Updates to the fields below are performed implicitly by calls to
 653  653           * these six functions:
 654  654           *
 655  655           *      kstat_waitq_enter()
 656  656           *      kstat_waitq_exit()
 657  657           *      kstat_runq_enter()
 658  658           *      kstat_runq_exit()
 659  659           *
 660  660           *      kstat_waitq_to_runq()           (see below)
 661  661           *      kstat_runq_back_to_waitq()      (see below)
 662  662           *
 663  663           * Since kstat_waitq_exit() is typically followed immediately
 664  664           * by kstat_runq_enter(), there is a single kstat_waitq_to_runq()
 665  665           * function which performs both operations.  This is a performance
 666  666           * win since only one timestamp is required.
 667  667           *
 668  668           * In some instances, it may be necessary to move a request from
 669  669           * the run queue back to the wait queue, e.g. for write throttling.
 670  670           * For these situations, call kstat_runq_back_to_waitq().
 671  671           *
 672  672           * These fields should never be updated by any other means.
 673  673           */
 674  674  
 675  675          hrtime_t wtime;         /* cumulative wait (pre-service) time */
 676  676          hrtime_t wlentime;      /* cumulative wait length*time product */
 677  677          hrtime_t wlastupdate;   /* last time wait queue changed */
 678  678          hrtime_t rtime;         /* cumulative run (service) time */
 679  679          hrtime_t rlentime;      /* cumulative run length*time product */
 680  680          hrtime_t rlastupdate;   /* last time run queue changed */
 681  681  
 682  682          uint_t  wcnt;           /* count of elements in wait state */
 683  683          uint_t  rcnt;           /* count of elements in run state */
 684  684  
 685  685  } kstat_io_t;
 686  686  
 687  687  #define KSTAT_IO_PTR(kptr)      ((kstat_io_t *)(kptr)->ks_data)
 688  688  
 689  689  /*
 690  690   * Event timer statistics - cumulative elapsed time and number of events.
 691  691   *
 692  692   * Updates to these fields are performed implicitly by calls to
 693  693   * kstat_timer_start() and kstat_timer_stop().
 694  694   */
 695  695  
 696  696  typedef struct kstat_timer {
 697  697          char            name[KSTAT_STRLEN];     /* event name buffer */
 698  698          uchar_t         resv;                   /* reserved */
 699  699          u_longlong_t    num_events;             /* number of events */
 700  700          hrtime_t        elapsed_time;           /* cumulative elapsed time */
 701  701          hrtime_t        min_time;               /* shortest event duration */
 702  702          hrtime_t        max_time;               /* longest event duration */
 703  703          hrtime_t        start_time;             /* previous event start time */
 704  704          hrtime_t        stop_time;              /* previous event stop time */
 705  705  } kstat_timer_t;
 706  706  
 707  707  #define KSTAT_TIMER_PTR(kptr)   ((kstat_timer_t *)(kptr)->ks_data)
 708  708  
 709  709  #if     defined(_KERNEL) || defined(_FAKE_KERNEL)
 710  710  
 711  711  #include <sys/t_lock.h>
 712  712  
 713  713  extern kid_t    kstat_chain_id;         /* bumped at each state change */
 714  714  extern void     kstat_init(void);       /* initialize kstat framework */
 715  715  
 716  716  /*
 717  717   * Adding and deleting kstats.
 718  718   *
 719  719   * The typical sequence to add a kstat is:
 720  720   *
 721  721   *      ksp = kstat_create(module, instance, name, class, type, ndata, flags);
 722  722   *      if (ksp) {
 723  723   *              ... provider initialization, if necessary
 724  724   *              kstat_install(ksp);
 725  725   *      }
 726  726   *
 727  727   * There are three logically distinct steps here:
 728  728   *
 729  729   * Step 1: System Initialization (kstat_create)
 730  730   *
 731  731   * kstat_create() performs system initialization.  kstat_create()
 732  732   * allocates memory for the entire kstat (header plus data), initializes
 733  733   * all header fields, initializes the data section to all zeroes, assigns
 734  734   * a unique KID, and puts the kstat onto the system's kstat chain.
 735  735   * The returned kstat is marked invalid (KSTAT_FLAG_INVALID is set),
 736  736   * because the provider (caller) has not yet had a chance to initialize
 737  737   * the data section.
 738  738   *
 739  739   * By default, kstats are exported to all zones on the system.  A kstat may be
 740  740   * created via kstat_create_zone() to specify a zone to which the statistics
 741  741   * should be exported.  kstat_zone_add() may be used to specify additional
 742  742   * zones to which the statistics are to be exported.
 743  743   *
 744  744   * Step 2: Provider Initialization
 745  745   *
 746  746   * The provider performs any necessary initialization of the data section,
 747  747   * e.g. setting the name fields in a KSTAT_TYPE_NAMED.  Virtual kstats set
 748  748   * the ks_data field at this time.  The provider may also set the ks_update,
 749  749   * ks_snapshot, ks_private, and ks_lock fields if necessary.
 750  750   *
 751  751   * Step 3: Installation (kstat_install)
 752  752   *
 753  753   * Once the kstat is completely initialized, kstat_install() clears the
 754  754   * INVALID flag, thus making the kstat accessible to the outside world.
 755  755   * kstat_install() also clears the DORMANT flag for persistent kstats.
 756  756   *
 757  757   * Removing a kstat from the system
 758  758   *
 759  759   * kstat_delete(ksp) removes ksp from the kstat chain and frees all
 760  760   * associated system resources.  NOTE: When you call kstat_delete(),
 761  761   * you must NOT be holding that kstat's ks_lock.  Otherwise, you may
 762  762   * deadlock with a kstat reader.
 763  763   *
 764  764   * Persistent kstats
 765  765   *
 766  766   * From the provider's point of view, persistence is transparent.  The only
 767  767   * difference between ephemeral (normal) kstats and persistent kstats
 768  768   * is that you pass KSTAT_FLAG_PERSISTENT to kstat_create().  Magically,
 769  769   * this has the effect of making your data visible even when you're
 770  770   * not home.  Persistence is important to tools like iostat, which want
 771  771   * to get a meaningful picture of disk activity.  Without persistence,
 772  772   * raw disk i/o statistics could never accumulate: they would come and
 773  773   * go with each open/close of the raw device.
 774  774   *
 775  775   * The magic of persistence works by slightly altering the behavior of
 776  776   * kstat_create() and kstat_delete().  The first call to kstat_create()
 777  777   * creates a new kstat, as usual.  However, kstat_delete() does not
 778  778   * actually delete the kstat: it performs one final update of the data
 779  779   * (i.e., calls the ks_update routine), marks the kstat as dormant, and
 780  780   * sets the ks_lock, ks_update, ks_private, and ks_snapshot fields back
 781  781   * to their default values (since they might otherwise point to garbage,
 782  782   * e.g. if the provider is going away).  kstat clients can still access
 783  783   * the dormant kstat just like a live kstat; they just continue to see
 784  784   * the final data values as long as the kstat remains dormant.
 785  785   * All subsequent kstat_create() calls simply find the already-existing,
 786  786   * dormant kstat and return a pointer to it, without altering any fields.
 787  787   * The provider then performs its usual initialization sequence, and
 788  788   * calls kstat_install().  kstat_install() uses the old data values to
 789  789   * initialize the native data (i.e., ks_update is called with KSTAT_WRITE),
 790  790   * thus making it seem like you were never gone.
 791  791   */
 792  792  
 793  793  extern kstat_t *kstat_create(const char *, int, const char *, const char *,
 794  794      uchar_t, uint_t, uchar_t);
 795  795  extern kstat_t *kstat_create_zone(const char *, int, const char *,
 796  796      const char *, uchar_t, uint_t, uchar_t, zoneid_t);
 797  797  extern void kstat_install(kstat_t *);
 798  798  extern void kstat_delete(kstat_t *);
 799  799  extern void kstat_named_setstr(kstat_named_t *knp, const char *src);
 800  800  extern void kstat_set_string(char *, const char *);
 801  801  extern void kstat_delete_byname(const char *, int, const char *);
 802  802  extern void kstat_delete_byname_zone(const char *, int, const char *, zoneid_t);
 803  803  extern void kstat_named_init(kstat_named_t *, const char *, uchar_t);
  
    | 
      ↓ open down ↓ | 
    803 lines elided | 
    
      ↑ open up ↑ | 
  
 804  804  extern void kstat_timer_init(kstat_timer_t *, const char *);
 805  805  extern void kstat_waitq_enter(kstat_io_t *);
 806  806  extern void kstat_waitq_exit(kstat_io_t *);
 807  807  extern void kstat_runq_enter(kstat_io_t *);
 808  808  extern void kstat_runq_exit(kstat_io_t *);
 809  809  extern void kstat_waitq_to_runq(kstat_io_t *);
 810  810  extern void kstat_runq_back_to_waitq(kstat_io_t *);
 811  811  extern void kstat_timer_start(kstat_timer_t *);
 812  812  extern void kstat_timer_stop(kstat_timer_t *);
 813  813  
      814 +extern void kstat_waitq_enter_time(kstat_io_t *, hrtime_t);
      815 +extern void kstat_waitq_exit_time(kstat_io_t *, hrtime_t);
      816 +extern void kstat_runq_enter_time(kstat_io_t *, hrtime_t);
      817 +extern void kstat_runq_exit_time(kstat_io_t *, hrtime_t);
      818 +
 814  819  extern void kstat_zone_add(kstat_t *, zoneid_t);
 815  820  extern void kstat_zone_remove(kstat_t *, zoneid_t);
 816  821  extern int kstat_zone_find(kstat_t *, zoneid_t);
 817  822  
 818  823  extern kstat_t *kstat_hold_bykid(kid_t kid, zoneid_t);
 819  824  extern kstat_t *kstat_hold_byname(const char *, int, const char *, zoneid_t);
 820  825  extern void kstat_rele(kstat_t *);
 821  826  
 822  827  #endif  /* defined(_KERNEL) || defined(_FAKE_KERNEL) */
 823  828  
 824  829  #ifdef  __cplusplus
 825  830  }
 826  831  #endif
 827  832  
 828  833  #endif  /* _SYS_KSTAT_H */
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX