Print this page
    
NEX-13937 Improve kstat performance
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-4425 support KSTAT_DATA_STRING in non-virtual named kstats
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/os/kstat_fr.c
          +++ new/usr/src/uts/common/os/kstat_fr.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright 2014, Joyent, Inc. All rights reserved.
  24   24   * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
  25   25   */
  26   26  
  27   27  /*
  28   28   * Kernel statistics framework
  29   29   */
  30   30  
  31   31  #include <sys/types.h>
  32   32  #include <sys/time.h>
  33   33  #include <sys/systm.h>
  34   34  #include <sys/vmsystm.h>
  35   35  #include <sys/t_lock.h>
  36   36  #include <sys/param.h>
  37   37  #include <sys/errno.h>
  38   38  #include <sys/vmem.h>
  39   39  #include <sys/sysmacros.h>
  40   40  #include <sys/cmn_err.h>
  41   41  #include <sys/kstat.h>
  42   42  #include <sys/sysinfo.h>
  43   43  #include <sys/cpuvar.h>
  44   44  #include <sys/fcntl.h>
  45   45  #include <sys/flock.h>
  46   46  #include <sys/vnode.h>
  47   47  #include <sys/vfs.h>
  48   48  #include <sys/dnlc.h>
  49   49  #include <sys/var.h>
  50   50  #include <sys/debug.h>
  51   51  #include <sys/kobj.h>
  52   52  #include <sys/avl.h>
  53   53  #include <sys/pool_pset.h>
  54   54  #include <sys/cpupart.h>
  55   55  #include <sys/zone.h>
  56   56  #include <sys/loadavg.h>
  57   57  #include <vm/page.h>
  58   58  #include <vm/anon.h>
  59   59  #include <vm/seg_kmem.h>
  60   60  
/*
 * Global lock to protect the AVL trees and kstat_chain_id.
 *
 * The same lock is also taken around updates to a kstat's zone list
 * (kstat_zone_add(), kstat_zone_remove()) and around changes to its
 * e_owner field (kstat_hold(), kstat_rele()).
 */
static kmutex_t kstat_chain_lock;

/*
 * Every install/delete kstat bumps kstat_chain_id.  This is used by:
 *
 * (1)  /dev/kstat, to detect changes in the kstat chain across ioctls;
 *
 * (2)  kstat_create(), to assign a KID (kstat ID) to each new kstat.
 *      /dev/kstat uses the KID as a cookie for kstat lookups.
 *
 * We reserve the first two IDs because some kstats are created before
 * the well-known ones (kstat_headers = 0, kstat_types = 1).
 *
 * We also bump the kstat_chain_id if a zone is gaining or losing visibility
 * into a particular kstat, which is logically equivalent to a kstat being
 * installed/deleted.
 *
 * Note that kstat_init() resets this counter to 0 immediately before it
 * creates kstat_headers, which is how the two reserved IDs get used.
 */

kid_t kstat_chain_id = 2;
  83   83  
/*
 * As far as zones are concerned, there are 3 types of kstat:
 *
 * 1) Those which have a well-known name, and which should return per-zone data
 * depending on which zone is doing the kstat_read().  sockfs:0:sock_unix_list
 * is an example of this type of kstat.
 *
 * 2) Those which should only be exported to a particular list of zones.
 * For example, in the case of nfs:*:mntinfo, we don't want zone A to be
 * able to see NFS mounts associated with zone B, while we want the
 * global zone to be able to see all mounts on the system.
 *
 * 3) Those that can be exported to all zones.  Most system-related
 * kstats fall within this category.
 *
 * An ekstat_t thus contains a list of the zones that the kstat is to be
 * exported to.  The lookup of a module:instance:name thus translates to a
 * lookup of module:instance:name:myzone; if the kstat is not exported
 * to all zones, and does not have the caller's zoneid explicitly
 * enumerated in the list of zones to be exported to, it is the same as
 * if the kstat didn't exist.
 *
 * Writing to kstats is currently disallowed from within a non-global
 * zone, although this restriction could be removed in the future.
 */
/*
 * One element of a kstat's zone list.  The list is singly linked and
 * NULL-terminated; its first element is embedded in the ekstat_t (e_zone)
 * and any further elements are allocated by kstat_zone_add().
 */
typedef struct kstat_zone {
        zoneid_t zoneid;                /* zone ID, or ALL_ZONES */
        struct kstat_zone *next;        /* next zone in list, or NULL */
} kstat_zone_t;
 113  113  
/*
 * Extended kstat structure -- for internal use only.
 *
 * e_ks must remain the first member: code in this file converts between
 * kstat_t * and ekstat_t * with a plain cast.
 */
typedef struct ekstat {
        kstat_t         e_ks;           /* the kstat itself */
        size_t          e_size;         /* total allocation size */
        kthread_t       *e_owner;       /* thread holding this kstat */
        kcondvar_t      e_cv;           /* wait for owner == NULL */
        avl_node_t      e_avl_bykid;    /* AVL tree to sort by KID */
        avl_node_t      e_avl_byname;   /* AVL tree to sort by name */
        kstat_zone_t    e_zone;         /* zone to export stats to */
} ekstat_t;
 126  126  
/*
 * Static buffer used by kstat_alloc() for kstats that are created before
 * kstat_init() has set up kstat_arena.  kstat_initial_ptr is the next free
 * byte in the buffer and kstat_initial_avail is the number of bytes left.
 * kstat_init() later hands the whole buffer to kstat_arena.
 */
static uint64_t kstat_initial[8192];
static void *kstat_initial_ptr = kstat_initial;
static size_t kstat_initial_avail = sizeof (kstat_initial);
static vmem_t *kstat_arena;     /* NULL until kstat_init() runs */

/* Allocation sizes in kstat_alloc() are rounded up to this alignment. */
#define KSTAT_ALIGN     (sizeof (uint64_t))

/*
 * All kstats, indexed by KID (kstat_compare_bykid()) and by
 * module/instance/name (kstat_compare_byname()).
 */
static avl_tree_t kstat_avl_bykid;
static avl_tree_t kstat_avl_byname;
 136  136  
/*
 * Various pointers we need to create kstats at boot time in kstat_init()
 *
 * Each pointer/count pair below supplies the data and the number of
 * kstat_named_t entries for one virtual named kstat (unix:0:segmap,
 * unix:0:biostats and poll:0:pollstats respectively).
 */
extern  kstat_named_t   *segmapcnt_ptr;
extern  uint_t          segmapcnt_ndata;
extern  int             segmap_kstat_update(kstat_t *, int);
extern  kstat_named_t   *biostats_ptr;
extern  uint_t          biostats_ndata;
extern  kstat_named_t   *pollstats_ptr;
extern  uint_t          pollstats_ndata;

/*
 * Kernel globals defined elsewhere.  sysinfo and vminfo are exported
 * directly as raw virtual kstats by kstat_init(); vac, nproc and boot_time
 * are presumably sampled by system_misc_kstat_update() -- TODO confirm.
 */
extern  int     vac;
extern  uint_t  nproc;
extern  time_t  boot_time;
extern  sysinfo_t       sysinfo;
extern  vminfo_t        vminfo;
 153  153  
/*
 * Backing store for the virtual named kstat unix:0:system_misc.
 * kstat_init() points the kstat's ks_data at this structure and installs
 * system_misc_kstat_update() as its update routine.  The structure consists
 * solely of kstat_named_t members because kstat_init() derives ks_ndata
 * from sizeof (system_misc_kstat) / sizeof (kstat_named_t).
 */
struct {
        kstat_named_t ncpus;
        kstat_named_t lbolt;
        kstat_named_t deficit;
        kstat_named_t clk_intr;
        kstat_named_t vac;
        kstat_named_t nproc;
        kstat_named_t avenrun_1min;
        kstat_named_t avenrun_5min;
        kstat_named_t avenrun_15min;
        kstat_named_t boot_time;
        kstat_named_t nsec_per_tick;
} system_misc_kstat = {
        /* initializers must stay in the same order as the members above */
        { "ncpus",              KSTAT_DATA_UINT32 },
        { "lbolt",              KSTAT_DATA_UINT32 },
        { "deficit",            KSTAT_DATA_UINT32 },
        { "clk_intr",           KSTAT_DATA_UINT32 },
        { "vac",                KSTAT_DATA_UINT32 },
        { "nproc",              KSTAT_DATA_UINT32 },
        { "avenrun_1min",       KSTAT_DATA_UINT32 },
        { "avenrun_5min",       KSTAT_DATA_UINT32 },
        { "avenrun_15min",      KSTAT_DATA_UINT32 },
        { "boot_time",          KSTAT_DATA_UINT32 },
        { "nsec_per_tick",      KSTAT_DATA_UINT32 },
};
 179  179  
/*
 * Backing store for the virtual named kstat unix:0:system_pages.
 * kstat_init() points the kstat's ks_data at this structure and installs
 * system_pages_kstat_update() as its update routine.  The structure consists
 * solely of kstat_named_t members because kstat_init() derives ks_ndata
 * from sizeof (system_pages_kstat) / sizeof (kstat_named_t).
 */
struct {
        kstat_named_t physmem;
        kstat_named_t nalloc;
        kstat_named_t nfree;
        kstat_named_t nalloc_calls;
        kstat_named_t nfree_calls;
        kstat_named_t kernelbase;
        kstat_named_t econtig;
        kstat_named_t freemem;
        kstat_named_t availrmem;
        kstat_named_t lotsfree;
        kstat_named_t desfree;
        kstat_named_t minfree;
        kstat_named_t fastscan;
        kstat_named_t slowscan;
        kstat_named_t nscan;
        kstat_named_t desscan;
        kstat_named_t pp_kernel;
        kstat_named_t pagesfree;
        kstat_named_t pageslocked;
        kstat_named_t pagestotal;
} system_pages_kstat = {
        /* initializers must stay in the same order as the members above */
        { "physmem",            KSTAT_DATA_ULONG },
        { "nalloc",             KSTAT_DATA_ULONG },
        { "nfree",              KSTAT_DATA_ULONG },
        { "nalloc_calls",       KSTAT_DATA_ULONG },
        { "nfree_calls",        KSTAT_DATA_ULONG },
        { "kernelbase",         KSTAT_DATA_ULONG },
        { "econtig",            KSTAT_DATA_ULONG },
        { "freemem",            KSTAT_DATA_ULONG },
        { "availrmem",          KSTAT_DATA_ULONG },
        { "lotsfree",           KSTAT_DATA_ULONG },
        { "desfree",            KSTAT_DATA_ULONG },
        { "minfree",            KSTAT_DATA_ULONG },
        { "fastscan",           KSTAT_DATA_ULONG },
        { "slowscan",           KSTAT_DATA_ULONG },
        { "nscan",              KSTAT_DATA_ULONG },
        { "desscan",            KSTAT_DATA_ULONG },
        { "pp_kernel",          KSTAT_DATA_ULONG },
        { "pagesfree",          KSTAT_DATA_ULONG },
        { "pageslocked",        KSTAT_DATA_ULONG },
        { "pagestotal",         KSTAT_DATA_ULONG },
};
 223  223  
/*
 * Update and snapshot routines that kstat_init() installs on the
 * kstat_headers, system_misc and system_pages kstats.
 */
static int header_kstat_update(kstat_t *, int);
static int header_kstat_snapshot(kstat_t *, void *, int);
static int system_misc_kstat_update(kstat_t *, int);
static int system_pages_kstat_update(kstat_t *, int);
 228  228  
/*
 * Per-type description table with KSTAT_NUM_TYPES entries.  kstat_init()
 * publishes each entry's name, paired with its array index, through the
 * unix:0:kstat_types kstat.  The index is presumably the corresponding
 * KSTAT_TYPE_* value, and size/min_ndata/max_ndata are presumably used to
 * validate and size new kstats in kstat_create() -- TODO confirm, neither
 * is visible here.
 */
static struct {
        char    name[KSTAT_STRLEN];     /* human-readable type name */
        size_t  size;                   /* size of one data element */
        uint_t  min_ndata;              /* smallest ks_ndata for the type */
        uint_t  max_ndata;              /* largest ks_ndata for the type */
} kstat_data_type[KSTAT_NUM_TYPES] = {
        { "raw",                1,                      0,      INT_MAX },
        { "name=value",         sizeof (kstat_named_t), 0,      INT_MAX },
        { "interrupt",          sizeof (kstat_intr_t),  1,      1       },
        { "i/o",                sizeof (kstat_io_t),    1,      1       },
        { "event_timer",        sizeof (kstat_timer_t), 0,      INT_MAX },
};
 241  241  
 242  242  int
 243  243  kstat_zone_find(kstat_t *k, zoneid_t zoneid)
 244  244  {
 245  245          ekstat_t *e = (ekstat_t *)k;
 246  246          kstat_zone_t *kz;
 247  247  
 248  248          ASSERT(MUTEX_HELD(&kstat_chain_lock));
 249  249          for (kz = &e->e_zone; kz != NULL; kz = kz->next) {
 250  250                  if (zoneid == ALL_ZONES || kz->zoneid == ALL_ZONES)
 251  251                          return (1);
 252  252                  if (zoneid == kz->zoneid)
 253  253                          return (1);
 254  254          }
 255  255          return (0);
 256  256  }
 257  257  
 258  258  void
 259  259  kstat_zone_remove(kstat_t *k, zoneid_t zoneid)
 260  260  {
 261  261          ekstat_t *e = (ekstat_t *)k;
 262  262          kstat_zone_t *kz, *t = NULL;
 263  263  
 264  264          mutex_enter(&kstat_chain_lock);
 265  265          if (zoneid == e->e_zone.zoneid) {
 266  266                  kz = e->e_zone.next;
 267  267                  ASSERT(kz != NULL);
 268  268                  e->e_zone.zoneid = kz->zoneid;
 269  269                  e->e_zone.next = kz->next;
 270  270                  goto out;
 271  271          }
 272  272          for (kz = &e->e_zone; kz->next != NULL; kz = kz->next) {
 273  273                  if (kz->next->zoneid == zoneid) {
 274  274                          t = kz->next;
 275  275                          kz->next = t->next;
 276  276                          break;
 277  277                  }
 278  278          }
 279  279          ASSERT(t != NULL);      /* we removed something */
 280  280          kz = t;
 281  281  out:
 282  282          kstat_chain_id++;
 283  283          mutex_exit(&kstat_chain_lock);
 284  284          kmem_free(kz, sizeof (*kz));
 285  285  }
 286  286  
 287  287  void
 288  288  kstat_zone_add(kstat_t *k, zoneid_t zoneid)
 289  289  {
 290  290          ekstat_t *e = (ekstat_t *)k;
 291  291          kstat_zone_t *kz;
 292  292  
 293  293          kz = kmem_alloc(sizeof (*kz), KM_NOSLEEP);
 294  294          if (kz == NULL)
 295  295                  return;
 296  296          mutex_enter(&kstat_chain_lock);
 297  297          kz->zoneid = zoneid;
 298  298          kz->next = e->e_zone.next;
 299  299          e->e_zone.next = kz;
 300  300          kstat_chain_id++;
 301  301          mutex_exit(&kstat_chain_lock);
 302  302  }
 303  303  
 304  304  /*
 305  305   * Compare the list of zones for the given kstats, returning 0 if they match
 306  306   * (ie, one list contains ALL_ZONES or both lists contain the same zoneid).
 307  307   * In practice, this is called indirectly by kstat_hold_byname(), so one of the
 308  308   * two lists always has one element, and this is an O(n) operation rather than
 309  309   * O(n^2).
 310  310   */
 311  311  static int
 312  312  kstat_zone_compare(ekstat_t *e1, ekstat_t *e2)
 313  313  {
 314  314          kstat_zone_t *kz1, *kz2;
 315  315  
 316  316          ASSERT(MUTEX_HELD(&kstat_chain_lock));
 317  317          for (kz1 = &e1->e_zone; kz1 != NULL; kz1 = kz1->next) {
 318  318                  for (kz2 = &e2->e_zone; kz2 != NULL; kz2 = kz2->next) {
 319  319                          if (kz1->zoneid == ALL_ZONES ||
 320  320                              kz2->zoneid == ALL_ZONES)
 321  321                                  return (0);
 322  322                          if (kz1->zoneid == kz2->zoneid)
 323  323                                  return (0);
 324  324                  }
 325  325          }
 326  326          return (e1->e_zone.zoneid < e2->e_zone.zoneid ? -1 : 1);
 327  327  }
 328  328  
 329  329  /*
 330  330   * Support for keeping kstats sorted in AVL trees for fast lookups.
 331  331   */
 332  332  static int
 333  333  kstat_compare_bykid(const void *a1, const void *a2)
 334  334  {
 335  335          const kstat_t *k1 = a1;
 336  336          const kstat_t *k2 = a2;
 337  337  
 338  338          if (k1->ks_kid < k2->ks_kid)
 339  339                  return (-1);
 340  340          if (k1->ks_kid > k2->ks_kid)
 341  341                  return (1);
 342  342          return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
 343  343  }
 344  344  
 345  345  static int
 346  346  kstat_compare_byname(const void *a1, const void *a2)
 347  347  {
 348  348          const kstat_t *k1 = a1;
 349  349          const kstat_t *k2 = a2;
 350  350          int s;
 351  351  
 352  352          s = strcmp(k1->ks_module, k2->ks_module);
 353  353          if (s > 0)
 354  354                  return (1);
 355  355          if (s < 0)
 356  356                  return (-1);
 357  357  
 358  358          if (k1->ks_instance < k2->ks_instance)
 359  359                  return (-1);
 360  360          if (k1->ks_instance > k2->ks_instance)
 361  361                  return (1);
 362  362  
 363  363          s = strcmp(k1->ks_name, k2->ks_name);
 364  364          if (s > 0)
 365  365                  return (1);
 366  366          if (s < 0)
 367  367                  return (-1);
 368  368  
 369  369          return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
 370  370  }
 371  371  
 372  372  static kstat_t *
 373  373  kstat_hold(avl_tree_t *t, ekstat_t *template)
 374  374  {
 375  375          kstat_t *ksp;
 376  376          ekstat_t *e;
 377  377  
 378  378          mutex_enter(&kstat_chain_lock);
 379  379          for (;;) {
 380  380                  ksp = avl_find(t, template, NULL);
 381  381                  if (ksp == NULL)
 382  382                          break;
 383  383                  e = (ekstat_t *)ksp;
 384  384                  if (e->e_owner == NULL) {
 385  385                          e->e_owner = curthread;
 386  386                          break;
 387  387                  }
 388  388                  cv_wait(&e->e_cv, &kstat_chain_lock);
 389  389          }
 390  390          mutex_exit(&kstat_chain_lock);
 391  391          return (ksp);
 392  392  }
 393  393  
 394  394  void
 395  395  kstat_rele(kstat_t *ksp)
 396  396  {
 397  397          ekstat_t *e = (ekstat_t *)ksp;
 398  398  
 399  399          mutex_enter(&kstat_chain_lock);
 400  400          ASSERT(e->e_owner == curthread);
 401  401          e->e_owner = NULL;
 402  402          cv_broadcast(&e->e_cv);
 403  403          mutex_exit(&kstat_chain_lock);
 404  404  }
 405  405  
 406  406  kstat_t *
 407  407  kstat_hold_bykid(kid_t kid, zoneid_t zoneid)
 408  408  {
 409  409          ekstat_t e;
 410  410  
 411  411          e.e_ks.ks_kid = kid;
 412  412          e.e_zone.zoneid = zoneid;
 413  413          e.e_zone.next = NULL;
 414  414  
 415  415          return (kstat_hold(&kstat_avl_bykid, &e));
 416  416  }
 417  417  
 418  418  kstat_t *
 419  419  kstat_hold_byname(const char *ks_module, int ks_instance, const char *ks_name,
 420  420      zoneid_t ks_zoneid)
 421  421  {
 422  422          ekstat_t e;
 423  423  
 424  424          kstat_set_string(e.e_ks.ks_module, ks_module);
 425  425          e.e_ks.ks_instance = ks_instance;
 426  426          kstat_set_string(e.e_ks.ks_name, ks_name);
 427  427          e.e_zone.zoneid = ks_zoneid;
 428  428          e.e_zone.next = NULL;
 429  429          return (kstat_hold(&kstat_avl_byname, &e));
 430  430  }
 431  431  
 432  432  static ekstat_t *
 433  433  kstat_alloc(size_t size)
 434  434  {
 435  435          ekstat_t *e = NULL;
 436  436  
 437  437          size = P2ROUNDUP(sizeof (ekstat_t) + size, KSTAT_ALIGN);
 438  438  
 439  439          if (kstat_arena == NULL) {
 440  440                  if (size <= kstat_initial_avail) {
 441  441                          e = kstat_initial_ptr;
 442  442                          kstat_initial_ptr = (char *)kstat_initial_ptr + size;
 443  443                          kstat_initial_avail -= size;
 444  444                  }
 445  445          } else {
 446  446                  e = vmem_alloc(kstat_arena, size, VM_NOSLEEP);
 447  447          }
 448  448  
 449  449          if (e != NULL) {
 450  450                  bzero(e, size);
 451  451                  e->e_size = size;
 452  452                  cv_init(&e->e_cv, NULL, CV_DEFAULT, NULL);
 453  453          }
 454  454  
 455  455          return (e);
 456  456  }
 457  457  
 458  458  static void
 459  459  kstat_free(ekstat_t *e)
 460  460  {
 461  461          cv_destroy(&e->e_cv);
 462  462          vmem_free(kstat_arena, e, e->e_size);
 463  463  }
 464  464  
/*
 * Create various system kstats.
 *
 * This sets up kstat_arena on top of the static kstat_initial buffer,
 * accounts for the kstats that were already carved out of that buffer, and
 * then creates and installs the well-known kstats.  The order of the
 * kstat_create() calls matters for the first two: kstat_chain_id is reset
 * to 0 so that kstat_headers and kstat_types receive the reserved KIDs.
 * Only a failure to create kstat_headers is fatal; any other kstat that
 * cannot be created is silently skipped.
 */
void
kstat_init(void)
{
        kstat_t *ksp;
        ekstat_t *e;
        avl_tree_t *t = &kstat_avl_bykid;

        /*
         * Set up the kstat vmem arena.
         */
        kstat_arena = vmem_create("kstat",
            kstat_initial, sizeof (kstat_initial), KSTAT_ALIGN,
            segkmem_alloc, segkmem_free, heap_arena, 0, VM_SLEEP);

        /*
         * Make initial kstats appear as though they were allocated.
         * Each existing kstat's exact address range [e, e + e_size) is
         * claimed from the arena so that kstat_free() can later return it.
         */
        for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER))
                (void) vmem_xalloc(kstat_arena, e->e_size, KSTAT_ALIGN,
                    0, 0, e, (char *)e + e->e_size,
                    VM_NOSLEEP | VM_BESTFIT | VM_PANIC);

        /*
         * The mother of all kstats.  The first kstat in the system, which
         * always has KID 0, has the headers for all kstats (including itself)
         * as its data.  Thus, the kstat driver does not need any special
         * interface to extract the kstat chain.
         */
        kstat_chain_id = 0;
        ksp = kstat_create("unix", 0, "kstat_headers", "kstat", KSTAT_TYPE_RAW,
            0, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
        if (ksp) {
                /* The header kstat is protected by the chain lock itself. */
                ksp->ks_lock = &kstat_chain_lock;
                ksp->ks_update = header_kstat_update;
                ksp->ks_snapshot = header_kstat_snapshot;
                kstat_install(ksp);
        } else {
                panic("cannot create kstat 'kstat_headers'");
        }

        /*
         * unix:0:kstat_types: one named entry per kstat type, whose value
         * is the type's index in kstat_data_type[].
         */
        ksp = kstat_create("unix", 0, "kstat_types", "kstat",
            KSTAT_TYPE_NAMED, KSTAT_NUM_TYPES, 0);
        if (ksp) {
                int i;
                kstat_named_t *kn = KSTAT_NAMED_PTR(ksp);

                for (i = 0; i < KSTAT_NUM_TYPES; i++) {
                        kstat_named_init(&kn[i], kstat_data_type[i].name,
                            KSTAT_DATA_ULONG);
                        kn[i].value.ul = i;
                }
                kstat_install(ksp);
        }

        /*
         * The remaining kstats are all virtual: their ks_data points at
         * storage that lives outside the kstat.
         */
        ksp = kstat_create("unix", 0, "sysinfo", "misc", KSTAT_TYPE_RAW,
            sizeof (sysinfo_t), KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *) &sysinfo;
                kstat_install(ksp);
        }

        ksp = kstat_create("unix", 0, "vminfo", "vm", KSTAT_TYPE_RAW,
            sizeof (vminfo_t), KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *) &vminfo;
                kstat_install(ksp);
        }

        ksp = kstat_create("unix", 0, "segmap", "vm", KSTAT_TYPE_NAMED,
            segmapcnt_ndata, KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *) segmapcnt_ptr;
                ksp->ks_update = segmap_kstat_update;
                kstat_install(ksp);
        }

        ksp = kstat_create("unix", 0, "biostats", "misc", KSTAT_TYPE_NAMED,
            biostats_ndata, KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *) biostats_ptr;
                kstat_install(ksp);
        }

        ksp = kstat_create("unix", 0, "var", "misc", KSTAT_TYPE_RAW,
            sizeof (struct var), KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *) &v;
                kstat_install(ksp);
        }

        /* ks_ndata is the number of kstat_named_t members in the struct. */
        ksp = kstat_create("unix", 0, "system_misc", "misc", KSTAT_TYPE_NAMED,
            sizeof (system_misc_kstat) / sizeof (kstat_named_t),
            KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *) &system_misc_kstat;
                ksp->ks_update = system_misc_kstat_update;
                kstat_install(ksp);
        }

        ksp = kstat_create("unix", 0, "system_pages", "pages", KSTAT_TYPE_NAMED,
            sizeof (system_pages_kstat) / sizeof (kstat_named_t),
            KSTAT_FLAG_VIRTUAL);
        if (ksp) {
                ksp->ks_data = (void *) &system_pages_kstat;
                ksp->ks_update = system_pages_kstat_update;
                kstat_install(ksp);
        }

        /* pollstats is the only kstat here that is created writable. */
        ksp = kstat_create("poll", 0, "pollstats", "misc", KSTAT_TYPE_NAMED,
            pollstats_ndata, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);

        if (ksp) {
                ksp->ks_data = pollstats_ptr;
                kstat_install(ksp);
        }
}
 584  584  
 585  585  /*
 586  586   * Caller of this should ensure that the string pointed by src
 587  587   * doesn't change while kstat's lock is held. Not doing so defeats
 588  588   * kstat's snapshot strategy as explained in <sys/kstat.h>
 589  589   */
 590  590  void
 591  591  kstat_named_setstr(kstat_named_t *knp, const char *src)
 592  592  {
 593  593          if (knp->data_type != KSTAT_DATA_STRING)
 594  594                  panic("kstat_named_setstr('%p', '%p'): "
 595  595                      "named kstat is not of type KSTAT_DATA_STRING",
 596  596                      (void *)knp, (void *)src);
 597  597  
 598  598          KSTAT_NAMED_STR_PTR(knp) = (char *)src;
 599  599          if (src != NULL)
 600  600                  KSTAT_NAMED_STR_BUFLEN(knp) = strlen(src) + 1;
 601  601          else
 602  602                  KSTAT_NAMED_STR_BUFLEN(knp) = 0;
 603  603  }
 604  604  
/*
 * Copy 'src' into the KSTAT_STRLEN-byte buffer 'dst'.
 *
 * At most KSTAT_STRLEN - 1 bytes are copied, so longer strings are
 * truncated.  Because the buffer is zeroed first and its last byte is never
 * overwritten, the result is always NUL-terminated and zero-padded.
 */
void
kstat_set_string(char *dst, const char *src)
{
        bzero(dst, KSTAT_STRLEN);
        (void) strncpy(dst, src, KSTAT_STRLEN - 1);
}
 611  611  
 612  612  void
 613  613  kstat_named_init(kstat_named_t *knp, const char *name, uchar_t data_type)
 614  614  {
 615  615          kstat_set_string(knp->name, name);
 616  616          knp->data_type = data_type;
 617  617  
 618  618          if (data_type == KSTAT_DATA_STRING)
 619  619                  kstat_named_setstr(knp, NULL);
 620  620  }
 621  621  
/*
 * Initialize a timer kstat entry by setting its name (truncated to fit,
 * see kstat_set_string()).  No other field of 'ktp' is touched.
 */
void
kstat_timer_init(kstat_timer_t *ktp, const char *name)
{
        kstat_set_string(ktp->name, name);
}
 627  627  
 628  628  /* ARGSUSED */
 629  629  static int
 630  630  default_kstat_update(kstat_t *ksp, int rw)
 631  631  {
 632  632          uint_t i;
 633  633          size_t len = 0;
 634  634          kstat_named_t *knp;
 635  635  
 636  636          /*
 637  637           * Named kstats with variable-length long strings have a standard
 638  638           * way of determining how much space is needed to hold the snapshot:
 639  639           */
 640  640          if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED &&
 641  641              (ksp->ks_flags & (KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_LONGSTRINGS))) {
 642  642  
 643  643                  /*
 644  644                   * Add in the space required for the strings
 645  645                   */
 646  646                  knp = KSTAT_NAMED_PTR(ksp);
 647  647                  for (i = 0; i < ksp->ks_ndata; i++, knp++) {
 648  648                          if (knp->data_type == KSTAT_DATA_STRING)
 649  649                                  len += KSTAT_NAMED_STR_BUFLEN(knp);
 650  650                  }
 651  651                  ksp->ks_data_size =
 652  652                      ksp->ks_ndata * sizeof (kstat_named_t) + len;
 653  653          }
 654  654          return (0);
 655  655  }
 656  656  
 657  657  static int
 658  658  default_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
 659  659  {
 660  660          kstat_io_t *kiop;
 661  661          hrtime_t cur_time;
 662  662          size_t  namedsz;
 663  663  
 664  664          ksp->ks_snaptime = cur_time = gethrtime();
 665  665  
 666  666          if (rw == KSTAT_WRITE) {
 667  667                  if (!(ksp->ks_flags & KSTAT_FLAG_WRITABLE))
 668  668                          return (EACCES);
 669  669                  bcopy(buf, ksp->ks_data, ksp->ks_data_size);
 670  670                  return (0);
 671  671          }
 672  672  
 673  673          /*
 674  674           * KSTAT_TYPE_NAMED kstats are defined to have ks_ndata
 675  675           * number of kstat_named_t structures, followed by an optional
 676  676           * string segment. The ks_data generally holds only the
 677  677           * kstat_named_t structures. So we copy it first. The strings,
 678  678           * if any, are copied below. For other kstat types, ks_data holds the
 679  679           * entire buffer.
 680  680           */
 681  681  
 682  682          namedsz = sizeof (kstat_named_t) * ksp->ks_ndata;
 683  683          if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data_size > namedsz)
 684  684                  bcopy(ksp->ks_data, buf, namedsz);
 685  685          else
 686  686                  bcopy(ksp->ks_data, buf, ksp->ks_data_size);
 687  687  
 688  688          /*
 689  689           * Apply kstat type-specific data massaging
 690  690           */
 691  691          switch (ksp->ks_type) {
 692  692  
 693  693          case KSTAT_TYPE_IO:
 694  694                  /*
 695  695                   * Normalize time units and deal with incomplete transactions
 696  696                   */
 697  697                  kiop = (kstat_io_t *)buf;
 698  698  
 699  699                  scalehrtime(&kiop->wtime);
 700  700                  scalehrtime(&kiop->wlentime);
 701  701                  scalehrtime(&kiop->wlastupdate);
 702  702                  scalehrtime(&kiop->rtime);
 703  703                  scalehrtime(&kiop->rlentime);
 704  704                  scalehrtime(&kiop->rlastupdate);
 705  705  
 706  706                  if (kiop->wcnt != 0) {
 707  707                          /* like kstat_waitq_exit */
 708  708                          hrtime_t wfix = cur_time - kiop->wlastupdate;
 709  709                          kiop->wlastupdate = cur_time;
 710  710                          kiop->wlentime += kiop->wcnt * wfix;
 711  711                          kiop->wtime += wfix;
 712  712                  }
 713  713  
 714  714                  if (kiop->rcnt != 0) {
 715  715                          /* like kstat_runq_exit */
 716  716                          hrtime_t rfix = cur_time - kiop->rlastupdate;
 717  717                          kiop->rlastupdate = cur_time;
 718  718                          kiop->rlentime += kiop->rcnt * rfix;
 719  719                          kiop->rtime += rfix;
 720  720                  }
 721  721                  break;
 722  722  
 723  723          case KSTAT_TYPE_NAMED:
 724  724                  /*
 725  725                   * Massage any long strings in at the end of the buffer
 726  726                   */
 727  727                  if (ksp->ks_data_size > namedsz) {
 728  728                          uint_t i;
 729  729                          kstat_named_t *knp = buf;
 730  730                          char *dst = (char *)(knp + ksp->ks_ndata);
 731  731                          /*
 732  732                           * Copy strings and update pointers
 733  733                           */
 734  734                          for (i = 0; i < ksp->ks_ndata; i++, knp++) {
 735  735                                  if (knp->data_type == KSTAT_DATA_STRING &&
 736  736                                      KSTAT_NAMED_STR_PTR(knp) != NULL) {
 737  737                                          bcopy(KSTAT_NAMED_STR_PTR(knp), dst,
 738  738                                              KSTAT_NAMED_STR_BUFLEN(knp));
 739  739                                          KSTAT_NAMED_STR_PTR(knp) = dst;
 740  740                                          dst += KSTAT_NAMED_STR_BUFLEN(knp);
 741  741                                  }
 742  742                          }
 743  743                          ASSERT(dst <= ((char *)buf + ksp->ks_data_size));
 744  744                  }
 745  745                  break;
 746  746          }
 747  747          return (0);
 748  748  }
 749  749  
           /*
            * Update routine for the kstat header list: recompute how many kstat
            * headers header_kstat_snapshot() will copy out for the calling zone.
            *
            * Walks kstat_avl_bykid and counts every entry that passes
            * kstat_zone_find() for the caller's zone id (presumably a zone
            * visibility test -- confirm against kstat_zone_find()) and does not
            * have KSTAT_FLAG_INVALID set.  The count is stored in ks_ndata and
            * ks_data_size is set to count * sizeof (kstat_t).
            *
            * The caller must hold kstat_chain_lock (asserted below).
            * Returns EACCES for KSTAT_WRITE, 0 otherwise.
            */
 750  750  static int
 751  751  header_kstat_update(kstat_t *header_ksp, int rw)
 752  752  {
 753  753          int nkstats = 0;
 754  754          ekstat_t *e;
 755  755          avl_tree_t *t = &kstat_avl_bykid;
 756  756          zoneid_t zoneid;
 757  757  
 758  758          if (rw == KSTAT_WRITE)
 759  759                  return (EACCES);
 760  760  
 761  761          ASSERT(MUTEX_HELD(&kstat_chain_lock));
 762  762  
 763  763          zoneid = getzoneid();
                   /*
                    * This filter is the same one header_kstat_snapshot() applies
                    * when it copies headers out; the two must stay in step so the
                    * size computed here matches what the snapshot writes.
                    */
 764  764          for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
 765  765                  if (kstat_zone_find((kstat_t *)e, zoneid) &&
 766  766                      (e->e_ks.ks_flags & KSTAT_FLAG_INVALID) == 0) {
 767  767                          nkstats++;
 768  768                  }
 769  769          }
 770  770          header_ksp->ks_ndata = nkstats;
 771  771          header_ksp->ks_data_size = nkstats * sizeof (kstat_t);
 772  772          return (0);
 773  773  }
 774  774  
 775  775  /*
 776  776   * Copy out the data section of kstat 0, which consists of the list
 777  777   * of all kstat headers.  By specification, these headers must be
 778  778   * copied out in order of increasing KID.
            *
            * Each kstat_t in kstat_avl_bykid that passes kstat_zone_find() for the
            * caller's zone id and is not marked KSTAT_FLAG_INVALID is copied into
            * buf, one after another.  No bound on buf is checked here; it is
            * presumably sized from the ks_data_size computed by
            * header_kstat_update() -- confirm against the caller.
            *
            * The caller must hold kstat_chain_lock (asserted below).
            * Returns EACCES for KSTAT_WRITE, 0 otherwise.
 779  779   */
 780  780  static int
 781  781  header_kstat_snapshot(kstat_t *header_ksp, void *buf, int rw)
 782  782  {
 783  783          ekstat_t *e;
 784  784          avl_tree_t *t = &kstat_avl_bykid;
 785  785          zoneid_t zoneid;
 786  786  
                   /*
                    * The snapshot time is stamped before the write check, so it is
                    * updated even when the request is rejected with EACCES.
                    */
 787  787          header_ksp->ks_snaptime = gethrtime();
 788  788  
 789  789          if (rw == KSTAT_WRITE)
 790  790                  return (EACCES);
 791  791  
 792  792          ASSERT(MUTEX_HELD(&kstat_chain_lock));
 793  793  
 794  794          zoneid = getzoneid();
 795  795          for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
 796  796                  if (kstat_zone_find((kstat_t *)e, zoneid) &&
 797  797                      (e->e_ks.ks_flags & KSTAT_FLAG_INVALID) == 0) {
 798  798                          bcopy(&e->e_ks, buf, sizeof (kstat_t));
                                   /* advance the output cursor by one header */
 799  799                          buf = (char *)buf + sizeof (kstat_t);
 800  800                  }
 801  801          }
 802  802  
 803  803          return (0);
 804  804  }
 805  805  
           /*
            * Update routine that refreshes the fields of system_misc_kstat.
            *
            * For a caller in the global zone the values come from the system-wide
            * variables (ncpus, avenrun[], boot_time, ddi_get_lbolt(), nproc).  For
            * a caller in any other zone, the CPU count, load averages, boot time,
            * lbolt and process count are taken from curproc->p_zone instead.
            * Every value is stored truncated to uint32_t.
            *
            * Returns EACCES for KSTAT_WRITE, 0 otherwise.
            */
 806  806  /* ARGSUSED */
 807  807  static int
 808  808  system_misc_kstat_update(kstat_t *ksp, int rw)
 809  809  {
 810  810          int myncpus = ncpus;
 811  811          int *loadavgp = &avenrun[0];
 812  812          time_t zone_boot_time;
 813  813          clock_t zone_lbolt;
 814  814          hrtime_t zone_hrtime;
 815  815          size_t zone_nproc;
 816  816  
 817  817          if (rw == KSTAT_WRITE)
 818  818                  return (EACCES);
 819  819  
 820  820          if (!INGLOBALZONE(curproc)) {
 821  821                  /*
 822  822                   * Here we grab cpu_lock which is OK as long as no-one in the
 823  823                   * future attempts to lookup this particular kstat
 824  824                   * (unix:0:system_misc) while holding cpu_lock.
 825  825                   */
 826  826                  mutex_enter(&cpu_lock);
                           /*
                            * The zone's CPU count replaces the system-wide ncpus only
                            * when pool_pset_enabled() is true.
                            */
 827  827                  if (pool_pset_enabled()) {
 828  828                          myncpus = zone_ncpus_get(curproc->p_zone);
 829  829                          ASSERT(myncpus > 0);
 830  830                  }
 831  831                  mutex_exit(&cpu_lock);
 832  832                  loadavgp = &curproc->p_zone->zone_avenrun[0];
 833  833          }
 834  834  
 835  835          if (INGLOBALZONE(curproc)) {
 836  836                  zone_boot_time = boot_time;
 837  837                  zone_lbolt = ddi_get_lbolt();
 838  838                  zone_nproc = nproc;
 839  839          } else {
 840  840                  zone_boot_time = curproc->p_zone->zone_boot_time;
 841  841  
                           /*
                            * lbolt for the zone: the current hrtime in ticks minus
                            * the zone's zsched process p_mstart in ticks (presumably
                            * the time the zone was started -- confirm).
                            */
 842  842                  zone_hrtime = gethrtime();
 843  843                  zone_lbolt = (clock_t)(NSEC_TO_TICK(zone_hrtime) -
 844  844                      NSEC_TO_TICK(curproc->p_zone->zone_zsched->p_mstart));
                           /* zone_nprocs is read under the zone's zone_nlwps_lock */
 845  845                  mutex_enter(&curproc->p_zone->zone_nlwps_lock);
 846  846                  zone_nproc = curproc->p_zone->zone_nprocs;
 847  847                  mutex_exit(&curproc->p_zone->zone_nlwps_lock);
 848  848          }
 849  849  
                   /* note: lbolt and clk_intr are both reported from zone_lbolt */
 850  850          system_misc_kstat.ncpus.value.ui32              = (uint32_t)myncpus;
 851  851          system_misc_kstat.lbolt.value.ui32              = (uint32_t)zone_lbolt;
 852  852          system_misc_kstat.deficit.value.ui32            = (uint32_t)deficit;
 853  853          system_misc_kstat.clk_intr.value.ui32           = (uint32_t)zone_lbolt;
 854  854          system_misc_kstat.vac.value.ui32                = (uint32_t)vac;
 855  855          system_misc_kstat.nproc.value.ui32              = (uint32_t)zone_nproc;
 856  856          system_misc_kstat.avenrun_1min.value.ui32       = (uint32_t)loadavgp[0];
 857  857          system_misc_kstat.avenrun_5min.value.ui32       = (uint32_t)loadavgp[1];
 858  858          system_misc_kstat.avenrun_15min.value.ui32      = (uint32_t)loadavgp[2];
 859  859          system_misc_kstat.boot_time.value.ui32          = (uint32_t)
 860  860              zone_boot_time;
 861  861          system_misc_kstat.nsec_per_tick.value.ui32      = (uint32_t)
 862  862              nsec_per_tick;
 863  863          return (0);
 864  864  }
 865  865  
           /*
            * Address reported as the "econtig" value by system_pages_kstat_update():
            * econtig32 on sparc, econtig everywhere else.
            */
 866  866  #ifdef  __sparc
 867  867  extern caddr_t  econtig32;
 868  868  #else   /* !__sparc */
 869  869  extern caddr_t  econtig;
 870  870  #endif  /* __sparc */
 871  871  
           /*
            * Update routine that refreshes the fields of system_pages_kstat from
            * kobj_stat_get() and the kernel's memory-management variables.
            *
            * Returns EACCES for KSTAT_WRITE, 0 otherwise.
            */
 872  872  /* ARGSUSED */
 873  873  static int
 874  874  system_pages_kstat_update(kstat_t *ksp, int rw)
 875  875  {
 876  876          kobj_stat_t kobj_stat;
 877  877  
 878  878          if (rw == KSTAT_WRITE) {
 879  879                  return (EACCES);
 880  880          }
 881  881  
                   /* nalloc, nfree, nalloc_calls and nfree_calls come from kobj */
 882  882          kobj_stat_get(&kobj_stat);
 883  883          system_pages_kstat.physmem.value.ul     = (ulong_t)physmem;
 884  884          system_pages_kstat.nalloc.value.ul      = kobj_stat.nalloc;
 885  885          system_pages_kstat.nfree.value.ul       = kobj_stat.nfree;
 886  886          system_pages_kstat.nalloc_calls.value.ul = kobj_stat.nalloc_calls;
 887  887          system_pages_kstat.nfree_calls.value.ul = kobj_stat.nfree_calls;
 888  888          system_pages_kstat.kernelbase.value.ul  = (ulong_t)KERNELBASE;
 889  889  
 890  890  #ifdef  __sparc
 891  891          /*
 892  892           * kstat should REALLY be modified to also report kmem64_base and
 893  893           * kmem64_end (see sun4u/os/startup.c), as the virtual address range
 894  894           * [ kernelbase .. econtig ] no longer is truly reflective of the
 895  895           * kernel's vallocs...
 896  896           */
 897  897          system_pages_kstat.econtig.value.ul     = (ulong_t)econtig32;
 898  898  #else   /* !__sparc */
 899  899          system_pages_kstat.econtig.value.ul     = (ulong_t)econtig;
 900  900  #endif  /* __sparc */
 901  901  
                   /*
                    * Note that freemem is reported twice (as "freemem" and as
                    * "pagesfree"), and pageslocked is derived as
                    * availrmem_initial - availrmem.
                    */
 902  902          system_pages_kstat.freemem.value.ul     = (ulong_t)freemem;
 903  903          system_pages_kstat.availrmem.value.ul   = (ulong_t)availrmem;
 904  904          system_pages_kstat.lotsfree.value.ul    = (ulong_t)lotsfree;
 905  905          system_pages_kstat.desfree.value.ul     = (ulong_t)desfree;
 906  906          system_pages_kstat.minfree.value.ul     = (ulong_t)minfree;
 907  907          system_pages_kstat.fastscan.value.ul    = (ulong_t)fastscan;
 908  908          system_pages_kstat.slowscan.value.ul    = (ulong_t)slowscan;
 909  909          system_pages_kstat.nscan.value.ul       = (ulong_t)nscan;
 910  910          system_pages_kstat.desscan.value.ul     = (ulong_t)desscan;
 911  911          system_pages_kstat.pagesfree.value.ul   = (ulong_t)freemem;
 912  912          system_pages_kstat.pageslocked.value.ul = (ulong_t)(availrmem_initial -
 913  913              availrmem);
 914  914          system_pages_kstat.pagestotal.value.ul  = (ulong_t)total_pages;
 915  915          /*
 916  916           * pp_kernel represents total pages used by the kernel since the
 917  917           * startup. This formula takes into account the boottime kernel
 918  918           * footprint and also considers the availrmem changes because of
 919  919           * user explicit page locking.
 920  920           */
 921  921          system_pages_kstat.pp_kernel.value.ul   = (ulong_t)(physinstalled -
 922  922              obp_pages - availrmem - k_anoninfo.ani_mem_resv -
 923  923              anon_segkp_pages_locked - pages_locked -
 924  924              pages_claimed - pages_useclaim);
 925  925  
 926  926          return (0);
 927  927  }
 928  928  
           /*
            * Create a kstat that is not tied to a single zone: a thin wrapper that
            * forwards every argument to kstat_create_zone() with ALL_ZONES as the
            * zone id.  Returns whatever kstat_create_zone() returns (a kstat
            * pointer on success, NULL on failure).
            */
 929  929  kstat_t *
 930  930  kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
 931  931      const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags)
 932  932  {
 933  933          return (kstat_create_zone(ks_module, ks_instance, ks_name, ks_class,
 934  934              ks_type, ks_ndata, ks_flags, ALL_ZONES));
 935  935  }
 936  936  
 937  937  /*
 938  938   * Allocate and initialize a kstat structure.  Or, if a dormant kstat with
 939  939   * the specified name exists, reactivate it.  Returns a pointer to the kstat
 940  940   * on success, NULL on failure.  The kstat will not be visible to the
 941  941   * kstat driver until kstat_install().
            *
            * Arguments:
            *   ks_module, ks_instance, ks_name - identify the kstat; a NULL ks_name
            *       is replaced by "<module><instance>".
            *   ks_class  - class string stored in the kstat.
            *   ks_type   - must be below KSTAT_NUM_TYPES.
            *   ks_ndata  - must lie within the [min_ndata, max_ndata] range that
            *       kstat_data_type[] gives for ks_type.
            *   ks_flags  - KSTAT_FLAG_* bits; PERSISTENT together with VIRTUAL, and
            *       VAR_SIZE without VIRTUAL, are rejected.
            *   ks_zoneid - zone id recorded in the new kstat's e_zone.
            *
            * Every failure path reports the reason through cmn_err() before
            * returning NULL.
 942  942   */
 943  943  kstat_t *
 944  944  kstat_create_zone(const char *ks_module, int ks_instance, const char *ks_name,
 945  945      const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
 946  946      zoneid_t ks_zoneid)
 947  947  {
 948  948          size_t ks_data_size;
 949  949          kstat_t *ksp;
 950  950          ekstat_t *e;
 951  951          avl_index_t where;
 952  952          char namebuf[KSTAT_STRLEN + 16];
 953  953  
                   /*
                    * The two AVL trees are created on demand whenever the by-KID
                    * tree is found empty.  No lock is taken for this check.
                    */
 954  954          if (avl_numnodes(&kstat_avl_bykid) == 0) {
 955  955                  avl_create(&kstat_avl_bykid, kstat_compare_bykid,
 956  956                      sizeof (ekstat_t), offsetof(struct ekstat, e_avl_bykid));
 957  957  
 958  958                  avl_create(&kstat_avl_byname, kstat_compare_byname,
 959  959                      sizeof (ekstat_t), offsetof(struct ekstat, e_avl_byname));
 960  960          }
 961  961  
 962  962          /*
 963  963           * If ks_name == NULL, set the ks_name to <module><instance>.
 964  964           */
 965  965          if (ks_name == NULL) {
 966  966                  char buf[KSTAT_STRLEN];
                           /*
                            * NOTE(review): kstat_set_string() presumably bounds the
                            * copy to KSTAT_STRLEN, which is what keeps the sprintf()
                            * below within namebuf (KSTAT_STRLEN + 16) -- confirm.
                            */
 967  967                  kstat_set_string(buf, ks_module);
 968  968                  (void) sprintf(namebuf, "%s%d", buf, ks_instance);
 969  969                  ks_name = namebuf;
 970  970          }
 971  971  
 972  972          /*
 973  973           * Make sure it's a valid kstat data type
 974  974           */
 975  975          if (ks_type >= KSTAT_NUM_TYPES) {
 976  976                  cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
 977  977                      "invalid kstat type %d",
 978  978                      ks_module, ks_instance, ks_name, ks_type);
 979  979                  return (NULL);
 980  980          }
 981  981  
 982  982          /*
 983  983           * Don't allow persistent virtual kstats -- it makes no sense.
 984  984           * ks_data points to garbage when the client goes away.
 985  985           */
 986  986          if ((ks_flags & KSTAT_FLAG_PERSISTENT) &&
 987  987              (ks_flags & KSTAT_FLAG_VIRTUAL)) {
 988  988                  cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
 989  989                      "cannot create persistent virtual kstat",
 990  990                      ks_module, ks_instance, ks_name);
 991  991                  return (NULL);
 992  992          }
 993  993  
 994  994          /*
 995  995           * Don't allow variable-size physical kstats, since the framework's
 996  996           * memory allocation for physical kstat data is fixed at creation time.
 997  997           */
 998  998          if ((ks_flags & KSTAT_FLAG_VAR_SIZE) &&
 999  999              !(ks_flags & KSTAT_FLAG_VIRTUAL)) {
1000 1000                  cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1001 1001                      "cannot create variable-size physical kstat",
1002 1002                      ks_module, ks_instance, ks_name);
1003 1003                  return (NULL);
1004 1004          }
1005 1005  
1006 1006          /*
1007 1007           * Make sure the number of data fields is within legal range
1008 1008           */
1009 1009          if (ks_ndata < kstat_data_type[ks_type].min_ndata ||
1010 1010              ks_ndata > kstat_data_type[ks_type].max_ndata) {
1011 1011                  cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1012 1012                      "ks_ndata=%d out of range [%d, %d]",
1013 1013                      ks_module, ks_instance, ks_name, (int)ks_ndata,
1014 1014                      kstat_data_type[ks_type].min_ndata,
1015 1015                      kstat_data_type[ks_type].max_ndata);
1016 1016                  return (NULL);
1017 1017          }
1018 1018  
                   /* data section size: per-element size for this type times ks_ndata */
1019 1019          ks_data_size = kstat_data_type[ks_type].size * ks_ndata;
1020 1020  
1021 1021          /*
1022 1022           * If the named kstat already exists and is dormant, reactivate it.
1023 1023           */
1024 1024          ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1025 1025          if (ksp != NULL) {
1026 1026                  if (!(ksp->ks_flags & KSTAT_FLAG_DORMANT)) {
1027 1027                          /*
1028 1028                           * The named kstat exists but is not dormant --
1029 1029                           * this is a kstat namespace collision.
1030 1030                           */
1031 1031                          kstat_rele(ksp);
1032 1032                          cmn_err(CE_WARN,
1033 1033                              "kstat_create('%s', %d, '%s'): namespace collision",
1034 1034                              ks_module, ks_instance, ks_name);
1035 1035                          return (NULL);
1036 1036                  }
                           /*
                            * Reactivation requires the same class, type and ndata as
                            * the dormant kstat, and the caller must not be asking for
                            * a virtual kstat.
                            */
1037 1037                  if ((strcmp(ksp->ks_class, ks_class) != 0) ||
1038 1038                      (ksp->ks_type != ks_type) ||
1039 1039                      (ksp->ks_ndata != ks_ndata) ||
1040 1040                      (ks_flags & KSTAT_FLAG_VIRTUAL)) {
1041 1041                          /*
1042 1042                           * The name is the same, but the other key parameters
1043 1043                           * differ from those of the dormant kstat -- bogus.
1044 1044                           */
1045 1045                          kstat_rele(ksp);
1046 1046                          cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1047 1047                              "invalid reactivation of dormant kstat",
1048 1048                              ks_module, ks_instance, ks_name);
1049 1049                          return (NULL);
1050 1050                  }
1051 1051                  /*
1052 1052                   * Return dormant kstat pointer to caller.  As usual,
1053 1053                   * the kstat is marked invalid until kstat_install().
1054 1054                   */
1055 1055                  ksp->ks_flags |= KSTAT_FLAG_INVALID;
1056 1056                  kstat_rele(ksp);
1057 1057                  return (ksp);
1058 1058          }
1059 1059  
1060 1060          /*
1061 1061           * Allocate memory for the new kstat header and, if this is a physical
1062 1062           * kstat, the data section.
1063 1063           */
1064 1064          e = kstat_alloc(ks_flags & KSTAT_FLAG_VIRTUAL ? 0 : ks_data_size);
1065 1065          if (e == NULL) {
1066 1066                  cmn_err(CE_NOTE, "kstat_create('%s', %d, '%s'): "
1067 1067                      "insufficient kernel memory",
1068 1068                      ks_module, ks_instance, ks_name);
1069 1069                  return (NULL);
1070 1070          }
1071 1071  
1072 1072          /*
1073 1073           * Initialize as many fields as we can.  The caller may reset
1074 1074           * ks_lock, ks_update, ks_private, and ks_snapshot as necessary.
1075 1075           * Creators of virtual kstats may also reset ks_data.  It is
1076 1076           * also up to the caller to initialize the kstat data section,
1077 1077           * if necessary.  All initialization must be complete before
1078 1078           * calling kstat_install().
1079 1079           */
1080 1080          e->e_zone.zoneid = ks_zoneid;
1081 1081          e->e_zone.next = NULL;
1082 1082  
1083 1083          ksp = &e->e_ks;
1084 1084          ksp->ks_crtime          = gethrtime();
1085 1085          kstat_set_string(ksp->ks_module, ks_module);
1086 1086          ksp->ks_instance        = ks_instance;
1087 1087          kstat_set_string(ksp->ks_name, ks_name);
1088 1088          ksp->ks_type            = ks_type;
1089 1089          kstat_set_string(ksp->ks_class, ks_class);
1090 1090          ksp->ks_flags           = ks_flags | KSTAT_FLAG_INVALID;
                   /*
                    * For a physical kstat the data section starts at the first byte
                    * past the ekstat_t (presumably part of the same kstat_alloc()
                    * allocation -- confirm against kstat_alloc()).
                    */
1091 1091          if (ks_flags & KSTAT_FLAG_VIRTUAL)
1092 1092                  ksp->ks_data    = NULL;
1093 1093          else
1094 1094                  ksp->ks_data    = (void *)(e + 1);
1095 1095          ksp->ks_ndata           = ks_ndata;
1096 1096          ksp->ks_data_size       = ks_data_size;
1097 1097          ksp->ks_snaptime        = ksp->ks_crtime;
1098 1098          ksp->ks_update          = default_kstat_update;
1099 1099          ksp->ks_private         = NULL;
1100 1100          ksp->ks_snapshot        = default_kstat_snapshot;
1101 1101          ksp->ks_lock            = NULL;
1102 1102  
1103 1103          mutex_enter(&kstat_chain_lock);
1104 1104  
1105 1105          /*
1106 1106           * Add our kstat to the AVL trees.
                    *
                    * The name is looked up again here, now under kstat_chain_lock;
                    * presumably this catches a kstat of the same name created since
                    * the kstat_hold_byname() check above -- confirm.
1107 1107           */
1108 1108          if (avl_find(&kstat_avl_byname, e, &where) != NULL) {
1109 1109                  mutex_exit(&kstat_chain_lock);
1110 1110                  cmn_err(CE_WARN,
1111 1111                      "kstat_create('%s', %d, '%s'): namespace collision",
1112 1112                      ks_module, ks_instance, ks_name);
1113 1113                  kstat_free(e);
1114 1114                  return (NULL);
1115 1115          }
1116 1116          avl_insert(&kstat_avl_byname, e, where);
1117 1117  
1118 1118          /*
1119 1119           * Loop around until we find an unused KID.
1120 1120           */
1121 1121          do {
1122 1122                  ksp->ks_kid = kstat_chain_id++;
1123 1123          } while (avl_find(&kstat_avl_bykid, e, &where) != NULL);
1124 1124          avl_insert(&kstat_avl_bykid, e, where);
1125 1125  
1126 1126          mutex_exit(&kstat_chain_lock);
1127 1127  
1128 1128          return (ksp);
1129 1129  }
1130 1130  
1131 1131  /*
1132 1132   * Activate a fully initialized kstat and make it visible to /dev/kstat.
            *
            * Panics if a variable-size kstat has no ks_lock, or if a writable named
            * kstat containing KSTAT_DATA_STRING entries still uses
            * default_kstat_snapshot.  If the kstat cannot be held by its KID, a
            * warning is logged and nothing else is done.
1133 1133   */
1134 1134  void
1135 1135  kstat_install(kstat_t *ksp)
1136 1136  {
                   /*
                    * The cast relies on the kstat_t being the leading part of its
                    * ekstat_t (presumably the e_ks member is first -- confirm).
                    */
1137 1137          zoneid_t zoneid = ((ekstat_t *)ksp)->e_zone.zoneid;
1138 1138  
1139 1139          /*
1140 1140           * If this is a variable-size kstat, it MUST provide kstat data locking
1141 1141           * to prevent data-size races with kstat readers.
1142 1142           */
1143 1143          if ((ksp->ks_flags & KSTAT_FLAG_VAR_SIZE) && ksp->ks_lock == NULL) {
1144 1144                  panic("kstat_install('%s', %d, '%s'): "
1145 1145                      "cannot create variable-size kstat without data lock",
1146 1146                      ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1147 1147          }
1148 1148  
                   /* take a hold on the kstat; it is dropped by kstat_rele() below */
1149 1149          if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1150 1150                  cmn_err(CE_WARN, "kstat_install(%p): does not exist",
1151 1151                      (void *)ksp);
1152 1152                  return;
1153 1153          }
1154 1154  
                   /*
                    * Flag named kstats that contain at least one KSTAT_DATA_STRING
                    * entry.  Kstats whose ks_data is still NULL are not scanned.
                    */
1155 1155          if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data != NULL) {
1156 1156                  uint_t i;
1157 1157                  kstat_named_t *knp = KSTAT_NAMED_PTR(ksp);
1158 1158  
1159 1159                  for (i = 0; i < ksp->ks_ndata; i++, knp++) {
1160 1160                          if (knp->data_type == KSTAT_DATA_STRING) {
1161 1161                                  ksp->ks_flags |= KSTAT_FLAG_LONGSTRINGS;
1162 1162                                  break;
1163 1163                          }
1164 1164                  }
1165 1165                  /*
1166 1166                   * The default snapshot routine does not handle KSTAT_WRITE
1167 1167                   * for long strings.
1168 1168                   */
1169 1169                  if ((ksp->ks_flags & KSTAT_FLAG_LONGSTRINGS) &&
1170 1170                      (ksp->ks_flags & KSTAT_FLAG_WRITABLE) &&
1171 1171                      (ksp->ks_snapshot == default_kstat_snapshot)) {
1172 1172                          panic("kstat_install('%s', %d, '%s'): "
1173 1173                              "named kstat containing KSTAT_DATA_STRING "
1174 1174                              "is writable but uses default snapshot routine",
1175 1175                              ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1176 1176                  }
1177 1177          }
1178 1178  
1179 1179          if (ksp->ks_flags & KSTAT_FLAG_DORMANT) {
1180 1180  
1181 1181                  /*
1182 1182                   * We are reactivating a dormant kstat.  Initialize the
1183 1183                   * caller's underlying data to the value it had when the
1184 1184                   * kstat went dormant, and mark the kstat as active.
1185 1185                   * Grab the provider's kstat lock if it's not already held.
1186 1186                   */
1187 1187                  kmutex_t *lp = ksp->ks_lock;
1188 1188                  if (lp != NULL && MUTEX_NOT_HELD(lp)) {
1189 1189                          mutex_enter(lp);
1190 1190                          (void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1191 1191                          mutex_exit(lp);
1192 1192                  } else {
1193 1193                          (void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1194 1194                  }
1195 1195                  ksp->ks_flags &= ~KSTAT_FLAG_DORMANT;
1196 1196          }
1197 1197  
1198 1198          /*
1199 1199           * Now that the kstat is active, make it visible to the kstat driver.
1200 1200           * When copying out kstats the count is determined in
1201 1201           * header_kstat_update() and actually copied into kbuf in
1202 1202           * header_kstat_snapshot(). kstat_chain_lock is held across the two
1203 1203           * calls to ensure that this list doesn't change. Thus, we need to
1204 1204           * also take the lock to ensure that we don't copy the new kstat
1205 1205           * in the 2nd pass and overrun the buf.
1206 1206           */
1207 1207          mutex_enter(&kstat_chain_lock);
1208 1208          ksp->ks_flags &= ~KSTAT_FLAG_INVALID;
1209 1209          mutex_exit(&kstat_chain_lock);
1210 1210          kstat_rele(ksp);
1211 1211  }
1212 1212  
1213 1213  /*
1214 1214   * Remove a kstat from the system.  Or, if it's a persistent kstat,
1215 1215   * just update the data and mark it as dormant.
            *
            * Panics if the caller holds the kstat's data lock (ks_lock).  If the
            * kstat cannot be held by its KID, a warning is logged and nothing is
            * removed or freed.
1216 1216   */
1217 1217  void
1218 1218  kstat_delete(kstat_t *ksp)
1219 1219  {
1220 1220          kmutex_t *lp;
1221 1221          ekstat_t *e = (ekstat_t *)ksp;
1222 1222          zoneid_t zoneid;
1223 1223          kstat_zone_t *kz;
1224 1224  
                   /*
                    * A NULL ksp trips the ASSERT where assertions are enabled;
                    * otherwise (presumably non-DEBUG kernels, where ASSERT compiles
                    * away -- confirm) it is silently ignored by the check below.
                    */
1225 1225          ASSERT(ksp != NULL);
1226 1226  
1227 1227          if (ksp == NULL)
1228 1228                  return;
1229 1229  
1230 1230          zoneid = e->e_zone.zoneid;
1231 1231  
1232 1232          lp = ksp->ks_lock;
1233 1233  
1234 1234          if (lp != NULL && MUTEX_HELD(lp)) {
1235 1235                  panic("kstat_delete(%p): caller holds data lock %p",
1236 1236                      (void *)ksp, (void *)lp);
1237 1237          }
1238 1238  
                   /* take a hold on the kstat; each path below drops it again */
1239 1239          if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1240 1240                  cmn_err(CE_WARN, "kstat_delete(%p): does not exist",
1241 1241                      (void *)ksp);
1242 1242                  return;
1243 1243          }
1244 1244  
1245 1245          if (ksp->ks_flags & KSTAT_FLAG_PERSISTENT) {
1246 1246                  /*
1247 1247                   * Update the data one last time, so that all activity
1248 1248                   * prior to going dormant has been accounted for.
1249 1249                   */
1250 1250                  KSTAT_ENTER(ksp);
1251 1251                  (void) KSTAT_UPDATE(ksp, KSTAT_READ);
1252 1252                  KSTAT_EXIT(ksp);
1253 1253  
1254 1254                  /*
1255 1255                   * Mark the kstat as dormant and restore caller-modifiable
1256 1256                   * fields to default values, so the kstat is readable during
1257 1257                   * the dormant phase.
1258 1258                   */
1259 1259                  ksp->ks_flags |= KSTAT_FLAG_DORMANT;
1260 1260                  ksp->ks_lock = NULL;
1261 1261                  ksp->ks_update = default_kstat_update;
1262 1262                  ksp->ks_private = NULL;
1263 1263                  ksp->ks_snapshot = default_kstat_snapshot;
1264 1264                  kstat_rele(ksp);
1265 1265                  return;
1266 1266          }
1267 1267  
1268 1268          /*
1269 1269           * Remove the kstat from the framework's AVL trees,
1270 1270           * free the allocated memory, and increment kstat_chain_id so
1271 1271           * /dev/kstat clients can detect the event.
1272 1272           */
1273 1273          mutex_enter(&kstat_chain_lock);
1274 1274          avl_remove(&kstat_avl_bykid, e);
1275 1275          avl_remove(&kstat_avl_byname, e);
1276 1276          kstat_chain_id++;
1277 1277          mutex_exit(&kstat_chain_lock);
1278 1278  
                   /*
                    * Free the chained kstat_zone_t entries hanging off e_zone.next.
                    * The embedded e_zone itself is not freed here; it goes away with
                    * the ekstat_t in kstat_free() below.
                    */
1279 1279          kz = e->e_zone.next;
1280 1280          while (kz != NULL) {
1281 1281                  kstat_zone_t *t = kz;
1282 1282  
                           /* fetch the successor before freeing the current node */
1283 1283                  kz = kz->next;
1284 1284                  kmem_free(t, sizeof (*t));
1285 1285          }
1286 1286          kstat_rele(ksp);
1287 1287          kstat_free(e);
1288 1288  }
1289 1289  
           /*
            * Delete the kstat identified by module, instance, name and zone id.
            * The kstat is looked up with kstat_hold_byname(); if none is found the
            * call does nothing.
            */
1290 1290  void
1291 1291  kstat_delete_byname_zone(const char *ks_module, int ks_instance,
1292 1292      const char *ks_name, zoneid_t ks_zoneid)
1293 1293  {
1294 1294          kstat_t *ksp;
1295 1295  
1296 1296          ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1297 1297          if (ksp != NULL) {
                           /*
                            * The lookup hold is dropped before kstat_delete(), which
                            * takes its own hold by KID.
                            */
1298 1298                  kstat_rele(ksp);
  
    | 
      ↓ open down ↓ | 
    1298 lines elided | 
    
      ↑ open up ↑ | 
  
1299 1299                  kstat_delete(ksp);
1300 1300          }
1301 1301  }
1302 1302  
           /*
            * Delete a kstat by name without naming a single zone: forwards to
            * kstat_delete_byname_zone() with ALL_ZONES as the zone id.
            */
1303 1303  void
1304 1304  kstat_delete_byname(const char *ks_module, int ks_instance, const char *ks_name)
1305 1305  {
1306 1306          kstat_delete_byname_zone(ks_module, ks_instance, ks_name, ALL_ZONES);
1307 1307  }
1308 1308  
1309      -/*
1310      - * The sparc V9 versions of these routines can be much cheaper than
1311      - * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
1312      - * For simplicity, however, we always feed the C versions to lint.
1313      - */
1314      -#if !defined(__sparc) || defined(lint) || defined(__lint)
1315      -
1316 1309  void
1317      -kstat_waitq_enter(kstat_io_t *kiop)
     1310 +kstat_waitq_enter_time(kstat_io_t *kiop, const hrtime_t new)
1318 1311  {
1319      -        hrtime_t new, delta;
     1312 +        hrtime_t delta;
1320 1313          ulong_t wcnt;
1321 1314  
1322      -        new = gethrtime_unscaled();
     1315 +        ASSERT(kiop != NULL);
1323 1316          delta = new - kiop->wlastupdate;
1324 1317          kiop->wlastupdate = new;
1325 1318          wcnt = kiop->wcnt++;
1326 1319          if (wcnt != 0) {
1327 1320                  kiop->wlentime += delta * wcnt;
1328 1321                  kiop->wtime += delta;
1329 1322          }
1330 1323  }
1331 1324  
1332 1325  void
1333      -kstat_waitq_exit(kstat_io_t *kiop)
     1326 +kstat_waitq_exit_time(kstat_io_t *kiop, const hrtime_t new)
1334 1327  {
1335      -        hrtime_t new, delta;
     1328 +        hrtime_t delta;
1336 1329          ulong_t wcnt;
1337 1330  
1338      -        new = gethrtime_unscaled();
     1331 +        ASSERT(kiop != NULL);
1339 1332          delta = new - kiop->wlastupdate;
1340 1333          kiop->wlastupdate = new;
1341 1334          wcnt = kiop->wcnt--;
1342 1335          ASSERT((int)wcnt > 0);
1343 1336          kiop->wlentime += delta * wcnt;
1344 1337          kiop->wtime += delta;
1345 1338  }
1346 1339  
1347 1340  void
1348      -kstat_runq_enter(kstat_io_t *kiop)
     1341 +kstat_runq_enter_time(kstat_io_t *kiop, const hrtime_t new)
1349 1342  {
1350      -        hrtime_t new, delta;
     1343 +        hrtime_t delta;
1351 1344          ulong_t rcnt;
1352 1345  
1353      -        new = gethrtime_unscaled();
     1346 +        ASSERT(kiop != NULL);
1354 1347          delta = new - kiop->rlastupdate;
1355 1348          kiop->rlastupdate = new;
1356 1349          rcnt = kiop->rcnt++;
1357 1350          if (rcnt != 0) {
1358 1351                  kiop->rlentime += delta * rcnt;
1359 1352                  kiop->rtime += delta;
1360 1353          }
1361 1354  }
1362 1355  
1363 1356  void
1364      -kstat_runq_exit(kstat_io_t *kiop)
     1357 +kstat_runq_exit_time(kstat_io_t *kiop, const hrtime_t new)
1365 1358  {
1366      -        hrtime_t new, delta;
     1359 +        hrtime_t delta;
1367 1360          ulong_t rcnt;
1368 1361  
1369      -        new = gethrtime_unscaled();
     1362 +        ASSERT(kiop != NULL);
1370 1363          delta = new - kiop->rlastupdate;
1371 1364          kiop->rlastupdate = new;
1372 1365          rcnt = kiop->rcnt--;
1373 1366          ASSERT((int)rcnt > 0);
1374 1367          kiop->rlentime += delta * rcnt;
1375 1368          kiop->rtime += delta;
1376 1369  }
1377 1370  
     1371 +/*
     1372 + * The sparc V9 versions of these routines can be much cheaper than
     1373 + * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
     1374 + * For simplicity, however, we always feed the C versions to lint.
     1375 + */
     1376 +#if !defined(__sparc) || defined(lint) || defined(__lint)
     1377 +
1378 1378  void
1379      -kstat_waitq_to_runq(kstat_io_t *kiop)
     1379 +kstat_waitq_enter(kstat_io_t *kiop)
1380 1380  {
1381      -        hrtime_t new, delta;
1382      -        ulong_t wcnt, rcnt;
     1381 +        kstat_waitq_enter_time(kiop, gethrtime_unscaled());
     1382 +}
1383 1383  
1384      -        new = gethrtime_unscaled();
     1384 +void
     1385 +kstat_waitq_exit(kstat_io_t *kiop)
     1386 +{
     1387 +        kstat_waitq_exit_time(kiop, gethrtime_unscaled());
     1388 +}
1385 1389  
1386      -        delta = new - kiop->wlastupdate;
1387      -        kiop->wlastupdate = new;
1388      -        wcnt = kiop->wcnt--;
1389      -        ASSERT((int)wcnt > 0);
1390      -        kiop->wlentime += delta * wcnt;
1391      -        kiop->wtime += delta;
     1390 +void
     1391 +kstat_runq_enter(kstat_io_t *kiop)
     1392 +{
     1393 +        kstat_runq_enter_time(kiop, gethrtime_unscaled());
     1394 +}
1392 1395  
1393      -        delta = new - kiop->rlastupdate;
1394      -        kiop->rlastupdate = new;
1395      -        rcnt = kiop->rcnt++;
1396      -        if (rcnt != 0) {
1397      -                kiop->rlentime += delta * rcnt;
1398      -                kiop->rtime += delta;
1399      -        }
     1396 +void
     1397 +kstat_runq_exit(kstat_io_t *kiop)
     1398 +{
     1399 +        kstat_runq_exit_time(kiop, gethrtime_unscaled());
1400 1400  }
1401 1401  
1402 1402  void
1403      -kstat_runq_back_to_waitq(kstat_io_t *kiop)
     1403 +kstat_waitq_to_runq(kstat_io_t *kiop)
1404 1404  {
1405      -        hrtime_t new, delta;
1406      -        ulong_t wcnt, rcnt;
     1405 +        hrtime_t new = gethrtime_unscaled();
     1406 +        ASSERT(kiop != NULL);
     1407 +        kstat_waitq_exit_time(kiop, new);
     1408 +        kstat_runq_enter_time(kiop, new);
     1409 +}
1407 1410  
1408      -        new = gethrtime_unscaled();
1409      -
1410      -        delta = new - kiop->rlastupdate;
1411      -        kiop->rlastupdate = new;
1412      -        rcnt = kiop->rcnt--;
1413      -        ASSERT((int)rcnt > 0);
1414      -        kiop->rlentime += delta * rcnt;
1415      -        kiop->rtime += delta;
1416      -
1417      -        delta = new - kiop->wlastupdate;
1418      -        kiop->wlastupdate = new;
1419      -        wcnt = kiop->wcnt++;
1420      -        if (wcnt != 0) {
1421      -                kiop->wlentime += delta * wcnt;
1422      -                kiop->wtime += delta;
1423      -        }
     1411 +void
     1412 +kstat_runq_back_to_waitq(kstat_io_t *kiop)
     1413 +{
     1414 +        hrtime_t new = gethrtime_unscaled();
     1415 +        ASSERT(kiop != NULL);
     1416 +        kstat_runq_exit_time(kiop, new);
     1417 +        kstat_waitq_enter_time(kiop, new);
1424 1418  }
1425 1419  
1426 1420  #endif
1427 1421  
1428 1422  void
1429 1423  kstat_timer_start(kstat_timer_t *ktp)
1430 1424  {
1431 1425          ktp->start_time = gethrtime();
1432 1426  }
1433 1427  
1434 1428  void
1435 1429  kstat_timer_stop(kstat_timer_t *ktp)
1436 1430  {
1437 1431          hrtime_t        etime;
1438 1432          u_longlong_t    num_events;
1439 1433  
1440 1434          ktp->stop_time = etime = gethrtime();
1441 1435          etime -= ktp->start_time;
1442 1436          num_events = ktp->num_events;
1443 1437          if (etime < ktp->min_time || num_events == 0)
1444 1438                  ktp->min_time = etime;
1445 1439          if (etime > ktp->max_time)
1446 1440                  ktp->max_time = etime;
1447 1441          ktp->elapsed_time += etime;
1448 1442          ktp->num_events = num_events + 1;
1449 1443  }
  
    | 
      ↓ open down ↓ | 
    16 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX