Print this page
NEX-13937 Improve kstat performance
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Evan Layton <evan.layton@nexenta.com>
NEX-4425 support KSTAT_DATA_STRING in non-virtual named kstats
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/os/kstat_fr.c
+++ new/usr/src/uts/common/os/kstat_fr.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright 2014, Joyent, Inc. All rights reserved.
24 24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25 25 */
26 26
27 27 /*
28 28 * Kernel statistics framework
29 29 */
30 30
31 31 #include <sys/types.h>
32 32 #include <sys/time.h>
33 33 #include <sys/systm.h>
34 34 #include <sys/vmsystm.h>
35 35 #include <sys/t_lock.h>
36 36 #include <sys/param.h>
37 37 #include <sys/errno.h>
38 38 #include <sys/vmem.h>
39 39 #include <sys/sysmacros.h>
40 40 #include <sys/cmn_err.h>
41 41 #include <sys/kstat.h>
42 42 #include <sys/sysinfo.h>
43 43 #include <sys/cpuvar.h>
44 44 #include <sys/fcntl.h>
45 45 #include <sys/flock.h>
46 46 #include <sys/vnode.h>
47 47 #include <sys/vfs.h>
48 48 #include <sys/dnlc.h>
49 49 #include <sys/var.h>
50 50 #include <sys/debug.h>
51 51 #include <sys/kobj.h>
52 52 #include <sys/avl.h>
53 53 #include <sys/pool_pset.h>
54 54 #include <sys/cpupart.h>
55 55 #include <sys/zone.h>
56 56 #include <sys/loadavg.h>
57 57 #include <vm/page.h>
58 58 #include <vm/anon.h>
59 59 #include <vm/seg_kmem.h>
60 60
61 61 /*
62 62 * Global lock to protect the AVL trees and kstat_chain_id.
63 63 */
64 64 static kmutex_t kstat_chain_lock;
65 65
66 66 /*
67 67 * Every install/delete kstat bumps kstat_chain_id. This is used by:
68 68 *
69 69 * (1) /dev/kstat, to detect changes in the kstat chain across ioctls;
70 70 *
71 71 * (2) kstat_create(), to assign a KID (kstat ID) to each new kstat.
72 72 * /dev/kstat uses the KID as a cookie for kstat lookups.
73 73 *
74 74 * We reserve the first two IDs because some kstats are created before
75 75 * the well-known ones (kstat_headers = 0, kstat_types = 1).
76 76 *
77 77 * We also bump the kstat_chain_id if a zone is gaining or losing visibility
78 78 * into a particular kstat, which is logically equivalent to a kstat being
79 79 * installed/deleted.
80 80 */
81 81
82 82 kid_t kstat_chain_id = 2;
83 83
84 84 /*
85 85 * As far as zones are concerned, there are 3 types of kstat:
86 86 *
87 87 * 1) Those which have a well-known name, and which should return per-zone data
88 88 * depending on which zone is doing the kstat_read(). sockfs:0:sock_unix_list
89 89 * is an example of this type of kstat.
90 90 *
91 91 * 2) Those which should only be exported to a particular list of zones.
92 92 * For example, in the case of nfs:*:mntinfo, we don't want zone A to be
93 93 * able to see NFS mounts associated with zone B, while we want the
94 94 * global zone to be able to see all mounts on the system.
95 95 *
96 96 * 3) Those that can be exported to all zones. Most system-related
97 97 * kstats fall within this category.
98 98 *
99 99 * An ekstat_t thus contains a list of zones that the kstat is to be
100 100 * exported to. The lookup of a module:instance:name thus translates to a
101 101 * lookup of module:instance:name:myzone; if the kstat is not exported
102 102 * to all zones, and does not have the caller's zoneid explicitly
103 103 * enumerated in the list of zones to be exported to, it is the same as
104 104 * if the kstat didn't exist.
105 105 *
106 106 * Writing to kstats is currently disallowed from within a non-global
107 107 * zone, although this restriction could be removed in the future.
108 108 */
109 109 typedef struct kstat_zone {
110 110 zoneid_t zoneid;
111 111 struct kstat_zone *next;
112 112 } kstat_zone_t;
113 113
114 114 /*
115 115 * Extended kstat structure -- for internal use only.
116 116 */
117 117 typedef struct ekstat {
118 118 kstat_t e_ks; /* the kstat itself */
119 119 size_t e_size; /* total allocation size */
120 120 kthread_t *e_owner; /* thread holding this kstat */
121 121 kcondvar_t e_cv; /* wait for owner == NULL */
122 122 avl_node_t e_avl_bykid; /* AVL tree to sort by KID */
123 123 avl_node_t e_avl_byname; /* AVL tree to sort by name */
124 124 kstat_zone_t e_zone; /* zone to export stats to */
125 125 } ekstat_t;
126 126
127 127 static uint64_t kstat_initial[8192];
128 128 static void *kstat_initial_ptr = kstat_initial;
129 129 static size_t kstat_initial_avail = sizeof (kstat_initial);
130 130 static vmem_t *kstat_arena;
131 131
132 132 #define KSTAT_ALIGN (sizeof (uint64_t))
133 133
134 134 static avl_tree_t kstat_avl_bykid;
135 135 static avl_tree_t kstat_avl_byname;
136 136
137 137 /*
138 138 * Various pointers we need to create kstats at boot time in kstat_init()
139 139 */
140 140 extern kstat_named_t *segmapcnt_ptr;
141 141 extern uint_t segmapcnt_ndata;
142 142 extern int segmap_kstat_update(kstat_t *, int);
143 143 extern kstat_named_t *biostats_ptr;
144 144 extern uint_t biostats_ndata;
145 145 extern kstat_named_t *pollstats_ptr;
146 146 extern uint_t pollstats_ndata;
147 147
148 148 extern int vac;
149 149 extern uint_t nproc;
150 150 extern time_t boot_time;
151 151 extern sysinfo_t sysinfo;
152 152 extern vminfo_t vminfo;
153 153
154 154 struct {
155 155 kstat_named_t ncpus;
156 156 kstat_named_t lbolt;
157 157 kstat_named_t deficit;
158 158 kstat_named_t clk_intr;
159 159 kstat_named_t vac;
160 160 kstat_named_t nproc;
161 161 kstat_named_t avenrun_1min;
162 162 kstat_named_t avenrun_5min;
163 163 kstat_named_t avenrun_15min;
164 164 kstat_named_t boot_time;
165 165 kstat_named_t nsec_per_tick;
166 166 } system_misc_kstat = {
167 167 { "ncpus", KSTAT_DATA_UINT32 },
168 168 { "lbolt", KSTAT_DATA_UINT32 },
169 169 { "deficit", KSTAT_DATA_UINT32 },
170 170 { "clk_intr", KSTAT_DATA_UINT32 },
171 171 { "vac", KSTAT_DATA_UINT32 },
172 172 { "nproc", KSTAT_DATA_UINT32 },
173 173 { "avenrun_1min", KSTAT_DATA_UINT32 },
174 174 { "avenrun_5min", KSTAT_DATA_UINT32 },
175 175 { "avenrun_15min", KSTAT_DATA_UINT32 },
176 176 { "boot_time", KSTAT_DATA_UINT32 },
177 177 { "nsec_per_tick", KSTAT_DATA_UINT32 },
178 178 };
179 179
180 180 struct {
181 181 kstat_named_t physmem;
182 182 kstat_named_t nalloc;
183 183 kstat_named_t nfree;
184 184 kstat_named_t nalloc_calls;
185 185 kstat_named_t nfree_calls;
186 186 kstat_named_t kernelbase;
187 187 kstat_named_t econtig;
188 188 kstat_named_t freemem;
189 189 kstat_named_t availrmem;
190 190 kstat_named_t lotsfree;
191 191 kstat_named_t desfree;
192 192 kstat_named_t minfree;
193 193 kstat_named_t fastscan;
194 194 kstat_named_t slowscan;
195 195 kstat_named_t nscan;
196 196 kstat_named_t desscan;
197 197 kstat_named_t pp_kernel;
198 198 kstat_named_t pagesfree;
199 199 kstat_named_t pageslocked;
200 200 kstat_named_t pagestotal;
201 201 } system_pages_kstat = {
202 202 { "physmem", KSTAT_DATA_ULONG },
203 203 { "nalloc", KSTAT_DATA_ULONG },
204 204 { "nfree", KSTAT_DATA_ULONG },
205 205 { "nalloc_calls", KSTAT_DATA_ULONG },
206 206 { "nfree_calls", KSTAT_DATA_ULONG },
207 207 { "kernelbase", KSTAT_DATA_ULONG },
208 208 { "econtig", KSTAT_DATA_ULONG },
209 209 { "freemem", KSTAT_DATA_ULONG },
210 210 { "availrmem", KSTAT_DATA_ULONG },
211 211 { "lotsfree", KSTAT_DATA_ULONG },
212 212 { "desfree", KSTAT_DATA_ULONG },
213 213 { "minfree", KSTAT_DATA_ULONG },
214 214 { "fastscan", KSTAT_DATA_ULONG },
215 215 { "slowscan", KSTAT_DATA_ULONG },
216 216 { "nscan", KSTAT_DATA_ULONG },
217 217 { "desscan", KSTAT_DATA_ULONG },
218 218 { "pp_kernel", KSTAT_DATA_ULONG },
219 219 { "pagesfree", KSTAT_DATA_ULONG },
220 220 { "pageslocked", KSTAT_DATA_ULONG },
221 221 { "pagestotal", KSTAT_DATA_ULONG },
222 222 };
223 223
224 224 static int header_kstat_update(kstat_t *, int);
225 225 static int header_kstat_snapshot(kstat_t *, void *, int);
226 226 static int system_misc_kstat_update(kstat_t *, int);
227 227 static int system_pages_kstat_update(kstat_t *, int);
228 228
229 229 static struct {
230 230 char name[KSTAT_STRLEN];
231 231 size_t size;
232 232 uint_t min_ndata;
233 233 uint_t max_ndata;
234 234 } kstat_data_type[KSTAT_NUM_TYPES] = {
235 235 { "raw", 1, 0, INT_MAX },
236 236 { "name=value", sizeof (kstat_named_t), 0, INT_MAX },
237 237 { "interrupt", sizeof (kstat_intr_t), 1, 1 },
238 238 { "i/o", sizeof (kstat_io_t), 1, 1 },
239 239 { "event_timer", sizeof (kstat_timer_t), 0, INT_MAX },
240 240 };
241 241
242 242 int
243 243 kstat_zone_find(kstat_t *k, zoneid_t zoneid)
244 244 {
245 245 ekstat_t *e = (ekstat_t *)k;
246 246 kstat_zone_t *kz;
247 247
248 248 ASSERT(MUTEX_HELD(&kstat_chain_lock));
249 249 for (kz = &e->e_zone; kz != NULL; kz = kz->next) {
250 250 if (zoneid == ALL_ZONES || kz->zoneid == ALL_ZONES)
251 251 return (1);
252 252 if (zoneid == kz->zoneid)
253 253 return (1);
254 254 }
255 255 return (0);
256 256 }
257 257
258 258 void
259 259 kstat_zone_remove(kstat_t *k, zoneid_t zoneid)
260 260 {
261 261 ekstat_t *e = (ekstat_t *)k;
262 262 kstat_zone_t *kz, *t = NULL;
263 263
264 264 mutex_enter(&kstat_chain_lock);
265 265 if (zoneid == e->e_zone.zoneid) {
266 266 kz = e->e_zone.next;
267 267 ASSERT(kz != NULL);
268 268 e->e_zone.zoneid = kz->zoneid;
269 269 e->e_zone.next = kz->next;
270 270 goto out;
271 271 }
272 272 for (kz = &e->e_zone; kz->next != NULL; kz = kz->next) {
273 273 if (kz->next->zoneid == zoneid) {
274 274 t = kz->next;
275 275 kz->next = t->next;
276 276 break;
277 277 }
278 278 }
279 279 ASSERT(t != NULL); /* we removed something */
280 280 kz = t;
281 281 out:
282 282 kstat_chain_id++;
283 283 mutex_exit(&kstat_chain_lock);
284 284 kmem_free(kz, sizeof (*kz));
285 285 }
286 286
287 287 void
288 288 kstat_zone_add(kstat_t *k, zoneid_t zoneid)
289 289 {
290 290 ekstat_t *e = (ekstat_t *)k;
291 291 kstat_zone_t *kz;
292 292
293 293 kz = kmem_alloc(sizeof (*kz), KM_NOSLEEP);
294 294 if (kz == NULL)
295 295 return;
296 296 mutex_enter(&kstat_chain_lock);
297 297 kz->zoneid = zoneid;
298 298 kz->next = e->e_zone.next;
299 299 e->e_zone.next = kz;
300 300 kstat_chain_id++;
301 301 mutex_exit(&kstat_chain_lock);
302 302 }
303 303
304 304 /*
305 305 * Compare the list of zones for the given kstats, returning 0 if they match
306 306 * (ie, one list contains ALL_ZONES or both lists contain the same zoneid).
307 307 * In practice, this is called indirectly by kstat_hold_byname(), so one of the
308 308 * two lists always has one element, and this is an O(n) operation rather than
309 309 * O(n^2).
310 310 */
311 311 static int
312 312 kstat_zone_compare(ekstat_t *e1, ekstat_t *e2)
313 313 {
314 314 kstat_zone_t *kz1, *kz2;
315 315
316 316 ASSERT(MUTEX_HELD(&kstat_chain_lock));
317 317 for (kz1 = &e1->e_zone; kz1 != NULL; kz1 = kz1->next) {
318 318 for (kz2 = &e2->e_zone; kz2 != NULL; kz2 = kz2->next) {
319 319 if (kz1->zoneid == ALL_ZONES ||
320 320 kz2->zoneid == ALL_ZONES)
321 321 return (0);
322 322 if (kz1->zoneid == kz2->zoneid)
323 323 return (0);
324 324 }
325 325 }
326 326 return (e1->e_zone.zoneid < e2->e_zone.zoneid ? -1 : 1);
327 327 }
328 328
329 329 /*
330 330 * Support for keeping kstats sorted in AVL trees for fast lookups.
331 331 */
332 332 static int
333 333 kstat_compare_bykid(const void *a1, const void *a2)
334 334 {
335 335 const kstat_t *k1 = a1;
336 336 const kstat_t *k2 = a2;
337 337
338 338 if (k1->ks_kid < k2->ks_kid)
339 339 return (-1);
340 340 if (k1->ks_kid > k2->ks_kid)
341 341 return (1);
342 342 return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
343 343 }
344 344
345 345 static int
346 346 kstat_compare_byname(const void *a1, const void *a2)
347 347 {
348 348 const kstat_t *k1 = a1;
349 349 const kstat_t *k2 = a2;
350 350 int s;
351 351
352 352 s = strcmp(k1->ks_module, k2->ks_module);
353 353 if (s > 0)
354 354 return (1);
355 355 if (s < 0)
356 356 return (-1);
357 357
358 358 if (k1->ks_instance < k2->ks_instance)
359 359 return (-1);
360 360 if (k1->ks_instance > k2->ks_instance)
361 361 return (1);
362 362
363 363 s = strcmp(k1->ks_name, k2->ks_name);
364 364 if (s > 0)
365 365 return (1);
366 366 if (s < 0)
367 367 return (-1);
368 368
369 369 return (kstat_zone_compare((ekstat_t *)k1, (ekstat_t *)k2));
370 370 }
371 371
372 372 static kstat_t *
373 373 kstat_hold(avl_tree_t *t, ekstat_t *template)
374 374 {
375 375 kstat_t *ksp;
376 376 ekstat_t *e;
377 377
378 378 mutex_enter(&kstat_chain_lock);
379 379 for (;;) {
380 380 ksp = avl_find(t, template, NULL);
381 381 if (ksp == NULL)
382 382 break;
383 383 e = (ekstat_t *)ksp;
384 384 if (e->e_owner == NULL) {
385 385 e->e_owner = curthread;
386 386 break;
387 387 }
388 388 cv_wait(&e->e_cv, &kstat_chain_lock);
389 389 }
390 390 mutex_exit(&kstat_chain_lock);
391 391 return (ksp);
392 392 }
393 393
394 394 void
395 395 kstat_rele(kstat_t *ksp)
396 396 {
397 397 ekstat_t *e = (ekstat_t *)ksp;
398 398
399 399 mutex_enter(&kstat_chain_lock);
400 400 ASSERT(e->e_owner == curthread);
401 401 e->e_owner = NULL;
402 402 cv_broadcast(&e->e_cv);
403 403 mutex_exit(&kstat_chain_lock);
404 404 }
405 405
406 406 kstat_t *
407 407 kstat_hold_bykid(kid_t kid, zoneid_t zoneid)
408 408 {
409 409 ekstat_t e;
410 410
411 411 e.e_ks.ks_kid = kid;
412 412 e.e_zone.zoneid = zoneid;
413 413 e.e_zone.next = NULL;
414 414
415 415 return (kstat_hold(&kstat_avl_bykid, &e));
416 416 }
417 417
418 418 kstat_t *
419 419 kstat_hold_byname(const char *ks_module, int ks_instance, const char *ks_name,
420 420 zoneid_t ks_zoneid)
421 421 {
422 422 ekstat_t e;
423 423
424 424 kstat_set_string(e.e_ks.ks_module, ks_module);
425 425 e.e_ks.ks_instance = ks_instance;
426 426 kstat_set_string(e.e_ks.ks_name, ks_name);
427 427 e.e_zone.zoneid = ks_zoneid;
428 428 e.e_zone.next = NULL;
429 429 return (kstat_hold(&kstat_avl_byname, &e));
430 430 }
431 431
432 432 static ekstat_t *
433 433 kstat_alloc(size_t size)
434 434 {
435 435 ekstat_t *e = NULL;
436 436
437 437 size = P2ROUNDUP(sizeof (ekstat_t) + size, KSTAT_ALIGN);
438 438
439 439 if (kstat_arena == NULL) {
440 440 if (size <= kstat_initial_avail) {
441 441 e = kstat_initial_ptr;
442 442 kstat_initial_ptr = (char *)kstat_initial_ptr + size;
443 443 kstat_initial_avail -= size;
444 444 }
445 445 } else {
446 446 e = vmem_alloc(kstat_arena, size, VM_NOSLEEP);
447 447 }
448 448
449 449 if (e != NULL) {
450 450 bzero(e, size);
451 451 e->e_size = size;
452 452 cv_init(&e->e_cv, NULL, CV_DEFAULT, NULL);
453 453 }
454 454
455 455 return (e);
456 456 }
457 457
458 458 static void
459 459 kstat_free(ekstat_t *e)
460 460 {
461 461 cv_destroy(&e->e_cv);
462 462 vmem_free(kstat_arena, e, e->e_size);
463 463 }
464 464
465 465 /*
466 466 * Create various system kstats.
467 467 */
468 468 void
469 469 kstat_init(void)
470 470 {
471 471 kstat_t *ksp;
472 472 ekstat_t *e;
473 473 avl_tree_t *t = &kstat_avl_bykid;
474 474
475 475 /*
476 476 * Set up the kstat vmem arena.
477 477 */
478 478 kstat_arena = vmem_create("kstat",
479 479 kstat_initial, sizeof (kstat_initial), KSTAT_ALIGN,
480 480 segkmem_alloc, segkmem_free, heap_arena, 0, VM_SLEEP);
481 481
482 482 /*
483 483 * Make initial kstats appear as though they were allocated.
484 484 */
485 485 for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER))
486 486 (void) vmem_xalloc(kstat_arena, e->e_size, KSTAT_ALIGN,
487 487 0, 0, e, (char *)e + e->e_size,
488 488 VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
489 489
490 490 /*
491 491 * The mother of all kstats. The first kstat in the system, which
492 492 * always has KID 0, has the headers for all kstats (including itself)
493 493 * as its data. Thus, the kstat driver does not need any special
494 494 * interface to extract the kstat chain.
495 495 */
496 496 kstat_chain_id = 0;
497 497 ksp = kstat_create("unix", 0, "kstat_headers", "kstat", KSTAT_TYPE_RAW,
498 498 0, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
499 499 if (ksp) {
500 500 ksp->ks_lock = &kstat_chain_lock;
501 501 ksp->ks_update = header_kstat_update;
502 502 ksp->ks_snapshot = header_kstat_snapshot;
503 503 kstat_install(ksp);
504 504 } else {
505 505 panic("cannot create kstat 'kstat_headers'");
506 506 }
507 507
508 508 ksp = kstat_create("unix", 0, "kstat_types", "kstat",
509 509 KSTAT_TYPE_NAMED, KSTAT_NUM_TYPES, 0);
510 510 if (ksp) {
511 511 int i;
512 512 kstat_named_t *kn = KSTAT_NAMED_PTR(ksp);
513 513
514 514 for (i = 0; i < KSTAT_NUM_TYPES; i++) {
515 515 kstat_named_init(&kn[i], kstat_data_type[i].name,
516 516 KSTAT_DATA_ULONG);
517 517 kn[i].value.ul = i;
518 518 }
519 519 kstat_install(ksp);
520 520 }
521 521
522 522 ksp = kstat_create("unix", 0, "sysinfo", "misc", KSTAT_TYPE_RAW,
523 523 sizeof (sysinfo_t), KSTAT_FLAG_VIRTUAL);
524 524 if (ksp) {
525 525 ksp->ks_data = (void *) &sysinfo;
526 526 kstat_install(ksp);
527 527 }
528 528
529 529 ksp = kstat_create("unix", 0, "vminfo", "vm", KSTAT_TYPE_RAW,
530 530 sizeof (vminfo_t), KSTAT_FLAG_VIRTUAL);
531 531 if (ksp) {
532 532 ksp->ks_data = (void *) &vminfo;
533 533 kstat_install(ksp);
534 534 }
535 535
536 536 ksp = kstat_create("unix", 0, "segmap", "vm", KSTAT_TYPE_NAMED,
537 537 segmapcnt_ndata, KSTAT_FLAG_VIRTUAL);
538 538 if (ksp) {
539 539 ksp->ks_data = (void *) segmapcnt_ptr;
540 540 ksp->ks_update = segmap_kstat_update;
541 541 kstat_install(ksp);
542 542 }
543 543
544 544 ksp = kstat_create("unix", 0, "biostats", "misc", KSTAT_TYPE_NAMED,
545 545 biostats_ndata, KSTAT_FLAG_VIRTUAL);
546 546 if (ksp) {
547 547 ksp->ks_data = (void *) biostats_ptr;
548 548 kstat_install(ksp);
549 549 }
550 550
551 551 ksp = kstat_create("unix", 0, "var", "misc", KSTAT_TYPE_RAW,
552 552 sizeof (struct var), KSTAT_FLAG_VIRTUAL);
553 553 if (ksp) {
554 554 ksp->ks_data = (void *) &v;
555 555 kstat_install(ksp);
556 556 }
557 557
558 558 ksp = kstat_create("unix", 0, "system_misc", "misc", KSTAT_TYPE_NAMED,
559 559 sizeof (system_misc_kstat) / sizeof (kstat_named_t),
560 560 KSTAT_FLAG_VIRTUAL);
561 561 if (ksp) {
562 562 ksp->ks_data = (void *) &system_misc_kstat;
563 563 ksp->ks_update = system_misc_kstat_update;
564 564 kstat_install(ksp);
565 565 }
566 566
567 567 ksp = kstat_create("unix", 0, "system_pages", "pages", KSTAT_TYPE_NAMED,
568 568 sizeof (system_pages_kstat) / sizeof (kstat_named_t),
569 569 KSTAT_FLAG_VIRTUAL);
570 570 if (ksp) {
571 571 ksp->ks_data = (void *) &system_pages_kstat;
572 572 ksp->ks_update = system_pages_kstat_update;
573 573 kstat_install(ksp);
574 574 }
575 575
576 576 ksp = kstat_create("poll", 0, "pollstats", "misc", KSTAT_TYPE_NAMED,
577 577 pollstats_ndata, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
578 578
579 579 if (ksp) {
580 580 ksp->ks_data = pollstats_ptr;
581 581 kstat_install(ksp);
582 582 }
583 583 }
584 584
585 585 /*
586 586 * Caller of this should ensure that the string pointed by src
587 587 * doesn't change while kstat's lock is held. Not doing so defeats
588 588 * kstat's snapshot strategy as explained in <sys/kstat.h>
589 589 */
590 590 void
591 591 kstat_named_setstr(kstat_named_t *knp, const char *src)
592 592 {
593 593 if (knp->data_type != KSTAT_DATA_STRING)
594 594 panic("kstat_named_setstr('%p', '%p'): "
595 595 "named kstat is not of type KSTAT_DATA_STRING",
596 596 (void *)knp, (void *)src);
597 597
598 598 KSTAT_NAMED_STR_PTR(knp) = (char *)src;
599 599 if (src != NULL)
600 600 KSTAT_NAMED_STR_BUFLEN(knp) = strlen(src) + 1;
601 601 else
602 602 KSTAT_NAMED_STR_BUFLEN(knp) = 0;
603 603 }
604 604
605 605 void
606 606 kstat_set_string(char *dst, const char *src)
607 607 {
608 608 bzero(dst, KSTAT_STRLEN);
609 609 (void) strncpy(dst, src, KSTAT_STRLEN - 1);
610 610 }
611 611
612 612 void
613 613 kstat_named_init(kstat_named_t *knp, const char *name, uchar_t data_type)
614 614 {
615 615 kstat_set_string(knp->name, name);
616 616 knp->data_type = data_type;
617 617
618 618 if (data_type == KSTAT_DATA_STRING)
619 619 kstat_named_setstr(knp, NULL);
620 620 }
621 621
622 622 void
623 623 kstat_timer_init(kstat_timer_t *ktp, const char *name)
624 624 {
625 625 kstat_set_string(ktp->name, name);
626 626 }
627 627
628 628 /* ARGSUSED */
629 629 static int
630 630 default_kstat_update(kstat_t *ksp, int rw)
631 631 {
632 632 uint_t i;
633 633 size_t len = 0;
634 634 kstat_named_t *knp;
635 635
636 636 /*
637 637 * Named kstats with variable-length long strings have a standard
638 638 * way of determining how much space is needed to hold the snapshot:
639 639 */
640 640 if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED &&
641 641 (ksp->ks_flags & (KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_LONGSTRINGS))) {
642 642
643 643 /*
644 644 * Add in the space required for the strings
645 645 */
646 646 knp = KSTAT_NAMED_PTR(ksp);
647 647 for (i = 0; i < ksp->ks_ndata; i++, knp++) {
648 648 if (knp->data_type == KSTAT_DATA_STRING)
649 649 len += KSTAT_NAMED_STR_BUFLEN(knp);
650 650 }
651 651 ksp->ks_data_size =
652 652 ksp->ks_ndata * sizeof (kstat_named_t) + len;
653 653 }
654 654 return (0);
655 655 }
656 656
657 657 static int
658 658 default_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
659 659 {
660 660 kstat_io_t *kiop;
661 661 hrtime_t cur_time;
662 662 size_t namedsz;
663 663
664 664 ksp->ks_snaptime = cur_time = gethrtime();
665 665
666 666 if (rw == KSTAT_WRITE) {
667 667 if (!(ksp->ks_flags & KSTAT_FLAG_WRITABLE))
668 668 return (EACCES);
669 669 bcopy(buf, ksp->ks_data, ksp->ks_data_size);
670 670 return (0);
671 671 }
672 672
673 673 /*
674 674 * KSTAT_TYPE_NAMED kstats are defined to have ks_ndata
675 675 * number of kstat_named_t structures, followed by an optional
676 676 * string segment. The ks_data generally holds only the
677 677 * kstat_named_t structures. So we copy it first. The strings,
678 678 * if any, are copied below. For other kstat types, ks_data holds the
679 679 * entire buffer.
680 680 */
681 681
682 682 namedsz = sizeof (kstat_named_t) * ksp->ks_ndata;
683 683 if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data_size > namedsz)
684 684 bcopy(ksp->ks_data, buf, namedsz);
685 685 else
686 686 bcopy(ksp->ks_data, buf, ksp->ks_data_size);
687 687
688 688 /*
689 689 * Apply kstat type-specific data massaging
690 690 */
691 691 switch (ksp->ks_type) {
692 692
693 693 case KSTAT_TYPE_IO:
694 694 /*
695 695 * Normalize time units and deal with incomplete transactions
696 696 */
697 697 kiop = (kstat_io_t *)buf;
698 698
699 699 scalehrtime(&kiop->wtime);
700 700 scalehrtime(&kiop->wlentime);
701 701 scalehrtime(&kiop->wlastupdate);
702 702 scalehrtime(&kiop->rtime);
703 703 scalehrtime(&kiop->rlentime);
704 704 scalehrtime(&kiop->rlastupdate);
705 705
706 706 if (kiop->wcnt != 0) {
707 707 /* like kstat_waitq_exit */
708 708 hrtime_t wfix = cur_time - kiop->wlastupdate;
709 709 kiop->wlastupdate = cur_time;
710 710 kiop->wlentime += kiop->wcnt * wfix;
711 711 kiop->wtime += wfix;
712 712 }
713 713
714 714 if (kiop->rcnt != 0) {
715 715 /* like kstat_runq_exit */
716 716 hrtime_t rfix = cur_time - kiop->rlastupdate;
717 717 kiop->rlastupdate = cur_time;
718 718 kiop->rlentime += kiop->rcnt * rfix;
719 719 kiop->rtime += rfix;
720 720 }
721 721 break;
722 722
723 723 case KSTAT_TYPE_NAMED:
724 724 /*
725 725 * Massage any long strings in at the end of the buffer
726 726 */
727 727 if (ksp->ks_data_size > namedsz) {
728 728 uint_t i;
729 729 kstat_named_t *knp = buf;
730 730 char *dst = (char *)(knp + ksp->ks_ndata);
731 731 /*
732 732 * Copy strings and update pointers
733 733 */
734 734 for (i = 0; i < ksp->ks_ndata; i++, knp++) {
735 735 if (knp->data_type == KSTAT_DATA_STRING &&
736 736 KSTAT_NAMED_STR_PTR(knp) != NULL) {
737 737 bcopy(KSTAT_NAMED_STR_PTR(knp), dst,
738 738 KSTAT_NAMED_STR_BUFLEN(knp));
739 739 KSTAT_NAMED_STR_PTR(knp) = dst;
740 740 dst += KSTAT_NAMED_STR_BUFLEN(knp);
741 741 }
742 742 }
743 743 ASSERT(dst <= ((char *)buf + ksp->ks_data_size));
744 744 }
745 745 break;
746 746 }
747 747 return (0);
748 748 }
749 749
750 750 static int
751 751 header_kstat_update(kstat_t *header_ksp, int rw)
752 752 {
753 753 int nkstats = 0;
754 754 ekstat_t *e;
755 755 avl_tree_t *t = &kstat_avl_bykid;
756 756 zoneid_t zoneid;
757 757
758 758 if (rw == KSTAT_WRITE)
759 759 return (EACCES);
760 760
761 761 ASSERT(MUTEX_HELD(&kstat_chain_lock));
762 762
763 763 zoneid = getzoneid();
764 764 for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
765 765 if (kstat_zone_find((kstat_t *)e, zoneid) &&
766 766 (e->e_ks.ks_flags & KSTAT_FLAG_INVALID) == 0) {
767 767 nkstats++;
768 768 }
769 769 }
770 770 header_ksp->ks_ndata = nkstats;
771 771 header_ksp->ks_data_size = nkstats * sizeof (kstat_t);
772 772 return (0);
773 773 }
774 774
775 775 /*
776 776 * Copy out the data section of kstat 0, which consists of the list
777 777 * of all kstat headers. By specification, these headers must be
778 778 * copied out in order of increasing KID.
779 779 */
780 780 static int
781 781 header_kstat_snapshot(kstat_t *header_ksp, void *buf, int rw)
782 782 {
783 783 ekstat_t *e;
784 784 avl_tree_t *t = &kstat_avl_bykid;
785 785 zoneid_t zoneid;
786 786
787 787 header_ksp->ks_snaptime = gethrtime();
788 788
789 789 if (rw == KSTAT_WRITE)
790 790 return (EACCES);
791 791
792 792 ASSERT(MUTEX_HELD(&kstat_chain_lock));
793 793
794 794 zoneid = getzoneid();
795 795 for (e = avl_first(t); e != NULL; e = avl_walk(t, e, AVL_AFTER)) {
796 796 if (kstat_zone_find((kstat_t *)e, zoneid) &&
797 797 (e->e_ks.ks_flags & KSTAT_FLAG_INVALID) == 0) {
798 798 bcopy(&e->e_ks, buf, sizeof (kstat_t));
799 799 buf = (char *)buf + sizeof (kstat_t);
800 800 }
801 801 }
802 802
803 803 return (0);
804 804 }
805 805
806 806 /* ARGSUSED */
807 807 static int
808 808 system_misc_kstat_update(kstat_t *ksp, int rw)
809 809 {
810 810 int myncpus = ncpus;
811 811 int *loadavgp = &avenrun[0];
812 812 time_t zone_boot_time;
813 813 clock_t zone_lbolt;
814 814 hrtime_t zone_hrtime;
815 815 size_t zone_nproc;
816 816
817 817 if (rw == KSTAT_WRITE)
818 818 return (EACCES);
819 819
820 820 if (!INGLOBALZONE(curproc)) {
821 821 /*
822 822 * Here we grab cpu_lock which is OK as long as no-one in the
823 823 * future attempts to lookup this particular kstat
824 824 * (unix:0:system_misc) while holding cpu_lock.
825 825 */
826 826 mutex_enter(&cpu_lock);
827 827 if (pool_pset_enabled()) {
828 828 myncpus = zone_ncpus_get(curproc->p_zone);
829 829 ASSERT(myncpus > 0);
830 830 }
831 831 mutex_exit(&cpu_lock);
832 832 loadavgp = &curproc->p_zone->zone_avenrun[0];
833 833 }
834 834
835 835 if (INGLOBALZONE(curproc)) {
836 836 zone_boot_time = boot_time;
837 837 zone_lbolt = ddi_get_lbolt();
838 838 zone_nproc = nproc;
839 839 } else {
840 840 zone_boot_time = curproc->p_zone->zone_boot_time;
841 841
842 842 zone_hrtime = gethrtime();
843 843 zone_lbolt = (clock_t)(NSEC_TO_TICK(zone_hrtime) -
844 844 NSEC_TO_TICK(curproc->p_zone->zone_zsched->p_mstart));
845 845 mutex_enter(&curproc->p_zone->zone_nlwps_lock);
846 846 zone_nproc = curproc->p_zone->zone_nprocs;
847 847 mutex_exit(&curproc->p_zone->zone_nlwps_lock);
848 848 }
849 849
850 850 system_misc_kstat.ncpus.value.ui32 = (uint32_t)myncpus;
851 851 system_misc_kstat.lbolt.value.ui32 = (uint32_t)zone_lbolt;
852 852 system_misc_kstat.deficit.value.ui32 = (uint32_t)deficit;
853 853 system_misc_kstat.clk_intr.value.ui32 = (uint32_t)zone_lbolt;
854 854 system_misc_kstat.vac.value.ui32 = (uint32_t)vac;
855 855 system_misc_kstat.nproc.value.ui32 = (uint32_t)zone_nproc;
856 856 system_misc_kstat.avenrun_1min.value.ui32 = (uint32_t)loadavgp[0];
857 857 system_misc_kstat.avenrun_5min.value.ui32 = (uint32_t)loadavgp[1];
858 858 system_misc_kstat.avenrun_15min.value.ui32 = (uint32_t)loadavgp[2];
859 859 system_misc_kstat.boot_time.value.ui32 = (uint32_t)
860 860 zone_boot_time;
861 861 system_misc_kstat.nsec_per_tick.value.ui32 = (uint32_t)
862 862 nsec_per_tick;
863 863 return (0);
864 864 }
865 865
866 866 #ifdef __sparc
867 867 extern caddr_t econtig32;
868 868 #else /* !__sparc */
869 869 extern caddr_t econtig;
870 870 #endif /* __sparc */
871 871
872 872 /* ARGSUSED */
873 873 static int
874 874 system_pages_kstat_update(kstat_t *ksp, int rw)
875 875 {
876 876 kobj_stat_t kobj_stat;
877 877
878 878 if (rw == KSTAT_WRITE) {
879 879 return (EACCES);
880 880 }
881 881
882 882 kobj_stat_get(&kobj_stat);
883 883 system_pages_kstat.physmem.value.ul = (ulong_t)physmem;
884 884 system_pages_kstat.nalloc.value.ul = kobj_stat.nalloc;
885 885 system_pages_kstat.nfree.value.ul = kobj_stat.nfree;
886 886 system_pages_kstat.nalloc_calls.value.ul = kobj_stat.nalloc_calls;
887 887 system_pages_kstat.nfree_calls.value.ul = kobj_stat.nfree_calls;
888 888 system_pages_kstat.kernelbase.value.ul = (ulong_t)KERNELBASE;
889 889
890 890 #ifdef __sparc
891 891 /*
892 892 * kstat should REALLY be modified to also report kmem64_base and
893 893 * kmem64_end (see sun4u/os/startup.c), as the virtual address range
894 894 * [ kernelbase .. econtig ] no longer is truly reflective of the
895 895 * kernel's vallocs...
896 896 */
897 897 system_pages_kstat.econtig.value.ul = (ulong_t)econtig32;
898 898 #else /* !__sparc */
899 899 system_pages_kstat.econtig.value.ul = (ulong_t)econtig;
900 900 #endif /* __sparc */
901 901
902 902 system_pages_kstat.freemem.value.ul = (ulong_t)freemem;
903 903 system_pages_kstat.availrmem.value.ul = (ulong_t)availrmem;
904 904 system_pages_kstat.lotsfree.value.ul = (ulong_t)lotsfree;
905 905 system_pages_kstat.desfree.value.ul = (ulong_t)desfree;
906 906 system_pages_kstat.minfree.value.ul = (ulong_t)minfree;
907 907 system_pages_kstat.fastscan.value.ul = (ulong_t)fastscan;
908 908 system_pages_kstat.slowscan.value.ul = (ulong_t)slowscan;
909 909 system_pages_kstat.nscan.value.ul = (ulong_t)nscan;
910 910 system_pages_kstat.desscan.value.ul = (ulong_t)desscan;
911 911 system_pages_kstat.pagesfree.value.ul = (ulong_t)freemem;
912 912 system_pages_kstat.pageslocked.value.ul = (ulong_t)(availrmem_initial -
913 913 availrmem);
914 914 system_pages_kstat.pagestotal.value.ul = (ulong_t)total_pages;
915 915 /*
916 916 * pp_kernel represents total pages used by the kernel since the
917 917 * startup. This formula takes into account the boottime kernel
918 918 * footprint and also considers the availrmem changes because of
919 919 * user explicit page locking.
920 920 */
921 921 system_pages_kstat.pp_kernel.value.ul = (ulong_t)(physinstalled -
922 922 obp_pages - availrmem - k_anoninfo.ani_mem_resv -
923 923 anon_segkp_pages_locked - pages_locked -
924 924 pages_claimed - pages_useclaim);
925 925
926 926 return (0);
927 927 }
928 928
929 929 kstat_t *
930 930 kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
931 931 const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags)
932 932 {
933 933 return (kstat_create_zone(ks_module, ks_instance, ks_name, ks_class,
934 934 ks_type, ks_ndata, ks_flags, ALL_ZONES));
935 935 }
936 936
937 937 /*
938 938 * Allocate and initialize a kstat structure. Or, if a dormant kstat with
939 939 * the specified name exists, reactivate it. Returns a pointer to the kstat
940 940 * on success, NULL on failure. The kstat will not be visible to the
941 941 * kstat driver until kstat_install().
942 942 */
943 943 kstat_t *
944 944 kstat_create_zone(const char *ks_module, int ks_instance, const char *ks_name,
945 945 const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
946 946 zoneid_t ks_zoneid)
947 947 {
948 948 size_t ks_data_size;
949 949 kstat_t *ksp;
950 950 ekstat_t *e;
951 951 avl_index_t where;
952 952 char namebuf[KSTAT_STRLEN + 16];
953 953
954 954 if (avl_numnodes(&kstat_avl_bykid) == 0) {
955 955 avl_create(&kstat_avl_bykid, kstat_compare_bykid,
956 956 sizeof (ekstat_t), offsetof(struct ekstat, e_avl_bykid));
957 957
958 958 avl_create(&kstat_avl_byname, kstat_compare_byname,
959 959 sizeof (ekstat_t), offsetof(struct ekstat, e_avl_byname));
960 960 }
961 961
962 962 /*
963 963 * If ks_name == NULL, set the ks_name to <module><instance>.
964 964 */
965 965 if (ks_name == NULL) {
966 966 char buf[KSTAT_STRLEN];
967 967 kstat_set_string(buf, ks_module);
968 968 (void) sprintf(namebuf, "%s%d", buf, ks_instance);
969 969 ks_name = namebuf;
970 970 }
971 971
972 972 /*
973 973 * Make sure it's a valid kstat data type
974 974 */
975 975 if (ks_type >= KSTAT_NUM_TYPES) {
976 976 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
977 977 "invalid kstat type %d",
978 978 ks_module, ks_instance, ks_name, ks_type);
979 979 return (NULL);
980 980 }
981 981
982 982 /*
983 983 * Don't allow persistent virtual kstats -- it makes no sense.
984 984 * ks_data points to garbage when the client goes away.
985 985 */
986 986 if ((ks_flags & KSTAT_FLAG_PERSISTENT) &&
987 987 (ks_flags & KSTAT_FLAG_VIRTUAL)) {
988 988 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
989 989 "cannot create persistent virtual kstat",
990 990 ks_module, ks_instance, ks_name);
991 991 return (NULL);
992 992 }
993 993
994 994 /*
995 995 * Don't allow variable-size physical kstats, since the framework's
996 996 * memory allocation for physical kstat data is fixed at creation time.
997 997 */
998 998 if ((ks_flags & KSTAT_FLAG_VAR_SIZE) &&
999 999 !(ks_flags & KSTAT_FLAG_VIRTUAL)) {
1000 1000 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1001 1001 "cannot create variable-size physical kstat",
1002 1002 ks_module, ks_instance, ks_name);
1003 1003 return (NULL);
1004 1004 }
1005 1005
1006 1006 /*
1007 1007 * Make sure the number of data fields is within legal range
1008 1008 */
1009 1009 if (ks_ndata < kstat_data_type[ks_type].min_ndata ||
1010 1010 ks_ndata > kstat_data_type[ks_type].max_ndata) {
1011 1011 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1012 1012 "ks_ndata=%d out of range [%d, %d]",
1013 1013 ks_module, ks_instance, ks_name, (int)ks_ndata,
1014 1014 kstat_data_type[ks_type].min_ndata,
1015 1015 kstat_data_type[ks_type].max_ndata);
1016 1016 return (NULL);
1017 1017 }
1018 1018
1019 1019 ks_data_size = kstat_data_type[ks_type].size * ks_ndata;
1020 1020
1021 1021 /*
1022 1022 * If the named kstat already exists and is dormant, reactivate it.
1023 1023 */
1024 1024 ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1025 1025 if (ksp != NULL) {
1026 1026 if (!(ksp->ks_flags & KSTAT_FLAG_DORMANT)) {
1027 1027 /*
1028 1028 * The named kstat exists but is not dormant --
1029 1029 * this is a kstat namespace collision.
1030 1030 */
1031 1031 kstat_rele(ksp);
1032 1032 cmn_err(CE_WARN,
1033 1033 "kstat_create('%s', %d, '%s'): namespace collision",
1034 1034 ks_module, ks_instance, ks_name);
1035 1035 return (NULL);
1036 1036 }
1037 1037 if ((strcmp(ksp->ks_class, ks_class) != 0) ||
1038 1038 (ksp->ks_type != ks_type) ||
1039 1039 (ksp->ks_ndata != ks_ndata) ||
1040 1040 (ks_flags & KSTAT_FLAG_VIRTUAL)) {
1041 1041 /*
1042 1042 * The name is the same, but the other key parameters
1043 1043 * differ from those of the dormant kstat -- bogus.
1044 1044 */
1045 1045 kstat_rele(ksp);
1046 1046 cmn_err(CE_WARN, "kstat_create('%s', %d, '%s'): "
1047 1047 "invalid reactivation of dormant kstat",
1048 1048 ks_module, ks_instance, ks_name);
1049 1049 return (NULL);
1050 1050 }
1051 1051 /*
1052 1052 * Return dormant kstat pointer to caller. As usual,
1053 1053 * the kstat is marked invalid until kstat_install().
1054 1054 */
1055 1055 ksp->ks_flags |= KSTAT_FLAG_INVALID;
1056 1056 kstat_rele(ksp);
1057 1057 return (ksp);
1058 1058 }
1059 1059
1060 1060 /*
1061 1061 * Allocate memory for the new kstat header and, if this is a physical
1062 1062 * kstat, the data section.
1063 1063 */
1064 1064 e = kstat_alloc(ks_flags & KSTAT_FLAG_VIRTUAL ? 0 : ks_data_size);
1065 1065 if (e == NULL) {
1066 1066 cmn_err(CE_NOTE, "kstat_create('%s', %d, '%s'): "
1067 1067 "insufficient kernel memory",
1068 1068 ks_module, ks_instance, ks_name);
1069 1069 return (NULL);
1070 1070 }
1071 1071
1072 1072 /*
1073 1073 * Initialize as many fields as we can. The caller may reset
1074 1074 * ks_lock, ks_update, ks_private, and ks_snapshot as necessary.
1075 1075 * Creators of virtual kstats may also reset ks_data. It is
1076 1076 * also up to the caller to initialize the kstat data section,
1077 1077 * if necessary. All initialization must be complete before
1078 1078 * calling kstat_install().
1079 1079 */
1080 1080 e->e_zone.zoneid = ks_zoneid;
1081 1081 e->e_zone.next = NULL;
1082 1082
1083 1083 ksp = &e->e_ks;
1084 1084 ksp->ks_crtime = gethrtime();
1085 1085 kstat_set_string(ksp->ks_module, ks_module);
1086 1086 ksp->ks_instance = ks_instance;
1087 1087 kstat_set_string(ksp->ks_name, ks_name);
1088 1088 ksp->ks_type = ks_type;
1089 1089 kstat_set_string(ksp->ks_class, ks_class);
1090 1090 ksp->ks_flags = ks_flags | KSTAT_FLAG_INVALID;
1091 1091 if (ks_flags & KSTAT_FLAG_VIRTUAL)
1092 1092 ksp->ks_data = NULL;
1093 1093 else
1094 1094 ksp->ks_data = (void *)(e + 1);
1095 1095 ksp->ks_ndata = ks_ndata;
1096 1096 ksp->ks_data_size = ks_data_size;
1097 1097 ksp->ks_snaptime = ksp->ks_crtime;
1098 1098 ksp->ks_update = default_kstat_update;
1099 1099 ksp->ks_private = NULL;
1100 1100 ksp->ks_snapshot = default_kstat_snapshot;
1101 1101 ksp->ks_lock = NULL;
1102 1102
1103 1103 mutex_enter(&kstat_chain_lock);
1104 1104
1105 1105 /*
1106 1106 * Add our kstat to the AVL trees.
1107 1107 */
1108 1108 if (avl_find(&kstat_avl_byname, e, &where) != NULL) {
1109 1109 mutex_exit(&kstat_chain_lock);
1110 1110 cmn_err(CE_WARN,
1111 1111 "kstat_create('%s', %d, '%s'): namespace collision",
1112 1112 ks_module, ks_instance, ks_name);
1113 1113 kstat_free(e);
1114 1114 return (NULL);
1115 1115 }
1116 1116 avl_insert(&kstat_avl_byname, e, where);
1117 1117
1118 1118 /*
1119 1119 * Loop around until we find an unused KID.
1120 1120 */
1121 1121 do {
1122 1122 ksp->ks_kid = kstat_chain_id++;
1123 1123 } while (avl_find(&kstat_avl_bykid, e, &where) != NULL);
1124 1124 avl_insert(&kstat_avl_bykid, e, where);
1125 1125
1126 1126 mutex_exit(&kstat_chain_lock);
1127 1127
1128 1128 return (ksp);
1129 1129 }
1130 1130
1131 1131 /*
1132 1132 * Activate a fully initialized kstat and make it visible to /dev/kstat.
1133 1133 */
1134 1134 void
1135 1135 kstat_install(kstat_t *ksp)
1136 1136 {
1137 1137 zoneid_t zoneid = ((ekstat_t *)ksp)->e_zone.zoneid;
1138 1138
1139 1139 /*
1140 1140 * If this is a variable-size kstat, it MUST provide kstat data locking
1141 1141 * to prevent data-size races with kstat readers.
1142 1142 */
1143 1143 if ((ksp->ks_flags & KSTAT_FLAG_VAR_SIZE) && ksp->ks_lock == NULL) {
1144 1144 panic("kstat_install('%s', %d, '%s'): "
1145 1145 "cannot create variable-size kstat without data lock",
1146 1146 ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1147 1147 }
1148 1148
1149 1149 if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1150 1150 cmn_err(CE_WARN, "kstat_install(%p): does not exist",
1151 1151 (void *)ksp);
1152 1152 return;
1153 1153 }
1154 1154
1155 1155 if (ksp->ks_type == KSTAT_TYPE_NAMED && ksp->ks_data != NULL) {
1156 1156 uint_t i;
1157 1157 kstat_named_t *knp = KSTAT_NAMED_PTR(ksp);
1158 1158
1159 1159 for (i = 0; i < ksp->ks_ndata; i++, knp++) {
1160 1160 if (knp->data_type == KSTAT_DATA_STRING) {
1161 1161 ksp->ks_flags |= KSTAT_FLAG_LONGSTRINGS;
1162 1162 break;
1163 1163 }
1164 1164 }
1165 1165 /*
1166 1166 * The default snapshot routine does not handle KSTAT_WRITE
1167 1167 * for long strings.
1168 1168 */
1169 1169 if ((ksp->ks_flags & KSTAT_FLAG_LONGSTRINGS) &&
1170 1170 (ksp->ks_flags & KSTAT_FLAG_WRITABLE) &&
1171 1171 (ksp->ks_snapshot == default_kstat_snapshot)) {
1172 1172 panic("kstat_install('%s', %d, '%s'): "
1173 1173 "named kstat containing KSTAT_DATA_STRING "
1174 1174 "is writable but uses default snapshot routine",
1175 1175 ksp->ks_module, ksp->ks_instance, ksp->ks_name);
1176 1176 }
1177 1177 }
1178 1178
1179 1179 if (ksp->ks_flags & KSTAT_FLAG_DORMANT) {
1180 1180
1181 1181 /*
1182 1182 * We are reactivating a dormant kstat. Initialize the
1183 1183 * caller's underlying data to the value it had when the
1184 1184 * kstat went dormant, and mark the kstat as active.
1185 1185 * Grab the provider's kstat lock if it's not already held.
1186 1186 */
1187 1187 kmutex_t *lp = ksp->ks_lock;
1188 1188 if (lp != NULL && MUTEX_NOT_HELD(lp)) {
1189 1189 mutex_enter(lp);
1190 1190 (void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1191 1191 mutex_exit(lp);
1192 1192 } else {
1193 1193 (void) KSTAT_UPDATE(ksp, KSTAT_WRITE);
1194 1194 }
1195 1195 ksp->ks_flags &= ~KSTAT_FLAG_DORMANT;
1196 1196 }
1197 1197
1198 1198 /*
1199 1199 * Now that the kstat is active, make it visible to the kstat driver.
1200 1200 * When copying out kstats the count is determined in
1201 1201 * header_kstat_update() and actually copied into kbuf in
1202 1202 * header_kstat_snapshot(). kstat_chain_lock is held across the two
1203 1203 * calls to ensure that this list doesn't change. Thus, we need to
1204 1204 * also take the lock to ensure that we don't copy the new kstat
1205 1205 * in the 2nd pass and overrun the buf.
1206 1206 */
1207 1207 mutex_enter(&kstat_chain_lock);
1208 1208 ksp->ks_flags &= ~KSTAT_FLAG_INVALID;
1209 1209 mutex_exit(&kstat_chain_lock);
1210 1210 kstat_rele(ksp);
1211 1211 }
1212 1212
1213 1213 /*
1214 1214 * Remove a kstat from the system. Or, if it's a persistent kstat,
1215 1215 * just update the data and mark it as dormant.
1216 1216 */
1217 1217 void
1218 1218 kstat_delete(kstat_t *ksp)
1219 1219 {
1220 1220 kmutex_t *lp;
1221 1221 ekstat_t *e = (ekstat_t *)ksp;
1222 1222 zoneid_t zoneid;
1223 1223 kstat_zone_t *kz;
1224 1224
1225 1225 ASSERT(ksp != NULL);
1226 1226
1227 1227 if (ksp == NULL)
1228 1228 return;
1229 1229
1230 1230 zoneid = e->e_zone.zoneid;
1231 1231
1232 1232 lp = ksp->ks_lock;
1233 1233
1234 1234 if (lp != NULL && MUTEX_HELD(lp)) {
1235 1235 panic("kstat_delete(%p): caller holds data lock %p",
1236 1236 (void *)ksp, (void *)lp);
1237 1237 }
1238 1238
1239 1239 if (kstat_hold_bykid(ksp->ks_kid, zoneid) != ksp) {
1240 1240 cmn_err(CE_WARN, "kstat_delete(%p): does not exist",
1241 1241 (void *)ksp);
1242 1242 return;
1243 1243 }
1244 1244
1245 1245 if (ksp->ks_flags & KSTAT_FLAG_PERSISTENT) {
1246 1246 /*
1247 1247 * Update the data one last time, so that all activity
1248 1248 * prior to going dormant has been accounted for.
1249 1249 */
1250 1250 KSTAT_ENTER(ksp);
1251 1251 (void) KSTAT_UPDATE(ksp, KSTAT_READ);
1252 1252 KSTAT_EXIT(ksp);
1253 1253
1254 1254 /*
1255 1255 * Mark the kstat as dormant and restore caller-modifiable
1256 1256 * fields to default values, so the kstat is readable during
1257 1257 * the dormant phase.
1258 1258 */
1259 1259 ksp->ks_flags |= KSTAT_FLAG_DORMANT;
1260 1260 ksp->ks_lock = NULL;
1261 1261 ksp->ks_update = default_kstat_update;
1262 1262 ksp->ks_private = NULL;
1263 1263 ksp->ks_snapshot = default_kstat_snapshot;
1264 1264 kstat_rele(ksp);
1265 1265 return;
1266 1266 }
1267 1267
1268 1268 /*
1269 1269 * Remove the kstat from the framework's AVL trees,
1270 1270 * free the allocated memory, and increment kstat_chain_id so
1271 1271 * /dev/kstat clients can detect the event.
1272 1272 */
1273 1273 mutex_enter(&kstat_chain_lock);
1274 1274 avl_remove(&kstat_avl_bykid, e);
1275 1275 avl_remove(&kstat_avl_byname, e);
1276 1276 kstat_chain_id++;
1277 1277 mutex_exit(&kstat_chain_lock);
1278 1278
1279 1279 kz = e->e_zone.next;
1280 1280 while (kz != NULL) {
1281 1281 kstat_zone_t *t = kz;
1282 1282
1283 1283 kz = kz->next;
1284 1284 kmem_free(t, sizeof (*t));
1285 1285 }
1286 1286 kstat_rele(ksp);
1287 1287 kstat_free(e);
1288 1288 }
1289 1289
1290 1290 void
1291 1291 kstat_delete_byname_zone(const char *ks_module, int ks_instance,
1292 1292 const char *ks_name, zoneid_t ks_zoneid)
1293 1293 {
1294 1294 kstat_t *ksp;
1295 1295
1296 1296 ksp = kstat_hold_byname(ks_module, ks_instance, ks_name, ks_zoneid);
1297 1297 if (ksp != NULL) {
1298 1298 kstat_rele(ksp);
|
↓ open down ↓ |
1298 lines elided |
↑ open up ↑ |
1299 1299 kstat_delete(ksp);
1300 1300 }
1301 1301 }
1302 1302
1303 1303 void
1304 1304 kstat_delete_byname(const char *ks_module, int ks_instance, const char *ks_name)
1305 1305 {
1306 1306 kstat_delete_byname_zone(ks_module, ks_instance, ks_name, ALL_ZONES);
1307 1307 }
1308 1308
1309 -/*
1310 - * The sparc V9 versions of these routines can be much cheaper than
1311 - * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
1312 - * For simplicity, however, we always feed the C versions to lint.
1313 - */
1314 -#if !defined(__sparc) || defined(lint) || defined(__lint)
1315 -
1316 1309 void
1317 -kstat_waitq_enter(kstat_io_t *kiop)
1310 +kstat_waitq_enter_time(kstat_io_t *kiop, const hrtime_t new)
1318 1311 {
1319 - hrtime_t new, delta;
1312 + hrtime_t delta;
1320 1313 ulong_t wcnt;
1321 1314
1322 - new = gethrtime_unscaled();
1315 + ASSERT(kiop != NULL);
1323 1316 delta = new - kiop->wlastupdate;
1324 1317 kiop->wlastupdate = new;
1325 1318 wcnt = kiop->wcnt++;
1326 1319 if (wcnt != 0) {
1327 1320 kiop->wlentime += delta * wcnt;
1328 1321 kiop->wtime += delta;
1329 1322 }
1330 1323 }
1331 1324
1332 1325 void
1333 -kstat_waitq_exit(kstat_io_t *kiop)
1326 +kstat_waitq_exit_time(kstat_io_t *kiop, const hrtime_t new)
1334 1327 {
1335 - hrtime_t new, delta;
1328 + hrtime_t delta;
1336 1329 ulong_t wcnt;
1337 1330
1338 - new = gethrtime_unscaled();
1331 + ASSERT(kiop != NULL);
1339 1332 delta = new - kiop->wlastupdate;
1340 1333 kiop->wlastupdate = new;
1341 1334 wcnt = kiop->wcnt--;
1342 1335 ASSERT((int)wcnt > 0);
1343 1336 kiop->wlentime += delta * wcnt;
1344 1337 kiop->wtime += delta;
1345 1338 }
1346 1339
1347 1340 void
1348 -kstat_runq_enter(kstat_io_t *kiop)
1341 +kstat_runq_enter_time(kstat_io_t *kiop, const hrtime_t new)
1349 1342 {
1350 - hrtime_t new, delta;
1343 + hrtime_t delta;
1351 1344 ulong_t rcnt;
1352 1345
1353 - new = gethrtime_unscaled();
1346 + ASSERT(kiop != NULL);
1354 1347 delta = new - kiop->rlastupdate;
1355 1348 kiop->rlastupdate = new;
1356 1349 rcnt = kiop->rcnt++;
1357 1350 if (rcnt != 0) {
1358 1351 kiop->rlentime += delta * rcnt;
1359 1352 kiop->rtime += delta;
1360 1353 }
1361 1354 }
1362 1355
1363 1356 void
1364 -kstat_runq_exit(kstat_io_t *kiop)
1357 +kstat_runq_exit_time(kstat_io_t *kiop, const hrtime_t new)
1365 1358 {
1366 - hrtime_t new, delta;
1359 + hrtime_t delta;
1367 1360 ulong_t rcnt;
1368 1361
1369 - new = gethrtime_unscaled();
1362 + ASSERT(kiop != NULL);
1370 1363 delta = new - kiop->rlastupdate;
1371 1364 kiop->rlastupdate = new;
1372 1365 rcnt = kiop->rcnt--;
1373 1366 ASSERT((int)rcnt > 0);
1374 1367 kiop->rlentime += delta * rcnt;
1375 1368 kiop->rtime += delta;
1376 1369 }
1377 1370
1371 +/*
1372 + * The sparc V9 versions of these routines can be much cheaper than
1373 + * the poor 32-bit compiler can comprehend, so they're in sparcv9_subr.s.
1374 + * For simplicity, however, we always feed the C versions to lint.
1375 + */
1376 +#if !defined(__sparc) || defined(lint) || defined(__lint)
1377 +
1378 1378 void
1379 -kstat_waitq_to_runq(kstat_io_t *kiop)
1379 +kstat_waitq_enter(kstat_io_t *kiop)
1380 1380 {
1381 - hrtime_t new, delta;
1382 - ulong_t wcnt, rcnt;
1381 + kstat_waitq_enter_time(kiop, gethrtime_unscaled());
1382 +}
1383 1383
1384 - new = gethrtime_unscaled();
1384 +void
1385 +kstat_waitq_exit(kstat_io_t *kiop)
1386 +{
1387 + kstat_waitq_exit_time(kiop, gethrtime_unscaled());
1388 +}
1385 1389
1386 - delta = new - kiop->wlastupdate;
1387 - kiop->wlastupdate = new;
1388 - wcnt = kiop->wcnt--;
1389 - ASSERT((int)wcnt > 0);
1390 - kiop->wlentime += delta * wcnt;
1391 - kiop->wtime += delta;
1390 +void
1391 +kstat_runq_enter(kstat_io_t *kiop)
1392 +{
1393 + kstat_runq_enter_time(kiop, gethrtime_unscaled());
1394 +}
1392 1395
1393 - delta = new - kiop->rlastupdate;
1394 - kiop->rlastupdate = new;
1395 - rcnt = kiop->rcnt++;
1396 - if (rcnt != 0) {
1397 - kiop->rlentime += delta * rcnt;
1398 - kiop->rtime += delta;
1399 - }
1396 +void
1397 +kstat_runq_exit(kstat_io_t *kiop)
1398 +{
1399 + kstat_runq_exit_time(kiop, gethrtime_unscaled());
1400 1400 }
1401 1401
1402 1402 void
1403 -kstat_runq_back_to_waitq(kstat_io_t *kiop)
1403 +kstat_waitq_to_runq(kstat_io_t *kiop)
1404 1404 {
1405 - hrtime_t new, delta;
1406 - ulong_t wcnt, rcnt;
1405 + hrtime_t new = gethrtime_unscaled();
1406 + ASSERT(kiop != NULL);
1407 + kstat_waitq_exit_time(kiop, new);
1408 + kstat_runq_enter_time(kiop, new);
1409 +}
1407 1410
1408 - new = gethrtime_unscaled();
1409 -
1410 - delta = new - kiop->rlastupdate;
1411 - kiop->rlastupdate = new;
1412 - rcnt = kiop->rcnt--;
1413 - ASSERT((int)rcnt > 0);
1414 - kiop->rlentime += delta * rcnt;
1415 - kiop->rtime += delta;
1416 -
1417 - delta = new - kiop->wlastupdate;
1418 - kiop->wlastupdate = new;
1419 - wcnt = kiop->wcnt++;
1420 - if (wcnt != 0) {
1421 - kiop->wlentime += delta * wcnt;
1422 - kiop->wtime += delta;
1423 - }
1411 +void
1412 +kstat_runq_back_to_waitq(kstat_io_t *kiop)
1413 +{
1414 + hrtime_t new = gethrtime_unscaled();
1415 + ASSERT(kiop != NULL);
1416 + kstat_runq_exit_time(kiop, new);
1417 + kstat_waitq_enter_time(kiop, new);
1424 1418 }
1425 1419
1426 1420 #endif
1427 1421
1428 1422 void
1429 1423 kstat_timer_start(kstat_timer_t *ktp)
1430 1424 {
1431 1425 ktp->start_time = gethrtime();
1432 1426 }
1433 1427
1434 1428 void
1435 1429 kstat_timer_stop(kstat_timer_t *ktp)
1436 1430 {
1437 1431 hrtime_t etime;
1438 1432 u_longlong_t num_events;
1439 1433
1440 1434 ktp->stop_time = etime = gethrtime();
1441 1435 etime -= ktp->start_time;
1442 1436 num_events = ktp->num_events;
1443 1437 if (etime < ktp->min_time || num_events == 0)
1444 1438 ktp->min_time = etime;
1445 1439 if (etime > ktp->max_time)
1446 1440 ktp->max_time = etime;
1447 1441 ktp->elapsed_time += etime;
1448 1442 ktp->num_events = num_events + 1;
1449 1443 }
|
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX