Print this page
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/cmd/zonestat/zonestatd/zonestatd.c
+++ new/usr/src/cmd/zonestat/zonestatd/zonestatd.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright (c) 2011, Joyent, Inc. All rights reserved.
25 25 */
26 26 #include <alloca.h>
27 27 #include <assert.h>
28 28 #include <dirent.h>
29 29 #include <dlfcn.h>
30 30 #include <door.h>
31 31 #include <errno.h>
32 32 #include <exacct.h>
33 33 #include <ctype.h>
34 34 #include <fcntl.h>
35 35 #include <kstat.h>
36 36 #include <libcontract.h>
37 37 #include <libintl.h>
38 38 #include <libscf.h>
39 39 #include <zonestat.h>
40 40 #include <zonestat_impl.h>
41 41 #include <limits.h>
42 42 #include <pool.h>
43 43 #include <procfs.h>
44 44 #include <rctl.h>
45 45 #include <thread.h>
46 46 #include <signal.h>
47 47 #include <stdarg.h>
48 48 #include <stddef.h>
49 49 #include <stdio.h>
50 50 #include <stdlib.h>
51 51 #include <strings.h>
52 52 #include <synch.h>
53 53 #include <sys/acctctl.h>
54 54 #include <sys/contract/process.h>
55 55 #include <sys/ctfs.h>
56 56 #include <sys/fork.h>
57 57 #include <sys/param.h>
58 58 #include <sys/priocntl.h>
59 59 #include <sys/fxpriocntl.h>
60 60 #include <sys/processor.h>
61 61 #include <sys/pset.h>
62 62 #include <sys/socket.h>
63 63 #include <sys/stat.h>
64 64 #include <sys/statvfs.h>
65 65 #include <sys/swap.h>
66 66 #include <sys/systeminfo.h>
67 67 #include <thread.h>
68 68 #include <sys/list.h>
69 69 #include <sys/time.h>
70 70 #include <sys/types.h>
71 71 #include <sys/vm_usage.h>
72 72 #include <sys/wait.h>
73 73 #include <sys/zone.h>
74 74 #include <time.h>
75 75 #include <ucred.h>
76 76 #include <unistd.h>
77 77 #include <vm/anon.h>
78 78 #include <zone.h>
79 79 #include <zonestat.h>
80 80
/* Longest pset name handled; value taken from PV_NAME_MAX_LEN. */
#define	MAX_PSET_NAME		1024

/* Sentinel pset value meaning "unlimited". */
#define	ZSD_PSET_UNLIMITED	UINT16_MAX

/* Extended accounting file used when zonestatd configures accounting itself. */
#define	ZONESTAT_EXACCT_FILE	"/var/adm/exacct/zonestat-process"
84 84
85 85 /*
86 86 * zonestatd implements gathering cpu and memory utilization data for
87 87 * running zones. It has these components:
88 88 *
89 89 * zsd_server:
90 90 * Door server to respond to client connections. Each client
91 91 * will connect using libzonestat.so, which will open and
92 92 * call /var/tmp/.zonestat_door. Each connecting client is given
93 93 * a file descriptor to the stat server.
94 94 *
95 95 * The zsd_server also responds to zoneadmd, which reports when a
96 96 * new zone is booted. This is used to fattach the zsd_server door
97 97 * into the new zone.
98 98 *
99 99 * zsd_stat_server:
100 100 * Receives client requests for the current utilization data. Each
101 101 * client request will cause zonestatd to update the current utilization
102 102 * data by kicking the stat_thread.
103 103 *
104 104 * If the client is in a non-global zone, the utilization data will
105 105 * be filtered to only show the given zone. The usage by all other zones
106 106 * will be added to the system utilization.
107 107 *
108 108 * stat_thread:
109 109 * The stat thread implements querying the system to determine the
110 110 * current utilization data for each running zone. This includes
111 111 * inspecting the system's processor set configuration, as well as details
112 112 * of each zone, such as their configured limits, and which processor
113 113 * sets they are running in.
114 114 *
115 115 * The stat_thread will only update memory utilization data as often as
116 116 * the configured config/sample_interval on the zones-monitoring service.
117 117 */
118 118
119 119 /*
120 120 * The private vmusage structure unfortunately uses size_t types, and assumes
121 121 * the caller's bitness matches the kernel's bitness. Since the getvmusage()
122 122 * system call is contracted, and zonestatd is 32 bit, the following structures
123 123 * are used to interact with a 32bit or 64 bit kernel.
124 124 */
125 125 typedef struct zsd_vmusage32 {
126 126 id_t vmu_zoneid;
127 127 uint_t vmu_type;
128 128 id_t vmu_id;
129 129
130 130 uint32_t vmu_rss_all;
131 131 uint32_t vmu_rss_private;
132 132 uint32_t vmu_rss_shared;
133 133 uint32_t vmu_swap_all;
134 134 uint32_t vmu_swap_private;
135 135 uint32_t vmu_swap_shared;
136 136 } zsd_vmusage32_t;
137 137
138 138 typedef struct zsd_vmusage64 {
139 139 id_t vmu_zoneid;
140 140 uint_t vmu_type;
141 141 id_t vmu_id;
142 142 /*
143 143 * An amd64 kernel will align the following uint64_t members, but a
144 144 * 32bit i386 process will not without help.
145 145 */
146 146 int vmu_align_next_members_on_8_bytes;
147 147 uint64_t vmu_rss_all;
148 148 uint64_t vmu_rss_private;
149 149 uint64_t vmu_rss_shared;
150 150 uint64_t vmu_swap_all;
151 151 uint64_t vmu_swap_private;
152 152 uint64_t vmu_swap_shared;
153 153 } zsd_vmusage64_t;
154 154
155 155 struct zsd_zone;
156 156
157 157 /* Used to store a zone's usage of a pset */
158 158 typedef struct zsd_pset_usage {
159 159 struct zsd_zone *zsu_zone;
160 160 struct zsd_pset *zsu_pset;
161 161
162 162 list_node_t zsu_next;
163 163
164 164 zoneid_t zsu_zoneid;
165 165 boolean_t zsu_found; /* zone bound at end of interval */
166 166 boolean_t zsu_active; /* zone was bound during interval */
167 167 boolean_t zsu_new; /* zone newly bound in this interval */
168 168 boolean_t zsu_deleted; /* zone was unbound in this interval */
169 169 boolean_t zsu_empty; /* no procs in pset in this interval */
170 170 time_t zsu_start; /* time when zone was found in pset */
171 171 hrtime_t zsu_hrstart; /* time when zone was found in pset */
172 172 uint64_t zsu_cpu_shares;
173 173 uint_t zsu_scheds; /* schedulers found in this pass */
174 174 timestruc_t zsu_cpu_usage; /* cpu time used */
175 175 } zsd_pset_usage_t;
176 176
177 177 /* Used to store a pset's utilization */
178 178 typedef struct zsd_pset {
179 179 psetid_t zsp_id;
180 180 list_node_t zsp_next;
181 181 char zsp_name[ZS_PSETNAME_MAX];
182 182
183 183 uint_t zsp_cputype; /* default, dedicated or shared */
184 184 boolean_t zsp_found; /* pset found at end of interval */
185 185 boolean_t zsp_new; /* pset new in this interval */
186 186 boolean_t zsp_deleted; /* pset deleted in this interval */
187 187 boolean_t zsp_active; /* pset existed during interval */
188 188 boolean_t zsp_empty; /* no processes in pset */
189 189 time_t zsp_start;
190 190 hrtime_t zsp_hrstart;
191 191
192 192 uint64_t zsp_online; /* online cpus in interval */
193 193 uint64_t zsp_size; /* size in this interval */
194 194 uint64_t zsp_min; /* configured min in this interval */
195 195 uint64_t zsp_max; /* configured max in this interval */
196 196 int64_t zsp_importance; /* configured max in this interval */
197 197
198 198 uint_t zsp_scheds; /* scheds of processes found in pset */
199 199 uint64_t zsp_cpu_shares; /* total shares in this interval */
200 200
201 201 timestruc_t zsp_total_time;
202 202 timestruc_t zsp_usage_kern;
203 203 timestruc_t zsp_usage_zones;
204 204
205 205 /* Individual zone usages of pset */
206 206 list_t zsp_usage_list;
207 207 int zsp_nusage;
208 208
209 209 /* Summed kstat values from individual cpus in pset */
210 210 timestruc_t zsp_idle;
211 211 timestruc_t zsp_intr;
212 212 timestruc_t zsp_kern;
213 213 timestruc_t zsp_user;
214 214
215 215 } zsd_pset_t;
216 216
217 217 /* Used to track an individual cpu's utilization as reported by kstats */
218 218 typedef struct zsd_cpu {
219 219 processorid_t zsc_id;
220 220 list_node_t zsc_next;
221 221 psetid_t zsc_psetid;
222 222 psetid_t zsc_psetid_prev;
223 223 zsd_pset_t *zsc_pset;
224 224
225 225 boolean_t zsc_found; /* cpu online in this interval */
226 226 boolean_t zsc_onlined; /* cpu onlined during this interval */
227 227 boolean_t zsc_offlined; /* cpu offlined during this interval */
228 228 boolean_t zsc_active; /* cpu online during this interval */
229 229 boolean_t zsc_allocated; /* True if cpu has ever been found */
230 230
231 231 /* kstats this interval */
232 232 uint64_t zsc_nsec_idle;
233 233 uint64_t zsc_nsec_intr;
234 234 uint64_t zsc_nsec_kern;
235 235 uint64_t zsc_nsec_user;
236 236
237 237 /* kstats in most recent interval */
238 238 uint64_t zsc_nsec_idle_prev;
239 239 uint64_t zsc_nsec_intr_prev;
240 240 uint64_t zsc_nsec_kern_prev;
241 241 uint64_t zsc_nsec_user_prev;
242 242
243 243 /* Total kstat increases since zonestatd started reading kstats */
244 244 timestruc_t zsc_idle;
245 245 timestruc_t zsc_intr;
246 246 timestruc_t zsc_kern;
247 247 timestruc_t zsc_user;
248 248
249 249 } zsd_cpu_t;
250 250
251 251 /* Used to describe an individual zone and its utilization */
252 252 typedef struct zsd_zone {
253 253 zoneid_t zsz_id;
254 254 list_node_t zsz_next;
255 255 char zsz_name[ZS_ZONENAME_MAX];
256 256 uint_t zsz_cputype;
257 257 uint_t zsz_iptype;
258 258 time_t zsz_start;
259 259 hrtime_t zsz_hrstart;
260 260
261 261 char zsz_pool[ZS_POOLNAME_MAX];
262 262 char zsz_pset[ZS_PSETNAME_MAX];
263 263 int zsz_default_sched;
264 264 /* These are deduced by inspecting processes */
265 265 psetid_t zsz_psetid;
266 266 uint_t zsz_scheds;
267 267
268 268 boolean_t zsz_new; /* zone booted during this interval */
269 269 boolean_t zsz_deleted; /* halted during this interval */
270 270 boolean_t zsz_active; /* running in this interval */
271 271 boolean_t zsz_empty; /* no processes in this interval */
272 272 boolean_t zsz_gone; /* not installed in this interval */
273 273 boolean_t zsz_found; /* Running at end of this interval */
274 274
275 275 uint64_t zsz_cpu_shares;
276 276 uint64_t zsz_cpu_cap;
277 277 uint64_t zsz_ram_cap;
278 278 uint64_t zsz_locked_cap;
279 279 uint64_t zsz_vm_cap;
280 280
281 281 uint64_t zsz_cpus_online;
282 282 timestruc_t zsz_cpu_usage; /* cpu time of cpu cap */
283 283 timestruc_t zsz_cap_time; /* cpu time of cpu cap */
284 284 timestruc_t zsz_share_time; /* cpu time of share of cpu */
285 285 timestruc_t zsz_pset_time; /* time of all psets zone is bound to */
286 286
287 287 uint64_t zsz_usage_ram;
288 288 uint64_t zsz_usage_locked;
289 289 uint64_t zsz_usage_vm;
290 290
291 291 uint64_t zsz_processes_cap;
292 292 uint64_t zsz_lwps_cap;
293 293 uint64_t zsz_shm_cap;
294 294 uint64_t zsz_shmids_cap;
295 295 uint64_t zsz_semids_cap;
296 296 uint64_t zsz_msgids_cap;
297 297 uint64_t zsz_lofi_cap;
298 298
299 299 uint64_t zsz_processes;
300 300 uint64_t zsz_lwps;
301 301 uint64_t zsz_shm;
302 302 uint64_t zsz_shmids;
303 303 uint64_t zsz_semids;
304 304 uint64_t zsz_msgids;
305 305 uint64_t zsz_lofi;
306 306
307 307 } zsd_zone_t;
308 308
309 309 /*
310 310 * Used to track the cpu usage of an individual processes.
311 311 *
312 312 * zonestatd sweeps /proc each interval and charges the cpu usage of processes.
313 313 * to their zone. As processes exit, their extended accounting records are
314 314 * read and the difference of their total and known usage is charged to their
315 315 * zone.
316 316 *
317 317 * If a process is never seen in /proc, the total usage on its extended
318 318 * accounting record will be charged to its zone.
319 319 */
320 320 typedef struct zsd_proc {
321 321 list_node_t zspr_next;
322 322 pid_t zspr_ppid;
323 323 psetid_t zspr_psetid;
324 324 zoneid_t zspr_zoneid;
325 325 int zspr_sched;
326 326 timestruc_t zspr_usage;
327 327 } zsd_proc_t;
328 328
329 329 /* Used to track the overall resource usage of the system */
330 330 typedef struct zsd_system {
331 331
332 332 uint64_t zss_ram_total;
333 333 uint64_t zss_ram_kern;
334 334 uint64_t zss_ram_zones;
335 335
336 336 uint64_t zss_locked_kern;
337 337 uint64_t zss_locked_zones;
338 338
339 339 uint64_t zss_vm_total;
340 340 uint64_t zss_vm_kern;
341 341 uint64_t zss_vm_zones;
342 342
343 343 uint64_t zss_swap_total;
344 344 uint64_t zss_swap_used;
345 345
346 346 timestruc_t zss_idle;
347 347 timestruc_t zss_intr;
348 348 timestruc_t zss_kern;
349 349 timestruc_t zss_user;
350 350
351 351 timestruc_t zss_cpu_total_time;
352 352 timestruc_t zss_cpu_usage_kern;
353 353 timestruc_t zss_cpu_usage_zones;
354 354
355 355 uint64_t zss_maxpid;
356 356 uint64_t zss_processes_max;
357 357 uint64_t zss_lwps_max;
358 358 uint64_t zss_shm_max;
359 359 uint64_t zss_shmids_max;
360 360 uint64_t zss_semids_max;
361 361 uint64_t zss_msgids_max;
362 362 uint64_t zss_lofi_max;
363 363
364 364 uint64_t zss_processes;
365 365 uint64_t zss_lwps;
366 366 uint64_t zss_shm;
367 367 uint64_t zss_shmids;
368 368 uint64_t zss_semids;
369 369 uint64_t zss_msgids;
370 370 uint64_t zss_lofi;
371 371
372 372 uint64_t zss_ncpus;
373 373 uint64_t zss_ncpus_online;
374 374
375 375 } zsd_system_t;
376 376
377 377 /*
378 378 * A dumping ground for various information and structures used to compute
379 379 * utilization.
380 380 *
381 381 * This structure is used to track the system while clients are connected.
382 382 * When The first client connects, a zsd_ctl is allocated and configured by
383 383 * zsd_open(). When all clients disconnect, the zsd_ctl is closed.
384 384 */
385 385 typedef struct zsd_ctl {
386 386 kstat_ctl_t *zsctl_kstat_ctl;
387 387
388 388 /* To track extended accounting */
389 389 int zsctl_proc_fd; /* Log currently being used */
390 390 ea_file_t zsctl_proc_eaf;
391 391 struct stat64 zsctl_proc_stat;
392 392 int zsctl_proc_open;
393 393 int zsctl_proc_fd_next; /* Log file to use next */
394 394 ea_file_t zsctl_proc_eaf_next;
395 395 struct stat64 zsctl_proc_stat_next;
396 396 int zsctl_proc_open_next;
397 397
398 398 /* pool configuration handle */
399 399 pool_conf_t *zsctl_pool_conf;
400 400 int zsctl_pool_status;
401 401 int zsctl_pool_changed;
402 402
403 403 /* The above usage tacking structures */
404 404 zsd_system_t *zsctl_system;
405 405 list_t zsctl_zones;
406 406 list_t zsctl_psets;
407 407 list_t zsctl_cpus;
408 408 zsd_cpu_t *zsctl_cpu_array;
409 409 zsd_proc_t *zsctl_proc_array;
410 410
411 411 /* Various system info */
412 412 uint64_t zsctl_maxcpuid;
413 413 uint64_t zsctl_maxproc;
414 414 uint64_t zsctl_kern_bits;
415 415 uint64_t zsctl_pagesize;
416 416
417 417 /* Used to track time available under a cpu cap. */
418 418 uint64_t zsctl_hrtime;
419 419 uint64_t zsctl_hrtime_prev;
420 420 timestruc_t zsctl_hrtime_total;
421 421
422 422 struct timeval zsctl_timeofday;
423 423
424 424 /* Caches for arrays allocated for use by various system calls */
425 425 psetid_t *zsctl_pset_cache;
426 426 uint_t zsctl_pset_ncache;
427 427 processorid_t *zsctl_cpu_cache;
428 428 uint_t zsctl_cpu_ncache;
429 429 zoneid_t *zsctl_zone_cache;
430 430 uint_t zsctl_zone_ncache;
431 431 struct swaptable *zsctl_swap_cache;
432 432 uint64_t zsctl_swap_cache_size;
433 433 uint64_t zsctl_swap_cache_num;
434 434 zsd_vmusage64_t *zsctl_vmusage_cache;
435 435 uint64_t zsctl_vmusage_cache_num;
436 436
437 437 /* Info about procfs for scanning /proc */
438 438 struct dirent *zsctl_procfs_dent;
439 439 long zsctl_procfs_dent_size;
440 440 pool_value_t *zsctl_pool_vals[3];
441 441
442 442 /* Counts on tracked entities */
443 443 uint_t zsctl_nzones;
444 444 uint_t zsctl_npsets;
445 445 uint_t zsctl_npset_usages;
446 446 } zsd_ctl_t;
447 447
448 448 zsd_ctl_t *g_ctl;
449 449 boolean_t g_open; /* True if g_ctl is open */
450 450 int g_hasclient; /* True if any clients are connected */
451 451
452 452 /*
453 453 * The usage cache is updated by the stat_thread, and copied to clients by
454 454 * the zsd_stat_server. Mutex and cond are to synchronize between the
455 455 * stat_thread and the stat_server.
456 456 */
457 457 zs_usage_cache_t *g_usage_cache;
458 458 mutex_t g_usage_cache_lock;
459 459 cond_t g_usage_cache_kick;
460 460 uint_t g_usage_cache_kickers;
461 461 cond_t g_usage_cache_wait;
462 462 char *g_usage_cache_buf;
463 463 uint_t g_usage_cache_bufsz;
464 464 uint64_t g_gen_next;
465 465
466 466 /* fds of door servers */
467 467 int g_server_door;
468 468 int g_stat_door;
469 469
470 470 /*
471 471 * Starting and current time. Used to throttle memory calculation, and to
472 472 * mark new zones and psets with their boot and creation time.
473 473 */
474 474 time_t g_now;
475 475 time_t g_start;
476 476 hrtime_t g_hrnow;
477 477 hrtime_t g_hrstart;
478 478 uint64_t g_interval;
479 479
480 480 /*
481 481 * main() thread.
482 482 */
483 483 thread_t g_main;
484 484
/*
 * Write a warning to stderr.  The output is the (translated) prefix
 * "zonestat: Warning: ", followed by the caller's printf-style message,
 * followed by a newline.
 *
 * fmt	printf-style format string; the remaining arguments are consumed
 *	according to it.
 */
/* PRINTFLIKE1 */
static void
zsd_warn(const char *fmt, ...)
{
	va_list alist;

	/*
	 * The prefix comes from the message catalog, so write it with
	 * fputs() rather than using it as a format string: a '%' in a
	 * translation must never be interpreted as a conversion.
	 */
	(void) fputs(gettext("zonestat: Warning: "), stderr);

	va_start(alist, fmt);
	(void) vfprintf(stderr, fmt, alist);
	va_end(alist);

	(void) fputc('\n', stderr);
}
498 498
/*
 * Write an error to stderr and terminate the process with exit status 1.
 * The output is the (translated) prefix "zonestat: Error: ", followed by
 * the caller's printf-style message, followed by a newline.
 *
 * fmt	printf-style format string; the remaining arguments are consumed
 *	according to it.
 *
 * This function does not return.
 */
/* PRINTFLIKE1 */
static void
zsd_error(const char *fmt, ...)
{
	va_list alist;

	/*
	 * The prefix comes from the message catalog, so write it with
	 * fputs() rather than using it as a format string: a '%' in a
	 * translation must never be interpreted as a conversion.
	 */
	(void) fputs(gettext("zonestat: Error: "), stderr);

	va_start(alist, fmt);
	(void) vfprintf(stderr, fmt, alist);
	va_end(alist);

	(void) fputc('\n', stderr);
	exit(1);
}
513 513
514 514 /* Turns on extended accounting if not configured externally */
515 515 int
516 516 zsd_enable_cpu_stats()
517 517 {
518 518 char *path = ZONESTAT_EXACCT_FILE;
519 519 char oldfile[MAXPATHLEN];
520 520 int ret, state = AC_ON;
521 521 ac_res_t res[6];
522 522
523 523 /*
524 524 * Start a new accounting file if accounting not configured
525 525 * externally.
526 526 */
527 527
528 528 res[0].ar_id = AC_PROC_PID;
529 529 res[0].ar_state = AC_ON;
530 530 res[1].ar_id = AC_PROC_ANCPID;
531 531 res[1].ar_state = AC_ON;
532 532 res[2].ar_id = AC_PROC_CPU;
533 533 res[2].ar_state = AC_ON;
534 534 res[3].ar_id = AC_PROC_TIME;
535 535 res[3].ar_state = AC_ON;
536 536 res[4].ar_id = AC_PROC_ZONENAME;
537 537 res[4].ar_state = AC_ON;
538 538 res[5].ar_id = AC_NONE;
539 539 res[5].ar_state = AC_ON;
540 540 if (acctctl(AC_PROC | AC_RES_SET, res, sizeof (res)) != 0) {
541 541 zsd_warn(gettext("Unable to set accounting resources"));
542 542 return (-1);
543 543 }
544 544 /* Only set accounting file if none is configured */
545 545 ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
546 546 if (ret < 0) {
547 547
548 548 (void) unlink(path);
549 549 if (acctctl(AC_PROC | AC_FILE_SET, path, strlen(path) + 1)
550 550 == -1) {
551 551 zsd_warn(gettext("Unable to set accounting file"));
552 552 return (-1);
553 553 }
554 554 }
555 555 if (acctctl(AC_PROC | AC_STATE_SET, &state, sizeof (state)) == -1) {
556 556 zsd_warn(gettext("Unable to enable accounting"));
557 557 return (-1);
558 558 }
559 559 return (0);
560 560 }
561 561
562 562 /* Turns off extended accounting if not configured externally */
563 563 int
564 564 zsd_disable_cpu_stats()
565 565 {
566 566 char *path = ZONESTAT_EXACCT_FILE;
567 567 int ret, state = AC_OFF;
568 568 ac_res_t res[6];
569 569 char oldfile[MAXPATHLEN];
570 570
571 571 /* If accounting file is externally configured, leave it alone */
572 572 ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
573 573 if (ret == 0 && strcmp(oldfile, path) != 0)
574 574 return (0);
575 575
576 576 res[0].ar_id = AC_PROC_PID;
577 577 res[0].ar_state = AC_OFF;
578 578 res[1].ar_id = AC_PROC_ANCPID;
579 579 res[1].ar_state = AC_OFF;
580 580 res[2].ar_id = AC_PROC_CPU;
581 581 res[2].ar_state = AC_OFF;
582 582 res[3].ar_id = AC_PROC_TIME;
583 583 res[3].ar_state = AC_OFF;
584 584 res[4].ar_id = AC_PROC_ZONENAME;
585 585 res[4].ar_state = AC_OFF;
586 586 res[5].ar_id = AC_NONE;
587 587 res[5].ar_state = AC_OFF;
588 588 if (acctctl(AC_PROC | AC_RES_SET, res, sizeof (res)) != 0) {
589 589 zsd_warn(gettext("Unable to clear accounting resources"));
590 590 return (-1);
591 591 }
592 592 if (acctctl(AC_PROC | AC_FILE_SET, NULL, 0) == -1) {
593 593 zsd_warn(gettext("Unable to clear accounting file"));
594 594 return (-1);
595 595 }
596 596 if (acctctl(AC_PROC | AC_STATE_SET, &state, sizeof (state)) == -1) {
597 597 zsd_warn(gettext("Unable to diable accounting"));
598 598 return (-1);
599 599 }
600 600
601 601 (void) unlink(path);
602 602 return (0);
603 603 }
604 604
605 605 /*
606 606 * If not configured externally, deletes the current extended accounting file
607 607 * and starts a new one.
608 608 *
609 609 * Since the stat_thread holds an open handle to the accounting file, it will
610 610 * read all remaining entries from the old file before switching to
611 611 * read the new one.
612 612 */
613 613 int
614 614 zsd_roll_exacct(void)
615 615 {
616 616 int ret;
617 617 char *path = ZONESTAT_EXACCT_FILE;
618 618 char oldfile[MAXPATHLEN];
619 619
620 620 /* If accounting file is externally configured, leave it alone */
621 621 ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
622 622 if (ret == 0 && strcmp(oldfile, path) != 0)
623 623 return (0);
624 624
625 625 if (unlink(path) != 0)
626 626 /* Roll it next time */
627 627 return (0);
628 628
629 629 if (acctctl(AC_PROC | AC_FILE_SET, path, strlen(path) + 1) == -1) {
630 630 zsd_warn(gettext("Unable to set accounting file"));
631 631 return (-1);
632 632 }
633 633 return (0);
634 634 }
635 635
636 636 /* Contract stuff for zone_enter() */
637 637 int
638 638 init_template(void)
639 639 {
640 640 int fd;
641 641 int err = 0;
642 642
643 643 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
644 644 if (fd == -1)
645 645 return (-1);
646 646
647 647 /*
648 648 * For now, zoneadmd doesn't do anything with the contract.
649 649 * Deliver no events, don't inherit, and allow it to be orphaned.
650 650 */
651 651 err |= ct_tmpl_set_critical(fd, 0);
652 652 err |= ct_tmpl_set_informative(fd, 0);
653 653 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
654 654 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
655 655 if (err || ct_tmpl_activate(fd)) {
656 656 (void) close(fd);
657 657 return (-1);
658 658 }
659 659
660 660 return (fd);
661 661 }
662 662
663 663 /*
664 664 * Contract stuff for zone_enter()
665 665 */
666 666 int
667 667 contract_latest(ctid_t *id)
668 668 {
669 669 int cfd, r;
670 670 ct_stathdl_t st;
671 671 ctid_t result;
672 672
673 673 if ((cfd = open64(CTFS_ROOT "/process/latest", O_RDONLY)) == -1)
674 674 return (errno);
675 675
676 676 if ((r = ct_status_read(cfd, CTD_COMMON, &st)) != 0) {
677 677 (void) close(cfd);
678 678 return (r);
679 679 }
680 680
681 681 result = ct_status_get_id(st);
682 682 ct_status_free(st);
683 683 (void) close(cfd);
684 684
685 685 *id = result;
686 686 return (0);
687 687 }
688 688
/*
 * Set FD_CLOEXEC on fd, keeping its other descriptor flags.
 * Returns 0 on success, -1 if either fcntl() call fails.
 */
static int
close_on_exec(int fd)
{
	int fdflags;

	fdflags = fcntl(fd, F_GETFD, 0);
	if (fdflags == -1)
		return (-1);

	if (fcntl(fd, F_SETFD, fdflags | FD_CLOEXEC) == -1)
		return (-1);

	return (0);
}
697 697
698 698 int
699 699 contract_open(ctid_t ctid, const char *type, const char *file, int oflag)
700 700 {
701 701 char path[PATH_MAX];
702 702 int n, fd;
703 703
704 704 if (type == NULL)
705 705 type = "all";
706 706
707 707 n = snprintf(path, PATH_MAX, CTFS_ROOT "/%s/%ld/%s", type, ctid, file);
708 708 if (n >= sizeof (path)) {
709 709 errno = ENAMETOOLONG;
710 710 return (-1);
711 711 }
712 712
713 713 fd = open64(path, oflag);
714 714 if (fd != -1) {
715 715 if (close_on_exec(fd) == -1) {
716 716 int err = errno;
717 717 (void) close(fd);
718 718 errno = err;
719 719 return (-1);
720 720 }
721 721 }
722 722 return (fd);
723 723 }
724 724
725 725 int
726 726 contract_abandon_id(ctid_t ctid)
727 727 {
728 728 int fd, err;
729 729
730 730 fd = contract_open(ctid, "all", "ctl", O_WRONLY);
731 731 if (fd == -1)
732 732 return (errno);
733 733
734 734 err = ct_ctl_abandon(fd);
735 735 (void) close(fd);
736 736
737 737 return (err);
738 738 }
739 739 /*
740 740 * Attach the zsd_server to a zone. Called for each zone when zonestatd
741 741 * starts, and for each newly booted zone when zoneadmd contacts the zsd_server
742 742 *
743 743 * Zone_enter is used to avoid reaching into zone to fattach door.
744 744 */
745 745 static void
746 746 zsd_fattach_zone(zoneid_t zid, int door, boolean_t detach_only)
747 747 {
748 748 char *path = ZS_DOOR_PATH;
749 749 int fd, pid, stat, tmpl_fd;
750 750 ctid_t ct;
751 751
752 752 if ((tmpl_fd = init_template()) == -1) {
753 753 zsd_warn("Unable to init template");
754 754 return;
755 755 }
756 756
757 757 pid = forkx(0);
758 758 if (pid < 0) {
759 759 (void) ct_tmpl_clear(tmpl_fd);
760 760 zsd_warn(gettext(
761 761 "Unable to fork to add zonestat to zoneid %d\n"), zid);
762 762 return;
763 763 }
764 764
765 765 if (pid == 0) {
766 766 (void) ct_tmpl_clear(tmpl_fd);
767 767 (void) close(tmpl_fd);
768 768 if (zid != 0 && zone_enter(zid) != 0) {
769 769 if (errno == EINVAL) {
770 770 _exit(0);
771 771 }
772 772 _exit(1);
773 773 }
774 774 (void) fdetach(path);
775 775 (void) unlink(path);
776 776 if (detach_only)
777 777 _exit(0);
778 778 fd = open(path, O_CREAT|O_RDWR, 0644);
779 779 if (fd < 0)
780 780 _exit(2);
781 781 if (fattach(door, path) != 0)
782 782 _exit(3);
783 783 _exit(0);
784 784 }
785 785 if (contract_latest(&ct) == -1)
786 786 ct = -1;
787 787 (void) ct_tmpl_clear(tmpl_fd);
788 788 (void) close(tmpl_fd);
789 789 (void) contract_abandon_id(ct);
790 790 while (waitpid(pid, &stat, 0) != pid)
791 791 ;
792 792 if (WIFEXITED(stat) && WEXITSTATUS(stat) == 0)
793 793 return;
794 794
795 795 zsd_warn(gettext("Unable to attach door to zoneid: %d"), zid);
796 796
797 797 if (WEXITSTATUS(stat) == 1)
798 798 zsd_warn(gettext("Cannot entering zone"));
799 799 else if (WEXITSTATUS(stat) == 2)
800 800 zsd_warn(gettext("Unable to create door file: %s"), path);
801 801 else if (WEXITSTATUS(stat) == 3)
802 802 zsd_warn(gettext("Unable to fattach file: %s"), path);
803 803
804 804 zsd_warn(gettext("Internal error entering zone: %d"), zid);
805 805 }
806 806
807 807 /*
808 808 * Zone lookup and allocation functions to manage list of currently running
809 809 * zones.
810 810 */
811 811 static zsd_zone_t *
812 812 zsd_lookup_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
813 813 {
814 814 zsd_zone_t *zone;
815 815
816 816 for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
817 817 zone = list_next(&ctl->zsctl_zones, zone)) {
818 818 if (strcmp(zone->zsz_name, zonename) == 0) {
819 819 if (zoneid != -1)
820 820 zone->zsz_id = zoneid;
821 821 return (zone);
822 822 }
823 823 }
824 824 return (NULL);
825 825 }
826 826
827 827 static zsd_zone_t *
828 828 zsd_lookup_zone_byid(zsd_ctl_t *ctl, zoneid_t zoneid)
829 829 {
830 830 zsd_zone_t *zone;
831 831
832 832 for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
833 833 zone = list_next(&ctl->zsctl_zones, zone)) {
834 834 if (zone->zsz_id == zoneid)
835 835 return (zone);
836 836 }
837 837 return (NULL);
838 838 }
839 839
840 840 static zsd_zone_t *
841 841 zsd_allocate_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
842 842 {
843 843 zsd_zone_t *zone;
844 844
845 845 if ((zone = (zsd_zone_t *)calloc(1, sizeof (zsd_zone_t))) == NULL)
846 846 return (NULL);
847 847
848 848 (void) strlcpy(zone->zsz_name, zonename, sizeof (zone->zsz_name));
849 849 zone->zsz_id = zoneid;
850 850 zone->zsz_found = B_FALSE;
851 851
852 852 /*
853 853 * Allocate as deleted so if not found in first pass, zone is deleted
854 854 * from list. This can happen if zone is returned by zone_list, but
855 855 * exits before first attempt to fetch zone details.
856 856 */
857 857 zone->zsz_start = g_now;
858 858 zone->zsz_hrstart = g_hrnow;
859 859 zone->zsz_deleted = B_TRUE;
860 860
861 861 zone->zsz_cpu_shares = ZS_LIMIT_NONE;
862 862 zone->zsz_cpu_cap = ZS_LIMIT_NONE;
863 863 zone->zsz_ram_cap = ZS_LIMIT_NONE;
864 864 zone->zsz_locked_cap = ZS_LIMIT_NONE;
865 865 zone->zsz_vm_cap = ZS_LIMIT_NONE;
866 866
867 867 zone->zsz_processes_cap = ZS_LIMIT_NONE;
868 868 zone->zsz_lwps_cap = ZS_LIMIT_NONE;
869 869 zone->zsz_shm_cap = ZS_LIMIT_NONE;
870 870 zone->zsz_shmids_cap = ZS_LIMIT_NONE;
871 871 zone->zsz_semids_cap = ZS_LIMIT_NONE;
872 872 zone->zsz_msgids_cap = ZS_LIMIT_NONE;
873 873 zone->zsz_lofi_cap = ZS_LIMIT_NONE;
874 874
875 875 ctl->zsctl_nzones++;
876 876
877 877 return (zone);
878 878 }
879 879
880 880 static zsd_zone_t *
881 881 zsd_lookup_insert_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
882 882 {
883 883 zsd_zone_t *zone, *tmp;
884 884
885 885 if ((zone = zsd_lookup_zone(ctl, zonename, zoneid)) != NULL)
886 886 return (zone);
887 887
888 888 if ((zone = zsd_allocate_zone(ctl, zonename, zoneid)) == NULL)
889 889 return (NULL);
890 890
891 891 /* Insert sorted by zonename */
892 892 tmp = list_head(&ctl->zsctl_zones);
893 893 while (tmp != NULL && strcmp(zonename, tmp->zsz_name) > 0)
894 894 tmp = list_next(&ctl->zsctl_zones, tmp);
895 895
896 896 list_insert_before(&ctl->zsctl_zones, tmp, zone);
897 897 return (zone);
898 898 }
899 899
900 900 /*
901 901 * Mark all zones as not existing. As zones are found, they will
902 902 * be marked as existing. If a zone is not found, then it must have
903 903 * halted.
904 904 */
905 905 static void
906 906 zsd_mark_zones_start(zsd_ctl_t *ctl)
907 907 {
908 908
909 909 zsd_zone_t *zone;
910 910
911 911 for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
912 912 zone = list_next(&ctl->zsctl_zones, zone)) {
913 913 zone->zsz_found = B_FALSE;
914 914 }
915 915 }
916 916
917 917 /*
918 918 * Mark each zone as not using pset. If processes are found using the
919 919 * pset, the zone will remain bound to the pset. If none of a zones
920 920 * processes are bound to the pset, the zone's usage of the pset will
921 921 * be deleted.
922 922 *
923 923 */
924 924 static void
925 925 zsd_mark_pset_usage_start(zsd_pset_t *pset)
926 926 {
927 927 zsd_pset_usage_t *usage;
928 928
929 929 for (usage = list_head(&pset->zsp_usage_list);
930 930 usage != NULL;
931 931 usage = list_next(&pset->zsp_usage_list, usage)) {
932 932 usage->zsu_found = B_FALSE;
933 933 usage->zsu_empty = B_TRUE;
934 934 }
935 935 }
936 936
937 937 /*
938 938 * Mark each pset as not existing. If a pset is found, it will be marked
939 939 * as existing. If a pset is not found, it wil be deleted.
940 940 */
941 941 static void
942 942 zsd_mark_psets_start(zsd_ctl_t *ctl)
943 943 {
944 944 zsd_pset_t *pset;
945 945
946 946 for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
947 947 pset = list_next(&ctl->zsctl_psets, pset)) {
948 948 pset->zsp_found = B_FALSE;
949 949 zsd_mark_pset_usage_start(pset);
950 950 }
951 951 }
952 952
953 953 /*
954 954 * A pset was found. Update its information
955 955 */
956 956 static void
957 957 zsd_mark_pset_found(zsd_pset_t *pset, uint_t type, uint64_t online,
958 958 uint64_t size, uint64_t min, uint64_t max, int64_t importance)
959 959 {
960 960 pset->zsp_empty = B_TRUE;
961 961 pset->zsp_deleted = B_FALSE;
962 962
963 963 assert(pset->zsp_found == B_FALSE);
964 964
965 965 /* update pset flags */
966 966 if (pset->zsp_active == B_FALSE)
967 967 /* pset not seen on previous interval. It is new. */
968 968 pset->zsp_new = B_TRUE;
969 969 else
970 970 pset->zsp_new = B_FALSE;
971 971
972 972 pset->zsp_found = B_TRUE;
973 973 pset->zsp_cputype = type;
974 974 pset->zsp_online = online;
975 975 pset->zsp_size = size;
976 976 pset->zsp_min = min;
977 977 pset->zsp_max = max;
978 978 pset->zsp_importance = importance;
979 979 pset->zsp_cpu_shares = 0;
980 980 pset->zsp_scheds = 0;
981 981 pset->zsp_active = B_TRUE;
982 982 }
983 983
984 984 /*
985 985 * A zone's process was found using a pset. Charge the process to the pset and
986 986 * the per-zone data for the pset.
987 987 */
988 988 static void
989 989 zsd_mark_pset_usage_found(zsd_pset_usage_t *usage, uint_t sched)
990 990 {
991 991 zsd_zone_t *zone = usage->zsu_zone;
992 992 zsd_pset_t *pset = usage->zsu_pset;
993 993
994 994 /* Nothing to do if already found */
995 995 if (usage->zsu_found == B_TRUE)
996 996 goto add_stats;
997 997
998 998 usage->zsu_found = B_TRUE;
999 999 usage->zsu_empty = B_FALSE;
1000 1000
1001 1001 usage->zsu_deleted = B_FALSE;
1002 1002 /* update usage flags */
1003 1003 if (usage->zsu_active == B_FALSE)
1004 1004 usage->zsu_new = B_TRUE;
1005 1005 else
1006 1006 usage->zsu_new = B_FALSE;
1007 1007
1008 1008 usage->zsu_scheds = 0;
1009 1009 usage->zsu_cpu_shares = ZS_LIMIT_NONE;
1010 1010 usage->zsu_active = B_TRUE;
1011 1011 pset->zsp_empty = B_FALSE;
1012 1012 zone->zsz_empty = B_FALSE;
1013 1013
1014 1014 add_stats:
1015 1015 /* Detect zone's pset id, and if it is bound to multiple psets */
1016 1016 if (zone->zsz_psetid == ZS_PSET_ERROR)
1017 1017 zone->zsz_psetid = pset->zsp_id;
1018 1018 else if (zone->zsz_psetid != pset->zsp_id)
1019 1019 zone->zsz_psetid = ZS_PSET_MULTI;
1020 1020
1021 1021 usage->zsu_scheds |= sched;
1022 1022 pset->zsp_scheds |= sched;
1023 1023 zone->zsz_scheds |= sched;
1024 1024
1025 1025 /* Record if FSS is co-habitating with conflicting scheduler */
1026 1026 if ((pset->zsp_scheds & ZS_SCHED_FSS) &&
1027 1027 usage->zsu_scheds & (
1028 1028 ZS_SCHED_TS | ZS_SCHED_IA | ZS_SCHED_FX)) {
1029 1029 usage->zsu_scheds |= ZS_SCHED_CONFLICT;
1030 1030
1031 1031 pset->zsp_scheds |= ZS_SCHED_CONFLICT;
1032 1032 }
1033 1033
1034 1034 }
1035 1035
1036 1036 /* Add cpu time for a process to a pset, zone, and system totals */
1037 1037 static void
1038 1038 zsd_add_usage(zsd_ctl_t *ctl, zsd_pset_usage_t *usage, timestruc_t *delta)
1039 1039 {
1040 1040 zsd_system_t *system = ctl->zsctl_system;
1041 1041 zsd_zone_t *zone = usage->zsu_zone;
1042 1042 zsd_pset_t *pset = usage->zsu_pset;
1043 1043
1044 1044 TIMESTRUC_ADD_TIMESTRUC(usage->zsu_cpu_usage, *delta);
1045 1045 TIMESTRUC_ADD_TIMESTRUC(pset->zsp_usage_zones, *delta);
1046 1046 TIMESTRUC_ADD_TIMESTRUC(zone->zsz_cpu_usage, *delta);
1047 1047 TIMESTRUC_ADD_TIMESTRUC(system->zss_cpu_usage_zones, *delta);
1048 1048 }
1049 1049
1050 1050 /* Determine which processor sets have been deleted */
1051 1051 static void
1052 1052 zsd_mark_psets_end(zsd_ctl_t *ctl)
1053 1053 {
1054 1054 zsd_pset_t *pset, *tmp;
1055 1055
1056 1056 /*
1057 1057 * Mark pset as not exists, and deleted if it existed
1058 1058 * previous interval.
1059 1059 */
1060 1060 pset = list_head(&ctl->zsctl_psets);
1061 1061 while (pset != NULL) {
1062 1062 if (pset->zsp_found == B_FALSE) {
1063 1063 pset->zsp_empty = B_TRUE;
1064 1064 if (pset->zsp_deleted == B_TRUE) {
1065 1065 tmp = pset;
1066 1066 pset = list_next(&ctl->zsctl_psets, pset);
1067 1067 list_remove(&ctl->zsctl_psets, tmp);
1068 1068 free(tmp);
1069 1069 ctl->zsctl_npsets--;
1070 1070 continue;
1071 1071 } else {
1072 1072 /* Pset vanished during this interval */
1073 1073 pset->zsp_new = B_FALSE;
1074 1074 pset->zsp_deleted = B_TRUE;
1075 1075 pset->zsp_active = B_TRUE;
1076 1076 }
1077 1077 }
1078 1078 pset = list_next(&ctl->zsctl_psets, pset);
1079 1079 }
1080 1080 }
1081 1081
1082 1082 /* Determine which zones are no longer bound to processor sets */
1083 1083 static void
1084 1084 zsd_mark_pset_usages_end(zsd_ctl_t *ctl)
1085 1085 {
1086 1086 zsd_pset_t *pset;
1087 1087 zsd_zone_t *zone;
1088 1088 zsd_pset_usage_t *usage, *tmp;
1089 1089
1090 1090 /*
1091 1091 * Mark pset as not exists, and deleted if it existed previous
1092 1092 * interval.
1093 1093 */
1094 1094 for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
1095 1095 pset = list_next(&ctl->zsctl_psets, pset)) {
1096 1096 usage = list_head(&pset->zsp_usage_list);
1097 1097 while (usage != NULL) {
1098 1098 /*
1099 1099 * Mark pset as not exists, and deleted if it existed
1100 1100 * previous interval.
1101 1101 */
1102 1102 if (usage->zsu_found == B_FALSE ||
1103 1103 usage->zsu_zone->zsz_deleted == B_TRUE ||
1104 1104 usage->zsu_pset->zsp_deleted == B_TRUE) {
1105 1105 tmp = usage;
1106 1106 usage = list_next(&pset->zsp_usage_list,
1107 1107 usage);
1108 1108 list_remove(&pset->zsp_usage_list, tmp);
1109 1109 free(tmp);
1110 1110 pset->zsp_nusage--;
1111 1111 ctl->zsctl_npset_usages--;
1112 1112 continue;
1113 1113 } else {
1114 1114 usage->zsu_new = B_FALSE;
1115 1115 usage->zsu_deleted = B_TRUE;
1116 1116 usage->zsu_active = B_TRUE;
1117 1117 }
1118 1118 /* Add cpu shares for usages that are in FSS */
1119 1119 zone = usage->zsu_zone;
1120 1120 if (usage->zsu_scheds & ZS_SCHED_FSS &&
1121 1121 zone->zsz_cpu_shares != ZS_SHARES_UNLIMITED &&
1122 1122 zone->zsz_cpu_shares != 0) {
1123 1123 zone = usage->zsu_zone;
1124 1124 usage->zsu_cpu_shares = zone->zsz_cpu_shares;
1125 1125 pset->zsp_cpu_shares += zone->zsz_cpu_shares;
1126 1126 }
1127 1127 usage = list_next(&pset->zsp_usage_list,
1128 1128 usage);
1129 1129 }
1130 1130 }
1131 1131 }
1132 1132
1133 1133 /* A zone has been found. Update its information */
1134 1134 static void
1135 1135 zsd_mark_zone_found(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t cpu_shares,
1136 1136 uint64_t cpu_cap, uint64_t ram_cap, uint64_t locked_cap,
1137 1137 uint64_t vm_cap, uint64_t processes_cap, uint64_t processes,
1138 1138 uint64_t lwps_cap, uint64_t lwps, uint64_t shm_cap, uint64_t shm,
1139 1139 uint64_t shmids_cap, uint64_t shmids, uint64_t semids_cap,
1140 1140 uint64_t semids, uint64_t msgids_cap, uint64_t msgids, uint64_t lofi_cap,
1141 1141 uint64_t lofi, char *poolname, char *psetname, uint_t sched, uint_t cputype,
1142 1142 uint_t iptype)
1143 1143 {
1144 1144 zsd_system_t *sys = ctl->zsctl_system;
1145 1145
1146 1146 assert(zone->zsz_found == B_FALSE);
1147 1147
1148 1148 /*
1149 1149 * Mark zone as exists, and new if it did not exist in previous
1150 1150 * interval.
1151 1151 */
1152 1152 zone->zsz_found = B_TRUE;
1153 1153 zone->zsz_empty = B_TRUE;
1154 1154 zone->zsz_deleted = B_FALSE;
1155 1155
1156 1156 /*
1157 1157 * Zone is new. Assume zone's properties are the same over entire
1158 1158 * interval.
1159 1159 */
1160 1160 if (zone->zsz_active == B_FALSE)
1161 1161 zone->zsz_new = B_TRUE;
1162 1162 else
1163 1163 zone->zsz_new = B_FALSE;
1164 1164
1165 1165 (void) strlcpy(zone->zsz_pool, poolname, sizeof (zone->zsz_pool));
1166 1166 (void) strlcpy(zone->zsz_pset, psetname, sizeof (zone->zsz_pset));
1167 1167 zone->zsz_default_sched = sched;
1168 1168
1169 1169 /* Schedulers updated later as processes are found */
1170 1170 zone->zsz_scheds = 0;
1171 1171
1172 1172 /* Cpus updated later as psets bound are identified */
1173 1173 zone->zsz_cpus_online = 0;
1174 1174
1175 1175 zone->zsz_cputype = cputype;
1176 1176 zone->zsz_iptype = iptype;
1177 1177 zone->zsz_psetid = ZS_PSET_ERROR;
1178 1178 zone->zsz_cpu_cap = cpu_cap;
1179 1179 zone->zsz_cpu_shares = cpu_shares;
1180 1180 zone->zsz_ram_cap = ram_cap;
1181 1181 zone->zsz_locked_cap = locked_cap;
1182 1182 zone->zsz_vm_cap = vm_cap;
1183 1183 zone->zsz_processes_cap = processes_cap;
1184 1184 zone->zsz_processes = processes;
1185 1185 zone->zsz_lwps_cap = lwps_cap;
1186 1186 zone->zsz_lwps = lwps;
1187 1187 zone->zsz_shm_cap = shm_cap;
1188 1188 zone->zsz_shm = shm;
1189 1189 zone->zsz_shmids_cap = shmids_cap;
1190 1190 zone->zsz_shmids = shmids;
1191 1191 zone->zsz_semids_cap = semids_cap;
1192 1192 zone->zsz_semids = semids;
1193 1193 zone->zsz_msgids_cap = msgids_cap;
1194 1194 zone->zsz_msgids = msgids;
1195 1195 zone->zsz_lofi_cap = lofi_cap;
1196 1196 zone->zsz_lofi = lofi;
1197 1197
1198 1198 sys->zss_processes += processes;
1199 1199 sys->zss_lwps += lwps;
1200 1200 sys->zss_shm += shm;
1201 1201 sys->zss_shmids += shmids;
1202 1202 sys->zss_semids += semids;
1203 1203 sys->zss_msgids += msgids;
1204 1204 sys->zss_lofi += lofi;
1205 1205 zone->zsz_active = B_TRUE;
1206 1206 }
1207 1207
1208 1208
1209 1209 /* Determine which zones have halted */
1210 1210 static void
1211 1211 zsd_mark_zones_end(zsd_ctl_t *ctl)
1212 1212 {
1213 1213 zsd_zone_t *zone, *tmp;
1214 1214
1215 1215 /*
1216 1216 * Mark zone as not existing, or delete if it did not exist in
1217 1217 * previous interval.
1218 1218 */
1219 1219 zone = list_head(&ctl->zsctl_zones);
1220 1220 while (zone != NULL) {
1221 1221 if (zone->zsz_found == B_FALSE) {
1222 1222 zone->zsz_empty = B_TRUE;
1223 1223 if (zone->zsz_deleted == B_TRUE) {
1224 1224 /*
1225 1225 * Zone deleted in prior interval,
1226 1226 * so it no longer exists.
1227 1227 */
1228 1228 tmp = zone;
1229 1229 zone = list_next(&ctl->zsctl_zones, zone);
1230 1230 list_remove(&ctl->zsctl_zones, tmp);
1231 1231 free(tmp);
1232 1232 ctl->zsctl_nzones--;
1233 1233 continue;
1234 1234 } else {
1235 1235 zone->zsz_new = B_FALSE;
1236 1236 zone->zsz_deleted = B_TRUE;
1237 1237 zone->zsz_active = B_TRUE;
1238 1238 }
1239 1239 }
1240 1240 zone = list_next(&ctl->zsctl_zones, zone);
1241 1241 }
1242 1242 }
1243 1243
1244 1244 /*
1245 1245 * Mark cpus as not existing. If a cpu is found, it will be updated. If
1246 1246 * a cpu is not found, then it must have gone offline, so it will be
1247 1247 * deleted.
1248 1248 *
1249 1249 * The kstat tracking data is rolled so that the usage since the previous
1250 1250 * interval can be determined.
1251 1251 */
1252 1252 static void
1253 1253 zsd_mark_cpus_start(zsd_ctl_t *ctl, boolean_t roll)
1254 1254 {
1255 1255 zsd_cpu_t *cpu;
1256 1256
1257 1257 /*
1258 1258 * Mark all cpus as not existing. As cpus are found, they will
1259 1259 * be marked as existing.
1260 1260 */
1261 1261 for (cpu = list_head(&ctl->zsctl_cpus); cpu != NULL;
1262 1262 cpu = list_next(&ctl->zsctl_cpus, cpu)) {
1263 1263 cpu->zsc_found = B_FALSE;
1264 1264 if (cpu->zsc_active == B_TRUE && roll) {
1265 1265 cpu->zsc_psetid_prev = cpu->zsc_psetid;
1266 1266 cpu->zsc_nsec_idle_prev = cpu->zsc_nsec_idle;
1267 1267 cpu->zsc_nsec_intr_prev = cpu->zsc_nsec_intr;
1268 1268 cpu->zsc_nsec_kern_prev = cpu->zsc_nsec_kern;
1269 1269 cpu->zsc_nsec_user_prev = cpu->zsc_nsec_user;
1270 1270 }
1271 1271 }
1272 1272 }
1273 1273
1274 1274 /*
1275 1275 * An array the size of the maximum number of cpus is kept. Within this array
1276 1276 * a list of the online cpus is maintained.
1277 1277 */
1278 1278 zsd_cpu_t *
1279 1279 zsd_lookup_insert_cpu(zsd_ctl_t *ctl, processorid_t cpuid)
1280 1280 {
1281 1281 zsd_cpu_t *cpu;
1282 1282
1283 1283 assert(cpuid < ctl->zsctl_maxcpuid);
1284 1284 cpu = &(ctl->zsctl_cpu_array[cpuid]);
1285 1285 assert(cpuid == cpu->zsc_id);
1286 1286
1287 1287 if (cpu->zsc_allocated == B_FALSE) {
1288 1288 cpu->zsc_allocated = B_TRUE;
1289 1289 list_insert_tail(&ctl->zsctl_cpus, cpu);
1290 1290 }
1291 1291 return (cpu);
1292 1292 }
1293 1293
1294 1294 /* A cpu has been found. Update its information */
1295 1295 static void
1296 1296 zsd_mark_cpu_found(zsd_cpu_t *cpu, zsd_pset_t *pset, psetid_t psetid)
1297 1297 {
1298 1298 /*
1299 1299 * legacy processor sets, the cpu may move while zonestatd is
1300 1300 * inspecting, causing it to be found twice. In this case, just
1301 1301 * leave cpu in the first processor set in which it was found.
1302 1302 */
1303 1303 if (cpu->zsc_found == B_TRUE)
1304 1304 return;
1305 1305
1306 1306 /* Mark cpu as online */
1307 1307 cpu->zsc_found = B_TRUE;
1308 1308 cpu->zsc_offlined = B_FALSE;
1309 1309 cpu->zsc_pset = pset;
1310 1310 /*
1311 1311 * cpu is newly online.
1312 1312 */
1313 1313 if (cpu->zsc_active == B_FALSE) {
1314 1314 /*
1315 1315 * Cpu is newly online.
1316 1316 */
1317 1317 cpu->zsc_onlined = B_TRUE;
1318 1318 cpu->zsc_psetid = psetid;
1319 1319 cpu->zsc_psetid_prev = psetid;
1320 1320 } else {
1321 1321 /*
1322 1322 * cpu online during previous interval. Save properties at
1323 1323 * start of interval
1324 1324 */
1325 1325 cpu->zsc_onlined = B_FALSE;
1326 1326 cpu->zsc_psetid = psetid;
1327 1327
1328 1328 }
1329 1329 cpu->zsc_active = B_TRUE;
1330 1330 }
1331 1331
1332 1332 /* Remove all offlined cpus from the list of tracked cpus */
1333 1333 static void
1334 1334 zsd_mark_cpus_end(zsd_ctl_t *ctl)
1335 1335 {
1336 1336 zsd_cpu_t *cpu, *tmp;
1337 1337 int id;
1338 1338
1339 1339 /* Mark cpu as online or offline */
1340 1340 cpu = list_head(&ctl->zsctl_cpus);
1341 1341 while (cpu != NULL) {
1342 1342 if (cpu->zsc_found == B_FALSE) {
1343 1343 if (cpu->zsc_offlined == B_TRUE) {
1344 1344 /*
1345 1345 * cpu offlined in prior interval. It is gone.
1346 1346 */
1347 1347 tmp = cpu;
1348 1348 cpu = list_next(&ctl->zsctl_cpus, cpu);
1349 1349 list_remove(&ctl->zsctl_cpus, tmp);
1350 1350 /* Clear structure for future use */
1351 1351 id = tmp->zsc_id;
1352 1352 bzero(tmp, sizeof (zsd_cpu_t));
1353 1353 tmp->zsc_id = id;
1354 1354 tmp->zsc_allocated = B_FALSE;
1355 1355 tmp->zsc_psetid = ZS_PSET_ERROR;
1356 1356 tmp->zsc_psetid_prev = ZS_PSET_ERROR;
1357 1357
1358 1358 } else {
1359 1359 /*
1360 1360 * cpu online at start of interval. Treat
1361 1361 * as still online, since it was online for
1362 1362 * some portion of the interval.
1363 1363 */
1364 1364 cpu->zsc_offlined = B_TRUE;
1365 1365 cpu->zsc_onlined = B_FALSE;
1366 1366 cpu->zsc_active = B_TRUE;
1367 1367 cpu->zsc_psetid = cpu->zsc_psetid_prev;
1368 1368 cpu->zsc_pset = NULL;
1369 1369 }
1370 1370 }
1371 1371 cpu = list_next(&ctl->zsctl_cpus, cpu);
1372 1372 }
1373 1373 }
1374 1374
1375 1375 /* Some utility functions for managing the list of processor sets */
1376 1376 static zsd_pset_t *
1377 1377 zsd_lookup_pset_byid(zsd_ctl_t *ctl, psetid_t psetid)
1378 1378 {
1379 1379 zsd_pset_t *pset;
1380 1380
1381 1381 for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
1382 1382 pset = list_next(&ctl->zsctl_psets, pset)) {
1383 1383 if (pset->zsp_id == psetid)
1384 1384 return (pset);
1385 1385 }
1386 1386 return (NULL);
1387 1387 }
1388 1388
1389 1389 static zsd_pset_t *
1390 1390 zsd_lookup_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid)
1391 1391 {
1392 1392 zsd_pset_t *pset;
1393 1393
1394 1394 for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
1395 1395 pset = list_next(&ctl->zsctl_psets, pset)) {
1396 1396 if (strcmp(pset->zsp_name, psetname) == 0) {
1397 1397 if (psetid != -1)
1398 1398 pset->zsp_id = psetid;
1399 1399 return (pset);
1400 1400 }
1401 1401 }
1402 1402 return (NULL);
1403 1403 }
1404 1404
1405 1405 static zsd_pset_t *
1406 1406 zsd_allocate_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid)
1407 1407 {
1408 1408 zsd_pset_t *pset;
1409 1409
1410 1410 if ((pset = (zsd_pset_t *)calloc(1, sizeof (zsd_pset_t))) == NULL)
1411 1411 return (NULL);
1412 1412
1413 1413 (void) strlcpy(pset->zsp_name, psetname, sizeof (pset->zsp_name));
1414 1414 pset->zsp_id = psetid;
1415 1415 pset->zsp_found = B_FALSE;
1416 1416 /*
1417 1417 * Allocate as deleted so if not found in first pass, pset is deleted
1418 1418 * from list. This can happen if pset is returned by pset_list, but
1419 1419 * is destroyed before first attempt to fetch pset details.
1420 1420 */
1421 1421 list_create(&pset->zsp_usage_list, sizeof (zsd_pset_usage_t),
1422 1422 offsetof(zsd_pset_usage_t, zsu_next));
1423 1423
1424 1424 pset->zsp_hrstart = g_hrnow;
1425 1425 pset->zsp_deleted = B_TRUE;
1426 1426 pset->zsp_empty = B_TRUE;
1427 1427 ctl->zsctl_npsets++;
1428 1428
1429 1429 return (pset);
1430 1430 }
1431 1431
1432 1432 static zsd_pset_t *
1433 1433 zsd_lookup_insert_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid)
1434 1434 {
1435 1435 zsd_pset_t *pset, *tmp;
1436 1436
1437 1437 if ((pset = zsd_lookup_pset(ctl, psetname, psetid)) != NULL)
1438 1438 return (pset);
1439 1439
1440 1440 if ((pset = zsd_allocate_pset(ctl, psetname, psetid)) == NULL)
1441 1441 return (NULL);
1442 1442
1443 1443 /* Insert sorted by psetname */
1444 1444 tmp = list_head(&ctl->zsctl_psets);
1445 1445 while (tmp != NULL && strcmp(psetname, tmp->zsp_name) > 0)
1446 1446 tmp = list_next(&ctl->zsctl_psets, tmp);
1447 1447
1448 1448 list_insert_before(&ctl->zsctl_psets, tmp, pset);
1449 1449 return (pset);
1450 1450 }
1451 1451
1452 1452 /* Some utility functions for managing the list of zones using each pset */
1453 1453 static zsd_pset_usage_t *
1454 1454 zsd_lookup_usage(zsd_pset_t *pset, zsd_zone_t *zone)
1455 1455 {
1456 1456 zsd_pset_usage_t *usage;
1457 1457
1458 1458 for (usage = list_head(&pset->zsp_usage_list); usage != NULL;
1459 1459 usage = list_next(&pset->zsp_usage_list, usage))
1460 1460 if (usage->zsu_zone == zone)
1461 1461 return (usage);
1462 1462
1463 1463 return (NULL);
1464 1464 }
1465 1465
1466 1466 static zsd_pset_usage_t *
1467 1467 zsd_allocate_pset_usage(zsd_ctl_t *ctl, zsd_pset_t *pset, zsd_zone_t *zone)
1468 1468 {
1469 1469 zsd_pset_usage_t *usage;
1470 1470
1471 1471 if ((usage = (zsd_pset_usage_t *)calloc(1, sizeof (zsd_pset_usage_t)))
1472 1472 == NULL)
1473 1473 return (NULL);
1474 1474
1475 1475 list_link_init(&usage->zsu_next);
1476 1476 usage->zsu_zone = zone;
1477 1477 usage->zsu_zoneid = zone->zsz_id;
1478 1478 usage->zsu_pset = pset;
1479 1479 usage->zsu_found = B_FALSE;
1480 1480 usage->zsu_active = B_FALSE;
1481 1481 usage->zsu_new = B_FALSE;
1482 1482 /*
1483 1483 * Allocate as not deleted. If a process is found in a pset for
1484 1484 * a zone, the usage will not be deleted until at least the next
1485 1485 * interval.
1486 1486 */
1487 1487 usage->zsu_start = g_now;
1488 1488 usage->zsu_hrstart = g_hrnow;
1489 1489 usage->zsu_deleted = B_FALSE;
1490 1490 usage->zsu_empty = B_TRUE;
1491 1491 usage->zsu_scheds = 0;
1492 1492 usage->zsu_cpu_shares = ZS_LIMIT_NONE;
1493 1493
1494 1494 ctl->zsctl_npset_usages++;
1495 1495 pset->zsp_nusage++;
1496 1496
1497 1497 return (usage);
1498 1498 }
1499 1499
1500 1500 static zsd_pset_usage_t *
1501 1501 zsd_lookup_insert_usage(zsd_ctl_t *ctl, zsd_pset_t *pset, zsd_zone_t *zone)
1502 1502 {
1503 1503 zsd_pset_usage_t *usage, *tmp;
1504 1504
1505 1505 if ((usage = zsd_lookup_usage(pset, zone))
1506 1506 != NULL)
1507 1507 return (usage);
1508 1508
1509 1509 if ((usage = zsd_allocate_pset_usage(ctl, pset, zone)) == NULL)
1510 1510 return (NULL);
1511 1511
1512 1512 tmp = list_head(&pset->zsp_usage_list);
1513 1513 while (tmp != NULL && strcmp(zone->zsz_name, tmp->zsu_zone->zsz_name)
1514 1514 > 0)
1515 1515 tmp = list_next(&pset->zsp_usage_list, tmp);
1516 1516
1517 1517 list_insert_before(&pset->zsp_usage_list, tmp, usage);
1518 1518 return (usage);
1519 1519 }
1520 1520
1521 1521 static void
1522 1522 zsd_refresh_system(zsd_ctl_t *ctl)
1523 1523 {
1524 1524 zsd_system_t *system = ctl->zsctl_system;
1525 1525
1526 1526 /* Re-count these values each interval */
1527 1527 system->zss_processes = 0;
1528 1528 system->zss_lwps = 0;
1529 1529 system->zss_shm = 0;
1530 1530 system->zss_shmids = 0;
1531 1531 system->zss_semids = 0;
1532 1532 system->zss_msgids = 0;
1533 1533 system->zss_lofi = 0;
1534 1534 }
1535 1535
1536 1536
1537 1537 /* Reads each cpu's kstats, and adds the usage to the cpu's pset */
1538 1538 static void
1539 1539 zsd_update_cpu_stats(zsd_ctl_t *ctl, zsd_cpu_t *cpu)
1540 1540 {
1541 1541 zsd_system_t *sys;
1542 1542 processorid_t cpuid;
1543 1543 zsd_pset_t *pset_prev;
1544 1544 zsd_pset_t *pset;
1545 1545 kstat_t *kstat;
1546 1546 kstat_named_t *knp;
1547 1547 kid_t kid;
1548 1548 uint64_t idle, intr, kern, user;
1549 1549
1550 1550 sys = ctl->zsctl_system;
1551 1551 pset = cpu->zsc_pset;
1552 1552 knp = NULL;
1553 1553 kid = -1;
1554 1554 cpuid = cpu->zsc_id;
1555 1555
1556 1556 /* Get the cpu time totals for this cpu */
1557 1557 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "cpu", cpuid, "sys");
1558 1558 if (kstat == NULL)
1559 1559 return;
1560 1560
1561 1561 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
1562 1562 if (kid == -1)
1563 1563 return;
1564 1564
1565 1565 knp = kstat_data_lookup(kstat, "cpu_nsec_idle");
1566 1566 if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64)
1567 1567 return;
1568 1568
1569 1569 idle = knp->value.ui64;
1570 1570
1571 1571 knp = kstat_data_lookup(kstat, "cpu_nsec_kernel");
1572 1572 if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64)
1573 1573 return;
1574 1574
1575 1575 kern = knp->value.ui64;
1576 1576
1577 1577 knp = kstat_data_lookup(kstat, "cpu_nsec_user");
1578 1578 if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64)
1579 1579 return;
1580 1580
1581 1581 user = knp->value.ui64;
1582 1582
1583 1583 /*
1584 1584 * Tracking intr time per cpu just exists for future enhancements.
1585 1585 * The value is presently always zero.
1586 1586 */
1587 1587 intr = 0;
1588 1588 cpu->zsc_nsec_idle = idle;
1589 1589 cpu->zsc_nsec_intr = intr;
1590 1590 cpu->zsc_nsec_kern = kern;
1591 1591 cpu->zsc_nsec_user = user;
1592 1592
1593 1593 if (cpu->zsc_onlined == B_TRUE) {
1594 1594 /*
1595 1595 * cpu is newly online. There is no reference value,
1596 1596 * so just record its current stats for comparison
1597 1597 * on next stat read.
1598 1598 */
1599 1599 cpu->zsc_nsec_idle_prev = cpu->zsc_nsec_idle;
1600 1600 cpu->zsc_nsec_intr_prev = cpu->zsc_nsec_intr;
1601 1601 cpu->zsc_nsec_kern_prev = cpu->zsc_nsec_kern;
1602 1602 cpu->zsc_nsec_user_prev = cpu->zsc_nsec_user;
1603 1603 return;
1604 1604 }
1605 1605
1606 1606 /*
1607 1607 * Calculate relative time since previous refresh.
1608 1608 * Paranoia. Don't let time go backwards.
1609 1609 */
1610 1610 idle = intr = kern = user = 0;
1611 1611 if (cpu->zsc_nsec_idle > cpu->zsc_nsec_idle_prev)
1612 1612 idle = cpu->zsc_nsec_idle - cpu->zsc_nsec_idle_prev;
1613 1613
1614 1614 if (cpu->zsc_nsec_intr > cpu->zsc_nsec_intr_prev)
1615 1615 intr = cpu->zsc_nsec_intr - cpu->zsc_nsec_intr_prev;
1616 1616
1617 1617 if (cpu->zsc_nsec_kern > cpu->zsc_nsec_kern_prev)
1618 1618 kern = cpu->zsc_nsec_kern - cpu->zsc_nsec_kern_prev;
1619 1619
1620 1620 if (cpu->zsc_nsec_user > cpu->zsc_nsec_user_prev)
1621 1621 user = cpu->zsc_nsec_user - cpu->zsc_nsec_user_prev;
1622 1622
1623 1623 /* Update totals for cpu usage */
1624 1624 TIMESTRUC_ADD_NANOSEC(cpu->zsc_idle, idle);
1625 1625 TIMESTRUC_ADD_NANOSEC(cpu->zsc_intr, intr);
1626 1626 TIMESTRUC_ADD_NANOSEC(cpu->zsc_kern, kern);
1627 1627 TIMESTRUC_ADD_NANOSEC(cpu->zsc_user, user);
1628 1628
1629 1629 /*
1630 1630 * Add cpu's stats to its pset if it is known to be in
1631 1631 * the pset since previous read.
1632 1632 */
1633 1633 if (cpu->zsc_psetid == cpu->zsc_psetid_prev ||
1634 1634 cpu->zsc_psetid_prev == ZS_PSET_ERROR ||
1635 1635 (pset_prev = zsd_lookup_pset_byid(ctl,
1636 1636 cpu->zsc_psetid_prev)) == NULL) {
1637 1637 TIMESTRUC_ADD_NANOSEC(pset->zsp_idle, idle);
1638 1638 TIMESTRUC_ADD_NANOSEC(pset->zsp_intr, intr);
1639 1639 TIMESTRUC_ADD_NANOSEC(pset->zsp_kern, kern);
1640 1640 TIMESTRUC_ADD_NANOSEC(pset->zsp_user, user);
1641 1641 } else {
1642 1642 /*
1643 1643 * Last pset was different than current pset.
1644 1644 * Best guess is to split usage between the two.
1645 1645 */
1646 1646 TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_idle, idle / 2);
1647 1647 TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_intr, intr / 2);
1648 1648 TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_kern, kern / 2);
1649 1649 TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_user, user / 2);
1650 1650
1651 1651 TIMESTRUC_ADD_NANOSEC(pset->zsp_idle,
1652 1652 (idle / 2) + (idle % 2));
1653 1653 TIMESTRUC_ADD_NANOSEC(pset->zsp_intr,
1654 1654 (intr / 2) + (intr % 2));
1655 1655 TIMESTRUC_ADD_NANOSEC(pset->zsp_kern,
1656 1656 (kern / 2) + (kern % 2));
1657 1657 TIMESTRUC_ADD_NANOSEC(pset->zsp_user,
1658 1658 (user / 2) + (user % 2));
1659 1659 }
1660 1660 TIMESTRUC_ADD_NANOSEC(sys->zss_idle, idle);
1661 1661 TIMESTRUC_ADD_NANOSEC(sys->zss_intr, intr);
1662 1662 TIMESTRUC_ADD_NANOSEC(sys->zss_kern, kern);
1663 1663 TIMESTRUC_ADD_NANOSEC(sys->zss_user, user);
1664 1664 }
1665 1665
1666 1666 /* Determine the details of a processor set by pset_id */
1667 1667 static int
1668 1668 zsd_get_pool_pset(zsd_ctl_t *ctl, psetid_t psetid, char *psetname,
1669 1669 size_t namelen, uint_t *cputype, uint64_t *online, uint64_t *size,
1670 1670 uint64_t *min, uint64_t *max, int64_t *importance)
1671 1671 {
1672 1672 uint_t old, num;
1673 1673
1674 1674 pool_conf_t *conf = ctl->zsctl_pool_conf;
1675 1675 pool_value_t **vals = ctl->zsctl_pool_vals;
1676 1676 pool_resource_t **res_list = NULL;
1677 1677 pool_resource_t *pset;
1678 1678 pool_component_t **cpus = NULL;
1679 1679 processorid_t *cache;
1680 1680 const char *string;
1681 1681 uint64_t uint64;
1682 1682 int64_t int64;
1683 1683 int i, ret, type;
1684 1684
1685 1685 if (ctl->zsctl_pool_status == POOL_DISABLED) {
1686 1686
1687 1687 /*
1688 1688 * Inspect legacy psets
1689 1689 */
1690 1690 for (;;) {
1691 1691 old = num = ctl->zsctl_cpu_ncache;
1692 1692 ret = pset_info(psetid, &type, &num,
1693 1693 ctl->zsctl_cpu_cache);
1694 1694 if (ret < 0) {
1695 1695 /* pset is gone. Tell caller to retry */
1696 1696 errno = EINTR;
1697 1697 return (-1);
1698 1698 }
1699 1699 if (num <= old) {
1700 1700 /* Success */
1701 1701 break;
1702 1702 }
1703 1703 if ((cache = (processorid_t *)realloc(
1704 1704 ctl->zsctl_cpu_cache, num *
1705 1705 sizeof (processorid_t))) != NULL) {
1706 1706 ctl->zsctl_cpu_ncache = num;
1707 1707 ctl->zsctl_cpu_cache = cache;
1708 1708 } else {
1709 1709 /*
1710 1710 * Could not allocate to get new cpu list.
1711 1711 */
1712 1712 zsd_warn(gettext(
1713 1713 "Could not allocate for cpu list"));
1714 1714 errno = ENOMEM;
1715 1715 return (-1);
1716 1716 }
1717 1717 }
1718 1718 /*
1719 1719 * Old school pset. Just make min and max equal
1720 1720 * to its size
1721 1721 */
1722 1722 if (psetid == ZS_PSET_DEFAULT) {
1723 1723 *cputype = ZS_CPUTYPE_DEFAULT_PSET;
1724 1724 (void) strlcpy(psetname, "pset_default", namelen);
1725 1725 } else {
1726 1726 *cputype = ZS_CPUTYPE_PSRSET_PSET;
1727 1727 (void) snprintf(psetname, namelen,
1728 1728 "SUNWlegacy_pset_%d", psetid);
1729 1729 }
1730 1730
1731 1731 /*
1732 1732 * Just treat legacy pset as a simple pool pset
1733 1733 */
1734 1734 *online = num;
1735 1735 *size = num;
1736 1736 *min = num;
1737 1737 *max = num;
1738 1738 *importance = 1;
1739 1739
1740 1740 return (0);
1741 1741 }
1742 1742
1743 1743 /* Look up the pool pset using the pset id */
1744 1744 res_list = NULL;
1745 1745 pool_value_set_int64(vals[1], psetid);
1746 1746 if (pool_value_set_name(vals[1], "pset.sys_id")
1747 1747 != PO_SUCCESS)
1748 1748 goto err;
1749 1749
1750 1750 if (pool_value_set_name(vals[0], "type") != PO_SUCCESS)
1751 1751 goto err;
1752 1752 if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS)
1753 1753 goto err;
1754 1754 if ((res_list = pool_query_resources(conf, &num, vals)) == NULL)
1755 1755 goto err;
1756 1756 if (num != 1)
1757 1757 goto err;
1758 1758 pset = res_list[0];
1759 1759 free(res_list);
1760 1760 res_list = NULL;
1761 1761 if (pool_get_property(conf, pool_resource_to_elem(conf, pset),
1762 1762 "pset.name", vals[0]) != POC_STRING ||
1763 1763 pool_value_get_string(vals[0], &string) != PO_SUCCESS)
1764 1764 goto err;
1765 1765
1766 1766 (void) strlcpy(psetname, string, namelen);
1767 1767 if (strncmp(psetname, "SUNWtmp", strlen("SUNWtmp")) == 0)
1768 1768 *cputype = ZS_CPUTYPE_DEDICATED;
1769 1769 else if (psetid == ZS_PSET_DEFAULT)
1770 1770 *cputype = ZS_CPUTYPE_DEFAULT_PSET;
1771 1771 else
1772 1772 *cputype = ZS_CPUTYPE_POOL_PSET;
1773 1773
1774 1774 /* Get size, min, max, and importance */
1775 1775 if (pool_get_property(conf, pool_resource_to_elem(conf,
1776 1776 pset), "pset.size", vals[0]) == POC_UINT &&
1777 1777 pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS)
1778 1778 *size = uint64;
1779 1779 else
1780 1780 *size = 0;
1781 1781
1782 1782 /* Get size, min, max, and importance */
1783 1783 if (pool_get_property(conf, pool_resource_to_elem(conf,
1784 1784 pset), "pset.min", vals[0]) == POC_UINT &&
1785 1785 pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS)
1786 1786 *min = uint64;
1787 1787 else
1788 1788 *min = 0;
1789 1789 if (*min >= ZSD_PSET_UNLIMITED)
1790 1790 *min = ZS_LIMIT_NONE;
1791 1791
1792 1792 if (pool_get_property(conf, pool_resource_to_elem(conf,
1793 1793 pset), "pset.max", vals[0]) == POC_UINT &&
1794 1794 pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS)
1795 1795 *max = uint64;
1796 1796 else
1797 1797 *max = ZS_LIMIT_NONE;
1798 1798
1799 1799 if (*max >= ZSD_PSET_UNLIMITED)
1800 1800 *max = ZS_LIMIT_NONE;
1801 1801
1802 1802 if (pool_get_property(conf, pool_resource_to_elem(conf,
1803 1803 pset), "pset.importance", vals[0]) == POC_INT &&
1804 1804 pool_value_get_int64(vals[0], &int64) == PO_SUCCESS)
1805 1805 *importance = int64;
1806 1806 else
1807 1807 *importance = (uint64_t)1;
1808 1808
1809 1809 *online = 0;
1810 1810 if (*size == 0)
1811 1811 return (0);
1812 1812
1813 1813 /* get cpus */
1814 1814 cpus = pool_query_resource_components(conf, pset, &num, NULL);
1815 1815 if (cpus == NULL)
1816 1816 goto err;
1817 1817
1818 1818 /* Make sure there is space for cpu id list */
1819 1819 if (num > ctl->zsctl_cpu_ncache) {
1820 1820 if ((cache = (processorid_t *)realloc(
1821 1821 ctl->zsctl_cpu_cache, num *
1822 1822 sizeof (processorid_t))) != NULL) {
1823 1823 ctl->zsctl_cpu_ncache = num;
1824 1824 ctl->zsctl_cpu_cache = cache;
1825 1825 } else {
1826 1826 /*
1827 1827 * Could not allocate to get new cpu list.
1828 1828 */
1829 1829 zsd_warn(gettext(
1830 1830 "Could not allocate for cpu list"));
1831 1831 goto err;
1832 1832 }
1833 1833 }
1834 1834
1835 1835 /* count the online cpus */
1836 1836 for (i = 0; i < num; i++) {
1837 1837 if (pool_get_property(conf, pool_component_to_elem(
1838 1838 conf, cpus[i]), "cpu.status", vals[0]) != POC_STRING ||
1839 1839 pool_value_get_string(vals[0], &string) != PO_SUCCESS)
1840 1840 goto err;
1841 1841
1842 1842 if (strcmp(string, "on-line") != 0 &&
1843 1843 strcmp(string, "no-intr") != 0)
1844 1844 continue;
1845 1845
1846 1846 if (pool_get_property(conf, pool_component_to_elem(
1847 1847 conf, cpus[i]), "cpu.sys_id", vals[0]) != POC_INT ||
1848 1848 pool_value_get_int64(vals[0], &int64) != PO_SUCCESS)
1849 1849 goto err;
1850 1850
1851 1851 (*online)++;
1852 1852 ctl->zsctl_cpu_cache[i] = (psetid_t)int64;
1853 1853 }
1854 1854 free(cpus);
1855 1855 return (0);
1856 1856 err:
1857 1857 if (res_list != NULL)
1858 1858 free(res_list);
1859 1859 if (cpus != NULL)
1860 1860 free(cpus);
1861 1861
1862 1862 /*
1863 1863 * The pools operations should succeed since the conf is a consistent
1864 1864 * snapshot. Tell caller there is no need to retry.
1865 1865 */
1866 1866 errno = EINVAL;
1867 1867 return (-1);
1868 1868 }
1869 1869
1870 1870 /*
1871 1871 * Update the current list of processor sets.
1872 1872 * This also updates the list of online cpus, and each cpu's pset membership.
1873 1873 */
1874 1874 static void
1875 1875 zsd_refresh_psets(zsd_ctl_t *ctl)
1876 1876 {
1877 1877 int i, j, ret, state;
1878 1878 uint_t old, num;
1879 1879 uint_t cputype;
1880 1880 int64_t sys_id, importance;
1881 1881 uint64_t online, size, min, max;
1882 1882 zsd_system_t *system;
1883 1883 zsd_pset_t *pset;
1884 1884 zsd_cpu_t *cpu;
1885 1885 psetid_t *cache;
1886 1886 char psetname[ZS_PSETNAME_MAX];
1887 1887 processorid_t cpuid;
1888 1888 pool_value_t *pv_save = NULL;
1889 1889 pool_resource_t **res_list = NULL;
1890 1890 pool_resource_t *res;
1891 1891 pool_value_t **vals;
1892 1892 pool_conf_t *conf;
1893 1893 boolean_t roll_cpus = B_TRUE;
1894 1894
1895 1895 /* Zero cpu counters to recount them */
1896 1896 system = ctl->zsctl_system;
1897 1897 system->zss_ncpus = 0;
1898 1898 system->zss_ncpus_online = 0;
1899 1899 retry:
1900 1900 ret = pool_get_status(&state);
1901 1901 if (ret == 0 && state == POOL_ENABLED) {
1902 1902
1903 1903 conf = ctl->zsctl_pool_conf;
1904 1904 vals = ctl->zsctl_pool_vals;
1905 1905 pv_save = vals[1];
1906 1906 vals[1] = NULL;
1907 1907
1908 1908 if (ctl->zsctl_pool_status == POOL_DISABLED) {
1909 1909 if (pool_conf_open(ctl->zsctl_pool_conf,
1910 1910 pool_dynamic_location(), PO_RDONLY) == 0) {
1911 1911 ctl->zsctl_pool_status = POOL_ENABLED;
1912 1912 ctl->zsctl_pool_changed = POU_PSET;
1913 1913 }
1914 1914 } else {
1915 1915 ctl->zsctl_pool_changed = 0;
1916 1916 ret = pool_conf_update(ctl->zsctl_pool_conf,
1917 1917 &(ctl->zsctl_pool_changed));
1918 1918 if (ret < 0) {
1919 1919 /* Pools must have become disabled */
1920 1920 (void) pool_conf_close(ctl->zsctl_pool_conf);
1921 1921 ctl->zsctl_pool_status = POOL_DISABLED;
1922 1922 if (pool_error() == POE_SYSTEM && errno ==
1923 1923 ENOTACTIVE)
1924 1924 goto retry;
1925 1925
1926 1926 zsd_warn(gettext(
1927 1927 "Unable to update pool configuration"));
1928 1928 /* Not able to get pool info. Don't update. */
1929 1929 goto err;
1930 1930 }
1931 1931 }
1932 1932 /* Get the list of psets using libpool */
1933 1933 if (pool_value_set_name(vals[0], "type") != PO_SUCCESS)
1934 1934 goto err;
1935 1935
1936 1936 if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS)
1937 1937 goto err;
1938 1938 if ((res_list = pool_query_resources(conf, &num, vals))
1939 1939 == NULL)
1940 1940 goto err;
1941 1941
1942 1942 if (num > ctl->zsctl_pset_ncache) {
1943 1943 if ((cache = (psetid_t *)realloc(ctl->zsctl_pset_cache,
1944 1944 (num) * sizeof (psetid_t))) == NULL) {
1945 1945 goto err;
1946 1946 }
1947 1947 ctl->zsctl_pset_ncache = num;
1948 1948 ctl->zsctl_pset_cache = cache;
1949 1949 }
1950 1950 /* Save the pset id of each pset */
1951 1951 for (i = 0; i < num; i++) {
1952 1952 res = res_list[i];
1953 1953 if (pool_get_property(conf, pool_resource_to_elem(conf,
1954 1954 res), "pset.sys_id", vals[0]) != POC_INT ||
1955 1955 pool_value_get_int64(vals[0], &sys_id)
1956 1956 != PO_SUCCESS)
1957 1957 goto err;
1958 1958 ctl->zsctl_pset_cache[i] = (int)sys_id;
1959 1959 }
1960 1960 vals[1] = pv_save;
1961 1961 pv_save = NULL;
1962 1962 } else {
1963 1963 if (ctl->zsctl_pool_status == POOL_ENABLED) {
1964 1964 (void) pool_conf_close(ctl->zsctl_pool_conf);
1965 1965 ctl->zsctl_pool_status = POOL_DISABLED;
1966 1966 }
1967 1967 /* Get the pset list using legacy psets */
1968 1968 for (;;) {
1969 1969 old = num = ctl->zsctl_pset_ncache;
1970 1970 (void) pset_list(ctl->zsctl_pset_cache, &num);
1971 1971 if ((num + 1) <= old) {
1972 1972 break;
1973 1973 }
1974 1974 if ((cache = (psetid_t *)realloc(ctl->zsctl_pset_cache,
1975 1975 (num + 1) * sizeof (psetid_t))) != NULL) {
1976 1976 ctl->zsctl_pset_ncache = num + 1;
1977 1977 ctl->zsctl_pset_cache = cache;
1978 1978 } else {
1979 1979 /*
1980 1980 * Could not allocate to get new pset list.
1981 1981 * Give up
1982 1982 */
1983 1983 return;
1984 1984 }
1985 1985 }
1986 1986 /* Add the default pset to list */
1987 1987 ctl->zsctl_pset_cache[num] = ctl->zsctl_pset_cache[0];
1988 1988 ctl->zsctl_pset_cache[0] = ZS_PSET_DEFAULT;
1989 1989 num++;
1990 1990 }
1991 1991 psets_changed:
1992 1992 zsd_mark_cpus_start(ctl, roll_cpus);
1993 1993 zsd_mark_psets_start(ctl);
1994 1994 roll_cpus = B_FALSE;
1995 1995
1996 1996 /* Refresh cpu membership of all psets */
1997 1997 for (i = 0; i < num; i++) {
1998 1998
1999 1999 /* Get pool pset information */
2000 2000 sys_id = ctl->zsctl_pset_cache[i];
2001 2001 if (zsd_get_pool_pset(ctl, sys_id, psetname, sizeof (psetname),
2002 2002 &cputype, &online, &size, &min, &max, &importance)
2003 2003 != 0) {
2004 2004 if (errno == EINTR)
2005 2005 goto psets_changed;
2006 2006 zsd_warn(gettext("Failed to get info for pset %d"),
2007 2007 sys_id);
2008 2008 continue;
2009 2009 }
2010 2010
2011 2011 system->zss_ncpus += size;
2012 2012 system->zss_ncpus_online += online;
2013 2013
2014 2014 pset = zsd_lookup_insert_pset(ctl, psetname,
2015 2015 ctl->zsctl_pset_cache[i]);
2016 2016
2017 2017 /* update pset info */
2018 2018 zsd_mark_pset_found(pset, cputype, online, size, min,
2019 2019 max, importance);
2020 2020
2021 2021 /* update each cpu in pset */
2022 2022 for (j = 0; j < pset->zsp_online; j++) {
2023 2023 cpuid = ctl->zsctl_cpu_cache[j];
2024 2024 cpu = zsd_lookup_insert_cpu(ctl, cpuid);
2025 2025 zsd_mark_cpu_found(cpu, pset, sys_id);
2026 2026 }
2027 2027 }
2028 2028 err:
2029 2029 if (res_list != NULL)
2030 2030 free(res_list);
2031 2031 if (pv_save != NULL)
2032 2032 vals[1] = pv_save;
2033 2033 }
2034 2034
2035 2035
2036 2036
2037 2037 /*
2038 2038 * Fetch the current pool and pset name for the given zone.
2039 2039 */
2040 2040 static void
2041 2041 zsd_get_zone_pool_pset(zsd_ctl_t *ctl, zsd_zone_t *zone,
2042 2042 char *pool, int poollen, char *pset, int psetlen, uint_t *cputype)
2043 2043 {
2044 2044 poolid_t poolid;
2045 2045 pool_t **pools = NULL;
2046 2046 pool_resource_t **res_list = NULL;
2047 2047 char poolname[ZS_POOLNAME_MAX];
2048 2048 char psetname[ZS_PSETNAME_MAX];
2049 2049 pool_conf_t *conf = ctl->zsctl_pool_conf;
2050 2050 pool_value_t *pv_save = NULL;
2051 2051 pool_value_t **vals = ctl->zsctl_pool_vals;
2052 2052 const char *string;
2053 2053 int ret;
2054 2054 int64_t int64;
2055 2055 uint_t num;
2056 2056
2057 2057 ret = zone_getattr(zone->zsz_id, ZONE_ATTR_POOLID,
2058 2058 &poolid, sizeof (poolid));
2059 2059 if (ret < 0)
2060 2060 goto lookup_done;
2061 2061
2062 2062 pv_save = vals[1];
2063 2063 vals[1] = NULL;
2064 2064 pools = NULL;
2065 2065 res_list = NULL;
2066 2066
2067 2067 /* Default values if lookup fails */
2068 2068 (void) strlcpy(poolname, "pool_default", sizeof (poolname));
2069 2069 (void) strlcpy(psetname, "pset_default", sizeof (poolname));
2070 2070 *cputype = ZS_CPUTYPE_DEFAULT_PSET;
2071 2071
2072 2072 /* no dedicated cpu if pools are disabled */
2073 2073 if (ctl->zsctl_pool_status == POOL_DISABLED)
2074 2074 goto lookup_done;
2075 2075
2076 2076 /* Get the pool name using the id */
2077 2077 pool_value_set_int64(vals[0], poolid);
2078 2078 if (pool_value_set_name(vals[0], "pool.sys_id") != PO_SUCCESS)
2079 2079 goto lookup_done;
2080 2080
2081 2081 if ((pools = pool_query_pools(conf, &num, vals)) == NULL)
2082 2082 goto lookup_done;
2083 2083
2084 2084 if (num != 1)
2085 2085 goto lookup_done;
2086 2086
2087 2087 if (pool_get_property(conf, pool_to_elem(conf, pools[0]),
2088 2088 "pool.name", vals[0]) != POC_STRING ||
2089 2089 pool_value_get_string(vals[0], &string) != PO_SUCCESS)
2090 2090 goto lookup_done;
2091 2091 (void) strlcpy(poolname, (char *)string, sizeof (poolname));
2092 2092
2093 2093 /* Get the name of the pset for the pool */
2094 2094 if (pool_value_set_name(vals[0], "type") != PO_SUCCESS)
2095 2095 goto lookup_done;
2096 2096
2097 2097 if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS)
2098 2098 goto lookup_done;
2099 2099
2100 2100 if ((res_list = pool_query_pool_resources(conf, pools[0], &num, vals))
2101 2101 == NULL)
2102 2102 goto lookup_done;
2103 2103
2104 2104 if (num != 1)
2105 2105 goto lookup_done;
2106 2106
2107 2107 if (pool_get_property(conf, pool_resource_to_elem(conf,
2108 2108 res_list[0]), "pset.sys_id", vals[0]) != POC_INT ||
2109 2109 pool_value_get_int64(vals[0], &int64) != PO_SUCCESS)
2110 2110 goto lookup_done;
2111 2111
2112 2112 if (int64 == ZS_PSET_DEFAULT)
2113 2113 *cputype = ZS_CPUTYPE_DEFAULT_PSET;
2114 2114
2115 2115 if (pool_get_property(conf, pool_resource_to_elem(conf,
2116 2116 res_list[0]), "pset.name", vals[0]) != POC_STRING ||
2117 2117 pool_value_get_string(vals[0], &string) != PO_SUCCESS)
2118 2118 goto lookup_done;
2119 2119
2120 2120 (void) strlcpy(psetname, (char *)string, sizeof (psetname));
2121 2121
2122 2122 if (strncmp(psetname, "SUNWtmp_", strlen("SUNWtmp_")) == 0)
2123 2123 *cputype = ZS_CPUTYPE_DEDICATED;
2124 2124 if (strncmp(psetname, "SUNW_legacy_", strlen("SUNW_legacy_")) == 0)
2125 2125 *cputype = ZS_CPUTYPE_PSRSET_PSET;
2126 2126 else
2127 2127 *cputype = ZS_CPUTYPE_POOL_PSET;
2128 2128
2129 2129 lookup_done:
2130 2130
2131 2131 if (pv_save != NULL)
2132 2132 vals[1] = pv_save;
2133 2133
2134 2134 if (res_list)
2135 2135 free(res_list);
2136 2136 if (pools)
2137 2137 free(pools);
2138 2138
2139 2139 (void) strlcpy(pool, poolname, poollen);
2140 2140 (void) strlcpy(pset, psetname, psetlen);
2141 2141 }
2142 2142
2143 2143 /* Convert scheduler names to ZS_* scheduler flags */
2144 2144 static uint_t
2145 2145 zsd_schedname2int(char *clname, int pri)
2146 2146 {
2147 2147 uint_t sched = 0;
2148 2148
2149 2149 if (strcmp(clname, "TS") == 0) {
2150 2150 sched = ZS_SCHED_TS;
2151 2151 } else if (strcmp(clname, "IA") == 0) {
2152 2152 sched = ZS_SCHED_IA;
2153 2153 } else if (strcmp(clname, "FX") == 0) {
2154 2154 if (pri > 59) {
2155 2155 sched = ZS_SCHED_FX_60;
2156 2156 } else {
2157 2157 sched = ZS_SCHED_FX;
2158 2158 }
2159 2159 } else if (strcmp(clname, "RT") == 0) {
2160 2160 sched = ZS_SCHED_RT;
2161 2161
2162 2162 } else if (strcmp(clname, "FSS") == 0) {
2163 2163 sched = ZS_SCHED_FSS;
2164 2164 }
2165 2165 return (sched);
2166 2166 }
2167 2167
2168 2168 static uint64_t
2169 2169 zsd_get_zone_rctl_limit(char *name)
2170 2170 {
2171 2171 rctlblk_t *rblk;
2172 2172
2173 2173 rblk = (rctlblk_t *)alloca(rctlblk_size());
2174 2174 if (getrctl(name, NULL, rblk, RCTL_FIRST)
2175 2175 != 0) {
2176 2176 return (ZS_LIMIT_NONE);
2177 2177 }
2178 2178 return (rctlblk_get_value(rblk));
2179 2179 }
2180 2180
2181 2181 static uint64_t
2182 2182 zsd_get_zone_rctl_usage(char *name)
2183 2183 {
2184 2184 rctlblk_t *rblk;
2185 2185
2186 2186 rblk = (rctlblk_t *)alloca(rctlblk_size());
2187 2187 if (getrctl(name, NULL, rblk, RCTL_USAGE)
2188 2188 != 0) {
2189 2189 return (0);
2190 2190 }
2191 2191 return (rctlblk_get_value(rblk));
2192 2192 }
2193 2193
2194 2194 #define ZSD_NUM_RCTL_VALS 20
2195 2195
2196 2196 /*
2197 2197 * Fetch the limit information for a zone. This uses zone_enter() as the
2198 2198 * getrctl(2) system call only returns rctl information for the zone of
2199 2199 * the caller.
2200 2200 */
2201 2201 static int
2202 2202 zsd_get_zone_caps(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t *cpu_shares,
2203 2203 uint64_t *cpu_cap, uint64_t *ram_cap, uint64_t *locked_cap,
2204 2204 uint64_t *vm_cap, uint64_t *processes_cap, uint64_t *processes,
2205 2205 uint64_t *lwps_cap, uint64_t *lwps, uint64_t *shm_cap, uint64_t *shm,
2206 2206 uint64_t *shmids_cap, uint64_t *shmids, uint64_t *semids_cap,
2207 2207 uint64_t *semids, uint64_t *msgids_cap, uint64_t *msgids,
2208 2208 uint64_t *lofi_cap, uint64_t *lofi, uint_t *sched)
2209 2209 {
2210 2210 int p[2], pid, tmpl_fd, ret;
2211 2211 ctid_t ct;
2212 2212 char class[PC_CLNMSZ];
2213 2213 uint64_t vals[ZSD_NUM_RCTL_VALS];
2214 2214 zsd_system_t *sys = ctl->zsctl_system;
2215 2215 int i = 0;
2216 2216 int res = 0;
2217 2217
2218 2218 /* Treat all caps as no cap on error */
2219 2219 *cpu_shares = ZS_LIMIT_NONE;
2220 2220 *cpu_cap = ZS_LIMIT_NONE;
2221 2221 *ram_cap = ZS_LIMIT_NONE;
2222 2222 *locked_cap = ZS_LIMIT_NONE;
2223 2223 *vm_cap = ZS_LIMIT_NONE;
2224 2224
2225 2225 *processes_cap = ZS_LIMIT_NONE;
2226 2226 *lwps_cap = ZS_LIMIT_NONE;
2227 2227 *shm_cap = ZS_LIMIT_NONE;
2228 2228 *shmids_cap = ZS_LIMIT_NONE;
2229 2229 *semids_cap = ZS_LIMIT_NONE;
2230 2230 *msgids_cap = ZS_LIMIT_NONE;
2231 2231 *lofi_cap = ZS_LIMIT_NONE;
2232 2232
2233 2233 *processes = 0;
2234 2234 *lwps = 0;
2235 2235 *shm = 0;
2236 2236 *shmids = 0;
2237 2237 *semids = 0;
2238 2238 *msgids = 0;
2239 2239 *lofi = 0;
2240 2240
2241 2241 /* Get the zone's default scheduling class */
2242 2242 ret = zone_getattr(zone->zsz_id, ZONE_ATTR_SCHED_CLASS,
2243 2243 class, sizeof (class));
2244 2244 if (ret < 0)
2245 2245 return (-1);
2246 2246
2247 2247 *sched = zsd_schedname2int(class, 0);
2248 2248
2249 2249 /* rctl caps must be fetched from within the zone */
2250 2250 if (pipe(p) != 0)
2251 2251 return (-1);
2252 2252
2253 2253 if ((tmpl_fd = init_template()) == -1) {
2254 2254 (void) close(p[0]);
2255 2255 (void) close(p[1]);
2256 2256 return (-1);
2257 2257 }
2258 2258 pid = forkx(0);
2259 2259 if (pid < 0) {
2260 2260 (void) ct_tmpl_clear(tmpl_fd);
2261 2261 (void) close(p[0]);
2262 2262 (void) close(p[1]);
2263 2263 return (-1);
2264 2264 }
2265 2265 if (pid == 0) {
2266 2266
2267 2267 (void) ct_tmpl_clear(tmpl_fd);
2268 2268 (void) close(tmpl_fd);
2269 2269 (void) close(p[0]);
2270 2270 if (zone->zsz_id != getzoneid()) {
2271 2271 if (zone_enter(zone->zsz_id) < 0) {
2272 2272 (void) close(p[1]);
2273 2273 _exit(0);
2274 2274 }
2275 2275 }
2276 2276
2277 2277 /* Get caps for zone, and write them to zonestatd parent. */
2278 2278 vals[i++] = zsd_get_zone_rctl_limit("zone.cpu-shares");
2279 2279 vals[i++] = zsd_get_zone_rctl_limit("zone.cpu-cap");
2280 2280 vals[i++] = zsd_get_zone_rctl_limit("zone.max-locked-memory");
2281 2281 vals[i++] = zsd_get_zone_rctl_limit("zone.max-swap");
2282 2282 vals[i++] = zsd_get_zone_rctl_limit("zone.max-processes");
2283 2283 vals[i++] = zsd_get_zone_rctl_usage("zone.max-processes");
2284 2284 vals[i++] = zsd_get_zone_rctl_limit("zone.max-lwps");
2285 2285 vals[i++] = zsd_get_zone_rctl_usage("zone.max-lwps");
2286 2286 vals[i++] = zsd_get_zone_rctl_limit("zone.max-shm-memory");
2287 2287 vals[i++] = zsd_get_zone_rctl_usage("zone.max-shm-memory");
2288 2288 vals[i++] = zsd_get_zone_rctl_limit("zone.max-shm-ids");
2289 2289 vals[i++] = zsd_get_zone_rctl_usage("zone.max-shm-ids");
2290 2290 vals[i++] = zsd_get_zone_rctl_limit("zone.max-sem-ids");
2291 2291 vals[i++] = zsd_get_zone_rctl_usage("zone.max-sem-ids");
2292 2292 vals[i++] = zsd_get_zone_rctl_limit("zone.max-msg-ids");
2293 2293 vals[i++] = zsd_get_zone_rctl_usage("zone.max-msg-ids");
2294 2294 vals[i++] = zsd_get_zone_rctl_limit("zone.max-lofi");
2295 2295 vals[i++] = zsd_get_zone_rctl_usage("zone.max-lofi");
2296 2296 vals[i++] = zsd_get_zone_rctl_usage("zone.max-physical-memory");
2297 2297
2298 2298 if (write(p[1], vals, ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) !=
2299 2299 ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) {
2300 2300 (void) close(p[1]);
2301 2301 _exit(1);
2302 2302 }
2303 2303
2304 2304 (void) close(p[1]);
2305 2305 _exit(0);
2306 2306 }
2307 2307 if (contract_latest(&ct) == -1)
2308 2308 ct = -1;
2309 2309
2310 2310 (void) ct_tmpl_clear(tmpl_fd);
2311 2311 (void) close(tmpl_fd);
2312 2312 (void) close(p[1]);
2313 2313 while (waitpid(pid, NULL, 0) != pid)
2314 2314 ;
2315 2315
2316 2316 /* Read cap from child in zone */
2317 2317 if (read(p[0], vals, ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) !=
2318 2318 ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) {
2319 2319 res = -1;
2320 2320 goto cleanup;
2321 2321 }
2322 2322 i = 0;
2323 2323 *cpu_shares = vals[i++];
2324 2324 *cpu_cap = vals[i++];
2325 2325 *locked_cap = vals[i++];
2326 2326 *vm_cap = vals[i++];
2327 2327 *processes_cap = vals[i++];
2328 2328 *processes = vals[i++];
2329 2329 *lwps_cap = vals[i++];
2330 2330 *lwps = vals[i++];
2331 2331 *shm_cap = vals[i++];
2332 2332 *shm = vals[i++];
2333 2333 *shmids_cap = vals[i++];
2334 2334 *shmids = vals[i++];
2335 2335 *semids_cap = vals[i++];
2336 2336 *semids = vals[i++];
2337 2337 *msgids_cap = vals[i++];
2338 2338 *msgids = vals[i++];
2339 2339 *lofi_cap = vals[i++];
2340 2340 *lofi = vals[i++];
2341 2341 *ram_cap = vals[i++];
2342 2342
2343 2343 /* Interpret maximum values as no cap */
2344 2344 if (*cpu_cap == UINT32_MAX || *cpu_cap == 0)
2345 2345 *cpu_cap = ZS_LIMIT_NONE;
2346 2346 if (*processes_cap == sys->zss_processes_max)
2347 2347 *processes_cap = ZS_LIMIT_NONE;
2348 2348 if (*lwps_cap == sys->zss_lwps_max)
2349 2349 *lwps_cap = ZS_LIMIT_NONE;
2350 2350 if (*shm_cap == sys->zss_shm_max)
2351 2351 *shm_cap = ZS_LIMIT_NONE;
2352 2352 if (*shmids_cap == sys->zss_shmids_max)
2353 2353 *shmids_cap = ZS_LIMIT_NONE;
2354 2354 if (*semids_cap == sys->zss_semids_max)
2355 2355 *semids_cap = ZS_LIMIT_NONE;
2356 2356 if (*msgids_cap == sys->zss_msgids_max)
2357 2357 *msgids_cap = ZS_LIMIT_NONE;
2358 2358 if (*lofi_cap == sys->zss_lofi_max)
2359 2359 *lofi_cap = ZS_LIMIT_NONE;
2360 2360
2361 2361
2362 2362 cleanup:
2363 2363 (void) close(p[0]);
2364 2364 (void) ct_tmpl_clear(tmpl_fd);
2365 2365 (void) close(tmpl_fd);
2366 2366 (void) contract_abandon_id(ct);
2367 2367
2368 2368 return (res);
2369 2369 }
2370 2370
2371 2371 /* Update the current list of running zones */
2372 2372 static void
2373 2373 zsd_refresh_zones(zsd_ctl_t *ctl)
2374 2374 {
2375 2375 zsd_zone_t *zone;
2376 2376 uint_t old, num;
2377 2377 ushort_t flags;
2378 2378 int i, ret;
2379 2379 zoneid_t *cache;
2380 2380 uint64_t cpu_shares;
2381 2381 uint64_t cpu_cap;
2382 2382 uint64_t ram_cap;
2383 2383 uint64_t locked_cap;
2384 2384 uint64_t vm_cap;
2385 2385 uint64_t processes_cap;
2386 2386 uint64_t processes;
2387 2387 uint64_t lwps_cap;
2388 2388 uint64_t lwps;
2389 2389 uint64_t shm_cap;
2390 2390 uint64_t shm;
2391 2391 uint64_t shmids_cap;
2392 2392 uint64_t shmids;
2393 2393 uint64_t semids_cap;
2394 2394 uint64_t semids;
2395 2395 uint64_t msgids_cap;
2396 2396 uint64_t msgids;
2397 2397 uint64_t lofi_cap;
2398 2398 uint64_t lofi;
2399 2399
2400 2400 char zonename[ZS_ZONENAME_MAX];
2401 2401 char poolname[ZS_POOLNAME_MAX];
2402 2402 char psetname[ZS_PSETNAME_MAX];
2403 2403 uint_t sched;
2404 2404 uint_t cputype;
2405 2405 uint_t iptype;
2406 2406
2407 2407 /* Get the current list of running zones */
2408 2408 for (;;) {
2409 2409 old = num = ctl->zsctl_zone_ncache;
2410 2410 (void) zone_list(ctl->zsctl_zone_cache, &num);
2411 2411 if (num <= old)
2412 2412 break;
2413 2413 if ((cache = (zoneid_t *)realloc(ctl->zsctl_zone_cache,
2414 2414 (num) * sizeof (zoneid_t))) != NULL) {
2415 2415 ctl->zsctl_zone_ncache = num;
2416 2416 ctl->zsctl_zone_cache = cache;
2417 2417 } else {
2418 2418 /* Could not allocate to get new zone list. Give up */
2419 2419 return;
2420 2420 }
2421 2421 }
2422 2422
2423 2423 zsd_mark_zones_start(ctl);
2424 2424
2425 2425 for (i = 0; i < num; i++) {
2426 2426
2427 2427 ret = getzonenamebyid(ctl->zsctl_zone_cache[i],
2428 2428 zonename, sizeof (zonename));
2429 2429 if (ret < 0)
2430 2430 continue;
2431 2431
2432 2432 zone = zsd_lookup_insert_zone(ctl, zonename,
2433 2433 ctl->zsctl_zone_cache[i]);
2434 2434
2435 2435 ret = zone_getattr(ctl->zsctl_zone_cache[i], ZONE_ATTR_FLAGS,
2436 2436 &flags, sizeof (flags));
2437 2437 if (ret < 0)
2438 2438 continue;
2439 2439
2440 2440 if (flags & ZF_NET_EXCL)
2441 2441 iptype = ZS_IPTYPE_EXCLUSIVE;
2442 2442 else
2443 2443 iptype = ZS_IPTYPE_SHARED;
2444 2444
2445 2445 zsd_get_zone_pool_pset(ctl, zone, poolname, sizeof (poolname),
2446 2446 psetname, sizeof (psetname), &cputype);
2447 2447
2448 2448 if (zsd_get_zone_caps(ctl, zone, &cpu_shares, &cpu_cap,
2449 2449 &ram_cap, &locked_cap, &vm_cap, &processes_cap, &processes,
2450 2450 &lwps_cap, &lwps, &shm_cap, &shm, &shmids_cap, &shmids,
2451 2451 &semids_cap, &semids, &msgids_cap, &msgids, &lofi_cap,
2452 2452 &lofi, &sched) != 0)
2453 2453 continue;
2454 2454
2455 2455 zsd_mark_zone_found(ctl, zone, cpu_shares, cpu_cap, ram_cap,
2456 2456 locked_cap, vm_cap, processes_cap, processes, lwps_cap,
2457 2457 lwps, shm_cap, shm, shmids_cap, shmids, semids_cap,
2458 2458 semids, msgids_cap, msgids, lofi_cap, lofi, poolname,
2459 2459 psetname, sched, cputype, iptype);
2460 2460 }
2461 2461 }
2462 2462
2463 2463 /* Fetch the details of a process from its psinfo_t */
2464 2464 static void
2465 2465 zsd_get_proc_info(zsd_ctl_t *ctl, psinfo_t *psinfo, psetid_t *psetid,
2466 2466 psetid_t *prev_psetid, zoneid_t *zoneid, zoneid_t *prev_zoneid,
2467 2467 timestruc_t *delta, uint_t *sched)
2468 2468 {
2469 2469 timestruc_t d;
2470 2470 zsd_proc_t *proc;
2471 2471
2472 2472 /* Get cached data for proc */
2473 2473 proc = &(ctl->zsctl_proc_array[psinfo->pr_pid]);
2474 2474 *psetid = psinfo->pr_lwp.pr_bindpset;
2475 2475
2476 2476 if (proc->zspr_psetid == ZS_PSET_ERROR)
2477 2477 *prev_psetid = *psetid;
2478 2478 else
2479 2479 *prev_psetid = proc->zspr_psetid;
2480 2480
2481 2481 *zoneid = psinfo->pr_zoneid;
2482 2482 if (proc->zspr_zoneid == -1)
2483 2483 *prev_zoneid = *zoneid;
2484 2484 else
2485 2485 *prev_zoneid = proc->zspr_zoneid;
2486 2486
2487 2487 TIMESTRUC_DELTA(d, psinfo->pr_time, proc->zspr_usage);
2488 2488 *delta = d;
2489 2489
2490 2490 *sched = zsd_schedname2int(psinfo->pr_lwp.pr_clname,
2491 2491 psinfo->pr_lwp.pr_pri);
2492 2492
2493 2493 /* Update cached data for proc */
2494 2494 proc->zspr_psetid = psinfo->pr_lwp.pr_bindpset;
2495 2495 proc->zspr_zoneid = psinfo->pr_zoneid;
2496 2496 proc->zspr_sched = *sched;
2497 2497 proc->zspr_usage.tv_sec = psinfo->pr_time.tv_sec;
2498 2498 proc->zspr_usage.tv_nsec = psinfo->pr_time.tv_nsec;
2499 2499 proc->zspr_ppid = psinfo->pr_ppid;
2500 2500 }
2501 2501
2502 2502 /*
2503 2503 * Reset the known cpu usage of a process. This is done after a process
2504 2504 * exits so that if the pid is recycled, data from its previous life is
2505 2505 * not reused
2506 2506 */
2507 2507 static void
2508 2508 zsd_flush_proc_info(zsd_proc_t *proc)
2509 2509 {
2510 2510 proc->zspr_usage.tv_sec = 0;
2511 2511 proc->zspr_usage.tv_nsec = 0;
2512 2512 }
2513 2513
2514 2514 /*
2515 2515 * Open the current extended accounting file. On initialization, open the
2516 2516 * file as the current file to be used. Otherwise, open the file as the
2517 2517 * next file to use of the current file reaches EOF.
2518 2518 */
2519 2519 static int
2520 2520 zsd_open_exacct(zsd_ctl_t *ctl, boolean_t init)
2521 2521 {
2522 2522 int ret, oret, state, trys = 0, flags;
2523 2523 int *fd, *open;
2524 2524 ea_file_t *eaf;
2525 2525 struct stat64 *stat;
2526 2526 char path[MAXPATHLEN];
2527 2527
2528 2528 /*
2529 2529 * The accounting file is first opened at the tail. Following
2530 2530 * opens to new accounting files are opened at the head.
2531 2531 */
2532 2532 if (init == B_TRUE) {
2533 2533 flags = EO_NO_VALID_HDR | EO_TAIL;
2534 2534 fd = &ctl->zsctl_proc_fd;
2535 2535 eaf = &ctl->zsctl_proc_eaf;
2536 2536 stat = &ctl->zsctl_proc_stat;
2537 2537 open = &ctl->zsctl_proc_open;
2538 2538 } else {
2539 2539 flags = EO_NO_VALID_HDR | EO_HEAD;
2540 2540 fd = &ctl->zsctl_proc_fd_next;
2541 2541 eaf = &ctl->zsctl_proc_eaf_next;
2542 2542 stat = &ctl->zsctl_proc_stat_next;
2543 2543 open = &ctl->zsctl_proc_open_next;
2544 2544 }
2545 2545
2546 2546 *fd = -1;
2547 2547 *open = 0;
2548 2548 retry:
2549 2549 /* open accounting files for cpu consumption */
2550 2550 ret = acctctl(AC_STATE_GET | AC_PROC, &state, sizeof (state));
2551 2551 if (ret != 0) {
2552 2552 zsd_warn(gettext("Unable to get process accounting state"));
2553 2553 goto err;
2554 2554 }
2555 2555 if (state != AC_ON) {
2556 2556 if (trys > 0) {
2557 2557 zsd_warn(gettext(
2558 2558 "Unable to enable process accounting"));
2559 2559 goto err;
2560 2560 }
2561 2561 (void) zsd_enable_cpu_stats();
2562 2562 trys++;
2563 2563 goto retry;
2564 2564 }
2565 2565
2566 2566 ret = acctctl(AC_FILE_GET | AC_PROC, path, sizeof (path));
2567 2567 if (ret != 0) {
2568 2568 zsd_warn(gettext("Unable to get process accounting file"));
2569 2569 goto err;
2570 2570 }
2571 2571
2572 2572 if ((*fd = open64(path, O_RDONLY, 0)) >= 0 &&
2573 2573 (oret = ea_fdopen(eaf, *fd, NULL, flags, O_RDONLY)) == 0)
2574 2574 ret = fstat64(*fd, stat);
2575 2575
2576 2576 if (*fd < 0 || oret < 0 || ret < 0) {
2577 2577 struct timespec ts;
2578 2578
2579 2579 /*
2580 2580 * It is possible the accounting file is momentarily unavailable
2581 2581 * because it is being rolled. Try for up to half a second.
2582 2582 *
2583 2583 * If failure to open accounting file persists, give up.
2584 2584 */
2585 2585 if (oret == 0)
2586 2586 (void) ea_close(eaf);
2587 2587 else if (*fd >= 0)
2588 2588 (void) close(*fd);
2589 2589 if (trys > 500) {
2590 2590 zsd_warn(gettext(
2591 2591 "Unable to open process accounting file"));
2592 2592 goto err;
2593 2593 }
2594 2594 /* wait one millisecond */
2595 2595 ts.tv_sec = 0;
2596 2596 ts.tv_nsec = NANOSEC / 1000;
2597 2597 (void) nanosleep(&ts, NULL);
2598 2598 goto retry;
2599 2599 }
2600 2600 *open = 1;
2601 2601 return (0);
2602 2602 err:
2603 2603 if (*fd >= 0)
2604 2604 (void) close(*fd);
2605 2605 *open = 0;
2606 2606 *fd = -1;
2607 2607 return (-1);
2608 2608 }
2609 2609
2610 2610 /*
2611 2611 * Walk /proc and charge each process to its zone and processor set.
2612 2612 * Then read exacct data for exited processes, and charge them as well.
2613 2613 */
2614 2614 static void
2615 2615 zsd_refresh_procs(zsd_ctl_t *ctl, boolean_t init)
2616 2616 {
2617 2617 DIR *dir;
2618 2618 struct dirent *dent;
2619 2619 psinfo_t psinfo;
2620 2620 int fd, ret;
2621 2621 zsd_proc_t *proc, *pproc, *tmp, *next;
2622 2622 list_t pplist, plist;
2623 2623 zsd_zone_t *zone, *prev_zone;
2624 2624 zsd_pset_t *pset, *prev_pset;
2625 2625 psetid_t psetid, prev_psetid;
2626 2626 zoneid_t zoneid, prev_zoneid;
2627 2627 zsd_pset_usage_t *usage, *prev_usage;
2628 2628 char path[MAXPATHLEN];
2629 2629
2630 2630 ea_object_t object;
2631 2631 ea_object_t pobject;
2632 2632 boolean_t hrtime_expired = B_FALSE;
2633 2633 struct timeval interval_end;
2634 2634
2635 2635 timestruc_t delta, d1, d2;
2636 2636 uint_t sched = 0;
2637 2637
2638 2638 /*
2639 2639 * Get the current accounting file. The current accounting file
2640 2640 * may be different than the file in use, as the accounting file
2641 2641 * may have been rolled, or manually changed by an admin.
2642 2642 */
2643 2643 ret = zsd_open_exacct(ctl, init);
2644 2644 if (ret != 0) {
2645 2645 zsd_warn(gettext("Unable to track process accounting"));
2646 2646 return;
2647 2647 }
2648 2648
2649 2649 /*
2650 2650 * Mark the current time as the interval end time. Don't track
2651 2651 * processes that exit after this time.
2652 2652 */
2653 2653 (void) gettimeofday(&interval_end, NULL);
2654 2654
2655 2655 dir = opendir("/proc");
2656 2656 if (dir == NULL) {
2657 2657 zsd_warn(gettext("Unable to open /proc"));
2658 2658 return;
2659 2659 }
2660 2660
2661 2661 dent = ctl->zsctl_procfs_dent;
2662 2662
2663 2663 (void) memset(dent, 0, ctl->zsctl_procfs_dent_size);
2664 2664
2665 2665 /* Walk all processes and compute each zone's usage on each pset. */
2666 2666 while (readdir_r(dir, dent) != 0) {
2667 2667
2668 2668 if (strcmp(dent->d_name, ".") == 0 ||
2669 2669 strcmp(dent->d_name, "..") == 0)
2670 2670 continue;
2671 2671
2672 2672 (void) snprintf(path, sizeof (path), "/proc/%s/psinfo",
2673 2673 dent->d_name);
2674 2674
2675 2675 fd = open(path, O_RDONLY);
2676 2676 if (fd < 0)
2677 2677 continue;
2678 2678
2679 2679 if (read(fd, &psinfo, sizeof (psinfo)) != sizeof (psinfo)) {
2680 2680 (void) close(fd);
2681 2681 continue;
2682 2682 }
2683 2683 (void) close(fd);
2684 2684
2685 2685 zsd_get_proc_info(ctl, &psinfo, &psetid, &prev_psetid,
2686 2686 &zoneid, &prev_zoneid, &delta, &sched);
2687 2687
2688 2688 d1.tv_sec = delta.tv_sec / 2;
2689 2689 d1.tv_nsec = delta.tv_nsec / 2;
2690 2690 d2.tv_sec = (delta.tv_sec / 2) + (delta.tv_sec % 2);
2691 2691 d2.tv_nsec = (delta.tv_nsec / 2) + (delta.tv_nsec % 2);
2692 2692
2693 2693 /* Get the zone and pset this process is running in */
2694 2694 zone = zsd_lookup_zone_byid(ctl, zoneid);
2695 2695 if (zone == NULL)
2696 2696 continue;
2697 2697 pset = zsd_lookup_pset_byid(ctl, psetid);
2698 2698 if (pset == NULL)
2699 2699 continue;
2700 2700 usage = zsd_lookup_insert_usage(ctl, pset, zone);
2701 2701 if (usage == NULL)
2702 2702 continue;
2703 2703
2704 2704 /*
2705 2705 * Get the usage of the previous zone and pset if they were
2706 2706 * different.
2707 2707 */
2708 2708 if (zoneid != prev_zoneid)
2709 2709 prev_zone = zsd_lookup_zone_byid(ctl, prev_zoneid);
2710 2710 else
2711 2711 prev_zone = NULL;
2712 2712
2713 2713 if (psetid != prev_psetid)
2714 2714 prev_pset = zsd_lookup_pset_byid(ctl, prev_psetid);
2715 2715 else
2716 2716 prev_pset = NULL;
2717 2717
2718 2718 prev_usage = NULL;
2719 2719 if (prev_zone != NULL || prev_pset != NULL) {
2720 2720 if (prev_zone == NULL)
2721 2721 prev_zone = zone;
2722 2722 if (prev_pset == NULL)
2723 2723 prev_pset = pset;
2724 2724
2725 2725 prev_usage = zsd_lookup_insert_usage(ctl, prev_pset,
2726 2726 prev_zone);
2727 2727 }
2728 2728
2729 2729 /* Update the usage with the processes info */
2730 2730 if (prev_usage == NULL) {
2731 2731 zsd_mark_pset_usage_found(usage, sched);
2732 2732 } else {
2733 2733 zsd_mark_pset_usage_found(usage, sched);
2734 2734 zsd_mark_pset_usage_found(prev_usage, sched);
2735 2735 }
2736 2736
2737 2737 /*
2738 2738 * First time around is just to get a starting point. All
2739 2739 * usages will be zero.
2740 2740 */
2741 2741 if (init == B_TRUE)
2742 2742 continue;
2743 2743
2744 2744 if (prev_usage == NULL) {
2745 2745 zsd_add_usage(ctl, usage, &delta);
2746 2746 } else {
2747 2747 zsd_add_usage(ctl, usage, &d1);
2748 2748 zsd_add_usage(ctl, prev_usage, &d2);
2749 2749 }
2750 2750 }
2751 2751 (void) closedir(dir);
2752 2752
2753 2753 /*
2754 2754 * No need to collect exited proc data on initialization. Just
2755 2755 * caching the usage of the known processes to get a zero starting
2756 2756 * point.
2757 2757 */
2758 2758 if (init == B_TRUE)
2759 2759 return;
2760 2760
2761 2761 /*
2762 2762 * Add accounting records to account for processes which have
2763 2763 * exited.
2764 2764 */
2765 2765 list_create(&plist, sizeof (zsd_proc_t),
2766 2766 offsetof(zsd_proc_t, zspr_next));
2767 2767 list_create(&pplist, sizeof (zsd_proc_t),
2768 2768 offsetof(zsd_proc_t, zspr_next));
2769 2769
2770 2770 for (;;) {
2771 2771 pid_t pid;
2772 2772 pid_t ppid;
2773 2773 timestruc_t user, sys, proc_usage;
2774 2774 timestruc_t finish;
2775 2775 int numfound = 0;
2776 2776
2777 2777 bzero(&object, sizeof (object));
2778 2778 proc = NULL;
2779 2779 zone = NULL;
2780 2780 pset = NULL;
2781 2781 usage = NULL;
2782 2782 ret = ea_get_object(&ctl->zsctl_proc_eaf, &object);
2783 2783 if (ret == EO_ERROR) {
2784 2784 if (ea_error() == EXR_EOF) {
2785 2785
2786 2786 struct stat64 *stat;
2787 2787 struct stat64 *stat_next;
2788 2788
2789 2789 /*
2790 2790 * See if the next accounting file is the
2791 2791 * same as the current accounting file.
2792 2792 */
2793 2793 stat = &(ctl->zsctl_proc_stat);
2794 2794 stat_next = &(ctl->zsctl_proc_stat_next);
2795 2795 if (stat->st_ino == stat_next->st_ino &&
2796 2796 stat->st_dev == stat_next->st_dev) {
2797 2797 /*
2798 2798 * End of current accounting file is
2799 2799 * reached, so finished. Clear EOF
2800 2800 * bit for next time around.
2801 2801 */
2802 2802 ea_clear(&ctl->zsctl_proc_eaf);
2803 2803 break;
2804 2804 } else {
2805 2805 /*
2806 2806 * Accounting file has changed. Move
2807 2807 * to current accounting file.
2808 2808 */
2809 2809 (void) ea_close(&ctl->zsctl_proc_eaf);
2810 2810
2811 2811 ctl->zsctl_proc_fd =
2812 2812 ctl->zsctl_proc_fd_next;
2813 2813 ctl->zsctl_proc_eaf =
2814 2814 ctl->zsctl_proc_eaf_next;
2815 2815 ctl->zsctl_proc_stat =
2816 2816 ctl->zsctl_proc_stat_next;
2817 2817
2818 2818 ctl->zsctl_proc_fd_next = -1;
2819 2819 ctl->zsctl_proc_open_next = 0;
2820 2820 continue;
2821 2821 }
2822 2822 } else {
2823 2823 /*
2824 2824 * Other accounting error. Give up on
2825 2825 * accounting.
2826 2826 */
2827 2827 goto ea_err;
2828 2828 }
2829 2829 }
2830 2830 /* Skip if not a process group */
2831 2831 if ((object.eo_catalog & EXT_TYPE_MASK) != EXT_GROUP ||
2832 2832 (object.eo_catalog & EXD_DATA_MASK) != EXD_GROUP_PROC) {
2833 2833 (void) ea_free_item(&object, EUP_ALLOC);
2834 2834 continue;
2835 2835 }
2836 2836
2837 2837 /* The process group entry should be complete */
2838 2838 while (numfound < 9) {
2839 2839 bzero(&pobject, sizeof (pobject));
2840 2840 ret = ea_get_object(&ctl->zsctl_proc_eaf,
2841 2841 &pobject);
2842 2842 if (ret < 0) {
2843 2843 (void) ea_free_item(&object, EUP_ALLOC);
2844 2844 zsd_warn(
2845 2845 "unable to get process accounting data");
2846 2846 goto ea_err;
2847 2847 }
2848 2848 /* Next entries should be process data */
2849 2849 if ((pobject.eo_catalog & EXT_TYPE_MASK) ==
2850 2850 EXT_GROUP) {
2851 2851 (void) ea_free_item(&object, EUP_ALLOC);
2852 2852 (void) ea_free_item(&pobject, EUP_ALLOC);
2853 2853 zsd_warn(
2854 2854 "process data of wrong type");
2855 2855 goto ea_err;
2856 2856 }
2857 2857 switch (pobject.eo_catalog & EXD_DATA_MASK) {
2858 2858 case EXD_PROC_PID:
2859 2859 pid = pobject.eo_item.ei_uint32;
2860 2860 proc = &(ctl->zsctl_proc_array[pid]);
2861 2861 /*
2862 2862 * This process should not be currently in
2863 2863 * the list of processes to process.
2864 2864 */
2865 2865 assert(!list_link_active(&proc->zspr_next));
2866 2866 numfound++;
2867 2867 break;
2868 2868 case EXD_PROC_ANCPID:
2869 2869 ppid = pobject.eo_item.ei_uint32;
2870 2870 pproc = &(ctl->zsctl_proc_array[ppid]);
2871 2871 numfound++;
2872 2872 break;
2873 2873 case EXD_PROC_ZONENAME:
2874 2874 zone = zsd_lookup_zone(ctl,
2875 2875 pobject.eo_item.ei_string, -1);
2876 2876 numfound++;
2877 2877 break;
2878 2878 case EXD_PROC_CPU_USER_SEC:
2879 2879 user.tv_sec =
2880 2880 pobject.eo_item.ei_uint64;
2881 2881 numfound++;
2882 2882 break;
2883 2883 case EXD_PROC_CPU_USER_NSEC:
2884 2884 user.tv_nsec =
2885 2885 pobject.eo_item.ei_uint64;
2886 2886 numfound++;
2887 2887 break;
2888 2888 case EXD_PROC_CPU_SYS_SEC:
2889 2889 sys.tv_sec =
2890 2890 pobject.eo_item.ei_uint64;
2891 2891 numfound++;
2892 2892 break;
2893 2893 case EXD_PROC_CPU_SYS_NSEC:
2894 2894 sys.tv_nsec =
2895 2895 pobject.eo_item.ei_uint64;
2896 2896 numfound++;
2897 2897 break;
2898 2898 case EXD_PROC_FINISH_SEC:
2899 2899 finish.tv_sec =
2900 2900 pobject.eo_item.ei_uint64;
2901 2901 numfound++;
2902 2902 break;
2903 2903 case EXD_PROC_FINISH_NSEC:
2904 2904 finish.tv_nsec =
2905 2905 pobject.eo_item.ei_uint64;
2906 2906 numfound++;
2907 2907 break;
2908 2908 }
2909 2909 (void) ea_free_item(&pobject, EUP_ALLOC);
2910 2910 }
2911 2911 (void) ea_free_item(&object, EUP_ALLOC);
2912 2912 if (numfound != 9) {
2913 2913 zsd_warn(gettext(
2914 2914 "Malformed process accounting entry found"));
2915 2915 goto proc_done;
2916 2916 }
2917 2917
2918 2918 if (finish.tv_sec > interval_end.tv_sec ||
2919 2919 (finish.tv_sec == interval_end.tv_sec &&
2920 2920 finish.tv_nsec > (interval_end.tv_usec * 1000)))
2921 2921 hrtime_expired = B_TRUE;
2922 2922
2923 2923 /*
2924 2924 * Try to identify the zone and pset to which this
2925 2925 * exited process belongs.
2926 2926 */
2927 2927 if (zone == NULL)
2928 2928 goto proc_done;
2929 2929
2930 2930 /* Save proc info */
2931 2931 proc->zspr_ppid = ppid;
2932 2932 proc->zspr_zoneid = zone->zsz_id;
2933 2933
2934 2934 prev_psetid = ZS_PSET_ERROR;
2935 2935 sched = 0;
2936 2936
2937 2937 /*
2938 2938 * The following tries to deduce the processes pset.
2939 2939 *
2940 2940 * First choose pset and sched using cached value from the
2941 2941 * most recent time the process has been seen.
2942 2942 *
2943 2943 * pset and sched can change across zone_enter, so make sure
2944 2944 * most recent sighting of this process was in the same
2945 2945 * zone before using most recent known value.
2946 2946 *
2947 2947 * If there is no known value, use value of processes
2948 2948 * parent. If parent is unknown, walk parents until a known
2949 2949 * parent is found.
2950 2950 *
2951 2951 * If no parent in the zone is found, use the zone's default
2952 2952 * pset and scheduling class.
2953 2953 */
2954 2954 if (proc->zspr_psetid != ZS_PSET_ERROR) {
2955 2955 prev_psetid = proc->zspr_psetid;
2956 2956 pset = zsd_lookup_pset_byid(ctl, prev_psetid);
2957 2957 sched = proc->zspr_sched;
2958 2958 } else if (pproc->zspr_zoneid == zone->zsz_id &&
2959 2959 pproc->zspr_psetid != ZS_PSET_ERROR) {
2960 2960 prev_psetid = pproc->zspr_psetid;
2961 2961 pset = zsd_lookup_pset_byid(ctl, prev_psetid);
2962 2962 sched = pproc->zspr_sched;
2963 2963 }
2964 2964
2965 2965 if (pset == NULL) {
2966 2966 /*
2967 2967 * Process or processes parent has never been seen.
2968 2968 * Save to deduce a known parent later.
2969 2969 */
2970 2970 proc_usage = sys;
2971 2971 TIMESTRUC_ADD_TIMESTRUC(proc_usage, user);
2972 2972 TIMESTRUC_DELTA(delta, proc_usage,
2973 2973 proc->zspr_usage);
2974 2974 proc->zspr_usage = delta;
2975 2975 list_insert_tail(&plist, proc);
2976 2976 continue;
2977 2977 }
2978 2978
2979 2979 /* Add the zone's usage to the pset */
2980 2980 usage = zsd_lookup_insert_usage(ctl, pset, zone);
2981 2981 if (usage == NULL)
2982 2982 goto proc_done;
2983 2983
2984 2984 zsd_mark_pset_usage_found(usage, sched);
2985 2985
2986 2986 /* compute the usage to add for the exited proc */
2987 2987 proc_usage = sys;
2988 2988 TIMESTRUC_ADD_TIMESTRUC(proc_usage, user);
2989 2989 TIMESTRUC_DELTA(delta, proc_usage,
2990 2990 proc->zspr_usage);
2991 2991
2992 2992 zsd_add_usage(ctl, usage, &delta);
2993 2993 proc_done:
2994 2994 zsd_flush_proc_info(proc);
2995 2995
2996 2996 if (hrtime_expired == B_TRUE)
2997 2997 break;
2998 2998 }
2999 2999 /*
3000 3000 * close next accounting file.
3001 3001 */
3002 3002 if (ctl->zsctl_proc_open_next) {
3003 3003 (void) ea_close(
3004 3004 &ctl->zsctl_proc_eaf_next);
3005 3005 ctl->zsctl_proc_open_next = 0;
3006 3006 ctl->zsctl_proc_fd_next = -1;
3007 3007 }
3008 3008
3009 3009 /* For the remaining processes, use pset and sched of a known parent */
3010 3010 proc = list_head(&plist);
3011 3011 while (proc != NULL) {
3012 3012 next = proc;
3013 3013 for (;;) {
3014 3014 if (next->zspr_ppid == 0 || next->zspr_ppid == -1) {
3015 3015 /*
3016 3016 * Kernel process, or parent is unknown, skip
3017 3017 * process, remove from process list.
3018 3018 */
3019 3019 tmp = proc;
3020 3020 proc = list_next(&plist, proc);
3021 3021 list_link_init(&tmp->zspr_next);
3022 3022 break;
3023 3023 }
3024 3024 pproc = &(ctl->zsctl_proc_array[next->zspr_ppid]);
3025 3025 if (pproc->zspr_zoneid != proc->zspr_zoneid) {
3026 3026 /*
3027 3027 * Parent in different zone. Save process and
3028 3028 * use zone's default pset and sched below
3029 3029 */
3030 3030 tmp = proc;
3031 3031 proc = list_next(&plist, proc);
3032 3032 list_remove(&plist, tmp);
3033 3033 list_insert_tail(&pplist, tmp);
3034 3034 break;
3035 3035 }
3036 3036 /* Parent has unknown pset, Search parent's parent */
3037 3037 if (pproc->zspr_psetid == ZS_PSET_ERROR) {
3038 3038 next = pproc;
3039 3039 continue;
3040 3040 }
3041 3041 /* Found parent with known pset. Use its info */
3042 3042 proc->zspr_psetid = pproc->zspr_psetid;
3043 3043 proc->zspr_sched = pproc->zspr_sched;
3044 3044 next->zspr_psetid = pproc->zspr_psetid;
3045 3045 next->zspr_sched = pproc->zspr_sched;
3046 3046 zone = zsd_lookup_zone_byid(ctl,
3047 3047 proc->zspr_zoneid);
3048 3048 if (zone == NULL) {
3049 3049 tmp = proc;
3050 3050 proc = list_next(&plist, proc);
3051 3051 list_remove(&plist, tmp);
3052 3052 list_link_init(&tmp->zspr_next);
3053 3053 break;
3054 3054 }
3055 3055 pset = zsd_lookup_pset_byid(ctl,
3056 3056 proc->zspr_psetid);
3057 3057 if (pset == NULL) {
3058 3058 tmp = proc;
3059 3059 proc = list_next(&plist, proc);
3060 3060 list_remove(&plist, tmp);
3061 3061 list_link_init(&tmp->zspr_next);
3062 3062 break;
3063 3063 }
3064 3064 /* Add the zone's usage to the pset */
3065 3065 usage = zsd_lookup_insert_usage(ctl, pset, zone);
3066 3066 if (usage == NULL) {
3067 3067 tmp = proc;
3068 3068 proc = list_next(&plist, proc);
3069 3069 list_remove(&plist, tmp);
3070 3070 list_link_init(&tmp->zspr_next);
3071 3071 break;
3072 3072 }
3073 3073 zsd_mark_pset_usage_found(usage, proc->zspr_sched);
3074 3074 zsd_add_usage(ctl, usage, &proc->zspr_usage);
3075 3075 zsd_flush_proc_info(proc);
3076 3076 tmp = proc;
3077 3077 proc = list_next(&plist, proc);
3078 3078 list_remove(&plist, tmp);
3079 3079 list_link_init(&tmp->zspr_next);
3080 3080 break;
3081 3081 }
3082 3082 }
3083 3083 /*
3084 3084 * Process has never been seen. Using zone info to
3085 3085 * determine pset and scheduling class.
3086 3086 */
3087 3087 proc = list_head(&pplist);
3088 3088 while (proc != NULL) {
3089 3089
3090 3090 zone = zsd_lookup_zone_byid(ctl, proc->zspr_zoneid);
3091 3091 if (zone == NULL)
3092 3092 goto next;
3093 3093 if (zone->zsz_psetid != ZS_PSET_ERROR &&
3094 3094 zone->zsz_psetid != ZS_PSET_MULTI) {
3095 3095 prev_psetid = zone->zsz_psetid;
3096 3096 pset = zsd_lookup_pset_byid(ctl, prev_psetid);
3097 3097 } else {
3098 3098 pset = zsd_lookup_pset(ctl, zone->zsz_pset, -1);
3099 3099 if (pset != NULL)
3100 3100 prev_psetid = pset->zsp_id;
3101 3101 }
3102 3102 if (pset == NULL)
3103 3103 goto next;
3104 3104
3105 3105 sched = zone->zsz_scheds;
3106 3106 /*
3107 3107 * Ignore FX high scheduling class if it is not the
3108 3108 * only scheduling class in the zone.
3109 3109 */
3110 3110 if (sched != ZS_SCHED_FX_60)
3111 3111 sched &= (~ZS_SCHED_FX_60);
3112 3112 /*
3113 3113 * If more than one scheduling class has been found
3114 3114 * in the zone, use zone's default scheduling class for
3115 3115 * this process.
3116 3116 */
3117 3117 if ((sched & (sched - 1)) != 0)
3118 3118 sched = zone->zsz_default_sched;
3119 3119
3120 3120 /* Add the zone's usage to the pset */
3121 3121 usage = zsd_lookup_insert_usage(ctl, pset, zone);
3122 3122 if (usage == NULL)
3123 3123 goto next;
3124 3124
3125 3125 zsd_mark_pset_usage_found(usage, sched);
3126 3126 zsd_add_usage(ctl, usage, &proc->zspr_usage);
3127 3127 next:
3128 3128 tmp = proc;
3129 3129 proc = list_next(&pplist, proc);
3130 3130 zsd_flush_proc_info(tmp);
3131 3131 list_link_init(&tmp->zspr_next);
3132 3132 }
3133 3133 return;
3134 3134 ea_err:
3135 3135 /*
3136 3136 * Close the next accounting file if we have not transitioned to it
3137 3137 * yet.
3138 3138 */
3139 3139 if (ctl->zsctl_proc_open_next) {
3140 3140 (void) ea_close(&ctl->zsctl_proc_eaf_next);
3141 3141 ctl->zsctl_proc_open_next = 0;
3142 3142 ctl->zsctl_proc_fd_next = -1;
3143 3143 }
3144 3144 }
3145 3145
3146 3146 /*
3147 3147 * getvmusage(2) uses size_t's in the passwd data structure, which differ
3148 3148 * in size for 32bit and 64 bit kernels. Since this is a contracted interface,
3149 3149 * and zonestatd does not necessarily match the kernel's bitness, marshal
3150 3150 * results appropriately.
3151 3151 */
3152 3152 static int
3153 3153 zsd_getvmusage(zsd_ctl_t *ctl, uint_t flags, time_t age, zsd_vmusage64_t *buf,
3154 3154 uint64_t *nres)
3155 3155 {
3156 3156 zsd_vmusage32_t *vmu32;
3157 3157 zsd_vmusage64_t *vmu64;
3158 3158 uint32_t nres32;
3159 3159 int i;
3160 3160 int ret;
3161 3161
3162 3162 if (ctl->zsctl_kern_bits == 32) {
3163 3163 nres32 = *nres;
3164 3164 ret = syscall(SYS_rusagesys, _RUSAGESYS_GETVMUSAGE,
3165 3165 flags, age, (uintptr_t)buf, (uintptr_t)&nres32);
3166 3166 *nres = nres32;
3167 3167 if (ret == 0 && buf != NULL) {
3168 3168 /*
3169 3169 * An array of vmusage32_t's has been returned.
3170 3170 * Convert it to an array of vmusage64_t's.
3171 3171 */
3172 3172 vmu32 = (zsd_vmusage32_t *)buf;
3173 3173 vmu64 = (zsd_vmusage64_t *)buf;
3174 3174 for (i = nres32 - 1; i >= 0; i--) {
3175 3175
3176 3176 vmu64[i].vmu_zoneid = vmu32[i].vmu_zoneid;
3177 3177 vmu64[i].vmu_type = vmu32[i].vmu_type;
3178 3178 vmu64[i].vmu_type = vmu32[i].vmu_type;
3179 3179 vmu64[i].vmu_rss_all = vmu32[i].vmu_rss_all;
3180 3180 vmu64[i].vmu_rss_private =
3181 3181 vmu32[i].vmu_rss_private;
3182 3182 vmu64[i].vmu_rss_shared =
3183 3183 vmu32[i].vmu_rss_shared;
3184 3184 vmu64[i].vmu_swap_all = vmu32[i].vmu_swap_all;
3185 3185 vmu64[i].vmu_swap_private =
3186 3186 vmu32[i].vmu_swap_private;
3187 3187 vmu64[i].vmu_swap_shared =
3188 3188 vmu32[i].vmu_swap_shared;
3189 3189 }
3190 3190 }
3191 3191 return (ret);
3192 3192 } else {
3193 3193 /*
3194 3194 * kernel is 64 bit, so use 64 bit structures as zonestat
3195 3195 * expects.
3196 3196 */
3197 3197 return (syscall(SYS_rusagesys, _RUSAGESYS_GETVMUSAGE,
3198 3198 flags, age, (uintptr_t)buf, (uintptr_t)nres));
3199 3199
3200 3200 }
3201 3201 }
3202 3202
3203 3203 /*
3204 3204 * Update the current physical, virtual, and locked memory usage of the
3205 3205 * running zones.
3206 3206 */
3207 3207 static void
3208 3208 zsd_refresh_memory(zsd_ctl_t *ctl, boolean_t init)
3209 3209 {
3210 3210
3211 3211 uint64_t phys_total;
3212 3212 uint64_t phys_used;
3213 3213 uint64_t phys_zones;
3214 3214 uint64_t phys_zones_overcount;
3215 3215 uint64_t phys_zones_extra;
3216 3216 uint64_t phys_zones_credit;
3217 3217
3218 3218 uint64_t vm_free;
3219 3219 uint64_t vm_used;
3220 3220
3221 3221 uint64_t disk_swap_total;
3222 3222 uint64_t disk_swap_used; /* disk swap with contents */
3223 3223
3224 3224 uint64_t physmem;
3225 3225 uint64_t pp_kernel;
3226 3226 uint64_t arc_size = 0;
3227 3227 struct anoninfo ani;
3228 3228
3229 3229 int num_swap_devices;
3230 3230 struct swaptable *swt;
3231 3231 struct swapent *swent;
3232 3232 size_t swt_size;
3233 3233 char *path;
3234 3234
3235 3235 zsd_vmusage64_t *vmusage;
3236 3236 uint64_t num_vmusage;
3237 3237
3238 3238 int i, ret;
3239 3239
3240 3240 zsd_system_t *sys;
3241 3241 zsd_zone_t *zone;
3242 3242 int vmu_nzones;
3243 3243
3244 3244 kstat_t *kstat;
3245 3245 char kstat_name[KSTAT_STRLEN];
3246 3246 kstat_named_t *knp;
3247 3247 kid_t kid;
3248 3248
3249 3249 if (init)
3250 3250 return;
3251 3251
3252 3252 sys = ctl->zsctl_system;
3253 3253
3254 3254 /* interrogate swap devices to find the amount of disk swap */
3255 3255 disk_swap_again:
3256 3256 num_swap_devices = swapctl(SC_GETNSWP, NULL);
3257 3257
3258 3258 if (num_swap_devices == 0) {
3259 3259 sys->zss_swap_total = disk_swap_total = 0;
3260 3260 sys->zss_swap_used = disk_swap_used = 0;
3261 3261 /* No disk swap */
3262 3262 goto disk_swap_done;
3263 3263 }
3264 3264 /* see if swap table needs to be larger */
3265 3265 if (num_swap_devices > ctl->zsctl_swap_cache_num) {
3266 3266 swt_size = sizeof (int) +
3267 3267 (num_swap_devices * sizeof (struct swapent)) +
3268 3268 (num_swap_devices * MAXPATHLEN);
3269 3269 if (ctl->zsctl_swap_cache != NULL)
3270 3270 free(ctl->zsctl_swap_cache);
3271 3271
3272 3272 swt = (struct swaptable *)malloc(swt_size);
3273 3273 if (swt == NULL) {
3274 3274 /*
3275 3275 * Could not allocate to get list of swap devices.
3276 3276 * Just use data from the most recent read, which will
3277 3277 * be zero if this is the first read.
3278 3278 */
3279 3279 zsd_warn(gettext("Unable to allocate to determine "
3280 3280 "virtual memory"));
3281 3281 disk_swap_total = sys->zss_swap_total;
3282 3282 disk_swap_used = sys->zss_swap_used;
3283 3283 goto disk_swap_done;
3284 3284 }
3285 3285 swent = swt->swt_ent;
3286 3286 path = (char *)swt + (sizeof (int) +
3287 3287 num_swap_devices * sizeof (swapent_t));
3288 3288 for (i = 0; i < num_swap_devices; i++, swent++) {
3289 3289 swent->ste_path = path;
3290 3290 path += MAXPATHLEN;
3291 3291 }
3292 3292 swt->swt_n = num_swap_devices;
3293 3293 ctl->zsctl_swap_cache = swt;
3294 3294 ctl->zsctl_swap_cache_size = swt_size;
3295 3295 ctl->zsctl_swap_cache_num = num_swap_devices;
3296 3296 }
3297 3297 num_swap_devices = swapctl(SC_LIST, ctl->zsctl_swap_cache);
3298 3298 if (num_swap_devices < 0) {
3299 3299 /* More swap devices have arrived */
3300 3300 if (errno == ENOMEM)
3301 3301 goto disk_swap_again;
3302 3302
3303 3303 zsd_warn(gettext("Unable to determine disk swap devices"));
3304 3304 /* Unexpected error. Use existing data */
3305 3305 disk_swap_total = sys->zss_swap_total;
3306 3306 disk_swap_used = sys->zss_swap_used;
3307 3307 goto disk_swap_done;
3308 3308 }
3309 3309
3310 3310 /* add up the disk swap */
3311 3311 disk_swap_total = 0;
3312 3312 disk_swap_used = 0;
3313 3313 swent = ctl->zsctl_swap_cache->swt_ent;
3314 3314 for (i = 0; i < num_swap_devices; i++, swent++) {
3315 3315 disk_swap_total += swent->ste_pages;
3316 3316 disk_swap_used += (swent->ste_pages - swent->ste_free);
3317 3317 }
3318 3318 disk_swap_total *= ctl->zsctl_pagesize;
3319 3319 disk_swap_used *= ctl->zsctl_pagesize;
3320 3320
3321 3321 sys->zss_swap_total = disk_swap_total;
3322 3322 sys->zss_swap_used = disk_swap_used;
3323 3323
3324 3324 disk_swap_done:
3325 3325
3326 3326 /* get system pages kstat */
3327 3327 kid = -1;
3328 3328 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "unix", 0, "system_pages");
3329 3329 if (kstat == NULL)
3330 3330 zsd_warn(gettext("Unable to lookup system pages kstat"));
3331 3331 else
3332 3332 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
3333 3333
3334 3334 if (kid == -1) {
3335 3335 zsd_warn(gettext("Unable to read system pages kstat"));
3336 3336 return;
3337 3337 } else {
3338 3338 knp = kstat_data_lookup(kstat, "physmem");
3339 3339 if (knp == NULL) {
3340 3340 zsd_warn(gettext("Unable to read physmem"));
3341 3341 } else {
3342 3342 if (knp->data_type == KSTAT_DATA_UINT64)
3343 3343 physmem = knp->value.ui64;
3344 3344 else if (knp->data_type == KSTAT_DATA_UINT32)
3345 3345 physmem = knp->value.ui32;
3346 3346 else
3347 3347 return;
3348 3348 }
3349 3349 knp = kstat_data_lookup(kstat, "pp_kernel");
3350 3350 if (knp == NULL) {
3351 3351 zsd_warn(gettext("Unable to read pp_kernel"));
3352 3352 } else {
3353 3353 if (knp->data_type == KSTAT_DATA_UINT64)
3354 3354 pp_kernel = knp->value.ui64;
3355 3355 else if (knp->data_type == KSTAT_DATA_UINT32)
3356 3356 pp_kernel = knp->value.ui32;
3357 3357 else
3358 3358 return;
3359 3359 }
3360 3360 }
3361 3361 physmem *= ctl->zsctl_pagesize;
3362 3362 pp_kernel *= ctl->zsctl_pagesize;
3363 3363
3364 3364 /* get the zfs arc size if available */
3365 3365 arc_size = 0;
3366 3366 kid = -1;
3367 3367 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "zfs", 0, "arcstats");
3368 3368 if (kstat != NULL)
3369 3369 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
3370 3370 if (kid != -1) {
3371 3371 knp = kstat_data_lookup(kstat, "size");
3372 3372 if (knp != NULL)
3373 3373 if (knp->data_type == KSTAT_DATA_UINT64)
3374 3374 arc_size = knp->value.ui64;
3375 3375 }
3376 3376
3377 3377 /* Try to get swap information */
3378 3378 if (swapctl(SC_AINFO, &ani) < 0) {
3379 3379 zsd_warn(gettext("Unable to get swap info"));
3380 3380 return;
3381 3381 }
3382 3382
3383 3383 vmusage_again:
3384 3384 /* getvmusage to get physical memory usage */
3385 3385 vmusage = ctl->zsctl_vmusage_cache;
3386 3386 num_vmusage = ctl->zsctl_vmusage_cache_num;
3387 3387
3388 3388 ret = zsd_getvmusage(ctl, VMUSAGE_SYSTEM | VMUSAGE_ALL_ZONES, 0,
3389 3389 vmusage, &num_vmusage);
3390 3390
3391 3391 if (ret != 0) {
3392 3392 /* Unexpected error. Use existing data */
3393 3393 if (errno != EOVERFLOW) {
3394 3394 zsd_warn(gettext(
3395 3395 "Unable to read physical memory usage"));
3396 3396 phys_zones = sys->zss_ram_zones;
3397 3397 goto vmusage_done;
3398 3398 }
3399 3399 }
3400 3400 /* vmusage results cache too small */
3401 3401 if (num_vmusage > ctl->zsctl_vmusage_cache_num) {
3402 3402
3403 3403 size_t size = sizeof (zsd_vmusage64_t) * num_vmusage;
3404 3404
3405 3405 if (ctl->zsctl_vmusage_cache != NULL)
3406 3406 free(ctl->zsctl_vmusage_cache);
3407 3407 vmusage = (zsd_vmusage64_t *)malloc(size);
3408 3408 if (vmusage == NULL) {
3409 3409 zsd_warn(gettext("Unable to alloc to determine "
3410 3410 "physical memory usage"));
3411 3411 phys_zones = sys->zss_ram_zones;
3412 3412 goto vmusage_done;
3413 3413 }
3414 3414 ctl->zsctl_vmusage_cache = vmusage;
3415 3415 ctl->zsctl_vmusage_cache_num = num_vmusage;
3416 3416 goto vmusage_again;
3417 3417 }
3418 3418
3419 3419 phys_zones_overcount = 0;
3420 3420 vmu_nzones = 0;
3421 3421 for (i = 0; i < num_vmusage; i++) {
3422 3422 switch (vmusage[i].vmu_type) {
3423 3423 case VMUSAGE_SYSTEM:
3424 3424 /* total pages backing user process mappings */
3425 3425 phys_zones = sys->zss_ram_zones =
3426 3426 vmusage[i].vmu_rss_all;
3427 3427 break;
3428 3428 case VMUSAGE_ZONE:
3429 3429 vmu_nzones++;
3430 3430 phys_zones_overcount += vmusage[i].vmu_rss_all;
3431 3431 zone = zsd_lookup_zone_byid(ctl, vmusage[i].vmu_id);
3432 3432 if (zone != NULL)
3433 3433 zone->zsz_usage_ram = vmusage[i].vmu_rss_all;
3434 3434 break;
3435 3435 default:
3436 3436 break;
3437 3437 }
3438 3438 }
3439 3439 /*
3440 3440 * Figure how much memory was double counted due to text sharing
3441 3441 * between zones. Credit this back so that the sum of the zones
3442 3442 * equals the total zone ram usage;
3443 3443 */
3444 3444 phys_zones_extra = phys_zones_overcount - phys_zones;
3445 3445 phys_zones_credit = phys_zones_extra / vmu_nzones;
3446 3446
3447 3447 vmusage_done:
3448 3448
3449 3449 /* walk the zones to get swap and locked kstats. Fetch ram cap. */
3450 3450 sys->zss_locked_zones = 0;
3451 3451 sys->zss_vm_zones = 0;
3452 3452 for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
3453 3453 zone = list_next(&ctl->zsctl_zones, zone)) {
3454 3454
3455 3455 /* If zone halted during interval, show memory usage as none */
3456 3456 if (zone->zsz_active == B_FALSE ||
3457 3457 zone->zsz_deleted == B_TRUE) {
3458 3458 zone->zsz_usage_ram = 0;
3459 3459 zone->zsz_usage_vm = 0;
3460 3460 zone->zsz_usage_locked = 0;
3461 3461 continue;
3462 3462 }
3463 3463
3464 3464 if (phys_zones_credit > 0) {
3465 3465 if (zone->zsz_usage_ram > phys_zones_credit) {
3466 3466 zone->zsz_usage_ram -= phys_zones_credit;
3467 3467 }
3468 3468 }
3469 3469 /*
3470 3470 * Get zone's swap usage. Since zone could have halted,
3471 3471 * treats as zero if cannot read
3472 3472 */
3473 3473 zone->zsz_usage_vm = 0;
3474 3474 (void) snprintf(kstat_name, sizeof (kstat_name),
3475 3475 "swapresv_zone_%d", zone->zsz_id);
3476 3476 kid = -1;
3477 3477 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "caps",
3478 3478 zone->zsz_id, kstat_name);
3479 3479 if (kstat != NULL)
3480 3480 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
3481 3481 if (kid != -1) {
3482 3482 knp = kstat_data_lookup(kstat, "usage");
3483 3483 if (knp != NULL &&
3484 3484 knp->data_type == KSTAT_DATA_UINT64) {
3485 3485 zone->zsz_usage_vm = knp->value.ui64;
3486 3486 sys->zss_vm_zones += knp->value.ui64;
3487 3487 }
3488 3488 }
3489 3489 /*
3490 3490 * Get zone's locked usage. Since zone could have halted,
3491 3491 * treats as zero if cannot read
3492 3492 */
3493 3493 zone->zsz_usage_locked = 0;
3494 3494 (void) snprintf(kstat_name, sizeof (kstat_name),
3495 3495 "lockedmem_zone_%d", zone->zsz_id);
3496 3496 kid = -1;
3497 3497 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "caps",
3498 3498 zone->zsz_id, kstat_name);
3499 3499 if (kstat != NULL)
3500 3500 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
3501 3501 if (kid != -1) {
3502 3502 knp = kstat_data_lookup(kstat, "usage");
3503 3503 if (knp != NULL &&
3504 3504 knp->data_type == KSTAT_DATA_UINT64) {
3505 3505 zone->zsz_usage_locked = knp->value.ui64;
3506 3506 /*
3507 3507 * Since locked memory accounting for zones
3508 3508 * can double count ddi locked memory, cap each
3509 3509 * zone's locked usage at its ram usage.
3510 3510 */
3511 3511 if (zone->zsz_usage_locked >
3512 3512 zone->zsz_usage_ram)
3513 3513 zone->zsz_usage_locked =
3514 3514 zone->zsz_usage_ram;
3515 3515 sys->zss_locked_zones +=
3516 3516 zone->zsz_usage_locked;
3517 3517 }
3518 3518 }
3519 3519 }
3520 3520
3521 3521 phys_total =
3522 3522 sysconf(_SC_PHYS_PAGES) * ctl->zsctl_pagesize;
3523 3523
3524 3524 phys_used = (sysconf(_SC_PHYS_PAGES) - sysconf(_SC_AVPHYS_PAGES))
3525 3525 * ctl->zsctl_pagesize;
3526 3526
3527 3527 /* Compute remaining statistics */
3528 3528 sys->zss_ram_total = phys_total;
3529 3529 sys->zss_ram_zones = phys_zones;
3530 3530 sys->zss_ram_kern = phys_used - phys_zones - arc_size;
3531 3531
3532 3532 /*
3533 3533 * The total for kernel locked memory should include
3534 3534 * segkp locked pages, but oh well. The arc size is subtracted,
3535 3535 * as that physical memory is reclaimable.
3536 3536 */
3537 3537 sys->zss_locked_kern = pp_kernel - arc_size;
3538 3538 /* Add memory used by kernel startup and obp to kernel locked */
3539 3539 if ((phys_total - physmem) > 0)
3540 3540 sys->zss_locked_kern += phys_total - physmem;
3541 3541
3542 3542 /*
3543 3543 * Add in the portion of (RAM+DISK) that is not available as swap,
3544 3544 * and consider it swap used by the kernel.
3545 3545 */
3546 3546 sys->zss_vm_total = phys_total + disk_swap_total;
3547 3547 vm_free = (ani.ani_max - ani.ani_resv) * ctl->zsctl_pagesize;
3548 3548 vm_used = sys->zss_vm_total - vm_free;
3549 3549 sys->zss_vm_kern = vm_used - sys->zss_vm_zones - arc_size;
3550 3550 }
3551 3551
3552 3552 /*
3553 3553 * Charge each cpu's usage to its processor sets. Also add the cpu's total
3554 3554 * time to each zone using the processor set. This tracks the maximum
3555 3555 * amount of cpu time that a zone could have used.
3556 3556 */
3557 3557 static void
3558 3558 zsd_refresh_cpu_stats(zsd_ctl_t *ctl, boolean_t init)
3559 3559 {
3560 3560 zsd_system_t *sys;
3561 3561 zsd_zone_t *zone;
3562 3562 zsd_pset_usage_t *usage;
3563 3563 zsd_cpu_t *cpu;
3564 3564 zsd_cpu_t *cpu_next;
3565 3565 zsd_pset_t *pset;
3566 3566 timestruc_t ts;
3567 3567 uint64_t hrtime;
3568 3568 timestruc_t delta;
3569 3569
3570 3570 /* Update the per-cpu kstat data */
3571 3571 cpu_next = list_head(&ctl->zsctl_cpus);
3572 3572 while (cpu_next != NULL) {
3573 3573 cpu = cpu_next;
3574 3574 cpu_next = list_next(&ctl->zsctl_cpus, cpu);
3575 3575 zsd_update_cpu_stats(ctl, cpu);
3576 3576 }
3577 3577 /* Update the elapsed real time */
3578 3578 hrtime = gethrtime();
3579 3579 if (init) {
3580 3580 /* first time around, store hrtime for future comparision */
3581 3581 ctl->zsctl_hrtime = hrtime;
3582 3582 ctl->zsctl_hrtime_prev = hrtime;
3583 3583
3584 3584 } else {
3585 3585 /* Compute increase in hrtime since the most recent read */
3586 3586 ctl->zsctl_hrtime_prev = ctl->zsctl_hrtime;
3587 3587 ctl->zsctl_hrtime = hrtime;
3588 3588 if ((hrtime = hrtime - ctl->zsctl_hrtime_prev) > 0)
3589 3589 TIMESTRUC_ADD_NANOSEC(ctl->zsctl_hrtime_total, hrtime);
3590 3590 }
3591 3591
3592 3592 /* On initialization, all psets have zero time */
3593 3593 if (init)
3594 3594 return;
3595 3595
3596 3596 for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
3597 3597 pset = list_next(&ctl->zsctl_psets, pset)) {
3598 3598
3599 3599 if (pset->zsp_active == B_FALSE) {
3600 3600 zsd_warn(gettext("Internal error,inactive pset found"));
3601 3601 continue;
3602 3602 }
3603 3603
3604 3604 /* sum total used time for pset */
3605 3605 ts.tv_sec = 0;
3606 3606 ts.tv_nsec = 0;
3607 3607 TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_intr);
3608 3608 TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_kern);
3609 3609 TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_user);
3610 3610 /* kernel time in pset is total time minus zone time */
3611 3611 TIMESTRUC_DELTA(pset->zsp_usage_kern, ts,
3612 3612 pset->zsp_usage_zones);
3613 3613 if (pset->zsp_usage_kern.tv_sec < 0 ||
3614 3614 pset->zsp_usage_kern.tv_nsec < 0) {
3615 3615 pset->zsp_usage_kern.tv_sec = 0;
3616 3616 pset->zsp_usage_kern.tv_nsec = 0;
3617 3617 }
3618 3618 /* Total pset elapsed time is used time plus idle time */
3619 3619 TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_idle);
3620 3620
3621 3621 TIMESTRUC_DELTA(delta, ts, pset->zsp_total_time);
3622 3622
3623 3623 for (usage = list_head(&pset->zsp_usage_list); usage != NULL;
3624 3624 usage = list_next(&pset->zsp_usage_list, usage)) {
3625 3625
3626 3626 zone = usage->zsu_zone;
3627 3627 if (usage->zsu_cpu_shares != ZS_LIMIT_NONE &&
3628 3628 usage->zsu_cpu_shares != ZS_SHARES_UNLIMITED &&
3629 3629 usage->zsu_cpu_shares != 0) {
3630 3630 /*
3631 3631 * Figure out how many nanoseconds of share time
3632 3632 * to give to the zone
3633 3633 */
3634 3634 hrtime = delta.tv_sec;
3635 3635 hrtime *= NANOSEC;
3636 3636 hrtime += delta.tv_nsec;
3637 3637 hrtime *= usage->zsu_cpu_shares;
3638 3638 hrtime /= pset->zsp_cpu_shares;
3639 3639 TIMESTRUC_ADD_NANOSEC(zone->zsz_share_time,
3640 3640 hrtime);
3641 3641 }
3642 3642 /* Add pset time to each zone using pset */
3643 3643 TIMESTRUC_ADD_TIMESTRUC(zone->zsz_pset_time, delta);
3644 3644
3645 3645 zone->zsz_cpus_online += pset->zsp_online;
3646 3646 }
3647 3647 pset->zsp_total_time = ts;
3648 3648 }
3649 3649
3650 3650 for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
3651 3651 zone = list_next(&ctl->zsctl_zones, zone)) {
3652 3652
3653 3653 /* update cpu cap tracking if the zone has a cpu cap */
3654 3654 if (zone->zsz_cpu_cap != ZS_LIMIT_NONE) {
3655 3655 uint64_t elapsed;
3656 3656
3657 3657 elapsed = ctl->zsctl_hrtime - ctl->zsctl_hrtime_prev;
3658 3658 elapsed *= zone->zsz_cpu_cap;
3659 3659 elapsed = elapsed / 100;
3660 3660 TIMESTRUC_ADD_NANOSEC(zone->zsz_cap_time, elapsed);
3661 3661 }
3662 3662 }
3663 3663 sys = ctl->zsctl_system;
3664 3664 ts.tv_sec = 0;
3665 3665 ts.tv_nsec = 0;
3666 3666 TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_intr);
3667 3667 TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_kern);
3668 3668 TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_user);
3669 3669
3670 3670 /* kernel time in pset is total time minus zone time */
3671 3671 TIMESTRUC_DELTA(sys->zss_cpu_usage_kern, ts,
3672 3672 sys->zss_cpu_usage_zones);
3673 3673 if (sys->zss_cpu_usage_kern.tv_sec < 0 ||
3674 3674 sys->zss_cpu_usage_kern.tv_nsec < 0) {
3675 3675 sys->zss_cpu_usage_kern.tv_sec = 0;
3676 3676 sys->zss_cpu_usage_kern.tv_nsec = 0;
3677 3677 }
3678 3678 /* Total pset elapsed time is used time plus idle time */
3679 3679 TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_idle);
3680 3680 sys->zss_cpu_total_time = ts;
3681 3681 }
3682 3682
3683 3683 /*
3684 3684 * Saves current usage data to a cache that is read by libzonestat when
3685 3685 * calling zs_usage_read().
3686 3686 *
3687 3687 * All pointers in the cached data structure are set to NULL. When
3688 3688 * libzonestat reads the cached data, it will set the pointers relative to
3689 3689 * its address space.
3690 3690 */
3691 3691 static void
3692 3692 zsd_usage_cache_update(zsd_ctl_t *ctl)
3693 3693 {
3694 3694 zs_usage_cache_t *cache;
3695 3695 zs_usage_cache_t *old;
3696 3696 zs_usage_t *usage;
3697 3697
3698 3698 zs_system_t *sys;
3699 3699 zsd_system_t *dsys;
3700 3700 zs_zone_t *zone = NULL;
3701 3701 zsd_zone_t *dzone;
3702 3702 zs_pset_t *pset = NULL;
3703 3703 zsd_pset_t *dpset;
3704 3704 zs_pset_zone_t *pusage;
3705 3705 zsd_pset_usage_t *dpusage;
3706 3706
3707 3707 char *next;
3708 3708 uint_t size, i, j;
3709 3709
3710 3710 size =
3711 3711 sizeof (zs_usage_cache_t) +
3712 3712 sizeof (zs_usage_t) +
3713 3713 sizeof (zs_system_t) +
3714 3714 sizeof (zs_zone_t) * ctl->zsctl_nzones +
3715 3715 sizeof (zs_pset_t) * ctl->zsctl_npsets +
3716 3716 sizeof (zs_pset_zone_t) * ctl->zsctl_npset_usages;
3717 3717
3718 3718 cache = (zs_usage_cache_t *)malloc(size);
3719 3719 if (cache == NULL) {
3720 3720 zsd_warn(gettext("Unable to allocate usage cache\n"));
3721 3721 return;
3722 3722 }
3723 3723
3724 3724 next = (char *)cache;
3725 3725 cache->zsuc_size = size - sizeof (zs_usage_cache_t);
3726 3726 next += sizeof (zs_usage_cache_t);
3727 3727
3728 3728 /* LINTED */
3729 3729 usage = cache->zsuc_usage = (zs_usage_t *)next;
3730 3730 next += sizeof (zs_usage_t);
3731 3731 usage->zsu_start = g_start;
3732 3732 usage->zsu_hrstart = g_hrstart;
3733 3733 usage->zsu_time = g_now;
3734 3734 usage->zsu_hrtime = g_hrnow;
3735 3735 usage->zsu_nzones = ctl->zsctl_nzones;
3736 3736 usage->zsu_npsets = ctl->zsctl_npsets;
3737 3737 usage->zsu_system = NULL;
3738 3738
3739 3739 /* LINTED */
3740 3740 sys = (zs_system_t *)next;
3741 3741 next += sizeof (zs_system_t);
3742 3742 dsys = ctl->zsctl_system;
3743 3743 sys->zss_ram_total = dsys->zss_ram_total;
3744 3744 sys->zss_ram_kern = dsys->zss_ram_kern;
3745 3745 sys->zss_ram_zones = dsys->zss_ram_zones;
3746 3746 sys->zss_locked_kern = dsys->zss_locked_kern;
3747 3747 sys->zss_locked_zones = dsys->zss_locked_zones;
3748 3748 sys->zss_vm_total = dsys->zss_vm_total;
3749 3749 sys->zss_vm_kern = dsys->zss_vm_kern;
3750 3750 sys->zss_vm_zones = dsys->zss_vm_zones;
3751 3751 sys->zss_swap_total = dsys->zss_swap_total;
3752 3752 sys->zss_swap_used = dsys->zss_swap_used;
3753 3753 sys->zss_ncpus = dsys->zss_ncpus;
3754 3754 sys->zss_ncpus_online = dsys->zss_ncpus_online;
3755 3755
3756 3756 sys->zss_processes_max = dsys->zss_maxpid;
3757 3757 sys->zss_lwps_max = dsys->zss_lwps_max;
3758 3758 sys->zss_shm_max = dsys->zss_shm_max;
3759 3759 sys->zss_shmids_max = dsys->zss_shmids_max;
3760 3760 sys->zss_semids_max = dsys->zss_semids_max;
3761 3761 sys->zss_msgids_max = dsys->zss_msgids_max;
3762 3762 sys->zss_lofi_max = dsys->zss_lofi_max;
3763 3763
3764 3764 sys->zss_processes = dsys->zss_processes;
3765 3765 sys->zss_lwps = dsys->zss_lwps;
3766 3766 sys->zss_shm = dsys->zss_shm;
3767 3767 sys->zss_shmids = dsys->zss_shmids;
3768 3768 sys->zss_semids = dsys->zss_semids;
3769 3769 sys->zss_msgids = dsys->zss_msgids;
3770 3770 sys->zss_lofi = dsys->zss_lofi;
3771 3771
3772 3772 sys->zss_cpu_total_time = dsys->zss_cpu_total_time;
3773 3773 sys->zss_cpu_usage_zones = dsys->zss_cpu_usage_zones;
3774 3774 sys->zss_cpu_usage_kern = dsys->zss_cpu_usage_kern;
3775 3775
3776 3776 for (i = 0, dzone = list_head(&ctl->zsctl_zones);
3777 3777 i < ctl->zsctl_nzones;
3778 3778 i++, dzone = list_next(&ctl->zsctl_zones, dzone)) {
3779 3779 /* LINTED */
3780 3780 zone = (zs_zone_t *)next;
3781 3781 next += sizeof (zs_zone_t);
3782 3782 list_link_init(&zone->zsz_next);
3783 3783 zone->zsz_system = NULL;
3784 3784
3785 3785 (void) strlcpy(zone->zsz_name, dzone->zsz_name,
3786 3786 sizeof (zone->zsz_name));
3787 3787 (void) strlcpy(zone->zsz_pool, dzone->zsz_pool,
3788 3788 sizeof (zone->zsz_pool));
3789 3789 (void) strlcpy(zone->zsz_pset, dzone->zsz_pset,
3790 3790 sizeof (zone->zsz_pset));
3791 3791 zone->zsz_id = dzone->zsz_id;
3792 3792 zone->zsz_cputype = dzone->zsz_cputype;
3793 3793 zone->zsz_iptype = dzone->zsz_iptype;
3794 3794 zone->zsz_start = dzone->zsz_start;
3795 3795 zone->zsz_hrstart = dzone->zsz_hrstart;
3796 3796 zone->zsz_scheds = dzone->zsz_scheds;
3797 3797 zone->zsz_cpu_shares = dzone->zsz_cpu_shares;
3798 3798 zone->zsz_cpu_cap = dzone->zsz_cpu_cap;
3799 3799 zone->zsz_ram_cap = dzone->zsz_ram_cap;
3800 3800 zone->zsz_vm_cap = dzone->zsz_vm_cap;
3801 3801 zone->zsz_locked_cap = dzone->zsz_locked_cap;
3802 3802 zone->zsz_cpu_usage = dzone->zsz_cpu_usage;
3803 3803 zone->zsz_cpus_online = dzone->zsz_cpus_online;
3804 3804 zone->zsz_pset_time = dzone->zsz_pset_time;
3805 3805 zone->zsz_cap_time = dzone->zsz_cap_time;
3806 3806 zone->zsz_share_time = dzone->zsz_share_time;
3807 3807 zone->zsz_usage_ram = dzone->zsz_usage_ram;
3808 3808 zone->zsz_usage_locked = dzone->zsz_usage_locked;
3809 3809 zone->zsz_usage_vm = dzone->zsz_usage_vm;
3810 3810
3811 3811 zone->zsz_processes_cap = dzone->zsz_processes_cap;
3812 3812 zone->zsz_lwps_cap = dzone->zsz_lwps_cap;
3813 3813 zone->zsz_shm_cap = dzone->zsz_shm_cap;
3814 3814 zone->zsz_shmids_cap = dzone->zsz_shmids_cap;
3815 3815 zone->zsz_semids_cap = dzone->zsz_semids_cap;
3816 3816 zone->zsz_msgids_cap = dzone->zsz_msgids_cap;
3817 3817 zone->zsz_lofi_cap = dzone->zsz_lofi_cap;
3818 3818
3819 3819 zone->zsz_processes = dzone->zsz_processes;
3820 3820 zone->zsz_lwps = dzone->zsz_lwps;
3821 3821 zone->zsz_shm = dzone->zsz_shm;
3822 3822 zone->zsz_shmids = dzone->zsz_shmids;
3823 3823 zone->zsz_semids = dzone->zsz_semids;
3824 3824 zone->zsz_msgids = dzone->zsz_msgids;
3825 3825 zone->zsz_lofi = dzone->zsz_lofi;
3826 3826 }
3827 3827
3828 3828 for (i = 0, dpset = list_head(&ctl->zsctl_psets);
3829 3829 i < ctl->zsctl_npsets;
3830 3830 i++, dpset = list_next(&ctl->zsctl_psets, dpset)) {
3831 3831 /* LINTED */
3832 3832 pset = (zs_pset_t *)next;
3833 3833 next += sizeof (zs_pset_t);
3834 3834 list_link_init(&pset->zsp_next);
3835 3835 (void) strlcpy(pset->zsp_name, dpset->zsp_name,
3836 3836 sizeof (pset->zsp_name));
3837 3837 pset->zsp_id = dpset->zsp_id;
3838 3838 pset->zsp_cputype = dpset->zsp_cputype;
3839 3839 pset->zsp_start = dpset->zsp_start;
3840 3840 pset->zsp_hrstart = dpset->zsp_hrstart;
3841 3841 pset->zsp_online = dpset->zsp_online;
3842 3842 pset->zsp_size = dpset->zsp_size;
3843 3843 pset->zsp_min = dpset->zsp_min;
3844 3844 pset->zsp_max = dpset->zsp_max;
3845 3845 pset->zsp_importance = dpset->zsp_importance;
3846 3846 pset->zsp_scheds = dpset->zsp_scheds;
3847 3847 pset->zsp_cpu_shares = dpset->zsp_cpu_shares;
3848 3848 pset->zsp_total_time = dpset->zsp_total_time;
3849 3849 pset->zsp_usage_kern = dpset->zsp_usage_kern;
3850 3850 pset->zsp_usage_zones = dpset->zsp_usage_zones;
3851 3851 pset->zsp_nusage = dpset->zsp_nusage;
3852 3852 /* Add pset usages for pset */
3853 3853 for (j = 0, dpusage = list_head(&dpset->zsp_usage_list);
3854 3854 j < dpset->zsp_nusage;
3855 3855 j++, dpusage = list_next(&dpset->zsp_usage_list, dpusage)) {
3856 3856 /* LINTED */
3857 3857 pusage = (zs_pset_zone_t *)next;
3858 3858 next += sizeof (zs_pset_zone_t);
3859 3859 /* pointers are computed by client */
3860 3860 pusage->zspz_pset = NULL;
3861 3861 pusage->zspz_zone = NULL;
3862 3862 list_link_init(&pusage->zspz_next);
3863 3863 pusage->zspz_zoneid = dpusage->zsu_zone->zsz_id;
3864 3864 pusage->zspz_start = dpusage->zsu_start;
3865 3865 pusage->zspz_hrstart = dpusage->zsu_hrstart;
3866 3866 pusage->zspz_hrstart = dpusage->zsu_hrstart;
3867 3867 pusage->zspz_cpu_shares = dpusage->zsu_cpu_shares;
3868 3868 pusage->zspz_scheds = dpusage->zsu_scheds;
3869 3869 pusage->zspz_cpu_usage = dpusage->zsu_cpu_usage;
3870 3870 }
3871 3871 }
3872 3872
3873 3873 /* Update the current cache pointer */
3874 3874 (void) mutex_lock(&g_usage_cache_lock);
3875 3875 old = g_usage_cache;
3876 3876 cache->zsuc_ref = 1;
3877 3877 cache->zsuc_gen = g_gen_next;
3878 3878 usage->zsu_gen = g_gen_next;
3879 3879 usage->zsu_size = size;
3880 3880 g_usage_cache = cache;
3881 3881 if (old != NULL) {
3882 3882 old->zsuc_ref--;
3883 3883 if (old->zsuc_ref == 0)
3884 3884 free(old);
3885 3885 }
3886 3886 g_gen_next++;
3887 3887 /* Wake up any clients that are waiting for this calculation */
3888 3888 if (g_usage_cache_kickers > 0) {
3889 3889 (void) cond_broadcast(&g_usage_cache_wait);
3890 3890 }
3891 3891 (void) mutex_unlock(&g_usage_cache_lock);
3892 3892 }
3893 3893
3894 3894 static zs_usage_cache_t *
3895 3895 zsd_usage_cache_hold_locked()
3896 3896 {
3897 3897 zs_usage_cache_t *ret;
3898 3898
3899 3899 ret = g_usage_cache;
3900 3900 ret->zsuc_ref++;
3901 3901 return (ret);
3902 3902 }
3903 3903
3904 3904 void
3905 3905 zsd_usage_cache_rele(zs_usage_cache_t *cache)
3906 3906 {
3907 3907 (void) mutex_lock(&g_usage_cache_lock);
3908 3908 cache->zsuc_ref--;
3909 3909 if (cache->zsuc_ref == 0)
3910 3910 free(cache);
3911 3911 (void) mutex_unlock(&g_usage_cache_lock);
3912 3912 }
3913 3913
3914 3914 /* Close the handles held by zsd_open() */
3915 3915 void
3916 3916 zsd_close(zsd_ctl_t *ctl)
3917 3917 {
3918 3918 zsd_zone_t *zone;
3919 3919 zsd_pset_t *pset;
3920 3920 zsd_pset_usage_t *usage;
3921 3921 zsd_cpu_t *cpu;
3922 3922 int id;
3923 3923
3924 3924 if (ctl->zsctl_kstat_ctl) {
3925 3925 (void) kstat_close(ctl->zsctl_kstat_ctl);
3926 3926 ctl->zsctl_kstat_ctl = NULL;
3927 3927 }
3928 3928 if (ctl->zsctl_proc_open) {
3929 3929 (void) ea_close(&ctl->zsctl_proc_eaf);
3930 3930 ctl->zsctl_proc_open = 0;
3931 3931 ctl->zsctl_proc_fd = -1;
3932 3932 }
3933 3933 if (ctl->zsctl_pool_conf) {
3934 3934 if (ctl->zsctl_pool_status == POOL_ENABLED)
3935 3935 (void) pool_conf_close(ctl->zsctl_pool_conf);
3936 3936 ctl->zsctl_pool_status = POOL_DISABLED;
3937 3937 }
3938 3938
3939 3939 while ((zone = list_head(&ctl->zsctl_zones)) != NULL) {
3940 3940 list_remove(&ctl->zsctl_zones, zone);
3941 3941 free(zone);
3942 3942 ctl->zsctl_nzones--;
3943 3943 }
3944 3944
3945 3945 while ((pset = list_head(&ctl->zsctl_psets)) != NULL) {
3946 3946 while ((usage = list_head(&pset->zsp_usage_list))
3947 3947 != NULL) {
3948 3948 list_remove(&pset->zsp_usage_list, usage);
3949 3949 ctl->zsctl_npset_usages--;
3950 3950 free(usage);
3951 3951 }
3952 3952 list_remove(&ctl->zsctl_psets, pset);
3953 3953 free(pset);
3954 3954 ctl->zsctl_npsets--;
3955 3955 }
3956 3956
3957 3957 /* Release all cpus being tracked */
3958 3958 while (cpu = list_head(&ctl->zsctl_cpus)) {
3959 3959 list_remove(&ctl->zsctl_cpus, cpu);
3960 3960 id = cpu->zsc_id;
3961 3961 bzero(cpu, sizeof (zsd_cpu_t));
3962 3962 cpu->zsc_id = id;
3963 3963 cpu->zsc_allocated = B_FALSE;
3964 3964 cpu->zsc_psetid = ZS_PSET_ERROR;
3965 3965 cpu->zsc_psetid_prev = ZS_PSET_ERROR;
3966 3966 }
3967 3967
3968 3968 assert(ctl->zsctl_npset_usages == 0);
3969 3969 assert(ctl->zsctl_npsets == 0);
3970 3970 assert(ctl->zsctl_nzones == 0);
3971 3971 (void) zsd_disable_cpu_stats();
3972 3972 }
3973 3973
3974 3974
3975 3975 /*
3976 3976 * Update the utilization data for all zones and processor sets.
3977 3977 */
3978 3978 static int
3979 3979 zsd_read(zsd_ctl_t *ctl, boolean_t init, boolean_t do_memory)
3980 3980 {
3981 3981 (void) kstat_chain_update(ctl->zsctl_kstat_ctl);
3982 3982 (void) gettimeofday(&(ctl->zsctl_timeofday), NULL);
3983 3983
3984 3984 zsd_refresh_system(ctl);
3985 3985
3986 3986 /*
3987 3987 * Memory calculation is expensive. Only update it on sample
3988 3988 * intervals.
3989 3989 */
3990 3990 if (do_memory == B_TRUE)
3991 3991 zsd_refresh_memory(ctl, init);
3992 3992 zsd_refresh_zones(ctl);
3993 3993 zsd_refresh_psets(ctl);
3994 3994 zsd_refresh_procs(ctl, init);
3995 3995 zsd_refresh_cpu_stats(ctl, init);
3996 3996
3997 3997 /*
3998 3998 * Delete objects that no longer exist.
3999 3999 * Pset usages must be deleted first as they point to zone and
4000 4000 * pset objects.
4001 4001 */
4002 4002 zsd_mark_pset_usages_end(ctl);
4003 4003 zsd_mark_psets_end(ctl);
4004 4004 zsd_mark_cpus_end(ctl);
4005 4005 zsd_mark_zones_end(ctl);
4006 4006
4007 4007 /*
4008 4008 * Save results for clients.
4009 4009 */
4010 4010 zsd_usage_cache_update(ctl);
4011 4011
4012 4012 /*
4013 4013 * Roll process accounting file.
4014 4014 */
4015 4015 (void) zsd_roll_exacct();
4016 4016 return (0);
4017 4017 }
4018 4018
4019 4019 /*
4020 4020 * Get the system rctl, which is the upper most limit
4021 4021 */
4022 4022 static uint64_t
4023 4023 zsd_get_system_rctl(char *name)
4024 4024 {
4025 4025 rctlblk_t *rblk, *rblk_last;
4026 4026
4027 4027 rblk = (rctlblk_t *)alloca(rctlblk_size());
4028 4028 rblk_last = (rctlblk_t *)alloca(rctlblk_size());
4029 4029
4030 4030 if (getrctl(name, NULL, rblk_last, RCTL_FIRST) != 0)
4031 4031 return (ZS_LIMIT_NONE);
4032 4032
4033 4033 while (getrctl(name, rblk_last, rblk, RCTL_NEXT) == 0)
4034 4034 (void) bcopy(rblk, rblk_last, rctlblk_size());
4035 4035
4036 4036 return (rctlblk_get_value(rblk_last));
4037 4037 }
4038 4038
4039 4039 /*
4040 4040 * Open any necessary subsystems for collecting utilization data,
4041 4041 * allocate and initialize data structures, and get initial utilization.
4042 4042 *
4043 4043 * Errors:
4044 4044 * ENOMEM out of memory
4045 4045 * EINVAL other error
4046 4046 */
4047 4047 static zsd_ctl_t *
4048 4048 zsd_open(zsd_ctl_t *ctl)
4049 4049 {
4050 4050 zsd_system_t *system;
4051 4051
4052 4052 char path[MAXPATHLEN];
4053 4053 long pathmax;
4054 4054 struct statvfs svfs;
4055 4055 int ret;
4056 4056 int i;
4057 4057 size_t size;
4058 4058 int err;
4059 4059
4060 4060 if (ctl == NULL && (ctl = (zsd_ctl_t *)calloc(1,
4061 4061 sizeof (zsd_ctl_t))) == NULL) {
4062 4062 zsd_warn(gettext("Out of Memory"));
4063 4063 errno = ENOMEM;
4064 4064 goto err;
4065 4065 }
4066 4066 ctl->zsctl_proc_fd = -1;
4067 4067
4068 4068 /* open kstats */
4069 4069 if (ctl->zsctl_kstat_ctl == NULL &&
4070 4070 (ctl->zsctl_kstat_ctl = kstat_open()) == NULL) {
4071 4071 err = errno;
4072 4072 zsd_warn(gettext("Unable to open kstats"));
4073 4073 errno = err;
4074 4074 if (errno != ENOMEM)
4075 4075 errno = EAGAIN;
4076 4076 goto err;
4077 4077 }
4078 4078
4079 4079 /*
4080 4080 * These are set when the accounting file is opened by
4081 4081 * zsd_update_procs()
4082 4082 */
4083 4083 ctl->zsctl_proc_fd = -1;
4084 4084 ctl->zsctl_proc_fd_next = -1;
4085 4085 ctl->zsctl_proc_open = 0;
4086 4086 ctl->zsctl_proc_open_next = 0;
4087 4087
4088 4088 check_exacct:
4089 4089 (void) zsd_enable_cpu_stats();
4090 4090
4091 4091 /* Create structures to track usage */
4092 4092 if (ctl->zsctl_system == NULL && (ctl->zsctl_system = (zsd_system_t *)
4093 4093 calloc(1, sizeof (zsd_system_t))) == NULL) {
4094 4094 ret = -1;
4095 4095 zsd_warn(gettext("Out of Memory"));
4096 4096 errno = ENOMEM;
4097 4097 goto err;
4098 4098 }
4099 4099 system = ctl->zsctl_system;
4100 4100 /* get the kernel bitness to know structure layout for getvmusage */
4101 4101 ret = sysinfo(SI_ARCHITECTURE_64, path, sizeof (path));
4102 4102 if (ret < 0)
4103 4103 ctl->zsctl_kern_bits = 32;
4104 4104 else
4105 4105 ctl->zsctl_kern_bits = 64;
4106 4106 ctl->zsctl_pagesize = sysconf(_SC_PAGESIZE);
4107 4107
4108 4108 size = sysconf(_SC_CPUID_MAX);
4109 4109 ctl->zsctl_maxcpuid = size;
4110 4110 if (ctl->zsctl_cpu_array == NULL && (ctl->zsctl_cpu_array =
4111 4111 (zsd_cpu_t *)calloc(size + 1, sizeof (zsd_cpu_t))) == NULL) {
4112 4112 zsd_warn(gettext("Out of Memory"));
4113 4113 errno = ENOMEM;
4114 4114 goto err;
4115 4115 }
4116 4116 for (i = 0; i <= ctl->zsctl_maxcpuid; i++) {
4117 4117 ctl->zsctl_cpu_array[i].zsc_id = i;
4118 4118 ctl->zsctl_cpu_array[i].zsc_allocated = B_FALSE;
4119 4119 ctl->zsctl_cpu_array[i].zsc_psetid = ZS_PSET_ERROR;
4120 4120 ctl->zsctl_cpu_array[i].zsc_psetid_prev = ZS_PSET_ERROR;
4121 4121 }
4122 4122 if (statvfs("/proc", &svfs) != 0 ||
4123 4123 strcmp("/proc", svfs.f_fstr) != 0) {
4124 4124 zsd_warn(gettext("/proc not a procfs filesystem"));
4125 4125 errno = EINVAL;
4126 4126 goto err;
4127 4127 }
4128 4128
4129 4129 size = sysconf(_SC_MAXPID) + 1;
4130 4130 ctl->zsctl_maxproc = size;
4131 4131 if (ctl->zsctl_proc_array == NULL &&
4132 4132 (ctl->zsctl_proc_array = (zsd_proc_t *)calloc(size,
4133 4133 sizeof (zsd_proc_t))) == NULL) {
4134 4134 zsd_warn(gettext("Out of Memory"));
4135 4135 errno = ENOMEM;
4136 4136 goto err;
4137 4137 }
4138 4138 for (i = 0; i <= ctl->zsctl_maxproc; i++) {
4139 4139 list_link_init(&(ctl->zsctl_proc_array[i].zspr_next));
4140 4140 ctl->zsctl_proc_array[i].zspr_psetid = ZS_PSET_ERROR;
4141 4141 ctl->zsctl_proc_array[i].zspr_zoneid = -1;
4142 4142 ctl->zsctl_proc_array[i].zspr_usage.tv_sec = 0;
4143 4143 ctl->zsctl_proc_array[i].zspr_usage.tv_nsec = 0;
4144 4144 ctl->zsctl_proc_array[i].zspr_ppid = -1;
4145 4145 }
4146 4146
4147 4147 list_create(&ctl->zsctl_zones, sizeof (zsd_zone_t),
4148 4148 offsetof(zsd_zone_t, zsz_next));
4149 4149
4150 4150 list_create(&ctl->zsctl_psets, sizeof (zsd_pset_t),
4151 4151 offsetof(zsd_pset_t, zsp_next));
4152 4152
4153 4153 list_create(&ctl->zsctl_cpus, sizeof (zsd_cpu_t),
4154 4154 offsetof(zsd_cpu_t, zsc_next));
4155 4155
4156 4156 pathmax = pathconf("/proc", _PC_NAME_MAX);
4157 4157 if (pathmax < 0) {
4158 4158 zsd_warn(gettext("Unable to determine max path of /proc"));
4159 4159 errno = EINVAL;
4160 4160 goto err;
4161 4161 }
4162 4162 size = sizeof (struct dirent) + pathmax + 1;
4163 4163
4164 4164 ctl->zsctl_procfs_dent_size = size;
4165 4165 if (ctl->zsctl_procfs_dent == NULL &&
4166 4166 (ctl->zsctl_procfs_dent = (struct dirent *)calloc(1, size))
4167 4167 == NULL) {
4168 4168 zsd_warn(gettext("Out of Memory"));
4169 4169 errno = ENOMEM;
4170 4170 goto err;
4171 4171 }
4172 4172
4173 4173 if (ctl->zsctl_pool_conf == NULL &&
4174 4174 (ctl->zsctl_pool_conf = pool_conf_alloc()) == NULL) {
4175 4175 zsd_warn(gettext("Out of Memory"));
4176 4176 errno = ENOMEM;
4177 4177 goto err;
4178 4178 }
4179 4179 ctl->zsctl_pool_status = POOL_DISABLED;
4180 4180 ctl->zsctl_pool_changed = 0;
4181 4181
4182 4182 if (ctl->zsctl_pool_vals[0] == NULL &&
4183 4183 (ctl->zsctl_pool_vals[0] = pool_value_alloc()) == NULL) {
4184 4184 zsd_warn(gettext("Out of Memory"));
4185 4185 errno = ENOMEM;
4186 4186 goto err;
4187 4187 }
4188 4188 if (ctl->zsctl_pool_vals[1] == NULL &&
4189 4189 (ctl->zsctl_pool_vals[1] = pool_value_alloc()) == NULL) {
4190 4190 zsd_warn(gettext("Out of Memory"));
4191 4191 errno = ENOMEM;
4192 4192 goto err;
4193 4193 }
4194 4194 ctl->zsctl_pool_vals[2] = NULL;
4195 4195
4196 4196 /*
4197 4197 * get system limits
4198 4198 */
4199 4199 system->zss_maxpid = size = sysconf(_SC_MAXPID);
4200 4200 system->zss_processes_max = zsd_get_system_rctl("zone.max-processes");
4201 4201 system->zss_lwps_max = zsd_get_system_rctl("zone.max-lwps");
4202 4202 system->zss_shm_max = zsd_get_system_rctl("zone.max-shm-memory");
4203 4203 system->zss_shmids_max = zsd_get_system_rctl("zone.max-shm-ids");
4204 4204 system->zss_semids_max = zsd_get_system_rctl("zone.max-sem-ids");
4205 4205 system->zss_msgids_max = zsd_get_system_rctl("zone.max-msg-ids");
4206 4206 system->zss_lofi_max = zsd_get_system_rctl("zone.max-lofi");
4207 4207
4208 4208 g_gen_next = 1;
4209 4209
4210 4210 if (zsd_read(ctl, B_TRUE, B_FALSE) != 0)
4211 4211 zsd_warn(gettext("Reading zone statistics failed"));
4212 4212
4213 4213 return (ctl);
4214 4214 err:
4215 4215 if (ctl)
4216 4216 zsd_close(ctl);
4217 4217
4218 4218 return (NULL);
4219 4219 }
4220 4220
4221 4221 /* Copy utilization data to buffer, filtering data if non-global zone. */
4222 4222 static void
4223 4223 zsd_usage_filter(zoneid_t zid, zs_usage_cache_t *cache, zs_usage_t *usage,
4224 4224 boolean_t is_gz)
4225 4225 {
4226 4226 zs_usage_t *cusage;
4227 4227 zs_system_t *sys, *csys;
4228 4228 zs_zone_t *zone, *czone;
4229 4229 zs_pset_t *pset, *cpset;
4230 4230 zs_pset_zone_t *pz, *cpz, *foundpz;
4231 4231 size_t size = 0, csize = 0;
4232 4232 char *start, *cstart;
4233 4233 int i, j;
4234 4234 timestruc_t delta;
4235 4235
4236 4236 /* Privileged users in the global zone get everything */
4237 4237 if (is_gz) {
4238 4238 cusage = cache->zsuc_usage;
4239 4239 (void) bcopy(cusage, usage, cusage->zsu_size);
4240 4240 return;
4241 4241 }
4242 4242
4243 4243 /* Zones just get their own usage */
4244 4244 cusage = cache->zsuc_usage;
4245 4245
4246 4246 start = (char *)usage;
4247 4247 cstart = (char *)cusage;
4248 4248 size += sizeof (zs_usage_t);
4249 4249 csize += sizeof (zs_usage_t);
4250 4250
4251 4251 usage->zsu_start = cusage->zsu_start;
4252 4252 usage->zsu_hrstart = cusage->zsu_hrstart;
4253 4253 usage->zsu_time = cusage->zsu_time;
4254 4254 usage->zsu_hrtime = cusage->zsu_hrtime;
4255 4255 usage->zsu_gen = cusage->zsu_gen;
4256 4256 usage->zsu_nzones = 1;
4257 4257 usage->zsu_npsets = 0;
4258 4258
4259 4259 /* LINTED */
4260 4260 sys = (zs_system_t *)(start + size);
4261 4261 /* LINTED */
4262 4262 csys = (zs_system_t *)(cstart + csize);
4263 4263 size += sizeof (zs_system_t);
4264 4264 csize += sizeof (zs_system_t);
4265 4265
4266 4266 /* Save system limits but not usage */
4267 4267 *sys = *csys;
4268 4268 sys->zss_ncpus = 0;
4269 4269 sys->zss_ncpus_online = 0;
4270 4270
4271 4271 /* LINTED */
4272 4272 zone = (zs_zone_t *)(start + size);
4273 4273 /* LINTED */
4274 4274 czone = (zs_zone_t *)(cstart + csize);
4275 4275 /* Find the matching zone */
4276 4276 for (i = 0; i < cusage->zsu_nzones; i++) {
4277 4277 if (czone->zsz_id == zid) {
4278 4278 *zone = *czone;
4279 4279 size += sizeof (zs_zone_t);
4280 4280 }
4281 4281 csize += sizeof (zs_zone_t);
4282 4282 /* LINTED */
4283 4283 czone = (zs_zone_t *)(cstart + csize);
4284 4284 }
4285 4285 sys->zss_ram_kern += (sys->zss_ram_zones - zone->zsz_usage_ram);
4286 4286 sys->zss_ram_zones = zone->zsz_usage_ram;
4287 4287
4288 4288 sys->zss_vm_kern += (sys->zss_vm_zones - zone->zsz_usage_vm);
4289 4289 sys->zss_vm_zones = zone->zsz_usage_vm;
4290 4290
4291 4291 sys->zss_locked_kern += (sys->zss_locked_zones -
4292 4292 zone->zsz_usage_locked);
4293 4293 sys->zss_locked_zones = zone->zsz_usage_locked;
4294 4294
4295 4295 TIMESTRUC_DELTA(delta, sys->zss_cpu_usage_zones, zone->zsz_cpu_usage);
4296 4296 TIMESTRUC_ADD_TIMESTRUC(sys->zss_cpu_usage_kern, delta);
4297 4297 sys->zss_cpu_usage_zones = zone->zsz_cpu_usage;
4298 4298
4299 4299 /* LINTED */
4300 4300 pset = (zs_pset_t *)(start + size);
4301 4301 /* LINTED */
4302 4302 cpset = (zs_pset_t *)(cstart + csize);
4303 4303 for (i = 0; i < cusage->zsu_npsets; i++) {
4304 4304 csize += sizeof (zs_pset_t);
4305 4305 /* LINTED */
4306 4306 cpz = (zs_pset_zone_t *)(csize + cstart);
4307 4307 foundpz = NULL;
4308 4308 for (j = 0; j < cpset->zsp_nusage; j++) {
4309 4309 if (cpz->zspz_zoneid == zid)
4310 4310 foundpz = cpz;
4311 4311
4312 4312 csize += sizeof (zs_pset_zone_t);
4313 4313 /* LINTED */
4314 4314 cpz = (zs_pset_zone_t *)(csize + cstart);
4315 4315 }
4316 4316 if (foundpz != NULL) {
4317 4317 size += sizeof (zs_pset_t);
4318 4318 /* LINTED */
4319 4319 pz = (zs_pset_zone_t *)(start + size);
4320 4320 size += sizeof (zs_pset_zone_t);
4321 4321
4322 4322 *pset = *cpset;
4323 4323 *pz = *foundpz;
4324 4324
4325 4325 TIMESTRUC_DELTA(delta, pset->zsp_usage_zones,
4326 4326 pz->zspz_cpu_usage);
4327 4327 TIMESTRUC_ADD_TIMESTRUC(pset->zsp_usage_kern, delta);
4328 4328 pset->zsp_usage_zones = pz->zspz_cpu_usage;
4329 4329 pset->zsp_nusage = 1;
4330 4330 usage->zsu_npsets++;
4331 4331 sys->zss_ncpus += pset->zsp_size;
4332 4332 sys->zss_ncpus_online += pset->zsp_online;
4333 4333 }
4334 4334 /* LINTED */
4335 4335 cpset = (zs_pset_t *)(cstart + csize);
4336 4336 }
4337 4337 usage->zsu_size = size;
4338 4338 }
4339 4339
4340 4340 /*
4341 4341 * Respond to new connections from libzonestat.so. Also respond to zoneadmd,
4342 4342 * which reports new zones.
4343 4343 */
4344 4344 /* ARGSUSED */
4345 4345 static void
4346 4346 zsd_server(void *cookie, char *argp, size_t arg_size,
4347 4347 door_desc_t *dp, uint_t n_desc)
4348 4348 {
4349 4349 int *args, cmd;
4350 4350 door_desc_t door;
4351 4351 ucred_t *ucred;
4352 4352 const priv_set_t *eset;
4353 4353
4354 4354 if (argp == DOOR_UNREF_DATA) {
4355 4355 (void) door_return(NULL, 0, NULL, 0);
4356 4356 thr_exit(NULL);
4357 4357 }
4358 4358
4359 4359 if (arg_size != sizeof (cmd) * 2) {
4360 4360 (void) door_return(NULL, 0, NULL, 0);
4361 4361 thr_exit(NULL);
4362 4362 }
4363 4363
4364 4364 /* LINTED */
4365 4365 args = (int *)argp;
4366 4366 cmd = args[0];
4367 4367
4368 4368 /* If connection, return door to stat server */
4369 4369 if (cmd == ZSD_CMD_CONNECT) {
4370 4370
4371 4371 /* Verify client compilation version */
4372 4372 if (args[1] != ZS_VERSION) {
4373 4373 args[1] = ZSD_STATUS_VERSION_MISMATCH;
4374 4374 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4375 4375 thr_exit(NULL);
4376 4376 }
4377 4377 ucred = alloca(ucred_size());
4378 4378 /* Verify client permission */
4379 4379 if (door_ucred(&ucred) != 0) {
4380 4380 args[1] = ZSD_STATUS_INTERNAL_ERROR;
4381 4381 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4382 4382 thr_exit(NULL);
4383 4383 }
4384 4384
4385 4385 eset = ucred_getprivset(ucred, PRIV_EFFECTIVE);
4386 4386 if (eset == NULL) {
4387 4387 args[1] = ZSD_STATUS_INTERNAL_ERROR;
4388 4388 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4389 4389 thr_exit(NULL);
4390 4390 }
4391 4391 if (!priv_ismember(eset, PRIV_PROC_INFO)) {
4392 4392 args[1] = ZSD_STATUS_PERMISSION;
4393 4393 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4394 4394 thr_exit(NULL);
4395 4395 }
4396 4396
4397 4397 /* Return stat server door */
4398 4398 args[1] = ZSD_STATUS_OK;
4399 4399 door.d_attributes = DOOR_DESCRIPTOR;
4400 4400 door.d_data.d_desc.d_descriptor = g_stat_door;
4401 4401 (void) door_return(argp, sizeof (cmd) * 2, &door, 1);
4402 4402 thr_exit(NULL);
4403 4403 }
4404 4404
4405 4405 /* Respond to zoneadmd informing zonestatd of a new zone */
4406 4406 if (cmd == ZSD_CMD_NEW_ZONE) {
4407 4407 zsd_fattach_zone(args[1], g_server_door, B_FALSE);
4408 4408 (void) door_return(NULL, 0, NULL, 0);
4409 4409 thr_exit(NULL);
4410 4410 }
4411 4411
4412 4412 args[1] = ZSD_STATUS_INTERNAL_ERROR;
4413 4413 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4414 4414 thr_exit(NULL);
4415 4415 }
4416 4416
4417 4417 /*
4418 4418 * Respond to libzonestat.so clients with the current utlilzation data.
4419 4419 */
4420 4420 /* ARGSUSED */
4421 4421 static void
4422 4422 zsd_stat_server(void *cookie, char *argp, size_t arg_size,
4423 4423 door_desc_t *dp, uint_t n_desc)
4424 4424 {
4425 4425 uint64_t *args, cmd;
4426 4426 zs_usage_cache_t *cache;
4427 4427 int ret;
4428 4428 char *rvalp;
4429 4429 size_t rvals;
4430 4430 zs_usage_t *usage;
4431 4431 ucred_t *ucred;
4432 4432 zoneid_t zoneid;
4433 4433 const priv_set_t *eset;
4434 4434 boolean_t is_gz = B_FALSE;
4435 4435
4436 4436 /* Tell stat thread there are no more clients */
4437 4437 if (argp == DOOR_UNREF_DATA) {
4438 4438 (void) mutex_lock(&g_usage_cache_lock);
4439 4439 g_hasclient = B_FALSE;
4440 4440 (void) cond_signal(&g_usage_cache_kick);
4441 4441 (void) mutex_unlock(&g_usage_cache_lock);
4442 4442 (void) door_return(NULL, 0, NULL, 0);
4443 4443 thr_exit(NULL);
4444 4444 }
4445 4445 if (arg_size != sizeof (cmd) * 2) {
4446 4446 (void) door_return(NULL, 0, NULL, 0);
4447 4447 thr_exit(NULL);
4448 4448 }
4449 4449 /* LINTED */
4450 4450 args = (uint64_t *)argp;
4451 4451 cmd = args[0];
4452 4452 if (cmd != ZSD_CMD_READ) {
4453 4453 (void) door_return(NULL, 0, NULL, 0);
4454 4454 thr_exit(NULL);
4455 4455 }
4456 4456 ucred = alloca(ucred_size());
4457 4457 if (door_ucred(&ucred) != 0) {
4458 4458 (void) door_return(NULL, 0, NULL, 0);
4459 4459 thr_exit(NULL);
4460 4460 }
4461 4461 zoneid = ucred_getzoneid(ucred);
4462 4462
4463 4463 if (zoneid == GLOBAL_ZONEID)
4464 4464 is_gz = B_TRUE;
4465 4465
4466 4466 eset = ucred_getprivset(ucred, PRIV_EFFECTIVE);
4467 4467 if (eset == NULL) {
4468 4468 (void) door_return(NULL, 0, NULL, 0);
4469 4469 thr_exit(NULL);
4470 4470 }
4471 4471 if (!priv_ismember(eset, PRIV_PROC_INFO)) {
4472 4472 (void) door_return(NULL, 0, NULL, 0);
4473 4473 thr_exit(NULL);
4474 4474 }
4475 4475 (void) mutex_lock(&g_usage_cache_lock);
4476 4476 g_hasclient = B_TRUE;
4477 4477
4478 4478 /*
4479 4479 * Force a new cpu calculation for client. This will force a
4480 4480 * new memory calculation if the memory data is older than the
4481 4481 * sample period.
4482 4482 */
4483 4483 g_usage_cache_kickers++;
4484 4484 (void) cond_signal(&g_usage_cache_kick);
4485 4485 ret = cond_wait(&g_usage_cache_wait, &g_usage_cache_lock);
4486 4486 g_usage_cache_kickers--;
4487 4487 if (ret != 0 && errno == EINTR) {
4488 4488 (void) mutex_unlock(&g_usage_cache_lock);
4489 4489 zsd_warn(gettext(
4490 4490 "Interrupted before writing usage size to client\n"));
4491 4491 (void) door_return(NULL, 0, NULL, 0);
4492 4492 thr_exit(NULL);
4493 4493 }
4494 4494 cache = zsd_usage_cache_hold_locked();
4495 4495 if (cache == NULL) {
4496 4496 zsd_warn(gettext("Usage cache empty.\n"));
4497 4497 (void) door_return(NULL, 0, NULL, 0);
4498 4498 thr_exit(NULL);
4499 4499 }
4500 4500 (void) mutex_unlock(&g_usage_cache_lock);
4501 4501
4502 4502 /* Copy current usage data to stack to send to client */
4503 4503 usage = (zs_usage_t *)alloca(cache->zsuc_size);
4504 4504
4505 4505 /* Filter out results if caller is non-global zone */
4506 4506 zsd_usage_filter(zoneid, cache, usage, is_gz);
4507 4507
4508 4508 rvalp = (void *)usage;
4509 4509 rvals = usage->zsu_size;
4510 4510 zsd_usage_cache_rele(cache);
4511 4511
4512 4512 (void) door_return(rvalp, rvals, 0, NULL);
4513 4513 thr_exit(NULL);
4514 4514 }
4515 4515
4516 4516 static volatile boolean_t g_quit;
4517 4517
4518 4518 /* ARGSUSED */
4519 4519 static void
4520 4520 zonestat_quithandler(int sig)
4521 4521 {
4522 4522 g_quit = B_TRUE;
4523 4523 }
4524 4524
4525 4525 /*
4526 4526 * The stat thread generates new utilization data when clients request
4527 4527 * it. It also manages opening and closing the subsystems used to gather
4528 4528 * data depending on if clients exist.
4529 4529 */
4530 4530 /* ARGSUSED */
4531 4531 void *
4532 4532 stat_thread(void *arg)
4533 4533 {
4534 4534 time_t start;
4535 4535 time_t now;
4536 4536 time_t next_memory;
4537 4537 boolean_t do_memory;
4538 4538 boolean_t do_read;
4539 4539 boolean_t do_close;
4540 4540
4541 4541 start = time(NULL);
4542 4542 if (start < 0) {
4543 4543 if (g_quit == B_TRUE)
4544 4544 goto quit;
4545 4545 zsd_warn(gettext("Unable to fetch current time"));
4546 4546 g_quit = B_TRUE;
4547 4547 goto quit;
4548 4548 }
4549 4549
4550 4550 next_memory = start;
4551 4551 while (g_quit == B_FALSE) {
4552 4552 for (;;) {
4553 4553 /*
4554 4554 * These are used to decide if the most recent memory
4555 4555 * calculation was within a sample interval,
4556 4556 * and weather or not the usage collection needs to
4557 4557 * be opened or closed.
4558 4558 */
4559 4559 do_memory = B_FALSE;
4560 4560 do_read = B_FALSE;
4561 4561 do_close = B_FALSE;
4562 4562
4563 4563 /*
4564 4564 * If all clients have gone, close usage collecting
4565 4565 */
4566 4566 (void) mutex_lock(&g_usage_cache_lock);
4567 4567 if (!g_hasclient && g_open == B_TRUE) {
4568 4568 do_close = B_TRUE;
4569 4569 (void) mutex_unlock(&g_usage_cache_lock);
4570 4570 break;
4571 4571 }
4572 4572 if (g_quit == B_TRUE) {
4573 4573 (void) mutex_unlock(
4574 4574 &g_usage_cache_lock);
4575 4575 break;
4576 4576 }
4577 4577 /*
4578 4578 * Wait for a usage data request
4579 4579 */
4580 4580 if (g_usage_cache_kickers == 0) {
4581 4581 (void) cond_wait(&g_usage_cache_kick,
4582 4582 &g_usage_cache_lock);
4583 4583 }
4584 4584 now = time(NULL);
4585 4585 if (now < 0) {
4586 4586 if (g_quit == B_TRUE) {
4587 4587 (void) mutex_unlock(
4588 4588 &g_usage_cache_lock);
4589 4589 goto quit;
4590 4590 }
4591 4591 g_quit = B_TRUE;
4592 4592 (void) mutex_unlock(&g_usage_cache_lock);
4593 4593 zsd_warn(gettext(
4594 4594 "Unable to fetch current time"));
4595 4595 goto quit;
4596 4596 }
4597 4597 if (g_hasclient) {
4598 4598 do_read = B_TRUE;
4599 4599 if (now >= next_memory) {
4600 4600 do_memory = B_TRUE;
4601 4601 next_memory = now + g_interval;
4602 4602 }
4603 4603 } else {
4604 4604 do_close = B_TRUE;
4605 4605 }
4606 4606 (void) mutex_unlock(&g_usage_cache_lock);
4607 4607 if (do_read || do_close)
4608 4608 break;
4609 4609 }
4610 4610 g_now = now;
4611 4611 g_hrnow = gethrtime();
4612 4612 if (g_hasclient && g_open == B_FALSE) {
4613 4613 g_start = g_now;
4614 4614 g_hrstart = g_hrnow;
4615 4615 g_ctl = zsd_open(g_ctl);
4616 4616 if (g_ctl == NULL)
4617 4617 zsd_warn(gettext(
4618 4618 "Unable to open zone statistics"));
4619 4619 else
4620 4620 g_open = B_TRUE;
4621 4621 }
4622 4622 if (do_read && g_ctl) {
4623 4623 if (zsd_read(g_ctl, B_FALSE, do_memory) != 0) {
4624 4624 zsd_warn(gettext(
4625 4625 "Unable to read zone statistics"));
4626 4626 g_quit = B_TRUE;
4627 4627 return (NULL);
4628 4628 }
4629 4629 }
4630 4630 (void) mutex_lock(&g_usage_cache_lock);
4631 4631 if (!g_hasclient && g_open == B_TRUE && g_ctl) {
4632 4632 (void) mutex_unlock(&g_usage_cache_lock);
4633 4633 zsd_close(g_ctl);
4634 4634 g_open = B_FALSE;
4635 4635 } else {
4636 4636 (void) mutex_unlock(&g_usage_cache_lock);
4637 4637 }
4638 4638 }
4639 4639 quit:
4640 4640 if (g_open)
4641 4641 zsd_close(g_ctl);
4642 4642
4643 4643 (void) thr_kill(g_main, SIGINT);
4644 4644 thr_exit(NULL);
4645 4645 return (NULL);
4646 4646 }
4647 4647
4648 4648 void
4649 4649 zsd_set_fx()
4650 4650 {
4651 4651 pcinfo_t pcinfo;
4652 4652 pcparms_t pcparms;
4653 4653
4654 4654 (void) strlcpy(pcinfo.pc_clname, "FX", sizeof (pcinfo.pc_clname));
4655 4655 if (priocntl(0, 0, PC_GETCID, (caddr_t)&pcinfo) == -1) {
4656 4656 zsd_warn(gettext("cannot get FX class parameters"));
4657 4657 return;
4658 4658 }
4659 4659 pcparms.pc_cid = pcinfo.pc_cid;
4660 4660 ((fxparms_t *)pcparms.pc_clparms)->fx_upri = 60;
4661 4661 ((fxparms_t *)pcparms.pc_clparms)->fx_uprilim = 60;
4662 4662 ((fxparms_t *)pcparms.pc_clparms)->fx_tqsecs = 0;
4663 4663 ((fxparms_t *)pcparms.pc_clparms)->fx_tqnsecs = FX_NOCHANGE;
4664 4664 if (priocntl(P_PID, getpid(), PC_SETPARMS, (caddr_t)&pcparms) == -1)
4665 4665 zsd_warn(gettext("cannot enter the FX class"));
4666 4666 }
4667 4667
/* Write end of the pipe to the parent; set by daemonize_start(). */
static int pipe_fd;

/*
 * Report readiness to the parent: send it the one-byte status code and
 * then close our end of the pipe.  Errors from either call are ignored.
 */
static void
daemonize_ready(char status)
{
	const char code = status;

	/* wake the parent with a clue */
	(void) write(pipe_fd, &code, sizeof (code));
	(void) close(pipe_fd);
}
4679 4679
4680 4680 static int
4681 4681 daemonize_start(void)
4682 4682 {
4683 4683 char data;
4684 4684 int status;
4685 4685
4686 4686 int filedes[2];
4687 4687 pid_t pid;
4688 4688
4689 4689 (void) close(0);
4690 4690 (void) dup2(2, 1);
4691 4691
4692 4692 if (pipe(filedes) < 0)
4693 4693 return (-1);
4694 4694
4695 4695 (void) fflush(NULL);
4696 4696
4697 4697 if ((pid = fork1()) < 0)
4698 4698 return (-1);
4699 4699
4700 4700 if (pid != 0) {
4701 4701 /*
4702 4702 * parent
4703 4703 */
4704 4704 struct sigaction act;
4705 4705
4706 4706 act.sa_sigaction = SIG_DFL;
4707 4707 (void) sigemptyset(&act.sa_mask);
4708 4708 act.sa_flags = 0;
4709 4709
4710 4710 (void) sigaction(SIGPIPE, &act, NULL); /* ignore SIGPIPE */
4711 4711
4712 4712 (void) close(filedes[1]);
4713 4713 if (read(filedes[0], &data, 1) == 1) {
4714 4714 /* forward ready code via exit status */
4715 4715 exit(data);
4716 4716 }
4717 4717 status = -1;
4718 4718 (void) wait4(pid, &status, 0, NULL);
4719 4719 /* daemon process exited before becoming ready */
4720 4720 if (WIFEXITED(status)) {
4721 4721 /* assume daemon process printed useful message */
4722 4722 exit(WEXITSTATUS(status));
4723 4723 } else {
4724 4724 zsd_warn(gettext("daemon process killed or died"));
4725 4725 exit(1);
4726 4726 }
4727 4727 }
4728 4728
4729 4729 /*
4730 4730 * child
4731 4731 */
4732 4732 pipe_fd = filedes[1];
4733 4733 (void) close(filedes[0]);
4734 4734
4735 4735 /*
4736 4736 * generic Unix setup
4737 4737 */
4738 4738 (void) setsid();
4739 4739 (void) umask(0000);
4740 4740
4741 4741 return (0);
4742 4742 }
4743 4743
4744 4744 static void
4745 4745 fattach_all_zones(boolean_t detach_only)
4746 4746 {
4747 4747 zoneid_t *zids;
4748 4748 uint_t nzids, nzids_last;
4749 4749 int i;
4750 4750
4751 4751 again:
4752 4752 (void) zone_list(NULL, &nzids);
4753 4753 nzids_last = nzids;
4754 4754 zids = (zoneid_t *)malloc(sizeof (zoneid_t) * nzids_last);
4755 4755 if (zids == NULL)
4756 4756 zsd_error(gettext("Out of memory"));
4757 4757
4758 4758 (void) zone_list(zids, &nzids);
4759 4759 if (nzids > nzids_last) {
4760 4760 free(zids);
4761 4761 goto again;
4762 4762 }
4763 4763 for (i = 0; i < nzids; i++)
4764 4764 zsd_fattach_zone(zids[i], g_server_door, detach_only);
4765 4765
4766 4766 free(zids);
4767 4767 }
4768 4768
4769 4769 int
4770 4770 main(int argc, char *argv[])
4771 4771 {
4772 4772
4773 4773 int arg;
4774 4774 thread_t tid;
4775 4775 scf_simple_prop_t *prop;
4776 4776 uint64_t *intervalp;
4777 4777 boolean_t opt_cleanup = B_FALSE;
4778 4778
4779 4779 g_main = thr_self();
4780 4780 g_quit = B_FALSE;
4781 4781 (void) signal(SIGINT, zonestat_quithandler);
4782 4782 (void) signal(SIGTERM, zonestat_quithandler);
4783 4783 (void) signal(SIGHUP, zonestat_quithandler);
4784 4784 /* (void) sigignore(SIGCHLD); */
4785 4785 (void) sigignore(SIGPIPE);
4786 4786
4787 4787 if (getzoneid() != GLOBAL_ZONEID)
4788 4788 zsd_error(gettext("Must be run from global zone only"));
4789 4789
4790 4790 while ((arg = getopt(argc, argv, "c"))
4791 4791 != EOF) {
4792 4792 switch (arg) {
4793 4793 case 'c':
4794 4794 opt_cleanup = B_TRUE;
4795 4795 break;
4796 4796 default:
4797 4797 zsd_error(gettext("Invalid option"));
4798 4798 }
4799 4799 }
4800 4800
4801 4801 if (opt_cleanup) {
4802 4802 if (zsd_disable_cpu_stats() != 0)
4803 4803 exit(1);
4804 4804 else
4805 4805 exit(0);
4806 4806 }
4807 4807
4808 4808 /* Get the configured sample interval */
4809 4809 prop = scf_simple_prop_get(NULL, "svc:/system/zones-monitoring:default",
4810 4810 "config", "sample_interval");
4811 4811 if (prop == NULL)
4812 4812 zsd_error(gettext("Unable to fetch SMF property "
4813 4813 "\"config/sample_interval\""));
4814 4814
4815 4815 if (scf_simple_prop_type(prop) != SCF_TYPE_COUNT)
4816 4816 zsd_error(gettext("Malformed SMF property "
4817 4817 "\"config/sample_interval\". Must be of type \"count\""));
4818 4818
4819 4819 intervalp = scf_simple_prop_next_count(prop);
4820 4820 g_interval = *intervalp;
4821 4821 if (g_interval == 0)
4822 4822 zsd_error(gettext("Malformed SMF property "
4823 4823 "\"config/sample_interval\". Must be greater than zero"));
4824 4824
4825 4825 scf_simple_prop_free(prop);
4826 4826
4827 4827 if (daemonize_start() < 0)
4828 4828 zsd_error(gettext("Unable to start daemon\n"));
4829 4829
4830 4830 /* Run at high priority */
4831 4831 zsd_set_fx();
4832 4832
4833 4833 (void) mutex_init(&g_usage_cache_lock, USYNC_THREAD, NULL);
4834 4834 (void) cond_init(&g_usage_cache_kick, USYNC_THREAD, NULL);
4835 4835 (void) cond_init(&g_usage_cache_wait, USYNC_THREAD, NULL);
4836 4836
4837 4837 g_server_door = door_create(zsd_server, NULL,
4838 4838 DOOR_REFUSE_DESC | DOOR_NO_CANCEL);
4839 4839 if (g_server_door < 0)
4840 4840 zsd_error(gettext("Unable to create server door\n"));
4841 4841
4842 4842
4843 4843 g_stat_door = door_create(zsd_stat_server, NULL, DOOR_UNREF_MULTI |
4844 4844 DOOR_REFUSE_DESC | DOOR_NO_CANCEL);
4845 4845 if (g_stat_door < 0)
4846 4846 zsd_error(gettext("Unable to create statistics door\n"));
4847 4847
4848 4848 fattach_all_zones(B_FALSE);
4849 4849
4850 4850 if (thr_create(NULL, 0, stat_thread, NULL, 0, &tid) != 0)
4851 4851 zsd_error(gettext("Unable to create statistics thread\n"));
4852 4852
4853 4853 daemonize_ready(0);
4854 4854
4855 4855 /* Wait for signal to quit */
4856 4856 while (g_quit == B_FALSE)
4857 4857 (void) pause();
4858 4858
4859 4859 /* detach doors */
4860 4860 fattach_all_zones(B_TRUE);
4861 4861
4862 4862 (void) door_revoke(g_server_door);
4863 4863 (void) door_revoke(g_stat_door);
4864 4864
4865 4865 /* kick stat thread and wait for it to close the statistics */
4866 4866 (void) mutex_lock(&g_usage_cache_lock);
4867 4867 g_quit = B_TRUE;
4868 4868 (void) cond_signal(&g_usage_cache_kick);
4869 4869 (void) mutex_unlock(&g_usage_cache_lock);
4870 4870 end:
4871 4871 (void) thr_join(tid, NULL, NULL);
4872 4872 return (0);
4873 4873 }
|
↓ open down ↓ |
4873 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX