Print this page
OS-399 zone phys. mem. cap should be a rctl and have associated kstat
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/cmd/zonestat/zonestatd/zonestatd.c
+++ new/usr/src/cmd/zonestat/zonestatd/zonestatd.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
|
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 + * Copyright (c) 2011, Joyent, Inc. All rights reserved.
24 25 */
25 26 #include <alloca.h>
26 27 #include <assert.h>
27 28 #include <dirent.h>
28 29 #include <dlfcn.h>
29 30 #include <door.h>
30 31 #include <errno.h>
31 32 #include <exacct.h>
32 33 #include <ctype.h>
33 34 #include <fcntl.h>
34 35 #include <kstat.h>
35 36 #include <libcontract.h>
36 37 #include <libintl.h>
37 38 #include <libscf.h>
38 39 #include <zonestat.h>
39 40 #include <zonestat_impl.h>
40 41 #include <limits.h>
41 42 #include <pool.h>
42 43 #include <procfs.h>
43 44 #include <rctl.h>
44 45 #include <thread.h>
45 46 #include <signal.h>
46 47 #include <stdarg.h>
47 48 #include <stddef.h>
48 49 #include <stdio.h>
49 50 #include <stdlib.h>
50 51 #include <strings.h>
51 52 #include <synch.h>
52 53 #include <sys/acctctl.h>
53 54 #include <sys/contract/process.h>
54 55 #include <sys/ctfs.h>
55 56 #include <sys/fork.h>
56 57 #include <sys/param.h>
57 58 #include <sys/priocntl.h>
58 59 #include <sys/fxpriocntl.h>
59 60 #include <sys/processor.h>
60 61 #include <sys/pset.h>
61 62 #include <sys/socket.h>
62 63 #include <sys/stat.h>
63 64 #include <sys/statvfs.h>
64 65 #include <sys/swap.h>
65 66 #include <sys/systeminfo.h>
66 67 #include <thread.h>
67 68 #include <sys/list.h>
68 69 #include <sys/time.h>
69 70 #include <sys/types.h>
70 71 #include <sys/vm_usage.h>
71 72 #include <sys/wait.h>
72 73 #include <sys/zone.h>
73 74 #include <time.h>
74 75 #include <ucred.h>
75 76 #include <unistd.h>
76 77 #include <vm/anon.h>
77 78 #include <zone.h>
78 79 #include <zonestat.h>
79 80
80 81 #define MAX_PSET_NAME 1024 /* Taken from PV_NAME_MAX_LEN */
81 82 #define ZSD_PSET_UNLIMITED UINT16_MAX
82 83 #define ZONESTAT_EXACCT_FILE "/var/adm/exacct/zonestat-process"
83 84
84 85 /*
85 86 * zonestatd implements gathering cpu and memory utilization data for
86 87 * running zones. It has these components:
87 88 *
88 89 * zsd_server:
89 90 * Door server to respond to client connections. Each client
90 91 * will connect using libzonestat.so, which will open and
91 92 * call /var/tmp/.zonestat_door. Each connecting client is given
92 93 * a file descriptor to the stat server.
93 94 *
94 95 * The zsd_server also responds to zoneadmd, which reports when a
95 96 * new zone is booted. This is used to fattach the zsd_server door
96 97 * into the new zone.
97 98 *
98 99 * zsd_stat_server:
99 100 * Receives client requests for the current utilization data. Each
100 101 * client request will cause zonestatd to update the current utilization
101 102 * data by kicking the stat_thread.
102 103 *
103 104 * If the client is in a non-global zone, the utilization data will
104 105 * be filtered to only show the given zone. The usage by all other zones
105 106 * will be added to the system utilization.
106 107 *
107 108 * stat_thread:
108 109 * The stat thread implements querying the system to determine the
109 110 * current utilization data for each running zone. This includes
110 111 * inspecting the system's processor set configuration, as well as details
111 112 * of each zone, such as their configured limits, and which processor
112 113 * sets they are running in.
113 114 *
114 115 * The stat_thread will only update memory utilization data as often as
115 116 * the configured config/sample_interval on the zones-monitoring service.
116 117 */
117 118
118 119 /*
119 120 * The private vmusage structure unfortunately uses size_t types, and assumes
120 121 * the caller's bitness matches the kernel's bitness. Since the getvmusage()
121 122 * system call is contracted, and zonestatd is 32 bit, the following structures
122 123 * are used to interact with a 32bit or 64 bit kernel.
123 124 */
/*
 * 32-bit layout of the private vmusage structure, used to call
 * getvmusage() against a 32-bit kernel (see bitness comment above).
 * RSS and swap values are in units reported by getvmusage().
 */
typedef struct zsd_vmusage32 {
	id_t vmu_zoneid;
	uint_t vmu_type;	/* entity type queried; see sys/vm_usage.h */
	id_t vmu_id;		/* id of the entity (e.g. zone id) */

	uint32_t vmu_rss_all;
	uint32_t vmu_rss_private;
	uint32_t vmu_rss_shared;
	uint32_t vmu_swap_all;
	uint32_t vmu_swap_private;
	uint32_t vmu_swap_shared;
} zsd_vmusage32_t;
136 137
/*
 * 64-bit layout of the private vmusage structure, used to call
 * getvmusage() against a 64-bit kernel from this 32-bit process.
 */
typedef struct zsd_vmusage64 {
	id_t vmu_zoneid;
	uint_t vmu_type;	/* entity type queried; see sys/vm_usage.h */
	id_t vmu_id;		/* id of the entity (e.g. zone id) */
	/*
	 * An amd64 kernel will align the following uint64_t members, but a
	 * 32bit i386 process will not without help.
	 */
	int vmu_align_next_members_on_8_bytes;
	uint64_t vmu_rss_all;
	uint64_t vmu_rss_private;
	uint64_t vmu_rss_shared;
	uint64_t vmu_swap_all;
	uint64_t vmu_swap_private;
	uint64_t vmu_swap_shared;
} zsd_vmusage64_t;
153 154
154 155 struct zsd_zone;
155 156
156 157 /* Used to store a zone's usage of a pset */
157 158 typedef struct zsd_pset_usage {
158 159 struct zsd_zone *zsu_zone;
159 160 struct zsd_pset *zsu_pset;
160 161
161 162 list_node_t zsu_next;
162 163
163 164 zoneid_t zsu_zoneid;
164 165 boolean_t zsu_found; /* zone bound at end of interval */
165 166 boolean_t zsu_active; /* zone was bound during interval */
166 167 boolean_t zsu_new; /* zone newly bound in this interval */
167 168 boolean_t zsu_deleted; /* zone was unbound in this interval */
168 169 boolean_t zsu_empty; /* no procs in pset in this interval */
169 170 time_t zsu_start; /* time when zone was found in pset */
170 171 hrtime_t zsu_hrstart; /* time when zone was found in pset */
171 172 uint64_t zsu_cpu_shares;
172 173 uint_t zsu_scheds; /* schedulers found in this pass */
173 174 timestruc_t zsu_cpu_usage; /* cpu time used */
174 175 } zsd_pset_usage_t;
175 176
/* Used to store a pset's utilization */
typedef struct zsd_pset {
	psetid_t zsp_id;
	list_node_t zsp_next;
	char zsp_name[ZS_PSETNAME_MAX];

	uint_t zsp_cputype;	/* default, dedicated or shared */
	boolean_t zsp_found;	/* pset found at end of interval */
	boolean_t zsp_new;	/* pset new in this interval */
	boolean_t zsp_deleted;	/* pset deleted in this interval */
	boolean_t zsp_active;	/* pset existed during interval */
	boolean_t zsp_empty;	/* no processes in pset */
	time_t zsp_start;	/* wall-clock time pset was first seen */
	hrtime_t zsp_hrstart;	/* hrtime pset was first seen */

	uint64_t zsp_online;	/* online cpus in interval */
	uint64_t zsp_size;	/* size in this interval */
	uint64_t zsp_min;	/* configured min in this interval */
	uint64_t zsp_max;	/* configured max in this interval */
	int64_t zsp_importance;	/* configured importance in this interval */

	uint_t zsp_scheds;	/* scheds of processes found in pset */
	uint64_t zsp_cpu_shares;	/* total shares in this interval */

	timestruc_t zsp_total_time;
	timestruc_t zsp_usage_kern;
	timestruc_t zsp_usage_zones;

	/* Individual zone usages of pset */
	list_t zsp_usage_list;
	int zsp_nusage;

	/* Summed kstat values from individual cpus in pset */
	timestruc_t zsp_idle;
	timestruc_t zsp_intr;
	timestruc_t zsp_kern;
	timestruc_t zsp_user;

} zsd_pset_t;
215 216
216 217 /* Used to track an individual cpu's utilization as reported by kstats */
217 218 typedef struct zsd_cpu {
218 219 processorid_t zsc_id;
219 220 list_node_t zsc_next;
220 221 psetid_t zsc_psetid;
221 222 psetid_t zsc_psetid_prev;
222 223 zsd_pset_t *zsc_pset;
223 224
224 225 boolean_t zsc_found; /* cpu online in this interval */
225 226 boolean_t zsc_onlined; /* cpu onlined during this interval */
226 227 boolean_t zsc_offlined; /* cpu offlined during this interval */
227 228 boolean_t zsc_active; /* cpu online during this interval */
228 229 boolean_t zsc_allocated; /* True if cpu has ever been found */
229 230
230 231 /* kstats this interval */
231 232 uint64_t zsc_nsec_idle;
232 233 uint64_t zsc_nsec_intr;
233 234 uint64_t zsc_nsec_kern;
234 235 uint64_t zsc_nsec_user;
235 236
236 237 /* kstats in most recent interval */
237 238 uint64_t zsc_nsec_idle_prev;
238 239 uint64_t zsc_nsec_intr_prev;
239 240 uint64_t zsc_nsec_kern_prev;
240 241 uint64_t zsc_nsec_user_prev;
241 242
242 243 /* Total kstat increases since zonestatd started reading kstats */
243 244 timestruc_t zsc_idle;
244 245 timestruc_t zsc_intr;
245 246 timestruc_t zsc_kern;
246 247 timestruc_t zsc_user;
247 248
248 249 } zsd_cpu_t;
249 250
/* Used to describe an individual zone and its utilization */
typedef struct zsd_zone {
	zoneid_t zsz_id;
	list_node_t zsz_next;
	char zsz_name[ZS_ZONENAME_MAX];
	uint_t zsz_cputype;
	uint_t zsz_iptype;
	time_t zsz_start;	/* wall-clock time zone was first seen */
	hrtime_t zsz_hrstart;	/* hrtime zone was first seen */

	char zsz_pool[ZS_POOLNAME_MAX];
	char zsz_pset[ZS_PSETNAME_MAX];
	int zsz_default_sched;
	/* These are deduced by inspecting processes */
	psetid_t zsz_psetid;
	uint_t zsz_scheds;

	boolean_t zsz_new;	/* zone booted during this interval */
	boolean_t zsz_deleted;	/* halted during this interval */
	boolean_t zsz_active;	/* running in this interval */
	boolean_t zsz_empty;	/* no processes in this interval */
	boolean_t zsz_gone;	/* not installed in this interval */
	boolean_t zsz_found;	/* Running at end of this interval */

	/* Configured limits; ZS_LIMIT_NONE when uncapped */
	uint64_t zsz_cpu_shares;
	uint64_t zsz_cpu_cap;
	uint64_t zsz_ram_cap;
	uint64_t zsz_locked_cap;
	uint64_t zsz_vm_cap;

	uint64_t zsz_cpus_online;
	timestruc_t zsz_cpu_usage;	/* cpu time used by zone */
	timestruc_t zsz_cap_time;	/* cpu time of cpu cap */
	timestruc_t zsz_share_time;	/* cpu time of share of cpu */
	timestruc_t zsz_pset_time;	/* time of all psets zone is bound to */

	uint64_t zsz_usage_ram;
	uint64_t zsz_usage_locked;
	uint64_t zsz_usage_vm;

	/* Configured resource-control caps; ZS_LIMIT_NONE when uncapped */
	uint64_t zsz_processes_cap;
	uint64_t zsz_lwps_cap;
	uint64_t zsz_shm_cap;
	uint64_t zsz_shmids_cap;
	uint64_t zsz_semids_cap;
	uint64_t zsz_msgids_cap;
	uint64_t zsz_lofi_cap;

	/* Current usage of the corresponding resources */
	uint64_t zsz_processes;
	uint64_t zsz_lwps;
	uint64_t zsz_shm;
	uint64_t zsz_shmids;
	uint64_t zsz_semids;
	uint64_t zsz_msgids;
	uint64_t zsz_lofi;

} zsd_zone_t;
307 308
308 309 /*
309 310 * Used to track the cpu usage of an individual processes.
310 311 *
311 312 * zonestatd sweeps /proc each interval and charges the cpu usage of processes.
312 313 * to their zone. As processes exit, their extended accounting records are
313 314 * read and the difference of their total and known usage is charged to their
314 315 * zone.
315 316 *
316 317 * If a process is never seen in /proc, the total usage on its extended
317 318 * accounting record will be charged to its zone.
318 319 */
319 320 typedef struct zsd_proc {
320 321 list_node_t zspr_next;
321 322 pid_t zspr_ppid;
322 323 psetid_t zspr_psetid;
323 324 zoneid_t zspr_zoneid;
324 325 int zspr_sched;
325 326 timestruc_t zspr_usage;
326 327 } zsd_proc_t;
327 328
328 329 /* Used to track the overall resource usage of the system */
329 330 typedef struct zsd_system {
330 331
331 332 uint64_t zss_ram_total;
332 333 uint64_t zss_ram_kern;
333 334 uint64_t zss_ram_zones;
334 335
335 336 uint64_t zss_locked_kern;
336 337 uint64_t zss_locked_zones;
337 338
338 339 uint64_t zss_vm_total;
339 340 uint64_t zss_vm_kern;
340 341 uint64_t zss_vm_zones;
341 342
342 343 uint64_t zss_swap_total;
343 344 uint64_t zss_swap_used;
344 345
345 346 timestruc_t zss_idle;
346 347 timestruc_t zss_intr;
347 348 timestruc_t zss_kern;
348 349 timestruc_t zss_user;
349 350
350 351 timestruc_t zss_cpu_total_time;
351 352 timestruc_t zss_cpu_usage_kern;
352 353 timestruc_t zss_cpu_usage_zones;
353 354
354 355 uint64_t zss_maxpid;
355 356 uint64_t zss_processes_max;
356 357 uint64_t zss_lwps_max;
357 358 uint64_t zss_shm_max;
358 359 uint64_t zss_shmids_max;
359 360 uint64_t zss_semids_max;
360 361 uint64_t zss_msgids_max;
361 362 uint64_t zss_lofi_max;
362 363
363 364 uint64_t zss_processes;
364 365 uint64_t zss_lwps;
365 366 uint64_t zss_shm;
366 367 uint64_t zss_shmids;
367 368 uint64_t zss_semids;
368 369 uint64_t zss_msgids;
369 370 uint64_t zss_lofi;
370 371
371 372 uint64_t zss_ncpus;
372 373 uint64_t zss_ncpus_online;
373 374
374 375 } zsd_system_t;
375 376
376 377 /*
377 378 * A dumping ground for various information and structures used to compute
378 379 * utilization.
379 380 *
380 381 * This structure is used to track the system while clients are connected.
381 382 * When The first client connects, a zsd_ctl is allocated and configured by
382 383 * zsd_open(). When all clients disconnect, the zsd_ctl is closed.
383 384 */
384 385 typedef struct zsd_ctl {
385 386 kstat_ctl_t *zsctl_kstat_ctl;
386 387
387 388 /* To track extended accounting */
388 389 int zsctl_proc_fd; /* Log currently being used */
389 390 ea_file_t zsctl_proc_eaf;
390 391 struct stat64 zsctl_proc_stat;
391 392 int zsctl_proc_open;
392 393 int zsctl_proc_fd_next; /* Log file to use next */
393 394 ea_file_t zsctl_proc_eaf_next;
394 395 struct stat64 zsctl_proc_stat_next;
395 396 int zsctl_proc_open_next;
396 397
397 398 /* pool configuration handle */
398 399 pool_conf_t *zsctl_pool_conf;
399 400 int zsctl_pool_status;
400 401 int zsctl_pool_changed;
401 402
402 403 /* The above usage tacking structures */
403 404 zsd_system_t *zsctl_system;
404 405 list_t zsctl_zones;
405 406 list_t zsctl_psets;
406 407 list_t zsctl_cpus;
407 408 zsd_cpu_t *zsctl_cpu_array;
408 409 zsd_proc_t *zsctl_proc_array;
409 410
410 411 /* Various system info */
411 412 uint64_t zsctl_maxcpuid;
412 413 uint64_t zsctl_maxproc;
413 414 uint64_t zsctl_kern_bits;
414 415 uint64_t zsctl_pagesize;
415 416
416 417 /* Used to track time available under a cpu cap. */
417 418 uint64_t zsctl_hrtime;
418 419 uint64_t zsctl_hrtime_prev;
419 420 timestruc_t zsctl_hrtime_total;
420 421
421 422 struct timeval zsctl_timeofday;
422 423
423 424 /* Caches for arrays allocated for use by various system calls */
424 425 psetid_t *zsctl_pset_cache;
425 426 uint_t zsctl_pset_ncache;
426 427 processorid_t *zsctl_cpu_cache;
427 428 uint_t zsctl_cpu_ncache;
428 429 zoneid_t *zsctl_zone_cache;
429 430 uint_t zsctl_zone_ncache;
430 431 struct swaptable *zsctl_swap_cache;
431 432 uint64_t zsctl_swap_cache_size;
432 433 uint64_t zsctl_swap_cache_num;
433 434 zsd_vmusage64_t *zsctl_vmusage_cache;
434 435 uint64_t zsctl_vmusage_cache_num;
435 436
436 437 /* Info about procfs for scanning /proc */
437 438 struct dirent *zsctl_procfs_dent;
438 439 long zsctl_procfs_dent_size;
439 440 pool_value_t *zsctl_pool_vals[3];
440 441
441 442 /* Counts on tracked entities */
442 443 uint_t zsctl_nzones;
443 444 uint_t zsctl_npsets;
444 445 uint_t zsctl_npset_usages;
445 446 } zsd_ctl_t;
446 447
447 448 zsd_ctl_t *g_ctl;
448 449 boolean_t g_open; /* True if g_ctl is open */
449 450 int g_hasclient; /* True if any clients are connected */
450 451
451 452 /*
452 453 * The usage cache is updated by the stat_thread, and copied to clients by
453 454 * the zsd_stat_server. Mutex and cond are to synchronize between the
454 455 * stat_thread and the stat_server.
455 456 */
456 457 zs_usage_cache_t *g_usage_cache;
457 458 mutex_t g_usage_cache_lock;
458 459 cond_t g_usage_cache_kick;
459 460 uint_t g_usage_cache_kickers;
460 461 cond_t g_usage_cache_wait;
461 462 char *g_usage_cache_buf;
462 463 uint_t g_usage_cache_bufsz;
463 464 uint64_t g_gen_next;
464 465
465 466 /* fds of door servers */
466 467 int g_server_door;
467 468 int g_stat_door;
468 469
469 470 /*
470 471 * Starting and current time. Used to throttle memory calculation, and to
471 472 * mark new zones and psets with their boot and creation time.
472 473 */
473 474 time_t g_now;
474 475 time_t g_start;
475 476 hrtime_t g_hrnow;
476 477 hrtime_t g_hrstart;
477 478 uint64_t g_interval;
478 479
479 480 /*
480 481 * main() thread.
481 482 */
482 483 thread_t g_main;
483 484
/*
 * Print a localized, newline-terminated warning to stderr.  Non-fatal.
 */
/* PRINTFLIKE1 */
static void
zsd_warn(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void) fprintf(stderr, gettext("zonestat: Warning: "));
	(void) vfprintf(stderr, fmt, ap);
	(void) fputc('\n', stderr);
	va_end(ap);
}
497 498
/*
 * Print a localized error to stderr and terminate the daemon.
 * Never returns.
 */
/* PRINTFLIKE1 */
static void
zsd_error(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void) fprintf(stderr, gettext("zonestat: Error: "));
	(void) vfprintf(stderr, fmt, ap);
	(void) fputc('\n', stderr);
	va_end(ap);

	exit(1);
}
512 513
513 514 /* Turns on extended accounting if not configured externally */
514 515 int
515 516 zsd_enable_cpu_stats()
516 517 {
517 518 char *path = ZONESTAT_EXACCT_FILE;
518 519 char oldfile[MAXPATHLEN];
519 520 int ret, state = AC_ON;
520 521 ac_res_t res[6];
521 522
522 523 /*
523 524 * Start a new accounting file if accounting not configured
524 525 * externally.
525 526 */
526 527
527 528 res[0].ar_id = AC_PROC_PID;
528 529 res[0].ar_state = AC_ON;
529 530 res[1].ar_id = AC_PROC_ANCPID;
530 531 res[1].ar_state = AC_ON;
531 532 res[2].ar_id = AC_PROC_CPU;
532 533 res[2].ar_state = AC_ON;
533 534 res[3].ar_id = AC_PROC_TIME;
534 535 res[3].ar_state = AC_ON;
535 536 res[4].ar_id = AC_PROC_ZONENAME;
536 537 res[4].ar_state = AC_ON;
537 538 res[5].ar_id = AC_NONE;
538 539 res[5].ar_state = AC_ON;
539 540 if (acctctl(AC_PROC | AC_RES_SET, res, sizeof (res)) != 0) {
540 541 zsd_warn(gettext("Unable to set accounting resources"));
541 542 return (-1);
542 543 }
543 544 /* Only set accounting file if none is configured */
544 545 ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
545 546 if (ret < 0) {
546 547
547 548 (void) unlink(path);
548 549 if (acctctl(AC_PROC | AC_FILE_SET, path, strlen(path) + 1)
549 550 == -1) {
550 551 zsd_warn(gettext("Unable to set accounting file"));
551 552 return (-1);
552 553 }
553 554 }
554 555 if (acctctl(AC_PROC | AC_STATE_SET, &state, sizeof (state)) == -1) {
555 556 zsd_warn(gettext("Unable to enable accounting"));
556 557 return (-1);
557 558 }
558 559 return (0);
559 560 }
560 561
561 562 /* Turns off extended accounting if not configured externally */
562 563 int
563 564 zsd_disable_cpu_stats()
564 565 {
565 566 char *path = ZONESTAT_EXACCT_FILE;
566 567 int ret, state = AC_OFF;
567 568 ac_res_t res[6];
568 569 char oldfile[MAXPATHLEN];
569 570
570 571 /* If accounting file is externally configured, leave it alone */
571 572 ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
572 573 if (ret == 0 && strcmp(oldfile, path) != 0)
573 574 return (0);
574 575
575 576 res[0].ar_id = AC_PROC_PID;
576 577 res[0].ar_state = AC_OFF;
577 578 res[1].ar_id = AC_PROC_ANCPID;
578 579 res[1].ar_state = AC_OFF;
579 580 res[2].ar_id = AC_PROC_CPU;
580 581 res[2].ar_state = AC_OFF;
581 582 res[3].ar_id = AC_PROC_TIME;
582 583 res[3].ar_state = AC_OFF;
583 584 res[4].ar_id = AC_PROC_ZONENAME;
584 585 res[4].ar_state = AC_OFF;
585 586 res[5].ar_id = AC_NONE;
586 587 res[5].ar_state = AC_OFF;
587 588 if (acctctl(AC_PROC | AC_RES_SET, res, sizeof (res)) != 0) {
588 589 zsd_warn(gettext("Unable to clear accounting resources"));
589 590 return (-1);
590 591 }
591 592 if (acctctl(AC_PROC | AC_FILE_SET, NULL, 0) == -1) {
592 593 zsd_warn(gettext("Unable to clear accounting file"));
593 594 return (-1);
594 595 }
595 596 if (acctctl(AC_PROC | AC_STATE_SET, &state, sizeof (state)) == -1) {
596 597 zsd_warn(gettext("Unable to diable accounting"));
597 598 return (-1);
598 599 }
599 600
600 601 (void) unlink(path);
601 602 return (0);
602 603 }
603 604
604 605 /*
605 606 * If not configured externally, deletes the current extended accounting file
606 607 * and starts a new one.
607 608 *
608 609 * Since the stat_thread holds an open handle to the accounting file, it will
609 610 * read all remaining entries from the old file before switching to
610 611 * read the new one.
611 612 */
612 613 int
613 614 zsd_roll_exacct(void)
614 615 {
615 616 int ret;
616 617 char *path = ZONESTAT_EXACCT_FILE;
617 618 char oldfile[MAXPATHLEN];
618 619
619 620 /* If accounting file is externally configured, leave it alone */
620 621 ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
621 622 if (ret == 0 && strcmp(oldfile, path) != 0)
622 623 return (0);
623 624
624 625 if (unlink(path) != 0)
625 626 /* Roll it next time */
626 627 return (0);
627 628
628 629 if (acctctl(AC_PROC | AC_FILE_SET, path, strlen(path) + 1) == -1) {
629 630 zsd_warn(gettext("Unable to set accounting file"));
630 631 return (-1);
631 632 }
632 633 return (0);
633 634 }
634 635
635 636 /* Contract stuff for zone_enter() */
636 637 int
637 638 init_template(void)
638 639 {
639 640 int fd;
640 641 int err = 0;
641 642
642 643 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
643 644 if (fd == -1)
644 645 return (-1);
645 646
646 647 /*
647 648 * For now, zoneadmd doesn't do anything with the contract.
648 649 * Deliver no events, don't inherit, and allow it to be orphaned.
649 650 */
650 651 err |= ct_tmpl_set_critical(fd, 0);
651 652 err |= ct_tmpl_set_informative(fd, 0);
652 653 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
653 654 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
654 655 if (err || ct_tmpl_activate(fd)) {
655 656 (void) close(fd);
656 657 return (-1);
657 658 }
658 659
659 660 return (fd);
660 661 }
661 662
662 663 /*
663 664 * Contract stuff for zone_enter()
664 665 */
665 666 int
666 667 contract_latest(ctid_t *id)
667 668 {
668 669 int cfd, r;
669 670 ct_stathdl_t st;
670 671 ctid_t result;
671 672
672 673 if ((cfd = open64(CTFS_ROOT "/process/latest", O_RDONLY)) == -1)
673 674 return (errno);
674 675
675 676 if ((r = ct_status_read(cfd, CTD_COMMON, &st)) != 0) {
676 677 (void) close(cfd);
677 678 return (r);
678 679 }
679 680
680 681 result = ct_status_get_id(st);
681 682 ct_status_free(st);
682 683 (void) close(cfd);
683 684
684 685 *id = result;
685 686 return (0);
686 687 }
687 688
/*
 * Set FD_CLOEXEC on fd so it is not inherited across exec.
 * Returns 0 on success, -1 on failure.
 */
static int
close_on_exec(int fd)
{
	int fdflags;

	fdflags = fcntl(fd, F_GETFD, 0);
	if (fdflags == -1)
		return (-1);
	if (fcntl(fd, F_SETFD, fdflags | FD_CLOEXEC) == -1)
		return (-1);
	return (0);
}
696 697
697 698 int
698 699 contract_open(ctid_t ctid, const char *type, const char *file, int oflag)
699 700 {
700 701 char path[PATH_MAX];
701 702 int n, fd;
702 703
703 704 if (type == NULL)
704 705 type = "all";
705 706
706 707 n = snprintf(path, PATH_MAX, CTFS_ROOT "/%s/%ld/%s", type, ctid, file);
707 708 if (n >= sizeof (path)) {
708 709 errno = ENAMETOOLONG;
709 710 return (-1);
710 711 }
711 712
712 713 fd = open64(path, oflag);
713 714 if (fd != -1) {
714 715 if (close_on_exec(fd) == -1) {
715 716 int err = errno;
716 717 (void) close(fd);
717 718 errno = err;
718 719 return (-1);
719 720 }
720 721 }
721 722 return (fd);
722 723 }
723 724
724 725 int
725 726 contract_abandon_id(ctid_t ctid)
726 727 {
727 728 int fd, err;
728 729
729 730 fd = contract_open(ctid, "all", "ctl", O_WRONLY);
730 731 if (fd == -1)
731 732 return (errno);
732 733
733 734 err = ct_ctl_abandon(fd);
734 735 (void) close(fd);
735 736
736 737 return (err);
737 738 }
738 739 /*
739 740 * Attach the zsd_server to a zone. Called for each zone when zonestatd
740 741 * starts, and for each newly booted zone when zoneadmd contacts the zsd_server
741 742 *
742 743 * Zone_enter is used to avoid reaching into zone to fattach door.
743 744 */
744 745 static void
745 746 zsd_fattach_zone(zoneid_t zid, int door, boolean_t detach_only)
746 747 {
747 748 char *path = ZS_DOOR_PATH;
748 749 int fd, pid, stat, tmpl_fd;
749 750 ctid_t ct;
750 751
751 752 if ((tmpl_fd = init_template()) == -1) {
752 753 zsd_warn("Unable to init template");
753 754 return;
754 755 }
755 756
756 757 pid = forkx(0);
757 758 if (pid < 0) {
758 759 (void) ct_tmpl_clear(tmpl_fd);
759 760 zsd_warn(gettext(
760 761 "Unable to fork to add zonestat to zoneid %d\n"), zid);
761 762 return;
762 763 }
763 764
764 765 if (pid == 0) {
765 766 (void) ct_tmpl_clear(tmpl_fd);
766 767 (void) close(tmpl_fd);
767 768 if (zid != 0 && zone_enter(zid) != 0) {
768 769 if (errno == EINVAL) {
769 770 _exit(0);
770 771 }
771 772 _exit(1);
772 773 }
773 774 (void) fdetach(path);
774 775 (void) unlink(path);
775 776 if (detach_only)
776 777 _exit(0);
777 778 fd = open(path, O_CREAT|O_RDWR, 0644);
778 779 if (fd < 0)
779 780 _exit(2);
780 781 if (fattach(door, path) != 0)
781 782 _exit(3);
782 783 _exit(0);
783 784 }
784 785 if (contract_latest(&ct) == -1)
785 786 ct = -1;
786 787 (void) ct_tmpl_clear(tmpl_fd);
787 788 (void) close(tmpl_fd);
788 789 (void) contract_abandon_id(ct);
789 790 while (waitpid(pid, &stat, 0) != pid)
790 791 ;
791 792 if (WIFEXITED(stat) && WEXITSTATUS(stat) == 0)
792 793 return;
793 794
794 795 zsd_warn(gettext("Unable to attach door to zoneid: %d"), zid);
795 796
796 797 if (WEXITSTATUS(stat) == 1)
797 798 zsd_warn(gettext("Cannot entering zone"));
798 799 else if (WEXITSTATUS(stat) == 2)
799 800 zsd_warn(gettext("Unable to create door file: %s"), path);
800 801 else if (WEXITSTATUS(stat) == 3)
801 802 zsd_warn(gettext("Unable to fattach file: %s"), path);
802 803
803 804 zsd_warn(gettext("Internal error entering zone: %d"), zid);
804 805 }
805 806
806 807 /*
807 808 * Zone lookup and allocation functions to manage list of currently running
808 809 * zones.
809 810 */
810 811 static zsd_zone_t *
811 812 zsd_lookup_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
812 813 {
813 814 zsd_zone_t *zone;
814 815
815 816 for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
816 817 zone = list_next(&ctl->zsctl_zones, zone)) {
817 818 if (strcmp(zone->zsz_name, zonename) == 0) {
818 819 if (zoneid != -1)
819 820 zone->zsz_id = zoneid;
820 821 return (zone);
821 822 }
822 823 }
823 824 return (NULL);
824 825 }
825 826
826 827 static zsd_zone_t *
827 828 zsd_lookup_zone_byid(zsd_ctl_t *ctl, zoneid_t zoneid)
828 829 {
829 830 zsd_zone_t *zone;
830 831
831 832 for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
832 833 zone = list_next(&ctl->zsctl_zones, zone)) {
833 834 if (zone->zsz_id == zoneid)
834 835 return (zone);
835 836 }
836 837 return (NULL);
837 838 }
838 839
839 840 static zsd_zone_t *
840 841 zsd_allocate_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
841 842 {
842 843 zsd_zone_t *zone;
843 844
844 845 if ((zone = (zsd_zone_t *)calloc(1, sizeof (zsd_zone_t))) == NULL)
845 846 return (NULL);
846 847
847 848 (void) strlcpy(zone->zsz_name, zonename, sizeof (zone->zsz_name));
848 849 zone->zsz_id = zoneid;
849 850 zone->zsz_found = B_FALSE;
850 851
851 852 /*
852 853 * Allocate as deleted so if not found in first pass, zone is deleted
853 854 * from list. This can happen if zone is returned by zone_list, but
854 855 * exits before first attempt to fetch zone details.
855 856 */
856 857 zone->zsz_start = g_now;
857 858 zone->zsz_hrstart = g_hrnow;
858 859 zone->zsz_deleted = B_TRUE;
859 860
860 861 zone->zsz_cpu_shares = ZS_LIMIT_NONE;
861 862 zone->zsz_cpu_cap = ZS_LIMIT_NONE;
862 863 zone->zsz_ram_cap = ZS_LIMIT_NONE;
863 864 zone->zsz_locked_cap = ZS_LIMIT_NONE;
864 865 zone->zsz_vm_cap = ZS_LIMIT_NONE;
865 866
866 867 zone->zsz_processes_cap = ZS_LIMIT_NONE;
867 868 zone->zsz_lwps_cap = ZS_LIMIT_NONE;
868 869 zone->zsz_shm_cap = ZS_LIMIT_NONE;
869 870 zone->zsz_shmids_cap = ZS_LIMIT_NONE;
870 871 zone->zsz_semids_cap = ZS_LIMIT_NONE;
871 872 zone->zsz_msgids_cap = ZS_LIMIT_NONE;
872 873 zone->zsz_lofi_cap = ZS_LIMIT_NONE;
873 874
874 875 ctl->zsctl_nzones++;
875 876
876 877 return (zone);
877 878 }
878 879
879 880 static zsd_zone_t *
880 881 zsd_lookup_insert_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
881 882 {
882 883 zsd_zone_t *zone, *tmp;
883 884
884 885 if ((zone = zsd_lookup_zone(ctl, zonename, zoneid)) != NULL)
885 886 return (zone);
886 887
887 888 if ((zone = zsd_allocate_zone(ctl, zonename, zoneid)) == NULL)
888 889 return (NULL);
889 890
890 891 /* Insert sorted by zonename */
891 892 tmp = list_head(&ctl->zsctl_zones);
892 893 while (tmp != NULL && strcmp(zonename, tmp->zsz_name) > 0)
893 894 tmp = list_next(&ctl->zsctl_zones, tmp);
894 895
895 896 list_insert_before(&ctl->zsctl_zones, tmp, zone);
896 897 return (zone);
897 898 }
898 899
899 900 /*
900 901 * Mark all zones as not existing. As zones are found, they will
901 902 * be marked as existing. If a zone is not found, then it must have
902 903 * halted.
903 904 */
904 905 static void
905 906 zsd_mark_zones_start(zsd_ctl_t *ctl)
906 907 {
907 908
908 909 zsd_zone_t *zone;
909 910
910 911 for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
911 912 zone = list_next(&ctl->zsctl_zones, zone)) {
912 913 zone->zsz_found = B_FALSE;
913 914 }
914 915 }
915 916
916 917 /*
917 918 * Mark each zone as not using pset. If processes are found using the
918 919 * pset, the zone will remain bound to the pset. If none of a zones
919 920 * processes are bound to the pset, the zone's usage of the pset will
920 921 * be deleted.
921 922 *
922 923 */
923 924 static void
924 925 zsd_mark_pset_usage_start(zsd_pset_t *pset)
925 926 {
926 927 zsd_pset_usage_t *usage;
927 928
928 929 for (usage = list_head(&pset->zsp_usage_list);
929 930 usage != NULL;
930 931 usage = list_next(&pset->zsp_usage_list, usage)) {
931 932 usage->zsu_found = B_FALSE;
932 933 usage->zsu_empty = B_TRUE;
933 934 }
934 935 }
935 936
/*
 * Mark each pset as not existing. If a pset is found, it will be marked
 * as existing. If a pset is not found, it will be deleted.
 */
940 941 static void
941 942 zsd_mark_psets_start(zsd_ctl_t *ctl)
942 943 {
943 944 zsd_pset_t *pset;
944 945
945 946 for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
946 947 pset = list_next(&ctl->zsctl_psets, pset)) {
947 948 pset->zsp_found = B_FALSE;
948 949 zsd_mark_pset_usage_start(pset);
949 950 }
950 951 }
951 952
952 953 /*
953 954 * A pset was found. Update its information
954 955 */
955 956 static void
956 957 zsd_mark_pset_found(zsd_pset_t *pset, uint_t type, uint64_t online,
957 958 uint64_t size, uint64_t min, uint64_t max, int64_t importance)
958 959 {
959 960 pset->zsp_empty = B_TRUE;
960 961 pset->zsp_deleted = B_FALSE;
961 962
962 963 assert(pset->zsp_found == B_FALSE);
963 964
964 965 /* update pset flags */
965 966 if (pset->zsp_active == B_FALSE)
966 967 /* pset not seen on previous interval. It is new. */
967 968 pset->zsp_new = B_TRUE;
968 969 else
969 970 pset->zsp_new = B_FALSE;
970 971
971 972 pset->zsp_found = B_TRUE;
972 973 pset->zsp_cputype = type;
973 974 pset->zsp_online = online;
974 975 pset->zsp_size = size;
975 976 pset->zsp_min = min;
976 977 pset->zsp_max = max;
977 978 pset->zsp_importance = importance;
978 979 pset->zsp_cpu_shares = 0;
979 980 pset->zsp_scheds = 0;
980 981 pset->zsp_active = B_TRUE;
981 982 }
982 983
983 984 /*
984 985 * A zone's process was found using a pset. Charge the process to the pset and
985 986 * the per-zone data for the pset.
986 987 */
static void
zsd_mark_pset_usage_found(zsd_pset_usage_t *usage, uint_t sched)
{
	zsd_zone_t *zone = usage->zsu_zone;
	zsd_pset_t *pset = usage->zsu_pset;

	/* Nothing to do if already found; just accumulate stats below */
	if (usage->zsu_found == B_TRUE)
		goto add_stats;

	/* First process seen for this zone/pset pair this interval */
	usage->zsu_found = B_TRUE;
	usage->zsu_empty = B_FALSE;

	usage->zsu_deleted = B_FALSE;
	/* update usage flags: new if not active in the previous interval */
	if (usage->zsu_active == B_FALSE)
		usage->zsu_new = B_TRUE;
	else
		usage->zsu_new = B_FALSE;

	/* Reset per-interval accumulators; rebuilt as processes are seen */
	usage->zsu_scheds = 0;
	usage->zsu_cpu_shares = ZS_LIMIT_NONE;
	usage->zsu_active = B_TRUE;
	/* A process implies both the pset and the zone are non-empty */
	pset->zsp_empty = B_FALSE;
	zone->zsz_empty = B_FALSE;

add_stats:
	/* Detect zone's pset id, and if it is bound to multiple psets */
	if (zone->zsz_psetid == ZS_PSET_ERROR)
		zone->zsz_psetid = pset->zsp_id;
	else if (zone->zsz_psetid != pset->zsp_id)
		zone->zsz_psetid = ZS_PSET_MULTI;

	/* Fold this process's scheduling class into all three levels */
	usage->zsu_scheds |= sched;
	pset->zsp_scheds |= sched;
	zone->zsz_scheds |= sched;

	/* Record if FSS is co-habitating with conflicting scheduler */
	if ((pset->zsp_scheds & ZS_SCHED_FSS) &&
	    usage->zsu_scheds & (
	    ZS_SCHED_TS | ZS_SCHED_IA | ZS_SCHED_FX)) {
		usage->zsu_scheds |= ZS_SCHED_CONFLICT;

		pset->zsp_scheds |= ZS_SCHED_CONFLICT;
	}

}
1034 1035
1035 1036 /* Add cpu time for a process to a pset, zone, and system totals */
1036 1037 static void
1037 1038 zsd_add_usage(zsd_ctl_t *ctl, zsd_pset_usage_t *usage, timestruc_t *delta)
1038 1039 {
1039 1040 zsd_system_t *system = ctl->zsctl_system;
1040 1041 zsd_zone_t *zone = usage->zsu_zone;
1041 1042 zsd_pset_t *pset = usage->zsu_pset;
1042 1043
1043 1044 TIMESTRUC_ADD_TIMESTRUC(usage->zsu_cpu_usage, *delta);
1044 1045 TIMESTRUC_ADD_TIMESTRUC(pset->zsp_usage_zones, *delta);
1045 1046 TIMESTRUC_ADD_TIMESTRUC(zone->zsz_cpu_usage, *delta);
1046 1047 TIMESTRUC_ADD_TIMESTRUC(system->zss_cpu_usage_zones, *delta);
1047 1048 }
1048 1049
1049 1050 /* Determine which processor sets have been deleted */
static void
zsd_mark_psets_end(zsd_ctl_t *ctl)
{
	zsd_pset_t *pset, *tmp;

	/*
	 * A pset not found this interval is deleted in two stages: on the
	 * first missing interval it is only marked deleted (so it is still
	 * reported once), and on the next pass it is unlinked and freed.
	 */
	pset = list_head(&ctl->zsctl_psets);
	while (pset != NULL) {
		if (pset->zsp_found == B_FALSE) {
			pset->zsp_empty = B_TRUE;
			if (pset->zsp_deleted == B_TRUE) {
				/* Already marked deleted; remove it now. */
				tmp = pset;
				pset = list_next(&ctl->zsctl_psets, pset);
				list_remove(&ctl->zsctl_psets, tmp);
				free(tmp);
				ctl->zsctl_npsets--;
				/* cursor already advanced; skip bottom step */
				continue;
			} else {
				/* Pset vanished during this interval */
				pset->zsp_new = B_FALSE;
				pset->zsp_deleted = B_TRUE;
				pset->zsp_active = B_TRUE;
			}
		}
		pset = list_next(&ctl->zsctl_psets, pset);
	}
}
1080 1081
1081 1082 /* Determine which zones are no longer bound to processor sets */
1082 1083 static void
1083 1084 zsd_mark_pset_usages_end(zsd_ctl_t *ctl)
1084 1085 {
1085 1086 zsd_pset_t *pset;
1086 1087 zsd_zone_t *zone;
1087 1088 zsd_pset_usage_t *usage, *tmp;
1088 1089
1089 1090 /*
1090 1091 * Mark pset as not exists, and deleted if it existed previous
1091 1092 * interval.
1092 1093 */
1093 1094 for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
1094 1095 pset = list_next(&ctl->zsctl_psets, pset)) {
1095 1096 usage = list_head(&pset->zsp_usage_list);
1096 1097 while (usage != NULL) {
1097 1098 /*
1098 1099 * Mark pset as not exists, and deleted if it existed
1099 1100 * previous interval.
1100 1101 */
1101 1102 if (usage->zsu_found == B_FALSE ||
1102 1103 usage->zsu_zone->zsz_deleted == B_TRUE ||
1103 1104 usage->zsu_pset->zsp_deleted == B_TRUE) {
1104 1105 tmp = usage;
1105 1106 usage = list_next(&pset->zsp_usage_list,
1106 1107 usage);
1107 1108 list_remove(&pset->zsp_usage_list, tmp);
1108 1109 free(tmp);
1109 1110 pset->zsp_nusage--;
1110 1111 ctl->zsctl_npset_usages--;
1111 1112 continue;
1112 1113 } else {
1113 1114 usage->zsu_new = B_FALSE;
1114 1115 usage->zsu_deleted = B_TRUE;
1115 1116 usage->zsu_active = B_TRUE;
1116 1117 }
1117 1118 /* Add cpu shares for usages that are in FSS */
1118 1119 zone = usage->zsu_zone;
1119 1120 if (usage->zsu_scheds & ZS_SCHED_FSS &&
1120 1121 zone->zsz_cpu_shares != ZS_SHARES_UNLIMITED &&
1121 1122 zone->zsz_cpu_shares != 0) {
1122 1123 zone = usage->zsu_zone;
1123 1124 usage->zsu_cpu_shares = zone->zsz_cpu_shares;
1124 1125 pset->zsp_cpu_shares += zone->zsz_cpu_shares;
1125 1126 }
1126 1127 usage = list_next(&pset->zsp_usage_list,
1127 1128 usage);
1128 1129 }
1129 1130 }
1130 1131 }
1131 1132
1132 1133 /* A zone has been found. Update its information */
/*
 * Record a running zone's configured caps and current resource usage for
 * this interval, and fold its countable resources (processes, lwps, SysV
 * IPC ids, lofi devices) into the system-wide totals.  Scheduler and
 * online-cpu fields are zeroed here and filled in later as processes and
 * pset bindings are discovered.
 */
static void
zsd_mark_zone_found(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t cpu_shares,
    uint64_t cpu_cap, uint64_t ram_cap, uint64_t locked_cap,
    uint64_t vm_cap, uint64_t processes_cap, uint64_t processes,
    uint64_t lwps_cap, uint64_t lwps, uint64_t shm_cap, uint64_t shm,
    uint64_t shmids_cap, uint64_t shmids, uint64_t semids_cap,
    uint64_t semids, uint64_t msgids_cap, uint64_t msgids, uint64_t lofi_cap,
    uint64_t lofi, char *poolname, char *psetname, uint_t sched, uint_t cputype,
    uint_t iptype)
{
	zsd_system_t *sys = ctl->zsctl_system;

	/* Each zone must be found at most once per interval */
	assert(zone->zsz_found == B_FALSE);

	/*
	 * Mark zone as exists, and new if it did not exist in previous
	 * interval.
	 */
	zone->zsz_found = B_TRUE;
	zone->zsz_empty = B_TRUE;
	zone->zsz_deleted = B_FALSE;

	/*
	 * Zone is new. Assume zone's properties are the same over entire
	 * interval.
	 */
	if (zone->zsz_active == B_FALSE)
		zone->zsz_new = B_TRUE;
	else
		zone->zsz_new = B_FALSE;

	(void) strlcpy(zone->zsz_pool, poolname, sizeof (zone->zsz_pool));
	(void) strlcpy(zone->zsz_pset, psetname, sizeof (zone->zsz_pset));
	zone->zsz_default_sched = sched;

	/* Schedulers updated later as processes are found */
	zone->zsz_scheds = 0;

	/* Cpus updated later as psets bound are identified */
	zone->zsz_cpus_online = 0;

	/* Pset binding is resolved later; start in the error state */
	zone->zsz_cputype = cputype;
	zone->zsz_iptype = iptype;
	zone->zsz_psetid = ZS_PSET_ERROR;
	zone->zsz_cpu_cap = cpu_cap;
	zone->zsz_cpu_shares = cpu_shares;
	zone->zsz_ram_cap = ram_cap;
	zone->zsz_locked_cap = locked_cap;
	zone->zsz_vm_cap = vm_cap;
	zone->zsz_processes_cap = processes_cap;
	zone->zsz_processes = processes;
	zone->zsz_lwps_cap = lwps_cap;
	zone->zsz_lwps = lwps;
	zone->zsz_shm_cap = shm_cap;
	zone->zsz_shm = shm;
	zone->zsz_shmids_cap = shmids_cap;
	zone->zsz_shmids = shmids;
	zone->zsz_semids_cap = semids_cap;
	zone->zsz_semids = semids;
	zone->zsz_msgids_cap = msgids_cap;
	zone->zsz_msgids = msgids;
	zone->zsz_lofi_cap = lofi_cap;
	zone->zsz_lofi = lofi;

	/* Accumulate system-wide totals (re-zeroed each interval) */
	sys->zss_processes += processes;
	sys->zss_lwps += lwps;
	sys->zss_shm += shm;
	sys->zss_shmids += shmids;
	sys->zss_semids += semids;
	sys->zss_msgids += msgids;
	sys->zss_lofi += lofi;
	zone->zsz_active = B_TRUE;
}
1206 1207
1207 1208
1208 1209 /* Determine which zones have halted */
1209 1210 static void
1210 1211 zsd_mark_zones_end(zsd_ctl_t *ctl)
1211 1212 {
1212 1213 zsd_zone_t *zone, *tmp;
1213 1214
1214 1215 /*
1215 1216 * Mark zone as not existing, or delete if it did not exist in
1216 1217 * previous interval.
1217 1218 */
1218 1219 zone = list_head(&ctl->zsctl_zones);
1219 1220 while (zone != NULL) {
1220 1221 if (zone->zsz_found == B_FALSE) {
1221 1222 zone->zsz_empty = B_TRUE;
1222 1223 if (zone->zsz_deleted == B_TRUE) {
1223 1224 /*
1224 1225 * Zone deleted in prior interval,
1225 1226 * so it no longer exists.
1226 1227 */
1227 1228 tmp = zone;
1228 1229 zone = list_next(&ctl->zsctl_zones, zone);
1229 1230 list_remove(&ctl->zsctl_zones, tmp);
1230 1231 free(tmp);
1231 1232 ctl->zsctl_nzones--;
1232 1233 continue;
1233 1234 } else {
1234 1235 zone->zsz_new = B_FALSE;
1235 1236 zone->zsz_deleted = B_TRUE;
1236 1237 zone->zsz_active = B_TRUE;
1237 1238 }
1238 1239 }
1239 1240 zone = list_next(&ctl->zsctl_zones, zone);
1240 1241 }
1241 1242 }
1242 1243
1243 1244 /*
1244 1245 * Mark cpus as not existing. If a cpu is found, it will be updated. If
1245 1246 * a cpu is not found, then it must have gone offline, so it will be
1246 1247 * deleted.
1247 1248 *
1248 1249 * The kstat tracking data is rolled so that the usage since the previous
1249 1250 * interval can be determined.
1250 1251 */
1251 1252 static void
1252 1253 zsd_mark_cpus_start(zsd_ctl_t *ctl, boolean_t roll)
1253 1254 {
1254 1255 zsd_cpu_t *cpu;
1255 1256
1256 1257 /*
1257 1258 * Mark all cpus as not existing. As cpus are found, they will
1258 1259 * be marked as existing.
1259 1260 */
1260 1261 for (cpu = list_head(&ctl->zsctl_cpus); cpu != NULL;
1261 1262 cpu = list_next(&ctl->zsctl_cpus, cpu)) {
1262 1263 cpu->zsc_found = B_FALSE;
1263 1264 if (cpu->zsc_active == B_TRUE && roll) {
1264 1265 cpu->zsc_psetid_prev = cpu->zsc_psetid;
1265 1266 cpu->zsc_nsec_idle_prev = cpu->zsc_nsec_idle;
1266 1267 cpu->zsc_nsec_intr_prev = cpu->zsc_nsec_intr;
1267 1268 cpu->zsc_nsec_kern_prev = cpu->zsc_nsec_kern;
1268 1269 cpu->zsc_nsec_user_prev = cpu->zsc_nsec_user;
1269 1270 }
1270 1271 }
1271 1272 }
1272 1273
1273 1274 /*
1274 1275 * An array the size of the maximum number of cpus is kept. Within this array
1275 1276 * a list of the online cpus is maintained.
1276 1277 */
1277 1278 zsd_cpu_t *
1278 1279 zsd_lookup_insert_cpu(zsd_ctl_t *ctl, processorid_t cpuid)
1279 1280 {
1280 1281 zsd_cpu_t *cpu;
1281 1282
1282 1283 assert(cpuid < ctl->zsctl_maxcpuid);
1283 1284 cpu = &(ctl->zsctl_cpu_array[cpuid]);
1284 1285 assert(cpuid == cpu->zsc_id);
1285 1286
1286 1287 if (cpu->zsc_allocated == B_FALSE) {
1287 1288 cpu->zsc_allocated = B_TRUE;
1288 1289 list_insert_tail(&ctl->zsctl_cpus, cpu);
1289 1290 }
1290 1291 return (cpu);
1291 1292 }
1292 1293
1293 1294 /* A cpu has been found. Update its information */
1294 1295 static void
1295 1296 zsd_mark_cpu_found(zsd_cpu_t *cpu, zsd_pset_t *pset, psetid_t psetid)
1296 1297 {
1297 1298 /*
1298 1299 * legacy processor sets, the cpu may move while zonestatd is
1299 1300 * inspecting, causing it to be found twice. In this case, just
1300 1301 * leave cpu in the first processor set in which it was found.
1301 1302 */
1302 1303 if (cpu->zsc_found == B_TRUE)
1303 1304 return;
1304 1305
1305 1306 /* Mark cpu as online */
1306 1307 cpu->zsc_found = B_TRUE;
1307 1308 cpu->zsc_offlined = B_FALSE;
1308 1309 cpu->zsc_pset = pset;
1309 1310 /*
1310 1311 * cpu is newly online.
1311 1312 */
1312 1313 if (cpu->zsc_active == B_FALSE) {
1313 1314 /*
1314 1315 * Cpu is newly online.
1315 1316 */
1316 1317 cpu->zsc_onlined = B_TRUE;
1317 1318 cpu->zsc_psetid = psetid;
1318 1319 cpu->zsc_psetid_prev = psetid;
1319 1320 } else {
1320 1321 /*
1321 1322 * cpu online during previous interval. Save properties at
1322 1323 * start of interval
1323 1324 */
1324 1325 cpu->zsc_onlined = B_FALSE;
1325 1326 cpu->zsc_psetid = psetid;
1326 1327
1327 1328 }
1328 1329 cpu->zsc_active = B_TRUE;
1329 1330 }
1330 1331
1331 1332 /* Remove all offlined cpus from the list of tracked cpus */
1332 1333 static void
1333 1334 zsd_mark_cpus_end(zsd_ctl_t *ctl)
1334 1335 {
1335 1336 zsd_cpu_t *cpu, *tmp;
1336 1337 int id;
1337 1338
1338 1339 /* Mark cpu as online or offline */
1339 1340 cpu = list_head(&ctl->zsctl_cpus);
1340 1341 while (cpu != NULL) {
1341 1342 if (cpu->zsc_found == B_FALSE) {
1342 1343 if (cpu->zsc_offlined == B_TRUE) {
1343 1344 /*
1344 1345 * cpu offlined in prior interval. It is gone.
1345 1346 */
1346 1347 tmp = cpu;
1347 1348 cpu = list_next(&ctl->zsctl_cpus, cpu);
1348 1349 list_remove(&ctl->zsctl_cpus, tmp);
1349 1350 /* Clear structure for future use */
1350 1351 id = tmp->zsc_id;
1351 1352 bzero(tmp, sizeof (zsd_cpu_t));
1352 1353 tmp->zsc_id = id;
1353 1354 tmp->zsc_allocated = B_FALSE;
1354 1355 tmp->zsc_psetid = ZS_PSET_ERROR;
1355 1356 tmp->zsc_psetid_prev = ZS_PSET_ERROR;
1356 1357
1357 1358 } else {
1358 1359 /*
1359 1360 * cpu online at start of interval. Treat
1360 1361 * as still online, since it was online for
1361 1362 * some portion of the interval.
1362 1363 */
1363 1364 cpu->zsc_offlined = B_TRUE;
1364 1365 cpu->zsc_onlined = B_FALSE;
1365 1366 cpu->zsc_active = B_TRUE;
1366 1367 cpu->zsc_psetid = cpu->zsc_psetid_prev;
1367 1368 cpu->zsc_pset = NULL;
1368 1369 }
1369 1370 }
1370 1371 cpu = list_next(&ctl->zsctl_cpus, cpu);
1371 1372 }
1372 1373 }
1373 1374
1374 1375 /* Some utility functions for managing the list of processor sets */
1375 1376 static zsd_pset_t *
1376 1377 zsd_lookup_pset_byid(zsd_ctl_t *ctl, psetid_t psetid)
1377 1378 {
1378 1379 zsd_pset_t *pset;
1379 1380
1380 1381 for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
1381 1382 pset = list_next(&ctl->zsctl_psets, pset)) {
1382 1383 if (pset->zsp_id == psetid)
1383 1384 return (pset);
1384 1385 }
1385 1386 return (NULL);
1386 1387 }
1387 1388
1388 1389 static zsd_pset_t *
1389 1390 zsd_lookup_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid)
1390 1391 {
1391 1392 zsd_pset_t *pset;
1392 1393
1393 1394 for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
1394 1395 pset = list_next(&ctl->zsctl_psets, pset)) {
1395 1396 if (strcmp(pset->zsp_name, psetname) == 0) {
1396 1397 if (psetid != -1)
1397 1398 pset->zsp_id = psetid;
1398 1399 return (pset);
1399 1400 }
1400 1401 }
1401 1402 return (NULL);
1402 1403 }
1403 1404
1404 1405 static zsd_pset_t *
1405 1406 zsd_allocate_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid)
1406 1407 {
1407 1408 zsd_pset_t *pset;
1408 1409
1409 1410 if ((pset = (zsd_pset_t *)calloc(1, sizeof (zsd_pset_t))) == NULL)
1410 1411 return (NULL);
1411 1412
1412 1413 (void) strlcpy(pset->zsp_name, psetname, sizeof (pset->zsp_name));
1413 1414 pset->zsp_id = psetid;
1414 1415 pset->zsp_found = B_FALSE;
1415 1416 /*
1416 1417 * Allocate as deleted so if not found in first pass, pset is deleted
1417 1418 * from list. This can happen if pset is returned by pset_list, but
1418 1419 * is destroyed before first attempt to fetch pset details.
1419 1420 */
1420 1421 list_create(&pset->zsp_usage_list, sizeof (zsd_pset_usage_t),
1421 1422 offsetof(zsd_pset_usage_t, zsu_next));
1422 1423
1423 1424 pset->zsp_hrstart = g_hrnow;
1424 1425 pset->zsp_deleted = B_TRUE;
1425 1426 pset->zsp_empty = B_TRUE;
1426 1427 ctl->zsctl_npsets++;
1427 1428
1428 1429 return (pset);
1429 1430 }
1430 1431
1431 1432 static zsd_pset_t *
1432 1433 zsd_lookup_insert_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid)
1433 1434 {
1434 1435 zsd_pset_t *pset, *tmp;
1435 1436
1436 1437 if ((pset = zsd_lookup_pset(ctl, psetname, psetid)) != NULL)
1437 1438 return (pset);
1438 1439
1439 1440 if ((pset = zsd_allocate_pset(ctl, psetname, psetid)) == NULL)
1440 1441 return (NULL);
1441 1442
1442 1443 /* Insert sorted by psetname */
1443 1444 tmp = list_head(&ctl->zsctl_psets);
1444 1445 while (tmp != NULL && strcmp(psetname, tmp->zsp_name) > 0)
1445 1446 tmp = list_next(&ctl->zsctl_psets, tmp);
1446 1447
1447 1448 list_insert_before(&ctl->zsctl_psets, tmp, pset);
1448 1449 return (pset);
1449 1450 }
1450 1451
1451 1452 /* Some utility functions for managing the list of zones using each pset */
1452 1453 static zsd_pset_usage_t *
1453 1454 zsd_lookup_usage(zsd_pset_t *pset, zsd_zone_t *zone)
1454 1455 {
1455 1456 zsd_pset_usage_t *usage;
1456 1457
1457 1458 for (usage = list_head(&pset->zsp_usage_list); usage != NULL;
1458 1459 usage = list_next(&pset->zsp_usage_list, usage))
1459 1460 if (usage->zsu_zone == zone)
1460 1461 return (usage);
1461 1462
1462 1463 return (NULL);
1463 1464 }
1464 1465
1465 1466 static zsd_pset_usage_t *
1466 1467 zsd_allocate_pset_usage(zsd_ctl_t *ctl, zsd_pset_t *pset, zsd_zone_t *zone)
1467 1468 {
1468 1469 zsd_pset_usage_t *usage;
1469 1470
1470 1471 if ((usage = (zsd_pset_usage_t *)calloc(1, sizeof (zsd_pset_usage_t)))
1471 1472 == NULL)
1472 1473 return (NULL);
1473 1474
1474 1475 list_link_init(&usage->zsu_next);
1475 1476 usage->zsu_zone = zone;
1476 1477 usage->zsu_zoneid = zone->zsz_id;
1477 1478 usage->zsu_pset = pset;
1478 1479 usage->zsu_found = B_FALSE;
1479 1480 usage->zsu_active = B_FALSE;
1480 1481 usage->zsu_new = B_FALSE;
1481 1482 /*
1482 1483 * Allocate as not deleted. If a process is found in a pset for
1483 1484 * a zone, the usage will not be deleted until at least the next
1484 1485 * interval.
1485 1486 */
1486 1487 usage->zsu_start = g_now;
1487 1488 usage->zsu_hrstart = g_hrnow;
1488 1489 usage->zsu_deleted = B_FALSE;
1489 1490 usage->zsu_empty = B_TRUE;
1490 1491 usage->zsu_scheds = 0;
1491 1492 usage->zsu_cpu_shares = ZS_LIMIT_NONE;
1492 1493
1493 1494 ctl->zsctl_npset_usages++;
1494 1495 pset->zsp_nusage++;
1495 1496
1496 1497 return (usage);
1497 1498 }
1498 1499
1499 1500 static zsd_pset_usage_t *
1500 1501 zsd_lookup_insert_usage(zsd_ctl_t *ctl, zsd_pset_t *pset, zsd_zone_t *zone)
1501 1502 {
1502 1503 zsd_pset_usage_t *usage, *tmp;
1503 1504
1504 1505 if ((usage = zsd_lookup_usage(pset, zone))
1505 1506 != NULL)
1506 1507 return (usage);
1507 1508
1508 1509 if ((usage = zsd_allocate_pset_usage(ctl, pset, zone)) == NULL)
1509 1510 return (NULL);
1510 1511
1511 1512 tmp = list_head(&pset->zsp_usage_list);
1512 1513 while (tmp != NULL && strcmp(zone->zsz_name, tmp->zsu_zone->zsz_name)
1513 1514 > 0)
1514 1515 tmp = list_next(&pset->zsp_usage_list, tmp);
1515 1516
1516 1517 list_insert_before(&pset->zsp_usage_list, tmp, usage);
1517 1518 return (usage);
1518 1519 }
1519 1520
1520 1521 static void
1521 1522 zsd_refresh_system(zsd_ctl_t *ctl)
1522 1523 {
1523 1524 zsd_system_t *system = ctl->zsctl_system;
1524 1525
1525 1526 /* Re-count these values each interval */
1526 1527 system->zss_processes = 0;
1527 1528 system->zss_lwps = 0;
1528 1529 system->zss_shm = 0;
1529 1530 system->zss_shmids = 0;
1530 1531 system->zss_semids = 0;
1531 1532 system->zss_msgids = 0;
1532 1533 system->zss_lofi = 0;
1533 1534 }
1534 1535
1535 1536
1536 1537 /* Reads each cpu's kstats, and adds the usage to the cpu's pset */
/*
 * Read this cpu's "cpu:<id>:sys" kstat and fold the per-interval deltas of
 * idle/intr/kernel/user time into the cpu, its pset (split with the prior
 * pset if the cpu migrated mid-interval), and the system totals.  Returns
 * silently (charging nothing) if the kstat or any expected counter is
 * missing, e.g. when the cpu went offline between scan and read.
 */
static void
zsd_update_cpu_stats(zsd_ctl_t *ctl, zsd_cpu_t *cpu)
{
	zsd_system_t *sys;
	processorid_t cpuid;
	zsd_pset_t *pset_prev;
	zsd_pset_t *pset;
	kstat_t *kstat;
	kstat_named_t *knp;
	kid_t kid;
	uint64_t idle, intr, kern, user;

	sys = ctl->zsctl_system;
	pset = cpu->zsc_pset;
	knp = NULL;
	kid = -1;
	cpuid = cpu->zsc_id;

	/* Get the cpu time totals for this cpu */
	kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "cpu", cpuid, "sys");
	if (kstat == NULL)
		return;

	kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
	if (kid == -1)
		return;

	knp = kstat_data_lookup(kstat, "cpu_nsec_idle");
	if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64)
		return;

	idle = knp->value.ui64;

	knp = kstat_data_lookup(kstat, "cpu_nsec_kernel");
	if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64)
		return;

	kern = knp->value.ui64;

	knp = kstat_data_lookup(kstat, "cpu_nsec_user");
	if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64)
		return;

	user = knp->value.ui64;

	/*
	 * Tracking intr time per cpu just exists for future enhancements.
	 * The value is presently always zero.
	 */
	intr = 0;
	cpu->zsc_nsec_idle = idle;
	cpu->zsc_nsec_intr = intr;
	cpu->zsc_nsec_kern = kern;
	cpu->zsc_nsec_user = user;

	if (cpu->zsc_onlined == B_TRUE) {
		/*
		 * cpu is newly online. There is no reference value,
		 * so just record its current stats for comparison
		 * on next stat read.
		 */
		cpu->zsc_nsec_idle_prev = cpu->zsc_nsec_idle;
		cpu->zsc_nsec_intr_prev = cpu->zsc_nsec_intr;
		cpu->zsc_nsec_kern_prev = cpu->zsc_nsec_kern;
		cpu->zsc_nsec_user_prev = cpu->zsc_nsec_user;
		return;
	}

	/*
	 * Calculate relative time since previous refresh.
	 * Paranoia. Don't let time go backwards.
	 */
	idle = intr = kern = user = 0;
	if (cpu->zsc_nsec_idle > cpu->zsc_nsec_idle_prev)
		idle = cpu->zsc_nsec_idle - cpu->zsc_nsec_idle_prev;

	if (cpu->zsc_nsec_intr > cpu->zsc_nsec_intr_prev)
		intr = cpu->zsc_nsec_intr - cpu->zsc_nsec_intr_prev;

	if (cpu->zsc_nsec_kern > cpu->zsc_nsec_kern_prev)
		kern = cpu->zsc_nsec_kern - cpu->zsc_nsec_kern_prev;

	if (cpu->zsc_nsec_user > cpu->zsc_nsec_user_prev)
		user = cpu->zsc_nsec_user - cpu->zsc_nsec_user_prev;

	/* Update totals for cpu usage */
	TIMESTRUC_ADD_NANOSEC(cpu->zsc_idle, idle);
	TIMESTRUC_ADD_NANOSEC(cpu->zsc_intr, intr);
	TIMESTRUC_ADD_NANOSEC(cpu->zsc_kern, kern);
	TIMESTRUC_ADD_NANOSEC(cpu->zsc_user, user);

	/*
	 * Add cpu's stats to its pset if it is known to be in
	 * the pset since previous read.
	 * NOTE: pset_prev is only assigned by the short-circuited
	 * lookup in the condition below, and is only used in the
	 * else branch, where the lookup must have succeeded.
	 */
	if (cpu->zsc_psetid == cpu->zsc_psetid_prev ||
	    cpu->zsc_psetid_prev == ZS_PSET_ERROR ||
	    (pset_prev = zsd_lookup_pset_byid(ctl,
	    cpu->zsc_psetid_prev)) == NULL) {
		TIMESTRUC_ADD_NANOSEC(pset->zsp_idle, idle);
		TIMESTRUC_ADD_NANOSEC(pset->zsp_intr, intr);
		TIMESTRUC_ADD_NANOSEC(pset->zsp_kern, kern);
		TIMESTRUC_ADD_NANOSEC(pset->zsp_user, user);
	} else {
		/*
		 * Last pset was different than current pset.
		 * Best guess is to split usage between the two.
		 * The current pset gets the remainder so no
		 * nanoseconds are lost to integer division.
		 */
		TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_idle, idle / 2);
		TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_intr, intr / 2);
		TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_kern, kern / 2);
		TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_user, user / 2);

		TIMESTRUC_ADD_NANOSEC(pset->zsp_idle,
		    (idle / 2) + (idle % 2));
		TIMESTRUC_ADD_NANOSEC(pset->zsp_intr,
		    (intr / 2) + (intr % 2));
		TIMESTRUC_ADD_NANOSEC(pset->zsp_kern,
		    (kern / 2) + (kern % 2));
		TIMESTRUC_ADD_NANOSEC(pset->zsp_user,
		    (user / 2) + (user % 2));
	}
	TIMESTRUC_ADD_NANOSEC(sys->zss_idle, idle);
	TIMESTRUC_ADD_NANOSEC(sys->zss_intr, intr);
	TIMESTRUC_ADD_NANOSEC(sys->zss_kern, kern);
	TIMESTRUC_ADD_NANOSEC(sys->zss_user, user);
}
1664 1665
1665 1666 /* Determine the details of a processor set by pset_id */
1666 1667 static int
1667 1668 zsd_get_pool_pset(zsd_ctl_t *ctl, psetid_t psetid, char *psetname,
1668 1669 size_t namelen, uint_t *cputype, uint64_t *online, uint64_t *size,
1669 1670 uint64_t *min, uint64_t *max, int64_t *importance)
1670 1671 {
1671 1672 uint_t old, num;
1672 1673
1673 1674 pool_conf_t *conf = ctl->zsctl_pool_conf;
1674 1675 pool_value_t **vals = ctl->zsctl_pool_vals;
1675 1676 pool_resource_t **res_list = NULL;
1676 1677 pool_resource_t *pset;
1677 1678 pool_component_t **cpus = NULL;
1678 1679 processorid_t *cache;
1679 1680 const char *string;
1680 1681 uint64_t uint64;
1681 1682 int64_t int64;
1682 1683 int i, ret, type;
1683 1684
1684 1685 if (ctl->zsctl_pool_status == POOL_DISABLED) {
1685 1686
1686 1687 /*
1687 1688 * Inspect legacy psets
1688 1689 */
1689 1690 for (;;) {
1690 1691 old = num = ctl->zsctl_cpu_ncache;
1691 1692 ret = pset_info(psetid, &type, &num,
1692 1693 ctl->zsctl_cpu_cache);
1693 1694 if (ret < 0) {
1694 1695 /* pset is gone. Tell caller to retry */
1695 1696 errno = EINTR;
1696 1697 return (-1);
1697 1698 }
1698 1699 if (num <= old) {
1699 1700 /* Success */
1700 1701 break;
1701 1702 }
1702 1703 if ((cache = (processorid_t *)realloc(
1703 1704 ctl->zsctl_cpu_cache, num *
1704 1705 sizeof (processorid_t))) != NULL) {
1705 1706 ctl->zsctl_cpu_ncache = num;
1706 1707 ctl->zsctl_cpu_cache = cache;
1707 1708 } else {
1708 1709 /*
1709 1710 * Could not allocate to get new cpu list.
1710 1711 */
1711 1712 zsd_warn(gettext(
1712 1713 "Could not allocate for cpu list"));
1713 1714 errno = ENOMEM;
1714 1715 return (-1);
1715 1716 }
1716 1717 }
1717 1718 /*
1718 1719 * Old school pset. Just make min and max equal
1719 1720 * to its size
1720 1721 */
1721 1722 if (psetid == ZS_PSET_DEFAULT) {
1722 1723 *cputype = ZS_CPUTYPE_DEFAULT_PSET;
1723 1724 (void) strlcpy(psetname, "pset_default", namelen);
1724 1725 } else {
1725 1726 *cputype = ZS_CPUTYPE_PSRSET_PSET;
1726 1727 (void) snprintf(psetname, namelen,
1727 1728 "SUNWlegacy_pset_%d", psetid);
1728 1729 }
1729 1730
1730 1731 /*
1731 1732 * Just treat legacy pset as a simple pool pset
1732 1733 */
1733 1734 *online = num;
1734 1735 *size = num;
1735 1736 *min = num;
1736 1737 *max = num;
1737 1738 *importance = 1;
1738 1739
1739 1740 return (0);
1740 1741 }
1741 1742
1742 1743 /* Look up the pool pset using the pset id */
1743 1744 res_list = NULL;
1744 1745 pool_value_set_int64(vals[1], psetid);
1745 1746 if (pool_value_set_name(vals[1], "pset.sys_id")
1746 1747 != PO_SUCCESS)
1747 1748 goto err;
1748 1749
1749 1750 if (pool_value_set_name(vals[0], "type") != PO_SUCCESS)
1750 1751 goto err;
1751 1752 if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS)
1752 1753 goto err;
1753 1754 if ((res_list = pool_query_resources(conf, &num, vals)) == NULL)
1754 1755 goto err;
1755 1756 if (num != 1)
1756 1757 goto err;
1757 1758 pset = res_list[0];
1758 1759 free(res_list);
1759 1760 res_list = NULL;
1760 1761 if (pool_get_property(conf, pool_resource_to_elem(conf, pset),
1761 1762 "pset.name", vals[0]) != POC_STRING ||
1762 1763 pool_value_get_string(vals[0], &string) != PO_SUCCESS)
1763 1764 goto err;
1764 1765
1765 1766 (void) strlcpy(psetname, string, namelen);
1766 1767 if (strncmp(psetname, "SUNWtmp", strlen("SUNWtmp")) == 0)
1767 1768 *cputype = ZS_CPUTYPE_DEDICATED;
1768 1769 else if (psetid == ZS_PSET_DEFAULT)
1769 1770 *cputype = ZS_CPUTYPE_DEFAULT_PSET;
1770 1771 else
1771 1772 *cputype = ZS_CPUTYPE_POOL_PSET;
1772 1773
1773 1774 /* Get size, min, max, and importance */
1774 1775 if (pool_get_property(conf, pool_resource_to_elem(conf,
1775 1776 pset), "pset.size", vals[0]) == POC_UINT &&
1776 1777 pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS)
1777 1778 *size = uint64;
1778 1779 else
1779 1780 *size = 0;
1780 1781
1781 1782 /* Get size, min, max, and importance */
1782 1783 if (pool_get_property(conf, pool_resource_to_elem(conf,
1783 1784 pset), "pset.min", vals[0]) == POC_UINT &&
1784 1785 pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS)
1785 1786 *min = uint64;
1786 1787 else
1787 1788 *min = 0;
1788 1789 if (*min >= ZSD_PSET_UNLIMITED)
1789 1790 *min = ZS_LIMIT_NONE;
1790 1791
1791 1792 if (pool_get_property(conf, pool_resource_to_elem(conf,
1792 1793 pset), "pset.max", vals[0]) == POC_UINT &&
1793 1794 pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS)
1794 1795 *max = uint64;
1795 1796 else
1796 1797 *max = ZS_LIMIT_NONE;
1797 1798
1798 1799 if (*max >= ZSD_PSET_UNLIMITED)
1799 1800 *max = ZS_LIMIT_NONE;
1800 1801
1801 1802 if (pool_get_property(conf, pool_resource_to_elem(conf,
1802 1803 pset), "pset.importance", vals[0]) == POC_INT &&
1803 1804 pool_value_get_int64(vals[0], &int64) == PO_SUCCESS)
1804 1805 *importance = int64;
1805 1806 else
1806 1807 *importance = (uint64_t)1;
1807 1808
1808 1809 *online = 0;
1809 1810 if (*size == 0)
1810 1811 return (0);
1811 1812
1812 1813 /* get cpus */
1813 1814 cpus = pool_query_resource_components(conf, pset, &num, NULL);
1814 1815 if (cpus == NULL)
1815 1816 goto err;
1816 1817
1817 1818 /* Make sure there is space for cpu id list */
1818 1819 if (num > ctl->zsctl_cpu_ncache) {
1819 1820 if ((cache = (processorid_t *)realloc(
1820 1821 ctl->zsctl_cpu_cache, num *
1821 1822 sizeof (processorid_t))) != NULL) {
1822 1823 ctl->zsctl_cpu_ncache = num;
1823 1824 ctl->zsctl_cpu_cache = cache;
1824 1825 } else {
1825 1826 /*
1826 1827 * Could not allocate to get new cpu list.
1827 1828 */
1828 1829 zsd_warn(gettext(
1829 1830 "Could not allocate for cpu list"));
1830 1831 goto err;
1831 1832 }
1832 1833 }
1833 1834
1834 1835 /* count the online cpus */
1835 1836 for (i = 0; i < num; i++) {
1836 1837 if (pool_get_property(conf, pool_component_to_elem(
1837 1838 conf, cpus[i]), "cpu.status", vals[0]) != POC_STRING ||
1838 1839 pool_value_get_string(vals[0], &string) != PO_SUCCESS)
1839 1840 goto err;
1840 1841
1841 1842 if (strcmp(string, "on-line") != 0 &&
1842 1843 strcmp(string, "no-intr") != 0)
1843 1844 continue;
1844 1845
1845 1846 if (pool_get_property(conf, pool_component_to_elem(
1846 1847 conf, cpus[i]), "cpu.sys_id", vals[0]) != POC_INT ||
1847 1848 pool_value_get_int64(vals[0], &int64) != PO_SUCCESS)
1848 1849 goto err;
1849 1850
1850 1851 (*online)++;
1851 1852 ctl->zsctl_cpu_cache[i] = (psetid_t)int64;
1852 1853 }
1853 1854 free(cpus);
1854 1855 return (0);
1855 1856 err:
1856 1857 if (res_list != NULL)
1857 1858 free(res_list);
1858 1859 if (cpus != NULL)
1859 1860 free(cpus);
1860 1861
1861 1862 /*
1862 1863 * The pools operations should succeed since the conf is a consistent
1863 1864 * snapshot. Tell caller there is no need to retry.
1864 1865 */
1865 1866 errno = EINVAL;
1866 1867 return (-1);
1867 1868 }
1868 1869
1869 1870 /*
1870 1871 * Update the current list of processor sets.
1871 1872 * This also updates the list of online cpus, and each cpu's pset membership.
1872 1873 */
1873 1874 static void
1874 1875 zsd_refresh_psets(zsd_ctl_t *ctl)
1875 1876 {
/*
 * Refresh the system's view of processor sets and online cpus.  When
 * libpool is active the psets come from the dynamic pool configuration;
 * otherwise the legacy pset_list(2) interface is used, with the default
 * pset prepended to the list.
 */
1876 1877 int i, j, ret, state;
1877 1878 uint_t old, num;
1878 1879 uint_t cputype;
1879 1880 int64_t sys_id, importance;
1880 1881 uint64_t online, size, min, max;
1881 1882 zsd_system_t *system;
1882 1883 zsd_pset_t *pset;
1883 1884 zsd_cpu_t *cpu;
1884 1885 psetid_t *cache;
1885 1886 char psetname[ZS_PSETNAME_MAX];
1886 1887 processorid_t cpuid;
1887 1888 pool_value_t *pv_save = NULL;
1888 1889 pool_resource_t **res_list = NULL;
1889 1890 pool_resource_t *res;
1890 1891 pool_value_t **vals;
1891 1892 pool_conf_t *conf;
1892 1893 boolean_t roll_cpus = B_TRUE;
1893 1894
1894 1895 /* Zero cpu counters to recount them */
1895 1896 system = ctl->zsctl_system;
1896 1897 system->zss_ncpus = 0;
1897 1898 system->zss_ncpus_online = 0;
/*
 * retry: is re-entered when pool_conf_update() reports pools became
 * disabled underneath us (POE_SYSTEM/ENOTACTIVE), so the legacy branch
 * can be taken on the second pass.
 */
1898 1899 retry:
1899 1900 ret = pool_get_status(&state);
1900 1901 if (ret == 0 && state == POOL_ENABLED) {
1901 1902
1902 1903 conf = ctl->zsctl_pool_conf;
1903 1904 vals = ctl->zsctl_pool_vals;
1904 1905 pv_save = vals[1];
1905 1906 vals[1] = NULL;
1906 1907
1907 1908 if (ctl->zsctl_pool_status == POOL_DISABLED) {
1908 1909 if (pool_conf_open(ctl->zsctl_pool_conf,
1909 1910 pool_dynamic_location(), PO_RDONLY) == 0) {
1910 1911 ctl->zsctl_pool_status = POOL_ENABLED;
1911 1912 ctl->zsctl_pool_changed = POU_PSET;
1912 1913 }
1913 1914 } else {
1914 1915 ctl->zsctl_pool_changed = 0;
1915 1916 ret = pool_conf_update(ctl->zsctl_pool_conf,
1916 1917 &(ctl->zsctl_pool_changed));
1917 1918 if (ret < 0) {
1918 1919 /* Pools must have become disabled */
1919 1920 (void) pool_conf_close(ctl->zsctl_pool_conf);
1920 1921 ctl->zsctl_pool_status = POOL_DISABLED;
1921 1922 if (pool_error() == POE_SYSTEM && errno ==
1922 1923 ENOTACTIVE)
1923 1924 goto retry;
1924 1925
1925 1926 zsd_warn(gettext(
1926 1927 "Unable to update pool configuration"));
1927 1928 /* Not able to get pool info. Don't update. */
1928 1929 goto err;
1929 1930 }
1930 1931 }
1931 1932 /* Get the list of psets using libpool */
1932 1933 if (pool_value_set_name(vals[0], "type") != PO_SUCCESS)
1933 1934 goto err;
1934 1935
1935 1936 if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS)
1936 1937 goto err;
1937 1938 if ((res_list = pool_query_resources(conf, &num, vals))
1938 1939 == NULL)
1939 1940 goto err;
1940 1941
/* Grow the pset id cache if this snapshot has more psets than before */
1941 1942 if (num > ctl->zsctl_pset_ncache) {
1942 1943 if ((cache = (psetid_t *)realloc(ctl->zsctl_pset_cache,
1943 1944 (num) * sizeof (psetid_t))) == NULL) {
1944 1945 goto err;
1945 1946 }
1946 1947 ctl->zsctl_pset_ncache = num;
1947 1948 ctl->zsctl_pset_cache = cache;
1948 1949 }
1949 1950 /* Save the pset id of each pset */
1950 1951 for (i = 0; i < num; i++) {
1951 1952 res = res_list[i];
1952 1953 if (pool_get_property(conf, pool_resource_to_elem(conf,
1953 1954 res), "pset.sys_id", vals[0]) != POC_INT ||
1954 1955 pool_value_get_int64(vals[0], &sys_id)
1955 1956 != PO_SUCCESS)
1956 1957 goto err;
1957 1958 ctl->zsctl_pset_cache[i] = (int)sys_id;
1958 1959 }
1959 1960 vals[1] = pv_save;
1960 1961 pv_save = NULL;
1961 1962 } else {
1962 1963 if (ctl->zsctl_pool_status == POOL_ENABLED) {
1963 1964 (void) pool_conf_close(ctl->zsctl_pool_conf);
1964 1965 ctl->zsctl_pool_status = POOL_DISABLED;
1965 1966 }
1966 1967 /* Get the pset list using legacy psets */
1967 1968 for (;;) {
1968 1969 old = num = ctl->zsctl_pset_ncache;
1969 1970 (void) pset_list(ctl->zsctl_pset_cache, &num);
/* +1 leaves room for the default pset added below */
1970 1971 if ((num + 1) <= old) {
1971 1972 break;
1972 1973 }
1973 1974 if ((cache = (psetid_t *)realloc(ctl->zsctl_pset_cache,
1974 1975 (num + 1) * sizeof (psetid_t))) != NULL) {
1975 1976 ctl->zsctl_pset_ncache = num + 1;
1976 1977 ctl->zsctl_pset_cache = cache;
1977 1978 } else {
1978 1979 /*
1979 1980 * Could not allocate to get new pset list.
1980 1981 * Give up
1981 1982 */
1982 1983 return;
1983 1984 }
1984 1985 }
1985 1986 /* Add the default pset to list */
1986 1987 ctl->zsctl_pset_cache[num] = ctl->zsctl_pset_cache[0];
1987 1988 ctl->zsctl_pset_cache[0] = ZS_PSET_DEFAULT;
1988 1989 num++;
1989 1990 }
/*
 * Walk all psets, updating cached pset/cpu state.  If pset membership
 * changes mid-walk (zsd_get_pool_pset() fails with EINTR) restart the
 * walk; roll_cpus is B_FALSE then so cpu usage is not rolled twice.
 */
1990 1991 psets_changed:
1991 1992 zsd_mark_cpus_start(ctl, roll_cpus);
1992 1993 zsd_mark_psets_start(ctl);
1993 1994 roll_cpus = B_FALSE;
1994 1995
1995 1996 /* Refresh cpu membership of all psets */
1996 1997 for (i = 0; i < num; i++) {
1997 1998
1998 1999 /* Get pool pset information */
1999 2000 sys_id = ctl->zsctl_pset_cache[i];
2000 2001 if (zsd_get_pool_pset(ctl, sys_id, psetname, sizeof (psetname),
2001 2002 &cputype, &online, &size, &min, &max, &importance)
2002 2003 != 0) {
2003 2004 if (errno == EINTR)
2004 2005 goto psets_changed;
2005 2006 zsd_warn(gettext("Failed to get info for pset %d"),
2006 2007 sys_id);
2007 2008 continue;
2008 2009 }
2009 2010
2010 2011 system->zss_ncpus += size;
2011 2012 system->zss_ncpus_online += online;
2012 2013
2013 2014 pset = zsd_lookup_insert_pset(ctl, psetname,
2014 2015 ctl->zsctl_pset_cache[i]);
2015 2016
2016 2017 /* update pset info */
2017 2018 zsd_mark_pset_found(pset, cputype, online, size, min,
2018 2019 max, importance);
2019 2020
2020 2021 /* update each cpu in pset */
/*
 * NOTE(review): assumes zsd_get_pool_pset() left this pset's online
 * cpu ids in zsctl_cpu_cache[0..online-1] -- confirm against that
 * function.
 */
2021 2022 for (j = 0; j < pset->zsp_online; j++) {
2022 2023 cpuid = ctl->zsctl_cpu_cache[j];
2023 2024 cpu = zsd_lookup_insert_cpu(ctl, cpuid);
2024 2025 zsd_mark_cpu_found(cpu, pset, sys_id);
2025 2026 }
2026 2027 }
/*
 * The success path falls through into err: as well; from here on it is
 * cleanup only.  pv_save is non-NULL (and vals valid) only when the
 * libpool branch was taken and failed before restoring vals[1].
 */
2027 2028 err:
2028 2029 if (res_list != NULL)
2029 2030 free(res_list);
2030 2031 if (pv_save != NULL)
2031 2032 vals[1] = pv_save;
2032 2033 }
2033 2034
2034 2035
2035 2036
2036 2037 /*
2037 2038 * Fetch the current pool and pset name for the given zone.
2038 2039 */
2039 2040 static void
2040 2041 zsd_get_zone_pool_pset(zsd_ctl_t *ctl, zsd_zone_t *zone,
2041 2042 char *pool, int poollen, char *pset, int psetlen, uint_t *cputype)
2042 2043 {
2043 2044 poolid_t poolid;
2044 2045 pool_t **pools = NULL;
2045 2046 pool_resource_t **res_list = NULL;
2046 2047 char poolname[ZS_POOLNAME_MAX];
2047 2048 char psetname[ZS_PSETNAME_MAX];
2048 2049 pool_conf_t *conf = ctl->zsctl_pool_conf;
2049 2050 pool_value_t *pv_save = NULL;
2050 2051 pool_value_t **vals = ctl->zsctl_pool_vals;
2051 2052 const char *string;
2052 2053 int ret;
2053 2054 int64_t int64;
2054 2055 uint_t num;
2055 2056
2056 2057 ret = zone_getattr(zone->zsz_id, ZONE_ATTR_POOLID,
2057 2058 &poolid, sizeof (poolid));
2058 2059 if (ret < 0)
2059 2060 goto lookup_done;
2060 2061
2061 2062 pv_save = vals[1];
2062 2063 vals[1] = NULL;
2063 2064 pools = NULL;
2064 2065 res_list = NULL;
2065 2066
2066 2067 /* Default values if lookup fails */
2067 2068 (void) strlcpy(poolname, "pool_default", sizeof (poolname));
2068 2069 (void) strlcpy(psetname, "pset_default", sizeof (poolname));
2069 2070 *cputype = ZS_CPUTYPE_DEFAULT_PSET;
2070 2071
2071 2072 /* no dedicated cpu if pools are disabled */
2072 2073 if (ctl->zsctl_pool_status == POOL_DISABLED)
2073 2074 goto lookup_done;
2074 2075
2075 2076 /* Get the pool name using the id */
2076 2077 pool_value_set_int64(vals[0], poolid);
2077 2078 if (pool_value_set_name(vals[0], "pool.sys_id") != PO_SUCCESS)
2078 2079 goto lookup_done;
2079 2080
2080 2081 if ((pools = pool_query_pools(conf, &num, vals)) == NULL)
2081 2082 goto lookup_done;
2082 2083
2083 2084 if (num != 1)
2084 2085 goto lookup_done;
2085 2086
2086 2087 if (pool_get_property(conf, pool_to_elem(conf, pools[0]),
2087 2088 "pool.name", vals[0]) != POC_STRING ||
2088 2089 pool_value_get_string(vals[0], &string) != PO_SUCCESS)
2089 2090 goto lookup_done;
2090 2091 (void) strlcpy(poolname, (char *)string, sizeof (poolname));
2091 2092
2092 2093 /* Get the name of the pset for the pool */
2093 2094 if (pool_value_set_name(vals[0], "type") != PO_SUCCESS)
2094 2095 goto lookup_done;
2095 2096
2096 2097 if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS)
2097 2098 goto lookup_done;
2098 2099
2099 2100 if ((res_list = pool_query_pool_resources(conf, pools[0], &num, vals))
2100 2101 == NULL)
2101 2102 goto lookup_done;
2102 2103
2103 2104 if (num != 1)
2104 2105 goto lookup_done;
2105 2106
2106 2107 if (pool_get_property(conf, pool_resource_to_elem(conf,
2107 2108 res_list[0]), "pset.sys_id", vals[0]) != POC_INT ||
2108 2109 pool_value_get_int64(vals[0], &int64) != PO_SUCCESS)
2109 2110 goto lookup_done;
2110 2111
2111 2112 if (int64 == ZS_PSET_DEFAULT)
2112 2113 *cputype = ZS_CPUTYPE_DEFAULT_PSET;
2113 2114
2114 2115 if (pool_get_property(conf, pool_resource_to_elem(conf,
2115 2116 res_list[0]), "pset.name", vals[0]) != POC_STRING ||
2116 2117 pool_value_get_string(vals[0], &string) != PO_SUCCESS)
2117 2118 goto lookup_done;
2118 2119
2119 2120 (void) strlcpy(psetname, (char *)string, sizeof (psetname));
2120 2121
2121 2122 if (strncmp(psetname, "SUNWtmp_", strlen("SUNWtmp_")) == 0)
2122 2123 *cputype = ZS_CPUTYPE_DEDICATED;
2123 2124 if (strncmp(psetname, "SUNW_legacy_", strlen("SUNW_legacy_")) == 0)
2124 2125 *cputype = ZS_CPUTYPE_PSRSET_PSET;
2125 2126 else
2126 2127 *cputype = ZS_CPUTYPE_POOL_PSET;
2127 2128
2128 2129 lookup_done:
2129 2130
2130 2131 if (pv_save != NULL)
2131 2132 vals[1] = pv_save;
2132 2133
2133 2134 if (res_list)
2134 2135 free(res_list);
2135 2136 if (pools)
2136 2137 free(pools);
2137 2138
2138 2139 (void) strlcpy(pool, poolname, poollen);
2139 2140 (void) strlcpy(pset, psetname, psetlen);
2140 2141 }
2141 2142
2142 2143 /* Convert scheduler names to ZS_* scheduler flags */
2143 2144 static uint_t
2144 2145 zsd_schedname2int(char *clname, int pri)
2145 2146 {
2146 2147 uint_t sched = 0;
2147 2148
2148 2149 if (strcmp(clname, "TS") == 0) {
2149 2150 sched = ZS_SCHED_TS;
2150 2151 } else if (strcmp(clname, "IA") == 0) {
2151 2152 sched = ZS_SCHED_IA;
2152 2153 } else if (strcmp(clname, "FX") == 0) {
2153 2154 if (pri > 59) {
2154 2155 sched = ZS_SCHED_FX_60;
2155 2156 } else {
2156 2157 sched = ZS_SCHED_FX;
2157 2158 }
2158 2159 } else if (strcmp(clname, "RT") == 0) {
2159 2160 sched = ZS_SCHED_RT;
2160 2161
2161 2162 } else if (strcmp(clname, "FSS") == 0) {
2162 2163 sched = ZS_SCHED_FSS;
2163 2164 }
2164 2165 return (sched);
2165 2166 }
2166 2167
2167 2168 static uint64_t
2168 2169 zsd_get_zone_rctl_limit(char *name)
2169 2170 {
2170 2171 rctlblk_t *rblk;
2171 2172
2172 2173 rblk = (rctlblk_t *)alloca(rctlblk_size());
2173 2174 if (getrctl(name, NULL, rblk, RCTL_FIRST)
2174 2175 != 0) {
2175 2176 return (ZS_LIMIT_NONE);
2176 2177 }
2177 2178 return (rctlblk_get_value(rblk));
2178 2179 }
2179 2180
2180 2181 static uint64_t
2181 2182 zsd_get_zone_rctl_usage(char *name)
2182 2183 {
|
↓ open down ↓ |
2149 lines elided |
↑ open up ↑ |
2183 2184 rctlblk_t *rblk;
2184 2185
2185 2186 rblk = (rctlblk_t *)alloca(rctlblk_size());
2186 2187 if (getrctl(name, NULL, rblk, RCTL_USAGE)
2187 2188 != 0) {
2188 2189 return (0);
2189 2190 }
2190 2191 return (rctlblk_get_value(rblk));
2191 2192 }
2192 2193
2193 -#define ZSD_NUM_RCTL_VALS 19
2194 +#define ZSD_NUM_RCTL_VALS 20
2194 2195
2195 2196 /*
2196 2197 * Fetch the limit information for a zone. This uses zone_enter() as the
2197 2198 * getrctl(2) system call only returns rctl information for the zone of
2198 2199 * the caller.
2199 2200 */
/*
 * Returns 0 on success with all out-parameters set, -1 on failure.
 * Because getrctl(2) only reports the caller's zone, a child process is
 * forked and zone_enter()ed into the target zone; the child writes
 * ZSD_NUM_RCTL_VALS uint64_t values over a pipe in a fixed order that
 * the parent's unpacking below must match exactly.
 */
2200 2201 static int
2201 2202 zsd_get_zone_caps(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t *cpu_shares,
2202 2203 uint64_t *cpu_cap, uint64_t *ram_cap, uint64_t *locked_cap,
2203 2204 uint64_t *vm_cap, uint64_t *processes_cap, uint64_t *processes,
2204 2205 uint64_t *lwps_cap, uint64_t *lwps, uint64_t *shm_cap, uint64_t *shm,
2205 2206 uint64_t *shmids_cap, uint64_t *shmids, uint64_t *semids_cap,
2206 2207 uint64_t *semids, uint64_t *msgids_cap, uint64_t *msgids,
2207 2208 uint64_t *lofi_cap, uint64_t *lofi, uint_t *sched)
2208 2209 {
2209 2210 int p[2], pid, tmpl_fd, ret;
2210 2211 ctid_t ct;
2211 2212 char class[PC_CLNMSZ];
2212 2213 uint64_t vals[ZSD_NUM_RCTL_VALS];
2213 2214 zsd_system_t *sys = ctl->zsctl_system;
2214 2215 int i = 0;
2215 2216 int res = 0;
2216 2217
2217 2218 /* Treat all caps as no cap on error */
2218 2219 *cpu_shares = ZS_LIMIT_NONE;
2219 2220 *cpu_cap = ZS_LIMIT_NONE;
2220 2221 *ram_cap = ZS_LIMIT_NONE;
2221 2222 *locked_cap = ZS_LIMIT_NONE;
2222 2223 *vm_cap = ZS_LIMIT_NONE;
2223 2224
2224 2225 *processes_cap = ZS_LIMIT_NONE;
2225 2226 *lwps_cap = ZS_LIMIT_NONE;
2226 2227 *shm_cap = ZS_LIMIT_NONE;
2227 2228 *shmids_cap = ZS_LIMIT_NONE;
2228 2229 *semids_cap = ZS_LIMIT_NONE;
2229 2230 *msgids_cap = ZS_LIMIT_NONE;
|
↓ open down ↓ |
26 lines elided |
↑ open up ↑ |
2230 2231 *lofi_cap = ZS_LIMIT_NONE;
2231 2232
2232 2233 *processes = 0;
2233 2234 *lwps = 0;
2234 2235 *shm = 0;
2235 2236 *shmids = 0;
2236 2237 *semids = 0;
2237 2238 *msgids = 0;
2238 2239 *lofi = 0;
2239 2240
2240 - /* Get the ram cap first since it is a zone attr */
2241 - ret = zone_getattr(zone->zsz_id, ZONE_ATTR_PHYS_MCAP,
2242 - ram_cap, sizeof (*ram_cap));
2243 - if (ret < 0 || *ram_cap == 0)
2244 - *ram_cap = ZS_LIMIT_NONE;
2245 -
2246 2241 /* Get the zone's default scheduling class */
2247 2242 ret = zone_getattr(zone->zsz_id, ZONE_ATTR_SCHED_CLASS,
2248 2243 class, sizeof (class));
2249 2244 if (ret < 0)
2250 2245 return (-1);
2251 2246
2252 2247 *sched = zsd_schedname2int(class, 0);
2253 2248
2254 2249 /* rctl caps must be fetched from within the zone */
2255 2250 if (pipe(p) != 0)
2256 2251 return (-1);
2257 2252
/* Contract template so the child can be abandoned after waitpid */
2258 2253 if ((tmpl_fd = init_template()) == -1) {
2259 2254 (void) close(p[0]);
2260 2255 (void) close(p[1]);
2261 2256 return (-1);
2262 2257 }
2263 2258 pid = forkx(0);
2264 2259 if (pid < 0) {
2265 2260 (void) ct_tmpl_clear(tmpl_fd);
2266 2261 (void) close(p[0]);
2267 2262 (void) close(p[1]);
2268 2263 return (-1);
2269 2264 }
2270 2265 if (pid == 0) {
2271 2266
2272 2267 (void) ct_tmpl_clear(tmpl_fd);
2273 2268 (void) close(tmpl_fd);
2274 2269 (void) close(p[0]);
/*
 * If zone_enter() fails the child exits without writing; the parent's
 * short read below then returns -1.
 */
2275 2270 if (zone->zsz_id != getzoneid()) {
2276 2271 if (zone_enter(zone->zsz_id) < 0) {
2277 2272 (void) close(p[1]);
2278 2273 _exit(0);
2279 2274 }
2280 2275 }
2281 2276
2282 2277 /* Get caps for zone, and write them to zonestatd parent. */
2283 2278 vals[i++] = zsd_get_zone_rctl_limit("zone.cpu-shares");
2284 2279 vals[i++] = zsd_get_zone_rctl_limit("zone.cpu-cap");
2285 2280 vals[i++] = zsd_get_zone_rctl_limit("zone.max-locked-memory");
2286 2281 vals[i++] = zsd_get_zone_rctl_limit("zone.max-swap");
2287 2282 vals[i++] = zsd_get_zone_rctl_limit("zone.max-processes");
2288 2283 vals[i++] = zsd_get_zone_rctl_usage("zone.max-processes");
2289 2284 vals[i++] = zsd_get_zone_rctl_limit("zone.max-lwps");
2290 2285 vals[i++] = zsd_get_zone_rctl_usage("zone.max-lwps");
|
↓ open down ↓ |
35 lines elided |
↑ open up ↑ |
2291 2286 vals[i++] = zsd_get_zone_rctl_limit("zone.max-shm-memory");
2292 2287 vals[i++] = zsd_get_zone_rctl_usage("zone.max-shm-memory");
2293 2288 vals[i++] = zsd_get_zone_rctl_limit("zone.max-shm-ids");
2294 2289 vals[i++] = zsd_get_zone_rctl_usage("zone.max-shm-ids");
2295 2290 vals[i++] = zsd_get_zone_rctl_limit("zone.max-sem-ids");
2296 2291 vals[i++] = zsd_get_zone_rctl_usage("zone.max-sem-ids");
2297 2292 vals[i++] = zsd_get_zone_rctl_limit("zone.max-msg-ids");
2298 2293 vals[i++] = zsd_get_zone_rctl_usage("zone.max-msg-ids");
2299 2294 vals[i++] = zsd_get_zone_rctl_limit("zone.max-lofi");
2300 2295 vals[i++] = zsd_get_zone_rctl_usage("zone.max-lofi");
/*
 * NOTE(review): the ram cap is now taken from the
 * zone.max-physical-memory rctl (replacing ZONE_ATTR_PHYS_MCAP), but
 * this fetches the rctl's _usage while every other cap above uses
 * _limit -- confirm this is intended and not a limit/usage mix-up.
 */
2296 + vals[i++] = zsd_get_zone_rctl_usage("zone.max-physical-memory");
2301 2297
2302 2298 if (write(p[1], vals, ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) !=
2303 2299 ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) {
2304 2300 (void) close(p[1]);
2305 2301 _exit(1);
2306 2302 }
2307 2303
2308 2304 (void) close(p[1]);
2309 2305 _exit(0);
2310 2306 }
2311 2307 if (contract_latest(&ct) == -1)
2312 2308 ct = -1;
2313 2309
2314 2310 (void) ct_tmpl_clear(tmpl_fd);
2315 2311 (void) close(tmpl_fd);
2316 2312 (void) close(p[1]);
2317 2313 while (waitpid(pid, NULL, 0) != pid)
2318 2314 ;
2319 2315
2320 2316 /* Read cap from child in zone */
2321 2317 if (read(p[0], vals, ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) !=
2322 2318 ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) {
2323 2319 res = -1;
2324 2320 goto cleanup;
2325 2321 }
/* Unpack in exactly the order the child wrote the values above */
2326 2322 i = 0;
2327 2323 *cpu_shares = vals[i++];
2328 2324 *cpu_cap = vals[i++];
2329 2325 *locked_cap = vals[i++];
2330 2326 *vm_cap = vals[i++];
2331 2327 *processes_cap = vals[i++];
2332 2328 *processes = vals[i++];
2333 2329 *lwps_cap = vals[i++];
2334 2330 *lwps = vals[i++];
|
↓ open down ↓ |
24 lines elided |
↑ open up ↑ |
2335 2331 *shm_cap = vals[i++];
2336 2332 *shm = vals[i++];
2337 2333 *shmids_cap = vals[i++];
2338 2334 *shmids = vals[i++];
2339 2335 *semids_cap = vals[i++];
2340 2336 *semids = vals[i++];
2341 2337 *msgids_cap = vals[i++];
2342 2338 *msgids = vals[i++];
2343 2339 *lofi_cap = vals[i++];
2344 2340 *lofi = vals[i++];
2341 + *ram_cap = vals[i++];
2345 2342
2346 2343 /* Interpret maximum values as no cap */
2347 2344 if (*cpu_cap == UINT32_MAX || *cpu_cap == 0)
2348 2345 *cpu_cap = ZS_LIMIT_NONE;
2349 2346 if (*processes_cap == sys->zss_processes_max)
2350 2347 *processes_cap = ZS_LIMIT_NONE;
2351 2348 if (*lwps_cap == sys->zss_lwps_max)
2352 2349 *lwps_cap = ZS_LIMIT_NONE;
2353 2350 if (*shm_cap == sys->zss_shm_max)
2354 2351 *shm_cap = ZS_LIMIT_NONE;
2355 2352 if (*shmids_cap == sys->zss_shmids_max)
2356 2353 *shmids_cap = ZS_LIMIT_NONE;
2357 2354 if (*semids_cap == sys->zss_semids_max)
2358 2355 *semids_cap = ZS_LIMIT_NONE;
2359 2356 if (*msgids_cap == sys->zss_msgids_max)
2360 2357 *msgids_cap = ZS_LIMIT_NONE;
2361 2358 if (*lofi_cap == sys->zss_lofi_max)
2362 2359 *lofi_cap = ZS_LIMIT_NONE;
2363 2360
2364 2361
2365 2362 cleanup:
2366 2363 (void) close(p[0]);
2367 2364 (void) ct_tmpl_clear(tmpl_fd);
2368 2365 (void) close(tmpl_fd);
2369 2366 (void) contract_abandon_id(ct);
2370 2367
2371 2368 return (res);
2372 2369 }
2373 2370
2374 2371 /* Update the current list of running zones */
2375 2372 static void
2376 2373 zsd_refresh_zones(zsd_ctl_t *ctl)
2377 2374 {
2378 2375 zsd_zone_t *zone;
2379 2376 uint_t old, num;
2380 2377 ushort_t flags;
2381 2378 int i, ret;
2382 2379 zoneid_t *cache;
2383 2380 uint64_t cpu_shares;
2384 2381 uint64_t cpu_cap;
2385 2382 uint64_t ram_cap;
2386 2383 uint64_t locked_cap;
2387 2384 uint64_t vm_cap;
2388 2385 uint64_t processes_cap;
2389 2386 uint64_t processes;
2390 2387 uint64_t lwps_cap;
2391 2388 uint64_t lwps;
2392 2389 uint64_t shm_cap;
2393 2390 uint64_t shm;
2394 2391 uint64_t shmids_cap;
2395 2392 uint64_t shmids;
2396 2393 uint64_t semids_cap;
2397 2394 uint64_t semids;
2398 2395 uint64_t msgids_cap;
2399 2396 uint64_t msgids;
2400 2397 uint64_t lofi_cap;
2401 2398 uint64_t lofi;
2402 2399
2403 2400 char zonename[ZS_ZONENAME_MAX];
2404 2401 char poolname[ZS_POOLNAME_MAX];
2405 2402 char psetname[ZS_PSETNAME_MAX];
2406 2403 uint_t sched;
2407 2404 uint_t cputype;
2408 2405 uint_t iptype;
2409 2406
2410 2407 /* Get the current list of running zones */
2411 2408 for (;;) {
2412 2409 old = num = ctl->zsctl_zone_ncache;
2413 2410 (void) zone_list(ctl->zsctl_zone_cache, &num);
2414 2411 if (num <= old)
2415 2412 break;
2416 2413 if ((cache = (zoneid_t *)realloc(ctl->zsctl_zone_cache,
2417 2414 (num) * sizeof (zoneid_t))) != NULL) {
2418 2415 ctl->zsctl_zone_ncache = num;
2419 2416 ctl->zsctl_zone_cache = cache;
2420 2417 } else {
2421 2418 /* Could not allocate to get new zone list. Give up */
2422 2419 return;
2423 2420 }
2424 2421 }
2425 2422
2426 2423 zsd_mark_zones_start(ctl);
2427 2424
2428 2425 for (i = 0; i < num; i++) {
2429 2426
2430 2427 ret = getzonenamebyid(ctl->zsctl_zone_cache[i],
2431 2428 zonename, sizeof (zonename));
2432 2429 if (ret < 0)
2433 2430 continue;
2434 2431
2435 2432 zone = zsd_lookup_insert_zone(ctl, zonename,
2436 2433 ctl->zsctl_zone_cache[i]);
2437 2434
2438 2435 ret = zone_getattr(ctl->zsctl_zone_cache[i], ZONE_ATTR_FLAGS,
2439 2436 &flags, sizeof (flags));
2440 2437 if (ret < 0)
2441 2438 continue;
2442 2439
2443 2440 if (flags & ZF_NET_EXCL)
2444 2441 iptype = ZS_IPTYPE_EXCLUSIVE;
2445 2442 else
2446 2443 iptype = ZS_IPTYPE_SHARED;
2447 2444
2448 2445 zsd_get_zone_pool_pset(ctl, zone, poolname, sizeof (poolname),
2449 2446 psetname, sizeof (psetname), &cputype);
2450 2447
2451 2448 if (zsd_get_zone_caps(ctl, zone, &cpu_shares, &cpu_cap,
2452 2449 &ram_cap, &locked_cap, &vm_cap, &processes_cap, &processes,
2453 2450 &lwps_cap, &lwps, &shm_cap, &shm, &shmids_cap, &shmids,
2454 2451 &semids_cap, &semids, &msgids_cap, &msgids, &lofi_cap,
2455 2452 &lofi, &sched) != 0)
2456 2453 continue;
2457 2454
2458 2455 zsd_mark_zone_found(ctl, zone, cpu_shares, cpu_cap, ram_cap,
2459 2456 locked_cap, vm_cap, processes_cap, processes, lwps_cap,
2460 2457 lwps, shm_cap, shm, shmids_cap, shmids, semids_cap,
2461 2458 semids, msgids_cap, msgids, lofi_cap, lofi, poolname,
2462 2459 psetname, sched, cputype, iptype);
2463 2460 }
2464 2461 }
2465 2462
2466 2463 /* Fetch the details of a process from its psinfo_t */
2467 2464 static void
2468 2465 zsd_get_proc_info(zsd_ctl_t *ctl, psinfo_t *psinfo, psetid_t *psetid,
2469 2466 psetid_t *prev_psetid, zoneid_t *zoneid, zoneid_t *prev_zoneid,
2470 2467 timestruc_t *delta, uint_t *sched)
2471 2468 {
2472 2469 timestruc_t d;
2473 2470 zsd_proc_t *proc;
2474 2471
2475 2472 /* Get cached data for proc */
2476 2473 proc = &(ctl->zsctl_proc_array[psinfo->pr_pid]);
2477 2474 *psetid = psinfo->pr_lwp.pr_bindpset;
2478 2475
2479 2476 if (proc->zspr_psetid == ZS_PSET_ERROR)
2480 2477 *prev_psetid = *psetid;
2481 2478 else
2482 2479 *prev_psetid = proc->zspr_psetid;
2483 2480
2484 2481 *zoneid = psinfo->pr_zoneid;
2485 2482 if (proc->zspr_zoneid == -1)
2486 2483 *prev_zoneid = *zoneid;
2487 2484 else
2488 2485 *prev_zoneid = proc->zspr_zoneid;
2489 2486
2490 2487 TIMESTRUC_DELTA(d, psinfo->pr_time, proc->zspr_usage);
2491 2488 *delta = d;
2492 2489
2493 2490 *sched = zsd_schedname2int(psinfo->pr_lwp.pr_clname,
2494 2491 psinfo->pr_lwp.pr_pri);
2495 2492
2496 2493 /* Update cached data for proc */
2497 2494 proc->zspr_psetid = psinfo->pr_lwp.pr_bindpset;
2498 2495 proc->zspr_zoneid = psinfo->pr_zoneid;
2499 2496 proc->zspr_sched = *sched;
2500 2497 proc->zspr_usage.tv_sec = psinfo->pr_time.tv_sec;
2501 2498 proc->zspr_usage.tv_nsec = psinfo->pr_time.tv_nsec;
2502 2499 proc->zspr_ppid = psinfo->pr_ppid;
2503 2500 }
2504 2501
2505 2502 /*
2506 2503 * Reset the known cpu usage of a process. This is done after a process
2507 2504 * exits so that if the pid is recycled, data from its previous life is
2508 2505 * not reused
2509 2506 */
2510 2507 static void
2511 2508 zsd_flush_proc_info(zsd_proc_t *proc)
2512 2509 {
2513 2510 proc->zspr_usage.tv_sec = 0;
2514 2511 proc->zspr_usage.tv_nsec = 0;
2515 2512 }
2516 2513
2517 2514 /*
2518 2515 * Open the current extended accounting file. On initialization, open the
2519 2516 * file as the current file to be used. Otherwise, open the file as the
2520 2517 * next file to use of the current file reaches EOF.
2521 2518 */
/*
 * Returns 0 on success with *fd/*eaf/*stat filled in and *open set;
 * returns -1 after cleanup on failure.  Enables process accounting if
 * it is off, and retries for up to ~500ms if the accounting file is
 * momentarily unavailable (e.g. while being rolled).
 */
2522 2519 static int
2523 2520 zsd_open_exacct(zsd_ctl_t *ctl, boolean_t init)
2524 2521 {
2525 2522 int ret, oret, state, trys = 0, flags;
/* "open" is an output flag pointer; it shadows open(2) in this scope */
2526 2523 int *fd, *open;
2527 2524 ea_file_t *eaf;
2528 2525 struct stat64 *stat;
2529 2526 char path[MAXPATHLEN];
2530 2527
2531 2528 /*
2532 2529 * The accounting file is first opened at the tail. Following
2533 2530 * opens to new accounting files are opened at the head.
2534 2531 */
2535 2532 if (init == B_TRUE) {
2536 2533 flags = EO_NO_VALID_HDR | EO_TAIL;
2537 2534 fd = &ctl->zsctl_proc_fd;
2538 2535 eaf = &ctl->zsctl_proc_eaf;
2539 2536 stat = &ctl->zsctl_proc_stat;
2540 2537 open = &ctl->zsctl_proc_open;
2541 2538 } else {
2542 2539 flags = EO_NO_VALID_HDR | EO_HEAD;
2543 2540 fd = &ctl->zsctl_proc_fd_next;
2544 2541 eaf = &ctl->zsctl_proc_eaf_next;
2545 2542 stat = &ctl->zsctl_proc_stat_next;
2546 2543 open = &ctl->zsctl_proc_open_next;
2547 2544 }
2548 2545
2549 2546 *fd = -1;
2550 2547 *open = 0;
2551 2548 retry:
2552 2549 /* open accounting files for cpu consumption */
2553 2550 ret = acctctl(AC_STATE_GET | AC_PROC, &state, sizeof (state));
2554 2551 if (ret != 0) {
2555 2552 zsd_warn(gettext("Unable to get process accounting state"));
2556 2553 goto err;
2557 2554 }
/* If accounting is off, try enabling it once, then give up */
2558 2555 if (state != AC_ON) {
2559 2556 if (trys > 0) {
2560 2557 zsd_warn(gettext(
2561 2558 "Unable to enable process accounting"));
2562 2559 goto err;
2563 2560 }
2564 2561 (void) zsd_enable_cpu_stats();
2565 2562 trys++;
2566 2563 goto retry;
2567 2564 }
2568 2565
2569 2566 ret = acctctl(AC_FILE_GET | AC_PROC, path, sizeof (path));
2570 2567 if (ret != 0) {
2571 2568 zsd_warn(gettext("Unable to get process accounting file"));
2572 2569 goto err;
2573 2570 }
2574 2571
2575 2572 if ((*fd = open64(path, O_RDONLY, 0)) >= 0 &&
2576 2573 (oret = ea_fdopen(eaf, *fd, NULL, flags, O_RDONLY)) == 0)
2577 2574 ret = fstat64(*fd, stat);
2578 2575
/*
 * NOTE(review): if open64() fails on the first pass, oret is never
 * assigned; the || chain short-circuits at *fd < 0, but the
 * "if (oret == 0)" inside the block below then reads oret
 * uninitialized.  Initializing oret (and ret) before the open would
 * make this path safe -- confirm and fix separately.
 */
2579 2576 if (*fd < 0 || oret < 0 || ret < 0) {
2580 2577 struct timespec ts;
2581 2578
2582 2579 /*
2583 2580 * It is possible the accounting file is momentarily unavailable
2584 2581 * because it is being rolled. Try for up to half a second.
2585 2582 *
2586 2583 * If failure to open accounting file persists, give up.
2587 2584 */
2588 2585 if (oret == 0)
2589 2586 (void) ea_close(eaf);
2590 2587 else if (*fd >= 0)
2591 2588 (void) close(*fd);
2592 2589 if (trys > 500) {
2593 2590 zsd_warn(gettext(
2594 2591 "Unable to open process accounting file"));
2595 2592 goto err;
2596 2593 }
2597 2594 /* wait one millisecond */
2598 2595 ts.tv_sec = 0;
2599 2596 ts.tv_nsec = NANOSEC / 1000;
2600 2597 (void) nanosleep(&ts, NULL);
2601 2598 goto retry;
2602 2599 }
2603 2600 *open = 1;
2604 2601 return (0);
2605 2602 err:
2606 2603 if (*fd >= 0)
2607 2604 (void) close(*fd);
2608 2605 *open = 0;
2609 2606 *fd = -1;
2610 2607 return (-1);
2611 2608 }
2612 2609
2613 2610 /*
2614 2611 * Walk /proc and charge each process to its zone and processor set.
2615 2612 * Then read exacct data for exited processes, and charge them as well.
2616 2613 */
2617 2614 static void
2618 2615 zsd_refresh_procs(zsd_ctl_t *ctl, boolean_t init)
2619 2616 {
2620 2617 DIR *dir;
2621 2618 struct dirent *dent;
2622 2619 psinfo_t psinfo;
2623 2620 int fd, ret;
2624 2621 zsd_proc_t *proc, *pproc, *tmp, *next;
2625 2622 list_t pplist, plist;
2626 2623 zsd_zone_t *zone, *prev_zone;
2627 2624 zsd_pset_t *pset, *prev_pset;
2628 2625 psetid_t psetid, prev_psetid;
2629 2626 zoneid_t zoneid, prev_zoneid;
2630 2627 zsd_pset_usage_t *usage, *prev_usage;
2631 2628 char path[MAXPATHLEN];
2632 2629
2633 2630 ea_object_t object;
2634 2631 ea_object_t pobject;
2635 2632 boolean_t hrtime_expired = B_FALSE;
2636 2633 struct timeval interval_end;
2637 2634
2638 2635 timestruc_t delta, d1, d2;
2639 2636 uint_t sched = 0;
2640 2637
2641 2638 /*
2642 2639 * Get the current accounting file. The current accounting file
2643 2640 * may be different than the file in use, as the accounting file
2644 2641 * may have been rolled, or manually changed by an admin.
2645 2642 */
2646 2643 ret = zsd_open_exacct(ctl, init);
2647 2644 if (ret != 0) {
2648 2645 zsd_warn(gettext("Unable to track process accounting"));
2649 2646 return;
2650 2647 }
2651 2648
2652 2649 /*
2653 2650 * Mark the current time as the interval end time. Don't track
2654 2651 * processes that exit after this time.
2655 2652 */
2656 2653 (void) gettimeofday(&interval_end, NULL);
2657 2654
2658 2655 dir = opendir("/proc");
2659 2656 if (dir == NULL) {
2660 2657 zsd_warn(gettext("Unable to open /proc"));
2661 2658 return;
2662 2659 }
2663 2660
2664 2661 dent = ctl->zsctl_procfs_dent;
2665 2662
2666 2663 (void) memset(dent, 0, ctl->zsctl_procfs_dent_size);
2667 2664
2668 2665 /* Walk all processes and compute each zone's usage on each pset. */
2669 2666 while (readdir_r(dir, dent) != 0) {
2670 2667
2671 2668 if (strcmp(dent->d_name, ".") == 0 ||
2672 2669 strcmp(dent->d_name, "..") == 0)
2673 2670 continue;
2674 2671
2675 2672 (void) snprintf(path, sizeof (path), "/proc/%s/psinfo",
2676 2673 dent->d_name);
2677 2674
2678 2675 fd = open(path, O_RDONLY);
2679 2676 if (fd < 0)
2680 2677 continue;
2681 2678
2682 2679 if (read(fd, &psinfo, sizeof (psinfo)) != sizeof (psinfo)) {
2683 2680 (void) close(fd);
2684 2681 continue;
2685 2682 }
2686 2683 (void) close(fd);
2687 2684
2688 2685 zsd_get_proc_info(ctl, &psinfo, &psetid, &prev_psetid,
2689 2686 &zoneid, &prev_zoneid, &delta, &sched);
2690 2687
2691 2688 d1.tv_sec = delta.tv_sec / 2;
2692 2689 d1.tv_nsec = delta.tv_nsec / 2;
2693 2690 d2.tv_sec = (delta.tv_sec / 2) + (delta.tv_sec % 2);
2694 2691 d2.tv_nsec = (delta.tv_nsec / 2) + (delta.tv_nsec % 2);
2695 2692
2696 2693 /* Get the zone and pset this process is running in */
2697 2694 zone = zsd_lookup_zone_byid(ctl, zoneid);
2698 2695 if (zone == NULL)
2699 2696 continue;
2700 2697 pset = zsd_lookup_pset_byid(ctl, psetid);
2701 2698 if (pset == NULL)
2702 2699 continue;
2703 2700 usage = zsd_lookup_insert_usage(ctl, pset, zone);
2704 2701 if (usage == NULL)
2705 2702 continue;
2706 2703
2707 2704 /*
2708 2705 * Get the usage of the previous zone and pset if they were
2709 2706 * different.
2710 2707 */
2711 2708 if (zoneid != prev_zoneid)
2712 2709 prev_zone = zsd_lookup_zone_byid(ctl, prev_zoneid);
2713 2710 else
2714 2711 prev_zone = NULL;
2715 2712
2716 2713 if (psetid != prev_psetid)
2717 2714 prev_pset = zsd_lookup_pset_byid(ctl, prev_psetid);
2718 2715 else
2719 2716 prev_pset = NULL;
2720 2717
2721 2718 prev_usage = NULL;
2722 2719 if (prev_zone != NULL || prev_pset != NULL) {
2723 2720 if (prev_zone == NULL)
2724 2721 prev_zone = zone;
2725 2722 if (prev_pset == NULL)
2726 2723 prev_pset = pset;
2727 2724
2728 2725 prev_usage = zsd_lookup_insert_usage(ctl, prev_pset,
2729 2726 prev_zone);
2730 2727 }
2731 2728
2732 2729 /* Update the usage with the processes info */
2733 2730 if (prev_usage == NULL) {
2734 2731 zsd_mark_pset_usage_found(usage, sched);
2735 2732 } else {
2736 2733 zsd_mark_pset_usage_found(usage, sched);
2737 2734 zsd_mark_pset_usage_found(prev_usage, sched);
2738 2735 }
2739 2736
2740 2737 /*
2741 2738 * First time around is just to get a starting point. All
2742 2739 * usages will be zero.
2743 2740 */
2744 2741 if (init == B_TRUE)
2745 2742 continue;
2746 2743
2747 2744 if (prev_usage == NULL) {
2748 2745 zsd_add_usage(ctl, usage, &delta);
2749 2746 } else {
2750 2747 zsd_add_usage(ctl, usage, &d1);
2751 2748 zsd_add_usage(ctl, prev_usage, &d2);
2752 2749 }
2753 2750 }
2754 2751 (void) closedir(dir);
2755 2752
2756 2753 /*
2757 2754 * No need to collect exited proc data on initialization. Just
2758 2755 * caching the usage of the known processes to get a zero starting
2759 2756 * point.
2760 2757 */
2761 2758 if (init == B_TRUE)
2762 2759 return;
2763 2760
2764 2761 /*
2765 2762 * Add accounting records to account for processes which have
2766 2763 * exited.
2767 2764 */
2768 2765 list_create(&plist, sizeof (zsd_proc_t),
2769 2766 offsetof(zsd_proc_t, zspr_next));
2770 2767 list_create(&pplist, sizeof (zsd_proc_t),
2771 2768 offsetof(zsd_proc_t, zspr_next));
2772 2769
2773 2770 for (;;) {
2774 2771 pid_t pid;
2775 2772 pid_t ppid;
2776 2773 timestruc_t user, sys, proc_usage;
2777 2774 timestruc_t finish;
2778 2775 int numfound = 0;
2779 2776
2780 2777 bzero(&object, sizeof (object));
2781 2778 proc = NULL;
2782 2779 zone = NULL;
2783 2780 pset = NULL;
2784 2781 usage = NULL;
2785 2782 ret = ea_get_object(&ctl->zsctl_proc_eaf, &object);
2786 2783 if (ret == EO_ERROR) {
2787 2784 if (ea_error() == EXR_EOF) {
2788 2785
2789 2786 struct stat64 *stat;
2790 2787 struct stat64 *stat_next;
2791 2788
2792 2789 /*
2793 2790 * See if the next accounting file is the
2794 2791 * same as the current accounting file.
2795 2792 */
2796 2793 stat = &(ctl->zsctl_proc_stat);
2797 2794 stat_next = &(ctl->zsctl_proc_stat_next);
2798 2795 if (stat->st_ino == stat_next->st_ino &&
2799 2796 stat->st_dev == stat_next->st_dev) {
2800 2797 /*
2801 2798 * End of current accounting file is
2802 2799 * reached, so finished. Clear EOF
2803 2800 * bit for next time around.
2804 2801 */
2805 2802 ea_clear(&ctl->zsctl_proc_eaf);
2806 2803 break;
2807 2804 } else {
2808 2805 /*
2809 2806 * Accounting file has changed. Move
2810 2807 * to current accounting file.
2811 2808 */
2812 2809 (void) ea_close(&ctl->zsctl_proc_eaf);
2813 2810
2814 2811 ctl->zsctl_proc_fd =
2815 2812 ctl->zsctl_proc_fd_next;
2816 2813 ctl->zsctl_proc_eaf =
2817 2814 ctl->zsctl_proc_eaf_next;
2818 2815 ctl->zsctl_proc_stat =
2819 2816 ctl->zsctl_proc_stat_next;
2820 2817
2821 2818 ctl->zsctl_proc_fd_next = -1;
2822 2819 ctl->zsctl_proc_open_next = 0;
2823 2820 continue;
2824 2821 }
2825 2822 } else {
2826 2823 /*
2827 2824 * Other accounting error. Give up on
2828 2825 * accounting.
2829 2826 */
2830 2827 goto ea_err;
2831 2828 }
2832 2829 }
2833 2830 /* Skip if not a process group */
2834 2831 if ((object.eo_catalog & EXT_TYPE_MASK) != EXT_GROUP ||
2835 2832 (object.eo_catalog & EXD_DATA_MASK) != EXD_GROUP_PROC) {
2836 2833 (void) ea_free_item(&object, EUP_ALLOC);
2837 2834 continue;
2838 2835 }
2839 2836
2840 2837 /* The process group entry should be complete */
2841 2838 while (numfound < 9) {
2842 2839 bzero(&pobject, sizeof (pobject));
2843 2840 ret = ea_get_object(&ctl->zsctl_proc_eaf,
2844 2841 &pobject);
2845 2842 if (ret < 0) {
2846 2843 (void) ea_free_item(&object, EUP_ALLOC);
2847 2844 zsd_warn(
2848 2845 "unable to get process accounting data");
2849 2846 goto ea_err;
2850 2847 }
2851 2848 /* Next entries should be process data */
2852 2849 if ((pobject.eo_catalog & EXT_TYPE_MASK) ==
2853 2850 EXT_GROUP) {
2854 2851 (void) ea_free_item(&object, EUP_ALLOC);
2855 2852 (void) ea_free_item(&pobject, EUP_ALLOC);
2856 2853 zsd_warn(
2857 2854 "process data of wrong type");
2858 2855 goto ea_err;
2859 2856 }
2860 2857 switch (pobject.eo_catalog & EXD_DATA_MASK) {
2861 2858 case EXD_PROC_PID:
2862 2859 pid = pobject.eo_item.ei_uint32;
2863 2860 proc = &(ctl->zsctl_proc_array[pid]);
2864 2861 /*
2865 2862 * This process should not be currently in
2866 2863 * the list of processes to process.
2867 2864 */
2868 2865 assert(!list_link_active(&proc->zspr_next));
2869 2866 numfound++;
2870 2867 break;
2871 2868 case EXD_PROC_ANCPID:
2872 2869 ppid = pobject.eo_item.ei_uint32;
2873 2870 pproc = &(ctl->zsctl_proc_array[ppid]);
2874 2871 numfound++;
2875 2872 break;
2876 2873 case EXD_PROC_ZONENAME:
2877 2874 zone = zsd_lookup_zone(ctl,
2878 2875 pobject.eo_item.ei_string, -1);
2879 2876 numfound++;
2880 2877 break;
2881 2878 case EXD_PROC_CPU_USER_SEC:
2882 2879 user.tv_sec =
2883 2880 pobject.eo_item.ei_uint64;
2884 2881 numfound++;
2885 2882 break;
2886 2883 case EXD_PROC_CPU_USER_NSEC:
2887 2884 user.tv_nsec =
2888 2885 pobject.eo_item.ei_uint64;
2889 2886 numfound++;
2890 2887 break;
2891 2888 case EXD_PROC_CPU_SYS_SEC:
2892 2889 sys.tv_sec =
2893 2890 pobject.eo_item.ei_uint64;
2894 2891 numfound++;
2895 2892 break;
2896 2893 case EXD_PROC_CPU_SYS_NSEC:
2897 2894 sys.tv_nsec =
2898 2895 pobject.eo_item.ei_uint64;
2899 2896 numfound++;
2900 2897 break;
2901 2898 case EXD_PROC_FINISH_SEC:
2902 2899 finish.tv_sec =
2903 2900 pobject.eo_item.ei_uint64;
2904 2901 numfound++;
2905 2902 break;
2906 2903 case EXD_PROC_FINISH_NSEC:
2907 2904 finish.tv_nsec =
2908 2905 pobject.eo_item.ei_uint64;
2909 2906 numfound++;
2910 2907 break;
2911 2908 }
2912 2909 (void) ea_free_item(&pobject, EUP_ALLOC);
2913 2910 }
2914 2911 (void) ea_free_item(&object, EUP_ALLOC);
2915 2912 if (numfound != 9) {
2916 2913 zsd_warn(gettext(
2917 2914 "Malformed process accounting entry found"));
2918 2915 goto proc_done;
2919 2916 }
2920 2917
2921 2918 if (finish.tv_sec > interval_end.tv_sec ||
2922 2919 (finish.tv_sec == interval_end.tv_sec &&
2923 2920 finish.tv_nsec > (interval_end.tv_usec * 1000)))
2924 2921 hrtime_expired = B_TRUE;
2925 2922
2926 2923 /*
2927 2924 * Try to identify the zone and pset to which this
2928 2925 * exited process belongs.
2929 2926 */
2930 2927 if (zone == NULL)
2931 2928 goto proc_done;
2932 2929
2933 2930 /* Save proc info */
2934 2931 proc->zspr_ppid = ppid;
2935 2932 proc->zspr_zoneid = zone->zsz_id;
2936 2933
2937 2934 prev_psetid = ZS_PSET_ERROR;
2938 2935 sched = 0;
2939 2936
2940 2937 /*
2941 2938 * The following tries to deduce the processes pset.
2942 2939 *
2943 2940 * First choose pset and sched using cached value from the
2944 2941 * most recent time the process has been seen.
2945 2942 *
2946 2943 * pset and sched can change across zone_enter, so make sure
2947 2944 * most recent sighting of this process was in the same
2948 2945 * zone before using most recent known value.
2949 2946 *
2950 2947 * If there is no known value, use value of processes
2951 2948 * parent. If parent is unknown, walk parents until a known
2952 2949 * parent is found.
2953 2950 *
2954 2951 * If no parent in the zone is found, use the zone's default
2955 2952 * pset and scheduling class.
2956 2953 */
2957 2954 if (proc->zspr_psetid != ZS_PSET_ERROR) {
2958 2955 prev_psetid = proc->zspr_psetid;
2959 2956 pset = zsd_lookup_pset_byid(ctl, prev_psetid);
2960 2957 sched = proc->zspr_sched;
2961 2958 } else if (pproc->zspr_zoneid == zone->zsz_id &&
2962 2959 pproc->zspr_psetid != ZS_PSET_ERROR) {
2963 2960 prev_psetid = pproc->zspr_psetid;
2964 2961 pset = zsd_lookup_pset_byid(ctl, prev_psetid);
2965 2962 sched = pproc->zspr_sched;
2966 2963 }
2967 2964
2968 2965 if (pset == NULL) {
2969 2966 /*
2970 2967 * Process or processes parent has never been seen.
2971 2968 * Save to deduce a known parent later.
2972 2969 */
2973 2970 proc_usage = sys;
2974 2971 TIMESTRUC_ADD_TIMESTRUC(proc_usage, user);
2975 2972 TIMESTRUC_DELTA(delta, proc_usage,
2976 2973 proc->zspr_usage);
2977 2974 proc->zspr_usage = delta;
2978 2975 list_insert_tail(&plist, proc);
2979 2976 continue;
2980 2977 }
2981 2978
2982 2979 /* Add the zone's usage to the pset */
2983 2980 usage = zsd_lookup_insert_usage(ctl, pset, zone);
2984 2981 if (usage == NULL)
2985 2982 goto proc_done;
2986 2983
2987 2984 zsd_mark_pset_usage_found(usage, sched);
2988 2985
2989 2986 /* compute the usage to add for the exited proc */
2990 2987 proc_usage = sys;
2991 2988 TIMESTRUC_ADD_TIMESTRUC(proc_usage, user);
2992 2989 TIMESTRUC_DELTA(delta, proc_usage,
2993 2990 proc->zspr_usage);
2994 2991
2995 2992 zsd_add_usage(ctl, usage, &delta);
2996 2993 proc_done:
2997 2994 zsd_flush_proc_info(proc);
2998 2995
2999 2996 if (hrtime_expired == B_TRUE)
3000 2997 break;
3001 2998 }
3002 2999 /*
3003 3000 * close next accounting file.
3004 3001 */
3005 3002 if (ctl->zsctl_proc_open_next) {
3006 3003 (void) ea_close(
3007 3004 &ctl->zsctl_proc_eaf_next);
3008 3005 ctl->zsctl_proc_open_next = 0;
3009 3006 ctl->zsctl_proc_fd_next = -1;
3010 3007 }
3011 3008
3012 3009 /* For the remaining processes, use pset and sched of a known parent */
3013 3010 proc = list_head(&plist);
3014 3011 while (proc != NULL) {
3015 3012 next = proc;
3016 3013 for (;;) {
3017 3014 if (next->zspr_ppid == 0 || next->zspr_ppid == -1) {
3018 3015 /*
3019 3016 * Kernel process, or parent is unknown, skip
3020 3017 * process, remove from process list.
3021 3018 */
3022 3019 tmp = proc;
3023 3020 proc = list_next(&plist, proc);
3024 3021 list_link_init(&tmp->zspr_next);
3025 3022 break;
3026 3023 }
3027 3024 pproc = &(ctl->zsctl_proc_array[next->zspr_ppid]);
3028 3025 if (pproc->zspr_zoneid != proc->zspr_zoneid) {
3029 3026 /*
3030 3027 * Parent in different zone. Save process and
3031 3028 * use zone's default pset and sched below
3032 3029 */
3033 3030 tmp = proc;
3034 3031 proc = list_next(&plist, proc);
3035 3032 list_remove(&plist, tmp);
3036 3033 list_insert_tail(&pplist, tmp);
3037 3034 break;
3038 3035 }
3039 3036 /* Parent has unknown pset, Search parent's parent */
3040 3037 if (pproc->zspr_psetid == ZS_PSET_ERROR) {
3041 3038 next = pproc;
3042 3039 continue;
3043 3040 }
3044 3041 /* Found parent with known pset. Use its info */
3045 3042 proc->zspr_psetid = pproc->zspr_psetid;
3046 3043 proc->zspr_sched = pproc->zspr_sched;
3047 3044 next->zspr_psetid = pproc->zspr_psetid;
3048 3045 next->zspr_sched = pproc->zspr_sched;
3049 3046 zone = zsd_lookup_zone_byid(ctl,
3050 3047 proc->zspr_zoneid);
3051 3048 if (zone == NULL) {
3052 3049 tmp = proc;
3053 3050 proc = list_next(&plist, proc);
3054 3051 list_remove(&plist, tmp);
3055 3052 list_link_init(&tmp->zspr_next);
3056 3053 break;
3057 3054 }
3058 3055 pset = zsd_lookup_pset_byid(ctl,
3059 3056 proc->zspr_psetid);
3060 3057 if (pset == NULL) {
3061 3058 tmp = proc;
3062 3059 proc = list_next(&plist, proc);
3063 3060 list_remove(&plist, tmp);
3064 3061 list_link_init(&tmp->zspr_next);
3065 3062 break;
3066 3063 }
3067 3064 /* Add the zone's usage to the pset */
3068 3065 usage = zsd_lookup_insert_usage(ctl, pset, zone);
3069 3066 if (usage == NULL) {
3070 3067 tmp = proc;
3071 3068 proc = list_next(&plist, proc);
3072 3069 list_remove(&plist, tmp);
3073 3070 list_link_init(&tmp->zspr_next);
3074 3071 break;
3075 3072 }
3076 3073 zsd_mark_pset_usage_found(usage, proc->zspr_sched);
3077 3074 zsd_add_usage(ctl, usage, &proc->zspr_usage);
3078 3075 zsd_flush_proc_info(proc);
3079 3076 tmp = proc;
3080 3077 proc = list_next(&plist, proc);
3081 3078 list_remove(&plist, tmp);
3082 3079 list_link_init(&tmp->zspr_next);
3083 3080 break;
3084 3081 }
3085 3082 }
3086 3083 /*
3087 3084 * Process has never been seen. Using zone info to
3088 3085 * determine pset and scheduling class.
3089 3086 */
3090 3087 proc = list_head(&pplist);
3091 3088 while (proc != NULL) {
3092 3089
3093 3090 zone = zsd_lookup_zone_byid(ctl, proc->zspr_zoneid);
3094 3091 if (zone == NULL)
3095 3092 goto next;
3096 3093 if (zone->zsz_psetid != ZS_PSET_ERROR &&
3097 3094 zone->zsz_psetid != ZS_PSET_MULTI) {
3098 3095 prev_psetid = zone->zsz_psetid;
3099 3096 pset = zsd_lookup_pset_byid(ctl, prev_psetid);
3100 3097 } else {
3101 3098 pset = zsd_lookup_pset(ctl, zone->zsz_pset, -1);
3102 3099 if (pset != NULL)
3103 3100 prev_psetid = pset->zsp_id;
3104 3101 }
3105 3102 if (pset == NULL)
3106 3103 goto next;
3107 3104
3108 3105 sched = zone->zsz_scheds;
3109 3106 /*
3110 3107 * Ignore FX high scheduling class if it is not the
3111 3108 * only scheduling class in the zone.
3112 3109 */
3113 3110 if (sched != ZS_SCHED_FX_60)
3114 3111 sched &= (~ZS_SCHED_FX_60);
3115 3112 /*
3116 3113 * If more than one scheduling class has been found
3117 3114 * in the zone, use zone's default scheduling class for
3118 3115 * this process.
3119 3116 */
3120 3117 if ((sched & (sched - 1)) != 0)
3121 3118 sched = zone->zsz_default_sched;
3122 3119
3123 3120 /* Add the zone's usage to the pset */
3124 3121 usage = zsd_lookup_insert_usage(ctl, pset, zone);
3125 3122 if (usage == NULL)
3126 3123 goto next;
3127 3124
3128 3125 zsd_mark_pset_usage_found(usage, sched);
3129 3126 zsd_add_usage(ctl, usage, &proc->zspr_usage);
3130 3127 next:
3131 3128 tmp = proc;
3132 3129 proc = list_next(&pplist, proc);
3133 3130 zsd_flush_proc_info(tmp);
3134 3131 list_link_init(&tmp->zspr_next);
3135 3132 }
3136 3133 return;
3137 3134 ea_err:
3138 3135 /*
3139 3136 * Close the next accounting file if we have not transitioned to it
3140 3137 * yet.
3141 3138 */
3142 3139 if (ctl->zsctl_proc_open_next) {
3143 3140 (void) ea_close(&ctl->zsctl_proc_eaf_next);
3144 3141 ctl->zsctl_proc_open_next = 0;
3145 3142 ctl->zsctl_proc_fd_next = -1;
3146 3143 }
3147 3144 }
3148 3145
3149 3146 /*
3150 3147  * getvmusage(2) uses size_t's in the passed data structure, which differ
3151 3148  * in size for 32bit and 64 bit kernels. Since this is a contracted interface,
3152 3149  * and zonestatd does not necessarily match the kernel's bitness, marshal
3153 3150  * results appropriately.
3154 3151  */
3155 3152 static int
3156 3153 zsd_getvmusage(zsd_ctl_t *ctl, uint_t flags, time_t age, zsd_vmusage64_t *buf,
3157 3154 uint64_t *nres)
3158 3155 {
3159 3156 zsd_vmusage32_t *vmu32;
3160 3157 zsd_vmusage64_t *vmu64;
3161 3158 uint32_t nres32;
3162 3159 int i;
3163 3160 int ret;
3164 3161
3165 3162 if (ctl->zsctl_kern_bits == 32) {
3166 3163 nres32 = *nres;
3167 3164 ret = syscall(SYS_rusagesys, _RUSAGESYS_GETVMUSAGE,
3168 3165 flags, age, (uintptr_t)buf, (uintptr_t)&nres32);
3169 3166 *nres = nres32;
3170 3167 if (ret == 0 && buf != NULL) {
3171 3168 /*
3172 3169 * An array of vmusage32_t's has been returned.
3173 3170 * Convert it to an array of vmusage64_t's.
3174 3171 */
3175 3172 vmu32 = (zsd_vmusage32_t *)buf;
3176 3173 vmu64 = (zsd_vmusage64_t *)buf;
3177 3174 for (i = nres32 - 1; i >= 0; i--) {
3178 3175
3179 3176 vmu64[i].vmu_zoneid = vmu32[i].vmu_zoneid;
3180 3177 vmu64[i].vmu_type = vmu32[i].vmu_type;
3181 3178 vmu64[i].vmu_type = vmu32[i].vmu_type;
3182 3179 vmu64[i].vmu_rss_all = vmu32[i].vmu_rss_all;
3183 3180 vmu64[i].vmu_rss_private =
3184 3181 vmu32[i].vmu_rss_private;
3185 3182 vmu64[i].vmu_rss_shared =
3186 3183 vmu32[i].vmu_rss_shared;
3187 3184 vmu64[i].vmu_swap_all = vmu32[i].vmu_swap_all;
3188 3185 vmu64[i].vmu_swap_private =
3189 3186 vmu32[i].vmu_swap_private;
3190 3187 vmu64[i].vmu_swap_shared =
3191 3188 vmu32[i].vmu_swap_shared;
3192 3189 }
3193 3190 }
3194 3191 return (ret);
3195 3192 } else {
3196 3193 /*
3197 3194 * kernel is 64 bit, so use 64 bit structures as zonestat
3198 3195 * expects.
3199 3196 */
3200 3197 return (syscall(SYS_rusagesys, _RUSAGESYS_GETVMUSAGE,
3201 3198 flags, age, (uintptr_t)buf, (uintptr_t)nres));
3202 3199
3203 3200 }
3204 3201 }
3205 3202
3206 3203 /*
3207 3204 * Update the current physical, virtual, and locked memory usage of the
3208 3205 * running zones.
3209 3206 */
3210 3207 static void
3211 3208 zsd_refresh_memory(zsd_ctl_t *ctl, boolean_t init)
3212 3209 {
3213 3210
3214 3211 uint64_t phys_total;
3215 3212 uint64_t phys_used;
3216 3213 uint64_t phys_zones;
3217 3214 uint64_t phys_zones_overcount;
3218 3215 uint64_t phys_zones_extra;
3219 3216 uint64_t phys_zones_credit;
3220 3217
3221 3218 uint64_t vm_free;
3222 3219 uint64_t vm_used;
3223 3220
3224 3221 uint64_t disk_swap_total;
3225 3222 uint64_t disk_swap_used; /* disk swap with contents */
3226 3223
3227 3224 uint64_t physmem;
3228 3225 uint64_t pp_kernel;
3229 3226 uint64_t arc_size = 0;
3230 3227 struct anoninfo ani;
3231 3228
3232 3229 int num_swap_devices;
3233 3230 struct swaptable *swt;
3234 3231 struct swapent *swent;
3235 3232 size_t swt_size;
3236 3233 char *path;
3237 3234
3238 3235 zsd_vmusage64_t *vmusage;
3239 3236 uint64_t num_vmusage;
3240 3237
3241 3238 int i, ret;
3242 3239
3243 3240 zsd_system_t *sys;
3244 3241 zsd_zone_t *zone;
3245 3242 int vmu_nzones;
3246 3243
3247 3244 kstat_t *kstat;
3248 3245 char kstat_name[KSTAT_STRLEN];
3249 3246 kstat_named_t *knp;
3250 3247 kid_t kid;
3251 3248
3252 3249 if (init)
3253 3250 return;
3254 3251
3255 3252 sys = ctl->zsctl_system;
3256 3253
3257 3254 /* interrogate swap devices to find the amount of disk swap */
3258 3255 disk_swap_again:
3259 3256 num_swap_devices = swapctl(SC_GETNSWP, NULL);
3260 3257
3261 3258 if (num_swap_devices == 0) {
3262 3259 sys->zss_swap_total = disk_swap_total = 0;
3263 3260 sys->zss_swap_used = disk_swap_used = 0;
3264 3261 /* No disk swap */
3265 3262 goto disk_swap_done;
3266 3263 }
3267 3264 /* see if swap table needs to be larger */
3268 3265 if (num_swap_devices > ctl->zsctl_swap_cache_num) {
3269 3266 swt_size = sizeof (int) +
3270 3267 (num_swap_devices * sizeof (struct swapent)) +
3271 3268 (num_swap_devices * MAXPATHLEN);
3272 3269 if (ctl->zsctl_swap_cache != NULL)
3273 3270 free(ctl->zsctl_swap_cache);
3274 3271
3275 3272 swt = (struct swaptable *)malloc(swt_size);
3276 3273 if (swt == NULL) {
3277 3274 /*
3278 3275 * Could not allocate to get list of swap devices.
3279 3276 * Just use data from the most recent read, which will
3280 3277 * be zero if this is the first read.
3281 3278 */
3282 3279 zsd_warn(gettext("Unable to allocate to determine "
3283 3280 "virtual memory"));
3284 3281 disk_swap_total = sys->zss_swap_total;
3285 3282 disk_swap_used = sys->zss_swap_used;
3286 3283 goto disk_swap_done;
3287 3284 }
3288 3285 swent = swt->swt_ent;
3289 3286 path = (char *)swt + (sizeof (int) +
3290 3287 num_swap_devices * sizeof (swapent_t));
3291 3288 for (i = 0; i < num_swap_devices; i++, swent++) {
3292 3289 swent->ste_path = path;
3293 3290 path += MAXPATHLEN;
3294 3291 }
3295 3292 swt->swt_n = num_swap_devices;
3296 3293 ctl->zsctl_swap_cache = swt;
3297 3294 ctl->zsctl_swap_cache_size = swt_size;
3298 3295 ctl->zsctl_swap_cache_num = num_swap_devices;
3299 3296 }
3300 3297 num_swap_devices = swapctl(SC_LIST, ctl->zsctl_swap_cache);
3301 3298 if (num_swap_devices < 0) {
3302 3299 /* More swap devices have arrived */
3303 3300 if (errno == ENOMEM)
3304 3301 goto disk_swap_again;
3305 3302
3306 3303 zsd_warn(gettext("Unable to determine disk swap devices"));
3307 3304 /* Unexpected error. Use existing data */
3308 3305 disk_swap_total = sys->zss_swap_total;
3309 3306 disk_swap_used = sys->zss_swap_used;
3310 3307 goto disk_swap_done;
3311 3308 }
3312 3309
3313 3310 /* add up the disk swap */
3314 3311 disk_swap_total = 0;
3315 3312 disk_swap_used = 0;
3316 3313 swent = ctl->zsctl_swap_cache->swt_ent;
3317 3314 for (i = 0; i < num_swap_devices; i++, swent++) {
3318 3315 disk_swap_total += swent->ste_pages;
3319 3316 disk_swap_used += (swent->ste_pages - swent->ste_free);
3320 3317 }
3321 3318 disk_swap_total *= ctl->zsctl_pagesize;
3322 3319 disk_swap_used *= ctl->zsctl_pagesize;
3323 3320
3324 3321 sys->zss_swap_total = disk_swap_total;
3325 3322 sys->zss_swap_used = disk_swap_used;
3326 3323
3327 3324 disk_swap_done:
3328 3325
3329 3326 /* get system pages kstat */
3330 3327 kid = -1;
3331 3328 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "unix", 0, "system_pages");
3332 3329 if (kstat == NULL)
3333 3330 zsd_warn(gettext("Unable to lookup system pages kstat"));
3334 3331 else
3335 3332 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
3336 3333
3337 3334 if (kid == -1) {
3338 3335 zsd_warn(gettext("Unable to read system pages kstat"));
3339 3336 return;
3340 3337 } else {
3341 3338 knp = kstat_data_lookup(kstat, "physmem");
3342 3339 if (knp == NULL) {
3343 3340 zsd_warn(gettext("Unable to read physmem"));
3344 3341 } else {
3345 3342 if (knp->data_type == KSTAT_DATA_UINT64)
3346 3343 physmem = knp->value.ui64;
3347 3344 else if (knp->data_type == KSTAT_DATA_UINT32)
3348 3345 physmem = knp->value.ui32;
3349 3346 else
3350 3347 return;
3351 3348 }
3352 3349 knp = kstat_data_lookup(kstat, "pp_kernel");
3353 3350 if (knp == NULL) {
3354 3351 zsd_warn(gettext("Unable to read pp_kernel"));
3355 3352 } else {
3356 3353 if (knp->data_type == KSTAT_DATA_UINT64)
3357 3354 pp_kernel = knp->value.ui64;
3358 3355 else if (knp->data_type == KSTAT_DATA_UINT32)
3359 3356 pp_kernel = knp->value.ui32;
3360 3357 else
3361 3358 return;
3362 3359 }
3363 3360 }
3364 3361 physmem *= ctl->zsctl_pagesize;
3365 3362 pp_kernel *= ctl->zsctl_pagesize;
3366 3363
3367 3364 /* get the zfs arc size if available */
3368 3365 arc_size = 0;
3369 3366 kid = -1;
3370 3367 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "zfs", 0, "arcstats");
3371 3368 if (kstat != NULL)
3372 3369 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
3373 3370 if (kid != -1) {
3374 3371 knp = kstat_data_lookup(kstat, "size");
3375 3372 if (knp != NULL)
3376 3373 if (knp->data_type == KSTAT_DATA_UINT64)
3377 3374 arc_size = knp->value.ui64;
3378 3375 }
3379 3376
3380 3377 /* Try to get swap information */
3381 3378 if (swapctl(SC_AINFO, &ani) < 0) {
3382 3379 zsd_warn(gettext("Unable to get swap info"));
3383 3380 return;
3384 3381 }
3385 3382
3386 3383 vmusage_again:
3387 3384 /* getvmusage to get physical memory usage */
3388 3385 vmusage = ctl->zsctl_vmusage_cache;
3389 3386 num_vmusage = ctl->zsctl_vmusage_cache_num;
3390 3387
3391 3388 ret = zsd_getvmusage(ctl, VMUSAGE_SYSTEM | VMUSAGE_ALL_ZONES, 0,
3392 3389 vmusage, &num_vmusage);
3393 3390
3394 3391 if (ret != 0) {
3395 3392 /* Unexpected error. Use existing data */
3396 3393 if (errno != EOVERFLOW) {
3397 3394 zsd_warn(gettext(
3398 3395 "Unable to read physical memory usage"));
3399 3396 phys_zones = sys->zss_ram_zones;
3400 3397 goto vmusage_done;
3401 3398 }
3402 3399 }
3403 3400 /* vmusage results cache too small */
3404 3401 if (num_vmusage > ctl->zsctl_vmusage_cache_num) {
3405 3402
3406 3403 size_t size = sizeof (zsd_vmusage64_t) * num_vmusage;
3407 3404
3408 3405 if (ctl->zsctl_vmusage_cache != NULL)
3409 3406 free(ctl->zsctl_vmusage_cache);
3410 3407 vmusage = (zsd_vmusage64_t *)malloc(size);
3411 3408 if (vmusage == NULL) {
3412 3409 zsd_warn(gettext("Unable to alloc to determine "
3413 3410 "physical memory usage"));
3414 3411 phys_zones = sys->zss_ram_zones;
3415 3412 goto vmusage_done;
3416 3413 }
3417 3414 ctl->zsctl_vmusage_cache = vmusage;
3418 3415 ctl->zsctl_vmusage_cache_num = num_vmusage;
3419 3416 goto vmusage_again;
3420 3417 }
3421 3418
3422 3419 phys_zones_overcount = 0;
3423 3420 vmu_nzones = 0;
3424 3421 for (i = 0; i < num_vmusage; i++) {
3425 3422 switch (vmusage[i].vmu_type) {
3426 3423 case VMUSAGE_SYSTEM:
3427 3424 /* total pages backing user process mappings */
3428 3425 phys_zones = sys->zss_ram_zones =
3429 3426 vmusage[i].vmu_rss_all;
3430 3427 break;
3431 3428 case VMUSAGE_ZONE:
3432 3429 vmu_nzones++;
3433 3430 phys_zones_overcount += vmusage[i].vmu_rss_all;
3434 3431 zone = zsd_lookup_zone_byid(ctl, vmusage[i].vmu_id);
3435 3432 if (zone != NULL)
3436 3433 zone->zsz_usage_ram = vmusage[i].vmu_rss_all;
3437 3434 break;
3438 3435 default:
3439 3436 break;
3440 3437 }
3441 3438 }
3442 3439 /*
3443 3440 * Figure how much memory was double counted due to text sharing
3444 3441 * between zones. Credit this back so that the sum of the zones
3445 3442 * equals the total zone ram usage;
3446 3443 */
3447 3444 phys_zones_extra = phys_zones_overcount - phys_zones;
3448 3445 phys_zones_credit = phys_zones_extra / vmu_nzones;
3449 3446
3450 3447 vmusage_done:
3451 3448
3452 3449 /* walk the zones to get swap and locked kstats. Fetch ram cap. */
3453 3450 sys->zss_locked_zones = 0;
3454 3451 sys->zss_vm_zones = 0;
3455 3452 for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
3456 3453 zone = list_next(&ctl->zsctl_zones, zone)) {
3457 3454
3458 3455 /* If zone halted during interval, show memory usage as none */
3459 3456 if (zone->zsz_active == B_FALSE ||
3460 3457 zone->zsz_deleted == B_TRUE) {
3461 3458 zone->zsz_usage_ram = 0;
3462 3459 zone->zsz_usage_vm = 0;
3463 3460 zone->zsz_usage_locked = 0;
3464 3461 continue;
3465 3462 }
3466 3463
3467 3464 if (phys_zones_credit > 0) {
3468 3465 if (zone->zsz_usage_ram > phys_zones_credit) {
3469 3466 zone->zsz_usage_ram -= phys_zones_credit;
3470 3467 }
3471 3468 }
3472 3469 /*
3473 3470 * Get zone's swap usage. Since zone could have halted,
3474 3471 * treats as zero if cannot read
3475 3472 */
3476 3473 zone->zsz_usage_vm = 0;
3477 3474 (void) snprintf(kstat_name, sizeof (kstat_name),
3478 3475 "swapresv_zone_%d", zone->zsz_id);
3479 3476 kid = -1;
3480 3477 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "caps",
3481 3478 zone->zsz_id, kstat_name);
3482 3479 if (kstat != NULL)
3483 3480 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
3484 3481 if (kid != -1) {
3485 3482 knp = kstat_data_lookup(kstat, "usage");
3486 3483 if (knp != NULL &&
3487 3484 knp->data_type == KSTAT_DATA_UINT64) {
3488 3485 zone->zsz_usage_vm = knp->value.ui64;
3489 3486 sys->zss_vm_zones += knp->value.ui64;
3490 3487 }
3491 3488 }
3492 3489 /*
3493 3490 * Get zone's locked usage. Since zone could have halted,
3494 3491 * treats as zero if cannot read
3495 3492 */
3496 3493 zone->zsz_usage_locked = 0;
3497 3494 (void) snprintf(kstat_name, sizeof (kstat_name),
3498 3495 "lockedmem_zone_%d", zone->zsz_id);
3499 3496 kid = -1;
3500 3497 kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "caps",
3501 3498 zone->zsz_id, kstat_name);
3502 3499 if (kstat != NULL)
3503 3500 kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
3504 3501 if (kid != -1) {
3505 3502 knp = kstat_data_lookup(kstat, "usage");
3506 3503 if (knp != NULL &&
3507 3504 knp->data_type == KSTAT_DATA_UINT64) {
3508 3505 zone->zsz_usage_locked = knp->value.ui64;
3509 3506 /*
3510 3507 * Since locked memory accounting for zones
3511 3508 * can double count ddi locked memory, cap each
3512 3509 * zone's locked usage at its ram usage.
3513 3510 */
3514 3511 if (zone->zsz_usage_locked >
3515 3512 zone->zsz_usage_ram)
3516 3513 zone->zsz_usage_locked =
3517 3514 zone->zsz_usage_ram;
3518 3515 sys->zss_locked_zones +=
3519 3516 zone->zsz_usage_locked;
3520 3517 }
3521 3518 }
3522 3519 }
3523 3520
3524 3521 phys_total =
3525 3522 sysconf(_SC_PHYS_PAGES) * ctl->zsctl_pagesize;
3526 3523
3527 3524 phys_used = (sysconf(_SC_PHYS_PAGES) - sysconf(_SC_AVPHYS_PAGES))
3528 3525 * ctl->zsctl_pagesize;
3529 3526
3530 3527 /* Compute remaining statistics */
3531 3528 sys->zss_ram_total = phys_total;
3532 3529 sys->zss_ram_zones = phys_zones;
3533 3530 sys->zss_ram_kern = phys_used - phys_zones - arc_size;
3534 3531
3535 3532 /*
3536 3533 * The total for kernel locked memory should include
3537 3534 * segkp locked pages, but oh well. The arc size is subtracted,
3538 3535 * as that physical memory is reclaimable.
3539 3536 */
3540 3537 sys->zss_locked_kern = pp_kernel - arc_size;
3541 3538 /* Add memory used by kernel startup and obp to kernel locked */
3542 3539 if ((phys_total - physmem) > 0)
3543 3540 sys->zss_locked_kern += phys_total - physmem;
3544 3541
3545 3542 /*
3546 3543 * Add in the portion of (RAM+DISK) that is not available as swap,
3547 3544 * and consider it swap used by the kernel.
3548 3545 */
3549 3546 sys->zss_vm_total = phys_total + disk_swap_total;
3550 3547 vm_free = (ani.ani_max - ani.ani_resv) * ctl->zsctl_pagesize;
3551 3548 vm_used = sys->zss_vm_total - vm_free;
3552 3549 sys->zss_vm_kern = vm_used - sys->zss_vm_zones - arc_size;
3553 3550 }
3554 3551
/*
 * Charge each cpu's usage to its processor sets. Also add the cpu's total
 * time to each zone using the processor set. This tracks the maximum
 * amount of cpu time that a zone could have used.
 *
 * On the init pass only the hrtime baselines are recorded; all deltas are
 * computed on subsequent passes relative to the previous call.
 */
static void
zsd_refresh_cpu_stats(zsd_ctl_t *ctl, boolean_t init)
{
	zsd_system_t *sys;
	zsd_zone_t *zone;
	zsd_pset_usage_t *usage;
	zsd_cpu_t *cpu;
	zsd_cpu_t *cpu_next;
	zsd_pset_t *pset;
	timestruc_t ts;
	uint64_t hrtime;	/* reused: wall hrtime, then share nanosecs */
	timestruc_t delta;

	/* Update the per-cpu kstat data */
	cpu_next = list_head(&ctl->zsctl_cpus);
	while (cpu_next != NULL) {
		cpu = cpu_next;
		cpu_next = list_next(&ctl->zsctl_cpus, cpu);
		zsd_update_cpu_stats(ctl, cpu);
	}
	/* Update the elapsed real time */
	hrtime = gethrtime();
	if (init) {
		/* first time around, store hrtime for future comparison */
		ctl->zsctl_hrtime = hrtime;
		ctl->zsctl_hrtime_prev = hrtime;

	} else {
		/* Compute increase in hrtime since the most recent read */
		ctl->zsctl_hrtime_prev = ctl->zsctl_hrtime;
		ctl->zsctl_hrtime = hrtime;
		if ((hrtime = hrtime - ctl->zsctl_hrtime_prev) > 0)
			TIMESTRUC_ADD_NANOSEC(ctl->zsctl_hrtime_total, hrtime);
	}

	/* On initialization, all psets have zero time  */
	if (init)
		return;

	for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
	    pset = list_next(&ctl->zsctl_psets, pset)) {

		if (pset->zsp_active == B_FALSE) {
			zsd_warn(gettext("Internal error,inactive pset found"));
			continue;
		}

		/* sum total used time for pset (intr + kernel + user) */
		ts.tv_sec = 0;
		ts.tv_nsec = 0;
		TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_intr);
		TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_kern);
		TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_user);
		/* kernel time in pset is total time minus zone time */
		TIMESTRUC_DELTA(pset->zsp_usage_kern, ts,
		    pset->zsp_usage_zones);
		/* clamp negative results (counter skew) to zero */
		if (pset->zsp_usage_kern.tv_sec < 0 ||
		    pset->zsp_usage_kern.tv_nsec < 0) {
			pset->zsp_usage_kern.tv_sec = 0;
			pset->zsp_usage_kern.tv_nsec = 0;
		}
		/* Total pset elapsed time is used time plus idle time */
		TIMESTRUC_ADD_TIMESTRUC(ts, pset->zsp_idle);

		/* delta = pset elapsed time since the previous pass */
		TIMESTRUC_DELTA(delta, ts, pset->zsp_total_time);

		for (usage = list_head(&pset->zsp_usage_list); usage != NULL;
		    usage = list_next(&pset->zsp_usage_list, usage)) {

			zone = usage->zsu_zone;
			if (usage->zsu_cpu_shares != ZS_LIMIT_NONE &&
			    usage->zsu_cpu_shares != ZS_SHARES_UNLIMITED &&
			    usage->zsu_cpu_shares != 0) {
				/*
				 * Figure out how many nanoseconds of share time
				 * to give to the zone: the pset's elapsed time
				 * scaled by the zone's fraction of shares.
				 * NOTE(review): assumes zsp_cpu_shares is
				 * nonzero whenever a zone in the pset has
				 * shares -- verify against pset refresh code.
				 */
				hrtime = delta.tv_sec;
				hrtime *= NANOSEC;
				hrtime += delta.tv_nsec;
				hrtime *= usage->zsu_cpu_shares;
				hrtime /= pset->zsp_cpu_shares;
				TIMESTRUC_ADD_NANOSEC(zone->zsz_share_time,
				    hrtime);
			}
			/* Add pset time to each zone using pset */
			TIMESTRUC_ADD_TIMESTRUC(zone->zsz_pset_time, delta);

			zone->zsz_cpus_online += pset->zsp_online;
		}
		/* remember this pass's total for the next delta */
		pset->zsp_total_time = ts;
	}

	for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
	    zone = list_next(&ctl->zsctl_zones, zone)) {

		/* update cpu cap tracking if the zone has a cpu cap */
		if (zone->zsz_cpu_cap != ZS_LIMIT_NONE) {
			uint64_t elapsed;

			/*
			 * elapsed wall time scaled by the cap; the /100
			 * suggests zsz_cpu_cap is in percent-of-a-cpu
			 * units -- confirm against cap configuration code.
			 */
			elapsed = ctl->zsctl_hrtime - ctl->zsctl_hrtime_prev;
			elapsed *= zone->zsz_cpu_cap;
			elapsed = elapsed / 100;
			TIMESTRUC_ADD_NANOSEC(zone->zsz_cap_time, elapsed);
		}
	}
	/* Roll up the same used/idle accounting for the whole system */
	sys = ctl->zsctl_system;
	ts.tv_sec = 0;
	ts.tv_nsec = 0;
	TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_intr);
	TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_kern);
	TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_user);

	/* kernel time in pset is total time minus zone time */
	TIMESTRUC_DELTA(sys->zss_cpu_usage_kern, ts,
	    sys->zss_cpu_usage_zones);
	if (sys->zss_cpu_usage_kern.tv_sec < 0 ||
	    sys->zss_cpu_usage_kern.tv_nsec < 0) {
		sys->zss_cpu_usage_kern.tv_sec = 0;
		sys->zss_cpu_usage_kern.tv_nsec = 0;
	}
	/* Total pset elapsed time is used time plus idle time */
	TIMESTRUC_ADD_TIMESTRUC(ts, sys->zss_idle);
	sys->zss_cpu_total_time = ts;
}
3685 3682
3686 3683 /*
3687 3684 * Saves current usage data to a cache that is read by libzonestat when
3688 3685 * calling zs_usage_read().
3689 3686 *
3690 3687 * All pointers in the cached data structure are set to NULL. When
3691 3688 * libzonestat reads the cached data, it will set the pointers relative to
3692 3689 * its address space.
3693 3690 */
3694 3691 static void
3695 3692 zsd_usage_cache_update(zsd_ctl_t *ctl)
3696 3693 {
3697 3694 zs_usage_cache_t *cache;
3698 3695 zs_usage_cache_t *old;
3699 3696 zs_usage_t *usage;
3700 3697
3701 3698 zs_system_t *sys;
3702 3699 zsd_system_t *dsys;
3703 3700 zs_zone_t *zone = NULL;
3704 3701 zsd_zone_t *dzone;
3705 3702 zs_pset_t *pset = NULL;
3706 3703 zsd_pset_t *dpset;
3707 3704 zs_pset_zone_t *pusage;
3708 3705 zsd_pset_usage_t *dpusage;
3709 3706
3710 3707 char *next;
3711 3708 uint_t size, i, j;
3712 3709
3713 3710 size =
3714 3711 sizeof (zs_usage_cache_t) +
3715 3712 sizeof (zs_usage_t) +
3716 3713 sizeof (zs_system_t) +
3717 3714 sizeof (zs_zone_t) * ctl->zsctl_nzones +
3718 3715 sizeof (zs_pset_t) * ctl->zsctl_npsets +
3719 3716 sizeof (zs_pset_zone_t) * ctl->zsctl_npset_usages;
3720 3717
3721 3718 cache = (zs_usage_cache_t *)malloc(size);
3722 3719 if (cache == NULL) {
3723 3720 zsd_warn(gettext("Unable to allocate usage cache\n"));
3724 3721 return;
3725 3722 }
3726 3723
3727 3724 next = (char *)cache;
3728 3725 cache->zsuc_size = size - sizeof (zs_usage_cache_t);
3729 3726 next += sizeof (zs_usage_cache_t);
3730 3727
3731 3728 /* LINTED */
3732 3729 usage = cache->zsuc_usage = (zs_usage_t *)next;
3733 3730 next += sizeof (zs_usage_t);
3734 3731 usage->zsu_start = g_start;
3735 3732 usage->zsu_hrstart = g_hrstart;
3736 3733 usage->zsu_time = g_now;
3737 3734 usage->zsu_hrtime = g_hrnow;
3738 3735 usage->zsu_nzones = ctl->zsctl_nzones;
3739 3736 usage->zsu_npsets = ctl->zsctl_npsets;
3740 3737 usage->zsu_system = NULL;
3741 3738
3742 3739 /* LINTED */
3743 3740 sys = (zs_system_t *)next;
3744 3741 next += sizeof (zs_system_t);
3745 3742 dsys = ctl->zsctl_system;
3746 3743 sys->zss_ram_total = dsys->zss_ram_total;
3747 3744 sys->zss_ram_kern = dsys->zss_ram_kern;
3748 3745 sys->zss_ram_zones = dsys->zss_ram_zones;
3749 3746 sys->zss_locked_kern = dsys->zss_locked_kern;
3750 3747 sys->zss_locked_zones = dsys->zss_locked_zones;
3751 3748 sys->zss_vm_total = dsys->zss_vm_total;
3752 3749 sys->zss_vm_kern = dsys->zss_vm_kern;
3753 3750 sys->zss_vm_zones = dsys->zss_vm_zones;
3754 3751 sys->zss_swap_total = dsys->zss_swap_total;
3755 3752 sys->zss_swap_used = dsys->zss_swap_used;
3756 3753 sys->zss_ncpus = dsys->zss_ncpus;
3757 3754 sys->zss_ncpus_online = dsys->zss_ncpus_online;
3758 3755
3759 3756 sys->zss_processes_max = dsys->zss_maxpid;
3760 3757 sys->zss_lwps_max = dsys->zss_lwps_max;
3761 3758 sys->zss_shm_max = dsys->zss_shm_max;
3762 3759 sys->zss_shmids_max = dsys->zss_shmids_max;
3763 3760 sys->zss_semids_max = dsys->zss_semids_max;
3764 3761 sys->zss_msgids_max = dsys->zss_msgids_max;
3765 3762 sys->zss_lofi_max = dsys->zss_lofi_max;
3766 3763
3767 3764 sys->zss_processes = dsys->zss_processes;
3768 3765 sys->zss_lwps = dsys->zss_lwps;
3769 3766 sys->zss_shm = dsys->zss_shm;
3770 3767 sys->zss_shmids = dsys->zss_shmids;
3771 3768 sys->zss_semids = dsys->zss_semids;
3772 3769 sys->zss_msgids = dsys->zss_msgids;
3773 3770 sys->zss_lofi = dsys->zss_lofi;
3774 3771
3775 3772 sys->zss_cpu_total_time = dsys->zss_cpu_total_time;
3776 3773 sys->zss_cpu_usage_zones = dsys->zss_cpu_usage_zones;
3777 3774 sys->zss_cpu_usage_kern = dsys->zss_cpu_usage_kern;
3778 3775
3779 3776 for (i = 0, dzone = list_head(&ctl->zsctl_zones);
3780 3777 i < ctl->zsctl_nzones;
3781 3778 i++, dzone = list_next(&ctl->zsctl_zones, dzone)) {
3782 3779 /* LINTED */
3783 3780 zone = (zs_zone_t *)next;
3784 3781 next += sizeof (zs_zone_t);
3785 3782 list_link_init(&zone->zsz_next);
3786 3783 zone->zsz_system = NULL;
3787 3784
3788 3785 (void) strlcpy(zone->zsz_name, dzone->zsz_name,
3789 3786 sizeof (zone->zsz_name));
3790 3787 (void) strlcpy(zone->zsz_pool, dzone->zsz_pool,
3791 3788 sizeof (zone->zsz_pool));
3792 3789 (void) strlcpy(zone->zsz_pset, dzone->zsz_pset,
3793 3790 sizeof (zone->zsz_pset));
3794 3791 zone->zsz_id = dzone->zsz_id;
3795 3792 zone->zsz_cputype = dzone->zsz_cputype;
3796 3793 zone->zsz_iptype = dzone->zsz_iptype;
3797 3794 zone->zsz_start = dzone->zsz_start;
3798 3795 zone->zsz_hrstart = dzone->zsz_hrstart;
3799 3796 zone->zsz_scheds = dzone->zsz_scheds;
3800 3797 zone->zsz_cpu_shares = dzone->zsz_cpu_shares;
3801 3798 zone->zsz_cpu_cap = dzone->zsz_cpu_cap;
3802 3799 zone->zsz_ram_cap = dzone->zsz_ram_cap;
3803 3800 zone->zsz_vm_cap = dzone->zsz_vm_cap;
3804 3801 zone->zsz_locked_cap = dzone->zsz_locked_cap;
3805 3802 zone->zsz_cpu_usage = dzone->zsz_cpu_usage;
3806 3803 zone->zsz_cpus_online = dzone->zsz_cpus_online;
3807 3804 zone->zsz_pset_time = dzone->zsz_pset_time;
3808 3805 zone->zsz_cap_time = dzone->zsz_cap_time;
3809 3806 zone->zsz_share_time = dzone->zsz_share_time;
3810 3807 zone->zsz_usage_ram = dzone->zsz_usage_ram;
3811 3808 zone->zsz_usage_locked = dzone->zsz_usage_locked;
3812 3809 zone->zsz_usage_vm = dzone->zsz_usage_vm;
3813 3810
3814 3811 zone->zsz_processes_cap = dzone->zsz_processes_cap;
3815 3812 zone->zsz_lwps_cap = dzone->zsz_lwps_cap;
3816 3813 zone->zsz_shm_cap = dzone->zsz_shm_cap;
3817 3814 zone->zsz_shmids_cap = dzone->zsz_shmids_cap;
3818 3815 zone->zsz_semids_cap = dzone->zsz_semids_cap;
3819 3816 zone->zsz_msgids_cap = dzone->zsz_msgids_cap;
3820 3817 zone->zsz_lofi_cap = dzone->zsz_lofi_cap;
3821 3818
3822 3819 zone->zsz_processes = dzone->zsz_processes;
3823 3820 zone->zsz_lwps = dzone->zsz_lwps;
3824 3821 zone->zsz_shm = dzone->zsz_shm;
3825 3822 zone->zsz_shmids = dzone->zsz_shmids;
3826 3823 zone->zsz_semids = dzone->zsz_semids;
3827 3824 zone->zsz_msgids = dzone->zsz_msgids;
3828 3825 zone->zsz_lofi = dzone->zsz_lofi;
3829 3826 }
3830 3827
3831 3828 for (i = 0, dpset = list_head(&ctl->zsctl_psets);
3832 3829 i < ctl->zsctl_npsets;
3833 3830 i++, dpset = list_next(&ctl->zsctl_psets, dpset)) {
3834 3831 /* LINTED */
3835 3832 pset = (zs_pset_t *)next;
3836 3833 next += sizeof (zs_pset_t);
3837 3834 list_link_init(&pset->zsp_next);
3838 3835 (void) strlcpy(pset->zsp_name, dpset->zsp_name,
3839 3836 sizeof (pset->zsp_name));
3840 3837 pset->zsp_id = dpset->zsp_id;
3841 3838 pset->zsp_cputype = dpset->zsp_cputype;
3842 3839 pset->zsp_start = dpset->zsp_start;
3843 3840 pset->zsp_hrstart = dpset->zsp_hrstart;
3844 3841 pset->zsp_online = dpset->zsp_online;
3845 3842 pset->zsp_size = dpset->zsp_size;
3846 3843 pset->zsp_min = dpset->zsp_min;
3847 3844 pset->zsp_max = dpset->zsp_max;
3848 3845 pset->zsp_importance = dpset->zsp_importance;
3849 3846 pset->zsp_scheds = dpset->zsp_scheds;
3850 3847 pset->zsp_cpu_shares = dpset->zsp_cpu_shares;
3851 3848 pset->zsp_total_time = dpset->zsp_total_time;
3852 3849 pset->zsp_usage_kern = dpset->zsp_usage_kern;
3853 3850 pset->zsp_usage_zones = dpset->zsp_usage_zones;
3854 3851 pset->zsp_nusage = dpset->zsp_nusage;
3855 3852 /* Add pset usages for pset */
3856 3853 for (j = 0, dpusage = list_head(&dpset->zsp_usage_list);
3857 3854 j < dpset->zsp_nusage;
3858 3855 j++, dpusage = list_next(&dpset->zsp_usage_list, dpusage)) {
3859 3856 /* LINTED */
3860 3857 pusage = (zs_pset_zone_t *)next;
3861 3858 next += sizeof (zs_pset_zone_t);
3862 3859 /* pointers are computed by client */
3863 3860 pusage->zspz_pset = NULL;
3864 3861 pusage->zspz_zone = NULL;
3865 3862 list_link_init(&pusage->zspz_next);
3866 3863 pusage->zspz_zoneid = dpusage->zsu_zone->zsz_id;
3867 3864 pusage->zspz_start = dpusage->zsu_start;
3868 3865 pusage->zspz_hrstart = dpusage->zsu_hrstart;
3869 3866 pusage->zspz_hrstart = dpusage->zsu_hrstart;
3870 3867 pusage->zspz_cpu_shares = dpusage->zsu_cpu_shares;
3871 3868 pusage->zspz_scheds = dpusage->zsu_scheds;
3872 3869 pusage->zspz_cpu_usage = dpusage->zsu_cpu_usage;
3873 3870 }
3874 3871 }
3875 3872
3876 3873 /* Update the current cache pointer */
3877 3874 (void) mutex_lock(&g_usage_cache_lock);
3878 3875 old = g_usage_cache;
3879 3876 cache->zsuc_ref = 1;
3880 3877 cache->zsuc_gen = g_gen_next;
3881 3878 usage->zsu_gen = g_gen_next;
3882 3879 usage->zsu_size = size;
3883 3880 g_usage_cache = cache;
3884 3881 if (old != NULL) {
3885 3882 old->zsuc_ref--;
3886 3883 if (old->zsuc_ref == 0)
3887 3884 free(old);
3888 3885 }
3889 3886 g_gen_next++;
3890 3887 /* Wake up any clients that are waiting for this calculation */
3891 3888 if (g_usage_cache_kickers > 0) {
3892 3889 (void) cond_broadcast(&g_usage_cache_wait);
3893 3890 }
3894 3891 (void) mutex_unlock(&g_usage_cache_lock);
3895 3892 }
3896 3893
3897 3894 static zs_usage_cache_t *
3898 3895 zsd_usage_cache_hold_locked()
3899 3896 {
3900 3897 zs_usage_cache_t *ret;
3901 3898
3902 3899 ret = g_usage_cache;
3903 3900 ret->zsuc_ref++;
3904 3901 return (ret);
3905 3902 }
3906 3903
3907 3904 void
3908 3905 zsd_usage_cache_rele(zs_usage_cache_t *cache)
3909 3906 {
3910 3907 (void) mutex_lock(&g_usage_cache_lock);
3911 3908 cache->zsuc_ref--;
3912 3909 if (cache->zsuc_ref == 0)
3913 3910 free(cache);
3914 3911 (void) mutex_unlock(&g_usage_cache_lock);
3915 3912 }
3916 3913
/* Close the handles held by zsd_open() */
void
zsd_close(zsd_ctl_t *ctl)
{
	zsd_zone_t *zone;
	zsd_pset_t *pset;
	zsd_pset_usage_t *usage;
	zsd_cpu_t *cpu;
	int id;

	/* Close the kstat chain opened by zsd_open() */
	if (ctl->zsctl_kstat_ctl) {
		(void) kstat_close(ctl->zsctl_kstat_ctl);
		ctl->zsctl_kstat_ctl = NULL;
	}
	/* Close the extended accounting file if one is open */
	if (ctl->zsctl_proc_open) {
		(void) ea_close(&ctl->zsctl_proc_eaf);
		ctl->zsctl_proc_open = 0;
		ctl->zsctl_proc_fd = -1;
	}
	/* Only close the pool configuration if pools were enabled */
	if (ctl->zsctl_pool_conf) {
		if (ctl->zsctl_pool_status == POOL_ENABLED)
			(void) pool_conf_close(ctl->zsctl_pool_conf);
		ctl->zsctl_pool_status = POOL_DISABLED;
	}

	/* Free all tracked zones */
	while ((zone = list_head(&ctl->zsctl_zones)) != NULL) {
		list_remove(&ctl->zsctl_zones, zone);
		free(zone);
		ctl->zsctl_nzones--;
	}

	/*
	 * Free all tracked psets.  Each pset's usage list must be
	 * drained before the pset itself is freed, as the usages hang
	 * off the pset.
	 */
	while ((pset = list_head(&ctl->zsctl_psets)) != NULL) {
		while ((usage = list_head(&pset->zsp_usage_list))
		    != NULL) {
			list_remove(&pset->zsp_usage_list, usage);
			ctl->zsctl_npset_usages--;
			free(usage);
		}
		list_remove(&ctl->zsctl_psets, pset);
		free(pset);
		ctl->zsctl_npsets--;
	}

	/* Release all cpus being tracked */
	while (cpu = list_head(&ctl->zsctl_cpus)) {
		list_remove(&ctl->zsctl_cpus, cpu);
		id = cpu->zsc_id;
		/*
		 * cpu entries come from the preallocated
		 * zsctl_cpu_array (see zsd_open()), so reset each one
		 * to its unallocated state instead of freeing it.
		 */
		bzero(cpu, sizeof (zsd_cpu_t));
		cpu->zsc_id = id;
		cpu->zsc_allocated = B_FALSE;
		cpu->zsc_psetid = ZS_PSET_ERROR;
		cpu->zsc_psetid_prev = ZS_PSET_ERROR;
	}

	/* All tracked objects should now be gone */
	assert(ctl->zsctl_npset_usages == 0);
	assert(ctl->zsctl_npsets == 0);
	assert(ctl->zsctl_nzones == 0);
	(void) zsd_disable_cpu_stats();
}
3976 3973
3977 3974
3978 3975 /*
3979 3976 * Update the utilization data for all zones and processor sets.
3980 3977 */
3981 3978 static int
3982 3979 zsd_read(zsd_ctl_t *ctl, boolean_t init, boolean_t do_memory)
3983 3980 {
3984 3981 (void) kstat_chain_update(ctl->zsctl_kstat_ctl);
3985 3982 (void) gettimeofday(&(ctl->zsctl_timeofday), NULL);
3986 3983
3987 3984 zsd_refresh_system(ctl);
3988 3985
3989 3986 /*
3990 3987 * Memory calculation is expensive. Only update it on sample
3991 3988 * intervals.
3992 3989 */
3993 3990 if (do_memory == B_TRUE)
3994 3991 zsd_refresh_memory(ctl, init);
3995 3992 zsd_refresh_zones(ctl);
3996 3993 zsd_refresh_psets(ctl);
3997 3994 zsd_refresh_procs(ctl, init);
3998 3995 zsd_refresh_cpu_stats(ctl, init);
3999 3996
4000 3997 /*
4001 3998 * Delete objects that no longer exist.
4002 3999 * Pset usages must be deleted first as they point to zone and
4003 4000 * pset objects.
4004 4001 */
4005 4002 zsd_mark_pset_usages_end(ctl);
4006 4003 zsd_mark_psets_end(ctl);
4007 4004 zsd_mark_cpus_end(ctl);
4008 4005 zsd_mark_zones_end(ctl);
4009 4006
4010 4007 /*
4011 4008 * Save results for clients.
4012 4009 */
4013 4010 zsd_usage_cache_update(ctl);
4014 4011
4015 4012 /*
4016 4013 * Roll process accounting file.
4017 4014 */
4018 4015 (void) zsd_roll_exacct();
4019 4016 return (0);
4020 4017 }
4021 4018
4022 4019 /*
4023 4020 * Get the system rctl, which is the upper most limit
4024 4021 */
4025 4022 static uint64_t
4026 4023 zsd_get_system_rctl(char *name)
4027 4024 {
4028 4025 rctlblk_t *rblk, *rblk_last;
4029 4026
4030 4027 rblk = (rctlblk_t *)alloca(rctlblk_size());
4031 4028 rblk_last = (rctlblk_t *)alloca(rctlblk_size());
4032 4029
4033 4030 if (getrctl(name, NULL, rblk_last, RCTL_FIRST) != 0)
4034 4031 return (ZS_LIMIT_NONE);
4035 4032
4036 4033 while (getrctl(name, rblk_last, rblk, RCTL_NEXT) == 0)
4037 4034 (void) bcopy(rblk, rblk_last, rctlblk_size());
4038 4035
4039 4036 return (rctlblk_get_value(rblk_last));
4040 4037 }
4041 4038
4042 4039 /*
4043 4040 * Open any necessary subsystems for collecting utilization data,
4044 4041 * allocate and initialize data structures, and get initial utilization.
4045 4042 *
4046 4043 * Errors:
4047 4044 * ENOMEM out of memory
4048 4045 * EINVAL other error
4049 4046 */
4050 4047 static zsd_ctl_t *
4051 4048 zsd_open(zsd_ctl_t *ctl)
4052 4049 {
4053 4050 zsd_system_t *system;
4054 4051
4055 4052 char path[MAXPATHLEN];
4056 4053 long pathmax;
4057 4054 struct statvfs svfs;
4058 4055 int ret;
4059 4056 int i;
4060 4057 size_t size;
4061 4058 int err;
4062 4059
4063 4060 if (ctl == NULL && (ctl = (zsd_ctl_t *)calloc(1,
4064 4061 sizeof (zsd_ctl_t))) == NULL) {
4065 4062 zsd_warn(gettext("Out of Memory"));
4066 4063 errno = ENOMEM;
4067 4064 goto err;
4068 4065 }
4069 4066 ctl->zsctl_proc_fd = -1;
4070 4067
4071 4068 /* open kstats */
4072 4069 if (ctl->zsctl_kstat_ctl == NULL &&
4073 4070 (ctl->zsctl_kstat_ctl = kstat_open()) == NULL) {
4074 4071 err = errno;
4075 4072 zsd_warn(gettext("Unable to open kstats"));
4076 4073 errno = err;
4077 4074 if (errno != ENOMEM)
4078 4075 errno = EAGAIN;
4079 4076 goto err;
4080 4077 }
4081 4078
4082 4079 /*
4083 4080 * These are set when the accounting file is opened by
4084 4081 * zsd_update_procs()
4085 4082 */
4086 4083 ctl->zsctl_proc_fd = -1;
4087 4084 ctl->zsctl_proc_fd_next = -1;
4088 4085 ctl->zsctl_proc_open = 0;
4089 4086 ctl->zsctl_proc_open_next = 0;
4090 4087
4091 4088 check_exacct:
4092 4089 (void) zsd_enable_cpu_stats();
4093 4090
4094 4091 /* Create structures to track usage */
4095 4092 if (ctl->zsctl_system == NULL && (ctl->zsctl_system = (zsd_system_t *)
4096 4093 calloc(1, sizeof (zsd_system_t))) == NULL) {
4097 4094 ret = -1;
4098 4095 zsd_warn(gettext("Out of Memory"));
4099 4096 errno = ENOMEM;
4100 4097 goto err;
4101 4098 }
4102 4099 system = ctl->zsctl_system;
4103 4100 /* get the kernel bitness to know structure layout for getvmusage */
4104 4101 ret = sysinfo(SI_ARCHITECTURE_64, path, sizeof (path));
4105 4102 if (ret < 0)
4106 4103 ctl->zsctl_kern_bits = 32;
4107 4104 else
4108 4105 ctl->zsctl_kern_bits = 64;
4109 4106 ctl->zsctl_pagesize = sysconf(_SC_PAGESIZE);
4110 4107
4111 4108 size = sysconf(_SC_CPUID_MAX);
4112 4109 ctl->zsctl_maxcpuid = size;
4113 4110 if (ctl->zsctl_cpu_array == NULL && (ctl->zsctl_cpu_array =
4114 4111 (zsd_cpu_t *)calloc(size + 1, sizeof (zsd_cpu_t))) == NULL) {
4115 4112 zsd_warn(gettext("Out of Memory"));
4116 4113 errno = ENOMEM;
4117 4114 goto err;
4118 4115 }
4119 4116 for (i = 0; i <= ctl->zsctl_maxcpuid; i++) {
4120 4117 ctl->zsctl_cpu_array[i].zsc_id = i;
4121 4118 ctl->zsctl_cpu_array[i].zsc_allocated = B_FALSE;
4122 4119 ctl->zsctl_cpu_array[i].zsc_psetid = ZS_PSET_ERROR;
4123 4120 ctl->zsctl_cpu_array[i].zsc_psetid_prev = ZS_PSET_ERROR;
4124 4121 }
4125 4122 if (statvfs("/proc", &svfs) != 0 ||
4126 4123 strcmp("/proc", svfs.f_fstr) != 0) {
4127 4124 zsd_warn(gettext("/proc not a procfs filesystem"));
4128 4125 errno = EINVAL;
4129 4126 goto err;
4130 4127 }
4131 4128
4132 4129 size = sysconf(_SC_MAXPID) + 1;
4133 4130 ctl->zsctl_maxproc = size;
4134 4131 if (ctl->zsctl_proc_array == NULL &&
4135 4132 (ctl->zsctl_proc_array = (zsd_proc_t *)calloc(size,
4136 4133 sizeof (zsd_proc_t))) == NULL) {
4137 4134 zsd_warn(gettext("Out of Memory"));
4138 4135 errno = ENOMEM;
4139 4136 goto err;
4140 4137 }
4141 4138 for (i = 0; i <= ctl->zsctl_maxproc; i++) {
4142 4139 list_link_init(&(ctl->zsctl_proc_array[i].zspr_next));
4143 4140 ctl->zsctl_proc_array[i].zspr_psetid = ZS_PSET_ERROR;
4144 4141 ctl->zsctl_proc_array[i].zspr_zoneid = -1;
4145 4142 ctl->zsctl_proc_array[i].zspr_usage.tv_sec = 0;
4146 4143 ctl->zsctl_proc_array[i].zspr_usage.tv_nsec = 0;
4147 4144 ctl->zsctl_proc_array[i].zspr_ppid = -1;
4148 4145 }
4149 4146
4150 4147 list_create(&ctl->zsctl_zones, sizeof (zsd_zone_t),
4151 4148 offsetof(zsd_zone_t, zsz_next));
4152 4149
4153 4150 list_create(&ctl->zsctl_psets, sizeof (zsd_pset_t),
4154 4151 offsetof(zsd_pset_t, zsp_next));
4155 4152
4156 4153 list_create(&ctl->zsctl_cpus, sizeof (zsd_cpu_t),
4157 4154 offsetof(zsd_cpu_t, zsc_next));
4158 4155
4159 4156 pathmax = pathconf("/proc", _PC_NAME_MAX);
4160 4157 if (pathmax < 0) {
4161 4158 zsd_warn(gettext("Unable to determine max path of /proc"));
4162 4159 errno = EINVAL;
4163 4160 goto err;
4164 4161 }
4165 4162 size = sizeof (struct dirent) + pathmax + 1;
4166 4163
4167 4164 ctl->zsctl_procfs_dent_size = size;
4168 4165 if (ctl->zsctl_procfs_dent == NULL &&
4169 4166 (ctl->zsctl_procfs_dent = (struct dirent *)calloc(1, size))
4170 4167 == NULL) {
4171 4168 zsd_warn(gettext("Out of Memory"));
4172 4169 errno = ENOMEM;
4173 4170 goto err;
4174 4171 }
4175 4172
4176 4173 if (ctl->zsctl_pool_conf == NULL &&
4177 4174 (ctl->zsctl_pool_conf = pool_conf_alloc()) == NULL) {
4178 4175 zsd_warn(gettext("Out of Memory"));
4179 4176 errno = ENOMEM;
4180 4177 goto err;
4181 4178 }
4182 4179 ctl->zsctl_pool_status = POOL_DISABLED;
4183 4180 ctl->zsctl_pool_changed = 0;
4184 4181
4185 4182 if (ctl->zsctl_pool_vals[0] == NULL &&
4186 4183 (ctl->zsctl_pool_vals[0] = pool_value_alloc()) == NULL) {
4187 4184 zsd_warn(gettext("Out of Memory"));
4188 4185 errno = ENOMEM;
4189 4186 goto err;
4190 4187 }
4191 4188 if (ctl->zsctl_pool_vals[1] == NULL &&
4192 4189 (ctl->zsctl_pool_vals[1] = pool_value_alloc()) == NULL) {
4193 4190 zsd_warn(gettext("Out of Memory"));
4194 4191 errno = ENOMEM;
4195 4192 goto err;
4196 4193 }
4197 4194 ctl->zsctl_pool_vals[2] = NULL;
4198 4195
4199 4196 /*
4200 4197 * get system limits
4201 4198 */
4202 4199 system->zss_maxpid = size = sysconf(_SC_MAXPID);
4203 4200 system->zss_processes_max = zsd_get_system_rctl("zone.max-processes");
4204 4201 system->zss_lwps_max = zsd_get_system_rctl("zone.max-lwps");
4205 4202 system->zss_shm_max = zsd_get_system_rctl("zone.max-shm-memory");
4206 4203 system->zss_shmids_max = zsd_get_system_rctl("zone.max-shm-ids");
4207 4204 system->zss_semids_max = zsd_get_system_rctl("zone.max-sem-ids");
4208 4205 system->zss_msgids_max = zsd_get_system_rctl("zone.max-msg-ids");
4209 4206 system->zss_lofi_max = zsd_get_system_rctl("zone.max-lofi");
4210 4207
4211 4208 g_gen_next = 1;
4212 4209
4213 4210 if (zsd_read(ctl, B_TRUE, B_FALSE) != 0)
4214 4211 zsd_warn(gettext("Reading zone statistics failed"));
4215 4212
4216 4213 return (ctl);
4217 4214 err:
4218 4215 if (ctl)
4219 4216 zsd_close(ctl);
4220 4217
4221 4218 return (NULL);
4222 4219 }
4223 4220
4224 4221 /* Copy utilization data to buffer, filtering data if non-global zone. */
4225 4222 static void
4226 4223 zsd_usage_filter(zoneid_t zid, zs_usage_cache_t *cache, zs_usage_t *usage,
4227 4224 boolean_t is_gz)
4228 4225 {
4229 4226 zs_usage_t *cusage;
4230 4227 zs_system_t *sys, *csys;
4231 4228 zs_zone_t *zone, *czone;
4232 4229 zs_pset_t *pset, *cpset;
4233 4230 zs_pset_zone_t *pz, *cpz, *foundpz;
4234 4231 size_t size = 0, csize = 0;
4235 4232 char *start, *cstart;
4236 4233 int i, j;
4237 4234 timestruc_t delta;
4238 4235
4239 4236 /* Privileged users in the global zone get everything */
4240 4237 if (is_gz) {
4241 4238 cusage = cache->zsuc_usage;
4242 4239 (void) bcopy(cusage, usage, cusage->zsu_size);
4243 4240 return;
4244 4241 }
4245 4242
4246 4243 /* Zones just get their own usage */
4247 4244 cusage = cache->zsuc_usage;
4248 4245
4249 4246 start = (char *)usage;
4250 4247 cstart = (char *)cusage;
4251 4248 size += sizeof (zs_usage_t);
4252 4249 csize += sizeof (zs_usage_t);
4253 4250
4254 4251 usage->zsu_start = cusage->zsu_start;
4255 4252 usage->zsu_hrstart = cusage->zsu_hrstart;
4256 4253 usage->zsu_time = cusage->zsu_time;
4257 4254 usage->zsu_hrtime = cusage->zsu_hrtime;
4258 4255 usage->zsu_gen = cusage->zsu_gen;
4259 4256 usage->zsu_nzones = 1;
4260 4257 usage->zsu_npsets = 0;
4261 4258
4262 4259 /* LINTED */
4263 4260 sys = (zs_system_t *)(start + size);
4264 4261 /* LINTED */
4265 4262 csys = (zs_system_t *)(cstart + csize);
4266 4263 size += sizeof (zs_system_t);
4267 4264 csize += sizeof (zs_system_t);
4268 4265
4269 4266 /* Save system limits but not usage */
4270 4267 *sys = *csys;
4271 4268 sys->zss_ncpus = 0;
4272 4269 sys->zss_ncpus_online = 0;
4273 4270
4274 4271 /* LINTED */
4275 4272 zone = (zs_zone_t *)(start + size);
4276 4273 /* LINTED */
4277 4274 czone = (zs_zone_t *)(cstart + csize);
4278 4275 /* Find the matching zone */
4279 4276 for (i = 0; i < cusage->zsu_nzones; i++) {
4280 4277 if (czone->zsz_id == zid) {
4281 4278 *zone = *czone;
4282 4279 size += sizeof (zs_zone_t);
4283 4280 }
4284 4281 csize += sizeof (zs_zone_t);
4285 4282 /* LINTED */
4286 4283 czone = (zs_zone_t *)(cstart + csize);
4287 4284 }
4288 4285 sys->zss_ram_kern += (sys->zss_ram_zones - zone->zsz_usage_ram);
4289 4286 sys->zss_ram_zones = zone->zsz_usage_ram;
4290 4287
4291 4288 sys->zss_vm_kern += (sys->zss_vm_zones - zone->zsz_usage_vm);
4292 4289 sys->zss_vm_zones = zone->zsz_usage_vm;
4293 4290
4294 4291 sys->zss_locked_kern += (sys->zss_locked_zones -
4295 4292 zone->zsz_usage_locked);
4296 4293 sys->zss_locked_zones = zone->zsz_usage_locked;
4297 4294
4298 4295 TIMESTRUC_DELTA(delta, sys->zss_cpu_usage_zones, zone->zsz_cpu_usage);
4299 4296 TIMESTRUC_ADD_TIMESTRUC(sys->zss_cpu_usage_kern, delta);
4300 4297 sys->zss_cpu_usage_zones = zone->zsz_cpu_usage;
4301 4298
4302 4299 /* LINTED */
4303 4300 pset = (zs_pset_t *)(start + size);
4304 4301 /* LINTED */
4305 4302 cpset = (zs_pset_t *)(cstart + csize);
4306 4303 for (i = 0; i < cusage->zsu_npsets; i++) {
4307 4304 csize += sizeof (zs_pset_t);
4308 4305 /* LINTED */
4309 4306 cpz = (zs_pset_zone_t *)(csize + cstart);
4310 4307 foundpz = NULL;
4311 4308 for (j = 0; j < cpset->zsp_nusage; j++) {
4312 4309 if (cpz->zspz_zoneid == zid)
4313 4310 foundpz = cpz;
4314 4311
4315 4312 csize += sizeof (zs_pset_zone_t);
4316 4313 /* LINTED */
4317 4314 cpz = (zs_pset_zone_t *)(csize + cstart);
4318 4315 }
4319 4316 if (foundpz != NULL) {
4320 4317 size += sizeof (zs_pset_t);
4321 4318 /* LINTED */
4322 4319 pz = (zs_pset_zone_t *)(start + size);
4323 4320 size += sizeof (zs_pset_zone_t);
4324 4321
4325 4322 *pset = *cpset;
4326 4323 *pz = *foundpz;
4327 4324
4328 4325 TIMESTRUC_DELTA(delta, pset->zsp_usage_zones,
4329 4326 pz->zspz_cpu_usage);
4330 4327 TIMESTRUC_ADD_TIMESTRUC(pset->zsp_usage_kern, delta);
4331 4328 pset->zsp_usage_zones = pz->zspz_cpu_usage;
4332 4329 pset->zsp_nusage = 1;
4333 4330 usage->zsu_npsets++;
4334 4331 sys->zss_ncpus += pset->zsp_size;
4335 4332 sys->zss_ncpus_online += pset->zsp_online;
4336 4333 }
4337 4334 /* LINTED */
4338 4335 cpset = (zs_pset_t *)(cstart + csize);
4339 4336 }
4340 4337 usage->zsu_size = size;
4341 4338 }
4342 4339
4343 4340 /*
4344 4341 * Respond to new connections from libzonestat.so. Also respond to zoneadmd,
4345 4342 * which reports new zones.
4346 4343 */
4347 4344 /* ARGSUSED */
4348 4345 static void
4349 4346 zsd_server(void *cookie, char *argp, size_t arg_size,
4350 4347 door_desc_t *dp, uint_t n_desc)
4351 4348 {
4352 4349 int *args, cmd;
4353 4350 door_desc_t door;
4354 4351 ucred_t *ucred;
4355 4352 const priv_set_t *eset;
4356 4353
4357 4354 if (argp == DOOR_UNREF_DATA) {
4358 4355 (void) door_return(NULL, 0, NULL, 0);
4359 4356 thr_exit(NULL);
4360 4357 }
4361 4358
4362 4359 if (arg_size != sizeof (cmd) * 2) {
4363 4360 (void) door_return(NULL, 0, NULL, 0);
4364 4361 thr_exit(NULL);
4365 4362 }
4366 4363
4367 4364 /* LINTED */
4368 4365 args = (int *)argp;
4369 4366 cmd = args[0];
4370 4367
4371 4368 /* If connection, return door to stat server */
4372 4369 if (cmd == ZSD_CMD_CONNECT) {
4373 4370
4374 4371 /* Verify client compilation version */
4375 4372 if (args[1] != ZS_VERSION) {
4376 4373 args[1] = ZSD_STATUS_VERSION_MISMATCH;
4377 4374 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4378 4375 thr_exit(NULL);
4379 4376 }
4380 4377 ucred = alloca(ucred_size());
4381 4378 /* Verify client permission */
4382 4379 if (door_ucred(&ucred) != 0) {
4383 4380 args[1] = ZSD_STATUS_INTERNAL_ERROR;
4384 4381 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4385 4382 thr_exit(NULL);
4386 4383 }
4387 4384
4388 4385 eset = ucred_getprivset(ucred, PRIV_EFFECTIVE);
4389 4386 if (eset == NULL) {
4390 4387 args[1] = ZSD_STATUS_INTERNAL_ERROR;
4391 4388 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4392 4389 thr_exit(NULL);
4393 4390 }
4394 4391 if (!priv_ismember(eset, PRIV_PROC_INFO)) {
4395 4392 args[1] = ZSD_STATUS_PERMISSION;
4396 4393 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4397 4394 thr_exit(NULL);
4398 4395 }
4399 4396
4400 4397 /* Return stat server door */
4401 4398 args[1] = ZSD_STATUS_OK;
4402 4399 door.d_attributes = DOOR_DESCRIPTOR;
4403 4400 door.d_data.d_desc.d_descriptor = g_stat_door;
4404 4401 (void) door_return(argp, sizeof (cmd) * 2, &door, 1);
4405 4402 thr_exit(NULL);
4406 4403 }
4407 4404
4408 4405 /* Respond to zoneadmd informing zonestatd of a new zone */
4409 4406 if (cmd == ZSD_CMD_NEW_ZONE) {
4410 4407 zsd_fattach_zone(args[1], g_server_door, B_FALSE);
4411 4408 (void) door_return(NULL, 0, NULL, 0);
4412 4409 thr_exit(NULL);
4413 4410 }
4414 4411
4415 4412 args[1] = ZSD_STATUS_INTERNAL_ERROR;
4416 4413 (void) door_return(argp, sizeof (cmd) * 2, NULL, 0);
4417 4414 thr_exit(NULL);
4418 4415 }
4419 4416
4420 4417 /*
4421 4418 * Respond to libzonestat.so clients with the current utlilzation data.
4422 4419 */
4423 4420 /* ARGSUSED */
4424 4421 static void
4425 4422 zsd_stat_server(void *cookie, char *argp, size_t arg_size,
4426 4423 door_desc_t *dp, uint_t n_desc)
4427 4424 {
4428 4425 uint64_t *args, cmd;
4429 4426 zs_usage_cache_t *cache;
4430 4427 int ret;
4431 4428 char *rvalp;
4432 4429 size_t rvals;
4433 4430 zs_usage_t *usage;
4434 4431 ucred_t *ucred;
4435 4432 zoneid_t zoneid;
4436 4433 const priv_set_t *eset;
4437 4434 boolean_t is_gz = B_FALSE;
4438 4435
4439 4436 /* Tell stat thread there are no more clients */
4440 4437 if (argp == DOOR_UNREF_DATA) {
4441 4438 (void) mutex_lock(&g_usage_cache_lock);
4442 4439 g_hasclient = B_FALSE;
4443 4440 (void) cond_signal(&g_usage_cache_kick);
4444 4441 (void) mutex_unlock(&g_usage_cache_lock);
4445 4442 (void) door_return(NULL, 0, NULL, 0);
4446 4443 thr_exit(NULL);
4447 4444 }
4448 4445 if (arg_size != sizeof (cmd) * 2) {
4449 4446 (void) door_return(NULL, 0, NULL, 0);
4450 4447 thr_exit(NULL);
4451 4448 }
4452 4449 /* LINTED */
4453 4450 args = (uint64_t *)argp;
4454 4451 cmd = args[0];
4455 4452 if (cmd != ZSD_CMD_READ) {
4456 4453 (void) door_return(NULL, 0, NULL, 0);
4457 4454 thr_exit(NULL);
4458 4455 }
4459 4456 ucred = alloca(ucred_size());
4460 4457 if (door_ucred(&ucred) != 0) {
4461 4458 (void) door_return(NULL, 0, NULL, 0);
4462 4459 thr_exit(NULL);
4463 4460 }
4464 4461 zoneid = ucred_getzoneid(ucred);
4465 4462
4466 4463 if (zoneid == GLOBAL_ZONEID)
4467 4464 is_gz = B_TRUE;
4468 4465
4469 4466 eset = ucred_getprivset(ucred, PRIV_EFFECTIVE);
4470 4467 if (eset == NULL) {
4471 4468 (void) door_return(NULL, 0, NULL, 0);
4472 4469 thr_exit(NULL);
4473 4470 }
4474 4471 if (!priv_ismember(eset, PRIV_PROC_INFO)) {
4475 4472 (void) door_return(NULL, 0, NULL, 0);
4476 4473 thr_exit(NULL);
4477 4474 }
4478 4475 (void) mutex_lock(&g_usage_cache_lock);
4479 4476 g_hasclient = B_TRUE;
4480 4477
4481 4478 /*
4482 4479 * Force a new cpu calculation for client. This will force a
4483 4480 * new memory calculation if the memory data is older than the
4484 4481 * sample period.
4485 4482 */
4486 4483 g_usage_cache_kickers++;
4487 4484 (void) cond_signal(&g_usage_cache_kick);
4488 4485 ret = cond_wait(&g_usage_cache_wait, &g_usage_cache_lock);
4489 4486 g_usage_cache_kickers--;
4490 4487 if (ret != 0 && errno == EINTR) {
4491 4488 (void) mutex_unlock(&g_usage_cache_lock);
4492 4489 zsd_warn(gettext(
4493 4490 "Interrupted before writing usage size to client\n"));
4494 4491 (void) door_return(NULL, 0, NULL, 0);
4495 4492 thr_exit(NULL);
4496 4493 }
4497 4494 cache = zsd_usage_cache_hold_locked();
4498 4495 if (cache == NULL) {
4499 4496 zsd_warn(gettext("Usage cache empty.\n"));
4500 4497 (void) door_return(NULL, 0, NULL, 0);
4501 4498 thr_exit(NULL);
4502 4499 }
4503 4500 (void) mutex_unlock(&g_usage_cache_lock);
4504 4501
4505 4502 /* Copy current usage data to stack to send to client */
4506 4503 usage = (zs_usage_t *)alloca(cache->zsuc_size);
4507 4504
4508 4505 /* Filter out results if caller is non-global zone */
4509 4506 zsd_usage_filter(zoneid, cache, usage, is_gz);
4510 4507
4511 4508 rvalp = (void *)usage;
4512 4509 rvals = usage->zsu_size;
4513 4510 zsd_usage_cache_rele(cache);
4514 4511
4515 4512 (void) door_return(rvalp, rvals, 0, NULL);
4516 4513 thr_exit(NULL);
4517 4514 }
4518 4515
4519 4516 static volatile boolean_t g_quit;
4520 4517
/* ARGSUSED */
/*
 * Signal handler: request shutdown by setting the global quit flag,
 * which the main and stat threads poll.
 */
static void
zonestat_quithandler(int sig)
{
	g_quit = B_TRUE;
}
4527 4524
4528 4525 /*
4529 4526 * The stat thread generates new utilization data when clients request
4530 4527 * it. It also manages opening and closing the subsystems used to gather
4531 4528 * data depending on if clients exist.
4532 4529 */
4533 4530 /* ARGSUSED */
4534 4531 void *
4535 4532 stat_thread(void *arg)
4536 4533 {
4537 4534 time_t start;
4538 4535 time_t now;
4539 4536 time_t next_memory;
4540 4537 boolean_t do_memory;
4541 4538 boolean_t do_read;
4542 4539 boolean_t do_close;
4543 4540
4544 4541 start = time(NULL);
4545 4542 if (start < 0) {
4546 4543 if (g_quit == B_TRUE)
4547 4544 goto quit;
4548 4545 zsd_warn(gettext("Unable to fetch current time"));
4549 4546 g_quit = B_TRUE;
4550 4547 goto quit;
4551 4548 }
4552 4549
4553 4550 next_memory = start;
4554 4551 while (g_quit == B_FALSE) {
4555 4552 for (;;) {
4556 4553 /*
4557 4554 * These are used to decide if the most recent memory
4558 4555 * calculation was within a sample interval,
4559 4556 * and weather or not the usage collection needs to
4560 4557 * be opened or closed.
4561 4558 */
4562 4559 do_memory = B_FALSE;
4563 4560 do_read = B_FALSE;
4564 4561 do_close = B_FALSE;
4565 4562
4566 4563 /*
4567 4564 * If all clients have gone, close usage collecting
4568 4565 */
4569 4566 (void) mutex_lock(&g_usage_cache_lock);
4570 4567 if (!g_hasclient && g_open == B_TRUE) {
4571 4568 do_close = B_TRUE;
4572 4569 (void) mutex_unlock(&g_usage_cache_lock);
4573 4570 break;
4574 4571 }
4575 4572 if (g_quit == B_TRUE) {
4576 4573 (void) mutex_unlock(
4577 4574 &g_usage_cache_lock);
4578 4575 break;
4579 4576 }
4580 4577 /*
4581 4578 * Wait for a usage data request
4582 4579 */
4583 4580 if (g_usage_cache_kickers == 0) {
4584 4581 (void) cond_wait(&g_usage_cache_kick,
4585 4582 &g_usage_cache_lock);
4586 4583 }
4587 4584 now = time(NULL);
4588 4585 if (now < 0) {
4589 4586 if (g_quit == B_TRUE) {
4590 4587 (void) mutex_unlock(
4591 4588 &g_usage_cache_lock);
4592 4589 goto quit;
4593 4590 }
4594 4591 g_quit = B_TRUE;
4595 4592 (void) mutex_unlock(&g_usage_cache_lock);
4596 4593 zsd_warn(gettext(
4597 4594 "Unable to fetch current time"));
4598 4595 goto quit;
4599 4596 }
4600 4597 if (g_hasclient) {
4601 4598 do_read = B_TRUE;
4602 4599 if (now >= next_memory) {
4603 4600 do_memory = B_TRUE;
4604 4601 next_memory = now + g_interval;
4605 4602 }
4606 4603 } else {
4607 4604 do_close = B_TRUE;
4608 4605 }
4609 4606 (void) mutex_unlock(&g_usage_cache_lock);
4610 4607 if (do_read || do_close)
4611 4608 break;
4612 4609 }
4613 4610 g_now = now;
4614 4611 g_hrnow = gethrtime();
4615 4612 if (g_hasclient && g_open == B_FALSE) {
4616 4613 g_start = g_now;
4617 4614 g_hrstart = g_hrnow;
4618 4615 g_ctl = zsd_open(g_ctl);
4619 4616 if (g_ctl == NULL)
4620 4617 zsd_warn(gettext(
4621 4618 "Unable to open zone statistics"));
4622 4619 else
4623 4620 g_open = B_TRUE;
4624 4621 }
4625 4622 if (do_read && g_ctl) {
4626 4623 if (zsd_read(g_ctl, B_FALSE, do_memory) != 0) {
4627 4624 zsd_warn(gettext(
4628 4625 "Unable to read zone statistics"));
4629 4626 g_quit = B_TRUE;
4630 4627 return (NULL);
4631 4628 }
4632 4629 }
4633 4630 (void) mutex_lock(&g_usage_cache_lock);
4634 4631 if (!g_hasclient && g_open == B_TRUE && g_ctl) {
4635 4632 (void) mutex_unlock(&g_usage_cache_lock);
4636 4633 zsd_close(g_ctl);
4637 4634 g_open = B_FALSE;
4638 4635 } else {
4639 4636 (void) mutex_unlock(&g_usage_cache_lock);
4640 4637 }
4641 4638 }
4642 4639 quit:
4643 4640 if (g_open)
4644 4641 zsd_close(g_ctl);
4645 4642
4646 4643 (void) thr_kill(g_main, SIGINT);
4647 4644 thr_exit(NULL);
4648 4645 return (NULL);
4649 4646 }
4650 4647
4651 4648 void
4652 4649 zsd_set_fx()
4653 4650 {
4654 4651 pcinfo_t pcinfo;
4655 4652 pcparms_t pcparms;
4656 4653
4657 4654 (void) strlcpy(pcinfo.pc_clname, "FX", sizeof (pcinfo.pc_clname));
4658 4655 if (priocntl(0, 0, PC_GETCID, (caddr_t)&pcinfo) == -1) {
4659 4656 zsd_warn(gettext("cannot get FX class parameters"));
4660 4657 return;
4661 4658 }
4662 4659 pcparms.pc_cid = pcinfo.pc_cid;
4663 4660 ((fxparms_t *)pcparms.pc_clparms)->fx_upri = 60;
4664 4661 ((fxparms_t *)pcparms.pc_clparms)->fx_uprilim = 60;
4665 4662 ((fxparms_t *)pcparms.pc_clparms)->fx_tqsecs = 0;
4666 4663 ((fxparms_t *)pcparms.pc_clparms)->fx_tqnsecs = FX_NOCHANGE;
4667 4664 if (priocntl(P_PID, getpid(), PC_SETPARMS, (caddr_t)&pcparms) == -1)
4668 4665 zsd_warn(gettext("cannot enter the FX class"));
4669 4666 }
4670 4667
4671 4668 static int pipe_fd;
4672 4669
4673 4670 static void
4674 4671 daemonize_ready(char status)
4675 4672 {
4676 4673 /*
4677 4674 * wake the parent with a clue
4678 4675 */
4679 4676 (void) write(pipe_fd, &status, 1);
4680 4677 (void) close(pipe_fd);
4681 4678 }
4682 4679
/*
 * Fork into the background.  The child returns 0 and carries on as the
 * daemon; the parent blocks reading a status byte from a pipe (written
 * later by daemonize_ready()) and exits with that byte as its exit
 * status, so the SMF start method sees the daemon's readiness code.
 * Returns -1 (before forking) if the pipe or fork itself fails.
 */
static int
daemonize_start(void)
{
	char data;
	int status;

	int filedes[2];
	pid_t pid;

	/* Detach stdin; redirect stdout to wherever stderr points. */
	(void) close(0);
	(void) dup2(2, 1);

	if (pipe(filedes) < 0)
		return (-1);

	/* Flush stdio so buffered output is not duplicated by the fork. */
	(void) fflush(NULL);

	if ((pid = fork1()) < 0)
		return (-1);

	if (pid != 0) {
		/*
		 * parent
		 */
		struct sigaction act;

		/*
		 * NOTE(review): this installs the *default* disposition
		 * (SIG_DFL) for SIGPIPE, not SIG_IGN as the original
		 * comment claimed — presumably so a write to a closed
		 * pipe terminates the parent; confirm intent.
		 */
		act.sa_sigaction = SIG_DFL;
		(void) sigemptyset(&act.sa_mask);
		act.sa_flags = 0;

		(void) sigaction(SIGPIPE, &act, NULL); /* default SIGPIPE */

		/* Close the write end; wait for the child's status byte. */
		(void) close(filedes[1]);
		if (read(filedes[0], &data, 1) == 1) {
			/* forward ready code via exit status */
			exit(data);
		}
		/* Pipe closed with no data: child died before ready. */
		status = -1;
		(void) wait4(pid, &status, 0, NULL);
		/* daemon process exited before becoming ready */
		if (WIFEXITED(status)) {
			/* assume daemon process printed useful message */
			exit(WEXITSTATUS(status));
		} else {
			zsd_warn(gettext("daemon process killed or died"));
			exit(1);
		}
	}

	/*
	 * child: remember the write end for daemonize_ready(), drop the
	 * read end.
	 */
	pipe_fd = filedes[1];
	(void) close(filedes[0]);

	/*
	 * generic Unix setup
	 */
	(void) setsid();
	(void) umask(0000);

	return (0);
}
4746 4743
4747 4744 static void
4748 4745 fattach_all_zones(boolean_t detach_only)
4749 4746 {
4750 4747 zoneid_t *zids;
4751 4748 uint_t nzids, nzids_last;
4752 4749 int i;
4753 4750
4754 4751 again:
4755 4752 (void) zone_list(NULL, &nzids);
4756 4753 nzids_last = nzids;
4757 4754 zids = (zoneid_t *)malloc(sizeof (zoneid_t) * nzids_last);
4758 4755 if (zids == NULL)
4759 4756 zsd_error(gettext("Out of memory"));
4760 4757
4761 4758 (void) zone_list(zids, &nzids);
4762 4759 if (nzids > nzids_last) {
4763 4760 free(zids);
4764 4761 goto again;
4765 4762 }
4766 4763 for (i = 0; i < nzids; i++)
4767 4764 zsd_fattach_zone(zids[i], g_server_door, detach_only);
4768 4765
4769 4766 free(zids);
4770 4767 }
4771 4768
4772 4769 int
4773 4770 main(int argc, char *argv[])
4774 4771 {
4775 4772
4776 4773 int arg;
4777 4774 thread_t tid;
4778 4775 scf_simple_prop_t *prop;
4779 4776 uint64_t *intervalp;
4780 4777 boolean_t opt_cleanup = B_FALSE;
4781 4778
4782 4779 g_main = thr_self();
4783 4780 g_quit = B_FALSE;
4784 4781 (void) signal(SIGINT, zonestat_quithandler);
4785 4782 (void) signal(SIGTERM, zonestat_quithandler);
4786 4783 (void) signal(SIGHUP, zonestat_quithandler);
4787 4784 /* (void) sigignore(SIGCHLD); */
4788 4785 (void) sigignore(SIGPIPE);
4789 4786
4790 4787 if (getzoneid() != GLOBAL_ZONEID)
4791 4788 zsd_error(gettext("Must be run from global zone only"));
4792 4789
4793 4790 while ((arg = getopt(argc, argv, "c"))
4794 4791 != EOF) {
4795 4792 switch (arg) {
4796 4793 case 'c':
4797 4794 opt_cleanup = B_TRUE;
4798 4795 break;
4799 4796 default:
4800 4797 zsd_error(gettext("Invalid option"));
4801 4798 }
4802 4799 }
4803 4800
4804 4801 if (opt_cleanup) {
4805 4802 if (zsd_disable_cpu_stats() != 0)
4806 4803 exit(1);
4807 4804 else
4808 4805 exit(0);
4809 4806 }
4810 4807
4811 4808 /* Get the configured sample interval */
4812 4809 prop = scf_simple_prop_get(NULL, "svc:/system/zones-monitoring:default",
4813 4810 "config", "sample_interval");
4814 4811 if (prop == NULL)
4815 4812 zsd_error(gettext("Unable to fetch SMF property "
4816 4813 "\"config/sample_interval\""));
4817 4814
4818 4815 if (scf_simple_prop_type(prop) != SCF_TYPE_COUNT)
4819 4816 zsd_error(gettext("Malformed SMF property "
4820 4817 "\"config/sample_interval\". Must be of type \"count\""));
4821 4818
4822 4819 intervalp = scf_simple_prop_next_count(prop);
4823 4820 g_interval = *intervalp;
4824 4821 if (g_interval == 0)
4825 4822 zsd_error(gettext("Malformed SMF property "
4826 4823 "\"config/sample_interval\". Must be greater than zero"));
4827 4824
4828 4825 scf_simple_prop_free(prop);
4829 4826
4830 4827 if (daemonize_start() < 0)
4831 4828 zsd_error(gettext("Unable to start daemon\n"));
4832 4829
4833 4830 /* Run at high priority */
4834 4831 zsd_set_fx();
4835 4832
4836 4833 (void) mutex_init(&g_usage_cache_lock, USYNC_THREAD, NULL);
4837 4834 (void) cond_init(&g_usage_cache_kick, USYNC_THREAD, NULL);
4838 4835 (void) cond_init(&g_usage_cache_wait, USYNC_THREAD, NULL);
4839 4836
4840 4837 g_server_door = door_create(zsd_server, NULL,
4841 4838 DOOR_REFUSE_DESC | DOOR_NO_CANCEL);
4842 4839 if (g_server_door < 0)
4843 4840 zsd_error(gettext("Unable to create server door\n"));
4844 4841
4845 4842
4846 4843 g_stat_door = door_create(zsd_stat_server, NULL, DOOR_UNREF_MULTI |
4847 4844 DOOR_REFUSE_DESC | DOOR_NO_CANCEL);
4848 4845 if (g_stat_door < 0)
4849 4846 zsd_error(gettext("Unable to create statistics door\n"));
4850 4847
4851 4848 fattach_all_zones(B_FALSE);
4852 4849
4853 4850 if (thr_create(NULL, 0, stat_thread, NULL, 0, &tid) != 0)
4854 4851 zsd_error(gettext("Unable to create statistics thread\n"));
4855 4852
4856 4853 daemonize_ready(0);
4857 4854
4858 4855 /* Wait for signal to quit */
4859 4856 while (g_quit == B_FALSE)
4860 4857 (void) pause();
4861 4858
4862 4859 /* detach doors */
4863 4860 fattach_all_zones(B_TRUE);
4864 4861
4865 4862 (void) door_revoke(g_server_door);
4866 4863 (void) door_revoke(g_stat_door);
4867 4864
4868 4865 /* kick stat thread and wait for it to close the statistics */
4869 4866 (void) mutex_lock(&g_usage_cache_lock);
4870 4867 g_quit = B_TRUE;
4871 4868 (void) cond_signal(&g_usage_cache_kick);
4872 4869 (void) mutex_unlock(&g_usage_cache_lock);
4873 4870 end:
4874 4871 (void) thr_join(tid, NULL, NULL);
4875 4872 return (0);
4876 4873 }
|
↓ open down ↓ |
2522 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX