Print this page
Reduce lint
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/cmd/zoneadmd/zoneadmd.c
+++ new/usr/src/cmd/zoneadmd/zoneadmd.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright 2015, Joyent, Inc. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * zoneadmd manages zones; one zoneadmd process is launched for each
30 30 * non-global zone on the system. This daemon juggles four jobs:
31 31 *
32 32 * - Implement setup and teardown of the zone "virtual platform": mount and
33 33 * unmount filesystems; create and destroy network interfaces; communicate
34 34 * with devfsadmd to lay out devices for the zone; instantiate the zone
35 35 * console device; configure process runtime attributes such as resource
36 36 * controls, pool bindings, fine-grained privileges.
37 37 *
38 38 * - Launch the zone's init(1M) process.
39 39 *
40 40 * - Implement a door server; clients (like zoneadm) connect to the door
41 41 * server and request zone state changes. The kernel is also a client of
42 42 * this door server. A request to halt or reboot the zone which originates
43 43 * *inside* the zone results in a door upcall from the kernel into zoneadmd.
44 44 *
45 45 * One minor problem is that messages emitted by zoneadmd need to be passed
46 46 * back to the zoneadm process making the request. These messages need to
47 47 * be rendered in the client's locale; so, this is passed in as part of the
48 48 * request. The exception is the kernel upcall to zoneadmd, in which case
49 49 * messages are syslog'd.
50 50 *
51 51 * To make all of this work, the Makefile adds -a to xgettext to extract *all*
52 52 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
53 53 * strings which do not need to be translated.
54 54 *
55 55 * - Act as a console server for zlogin -C processes; see comments in zcons.c
56 56 * for more information about the zone console architecture.
57 57 *
58 58 * DESIGN NOTES
59 59 *
60 60 * Restart:
61 61 * A chief design constraint of zoneadmd is that it should be restartable in
62 62 * the case that the administrator kills it off, or it suffers a fatal error,
63 63 * without the running zone being impacted; this is akin to being able to
64 64 * reboot the service processor of a server without affecting the OS instance.
65 65 */
66 66
67 67 #include <sys/param.h>
68 68 #include <sys/mman.h>
69 69 #include <sys/types.h>
70 70 #include <sys/stat.h>
71 71 #include <sys/sysmacros.h>
72 72 #include <sys/time.h>
73 73
74 74 #include <bsm/adt.h>
75 75 #include <bsm/adt_event.h>
76 76
77 77 #include <alloca.h>
78 78 #include <assert.h>
79 79 #include <errno.h>
80 80 #include <door.h>
81 81 #include <fcntl.h>
82 82 #include <locale.h>
83 83 #include <signal.h>
84 84 #include <stdarg.h>
85 85 #include <stdio.h>
86 86 #include <stdlib.h>
87 87 #include <string.h>
88 88 #include <strings.h>
89 89 #include <synch.h>
90 90 #include <syslog.h>
91 91 #include <thread.h>
92 92 #include <unistd.h>
93 93 #include <wait.h>
94 94 #include <limits.h>
95 95 #include <zone.h>
96 96 #include <libbrand.h>
97 97 #include <sys/brand.h>
98 98 #include <libcontract.h>
99 99 #include <libcontract_priv.h>
100 100 #include <sys/brand.h>
101 101 #include <sys/contract/process.h>
102 102 #include <sys/ctfs.h>
103 103 #include <libdladm.h>
104 104 #include <sys/dls_mgmt.h>
105 105 #include <libscf.h>
106 106
107 107 #include <libzonecfg.h>
108 108 #include <zonestat_impl.h>
109 109 #include "zoneadmd.h"
110 110
111 111 static char *progname;
112 112 char *zone_name; /* zone which we are managing */
113 113 zone_dochandle_t snap_hndl; /* handle for snapshot created when ready */
114 114 char zonepath[MAXNAMELEN];
115 115 char pool_name[MAXNAMELEN];
116 116 char default_brand[MAXNAMELEN];
117 117 char brand_name[MAXNAMELEN];
118 118 boolean_t zone_isnative;
119 119 boolean_t zone_iscluster;
120 120 boolean_t zone_islabeled;
121 121 boolean_t shutdown_in_progress;
122 122 static zoneid_t zone_id;
123 123 dladm_handle_t dld_handle = NULL;
124 124
125 125 static char pre_statechg_hook[2 * MAXPATHLEN];
126 126 static char post_statechg_hook[2 * MAXPATHLEN];
127 127 char query_hook[2 * MAXPATHLEN];
128 128
129 129 zlog_t logsys;
130 130
131 131 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */
132 132 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */
133 133
134 134 static sema_t scratch_sem; /* for scratch zones */
135 135
136 136 static char zone_door_path[MAXPATHLEN];
137 137 static int zone_door = -1;
138 138
139 139 boolean_t in_death_throes = B_FALSE; /* daemon is dying */
140 140 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */
141 141
142 142 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
143 143 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
144 144 #endif
145 145
146 146 #define DEFAULT_LOCALE "C"
147 147
148 148 static const char *
149 149 z_cmd_name(zone_cmd_t zcmd)
150 150 {
151 151 /* This list needs to match the enum in sys/zone.h */
152 152 static const char *zcmdstr[] = {
153 153 "ready", "boot", "forceboot", "reboot", "halt",
154 154 "note_uninstalling", "mount", "forcemount", "unmount",
155 155 "shutdown"
156 156 };
157 157
158 158 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr))
159 159 return ("unknown");
160 160 else
161 161 return (zcmdstr[(int)zcmd]);
162 162 }
163 163
/*
 * Return a pointer to the final path component of execfullname.
 *
 * Trailing '/' characters are stripped by overwriting them with NUL, so the
 * caller's buffer may be modified.  The returned pointer refers into that
 * same buffer; nothing is allocated.
 */
static char *
get_execbasename(char *execfullname)
{
	char *slash;

	while ((slash = strrchr(execfullname, '/')) != NULL) {
		/* Something follows the last slash: that is the basename. */
		if (slash[1] != '\0')
			return (slash + 1);

		/* guard against '/' at end of command invocation */
		*slash = '\0';
	}

	/* No slash left at all; the whole string is the basename. */
	return (execfullname);
}
186 186
/*
 * Print the (localized) usage message, plus a note that the program is not
 * meant to be run by hand, to stderr and terminate with exit status 2.
 * Does not return.
 */
static void
usage(void)
{
	(void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
	(void) fprintf(stderr,
	    gettext("\tNote: %s should not be run directly.\n"), progname);
	exit(2);
}
195 195
/*
 * SIGCHLD handler that deliberately does nothing.
 *
 * NOTE(review): presumably this exists so SIGCHLD has a handler installed
 * rather than being left at its default disposition; confirm at the place
 * where the handler is registered.
 */
/* ARGSUSED */
static void
sigchld(int sig)
{
}
201 201
/*
 * Look up the translation of msg in the given locale.
 *
 * The LC_MESSAGES locale is switched to 'locale' for the duration of the
 * gettext() lookup and then put back to DEFAULT_LOCALE.  Because the locale
 * is a process-wide setting and this daemon is multi-threaded, the whole
 * sequence runs under msglock.
 *
 * Returns whatever gettext() returned for msg.
 */
char *
localize_msg(char *locale, const char *msg)
{
	char *out;

	(void) mutex_lock(&msglock);
	(void) setlocale(LC_MESSAGES, locale);
	out = gettext(msg);
	/* Always restore the default so other threads see a known locale. */
	(void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
	(void) mutex_unlock(&msglock);
	return (out);
}
214 214
/*
 * Format and emit an error message.
 *
 *   zlogp		where the message goes; a NULL zlogp discards it.
 *   use_strerror	if true, ": <strerror text>" for the errno value seen
 *			on entry is appended to the message.
 *   fmt, ...		printf-style format (localized to zlogp->locale before
 *			use) and its arguments.
 *
 * Destination selection:
 *   - zlogp == &logsys: the message is prefixed with "[zone '<name>'] " and
 *     sent to syslog at LOG_ERR.
 *   - zlogp->logfile != NULL: the message plus a newline is written to that
 *     stream.
 *   - otherwise: the message plus a newline is appended to the zlogp->log
 *     buffer, and zlogp->log / zlogp->loglen are advanced past what was
 *     written.
 */
/* PRINTFLIKE3 */
void
zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
{
	va_list alist;
	char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
	char *bp;
	/* Capture errno now; the calls below may overwrite it. */
	int saved_errno = errno;

	if (zlogp == NULL)
		return;
	if (zlogp == &logsys)
		(void) snprintf(buf, sizeof (buf), "[zone '%s'] ",
		    zone_name);
	else
		buf[0] = '\0';
	/* bp always points at the current end of the text in buf. */
	bp = &(buf[strlen(buf)]);

	/*
	 * In theory, the locale pointer should be set to either "C" or a
	 * char array, so it should never be NULL
	 */
	assert(zlogp->locale != NULL);
	/* Locale is per process, but we are multi-threaded... */
	fmt = localize_msg(zlogp->locale, fmt);

	va_start(alist, fmt);
	(void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
	va_end(alist);
	bp = &(buf[strlen(buf)]);
	if (use_strerror)
		(void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
		    strerror(saved_errno));
	if (zlogp == &logsys) {
		(void) syslog(LOG_ERR, "%s", buf);
	} else if (zlogp->logfile != NULL) {
		(void) fprintf(zlogp->logfile, "%s\n", buf);
	} else {
		size_t buflen;
		size_t copylen;

		/*
		 * snprintf() reports the length it wanted to write, which
		 * may exceed the space left; never advance by more than
		 * loglen.
		 */
		buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf);
		copylen = MIN(buflen, zlogp->loglen);
		zlogp->log += copylen;
		zlogp->loglen -= copylen;
	}
}
262 262
263 263 /*
264 264 * Since Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
265 265 * put the arguments into an argv style array, use getopt to process them,
266 266 * and put the resultant argument string back into outargs. Non-Solaris brands
267 267 * may support alternate forms of boot arguments so we must handle that as well.
268 268 *
269 269 * During the filtering, we pull out any arguments which are truly "boot"
270 270 * arguments, leaving only those which are to be passed intact to the
271 271 * progenitor process. The one we support at the moment is -i, which
272 272 * indicates to the kernel which program should be launched as 'init'.
273 273 *
274 274 * Except for Z_OK, all other return values are treated as fatal.
275 275 */
276 276 static int
277 277 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
278 278 char *init_file)
279 279 {
280 280 int argc = 0, argc_save;
281 281 int i;
282 282 int err;
283 283 char *arg, *lasts, **argv = NULL, **argv_save;
284 284 char zonecfg_args[BOOTARGS_MAX];
285 285 char scratchargs[BOOTARGS_MAX], *sargs;
286 286 char c;
287 287
288 288 bzero(outargs, BOOTARGS_MAX);
289 289
290 290 /*
291 291 * If the user didn't specify transient boot arguments, check
292 292 * to see if there were any specified in the zone configuration,
293 293 * and use them if applicable.
294 294 */
295 295 if (inargs == NULL || inargs[0] == '\0') {
296 296 zone_dochandle_t handle;
297 297 if ((handle = zonecfg_init_handle()) == NULL) {
298 298 zerror(zlogp, B_TRUE,
299 299 "getting zone configuration handle");
300 300 return (Z_BAD_HANDLE);
301 301 }
302 302 err = zonecfg_get_snapshot_handle(zone_name, handle);
303 303 if (err != Z_OK) {
304 304 zerror(zlogp, B_FALSE,
305 305 "invalid configuration snapshot");
306 306 zonecfg_fini_handle(handle);
307 307 return (Z_BAD_HANDLE);
308 308 }
309 309
310 310 bzero(zonecfg_args, sizeof (zonecfg_args));
311 311 (void) zonecfg_get_bootargs(handle, zonecfg_args,
312 312 sizeof (zonecfg_args));
313 313 inargs = zonecfg_args;
314 314 zonecfg_fini_handle(handle);
315 315 }
316 316
317 317 if (strlen(inargs) >= BOOTARGS_MAX) {
318 318 zerror(zlogp, B_FALSE, "boot argument string too long");
319 319 return (Z_INVAL);
320 320 }
321 321
322 322 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
323 323 sargs = scratchargs;
324 324 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
325 325 sargs = NULL;
326 326 argc++;
327 327 }
328 328
329 329 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
330 330 zerror(zlogp, B_FALSE, "memory allocation failed");
331 331 return (Z_NOMEM);
332 332 }
333 333
334 334 argv_save = argv;
335 335 argc_save = argc;
336 336
337 337 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
338 338 sargs = scratchargs;
339 339 i = 0;
340 340 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
341 341 sargs = NULL;
342 342 if ((argv[i] = strdup(arg)) == NULL) {
343 343 err = Z_NOMEM;
344 344 zerror(zlogp, B_FALSE, "memory allocation failed");
345 345 goto done;
346 346 }
347 347 i++;
348 348 }
349 349
350 350 /*
351 351 * We preserve compatibility with the illumos system boot behavior,
352 352 * which allows:
353 353 *
354 354 * # reboot kernel/unix -s -m verbose
355 355 *
356 356 * In this example, kernel/unix tells the booter what file to boot. The
357 357 * original intent of this was that we didn't want reboot in a zone to
358 358 * be gratuitously different, so we would silently ignore the boot
359 359 * file, if necessary. However, this usage is archaic and has never
360 360 * been common, since it is impossible to boot a zone onto a different
361 361 * kernel. Ignoring the first argument breaks for non-native brands
362 362 * which pass boot arguments in a different style. e.g.
363 363 * systemd.log_level=debug
364 364 * Thus, for backward compatibility we only ignore the first argument
365 365 * if it appears to be in the illumos form and attempting to specify a
366 366 * kernel.
367 367 */
368 368 if (argv[0] == NULL)
369 369 goto done;
370 370
371 371 assert(argv[0][0] != ' ');
372 372 assert(argv[0][0] != '\t');
373 373
374 374 if (strncmp(argv[0], "kernel/", 7) == 0) {
375 375 argv = &argv[1];
376 376 argc--;
377 377 }
378 378
379 379 optind = 0;
380 380 opterr = 0;
381 381 err = Z_OK;
382 382 while ((c = getopt(argc, argv, "fi:m:s")) != -1) {
383 383 switch (c) {
384 384 case 'i':
385 385 /*
386 386 * -i is handled by the runtime and is not passed
387 387 * along to userland
388 388 */
389 389 (void) strlcpy(init_file, optarg, MAXPATHLEN);
390 390 break;
391 391 case 'f':
392 392 /* This has already been processed by zoneadm */
393 393 break;
394 394 case 'm':
395 395 case 's':
396 396 /* These pass through unmolested */
397 397 (void) snprintf(outargs, BOOTARGS_MAX,
398 398 "%s -%c %s ", outargs, c, optarg ? optarg : "");
399 399 break;
400 400 case '?':
401 401 /*
402 402 * If a brand has its own init, we need to pass along
403 403 * whatever the user provides. We use the entire
404 404 * unknown string here so that we correctly handle
405 405 * unknown long options (e.g. --debug).
406 406 */
407 407 (void) snprintf(outargs, BOOTARGS_MAX,
408 408 "%s %s", outargs, argv[optind - 1]);
409 409 break;
410 410 }
411 411 }
412 412
413 413 /*
414 414 * We need to pass along everything else since we don't know what
415 415 * the brand's init is expecting. For example, an argument list like:
416 416 * --confdir /foo --debug
417 417 * will cause the getopt parsing to stop at '/foo' but we need to pass
418 418 * that on, along with the '--debug'. This does mean that we require
419 419 * any of our known options (-ifms) to preceed the brand-specific ones.
420 420 */
421 421 while (optind < argc) {
422 422 (void) snprintf(outargs, BOOTARGS_MAX, "%s %s", outargs,
423 423 argv[optind]);
424 424 optind++;
425 425 }
426 426
427 427 done:
428 428 for (i = 0; i < argc_save; i++) {
429 429 if (argv_save[i] != NULL)
430 430 free(argv_save[i]);
431 431 }
432 432 free(argv_save);
433 433 return (err);
434 434 }
435 435
436 436
437 437 static int
438 438 mkzonedir(zlog_t *zlogp)
439 439 {
440 440 struct stat st;
441 441 /*
442 442 * We must create and lock everyone but root out of ZONES_TMPDIR
443 443 * since anyone can open any UNIX domain socket, regardless of
444 444 * its file system permissions. Sigh...
445 445 */
446 446 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
447 447 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
448 448 return (-1);
449 449 }
450 450 /* paranoia */
451 451 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
452 452 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
453 453 return (-1);
454 454 }
455 455 (void) chmod(ZONES_TMPDIR, S_IRWXU);
456 456 return (0);
457 457 }
458 458
459 459 /*
460 460 * Run the brand's pre-state change callback, if it exists.
461 461 */
462 462 static int
463 463 brand_prestatechg(zlog_t *zlogp, int state, int cmd)
464 464 {
465 465 char cmdbuf[2 * MAXPATHLEN];
466 466 const char *altroot;
467 467
468 468 if (pre_statechg_hook[0] == '\0')
469 469 return (0);
470 470
471 471 altroot = zonecfg_get_root();
472 472 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
473 473 state, cmd, altroot) > sizeof (cmdbuf))
474 474 return (-1);
475 475
476 476 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
477 477 return (-1);
478 478
479 479 return (0);
480 480 }
481 481
482 482 /*
483 483 * Run the brand's post-state change callback, if it exists.
484 484 */
485 485 static int
486 486 brand_poststatechg(zlog_t *zlogp, int state, int cmd)
487 487 {
488 488 char cmdbuf[2 * MAXPATHLEN];
489 489 const char *altroot;
490 490
491 491 if (post_statechg_hook[0] == '\0')
492 492 return (0);
493 493
494 494 altroot = zonecfg_get_root();
495 495 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
496 496 state, cmd, altroot) > sizeof (cmdbuf))
497 497 return (-1);
498 498
499 499 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
500 500 return (-1);
501 501
502 502 return (0);
503 503 }
504 504
505 505 /*
506 506 * Notify zonestatd of the new zone. If zonestatd is not running, this
507 507 * will do nothing.
508 508 */
509 509 static void
510 510 notify_zonestatd(zoneid_t zoneid)
511 511 {
512 512 int cmd[2];
513 513 int fd;
514 514 door_arg_t params;
515 515
516 516 fd = open(ZS_DOOR_PATH, O_RDONLY);
517 517 if (fd < 0)
518 518 return;
519 519
520 520 cmd[0] = ZSD_CMD_NEW_ZONE;
521 521 cmd[1] = zoneid;
522 522 params.data_ptr = (char *)&cmd;
523 523 params.data_size = sizeof (cmd);
524 524 params.desc_ptr = NULL;
525 525 params.desc_num = 0;
526 526 params.rbuf = NULL;
527 527 params.rsize = NULL;
528 528 (void) door_call(fd, ¶ms);
529 529 (void) close(fd);
530 530 }
531 531
/*
 * Bring a zone up to the pre-boot "ready" stage.  The mount_cmd argument is
 * 'true' if this is being invoked as part of the processing for the "mount"
 * subcommand.
 *
 * Sequence: run the brand's pre-state-change hook, create the configuration
 * snapshot, create and bring up the virtual platform, then run the brand's
 * post-state-change hook.  On success the global zone_id holds the ID
 * returned by vplat_create().
 *
 * Returns 0 on success and -1 on failure.
 */
static int
zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
{
	int err;

	/* A failing pre-hook aborts before anything has been set up. */
	if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
		return (-1);

	if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
		zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
		    zonecfg_strerror(err));
		goto bad;
	}

	if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
		/* Undo the snapshot created above. */
		if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
			zerror(zlogp, B_FALSE, "destroying snapshot: %s",
			    zonecfg_strerror(err));
		goto bad;
	}
	if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
		/*
		 * Flag that we are cleaning up after a failed bringup, then
		 * tear the platform down again (with a NULL log handle) and
		 * drop the snapshot.
		 */
		bringup_failure_recovery = B_TRUE;
		(void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
		if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
			zerror(zlogp, B_FALSE, "destroying snapshot: %s",
			    zonecfg_strerror(err));
		goto bad;
	}

	if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
		goto bad;

	return (0);

bad:
	/*
	 * If something goes wrong, we up the zone's state to the target
	 * state, READY, and then invoke the hook as if we're halting.
	 */
	(void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
	return (-1);
}
579 579
580 580 int
581 581 init_template(void)
582 582 {
583 583 int fd;
584 584 int err = 0;
585 585
586 586 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
587 587 if (fd == -1)
588 588 return (-1);
589 589
590 590 /*
591 591 * For now, zoneadmd doesn't do anything with the contract.
592 592 * Deliver no events, don't inherit, and allow it to be orphaned.
593 593 */
594 594 err |= ct_tmpl_set_critical(fd, 0);
595 595 err |= ct_tmpl_set_informative(fd, 0);
596 596 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
597 597 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
598 598 if (err || ct_tmpl_activate(fd)) {
599 599 (void) close(fd);
600 600 return (-1);
601 601 }
602 602
603 603 return (fd);
604 604 }
605 605
/*
 * Context handed (as the opaque 'data' pointer) to mount_early_fs() for
 * each filesystem it is asked to mount.
 */
typedef struct fs_callback {
	zlog_t		*zlogp;		/* where errors are reported */
	zoneid_t	zoneid;		/* zone the mounting child enters */
	boolean_t	mount_cmd;	/* if set, root is <zonepath>/lu */
} fs_callback_t;
611 611
612 612 static int
613 613 mount_early_fs(void *data, const char *spec, const char *dir,
614 614 const char *fstype, const char *opt)
615 615 {
616 616 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp;
617 617 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid;
618 618 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd;
619 619 char rootpath[MAXPATHLEN];
620 620 pid_t child;
621 621 int child_status;
622 622 int tmpl_fd;
623 623 int rv;
624 624 ctid_t ct;
625 625
626 626 /* determine the zone rootpath */
627 627 if (mount_cmd) {
628 628 char luroot[MAXPATHLEN];
629 629
630 630 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
631 631 resolve_lofs(zlogp, luroot, sizeof (luroot));
632 632 (void) strlcpy(rootpath, luroot, sizeof (rootpath));
633 633 } else {
634 634 if (zone_get_rootpath(zone_name,
635 635 rootpath, sizeof (rootpath)) != Z_OK) {
636 636 zerror(zlogp, B_FALSE, "unable to determine zone root");
637 637 return (-1);
638 638 }
639 639 }
640 640
641 641 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) {
642 642 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
643 643 rootpath, dir);
644 644 return (-1);
645 645 } else if (rv > 0) {
646 646 /* The mount point path doesn't exist, create it now. */
647 647 if (make_one_dir(zlogp, rootpath, dir,
648 648 DEFAULT_DIR_MODE, DEFAULT_DIR_USER,
649 649 DEFAULT_DIR_GROUP) != 0) {
650 650 zerror(zlogp, B_FALSE, "failed to create mount point");
651 651 return (-1);
652 652 }
653 653
654 654 /*
655 655 * Now this might seem weird, but we need to invoke
656 656 * valid_mount_path() again. Why? Because it checks
657 657 * to make sure that the mount point path is canonical,
658 658 * which it can only do if the path exists, so now that
659 659 * we've created the path we have to verify it again.
660 660 */
661 661 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir,
662 662 fstype)) < 0) {
663 663 zerror(zlogp, B_FALSE,
664 664 "%s%s is not a valid mount point", rootpath, dir);
665 665 return (-1);
666 666 }
667 667 }
668 668
669 669 if ((tmpl_fd = init_template()) == -1) {
670 670 zerror(zlogp, B_TRUE, "failed to create contract");
671 671 return (-1);
672 672 }
673 673
674 674 if ((child = fork()) == -1) {
675 675 (void) ct_tmpl_clear(tmpl_fd);
676 676 (void) close(tmpl_fd);
677 677 zerror(zlogp, B_TRUE, "failed to fork");
678 678 return (-1);
679 679
680 680 } else if (child == 0) { /* child */
681 681 char opt_buf[MAX_MNTOPT_STR];
682 682 int optlen = 0;
683 683 int mflag = MS_DATA;
684 684
685 685 (void) ct_tmpl_clear(tmpl_fd);
686 686 /*
687 687 * Even though there are no procs running in the zone, we
688 688 * do this for paranoia's sake.
689 689 */
690 690 (void) closefrom(0);
691 691
692 692 if (zone_enter(zoneid) == -1) {
693 693 _exit(errno);
694 694 }
695 695 if (opt != NULL) {
696 696 /*
697 697 * The mount() system call is incredibly annoying.
698 698 * If options are specified, we need to copy them
699 699 * into a temporary buffer since the mount() system
700 700 * call will overwrite the options string. It will
701 701 * also fail if the new option string it wants to
702 702 * write is bigger than the one we passed in, so
703 703 * you must pass in a buffer of the maximum possible
704 704 * option string length. sigh.
705 705 */
706 706 (void) strlcpy(opt_buf, opt, sizeof (opt_buf));
707 707 opt = opt_buf;
708 708 optlen = MAX_MNTOPT_STR;
709 709 mflag = MS_OPTIONSTR;
710 710 }
711 711 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0)
712 712 _exit(errno);
713 713 _exit(0);
714 714 }
715 715
716 716 /* parent */
717 717 if (contract_latest(&ct) == -1)
718 718 ct = -1;
719 719 (void) ct_tmpl_clear(tmpl_fd);
720 720 (void) close(tmpl_fd);
721 721 if (waitpid(child, &child_status, 0) != child) {
722 722 /* unexpected: we must have been signalled */
723 723 (void) contract_abandon_id(ct);
724 724 return (-1);
725 725 }
726 726 (void) contract_abandon_id(ct);
727 727 if (WEXITSTATUS(child_status) != 0) {
728 728 errno = WEXITSTATUS(child_status);
729 729 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
730 730 return (-1);
731 731 }
732 732
733 733 return (0);
734 734 }
735 735
736 736 /*
737 737 * If retstr is not NULL, the output of the subproc is returned in the str,
738 738 * otherwise it is output using zerror(). Any memory allocated for retstr
739 739 * should be freed by the caller.
740 740 */
741 741 int
742 742 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
743 743 {
744 744 char buf[1024]; /* arbitrary large amount */
745 745 char *inbuf;
746 746 FILE *file;
747 747 int status;
748 748 int rd_cnt;
749 749
750 750 if (retstr != NULL) {
751 751 if ((*retstr = malloc(1024)) == NULL) {
752 752 zerror(zlogp, B_FALSE, "out of memory");
753 753 return (-1);
754 754 }
755 755 inbuf = *retstr;
756 756 rd_cnt = 0;
757 757 } else {
758 758 inbuf = buf;
759 759 }
760 760
761 761 file = popen(cmdbuf, "r");
762 762 if (file == NULL) {
763 763 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
764 764 return (-1);
765 765 }
766 766
767 767 while (fgets(inbuf, 1024, file) != NULL) {
768 768 if (retstr == NULL) {
769 769 if (zlogp != &logsys)
770 770 zerror(zlogp, B_FALSE, "%s", inbuf);
771 771 } else {
772 772 char *p;
773 773
774 774 rd_cnt += 1024 - 1;
775 775 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
776 776 zerror(zlogp, B_FALSE, "out of memory");
777 777 (void) pclose(file);
778 778 return (-1);
779 779 }
780 780
781 781 *retstr = p;
782 782 inbuf = *retstr + rd_cnt;
783 783 }
784 784 }
785 785 status = pclose(file);
786 786
787 787 if (WIFSIGNALED(status)) {
788 788 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
789 789 "signal %d", cmdbuf, WTERMSIG(status));
790 790 return (-1);
791 791 }
792 792 assert(WIFEXITED(status));
793 793 if (WEXITSTATUS(status) == ZEXIT_EXEC) {
794 794 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
795 795 return (-1);
796 796 }
797 797 return (WEXITSTATUS(status));
798 798 }
799 799
800 800 #if 0 /* XXX KEBE SAYS not yet */
/*
 * Get the path for this zone's init(1M) (or equivalent) process.  First look
 * for a zone-specific init-name attr, then get it from the brand.
 *
 * The attribute is looked up in the global snapshot handle, snap_hndl.  On
 * a hit its value is copied into initname (at most len bytes) and 0 is
 * returned; otherwise the result of brand_get_initname() is returned.
 */
static int
get_initname(brand_handle_t bh, char *initname, int len)
{
	struct zone_attrtab a;

	bzero(&a, sizeof (a));
	(void) strlcpy(a.zone_attr_name, "init-name",
	    sizeof (a.zone_attr_name));

	if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
		(void) strlcpy(initname, a.zone_attr_value, len);
		return (0);
	}

	/* No per-zone override; fall back to the brand's setting. */
	return (brand_get_initname(bh, initname, len));
}
821 821
822 822 /*
823 823 * Get the restart-init flag for this zone's init(1M) (or equivalent) process.
824 824 * First look for a zone-specific restart-init attr, then get it from the brand.
825 825 */
826 826 static boolean_t
827 827 restartinit(brand_handle_t bh)
828 828 {
829 829 struct zone_attrtab a;
830 830
831 831 bzero(&a, sizeof (a));
832 832 (void) strlcpy(a.zone_attr_name, "restart-init",
833 833 sizeof (a.zone_attr_name));
834 834
835 835 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
836 836 if (strcmp(a.zone_attr_value, "false") == 0)
837 837 return (B_FALSE);
838 838 return (B_TRUE);
839 839 }
840 840
|
↓ open down ↓ |
840 lines elided |
↑ open up ↑ |
841 841 return (brand_restartinit(bh));
842 842 }
843 843 #endif /* XXX KEBE */
844 844
845 845 /*
846 846 * Get the app-svc-dependent flag for this zone's init process. This is a
847 847 * zone-specific attr which controls the type of contract we create for the
848 848 * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal
849 849 * set, so that when any service which is in the same contract exits, the init
850 850 * application will be terminated.
851 + *
852 + * We use the global "snap_hndl", so no parameters get passed here.
851 853 */
852 854 static boolean_t
853 -is_app_svc_dep(brand_handle_t bh)
855 +is_app_svc_dep(void)
854 856 {
855 857 struct zone_attrtab a;
856 858
857 859 bzero(&a, sizeof (a));
858 860 (void) strlcpy(a.zone_attr_name, "app-svc-dependent",
859 861 sizeof (a.zone_attr_name));
860 862
861 863 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
862 864 strcmp(a.zone_attr_value, "true") == 0) {
863 865 return (B_TRUE);
864 866 }
865 867
866 868 return (B_FALSE);
867 869 }
868 870
869 871 static int
870 872 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
871 873 {
872 874 zoneid_t zoneid;
873 875 struct stat st;
874 876 char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
875 877 char nbootargs[BOOTARGS_MAX];
876 878 char cmdbuf[MAXPATHLEN];
877 879 fs_callback_t cb;
878 880 brand_handle_t bh;
879 881 zone_iptype_t iptype;
880 882 dladm_status_t status;
881 883 char errmsg[DLADM_STRSIZE];
882 884 int err;
883 885 boolean_t restart_init;
884 886 boolean_t app_svc_dep;
885 887
886 888 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
887 889 return (-1);
888 890
889 891 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
890 892 zerror(zlogp, B_TRUE, "unable to get zoneid");
891 893 goto bad;
892 894 }
893 895
894 896 cb.zlogp = zlogp;
895 897 cb.zoneid = zoneid;
896 898 cb.mount_cmd = B_FALSE;
897 899
898 900 /* Get a handle to the brand info for this zone */
899 901 if ((bh = brand_open(brand_name)) == NULL) {
900 902 zerror(zlogp, B_FALSE, "unable to determine zone brand");
901 903 goto bad;
902 904 }
903 905
904 906 /*
905 907 * Get the list of filesystems to mount from the brand
906 908 * configuration. These mounts are done via a thread that will
907 909 * enter the zone, so they are done from within the context of the
908 910 * zone.
909 911 */
910 912 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
911 913 zerror(zlogp, B_FALSE, "unable to mount filesystems");
912 914 brand_close(bh);
913 915 goto bad;
914 916 }
915 917
916 918 /*
917 919 * Get the brand's boot callback if it exists.
918 920 */
919 921 (void) strcpy(cmdbuf, EXEC_PREFIX);
920 922 if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
921 923 sizeof (cmdbuf) - EXEC_LEN) != 0) {
922 924 zerror(zlogp, B_FALSE,
923 925 "unable to determine branded zone's boot callback");
924 926 brand_close(bh);
925 927 goto bad;
926 928 }
927 929
928 930 /* Get the path for this zone's init(1M) (or equivalent) process. */
929 931 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
930 932 zerror(zlogp, B_FALSE,
931 933 "unable to determine zone's init(1M) location");
932 934 brand_close(bh);
|
↓ open down ↓ |
69 lines elided |
↑ open up ↑ |
933 935 goto bad;
934 936 }
935 937
936 938 /* See if this zone's brand should restart init if it dies. */
937 939 restart_init = brand_restartinit(bh);
938 940
939 941 /*
940 942 * See if we need to setup contract dependencies between the zone's
941 943 * primary application and any of its services.
942 944 */
943 - app_svc_dep = is_app_svc_dep(bh);
945 + app_svc_dep = is_app_svc_dep();
944 946
945 947 brand_close(bh);
946 948
947 949 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
948 950 if (err != Z_OK)
949 951 goto bad;
950 952
951 953 assert(init_file[0] != '\0');
952 954
953 955 /*
954 956 * Try to anticipate possible problems: If possible, make sure init is
955 957 * executable.
956 958 */
957 959 if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) {
958 960 zerror(zlogp, B_FALSE, "unable to determine zone root");
|
↓ open down ↓ |
5 lines elided |
↑ open up ↑ |
959 961 goto bad;
960 962 }
961 963
962 964 (void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file);
963 965
964 966 if (lstat(initpath, &st) == -1) {
965 967 zerror(zlogp, B_TRUE, "could not stat %s", initpath);
966 968 goto bad;
967 969 }
968 970
969 - if ((st.st_mode & S_IFMT) == S_IFLNK) {
970 - /* symlink, we'll have to wait and resolve when we boot */
971 - } else if ((st.st_mode & S_IXUSR) == 0) {
971 + /*
972 + * If a symlink, we'll have to wait and resolve when we boot,
973 + * otherwise check the executable bits now.
974 + */
975 + if ((st.st_mode & S_IFMT) != S_IFLNK && (st.st_mode & S_IXUSR) == 0) {
972 976 zerror(zlogp, B_FALSE, "%s is not executable", initpath);
973 977 goto bad;
974 978 }
975 979
976 980 /*
977 981 * Exclusive stack zones interact with the dlmgmtd running in the
978 982 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is
979 983 * booting, and loads its datalinks from the zone's datalink
980 984 * configuration file.
981 985 */
982 986 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
983 987 status = dladm_zone_boot(dld_handle, zoneid);
984 988 if (status != DLADM_STATUS_OK) {
985 989 zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
986 990 " %s", dladm_status2str(status, errmsg));
987 991 goto bad;
988 992 }
989 993 }
990 994
991 995 /*
992 996 * If there is a brand 'boot' callback, execute it now to give the
993 997 * brand one last chance to do any additional setup before the zone
994 998 * is booted.
995 999 */
996 1000 if ((strlen(cmdbuf) > EXEC_LEN) &&
997 1001 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
998 1002 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
999 1003 goto bad;
1000 1004 }
1001 1005
1002 1006 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
1003 1007 zerror(zlogp, B_TRUE, "could not set zone boot file");
1004 1008 goto bad;
1005 1009 }
1006 1010
1007 1011 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
1008 1012 zerror(zlogp, B_TRUE, "could not set zone boot arguments");
1009 1013 goto bad;
1010 1014 }
1011 1015
1012 1016 if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART,
1013 1017 NULL, 0) == -1) {
1014 1018 zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
1015 1019 goto bad;
1016 1020 }
1017 1021
1018 1022 if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT,
1019 1023 (void *)B_TRUE, sizeof (boolean_t)) == -1) {
1020 1024 zerror(zlogp, B_TRUE, "could not set zone app-die");
1021 1025 goto bad;
1022 1026 }
1023 1027
1024 1028 /*
1025 1029 * Inform zonestatd of a new zone so that it can install a door for
1026 1030 * the zone to contact it.
1027 1031 */
1028 1032 notify_zonestatd(zone_id);
1029 1033
1030 1034 if (zone_boot(zoneid) == -1) {
1031 1035 zerror(zlogp, B_TRUE, "unable to boot zone");
1032 1036 goto bad;
1033 1037 }
1034 1038
1035 1039 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
1036 1040 goto bad;
1037 1041
1038 1042 /* Startup a thread to perform zfd logging/tty svc for the zone. */
1039 1043 create_log_thread(zlogp, zone_id);
1040 1044
1041 1045 /* Startup a thread to perform memory capping for the zone. */
1042 1046 create_mcap_thread(zlogp, zone_id);
1043 1047
1044 1048 return (0);
1045 1049
1046 1050 bad:
1047 1051 /*
1048 1052 * If something goes wrong, we up the zones's state to the target
1049 1053 * state, RUNNING, and then invoke the hook as if we're halting.
1050 1054 */
1051 1055 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
1052 1056
1053 1057 return (-1);
1054 1058 }
1055 1059
1056 1060 static int
1057 1061 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
1058 1062 {
1059 1063 int err;
1060 1064
1061 1065 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
1062 1066 return (-1);
1063 1067
1064 1068 /* Shutting down, stop the memcap thread */
1065 1069 destroy_mcap_thread();
1066 1070
1067 1071 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
1068 1072 if (!bringup_failure_recovery)
1069 1073 zerror(zlogp, B_FALSE, "unable to destroy zone");
1070 1074 destroy_log_thread();
1071 1075 return (-1);
1072 1076 }
1073 1077
1074 1078 /* Shut down is done, stop the log thread */
1075 1079 destroy_log_thread();
1076 1080
1077 1081 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
1078 1082 return (-1);
1079 1083
1080 1084 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
1081 1085 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
1082 1086 zonecfg_strerror(err));
1083 1087
1084 1088 return (0);
1085 1089 }
1086 1090
1087 1091 static int
1088 1092 zone_graceful_shutdown(zlog_t *zlogp)
1089 1093 {
1090 1094 zoneid_t zoneid;
1091 1095 pid_t child;
1092 1096 char cmdbuf[MAXPATHLEN];
1093 1097 brand_handle_t bh = NULL;
1094 1098 ctid_t ct;
1095 1099 int tmpl_fd;
1096 1100 int child_status;
1097 1101
1098 1102 if (shutdown_in_progress) {
1099 1103 zerror(zlogp, B_FALSE, "shutdown already in progress");
1100 1104 return (-1);
1101 1105 }
1102 1106
1103 1107 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1104 1108 zerror(zlogp, B_TRUE, "unable to get zoneid");
1105 1109 return (-1);
1106 1110 }
1107 1111
1108 1112 /* Get a handle to the brand info for this zone */
1109 1113 if ((bh = brand_open(brand_name)) == NULL) {
1110 1114 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1111 1115 return (-1);
1112 1116 }
1113 1117
1114 1118 /*
1115 1119 * If there is a brand 'shutdown' callback, execute it now to give the
1116 1120 * brand a chance to cleanup any custom configuration.
1117 1121 */
1118 1122 (void) strcpy(cmdbuf, EXEC_PREFIX);
1119 1123 if (brand_get_shutdown(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
1120 1124 sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
1121 1125 (void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
1122 1126 }
1123 1127 brand_close(bh);
1124 1128
1125 1129 if ((tmpl_fd = init_template()) == -1) {
1126 1130 zerror(zlogp, B_TRUE, "failed to create contract");
1127 1131 return (-1);
1128 1132 }
1129 1133
1130 1134 if ((child = fork()) == -1) {
1131 1135 (void) ct_tmpl_clear(tmpl_fd);
1132 1136 (void) close(tmpl_fd);
1133 1137 zerror(zlogp, B_TRUE, "failed to fork");
1134 1138 return (-1);
1135 1139 } else if (child == 0) {
1136 1140 (void) ct_tmpl_clear(tmpl_fd);
1137 1141 if (zone_enter(zoneid) == -1) {
1138 1142 _exit(errno);
1139 1143 }
1140 1144 _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL));
1141 1145 }
1142 1146
1143 1147 if (contract_latest(&ct) == -1)
1144 1148 ct = -1;
1145 1149 (void) ct_tmpl_clear(tmpl_fd);
1146 1150 (void) close(tmpl_fd);
1147 1151
1148 1152 if (waitpid(child, &child_status, 0) != child) {
1149 1153 /* unexpected: we must have been signalled */
1150 1154 (void) contract_abandon_id(ct);
1151 1155 return (-1);
1152 1156 }
1153 1157
1154 1158 (void) contract_abandon_id(ct);
1155 1159 if (WEXITSTATUS(child_status) != 0) {
1156 1160 errno = WEXITSTATUS(child_status);
1157 1161 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1158 1162 return (-1);
1159 1163 }
1160 1164
1161 1165 shutdown_in_progress = B_TRUE;
1162 1166
1163 1167 return (0);
1164 1168 }
1165 1169
1166 1170 static int
1167 1171 zone_wait_shutdown(zlog_t *zlogp)
1168 1172 {
1169 1173 zone_state_t zstate;
1170 1174 uint64_t *tm = NULL;
1171 1175 scf_simple_prop_t *prop = NULL;
1172 1176 int timeout;
1173 1177 int tries;
1174 1178 int rc = -1;
1175 1179
1176 1180 /* Get default stop timeout from SMF framework */
1177 1181 timeout = SHUTDOWN_WAIT;
1178 1182 if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop",
1179 1183 SCF_PROPERTY_TIMEOUT)) != NULL) {
1180 1184 if ((tm = scf_simple_prop_next_count(prop)) != NULL) {
1181 1185 if (tm != 0)
1182 1186 timeout = *tm;
1183 1187 }
1184 1188 scf_simple_prop_free(prop);
1185 1189 }
1186 1190
1187 1191 /* allow time for zone to shutdown cleanly */
1188 1192 for (tries = 0; tries < timeout; tries ++) {
1189 1193 (void) sleep(1);
1190 1194 if (zone_get_state(zone_name, &zstate) == Z_OK &&
1191 1195 zstate == ZONE_STATE_INSTALLED) {
1192 1196 rc = 0;
1193 1197 break;
1194 1198 }
1195 1199 }
1196 1200
1197 1201 if (rc != 0)
1198 1202 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1199 1203
1200 1204 shutdown_in_progress = B_FALSE;
1201 1205
1202 1206 return (rc);
1203 1207 }
1204 1208
1205 1209
1206 1210
1207 1211 /*
1208 1212 * Generate AUE_zone_state for a command that boots a zone.
1209 1213 */
1210 1214 static void
1211 1215 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
1212 1216 char *new_state)
1213 1217 {
1214 1218 adt_session_data_t *ah;
1215 1219 adt_event_data_t *event;
1216 1220 int pass_fail, fail_reason;
1217 1221
1218 1222 if (!adt_audit_enabled())
1219 1223 return;
1220 1224
1221 1225 if (return_val == 0) {
1222 1226 pass_fail = ADT_SUCCESS;
1223 1227 fail_reason = ADT_SUCCESS;
1224 1228 } else {
1225 1229 pass_fail = ADT_FAILURE;
1226 1230 fail_reason = ADT_FAIL_VALUE_PROGRAM;
1227 1231 }
1228 1232
1229 1233 if (adt_start_session(&ah, NULL, 0)) {
1230 1234 zerror(zlogp, B_TRUE, gettext("audit failure."));
1231 1235 return;
1232 1236 }
1233 1237 if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
1234 1238 zerror(zlogp, B_TRUE, gettext("audit failure."));
1235 1239 (void) adt_end_session(ah);
1236 1240 return;
1237 1241 }
1238 1242
1239 1243 event = adt_alloc_event(ah, ADT_zone_state);
1240 1244 if (event == NULL) {
1241 1245 zerror(zlogp, B_TRUE, gettext("audit failure."));
1242 1246 (void) adt_end_session(ah);
1243 1247 return;
1244 1248 }
1245 1249 event->adt_zone_state.zonename = zone_name;
1246 1250 event->adt_zone_state.new_state = new_state;
1247 1251
1248 1252 if (adt_put_event(event, pass_fail, fail_reason))
1249 1253 zerror(zlogp, B_TRUE, gettext("audit failure."));
1250 1254
1251 1255 adt_free_event(event);
1252 1256
1253 1257 (void) adt_end_session(ah);
1254 1258 }
1255 1259
1256 1260 /*
1257 1261 * Log the exit time and status of the zone's init process into
1258 1262 * {zonepath}/lastexited. If the zone shutdown normally, the exit status will
1259 1263 * be -1, otherwise it will be the exit status as described in wait.3c.
1260 1264 * If the zone is configured to restart init, then nothing will be logged if
1261 1265 * init exits unexpectedly (the kernel will never upcall in this case).
1262 1266 */
1263 1267 static void
1264 1268 log_init_exit(int status)
1265 1269 {
1266 1270 char p[MAXPATHLEN];
1267 1271 char buf[128];
1268 1272 struct timeval t;
1269 1273 int fd;
1270 1274
1271 1275 if (snprintf(p, sizeof (p), "%s/lastexited", zonepath) > sizeof (p))
1272 1276 return;
1273 1277 if (gettimeofday(&t, NULL) != 0)
1274 1278 return;
1275 1279 if (snprintf(buf, sizeof (buf), "%ld.%ld %d\n", t.tv_sec, t.tv_usec,
1276 1280 status) > sizeof (buf))
1277 1281 return;
1278 1282 if ((fd = open(p, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0)
1279 1283 return;
1280 1284
1281 1285 (void) write(fd, buf, strlen(buf));
1282 1286
1283 1287 (void) close(fd);
1284 1288 }
1285 1289
1286 1290 /*
1287 1291 * The main routine for the door server that deals with zone state transitions.
1288 1292 */
|
↓ open down ↓ |
307 lines elided |
↑ open up ↑ |
1289 1293 /* ARGSUSED */
1290 1294 static void
1291 1295 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1292 1296 uint_t n_desc)
1293 1297 {
1294 1298 ucred_t *uc = NULL;
1295 1299 const priv_set_t *eset;
1296 1300
1297 1301 zone_state_t zstate;
1298 1302 zone_cmd_t cmd;
1299 - boolean_t debug;
1300 1303 int init_status;
1301 1304 zone_cmd_arg_t *zargp;
1302 1305
1303 1306 boolean_t kernelcall;
1304 1307
1305 1308 int rval = -1;
1306 1309 uint64_t uniqid;
1307 1310 zoneid_t zoneid = -1;
1308 1311 zlog_t zlog;
1309 1312 zlog_t *zlogp;
1310 1313 zone_cmd_rval_t *rvalp;
1311 1314 size_t rlen = getpagesize(); /* conservative */
1312 1315 fs_callback_t cb;
1313 1316 brand_handle_t bh;
1314 1317 boolean_t wait_shut = B_FALSE;
1315 1318
1316 1319 /* LINTED E_BAD_PTR_CAST_ALIGN */
1317 1320 zargp = (zone_cmd_arg_t *)args;
1318 1321
1319 1322 /*
1320 1323 * When we get the door unref message, we've fdetach'd the door, and
1321 1324 * it is time for us to shut down zoneadmd.
1322 1325 */
1323 1326 if (zargp == DOOR_UNREF_DATA) {
1324 1327 /*
1325 1328 * See comment at end of main() for info on the last rites.
1326 1329 */
1327 1330 exit(0);
1328 1331 }
1329 1332
1330 1333 if (zargp == NULL) {
1331 1334 (void) door_return(NULL, 0, 0, 0);
1332 1335 }
1333 1336
1334 1337 rvalp = alloca(rlen);
1335 1338 bzero(rvalp, rlen);
1336 1339 zlog.logfile = NULL;
1337 1340 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1338 1341 zlog.buf = rvalp->errbuf;
1339 1342 zlog.log = zlog.buf;
1340 1343 /* defer initialization of zlog.locale until after credential check */
1341 1344 zlogp = &zlog;
1342 1345
|
↓ open down ↓ |
33 lines elided |
↑ open up ↑ |
1343 1346 if (alen != sizeof (zone_cmd_arg_t)) {
1344 1347 /*
1345 1348 * This really shouldn't be happening.
1346 1349 */
1347 1350 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1348 1351 "unexpected (expected %d bytes)", alen,
1349 1352 sizeof (zone_cmd_arg_t));
1350 1353 goto out;
1351 1354 }
1352 1355 cmd = zargp->cmd;
1353 - debug = zargp->debug;
1354 1356 init_status = zargp->status;
1355 1357
1356 1358 if (door_ucred(&uc) != 0) {
1357 1359 zerror(&logsys, B_TRUE, "door_ucred");
1358 1360 goto out;
1359 1361 }
1360 1362 eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1361 1363 if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1362 1364 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1363 1365 ucred_geteuid(uc) != 0)) {
1364 1366 zerror(&logsys, B_FALSE, "insufficient privileges");
1365 1367 goto out;
1366 1368 }
1367 1369
1368 1370 kernelcall = ucred_getpid(uc) == 0;
1369 1371
1370 1372 /*
1371 1373 * This is safe because we only use a zlog_t throughout the
1372 1374 * duration of a door call; i.e., by the time the pointer
1373 1375 * might become invalid, the door call would be over.
1374 1376 */
1375 1377 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
1376 1378
1377 1379 (void) mutex_lock(&lock);
1378 1380
1379 1381 /*
1380 1382 * Once we start to really die off, we don't want more connections.
1381 1383 */
1382 1384 if (in_death_throes) {
1383 1385 (void) mutex_unlock(&lock);
1384 1386 ucred_free(uc);
1385 1387 (void) door_return(NULL, 0, 0, 0);
1386 1388 thr_exit(NULL);
1387 1389 }
1388 1390
1389 1391 /*
1390 1392 * Check for validity of command.
1391 1393 */
1392 1394 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT &&
1393 1395 cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT &&
1394 1396 cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT &&
1395 1397 cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) {
1396 1398 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd);
1397 1399 goto out;
1398 1400 }
1399 1401
1400 1402 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
1401 1403 /*
1402 1404 * Can't happen
1403 1405 */
1404 1406 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
1405 1407 cmd);
1406 1408 goto out;
1407 1409 }
1408 1410 /*
1409 1411 * We ignore the possibility of someone calling zone_create(2)
1410 1412 * explicitly; all requests must come through zoneadmd.
1411 1413 */
1412 1414 if (zone_get_state(zone_name, &zstate) != Z_OK) {
1413 1415 /*
1414 1416 * Something terribly wrong happened
1415 1417 */
1416 1418 zerror(&logsys, B_FALSE, "unable to determine state of zone");
1417 1419 goto out;
1418 1420 }
1419 1421
1420 1422 if (kernelcall) {
1421 1423 /*
1422 1424 * Kernel-initiated requests may lose their validity if the
1423 1425 * zone_t the kernel was referring to has gone away.
1424 1426 */
1425 1427 if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
1426 1428 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
1427 1429 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
1428 1430 /*
1429 1431 * We're not talking about the same zone. The request
1430 1432 * must have arrived too late. Return error.
1431 1433 */
1432 1434 rval = -1;
1433 1435 goto out;
1434 1436 }
1435 1437 zlogp = &logsys; /* Log errors to syslog */
1436 1438 }
1437 1439
1438 1440 /*
1439 1441 * If we are being asked to forcibly mount or boot a zone, we
1440 1442 * pretend that an INCOMPLETE zone is actually INSTALLED.
1441 1443 */
1442 1444 if (zstate == ZONE_STATE_INCOMPLETE &&
1443 1445 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1444 1446 zstate = ZONE_STATE_INSTALLED;
1445 1447
1446 1448 switch (zstate) {
1447 1449 case ZONE_STATE_CONFIGURED:
1448 1450 case ZONE_STATE_INCOMPLETE:
1449 1451 /*
1450 1452 * Not our area of expertise; we just print a nice message
1451 1453 * and die off.
1452 1454 */
1453 1455 zerror(zlogp, B_FALSE,
1454 1456 "%s operation is invalid for zones in state '%s'",
1455 1457 z_cmd_name(cmd), zone_state_str(zstate));
1456 1458 break;
1457 1459
1458 1460 case ZONE_STATE_INSTALLED:
1459 1461 switch (cmd) {
1460 1462 case Z_READY:
1461 1463 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
1462 1464 if (rval == 0)
1463 1465 eventstream_write(Z_EVT_ZONE_READIED);
1464 1466 break;
1465 1467 case Z_BOOT:
1466 1468 case Z_FORCEBOOT:
1467 1469 eventstream_write(Z_EVT_ZONE_BOOTING);
1468 1470 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1469 1471 == 0) {
1470 1472 rval = zone_bootup(zlogp, zargp->bootbuf,
1471 1473 zstate);
1472 1474 }
1473 1475 audit_put_record(zlogp, uc, rval, "boot");
1474 1476 if (rval != 0) {
1475 1477 bringup_failure_recovery = B_TRUE;
1476 1478 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1477 1479 zstate);
1478 1480 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1479 1481 }
1480 1482 break;
1481 1483 case Z_SHUTDOWN:
1482 1484 case Z_HALT:
1483 1485 if (kernelcall) /* Invalid; can't happen */
1484 1486 abort();
1485 1487 /*
1486 1488 * We could have two clients racing to halt this
1487 1489 * zone; the second client loses, but his request
1488 1490 * doesn't fail, since the zone is now in the desired
1489 1491 * state.
1490 1492 */
1491 1493 zerror(zlogp, B_FALSE, "zone is already halted");
1492 1494 rval = 0;
1493 1495 break;
1494 1496 case Z_REBOOT:
1495 1497 if (kernelcall) /* Invalid; can't happen */
1496 1498 abort();
1497 1499 zerror(zlogp, B_FALSE, "%s operation is invalid "
1498 1500 "for zones in state '%s'", z_cmd_name(cmd),
1499 1501 zone_state_str(zstate));
1500 1502 rval = -1;
1501 1503 break;
1502 1504 case Z_NOTE_UNINSTALLING:
1503 1505 if (kernelcall) /* Invalid; can't happen */
1504 1506 abort();
1505 1507 /*
1506 1508 * Tell the console to print out a message about this.
1507 1509 * Once it does, we will be in_death_throes.
1508 1510 */
1509 1511 eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1510 1512 break;
1511 1513 case Z_MOUNT:
1512 1514 case Z_FORCEMOUNT:
1513 1515 if (kernelcall) /* Invalid; can't happen */
1514 1516 abort();
1515 1517 if (!zone_isnative && !zone_iscluster &&
1516 1518 !zone_islabeled) {
1517 1519 /*
1518 1520 * -U mounts the zone without lofs mounting
1519 1521 * zone file systems back into the scratch
1520 1522 * zone. This is required when mounting
1521 1523 * non-native branded zones.
1522 1524 */
1523 1525 (void) strlcpy(zargp->bootbuf, "-U",
1524 1526 BOOTARGS_MAX);
1525 1527 }
1526 1528
1527 1529 rval = zone_ready(zlogp,
1528 1530 strcmp(zargp->bootbuf, "-U") == 0 ?
1529 1531 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
1530 1532 if (rval != 0)
1531 1533 break;
1532 1534
1533 1535 eventstream_write(Z_EVT_ZONE_READIED);
1534 1536
1535 1537 /*
1536 1538 * Get a handle to the default brand info.
1537 1539 * We must always use the default brand file system
1538 1540 * list when mounting the zone.
1539 1541 */
1540 1542 if ((bh = brand_open(default_brand)) == NULL) {
1541 1543 rval = -1;
1542 1544 break;
1543 1545 }
1544 1546
1545 1547 /*
1546 1548 * Get the list of filesystems to mount from
1547 1549 * the brand configuration. These mounts are done
1548 1550 * via a thread that will enter the zone, so they
1549 1551 * are done from within the context of the zone.
1550 1552 */
1551 1553 cb.zlogp = zlogp;
1552 1554 cb.zoneid = zone_id;
1553 1555 cb.mount_cmd = B_TRUE;
1554 1556 rval = brand_platform_iter_mounts(bh,
1555 1557 mount_early_fs, &cb);
1556 1558
1557 1559 brand_close(bh);
1558 1560
1559 1561 /*
1560 1562 * Ordinarily, /dev/fd would be mounted inside the zone
1561 1563 * by svc:/system/filesystem/usr:default, but since
1562 1564 * we're not booting the zone, we need to do this
1563 1565 * manually.
1564 1566 */
1565 1567 if (rval == 0)
1566 1568 rval = mount_early_fs(&cb,
1567 1569 "fd", "/dev/fd", "fd", NULL);
1568 1570 break;
1569 1571 case Z_UNMOUNT:
1570 1572 if (kernelcall) /* Invalid; can't happen */
1571 1573 abort();
1572 1574 zerror(zlogp, B_FALSE, "zone is already unmounted");
1573 1575 rval = 0;
1574 1576 break;
1575 1577 }
1576 1578 break;
1577 1579
1578 1580 case ZONE_STATE_READY:
1579 1581 switch (cmd) {
1580 1582 case Z_READY:
1581 1583 /*
1582 1584 * We could have two clients racing to ready this
1583 1585 * zone; the second client loses, but his request
1584 1586 * doesn't fail, since the zone is now in the desired
1585 1587 * state.
1586 1588 */
1587 1589 zerror(zlogp, B_FALSE, "zone is already ready");
1588 1590 rval = 0;
1589 1591 break;
1590 1592 case Z_BOOT:
1591 1593 (void) strlcpy(boot_args, zargp->bootbuf,
1592 1594 sizeof (boot_args));
1593 1595 eventstream_write(Z_EVT_ZONE_BOOTING);
1594 1596 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1595 1597 audit_put_record(zlogp, uc, rval, "boot");
1596 1598 if (rval != 0) {
1597 1599 bringup_failure_recovery = B_TRUE;
1598 1600 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1599 1601 zstate);
1600 1602 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1601 1603 }
1602 1604 boot_args[0] = '\0';
1603 1605 break;
1604 1606 case Z_HALT:
1605 1607 if (kernelcall) /* Invalid; can't happen */
1606 1608 abort();
1607 1609 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1608 1610 != 0)
1609 1611 break;
1610 1612 eventstream_write(Z_EVT_ZONE_HALTED);
1611 1613 break;
1612 1614 case Z_SHUTDOWN:
1613 1615 case Z_REBOOT:
1614 1616 case Z_NOTE_UNINSTALLING:
1615 1617 case Z_MOUNT:
1616 1618 case Z_UNMOUNT:
1617 1619 if (kernelcall) /* Invalid; can't happen */
1618 1620 abort();
1619 1621 zerror(zlogp, B_FALSE, "%s operation is invalid "
1620 1622 "for zones in state '%s'", z_cmd_name(cmd),
1621 1623 zone_state_str(zstate));
1622 1624 rval = -1;
1623 1625 break;
1624 1626 }
1625 1627 break;
1626 1628
1627 1629 case ZONE_STATE_MOUNTED:
1628 1630 switch (cmd) {
1629 1631 case Z_UNMOUNT:
1630 1632 if (kernelcall) /* Invalid; can't happen */
1631 1633 abort();
1632 1634 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
1633 1635 if (rval == 0) {
1634 1636 eventstream_write(Z_EVT_ZONE_HALTED);
1635 1637 (void) sema_post(&scratch_sem);
1636 1638 }
1637 1639 break;
1638 1640 default:
1639 1641 if (kernelcall) /* Invalid; can't happen */
1640 1642 abort();
1641 1643 zerror(zlogp, B_FALSE, "%s operation is invalid "
1642 1644 "for zones in state '%s'", z_cmd_name(cmd),
1643 1645 zone_state_str(zstate));
1644 1646 rval = -1;
1645 1647 break;
1646 1648 }
1647 1649 break;
1648 1650
1649 1651 case ZONE_STATE_RUNNING:
1650 1652 case ZONE_STATE_SHUTTING_DOWN:
1651 1653 case ZONE_STATE_DOWN:
1652 1654 switch (cmd) {
1653 1655 case Z_READY:
1654 1656 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1655 1657 != 0)
1656 1658 break;
1657 1659 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
1658 1660 eventstream_write(Z_EVT_ZONE_READIED);
1659 1661 else
1660 1662 eventstream_write(Z_EVT_ZONE_HALTED);
1661 1663 break;
1662 1664 case Z_BOOT:
1663 1665 /*
1664 1666 * We could have two clients racing to boot this
1665 1667 * zone; the second client loses, but his request
1666 1668 * doesn't fail, since the zone is now in the desired
1667 1669 * state.
1668 1670 */
1669 1671 zerror(zlogp, B_FALSE, "zone is already booted");
1670 1672 rval = 0;
1671 1673 break;
1672 1674 case Z_HALT:
1673 1675 if (kernelcall) {
1674 1676 log_init_exit(init_status);
1675 1677 } else {
1676 1678 log_init_exit(-1);
1677 1679 }
1678 1680 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1679 1681 != 0)
1680 1682 break;
1681 1683 eventstream_write(Z_EVT_ZONE_HALTED);
1682 1684 break;
1683 1685 case Z_REBOOT:
1684 1686 (void) strlcpy(boot_args, zargp->bootbuf,
1685 1687 sizeof (boot_args));
1686 1688 eventstream_write(Z_EVT_ZONE_REBOOTING);
1687 1689 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1688 1690 != 0) {
1689 1691 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1690 1692 boot_args[0] = '\0';
1691 1693 break;
1692 1694 }
1693 1695 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1694 1696 != 0) {
1695 1697 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1696 1698 boot_args[0] = '\0';
1697 1699 break;
1698 1700 }
1699 1701 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1700 1702 audit_put_record(zlogp, uc, rval, "reboot");
1701 1703 if (rval != 0) {
1702 1704 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1703 1705 zstate);
1704 1706 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1705 1707 }
1706 1708 boot_args[0] = '\0';
1707 1709 break;
1708 1710 case Z_SHUTDOWN:
1709 1711 if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
1710 1712 wait_shut = B_TRUE;
1711 1713 }
1712 1714 break;
1713 1715 case Z_NOTE_UNINSTALLING:
1714 1716 case Z_MOUNT:
1715 1717 case Z_UNMOUNT:
1716 1718 zerror(zlogp, B_FALSE, "%s operation is invalid "
1717 1719 "for zones in state '%s'", z_cmd_name(cmd),
1718 1720 zone_state_str(zstate));
1719 1721 rval = -1;
1720 1722 break;
1721 1723 }
1722 1724 break;
1723 1725 default:
1724 1726 abort();
1725 1727 }
1726 1728
1727 1729 /*
1728 1730 * Because the state of the zone may have changed, we make sure
1729 1731 * to wake the console poller, which is in charge of initiating
1730 1732 * the shutdown procedure as necessary.
1731 1733 */
1732 1734 eventstream_write(Z_EVT_NULL);
1733 1735
1734 1736 out:
1735 1737 (void) mutex_unlock(&lock);
1736 1738
1737 1739 /* Wait for the Z_SHUTDOWN commands to complete */
1738 1740 if (wait_shut)
1739 1741 rval = zone_wait_shutdown(zlogp);
1740 1742
1741 1743 if (kernelcall) {
1742 1744 rvalp = NULL;
1743 1745 rlen = 0;
1744 1746 } else {
1745 1747 rvalp->rval = rval;
1746 1748 }
1747 1749 if (uc != NULL)
1748 1750 ucred_free(uc);
1749 1751 (void) door_return((char *)rvalp, rlen, NULL, 0);
1750 1752 thr_exit(NULL);
1751 1753 }
1752 1754
1753 1755 static int
1754 1756 setup_door(zlog_t *zlogp)
1755 1757 {
1756 1758 if ((zone_door = door_create(server, NULL,
1757 1759 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
1758 1760 zerror(zlogp, B_TRUE, "%s failed", "door_create");
1759 1761 return (-1);
1760 1762 }
1761 1763 (void) fdetach(zone_door_path);
1762 1764
1763 1765 if (fattach(zone_door, zone_door_path) != 0) {
1764 1766 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
1765 1767 (void) door_revoke(zone_door);
1766 1768 (void) fdetach(zone_door_path);
1767 1769 zone_door = -1;
1768 1770 return (-1);
1769 1771 }
1770 1772 return (0);
1771 1773 }
1772 1774
1773 1775 /*
1774 1776 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
1775 1777 * is where zoneadmd itself will check to see that another instance of
1776 1778 * zoneadmd isn't already controlling this zone.
1777 1779 *
1778 1780 * The idea here is that we want to open the path to which we will
1779 1781 * attach our door, lock it, and then make sure that no-one has beat us
1780 1782 * to fattach(3c)ing onto it.
1781 1783 *
1782 1784 * fattach(3c) is really a mount, so there are actually two possible
1783 1785 * vnodes we could be dealing with. Our strategy is as follows:
1784 1786 *
1785 1787 * - If the file we opened is a regular file (common case):
1786 1788 * There is no fattach(3c)ed door, so we have a chance of becoming
1787 1789 * the managing zoneadmd. We attempt to lock the file: if it is
1788 1790 * already locked, that means someone else raced us here, so we
1789 1791 * lose and give up. zoneadm(1m) will try to contact the zoneadmd
1790 1792 * that beat us to it.
1791 1793 *
1792 1794 * - If the file we opened is a namefs file:
1793 1795 * This means there is already an established door fattach(3c)'ed
1794 1796 * to the rendezvous path. We've lost the race, so we give up.
1795 1797 * Note that in this case we also try to grab the file lock, and
1796 1798 * will succeed in acquiring it since the vnode locked by the
1797 1799 * "winning" zoneadmd was a regular one, and the one we locked was
1798 1800 * the fattach(3c)'ed door node. At any rate, no harm is done, and
1799 1801 * we just return to zoneadm(1m) which knows to retry.
1800 1802 */
1801 1803 static int
1802 1804 make_daemon_exclusive(zlog_t *zlogp)
1803 1805 {
1804 1806 int doorfd = -1;
1805 1807 int err, ret = -1;
1806 1808 struct stat st;
1807 1809 struct flock flock;
1808 1810 zone_state_t zstate;
1809 1811
1810 1812 top:
1811 1813 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1812 1814 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1813 1815 zonecfg_strerror(err));
1814 1816 goto out;
1815 1817 }
1816 1818 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
1817 1819 S_IREAD|S_IWRITE)) < 0) {
1818 1820 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
1819 1821 goto out;
1820 1822 }
1821 1823 if (fstat(doorfd, &st) < 0) {
1822 1824 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
1823 1825 goto out;
1824 1826 }
1825 1827 /*
1826 1828 * Lock the file to synchronize with other zoneadmd
1827 1829 */
1828 1830 flock.l_type = F_WRLCK;
1829 1831 flock.l_whence = SEEK_SET;
1830 1832 flock.l_start = (off_t)0;
1831 1833 flock.l_len = (off_t)0;
1832 1834 if (fcntl(doorfd, F_SETLK, &flock) < 0) {
1833 1835 /*
1834 1836 * Someone else raced us here and grabbed the lock file
1835 1837 * first. A warning here is inappropriate since nothing
1836 1838 * went wrong.
1837 1839 */
1838 1840 goto out;
1839 1841 }
1840 1842
1841 1843 if (strcmp(st.st_fstype, "namefs") == 0) {
1842 1844 struct door_info info;
1843 1845
1844 1846 /*
1845 1847 * There is already something fattach()'ed to this file.
1846 1848 * Lets see what the door is up to.
1847 1849 */
1848 1850 if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
1849 1851 /*
1850 1852 * Another zoneadmd process seems to be in
1851 1853 * control of the situation and we don't need to
1852 1854 * be here. A warning here is inappropriate
1853 1855 * since nothing went wrong.
1854 1856 *
1855 1857 * If the door has been revoked, the zoneadmd
1856 1858 * process currently managing the zone is going
1857 1859 * away. We'll return control to zoneadm(1m)
1858 1860 * which will try again (by which time zoneadmd
1859 1861 * will hopefully have exited).
1860 1862 */
1861 1863 goto out;
1862 1864 }
1863 1865
1864 1866 /*
1865 1867 * If we got this far, there's a fattach(3c)'ed door
1866 1868 * that belongs to a process that has exited, which can
1867 1869 * happen if the previous zoneadmd died unexpectedly.
1868 1870 *
1869 1871 * Let user know that something is amiss, but that we can
1870 1872 * recover; if the zone is in the installed state, then don't
1871 1873 * message, since having a running zoneadmd isn't really
1872 1874 * expected/needed. We want to keep occurences of this message
1873 1875 * limited to times when zoneadmd is picking back up from a
1874 1876 * zoneadmd that died while the zone was in some non-trivial
1875 1877 * state.
1876 1878 */
1877 1879 if (zstate > ZONE_STATE_INSTALLED) {
1878 1880 static zoneid_t zid;
1879 1881
1880 1882 zerror(zlogp, B_FALSE,
1881 1883 "zone '%s': WARNING: zone is in state '%s', but "
1882 1884 "zoneadmd does not appear to be available; "
1883 1885 "restarted zoneadmd to recover.",
1884 1886 zone_name, zone_state_str(zstate));
1885 1887
1886 1888 /*
1887 1889 * Startup a thread to perform the zfd logging/tty svc
1888 1890 * and a thread to perform memory capping for the
1889 1891 * zone. zlogp won't be valid for much longer so use
1890 1892 * logsys.
1891 1893 */
1892 1894 if ((zid = getzoneidbyname(zone_name)) != -1) {
1893 1895 create_log_thread(&logsys, zid);
1894 1896 create_mcap_thread(&logsys, zid);
1895 1897 }
1896 1898
1897 1899 /* recover the global configuration snapshot */
1898 1900 if (snap_hndl == NULL) {
1899 1901 if ((snap_hndl = zonecfg_init_handle())
1900 1902 == NULL ||
1901 1903 zonecfg_create_snapshot(zone_name)
1902 1904 != Z_OK ||
1903 1905 zonecfg_get_snapshot_handle(zone_name,
1904 1906 snap_hndl) != Z_OK) {
1905 1907 zerror(zlogp, B_FALSE, "recovering "
1906 1908 "zone configuration handle");
1907 1909 goto out;
1908 1910 }
1909 1911 }
1910 1912 }
1911 1913
1912 1914 (void) fdetach(zone_door_path);
1913 1915 (void) close(doorfd);
1914 1916 goto top;
1915 1917 }
1916 1918 ret = 0;
1917 1919 out:
1918 1920 (void) close(doorfd);
1919 1921 return (ret);
1920 1922 }
1921 1923
1922 1924 /*
1923 1925 * Setup the brand's pre and post state change callbacks, as well as the
1924 1926 * query callback, if any of these exist.
1925 1927 */
1926 1928 static int
1927 1929 brand_callback_init(brand_handle_t bh, char *zone_name)
1928 1930 {
1929 1931 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
1930 1932 sizeof (pre_statechg_hook));
1931 1933
1932 1934 if (brand_get_prestatechange(bh, zone_name, zonepath,
1933 1935 pre_statechg_hook + EXEC_LEN,
1934 1936 sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
1935 1937 return (-1);
1936 1938
1937 1939 if (strlen(pre_statechg_hook) <= EXEC_LEN)
1938 1940 pre_statechg_hook[0] = '\0';
1939 1941
1940 1942 (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
1941 1943 sizeof (post_statechg_hook));
1942 1944
1943 1945 if (brand_get_poststatechange(bh, zone_name, zonepath,
1944 1946 post_statechg_hook + EXEC_LEN,
1945 1947 sizeof (post_statechg_hook) - EXEC_LEN) != 0)
1946 1948 return (-1);
1947 1949
1948 1950 if (strlen(post_statechg_hook) <= EXEC_LEN)
1949 1951 post_statechg_hook[0] = '\0';
1950 1952
1951 1953 (void) strlcpy(query_hook, EXEC_PREFIX,
1952 1954 sizeof (query_hook));
1953 1955
1954 1956 if (brand_get_query(bh, zone_name, zonepath, query_hook + EXEC_LEN,
1955 1957 sizeof (query_hook) - EXEC_LEN) != 0)
1956 1958 return (-1);
1957 1959
1958 1960 if (strlen(query_hook) <= EXEC_LEN)
1959 1961 query_hook[0] = '\0';
1960 1962
1961 1963 return (0);
1962 1964 }
1963 1965
1964 1966 int
1965 1967 main(int argc, char *argv[])
1966 1968 {
1967 1969 int opt;
1968 1970 zoneid_t zid;
1969 1971 priv_set_t *privset;
1970 1972 zone_state_t zstate;
1971 1973 char parents_locale[MAXPATHLEN];
1972 1974 brand_handle_t bh;
1973 1975 int err;
1974 1976
1975 1977 pid_t pid;
1976 1978 sigset_t blockset;
1977 1979 sigset_t block_cld;
1978 1980
1979 1981 struct {
1980 1982 sema_t sem;
1981 1983 int status;
1982 1984 zlog_t log;
1983 1985 } *shstate;
1984 1986 size_t shstatelen = getpagesize();
1985 1987
1986 1988 zlog_t errlog;
1987 1989 zlog_t *zlogp;
1988 1990
1989 1991 int ctfd;
1990 1992
1991 1993 progname = get_execbasename(argv[0]);
1992 1994
1993 1995 /*
1994 1996 * Make sure stderr is unbuffered
1995 1997 */
1996 1998 (void) setbuffer(stderr, NULL, 0);
1997 1999
1998 2000 /*
1999 2001 * Get out of the way of mounted filesystems, since we will daemonize
2000 2002 * soon.
2001 2003 */
2002 2004 (void) chdir("/");
2003 2005
2004 2006 /*
2005 2007 * Use the default system umask per PSARC 1998/110 rather than
2006 2008 * anything that may have been set by the caller.
2007 2009 */
2008 2010 (void) umask(CMASK);
2009 2011
2010 2012 /*
2011 2013 * Initially we want to use our parent's locale.
2012 2014 */
2013 2015 (void) setlocale(LC_ALL, "");
2014 2016 (void) textdomain(TEXT_DOMAIN);
2015 2017 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
2016 2018 sizeof (parents_locale));
2017 2019
2018 2020 /*
2019 2021 * This zlog_t is used for writing to stderr
2020 2022 */
2021 2023 errlog.logfile = stderr;
2022 2024 errlog.buflen = errlog.loglen = 0;
2023 2025 errlog.buf = errlog.log = NULL;
2024 2026 errlog.locale = parents_locale;
2025 2027
2026 2028 /*
2027 2029 * We start off writing to stderr until we're ready to daemonize.
2028 2030 */
2029 2031 zlogp = &errlog;
2030 2032
2031 2033 /*
2032 2034 * Process options.
2033 2035 */
2034 2036 while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
2035 2037 switch (opt) {
2036 2038 case 'R':
2037 2039 zonecfg_set_root(optarg);
2038 2040 break;
2039 2041 case 'z':
2040 2042 zone_name = optarg;
2041 2043 break;
2042 2044 default:
2043 2045 usage();
2044 2046 }
2045 2047 }
2046 2048
2047 2049 if (zone_name == NULL)
2048 2050 usage();
2049 2051
2050 2052 /*
2051 2053 * Because usage() prints directly to stderr, it has gettext()
2052 2054 * wrapping, which depends on the locale. But since zerror() calls
2053 2055 * localize() which tweaks the locale, it is not safe to call zerror()
2054 2056 * until after the last call to usage(). Fortunately, the last call
2055 2057 * to usage() is just above and the first call to zerror() is just
2056 2058 * below. Don't mess this up.
2057 2059 */
2058 2060 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
2059 2061 zerror(zlogp, B_FALSE, "cannot manage the %s zone",
2060 2062 GLOBAL_ZONENAME);
2061 2063 return (1);
2062 2064 }
2063 2065
2064 2066 if (zone_get_id(zone_name, &zid) != 0) {
2065 2067 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
2066 2068 zonecfg_strerror(Z_NO_ZONE));
2067 2069 return (1);
2068 2070 }
2069 2071
2070 2072 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
2071 2073 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
2072 2074 zonecfg_strerror(err));
2073 2075 return (1);
2074 2076 }
2075 2077 if (zstate < ZONE_STATE_INCOMPLETE) {
2076 2078 zerror(zlogp, B_FALSE,
2077 2079 "cannot manage a zone which is in state '%s'",
2078 2080 zone_state_str(zstate));
2079 2081 return (1);
2080 2082 }
2081 2083
2082 2084 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
2083 2085 zerror(zlogp, B_FALSE, "unable to determine zone path");
2084 2086 return (-1);
2085 2087 }
2086 2088
2087 2089 if (zonecfg_default_brand(default_brand,
2088 2090 sizeof (default_brand)) != Z_OK) {
2089 2091 zerror(zlogp, B_FALSE, "unable to determine default brand");
2090 2092 return (1);
2091 2093 }
2092 2094
2093 2095 /* Get a handle to the brand info for this zone */
2094 2096 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name))
2095 2097 != Z_OK) {
2096 2098 zerror(zlogp, B_FALSE, "unable to determine zone brand");
2097 2099 return (1);
2098 2100 }
2099 2101 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0);
2100 2102 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0);
2101 2103
2102 2104 /*
2103 2105 * In the alternate root environment, the only supported
2104 2106 * operations are mount and unmount. In this case, just treat
2105 2107 * the zone as native if it is cluster. Cluster zones can be
2106 2108 * native for the purpose of LU or upgrade, and the cluster
2107 2109 * brand may not exist in the miniroot (such as in net install
2108 2110 * upgrade).
2109 2111 */
2110 2112 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) {
2111 2113 zone_iscluster = B_TRUE;
2112 2114 if (zonecfg_in_alt_root()) {
2113 2115 (void) strlcpy(brand_name, default_brand,
2114 2116 sizeof (brand_name));
2115 2117 }
2116 2118 } else {
2117 2119 zone_iscluster = B_FALSE;
2118 2120 }
2119 2121
2120 2122 if ((bh = brand_open(brand_name)) == NULL) {
2121 2123 zerror(zlogp, B_FALSE, "unable to open zone brand");
2122 2124 return (1);
2123 2125 }
2124 2126
2125 2127 /* Get state change brand hooks. */
2126 2128 if (brand_callback_init(bh, zone_name) == -1) {
2127 2129 zerror(zlogp, B_TRUE,
2128 2130 "failed to initialize brand state change hooks");
2129 2131 brand_close(bh);
2130 2132 return (1);
2131 2133 }
2132 2134
2133 2135 brand_close(bh);
2134 2136
2135 2137 /*
2136 2138 * Check that we have all privileges. It would be nice to pare
2137 2139 * this down, but this is at least a first cut.
2138 2140 */
2139 2141 if ((privset = priv_allocset()) == NULL) {
2140 2142 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
2141 2143 return (1);
2142 2144 }
2143 2145
2144 2146 if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2145 2147 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
2146 2148 priv_freeset(privset);
2147 2149 return (1);
2148 2150 }
2149 2151
2150 2152 if (priv_isfullset(privset) == B_FALSE) {
2151 2153 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
2152 2154 "run this command (all privs required)");
2153 2155 priv_freeset(privset);
2154 2156 return (1);
2155 2157 }
2156 2158 priv_freeset(privset);
2157 2159
2158 2160 if (mkzonedir(zlogp) != 0)
2159 2161 return (1);
2160 2162
2161 2163 /*
2162 2164 * Pre-fork: setup shared state
2163 2165 */
2164 2166 if ((shstate = (void *)mmap(NULL, shstatelen,
2165 2167 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
2166 2168 MAP_FAILED) {
2167 2169 zerror(zlogp, B_TRUE, "%s failed", "mmap");
2168 2170 return (1);
2169 2171 }
2170 2172 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
2171 2173 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
2172 2174 (void) munmap((char *)shstate, shstatelen);
2173 2175 return (1);
2174 2176 }
2175 2177 shstate->log.logfile = NULL;
2176 2178 shstate->log.buflen = shstatelen - sizeof (*shstate);
2177 2179 shstate->log.loglen = shstate->log.buflen;
2178 2180 shstate->log.buf = (char *)shstate + sizeof (*shstate);
2179 2181 shstate->log.log = shstate->log.buf;
2180 2182 shstate->log.locale = parents_locale;
2181 2183 shstate->status = -1;
2182 2184
2183 2185 /*
2184 2186 * We need a SIGCHLD handler so the sema_wait() below will wake
2185 2187 * up if the child dies without doing a sema_post().
2186 2188 */
2187 2189 (void) sigset(SIGCHLD, sigchld);
2188 2190 /*
2189 2191 * We must mask SIGCHLD until after we've coped with the fork
2190 2192 * sufficiently to deal with it; otherwise we can race and
2191 2193 * receive the signal before pid has been initialized
2192 2194 * (yes, this really happens).
2193 2195 */
2194 2196 (void) sigemptyset(&block_cld);
2195 2197 (void) sigaddset(&block_cld, SIGCHLD);
2196 2198 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2197 2199
2198 2200 /*
2199 2201 * The parent only needs stderr after the fork, so close other fd's
2200 2202 * that we inherited from zoneadm so that the parent doesn't have those
2201 2203 * open while waiting. The child will close the rest after the fork.
2202 2204 */
2203 2205 closefrom(3);
2204 2206
2205 2207 if ((ctfd = init_template()) == -1) {
2206 2208 zerror(zlogp, B_TRUE, "failed to create contract");
2207 2209 return (1);
2208 2210 }
2209 2211
2210 2212 /*
2211 2213 * Do not let another thread localize a message while we are forking.
2212 2214 */
2213 2215 (void) mutex_lock(&msglock);
2214 2216 pid = fork();
2215 2217 (void) mutex_unlock(&msglock);
2216 2218
2217 2219 /*
2218 2220 * In all cases (parent, child, and in the event of an error) we
2219 2221 * don't want to cause creation of contracts on subsequent fork()s.
2220 2222 */
2221 2223 (void) ct_tmpl_clear(ctfd);
2222 2224 (void) close(ctfd);
2223 2225
2224 2226 if (pid == -1) {
2225 2227 zerror(zlogp, B_TRUE, "could not fork");
2226 2228 return (1);
2227 2229
2228 2230 } else if (pid > 0) { /* parent */
2229 2231 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2230 2232 /*
2231 2233 * This marks a window of vulnerability in which we receive
2232 2234 * the SIGCLD before falling into sema_wait (normally we would
2233 2235 * get woken up from sema_wait with EINTR upon receipt of
2234 2236 * SIGCLD). So we may need to use some other scheme like
2235 2237 * sema_posting in the sigcld handler.
2236 2238 * blech
2237 2239 */
2238 2240 (void) sema_wait(&shstate->sem);
2239 2241 (void) sema_destroy(&shstate->sem);
2240 2242 if (shstate->status != 0)
2241 2243 (void) waitpid(pid, NULL, WNOHANG);
2242 2244 /*
2243 2245 * It's ok if we die with SIGPIPE. It's not like we could have
2244 2246 * done anything about it.
2245 2247 */
2246 2248 (void) fprintf(stderr, "%s", shstate->log.buf);
2247 2249 _exit(shstate->status == 0 ? 0 : 1);
2248 2250 }
2249 2251
2250 2252 /*
2251 2253 * The child charges on.
2252 2254 */
2253 2255 (void) sigset(SIGCHLD, SIG_DFL);
2254 2256 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2255 2257
2256 2258 /*
2257 2259 * SIGPIPE can be delivered if we write to a socket for which the
2258 2260 * peer endpoint is gone. That can lead to too-early termination
2259 2261 * of zoneadmd, and that's not good eats.
2260 2262 */
2261 2263 (void) sigset(SIGPIPE, SIG_IGN);
2262 2264 /*
2263 2265 * Stop using stderr
2264 2266 */
2265 2267 zlogp = &shstate->log;
2266 2268
2267 2269 /*
2268 2270 * We don't need stdout/stderr from now on.
2269 2271 */
2270 2272 closefrom(0);
2271 2273
2272 2274 /*
2273 2275 * Initialize the syslog zlog_t. This needs to be done after
2274 2276 * the call to closefrom().
2275 2277 */
2276 2278 logsys.buf = logsys.log = NULL;
2277 2279 logsys.buflen = logsys.loglen = 0;
2278 2280 logsys.logfile = NULL;
2279 2281 logsys.locale = DEFAULT_LOCALE;
2280 2282
2281 2283 openlog("zoneadmd", LOG_PID, LOG_DAEMON);
2282 2284
2283 2285 /*
2284 2286 * The eventstream is used to publish state changes in the zone
2285 2287 * from the door threads to the console I/O poller.
2286 2288 */
2287 2289 if (eventstream_init() == -1) {
2288 2290 zerror(zlogp, B_TRUE, "unable to create eventstream");
2289 2291 goto child_out;
2290 2292 }
2291 2293
2292 2294 (void) snprintf(zone_door_path, sizeof (zone_door_path),
2293 2295 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);
2294 2296
2295 2297 /*
2296 2298 * See if another zoneadmd is running for this zone. If not, then we
2297 2299 * can now modify system state.
2298 2300 */
2299 2301 if (make_daemon_exclusive(zlogp) == -1)
2300 2302 goto child_out;
2301 2303
2302 2304
2303 2305 /*
2304 2306 * Create/join a new session; we need to be careful of what we do with
2305 2307 * the console from now on so we don't end up being the session leader
2306 2308 * for the terminal we're going to be handing out.
2307 2309 */
2308 2310 (void) setsid();
2309 2311
2310 2312 /*
2311 2313 * This thread shouldn't be receiving any signals; in particular,
2312 2314 * SIGCHLD should be received by the thread doing the fork().
2313 2315 */
2314 2316 (void) sigfillset(&blockset);
2315 2317 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
2316 2318
2317 2319 /*
2318 2320 * Setup the console device and get ready to serve the console;
2319 2321 * once this has completed, we're ready to let console clients
2320 2322 * make an attempt to connect (they will block until
2321 2323 * serve_console_sock() below gets called, and any pending
2322 2324 * connection is accept()ed).
2323 2325 */
2324 2326 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0)
2325 2327 goto child_out;
2326 2328
2327 2329 /*
2328 2330 * Take the lock now, so that when the door server gets going, we
2329 2331 * are guaranteed that it won't take a request until we are sure
2330 2332 * that everything is completely set up. See the child_out: label
2331 2333 * below to see why this matters.
2332 2334 */
2333 2335 (void) mutex_lock(&lock);
2334 2336
2335 2337 /* Init semaphore for scratch zones. */
2336 2338 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
2337 2339 zerror(zlogp, B_TRUE,
2338 2340 "failed to initialize semaphore for scratch zone");
2339 2341 goto child_out;
2340 2342 }
2341 2343
2342 2344 /* open the dladm handle */
2343 2345 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) {
2344 2346 zerror(zlogp, B_FALSE, "failed to open dladm handle");
2345 2347 goto child_out;
2346 2348 }
2347 2349
2348 2350 /*
2349 2351 * Note: door setup must occur *after* the console is setup.
2350 2352 * This is so that as zlogin tests the door to see if zoneadmd
2351 2353 * is ready yet, we know that the console will get serviced
2352 2354 * once door_info() indicates that the door is "up".
2353 2355 */
2354 2356 if (setup_door(zlogp) == -1)
2355 2357 goto child_out;
2356 2358
2357 2359 /*
2358 2360 * Things seem OK so far; tell the parent process that we're done
2359 2361 * with setup tasks. This will cause the parent to exit, signalling
2360 2362 * to zoneadm, zlogin, or whatever forked it that we are ready to
2361 2363 * service requests.
2362 2364 */
2363 2365 shstate->status = 0;
2364 2366 (void) sema_post(&shstate->sem);
2365 2367 (void) munmap((char *)shstate, shstatelen);
2366 2368 shstate = NULL;
2367 2369
2368 2370 (void) mutex_unlock(&lock);
2369 2371
2370 2372 /*
2371 2373 * zlogp is now invalid, so reset it to the syslog logger.
2372 2374 */
2373 2375 zlogp = &logsys;
2374 2376
2375 2377 /*
2376 2378 * Now that we are free of any parents, switch to the default locale.
2377 2379 */
2378 2380 (void) setlocale(LC_ALL, DEFAULT_LOCALE);
2379 2381
2380 2382 /*
2381 2383 * At this point the setup portion of main() is basically done, so
2382 2384 * we reuse this thread to manage the zone console. When
2383 2385 * serve_console() has returned, we are past the point of no return
2384 2386 * in the life of this zoneadmd.
2385 2387 */
2386 2388 if (zonecfg_in_alt_root()) {
2387 2389 /*
2388 2390 * This is just awful, but mounted scratch zones don't (and
2389 2391 * can't) have consoles. We just wait for unmount instead.
2390 2392 */
2391 2393 while (sema_wait(&scratch_sem) == EINTR)
2392 2394 ;
2393 2395 } else {
2394 2396 serve_console(zlogp);
2395 2397 assert(in_death_throes);
2396 2398 }
2397 2399
2398 2400 /*
2399 2401 * This is the next-to-last part of the exit interlock. Upon calling
2400 2402 * fdetach(), the door will go unreferenced; once any
2401 2403 * outstanding requests (like the door thread doing Z_HALT) are
2402 2404 * done, the door will get an UNREF notification; when it handles
2403 2405 * the UNREF, the door server will cause the exit. It's possible
2404 2406 * that fdetach() can fail because the file is in use, in which
2405 2407 * case we'll retry the operation.
2406 2408 */
2407 2409 assert(!MUTEX_HELD(&lock));
2408 2410 for (;;) {
2409 2411 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY))
2410 2412 break;
2411 2413 yield();
2412 2414 }
2413 2415
2414 2416 for (;;)
2415 2417 (void) pause();
2416 2418
2417 2419 child_out:
2418 2420 assert(pid == 0);
2419 2421 if (shstate != NULL) {
2420 2422 shstate->status = -1;
2421 2423 (void) sema_post(&shstate->sem);
2422 2424 (void) munmap((char *)shstate, shstatelen);
2423 2425 }
2424 2426
2425 2427 /*
2426 2428 * This might trigger an unref notification, but if so,
2427 2429 * we are still holding the lock, so our call to exit will
2428 2430 * ultimately win the race and will publish the right exit
2429 2431 * code.
2430 2432 */
2431 2433 if (zone_door != -1) {
2432 2434 assert(MUTEX_HELD(&lock));
2433 2435 (void) door_revoke(zone_door);
2434 2436 (void) fdetach(zone_door_path);
2435 2437 }
2436 2438
2437 2439 if (dld_handle != NULL)
2438 2440 dladm_close(dld_handle);
2439 2441
2440 2442 return (1); /* return from main() forcibly exits an MT process */
2441 2443 }
|
↓ open down ↓ |
1078 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX