Print this page
14019 Allow more control over zone init exit actions (fix mismerge)
14019 Allow more control over zone init exit actions
Portions contributed by: Joshua M. Clulow <jmc@joyent.com>
Portions contributed by: Andy Fiddaman <andy@omnios.org>
Reviewed by: C Fraire <cfraire@me.com>
Reviewed by: Gordon Ross <Gordon.W.Ross@gmail.com>
Approved by: Robert Mustacchi <rm@fingolfin.org>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/cmd/zoneadmd/zoneadmd.c
+++ new/usr/src/cmd/zoneadmd/zoneadmd.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright 2021 Joyent, Inc.
26 26 * Copyright (c) 2016 by Delphix. All rights reserved.
27 27 */
28 28
29 29 /*
30 30 * zoneadmd manages zones; one zoneadmd process is launched for each
31 31 * non-global zone on the system. This daemon juggles four jobs:
32 32 *
33 33 * - Implement setup and teardown of the zone "virtual platform": mount and
34 34 * unmount filesystems; create and destroy network interfaces; communicate
35 35 * with devfsadmd to lay out devices for the zone; instantiate the zone
36 36 * console device; configure process runtime attributes such as resource
37 37 * controls, pool bindings, fine-grained privileges.
38 38 *
39 39 * - Launch the zone's init(1M) process.
40 40 *
41 41 * - Implement a door server; clients (like zoneadm) connect to the door
42 42 * server and request zone state changes. The kernel is also a client of
43 43 * this door server. A request to halt or reboot the zone which originates
44 44 * *inside* the zone results in a door upcall from the kernel into zoneadmd.
45 45 *
46 46 * One minor problem is that messages emitted by zoneadmd need to be passed
47 47 * back to the zoneadm process making the request. These messages need to
48 48 * be rendered in the client's locale; so, this is passed in as part of the
49 49 * request. The exception is the kernel upcall to zoneadmd, in which case
50 50 * messages are syslog'd.
51 51 *
52 52 * To make all of this work, the Makefile adds -a to xgettext to extract *all*
53 53 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
54 54 * strings which do not need to be translated.
55 55 *
56 56 * - Act as a console server for zlogin -C processes; see comments in zcons.c
57 57 * for more information about the zone console architecture.
58 58 *
59 59 * DESIGN NOTES
60 60 *
61 61 * Restart:
62 62 * A chief design constraint of zoneadmd is that it should be restartable in
63 63 * the case that the administrator kills it off, or it suffers a fatal error,
64 64 * without the running zone being impacted; this is akin to being able to
65 65 * reboot the service processor of a server without affecting the OS instance.
66 66 */
67 67
68 68 #include <sys/param.h>
69 69 #include <sys/mman.h>
70 70 #include <sys/types.h>
71 71 #include <sys/stat.h>
72 72 #include <sys/sysmacros.h>
73 73 #include <sys/time.h>
74 74
75 75 #include <bsm/adt.h>
76 76 #include <bsm/adt_event.h>
77 77
78 78 #include <alloca.h>
79 79 #include <assert.h>
80 80 #include <errno.h>
81 81 #include <door.h>
82 82 #include <fcntl.h>
83 83 #include <locale.h>
84 84 #include <signal.h>
85 85 #include <stdarg.h>
86 86 #include <stdio.h>
87 87 #include <stdlib.h>
88 88 #include <string.h>
89 89 #include <strings.h>
90 90 #include <synch.h>
91 91 #include <syslog.h>
92 92 #include <thread.h>
93 93 #include <unistd.h>
94 94 #include <wait.h>
95 95 #include <limits.h>
96 96 #include <zone.h>
97 97 #include <libbrand.h>
98 98 #include <sys/brand.h>
99 99 #include <libcontract.h>
100 100 #include <libcontract_priv.h>
101 101 #include <sys/brand.h>
102 102 #include <sys/contract/process.h>
103 103 #include <sys/ctfs.h>
104 104 #include <libdladm.h>
105 105 #include <sys/dls_mgmt.h>
106 106 #include <libscf.h>
107 107 #include <uuid/uuid.h>
108 108 #include <libppt.h>
109 109
110 110 #include <libzonecfg.h>
111 111 #include <zonestat_impl.h>
112 112 #include "zoneadmd.h"
113 113
/* Program name, as printed in the usage() message. */
static char *progname;
char *zone_name; /* zone which we are managing */
zone_dochandle_t snap_hndl; /* handle for snapshot created when ready */
/* Zone path; mount_early_fs() builds the "<zonepath>/lu" root from it. */
char zonepath[MAXNAMELEN];
char pool_name[MAXNAMELEN];
char default_brand[MAXNAMELEN];
char brand_name[MAXNAMELEN];
boolean_t zone_isnative;
boolean_t zone_iscluster;
boolean_t zone_islabeled;
boolean_t shutdown_in_progress;
/* Zone ID returned by vplat_create() in zone_ready(). */
static zoneid_t zone_id;
/* Result of zone_get_did(); zone_ready() looks it up while it is still 0. */
static zoneid_t zone_did = 0;
dladm_handle_t dld_handle = NULL;

/*
 * Brand hook command strings.  brand_prestatechg() and brand_poststatechg()
 * treat an empty pre/post hook string as "no hook configured".
 */
char pre_statechg_hook[2 * MAXPATHLEN];
char post_statechg_hook[2 * MAXPATHLEN];
char query_hook[2 * MAXPATHLEN];

zlog_t logsys; /* log to syslog */
zlog_t logplat; /* log to platform.log */

mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */
mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */

static sema_t scratch_sem; /* for scratch zones */

static char zone_door_path[MAXPATHLEN];
static int zone_door = -1;

boolean_t in_death_throes = B_FALSE; /* daemon is dying */
boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */

static int platloghdl = -1; /* Handle for <zonepath>/logs/platform.log */

#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
#endif

/* Locale that localize_msg() restores LC_MESSAGES to after each lookup. */
#define DEFAULT_LOCALE "C"

/* Resource names passed as the "rsrc" component to set_zonecfg_env(). */
#define RSRC_NET "net"
#define RSRC_DEV "device"
157 157
158 158 static const char *
159 159 z_cmd_name(zone_cmd_t zcmd)
160 160 {
161 161 /* This list needs to match the enum in sys/zone.h */
162 162 static const char *zcmdstr[] = {
163 163 "ready", "boot", "forceboot", "reboot", "halt",
164 164 "note_uninstalling", "mount", "forcemount", "unmount",
165 165 "shutdown"
166 166 };
167 167
168 168 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr))
169 169 return ("unknown");
170 170 else
171 171 return (zcmdstr[(int)zcmd]);
172 172 }
173 173
/*
 * Return a pointer to the last path component of execfullname.  Trailing
 * '/' characters are removed by overwriting them with '\0' in the caller's
 * buffer, so the argument must be writable.  A name with no '/' is returned
 * unchanged.
 */
static char *
get_execbasename(char *execfullname)
{
	char *slash;

	while ((slash = strrchr(execfullname, '/')) != NULL) {
		/* Something follows the final slash: that is the basename. */
		if (slash[1] != '\0')
			return (slash + 1);

		/* guard against '/' at end of command invocation */
		*slash = '\0';
	}

	return (execfullname);
}
196 196
197 197 static void
198 198 usage(void)
199 199 {
200 200 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
201 201 (void) fprintf(stderr,
202 202 gettext("\tNote: %s should not be run directly.\n"), progname);
203 203 exit(2);
204 204 }
205 205
/*
 * Signal handler that intentionally does nothing.
 * NOTE(review): presumably registered for SIGCHLD so the signal is caught
 * rather than ignored; confirm at the registration site, which is not in
 * this part of the file.
 */
/* ARGSUSED */
static void
sigchld(int sig)
{
}
211 211
212 212 char *
213 213 localize_msg(char *locale, const char *msg)
214 214 {
215 215 char *out;
216 216
217 217 (void) mutex_lock(&msglock);
218 218 (void) setlocale(LC_MESSAGES, locale);
219 219 out = gettext(msg);
220 220 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
221 221 (void) mutex_unlock(&msglock);
222 222 return (out);
223 223 }
224 224
225 225 /* PRINTFLIKE3 */
226 226 void
227 227 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
228 228 {
229 229 va_list alist;
230 230 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
231 231 char *bp, *bp_nozone;
232 232 int saved_errno = errno;
233 233
234 234 if (zlogp == &logsys)
235 235 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", zone_name);
236 236 else
237 237 buf[0] = '\0';
238 238 bp = bp_nozone = &(buf[strlen(buf)]);
239 239
240 240 /*
241 241 * In theory, the locale pointer should be set to either "C" or a
242 242 * char array, so it should never be NULL
243 243 */
244 244 assert(zlogp->locale != NULL);
245 245 /* Locale is per process, but we are multi-threaded... */
246 246 fmt = localize_msg(zlogp->locale, fmt);
247 247
248 248 va_start(alist, fmt);
249 249 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
250 250 va_end(alist);
251 251 bp = &(buf[strlen(buf)]);
252 252 if (use_strerror)
253 253 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
254 254 strerror(saved_errno));
255 255
256 256 (void) strlcat(buf, "\n", sizeof (buf));
257 257
258 258 /*
259 259 * If we don't have the platform log, we are in a child process, and
260 260 * should log to stderr (which is a pipe) instead of the file.
261 261 */
262 262 if (logging_poisoned) {
263 263 (void) fprintf(stderr, "%s", buf);
264 264
265 265 if (zlogp != &logsys && zlogp->logfile == stderr)
266 266 return;
267 267 } else {
268 268 logstream_write(platloghdl, bp_nozone, strlen(bp_nozone));
269 269
270 270 if (zlogp == &logplat)
271 271 return;
272 272 }
273 273
274 274 if (zlogp == &logsys) {
275 275 bp = strrchr(buf, '\n');
276 276 if (bp != NULL && bp[1] == '\0') {
277 277 *bp = '\0';
278 278 }
279 279 (void) syslog(LOG_ERR, "%s", buf);
280 280 } else if (zlogp->logfile != NULL) {
281 281 (void) fprintf(zlogp->logfile, "%s", buf);
282 282 } else {
283 283 size_t buflen;
284 284 size_t copylen;
285 285
286 286 buflen = snprintf(zlogp->log, zlogp->loglen, "%s", buf);
287 287 copylen = MIN(buflen, zlogp->loglen);
288 288 zlogp->log += copylen;
289 289 zlogp->loglen -= copylen;
290 290 }
291 291 }
292 292
/*
 * Append src to dest, modifying dest in the process.  Prefix src with
 * a space character if dest is a non-empty string.
 *
 * dest      buffer of destsize bytes, expected to hold a '\0'-terminated
 *           string.  If no terminator is found within destsize bytes the
 *           buffer is treated as full and left untouched.
 * destsize  total size of dest in bytes; 0 is a no-op.
 * src       '\0'-terminated string to append; it is silently truncated if
 *           it does not fit.
 *
 * If dest is non-empty and there is not enough room for the separator plus
 * at least one byte of src, dest is left unchanged rather than being given
 * a dangling trailing space.
 */
static void
strnappend(char *dest, size_t destsize, const char *src)
{
	const char *nul;
	size_t used, room, n;

	if (destsize == 0)
		return;

	nul = memchr(dest, '\0', destsize);
	used = (nul != NULL) ? (size_t)(nul - dest) : destsize;

	if (used >= destsize - 1) {
		/* We've run out of room. Record something?! */
		return;
	}

	if (used > 0) {
		/* Need space for ' ', one byte of src and the terminator. */
		if (destsize - used < 3)
			return;
		dest[used++] = ' ';
	}

	/* Copy as much of src as fits, always leaving room for '\0'. */
	room = destsize - used - 1;
	n = strlen(src);
	if (n > room)
		n = room;
	(void) memcpy(dest + used, src, n);
	dest[used + n] = '\0';
}
317 317
318 318 /*
319 319 * Since illumos boot arguments are getopt(3c) compatible (see kernel(1m)), we
320 320 * put the arguments into an argv style array, use getopt to process them,
321 321 * and put the resultant argument string back into outargs. Non-native brands
322 322 * may support alternate forms of boot arguments so we must handle that as well.
323 323 *
324 324 * During the filtering, we pull out any arguments which are truly "boot"
325 325 * arguments, leaving only those which are to be passed intact to the
326 326 * progenitor process. The one we support at the moment is -i, which
327 327 * indicates to the kernel which program should be launched as 'init'.
328 328 *
329 329 * Except for Z_OK, all other return values are treated as fatal.
330 330 */
331 331 static int
332 332 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
333 333 char *init_file)
334 334 {
335 335 int argc = 0, argc_save;
336 336 int i;
337 337 int err = Z_OK;
338 338 char *arg, *lasts, **argv = NULL, **argv_save;
339 339 char zonecfg_args[BOOTARGS_MAX];
340 340 char scratchargs[BOOTARGS_MAX], *sargs;
341 341 char scratchopt[3];
342 342 char c;
343 343
344 344 bzero(outargs, BOOTARGS_MAX);
345 345
346 346 /*
347 347 * If the user didn't specify transient boot arguments, check
348 348 * to see if there were any specified in the zone configuration,
349 349 * and use them if applicable.
350 350 */
351 351 if (inargs == NULL || inargs[0] == '\0') {
352 352 bzero(zonecfg_args, sizeof (zonecfg_args));
353 353 (void) zonecfg_get_bootargs(snap_hndl, zonecfg_args,
354 354 sizeof (zonecfg_args));
355 355 inargs = zonecfg_args;
356 356 }
357 357
358 358 if (strlen(inargs) >= BOOTARGS_MAX) {
359 359 zerror(zlogp, B_FALSE, "boot argument string too long");
360 360 return (Z_INVAL);
361 361 }
362 362
363 363 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
364 364 sargs = scratchargs;
365 365 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
366 366 sargs = NULL;
367 367 argc++;
368 368 }
369 369
370 370 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
371 371 zerror(zlogp, B_FALSE, "memory allocation failed");
372 372 return (Z_NOMEM);
373 373 }
374 374
375 375 argv_save = argv;
376 376 argc_save = argc;
377 377
378 378 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
379 379 sargs = scratchargs;
380 380 i = 0;
381 381 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
382 382 sargs = NULL;
383 383 if ((argv[i] = strdup(arg)) == NULL) {
384 384 err = Z_NOMEM;
385 385 zerror(zlogp, B_FALSE, "memory allocation failed");
386 386 goto done;
387 387 }
388 388 i++;
389 389 }
390 390
391 391 /*
392 392 * We preserve compatibility with the illumos system boot behavior,
393 393 * which allows:
394 394 *
395 395 * # reboot kernel/unix -s -m verbose
396 396 *
397 397 * In this example, kernel/unix tells the booter what file to boot. The
398 398 * original intent of this was that we didn't want reboot in a zone to
399 399 * be gratuitously different, so we would silently ignore the boot
400 400 * file, if necessary. However, this usage is archaic and has never
401 401 * been common, since it is impossible to boot a zone onto a different
402 402 * kernel. Ignoring the first argument breaks for non-native brands
403 403 * which pass boot arguments in a different style. e.g.
404 404 * systemd.log_level=debug
405 405 * Thus, for backward compatibility we only ignore the first argument
406 406 * if it appears to be in the illumos form and attempting to specify a
407 407 * kernel.
408 408 */
409 409 if (argv[0] == NULL)
410 410 goto done;
411 411
412 412 assert(argv[0][0] != ' ');
413 413 assert(argv[0][0] != '\t');
414 414
415 415 if (strncmp(argv[0], "kernel/", 7) == 0) {
416 416 argv = &argv[1];
417 417 argc--;
418 418 }
419 419
420 420 optind = 0;
421 421 opterr = 0;
422 422 err = Z_OK;
423 423 while ((c = getopt(argc, argv, "fi:m:s")) != -1) {
424 424 switch (c) {
425 425 case 'i':
426 426 /*
427 427 * -i is handled by the runtime and is not passed
428 428 * along to userland
429 429 */
430 430 (void) strlcpy(init_file, optarg, MAXPATHLEN);
431 431 break;
432 432 case 'f':
433 433 /* This has already been processed by zoneadm */
434 434 break;
435 435 case 'm':
436 436 case 's':
437 437 /* These pass through unmolested */
438 438 (void) snprintf(scratchopt, sizeof (scratchopt),
439 439 "-%c", c);
440 440 strnappend(outargs, BOOTARGS_MAX, scratchopt);
441 441 if (optarg != NULL)
442 442 strnappend(outargs, BOOTARGS_MAX, optarg);
443 443 break;
444 444 case '?':
445 445 /*
446 446 * If a brand has its own init, we need to pass along
447 447 * whatever the user provides. We use the entire
448 448 * unknown string here so that we correctly handle
449 449 * unknown long options (e.g. --debug).
450 450 */
451 451 strnappend(outargs, BOOTARGS_MAX, argv[optind - 1]);
452 452 break;
453 453 }
454 454 }
455 455
456 456 /*
457 457 * We need to pass along everything else since we don't know what
458 458 * the brand's init is expecting. For example, an argument list like:
459 459 * --confdir /foo --debug
460 460 * will cause the getopt parsing to stop at '/foo' but we need to pass
461 461 * that on, along with the '--debug'. This does mean that we require
462 462 * any of our known options (-ifms) to preceed the brand-specific ones.
463 463 */
464 464 while (optind < argc) {
465 465 strnappend(outargs, BOOTARGS_MAX, argv[optind]);
466 466 optind++;
467 467 }
468 468
469 469 done:
470 470 for (i = 0; i < argc_save; i++) {
471 471 if (argv_save[i] != NULL)
472 472 free(argv_save[i]);
473 473 }
474 474 free(argv_save);
475 475 return (err);
476 476 }
477 477
478 478
479 479 static int
480 480 mkzonedir(zlog_t *zlogp)
481 481 {
482 482 struct stat st;
483 483 /*
484 484 * We must create and lock everyone but root out of ZONES_TMPDIR
485 485 * since anyone can open any UNIX domain socket, regardless of
486 486 * its file system permissions. Sigh...
487 487 */
488 488 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
489 489 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
490 490 return (-1);
491 491 }
492 492 /* paranoia */
493 493 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
494 494 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
495 495 return (-1);
496 496 }
497 497 (void) chmod(ZONES_TMPDIR, S_IRWXU);
498 498 return (0);
499 499 }
500 500
501 501 /*
502 502 * Run the brand's pre-state change callback, if it exists.
503 503 */
504 504 static int
505 505 brand_prestatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug)
506 506 {
507 507 char cmdbuf[2 * MAXPATHLEN];
508 508 const char *altroot;
509 509
510 510 if (pre_statechg_hook[0] == '\0')
511 511 return (0);
512 512
513 513 altroot = zonecfg_get_root();
514 514 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
515 515 state, cmd, altroot) > sizeof (cmdbuf))
516 516 return (-1);
517 517
518 518 if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0)
519 519 return (-1);
520 520
521 521 return (0);
522 522 }
523 523
524 524 /*
525 525 * Run the brand's post-state change callback, if it exists.
526 526 */
527 527 static int
528 528 brand_poststatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug)
529 529 {
530 530 char cmdbuf[2 * MAXPATHLEN];
531 531 const char *altroot;
532 532
533 533 if (post_statechg_hook[0] == '\0')
534 534 return (0);
535 535
536 536 altroot = zonecfg_get_root();
537 537 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
538 538 state, cmd, altroot) > sizeof (cmdbuf))
539 539 return (-1);
540 540
541 541 if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0)
542 542 return (-1);
543 543
544 544 return (0);
545 545 }
546 546
547 547 /*
548 548 * Notify zonestatd of the new zone. If zonestatd is not running, this
549 549 * will do nothing.
550 550 */
551 551 static void
552 552 notify_zonestatd(zoneid_t zoneid)
553 553 {
554 554 int cmd[2];
555 555 int fd;
556 556 door_arg_t params;
557 557
558 558 fd = open(ZS_DOOR_PATH, O_RDONLY);
559 559 if (fd < 0)
560 560 return;
561 561
562 562 cmd[0] = ZSD_CMD_NEW_ZONE;
563 563 cmd[1] = zoneid;
564 564 params.data_ptr = (char *)&cmd;
565 565 params.data_size = sizeof (cmd);
566 566 params.desc_ptr = NULL;
567 567 params.desc_num = 0;
568 568 params.rbuf = NULL;
569 569 params.rsize = 0;
570 570 (void) door_call(fd, ¶ms);
571 571 (void) close(fd);
572 572 }
573 573
574 574 /*
575 575 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is
576 576 * 'true' if this is being invoked as part of the processing for the "mount"
577 577 * subcommand.
578 578 *
579 579 * If a scratch zone mount (ALT_MOUNT) is being performed then do not
580 580 * call the state change hooks.
581 581 */
582 582 static int
583 583 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate, boolean_t debug)
584 584 {
585 585 int err;
586 586 boolean_t snapped = B_FALSE;
587 587
588 588 if ((snap_hndl = zonecfg_init_handle()) == NULL) {
589 589 zerror(zlogp, B_TRUE, "getting zone configuration handle");
590 590 goto bad;
591 591 }
592 592 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
593 593 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
594 594 zonecfg_strerror(err));
595 595 goto bad;
596 596 }
597 597 snapped = B_TRUE;
598 598
599 599 if (zonecfg_get_snapshot_handle(zone_name, snap_hndl) != Z_OK) {
600 600 zerror(zlogp, B_FALSE, "invalid configuration snapshot");
601 601 goto bad;
602 602 }
603 603
604 604 if (zone_did == 0)
605 605 zone_did = zone_get_did(zone_name);
606 606
607 607 if (!ALT_MOUNT(mount_cmd) &&
608 608 brand_prestatechg(zlogp, zstate, Z_READY, debug) != 0)
609 609 goto bad;
610 610
611 611 if ((zone_id = vplat_create(zlogp, mount_cmd, zone_did)) == -1)
612 612 goto bad;
613 613
614 614 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
615 615 bringup_failure_recovery = B_TRUE;
616 616 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE,
617 617 debug);
618 618 goto bad;
619 619 }
620 620
621 621 if (!ALT_MOUNT(mount_cmd) &&
622 622 brand_poststatechg(zlogp, zstate, Z_READY, debug) != 0)
623 623 goto bad;
624 624
625 625 return (0);
626 626
627 627 bad:
628 628 /*
629 629 * If something goes wrong, we up the zones's state to the target
630 630 * state, READY, and then invoke the hook as if we're halting.
631 631 */
632 632 if (!ALT_MOUNT(mount_cmd))
633 633 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT,
634 634 debug);
635 635
636 636 if (snapped)
637 637 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
638 638 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
639 639 zonecfg_strerror(err));
640 640 zonecfg_fini_handle(snap_hndl);
641 641 snap_hndl = NULL;
642 642 return (-1);
643 643 }
644 644
645 645 int
646 646 init_template(void)
647 647 {
648 648 int fd;
649 649 int err = 0;
650 650
651 651 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
652 652 if (fd == -1)
653 653 return (-1);
654 654
655 655 /*
656 656 * For now, zoneadmd doesn't do anything with the contract.
657 657 * Deliver no events, don't inherit, and allow it to be orphaned.
658 658 */
659 659 err |= ct_tmpl_set_critical(fd, 0);
660 660 err |= ct_tmpl_set_informative(fd, 0);
661 661 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
662 662 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
663 663 if (err || ct_tmpl_activate(fd)) {
664 664 (void) close(fd);
665 665 return (-1);
666 666 }
667 667
668 668 return (fd);
669 669 }
670 670
/*
 * Context that mount_early_fs() receives through its opaque 'data'
 * argument.
 */
typedef struct fs_callback {
	zlog_t *zlogp;		/* where mount_early_fs() reports errors */
	zoneid_t zoneid;	/* zone the mounting child enters */
	boolean_t mount_cmd;	/* if set, root is "<zonepath>/lu" */
} fs_callback_t;
676 676
677 677 static int
678 678 mount_early_fs(void *data, const char *spec, const char *dir,
679 679 const char *fstype, const char *opt)
680 680 {
681 681 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp;
682 682 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid;
683 683 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd;
684 684 char rootpath[MAXPATHLEN];
685 685 pid_t child;
686 686 int child_status;
687 687 int tmpl_fd;
688 688 int rv;
689 689 ctid_t ct;
690 690
691 691 /* determine the zone rootpath */
692 692 if (mount_cmd) {
693 693 char luroot[MAXPATHLEN];
694 694
695 695 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
696 696 resolve_lofs(zlogp, luroot, sizeof (luroot));
697 697 (void) strlcpy(rootpath, luroot, sizeof (rootpath));
698 698 } else {
699 699 if (zone_get_rootpath(zone_name,
700 700 rootpath, sizeof (rootpath)) != Z_OK) {
701 701 zerror(zlogp, B_FALSE, "unable to determine zone root");
702 702 return (-1);
703 703 }
704 704 }
705 705
706 706 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) {
707 707 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
708 708 rootpath, dir);
709 709 return (-1);
710 710 } else if (rv > 0) {
711 711 /* The mount point path doesn't exist, create it now. */
712 712 if (make_one_dir(zlogp, rootpath, dir,
713 713 DEFAULT_DIR_MODE, DEFAULT_DIR_USER,
714 714 DEFAULT_DIR_GROUP) != 0) {
715 715 zerror(zlogp, B_FALSE, "failed to create mount point");
716 716 return (-1);
717 717 }
718 718
719 719 /*
720 720 * Now this might seem weird, but we need to invoke
721 721 * valid_mount_path() again. Why? Because it checks
722 722 * to make sure that the mount point path is canonical,
723 723 * which it can only do if the path exists, so now that
724 724 * we've created the path we have to verify it again.
725 725 */
726 726 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir,
727 727 fstype)) < 0) {
728 728 zerror(zlogp, B_FALSE,
729 729 "%s%s is not a valid mount point", rootpath, dir);
730 730 return (-1);
731 731 }
732 732 }
733 733
734 734 if ((tmpl_fd = init_template()) == -1) {
735 735 zerror(zlogp, B_TRUE, "failed to create contract");
736 736 return (-1);
737 737 }
738 738
739 739 if ((child = fork()) == -1) {
740 740 (void) ct_tmpl_clear(tmpl_fd);
741 741 (void) close(tmpl_fd);
742 742 zerror(zlogp, B_TRUE, "failed to fork");
743 743 return (-1);
744 744
745 745 } else if (child == 0) { /* child */
746 746 char opt_buf[MAX_MNTOPT_STR];
747 747 int optlen = 0;
748 748 int mflag = MS_DATA;
749 749 int i;
750 750 int ret;
751 751
752 752 (void) ct_tmpl_clear(tmpl_fd);
753 753 /*
754 754 * Even though there are no procs running in the zone, we
755 755 * do this for paranoia's sake.
756 756 */
757 757 (void) closefrom(0);
758 758
759 759 if (zone_enter(zoneid) == -1) {
760 760 _exit(errno);
761 761 }
762 762 if (opt != NULL) {
763 763 /*
764 764 * The mount() system call is incredibly annoying.
765 765 * If options are specified, we need to copy them
766 766 * into a temporary buffer since the mount() system
767 767 * call will overwrite the options string. It will
768 768 * also fail if the new option string it wants to
769 769 * write is bigger than the one we passed in, so
770 770 * you must pass in a buffer of the maximum possible
771 771 * option string length. sigh.
772 772 */
773 773 (void) strlcpy(opt_buf, opt, sizeof (opt_buf));
774 774 opt = opt_buf;
775 775 optlen = MAX_MNTOPT_STR;
776 776 mflag = MS_OPTIONSTR;
777 777 }
778 778
779 779 /*
780 780 * There is an obscure race condition which can cause mount
781 781 * to return EBUSY. This happens for example on the mount
782 782 * of the zone's /etc/svc/volatile file system if there is
783 783 * a GZ process running svcs -Z, which will touch the
784 784 * mountpoint, just as we're trying to do the mount. To cope
785 785 * with this, we retry up to 3 times to let this transient
786 786 * process get out of the way.
787 787 */
788 788 for (i = 0; i < 3; i++) {
789 789 ret = 0;
790 790 if (mount(spec, dir, mflag, fstype, NULL, 0, opt,
791 791 optlen) != 0)
792 792 ret = errno;
793 793 if (ret != EBUSY)
794 794 break;
795 795 (void) sleep(1);
796 796 }
797 797 _exit(ret);
798 798 }
799 799
800 800 /* parent */
801 801 if (contract_latest(&ct) == -1)
802 802 ct = -1;
803 803 (void) ct_tmpl_clear(tmpl_fd);
804 804 (void) close(tmpl_fd);
805 805 if (waitpid(child, &child_status, 0) != child) {
806 806 /* unexpected: we must have been signalled */
807 807 (void) contract_abandon_id(ct);
808 808 return (-1);
809 809 }
810 810 (void) contract_abandon_id(ct);
811 811 if (WEXITSTATUS(child_status) != 0) {
812 812 errno = WEXITSTATUS(child_status);
813 813 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
814 814 return (-1);
815 815 }
816 816
817 817 return (0);
818 818 }
819 819
/*
 * Replace characters other than [A-Za-z0-9_] with '_' so that the string is a
 * valid environment variable name.  The string is modified in place.
 */
static void
sanitize_env_var_name(char *var)
{
	for (char *p = var; *p != '\0'; p++) {
		/*
		 * The <ctype.h> functions require a value representable as
		 * unsigned char; passing a negative plain char is undefined.
		 */
		if (!isalnum((unsigned char)*p)) {
			*p = '_';
		}
	}
}
833 833
834 834 /*
835 835 * env variable name format
836 836 * _ZONECFG_{resource name}_{identifying attr. name}_{property name}
837 837 * Any dashes (-) in the property names are replaced with underscore (_).
838 838 */
839 839 static void
840 840 set_zonecfg_env(char *rsrc, char *attr, char *name, char *val)
841 841 {
842 842 /* Enough for maximal name, rsrc + attr, & slop for ZONECFG & _'s */
843 843 char nm[2 * MAXNAMELEN + 32];
844 844
845 845 if (attr == NULL)
846 846 (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s", rsrc,
847 847 name);
848 848 else
849 849 (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s_%s", rsrc,
850 850 attr, name);
851 851
852 852 sanitize_env_var_name(nm);
853 853
854 854 (void) setenv(nm, val, 1);
855 855 }
856 856
857 857 /*
858 858 * Resolve a device:match value to a path. This is only different for PPT
859 859 * devices, where we expect the match property to be a /devices/... path, and
860 860 * configured for PPT already.
861 861 */
862 862 int
863 863 resolve_device_match(zlog_t *zlogp, struct zone_devtab *dtab,
864 864 char *path, size_t len)
865 865 {
866 866 struct zone_res_attrtab *rap;
867 867
868 868 for (rap = dtab->zone_dev_attrp; rap != NULL;
869 869 rap = rap->zone_res_attr_next) {
870 870 if (strcmp(rap->zone_res_attr_name, "model") == 0 &&
871 871 strcmp(rap->zone_res_attr_value, "passthru") == 0)
872 872 break;
873 873 }
874 874
875 875 if (rap == NULL) {
876 876 if (strlcpy(path, dtab->zone_dev_match, len) >= len)
877 877 return (Z_INVAL);
878 878 return (Z_OK);
879 879 }
880 880
881 881 if (strncmp(dtab->zone_dev_match, "/devices",
882 882 strlen("/devices")) != 0) {
883 883 zerror(zlogp, B_FALSE, "invalid passthru match value '%s'",
884 884 dtab->zone_dev_match);
885 885 return (Z_INVAL);
886 886 }
887 887
888 888 if (ppt_devpath_to_dev(dtab->zone_dev_match, path, len) != 0) {
889 889 zerror(zlogp, B_TRUE, "failed to resolve passthru device %s",
890 890 dtab->zone_dev_match);
891 891 return (Z_INVAL);
892 892 }
893 893
894 894 return (Z_OK);
895 895 }
896 896
897 897 /*
898 898 * Export various zonecfg properties into environment for the boot and state
899 899 * change hooks.
900 900 *
901 901 * If debug is true, _ZONEADMD_brand_debug is set to 1, else it is set to an
902 902 * empty string. Brand hooks consider any non-empty string as an indication
903 903 * that debug output is requested.
904 904 *
905 905 * We could export more of the config in the future, as necessary. A better
906 906 * solution would be to make it so brand-specific behavior is handled by
907 907 * brand-specific callbacks written in C. Then the normal libzonecfg interfaces
908 908 * can be used for accessing any parts of the configuration that are needed.
909 909 *
910 910 * All of the environment variables set by this function are specific to
911 911 * SmartOS.
912 912 */
913 913 static int
914 914 setup_subproc_env(zlog_t *zlogp, boolean_t debug)
915 915 {
916 916 int res;
917 917 struct zone_nwiftab ntab;
918 918 struct zone_devtab dtab;
919 919 struct zone_attrtab atab;
920 920 char net_resources[MAXNAMELEN * 2];
921 921 char dev_resources[MAXNAMELEN * 2];
922 922 char didstr[16];
923 923 char uuidstr[UUID_PRINTABLE_STRING_LENGTH];
924 924 uuid_t uuid;
925 925
926 926 /* snap_hndl is null when called through the set_brand_env code path */
927 927 if (snap_hndl == NULL)
928 928 return (Z_OK);
929 929
930 930 if ((res = zonecfg_get_uuid(zone_name, uuid)) != Z_OK)
931 931 return (res);
932 932
933 933 uuid_unparse(uuid, uuidstr);
934 934 (void) setenv("_ZONECFG_uuid", uuidstr, 1);
935 935
936 936 (void) snprintf(didstr, sizeof (didstr), "%d", zone_did);
937 937 (void) setenv("_ZONECFG_did", didstr, 1);
938 938
939 939 /*
940 940 * "net" resources are exported because zoneadmd does not handle
941 941 * automatic configuration of vnics and so that the bhyve boot hook
942 942 * can generate the argument list for the brand's init program. At such
943 943 * a time as vnic creation is handled in zoneadmd and brand callbacks
944 944 * can be executed as part of the zoneadmd process this should be
945 945 * removed.
946 946 */
947 947 net_resources[0] = '\0';
948 948 if ((res = zonecfg_setnwifent(snap_hndl)) != Z_OK)
949 949 goto done;
950 950
951 951 while (zonecfg_getnwifent(snap_hndl, &ntab) == Z_OK) {
952 952 struct zone_res_attrtab *rap;
953 953 char *phys;
954 954
955 955 phys = ntab.zone_nwif_physical;
956 956
957 957 (void) strlcat(net_resources, phys, sizeof (net_resources));
958 958 (void) strlcat(net_resources, " ", sizeof (net_resources));
959 959
960 960 set_zonecfg_env(RSRC_NET, phys, "physical", phys);
961 961
962 962 set_zonecfg_env(RSRC_NET, phys, "address",
963 963 ntab.zone_nwif_address);
964 964 set_zonecfg_env(RSRC_NET, phys, "allowed-address",
965 965 ntab.zone_nwif_allowed_address);
966 966 set_zonecfg_env(RSRC_NET, phys, "defrouter",
967 967 ntab.zone_nwif_defrouter);
968 968 set_zonecfg_env(RSRC_NET, phys, "global-nic",
969 969 ntab.zone_nwif_gnic);
970 970 set_zonecfg_env(RSRC_NET, phys, "mac-addr", ntab.zone_nwif_mac);
971 971 set_zonecfg_env(RSRC_NET, phys, "vlan-id",
972 972 ntab.zone_nwif_vlan_id);
973 973
974 974 for (rap = ntab.zone_nwif_attrp; rap != NULL;
975 975 rap = rap->zone_res_attr_next)
976 976 set_zonecfg_env(RSRC_NET, phys, rap->zone_res_attr_name,
977 977 rap->zone_res_attr_value);
978 978 nwifent_free_attrs(&ntab);
979 979 }
980 980
981 981 (void) setenv("_ZONECFG_net_resources", net_resources, 1);
982 982
983 983 (void) zonecfg_endnwifent(snap_hndl);
984 984
985 985 /*
986 986 * "device" resources are exported because the bhyve brand boot callback
987 987 * needs them to generate the argument list for the brand's init
988 988 * program. At such a time as brand callbacks can be executed as part
989 989 * of the zoneadmd process, this should be removed.
990 990 *
991 991 * The bhyve brand only supports disk-like and ppt devices and does not
992 992 * support regular expressions.
993 993 */
994 994 if ((res = zonecfg_setdevent(snap_hndl)) != Z_OK)
995 995 goto done;
996 996
997 997 dev_resources[0] = '\0';
998 998 while (zonecfg_getdevent(snap_hndl, &dtab) == Z_OK) {
999 999 char *match = dtab.zone_dev_match;
1000 1000 struct zone_res_attrtab *rap;
1001 1001 char path[MAXPATHLEN];
1002 1002
1003 1003 res = resolve_device_match(zlogp, &dtab, path, sizeof (path));
1004 1004 if (res != Z_OK)
1005 1005 goto done;
1006 1006
1007 1007 /*
1008 1008 * Even if not modified, the match path will be mangled in the
1009 1009 * environment variable name, so we always store the value here.
1010 1010 */
1011 1011 set_zonecfg_env(RSRC_DEV, match, "path", path);
1012 1012
1013 1013 for (rap = dtab.zone_dev_attrp; rap != NULL;
1014 1014 rap = rap->zone_res_attr_next) {
1015 1015 set_zonecfg_env(RSRC_DEV, match,
1016 1016 rap->zone_res_attr_name, rap->zone_res_attr_value);
1017 1017 }
1018 1018
1019 1019 /*
1020 1020 * _ZONECFG_device_resources will contain a space separated list
1021 1021 * of devices that have _ZONECFG_device_<device>* environment
1022 1022 * variables. So that each element of the list matches up with
1023 1023 * <device>, each list item needs to be sanitized in the same
1024 1024 * way that environment variable names are sanitized.
1025 1025 */
1026 1026 sanitize_env_var_name(match);
1027 1027 (void) strlcat(dev_resources, match, sizeof (dev_resources));
1028 1028 (void) strlcat(dev_resources, " ", sizeof (dev_resources));
1029 1029 }
1030 1030 (void) zonecfg_enddevent(snap_hndl);
1031 1031
1032 1032 (void) setenv("_ZONECFG_device_resources", dev_resources, 1);
1033 1033
1034 1034 /*
1035 1035 * "attr" resources are exported because the bhyve brand's boot hook
1036 1036 * needs access to the "ram", "cpu", "bootrom", etc. to form the
1037 1037 * argument list for the brand's init program. Once the bhyve brand is
1038 1038 * configured via proper resources and properties, this should be
1039 1039 * removed.
1040 1040 */
1041 1041 if ((res = zonecfg_setattrent(snap_hndl)) != Z_OK)
1042 1042 goto done;
1043 1043
1044 1044 while (zonecfg_getattrent(snap_hndl, &atab) == Z_OK) {
1045 1045 set_zonecfg_env("attr", NULL, atab.zone_attr_name,
1046 1046 atab.zone_attr_value);
1047 1047 }
1048 1048
1049 1049 (void) zonecfg_endattrent(snap_hndl);
1050 1050
1051 1051 if (debug)
1052 1052 (void) setenv("_ZONEADMD_brand_debug", "1", 1);
1053 1053 else
1054 1054 (void) setenv("_ZONEADMD_brand_debug", "", 1);
1055 1055
1056 1056 res = Z_OK;
1057 1057
1058 1058 done:
1059 1059 return (res);
1060 1060 }
1061 1061
1062 1062 void
1063 1063 nwifent_free_attrs(struct zone_nwiftab *np)
1064 1064 {
1065 1065 struct zone_res_attrtab *rap;
1066 1066
1067 1067 for (rap = np->zone_nwif_attrp; rap != NULL; ) {
1068 1068 struct zone_res_attrtab *tp = rap;
1069 1069
1070 1070 rap = rap->zone_res_attr_next;
1071 1071 free(tp);
1072 1072 }
1073 1073 }
1074 1074
1075 1075 /*
1076 1076 * If retstr is not NULL, the output of the subproc is returned in *retstr,
1077 1077 * otherwise it is output using zerror(). Any memory allocated for retstr
1078 1078 * should be freed by the caller.
1079 1079 */
1080 1080 int
1081 1081 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr, boolean_t debug)
1082 1082 {
1083 1083 char buf[1024]; /* arbitrary large amount */
1084 1084 char *inbuf;
1085 1085 FILE *file;
1086 1086 int status;
1087 1087 int rd_cnt;
1088 1088 int fds[2];
1089 1089 pid_t child;
1090 1090
1091 1091 if (retstr != NULL) {
1092 1092 if ((*retstr = malloc(1024)) == NULL) {
1093 1093 zerror(zlogp, B_FALSE, "out of memory");
1094 1094 return (-1);
1095 1095 }
1096 1096 inbuf = *retstr;
1097 1097 rd_cnt = 0;
1098 1098 } else {
1099 1099 inbuf = buf;
1100 1100 }
1101 1101
1102 1102 if (pipe(fds) != 0) {
1103 1103 zerror(zlogp, B_TRUE, "failed to create pipe for subprocess");
1104 1104 return (-1);
1105 1105 }
1106 1106
1107 1107 if ((child = fork()) == 0) {
1108 1108 int in;
1109 1109
1110 1110 /*
1111 1111 * SIGINT is currently ignored. It probably shouldn't be so
1112 1112 * hard to kill errant children, so we revert to SIG_DFL.
1113 1113 * SIGHUP and SIGUSR1 are used to perform log rotation. We
1114 1114 * leave those as-is because we don't want a 'pkill -HUP
1115 1115 * zoneadmd' to kill this child process before exec(). On
1116 1116 * exec(), SIGHUP and SIGUSR1 will become SIG_DFL.
1117 1117 */
1118 1118 (void) sigset(SIGINT, SIG_DFL);
1119 1119
1120 1120 /*
1121 1121 * Set up a pipe for the child to log to.
1122 1122 */
1123 1123 if (dup2(fds[1], STDERR_FILENO) == -1) {
1124 1124 (void) snprintf(buf, sizeof (buf),
1125 1125 "subprocess failed to dup2(STDERR_FILENO): %s\n",
1126 1126 strerror(errno));
1127 1127 (void) write(fds[1], buf, strlen(buf));
1128 1128 _exit(127);
1129 1129 }
1130 1130 if (dup2(fds[1], STDOUT_FILENO) == -1) {
1131 1131 perror("subprocess failed to dup2(STDOUT_FILENO)");
1132 1132 _exit(127);
1133 1133 }
1134 1134 /*
1135 1135 * Some naughty children may try to read from stdin. Be sure
1136 1136 * that the first file that a child opens doesn't get stdin's
1137 1137 * file descriptor.
1138 1138 */
1139 1139 if ((in = open("/dev/null", O_RDONLY)) == -1 ||
1140 1140 dup2(in, STDIN_FILENO) == -1) {
1141 1141 zerror(zlogp, B_TRUE,
1142 1142 "subprocess failed to set up STDIN_FILENO");
1143 1143 _exit(127);
1144 1144 }
1145 1145 closefrom(STDERR_FILENO + 1);
1146 1146
1147 1147 if (setup_subproc_env(zlogp, debug) != Z_OK) {
1148 1148 zerror(zlogp, B_FALSE, "failed to setup environment");
1149 1149 _exit(127);
1150 1150 }
1151 1151
1152 1152 (void) execl("/bin/sh", "sh", "-c", cmdbuf, NULL);
1153 1153
1154 1154 zerror(zlogp, B_TRUE, "subprocess execl failed");
1155 1155 _exit(127);
1156 1156 } else if (child == -1) {
1157 1157 zerror(zlogp, B_TRUE, "failed to create subprocess for '%s'",
1158 1158 cmdbuf);
1159 1159 (void) close(fds[0]);
1160 1160 (void) close(fds[1]);
1161 1161 return (-1);
1162 1162 }
1163 1163
1164 1164 (void) close(fds[1]);
1165 1165
1166 1166 file = fdopen(fds[0], "r");
1167 1167 while (fgets(inbuf, 1024, file) != NULL) {
1168 1168 if (retstr == NULL) {
1169 1169 if (zlogp != &logsys) {
1170 1170 int last = strlen(inbuf) - 1;
1171 1171
1172 1172 if (inbuf[last] == '\n')
1173 1173 inbuf[last] = '\0';
1174 1174 zerror(zlogp, B_FALSE, "%s", inbuf);
1175 1175 }
1176 1176 } else {
1177 1177 char *p;
1178 1178
1179 1179 rd_cnt += 1024 - 1;
1180 1180 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
1181 1181 zerror(zlogp, B_FALSE, "out of memory");
1182 1182 break;
1183 1183 }
1184 1184
1185 1185 *retstr = p;
1186 1186 inbuf = *retstr + rd_cnt;
1187 1187 }
1188 1188 }
1189 1189
1190 1190 while (fclose(file) != 0) {
1191 1191 assert(errno == EINTR);
1192 1192 }
1193 1193 while (waitpid(child, &status, 0) == -1) {
1194 1194 if (errno != EINTR) {
1195 1195 zerror(zlogp, B_TRUE,
1196 1196 "failed to get exit status of '%s'", cmdbuf);
1197 1197 return (-1);
1198 1198 }
1199 1199 }
1200 1200
1201 1201 if (WIFSIGNALED(status)) {
1202 1202 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
1203 1203 "signal %d", cmdbuf, WTERMSIG(status));
1204 1204 return (-1);
1205 1205 }
1206 1206 assert(WIFEXITED(status));
1207 1207 if (WEXITSTATUS(status) == ZEXIT_EXEC) {
1208 1208 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
1209 1209 return (-1);
1210 1210 }
1211 1211 return (WEXITSTATUS(status));
1212 1212 }
1213 1213
1214 1214 /*
1215 1215 * Get the path for this zone's init(1M) (or equivalent) process. First look
1216 1216 * for a zone-specific init-name attr, then get it from the brand.
1217 1217 */
1218 1218 static int
1219 1219 get_initname(brand_handle_t bh, char *initname, int len)
1220 1220 {
1221 1221 struct zone_attrtab a;
1222 1222
1223 1223 bzero(&a, sizeof (a));
1224 1224 (void) strlcpy(a.zone_attr_name, "init-name",
1225 1225 sizeof (a.zone_attr_name));
1226 1226
1227 1227 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
1228 1228 (void) strlcpy(initname, a.zone_attr_value, len);
1229 1229 return (0);
1230 1230 }
1231 1231
1232 1232 return (brand_get_initname(bh, initname, len));
1233 1233 }
1234 1234
1235 1235 /*
1236 1236 * Get the restart-init flag for this zone's init(1M) (or equivalent) process.
1237 1237 * First look for a zone-specific restart-init attr, then get it from the brand.
1238 1238 */
1239 1239 static boolean_t
1240 1240 restartinit(brand_handle_t bh)
1241 1241 {
1242 1242 struct zone_attrtab a;
1243 1243
1244 1244 bzero(&a, sizeof (a));
1245 1245 (void) strlcpy(a.zone_attr_name, "restart-init",
1246 1246 sizeof (a.zone_attr_name));
1247 1247
1248 1248 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
1249 1249 if (strcmp(a.zone_attr_value, "false") == 0)
1250 1250 return (B_FALSE);
1251 1251 return (B_TRUE);
1252 1252 }
1253 1253
1254 1254 return (brand_restartinit(bh));
1255 1255 }
1256 1256
1257 1257 /*
1258 1258 * Get the app-svc-dependent flag for this zone's init process. This is a
1259 1259 * zone-specific attr which controls the type of contract we create for the
1260 1260 * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal
1261 1261 * set, so that when any service which is in the same contract exits, the init
1262 1262 * application will be terminated.
1263 1263 */
1264 1264 static boolean_t
1265 1265 is_app_svc_dep(void)
1266 1266 {
1267 1267 struct zone_attrtab a;
1268 1268
1269 1269 bzero(&a, sizeof (a));
1270 1270 (void) strlcpy(a.zone_attr_name, "app-svc-dependent",
1271 1271 sizeof (a.zone_attr_name));
1272 1272
1273 1273 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
1274 1274 strcmp(a.zone_attr_value, "true") == 0) {
1275 1275 return (B_TRUE);
1276 1276 }
1277 1277
1278 1278 return (B_FALSE);
1279 1279 }
1280 1280
1281 1281 static int
1282 1282 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate, boolean_t debug)
1283 1283 {
1284 1284 zoneid_t zoneid;
|
↓ open down ↓ |
1284 lines elided |
↑ open up ↑ |
1285 1285 struct stat st;
1286 1286 char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
1287 1287 char nbootargs[BOOTARGS_MAX];
1288 1288 char cmdbuf[MAXPATHLEN];
1289 1289 fs_callback_t cb;
1290 1290 brand_handle_t bh;
1291 1291 zone_iptype_t iptype;
1292 1292 dladm_status_t status;
1293 1293 char errmsg[DLADM_STRSIZE];
1294 1294 int err;
1295 - boolean_t restart_init;
1296 1295 boolean_t app_svc_dep;
1296 + boolean_t restart_init, restart_init0, restart_initreboot;
1297 1297
1298 1298 if (brand_prestatechg(zlogp, zstate, Z_BOOT, debug) != 0)
1299 1299 return (-1);
1300 1300
1301 1301 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1302 1302 zerror(zlogp, B_TRUE, "unable to get zoneid");
1303 1303 goto bad;
1304 1304 }
1305 1305
1306 1306 cb.zlogp = zlogp;
1307 1307 cb.zoneid = zoneid;
1308 1308 cb.mount_cmd = B_FALSE;
1309 1309
1310 1310 /* Get a handle to the brand info for this zone */
1311 1311 if ((bh = brand_open(brand_name)) == NULL) {
1312 1312 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1313 1313 goto bad;
1314 1314 }
1315 1315
1316 1316 /*
1317 1317 * Get the list of filesystems to mount from the brand
1318 1318 * configuration. These mounts are done via a thread that will
1319 1319 * enter the zone, so they are done from within the context of the
1320 1320 * zone.
1321 1321 */
1322 1322 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
1323 1323 zerror(zlogp, B_FALSE, "unable to mount filesystems");
1324 1324 brand_close(bh);
1325 1325 goto bad;
1326 1326 }
1327 1327
1328 1328 /*
1329 1329 * Get the brand's boot callback if it exists.
1330 1330 */
1331 1331 (void) strcpy(cmdbuf, EXEC_PREFIX);
1332 1332 if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
1333 1333 sizeof (cmdbuf) - EXEC_LEN) != 0) {
1334 1334 zerror(zlogp, B_FALSE,
1335 1335 "unable to determine branded zone's boot callback");
1336 1336 brand_close(bh);
1337 1337 goto bad;
|
↓ open down ↓ |
31 lines elided |
↑ open up ↑ |
1338 1338 }
1339 1339
1340 1340 /* Get the path for this zone's init(1M) (or equivalent) process. */
1341 1341 if (get_initname(bh, init_file, MAXPATHLEN) != 0) {
1342 1342 zerror(zlogp, B_FALSE,
1343 1343 "unable to determine zone's init(1M) location");
1344 1344 brand_close(bh);
1345 1345 goto bad;
1346 1346 }
1347 1347
1348 - /* See if we should restart init if it dies. */
1348 + /* See if this zone's brand should restart init if it dies. */
1349 1349 restart_init = restartinit(bh);
1350 + restart_init0 = brand_restartinit0(bh);
1351 + restart_initreboot = brand_restartinitreboot(bh);
1350 1352
1351 1353 /*
1352 1354 * See if we need to setup contract dependencies between the zone's
1353 1355 * primary application and any of its services.
1354 1356 */
1355 1357 app_svc_dep = is_app_svc_dep();
1356 1358
1357 1359 brand_close(bh);
1358 1360
1359 1361 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
1360 1362 if (err != Z_OK)
1361 1363 goto bad;
1362 1364
1363 1365 assert(init_file[0] != '\0');
1364 1366
1365 1367 /*
1366 1368 * Try to anticipate possible problems: If possible, make sure init is
1367 1369 * executable.
1368 1370 */
1369 1371 if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) {
1370 1372 zerror(zlogp, B_FALSE, "unable to determine zone root");
1371 1373 goto bad;
1372 1374 }
1373 1375
1374 1376 (void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file);
1375 1377
1376 1378 if (lstat(initpath, &st) == -1) {
1377 1379 zerror(zlogp, B_TRUE, "could not stat %s", initpath);
1378 1380 goto bad;
1379 1381 }
1380 1382
1381 1383 /* LINTED: E_NOP_IF_STMT */
1382 1384 if ((st.st_mode & S_IFMT) == S_IFLNK) {
1383 1385 /* symlink, we'll have to wait and resolve when we boot */
1384 1386 } else if ((st.st_mode & S_IXUSR) == 0) {
1385 1387 zerror(zlogp, B_FALSE, "%s is not executable", initpath);
1386 1388 goto bad;
1387 1389 }
1388 1390
1389 1391 /*
1390 1392 * Exclusive stack zones interact with the dlmgmtd running in the
1391 1393 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is
1392 1394 * booting, and loads its datalinks from the zone's datalink
1393 1395 * configuration file.
1394 1396 */
1395 1397 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
1396 1398 status = dladm_zone_boot(dld_handle, zoneid);
1397 1399 if (status != DLADM_STATUS_OK) {
1398 1400 zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
1399 1401 " %s", dladm_status2str(status, errmsg));
1400 1402 goto bad;
1401 1403 }
1402 1404 }
1403 1405
1404 1406 /*
1405 1407 * If there is a brand 'boot' callback, execute it now to give the
1406 1408 * brand one last chance to do any additional setup before the zone
1407 1409 * is booted.
1408 1410 */
1409 1411 if ((strlen(cmdbuf) > EXEC_LEN) &&
1410 1412 (do_subproc(zlogp, cmdbuf, NULL, debug) != Z_OK)) {
1411 1413 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
1412 1414 goto bad;
1413 1415 }
1414 1416
1415 1417 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
1416 1418 zerror(zlogp, B_TRUE, "could not set zone boot file");
1417 1419 goto bad;
1418 1420 }
1419 1421
|
↓ open down ↓ |
60 lines elided |
↑ open up ↑ |
1420 1422 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
1421 1423 zerror(zlogp, B_TRUE, "could not set zone boot arguments");
1422 1424 goto bad;
1423 1425 }
1424 1426
1425 1427 if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART,
1426 1428 NULL, 0) == -1) {
1427 1429 zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
1428 1430 goto bad;
1429 1431 }
1432 + if (restart_init0 && zone_setattr(zoneid, ZONE_ATTR_INITRESTART0,
1433 + NULL, 0) == -1) {
1434 + zerror(zlogp, B_TRUE,
1435 + "could not set zone init-restart-on-exit-0");
1436 + goto bad;
1437 + }
1438 + if (restart_initreboot && zone_setattr(zoneid, ZONE_ATTR_INITREBOOT,
1439 + NULL, 0) == -1) {
1440 + zerror(zlogp, B_TRUE, "could not set zone reboot-on-init-exit");
1441 + goto bad;
1442 + }
1430 1443
1431 1444 if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT,
1432 1445 (void *)B_TRUE, sizeof (boolean_t)) == -1) {
1433 1446 zerror(zlogp, B_TRUE, "could not set zone app-die");
1434 1447 goto bad;
1435 1448 }
1436 1449
1437 1450 /*
1438 1451 * Inform zonestatd of a new zone so that it can install a door for
1439 1452 * the zone to contact it.
1440 1453 */
1441 1454 notify_zonestatd(zone_id);
1442 1455
1443 1456 /* Startup a thread to perform zfd logging/tty svc for the zone. */
1444 1457 create_log_thread(zlogp);
1445 1458
1446 1459 if (zone_boot(zoneid) == -1) {
1447 1460 zerror(zlogp, B_TRUE, "unable to boot zone");
1448 1461 destroy_log_thread(zlogp);
1449 1462 goto bad;
1450 1463 }
1451 1464
1452 1465 if (brand_poststatechg(zlogp, zstate, Z_BOOT, debug) != 0) {
1453 1466 destroy_log_thread(zlogp);
1454 1467 goto bad;
1455 1468 }
1456 1469
1457 1470 return (0);
1458 1471
1459 1472 bad:
1460 1473 /*
1461 1474 * If something goes wrong, we up the zone's state to the target
1462 1475 * state, RUNNING, and then invoke the hook as if we're halting.
1463 1476 */
1464 1477 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT, debug);
1465 1478
1466 1479 return (-1);
1467 1480 }
1468 1481
1469 1482 static int
1470 1483 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate,
1471 1484 boolean_t debug)
1472 1485 {
1473 1486 int err;
1474 1487
1475 1488 /*
1476 1489 * If performing a scratch zone unmount then do not call the
1477 1490 * state change hooks.
1478 1491 */
1479 1492 if (unmount_cmd == B_FALSE &&
1480 1493 brand_prestatechg(zlogp, zstate, Z_HALT, debug) != 0)
1481 1494 return (-1);
1482 1495
1483 1496 if (vplat_teardown(zlogp, unmount_cmd, rebooting, debug) != 0) {
1484 1497 if (!bringup_failure_recovery)
1485 1498 zerror(zlogp, B_FALSE, "unable to destroy zone");
1486 1499 destroy_log_thread(zlogp);
1487 1500 return (-1);
1488 1501 }
1489 1502
1490 1503 /* Shutdown is done, stop the log thread */
1491 1504 destroy_log_thread(zlogp);
1492 1505
1493 1506 if (unmount_cmd == B_FALSE &&
1494 1507 brand_poststatechg(zlogp, zstate, Z_HALT, debug) != 0)
1495 1508 return (-1);
1496 1509
1497 1510 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
1498 1511 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
1499 1512 zonecfg_strerror(err));
1500 1513
1501 1514 zonecfg_fini_handle(snap_hndl);
1502 1515 snap_hndl = NULL;
1503 1516
1504 1517 return (0);
1505 1518 }
1506 1519
1507 1520 static int
1508 1521 zone_graceful_shutdown(zlog_t *zlogp)
1509 1522 {
1510 1523 zoneid_t zoneid;
1511 1524 pid_t child;
1512 1525 char cmdbuf[MAXPATHLEN];
1513 1526 brand_handle_t bh = NULL;
1514 1527 ctid_t ct;
1515 1528 int tmpl_fd;
1516 1529 int child_status;
1517 1530
1518 1531 if (shutdown_in_progress) {
1519 1532 zerror(zlogp, B_FALSE, "shutdown already in progress");
1520 1533 return (-1);
1521 1534 }
1522 1535
1523 1536 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1524 1537 zerror(zlogp, B_TRUE, "unable to get zoneid");
1525 1538 return (-1);
1526 1539 }
1527 1540
1528 1541 /* Get a handle to the brand info for this zone */
1529 1542 if ((bh = brand_open(brand_name)) == NULL) {
1530 1543 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1531 1544 return (-1);
1532 1545 }
1533 1546
1534 1547 /*
1535 1548 * If there is a brand 'shutdown' callback, execute it now to give the
1536 1549 * brand a chance to cleanup any custom configuration.
1537 1550 */
1538 1551 (void) strcpy(cmdbuf, EXEC_PREFIX);
1539 1552 if (brand_get_shutdown(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
1540 1553 sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
1541 1554 (void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
1542 1555 }
1543 1556 brand_close(bh);
1544 1557
1545 1558 if ((tmpl_fd = init_template()) == -1) {
1546 1559 zerror(zlogp, B_TRUE, "failed to create contract");
1547 1560 return (-1);
1548 1561 }
1549 1562
1550 1563 if ((child = fork()) == -1) {
1551 1564 (void) ct_tmpl_clear(tmpl_fd);
1552 1565 (void) close(tmpl_fd);
1553 1566 zerror(zlogp, B_TRUE, "failed to fork");
1554 1567 return (-1);
1555 1568 } else if (child == 0) {
1556 1569 (void) ct_tmpl_clear(tmpl_fd);
1557 1570 if (zone_enter(zoneid) == -1) {
1558 1571 _exit(errno);
1559 1572 }
1560 1573 _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL));
1561 1574 }
1562 1575
1563 1576 if (contract_latest(&ct) == -1)
1564 1577 ct = -1;
1565 1578 (void) ct_tmpl_clear(tmpl_fd);
1566 1579 (void) close(tmpl_fd);
1567 1580
1568 1581 if (waitpid(child, &child_status, 0) != child) {
1569 1582 /* unexpected: we must have been signalled */
1570 1583 (void) contract_abandon_id(ct);
1571 1584 return (-1);
1572 1585 }
1573 1586
1574 1587 (void) contract_abandon_id(ct);
1575 1588 if (WEXITSTATUS(child_status) != 0) {
1576 1589 errno = WEXITSTATUS(child_status);
1577 1590 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1578 1591 return (-1);
1579 1592 }
1580 1593
1581 1594 shutdown_in_progress = B_TRUE;
1582 1595
1583 1596 return (0);
1584 1597 }
1585 1598
1586 1599 static int
1587 1600 zone_wait_shutdown(zlog_t *zlogp)
1588 1601 {
1589 1602 zone_state_t zstate;
1590 1603 uint64_t *tm = NULL;
1591 1604 scf_simple_prop_t *prop = NULL;
1592 1605 int timeout;
1593 1606 int tries;
1594 1607 int rc = -1;
1595 1608
1596 1609 /* Get default stop timeout from SMF framework */
1597 1610 timeout = SHUTDOWN_WAIT;
1598 1611 if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop",
1599 1612 SCF_PROPERTY_TIMEOUT)) != NULL) {
1600 1613 if ((tm = scf_simple_prop_next_count(prop)) != NULL) {
1601 1614 if (tm != 0)
1602 1615 timeout = *tm;
1603 1616 }
1604 1617 scf_simple_prop_free(prop);
1605 1618 }
1606 1619
1607 1620 /* allow time for zone to shut down cleanly */
1608 1621 for (tries = 0; tries < timeout; tries ++) {
1609 1622 (void) sleep(1);
1610 1623 if (zone_get_state(zone_name, &zstate) == Z_OK &&
1611 1624 zstate == ZONE_STATE_INSTALLED) {
1612 1625 rc = 0;
1613 1626 break;
1614 1627 }
1615 1628 }
1616 1629
1617 1630 if (rc != 0)
1618 1631 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1619 1632
1620 1633 shutdown_in_progress = B_FALSE;
1621 1634
1622 1635 return (rc);
1623 1636 }
1624 1637
1625 1638
1626 1639
1627 1640 /*
1628 1641 * Generate AUE_zone_state for a command that boots a zone.
1629 1642 */
1630 1643 static void
1631 1644 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
1632 1645 char *new_state)
1633 1646 {
1634 1647 adt_session_data_t *ah;
1635 1648 adt_event_data_t *event;
1636 1649 int pass_fail, fail_reason;
1637 1650
1638 1651 if (!adt_audit_enabled())
1639 1652 return;
1640 1653
1641 1654 if (return_val == 0) {
1642 1655 pass_fail = ADT_SUCCESS;
1643 1656 fail_reason = ADT_SUCCESS;
1644 1657 } else {
1645 1658 pass_fail = ADT_FAILURE;
1646 1659 fail_reason = ADT_FAIL_VALUE_PROGRAM;
1647 1660 }
1648 1661
1649 1662 if (adt_start_session(&ah, NULL, 0)) {
1650 1663 zerror(zlogp, B_TRUE, gettext("audit failure."));
1651 1664 return;
1652 1665 }
1653 1666 if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
1654 1667 zerror(zlogp, B_TRUE, gettext("audit failure."));
1655 1668 (void) adt_end_session(ah);
1656 1669 return;
1657 1670 }
1658 1671
1659 1672 event = adt_alloc_event(ah, ADT_zone_state);
1660 1673 if (event == NULL) {
1661 1674 zerror(zlogp, B_TRUE, gettext("audit failure."));
1662 1675 (void) adt_end_session(ah);
1663 1676 return;
1664 1677 }
1665 1678 event->adt_zone_state.zonename = zone_name;
1666 1679 event->adt_zone_state.new_state = new_state;
1667 1680
1668 1681 if (adt_put_event(event, pass_fail, fail_reason))
1669 1682 zerror(zlogp, B_TRUE, gettext("audit failure."));
1670 1683
1671 1684 adt_free_event(event);
1672 1685
1673 1686 (void) adt_end_session(ah);
1674 1687 }
1675 1688
1676 1689 /*
1677 1690 * Log the exit time and status of the zone's init process into
1678 1691 * {zonepath}/lastexited. If the zone shutdown normally, the exit status will
1679 1692 * be -1, otherwise it will be the exit status as described in wait.3c.
1680 1693 * If the zone is configured to restart init, then nothing will be logged if
1681 1694 * init exits unexpectedly (the kernel will never upcall in this case).
1682 1695 */
1683 1696 static void
1684 1697 log_init_exit(int status)
1685 1698 {
1686 1699 char p[MAXPATHLEN];
1687 1700 char buf[128];
1688 1701 struct timeval t;
1689 1702 int fd;
1690 1703
1691 1704 if (snprintf(p, sizeof (p), "%s/lastexited", zonepath) > sizeof (p))
1692 1705 return;
1693 1706 if (gettimeofday(&t, NULL) != 0)
1694 1707 return;
1695 1708 if (snprintf(buf, sizeof (buf), "%ld.%ld %d\n", t.tv_sec, t.tv_usec,
1696 1709 status) > sizeof (buf))
1697 1710 return;
1698 1711 if ((fd = open(p, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0)
1699 1712 return;
1700 1713
1701 1714 (void) write(fd, buf, strlen(buf));
1702 1715
1703 1716 (void) close(fd);
1704 1717 }
1705 1718
1706 1719 /*
1707 1720 * The main routine for the door server that deals with zone state transitions.
1708 1721 */
1709 1722 /* ARGSUSED */
1710 1723 static void
1711 1724 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1712 1725 uint_t n_desc)
1713 1726 {
1714 1727 ucred_t *uc = NULL;
1715 1728 const priv_set_t *eset;
1716 1729
1717 1730 zone_state_t zstate;
1718 1731 zone_cmd_t cmd;
1719 1732 boolean_t debug;
1720 1733 int init_status;
1721 1734 zone_cmd_arg_t *zargp;
1722 1735
1723 1736 boolean_t kernelcall = B_TRUE;
1724 1737
1725 1738 int rval = -1;
1726 1739 uint64_t uniqid;
1727 1740 zoneid_t zoneid = -1;
1728 1741 zlog_t zlog;
1729 1742 zlog_t *zlogp;
1730 1743 zone_cmd_rval_t *rvalp;
1731 1744 size_t rlen = getpagesize(); /* conservative */
1732 1745 fs_callback_t cb;
1733 1746 brand_handle_t bh;
1734 1747 boolean_t wait_shut = B_FALSE;
1735 1748
1736 1749 /* LINTED E_BAD_PTR_CAST_ALIGN */
1737 1750 zargp = (zone_cmd_arg_t *)args;
1738 1751
1739 1752 /*
1740 1753 * When we get the door unref message, we've fdetach'd the door, and
1741 1754 * it is time for us to shut down zoneadmd.
1742 1755 */
1743 1756 if (zargp == DOOR_UNREF_DATA) {
1744 1757 logstream_close(platloghdl, B_TRUE);
1745 1758
1746 1759 /*
1747 1760 * See comment at end of main() for info on the last rites.
1748 1761 */
1749 1762 exit(0);
1750 1763 }
1751 1764
1752 1765 if (zargp == NULL) {
1753 1766 (void) door_return(NULL, 0, 0, 0);
1754 1767 }
1755 1768
1756 1769 rvalp = alloca(rlen);
1757 1770 bzero(rvalp, rlen);
1758 1771 zlog.logfile = NULL;
1759 1772 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1760 1773 zlog.buf = rvalp->errbuf;
1761 1774 zlog.log = zlog.buf;
1762 1775 /* defer initialization of zlog.locale until after credential check */
1763 1776 zlogp = &zlog;
1764 1777
1765 1778 if (alen != sizeof (zone_cmd_arg_t)) {
1766 1779 /*
1767 1780 * This really shouldn't be happening.
1768 1781 */
1769 1782 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1770 1783 "unexpected (expected %d bytes)", alen,
1771 1784 sizeof (zone_cmd_arg_t));
1772 1785 goto out;
1773 1786 }
1774 1787 cmd = zargp->cmd;
1775 1788 debug = zargp->debug;
1776 1789 init_status = zargp->status;
1777 1790
1778 1791 if (door_ucred(&uc) != 0) {
1779 1792 zerror(&logsys, B_TRUE, "door_ucred");
1780 1793 goto out;
1781 1794 }
1782 1795 eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1783 1796 if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1784 1797 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1785 1798 ucred_geteuid(uc) != 0)) {
1786 1799 zerror(&logsys, B_FALSE, "insufficient privileges");
1787 1800 goto out;
1788 1801 }
1789 1802
1790 1803 kernelcall = ucred_getpid(uc) == 0;
1791 1804
1792 1805 /*
1793 1806 * This is safe because we only use a zlog_t throughout the
1794 1807 * duration of a door call; i.e., by the time the pointer
1795 1808 * might become invalid, the door call would be over.
1796 1809 */
1797 1810 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
1798 1811
1799 1812 (void) mutex_lock(&lock);
1800 1813
1801 1814 /*
1802 1815 * Once we start to really die off, we don't want more connections.
1803 1816 */
1804 1817 if (in_death_throes) {
1805 1818 (void) mutex_unlock(&lock);
1806 1819 ucred_free(uc);
1807 1820 (void) door_return(NULL, 0, 0, 0);
1808 1821 thr_exit(NULL);
1809 1822 }
1810 1823
1811 1824 /*
1812 1825 * Check for validity of command.
1813 1826 */
1814 1827 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT &&
1815 1828 cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT &&
1816 1829 cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT &&
1817 1830 cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) {
1818 1831 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd);
1819 1832 goto out;
1820 1833 }
1821 1834
1822 1835 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
1823 1836 /*
1824 1837 * Can't happen
1825 1838 */
1826 1839 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
1827 1840 cmd);
1828 1841 goto out;
1829 1842 }
1830 1843 /*
1831 1844 * We ignore the possibility of someone calling zone_create(2)
1832 1845 * explicitly; all requests must come through zoneadmd.
1833 1846 */
1834 1847 if (zone_get_state(zone_name, &zstate) != Z_OK) {
1835 1848 /*
1836 1849 * Something terribly wrong happened
1837 1850 */
1838 1851 zerror(&logsys, B_FALSE, "unable to determine state of zone");
1839 1852 goto out;
1840 1853 }
1841 1854
1842 1855 if (kernelcall) {
1843 1856 /*
1844 1857 * Kernel-initiated requests may lose their validity if the
1845 1858 * zone_t the kernel was referring to has gone away.
1846 1859 */
1847 1860 if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
1848 1861 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
1849 1862 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
1850 1863 /*
1851 1864 * We're not talking about the same zone. The request
1852 1865 * must have arrived too late. Return error.
1853 1866 */
1854 1867 rval = -1;
1855 1868 goto out;
1856 1869 }
1857 1870 zlogp = &logplat; /* Log errors to platform.log */
1858 1871 }
1859 1872
1860 1873 /*
1861 1874 * If we are being asked to forcibly mount or boot a zone, we
1862 1875 * pretend that an INCOMPLETE zone is actually INSTALLED.
1863 1876 */
1864 1877 if (zstate == ZONE_STATE_INCOMPLETE &&
1865 1878 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1866 1879 zstate = ZONE_STATE_INSTALLED;
1867 1880
1868 1881 switch (zstate) {
1869 1882 case ZONE_STATE_CONFIGURED:
1870 1883 case ZONE_STATE_INCOMPLETE:
1871 1884 /*
1872 1885 * Not our area of expertise; we just print a nice message
1873 1886 * and die off.
1874 1887 */
1875 1888 zerror(zlogp, B_FALSE,
1876 1889 "%s operation is invalid for zones in state '%s'",
1877 1890 z_cmd_name(cmd), zone_state_str(zstate));
1878 1891 break;
1879 1892
1880 1893 case ZONE_STATE_INSTALLED:
1881 1894 switch (cmd) {
1882 1895 case Z_READY:
1883 1896 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, debug);
1884 1897 if (rval == 0)
1885 1898 eventstream_write(Z_EVT_ZONE_READIED);
1886 1899 zcons_statechanged();
1887 1900 break;
1888 1901 case Z_BOOT:
1889 1902 case Z_FORCEBOOT:
1890 1903 eventstream_write(Z_EVT_ZONE_BOOTING);
1891 1904 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
1892 1905 debug)) == 0) {
1893 1906 rval = zone_bootup(zlogp, zargp->bootbuf,
1894 1907 zstate, debug);
1895 1908 }
1896 1909 audit_put_record(zlogp, uc, rval, "boot");
1897 1910 zcons_statechanged();
1898 1911 if (rval != 0) {
1899 1912 bringup_failure_recovery = B_TRUE;
1900 1913 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1901 1914 zstate, debug);
1902 1915 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1903 1916 }
1904 1917 break;
1905 1918 case Z_SHUTDOWN:
1906 1919 case Z_HALT:
1907 1920 if (kernelcall) /* Invalid; can't happen */
1908 1921 abort();
1909 1922 /*
1910 1923 * We could have two clients racing to halt this
1911 1924 * zone; the second client loses, but its request
1912 1925 * doesn't fail, since the zone is now in the desired
1913 1926 * state.
1914 1927 */
1915 1928 zerror(zlogp, B_FALSE, "zone is already halted");
1916 1929 rval = 0;
1917 1930 break;
1918 1931 case Z_REBOOT:
1919 1932 if (kernelcall) /* Invalid; can't happen */
1920 1933 abort();
1921 1934 zerror(zlogp, B_FALSE, "%s operation is invalid "
1922 1935 "for zones in state '%s'", z_cmd_name(cmd),
1923 1936 zone_state_str(zstate));
1924 1937 rval = -1;
1925 1938 break;
1926 1939 case Z_NOTE_UNINSTALLING:
1927 1940 if (kernelcall) /* Invalid; can't happen */
1928 1941 abort();
1929 1942 /*
1930 1943 * Tell the console to print out a message about this.
1931 1944 * Once it does, we will be in_death_throes.
1932 1945 */
1933 1946 eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1934 1947 break;
1935 1948 case Z_MOUNT:
1936 1949 case Z_FORCEMOUNT:
1937 1950 if (kernelcall) /* Invalid; can't happen */
1938 1951 abort();
1939 1952 if (!zone_isnative && !zone_iscluster &&
1940 1953 !zone_islabeled) {
1941 1954 /*
1942 1955 * -U mounts the zone without lofs mounting
1943 1956 * zone file systems back into the scratch
1944 1957 * zone. This is required when mounting
1945 1958 * non-native branded zones.
1946 1959 */
1947 1960 (void) strlcpy(zargp->bootbuf, "-U",
1948 1961 BOOTARGS_MAX);
1949 1962 }
1950 1963
1951 1964 rval = zone_ready(zlogp,
1952 1965 strcmp(zargp->bootbuf, "-U") == 0 ?
1953 1966 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate, debug);
1954 1967 if (rval != 0)
1955 1968 break;
1956 1969
1957 1970 eventstream_write(Z_EVT_ZONE_READIED);
1958 1971
1959 1972 /*
1960 1973 * Get a handle to the default brand info.
1961 1974 * We must always use the default brand file system
1962 1975 * list when mounting the zone.
1963 1976 */
1964 1977 if ((bh = brand_open(default_brand)) == NULL) {
1965 1978 rval = -1;
1966 1979 break;
1967 1980 }
1968 1981
1969 1982 /*
1970 1983 * Get the list of filesystems to mount from
1971 1984 * the brand configuration. These mounts are done
1972 1985 * via a thread that will enter the zone, so they
1973 1986 * are done from within the context of the zone.
1974 1987 */
1975 1988 cb.zlogp = zlogp;
1976 1989 cb.zoneid = zone_id;
1977 1990 cb.mount_cmd = B_TRUE;
1978 1991 rval = brand_platform_iter_mounts(bh,
1979 1992 mount_early_fs, &cb);
1980 1993
1981 1994 brand_close(bh);
1982 1995
1983 1996 /*
1984 1997 * Ordinarily, /dev/fd would be mounted inside the zone
1985 1998 * by svc:/system/filesystem/usr:default, but since
1986 1999 * we're not booting the zone, we need to do this
1987 2000 * manually.
1988 2001 */
1989 2002 if (rval == 0)
1990 2003 rval = mount_early_fs(&cb,
1991 2004 "fd", "/dev/fd", "fd", NULL);
1992 2005 break;
1993 2006 case Z_UNMOUNT:
1994 2007 if (kernelcall) /* Invalid; can't happen */
1995 2008 abort();
1996 2009 zerror(zlogp, B_FALSE, "zone is already unmounted");
1997 2010 rval = 0;
1998 2011 break;
1999 2012 }
2000 2013 break;
2001 2014
2002 2015 case ZONE_STATE_READY:
2003 2016 switch (cmd) {
2004 2017 case Z_READY:
2005 2018 /*
2006 2019 * We could have two clients racing to ready this
2007 2020 * zone; the second client loses, but its request
2008 2021 * doesn't fail, since the zone is now in the desired
2009 2022 * state.
2010 2023 */
2011 2024 zerror(zlogp, B_FALSE, "zone is already ready");
2012 2025 rval = 0;
2013 2026 break;
2014 2027 case Z_BOOT:
2015 2028 case Z_FORCEBOOT:
2016 2029 (void) strlcpy(boot_args, zargp->bootbuf,
2017 2030 sizeof (boot_args));
2018 2031 eventstream_write(Z_EVT_ZONE_BOOTING);
2019 2032 rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
2020 2033 debug);
2021 2034 audit_put_record(zlogp, uc, rval, "boot");
2022 2035 zcons_statechanged();
2023 2036 if (rval != 0) {
2024 2037 bringup_failure_recovery = B_TRUE;
2025 2038 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
2026 2039 zstate, debug);
2027 2040 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
2028 2041 }
2029 2042 boot_args[0] = '\0';
2030 2043 break;
2031 2044 case Z_HALT:
2032 2045 if (kernelcall) /* Invalid; can't happen */
2033 2046 abort();
2034 2047 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
2035 2048 debug)) != 0)
2036 2049 break;
2037 2050 zcons_statechanged();
2038 2051 eventstream_write(Z_EVT_ZONE_HALTED);
2039 2052 break;
2040 2053 case Z_SHUTDOWN:
2041 2054 case Z_REBOOT:
2042 2055 case Z_NOTE_UNINSTALLING:
2043 2056 case Z_MOUNT:
2044 2057 case Z_FORCEMOUNT:
2045 2058 case Z_UNMOUNT:
2046 2059 if (kernelcall) /* Invalid; can't happen */
2047 2060 abort();
2048 2061 zerror(zlogp, B_FALSE, "%s operation is invalid "
2049 2062 "for zones in state '%s'", z_cmd_name(cmd),
2050 2063 zone_state_str(zstate));
2051 2064 rval = -1;
2052 2065 break;
2053 2066 }
2054 2067 break;
2055 2068
2056 2069 case ZONE_STATE_MOUNTED:
2057 2070 switch (cmd) {
2058 2071 case Z_UNMOUNT:
2059 2072 if (kernelcall) /* Invalid; can't happen */
2060 2073 abort();
2061 2074 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate, debug);
2062 2075 if (rval == 0) {
2063 2076 eventstream_write(Z_EVT_ZONE_HALTED);
2064 2077 (void) sema_post(&scratch_sem);
2065 2078 }
2066 2079 break;
2067 2080 default:
2068 2081 if (kernelcall) /* Invalid; can't happen */
2069 2082 abort();
2070 2083 zerror(zlogp, B_FALSE, "%s operation is invalid "
2071 2084 "for zones in state '%s'", z_cmd_name(cmd),
2072 2085 zone_state_str(zstate));
2073 2086 rval = -1;
2074 2087 break;
2075 2088 }
2076 2089 break;
2077 2090
2078 2091 case ZONE_STATE_RUNNING:
2079 2092 case ZONE_STATE_SHUTTING_DOWN:
2080 2093 case ZONE_STATE_DOWN:
2081 2094 switch (cmd) {
2082 2095 case Z_READY:
2083 2096 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
2084 2097 debug)) != 0)
2085 2098 break;
2086 2099 zcons_statechanged();
2087 2100 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
2088 2101 debug)) == 0)
2089 2102 eventstream_write(Z_EVT_ZONE_READIED);
2090 2103 else
2091 2104 eventstream_write(Z_EVT_ZONE_HALTED);
2092 2105 break;
2093 2106 case Z_BOOT:
2094 2107 case Z_FORCEBOOT:
2095 2108 /*
2096 2109 * We could have two clients racing to boot this
2097 2110 * zone; the second client loses, but its request
2098 2111 * doesn't fail, since the zone is now in the desired
2099 2112 * state.
2100 2113 */
2101 2114 zerror(zlogp, B_FALSE, "zone is already booted");
2102 2115 rval = 0;
2103 2116 break;
2104 2117 case Z_HALT:
2105 2118 if (kernelcall) {
2106 2119 log_init_exit(init_status);
2107 2120 } else {
2108 2121 log_init_exit(-1);
2109 2122 }
2110 2123 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
2111 2124 debug)) != 0)
2112 2125 break;
2113 2126 eventstream_write(Z_EVT_ZONE_HALTED);
2114 2127 zcons_statechanged();
2115 2128 break;
2116 2129 case Z_REBOOT:
2117 2130 (void) strlcpy(boot_args, zargp->bootbuf,
2118 2131 sizeof (boot_args));
2119 2132 eventstream_write(Z_EVT_ZONE_REBOOTING);
2120 2133 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
2121 2134 debug)) != 0) {
2122 2135 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
2123 2136 boot_args[0] = '\0';
2124 2137 break;
2125 2138 }
2126 2139 zcons_statechanged();
2127 2140 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
2128 2141 debug)) != 0) {
2129 2142 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
2130 2143 boot_args[0] = '\0';
2131 2144 break;
2132 2145 }
2133 2146 rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
2134 2147 debug);
2135 2148 audit_put_record(zlogp, uc, rval, "reboot");
2136 2149 if (rval != 0) {
2137 2150 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
2138 2151 zstate, debug);
2139 2152 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
2140 2153 }
2141 2154 boot_args[0] = '\0';
2142 2155 break;
2143 2156 case Z_SHUTDOWN:
2144 2157 if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
2145 2158 wait_shut = B_TRUE;
2146 2159 }
2147 2160 break;
2148 2161 case Z_NOTE_UNINSTALLING:
2149 2162 case Z_MOUNT:
2150 2163 case Z_FORCEMOUNT:
2151 2164 case Z_UNMOUNT:
2152 2165 zerror(zlogp, B_FALSE, "%s operation is invalid "
2153 2166 "for zones in state '%s'", z_cmd_name(cmd),
2154 2167 zone_state_str(zstate));
2155 2168 rval = -1;
2156 2169 break;
2157 2170 }
2158 2171 break;
2159 2172 default:
2160 2173 abort();
2161 2174 }
2162 2175
2163 2176 /*
2164 2177 * Because the state of the zone may have changed, we make sure
2165 2178 * to wake the console poller, which is in charge of initiating
2166 2179 * the shutdown procedure as necessary.
2167 2180 */
2168 2181 eventstream_write(Z_EVT_NULL);
2169 2182
2170 2183 out:
2171 2184 (void) mutex_unlock(&lock);
2172 2185
2173 2186 /* Wait for the Z_SHUTDOWN commands to complete */
2174 2187 if (wait_shut)
2175 2188 rval = zone_wait_shutdown(zlogp);
2176 2189
2177 2190 if (kernelcall) {
2178 2191 rvalp = NULL;
2179 2192 rlen = 0;
2180 2193 } else {
2181 2194 rvalp->rval = rval;
2182 2195 }
2183 2196 if (uc != NULL)
2184 2197 ucred_free(uc);
2185 2198 (void) door_return((char *)rvalp, rlen, NULL, 0);
2186 2199 thr_exit(NULL);
2187 2200 }
2188 2201
2189 2202 static int
2190 2203 setup_door(zlog_t *zlogp)
2191 2204 {
2192 2205 if ((zone_door = door_create(server, NULL,
2193 2206 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
2194 2207 zerror(zlogp, B_TRUE, "%s failed", "door_create");
2195 2208 return (-1);
2196 2209 }
2197 2210 (void) fdetach(zone_door_path);
2198 2211
2199 2212 if (fattach(zone_door, zone_door_path) != 0) {
2200 2213 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
2201 2214 (void) door_revoke(zone_door);
2202 2215 (void) fdetach(zone_door_path);
2203 2216 zone_door = -1;
2204 2217 return (-1);
2205 2218 }
2206 2219 return (0);
2207 2220 }
2208 2221
2209 2222 /*
2210 2223 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
2211 2224 * is where zoneadmd itself will check to see that another instance of
2212 2225 * zoneadmd isn't already controlling this zone.
2213 2226 *
2214 2227 * The idea here is that we want to open the path to which we will
2215 2228 * attach our door, lock it, and then make sure that no-one has beat us
2216 2229 * to fattach(3c)ing onto it.
2217 2230 *
2218 2231 * fattach(3c) is really a mount, so there are actually two possible
2219 2232 * vnodes we could be dealing with. Our strategy is as follows:
2220 2233 *
2221 2234 * - If the file we opened is a regular file (common case):
2222 2235 * There is no fattach(3c)ed door, so we have a chance of becoming
2223 2236 * the managing zoneadmd. We attempt to lock the file: if it is
2224 2237 * already locked, that means someone else raced us here, so we
2225 2238 * lose and give up. zoneadm(1m) will try to contact the zoneadmd
2226 2239 * that beat us to it.
2227 2240 *
2228 2241 * - If the file we opened is a namefs file:
2229 2242 * This means there is already an established door fattach(3c)'ed
2230 2243 * to the rendezvous path. We've lost the race, so we give up.
2231 2244 * Note that in this case we also try to grab the file lock, and
2232 2245 * will succeed in acquiring it since the vnode locked by the
2233 2246 * "winning" zoneadmd was a regular one, and the one we locked was
2234 2247 * the fattach(3c)'ed door node. At any rate, no harm is done, and
2235 2248 * we just return to zoneadm(1m) which knows to retry.
2236 2249 */
2237 2250 static int
2238 2251 make_daemon_exclusive(zlog_t *zlogp)
2239 2252 {
2240 2253 int doorfd = -1;
2241 2254 int err, ret = -1;
2242 2255 struct stat st;
2243 2256 struct flock flock;
2244 2257 zone_state_t zstate;
2245 2258
2246 2259 top:
2247 2260 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
2248 2261 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
2249 2262 zonecfg_strerror(err));
2250 2263 goto out;
2251 2264 }
2252 2265 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
2253 2266 S_IREAD|S_IWRITE)) < 0) {
2254 2267 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
2255 2268 goto out;
2256 2269 }
2257 2270 if (fstat(doorfd, &st) < 0) {
2258 2271 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
2259 2272 goto out;
2260 2273 }
2261 2274 /*
2262 2275 * Lock the file to synchronize with other zoneadmd
2263 2276 */
2264 2277 flock.l_type = F_WRLCK;
2265 2278 flock.l_whence = SEEK_SET;
2266 2279 flock.l_start = (off_t)0;
2267 2280 flock.l_len = (off_t)0;
2268 2281 if (fcntl(doorfd, F_SETLK, &flock) < 0) {
2269 2282 /*
2270 2283 * Someone else raced us here and grabbed the lock file
2271 2284 * first. A warning here is inappropriate since nothing
2272 2285 * went wrong.
2273 2286 */
2274 2287 goto out;
2275 2288 }
2276 2289
2277 2290 if (strcmp(st.st_fstype, "namefs") == 0) {
2278 2291 struct door_info info;
2279 2292
2280 2293 /*
2281 2294 * There is already something fattach()'ed to this file.
2282 2295 * Lets see what the door is up to.
2283 2296 */
2284 2297 if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
2285 2298 /*
2286 2299 * Another zoneadmd process seems to be in
2287 2300 * control of the situation and we don't need to
2288 2301 * be here. A warning here is inappropriate
2289 2302 * since nothing went wrong.
2290 2303 *
2291 2304 * If the door has been revoked, the zoneadmd
2292 2305 * process currently managing the zone is going
2293 2306 * away. We'll return control to zoneadm(1m)
2294 2307 * which will try again (by which time zoneadmd
2295 2308 * will hopefully have exited).
2296 2309 */
2297 2310 goto out;
2298 2311 }
2299 2312
2300 2313 /*
2301 2314 * If we got this far, there's a fattach(3c)'ed door
2302 2315 * that belongs to a process that has exited, which can
2303 2316 * happen if the previous zoneadmd died unexpectedly.
2304 2317 *
2305 2318 * Let user know that something is amiss, but that we can
2306 2319 * recover; if the zone is in the installed state, then don't
2307 2320 * message, since having a running zoneadmd isn't really
2308 2321 * expected/needed. We want to keep occurences of this message
2309 2322 * limited to times when zoneadmd is picking back up from a
2310 2323 * zoneadmd that died while the zone was in some non-trivial
2311 2324 * state.
2312 2325 */
2313 2326 if (zstate > ZONE_STATE_INSTALLED) {
2314 2327 zerror(zlogp, B_FALSE,
2315 2328 "zone '%s': WARNING: zone is in state '%s', but "
2316 2329 "zoneadmd does not appear to be available; "
2317 2330 "restarted zoneadmd to recover.",
2318 2331 zone_name, zone_state_str(zstate));
2319 2332
2320 2333 /*
2321 2334 * Startup a thread to perform the zfd logging/tty svc
2322 2335 * for the zone. zlogp won't be valid for much longer
2323 2336 * so use logplat.
2324 2337 */
2325 2338 if (getzoneidbyname(zone_name) != -1) {
2326 2339 create_log_thread(&logplat);
2327 2340 }
2328 2341
2329 2342 /* recover the global configuration snapshot */
2330 2343 if (snap_hndl == NULL) {
2331 2344 if ((snap_hndl = zonecfg_init_handle())
2332 2345 == NULL ||
2333 2346 zonecfg_create_snapshot(zone_name)
2334 2347 != Z_OK ||
2335 2348 zonecfg_get_snapshot_handle(zone_name,
2336 2349 snap_hndl) != Z_OK) {
2337 2350 zerror(zlogp, B_FALSE, "recovering "
2338 2351 "zone configuration handle");
2339 2352 goto out;
2340 2353 }
2341 2354 }
2342 2355 }
2343 2356
2344 2357 (void) fdetach(zone_door_path);
2345 2358 (void) close(doorfd);
2346 2359 goto top;
2347 2360 }
2348 2361 ret = 0;
2349 2362 out:
2350 2363 (void) close(doorfd);
2351 2364 return (ret);
2352 2365 }
2353 2366
2354 2367 /*
2355 2368 * Run the query hook with the 'env' parameter. It should return a
2356 2369 * string of tab-delimited key-value pairs, each of which should be set
2357 2370 * in the environment.
2358 2371 *
2359 2372 * Because the env_vars string values become part of the environment, the
2360 2373 * string is static and we don't free it.
2361 2374 *
2362 2375 * This function is always called before zoneadmd forks and makes itself
2363 2376 * exclusive, so it is possible there could more than one instance of zoneadmd
2364 2377 * running in parallel at this point. Thus, we have no zonecfg snapshot and
2365 2378 * shouldn't take one yet (i.e. snap_hndl is NULL). Thats ok, since we don't
2366 2379 * need any zonecfg info to query for a brand-specific env value.
2367 2380 */
2368 2381 static int
2369 2382 set_brand_env(zlog_t *zlogp)
2370 2383 {
2371 2384 int ret = 0;
2372 2385 static char *env_vars = NULL;
2373 2386 char buf[2 * MAXPATHLEN];
2374 2387
2375 2388 if (query_hook[0] == '\0' || env_vars != NULL)
2376 2389 return (0);
2377 2390
2378 2391 if (snprintf(buf, sizeof (buf), "%s env", query_hook) > sizeof (buf))
2379 2392 return (-1);
2380 2393
2381 2394 if (do_subproc(zlogp, buf, &env_vars, B_FALSE) != 0)
2382 2395 return (-1);
2383 2396
2384 2397 if (env_vars != NULL) {
2385 2398 char *sp;
2386 2399
2387 2400 sp = strtok(env_vars, "\t");
2388 2401 while (sp != NULL) {
2389 2402 if (putenv(sp) != 0) {
2390 2403 ret = -1;
2391 2404 break;
2392 2405 }
2393 2406 sp = strtok(NULL, "\t");
2394 2407 }
2395 2408 }
2396 2409
2397 2410 return (ret);
2398 2411 }
2399 2412
2400 2413 /*
2401 2414 * Setup the brand's pre and post state change callbacks, as well as the
2402 2415 * query callback, if any of these exist.
2403 2416 */
2404 2417 static int
2405 2418 brand_callback_init(brand_handle_t bh, char *zone_name)
2406 2419 {
2407 2420 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
2408 2421 sizeof (pre_statechg_hook));
2409 2422
2410 2423 if (brand_get_prestatechange(bh, zone_name, zonepath,
2411 2424 pre_statechg_hook + EXEC_LEN,
2412 2425 sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
2413 2426 return (-1);
2414 2427
2415 2428 if (strlen(pre_statechg_hook) <= EXEC_LEN)
2416 2429 pre_statechg_hook[0] = '\0';
2417 2430
2418 2431 (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
2419 2432 sizeof (post_statechg_hook));
2420 2433
2421 2434 if (brand_get_poststatechange(bh, zone_name, zonepath,
2422 2435 post_statechg_hook + EXEC_LEN,
2423 2436 sizeof (post_statechg_hook) - EXEC_LEN) != 0)
2424 2437 return (-1);
2425 2438
2426 2439 if (strlen(post_statechg_hook) <= EXEC_LEN)
2427 2440 post_statechg_hook[0] = '\0';
2428 2441
2429 2442 (void) strlcpy(query_hook, EXEC_PREFIX,
2430 2443 sizeof (query_hook));
2431 2444
2432 2445 if (brand_get_query(bh, zone_name, zonepath, query_hook + EXEC_LEN,
2433 2446 sizeof (query_hook) - EXEC_LEN) != 0)
2434 2447 return (-1);
2435 2448
2436 2449 if (strlen(query_hook) <= EXEC_LEN)
2437 2450 query_hook[0] = '\0';
2438 2451
2439 2452 return (0);
2440 2453 }
2441 2454
2442 2455 int
2443 2456 main(int argc, char *argv[])
2444 2457 {
2445 2458 int opt;
2446 2459 zoneid_t zid;
2447 2460 priv_set_t *privset;
2448 2461 zone_state_t zstate;
2449 2462 char parents_locale[MAXPATHLEN];
2450 2463 brand_handle_t bh;
2451 2464 int err;
2452 2465
2453 2466 pid_t pid;
2454 2467 sigset_t blockset;
2455 2468 sigset_t block_cld;
2456 2469
2457 2470 struct {
2458 2471 sema_t sem;
2459 2472 int status;
2460 2473 zlog_t log;
2461 2474 } *shstate;
2462 2475 size_t shstatelen = getpagesize();
2463 2476
2464 2477 zlog_t errlog;
2465 2478 zlog_t *zlogp;
2466 2479
2467 2480 int ctfd;
2468 2481
2469 2482 progname = get_execbasename(argv[0]);
2470 2483
2471 2484 /*
2472 2485 * Make sure stderr is unbuffered
2473 2486 */
2474 2487 (void) setbuffer(stderr, NULL, 0);
2475 2488
2476 2489 /*
2477 2490 * Get out of the way of mounted filesystems, since we will daemonize
2478 2491 * soon.
2479 2492 */
2480 2493 (void) chdir("/");
2481 2494
2482 2495 /*
2483 2496 * Use the default system umask per PSARC 1998/110 rather than
2484 2497 * anything that may have been set by the caller.
2485 2498 */
2486 2499 (void) umask(CMASK);
2487 2500
2488 2501 /*
2489 2502 * Initially we want to use our parent's locale.
2490 2503 */
2491 2504 (void) setlocale(LC_ALL, "");
2492 2505 (void) textdomain(TEXT_DOMAIN);
2493 2506 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
2494 2507 sizeof (parents_locale));
2495 2508
2496 2509 /*
2497 2510 * This zlog_t is used for writing to stderr
2498 2511 */
2499 2512 errlog.logfile = stderr;
2500 2513 errlog.buflen = errlog.loglen = 0;
2501 2514 errlog.buf = errlog.log = NULL;
2502 2515 errlog.locale = parents_locale;
2503 2516
2504 2517 /*
2505 2518 * We start off writing to stderr until we're ready to daemonize.
2506 2519 */
2507 2520 zlogp = &errlog;
2508 2521
2509 2522 /*
2510 2523 * Process options.
2511 2524 */
2512 2525 while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
2513 2526 switch (opt) {
2514 2527 case 'R':
2515 2528 zonecfg_set_root(optarg);
2516 2529 break;
2517 2530 case 'z':
2518 2531 zone_name = optarg;
2519 2532 break;
2520 2533 default:
2521 2534 usage();
2522 2535 }
2523 2536 }
2524 2537
2525 2538 if (zone_name == NULL)
2526 2539 usage();
2527 2540
2528 2541 /*
2529 2542 * Because usage() prints directly to stderr, it has gettext()
2530 2543 * wrapping, which depends on the locale. But since zerror() calls
2531 2544 * localize() which tweaks the locale, it is not safe to call zerror()
2532 2545 * until after the last call to usage(). Fortunately, the last call
2533 2546 * to usage() is just above and the first call to zerror() is just
2534 2547 * below. Don't mess this up.
2535 2548 */
2536 2549 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
2537 2550 zerror(zlogp, B_FALSE, "cannot manage the %s zone",
2538 2551 GLOBAL_ZONENAME);
2539 2552 return (1);
2540 2553 }
2541 2554
2542 2555 if (zone_get_id(zone_name, &zid) != 0) {
2543 2556 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
2544 2557 zonecfg_strerror(Z_NO_ZONE));
2545 2558 return (1);
2546 2559 }
2547 2560
2548 2561 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
2549 2562 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
2550 2563 zonecfg_strerror(err));
2551 2564 return (1);
2552 2565 }
2553 2566 if (zstate < ZONE_STATE_INCOMPLETE) {
2554 2567 zerror(zlogp, B_FALSE,
2555 2568 "cannot manage a zone which is in state '%s'",
2556 2569 zone_state_str(zstate));
2557 2570 return (1);
2558 2571 }
2559 2572
2560 2573 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
2561 2574 zerror(zlogp, B_FALSE, "unable to determine zone path");
2562 2575 return (-1);
2563 2576 }
2564 2577
2565 2578 if (zonecfg_default_brand(default_brand,
2566 2579 sizeof (default_brand)) != Z_OK) {
2567 2580 zerror(zlogp, B_FALSE, "unable to determine default brand");
2568 2581 return (1);
2569 2582 }
2570 2583
2571 2584 /* Get a handle to the brand info for this zone */
2572 2585 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name))
2573 2586 != Z_OK) {
2574 2587 zerror(zlogp, B_FALSE, "unable to determine zone brand");
2575 2588 return (1);
2576 2589 }
2577 2590 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0);
2578 2591 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0);
2579 2592
2580 2593 /*
2581 2594 * In the alternate root environment, the only supported
2582 2595 * operations are mount and unmount. In this case, just treat
2583 2596 * the zone as native if it is cluster. Cluster zones can be
2584 2597 * native for the purpose of LU or upgrade, and the cluster
2585 2598 * brand may not exist in the miniroot (such as in net install
2586 2599 * upgrade).
2587 2600 */
2588 2601 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) {
2589 2602 zone_iscluster = B_TRUE;
2590 2603 if (zonecfg_in_alt_root()) {
2591 2604 (void) strlcpy(brand_name, default_brand,
2592 2605 sizeof (brand_name));
2593 2606 }
2594 2607 } else {
2595 2608 zone_iscluster = B_FALSE;
2596 2609 }
2597 2610
2598 2611 if ((bh = brand_open(brand_name)) == NULL) {
2599 2612 zerror(zlogp, B_FALSE, "unable to open zone brand");
2600 2613 return (1);
2601 2614 }
2602 2615
2603 2616 /* Get state change brand hooks. */
2604 2617 if (brand_callback_init(bh, zone_name) == -1) {
2605 2618 zerror(zlogp, B_TRUE,
2606 2619 "failed to initialize brand state change hooks");
2607 2620 brand_close(bh);
2608 2621 return (1);
2609 2622 }
2610 2623
2611 2624 brand_close(bh);
2612 2625
2613 2626 /*
2614 2627 * Check that we have all privileges. It would be nice to pare
2615 2628 * this down, but this is at least a first cut.
2616 2629 */
2617 2630 if ((privset = priv_allocset()) == NULL) {
2618 2631 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
2619 2632 return (1);
2620 2633 }
2621 2634
2622 2635 if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2623 2636 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
2624 2637 priv_freeset(privset);
2625 2638 return (1);
2626 2639 }
2627 2640
2628 2641 if (priv_isfullset(privset) == B_FALSE) {
2629 2642 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
2630 2643 "run this command (all privs required)");
2631 2644 priv_freeset(privset);
2632 2645 return (1);
2633 2646 }
2634 2647 priv_freeset(privset);
2635 2648
2636 2649 if (set_brand_env(zlogp) != 0) {
2637 2650 zerror(zlogp, B_FALSE, "Unable to setup brand's environment");
2638 2651 return (1);
2639 2652 }
2640 2653
2641 2654 if (mkzonedir(zlogp) != 0)
2642 2655 return (1);
2643 2656
2644 2657 /*
2645 2658 * Pre-fork: setup shared state
2646 2659 */
2647 2660 if ((shstate = (void *)mmap(NULL, shstatelen,
2648 2661 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
2649 2662 MAP_FAILED) {
2650 2663 zerror(zlogp, B_TRUE, "%s failed", "mmap");
2651 2664 return (1);
2652 2665 }
2653 2666 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
2654 2667 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
2655 2668 (void) munmap((char *)shstate, shstatelen);
2656 2669 return (1);
2657 2670 }
2658 2671 shstate->log.logfile = NULL;
2659 2672 shstate->log.buflen = shstatelen - sizeof (*shstate);
2660 2673 shstate->log.loglen = shstate->log.buflen;
2661 2674 shstate->log.buf = (char *)shstate + sizeof (*shstate);
2662 2675 shstate->log.log = shstate->log.buf;
2663 2676 shstate->log.locale = parents_locale;
2664 2677 shstate->status = -1;
2665 2678
2666 2679 /*
2667 2680 * We need a SIGCHLD handler so the sema_wait() below will wake
2668 2681 * up if the child dies without doing a sema_post().
2669 2682 */
2670 2683 (void) sigset(SIGCHLD, sigchld);
2671 2684 /*
2672 2685 * We must mask SIGCHLD until after we've coped with the fork
2673 2686 * sufficiently to deal with it; otherwise we can race and
2674 2687 * receive the signal before pid has been initialized
2675 2688 * (yes, this really happens).
2676 2689 */
2677 2690 (void) sigemptyset(&block_cld);
2678 2691 (void) sigaddset(&block_cld, SIGCHLD);
2679 2692 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2680 2693
2681 2694 /*
2682 2695 * The parent only needs stderr after the fork, so close other fd's
2683 2696 * that we inherited from zoneadm so that the parent doesn't have those
2684 2697 * open while waiting. The child will close the rest after the fork.
2685 2698 */
2686 2699 closefrom(3);
2687 2700
2688 2701 if ((ctfd = init_template()) == -1) {
2689 2702 zerror(zlogp, B_TRUE, "failed to create contract");
2690 2703 return (1);
2691 2704 }
2692 2705
2693 2706 /*
2694 2707 * Do not let another thread localize a message while we are forking.
2695 2708 */
2696 2709 (void) mutex_lock(&msglock);
2697 2710 pid = fork();
2698 2711 (void) mutex_unlock(&msglock);
2699 2712
2700 2713 /*
2701 2714 * In all cases (parent, child, and in the event of an error) we
2702 2715 * don't want to cause creation of contracts on subsequent fork()s.
2703 2716 */
2704 2717 (void) ct_tmpl_clear(ctfd);
2705 2718 (void) close(ctfd);
2706 2719
2707 2720 if (pid == -1) {
2708 2721 zerror(zlogp, B_TRUE, "could not fork");
2709 2722 return (1);
2710 2723
2711 2724 } else if (pid > 0) { /* parent */
2712 2725 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2713 2726 /*
2714 2727 * This marks a window of vulnerability in which we receive
2715 2728 * the SIGCLD before falling into sema_wait (normally we would
2716 2729 * get woken up from sema_wait with EINTR upon receipt of
2717 2730 * SIGCLD). So we may need to use some other scheme like
2718 2731 * sema_posting in the sigcld handler.
2719 2732 * blech
2720 2733 */
2721 2734 (void) sema_wait(&shstate->sem);
2722 2735 (void) sema_destroy(&shstate->sem);
2723 2736 if (shstate->status != 0)
2724 2737 (void) waitpid(pid, NULL, WNOHANG);
2725 2738 /*
2726 2739 * It's ok if we die with SIGPIPE. It's not like we could have
2727 2740 * done anything about it.
2728 2741 */
2729 2742 (void) fprintf(stderr, "%s", shstate->log.buf);
2730 2743 _exit(shstate->status == 0 ? 0 : 1);
2731 2744 }
2732 2745
2733 2746 /*
2734 2747 * The child charges on.
2735 2748 */
2736 2749 (void) sigset(SIGCHLD, SIG_DFL);
2737 2750 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2738 2751
2739 2752 /*
2740 2753 * SIGPIPE can be delivered if we write to a socket for which the
2741 2754 * peer endpoint is gone. That can lead to too-early termination
2742 2755 * of zoneadmd, and that's not good eats.
2743 2756 */
2744 2757 (void) sigset(SIGPIPE, SIG_IGN);
2745 2758 /*
2746 2759 * Stop using stderr
2747 2760 */
2748 2761 zlogp = &shstate->log;
2749 2762
2750 2763 /*
2751 2764 * We don't need stdout/stderr from now on.
2752 2765 */
2753 2766 closefrom(0);
2754 2767
2755 2768 /*
2756 2769 * Initialize the syslog zlog_t. This needs to be done after
2757 2770 * the call to closefrom().
2758 2771 */
2759 2772 logsys.buf = logsys.log = NULL;
2760 2773 logsys.buflen = logsys.loglen = 0;
2761 2774 logsys.logfile = NULL;
2762 2775 logsys.locale = DEFAULT_LOCALE;
2763 2776
2764 2777 openlog("zoneadmd", LOG_PID, LOG_DAEMON);
2765 2778
2766 2779 /*
2767 2780 * Allow logging to <zonepath>/logs/<file>.
2768 2781 */
2769 2782 logstream_init(zlogp);
2770 2783 platloghdl = logstream_open("platform.log", "zoneadmd", 0);
2771 2784
2772 2785 /* logplat looks the same as logsys, but logs to platform.log */
2773 2786 logplat = logsys;
2774 2787
2775 2788 /*
2776 2789 * The eventstream is used to publish state changes in the zone
2777 2790 * from the door threads to the console I/O poller.
2778 2791 */
2779 2792 if (eventstream_init() == -1) {
2780 2793 zerror(zlogp, B_TRUE, "unable to create eventstream");
2781 2794 goto child_out;
2782 2795 }
2783 2796
2784 2797 (void) snprintf(zone_door_path, sizeof (zone_door_path),
2785 2798 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);
2786 2799
2787 2800 /*
2788 2801 * See if another zoneadmd is running for this zone. If not, then we
2789 2802 * can now modify system state.
2790 2803 */
2791 2804 if (make_daemon_exclusive(zlogp) == -1)
2792 2805 goto child_out;
2793 2806
2794 2807 /*
2795 2808 * Create/join a new session; we need to be careful of what we do with
2796 2809 * the console from now on so we don't end up being the session leader
2797 2810 * for the terminal we're going to be handing out.
2798 2811 */
2799 2812 (void) setsid();
2800 2813
2801 2814 /*
2802 2815 * This thread shouldn't be receiving any signals; in particular,
2803 2816 * SIGCHLD should be received by the thread doing the fork(). The
2804 2817 * exceptions are SIGHUP and SIGUSR1 for log rotation, set up by
2805 2818 * logstream_init().
2806 2819 */
2807 2820 (void) sigfillset(&blockset);
2808 2821 (void) sigdelset(&blockset, SIGHUP);
2809 2822 (void) sigdelset(&blockset, SIGUSR1);
2810 2823 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
2811 2824
2812 2825 /*
2813 2826 * Setup the console device and get ready to serve the console;
2814 2827 * once this has completed, we're ready to let console clients
2815 2828 * make an attempt to connect (they will block until
2816 2829 * serve_console_sock() below gets called, and any pending
2817 2830 * connection is accept()ed).
2818 2831 */
2819 2832 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0)
2820 2833 goto child_out;
2821 2834
2822 2835 /*
2823 2836 * Take the lock now, so that when the door server gets going, we
2824 2837 * are guaranteed that it won't take a request until we are sure
2825 2838 * that everything is completely set up. See the child_out: label
2826 2839 * below to see why this matters.
2827 2840 */
2828 2841 (void) mutex_lock(&lock);
2829 2842
2830 2843 /* Init semaphore for scratch zones. */
2831 2844 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
2832 2845 zerror(zlogp, B_TRUE,
2833 2846 "failed to initialize semaphore for scratch zone");
2834 2847 goto child_out;
2835 2848 }
2836 2849
2837 2850 /* open the dladm handle */
2838 2851 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) {
2839 2852 zerror(zlogp, B_FALSE, "failed to open dladm handle");
2840 2853 goto child_out;
2841 2854 }
2842 2855
2843 2856 /*
2844 2857 * Note: door setup must occur *after* the console is setup.
2845 2858 * This is so that as zlogin tests the door to see if zoneadmd
2846 2859 * is ready yet, we know that the console will get serviced
2847 2860 * once door_info() indicates that the door is "up".
2848 2861 */
2849 2862 if (setup_door(zlogp) == -1)
2850 2863 goto child_out;
2851 2864
2852 2865 /*
2853 2866 * Things seem OK so far; tell the parent process that we're done
2854 2867 * with setup tasks. This will cause the parent to exit, signalling
2855 2868 * to zoneadm, zlogin, or whatever forked it that we are ready to
2856 2869 * service requests.
2857 2870 */
2858 2871 shstate->status = 0;
2859 2872 (void) sema_post(&shstate->sem);
2860 2873 (void) munmap((char *)shstate, shstatelen);
2861 2874 shstate = NULL;
2862 2875
2863 2876 (void) mutex_unlock(&lock);
2864 2877
2865 2878 /*
2866 2879 * zlogp is now invalid, so reset it to the syslog logger.
2867 2880 */
2868 2881 zlogp = &logsys;
2869 2882
2870 2883 /*
2871 2884 * Now that we are free of any parents, switch to the default locale.
2872 2885 */
2873 2886 (void) setlocale(LC_ALL, DEFAULT_LOCALE);
2874 2887
2875 2888 /*
2876 2889 * At this point the setup portion of main() is basically done, so
2877 2890 * we reuse this thread to manage the zone console. When
2878 2891 * serve_console() has returned, we are past the point of no return
2879 2892 * in the life of this zoneadmd.
2880 2893 */
2881 2894 if (zonecfg_in_alt_root()) {
2882 2895 /*
2883 2896 * This is just awful, but mounted scratch zones don't (and
2884 2897 * can't) have consoles. We just wait for unmount instead.
2885 2898 */
2886 2899 while (sema_wait(&scratch_sem) == EINTR)
2887 2900 ;
2888 2901 } else {
2889 2902 serve_console(zlogp);
2890 2903 assert(in_death_throes);
2891 2904 }
2892 2905
2893 2906 /*
2894 2907 * This is the next-to-last part of the exit interlock. Upon calling
2895 2908 * fdetach(), the door will go unreferenced; once any
2896 2909 * outstanding requests (like the door thread doing Z_HALT) are
2897 2910 * done, the door will get an UNREF notification; when it handles
2898 2911 * the UNREF, the door server will cause the exit. It's possible
2899 2912 * that fdetach() can fail because the file is in use, in which
2900 2913 * case we'll retry the operation.
2901 2914 */
2902 2915 assert(!MUTEX_HELD(&lock));
2903 2916 for (;;) {
2904 2917 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY))
2905 2918 break;
2906 2919 yield();
2907 2920 }
2908 2921
2909 2922 for (;;)
2910 2923 (void) pause();
2911 2924
2912 2925 child_out:
2913 2926 assert(pid == 0);
2914 2927
2915 2928 shstate->status = -1;
2916 2929 (void) sema_post(&shstate->sem);
2917 2930 (void) munmap((char *)shstate, shstatelen);
2918 2931
2919 2932 /*
2920 2933 * This might trigger an unref notification, but if so,
2921 2934 * we are still holding the lock, so our call to exit will
2922 2935 * ultimately win the race and will publish the right exit
2923 2936 * code.
2924 2937 */
2925 2938 if (zone_door != -1) {
2926 2939 assert(MUTEX_HELD(&lock));
2927 2940 (void) door_revoke(zone_door);
2928 2941 (void) fdetach(zone_door_path);
2929 2942 }
2930 2943
2931 2944 if (dld_handle != NULL)
2932 2945 dladm_close(dld_handle);
2933 2946
2934 2947 return (1); /* return from main() forcibly exits an MT process */
2935 2948 }
|
↓ open down ↓ |
1496 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX