1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2021 Joyent, Inc.
26 * Copyright (c) 2016 by Delphix. All rights reserved.
27 */
28
29 /*
30 * zoneadmd manages zones; one zoneadmd process is launched for each
31 * non-global zone on the system. This daemon juggles four jobs:
32 *
33 * - Implement setup and teardown of the zone "virtual platform": mount and
34 * unmount filesystems; create and destroy network interfaces; communicate
35 * with devfsadmd to lay out devices for the zone; instantiate the zone
36 * console device; configure process runtime attributes such as resource
37 * controls, pool bindings, fine-grained privileges.
38 *
39 * - Launch the zone's init(1M) process.
40 *
41 * - Implement a door server; clients (like zoneadm) connect to the door
42 * server and request zone state changes. The kernel is also a client of
43 * this door server. A request to halt or reboot the zone which originates
44 * *inside* the zone results in a door upcall from the kernel into zoneadmd.
45 *
46 * One minor problem is that messages emitted by zoneadmd need to be passed
47 * back to the zoneadm process making the request. These messages need to
48 * be rendered in the client's locale; so, this is passed in as part of the
49 * request. The exception is the kernel upcall to zoneadmd, in which case
50 * messages are syslog'd.
51 *
52 * To make all of this work, the Makefile adds -a to xgettext to extract *all*
53 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
54 * strings which do not need to be translated.
55 *
56 * - Act as a console server for zlogin -C processes; see comments in zcons.c
57 * for more information about the zone console architecture.
58 *
59 * DESIGN NOTES
60 *
61 * Restart:
62 * A chief design constraint of zoneadmd is that it should be restartable in
63 * the case that the administrator kills it off, or it suffers a fatal error,
64 * without the running zone being impacted; this is akin to being able to
65 * reboot the service processor of a server without affecting the OS instance.
66 */
67
68 #include <sys/param.h>
69 #include <sys/mman.h>
70 #include <sys/types.h>
71 #include <sys/stat.h>
72 #include <sys/sysmacros.h>
73 #include <sys/time.h>
74
75 #include <bsm/adt.h>
76 #include <bsm/adt_event.h>
77
78 #include <alloca.h>
79 #include <assert.h>
80 #include <errno.h>
81 #include <door.h>
82 #include <fcntl.h>
83 #include <locale.h>
84 #include <signal.h>
85 #include <stdarg.h>
86 #include <stdio.h>
87 #include <stdlib.h>
88 #include <string.h>
89 #include <strings.h>
90 #include <synch.h>
91 #include <syslog.h>
92 #include <thread.h>
93 #include <unistd.h>
94 #include <wait.h>
95 #include <limits.h>
96 #include <zone.h>
97 #include <libbrand.h>
98 #include <sys/brand.h>
99 #include <libcontract.h>
100 #include <libcontract_priv.h>
101 #include <sys/brand.h>
102 #include <sys/contract/process.h>
103 #include <sys/ctfs.h>
104 #include <libdladm.h>
105 #include <sys/dls_mgmt.h>
106 #include <libscf.h>
107 #include <uuid/uuid.h>
108 #include <libppt.h>
109
110 #include <libzonecfg.h>
111 #include <zonestat_impl.h>
112 #include "zoneadmd.h"
113
/*
 * File-scope state for this zoneadmd instance, followed by the local
 * message-catalog and resource-name macros.
 */
static char *progname;		/* program name printed by usage() */
char *zone_name; /* zone which we are managing */
zone_dochandle_t snap_hndl; /* handle for snapshot created when ready */
char zonepath[MAXNAMELEN];	/* mount_early_fs() roots "<zonepath>/lu" here */
char pool_name[MAXNAMELEN];
char default_brand[MAXNAMELEN];
char brand_name[MAXNAMELEN];
boolean_t zone_isnative;
boolean_t zone_iscluster;
boolean_t zone_islabeled;
boolean_t shutdown_in_progress;
static zoneid_t zone_id;	/* set from vplat_create() in zone_ready() */
/* 0 until zone_ready() fills it in from zone_get_did() */
static zoneid_t zone_did = 0;
dladm_handle_t dld_handle = NULL;

/*
 * Brand hook command strings.  brand_prestatechg()/brand_poststatechg()
 * treat an empty string as "no hook configured".
 */
char pre_statechg_hook[2 * MAXPATHLEN];
char post_statechg_hook[2 * MAXPATHLEN];
char query_hook[2 * MAXPATHLEN];

zlog_t logsys; /* log to syslog */
zlog_t logplat; /* log to platform.log */

mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */
mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */

static sema_t scratch_sem; /* for scratch zones */

static char zone_door_path[MAXPATHLEN];
static int zone_door = -1;

boolean_t in_death_throes = B_FALSE; /* daemon is dying */
boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */

static int platloghdl = -1; /* Handle for <zonepath>/logs/platform.log */

#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
#define	TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
#endif

/* Locale that localize_msg() restores after translating a message. */
#define	DEFAULT_LOCALE "C"

/* Resource names used when building _ZONECFG_* environment variables. */
#define	RSRC_NET "net"
#define	RSRC_DEV "device"
157
158 static const char *
159 z_cmd_name(zone_cmd_t zcmd)
160 {
161 /* This list needs to match the enum in sys/zone.h */
162 static const char *zcmdstr[] = {
163 "ready", "boot", "forceboot", "reboot", "halt",
164 "note_uninstalling", "mount", "forcemount", "unmount",
165 "shutdown"
166 };
167
168 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr))
169 return ("unknown");
170 else
171 return (zcmdstr[(int)zcmd]);
172 }
173
/*
 * Return a pointer to the final path component of execfullname.  Any
 * trailing '/' characters are overwritten with '\0' in the caller's buffer
 * so that a non-empty component is found whenever one exists.  The result
 * points into execfullname; nothing is allocated.
 */
static char *
get_execbasename(char *execfullname)
{
	char *slash;

	while ((slash = strrchr(execfullname, '/')) != NULL) {
		if (slash[1] != '\0')
			return (slash + 1);

		/* guard against '/' at end of command invocation */
		*slash = '\0';
	}

	/* No '/' left: the whole (possibly shortened) string is the name. */
	return (execfullname);
}
196
197 static void
198 usage(void)
199 {
200 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
201 (void) fprintf(stderr,
202 gettext("\tNote: %s should not be run directly.\n"), progname);
203 exit(2);
204 }
205
/*
 * SIGCHLD handler that deliberately does nothing with the signal.
 * NOTE(review): presumably installed so that SIGCHLD is caught rather than
 * left at its default disposition -- confirm at the site that registers it.
 */
/* ARGSUSED */
static void
sigchld(int sig)
{
}
211
212 char *
213 localize_msg(char *locale, const char *msg)
214 {
215 char *out;
216
217 (void) mutex_lock(&msglock);
218 (void) setlocale(LC_MESSAGES, locale);
219 out = gettext(msg);
220 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
221 (void) mutex_unlock(&msglock);
222 return (out);
223 }
224
/*
 * Format an error message and deliver it to the sink(s) selected by zlogp.
 *
 *	zlogp		log handle; &logsys and &logplat get special
 *			treatment, any other handle is written to its logfile
 *			or appended to its in-memory log buffer
 *	use_strerror	when true, ": <strerror(errno)>" is appended, using
 *			the errno value captured on entry
 *	fmt, ...	printf-style message; fmt is translated into
 *			zlogp->locale before formatting
 *
 * The message is truncated to fit the local buffer and always has a newline
 * appended (space permitting).
 */
/* PRINTFLIKE3 */
void
zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
{
	va_list alist;
	char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
	char *bp, *bp_nozone;
	/* Capture errno now; the calls below may overwrite it. */
	int saved_errno = errno;

	/* Only messages bound for syslog carry the "[zone '...'] " prefix. */
	if (zlogp == &logsys)
		(void) snprintf(buf, sizeof (buf), "[zone '%s'] ", zone_name);
	else
		buf[0] = '\0';
	/* bp_nozone remembers where the message proper (sans prefix) starts. */
	bp = bp_nozone = &(buf[strlen(buf)]);

	/*
	 * In theory, the locale pointer should be set to either "C" or a
	 * char array, so it should never be NULL
	 */
	assert(zlogp->locale != NULL);
	/* Locale is per process, but we are multi-threaded... */
	fmt = localize_msg(zlogp->locale, fmt);

	va_start(alist, fmt);
	(void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
	va_end(alist);
	bp = &(buf[strlen(buf)]);
	if (use_strerror)
		(void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
		    strerror(saved_errno));

	(void) strlcat(buf, "\n", sizeof (buf));

	/*
	 * If we don't have the platform log, we are in a child process, and
	 * should log to stderr (which is a pipe) instead of the file.
	 */
	if (logging_poisoned) {
		(void) fprintf(stderr, "%s", buf);

		/*
		 * The logfile branch below would print the same text to
		 * stderr a second time, so stop here in that case.
		 */
		if (zlogp != &logsys && zlogp->logfile == stderr)
			return;
	} else {
		/* The platform log gets the message without the zone prefix. */
		logstream_write(platloghdl, bp_nozone, strlen(bp_nozone));

		if (zlogp == &logplat)
			return;
	}

	if (zlogp == &logsys) {
		/* Drop the trailing newline before handing off to syslog. */
		bp = strrchr(buf, '\n');
		if (bp != NULL && bp[1] == '\0') {
			*bp = '\0';
		}
		(void) syslog(LOG_ERR, "%s", buf);
	} else if (zlogp->logfile != NULL) {
		(void) fprintf(zlogp->logfile, "%s", buf);
	} else {
		size_t buflen;
		size_t copylen;

		/*
		 * Append to the caller-supplied buffer, then advance the
		 * write position and shrink the remaining length by what was
		 * consumed (capped at the space that was available).
		 */
		buflen = snprintf(zlogp->log, zlogp->loglen, "%s", buf);
		copylen = MIN(buflen, zlogp->loglen);
		zlogp->log += copylen;
		zlogp->loglen -= copylen;
	}
}
292
293 /*
294 * Append src to dest, modifying dest in the process. Prefix src with
295 * a space character if dest is a non-empty string. Assumes dest is already
296 * properly \0-terminated OR overruns destsize.
297 */
298 static void
299 strnappend(char *dest, size_t destsize, const char *src)
300 {
301 size_t startpoint = strnlen(dest, destsize);
302
303 if (startpoint >= destsize - 1) {
304 /* We've run out of room. Record something?! */
305 return;
306 }
307
308 if (startpoint > 0) {
309 /* Add the space per the function's intro comment. */
310 dest[startpoint] = ' ';
311 startpoint++;
312 }
313
314 /* Arguably we should check here too... */
315 (void) strlcpy(dest + startpoint, src, destsize - startpoint);
316 }
317
318 /*
319 * Since illumos boot arguments are getopt(3c) compatible (see kernel(1m)), we
320 * put the arguments into an argv style array, use getopt to process them,
321 * and put the resultant argument string back into outargs. Non-native brands
322 * may support alternate forms of boot arguments so we must handle that as well.
323 *
324 * During the filtering, we pull out any arguments which are truly "boot"
325 * arguments, leaving only those which are to be passed intact to the
326 * progenitor process. The one we support at the moment is -i, which
327 * indicates to the kernel which program should be launched as 'init'.
328 *
329 * Except for Z_OK, all other return values are treated as fatal.
330 */
331 static int
332 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
333 char *init_file)
334 {
335 int argc = 0, argc_save;
336 int i;
337 int err = Z_OK;
338 char *arg, *lasts, **argv = NULL, **argv_save;
339 char zonecfg_args[BOOTARGS_MAX];
340 char scratchargs[BOOTARGS_MAX], *sargs;
341 char scratchopt[3];
342 char c;
343
344 bzero(outargs, BOOTARGS_MAX);
345
346 /*
347 * If the user didn't specify transient boot arguments, check
348 * to see if there were any specified in the zone configuration,
349 * and use them if applicable.
350 */
351 if (inargs == NULL || inargs[0] == '\0') {
352 bzero(zonecfg_args, sizeof (zonecfg_args));
353 (void) zonecfg_get_bootargs(snap_hndl, zonecfg_args,
354 sizeof (zonecfg_args));
355 inargs = zonecfg_args;
356 }
357
358 if (strlen(inargs) >= BOOTARGS_MAX) {
359 zerror(zlogp, B_FALSE, "boot argument string too long");
360 return (Z_INVAL);
361 }
362
363 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
364 sargs = scratchargs;
365 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
366 sargs = NULL;
367 argc++;
368 }
369
370 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
371 zerror(zlogp, B_FALSE, "memory allocation failed");
372 return (Z_NOMEM);
373 }
374
375 argv_save = argv;
376 argc_save = argc;
377
378 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
379 sargs = scratchargs;
380 i = 0;
381 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
382 sargs = NULL;
383 if ((argv[i] = strdup(arg)) == NULL) {
384 err = Z_NOMEM;
385 zerror(zlogp, B_FALSE, "memory allocation failed");
386 goto done;
387 }
388 i++;
389 }
390
391 /*
392 * We preserve compatibility with the illumos system boot behavior,
393 * which allows:
394 *
395 * # reboot kernel/unix -s -m verbose
396 *
397 * In this example, kernel/unix tells the booter what file to boot. The
398 * original intent of this was that we didn't want reboot in a zone to
399 * be gratuitously different, so we would silently ignore the boot
400 * file, if necessary. However, this usage is archaic and has never
401 * been common, since it is impossible to boot a zone onto a different
402 * kernel. Ignoring the first argument breaks for non-native brands
403 * which pass boot arguments in a different style. e.g.
404 * systemd.log_level=debug
405 * Thus, for backward compatibility we only ignore the first argument
406 * if it appears to be in the illumos form and attempting to specify a
407 * kernel.
408 */
409 if (argv[0] == NULL)
410 goto done;
411
412 assert(argv[0][0] != ' ');
413 assert(argv[0][0] != '\t');
414
415 if (strncmp(argv[0], "kernel/", 7) == 0) {
416 argv = &argv[1];
417 argc--;
418 }
419
420 optind = 0;
421 opterr = 0;
422 err = Z_OK;
423 while ((c = getopt(argc, argv, "fi:m:s")) != -1) {
424 switch (c) {
425 case 'i':
426 /*
427 * -i is handled by the runtime and is not passed
428 * along to userland
429 */
430 (void) strlcpy(init_file, optarg, MAXPATHLEN);
431 break;
432 case 'f':
433 /* This has already been processed by zoneadm */
434 break;
435 case 'm':
436 case 's':
437 /* These pass through unmolested */
438 (void) snprintf(scratchopt, sizeof (scratchopt),
439 "-%c", c);
440 strnappend(outargs, BOOTARGS_MAX, scratchopt);
441 if (optarg != NULL)
442 strnappend(outargs, BOOTARGS_MAX, optarg);
443 break;
444 case '?':
445 /*
446 * If a brand has its own init, we need to pass along
447 * whatever the user provides. We use the entire
448 * unknown string here so that we correctly handle
449 * unknown long options (e.g. --debug).
450 */
451 strnappend(outargs, BOOTARGS_MAX, argv[optind - 1]);
452 break;
453 }
454 }
455
456 /*
457 * We need to pass along everything else since we don't know what
458 * the brand's init is expecting. For example, an argument list like:
459 * --confdir /foo --debug
460 * will cause the getopt parsing to stop at '/foo' but we need to pass
461 * that on, along with the '--debug'. This does mean that we require
462 * any of our known options (-ifms) to preceed the brand-specific ones.
463 */
464 while (optind < argc) {
465 strnappend(outargs, BOOTARGS_MAX, argv[optind]);
466 optind++;
467 }
468
469 done:
470 for (i = 0; i < argc_save; i++) {
471 if (argv_save[i] != NULL)
472 free(argv_save[i]);
473 }
474 free(argv_save);
475 return (err);
476 }
477
478
479 static int
480 mkzonedir(zlog_t *zlogp)
481 {
482 struct stat st;
483 /*
484 * We must create and lock everyone but root out of ZONES_TMPDIR
485 * since anyone can open any UNIX domain socket, regardless of
486 * its file system permissions. Sigh...
487 */
488 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
489 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
490 return (-1);
491 }
492 /* paranoia */
493 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
494 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
495 return (-1);
496 }
497 (void) chmod(ZONES_TMPDIR, S_IRWXU);
498 return (0);
499 }
500
501 /*
502 * Run the brand's pre-state change callback, if it exists.
503 */
504 static int
505 brand_prestatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug)
506 {
507 char cmdbuf[2 * MAXPATHLEN];
508 const char *altroot;
509
510 if (pre_statechg_hook[0] == '\0')
511 return (0);
512
513 altroot = zonecfg_get_root();
514 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
515 state, cmd, altroot) > sizeof (cmdbuf))
516 return (-1);
517
518 if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0)
519 return (-1);
520
521 return (0);
522 }
523
524 /*
525 * Run the brand's post-state change callback, if it exists.
526 */
527 static int
528 brand_poststatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug)
529 {
530 char cmdbuf[2 * MAXPATHLEN];
531 const char *altroot;
532
533 if (post_statechg_hook[0] == '\0')
534 return (0);
535
536 altroot = zonecfg_get_root();
537 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
538 state, cmd, altroot) > sizeof (cmdbuf))
539 return (-1);
540
541 if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0)
542 return (-1);
543
544 return (0);
545 }
546
547 /*
548 * Notify zonestatd of the new zone. If zonestatd is not running, this
549 * will do nothing.
550 */
551 static void
552 notify_zonestatd(zoneid_t zoneid)
553 {
554 int cmd[2];
555 int fd;
556 door_arg_t params;
557
558 fd = open(ZS_DOOR_PATH, O_RDONLY);
559 if (fd < 0)
560 return;
561
562 cmd[0] = ZSD_CMD_NEW_ZONE;
563 cmd[1] = zoneid;
564 params.data_ptr = (char *)&cmd;
565 params.data_size = sizeof (cmd);
566 params.desc_ptr = NULL;
567 params.desc_num = 0;
568 params.rbuf = NULL;
569 params.rsize = 0;
570 (void) door_call(fd, ¶ms);
571 (void) close(fd);
572 }
573
574 /*
575 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is
576 * 'true' if this is being invoked as part of the processing for the "mount"
577 * subcommand.
578 *
579 * If a scratch zone mount (ALT_MOUNT) is being performed then do not
580 * call the state change hooks.
581 */
582 static int
583 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate, boolean_t debug)
584 {
585 int err;
586 boolean_t snapped = B_FALSE;
587
588 if ((snap_hndl = zonecfg_init_handle()) == NULL) {
589 zerror(zlogp, B_TRUE, "getting zone configuration handle");
590 goto bad;
591 }
592 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
593 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
594 zonecfg_strerror(err));
595 goto bad;
596 }
597 snapped = B_TRUE;
598
599 if (zonecfg_get_snapshot_handle(zone_name, snap_hndl) != Z_OK) {
600 zerror(zlogp, B_FALSE, "invalid configuration snapshot");
601 goto bad;
602 }
603
604 if (zone_did == 0)
605 zone_did = zone_get_did(zone_name);
606
607 if (!ALT_MOUNT(mount_cmd) &&
608 brand_prestatechg(zlogp, zstate, Z_READY, debug) != 0)
609 goto bad;
610
611 if ((zone_id = vplat_create(zlogp, mount_cmd, zone_did)) == -1)
612 goto bad;
613
614 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
615 bringup_failure_recovery = B_TRUE;
616 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE,
617 debug);
618 goto bad;
619 }
620
621 if (!ALT_MOUNT(mount_cmd) &&
622 brand_poststatechg(zlogp, zstate, Z_READY, debug) != 0)
623 goto bad;
624
625 return (0);
626
627 bad:
628 /*
629 * If something goes wrong, we up the zones's state to the target
630 * state, READY, and then invoke the hook as if we're halting.
631 */
632 if (!ALT_MOUNT(mount_cmd))
633 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT,
634 debug);
635
636 if (snapped)
637 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
638 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
639 zonecfg_strerror(err));
640 zonecfg_fini_handle(snap_hndl);
641 snap_hndl = NULL;
642 return (-1);
643 }
644
645 int
646 init_template(void)
647 {
648 int fd;
649 int err = 0;
650
651 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
652 if (fd == -1)
653 return (-1);
654
655 /*
656 * For now, zoneadmd doesn't do anything with the contract.
657 * Deliver no events, don't inherit, and allow it to be orphaned.
658 */
659 err |= ct_tmpl_set_critical(fd, 0);
660 err |= ct_tmpl_set_informative(fd, 0);
661 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
662 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
663 if (err || ct_tmpl_activate(fd)) {
664 (void) close(fd);
665 return (-1);
666 }
667
668 return (fd);
669 }
670
/*
 * Context handed to mount_early_fs() through its opaque 'data' argument.
 */
typedef struct fs_callback {
	zlog_t *zlogp;		/* log handle for error reporting */
	zoneid_t zoneid;	/* zone the mounting child enters */
	boolean_t mount_cmd;	/* true: root the mount at <zonepath>/lu */
} fs_callback_t;
676
677 static int
678 mount_early_fs(void *data, const char *spec, const char *dir,
679 const char *fstype, const char *opt)
680 {
681 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp;
682 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid;
683 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd;
684 char rootpath[MAXPATHLEN];
685 pid_t child;
686 int child_status;
687 int tmpl_fd;
688 int rv;
689 ctid_t ct;
690
691 /* determine the zone rootpath */
692 if (mount_cmd) {
693 char luroot[MAXPATHLEN];
694
695 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
696 resolve_lofs(zlogp, luroot, sizeof (luroot));
697 (void) strlcpy(rootpath, luroot, sizeof (rootpath));
698 } else {
699 if (zone_get_rootpath(zone_name,
700 rootpath, sizeof (rootpath)) != Z_OK) {
701 zerror(zlogp, B_FALSE, "unable to determine zone root");
702 return (-1);
703 }
704 }
705
706 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) {
707 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
708 rootpath, dir);
709 return (-1);
710 } else if (rv > 0) {
711 /* The mount point path doesn't exist, create it now. */
712 if (make_one_dir(zlogp, rootpath, dir,
713 DEFAULT_DIR_MODE, DEFAULT_DIR_USER,
714 DEFAULT_DIR_GROUP) != 0) {
715 zerror(zlogp, B_FALSE, "failed to create mount point");
716 return (-1);
717 }
718
719 /*
720 * Now this might seem weird, but we need to invoke
721 * valid_mount_path() again. Why? Because it checks
722 * to make sure that the mount point path is canonical,
723 * which it can only do if the path exists, so now that
724 * we've created the path we have to verify it again.
725 */
726 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir,
727 fstype)) < 0) {
728 zerror(zlogp, B_FALSE,
729 "%s%s is not a valid mount point", rootpath, dir);
730 return (-1);
731 }
732 }
733
734 if ((tmpl_fd = init_template()) == -1) {
735 zerror(zlogp, B_TRUE, "failed to create contract");
736 return (-1);
737 }
738
739 if ((child = fork()) == -1) {
740 (void) ct_tmpl_clear(tmpl_fd);
741 (void) close(tmpl_fd);
742 zerror(zlogp, B_TRUE, "failed to fork");
743 return (-1);
744
745 } else if (child == 0) { /* child */
746 char opt_buf[MAX_MNTOPT_STR];
747 int optlen = 0;
748 int mflag = MS_DATA;
749 int i;
750 int ret;
751
752 (void) ct_tmpl_clear(tmpl_fd);
753 /*
754 * Even though there are no procs running in the zone, we
755 * do this for paranoia's sake.
756 */
757 (void) closefrom(0);
758
759 if (zone_enter(zoneid) == -1) {
760 _exit(errno);
761 }
762 if (opt != NULL) {
763 /*
764 * The mount() system call is incredibly annoying.
765 * If options are specified, we need to copy them
766 * into a temporary buffer since the mount() system
767 * call will overwrite the options string. It will
768 * also fail if the new option string it wants to
769 * write is bigger than the one we passed in, so
770 * you must pass in a buffer of the maximum possible
771 * option string length. sigh.
772 */
773 (void) strlcpy(opt_buf, opt, sizeof (opt_buf));
774 opt = opt_buf;
775 optlen = MAX_MNTOPT_STR;
776 mflag = MS_OPTIONSTR;
777 }
778
779 /*
780 * There is an obscure race condition which can cause mount
781 * to return EBUSY. This happens for example on the mount
782 * of the zone's /etc/svc/volatile file system if there is
783 * a GZ process running svcs -Z, which will touch the
784 * mountpoint, just as we're trying to do the mount. To cope
785 * with this, we retry up to 3 times to let this transient
786 * process get out of the way.
787 */
788 for (i = 0; i < 3; i++) {
789 ret = 0;
790 if (mount(spec, dir, mflag, fstype, NULL, 0, opt,
791 optlen) != 0)
792 ret = errno;
793 if (ret != EBUSY)
794 break;
795 (void) sleep(1);
796 }
797 _exit(ret);
798 }
799
800 /* parent */
801 if (contract_latest(&ct) == -1)
802 ct = -1;
803 (void) ct_tmpl_clear(tmpl_fd);
804 (void) close(tmpl_fd);
805 if (waitpid(child, &child_status, 0) != child) {
806 /* unexpected: we must have been signalled */
807 (void) contract_abandon_id(ct);
808 return (-1);
809 }
810 (void) contract_abandon_id(ct);
811 if (WEXITSTATUS(child_status) != 0) {
812 errno = WEXITSTATUS(child_status);
813 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
814 return (-1);
815 }
816
817 return (0);
818 }
819
/*
 * Replace characters other than [A-Za-z0-9_] with '_' so that the string is a
 * valid environment variable name.  The string is modified in place.
 *
 * Classification is done with isalnum(), so it follows the current locale.
 */
static void
sanitize_env_var_name(char *var)
{
	for (char *p = var; *p != '\0'; p++) {
		/*
		 * The <ctype.h> functions require a value representable as
		 * unsigned char (or EOF); a plain char holding a byte above
		 * 0x7f may be negative, which is undefined behavior.
		 */
		if (!isalnum((unsigned char)*p)) {
			*p = '_';
		}
	}
}
833
834 /*
835 * env variable name format
836 * _ZONECFG_{resource name}_{identifying attr. name}_{property name}
837 * Any dashes (-) in the property names are replaced with underscore (_).
838 */
839 static void
840 set_zonecfg_env(char *rsrc, char *attr, char *name, char *val)
841 {
842 /* Enough for maximal name, rsrc + attr, & slop for ZONECFG & _'s */
843 char nm[2 * MAXNAMELEN + 32];
844
845 if (attr == NULL)
846 (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s", rsrc,
847 name);
848 else
849 (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s_%s", rsrc,
850 attr, name);
851
852 sanitize_env_var_name(nm);
853
854 (void) setenv(nm, val, 1);
855 }
856
857 /*
858 * Resolve a device:match value to a path. This is only different for PPT
859 * devices, where we expect the match property to be a /devices/... path, and
860 * configured for PPT already.
861 */
862 int
863 resolve_device_match(zlog_t *zlogp, struct zone_devtab *dtab,
864 char *path, size_t len)
865 {
866 struct zone_res_attrtab *rap;
867
868 for (rap = dtab->zone_dev_attrp; rap != NULL;
869 rap = rap->zone_res_attr_next) {
870 if (strcmp(rap->zone_res_attr_name, "model") == 0 &&
871 strcmp(rap->zone_res_attr_value, "passthru") == 0)
872 break;
873 }
874
875 if (rap == NULL) {
876 if (strlcpy(path, dtab->zone_dev_match, len) >= len)
877 return (Z_INVAL);
878 return (Z_OK);
879 }
880
881 if (strncmp(dtab->zone_dev_match, "/devices",
882 strlen("/devices")) != 0) {
883 zerror(zlogp, B_FALSE, "invalid passthru match value '%s'",
884 dtab->zone_dev_match);
885 return (Z_INVAL);
886 }
887
888 if (ppt_devpath_to_dev(dtab->zone_dev_match, path, len) != 0) {
889 zerror(zlogp, B_TRUE, "failed to resolve passthru device %s",
890 dtab->zone_dev_match);
891 return (Z_INVAL);
892 }
893
894 return (Z_OK);
895 }
896
/*
 * Export various zonecfg properties into environment for the boot and state
 * change hooks.
 *
 * If debug is true, _ZONEADMD_brand_debug is set to 1, else it is set to an
 * empty string.  Brand hooks consider any non-empty string as an indication
 * that debug output is requested.
 *
 * We could export more of the config in the future, as necessary.  A better
 * solution would be to make it so brand-specific behavior is handled by
 * brand-specific callbacks written in C.  Then the normal libzonecfg interfaces
 * can be used for accessing any parts of the configuration that are needed.
 *
 * All of the environment variables set by this function are specific to
 * SmartOS.
 *
 * Returns Z_OK on success, and also when snap_hndl is NULL (in which case
 * nothing is exported).  Otherwise returns the error from
 * zonecfg_get_uuid(), from one of the zonecfg_set*ent() calls, or from
 * resolve_device_match().  Variables set before the failure stay set.
 */
static int
setup_subproc_env(zlog_t *zlogp, boolean_t debug)
{
	int res;
	struct zone_nwiftab ntab;
	struct zone_devtab dtab;
	struct zone_attrtab atab;
	char net_resources[MAXNAMELEN * 2];
	char dev_resources[MAXNAMELEN * 2];
	char didstr[16];
	char uuidstr[UUID_PRINTABLE_STRING_LENGTH];
	uuid_t uuid;

	/* snap_hndl is null when called through the set_brand_env code path */
	if (snap_hndl == NULL)
		return (Z_OK);

	if ((res = zonecfg_get_uuid(zone_name, uuid)) != Z_OK)
		return (res);

	uuid_unparse(uuid, uuidstr);
	(void) setenv("_ZONECFG_uuid", uuidstr, 1);

	(void) snprintf(didstr, sizeof (didstr), "%d", zone_did);
	(void) setenv("_ZONECFG_did", didstr, 1);

	/*
	 * "net" resources are exported because zoneadmd does not handle
	 * automatic configuration of vnics and so that the bhyve boot hook
	 * can generate the argument list for the brand's init program.  At such
	 * a time as vnic creation is handled in zoneadmd and brand callbacks
	 * can be executed as part of the zoneadmd process this should be
	 * removed.
	 */
	net_resources[0] = '\0';
	if ((res = zonecfg_setnwifent(snap_hndl)) != Z_OK)
		goto done;

	while (zonecfg_getnwifent(snap_hndl, &ntab) == Z_OK) {
		struct zone_res_attrtab *rap;
		char *phys;

		phys = ntab.zone_nwif_physical;

		/* Build the space-separated list for _ZONECFG_net_resources. */
		(void) strlcat(net_resources, phys, sizeof (net_resources));
		(void) strlcat(net_resources, " ", sizeof (net_resources));

		set_zonecfg_env(RSRC_NET, phys, "physical", phys);

		set_zonecfg_env(RSRC_NET, phys, "address",
		    ntab.zone_nwif_address);
		set_zonecfg_env(RSRC_NET, phys, "allowed-address",
		    ntab.zone_nwif_allowed_address);
		set_zonecfg_env(RSRC_NET, phys, "defrouter",
		    ntab.zone_nwif_defrouter);
		set_zonecfg_env(RSRC_NET, phys, "global-nic",
		    ntab.zone_nwif_gnic);
		set_zonecfg_env(RSRC_NET, phys, "mac-addr", ntab.zone_nwif_mac);
		set_zonecfg_env(RSRC_NET, phys, "vlan-id",
		    ntab.zone_nwif_vlan_id);

		/* Export any additional per-interface attributes as well. */
		for (rap = ntab.zone_nwif_attrp; rap != NULL;
		    rap = rap->zone_res_attr_next)
			set_zonecfg_env(RSRC_NET, phys, rap->zone_res_attr_name,
			    rap->zone_res_attr_value);
		nwifent_free_attrs(&ntab);
	}

	(void) setenv("_ZONECFG_net_resources", net_resources, 1);

	(void) zonecfg_endnwifent(snap_hndl);

	/*
	 * "device" resources are exported because the bhyve boot brand callback
	 * needs them to generate the argument list for the brand's init
	 * program.  At such a time as brand callbacks can be executed as part
	 * of the zoneadmd process, this should be removed.
	 *
	 * The bhyve brand only supports disk-like and ppt devices and does not
	 * support regular expressions.
	 */
	if ((res = zonecfg_setdevent(snap_hndl)) != Z_OK)
		goto done;

	dev_resources[0] = '\0';
	while (zonecfg_getdevent(snap_hndl, &dtab) == Z_OK) {
		char *match = dtab.zone_dev_match;
		struct zone_res_attrtab *rap;
		char path[MAXPATHLEN];

		/*
		 * NOTE(review): bailing out here skips zonecfg_enddevent()
		 * for the iteration started above -- confirm that is benign.
		 */
		res = resolve_device_match(zlogp, &dtab, path, sizeof (path));
		if (res != Z_OK)
			goto done;

		/*
		 * Even if not modified, the match path will be mangled in the
		 * environment variable name, so we always store the value here.
		 */
		set_zonecfg_env(RSRC_DEV, match, "path", path);

		for (rap = dtab.zone_dev_attrp; rap != NULL;
		    rap = rap->zone_res_attr_next) {
			set_zonecfg_env(RSRC_DEV, match,
			    rap->zone_res_attr_name, rap->zone_res_attr_value);
		}

		/*
		 * _ZONECFG_device_resources will contain a space separated list
		 * of devices that have _ZONECFG_device_<device>* environment
		 * variables.  So that each element of the list matches up with
		 * <device>, each list item needs to be sanitized in the same
		 * way that environment variable names are sanitized.
		 */
		sanitize_env_var_name(match);
		(void) strlcat(dev_resources, match, sizeof (dev_resources));
		(void) strlcat(dev_resources, " ", sizeof (dev_resources));
	}
	(void) zonecfg_enddevent(snap_hndl);

	(void) setenv("_ZONECFG_device_resources", dev_resources, 1);

	/*
	 * "attr" resources are exported because the bhyve brand's boot hook
	 * needs access to the "ram", "cpu", "bootrom", etc. to form the
	 * argument list for the brand's init program.  Once the bhyve brand is
	 * configured via proper resources and properties, this should be
	 * removed.
	 */
	if ((res = zonecfg_setattrent(snap_hndl)) != Z_OK)
		goto done;

	while (zonecfg_getattrent(snap_hndl, &atab) == Z_OK) {
		set_zonecfg_env("attr", NULL, atab.zone_attr_name,
		    atab.zone_attr_value);
	}

	(void) zonecfg_endattrent(snap_hndl);

	if (debug)
		(void) setenv("_ZONEADMD_brand_debug", "1", 1);
	else
		(void) setenv("_ZONEADMD_brand_debug", "", 1);

	res = Z_OK;

done:
	return (res);
}
1061
1062 void
1063 nwifent_free_attrs(struct zone_nwiftab *np)
1064 {
1065 struct zone_res_attrtab *rap;
1066
1067 for (rap = np->zone_nwif_attrp; rap != NULL; ) {
1068 struct zone_res_attrtab *tp = rap;
1069
1070 rap = rap->zone_res_attr_next;
1071 free(tp);
1072 }
1073 }
1074
1075 /*
1076 * If retstr is not NULL, the output of the subproc is returned in the str,
1077 * otherwise it is output using zerror(). Any memory allocated for retstr
1078 * should be freed by the caller.
1079 */
1080 int
1081 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr, boolean_t debug)
1082 {
1083 char buf[1024]; /* arbitrary large amount */
1084 char *inbuf;
1085 FILE *file;
1086 int status;
1087 int rd_cnt;
1088 int fds[2];
1089 pid_t child;
1090
1091 if (retstr != NULL) {
1092 if ((*retstr = malloc(1024)) == NULL) {
1093 zerror(zlogp, B_FALSE, "out of memory");
1094 return (-1);
1095 }
1096 inbuf = *retstr;
1097 rd_cnt = 0;
1098 } else {
1099 inbuf = buf;
1100 }
1101
1102 if (pipe(fds) != 0) {
1103 zerror(zlogp, B_TRUE, "failed to create pipe for subprocess");
1104 return (-1);
1105 }
1106
1107 if ((child = fork()) == 0) {
1108 int in;
1109
1110 /*
1111 * SIGINT is currently ignored. It probably shouldn't be so
1112 * hard to kill errant children, so we revert to SIG_DFL.
1113 * SIGHUP and SIGUSR1 are used to perform log rotation. We
1114 * leave those as-is because we don't want a 'pkill -HUP
1115 * zoneadmd' to kill this child process before exec(). On
1116 * exec(), SIGHUP and SIGUSR1 will become SIG_DFL.
1117 */
1118 (void) sigset(SIGINT, SIG_DFL);
1119
1120 /*
1121 * Set up a pipe for the child to log to.
1122 */
1123 if (dup2(fds[1], STDERR_FILENO) == -1) {
1124 (void) snprintf(buf, sizeof (buf),
1125 "subprocess failed to dup2(STDERR_FILENO): %s\n",
1126 strerror(errno));
1127 (void) write(fds[1], buf, strlen(buf));
1128 _exit(127);
1129 }
1130 if (dup2(fds[1], STDOUT_FILENO) == -1) {
1131 perror("subprocess failed to dup2(STDOUT_FILENO)");
1132 _exit(127);
1133 }
1134 /*
1135 * Some naughty children may try to read from stdin. Be sure
1136 * that the first file that a child opens doesn't get stdin's
1137 * file descriptor.
1138 */
1139 if ((in = open("/dev/null", O_RDONLY)) == -1 ||
1140 dup2(in, STDIN_FILENO) == -1) {
1141 zerror(zlogp, B_TRUE,
1142 "subprocess failed to set up STDIN_FILENO");
1143 _exit(127);
1144 }
1145 closefrom(STDERR_FILENO + 1);
1146
1147 if (setup_subproc_env(zlogp, debug) != Z_OK) {
1148 zerror(zlogp, B_FALSE, "failed to setup environment");
1149 _exit(127);
1150 }
1151
1152 (void) execl("/bin/sh", "sh", "-c", cmdbuf, NULL);
1153
1154 zerror(zlogp, B_TRUE, "subprocess execl failed");
1155 _exit(127);
1156 } else if (child == -1) {
1157 zerror(zlogp, B_TRUE, "failed to create subprocess for '%s'",
1158 cmdbuf);
1159 (void) close(fds[0]);
1160 (void) close(fds[1]);
1161 return (-1);
1162 }
1163
1164 (void) close(fds[1]);
1165
1166 file = fdopen(fds[0], "r");
1167 while (fgets(inbuf, 1024, file) != NULL) {
1168 if (retstr == NULL) {
1169 if (zlogp != &logsys) {
1170 int last = strlen(inbuf) - 1;
1171
1172 if (inbuf[last] == '\n')
1173 inbuf[last] = '\0';
1174 zerror(zlogp, B_FALSE, "%s", inbuf);
1175 }
1176 } else {
1177 char *p;
1178
1179 rd_cnt += 1024 - 1;
1180 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
1181 zerror(zlogp, B_FALSE, "out of memory");
1182 break;
1183 }
1184
1185 *retstr = p;
1186 inbuf = *retstr + rd_cnt;
1187 }
1188 }
1189
1190 while (fclose(file) != 0) {
1191 assert(errno == EINTR);
1192 }
1193 while (waitpid(child, &status, 0) == -1) {
1194 if (errno != EINTR) {
1195 zerror(zlogp, B_TRUE,
1196 "failed to get exit status of '%s'", cmdbuf);
1197 return (-1);
1198 }
1199 }
1200
1201 if (WIFSIGNALED(status)) {
1202 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
1203 "signal %d", cmdbuf, WTERMSIG(status));
1204 return (-1);
1205 }
1206 assert(WIFEXITED(status));
1207 if (WEXITSTATUS(status) == ZEXIT_EXEC) {
1208 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
1209 return (-1);
1210 }
1211 return (WEXITSTATUS(status));
1212 }
1213
1214 /*
1215 * Get the path for this zone's init(1M) (or equivalent) process. First look
1216 * for a zone-specific init-name attr, then get it from the brand.
1217 */
1218 static int
1219 get_initname(brand_handle_t bh, char *initname, int len)
1220 {
1221 struct zone_attrtab a;
1222
1223 bzero(&a, sizeof (a));
1224 (void) strlcpy(a.zone_attr_name, "init-name",
1225 sizeof (a.zone_attr_name));
1226
1227 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
1228 (void) strlcpy(initname, a.zone_attr_value, len);
1229 return (0);
1230 }
1231
1232 return (brand_get_initname(bh, initname, len));
1233 }
1234
1235 /*
1236 * Get the restart-init flag for this zone's init(1M) (or equivalent) process.
1237 * First look for a zone-specific restart-init attr, then get it from the brand.
1238 */
1239 static boolean_t
1240 restartinit(brand_handle_t bh)
1241 {
1242 struct zone_attrtab a;
1243
1244 bzero(&a, sizeof (a));
1245 (void) strlcpy(a.zone_attr_name, "restart-init",
1246 sizeof (a.zone_attr_name));
1247
1248 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
1249 if (strcmp(a.zone_attr_value, "false") == 0)
1250 return (B_FALSE);
1251 return (B_TRUE);
1252 }
1253
1254 return (brand_restartinit(bh));
1255 }
1256
1257 /*
1258 * Get the app-svc-dependent flag for this zone's init process. This is a
1259 * zone-specific attr which controls the type of contract we create for the
1260 * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal
1261 * set, so that when any service which is in the same contract exits, the init
1262 * application will be terminated.
1263 */
1264 static boolean_t
1265 is_app_svc_dep(void)
1266 {
1267 struct zone_attrtab a;
1268
1269 bzero(&a, sizeof (a));
1270 (void) strlcpy(a.zone_attr_name, "app-svc-dependent",
1271 sizeof (a.zone_attr_name));
1272
1273 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
1274 strcmp(a.zone_attr_value, "true") == 0) {
1275 return (B_TRUE);
1276 }
1277
1278 return (B_FALSE);
1279 }
1280
1281 static int
1282 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate, boolean_t debug)
1283 {
1284 zoneid_t zoneid;
1285 struct stat st;
1286 char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
1287 char nbootargs[BOOTARGS_MAX];
1288 char cmdbuf[MAXPATHLEN];
1289 fs_callback_t cb;
1290 brand_handle_t bh;
1291 zone_iptype_t iptype;
1292 dladm_status_t status;
1293 char errmsg[DLADM_STRSIZE];
1294 int err;
1295 boolean_t restart_init;
1296 boolean_t app_svc_dep;
1297
1298 if (brand_prestatechg(zlogp, zstate, Z_BOOT, debug) != 0)
1299 return (-1);
1300
1301 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1302 zerror(zlogp, B_TRUE, "unable to get zoneid");
1303 goto bad;
1304 }
1305
1306 cb.zlogp = zlogp;
1307 cb.zoneid = zoneid;
1308 cb.mount_cmd = B_FALSE;
1309
1310 /* Get a handle to the brand info for this zone */
1311 if ((bh = brand_open(brand_name)) == NULL) {
1312 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1313 goto bad;
1314 }
1315
1316 /*
1317 * Get the list of filesystems to mount from the brand
1318 * configuration. These mounts are done via a thread that will
1319 * enter the zone, so they are done from within the context of the
1320 * zone.
1321 */
1322 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
1323 zerror(zlogp, B_FALSE, "unable to mount filesystems");
1324 brand_close(bh);
1325 goto bad;
1326 }
1327
1328 /*
1329 * Get the brand's boot callback if it exists.
1330 */
1331 (void) strcpy(cmdbuf, EXEC_PREFIX);
1332 if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
1333 sizeof (cmdbuf) - EXEC_LEN) != 0) {
1334 zerror(zlogp, B_FALSE,
1335 "unable to determine branded zone's boot callback");
1336 brand_close(bh);
1337 goto bad;
1338 }
1339
1340 /* Get the path for this zone's init(1M) (or equivalent) process. */
1341 if (get_initname(bh, init_file, MAXPATHLEN) != 0) {
1342 zerror(zlogp, B_FALSE,
1343 "unable to determine zone's init(1M) location");
1344 brand_close(bh);
1345 goto bad;
1346 }
1347
1348 /* See if we should restart init if it dies. */
1349 restart_init = restartinit(bh);
1350
1351 /*
1352 * See if we need to setup contract dependencies between the zone's
1353 * primary application and any of its services.
1354 */
1355 app_svc_dep = is_app_svc_dep();
1356
1357 brand_close(bh);
1358
1359 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
1360 if (err != Z_OK)
1361 goto bad;
1362
1363 assert(init_file[0] != '\0');
1364
1365 /*
1366 * Try to anticipate possible problems: If possible, make sure init is
1367 * executable.
1368 */
1369 if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) {
1370 zerror(zlogp, B_FALSE, "unable to determine zone root");
1371 goto bad;
1372 }
1373
1374 (void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file);
1375
1376 if (lstat(initpath, &st) == -1) {
1377 zerror(zlogp, B_TRUE, "could not stat %s", initpath);
1378 goto bad;
1379 }
1380
1381 /* LINTED: E_NOP_IF_STMT */
1382 if ((st.st_mode & S_IFMT) == S_IFLNK) {
1383 /* symlink, we'll have to wait and resolve when we boot */
1384 } else if ((st.st_mode & S_IXUSR) == 0) {
1385 zerror(zlogp, B_FALSE, "%s is not executable", initpath);
1386 goto bad;
1387 }
1388
1389 /*
1390 * Exclusive stack zones interact with the dlmgmtd running in the
1391 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is
1392 * booting, and loads its datalinks from the zone's datalink
1393 * configuration file.
1394 */
1395 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
1396 status = dladm_zone_boot(dld_handle, zoneid);
1397 if (status != DLADM_STATUS_OK) {
1398 zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
1399 " %s", dladm_status2str(status, errmsg));
1400 goto bad;
1401 }
1402 }
1403
1404 /*
1405 * If there is a brand 'boot' callback, execute it now to give the
1406 * brand one last chance to do any additional setup before the zone
1407 * is booted.
1408 */
1409 if ((strlen(cmdbuf) > EXEC_LEN) &&
1410 (do_subproc(zlogp, cmdbuf, NULL, debug) != Z_OK)) {
1411 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
1412 goto bad;
1413 }
1414
1415 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
1416 zerror(zlogp, B_TRUE, "could not set zone boot file");
1417 goto bad;
1418 }
1419
1420 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
1421 zerror(zlogp, B_TRUE, "could not set zone boot arguments");
1422 goto bad;
1423 }
1424
1425 if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART,
1426 NULL, 0) == -1) {
1427 zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
1428 goto bad;
1429 }
1430
1431 if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT,
1432 (void *)B_TRUE, sizeof (boolean_t)) == -1) {
1433 zerror(zlogp, B_TRUE, "could not set zone app-die");
1434 goto bad;
1435 }
1436
1437 /*
1438 * Inform zonestatd of a new zone so that it can install a door for
1439 * the zone to contact it.
1440 */
1441 notify_zonestatd(zone_id);
1442
1443 /* Startup a thread to perform zfd logging/tty svc for the zone. */
1444 create_log_thread(zlogp);
1445
1446 if (zone_boot(zoneid) == -1) {
1447 zerror(zlogp, B_TRUE, "unable to boot zone");
1448 destroy_log_thread(zlogp);
1449 goto bad;
1450 }
1451
1452 if (brand_poststatechg(zlogp, zstate, Z_BOOT, debug) != 0) {
1453 destroy_log_thread(zlogp);
1454 goto bad;
1455 }
1456
1457 return (0);
1458
1459 bad:
1460 /*
1461 * If something goes wrong, we up the zones's state to the target
1462 * state, RUNNING, and then invoke the hook as if we're halting.
1463 */
1464 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT, debug);
1465
1466 return (-1);
1467 }
1468
1469 static int
1470 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate,
1471 boolean_t debug)
1472 {
1473 int err;
1474
1475 /*
1476 * If performing a scratch zone unmount then do not call the
1477 * state change hooks.
1478 */
1479 if (unmount_cmd == B_FALSE &&
1480 brand_prestatechg(zlogp, zstate, Z_HALT, debug) != 0)
1481 return (-1);
1482
1483 if (vplat_teardown(zlogp, unmount_cmd, rebooting, debug) != 0) {
1484 if (!bringup_failure_recovery)
1485 zerror(zlogp, B_FALSE, "unable to destroy zone");
1486 destroy_log_thread(zlogp);
1487 return (-1);
1488 }
1489
1490 /* Shut down is done, stop the log thread */
1491 destroy_log_thread(zlogp);
1492
1493 if (unmount_cmd == B_FALSE &&
1494 brand_poststatechg(zlogp, zstate, Z_HALT, debug) != 0)
1495 return (-1);
1496
1497 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
1498 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
1499 zonecfg_strerror(err));
1500
1501 zonecfg_fini_handle(snap_hndl);
1502 snap_hndl = NULL;
1503
1504 return (0);
1505 }
1506
1507 static int
1508 zone_graceful_shutdown(zlog_t *zlogp)
1509 {
1510 zoneid_t zoneid;
1511 pid_t child;
1512 char cmdbuf[MAXPATHLEN];
1513 brand_handle_t bh = NULL;
1514 ctid_t ct;
1515 int tmpl_fd;
1516 int child_status;
1517
1518 if (shutdown_in_progress) {
1519 zerror(zlogp, B_FALSE, "shutdown already in progress");
1520 return (-1);
1521 }
1522
1523 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1524 zerror(zlogp, B_TRUE, "unable to get zoneid");
1525 return (-1);
1526 }
1527
1528 /* Get a handle to the brand info for this zone */
1529 if ((bh = brand_open(brand_name)) == NULL) {
1530 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1531 return (-1);
1532 }
1533
1534 /*
1535 * If there is a brand 'shutdown' callback, execute it now to give the
1536 * brand a chance to cleanup any custom configuration.
1537 */
1538 (void) strcpy(cmdbuf, EXEC_PREFIX);
1539 if (brand_get_shutdown(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
1540 sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
1541 (void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
1542 }
1543 brand_close(bh);
1544
1545 if ((tmpl_fd = init_template()) == -1) {
1546 zerror(zlogp, B_TRUE, "failed to create contract");
1547 return (-1);
1548 }
1549
1550 if ((child = fork()) == -1) {
1551 (void) ct_tmpl_clear(tmpl_fd);
1552 (void) close(tmpl_fd);
1553 zerror(zlogp, B_TRUE, "failed to fork");
1554 return (-1);
1555 } else if (child == 0) {
1556 (void) ct_tmpl_clear(tmpl_fd);
1557 if (zone_enter(zoneid) == -1) {
1558 _exit(errno);
1559 }
1560 _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL));
1561 }
1562
1563 if (contract_latest(&ct) == -1)
1564 ct = -1;
1565 (void) ct_tmpl_clear(tmpl_fd);
1566 (void) close(tmpl_fd);
1567
1568 if (waitpid(child, &child_status, 0) != child) {
1569 /* unexpected: we must have been signalled */
1570 (void) contract_abandon_id(ct);
1571 return (-1);
1572 }
1573
1574 (void) contract_abandon_id(ct);
1575 if (WEXITSTATUS(child_status) != 0) {
1576 errno = WEXITSTATUS(child_status);
1577 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1578 return (-1);
1579 }
1580
1581 shutdown_in_progress = B_TRUE;
1582
1583 return (0);
1584 }
1585
1586 static int
1587 zone_wait_shutdown(zlog_t *zlogp)
1588 {
1589 zone_state_t zstate;
1590 uint64_t *tm = NULL;
1591 scf_simple_prop_t *prop = NULL;
1592 int timeout;
1593 int tries;
1594 int rc = -1;
1595
1596 /* Get default stop timeout from SMF framework */
1597 timeout = SHUTDOWN_WAIT;
1598 if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop",
1599 SCF_PROPERTY_TIMEOUT)) != NULL) {
1600 if ((tm = scf_simple_prop_next_count(prop)) != NULL) {
1601 if (tm != 0)
1602 timeout = *tm;
1603 }
1604 scf_simple_prop_free(prop);
1605 }
1606
1607 /* allow time for zone to shutdown cleanly */
1608 for (tries = 0; tries < timeout; tries ++) {
1609 (void) sleep(1);
1610 if (zone_get_state(zone_name, &zstate) == Z_OK &&
1611 zstate == ZONE_STATE_INSTALLED) {
1612 rc = 0;
1613 break;
1614 }
1615 }
1616
1617 if (rc != 0)
1618 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1619
1620 shutdown_in_progress = B_FALSE;
1621
1622 return (rc);
1623 }
1624
1625
1626
1627 /*
1628 * Generate AUE_zone_state for a command that boots a zone.
1629 */
1630 static void
1631 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
1632 char *new_state)
1633 {
1634 adt_session_data_t *ah;
1635 adt_event_data_t *event;
1636 int pass_fail, fail_reason;
1637
1638 if (!adt_audit_enabled())
1639 return;
1640
1641 if (return_val == 0) {
1642 pass_fail = ADT_SUCCESS;
1643 fail_reason = ADT_SUCCESS;
1644 } else {
1645 pass_fail = ADT_FAILURE;
1646 fail_reason = ADT_FAIL_VALUE_PROGRAM;
1647 }
1648
1649 if (adt_start_session(&ah, NULL, 0)) {
1650 zerror(zlogp, B_TRUE, gettext("audit failure."));
1651 return;
1652 }
1653 if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
1654 zerror(zlogp, B_TRUE, gettext("audit failure."));
1655 (void) adt_end_session(ah);
1656 return;
1657 }
1658
1659 event = adt_alloc_event(ah, ADT_zone_state);
1660 if (event == NULL) {
1661 zerror(zlogp, B_TRUE, gettext("audit failure."));
1662 (void) adt_end_session(ah);
1663 return;
1664 }
1665 event->adt_zone_state.zonename = zone_name;
1666 event->adt_zone_state.new_state = new_state;
1667
1668 if (adt_put_event(event, pass_fail, fail_reason))
1669 zerror(zlogp, B_TRUE, gettext("audit failure."));
1670
1671 adt_free_event(event);
1672
1673 (void) adt_end_session(ah);
1674 }
1675
1676 /*
1677 * Log the exit time and status of the zone's init process into
1678 * {zonepath}/lastexited. If the zone shutdown normally, the exit status will
1679 * be -1, otherwise it will be the exit status as described in wait.3c.
1680 * If the zone is configured to restart init, then nothing will be logged if
1681 * init exits unexpectedly (the kernel will never upcall in this case).
1682 */
1683 static void
1684 log_init_exit(int status)
1685 {
1686 char p[MAXPATHLEN];
1687 char buf[128];
1688 struct timeval t;
1689 int fd;
1690
1691 if (snprintf(p, sizeof (p), "%s/lastexited", zonepath) > sizeof (p))
1692 return;
1693 if (gettimeofday(&t, NULL) != 0)
1694 return;
1695 if (snprintf(buf, sizeof (buf), "%ld.%ld %d\n", t.tv_sec, t.tv_usec,
1696 status) > sizeof (buf))
1697 return;
1698 if ((fd = open(p, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0)
1699 return;
1700
1701 (void) write(fd, buf, strlen(buf));
1702
1703 (void) close(fd);
1704 }
1705
1706 /*
1707 * The main routine for the door server that deals with zone state transitions.
1708 */
1709 /* ARGSUSED */
1710 static void
1711 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1712 uint_t n_desc)
1713 {
1714 ucred_t *uc = NULL;
1715 const priv_set_t *eset;
1716
1717 zone_state_t zstate;
1718 zone_cmd_t cmd;
1719 boolean_t debug;
1720 int init_status;
1721 zone_cmd_arg_t *zargp;
1722
1723 boolean_t kernelcall = B_TRUE;
1724
1725 int rval = -1;
1726 uint64_t uniqid;
1727 zoneid_t zoneid = -1;
1728 zlog_t zlog;
1729 zlog_t *zlogp;
1730 zone_cmd_rval_t *rvalp;
1731 size_t rlen = getpagesize(); /* conservative */
1732 fs_callback_t cb;
1733 brand_handle_t bh;
1734 boolean_t wait_shut = B_FALSE;
1735
1736 /* LINTED E_BAD_PTR_CAST_ALIGN */
1737 zargp = (zone_cmd_arg_t *)args;
1738
1739 /*
1740 * When we get the door unref message, we've fdetach'd the door, and
1741 * it is time for us to shut down zoneadmd.
1742 */
1743 if (zargp == DOOR_UNREF_DATA) {
1744 logstream_close(platloghdl, B_TRUE);
1745
1746 /*
1747 * See comment at end of main() for info on the last rites.
1748 */
1749 exit(0);
1750 }
1751
1752 if (zargp == NULL) {
1753 (void) door_return(NULL, 0, 0, 0);
1754 }
1755
1756 rvalp = alloca(rlen);
1757 bzero(rvalp, rlen);
1758 zlog.logfile = NULL;
1759 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1760 zlog.buf = rvalp->errbuf;
1761 zlog.log = zlog.buf;
1762 /* defer initialization of zlog.locale until after credential check */
1763 zlogp = &zlog;
1764
1765 if (alen != sizeof (zone_cmd_arg_t)) {
1766 /*
1767 * This really shouldn't be happening.
1768 */
1769 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1770 "unexpected (expected %d bytes)", alen,
1771 sizeof (zone_cmd_arg_t));
1772 goto out;
1773 }
1774 cmd = zargp->cmd;
1775 debug = zargp->debug;
1776 init_status = zargp->status;
1777
1778 if (door_ucred(&uc) != 0) {
1779 zerror(&logsys, B_TRUE, "door_ucred");
1780 goto out;
1781 }
1782 eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1783 if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1784 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1785 ucred_geteuid(uc) != 0)) {
1786 zerror(&logsys, B_FALSE, "insufficient privileges");
1787 goto out;
1788 }
1789
1790 kernelcall = ucred_getpid(uc) == 0;
1791
1792 /*
1793 * This is safe because we only use a zlog_t throughout the
1794 * duration of a door call; i.e., by the time the pointer
1795 * might become invalid, the door call would be over.
1796 */
1797 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
1798
1799 (void) mutex_lock(&lock);
1800
1801 /*
1802 * Once we start to really die off, we don't want more connections.
1803 */
1804 if (in_death_throes) {
1805 (void) mutex_unlock(&lock);
1806 ucred_free(uc);
1807 (void) door_return(NULL, 0, 0, 0);
1808 thr_exit(NULL);
1809 }
1810
1811 /*
1812 * Check for validity of command.
1813 */
1814 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT &&
1815 cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT &&
1816 cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT &&
1817 cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) {
1818 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd);
1819 goto out;
1820 }
1821
1822 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
1823 /*
1824 * Can't happen
1825 */
1826 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
1827 cmd);
1828 goto out;
1829 }
1830 /*
1831 * We ignore the possibility of someone calling zone_create(2)
1832 * explicitly; all requests must come through zoneadmd.
1833 */
1834 if (zone_get_state(zone_name, &zstate) != Z_OK) {
1835 /*
1836 * Something terribly wrong happened
1837 */
1838 zerror(&logsys, B_FALSE, "unable to determine state of zone");
1839 goto out;
1840 }
1841
1842 if (kernelcall) {
1843 /*
1844 * Kernel-initiated requests may lose their validity if the
1845 * zone_t the kernel was referring to has gone away.
1846 */
1847 if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
1848 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
1849 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
1850 /*
1851 * We're not talking about the same zone. The request
1852 * must have arrived too late. Return error.
1853 */
1854 rval = -1;
1855 goto out;
1856 }
1857 zlogp = &logplat; /* Log errors to platform.log */
1858 }
1859
1860 /*
1861 * If we are being asked to forcibly mount or boot a zone, we
1862 * pretend that an INCOMPLETE zone is actually INSTALLED.
1863 */
1864 if (zstate == ZONE_STATE_INCOMPLETE &&
1865 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1866 zstate = ZONE_STATE_INSTALLED;
1867
1868 switch (zstate) {
1869 case ZONE_STATE_CONFIGURED:
1870 case ZONE_STATE_INCOMPLETE:
1871 /*
1872 * Not our area of expertise; we just print a nice message
1873 * and die off.
1874 */
1875 zerror(zlogp, B_FALSE,
1876 "%s operation is invalid for zones in state '%s'",
1877 z_cmd_name(cmd), zone_state_str(zstate));
1878 break;
1879
1880 case ZONE_STATE_INSTALLED:
1881 switch (cmd) {
1882 case Z_READY:
1883 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, debug);
1884 if (rval == 0)
1885 eventstream_write(Z_EVT_ZONE_READIED);
1886 zcons_statechanged();
1887 break;
1888 case Z_BOOT:
1889 case Z_FORCEBOOT:
1890 eventstream_write(Z_EVT_ZONE_BOOTING);
1891 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
1892 debug)) == 0) {
1893 rval = zone_bootup(zlogp, zargp->bootbuf,
1894 zstate, debug);
1895 }
1896 audit_put_record(zlogp, uc, rval, "boot");
1897 zcons_statechanged();
1898 if (rval != 0) {
1899 bringup_failure_recovery = B_TRUE;
1900 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1901 zstate, debug);
1902 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1903 }
1904 break;
1905 case Z_SHUTDOWN:
1906 case Z_HALT:
1907 if (kernelcall) /* Invalid; can't happen */
1908 abort();
1909 /*
1910 * We could have two clients racing to halt this
1911 * zone; the second client loses, but its request
1912 * doesn't fail, since the zone is now in the desired
1913 * state.
1914 */
1915 zerror(zlogp, B_FALSE, "zone is already halted");
1916 rval = 0;
1917 break;
1918 case Z_REBOOT:
1919 if (kernelcall) /* Invalid; can't happen */
1920 abort();
1921 zerror(zlogp, B_FALSE, "%s operation is invalid "
1922 "for zones in state '%s'", z_cmd_name(cmd),
1923 zone_state_str(zstate));
1924 rval = -1;
1925 break;
1926 case Z_NOTE_UNINSTALLING:
1927 if (kernelcall) /* Invalid; can't happen */
1928 abort();
1929 /*
1930 * Tell the console to print out a message about this.
1931 * Once it does, we will be in_death_throes.
1932 */
1933 eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1934 break;
1935 case Z_MOUNT:
1936 case Z_FORCEMOUNT:
1937 if (kernelcall) /* Invalid; can't happen */
1938 abort();
1939 if (!zone_isnative && !zone_iscluster &&
1940 !zone_islabeled) {
1941 /*
1942 * -U mounts the zone without lofs mounting
1943 * zone file systems back into the scratch
1944 * zone. This is required when mounting
1945 * non-native branded zones.
1946 */
1947 (void) strlcpy(zargp->bootbuf, "-U",
1948 BOOTARGS_MAX);
1949 }
1950
1951 rval = zone_ready(zlogp,
1952 strcmp(zargp->bootbuf, "-U") == 0 ?
1953 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate, debug);
1954 if (rval != 0)
1955 break;
1956
1957 eventstream_write(Z_EVT_ZONE_READIED);
1958
1959 /*
1960 * Get a handle to the default brand info.
1961 * We must always use the default brand file system
1962 * list when mounting the zone.
1963 */
1964 if ((bh = brand_open(default_brand)) == NULL) {
1965 rval = -1;
1966 break;
1967 }
1968
1969 /*
1970 * Get the list of filesystems to mount from
1971 * the brand configuration. These mounts are done
1972 * via a thread that will enter the zone, so they
1973 * are done from within the context of the zone.
1974 */
1975 cb.zlogp = zlogp;
1976 cb.zoneid = zone_id;
1977 cb.mount_cmd = B_TRUE;
1978 rval = brand_platform_iter_mounts(bh,
1979 mount_early_fs, &cb);
1980
1981 brand_close(bh);
1982
1983 /*
1984 * Ordinarily, /dev/fd would be mounted inside the zone
1985 * by svc:/system/filesystem/usr:default, but since
1986 * we're not booting the zone, we need to do this
1987 * manually.
1988 */
1989 if (rval == 0)
1990 rval = mount_early_fs(&cb,
1991 "fd", "/dev/fd", "fd", NULL);
1992 break;
1993 case Z_UNMOUNT:
1994 if (kernelcall) /* Invalid; can't happen */
1995 abort();
1996 zerror(zlogp, B_FALSE, "zone is already unmounted");
1997 rval = 0;
1998 break;
1999 }
2000 break;
2001
2002 case ZONE_STATE_READY:
2003 switch (cmd) {
2004 case Z_READY:
2005 /*
2006 * We could have two clients racing to ready this
2007 * zone; the second client loses, but its request
2008 * doesn't fail, since the zone is now in the desired
2009 * state.
2010 */
2011 zerror(zlogp, B_FALSE, "zone is already ready");
2012 rval = 0;
2013 break;
2014 case Z_BOOT:
2015 case Z_FORCEBOOT:
2016 (void) strlcpy(boot_args, zargp->bootbuf,
2017 sizeof (boot_args));
2018 eventstream_write(Z_EVT_ZONE_BOOTING);
2019 rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
2020 debug);
2021 audit_put_record(zlogp, uc, rval, "boot");
2022 zcons_statechanged();
2023 if (rval != 0) {
2024 bringup_failure_recovery = B_TRUE;
2025 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
2026 zstate, debug);
2027 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
2028 }
2029 boot_args[0] = '\0';
2030 break;
2031 case Z_HALT:
2032 if (kernelcall) /* Invalid; can't happen */
2033 abort();
2034 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
2035 debug)) != 0)
2036 break;
2037 zcons_statechanged();
2038 eventstream_write(Z_EVT_ZONE_HALTED);
2039 break;
2040 case Z_SHUTDOWN:
2041 case Z_REBOOT:
2042 case Z_NOTE_UNINSTALLING:
2043 case Z_MOUNT:
2044 case Z_FORCEMOUNT:
2045 case Z_UNMOUNT:
2046 if (kernelcall) /* Invalid; can't happen */
2047 abort();
2048 zerror(zlogp, B_FALSE, "%s operation is invalid "
2049 "for zones in state '%s'", z_cmd_name(cmd),
2050 zone_state_str(zstate));
2051 rval = -1;
2052 break;
2053 }
2054 break;
2055
2056 case ZONE_STATE_MOUNTED:
2057 switch (cmd) {
2058 case Z_UNMOUNT:
2059 if (kernelcall) /* Invalid; can't happen */
2060 abort();
2061 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate, debug);
2062 if (rval == 0) {
2063 eventstream_write(Z_EVT_ZONE_HALTED);
2064 (void) sema_post(&scratch_sem);
2065 }
2066 break;
2067 default:
2068 if (kernelcall) /* Invalid; can't happen */
2069 abort();
2070 zerror(zlogp, B_FALSE, "%s operation is invalid "
2071 "for zones in state '%s'", z_cmd_name(cmd),
2072 zone_state_str(zstate));
2073 rval = -1;
2074 break;
2075 }
2076 break;
2077
2078 case ZONE_STATE_RUNNING:
2079 case ZONE_STATE_SHUTTING_DOWN:
2080 case ZONE_STATE_DOWN:
2081 switch (cmd) {
2082 case Z_READY:
2083 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
2084 debug)) != 0)
2085 break;
2086 zcons_statechanged();
2087 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
2088 debug)) == 0)
2089 eventstream_write(Z_EVT_ZONE_READIED);
2090 else
2091 eventstream_write(Z_EVT_ZONE_HALTED);
2092 break;
2093 case Z_BOOT:
2094 case Z_FORCEBOOT:
2095 /*
2096 * We could have two clients racing to boot this
2097 * zone; the second client loses, but its request
2098 * doesn't fail, since the zone is now in the desired
2099 * state.
2100 */
2101 zerror(zlogp, B_FALSE, "zone is already booted");
2102 rval = 0;
2103 break;
2104 case Z_HALT:
2105 if (kernelcall) {
2106 log_init_exit(init_status);
2107 } else {
2108 log_init_exit(-1);
2109 }
2110 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
2111 debug)) != 0)
2112 break;
2113 eventstream_write(Z_EVT_ZONE_HALTED);
2114 zcons_statechanged();
2115 break;
2116 case Z_REBOOT:
2117 (void) strlcpy(boot_args, zargp->bootbuf,
2118 sizeof (boot_args));
2119 eventstream_write(Z_EVT_ZONE_REBOOTING);
2120 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
2121 debug)) != 0) {
2122 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
2123 boot_args[0] = '\0';
2124 break;
2125 }
2126 zcons_statechanged();
2127 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
2128 debug)) != 0) {
2129 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
2130 boot_args[0] = '\0';
2131 break;
2132 }
2133 rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
2134 debug);
2135 audit_put_record(zlogp, uc, rval, "reboot");
2136 if (rval != 0) {
2137 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
2138 zstate, debug);
2139 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
2140 }
2141 boot_args[0] = '\0';
2142 break;
2143 case Z_SHUTDOWN:
2144 if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
2145 wait_shut = B_TRUE;
2146 }
2147 break;
2148 case Z_NOTE_UNINSTALLING:
2149 case Z_MOUNT:
2150 case Z_FORCEMOUNT:
2151 case Z_UNMOUNT:
2152 zerror(zlogp, B_FALSE, "%s operation is invalid "
2153 "for zones in state '%s'", z_cmd_name(cmd),
2154 zone_state_str(zstate));
2155 rval = -1;
2156 break;
2157 }
2158 break;
2159 default:
2160 abort();
2161 }
2162
2163 /*
2164 * Because the state of the zone may have changed, we make sure
2165 * to wake the console poller, which is in charge of initiating
2166 * the shutdown procedure as necessary.
2167 */
2168 eventstream_write(Z_EVT_NULL);
2169
2170 out:
2171 (void) mutex_unlock(&lock);
2172
2173 /* Wait for the Z_SHUTDOWN commands to complete */
2174 if (wait_shut)
2175 rval = zone_wait_shutdown(zlogp);
2176
2177 if (kernelcall) {
2178 rvalp = NULL;
2179 rlen = 0;
2180 } else {
2181 rvalp->rval = rval;
2182 }
2183 if (uc != NULL)
2184 ucred_free(uc);
2185 (void) door_return((char *)rvalp, rlen, NULL, 0);
2186 thr_exit(NULL);
2187 }
2188
2189 static int
2190 setup_door(zlog_t *zlogp)
2191 {
2192 if ((zone_door = door_create(server, NULL,
2193 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
2194 zerror(zlogp, B_TRUE, "%s failed", "door_create");
2195 return (-1);
2196 }
2197 (void) fdetach(zone_door_path);
2198
2199 if (fattach(zone_door, zone_door_path) != 0) {
2200 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
2201 (void) door_revoke(zone_door);
2202 (void) fdetach(zone_door_path);
2203 zone_door = -1;
2204 return (-1);
2205 }
2206 return (0);
2207 }
2208
2209 /*
2210 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
2211 * is where zoneadmd itself will check to see that another instance of
2212 * zoneadmd isn't already controlling this zone.
2213 *
2214 * The idea here is that we want to open the path to which we will
2215 * attach our door, lock it, and then make sure that no-one has beat us
2216 * to fattach(3c)ing onto it.
2217 *
2218 * fattach(3c) is really a mount, so there are actually two possible
2219 * vnodes we could be dealing with. Our strategy is as follows:
2220 *
2221 * - If the file we opened is a regular file (common case):
2222 * There is no fattach(3c)ed door, so we have a chance of becoming
2223 * the managing zoneadmd. We attempt to lock the file: if it is
2224 * already locked, that means someone else raced us here, so we
2225 * lose and give up. zoneadm(1m) will try to contact the zoneadmd
2226 * that beat us to it.
2227 *
2228 * - If the file we opened is a namefs file:
2229 * This means there is already an established door fattach(3c)'ed
2230 * to the rendezvous path. We've lost the race, so we give up.
2231 * Note that in this case we also try to grab the file lock, and
2232 * will succeed in acquiring it since the vnode locked by the
2233 * "winning" zoneadmd was a regular one, and the one we locked was
2234 * the fattach(3c)'ed door node. At any rate, no harm is done, and
2235 * we just return to zoneadm(1m) which knows to retry.
2236 */
2237 static int
2238 make_daemon_exclusive(zlog_t *zlogp)
2239 {
2240 int doorfd = -1;
2241 int err, ret = -1;
2242 struct stat st;
2243 struct flock flock;
2244 zone_state_t zstate;
2245
2246 top:
2247 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
2248 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
2249 zonecfg_strerror(err));
2250 goto out;
2251 }
2252 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
2253 S_IREAD|S_IWRITE)) < 0) {
2254 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
2255 goto out;
2256 }
2257 if (fstat(doorfd, &st) < 0) {
2258 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
2259 goto out;
2260 }
2261 /*
2262 * Lock the file to synchronize with other zoneadmd
2263 */
2264 flock.l_type = F_WRLCK;
2265 flock.l_whence = SEEK_SET;
2266 flock.l_start = (off_t)0;
2267 flock.l_len = (off_t)0;
2268 if (fcntl(doorfd, F_SETLK, &flock) < 0) {
2269 /*
2270 * Someone else raced us here and grabbed the lock file
2271 * first. A warning here is inappropriate since nothing
2272 * went wrong.
2273 */
2274 goto out;
2275 }
2276
2277 if (strcmp(st.st_fstype, "namefs") == 0) {
2278 struct door_info info;
2279
2280 /*
2281 * There is already something fattach()'ed to this file.
2282 * Lets see what the door is up to.
2283 */
2284 if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
2285 /*
2286 * Another zoneadmd process seems to be in
2287 * control of the situation and we don't need to
2288 * be here. A warning here is inappropriate
2289 * since nothing went wrong.
2290 *
2291 * If the door has been revoked, the zoneadmd
2292 * process currently managing the zone is going
2293 * away. We'll return control to zoneadm(1m)
2294 * which will try again (by which time zoneadmd
2295 * will hopefully have exited).
2296 */
2297 goto out;
2298 }
2299
2300 /*
2301 * If we got this far, there's a fattach(3c)'ed door
2302 * that belongs to a process that has exited, which can
2303 * happen if the previous zoneadmd died unexpectedly.
2304 *
2305 * Let user know that something is amiss, but that we can
2306 * recover; if the zone is in the installed state, then don't
2307 * message, since having a running zoneadmd isn't really
2308 * expected/needed. We want to keep occurences of this message
2309 * limited to times when zoneadmd is picking back up from a
2310 * zoneadmd that died while the zone was in some non-trivial
2311 * state.
2312 */
2313 if (zstate > ZONE_STATE_INSTALLED) {
2314 zerror(zlogp, B_FALSE,
2315 "zone '%s': WARNING: zone is in state '%s', but "
2316 "zoneadmd does not appear to be available; "
2317 "restarted zoneadmd to recover.",
2318 zone_name, zone_state_str(zstate));
2319
2320 /*
2321 * Startup a thread to perform the zfd logging/tty svc
2322 * for the zone. zlogp won't be valid for much longer
2323 * so use logplat.
2324 */
2325 if (getzoneidbyname(zone_name) != -1) {
2326 create_log_thread(&logplat);
2327 }
2328
2329 /* recover the global configuration snapshot */
2330 if (snap_hndl == NULL) {
2331 if ((snap_hndl = zonecfg_init_handle())
2332 == NULL ||
2333 zonecfg_create_snapshot(zone_name)
2334 != Z_OK ||
2335 zonecfg_get_snapshot_handle(zone_name,
2336 snap_hndl) != Z_OK) {
2337 zerror(zlogp, B_FALSE, "recovering "
2338 "zone configuration handle");
2339 goto out;
2340 }
2341 }
2342 }
2343
2344 (void) fdetach(zone_door_path);
2345 (void) close(doorfd);
2346 goto top;
2347 }
2348 ret = 0;
2349 out:
2350 (void) close(doorfd);
2351 return (ret);
2352 }
2353
2354 /*
2355 * Run the query hook with the 'env' parameter. It should return a
2356 * string of tab-delimited key-value pairs, each of which should be set
2357 * in the environment.
2358 *
2359 * Because the env_vars string values become part of the environment, the
2360 * string is static and we don't free it.
2361 *
2362 * This function is always called before zoneadmd forks and makes itself
2363 * exclusive, so it is possible there could more than one instance of zoneadmd
2364 * running in parallel at this point. Thus, we have no zonecfg snapshot and
2365 * shouldn't take one yet (i.e. snap_hndl is NULL). Thats ok, since we don't
2366 * need any zonecfg info to query for a brand-specific env value.
2367 */
2368 static int
2369 set_brand_env(zlog_t *zlogp)
2370 {
2371 int ret = 0;
2372 static char *env_vars = NULL;
2373 char buf[2 * MAXPATHLEN];
2374
2375 if (query_hook[0] == '\0' || env_vars != NULL)
2376 return (0);
2377
2378 if (snprintf(buf, sizeof (buf), "%s env", query_hook) > sizeof (buf))
2379 return (-1);
2380
2381 if (do_subproc(zlogp, buf, &env_vars, B_FALSE) != 0)
2382 return (-1);
2383
2384 if (env_vars != NULL) {
2385 char *sp;
2386
2387 sp = strtok(env_vars, "\t");
2388 while (sp != NULL) {
2389 if (putenv(sp) != 0) {
2390 ret = -1;
2391 break;
2392 }
2393 sp = strtok(NULL, "\t");
2394 }
2395 }
2396
2397 return (ret);
2398 }
2399
2400 /*
2401 * Setup the brand's pre and post state change callbacks, as well as the
2402 * query callback, if any of these exist.
2403 */
2404 static int
2405 brand_callback_init(brand_handle_t bh, char *zone_name)
2406 {
2407 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
2408 sizeof (pre_statechg_hook));
2409
2410 if (brand_get_prestatechange(bh, zone_name, zonepath,
2411 pre_statechg_hook + EXEC_LEN,
2412 sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
2413 return (-1);
2414
2415 if (strlen(pre_statechg_hook) <= EXEC_LEN)
2416 pre_statechg_hook[0] = '\0';
2417
2418 (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
2419 sizeof (post_statechg_hook));
2420
2421 if (brand_get_poststatechange(bh, zone_name, zonepath,
2422 post_statechg_hook + EXEC_LEN,
2423 sizeof (post_statechg_hook) - EXEC_LEN) != 0)
2424 return (-1);
2425
2426 if (strlen(post_statechg_hook) <= EXEC_LEN)
2427 post_statechg_hook[0] = '\0';
2428
2429 (void) strlcpy(query_hook, EXEC_PREFIX,
2430 sizeof (query_hook));
2431
2432 if (brand_get_query(bh, zone_name, zonepath, query_hook + EXEC_LEN,
2433 sizeof (query_hook) - EXEC_LEN) != 0)
2434 return (-1);
2435
2436 if (strlen(query_hook) <= EXEC_LEN)
2437 query_hook[0] = '\0';
2438
2439 return (0);
2440 }
2441
2442 int
2443 main(int argc, char *argv[])
2444 {
2445 int opt;
2446 zoneid_t zid;
2447 priv_set_t *privset;
2448 zone_state_t zstate;
2449 char parents_locale[MAXPATHLEN];
2450 brand_handle_t bh;
2451 int err;
2452
2453 pid_t pid;
2454 sigset_t blockset;
2455 sigset_t block_cld;
2456
2457 struct {
2458 sema_t sem;
2459 int status;
2460 zlog_t log;
2461 } *shstate;
2462 size_t shstatelen = getpagesize();
2463
2464 zlog_t errlog;
2465 zlog_t *zlogp;
2466
2467 int ctfd;
2468
2469 progname = get_execbasename(argv[0]);
2470
2471 /*
2472 * Make sure stderr is unbuffered
2473 */
2474 (void) setbuffer(stderr, NULL, 0);
2475
2476 /*
2477 * Get out of the way of mounted filesystems, since we will daemonize
2478 * soon.
2479 */
2480 (void) chdir("/");
2481
2482 /*
2483 * Use the default system umask per PSARC 1998/110 rather than
2484 * anything that may have been set by the caller.
2485 */
2486 (void) umask(CMASK);
2487
2488 /*
2489 * Initially we want to use our parent's locale.
2490 */
2491 (void) setlocale(LC_ALL, "");
2492 (void) textdomain(TEXT_DOMAIN);
2493 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
2494 sizeof (parents_locale));
2495
2496 /*
2497 * This zlog_t is used for writing to stderr
2498 */
2499 errlog.logfile = stderr;
2500 errlog.buflen = errlog.loglen = 0;
2501 errlog.buf = errlog.log = NULL;
2502 errlog.locale = parents_locale;
2503
2504 /*
2505 * We start off writing to stderr until we're ready to daemonize.
2506 */
2507 zlogp = &errlog;
2508
2509 /*
2510 * Process options.
2511 */
2512 while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
2513 switch (opt) {
2514 case 'R':
2515 zonecfg_set_root(optarg);
2516 break;
2517 case 'z':
2518 zone_name = optarg;
2519 break;
2520 default:
2521 usage();
2522 }
2523 }
2524
2525 if (zone_name == NULL)
2526 usage();
2527
2528 /*
2529 * Because usage() prints directly to stderr, it has gettext()
2530 * wrapping, which depends on the locale. But since zerror() calls
2531 * localize() which tweaks the locale, it is not safe to call zerror()
2532 * until after the last call to usage(). Fortunately, the last call
2533 * to usage() is just above and the first call to zerror() is just
2534 * below. Don't mess this up.
2535 */
2536 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
2537 zerror(zlogp, B_FALSE, "cannot manage the %s zone",
2538 GLOBAL_ZONENAME);
2539 return (1);
2540 }
2541
2542 if (zone_get_id(zone_name, &zid) != 0) {
2543 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
2544 zonecfg_strerror(Z_NO_ZONE));
2545 return (1);
2546 }
2547
2548 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
2549 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
2550 zonecfg_strerror(err));
2551 return (1);
2552 }
2553 if (zstate < ZONE_STATE_INCOMPLETE) {
2554 zerror(zlogp, B_FALSE,
2555 "cannot manage a zone which is in state '%s'",
2556 zone_state_str(zstate));
2557 return (1);
2558 }
2559
2560 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
2561 zerror(zlogp, B_FALSE, "unable to determine zone path");
2562 return (-1);
2563 }
2564
2565 if (zonecfg_default_brand(default_brand,
2566 sizeof (default_brand)) != Z_OK) {
2567 zerror(zlogp, B_FALSE, "unable to determine default brand");
2568 return (1);
2569 }
2570
2571 /* Get a handle to the brand info for this zone */
2572 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name))
2573 != Z_OK) {
2574 zerror(zlogp, B_FALSE, "unable to determine zone brand");
2575 return (1);
2576 }
2577 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0);
2578 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0);
2579
2580 /*
2581 * In the alternate root environment, the only supported
2582 * operations are mount and unmount. In this case, just treat
2583 * the zone as native if it is cluster. Cluster zones can be
2584 * native for the purpose of LU or upgrade, and the cluster
2585 * brand may not exist in the miniroot (such as in net install
2586 * upgrade).
2587 */
2588 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) {
2589 zone_iscluster = B_TRUE;
2590 if (zonecfg_in_alt_root()) {
2591 (void) strlcpy(brand_name, default_brand,
2592 sizeof (brand_name));
2593 }
2594 } else {
2595 zone_iscluster = B_FALSE;
2596 }
2597
2598 if ((bh = brand_open(brand_name)) == NULL) {
2599 zerror(zlogp, B_FALSE, "unable to open zone brand");
2600 return (1);
2601 }
2602
2603 /* Get state change brand hooks. */
2604 if (brand_callback_init(bh, zone_name) == -1) {
2605 zerror(zlogp, B_TRUE,
2606 "failed to initialize brand state change hooks");
2607 brand_close(bh);
2608 return (1);
2609 }
2610
2611 brand_close(bh);
2612
2613 /*
2614 * Check that we have all privileges. It would be nice to pare
2615 * this down, but this is at least a first cut.
2616 */
2617 if ((privset = priv_allocset()) == NULL) {
2618 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
2619 return (1);
2620 }
2621
2622 if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2623 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
2624 priv_freeset(privset);
2625 return (1);
2626 }
2627
2628 if (priv_isfullset(privset) == B_FALSE) {
2629 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
2630 "run this command (all privs required)");
2631 priv_freeset(privset);
2632 return (1);
2633 }
2634 priv_freeset(privset);
2635
2636 if (set_brand_env(zlogp) != 0) {
2637 zerror(zlogp, B_FALSE, "Unable to setup brand's environment");
2638 return (1);
2639 }
2640
2641 if (mkzonedir(zlogp) != 0)
2642 return (1);
2643
2644 /*
2645 * Pre-fork: setup shared state
2646 */
2647 if ((shstate = (void *)mmap(NULL, shstatelen,
2648 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
2649 MAP_FAILED) {
2650 zerror(zlogp, B_TRUE, "%s failed", "mmap");
2651 return (1);
2652 }
2653 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
2654 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
2655 (void) munmap((char *)shstate, shstatelen);
2656 return (1);
2657 }
2658 shstate->log.logfile = NULL;
2659 shstate->log.buflen = shstatelen - sizeof (*shstate);
2660 shstate->log.loglen = shstate->log.buflen;
2661 shstate->log.buf = (char *)shstate + sizeof (*shstate);
2662 shstate->log.log = shstate->log.buf;
2663 shstate->log.locale = parents_locale;
2664 shstate->status = -1;
2665
2666 /*
2667 * We need a SIGCHLD handler so the sema_wait() below will wake
2668 * up if the child dies without doing a sema_post().
2669 */
2670 (void) sigset(SIGCHLD, sigchld);
2671 /*
2672 * We must mask SIGCHLD until after we've coped with the fork
2673 * sufficiently to deal with it; otherwise we can race and
2674 * receive the signal before pid has been initialized
2675 * (yes, this really happens).
2676 */
2677 (void) sigemptyset(&block_cld);
2678 (void) sigaddset(&block_cld, SIGCHLD);
2679 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2680
2681 /*
2682 * The parent only needs stderr after the fork, so close other fd's
2683 * that we inherited from zoneadm so that the parent doesn't have those
2684 * open while waiting. The child will close the rest after the fork.
2685 */
2686 closefrom(3);
2687
2688 if ((ctfd = init_template()) == -1) {
2689 zerror(zlogp, B_TRUE, "failed to create contract");
2690 return (1);
2691 }
2692
2693 /*
2694 * Do not let another thread localize a message while we are forking.
2695 */
2696 (void) mutex_lock(&msglock);
2697 pid = fork();
2698 (void) mutex_unlock(&msglock);
2699
2700 /*
2701 * In all cases (parent, child, and in the event of an error) we
2702 * don't want to cause creation of contracts on subsequent fork()s.
2703 */
2704 (void) ct_tmpl_clear(ctfd);
2705 (void) close(ctfd);
2706
2707 if (pid == -1) {
2708 zerror(zlogp, B_TRUE, "could not fork");
2709 return (1);
2710
2711 } else if (pid > 0) { /* parent */
2712 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2713 /*
2714 * This marks a window of vulnerability in which we receive
2715 * the SIGCLD before falling into sema_wait (normally we would
2716 * get woken up from sema_wait with EINTR upon receipt of
2717 * SIGCLD). So we may need to use some other scheme like
2718 * sema_posting in the sigcld handler.
2719 * blech
2720 */
2721 (void) sema_wait(&shstate->sem);
2722 (void) sema_destroy(&shstate->sem);
2723 if (shstate->status != 0)
2724 (void) waitpid(pid, NULL, WNOHANG);
2725 /*
2726 * It's ok if we die with SIGPIPE. It's not like we could have
2727 * done anything about it.
2728 */
2729 (void) fprintf(stderr, "%s", shstate->log.buf);
2730 _exit(shstate->status == 0 ? 0 : 1);
2731 }
2732
2733 /*
2734 * The child charges on.
2735 */
2736 (void) sigset(SIGCHLD, SIG_DFL);
2737 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2738
2739 /*
2740 * SIGPIPE can be delivered if we write to a socket for which the
2741 * peer endpoint is gone. That can lead to too-early termination
2742 * of zoneadmd, and that's not good eats.
2743 */
2744 (void) sigset(SIGPIPE, SIG_IGN);
2745 /*
2746 * Stop using stderr
2747 */
2748 zlogp = &shstate->log;
2749
2750 /*
2751 * We don't need stdout/stderr from now on.
2752 */
2753 closefrom(0);
2754
2755 /*
2756 * Initialize the syslog zlog_t. This needs to be done after
2757 * the call to closefrom().
2758 */
2759 logsys.buf = logsys.log = NULL;
2760 logsys.buflen = logsys.loglen = 0;
2761 logsys.logfile = NULL;
2762 logsys.locale = DEFAULT_LOCALE;
2763
2764 openlog("zoneadmd", LOG_PID, LOG_DAEMON);
2765
2766 /*
2767 * Allow logging to <zonepath>/logs/<file>.
2768 */
2769 logstream_init(zlogp);
2770 platloghdl = logstream_open("platform.log", "zoneadmd", 0);
2771
2772 /* logplat looks the same as logsys, but logs to platform.log */
2773 logplat = logsys;
2774
2775 /*
2776 * The eventstream is used to publish state changes in the zone
2777 * from the door threads to the console I/O poller.
2778 */
2779 if (eventstream_init() == -1) {
2780 zerror(zlogp, B_TRUE, "unable to create eventstream");
2781 goto child_out;
2782 }
2783
2784 (void) snprintf(zone_door_path, sizeof (zone_door_path),
2785 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);
2786
2787 /*
2788 * See if another zoneadmd is running for this zone. If not, then we
2789 * can now modify system state.
2790 */
2791 if (make_daemon_exclusive(zlogp) == -1)
2792 goto child_out;
2793
2794 /*
2795 * Create/join a new session; we need to be careful of what we do with
2796 * the console from now on so we don't end up being the session leader
2797 * for the terminal we're going to be handing out.
2798 */
2799 (void) setsid();
2800
2801 /*
2802 * This thread shouldn't be receiving any signals; in particular,
2803 * SIGCHLD should be received by the thread doing the fork(). The
2804 * exceptions are SIGHUP and SIGUSR1 for log rotation, set up by
2805 * logstream_init().
2806 */
2807 (void) sigfillset(&blockset);
2808 (void) sigdelset(&blockset, SIGHUP);
2809 (void) sigdelset(&blockset, SIGUSR1);
2810 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
2811
2812 /*
2813 * Setup the console device and get ready to serve the console;
2814 * once this has completed, we're ready to let console clients
2815 * make an attempt to connect (they will block until
2816 * serve_console_sock() below gets called, and any pending
2817 * connection is accept()ed).
2818 */
2819 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0)
2820 goto child_out;
2821
2822 /*
2823 * Take the lock now, so that when the door server gets going, we
2824 * are guaranteed that it won't take a request until we are sure
2825 * that everything is completely set up. See the child_out: label
2826 * below to see why this matters.
2827 */
2828 (void) mutex_lock(&lock);
2829
2830 /* Init semaphore for scratch zones. */
2831 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
2832 zerror(zlogp, B_TRUE,
2833 "failed to initialize semaphore for scratch zone");
2834 goto child_out;
2835 }
2836
2837 /* open the dladm handle */
2838 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) {
2839 zerror(zlogp, B_FALSE, "failed to open dladm handle");
2840 goto child_out;
2841 }
2842
2843 /*
2844 * Note: door setup must occur *after* the console is setup.
2845 * This is so that as zlogin tests the door to see if zoneadmd
2846 * is ready yet, we know that the console will get serviced
2847 * once door_info() indicates that the door is "up".
2848 */
2849 if (setup_door(zlogp) == -1)
2850 goto child_out;
2851
2852 /*
2853 * Things seem OK so far; tell the parent process that we're done
2854 * with setup tasks. This will cause the parent to exit, signalling
2855 * to zoneadm, zlogin, or whatever forked it that we are ready to
2856 * service requests.
2857 */
2858 shstate->status = 0;
2859 (void) sema_post(&shstate->sem);
2860 (void) munmap((char *)shstate, shstatelen);
2861 shstate = NULL;
2862
2863 (void) mutex_unlock(&lock);
2864
2865 /*
2866 * zlogp is now invalid, so reset it to the syslog logger.
2867 */
2868 zlogp = &logsys;
2869
2870 /*
2871 * Now that we are free of any parents, switch to the default locale.
2872 */
2873 (void) setlocale(LC_ALL, DEFAULT_LOCALE);
2874
2875 /*
2876 * At this point the setup portion of main() is basically done, so
2877 * we reuse this thread to manage the zone console. When
2878 * serve_console() has returned, we are past the point of no return
2879 * in the life of this zoneadmd.
2880 */
2881 if (zonecfg_in_alt_root()) {
2882 /*
2883 * This is just awful, but mounted scratch zones don't (and
2884 * can't) have consoles. We just wait for unmount instead.
2885 */
2886 while (sema_wait(&scratch_sem) == EINTR)
2887 ;
2888 } else {
2889 serve_console(zlogp);
2890 assert(in_death_throes);
2891 }
2892
2893 /*
2894 * This is the next-to-last part of the exit interlock. Upon calling
2895 * fdetach(), the door will go unreferenced; once any
2896 * outstanding requests (like the door thread doing Z_HALT) are
2897 * done, the door will get an UNREF notification; when it handles
2898 * the UNREF, the door server will cause the exit. It's possible
2899 * that fdetach() can fail because the file is in use, in which
2900 * case we'll retry the operation.
2901 */
2902 assert(!MUTEX_HELD(&lock));
2903 for (;;) {
2904 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY))
2905 break;
2906 yield();
2907 }
2908
2909 for (;;)
2910 (void) pause();
2911
2912 child_out:
2913 assert(pid == 0);
2914
2915 shstate->status = -1;
2916 (void) sema_post(&shstate->sem);
2917 (void) munmap((char *)shstate, shstatelen);
2918
2919 /*
2920 * This might trigger an unref notification, but if so,
2921 * we are still holding the lock, so our call to exit will
2922 * ultimately win the race and will publish the right exit
2923 * code.
2924 */
2925 if (zone_door != -1) {
2926 assert(MUTEX_HELD(&lock));
2927 (void) door_revoke(zone_door);
2928 (void) fdetach(zone_door_path);
2929 }
2930
2931 if (dld_handle != NULL)
2932 dladm_close(dld_handle);
2933
2934 return (1); /* return from main() forcibly exits an MT process */
2935 }