1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2015, Joyent, Inc. All rights reserved.
26 */
27
28 /*
29 * zoneadmd manages zones; one zoneadmd process is launched for each
30 * non-global zone on the system. This daemon juggles four jobs:
31 *
32 * - Implement setup and teardown of the zone "virtual platform": mount and
33 * unmount filesystems; create and destroy network interfaces; communicate
34 * with devfsadmd to lay out devices for the zone; instantiate the zone
35 * console device; configure process runtime attributes such as resource
36 * controls, pool bindings, fine-grained privileges.
37 *
38 * - Launch the zone's init(1M) process.
39 *
40 * - Implement a door server; clients (like zoneadm) connect to the door
41 * server and request zone state changes. The kernel is also a client of
42 * this door server. A request to halt or reboot the zone which originates
43 * *inside* the zone results in a door upcall from the kernel into zoneadmd.
44 *
45 * One minor problem is that messages emitted by zoneadmd need to be passed
46 * back to the zoneadm process making the request. These messages need to
47 * be rendered in the client's locale; so, this is passed in as part of the
48 * request. The exception is the kernel upcall to zoneadmd, in which case
49 * messages are syslog'd.
50 *
51 * To make all of this work, the Makefile adds -a to xgettext to extract *all*
52 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
53 * strings which do not need to be translated.
54 *
55 * - Act as a console server for zlogin -C processes; see comments in zcons.c
56 * for more information about the zone console architecture.
57 *
58 * DESIGN NOTES
59 *
60 * Restart:
61 * A chief design constraint of zoneadmd is that it should be restartable in
62 * the case that the administrator kills it off, or it suffers a fatal error,
63 * without the running zone being impacted; this is akin to being able to
64 * reboot the service processor of a server without affecting the OS instance.
65 */
66
67 #include <sys/param.h>
68 #include <sys/mman.h>
69 #include <sys/types.h>
70 #include <sys/stat.h>
71 #include <sys/sysmacros.h>
72 #include <sys/time.h>
73
74 #include <bsm/adt.h>
75 #include <bsm/adt_event.h>
76
77 #include <alloca.h>
78 #include <assert.h>
79 #include <errno.h>
80 #include <door.h>
81 #include <fcntl.h>
82 #include <locale.h>
83 #include <signal.h>
84 #include <stdarg.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88 #include <strings.h>
89 #include <synch.h>
90 #include <syslog.h>
91 #include <thread.h>
92 #include <unistd.h>
93 #include <wait.h>
94 #include <limits.h>
95 #include <zone.h>
96 #include <libbrand.h>
97 #include <sys/brand.h>
98 #include <libcontract.h>
99 #include <libcontract_priv.h>
100 #include <sys/brand.h>
101 #include <sys/contract/process.h>
102 #include <sys/ctfs.h>
103 #include <libdladm.h>
104 #include <sys/dls_mgmt.h>
105 #include <libscf.h>
106
107 #include <libzonecfg.h>
108 #include <zonestat_impl.h>
109 #include "zoneadmd.h"
110
static char *progname;		/* program name printed by usage() */
char *zone_name;	/* zone which we are managing */
zone_dochandle_t snap_hndl;	/* handle for snapshot created when ready */
char zonepath[MAXNAMELEN];
char pool_name[MAXNAMELEN];
char default_brand[MAXNAMELEN];
char brand_name[MAXNAMELEN];	/* name handed to brand_open() */
boolean_t zone_isnative;
boolean_t zone_iscluster;
boolean_t zone_islabeled;
/* set once zone_graceful_shutdown() has run its command successfully */
boolean_t shutdown_in_progress;
static zoneid_t zone_id;	/* assigned from vplat_create() in zone_ready() */
dladm_handle_t dld_handle = NULL;

/*
 * Brand state-change hook commands. An empty string means "no hook";
 * see brand_prestatechg() and brand_poststatechg().
 */
static char pre_statechg_hook[2 * MAXPATHLEN];
static char post_statechg_hook[2 * MAXPATHLEN];
char query_hook[2 * MAXPATHLEN];

zlog_t logsys;	/* zerror() routes messages for this log to syslog */

mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */
mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */

static sema_t scratch_sem; /* for scratch zones */

static char zone_door_path[MAXPATHLEN];
static int zone_door = -1;

boolean_t in_death_throes = B_FALSE; /* daemon is dying */
boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */

#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
#endif

/* locale that localize_msg() restores after each lookup */
#define DEFAULT_LOCALE "C"
147
148 static const char *
149 z_cmd_name(zone_cmd_t zcmd)
150 {
151 /* This list needs to match the enum in sys/zone.h */
152 static const char *zcmdstr[] = {
153 "ready", "boot", "forceboot", "reboot", "halt",
154 "note_uninstalling", "mount", "forcemount", "unmount",
155 "shutdown"
156 };
157
158 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr))
159 return ("unknown");
160 else
161 return (zcmdstr[(int)zcmd]);
162 }
163
/*
 * Return a pointer to the final path component of execfullname.
 *
 * Trailing '/' characters are removed by overwriting them with NUL in the
 * caller's buffer, so the argument must be writable. If the string contains
 * no (remaining) slash, the argument itself is returned.
 */
static char *
get_execbasename(char *execfullname)
{
	char *slash;

	/* guard against '/' at end of command invocation */
	while ((slash = strrchr(execfullname, '/')) != NULL) {
		if (slash[1] != '\0')
			return (slash + 1);
		/* nothing after the last slash: chop it and look again */
		*slash = '\0';
	}
	return (execfullname);
}
186
187 static void
188 usage(void)
189 {
190 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
191 (void) fprintf(stderr,
192 gettext("\tNote: %s should not be run directly.\n"), progname);
193 exit(2);
194 }
195
/*
 * SIGCHLD handler. The body is empty, so a delivered signal is simply
 * discarded.
 * NOTE(review): presumably registered so that SIGCHLD has a handler rather
 * than its default disposition -- confirm at the site that installs it.
 */
/* ARGSUSED */
static void
sigchld(int sig)
{
}
201
/*
 * Look up the translation of msg in the given locale and return whatever
 * gettext() returns for it.
 *
 * LC_MESSAGES is switched to 'locale' for the lookup and then put back to
 * DEFAULT_LOCALE. The locale setting is process-wide, so msglock is held
 * across the whole switch/lookup/restore sequence to keep concurrent callers
 * from seeing each other's setting.
 */
char *
localize_msg(char *locale, const char *msg)
{
	char *out;

	(void) mutex_lock(&msglock);
	(void) setlocale(LC_MESSAGES, locale);
	out = gettext(msg);
	(void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
	(void) mutex_unlock(&msglock);
	return (out);
}
214
215 /* PRINTFLIKE3 */
216 void
217 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
218 {
219 va_list alist;
220 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
221 char *bp;
222 int saved_errno = errno;
223
224 if (zlogp == NULL)
225 return;
226 if (zlogp == &logsys)
227 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ",
228 zone_name);
229 else
230 buf[0] = '\0';
231 bp = &(buf[strlen(buf)]);
232
233 /*
234 * In theory, the locale pointer should be set to either "C" or a
235 * char array, so it should never be NULL
236 */
237 assert(zlogp->locale != NULL);
238 /* Locale is per process, but we are multi-threaded... */
239 fmt = localize_msg(zlogp->locale, fmt);
240
241 va_start(alist, fmt);
242 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
243 va_end(alist);
244 bp = &(buf[strlen(buf)]);
245 if (use_strerror)
246 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
247 strerror(saved_errno));
248 if (zlogp == &logsys) {
249 (void) syslog(LOG_ERR, "%s", buf);
250 } else if (zlogp->logfile != NULL) {
251 (void) fprintf(zlogp->logfile, "%s\n", buf);
252 } else {
253 size_t buflen;
254 size_t copylen;
255
256 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf);
257 copylen = MIN(buflen, zlogp->loglen);
258 zlogp->log += copylen;
259 zlogp->loglen -= copylen;
260 }
261 }
262
263 /*
264 * Since Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
265 * put the arguments into an argv style array, use getopt to process them,
266 * and put the resultant argument string back into outargs. Non-Solaris brands
267 * may support alternate forms of boot arguments so we must handle that as well.
268 *
269 * During the filtering, we pull out any arguments which are truly "boot"
270 * arguments, leaving only those which are to be passed intact to the
271 * progenitor process. The one we support at the moment is -i, which
272 * indicates to the kernel which program should be launched as 'init'.
273 *
274 * Except for Z_OK, all other return values are treated as fatal.
275 */
276 static int
277 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
278 char *init_file)
279 {
280 int argc = 0, argc_save;
281 int i;
282 int err;
283 char *arg, *lasts, **argv = NULL, **argv_save;
284 char zonecfg_args[BOOTARGS_MAX];
285 char scratchargs[BOOTARGS_MAX], *sargs;
286 char c;
287
288 bzero(outargs, BOOTARGS_MAX);
289
290 /*
291 * If the user didn't specify transient boot arguments, check
292 * to see if there were any specified in the zone configuration,
293 * and use them if applicable.
294 */
295 if (inargs == NULL || inargs[0] == '\0') {
296 zone_dochandle_t handle;
297 if ((handle = zonecfg_init_handle()) == NULL) {
298 zerror(zlogp, B_TRUE,
299 "getting zone configuration handle");
300 return (Z_BAD_HANDLE);
301 }
302 err = zonecfg_get_snapshot_handle(zone_name, handle);
303 if (err != Z_OK) {
304 zerror(zlogp, B_FALSE,
305 "invalid configuration snapshot");
306 zonecfg_fini_handle(handle);
307 return (Z_BAD_HANDLE);
308 }
309
310 bzero(zonecfg_args, sizeof (zonecfg_args));
311 (void) zonecfg_get_bootargs(handle, zonecfg_args,
312 sizeof (zonecfg_args));
313 inargs = zonecfg_args;
314 zonecfg_fini_handle(handle);
315 }
316
317 if (strlen(inargs) >= BOOTARGS_MAX) {
318 zerror(zlogp, B_FALSE, "boot argument string too long");
319 return (Z_INVAL);
320 }
321
322 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
323 sargs = scratchargs;
324 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
325 sargs = NULL;
326 argc++;
327 }
328
329 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
330 zerror(zlogp, B_FALSE, "memory allocation failed");
331 return (Z_NOMEM);
332 }
333
334 argv_save = argv;
335 argc_save = argc;
336
337 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
338 sargs = scratchargs;
339 i = 0;
340 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
341 sargs = NULL;
342 if ((argv[i] = strdup(arg)) == NULL) {
343 err = Z_NOMEM;
344 zerror(zlogp, B_FALSE, "memory allocation failed");
345 goto done;
346 }
347 i++;
348 }
349
350 /*
351 * We preserve compatibility with the illumos system boot behavior,
352 * which allows:
353 *
354 * # reboot kernel/unix -s -m verbose
355 *
356 * In this example, kernel/unix tells the booter what file to boot. The
357 * original intent of this was that we didn't want reboot in a zone to
358 * be gratuitously different, so we would silently ignore the boot
359 * file, if necessary. However, this usage is archaic and has never
360 * been common, since it is impossible to boot a zone onto a different
361 * kernel. Ignoring the first argument breaks for non-native brands
362 * which pass boot arguments in a different style. e.g.
363 * systemd.log_level=debug
364 * Thus, for backward compatibility we only ignore the first argument
365 * if it appears to be in the illumos form and attempting to specify a
366 * kernel.
367 */
368 if (argv[0] == NULL)
369 goto done;
370
371 assert(argv[0][0] != ' ');
372 assert(argv[0][0] != '\t');
373
374 if (strncmp(argv[0], "kernel/", 7) == 0) {
375 argv = &argv[1];
376 argc--;
377 }
378
379 optind = 0;
380 opterr = 0;
381 err = Z_OK;
382 while ((c = getopt(argc, argv, "fi:m:s")) != -1) {
383 switch (c) {
384 case 'i':
385 /*
386 * -i is handled by the runtime and is not passed
387 * along to userland
388 */
389 (void) strlcpy(init_file, optarg, MAXPATHLEN);
390 break;
391 case 'f':
392 /* This has already been processed by zoneadm */
393 break;
394 case 'm':
395 case 's':
396 /* These pass through unmolested */
397 (void) snprintf(outargs, BOOTARGS_MAX,
398 "%s -%c %s ", outargs, c, optarg ? optarg : "");
399 break;
400 case '?':
401 /*
402 * If a brand has its own init, we need to pass along
403 * whatever the user provides. We use the entire
404 * unknown string here so that we correctly handle
405 * unknown long options (e.g. --debug).
406 */
407 (void) snprintf(outargs, BOOTARGS_MAX,
408 "%s %s", outargs, argv[optind - 1]);
409 break;
410 }
411 }
412
413 /*
414 * We need to pass along everything else since we don't know what
415 * the brand's init is expecting. For example, an argument list like:
416 * --confdir /foo --debug
417 * will cause the getopt parsing to stop at '/foo' but we need to pass
418 * that on, along with the '--debug'. This does mean that we require
419 * any of our known options (-ifms) to preceed the brand-specific ones.
420 */
421 while (optind < argc) {
422 (void) snprintf(outargs, BOOTARGS_MAX, "%s %s", outargs,
423 argv[optind]);
424 optind++;
425 }
426
427 done:
428 for (i = 0; i < argc_save; i++) {
429 if (argv_save[i] != NULL)
430 free(argv_save[i]);
431 }
432 free(argv_save);
433 return (err);
434 }
435
436
437 static int
438 mkzonedir(zlog_t *zlogp)
439 {
440 struct stat st;
441 /*
442 * We must create and lock everyone but root out of ZONES_TMPDIR
443 * since anyone can open any UNIX domain socket, regardless of
444 * its file system permissions. Sigh...
445 */
446 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
447 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
448 return (-1);
449 }
450 /* paranoia */
451 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
452 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
453 return (-1);
454 }
455 (void) chmod(ZONES_TMPDIR, S_IRWXU);
456 return (0);
457 }
458
459 /*
460 * Run the brand's pre-state change callback, if it exists.
461 */
462 static int
463 brand_prestatechg(zlog_t *zlogp, int state, int cmd)
464 {
465 char cmdbuf[2 * MAXPATHLEN];
466 const char *altroot;
467
468 if (pre_statechg_hook[0] == '\0')
469 return (0);
470
471 altroot = zonecfg_get_root();
472 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
473 state, cmd, altroot) > sizeof (cmdbuf))
474 return (-1);
475
476 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
477 return (-1);
478
479 return (0);
480 }
481
482 /*
483 * Run the brand's post-state change callback, if it exists.
484 */
485 static int
486 brand_poststatechg(zlog_t *zlogp, int state, int cmd)
487 {
488 char cmdbuf[2 * MAXPATHLEN];
489 const char *altroot;
490
491 if (post_statechg_hook[0] == '\0')
492 return (0);
493
494 altroot = zonecfg_get_root();
495 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
496 state, cmd, altroot) > sizeof (cmdbuf))
497 return (-1);
498
499 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
500 return (-1);
501
502 return (0);
503 }
504
505 /*
506 * Notify zonestatd of the new zone. If zonestatd is not running, this
507 * will do nothing.
508 */
509 static void
510 notify_zonestatd(zoneid_t zoneid)
511 {
512 int cmd[2];
513 int fd;
514 door_arg_t params;
515
516 fd = open(ZS_DOOR_PATH, O_RDONLY);
517 if (fd < 0)
518 return;
519
520 cmd[0] = ZSD_CMD_NEW_ZONE;
521 cmd[1] = zoneid;
522 params.data_ptr = (char *)&cmd;
523 params.data_size = sizeof (cmd);
524 params.desc_ptr = NULL;
525 params.desc_num = 0;
526 params.rbuf = NULL;
527 params.rsize = NULL;
528 (void) door_call(fd, ¶ms);
529 (void) close(fd);
530 }
531
532 /*
533 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is
534 * 'true' if this is being invoked as part of the processing for the "mount"
535 * subcommand.
536 */
537 static int
538 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
539 {
540 int err;
541
542 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
543 return (-1);
544
545 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
546 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
547 zonecfg_strerror(err));
548 goto bad;
549 }
550
551 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
552 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
553 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
554 zonecfg_strerror(err));
555 goto bad;
556 }
557 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
558 bringup_failure_recovery = B_TRUE;
559 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
560 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
561 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
562 zonecfg_strerror(err));
563 goto bad;
564 }
565
566 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
567 goto bad;
568
569 return (0);
570
571 bad:
572 /*
573 * If something goes wrong, we up the zones's state to the target
574 * state, READY, and then invoke the hook as if we're halting.
575 */
576 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
577 return (-1);
578 }
579
580 int
581 init_template(void)
582 {
583 int fd;
584 int err = 0;
585
586 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
587 if (fd == -1)
588 return (-1);
589
590 /*
591 * For now, zoneadmd doesn't do anything with the contract.
592 * Deliver no events, don't inherit, and allow it to be orphaned.
593 */
594 err |= ct_tmpl_set_critical(fd, 0);
595 err |= ct_tmpl_set_informative(fd, 0);
596 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
597 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
598 if (err || ct_tmpl_activate(fd)) {
599 (void) close(fd);
600 return (-1);
601 }
602
603 return (fd);
604 }
605
/*
 * Context handed to mount_early_fs() through its opaque 'data' argument.
 */
typedef struct fs_callback {
	zlog_t *zlogp;		/* where mount_early_fs() logs errors */
	zoneid_t zoneid;	/* zone the mounting child enters */
	boolean_t mount_cmd;	/* true: root is <zonepath>/lu */
} fs_callback_t;
611
612 static int
613 mount_early_fs(void *data, const char *spec, const char *dir,
614 const char *fstype, const char *opt)
615 {
616 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp;
617 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid;
618 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd;
619 char rootpath[MAXPATHLEN];
620 pid_t child;
621 int child_status;
622 int tmpl_fd;
623 int rv;
624 ctid_t ct;
625
626 /* determine the zone rootpath */
627 if (mount_cmd) {
628 char luroot[MAXPATHLEN];
629
630 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
631 resolve_lofs(zlogp, luroot, sizeof (luroot));
632 (void) strlcpy(rootpath, luroot, sizeof (rootpath));
633 } else {
634 if (zone_get_rootpath(zone_name,
635 rootpath, sizeof (rootpath)) != Z_OK) {
636 zerror(zlogp, B_FALSE, "unable to determine zone root");
637 return (-1);
638 }
639 }
640
641 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) {
642 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
643 rootpath, dir);
644 return (-1);
645 } else if (rv > 0) {
646 /* The mount point path doesn't exist, create it now. */
647 if (make_one_dir(zlogp, rootpath, dir,
648 DEFAULT_DIR_MODE, DEFAULT_DIR_USER,
649 DEFAULT_DIR_GROUP) != 0) {
650 zerror(zlogp, B_FALSE, "failed to create mount point");
651 return (-1);
652 }
653
654 /*
655 * Now this might seem weird, but we need to invoke
656 * valid_mount_path() again. Why? Because it checks
657 * to make sure that the mount point path is canonical,
658 * which it can only do if the path exists, so now that
659 * we've created the path we have to verify it again.
660 */
661 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir,
662 fstype)) < 0) {
663 zerror(zlogp, B_FALSE,
664 "%s%s is not a valid mount point", rootpath, dir);
665 return (-1);
666 }
667 }
668
669 if ((tmpl_fd = init_template()) == -1) {
670 zerror(zlogp, B_TRUE, "failed to create contract");
671 return (-1);
672 }
673
674 if ((child = fork()) == -1) {
675 (void) ct_tmpl_clear(tmpl_fd);
676 (void) close(tmpl_fd);
677 zerror(zlogp, B_TRUE, "failed to fork");
678 return (-1);
679
680 } else if (child == 0) { /* child */
681 char opt_buf[MAX_MNTOPT_STR];
682 int optlen = 0;
683 int mflag = MS_DATA;
684
685 (void) ct_tmpl_clear(tmpl_fd);
686 /*
687 * Even though there are no procs running in the zone, we
688 * do this for paranoia's sake.
689 */
690 (void) closefrom(0);
691
692 if (zone_enter(zoneid) == -1) {
693 _exit(errno);
694 }
695 if (opt != NULL) {
696 /*
697 * The mount() system call is incredibly annoying.
698 * If options are specified, we need to copy them
699 * into a temporary buffer since the mount() system
700 * call will overwrite the options string. It will
701 * also fail if the new option string it wants to
702 * write is bigger than the one we passed in, so
703 * you must pass in a buffer of the maximum possible
704 * option string length. sigh.
705 */
706 (void) strlcpy(opt_buf, opt, sizeof (opt_buf));
707 opt = opt_buf;
708 optlen = MAX_MNTOPT_STR;
709 mflag = MS_OPTIONSTR;
710 }
711 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0)
712 _exit(errno);
713 _exit(0);
714 }
715
716 /* parent */
717 if (contract_latest(&ct) == -1)
718 ct = -1;
719 (void) ct_tmpl_clear(tmpl_fd);
720 (void) close(tmpl_fd);
721 if (waitpid(child, &child_status, 0) != child) {
722 /* unexpected: we must have been signalled */
723 (void) contract_abandon_id(ct);
724 return (-1);
725 }
726 (void) contract_abandon_id(ct);
727 if (WEXITSTATUS(child_status) != 0) {
728 errno = WEXITSTATUS(child_status);
729 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
730 return (-1);
731 }
732
733 return (0);
734 }
735
736 /*
737 * If retstr is not NULL, the output of the subproc is returned in the str,
738 * otherwise it is output using zerror(). Any memory allocated for retstr
739 * should be freed by the caller.
740 */
741 int
742 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
743 {
744 char buf[1024]; /* arbitrary large amount */
745 char *inbuf;
746 FILE *file;
747 int status;
748 int rd_cnt;
749
750 if (retstr != NULL) {
751 if ((*retstr = malloc(1024)) == NULL) {
752 zerror(zlogp, B_FALSE, "out of memory");
753 return (-1);
754 }
755 inbuf = *retstr;
756 rd_cnt = 0;
757 } else {
758 inbuf = buf;
759 }
760
761 file = popen(cmdbuf, "r");
762 if (file == NULL) {
763 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
764 return (-1);
765 }
766
767 while (fgets(inbuf, 1024, file) != NULL) {
768 if (retstr == NULL) {
769 if (zlogp != &logsys)
770 zerror(zlogp, B_FALSE, "%s", inbuf);
771 } else {
772 char *p;
773
774 rd_cnt += 1024 - 1;
775 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
776 zerror(zlogp, B_FALSE, "out of memory");
777 (void) pclose(file);
778 return (-1);
779 }
780
781 *retstr = p;
782 inbuf = *retstr + rd_cnt;
783 }
784 }
785 status = pclose(file);
786
787 if (WIFSIGNALED(status)) {
788 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
789 "signal %d", cmdbuf, WTERMSIG(status));
790 return (-1);
791 }
792 assert(WIFEXITED(status));
793 if (WEXITSTATUS(status) == ZEXIT_EXEC) {
794 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
795 return (-1);
796 }
797 return (WEXITSTATUS(status));
798 }
799
/*
 * The two helpers below are compiled out. zone_bootup() currently calls
 * brand_get_initname() and brand_restartinit() directly instead.
 */
#if 0	/* XXX KEBE SAYS not yet */
/*
 * Get the path for this zone's init(1M) (or equivalent) process. First look
 * for a zone-specific init-name attr, then get it from the brand.
 */
static int
get_initname(brand_handle_t bh, char *initname, int len)
{
	struct zone_attrtab a;

	bzero(&a, sizeof (a));
	(void) strlcpy(a.zone_attr_name, "init-name",
	    sizeof (a.zone_attr_name));

	if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
		(void) strlcpy(initname, a.zone_attr_value, len);
		return (0);
	}

	return (brand_get_initname(bh, initname, len));
}

/*
 * Get the restart-init flag for this zone's init(1M) (or equivalent) process.
 * First look for a zone-specific restart-init attr, then get it from the brand.
 * Any attr value other than "false" counts as true.
 */
static boolean_t
restartinit(brand_handle_t bh)
{
	struct zone_attrtab a;

	bzero(&a, sizeof (a));
	(void) strlcpy(a.zone_attr_name, "restart-init",
	    sizeof (a.zone_attr_name));

	if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
		if (strcmp(a.zone_attr_value, "false") == 0)
			return (B_FALSE);
		return (B_TRUE);
	}

	return (brand_restartinit(bh));
}
#endif	/* XXX KEBE */
844
845 /*
846 * Get the app-svc-dependent flag for this zone's init process. This is a
847 * zone-specific attr which controls the type of contract we create for the
848 * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal
849 * set, so that when any service which is in the same contract exits, the init
850 * application will be terminated.
851 *
852 * We use the global "snap_hndl", so no parameters get passed here.
853 */
854 static boolean_t
855 is_app_svc_dep(void)
856 {
857 struct zone_attrtab a;
858
859 bzero(&a, sizeof (a));
860 (void) strlcpy(a.zone_attr_name, "app-svc-dependent",
861 sizeof (a.zone_attr_name));
862
863 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
864 strcmp(a.zone_attr_value, "true") == 0) {
865 return (B_TRUE);
866 }
867
868 return (B_FALSE);
869 }
870
/*
 * Boot the zone named by zone_name.
 *
 * In order: run the brand's pre-state-change hook, mount the brand's
 * platform filesystems from inside the zone, work out the init path and
 * boot arguments, run the brand's boot callback (if any), push the init
 * name / boot arguments / restart and contract settings into the kernel
 * with zone_setattr(), notify zonestatd, call zone_boot(), run the
 * post-state-change hook, and start the logging and memory-cap threads.
 *
 * bootargs may be NULL or empty, in which case filter_bootargs() falls back
 * to the arguments stored in the zone configuration. zstate is passed to
 * the brand hooks.
 *
 * Returns 0 on success. On failure returns -1; for every failure after the
 * pre-state-change hook, the post-state-change hook is first invoked with
 * (ZONE_STATE_RUNNING, Z_HALT).
 */
static int
zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
{
	zoneid_t zoneid;
	struct stat st;
	char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
	char nbootargs[BOOTARGS_MAX];
	char cmdbuf[MAXPATHLEN];
	fs_callback_t cb;
	brand_handle_t bh;
	zone_iptype_t iptype;
	dladm_status_t status;
	char errmsg[DLADM_STRSIZE];
	int err;
	boolean_t restart_init;
	boolean_t app_svc_dep;

	if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
		return (-1);

	if ((zoneid = getzoneidbyname(zone_name)) == -1) {
		zerror(zlogp, B_TRUE, "unable to get zoneid");
		goto bad;
	}

	cb.zlogp = zlogp;
	cb.zoneid = zoneid;
	cb.mount_cmd = B_FALSE;

	/* Get a handle to the brand info for this zone */
	if ((bh = brand_open(brand_name)) == NULL) {
		zerror(zlogp, B_FALSE, "unable to determine zone brand");
		goto bad;
	}

	/*
	 * Get the list of filesystems to mount from the brand
	 * configuration.  These mounts are done via a thread that will
	 * enter the zone, so they are done from within the context of the
	 * zone.
	 */
	if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
		zerror(zlogp, B_FALSE, "unable to mount filesystems");
		brand_close(bh);
		goto bad;
	}

	/*
	 * Get the brand's boot callback if it exists.
	 * The callback text is written after EXEC_PREFIX, so cmdbuf being
	 * no longer than EXEC_LEN afterwards means "no callback".
	 */
	(void) strcpy(cmdbuf, EXEC_PREFIX);
	if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
	    sizeof (cmdbuf) - EXEC_LEN) != 0) {
		zerror(zlogp, B_FALSE,
		    "unable to determine branded zone's boot callback");
		brand_close(bh);
		goto bad;
	}

	/* Get the path for this zone's init(1M) (or equivalent) process. */
	if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
		zerror(zlogp, B_FALSE,
		    "unable to determine zone's init(1M) location");
		brand_close(bh);
		goto bad;
	}

	/* See if this zone's brand should restart init if it dies. */
	restart_init = brand_restartinit(bh);

	/*
	 * See if we need to setup contract dependencies between the zone's
	 * primary application and any of its services.
	 */
	app_svc_dep = is_app_svc_dep();

	brand_close(bh);

	/* A -i boot argument, if present, overrides init_file here. */
	err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
	if (err != Z_OK)
		goto bad;

	assert(init_file[0] != '\0');

	/*
	 * Try to anticipate possible problems: If possible, make sure init is
	 * executable.
	 */
	if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) {
		zerror(zlogp, B_FALSE, "unable to determine zone root");
		goto bad;
	}

	(void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file);

	if (lstat(initpath, &st) == -1) {
		zerror(zlogp, B_TRUE, "could not stat %s", initpath);
		goto bad;
	}

	/*
	 * If a symlink, we'll have to wait and resolve when we boot,
	 * otherwise check the executable bits now.
	 */
	if ((st.st_mode & S_IFMT) != S_IFLNK && (st.st_mode & S_IXUSR) == 0) {
		zerror(zlogp, B_FALSE, "%s is not executable", initpath);
		goto bad;
	}

	/*
	 * Exclusive stack zones interact with the dlmgmtd running in the
	 * global zone.  dladm_zone_boot() tells dlmgmtd that this zone is
	 * booting, and loads its datalinks from the zone's datalink
	 * configuration file.
	 */
	if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
		status = dladm_zone_boot(dld_handle, zoneid);
		if (status != DLADM_STATUS_OK) {
			zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
			    " %s", dladm_status2str(status, errmsg));
			goto bad;
		}
	}

	/*
	 * If there is a brand 'boot' callback, execute it now to give the
	 * brand one last chance to do any additional setup before the zone
	 * is booted.
	 */
	if ((strlen(cmdbuf) > EXEC_LEN) &&
	    (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
		zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
		goto bad;
	}

	if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
		zerror(zlogp, B_TRUE, "could not set zone boot file");
		goto bad;
	}

	if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
		zerror(zlogp, B_TRUE, "could not set zone boot arguments");
		goto bad;
	}

	if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART,
	    NULL, 0) == -1) {
		zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
		goto bad;
	}

	/*
	 * NOTE(review): B_TRUE is passed in the pointer argument itself
	 * rather than by reference -- confirm this is what the kernel
	 * expects for ZONE_ATTR_APP_SVC_CT.
	 */
	if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT,
	    (void *)B_TRUE, sizeof (boolean_t)) == -1) {
		zerror(zlogp, B_TRUE, "could not set zone app-die");
		goto bad;
	}

	/*
	 * Inform zonestatd of a new zone so that it can install a door for
	 * the zone to contact it.
	 *
	 * NOTE(review): this and the thread creation below use the global
	 * zone_id rather than the local zoneid looked up above -- confirm
	 * the two are always equal on this path.
	 */
	notify_zonestatd(zone_id);

	if (zone_boot(zoneid) == -1) {
		zerror(zlogp, B_TRUE, "unable to boot zone");
		goto bad;
	}

	if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
		goto bad;

	/* Startup a thread to perform zfd logging/tty svc for the zone. */
	create_log_thread(zlogp, zone_id);

	/* Startup a thread to perform memory capping for the zone. */
	create_mcap_thread(zlogp, zone_id);

	return (0);

bad:
	/*
	 * If something goes wrong, we up the zone's state to the target
	 * state, RUNNING, and then invoke the hook as if we're halting.
	 */
	(void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);

	return (-1);
}
1059
1060 static int
1061 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
1062 {
1063 int err;
1064
1065 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
1066 return (-1);
1067
1068 /* Shutting down, stop the memcap thread */
1069 destroy_mcap_thread();
1070
1071 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
1072 if (!bringup_failure_recovery)
1073 zerror(zlogp, B_FALSE, "unable to destroy zone");
1074 destroy_log_thread();
1075 return (-1);
1076 }
1077
1078 /* Shut down is done, stop the log thread */
1079 destroy_log_thread();
1080
1081 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
1082 return (-1);
1083
1084 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
1085 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
1086 zonecfg_strerror(err));
1087
1088 return (0);
1089 }
1090
1091 static int
1092 zone_graceful_shutdown(zlog_t *zlogp)
1093 {
1094 zoneid_t zoneid;
1095 pid_t child;
1096 char cmdbuf[MAXPATHLEN];
1097 brand_handle_t bh = NULL;
1098 ctid_t ct;
1099 int tmpl_fd;
1100 int child_status;
1101
1102 if (shutdown_in_progress) {
1103 zerror(zlogp, B_FALSE, "shutdown already in progress");
1104 return (-1);
1105 }
1106
1107 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1108 zerror(zlogp, B_TRUE, "unable to get zoneid");
1109 return (-1);
1110 }
1111
1112 /* Get a handle to the brand info for this zone */
1113 if ((bh = brand_open(brand_name)) == NULL) {
1114 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1115 return (-1);
1116 }
1117
1118 /*
1119 * If there is a brand 'shutdown' callback, execute it now to give the
1120 * brand a chance to cleanup any custom configuration.
1121 */
1122 (void) strcpy(cmdbuf, EXEC_PREFIX);
1123 if (brand_get_shutdown(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
1124 sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
1125 (void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
1126 }
1127 brand_close(bh);
1128
1129 if ((tmpl_fd = init_template()) == -1) {
1130 zerror(zlogp, B_TRUE, "failed to create contract");
1131 return (-1);
1132 }
1133
1134 if ((child = fork()) == -1) {
1135 (void) ct_tmpl_clear(tmpl_fd);
1136 (void) close(tmpl_fd);
1137 zerror(zlogp, B_TRUE, "failed to fork");
1138 return (-1);
1139 } else if (child == 0) {
1140 (void) ct_tmpl_clear(tmpl_fd);
1141 if (zone_enter(zoneid) == -1) {
1142 _exit(errno);
1143 }
1144 _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL));
1145 }
1146
1147 if (contract_latest(&ct) == -1)
1148 ct = -1;
1149 (void) ct_tmpl_clear(tmpl_fd);
1150 (void) close(tmpl_fd);
1151
1152 if (waitpid(child, &child_status, 0) != child) {
1153 /* unexpected: we must have been signalled */
1154 (void) contract_abandon_id(ct);
1155 return (-1);
1156 }
1157
1158 (void) contract_abandon_id(ct);
1159 if (WEXITSTATUS(child_status) != 0) {
1160 errno = WEXITSTATUS(child_status);
1161 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1162 return (-1);
1163 }
1164
1165 shutdown_in_progress = B_TRUE;
1166
1167 return (0);
1168 }
1169
1170 static int
1171 zone_wait_shutdown(zlog_t *zlogp)
1172 {
1173 zone_state_t zstate;
1174 uint64_t *tm = NULL;
1175 scf_simple_prop_t *prop = NULL;
1176 int timeout;
1177 int tries;
1178 int rc = -1;
1179
1180 /* Get default stop timeout from SMF framework */
1181 timeout = SHUTDOWN_WAIT;
1182 if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop",
1183 SCF_PROPERTY_TIMEOUT)) != NULL) {
1184 if ((tm = scf_simple_prop_next_count(prop)) != NULL) {
1185 if (tm != 0)
1186 timeout = *tm;
1187 }
1188 scf_simple_prop_free(prop);
1189 }
1190
1191 /* allow time for zone to shutdown cleanly */
1192 for (tries = 0; tries < timeout; tries ++) {
1193 (void) sleep(1);
1194 if (zone_get_state(zone_name, &zstate) == Z_OK &&
1195 zstate == ZONE_STATE_INSTALLED) {
1196 rc = 0;
1197 break;
1198 }
1199 }
1200
1201 if (rc != 0)
1202 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1203
1204 shutdown_in_progress = B_FALSE;
1205
1206 return (rc);
1207 }
1208
1209
1210
1211 /*
1212 * Generate AUE_zone_state for a command that boots a zone.
1213 */
1214 static void
1215 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
1216 char *new_state)
1217 {
1218 adt_session_data_t *ah;
1219 adt_event_data_t *event;
1220 int pass_fail, fail_reason;
1221
1222 if (!adt_audit_enabled())
1223 return;
1224
1225 if (return_val == 0) {
1226 pass_fail = ADT_SUCCESS;
1227 fail_reason = ADT_SUCCESS;
1228 } else {
1229 pass_fail = ADT_FAILURE;
1230 fail_reason = ADT_FAIL_VALUE_PROGRAM;
1231 }
1232
1233 if (adt_start_session(&ah, NULL, 0)) {
1234 zerror(zlogp, B_TRUE, gettext("audit failure."));
1235 return;
1236 }
1237 if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
1238 zerror(zlogp, B_TRUE, gettext("audit failure."));
1239 (void) adt_end_session(ah);
1240 return;
1241 }
1242
1243 event = adt_alloc_event(ah, ADT_zone_state);
1244 if (event == NULL) {
1245 zerror(zlogp, B_TRUE, gettext("audit failure."));
1246 (void) adt_end_session(ah);
1247 return;
1248 }
1249 event->adt_zone_state.zonename = zone_name;
1250 event->adt_zone_state.new_state = new_state;
1251
1252 if (adt_put_event(event, pass_fail, fail_reason))
1253 zerror(zlogp, B_TRUE, gettext("audit failure."));
1254
1255 adt_free_event(event);
1256
1257 (void) adt_end_session(ah);
1258 }
1259
1260 /*
1261 * Log the exit time and status of the zone's init process into
1262 * {zonepath}/lastexited. If the zone shutdown normally, the exit status will
1263 * be -1, otherwise it will be the exit status as described in wait.3c.
1264 * If the zone is configured to restart init, then nothing will be logged if
1265 * init exits unexpectedly (the kernel will never upcall in this case).
1266 */
1267 static void
1268 log_init_exit(int status)
1269 {
1270 char p[MAXPATHLEN];
1271 char buf[128];
1272 struct timeval t;
1273 int fd;
1274
1275 if (snprintf(p, sizeof (p), "%s/lastexited", zonepath) > sizeof (p))
1276 return;
1277 if (gettimeofday(&t, NULL) != 0)
1278 return;
1279 if (snprintf(buf, sizeof (buf), "%ld.%ld %d\n", t.tv_sec, t.tv_usec,
1280 status) > sizeof (buf))
1281 return;
1282 if ((fd = open(p, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0)
1283 return;
1284
1285 (void) write(fd, buf, strlen(buf));
1286
1287 (void) close(fd);
1288 }
1289
1290 /*
1291 * The main routine for the door server that deals with zone state transitions.
1292 */
1293 /* ARGSUSED */
1294 static void
1295 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1296 uint_t n_desc)
1297 {
1298 ucred_t *uc = NULL;
1299 const priv_set_t *eset;
1300
1301 zone_state_t zstate;
1302 zone_cmd_t cmd;
1303 int init_status;
1304 zone_cmd_arg_t *zargp;
1305
1306 boolean_t kernelcall;
1307
1308 int rval = -1;
1309 uint64_t uniqid;
1310 zoneid_t zoneid = -1;
1311 zlog_t zlog;
1312 zlog_t *zlogp;
1313 zone_cmd_rval_t *rvalp;
1314 size_t rlen = getpagesize(); /* conservative */
1315 fs_callback_t cb;
1316 brand_handle_t bh;
1317 boolean_t wait_shut = B_FALSE;
1318
1319 /* LINTED E_BAD_PTR_CAST_ALIGN */
1320 zargp = (zone_cmd_arg_t *)args;
1321
1322 /*
1323 * When we get the door unref message, we've fdetach'd the door, and
1324 * it is time for us to shut down zoneadmd.
1325 */
1326 if (zargp == DOOR_UNREF_DATA) {
1327 /*
1328 * See comment at end of main() for info on the last rites.
1329 */
1330 exit(0);
1331 }
1332
1333 if (zargp == NULL) {
1334 (void) door_return(NULL, 0, 0, 0);
1335 }
1336
1337 rvalp = alloca(rlen);
1338 bzero(rvalp, rlen);
1339 zlog.logfile = NULL;
1340 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1341 zlog.buf = rvalp->errbuf;
1342 zlog.log = zlog.buf;
1343 /* defer initialization of zlog.locale until after credential check */
1344 zlogp = &zlog;
1345
1346 if (alen != sizeof (zone_cmd_arg_t)) {
1347 /*
1348 * This really shouldn't be happening.
1349 */
1350 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1351 "unexpected (expected %d bytes)", alen,
1352 sizeof (zone_cmd_arg_t));
1353 goto out;
1354 }
1355 cmd = zargp->cmd;
1356 init_status = zargp->status;
1357
1358 if (door_ucred(&uc) != 0) {
1359 zerror(&logsys, B_TRUE, "door_ucred");
1360 goto out;
1361 }
1362 eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1363 if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1364 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1365 ucred_geteuid(uc) != 0)) {
1366 zerror(&logsys, B_FALSE, "insufficient privileges");
1367 goto out;
1368 }
1369
1370 kernelcall = ucred_getpid(uc) == 0;
1371
1372 /*
1373 * This is safe because we only use a zlog_t throughout the
1374 * duration of a door call; i.e., by the time the pointer
1375 * might become invalid, the door call would be over.
1376 */
1377 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
1378
1379 (void) mutex_lock(&lock);
1380
1381 /*
1382 * Once we start to really die off, we don't want more connections.
1383 */
1384 if (in_death_throes) {
1385 (void) mutex_unlock(&lock);
1386 ucred_free(uc);
1387 (void) door_return(NULL, 0, 0, 0);
1388 thr_exit(NULL);
1389 }
1390
1391 /*
1392 * Check for validity of command.
1393 */
1394 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT &&
1395 cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT &&
1396 cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT &&
1397 cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) {
1398 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd);
1399 goto out;
1400 }
1401
1402 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
1403 /*
1404 * Can't happen
1405 */
1406 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
1407 cmd);
1408 goto out;
1409 }
1410 /*
1411 * We ignore the possibility of someone calling zone_create(2)
1412 * explicitly; all requests must come through zoneadmd.
1413 */
1414 if (zone_get_state(zone_name, &zstate) != Z_OK) {
1415 /*
1416 * Something terribly wrong happened
1417 */
1418 zerror(&logsys, B_FALSE, "unable to determine state of zone");
1419 goto out;
1420 }
1421
1422 if (kernelcall) {
1423 /*
1424 * Kernel-initiated requests may lose their validity if the
1425 * zone_t the kernel was referring to has gone away.
1426 */
1427 if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
1428 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
1429 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
1430 /*
1431 * We're not talking about the same zone. The request
1432 * must have arrived too late. Return error.
1433 */
1434 rval = -1;
1435 goto out;
1436 }
1437 zlogp = &logsys; /* Log errors to syslog */
1438 }
1439
1440 /*
1441 * If we are being asked to forcibly mount or boot a zone, we
1442 * pretend that an INCOMPLETE zone is actually INSTALLED.
1443 */
1444 if (zstate == ZONE_STATE_INCOMPLETE &&
1445 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1446 zstate = ZONE_STATE_INSTALLED;
1447
1448 switch (zstate) {
1449 case ZONE_STATE_CONFIGURED:
1450 case ZONE_STATE_INCOMPLETE:
1451 /*
1452 * Not our area of expertise; we just print a nice message
1453 * and die off.
1454 */
1455 zerror(zlogp, B_FALSE,
1456 "%s operation is invalid for zones in state '%s'",
1457 z_cmd_name(cmd), zone_state_str(zstate));
1458 break;
1459
1460 case ZONE_STATE_INSTALLED:
1461 switch (cmd) {
1462 case Z_READY:
1463 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
1464 if (rval == 0)
1465 eventstream_write(Z_EVT_ZONE_READIED);
1466 break;
1467 case Z_BOOT:
1468 case Z_FORCEBOOT:
1469 eventstream_write(Z_EVT_ZONE_BOOTING);
1470 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1471 == 0) {
1472 rval = zone_bootup(zlogp, zargp->bootbuf,
1473 zstate);
1474 }
1475 audit_put_record(zlogp, uc, rval, "boot");
1476 if (rval != 0) {
1477 bringup_failure_recovery = B_TRUE;
1478 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1479 zstate);
1480 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1481 }
1482 break;
1483 case Z_SHUTDOWN:
1484 case Z_HALT:
1485 if (kernelcall) /* Invalid; can't happen */
1486 abort();
1487 /*
1488 * We could have two clients racing to halt this
1489 * zone; the second client loses, but his request
1490 * doesn't fail, since the zone is now in the desired
1491 * state.
1492 */
1493 zerror(zlogp, B_FALSE, "zone is already halted");
1494 rval = 0;
1495 break;
1496 case Z_REBOOT:
1497 if (kernelcall) /* Invalid; can't happen */
1498 abort();
1499 zerror(zlogp, B_FALSE, "%s operation is invalid "
1500 "for zones in state '%s'", z_cmd_name(cmd),
1501 zone_state_str(zstate));
1502 rval = -1;
1503 break;
1504 case Z_NOTE_UNINSTALLING:
1505 if (kernelcall) /* Invalid; can't happen */
1506 abort();
1507 /*
1508 * Tell the console to print out a message about this.
1509 * Once it does, we will be in_death_throes.
1510 */
1511 eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1512 break;
1513 case Z_MOUNT:
1514 case Z_FORCEMOUNT:
1515 if (kernelcall) /* Invalid; can't happen */
1516 abort();
1517 if (!zone_isnative && !zone_iscluster &&
1518 !zone_islabeled) {
1519 /*
1520 * -U mounts the zone without lofs mounting
1521 * zone file systems back into the scratch
1522 * zone. This is required when mounting
1523 * non-native branded zones.
1524 */
1525 (void) strlcpy(zargp->bootbuf, "-U",
1526 BOOTARGS_MAX);
1527 }
1528
1529 rval = zone_ready(zlogp,
1530 strcmp(zargp->bootbuf, "-U") == 0 ?
1531 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
1532 if (rval != 0)
1533 break;
1534
1535 eventstream_write(Z_EVT_ZONE_READIED);
1536
1537 /*
1538 * Get a handle to the default brand info.
1539 * We must always use the default brand file system
1540 * list when mounting the zone.
1541 */
1542 if ((bh = brand_open(default_brand)) == NULL) {
1543 rval = -1;
1544 break;
1545 }
1546
1547 /*
1548 * Get the list of filesystems to mount from
1549 * the brand configuration. These mounts are done
1550 * via a thread that will enter the zone, so they
1551 * are done from within the context of the zone.
1552 */
1553 cb.zlogp = zlogp;
1554 cb.zoneid = zone_id;
1555 cb.mount_cmd = B_TRUE;
1556 rval = brand_platform_iter_mounts(bh,
1557 mount_early_fs, &cb);
1558
1559 brand_close(bh);
1560
1561 /*
1562 * Ordinarily, /dev/fd would be mounted inside the zone
1563 * by svc:/system/filesystem/usr:default, but since
1564 * we're not booting the zone, we need to do this
1565 * manually.
1566 */
1567 if (rval == 0)
1568 rval = mount_early_fs(&cb,
1569 "fd", "/dev/fd", "fd", NULL);
1570 break;
1571 case Z_UNMOUNT:
1572 if (kernelcall) /* Invalid; can't happen */
1573 abort();
1574 zerror(zlogp, B_FALSE, "zone is already unmounted");
1575 rval = 0;
1576 break;
1577 }
1578 break;
1579
1580 case ZONE_STATE_READY:
1581 switch (cmd) {
1582 case Z_READY:
1583 /*
1584 * We could have two clients racing to ready this
1585 * zone; the second client loses, but his request
1586 * doesn't fail, since the zone is now in the desired
1587 * state.
1588 */
1589 zerror(zlogp, B_FALSE, "zone is already ready");
1590 rval = 0;
1591 break;
1592 case Z_BOOT:
1593 (void) strlcpy(boot_args, zargp->bootbuf,
1594 sizeof (boot_args));
1595 eventstream_write(Z_EVT_ZONE_BOOTING);
1596 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1597 audit_put_record(zlogp, uc, rval, "boot");
1598 if (rval != 0) {
1599 bringup_failure_recovery = B_TRUE;
1600 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1601 zstate);
1602 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1603 }
1604 boot_args[0] = '\0';
1605 break;
1606 case Z_HALT:
1607 if (kernelcall) /* Invalid; can't happen */
1608 abort();
1609 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1610 != 0)
1611 break;
1612 eventstream_write(Z_EVT_ZONE_HALTED);
1613 break;
1614 case Z_SHUTDOWN:
1615 case Z_REBOOT:
1616 case Z_NOTE_UNINSTALLING:
1617 case Z_MOUNT:
1618 case Z_UNMOUNT:
1619 if (kernelcall) /* Invalid; can't happen */
1620 abort();
1621 zerror(zlogp, B_FALSE, "%s operation is invalid "
1622 "for zones in state '%s'", z_cmd_name(cmd),
1623 zone_state_str(zstate));
1624 rval = -1;
1625 break;
1626 }
1627 break;
1628
1629 case ZONE_STATE_MOUNTED:
1630 switch (cmd) {
1631 case Z_UNMOUNT:
1632 if (kernelcall) /* Invalid; can't happen */
1633 abort();
1634 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
1635 if (rval == 0) {
1636 eventstream_write(Z_EVT_ZONE_HALTED);
1637 (void) sema_post(&scratch_sem);
1638 }
1639 break;
1640 default:
1641 if (kernelcall) /* Invalid; can't happen */
1642 abort();
1643 zerror(zlogp, B_FALSE, "%s operation is invalid "
1644 "for zones in state '%s'", z_cmd_name(cmd),
1645 zone_state_str(zstate));
1646 rval = -1;
1647 break;
1648 }
1649 break;
1650
1651 case ZONE_STATE_RUNNING:
1652 case ZONE_STATE_SHUTTING_DOWN:
1653 case ZONE_STATE_DOWN:
1654 switch (cmd) {
1655 case Z_READY:
1656 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1657 != 0)
1658 break;
1659 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
1660 eventstream_write(Z_EVT_ZONE_READIED);
1661 else
1662 eventstream_write(Z_EVT_ZONE_HALTED);
1663 break;
1664 case Z_BOOT:
1665 /*
1666 * We could have two clients racing to boot this
1667 * zone; the second client loses, but his request
1668 * doesn't fail, since the zone is now in the desired
1669 * state.
1670 */
1671 zerror(zlogp, B_FALSE, "zone is already booted");
1672 rval = 0;
1673 break;
1674 case Z_HALT:
1675 if (kernelcall) {
1676 log_init_exit(init_status);
1677 } else {
1678 log_init_exit(-1);
1679 }
1680 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1681 != 0)
1682 break;
1683 eventstream_write(Z_EVT_ZONE_HALTED);
1684 break;
1685 case Z_REBOOT:
1686 (void) strlcpy(boot_args, zargp->bootbuf,
1687 sizeof (boot_args));
1688 eventstream_write(Z_EVT_ZONE_REBOOTING);
1689 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1690 != 0) {
1691 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1692 boot_args[0] = '\0';
1693 break;
1694 }
1695 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1696 != 0) {
1697 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1698 boot_args[0] = '\0';
1699 break;
1700 }
1701 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1702 audit_put_record(zlogp, uc, rval, "reboot");
1703 if (rval != 0) {
1704 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1705 zstate);
1706 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1707 }
1708 boot_args[0] = '\0';
1709 break;
1710 case Z_SHUTDOWN:
1711 if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
1712 wait_shut = B_TRUE;
1713 }
1714 break;
1715 case Z_NOTE_UNINSTALLING:
1716 case Z_MOUNT:
1717 case Z_UNMOUNT:
1718 zerror(zlogp, B_FALSE, "%s operation is invalid "
1719 "for zones in state '%s'", z_cmd_name(cmd),
1720 zone_state_str(zstate));
1721 rval = -1;
1722 break;
1723 }
1724 break;
1725 default:
1726 abort();
1727 }
1728
1729 /*
1730 * Because the state of the zone may have changed, we make sure
1731 * to wake the console poller, which is in charge of initiating
1732 * the shutdown procedure as necessary.
1733 */
1734 eventstream_write(Z_EVT_NULL);
1735
1736 out:
1737 (void) mutex_unlock(&lock);
1738
1739 /* Wait for the Z_SHUTDOWN commands to complete */
1740 if (wait_shut)
1741 rval = zone_wait_shutdown(zlogp);
1742
1743 if (kernelcall) {
1744 rvalp = NULL;
1745 rlen = 0;
1746 } else {
1747 rvalp->rval = rval;
1748 }
1749 if (uc != NULL)
1750 ucred_free(uc);
1751 (void) door_return((char *)rvalp, rlen, NULL, 0);
1752 thr_exit(NULL);
1753 }
1754
1755 static int
1756 setup_door(zlog_t *zlogp)
1757 {
1758 if ((zone_door = door_create(server, NULL,
1759 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
1760 zerror(zlogp, B_TRUE, "%s failed", "door_create");
1761 return (-1);
1762 }
1763 (void) fdetach(zone_door_path);
1764
1765 if (fattach(zone_door, zone_door_path) != 0) {
1766 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
1767 (void) door_revoke(zone_door);
1768 (void) fdetach(zone_door_path);
1769 zone_door = -1;
1770 return (-1);
1771 }
1772 return (0);
1773 }
1774
1775 /*
1776 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
1777 * is where zoneadmd itself will check to see that another instance of
1778 * zoneadmd isn't already controlling this zone.
1779 *
1780 * The idea here is that we want to open the path to which we will
1781 * attach our door, lock it, and then make sure that no-one has beat us
1782 * to fattach(3c)ing onto it.
1783 *
1784 * fattach(3c) is really a mount, so there are actually two possible
1785 * vnodes we could be dealing with. Our strategy is as follows:
1786 *
1787 * - If the file we opened is a regular file (common case):
1788 * There is no fattach(3c)ed door, so we have a chance of becoming
1789 * the managing zoneadmd. We attempt to lock the file: if it is
1790 * already locked, that means someone else raced us here, so we
1791 * lose and give up. zoneadm(1m) will try to contact the zoneadmd
1792 * that beat us to it.
1793 *
1794 * - If the file we opened is a namefs file:
1795 * This means there is already an established door fattach(3c)'ed
1796 * to the rendezvous path. We've lost the race, so we give up.
1797 * Note that in this case we also try to grab the file lock, and
1798 * will succeed in acquiring it since the vnode locked by the
1799 * "winning" zoneadmd was a regular one, and the one we locked was
1800 * the fattach(3c)'ed door node. At any rate, no harm is done, and
1801 * we just return to zoneadm(1m) which knows to retry.
1802 */
1803 static int
1804 make_daemon_exclusive(zlog_t *zlogp)
1805 {
1806 int doorfd = -1;
1807 int err, ret = -1;
1808 struct stat st;
1809 struct flock flock;
1810 zone_state_t zstate;
1811
1812 top:
1813 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1814 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1815 zonecfg_strerror(err));
1816 goto out;
1817 }
1818 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
1819 S_IREAD|S_IWRITE)) < 0) {
1820 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
1821 goto out;
1822 }
1823 if (fstat(doorfd, &st) < 0) {
1824 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
1825 goto out;
1826 }
1827 /*
1828 * Lock the file to synchronize with other zoneadmd
1829 */
1830 flock.l_type = F_WRLCK;
1831 flock.l_whence = SEEK_SET;
1832 flock.l_start = (off_t)0;
1833 flock.l_len = (off_t)0;
1834 if (fcntl(doorfd, F_SETLK, &flock) < 0) {
1835 /*
1836 * Someone else raced us here and grabbed the lock file
1837 * first. A warning here is inappropriate since nothing
1838 * went wrong.
1839 */
1840 goto out;
1841 }
1842
1843 if (strcmp(st.st_fstype, "namefs") == 0) {
1844 struct door_info info;
1845
1846 /*
1847 * There is already something fattach()'ed to this file.
1848 * Lets see what the door is up to.
1849 */
1850 if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
1851 /*
1852 * Another zoneadmd process seems to be in
1853 * control of the situation and we don't need to
1854 * be here. A warning here is inappropriate
1855 * since nothing went wrong.
1856 *
1857 * If the door has been revoked, the zoneadmd
1858 * process currently managing the zone is going
1859 * away. We'll return control to zoneadm(1m)
1860 * which will try again (by which time zoneadmd
1861 * will hopefully have exited).
1862 */
1863 goto out;
1864 }
1865
1866 /*
1867 * If we got this far, there's a fattach(3c)'ed door
1868 * that belongs to a process that has exited, which can
1869 * happen if the previous zoneadmd died unexpectedly.
1870 *
1871 * Let user know that something is amiss, but that we can
1872 * recover; if the zone is in the installed state, then don't
1873 * message, since having a running zoneadmd isn't really
1874 * expected/needed. We want to keep occurences of this message
1875 * limited to times when zoneadmd is picking back up from a
1876 * zoneadmd that died while the zone was in some non-trivial
1877 * state.
1878 */
1879 if (zstate > ZONE_STATE_INSTALLED) {
1880 static zoneid_t zid;
1881
1882 zerror(zlogp, B_FALSE,
1883 "zone '%s': WARNING: zone is in state '%s', but "
1884 "zoneadmd does not appear to be available; "
1885 "restarted zoneadmd to recover.",
1886 zone_name, zone_state_str(zstate));
1887
1888 /*
1889 * Startup a thread to perform the zfd logging/tty svc
1890 * and a thread to perform memory capping for the
1891 * zone. zlogp won't be valid for much longer so use
1892 * logsys.
1893 */
1894 if ((zid = getzoneidbyname(zone_name)) != -1) {
1895 create_log_thread(&logsys, zid);
1896 create_mcap_thread(&logsys, zid);
1897 }
1898
1899 /* recover the global configuration snapshot */
1900 if (snap_hndl == NULL) {
1901 if ((snap_hndl = zonecfg_init_handle())
1902 == NULL ||
1903 zonecfg_create_snapshot(zone_name)
1904 != Z_OK ||
1905 zonecfg_get_snapshot_handle(zone_name,
1906 snap_hndl) != Z_OK) {
1907 zerror(zlogp, B_FALSE, "recovering "
1908 "zone configuration handle");
1909 goto out;
1910 }
1911 }
1912 }
1913
1914 (void) fdetach(zone_door_path);
1915 (void) close(doorfd);
1916 goto top;
1917 }
1918 ret = 0;
1919 out:
1920 (void) close(doorfd);
1921 return (ret);
1922 }
1923
1924 /*
1925 * Setup the brand's pre and post state change callbacks, as well as the
1926 * query callback, if any of these exist.
1927 */
1928 static int
1929 brand_callback_init(brand_handle_t bh, char *zone_name)
1930 {
1931 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
1932 sizeof (pre_statechg_hook));
1933
1934 if (brand_get_prestatechange(bh, zone_name, zonepath,
1935 pre_statechg_hook + EXEC_LEN,
1936 sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
1937 return (-1);
1938
1939 if (strlen(pre_statechg_hook) <= EXEC_LEN)
1940 pre_statechg_hook[0] = '\0';
1941
1942 (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
1943 sizeof (post_statechg_hook));
1944
1945 if (brand_get_poststatechange(bh, zone_name, zonepath,
1946 post_statechg_hook + EXEC_LEN,
1947 sizeof (post_statechg_hook) - EXEC_LEN) != 0)
1948 return (-1);
1949
1950 if (strlen(post_statechg_hook) <= EXEC_LEN)
1951 post_statechg_hook[0] = '\0';
1952
1953 (void) strlcpy(query_hook, EXEC_PREFIX,
1954 sizeof (query_hook));
1955
1956 if (brand_get_query(bh, zone_name, zonepath, query_hook + EXEC_LEN,
1957 sizeof (query_hook) - EXEC_LEN) != 0)
1958 return (-1);
1959
1960 if (strlen(query_hook) <= EXEC_LEN)
1961 query_hook[0] = '\0';
1962
1963 return (0);
1964 }
1965
1966 int
1967 main(int argc, char *argv[])
1968 {
1969 int opt;
1970 zoneid_t zid;
1971 priv_set_t *privset;
1972 zone_state_t zstate;
1973 char parents_locale[MAXPATHLEN];
1974 brand_handle_t bh;
1975 int err;
1976
1977 pid_t pid;
1978 sigset_t blockset;
1979 sigset_t block_cld;
1980
1981 struct {
1982 sema_t sem;
1983 int status;
1984 zlog_t log;
1985 } *shstate;
1986 size_t shstatelen = getpagesize();
1987
1988 zlog_t errlog;
1989 zlog_t *zlogp;
1990
1991 int ctfd;
1992
1993 progname = get_execbasename(argv[0]);
1994
1995 /*
1996 * Make sure stderr is unbuffered
1997 */
1998 (void) setbuffer(stderr, NULL, 0);
1999
2000 /*
2001 * Get out of the way of mounted filesystems, since we will daemonize
2002 * soon.
2003 */
2004 (void) chdir("/");
2005
2006 /*
2007 * Use the default system umask per PSARC 1998/110 rather than
2008 * anything that may have been set by the caller.
2009 */
2010 (void) umask(CMASK);
2011
2012 /*
2013 * Initially we want to use our parent's locale.
2014 */
2015 (void) setlocale(LC_ALL, "");
2016 (void) textdomain(TEXT_DOMAIN);
2017 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
2018 sizeof (parents_locale));
2019
2020 /*
2021 * This zlog_t is used for writing to stderr
2022 */
2023 errlog.logfile = stderr;
2024 errlog.buflen = errlog.loglen = 0;
2025 errlog.buf = errlog.log = NULL;
2026 errlog.locale = parents_locale;
2027
2028 /*
2029 * We start off writing to stderr until we're ready to daemonize.
2030 */
2031 zlogp = &errlog;
2032
2033 /*
2034 * Process options.
2035 */
2036 while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
2037 switch (opt) {
2038 case 'R':
2039 zonecfg_set_root(optarg);
2040 break;
2041 case 'z':
2042 zone_name = optarg;
2043 break;
2044 default:
2045 usage();
2046 }
2047 }
2048
2049 if (zone_name == NULL)
2050 usage();
2051
2052 /*
2053 * Because usage() prints directly to stderr, it has gettext()
2054 * wrapping, which depends on the locale. But since zerror() calls
2055 * localize() which tweaks the locale, it is not safe to call zerror()
2056 * until after the last call to usage(). Fortunately, the last call
2057 * to usage() is just above and the first call to zerror() is just
2058 * below. Don't mess this up.
2059 */
2060 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
2061 zerror(zlogp, B_FALSE, "cannot manage the %s zone",
2062 GLOBAL_ZONENAME);
2063 return (1);
2064 }
2065
2066 if (zone_get_id(zone_name, &zid) != 0) {
2067 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
2068 zonecfg_strerror(Z_NO_ZONE));
2069 return (1);
2070 }
2071
2072 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
2073 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
2074 zonecfg_strerror(err));
2075 return (1);
2076 }
2077 if (zstate < ZONE_STATE_INCOMPLETE) {
2078 zerror(zlogp, B_FALSE,
2079 "cannot manage a zone which is in state '%s'",
2080 zone_state_str(zstate));
2081 return (1);
2082 }
2083
2084 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
2085 zerror(zlogp, B_FALSE, "unable to determine zone path");
2086 return (-1);
2087 }
2088
2089 if (zonecfg_default_brand(default_brand,
2090 sizeof (default_brand)) != Z_OK) {
2091 zerror(zlogp, B_FALSE, "unable to determine default brand");
2092 return (1);
2093 }
2094
2095 /* Get a handle to the brand info for this zone */
2096 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name))
2097 != Z_OK) {
2098 zerror(zlogp, B_FALSE, "unable to determine zone brand");
2099 return (1);
2100 }
2101 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0);
2102 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0);
2103
2104 /*
2105 * In the alternate root environment, the only supported
2106 * operations are mount and unmount. In this case, just treat
2107 * the zone as native if it is cluster. Cluster zones can be
2108 * native for the purpose of LU or upgrade, and the cluster
2109 * brand may not exist in the miniroot (such as in net install
2110 * upgrade).
2111 */
2112 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) {
2113 zone_iscluster = B_TRUE;
2114 if (zonecfg_in_alt_root()) {
2115 (void) strlcpy(brand_name, default_brand,
2116 sizeof (brand_name));
2117 }
2118 } else {
2119 zone_iscluster = B_FALSE;
2120 }
2121
2122 if ((bh = brand_open(brand_name)) == NULL) {
2123 zerror(zlogp, B_FALSE, "unable to open zone brand");
2124 return (1);
2125 }
2126
2127 /* Get state change brand hooks. */
2128 if (brand_callback_init(bh, zone_name) == -1) {
2129 zerror(zlogp, B_TRUE,
2130 "failed to initialize brand state change hooks");
2131 brand_close(bh);
2132 return (1);
2133 }
2134
2135 brand_close(bh);
2136
2137 /*
2138 * Check that we have all privileges. It would be nice to pare
2139 * this down, but this is at least a first cut.
2140 */
2141 if ((privset = priv_allocset()) == NULL) {
2142 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
2143 return (1);
2144 }
2145
2146 if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2147 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
2148 priv_freeset(privset);
2149 return (1);
2150 }
2151
2152 if (priv_isfullset(privset) == B_FALSE) {
2153 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
2154 "run this command (all privs required)");
2155 priv_freeset(privset);
2156 return (1);
2157 }
2158 priv_freeset(privset);
2159
2160 if (mkzonedir(zlogp) != 0)
2161 return (1);
2162
2163 /*
2164 * Pre-fork: setup shared state
2165 */
2166 if ((shstate = (void *)mmap(NULL, shstatelen,
2167 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
2168 MAP_FAILED) {
2169 zerror(zlogp, B_TRUE, "%s failed", "mmap");
2170 return (1);
2171 }
2172 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
2173 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
2174 (void) munmap((char *)shstate, shstatelen);
2175 return (1);
2176 }
2177 shstate->log.logfile = NULL;
2178 shstate->log.buflen = shstatelen - sizeof (*shstate);
2179 shstate->log.loglen = shstate->log.buflen;
2180 shstate->log.buf = (char *)shstate + sizeof (*shstate);
2181 shstate->log.log = shstate->log.buf;
2182 shstate->log.locale = parents_locale;
2183 shstate->status = -1;
2184
2185 /*
2186 * We need a SIGCHLD handler so the sema_wait() below will wake
2187 * up if the child dies without doing a sema_post().
2188 */
2189 (void) sigset(SIGCHLD, sigchld);
2190 /*
2191 * We must mask SIGCHLD until after we've coped with the fork
2192 * sufficiently to deal with it; otherwise we can race and
2193 * receive the signal before pid has been initialized
2194 * (yes, this really happens).
2195 */
2196 (void) sigemptyset(&block_cld);
2197 (void) sigaddset(&block_cld, SIGCHLD);
2198 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2199
2200 /*
2201 * The parent only needs stderr after the fork, so close other fd's
2202 * that we inherited from zoneadm so that the parent doesn't have those
2203 * open while waiting. The child will close the rest after the fork.
2204 */
2205 closefrom(3);
2206
2207 if ((ctfd = init_template()) == -1) {
2208 zerror(zlogp, B_TRUE, "failed to create contract");
2209 return (1);
2210 }
2211
2212 /*
2213 * Do not let another thread localize a message while we are forking.
2214 */
2215 (void) mutex_lock(&msglock);
2216 pid = fork();
2217 (void) mutex_unlock(&msglock);
2218
2219 /*
2220 * In all cases (parent, child, and in the event of an error) we
2221 * don't want to cause creation of contracts on subsequent fork()s.
2222 */
2223 (void) ct_tmpl_clear(ctfd);
2224 (void) close(ctfd);
2225
2226 if (pid == -1) {
2227 zerror(zlogp, B_TRUE, "could not fork");
2228 return (1);
2229
2230 } else if (pid > 0) { /* parent */
2231 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2232 /*
2233 * This marks a window of vulnerability in which we receive
2234 * the SIGCLD before falling into sema_wait (normally we would
2235 * get woken up from sema_wait with EINTR upon receipt of
2236 * SIGCLD). So we may need to use some other scheme like
2237 * sema_posting in the sigcld handler.
2238 * blech
2239 */
2240 (void) sema_wait(&shstate->sem);
2241 (void) sema_destroy(&shstate->sem);
2242 if (shstate->status != 0)
2243 (void) waitpid(pid, NULL, WNOHANG);
2244 /*
2245 * It's ok if we die with SIGPIPE. It's not like we could have
2246 * done anything about it.
2247 */
2248 (void) fprintf(stderr, "%s", shstate->log.buf);
2249 _exit(shstate->status == 0 ? 0 : 1);
2250 }
2251
2252 /*
2253 * The child charges on.
2254 */
2255 (void) sigset(SIGCHLD, SIG_DFL);
2256 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2257
2258 /*
2259 * SIGPIPE can be delivered if we write to a socket for which the
2260 * peer endpoint is gone. That can lead to too-early termination
2261 * of zoneadmd, and that's not good eats.
2262 */
2263 (void) sigset(SIGPIPE, SIG_IGN);
2264 /*
2265 * Stop using stderr
2266 */
2267 zlogp = &shstate->log;
2268
2269 /*
2270 * We don't need stdout/stderr from now on.
2271 */
2272 closefrom(0);
2273
2274 /*
2275 * Initialize the syslog zlog_t. This needs to be done after
2276 * the call to closefrom().
2277 */
2278 logsys.buf = logsys.log = NULL;
2279 logsys.buflen = logsys.loglen = 0;
2280 logsys.logfile = NULL;
2281 logsys.locale = DEFAULT_LOCALE;
2282
2283 openlog("zoneadmd", LOG_PID, LOG_DAEMON);
2284
2285 /*
2286 * The eventstream is used to publish state changes in the zone
2287 * from the door threads to the console I/O poller.
2288 */
2289 if (eventstream_init() == -1) {
2290 zerror(zlogp, B_TRUE, "unable to create eventstream");
2291 goto child_out;
2292 }
2293
2294 (void) snprintf(zone_door_path, sizeof (zone_door_path),
2295 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);
2296
2297 /*
2298 * See if another zoneadmd is running for this zone. If not, then we
2299 * can now modify system state.
2300 */
2301 if (make_daemon_exclusive(zlogp) == -1)
2302 goto child_out;
2303
2304
2305 /*
2306 * Create/join a new session; we need to be careful of what we do with
2307 * the console from now on so we don't end up being the session leader
2308 * for the terminal we're going to be handing out.
2309 */
2310 (void) setsid();
2311
2312 /*
2313 * This thread shouldn't be receiving any signals; in particular,
2314 * SIGCHLD should be received by the thread doing the fork().
2315 */
2316 (void) sigfillset(&blockset);
2317 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
2318
2319 /*
2320 * Setup the console device and get ready to serve the console;
2321 * once this has completed, we're ready to let console clients
2322 * make an attempt to connect (they will block until
2323 * serve_console_sock() below gets called, and any pending
2324 * connection is accept()ed).
2325 */
2326 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0)
2327 goto child_out;
2328
2329 /*
2330 * Take the lock now, so that when the door server gets going, we
2331 * are guaranteed that it won't take a request until we are sure
2332 * that everything is completely set up. See the child_out: label
2333 * below to see why this matters.
2334 */
2335 (void) mutex_lock(&lock);
2336
2337 /* Init semaphore for scratch zones. */
2338 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
2339 zerror(zlogp, B_TRUE,
2340 "failed to initialize semaphore for scratch zone");
2341 goto child_out;
2342 }
2343
2344 /* open the dladm handle */
2345 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) {
2346 zerror(zlogp, B_FALSE, "failed to open dladm handle");
2347 goto child_out;
2348 }
2349
2350 /*
2351 * Note: door setup must occur *after* the console is setup.
2352 * This is so that as zlogin tests the door to see if zoneadmd
2353 * is ready yet, we know that the console will get serviced
2354 * once door_info() indicates that the door is "up".
2355 */
2356 if (setup_door(zlogp) == -1)
2357 goto child_out;
2358
2359 /*
2360 * Things seem OK so far; tell the parent process that we're done
2361 * with setup tasks. This will cause the parent to exit, signalling
2362 * to zoneadm, zlogin, or whatever forked it that we are ready to
2363 * service requests.
2364 */
2365 shstate->status = 0;
2366 (void) sema_post(&shstate->sem);
2367 (void) munmap((char *)shstate, shstatelen);
2368 shstate = NULL;
2369
2370 (void) mutex_unlock(&lock);
2371
2372 /*
2373 * zlogp is now invalid, so reset it to the syslog logger.
2374 */
2375 zlogp = &logsys;
2376
2377 /*
2378 * Now that we are free of any parents, switch to the default locale.
2379 */
2380 (void) setlocale(LC_ALL, DEFAULT_LOCALE);
2381
2382 /*
2383 * At this point the setup portion of main() is basically done, so
2384 * we reuse this thread to manage the zone console. When
2385 * serve_console() has returned, we are past the point of no return
2386 * in the life of this zoneadmd.
2387 */
2388 if (zonecfg_in_alt_root()) {
2389 /*
2390 * This is just awful, but mounted scratch zones don't (and
2391 * can't) have consoles. We just wait for unmount instead.
2392 */
2393 while (sema_wait(&scratch_sem) == EINTR)
2394 ;
2395 } else {
2396 serve_console(zlogp);
2397 assert(in_death_throes);
2398 }
2399
2400 /*
2401 * This is the next-to-last part of the exit interlock. Upon calling
2402 * fdetach(), the door will go unreferenced; once any
2403 * outstanding requests (like the door thread doing Z_HALT) are
2404 * done, the door will get an UNREF notification; when it handles
2405 * the UNREF, the door server will cause the exit. It's possible
2406 * that fdetach() can fail because the file is in use, in which
2407 * case we'll retry the operation.
2408 */
2409 assert(!MUTEX_HELD(&lock));
2410 for (;;) {
2411 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY))
2412 break;
2413 yield();
2414 }
2415
2416 for (;;)
2417 (void) pause();
2418
2419 child_out:
2420 assert(pid == 0);
2421 if (shstate != NULL) {
2422 shstate->status = -1;
2423 (void) sema_post(&shstate->sem);
2424 (void) munmap((char *)shstate, shstatelen);
2425 }
2426
2427 /*
2428 * This might trigger an unref notification, but if so,
2429 * we are still holding the lock, so our call to exit will
2430 * ultimately win the race and will publish the right exit
2431 * code.
2432 */
2433 if (zone_door != -1) {
2434 assert(MUTEX_HELD(&lock));
2435 (void) door_revoke(zone_door);
2436 (void) fdetach(zone_door_path);
2437 }
2438
2439 if (dld_handle != NULL)
2440 dladm_close(dld_handle);
2441
2442 return (1); /* return from main() forcibly exits an MT process */
2443 }