1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2015, Joyent, Inc. All rights reserved.
26 */
27
28 /*
29 * zoneadmd manages zones; one zoneadmd process is launched for each
30 * non-global zone on the system. This daemon juggles four jobs:
31 *
32 * - Implement setup and teardown of the zone "virtual platform": mount and
33 * unmount filesystems; create and destroy network interfaces; communicate
34 * with devfsadmd to lay out devices for the zone; instantiate the zone
35 * console device; configure process runtime attributes such as resource
36 * controls, pool bindings, fine-grained privileges.
37 *
38 * - Launch the zone's init(1M) process.
39 *
40 * - Implement a door server; clients (like zoneadm) connect to the door
41 * server and request zone state changes. The kernel is also a client of
42 * this door server. A request to halt or reboot the zone which originates
43 * *inside* the zone results in a door upcall from the kernel into zoneadmd.
44 *
45 * One minor problem is that messages emitted by zoneadmd need to be passed
46 * back to the zoneadm process making the request. These messages need to
47 * be rendered in the client's locale; so, this is passed in as part of the
48 * request. The exception is the kernel upcall to zoneadmd, in which case
49 * messages are syslog'd.
50 *
51 * To make all of this work, the Makefile adds -a to xgettext to extract *all*
52 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
53 * strings which do not need to be translated.
54 *
55 * - Act as a console server for zlogin -C processes; see comments in zcons.c
56 * for more information about the zone console architecture.
57 *
58 * DESIGN NOTES
59 *
60 * Restart:
61 * A chief design constraint of zoneadmd is that it should be restartable in
62 * the case that the administrator kills it off, or it suffers a fatal error,
63 * without the running zone being impacted; this is akin to being able to
64 * reboot the service processor of a server without affecting the OS instance.
65 */
66
67 #include <sys/param.h>
68 #include <sys/mman.h>
69 #include <sys/types.h>
70 #include <sys/stat.h>
71 #include <sys/sysmacros.h>
72 #include <sys/time.h>
73
74 #include <bsm/adt.h>
75 #include <bsm/adt_event.h>
76
77 #include <alloca.h>
78 #include <assert.h>
79 #include <errno.h>
80 #include <door.h>
81 #include <fcntl.h>
82 #include <locale.h>
83 #include <signal.h>
84 #include <stdarg.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88 #include <strings.h>
89 #include <synch.h>
90 #include <syslog.h>
91 #include <thread.h>
92 #include <unistd.h>
93 #include <wait.h>
94 #include <limits.h>
95 #include <zone.h>
96 #include <libbrand.h>
97 #include <sys/brand.h>
98 #include <libcontract.h>
99 #include <libcontract_priv.h>
100 #include <sys/brand.h>
101 #include <sys/contract/process.h>
102 #include <sys/ctfs.h>
103 #include <libdladm.h>
104 #include <sys/dls_mgmt.h>
105 #include <libscf.h>
106
107 #include <libzonecfg.h>
108 #include <zonestat_impl.h>
109 #include "zoneadmd.h"
110
/* Program name shown in usage() output. */
static char *progname;
char *zone_name; /* zone which we are managing */
zone_dochandle_t snap_hndl; /* handle for snapshot created when ready */
char zonepath[MAXNAMELEN];
char pool_name[MAXNAMELEN];
char default_brand[MAXNAMELEN];
char brand_name[MAXNAMELEN];	/* passed to brand_open() */
boolean_t zone_isnative;
boolean_t zone_iscluster;
boolean_t zone_islabeled;
/* Set once zone_graceful_shutdown() has successfully run the shutdown cmd. */
boolean_t shutdown_in_progress;
/* Zone id returned by vplat_create() in zone_ready(). */
static zoneid_t zone_id;
dladm_handle_t dld_handle = NULL;

/*
 * Brand state-change hook command lines. An empty string means "no hook";
 * see brand_prestatechg() and brand_poststatechg().
 */
static char pre_statechg_hook[2 * MAXPATHLEN];
static char post_statechg_hook[2 * MAXPATHLEN];
char query_hook[2 * MAXPATHLEN];

/* zerror() sends messages logged against &logsys to syslog(3C). */
zlog_t logsys;

mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */
mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */

static sema_t scratch_sem; /* for scratch zones */

static char zone_door_path[MAXPATHLEN];
static int zone_door = -1;

boolean_t in_death_throes = B_FALSE; /* daemon is dying */
boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */

#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
#endif

/* Locale that localize_msg() restores after translating a message. */
#define DEFAULT_LOCALE "C"
147
148 static const char *
149 z_cmd_name(zone_cmd_t zcmd)
150 {
151 /* This list needs to match the enum in sys/zone.h */
152 static const char *zcmdstr[] = {
153 "ready", "boot", "forceboot", "reboot", "halt",
154 "note_uninstalling", "mount", "forcemount", "unmount",
155 "shutdown"
156 };
157
158 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr))
159 return ("unknown");
160 else
161 return (zcmdstr[(int)zcmd]);
162 }
163
/*
 * Return a pointer to the final path component of execfullname.
 *
 * Trailing '/' characters are removed by overwriting them with NULs, so the
 * caller's buffer is modified in that case. The returned pointer always
 * points into execfullname.
 */
static char *
get_execbasename(char *execfullname)
{
	char *slash;

	/* guard against '/' at end of command invocation */
	while ((slash = strrchr(execfullname, '/')) != NULL) {
		if (slash[1] != '\0')
			return (slash + 1);
		/* nothing after this slash: chop it off and look again */
		*slash = '\0';
	}

	return (execfullname);
}
186
187 static void
188 usage(void)
189 {
190 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
191 (void) fprintf(stderr,
192 gettext("\tNote: %s should not be run directly.\n"), progname);
193 exit(2);
194 }
195
/*
 * SIGCHLD handler. The body is intentionally empty: the signal number is
 * ignored and no state is touched.
 *
 * NOTE(review): presumably installed only so that SIGCHLD has a handler
 * instead of its default disposition -- confirm at the registration site.
 */
/* ARGSUSED */
static void
sigchld(int sig)
{
}
201
202 char *
203 localize_msg(char *locale, const char *msg)
204 {
205 char *out;
206
207 (void) mutex_lock(&msglock);
208 (void) setlocale(LC_MESSAGES, locale);
209 out = gettext(msg);
210 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
211 (void) mutex_unlock(&msglock);
212 return (out);
213 }
214
215 /* PRINTFLIKE3 */
216 void
217 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
218 {
219 va_list alist;
220 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
221 char *bp;
222 int saved_errno = errno;
223
224 if (zlogp == NULL)
225 return;
226 if (zlogp == &logsys)
227 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ",
228 zone_name);
229 else
230 buf[0] = '\0';
231 bp = &(buf[strlen(buf)]);
232
233 /*
234 * In theory, the locale pointer should be set to either "C" or a
235 * char array, so it should never be NULL
236 */
237 assert(zlogp->locale != NULL);
238 /* Locale is per process, but we are multi-threaded... */
239 fmt = localize_msg(zlogp->locale, fmt);
240
241 va_start(alist, fmt);
242 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
243 va_end(alist);
244 bp = &(buf[strlen(buf)]);
245 if (use_strerror)
246 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
247 strerror(saved_errno));
248 if (zlogp == &logsys) {
249 (void) syslog(LOG_ERR, "%s", buf);
250 } else if (zlogp->logfile != NULL) {
251 (void) fprintf(zlogp->logfile, "%s\n", buf);
252 } else {
253 size_t buflen;
254 size_t copylen;
255
256 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf);
257 copylen = MIN(buflen, zlogp->loglen);
258 zlogp->log += copylen;
259 zlogp->loglen -= copylen;
260 }
261 }
262
263 /*
264 * Since Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
265 * put the arguments into an argv style array, use getopt to process them,
266 * and put the resultant argument string back into outargs. Non-Solaris brands
267 * may support alternate forms of boot arguments so we must handle that as well.
268 *
269 * During the filtering, we pull out any arguments which are truly "boot"
270 * arguments, leaving only those which are to be passed intact to the
271 * progenitor process. The one we support at the moment is -i, which
272 * indicates to the kernel which program should be launched as 'init'.
273 *
274 * Except for Z_OK, all other return values are treated as fatal.
275 */
276 static int
277 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
278 char *init_file)
279 {
280 int argc = 0, argc_save;
281 int i;
282 int err;
283 char *arg, *lasts, **argv = NULL, **argv_save;
284 char zonecfg_args[BOOTARGS_MAX];
285 char scratchargs[BOOTARGS_MAX], *sargs;
286 char c;
287
288 bzero(outargs, BOOTARGS_MAX);
289
290 /*
291 * If the user didn't specify transient boot arguments, check
292 * to see if there were any specified in the zone configuration,
293 * and use them if applicable.
294 */
295 if (inargs == NULL || inargs[0] == '\0') {
296 zone_dochandle_t handle;
297 if ((handle = zonecfg_init_handle()) == NULL) {
298 zerror(zlogp, B_TRUE,
299 "getting zone configuration handle");
300 return (Z_BAD_HANDLE);
301 }
302 err = zonecfg_get_snapshot_handle(zone_name, handle);
303 if (err != Z_OK) {
304 zerror(zlogp, B_FALSE,
305 "invalid configuration snapshot");
306 zonecfg_fini_handle(handle);
307 return (Z_BAD_HANDLE);
308 }
309
310 bzero(zonecfg_args, sizeof (zonecfg_args));
311 (void) zonecfg_get_bootargs(handle, zonecfg_args,
312 sizeof (zonecfg_args));
313 inargs = zonecfg_args;
314 zonecfg_fini_handle(handle);
315 }
316
317 if (strlen(inargs) >= BOOTARGS_MAX) {
318 zerror(zlogp, B_FALSE, "boot argument string too long");
319 return (Z_INVAL);
320 }
321
322 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
323 sargs = scratchargs;
324 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
325 sargs = NULL;
326 argc++;
327 }
328
329 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
330 zerror(zlogp, B_FALSE, "memory allocation failed");
331 return (Z_NOMEM);
332 }
333
334 argv_save = argv;
335 argc_save = argc;
336
337 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
338 sargs = scratchargs;
339 i = 0;
340 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
341 sargs = NULL;
342 if ((argv[i] = strdup(arg)) == NULL) {
343 err = Z_NOMEM;
344 zerror(zlogp, B_FALSE, "memory allocation failed");
345 goto done;
346 }
347 i++;
348 }
349
350 /*
351 * We preserve compatibility with the illumos system boot behavior,
352 * which allows:
353 *
354 * # reboot kernel/unix -s -m verbose
355 *
356 * In this example, kernel/unix tells the booter what file to boot. The
357 * original intent of this was that we didn't want reboot in a zone to
358 * be gratuitously different, so we would silently ignore the boot
359 * file, if necessary. However, this usage is archaic and has never
360 * been common, since it is impossible to boot a zone onto a different
361 * kernel. Ignoring the first argument breaks for non-native brands
362 * which pass boot arguments in a different style. e.g.
363 * systemd.log_level=debug
364 * Thus, for backward compatibility we only ignore the first argument
365 * if it appears to be in the illumos form and attempting to specify a
366 * kernel.
367 */
368 if (argv[0] == NULL)
369 goto done;
370
371 assert(argv[0][0] != ' ');
372 assert(argv[0][0] != '\t');
373
374 if (strncmp(argv[0], "kernel/", 7) == 0) {
375 argv = &argv[1];
376 argc--;
377 }
378
379 optind = 0;
380 opterr = 0;
381 err = Z_OK;
382 while ((c = getopt(argc, argv, "fi:m:s")) != -1) {
383 switch (c) {
384 case 'i':
385 /*
386 * -i is handled by the runtime and is not passed
387 * along to userland
388 */
389 (void) strlcpy(init_file, optarg, MAXPATHLEN);
390 break;
391 case 'f':
392 /* This has already been processed by zoneadm */
393 break;
394 case 'm':
395 case 's':
396 /* These pass through unmolested */
397 (void) snprintf(outargs, BOOTARGS_MAX,
398 "%s -%c %s ", outargs, c, optarg ? optarg : "");
399 break;
400 case '?':
401 /*
402 * If a brand has its own init, we need to pass along
403 * whatever the user provides. We use the entire
404 * unknown string here so that we correctly handle
405 * unknown long options (e.g. --debug).
406 */
407 (void) snprintf(outargs, BOOTARGS_MAX,
408 "%s %s", outargs, argv[optind - 1]);
409 break;
410 }
411 }
412
413 /*
414 * We need to pass along everything else since we don't know what
415 * the brand's init is expecting. For example, an argument list like:
416 * --confdir /foo --debug
417 * will cause the getopt parsing to stop at '/foo' but we need to pass
418 * that on, along with the '--debug'. This does mean that we require
419 * any of our known options (-ifms) to preceed the brand-specific ones.
420 */
421 while (optind < argc) {
422 (void) snprintf(outargs, BOOTARGS_MAX, "%s %s", outargs,
423 argv[optind]);
424 optind++;
425 }
426
427 done:
428 for (i = 0; i < argc_save; i++) {
429 if (argv_save[i] != NULL)
430 free(argv_save[i]);
431 }
432 free(argv_save);
433 return (err);
434 }
435
436
437 static int
438 mkzonedir(zlog_t *zlogp)
439 {
440 struct stat st;
441 /*
442 * We must create and lock everyone but root out of ZONES_TMPDIR
443 * since anyone can open any UNIX domain socket, regardless of
444 * its file system permissions. Sigh...
445 */
446 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
447 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
448 return (-1);
449 }
450 /* paranoia */
451 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
452 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
453 return (-1);
454 }
455 (void) chmod(ZONES_TMPDIR, S_IRWXU);
456 return (0);
457 }
458
459 /*
460 * Run the brand's pre-state change callback, if it exists.
461 */
462 static int
463 brand_prestatechg(zlog_t *zlogp, int state, int cmd)
464 {
465 char cmdbuf[2 * MAXPATHLEN];
466 const char *altroot;
467
468 if (pre_statechg_hook[0] == '\0')
469 return (0);
470
471 altroot = zonecfg_get_root();
472 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
473 state, cmd, altroot) > sizeof (cmdbuf))
474 return (-1);
475
476 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
477 return (-1);
478
479 return (0);
480 }
481
482 /*
483 * Run the brand's post-state change callback, if it exists.
484 */
485 static int
486 brand_poststatechg(zlog_t *zlogp, int state, int cmd)
487 {
488 char cmdbuf[2 * MAXPATHLEN];
489 const char *altroot;
490
491 if (post_statechg_hook[0] == '\0')
492 return (0);
493
494 altroot = zonecfg_get_root();
495 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
496 state, cmd, altroot) > sizeof (cmdbuf))
497 return (-1);
498
499 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
500 return (-1);
501
502 return (0);
503 }
504
505 /*
506 * Notify zonestatd of the new zone. If zonestatd is not running, this
507 * will do nothing.
508 */
509 static void
510 notify_zonestatd(zoneid_t zoneid)
511 {
512 int cmd[2];
513 int fd;
514 door_arg_t params;
515
516 fd = open(ZS_DOOR_PATH, O_RDONLY);
517 if (fd < 0)
518 return;
519
520 cmd[0] = ZSD_CMD_NEW_ZONE;
521 cmd[1] = zoneid;
522 params.data_ptr = (char *)&cmd;
523 params.data_size = sizeof (cmd);
524 params.desc_ptr = NULL;
525 params.desc_num = 0;
526 params.rbuf = NULL;
527 params.rsize = NULL;
528 (void) door_call(fd, ¶ms);
529 (void) close(fd);
530 }
531
532 /*
533 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is
534 * 'true' if this is being invoked as part of the processing for the "mount"
535 * subcommand.
536 */
537 static int
538 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
539 {
540 int err;
541
542 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
543 return (-1);
544
545 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
546 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
547 zonecfg_strerror(err));
548 goto bad;
549 }
550
551 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
552 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
553 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
554 zonecfg_strerror(err));
555 goto bad;
556 }
557 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
558 bringup_failure_recovery = B_TRUE;
559 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
560 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
561 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
562 zonecfg_strerror(err));
563 goto bad;
564 }
565
566 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
567 goto bad;
568
569 return (0);
570
571 bad:
572 /*
573 * If something goes wrong, we up the zones's state to the target
574 * state, READY, and then invoke the hook as if we're halting.
575 */
576 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
577 return (-1);
578 }
579
580 int
581 init_template(void)
582 {
583 int fd;
584 int err = 0;
585
586 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
587 if (fd == -1)
588 return (-1);
589
590 /*
591 * For now, zoneadmd doesn't do anything with the contract.
592 * Deliver no events, don't inherit, and allow it to be orphaned.
593 */
594 err |= ct_tmpl_set_critical(fd, 0);
595 err |= ct_tmpl_set_informative(fd, 0);
596 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
597 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
598 if (err || ct_tmpl_activate(fd)) {
599 (void) close(fd);
600 return (-1);
601 }
602
603 return (fd);
604 }
605
/*
 * Context passed through the opaque `data` argument to mount_early_fs().
 */
typedef struct fs_callback {
	zlog_t *zlogp;		/* where mount_early_fs() reports errors */
	zoneid_t zoneid;	/* zone the mounting child enters */
	boolean_t mount_cmd;	/* if set, root is "<zonepath>/lu" */
} fs_callback_t;
611
612 static int
613 mount_early_fs(void *data, const char *spec, const char *dir,
614 const char *fstype, const char *opt)
615 {
616 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp;
617 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid;
618 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd;
619 char rootpath[MAXPATHLEN];
620 pid_t child;
621 int child_status;
622 int tmpl_fd;
623 int rv;
624 ctid_t ct;
625
626 /* determine the zone rootpath */
627 if (mount_cmd) {
628 char luroot[MAXPATHLEN];
629
630 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
631 resolve_lofs(zlogp, luroot, sizeof (luroot));
632 (void) strlcpy(rootpath, luroot, sizeof (rootpath));
633 } else {
634 if (zone_get_rootpath(zone_name,
635 rootpath, sizeof (rootpath)) != Z_OK) {
636 zerror(zlogp, B_FALSE, "unable to determine zone root");
637 return (-1);
638 }
639 }
640
641 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) {
642 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
643 rootpath, dir);
644 return (-1);
645 } else if (rv > 0) {
646 /* The mount point path doesn't exist, create it now. */
647 if (make_one_dir(zlogp, rootpath, dir,
648 DEFAULT_DIR_MODE, DEFAULT_DIR_USER,
649 DEFAULT_DIR_GROUP) != 0) {
650 zerror(zlogp, B_FALSE, "failed to create mount point");
651 return (-1);
652 }
653
654 /*
655 * Now this might seem weird, but we need to invoke
656 * valid_mount_path() again. Why? Because it checks
657 * to make sure that the mount point path is canonical,
658 * which it can only do if the path exists, so now that
659 * we've created the path we have to verify it again.
660 */
661 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir,
662 fstype)) < 0) {
663 zerror(zlogp, B_FALSE,
664 "%s%s is not a valid mount point", rootpath, dir);
665 return (-1);
666 }
667 }
668
669 if ((tmpl_fd = init_template()) == -1) {
670 zerror(zlogp, B_TRUE, "failed to create contract");
671 return (-1);
672 }
673
674 if ((child = fork()) == -1) {
675 (void) ct_tmpl_clear(tmpl_fd);
676 (void) close(tmpl_fd);
677 zerror(zlogp, B_TRUE, "failed to fork");
678 return (-1);
679
680 } else if (child == 0) { /* child */
681 char opt_buf[MAX_MNTOPT_STR];
682 int optlen = 0;
683 int mflag = MS_DATA;
684
685 (void) ct_tmpl_clear(tmpl_fd);
686 /*
687 * Even though there are no procs running in the zone, we
688 * do this for paranoia's sake.
689 */
690 (void) closefrom(0);
691
692 if (zone_enter(zoneid) == -1) {
693 _exit(errno);
694 }
695 if (opt != NULL) {
696 /*
697 * The mount() system call is incredibly annoying.
698 * If options are specified, we need to copy them
699 * into a temporary buffer since the mount() system
700 * call will overwrite the options string. It will
701 * also fail if the new option string it wants to
702 * write is bigger than the one we passed in, so
703 * you must pass in a buffer of the maximum possible
704 * option string length. sigh.
705 */
706 (void) strlcpy(opt_buf, opt, sizeof (opt_buf));
707 opt = opt_buf;
708 optlen = MAX_MNTOPT_STR;
709 mflag = MS_OPTIONSTR;
710 }
711 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0)
712 _exit(errno);
713 _exit(0);
714 }
715
716 /* parent */
717 if (contract_latest(&ct) == -1)
718 ct = -1;
719 (void) ct_tmpl_clear(tmpl_fd);
720 (void) close(tmpl_fd);
721 if (waitpid(child, &child_status, 0) != child) {
722 /* unexpected: we must have been signalled */
723 (void) contract_abandon_id(ct);
724 return (-1);
725 }
726 (void) contract_abandon_id(ct);
727 if (WEXITSTATUS(child_status) != 0) {
728 errno = WEXITSTATUS(child_status);
729 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
730 return (-1);
731 }
732
733 return (0);
734 }
735
736 /*
737 * If retstr is not NULL, the output of the subproc is returned in the str,
738 * otherwise it is output using zerror(). Any memory allocated for retstr
739 * should be freed by the caller.
740 */
741 int
742 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
743 {
744 char buf[1024]; /* arbitrary large amount */
745 char *inbuf;
746 FILE *file;
747 int status;
748 int rd_cnt;
749
750 if (retstr != NULL) {
751 if ((*retstr = malloc(1024)) == NULL) {
752 zerror(zlogp, B_FALSE, "out of memory");
753 return (-1);
754 }
755 inbuf = *retstr;
756 rd_cnt = 0;
757 } else {
758 inbuf = buf;
759 }
760
761 file = popen(cmdbuf, "r");
762 if (file == NULL) {
763 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
764 return (-1);
765 }
766
767 while (fgets(inbuf, 1024, file) != NULL) {
768 if (retstr == NULL) {
769 if (zlogp != &logsys)
770 zerror(zlogp, B_FALSE, "%s", inbuf);
771 } else {
772 char *p;
773
774 rd_cnt += 1024 - 1;
775 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
776 zerror(zlogp, B_FALSE, "out of memory");
777 (void) pclose(file);
778 return (-1);
779 }
780
781 *retstr = p;
782 inbuf = *retstr + rd_cnt;
783 }
784 }
785 status = pclose(file);
786
787 if (WIFSIGNALED(status)) {
788 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
789 "signal %d", cmdbuf, WTERMSIG(status));
790 return (-1);
791 }
792 assert(WIFEXITED(status));
793 if (WEXITSTATUS(status) == ZEXIT_EXEC) {
794 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
795 return (-1);
796 }
797 return (WEXITSTATUS(status));
798 }
799
800 #if 0 /* XXX KEBE SAYS not yet */
801 /*
802 * Get the path for this zone's init(1M) (or equivalent) process. First look
803 * for a zone-specific init-name attr, then get it from the brand.
804 */
805 static int
806 get_initname(brand_handle_t bh, char *initname, int len)
807 {
808 struct zone_attrtab a;
809
810 bzero(&a, sizeof (a));
811 (void) strlcpy(a.zone_attr_name, "init-name",
812 sizeof (a.zone_attr_name));
813
814 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
815 (void) strlcpy(initname, a.zone_attr_value, len);
816 return (0);
817 }
818
819 return (brand_get_initname(bh, initname, len));
820 }
821
822 /*
823 * Get the restart-init flag for this zone's init(1M) (or equivalent) process.
824 * First look for a zone-specific restart-init attr, then get it from the brand.
825 */
826 static boolean_t
827 restartinit(brand_handle_t bh)
828 {
829 struct zone_attrtab a;
830
831 bzero(&a, sizeof (a));
832 (void) strlcpy(a.zone_attr_name, "restart-init",
833 sizeof (a.zone_attr_name));
834
835 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
836 if (strcmp(a.zone_attr_value, "false") == 0)
837 return (B_FALSE);
838 return (B_TRUE);
839 }
840
841 return (brand_restartinit(bh));
842 }
843 #endif /* XXX KEBE */
844
845 /*
846 * Get the app-svc-dependent flag for this zone's init process. This is a
847 * zone-specific attr which controls the type of contract we create for the
848 * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal
849 * set, so that when any service which is in the same contract exits, the init
850 * application will be terminated.
851 */
852 static boolean_t
853 is_app_svc_dep(brand_handle_t bh)
854 {
855 struct zone_attrtab a;
856
857 bzero(&a, sizeof (a));
858 (void) strlcpy(a.zone_attr_name, "app-svc-dependent",
859 sizeof (a.zone_attr_name));
860
861 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
862 strcmp(a.zone_attr_value, "true") == 0) {
863 return (B_TRUE);
864 }
865
866 return (B_FALSE);
867 }
868
/*
 * Boot the zone.
 *
 * In order: run the brand's pre-state change hook, mount the brand's
 * platform filesystems from inside the zone, look up the brand's boot
 * callback, init path and restart-init setting, filter the boot arguments,
 * sanity-check that init exists and is executable, load datalinks for
 * exclusive-IP zones, run the brand boot callback, push the init name, boot
 * arguments and init flags into the kernel, notify zonestatd, and finally
 * call zone_boot(). On success the post-state change hook is run and the
 * logging and memory-capping threads are started.
 *
 * bootargs may be NULL or empty; see filter_bootargs(). Returns 0 on
 * success and -1 on failure. Every failure after the pre-state change hook
 * goes through the `bad' label.
 */
static int
zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
{
	zoneid_t zoneid;
	struct stat st;
	char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
	char nbootargs[BOOTARGS_MAX];
	char cmdbuf[MAXPATHLEN];
	fs_callback_t cb;
	brand_handle_t bh;
	zone_iptype_t iptype;
	dladm_status_t status;
	char errmsg[DLADM_STRSIZE];
	int err;
	boolean_t restart_init;
	boolean_t app_svc_dep;

	if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
		return (-1);

	if ((zoneid = getzoneidbyname(zone_name)) == -1) {
		zerror(zlogp, B_TRUE, "unable to get zoneid");
		goto bad;
	}

	/* Context handed to mount_early_fs() for each brand mount. */
	cb.zlogp = zlogp;
	cb.zoneid = zoneid;
	cb.mount_cmd = B_FALSE;

	/* Get a handle to the brand info for this zone */
	if ((bh = brand_open(brand_name)) == NULL) {
		zerror(zlogp, B_FALSE, "unable to determine zone brand");
		goto bad;
	}

	/*
	 * Get the list of filesystems to mount from the brand
	 * configuration. These mounts are done via a thread that will
	 * enter the zone, so they are done from within the context of the
	 * zone.
	 */
	if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
		zerror(zlogp, B_FALSE, "unable to mount filesystems");
		brand_close(bh);
		goto bad;
	}

	/*
	 * Get the brand's boot callback if it exists. The callback text is
	 * written after EXEC_PREFIX, so cmdbuf holding nothing beyond the
	 * prefix means there is no callback (checked further down).
	 */
	(void) strcpy(cmdbuf, EXEC_PREFIX);
	if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
	    sizeof (cmdbuf) - EXEC_LEN) != 0) {
		zerror(zlogp, B_FALSE,
		    "unable to determine branded zone's boot callback");
		brand_close(bh);
		goto bad;
	}

	/* Get the path for this zone's init(1M) (or equivalent) process. */
	if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
		zerror(zlogp, B_FALSE,
		    "unable to determine zone's init(1M) location");
		brand_close(bh);
		goto bad;
	}

	/* See if this zone's brand should restart init if it dies. */
	restart_init = brand_restartinit(bh);

	/*
	 * See if we need to setup contract dependencies between the zone's
	 * primary application and any of its services.
	 */
	app_svc_dep = is_app_svc_dep(bh);

	/* Everything needed from the brand has been copied out. */
	brand_close(bh);

	/* A -i boot argument, if present, overrides init_file here. */
	err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
	if (err != Z_OK)
		goto bad;

	assert(init_file[0] != '\0');

	/*
	 * Try to anticipate possible problems: If possible, make sure init is
	 * executable.
	 */
	if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) {
		zerror(zlogp, B_FALSE, "unable to determine zone root");
		goto bad;
	}

	(void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file);

	if (lstat(initpath, &st) == -1) {
		zerror(zlogp, B_TRUE, "could not stat %s", initpath);
		goto bad;
	}

	if ((st.st_mode & S_IFMT) == S_IFLNK) {
		/* symlink, we'll have to wait and resolve when we boot */
	} else if ((st.st_mode & S_IXUSR) == 0) {
		zerror(zlogp, B_FALSE, "%s is not executable", initpath);
		goto bad;
	}

	/*
	 * Exclusive stack zones interact with the dlmgmtd running in the
	 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is
	 * booting, and loads its datalinks from the zone's datalink
	 * configuration file.
	 */
	if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
		status = dladm_zone_boot(dld_handle, zoneid);
		if (status != DLADM_STATUS_OK) {
			zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
			    " %s", dladm_status2str(status, errmsg));
			goto bad;
		}
	}

	/*
	 * If there is a brand 'boot' callback, execute it now to give the
	 * brand one last chance to do any additional setup before the zone
	 * is booted.
	 */
	if ((strlen(cmdbuf) > EXEC_LEN) &&
	    (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
		zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
		goto bad;
	}

	if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
		zerror(zlogp, B_TRUE, "could not set zone boot file");
		goto bad;
	}

	if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
		zerror(zlogp, B_TRUE, "could not set zone boot arguments");
		goto bad;
	}

	if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART,
	    NULL, 0) == -1) {
		zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
		goto bad;
	}

	/*
	 * NOTE(review): this passes the value B_TRUE cast to a pointer, not
	 * the address of a boolean_t -- confirm that is what zone_setattr()
	 * expects for ZONE_ATTR_APP_SVC_CT.
	 */
	if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT,
	    (void *)B_TRUE, sizeof (boolean_t)) == -1) {
		zerror(zlogp, B_TRUE, "could not set zone app-die");
		goto bad;
	}

	/*
	 * Inform zonestatd of a new zone so that it can install a door for
	 * the zone to contact it.
	 *
	 * NOTE(review): this call and the two thread-creation calls below
	 * use the file-scope zone_id rather than the zoneid looked up at the
	 * top of this function -- confirm the two are always identical here.
	 */
	notify_zonestatd(zone_id);

	if (zone_boot(zoneid) == -1) {
		zerror(zlogp, B_TRUE, "unable to boot zone");
		goto bad;
	}

	if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
		goto bad;

	/* Startup a thread to perform zfd logging/tty svc for the zone. */
	create_log_thread(zlogp, zone_id);

	/* Startup a thread to perform memory capping for the zone. */
	create_mcap_thread(zlogp, zone_id);

	return (0);

bad:
	/*
	 * If something goes wrong, we up the zone's state to the target
	 * state, RUNNING, and then invoke the hook as if we're halting.
	 */
	(void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);

	return (-1);
}
1055
1056 static int
1057 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
1058 {
1059 int err;
1060
1061 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
1062 return (-1);
1063
1064 /* Shutting down, stop the memcap thread */
1065 destroy_mcap_thread();
1066
1067 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
1068 if (!bringup_failure_recovery)
1069 zerror(zlogp, B_FALSE, "unable to destroy zone");
1070 destroy_log_thread();
1071 return (-1);
1072 }
1073
1074 /* Shut down is done, stop the log thread */
1075 destroy_log_thread();
1076
1077 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
1078 return (-1);
1079
1080 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
1081 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
1082 zonecfg_strerror(err));
1083
1084 return (0);
1085 }
1086
1087 static int
1088 zone_graceful_shutdown(zlog_t *zlogp)
1089 {
1090 zoneid_t zoneid;
1091 pid_t child;
1092 char cmdbuf[MAXPATHLEN];
1093 brand_handle_t bh = NULL;
1094 ctid_t ct;
1095 int tmpl_fd;
1096 int child_status;
1097
1098 if (shutdown_in_progress) {
1099 zerror(zlogp, B_FALSE, "shutdown already in progress");
1100 return (-1);
1101 }
1102
1103 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1104 zerror(zlogp, B_TRUE, "unable to get zoneid");
1105 return (-1);
1106 }
1107
1108 /* Get a handle to the brand info for this zone */
1109 if ((bh = brand_open(brand_name)) == NULL) {
1110 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1111 return (-1);
1112 }
1113
1114 /*
1115 * If there is a brand 'shutdown' callback, execute it now to give the
1116 * brand a chance to cleanup any custom configuration.
1117 */
1118 (void) strcpy(cmdbuf, EXEC_PREFIX);
1119 if (brand_get_shutdown(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
1120 sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
1121 (void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
1122 }
1123 brand_close(bh);
1124
1125 if ((tmpl_fd = init_template()) == -1) {
1126 zerror(zlogp, B_TRUE, "failed to create contract");
1127 return (-1);
1128 }
1129
1130 if ((child = fork()) == -1) {
1131 (void) ct_tmpl_clear(tmpl_fd);
1132 (void) close(tmpl_fd);
1133 zerror(zlogp, B_TRUE, "failed to fork");
1134 return (-1);
1135 } else if (child == 0) {
1136 (void) ct_tmpl_clear(tmpl_fd);
1137 if (zone_enter(zoneid) == -1) {
1138 _exit(errno);
1139 }
1140 _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL));
1141 }
1142
1143 if (contract_latest(&ct) == -1)
1144 ct = -1;
1145 (void) ct_tmpl_clear(tmpl_fd);
1146 (void) close(tmpl_fd);
1147
1148 if (waitpid(child, &child_status, 0) != child) {
1149 /* unexpected: we must have been signalled */
1150 (void) contract_abandon_id(ct);
1151 return (-1);
1152 }
1153
1154 (void) contract_abandon_id(ct);
1155 if (WEXITSTATUS(child_status) != 0) {
1156 errno = WEXITSTATUS(child_status);
1157 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1158 return (-1);
1159 }
1160
1161 shutdown_in_progress = B_TRUE;
1162
1163 return (0);
1164 }
1165
1166 static int
1167 zone_wait_shutdown(zlog_t *zlogp)
1168 {
1169 zone_state_t zstate;
1170 uint64_t *tm = NULL;
1171 scf_simple_prop_t *prop = NULL;
1172 int timeout;
1173 int tries;
1174 int rc = -1;
1175
1176 /* Get default stop timeout from SMF framework */
1177 timeout = SHUTDOWN_WAIT;
1178 if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop",
1179 SCF_PROPERTY_TIMEOUT)) != NULL) {
1180 if ((tm = scf_simple_prop_next_count(prop)) != NULL) {
1181 if (tm != 0)
1182 timeout = *tm;
1183 }
1184 scf_simple_prop_free(prop);
1185 }
1186
1187 /* allow time for zone to shutdown cleanly */
1188 for (tries = 0; tries < timeout; tries ++) {
1189 (void) sleep(1);
1190 if (zone_get_state(zone_name, &zstate) == Z_OK &&
1191 zstate == ZONE_STATE_INSTALLED) {
1192 rc = 0;
1193 break;
1194 }
1195 }
1196
1197 if (rc != 0)
1198 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1199
1200 shutdown_in_progress = B_FALSE;
1201
1202 return (rc);
1203 }
1204
1205
1206
1207 /*
1208 * Generate AUE_zone_state for a command that boots a zone.
1209 */
1210 static void
1211 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
1212 char *new_state)
1213 {
1214 adt_session_data_t *ah;
1215 adt_event_data_t *event;
1216 int pass_fail, fail_reason;
1217
1218 if (!adt_audit_enabled())
1219 return;
1220
1221 if (return_val == 0) {
1222 pass_fail = ADT_SUCCESS;
1223 fail_reason = ADT_SUCCESS;
1224 } else {
1225 pass_fail = ADT_FAILURE;
1226 fail_reason = ADT_FAIL_VALUE_PROGRAM;
1227 }
1228
1229 if (adt_start_session(&ah, NULL, 0)) {
1230 zerror(zlogp, B_TRUE, gettext("audit failure."));
1231 return;
1232 }
1233 if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
1234 zerror(zlogp, B_TRUE, gettext("audit failure."));
1235 (void) adt_end_session(ah);
1236 return;
1237 }
1238
1239 event = adt_alloc_event(ah, ADT_zone_state);
1240 if (event == NULL) {
1241 zerror(zlogp, B_TRUE, gettext("audit failure."));
1242 (void) adt_end_session(ah);
1243 return;
1244 }
1245 event->adt_zone_state.zonename = zone_name;
1246 event->adt_zone_state.new_state = new_state;
1247
1248 if (adt_put_event(event, pass_fail, fail_reason))
1249 zerror(zlogp, B_TRUE, gettext("audit failure."));
1250
1251 adt_free_event(event);
1252
1253 (void) adt_end_session(ah);
1254 }
1255
1256 /*
1257 * Log the exit time and status of the zone's init process into
1258 * {zonepath}/lastexited. If the zone shutdown normally, the exit status will
1259 * be -1, otherwise it will be the exit status as described in wait.3c.
1260 * If the zone is configured to restart init, then nothing will be logged if
1261 * init exits unexpectedly (the kernel will never upcall in this case).
1262 */
1263 static void
1264 log_init_exit(int status)
1265 {
1266 char p[MAXPATHLEN];
1267 char buf[128];
1268 struct timeval t;
1269 int fd;
1270
1271 if (snprintf(p, sizeof (p), "%s/lastexited", zonepath) > sizeof (p))
1272 return;
1273 if (gettimeofday(&t, NULL) != 0)
1274 return;
1275 if (snprintf(buf, sizeof (buf), "%ld.%ld %d\n", t.tv_sec, t.tv_usec,
1276 status) > sizeof (buf))
1277 return;
1278 if ((fd = open(p, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0)
1279 return;
1280
1281 (void) write(fd, buf, strlen(buf));
1282
1283 (void) close(fd);
1284 }
1285
1286 /*
1287 * The main routine for the door server that deals with zone state transitions.
1288 */
1289 /* ARGSUSED */
1290 static void
1291 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1292 uint_t n_desc)
1293 {
1294 ucred_t *uc = NULL;
1295 const priv_set_t *eset;
1296
1297 zone_state_t zstate;
1298 zone_cmd_t cmd;
1299 boolean_t debug;
1300 int init_status;
1301 zone_cmd_arg_t *zargp;
1302
1303 boolean_t kernelcall;
1304
1305 int rval = -1;
1306 uint64_t uniqid;
1307 zoneid_t zoneid = -1;
1308 zlog_t zlog;
1309 zlog_t *zlogp;
1310 zone_cmd_rval_t *rvalp;
1311 size_t rlen = getpagesize(); /* conservative */
1312 fs_callback_t cb;
1313 brand_handle_t bh;
1314 boolean_t wait_shut = B_FALSE;
1315
1316 /* LINTED E_BAD_PTR_CAST_ALIGN */
1317 zargp = (zone_cmd_arg_t *)args;
1318
1319 /*
1320 * When we get the door unref message, we've fdetach'd the door, and
1321 * it is time for us to shut down zoneadmd.
1322 */
1323 if (zargp == DOOR_UNREF_DATA) {
1324 /*
1325 * See comment at end of main() for info on the last rites.
1326 */
1327 exit(0);
1328 }
1329
1330 if (zargp == NULL) {
1331 (void) door_return(NULL, 0, 0, 0);
1332 }
1333
1334 rvalp = alloca(rlen);
1335 bzero(rvalp, rlen);
1336 zlog.logfile = NULL;
1337 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1338 zlog.buf = rvalp->errbuf;
1339 zlog.log = zlog.buf;
1340 /* defer initialization of zlog.locale until after credential check */
1341 zlogp = &zlog;
1342
1343 if (alen != sizeof (zone_cmd_arg_t)) {
1344 /*
1345 * This really shouldn't be happening.
1346 */
1347 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1348 "unexpected (expected %d bytes)", alen,
1349 sizeof (zone_cmd_arg_t));
1350 goto out;
1351 }
1352 cmd = zargp->cmd;
1353 debug = zargp->debug;
1354 init_status = zargp->status;
1355
1356 if (door_ucred(&uc) != 0) {
1357 zerror(&logsys, B_TRUE, "door_ucred");
1358 goto out;
1359 }
1360 eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1361 if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1362 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1363 ucred_geteuid(uc) != 0)) {
1364 zerror(&logsys, B_FALSE, "insufficient privileges");
1365 goto out;
1366 }
1367
1368 kernelcall = ucred_getpid(uc) == 0;
1369
1370 /*
1371 * This is safe because we only use a zlog_t throughout the
1372 * duration of a door call; i.e., by the time the pointer
1373 * might become invalid, the door call would be over.
1374 */
1375 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
1376
1377 (void) mutex_lock(&lock);
1378
1379 /*
1380 * Once we start to really die off, we don't want more connections.
1381 */
1382 if (in_death_throes) {
1383 (void) mutex_unlock(&lock);
1384 ucred_free(uc);
1385 (void) door_return(NULL, 0, 0, 0);
1386 thr_exit(NULL);
1387 }
1388
1389 /*
1390 * Check for validity of command.
1391 */
1392 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT &&
1393 cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT &&
1394 cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT &&
1395 cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) {
1396 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd);
1397 goto out;
1398 }
1399
1400 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
1401 /*
1402 * Can't happen
1403 */
1404 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
1405 cmd);
1406 goto out;
1407 }
1408 /*
1409 * We ignore the possibility of someone calling zone_create(2)
1410 * explicitly; all requests must come through zoneadmd.
1411 */
1412 if (zone_get_state(zone_name, &zstate) != Z_OK) {
1413 /*
1414 * Something terribly wrong happened
1415 */
1416 zerror(&logsys, B_FALSE, "unable to determine state of zone");
1417 goto out;
1418 }
1419
1420 if (kernelcall) {
1421 /*
1422 * Kernel-initiated requests may lose their validity if the
1423 * zone_t the kernel was referring to has gone away.
1424 */
1425 if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
1426 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
1427 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
1428 /*
1429 * We're not talking about the same zone. The request
1430 * must have arrived too late. Return error.
1431 */
1432 rval = -1;
1433 goto out;
1434 }
1435 zlogp = &logsys; /* Log errors to syslog */
1436 }
1437
1438 /*
1439 * If we are being asked to forcibly mount or boot a zone, we
1440 * pretend that an INCOMPLETE zone is actually INSTALLED.
1441 */
1442 if (zstate == ZONE_STATE_INCOMPLETE &&
1443 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1444 zstate = ZONE_STATE_INSTALLED;
1445
1446 switch (zstate) {
1447 case ZONE_STATE_CONFIGURED:
1448 case ZONE_STATE_INCOMPLETE:
1449 /*
1450 * Not our area of expertise; we just print a nice message
1451 * and die off.
1452 */
1453 zerror(zlogp, B_FALSE,
1454 "%s operation is invalid for zones in state '%s'",
1455 z_cmd_name(cmd), zone_state_str(zstate));
1456 break;
1457
1458 case ZONE_STATE_INSTALLED:
1459 switch (cmd) {
1460 case Z_READY:
1461 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
1462 if (rval == 0)
1463 eventstream_write(Z_EVT_ZONE_READIED);
1464 break;
1465 case Z_BOOT:
1466 case Z_FORCEBOOT:
1467 eventstream_write(Z_EVT_ZONE_BOOTING);
1468 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1469 == 0) {
1470 rval = zone_bootup(zlogp, zargp->bootbuf,
1471 zstate);
1472 }
1473 audit_put_record(zlogp, uc, rval, "boot");
1474 if (rval != 0) {
1475 bringup_failure_recovery = B_TRUE;
1476 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1477 zstate);
1478 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1479 }
1480 break;
1481 case Z_SHUTDOWN:
1482 case Z_HALT:
1483 if (kernelcall) /* Invalid; can't happen */
1484 abort();
1485 /*
1486 * We could have two clients racing to halt this
1487 * zone; the second client loses, but his request
1488 * doesn't fail, since the zone is now in the desired
1489 * state.
1490 */
1491 zerror(zlogp, B_FALSE, "zone is already halted");
1492 rval = 0;
1493 break;
1494 case Z_REBOOT:
1495 if (kernelcall) /* Invalid; can't happen */
1496 abort();
1497 zerror(zlogp, B_FALSE, "%s operation is invalid "
1498 "for zones in state '%s'", z_cmd_name(cmd),
1499 zone_state_str(zstate));
1500 rval = -1;
1501 break;
1502 case Z_NOTE_UNINSTALLING:
1503 if (kernelcall) /* Invalid; can't happen */
1504 abort();
1505 /*
1506 * Tell the console to print out a message about this.
1507 * Once it does, we will be in_death_throes.
1508 */
1509 eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1510 break;
1511 case Z_MOUNT:
1512 case Z_FORCEMOUNT:
1513 if (kernelcall) /* Invalid; can't happen */
1514 abort();
1515 if (!zone_isnative && !zone_iscluster &&
1516 !zone_islabeled) {
1517 /*
1518 * -U mounts the zone without lofs mounting
1519 * zone file systems back into the scratch
1520 * zone. This is required when mounting
1521 * non-native branded zones.
1522 */
1523 (void) strlcpy(zargp->bootbuf, "-U",
1524 BOOTARGS_MAX);
1525 }
1526
1527 rval = zone_ready(zlogp,
1528 strcmp(zargp->bootbuf, "-U") == 0 ?
1529 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
1530 if (rval != 0)
1531 break;
1532
1533 eventstream_write(Z_EVT_ZONE_READIED);
1534
1535 /*
1536 * Get a handle to the default brand info.
1537 * We must always use the default brand file system
1538 * list when mounting the zone.
1539 */
1540 if ((bh = brand_open(default_brand)) == NULL) {
1541 rval = -1;
1542 break;
1543 }
1544
1545 /*
1546 * Get the list of filesystems to mount from
1547 * the brand configuration. These mounts are done
1548 * via a thread that will enter the zone, so they
1549 * are done from within the context of the zone.
1550 */
1551 cb.zlogp = zlogp;
1552 cb.zoneid = zone_id;
1553 cb.mount_cmd = B_TRUE;
1554 rval = brand_platform_iter_mounts(bh,
1555 mount_early_fs, &cb);
1556
1557 brand_close(bh);
1558
1559 /*
1560 * Ordinarily, /dev/fd would be mounted inside the zone
1561 * by svc:/system/filesystem/usr:default, but since
1562 * we're not booting the zone, we need to do this
1563 * manually.
1564 */
1565 if (rval == 0)
1566 rval = mount_early_fs(&cb,
1567 "fd", "/dev/fd", "fd", NULL);
1568 break;
1569 case Z_UNMOUNT:
1570 if (kernelcall) /* Invalid; can't happen */
1571 abort();
1572 zerror(zlogp, B_FALSE, "zone is already unmounted");
1573 rval = 0;
1574 break;
1575 }
1576 break;
1577
1578 case ZONE_STATE_READY:
1579 switch (cmd) {
1580 case Z_READY:
1581 /*
1582 * We could have two clients racing to ready this
1583 * zone; the second client loses, but his request
1584 * doesn't fail, since the zone is now in the desired
1585 * state.
1586 */
1587 zerror(zlogp, B_FALSE, "zone is already ready");
1588 rval = 0;
1589 break;
1590 case Z_BOOT:
1591 (void) strlcpy(boot_args, zargp->bootbuf,
1592 sizeof (boot_args));
1593 eventstream_write(Z_EVT_ZONE_BOOTING);
1594 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1595 audit_put_record(zlogp, uc, rval, "boot");
1596 if (rval != 0) {
1597 bringup_failure_recovery = B_TRUE;
1598 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1599 zstate);
1600 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1601 }
1602 boot_args[0] = '\0';
1603 break;
1604 case Z_HALT:
1605 if (kernelcall) /* Invalid; can't happen */
1606 abort();
1607 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1608 != 0)
1609 break;
1610 eventstream_write(Z_EVT_ZONE_HALTED);
1611 break;
1612 case Z_SHUTDOWN:
1613 case Z_REBOOT:
1614 case Z_NOTE_UNINSTALLING:
1615 case Z_MOUNT:
1616 case Z_UNMOUNT:
1617 if (kernelcall) /* Invalid; can't happen */
1618 abort();
1619 zerror(zlogp, B_FALSE, "%s operation is invalid "
1620 "for zones in state '%s'", z_cmd_name(cmd),
1621 zone_state_str(zstate));
1622 rval = -1;
1623 break;
1624 }
1625 break;
1626
1627 case ZONE_STATE_MOUNTED:
1628 switch (cmd) {
1629 case Z_UNMOUNT:
1630 if (kernelcall) /* Invalid; can't happen */
1631 abort();
1632 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
1633 if (rval == 0) {
1634 eventstream_write(Z_EVT_ZONE_HALTED);
1635 (void) sema_post(&scratch_sem);
1636 }
1637 break;
1638 default:
1639 if (kernelcall) /* Invalid; can't happen */
1640 abort();
1641 zerror(zlogp, B_FALSE, "%s operation is invalid "
1642 "for zones in state '%s'", z_cmd_name(cmd),
1643 zone_state_str(zstate));
1644 rval = -1;
1645 break;
1646 }
1647 break;
1648
1649 case ZONE_STATE_RUNNING:
1650 case ZONE_STATE_SHUTTING_DOWN:
1651 case ZONE_STATE_DOWN:
1652 switch (cmd) {
1653 case Z_READY:
1654 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1655 != 0)
1656 break;
1657 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
1658 eventstream_write(Z_EVT_ZONE_READIED);
1659 else
1660 eventstream_write(Z_EVT_ZONE_HALTED);
1661 break;
1662 case Z_BOOT:
1663 /*
1664 * We could have two clients racing to boot this
1665 * zone; the second client loses, but his request
1666 * doesn't fail, since the zone is now in the desired
1667 * state.
1668 */
1669 zerror(zlogp, B_FALSE, "zone is already booted");
1670 rval = 0;
1671 break;
1672 case Z_HALT:
1673 if (kernelcall) {
1674 log_init_exit(init_status);
1675 } else {
1676 log_init_exit(-1);
1677 }
1678 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1679 != 0)
1680 break;
1681 eventstream_write(Z_EVT_ZONE_HALTED);
1682 break;
1683 case Z_REBOOT:
1684 (void) strlcpy(boot_args, zargp->bootbuf,
1685 sizeof (boot_args));
1686 eventstream_write(Z_EVT_ZONE_REBOOTING);
1687 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1688 != 0) {
1689 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1690 boot_args[0] = '\0';
1691 break;
1692 }
1693 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1694 != 0) {
1695 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1696 boot_args[0] = '\0';
1697 break;
1698 }
1699 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1700 audit_put_record(zlogp, uc, rval, "reboot");
1701 if (rval != 0) {
1702 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1703 zstate);
1704 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1705 }
1706 boot_args[0] = '\0';
1707 break;
1708 case Z_SHUTDOWN:
1709 if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
1710 wait_shut = B_TRUE;
1711 }
1712 break;
1713 case Z_NOTE_UNINSTALLING:
1714 case Z_MOUNT:
1715 case Z_UNMOUNT:
1716 zerror(zlogp, B_FALSE, "%s operation is invalid "
1717 "for zones in state '%s'", z_cmd_name(cmd),
1718 zone_state_str(zstate));
1719 rval = -1;
1720 break;
1721 }
1722 break;
1723 default:
1724 abort();
1725 }
1726
1727 /*
1728 * Because the state of the zone may have changed, we make sure
1729 * to wake the console poller, which is in charge of initiating
1730 * the shutdown procedure as necessary.
1731 */
1732 eventstream_write(Z_EVT_NULL);
1733
1734 out:
1735 (void) mutex_unlock(&lock);
1736
1737 /* Wait for the Z_SHUTDOWN commands to complete */
1738 if (wait_shut)
1739 rval = zone_wait_shutdown(zlogp);
1740
1741 if (kernelcall) {
1742 rvalp = NULL;
1743 rlen = 0;
1744 } else {
1745 rvalp->rval = rval;
1746 }
1747 if (uc != NULL)
1748 ucred_free(uc);
1749 (void) door_return((char *)rvalp, rlen, NULL, 0);
1750 thr_exit(NULL);
1751 }
1752
1753 static int
1754 setup_door(zlog_t *zlogp)
1755 {
1756 if ((zone_door = door_create(server, NULL,
1757 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
1758 zerror(zlogp, B_TRUE, "%s failed", "door_create");
1759 return (-1);
1760 }
1761 (void) fdetach(zone_door_path);
1762
1763 if (fattach(zone_door, zone_door_path) != 0) {
1764 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
1765 (void) door_revoke(zone_door);
1766 (void) fdetach(zone_door_path);
1767 zone_door = -1;
1768 return (-1);
1769 }
1770 return (0);
1771 }
1772
1773 /*
1774 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
1775 * is where zoneadmd itself will check to see that another instance of
1776 * zoneadmd isn't already controlling this zone.
1777 *
1778 * The idea here is that we want to open the path to which we will
1779 * attach our door, lock it, and then make sure that no-one has beat us
1780 * to fattach(3c)ing onto it.
1781 *
1782 * fattach(3c) is really a mount, so there are actually two possible
1783 * vnodes we could be dealing with. Our strategy is as follows:
1784 *
1785 * - If the file we opened is a regular file (common case):
1786 * There is no fattach(3c)ed door, so we have a chance of becoming
1787 * the managing zoneadmd. We attempt to lock the file: if it is
1788 * already locked, that means someone else raced us here, so we
1789 * lose and give up. zoneadm(1m) will try to contact the zoneadmd
1790 * that beat us to it.
1791 *
1792 * - If the file we opened is a namefs file:
1793 * This means there is already an established door fattach(3c)'ed
1794 * to the rendezvous path. We've lost the race, so we give up.
1795 * Note that in this case we also try to grab the file lock, and
1796 * will succeed in acquiring it since the vnode locked by the
1797 * "winning" zoneadmd was a regular one, and the one we locked was
1798 * the fattach(3c)'ed door node. At any rate, no harm is done, and
1799 * we just return to zoneadm(1m) which knows to retry.
1800 */
1801 static int
1802 make_daemon_exclusive(zlog_t *zlogp)
1803 {
1804 int doorfd = -1;
1805 int err, ret = -1;
1806 struct stat st;
1807 struct flock flock;
1808 zone_state_t zstate;
1809
1810 top:
1811 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1812 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1813 zonecfg_strerror(err));
1814 goto out;
1815 }
1816 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
1817 S_IREAD|S_IWRITE)) < 0) {
1818 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
1819 goto out;
1820 }
1821 if (fstat(doorfd, &st) < 0) {
1822 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
1823 goto out;
1824 }
1825 /*
1826 * Lock the file to synchronize with other zoneadmd
1827 */
1828 flock.l_type = F_WRLCK;
1829 flock.l_whence = SEEK_SET;
1830 flock.l_start = (off_t)0;
1831 flock.l_len = (off_t)0;
1832 if (fcntl(doorfd, F_SETLK, &flock) < 0) {
1833 /*
1834 * Someone else raced us here and grabbed the lock file
1835 * first. A warning here is inappropriate since nothing
1836 * went wrong.
1837 */
1838 goto out;
1839 }
1840
1841 if (strcmp(st.st_fstype, "namefs") == 0) {
1842 struct door_info info;
1843
1844 /*
1845 * There is already something fattach()'ed to this file.
1846 * Lets see what the door is up to.
1847 */
1848 if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
1849 /*
1850 * Another zoneadmd process seems to be in
1851 * control of the situation and we don't need to
1852 * be here. A warning here is inappropriate
1853 * since nothing went wrong.
1854 *
1855 * If the door has been revoked, the zoneadmd
1856 * process currently managing the zone is going
1857 * away. We'll return control to zoneadm(1m)
1858 * which will try again (by which time zoneadmd
1859 * will hopefully have exited).
1860 */
1861 goto out;
1862 }
1863
1864 /*
1865 * If we got this far, there's a fattach(3c)'ed door
1866 * that belongs to a process that has exited, which can
1867 * happen if the previous zoneadmd died unexpectedly.
1868 *
1869 * Let user know that something is amiss, but that we can
1870 * recover; if the zone is in the installed state, then don't
1871 * message, since having a running zoneadmd isn't really
1872 * expected/needed. We want to keep occurences of this message
1873 * limited to times when zoneadmd is picking back up from a
1874 * zoneadmd that died while the zone was in some non-trivial
1875 * state.
1876 */
1877 if (zstate > ZONE_STATE_INSTALLED) {
1878 static zoneid_t zid;
1879
1880 zerror(zlogp, B_FALSE,
1881 "zone '%s': WARNING: zone is in state '%s', but "
1882 "zoneadmd does not appear to be available; "
1883 "restarted zoneadmd to recover.",
1884 zone_name, zone_state_str(zstate));
1885
1886 /*
1887 * Startup a thread to perform the zfd logging/tty svc
1888 * and a thread to perform memory capping for the
1889 * zone. zlogp won't be valid for much longer so use
1890 * logsys.
1891 */
1892 if ((zid = getzoneidbyname(zone_name)) != -1) {
1893 create_log_thread(&logsys, zid);
1894 create_mcap_thread(&logsys, zid);
1895 }
1896
1897 /* recover the global configuration snapshot */
1898 if (snap_hndl == NULL) {
1899 if ((snap_hndl = zonecfg_init_handle())
1900 == NULL ||
1901 zonecfg_create_snapshot(zone_name)
1902 != Z_OK ||
1903 zonecfg_get_snapshot_handle(zone_name,
1904 snap_hndl) != Z_OK) {
1905 zerror(zlogp, B_FALSE, "recovering "
1906 "zone configuration handle");
1907 goto out;
1908 }
1909 }
1910 }
1911
1912 (void) fdetach(zone_door_path);
1913 (void) close(doorfd);
1914 goto top;
1915 }
1916 ret = 0;
1917 out:
1918 (void) close(doorfd);
1919 return (ret);
1920 }
1921
1922 /*
1923 * Setup the brand's pre and post state change callbacks, as well as the
1924 * query callback, if any of these exist.
1925 */
1926 static int
1927 brand_callback_init(brand_handle_t bh, char *zone_name)
1928 {
1929 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
1930 sizeof (pre_statechg_hook));
1931
1932 if (brand_get_prestatechange(bh, zone_name, zonepath,
1933 pre_statechg_hook + EXEC_LEN,
1934 sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
1935 return (-1);
1936
1937 if (strlen(pre_statechg_hook) <= EXEC_LEN)
1938 pre_statechg_hook[0] = '\0';
1939
1940 (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
1941 sizeof (post_statechg_hook));
1942
1943 if (brand_get_poststatechange(bh, zone_name, zonepath,
1944 post_statechg_hook + EXEC_LEN,
1945 sizeof (post_statechg_hook) - EXEC_LEN) != 0)
1946 return (-1);
1947
1948 if (strlen(post_statechg_hook) <= EXEC_LEN)
1949 post_statechg_hook[0] = '\0';
1950
1951 (void) strlcpy(query_hook, EXEC_PREFIX,
1952 sizeof (query_hook));
1953
1954 if (brand_get_query(bh, zone_name, zonepath, query_hook + EXEC_LEN,
1955 sizeof (query_hook) - EXEC_LEN) != 0)
1956 return (-1);
1957
1958 if (strlen(query_hook) <= EXEC_LEN)
1959 query_hook[0] = '\0';
1960
1961 return (0);
1962 }
1963
1964 int
1965 main(int argc, char *argv[])
1966 {
1967 int opt;
1968 zoneid_t zid;
1969 priv_set_t *privset;
1970 zone_state_t zstate;
1971 char parents_locale[MAXPATHLEN];
1972 brand_handle_t bh;
1973 int err;
1974
1975 pid_t pid;
1976 sigset_t blockset;
1977 sigset_t block_cld;
1978
1979 struct {
1980 sema_t sem;
1981 int status;
1982 zlog_t log;
1983 } *shstate;
1984 size_t shstatelen = getpagesize();
1985
1986 zlog_t errlog;
1987 zlog_t *zlogp;
1988
1989 int ctfd;
1990
1991 progname = get_execbasename(argv[0]);
1992
1993 /*
1994 * Make sure stderr is unbuffered
1995 */
1996 (void) setbuffer(stderr, NULL, 0);
1997
1998 /*
1999 * Get out of the way of mounted filesystems, since we will daemonize
2000 * soon.
2001 */
2002 (void) chdir("/");
2003
2004 /*
2005 * Use the default system umask per PSARC 1998/110 rather than
2006 * anything that may have been set by the caller.
2007 */
2008 (void) umask(CMASK);
2009
2010 /*
2011 * Initially we want to use our parent's locale.
2012 */
2013 (void) setlocale(LC_ALL, "");
2014 (void) textdomain(TEXT_DOMAIN);
2015 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
2016 sizeof (parents_locale));
2017
2018 /*
2019 * This zlog_t is used for writing to stderr
2020 */
2021 errlog.logfile = stderr;
2022 errlog.buflen = errlog.loglen = 0;
2023 errlog.buf = errlog.log = NULL;
2024 errlog.locale = parents_locale;
2025
2026 /*
2027 * We start off writing to stderr until we're ready to daemonize.
2028 */
2029 zlogp = &errlog;
2030
2031 /*
2032 * Process options.
2033 */
2034 while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
2035 switch (opt) {
2036 case 'R':
2037 zonecfg_set_root(optarg);
2038 break;
2039 case 'z':
2040 zone_name = optarg;
2041 break;
2042 default:
2043 usage();
2044 }
2045 }
2046
2047 if (zone_name == NULL)
2048 usage();
2049
2050 /*
2051 * Because usage() prints directly to stderr, it has gettext()
2052 * wrapping, which depends on the locale. But since zerror() calls
2053 * localize() which tweaks the locale, it is not safe to call zerror()
2054 * until after the last call to usage(). Fortunately, the last call
2055 * to usage() is just above and the first call to zerror() is just
2056 * below. Don't mess this up.
2057 */
2058 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
2059 zerror(zlogp, B_FALSE, "cannot manage the %s zone",
2060 GLOBAL_ZONENAME);
2061 return (1);
2062 }
2063
2064 if (zone_get_id(zone_name, &zid) != 0) {
2065 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
2066 zonecfg_strerror(Z_NO_ZONE));
2067 return (1);
2068 }
2069
2070 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
2071 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
2072 zonecfg_strerror(err));
2073 return (1);
2074 }
2075 if (zstate < ZONE_STATE_INCOMPLETE) {
2076 zerror(zlogp, B_FALSE,
2077 "cannot manage a zone which is in state '%s'",
2078 zone_state_str(zstate));
2079 return (1);
2080 }
2081
2082 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
2083 zerror(zlogp, B_FALSE, "unable to determine zone path");
2084 return (-1);
2085 }
2086
2087 if (zonecfg_default_brand(default_brand,
2088 sizeof (default_brand)) != Z_OK) {
2089 zerror(zlogp, B_FALSE, "unable to determine default brand");
2090 return (1);
2091 }
2092
2093 /* Get a handle to the brand info for this zone */
2094 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name))
2095 != Z_OK) {
2096 zerror(zlogp, B_FALSE, "unable to determine zone brand");
2097 return (1);
2098 }
2099 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0);
2100 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0);
2101
2102 /*
2103 * In the alternate root environment, the only supported
2104 * operations are mount and unmount. In this case, just treat
2105 * the zone as native if it is cluster. Cluster zones can be
2106 * native for the purpose of LU or upgrade, and the cluster
2107 * brand may not exist in the miniroot (such as in net install
2108 * upgrade).
2109 */
2110 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) {
2111 zone_iscluster = B_TRUE;
2112 if (zonecfg_in_alt_root()) {
2113 (void) strlcpy(brand_name, default_brand,
2114 sizeof (brand_name));
2115 }
2116 } else {
2117 zone_iscluster = B_FALSE;
2118 }
2119
2120 if ((bh = brand_open(brand_name)) == NULL) {
2121 zerror(zlogp, B_FALSE, "unable to open zone brand");
2122 return (1);
2123 }
2124
2125 /* Get state change brand hooks. */
2126 if (brand_callback_init(bh, zone_name) == -1) {
2127 zerror(zlogp, B_TRUE,
2128 "failed to initialize brand state change hooks");
2129 brand_close(bh);
2130 return (1);
2131 }
2132
2133 brand_close(bh);
2134
2135 /*
2136 * Check that we have all privileges. It would be nice to pare
2137 * this down, but this is at least a first cut.
2138 */
2139 if ((privset = priv_allocset()) == NULL) {
2140 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
2141 return (1);
2142 }
2143
2144 if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2145 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
2146 priv_freeset(privset);
2147 return (1);
2148 }
2149
2150 if (priv_isfullset(privset) == B_FALSE) {
2151 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
2152 "run this command (all privs required)");
2153 priv_freeset(privset);
2154 return (1);
2155 }
2156 priv_freeset(privset);
2157
2158 if (mkzonedir(zlogp) != 0)
2159 return (1);
2160
2161 /*
2162 * Pre-fork: setup shared state
2163 */
2164 if ((shstate = (void *)mmap(NULL, shstatelen,
2165 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
2166 MAP_FAILED) {
2167 zerror(zlogp, B_TRUE, "%s failed", "mmap");
2168 return (1);
2169 }
2170 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
2171 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
2172 (void) munmap((char *)shstate, shstatelen);
2173 return (1);
2174 }
2175 shstate->log.logfile = NULL;
2176 shstate->log.buflen = shstatelen - sizeof (*shstate);
2177 shstate->log.loglen = shstate->log.buflen;
2178 shstate->log.buf = (char *)shstate + sizeof (*shstate);
2179 shstate->log.log = shstate->log.buf;
2180 shstate->log.locale = parents_locale;
2181 shstate->status = -1;
2182
2183 /*
2184 * We need a SIGCHLD handler so the sema_wait() below will wake
2185 * up if the child dies without doing a sema_post().
2186 */
2187 (void) sigset(SIGCHLD, sigchld);
2188 /*
2189 * We must mask SIGCHLD until after we've coped with the fork
2190 * sufficiently to deal with it; otherwise we can race and
2191 * receive the signal before pid has been initialized
2192 * (yes, this really happens).
2193 */
2194 (void) sigemptyset(&block_cld);
2195 (void) sigaddset(&block_cld, SIGCHLD);
2196 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2197
2198 /*
2199 * The parent only needs stderr after the fork, so close other fd's
2200 * that we inherited from zoneadm so that the parent doesn't have those
2201 * open while waiting. The child will close the rest after the fork.
2202 */
2203 closefrom(3);
2204
2205 if ((ctfd = init_template()) == -1) {
2206 zerror(zlogp, B_TRUE, "failed to create contract");
2207 return (1);
2208 }
2209
2210 /*
2211 * Do not let another thread localize a message while we are forking.
2212 */
2213 (void) mutex_lock(&msglock);
2214 pid = fork();
2215 (void) mutex_unlock(&msglock);
2216
2217 /*
2218 * In all cases (parent, child, and in the event of an error) we
2219 * don't want to cause creation of contracts on subsequent fork()s.
2220 */
2221 (void) ct_tmpl_clear(ctfd);
2222 (void) close(ctfd);
2223
2224 if (pid == -1) {
2225 zerror(zlogp, B_TRUE, "could not fork");
2226 return (1);
2227
2228 } else if (pid > 0) { /* parent */
2229 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2230 /*
2231 * This marks a window of vulnerability in which we receive
2232 * the SIGCLD before falling into sema_wait (normally we would
2233 * get woken up from sema_wait with EINTR upon receipt of
2234 * SIGCLD). So we may need to use some other scheme like
2235 * sema_posting in the sigcld handler.
2236 * blech
2237 */
2238 (void) sema_wait(&shstate->sem);
2239 (void) sema_destroy(&shstate->sem);
2240 if (shstate->status != 0)
2241 (void) waitpid(pid, NULL, WNOHANG);
2242 /*
2243 * It's ok if we die with SIGPIPE. It's not like we could have
2244 * done anything about it.
2245 */
2246 (void) fprintf(stderr, "%s", shstate->log.buf);
2247 _exit(shstate->status == 0 ? 0 : 1);
2248 }
2249
2250 /*
2251 * The child charges on.
2252 */
2253 (void) sigset(SIGCHLD, SIG_DFL);
2254 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2255
2256 /*
2257 * SIGPIPE can be delivered if we write to a socket for which the
2258 * peer endpoint is gone. That can lead to too-early termination
2259 * of zoneadmd, and that's not good eats.
2260 */
2261 (void) sigset(SIGPIPE, SIG_IGN);
2262 /*
2263 * Stop using stderr
2264 */
2265 zlogp = &shstate->log;
2266
2267 /*
2268 * We don't need stdout/stderr from now on.
2269 */
2270 closefrom(0);
2271
2272 /*
2273 * Initialize the syslog zlog_t. This needs to be done after
2274 * the call to closefrom().
2275 */
2276 logsys.buf = logsys.log = NULL;
2277 logsys.buflen = logsys.loglen = 0;
2278 logsys.logfile = NULL;
2279 logsys.locale = DEFAULT_LOCALE;
2280
2281 openlog("zoneadmd", LOG_PID, LOG_DAEMON);
2282
2283 /*
2284 * The eventstream is used to publish state changes in the zone
2285 * from the door threads to the console I/O poller.
2286 */
2287 if (eventstream_init() == -1) {
2288 zerror(zlogp, B_TRUE, "unable to create eventstream");
2289 goto child_out;
2290 }
2291
2292 (void) snprintf(zone_door_path, sizeof (zone_door_path),
2293 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);
2294
2295 /*
2296 * See if another zoneadmd is running for this zone. If not, then we
2297 * can now modify system state.
2298 */
2299 if (make_daemon_exclusive(zlogp) == -1)
2300 goto child_out;
2301
2302
2303 /*
2304 * Create/join a new session; we need to be careful of what we do with
2305 * the console from now on so we don't end up being the session leader
2306 * for the terminal we're going to be handing out.
2307 */
2308 (void) setsid();
2309
2310 /*
2311 * This thread shouldn't be receiving any signals; in particular,
2312 * SIGCHLD should be received by the thread doing the fork().
2313 */
2314 (void) sigfillset(&blockset);
2315 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
2316
2317 /*
2318 * Setup the console device and get ready to serve the console;
2319 * once this has completed, we're ready to let console clients
2320 * make an attempt to connect (they will block until
2321 * serve_console_sock() below gets called, and any pending
2322 * connection is accept()ed).
2323 */
2324 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0)
2325 goto child_out;
2326
2327 /*
2328 * Take the lock now, so that when the door server gets going, we
2329 * are guaranteed that it won't take a request until we are sure
2330 * that everything is completely set up. See the child_out: label
2331 * below to see why this matters.
2332 */
2333 (void) mutex_lock(&lock);
2334
2335 /* Init semaphore for scratch zones. */
2336 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
2337 zerror(zlogp, B_TRUE,
2338 "failed to initialize semaphore for scratch zone");
2339 goto child_out;
2340 }
2341
2342 /* open the dladm handle */
2343 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) {
2344 zerror(zlogp, B_FALSE, "failed to open dladm handle");
2345 goto child_out;
2346 }
2347
2348 /*
2349 * Note: door setup must occur *after* the console is setup.
2350 * This is so that as zlogin tests the door to see if zoneadmd
2351 * is ready yet, we know that the console will get serviced
2352 * once door_info() indicates that the door is "up".
2353 */
2354 if (setup_door(zlogp) == -1)
2355 goto child_out;
2356
2357 /*
2358 * Things seem OK so far; tell the parent process that we're done
2359 * with setup tasks. This will cause the parent to exit, signalling
2360 * to zoneadm, zlogin, or whatever forked it that we are ready to
2361 * service requests.
2362 */
2363 shstate->status = 0;
2364 (void) sema_post(&shstate->sem);
2365 (void) munmap((char *)shstate, shstatelen);
2366 shstate = NULL;
2367
2368 (void) mutex_unlock(&lock);
2369
2370 /*
2371 * zlogp is now invalid, so reset it to the syslog logger.
2372 */
2373 zlogp = &logsys;
2374
2375 /*
2376 * Now that we are free of any parents, switch to the default locale.
2377 */
2378 (void) setlocale(LC_ALL, DEFAULT_LOCALE);
2379
2380 /*
2381 * At this point the setup portion of main() is basically done, so
2382 * we reuse this thread to manage the zone console. When
2383 * serve_console() has returned, we are past the point of no return
2384 * in the life of this zoneadmd.
2385 */
2386 if (zonecfg_in_alt_root()) {
2387 /*
2388 * This is just awful, but mounted scratch zones don't (and
2389 * can't) have consoles. We just wait for unmount instead.
2390 */
2391 while (sema_wait(&scratch_sem) == EINTR)
2392 ;
2393 } else {
2394 serve_console(zlogp);
2395 assert(in_death_throes);
2396 }
2397
2398 /*
2399 * This is the next-to-last part of the exit interlock. Upon calling
2400 * fdetach(), the door will go unreferenced; once any
2401 * outstanding requests (like the door thread doing Z_HALT) are
2402 * done, the door will get an UNREF notification; when it handles
2403 * the UNREF, the door server will cause the exit. It's possible
2404 * that fdetach() can fail because the file is in use, in which
2405 * case we'll retry the operation.
2406 */
2407 assert(!MUTEX_HELD(&lock));
2408 for (;;) {
2409 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY))
2410 break;
2411 yield();
2412 }
2413
2414 for (;;)
2415 (void) pause();
2416
2417 child_out:
2418 assert(pid == 0);
2419 if (shstate != NULL) {
2420 shstate->status = -1;
2421 (void) sema_post(&shstate->sem);
2422 (void) munmap((char *)shstate, shstatelen);
2423 }
2424
2425 /*
2426 * This might trigger an unref notification, but if so,
2427 * we are still holding the lock, so our call to exit will
2428 * ultimately win the race and will publish the right exit
2429 * code.
2430 */
2431 if (zone_door != -1) {
2432 assert(MUTEX_HELD(&lock));
2433 (void) door_revoke(zone_door);
2434 (void) fdetach(zone_door_path);
2435 }
2436
2437 if (dld_handle != NULL)
2438 dladm_close(dld_handle);
2439
2440 return (1); /* return from main() forcibly exits an MT process */
2441 }