1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2015, Joyent, Inc. All rights reserved.
26 */
27
28 /*
29 * zoneadmd manages zones; one zoneadmd process is launched for each
30 * non-global zone on the system. This daemon juggles four jobs:
31 *
32 * - Implement setup and teardown of the zone "virtual platform": mount and
33 * unmount filesystems; create and destroy network interfaces; communicate
34 * with devfsadmd to lay out devices for the zone; instantiate the zone
35 * console device; configure process runtime attributes such as resource
36 * controls, pool bindings, fine-grained privileges.
37 *
38 * - Launch the zone's init(1M) process.
39 *
40 * - Implement a door server; clients (like zoneadm) connect to the door
41 * server and request zone state changes. The kernel is also a client of
42 * this door server. A request to halt or reboot the zone which originates
43 * *inside* the zone results in a door upcall from the kernel into zoneadmd.
44 *
45 * One minor problem is that messages emitted by zoneadmd need to be passed
46 * back to the zoneadm process making the request. These messages need to
47 * be rendered in the client's locale; so, this is passed in as part of the
48 * request. The exception is the kernel upcall to zoneadmd, in which case
49 * messages are syslog'd.
50 *
51 * To make all of this work, the Makefile adds -a to xgettext to extract *all*
52 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
53 * strings which do not need to be translated.
54 *
55 * - Act as a console server for zlogin -C processes; see comments in zcons.c
56 * for more information about the zone console architecture.
57 *
58 * DESIGN NOTES
59 *
60 * Restart:
61 * A chief design constraint of zoneadmd is that it should be restartable in
62 * the case that the administrator kills it off, or it suffers a fatal error,
63 * without the running zone being impacted; this is akin to being able to
64 * reboot the service processor of a server without affecting the OS instance.
65 */
66
67 #include <sys/param.h>
68 #include <sys/mman.h>
69 #include <sys/types.h>
70 #include <sys/stat.h>
71 #include <sys/sysmacros.h>
72 #include <sys/time.h>
73
74 #include <bsm/adt.h>
75 #include <bsm/adt_event.h>
76
77 #include <alloca.h>
78 #include <assert.h>
79 #include <errno.h>
80 #include <door.h>
81 #include <fcntl.h>
82 #include <locale.h>
83 #include <signal.h>
84 #include <stdarg.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88 #include <strings.h>
89 #include <synch.h>
90 #include <syslog.h>
91 #include <thread.h>
92 #include <unistd.h>
93 #include <wait.h>
94 #include <limits.h>
95 #include <zone.h>
96 #include <libbrand.h>
97 #include <sys/brand.h>
98 #include <libcontract.h>
99 #include <libcontract_priv.h>
100 #include <sys/brand.h>
101 #include <sys/contract/process.h>
102 #include <sys/ctfs.h>
103 #include <libdladm.h>
104 #include <sys/dls_mgmt.h>
105 #include <libscf.h>
106
107 #include <libzonecfg.h>
108 #include <zonestat_impl.h>
109 #include "zoneadmd.h"
110
static char *progname; /* program name, printed by usage() */
char *zone_name; /* zone which we are managing */
zone_dochandle_t snap_hndl; /* handle for snapshot created when ready */
char zonepath[MAXNAMELEN];
char pool_name[MAXNAMELEN];
char default_brand[MAXNAMELEN];
char brand_name[MAXNAMELEN];
boolean_t zone_isnative;
boolean_t zone_iscluster;
boolean_t zone_islabeled;
boolean_t shutdown_in_progress;
static zoneid_t zone_id; /* set by zone_ready() from vplat_create() */
dladm_handle_t dld_handle = NULL;

/*
 * Brand state-change hook command prefixes.  An empty string means there is
 * no hook to run (see brand_prestatechg() and brand_poststatechg()).
 */
static char pre_statechg_hook[2 * MAXPATHLEN];
static char post_statechg_hook[2 * MAXPATHLEN];
char query_hook[2 * MAXPATHLEN];

/* Log handle whose messages zerror() sends to syslog with a zone prefix. */
zlog_t logsys;

mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */
mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */

static sema_t scratch_sem; /* for scratch zones */

static char zone_door_path[MAXPATHLEN];
static int zone_door = -1;

boolean_t in_death_throes = B_FALSE; /* daemon is dying */
boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */

#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
#endif

/* Locale that localize_msg() restores after translating a message. */
#define DEFAULT_LOCALE "C"

/* Resource names used when building _ZONECFG_* environment variables. */
#define RSRC_NET "net"
#define RSRC_DEV "device"
150
151 static const char *
152 z_cmd_name(zone_cmd_t zcmd)
153 {
154 /* This list needs to match the enum in sys/zone.h */
155 static const char *zcmdstr[] = {
156 "ready", "boot", "forceboot", "reboot", "halt",
157 "note_uninstalling", "mount", "forcemount", "unmount",
158 "shutdown"
159 };
160
161 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr))
162 return ("unknown");
163 else
164 return (zcmdstr[(int)zcmd]);
165 }
166
/*
 * Return a pointer to the last path component of execfullname.
 *
 * Trailing '/' characters are stripped by overwriting them with NULs, so the
 * caller's buffer is modified in place.  The returned pointer refers into
 * that same buffer; nothing is allocated.
 */
static char *
get_execbasename(char *execfullname)
{
	char *slash;

	while ((slash = strrchr(execfullname, '/')) != NULL) {
		/* A slash followed by text: that text is the basename. */
		if (slash[1] != '\0')
			return (slash + 1);

		/* guard against '/' at end of command invocation */
		*slash = '\0';
	}

	/* No slash left at all: the whole string is the basename. */
	return (execfullname);
}
189
190 static void
191 usage(void)
192 {
193 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
194 (void) fprintf(stderr,
195 gettext("\tNote: %s should not be run directly.\n"), progname);
196 exit(2);
197 }
198
/*
 * SIGCHLD handler with an intentionally empty body; the signal number is
 * ignored.
 * NOTE(review): presumably installed so that SIGCHLD is caught rather than
 * left at its default disposition -- confirm where the handler is registered.
 */
/* ARGSUSED */
static void
sigchld(int sig)
{
}
204
205 char *
206 localize_msg(char *locale, const char *msg)
207 {
208 char *out;
209
210 (void) mutex_lock(&msglock);
211 (void) setlocale(LC_MESSAGES, locale);
212 out = gettext(msg);
213 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
214 (void) mutex_unlock(&msglock);
215 return (out);
216 }
217
218 /* PRINTFLIKE3 */
219 void
220 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
221 {
222 va_list alist;
223 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
224 char *bp;
225 int saved_errno = errno;
226
227 if (zlogp == NULL)
228 return;
229 if (zlogp == &logsys)
230 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ",
231 zone_name);
232 else
233 buf[0] = '\0';
234 bp = &(buf[strlen(buf)]);
235
236 /*
237 * In theory, the locale pointer should be set to either "C" or a
238 * char array, so it should never be NULL
239 */
240 assert(zlogp->locale != NULL);
241 /* Locale is per process, but we are multi-threaded... */
242 fmt = localize_msg(zlogp->locale, fmt);
243
244 va_start(alist, fmt);
245 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
246 va_end(alist);
247 bp = &(buf[strlen(buf)]);
248 if (use_strerror)
249 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
250 strerror(saved_errno));
251 if (zlogp == &logsys) {
252 (void) syslog(LOG_ERR, "%s", buf);
253 } else if (zlogp->logfile != NULL) {
254 (void) fprintf(zlogp->logfile, "%s\n", buf);
255 } else {
256 size_t buflen;
257 size_t copylen;
258
259 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf);
260 copylen = MIN(buflen, zlogp->loglen);
261 zlogp->log += copylen;
262 zlogp->loglen -= copylen;
263 }
264 }
265
266 /*
267 * Since Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
268 * put the arguments into an argv style array, use getopt to process them,
269 * and put the resultant argument string back into outargs. Non-Solaris brands
270 * may support alternate forms of boot arguments so we must handle that as well.
271 *
272 * During the filtering, we pull out any arguments which are truly "boot"
273 * arguments, leaving only those which are to be passed intact to the
274 * progenitor process. The one we support at the moment is -i, which
275 * indicates to the kernel which program should be launched as 'init'.
276 *
277 * Except for Z_OK, all other return values are treated as fatal.
278 */
279 static int
280 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
281 char *init_file)
282 {
283 int argc = 0, argc_save;
284 int i;
285 int err;
286 char *arg, *lasts, **argv = NULL, **argv_save;
287 char zonecfg_args[BOOTARGS_MAX];
288 char scratchargs[BOOTARGS_MAX], *sargs;
289 char c;
290
291 bzero(outargs, BOOTARGS_MAX);
292
293 /*
294 * If the user didn't specify transient boot arguments, check
295 * to see if there were any specified in the zone configuration,
296 * and use them if applicable.
297 */
298 if (inargs == NULL || inargs[0] == '\0') {
299 zone_dochandle_t handle;
300 if ((handle = zonecfg_init_handle()) == NULL) {
301 zerror(zlogp, B_TRUE,
302 "getting zone configuration handle");
303 return (Z_BAD_HANDLE);
304 }
305 err = zonecfg_get_snapshot_handle(zone_name, handle);
306 if (err != Z_OK) {
307 zerror(zlogp, B_FALSE,
308 "invalid configuration snapshot");
309 zonecfg_fini_handle(handle);
310 return (Z_BAD_HANDLE);
311 }
312
313 bzero(zonecfg_args, sizeof (zonecfg_args));
314 (void) zonecfg_get_bootargs(handle, zonecfg_args,
315 sizeof (zonecfg_args));
316 inargs = zonecfg_args;
317 zonecfg_fini_handle(handle);
318 }
319
320 if (strlen(inargs) >= BOOTARGS_MAX) {
321 zerror(zlogp, B_FALSE, "boot argument string too long");
322 return (Z_INVAL);
323 }
324
325 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
326 sargs = scratchargs;
327 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
328 sargs = NULL;
329 argc++;
330 }
331
332 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
333 zerror(zlogp, B_FALSE, "memory allocation failed");
334 return (Z_NOMEM);
335 }
336
337 argv_save = argv;
338 argc_save = argc;
339
340 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
341 sargs = scratchargs;
342 i = 0;
343 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
344 sargs = NULL;
345 if ((argv[i] = strdup(arg)) == NULL) {
346 err = Z_NOMEM;
347 zerror(zlogp, B_FALSE, "memory allocation failed");
348 goto done;
349 }
350 i++;
351 }
352
353 /*
354 * We preserve compatibility with the illumos system boot behavior,
355 * which allows:
356 *
357 * # reboot kernel/unix -s -m verbose
358 *
359 * In this example, kernel/unix tells the booter what file to boot. The
360 * original intent of this was that we didn't want reboot in a zone to
361 * be gratuitously different, so we would silently ignore the boot
362 * file, if necessary. However, this usage is archaic and has never
363 * been common, since it is impossible to boot a zone onto a different
364 * kernel. Ignoring the first argument breaks for non-native brands
365 * which pass boot arguments in a different style. e.g.
366 * systemd.log_level=debug
367 * Thus, for backward compatibility we only ignore the first argument
368 * if it appears to be in the illumos form and attempting to specify a
369 * kernel.
370 */
371 if (argv[0] == NULL)
372 goto done;
373
374 assert(argv[0][0] != ' ');
375 assert(argv[0][0] != '\t');
376
377 if (strncmp(argv[0], "kernel/", 7) == 0) {
378 argv = &argv[1];
379 argc--;
380 }
381
382 optind = 0;
383 opterr = 0;
384 err = Z_OK;
385 while ((c = getopt(argc, argv, "fi:m:s")) != -1) {
386 switch (c) {
387 case 'i':
388 /*
389 * -i is handled by the runtime and is not passed
390 * along to userland
391 */
392 (void) strlcpy(init_file, optarg, MAXPATHLEN);
393 break;
394 case 'f':
395 /* This has already been processed by zoneadm */
396 break;
397 case 'm':
398 case 's':
399 /* These pass through unmolested */
400 (void) snprintf(outargs, BOOTARGS_MAX,
401 "%s -%c %s ", outargs, c, optarg ? optarg : "");
402 break;
403 case '?':
404 /*
405 * If a brand has its own init, we need to pass along
406 * whatever the user provides. We use the entire
407 * unknown string here so that we correctly handle
408 * unknown long options (e.g. --debug).
409 */
410 (void) snprintf(outargs, BOOTARGS_MAX,
411 "%s %s", outargs, argv[optind - 1]);
412 break;
413 }
414 }
415
416 /*
417 * We need to pass along everything else since we don't know what
418 * the brand's init is expecting. For example, an argument list like:
419 * --confdir /foo --debug
420 * will cause the getopt parsing to stop at '/foo' but we need to pass
421 * that on, along with the '--debug'. This does mean that we require
422 * any of our known options (-ifms) to preceed the brand-specific ones.
423 */
424 while (optind < argc) {
425 (void) snprintf(outargs, BOOTARGS_MAX, "%s %s", outargs,
426 argv[optind]);
427 optind++;
428 }
429
430 done:
431 for (i = 0; i < argc_save; i++) {
432 if (argv_save[i] != NULL)
433 free(argv_save[i]);
434 }
435 free(argv_save);
436 return (err);
437 }
438
439
440 static int
441 mkzonedir(zlog_t *zlogp)
442 {
443 struct stat st;
444 /*
445 * We must create and lock everyone but root out of ZONES_TMPDIR
446 * since anyone can open any UNIX domain socket, regardless of
447 * its file system permissions. Sigh...
448 */
449 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
450 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
451 return (-1);
452 }
453 /* paranoia */
454 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
455 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
456 return (-1);
457 }
458 (void) chmod(ZONES_TMPDIR, S_IRWXU);
459 return (0);
460 }
461
462 /*
463 * Run the brand's pre-state change callback, if it exists.
464 */
465 static int
466 brand_prestatechg(zlog_t *zlogp, int state, int cmd)
467 {
468 char cmdbuf[2 * MAXPATHLEN];
469 const char *altroot;
470
471 if (pre_statechg_hook[0] == '\0')
472 return (0);
473
474 altroot = zonecfg_get_root();
475 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
476 state, cmd, altroot) > sizeof (cmdbuf))
477 return (-1);
478
479 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
480 return (-1);
481
482 return (0);
483 }
484
485 /*
486 * Run the brand's post-state change callback, if it exists.
487 */
488 static int
489 brand_poststatechg(zlog_t *zlogp, int state, int cmd)
490 {
491 char cmdbuf[2 * MAXPATHLEN];
492 const char *altroot;
493
494 if (post_statechg_hook[0] == '\0')
495 return (0);
496
497 altroot = zonecfg_get_root();
498 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
499 state, cmd, altroot) > sizeof (cmdbuf))
500 return (-1);
501
502 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
503 return (-1);
504
505 return (0);
506 }
507
508 /*
509 * Notify zonestatd of the new zone. If zonestatd is not running, this
510 * will do nothing.
511 */
512 static void
513 notify_zonestatd(zoneid_t zoneid)
514 {
515 int cmd[2];
516 int fd;
517 door_arg_t params;
518
519 fd = open(ZS_DOOR_PATH, O_RDONLY);
520 if (fd < 0)
521 return;
522
523 cmd[0] = ZSD_CMD_NEW_ZONE;
524 cmd[1] = zoneid;
525 params.data_ptr = (char *)&cmd;
526 params.data_size = sizeof (cmd);
527 params.desc_ptr = NULL;
528 params.desc_num = 0;
529 params.rbuf = NULL;
530 params.rsize = NULL;
531 (void) door_call(fd, ¶ms);
532 (void) close(fd);
533 }
534
535 /*
536 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is
537 * 'true' if this is being invoked as part of the processing for the "mount"
538 * subcommand.
539 */
540 static int
541 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
542 {
543 int err;
544
545 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
546 return (-1);
547
548 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
549 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
550 zonecfg_strerror(err));
551 goto bad;
552 }
553
554 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
555 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
556 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
557 zonecfg_strerror(err));
558 goto bad;
559 }
560 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
561 bringup_failure_recovery = B_TRUE;
562 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
563 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
564 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
565 zonecfg_strerror(err));
566 goto bad;
567 }
568
569 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
570 goto bad;
571
572 return (0);
573
574 bad:
575 /*
576 * If something goes wrong, we up the zones's state to the target
577 * state, READY, and then invoke the hook as if we're halting.
578 */
579 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
580 return (-1);
581 }
582
583 int
584 init_template(void)
585 {
586 int fd;
587 int err = 0;
588
589 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
590 if (fd == -1)
591 return (-1);
592
593 /*
594 * For now, zoneadmd doesn't do anything with the contract.
595 * Deliver no events, don't inherit, and allow it to be orphaned.
596 */
597 err |= ct_tmpl_set_critical(fd, 0);
598 err |= ct_tmpl_set_informative(fd, 0);
599 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
600 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
601 if (err || ct_tmpl_activate(fd)) {
602 (void) close(fd);
603 return (-1);
604 }
605
606 return (fd);
607 }
608
/*
 * Context handed to mount_early_fs() through its opaque 'data' argument.
 */
typedef struct fs_callback {
	zlog_t *zlogp;		/* where mount_early_fs() logs errors */
	zoneid_t zoneid;	/* zone the mounting child enters */
	boolean_t mount_cmd;	/* true: use "<zonepath>/lu" as the root */
} fs_callback_t;
614
615 static int
616 mount_early_fs(void *data, const char *spec, const char *dir,
617 const char *fstype, const char *opt)
618 {
619 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp;
620 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid;
621 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd;
622 char rootpath[MAXPATHLEN];
623 pid_t child;
624 int child_status;
625 int tmpl_fd;
626 int rv;
627 ctid_t ct;
628
629 /* determine the zone rootpath */
630 if (mount_cmd) {
631 char luroot[MAXPATHLEN];
632
633 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
634 resolve_lofs(zlogp, luroot, sizeof (luroot));
635 (void) strlcpy(rootpath, luroot, sizeof (rootpath));
636 } else {
637 if (zone_get_rootpath(zone_name,
638 rootpath, sizeof (rootpath)) != Z_OK) {
639 zerror(zlogp, B_FALSE, "unable to determine zone root");
640 return (-1);
641 }
642 }
643
644 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) {
645 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
646 rootpath, dir);
647 return (-1);
648 } else if (rv > 0) {
649 /* The mount point path doesn't exist, create it now. */
650 if (make_one_dir(zlogp, rootpath, dir,
651 DEFAULT_DIR_MODE, DEFAULT_DIR_USER,
652 DEFAULT_DIR_GROUP) != 0) {
653 zerror(zlogp, B_FALSE, "failed to create mount point");
654 return (-1);
655 }
656
657 /*
658 * Now this might seem weird, but we need to invoke
659 * valid_mount_path() again. Why? Because it checks
660 * to make sure that the mount point path is canonical,
661 * which it can only do if the path exists, so now that
662 * we've created the path we have to verify it again.
663 */
664 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir,
665 fstype)) < 0) {
666 zerror(zlogp, B_FALSE,
667 "%s%s is not a valid mount point", rootpath, dir);
668 return (-1);
669 }
670 }
671
672 if ((tmpl_fd = init_template()) == -1) {
673 zerror(zlogp, B_TRUE, "failed to create contract");
674 return (-1);
675 }
676
677 if ((child = fork()) == -1) {
678 (void) ct_tmpl_clear(tmpl_fd);
679 (void) close(tmpl_fd);
680 zerror(zlogp, B_TRUE, "failed to fork");
681 return (-1);
682
683 } else if (child == 0) { /* child */
684 char opt_buf[MAX_MNTOPT_STR];
685 int optlen = 0;
686 int mflag = MS_DATA;
687 int i;
688 int ret;
689
690 (void) ct_tmpl_clear(tmpl_fd);
691 /*
692 * Even though there are no procs running in the zone, we
693 * do this for paranoia's sake.
694 */
695 (void) closefrom(0);
696
697 if (zone_enter(zoneid) == -1) {
698 _exit(errno);
699 }
700 if (opt != NULL) {
701 /*
702 * The mount() system call is incredibly annoying.
703 * If options are specified, we need to copy them
704 * into a temporary buffer since the mount() system
705 * call will overwrite the options string. It will
706 * also fail if the new option string it wants to
707 * write is bigger than the one we passed in, so
708 * you must pass in a buffer of the maximum possible
709 * option string length. sigh.
710 */
711 (void) strlcpy(opt_buf, opt, sizeof (opt_buf));
712 opt = opt_buf;
713 optlen = MAX_MNTOPT_STR;
714 mflag = MS_OPTIONSTR;
715 }
716
717 /*
718 * There is an obscure race condition which can cause mount
719 * to return EBUSY. This happens for example on the mount
720 * of the zone's /etc/svc/volatile file system if there is
721 * a GZ process running svcs -Z, which will touch the
722 * mountpoint, just as we're trying to do the mount. To cope
723 * with this, we retry up to 3 times to let this transient
724 * process get out of the way.
725 */
726 for (i = 0; i < 3; i++) {
727 ret = 0;
728 if (mount(spec, dir, mflag, fstype, NULL, 0, opt,
729 optlen) != 0)
730 ret = errno;
731 if (ret != EBUSY)
732 break;
733 (void) sleep(1);
734 }
735 _exit(ret);
736 }
737
738 /* parent */
739 if (contract_latest(&ct) == -1)
740 ct = -1;
741 (void) ct_tmpl_clear(tmpl_fd);
742 (void) close(tmpl_fd);
743 if (waitpid(child, &child_status, 0) != child) {
744 /* unexpected: we must have been signalled */
745 (void) contract_abandon_id(ct);
746 return (-1);
747 }
748 (void) contract_abandon_id(ct);
749 if (WEXITSTATUS(child_status) != 0) {
750 errno = WEXITSTATUS(child_status);
751 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
752 return (-1);
753 }
754
755 return (0);
756 }
757
758 /*
759 * env variable name format
760 * _ZONECFG;{resource name};{identifying attr. name};{property name}
761 */
762 static void
763 set_zonecfg_env(char *rsrc, char *attr, char *name, char *val)
764 {
765 char *p;
766 /* Enough for maximal name, rsrc + attr, & slop for ZONECFG & _'s */
767 char nm[2 * MAXNAMELEN + 32];
768
769 if (attr == NULL)
770 (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s", rsrc,
771 name);
772 else
773 (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s_%s", rsrc,
774 attr, name);
775
776 p = nm;
777 while ((p = strchr(p, '-')) != NULL)
778 *p++ = '_';
779
780 (void) setenv(nm, val, 1);
781 }
782
783 /*
784 * Export zonecfg network and device properties into environment for the boot
785 * and state change hooks.
786 * If debug is true, export the brand hook debug env. variable as well.
787 *
788 * We could export more of the config in the future, as necessary.
789 */
790 static int
791 setup_subproc_env()
792 {
793 int res;
794 zone_dochandle_t handle;
795 struct zone_nwiftab ntab;
796 struct zone_devtab dtab;
797 char net_resources[MAXNAMELEN * 2];
798 char dev_resources[MAXNAMELEN * 2];
799
800 if ((handle = zonecfg_init_handle()) == NULL)
801 exit(Z_NOMEM);
802
803 if ((res = zonecfg_get_handle(zone_name, handle)) != Z_OK)
804 goto done;
805
806 if ((res = zonecfg_setnwifent(handle)) != Z_OK)
807 goto done;
808
809 while (zonecfg_getnwifent(handle, &ntab) == Z_OK) {
810 struct zone_res_attrtab *rap;
811 char *phys;
812
813 phys = ntab.zone_nwif_physical;
814
815 (void) strlcat(net_resources, phys, sizeof (net_resources));
816 (void) strlcat(net_resources, " ", sizeof (net_resources));
817
818 set_zonecfg_env(RSRC_NET, phys, "physical", phys);
819
820 set_zonecfg_env(RSRC_NET, phys, "address",
821 ntab.zone_nwif_address);
822 set_zonecfg_env(RSRC_NET, phys, "allowed-address",
823 ntab.zone_nwif_allowed_address);
824 set_zonecfg_env(RSRC_NET, phys, "defrouter",
825 ntab.zone_nwif_defrouter);
826 set_zonecfg_env(RSRC_NET, phys, "global-nic",
827 ntab.zone_nwif_gnic);
828 set_zonecfg_env(RSRC_NET, phys, "mac-addr", ntab.zone_nwif_mac);
829 set_zonecfg_env(RSRC_NET, phys, "vlan-id",
830 ntab.zone_nwif_vlan_id);
831
832 for (rap = ntab.zone_nwif_attrp; rap != NULL;
833 rap = rap->zone_res_attr_next)
834 set_zonecfg_env(RSRC_NET, phys, rap->zone_res_attr_name,
835 rap->zone_res_attr_value);
836 }
837
838 (void) zonecfg_endnwifent(handle);
839
840 if ((res = zonecfg_setdevent(handle)) != Z_OK)
841 goto done;
842
843 while (zonecfg_getdevent(handle, &dtab) == Z_OK) {
844 struct zone_res_attrtab *rap;
845 char *match;
846
847 match = dtab.zone_dev_match;
848
849 (void) strlcat(dev_resources, match, sizeof (dev_resources));
850 (void) strlcat(dev_resources, " ", sizeof (dev_resources));
851
852 for (rap = dtab.zone_dev_attrp; rap != NULL;
853 rap = rap->zone_res_attr_next)
854 set_zonecfg_env(RSRC_DEV, match,
855 rap->zone_res_attr_name, rap->zone_res_attr_value);
856 }
857
858 (void) zonecfg_enddevent(handle);
859
860 res = Z_OK;
861
862 done:
863 zonecfg_fini_handle(handle);
864 return (res);
865 }
866
867 /*
868 * If retstr is not NULL, the output of the subproc is returned in the str,
869 * otherwise it is output using zerror(). Any memory allocated for retstr
870 * should be freed by the caller.
871 */
872 int
873 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
874 {
875 char buf[1024]; /* arbitrary large amount */
876 char *inbuf;
877 FILE *file;
878 int status;
879 int rd_cnt;
880
881 if (retstr != NULL) {
882 if ((*retstr = malloc(1024)) == NULL) {
883 zerror(zlogp, B_FALSE, "out of memory");
884 return (-1);
885 }
886 inbuf = *retstr;
887 rd_cnt = 0;
888 } else {
889 inbuf = buf;
890 }
891
892 if (setup_subproc_env() != Z_OK) {
893 zerror(zlogp, B_FALSE, "failed to setup environment");
894 return (-1);
895 }
896
897 file = popen(cmdbuf, "r");
898 if (file == NULL) {
899 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
900 return (-1);
901 }
902
903 while (fgets(inbuf, 1024, file) != NULL) {
904 if (retstr == NULL) {
905 if (zlogp != &logsys)
906 zerror(zlogp, B_FALSE, "%s", inbuf);
907 } else {
908 char *p;
909
910 rd_cnt += 1024 - 1;
911 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
912 zerror(zlogp, B_FALSE, "out of memory");
913 (void) pclose(file);
914 return (-1);
915 }
916
917 *retstr = p;
918 inbuf = *retstr + rd_cnt;
919 }
920 }
921 status = pclose(file);
922
923 if (WIFSIGNALED(status)) {
924 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
925 "signal %d", cmdbuf, WTERMSIG(status));
926 return (-1);
927 }
928 assert(WIFEXITED(status));
929 if (WEXITSTATUS(status) == ZEXIT_EXEC) {
930 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
931 return (-1);
932 }
933 return (WEXITSTATUS(status));
934 }
935
/* Everything down to the matching #endif is compiled out. */
#if 0	/* XXX KEBE SAYS not yet */
/*
 * Get the path for this zone's init(1M) (or equivalent) process.  First look
 * for a zone-specific init-name attr, then get it from the brand.
 *
 * The result is copied into initname (at most len bytes).  Returns 0 when
 * the attribute is found in snap_hndl, otherwise whatever
 * brand_get_initname() returns.
 */
static int
get_initname(brand_handle_t bh, char *initname, int len)
{
	struct zone_attrtab a;

	bzero(&a, sizeof (a));
	(void) strlcpy(a.zone_attr_name, "init-name",
	    sizeof (a.zone_attr_name));

	/* A per-zone attribute takes precedence over the brand's value. */
	if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
		(void) strlcpy(initname, a.zone_attr_value, len);
		return (0);
	}

	return (brand_get_initname(bh, initname, len));
}

/*
 * Get the restart-init flag for this zone's init(1M) (or equivalent) process.
 * First look for a zone-specific restart-init attr, then get it from the brand.
 *
 * When the attribute exists, only the exact value "false" yields B_FALSE;
 * any other value yields B_TRUE.
 */
static boolean_t
restartinit(brand_handle_t bh)
{
	struct zone_attrtab a;

	bzero(&a, sizeof (a));
	(void) strlcpy(a.zone_attr_name, "restart-init",
	    sizeof (a.zone_attr_name));

	if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
		if (strcmp(a.zone_attr_value, "false") == 0)
			return (B_FALSE);
		return (B_TRUE);
	}

	return (brand_restartinit(bh));
}
#endif	/* XXX KEBE */
980
981 /*
982 * Get the app-svc-dependent flag for this zone's init process. This is a
983 * zone-specific attr which controls the type of contract we create for the
984 * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal
985 * set, so that when any service which is in the same contract exits, the init
986 * application will be terminated.
987 *
988 * We use the global "snap_hndl", so no parameters get passed here.
989 */
990 static boolean_t
991 is_app_svc_dep(void)
992 {
993 struct zone_attrtab a;
994
995 bzero(&a, sizeof (a));
996 (void) strlcpy(a.zone_attr_name, "app-svc-dependent",
997 sizeof (a.zone_attr_name));
998
999 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
1000 strcmp(a.zone_attr_value, "true") == 0) {
1001 return (B_TRUE);
1002 }
1003
1004 return (B_FALSE);
1005 }
1006
1007 static int
1008 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
1009 {
1010 zoneid_t zoneid;
1011 struct stat st;
1012 char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
1013 char nbootargs[BOOTARGS_MAX];
1014 char cmdbuf[MAXPATHLEN];
1015 fs_callback_t cb;
1016 brand_handle_t bh;
1017 zone_iptype_t iptype;
1018 dladm_status_t status;
1019 char errmsg[DLADM_STRSIZE];
1020 int err;
1021 boolean_t restart_init;
1022 boolean_t app_svc_dep;
1023
1024 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
1025 return (-1);
1026
1027 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1028 zerror(zlogp, B_TRUE, "unable to get zoneid");
1029 goto bad;
1030 }
1031
1032 cb.zlogp = zlogp;
1033 cb.zoneid = zoneid;
1034 cb.mount_cmd = B_FALSE;
1035
1036 /* Get a handle to the brand info for this zone */
1037 if ((bh = brand_open(brand_name)) == NULL) {
1038 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1039 goto bad;
1040 }
1041
1042 /*
1043 * Get the list of filesystems to mount from the brand
1044 * configuration. These mounts are done via a thread that will
1045 * enter the zone, so they are done from within the context of the
1046 * zone.
1047 */
1048 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
1049 zerror(zlogp, B_FALSE, "unable to mount filesystems");
1050 brand_close(bh);
1051 goto bad;
1052 }
1053
1054 /*
1055 * Get the brand's boot callback if it exists.
1056 */
1057 (void) strcpy(cmdbuf, EXEC_PREFIX);
1058 if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
1059 sizeof (cmdbuf) - EXEC_LEN) != 0) {
1060 zerror(zlogp, B_FALSE,
1061 "unable to determine branded zone's boot callback");
1062 brand_close(bh);
1063 goto bad;
1064 }
1065
1066 /* Get the path for this zone's init(1M) (or equivalent) process. */
1067 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
1068 zerror(zlogp, B_FALSE,
1069 "unable to determine zone's init(1M) location");
1070 brand_close(bh);
1071 goto bad;
1072 }
1073
1074 /* See if this zone's brand should restart init if it dies. */
1075 restart_init = brand_restartinit(bh);
1076
1077 /*
1078 * See if we need to setup contract dependencies between the zone's
1079 * primary application and any of its services.
1080 */
1081 app_svc_dep = is_app_svc_dep();
1082
1083 brand_close(bh);
1084
1085 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
1086 if (err != Z_OK)
1087 goto bad;
1088
1089 assert(init_file[0] != '\0');
1090
1091 /*
1092 * Try to anticipate possible problems: If possible, make sure init is
1093 * executable.
1094 */
1095 if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) {
1096 zerror(zlogp, B_FALSE, "unable to determine zone root");
1097 goto bad;
1098 }
1099
1100 (void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file);
1101
1102 if (lstat(initpath, &st) == -1) {
1103 zerror(zlogp, B_TRUE, "could not stat %s", initpath);
1104 goto bad;
1105 }
1106
1107 /*
1108 * If a symlink, we'll have to wait and resolve when we boot,
1109 * otherwise check the executable bits now.
1110 */
1111 if ((st.st_mode & S_IFMT) != S_IFLNK && (st.st_mode & S_IXUSR) == 0) {
1112 zerror(zlogp, B_FALSE, "%s is not executable", initpath);
1113 goto bad;
1114 }
1115
1116 /*
1117 * Exclusive stack zones interact with the dlmgmtd running in the
1118 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is
1119 * booting, and loads its datalinks from the zone's datalink
1120 * configuration file.
1121 */
1122 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
1123 status = dladm_zone_boot(dld_handle, zoneid);
1124 if (status != DLADM_STATUS_OK) {
1125 zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
1126 " %s", dladm_status2str(status, errmsg));
1127 goto bad;
1128 }
1129 }
1130
1131 /*
1132 * If there is a brand 'boot' callback, execute it now to give the
1133 * brand one last chance to do any additional setup before the zone
1134 * is booted.
1135 */
1136 if ((strlen(cmdbuf) > EXEC_LEN) &&
1137 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
1138 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
1139 goto bad;
1140 }
1141
1142 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
1143 zerror(zlogp, B_TRUE, "could not set zone boot file");
1144 goto bad;
1145 }
1146
1147 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
1148 zerror(zlogp, B_TRUE, "could not set zone boot arguments");
1149 goto bad;
1150 }
1151
1152 if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART,
1153 NULL, 0) == -1) {
1154 zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
1155 goto bad;
1156 }
1157
1158 if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT,
1159 (void *)B_TRUE, sizeof (boolean_t)) == -1) {
1160 zerror(zlogp, B_TRUE, "could not set zone app-die");
1161 goto bad;
1162 }
1163
1164 /*
1165 * Inform zonestatd of a new zone so that it can install a door for
1166 * the zone to contact it.
1167 */
1168 notify_zonestatd(zone_id);
1169
1170 if (zone_boot(zoneid) == -1) {
1171 zerror(zlogp, B_TRUE, "unable to boot zone");
1172 goto bad;
1173 }
1174
1175 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
1176 goto bad;
1177
1178 /* Startup a thread to perform zfd logging/tty svc for the zone. */
1179 create_log_thread(zlogp, zone_id);
1180
1181 /* Startup a thread to perform memory capping for the zone. */
1182 create_mcap_thread(zlogp, zone_id);
1183
1184 return (0);
1185
1186 bad:
1187 /*
1188 * If something goes wrong, we up the zones's state to the target
1189 * state, RUNNING, and then invoke the hook as if we're halting.
1190 */
1191 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
1192
1193 return (-1);
1194 }
1195
1196 static int
1197 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
1198 {
1199 int err;
1200
1201 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
1202 return (-1);
1203
1204 /* Shutting down, stop the memcap thread */
1205 destroy_mcap_thread();
1206
1207 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
1208 if (!bringup_failure_recovery)
1209 zerror(zlogp, B_FALSE, "unable to destroy zone");
1210 destroy_log_thread();
1211 return (-1);
1212 }
1213
1214 /* Shut down is done, stop the log thread */
1215 destroy_log_thread();
1216
1217 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
1218 return (-1);
1219
1220 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
1221 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
1222 zonecfg_strerror(err));
1223
1224 return (0);
1225 }
1226
1227 static int
1228 zone_graceful_shutdown(zlog_t *zlogp)
1229 {
1230 zoneid_t zoneid;
1231 pid_t child;
1232 char cmdbuf[MAXPATHLEN];
1233 brand_handle_t bh = NULL;
1234 ctid_t ct;
1235 int tmpl_fd;
1236 int child_status;
1237
1238 if (shutdown_in_progress) {
1239 zerror(zlogp, B_FALSE, "shutdown already in progress");
1240 return (-1);
1241 }
1242
1243 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1244 zerror(zlogp, B_TRUE, "unable to get zoneid");
1245 return (-1);
1246 }
1247
1248 /* Get a handle to the brand info for this zone */
1249 if ((bh = brand_open(brand_name)) == NULL) {
1250 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1251 return (-1);
1252 }
1253
1254 /*
1255 * If there is a brand 'shutdown' callback, execute it now to give the
1256 * brand a chance to cleanup any custom configuration.
1257 */
1258 (void) strcpy(cmdbuf, EXEC_PREFIX);
1259 if (brand_get_shutdown(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
1260 sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
1261 (void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
1262 }
1263 brand_close(bh);
1264
1265 if ((tmpl_fd = init_template()) == -1) {
1266 zerror(zlogp, B_TRUE, "failed to create contract");
1267 return (-1);
1268 }
1269
1270 if ((child = fork()) == -1) {
1271 (void) ct_tmpl_clear(tmpl_fd);
1272 (void) close(tmpl_fd);
1273 zerror(zlogp, B_TRUE, "failed to fork");
1274 return (-1);
1275 } else if (child == 0) {
1276 (void) ct_tmpl_clear(tmpl_fd);
1277 if (zone_enter(zoneid) == -1) {
1278 _exit(errno);
1279 }
1280 _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL));
1281 }
1282
1283 if (contract_latest(&ct) == -1)
1284 ct = -1;
1285 (void) ct_tmpl_clear(tmpl_fd);
1286 (void) close(tmpl_fd);
1287
1288 if (waitpid(child, &child_status, 0) != child) {
1289 /* unexpected: we must have been signalled */
1290 (void) contract_abandon_id(ct);
1291 return (-1);
1292 }
1293
1294 (void) contract_abandon_id(ct);
1295 if (WEXITSTATUS(child_status) != 0) {
1296 errno = WEXITSTATUS(child_status);
1297 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1298 return (-1);
1299 }
1300
1301 shutdown_in_progress = B_TRUE;
1302
1303 return (0);
1304 }
1305
1306 static int
1307 zone_wait_shutdown(zlog_t *zlogp)
1308 {
1309 zone_state_t zstate;
1310 uint64_t *tm = NULL;
1311 scf_simple_prop_t *prop = NULL;
1312 int timeout;
1313 int tries;
1314 int rc = -1;
1315
1316 /* Get default stop timeout from SMF framework */
1317 timeout = SHUTDOWN_WAIT;
1318 if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop",
1319 SCF_PROPERTY_TIMEOUT)) != NULL) {
1320 if ((tm = scf_simple_prop_next_count(prop)) != NULL) {
1321 if (tm != 0)
1322 timeout = *tm;
1323 }
1324 scf_simple_prop_free(prop);
1325 }
1326
1327 /* allow time for zone to shutdown cleanly */
1328 for (tries = 0; tries < timeout; tries ++) {
1329 (void) sleep(1);
1330 if (zone_get_state(zone_name, &zstate) == Z_OK &&
1331 zstate == ZONE_STATE_INSTALLED) {
1332 rc = 0;
1333 break;
1334 }
1335 }
1336
1337 if (rc != 0)
1338 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1339
1340 shutdown_in_progress = B_FALSE;
1341
1342 return (rc);
1343 }
1344
1345
1346
1347 /*
1348 * Generate AUE_zone_state for a command that boots a zone.
1349 */
1350 static void
1351 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
1352 char *new_state)
1353 {
1354 adt_session_data_t *ah;
1355 adt_event_data_t *event;
1356 int pass_fail, fail_reason;
1357
1358 if (!adt_audit_enabled())
1359 return;
1360
1361 if (return_val == 0) {
1362 pass_fail = ADT_SUCCESS;
1363 fail_reason = ADT_SUCCESS;
1364 } else {
1365 pass_fail = ADT_FAILURE;
1366 fail_reason = ADT_FAIL_VALUE_PROGRAM;
1367 }
1368
1369 if (adt_start_session(&ah, NULL, 0)) {
1370 zerror(zlogp, B_TRUE, gettext("audit failure."));
1371 return;
1372 }
1373 if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
1374 zerror(zlogp, B_TRUE, gettext("audit failure."));
1375 (void) adt_end_session(ah);
1376 return;
1377 }
1378
1379 event = adt_alloc_event(ah, ADT_zone_state);
1380 if (event == NULL) {
1381 zerror(zlogp, B_TRUE, gettext("audit failure."));
1382 (void) adt_end_session(ah);
1383 return;
1384 }
1385 event->adt_zone_state.zonename = zone_name;
1386 event->adt_zone_state.new_state = new_state;
1387
1388 if (adt_put_event(event, pass_fail, fail_reason))
1389 zerror(zlogp, B_TRUE, gettext("audit failure."));
1390
1391 adt_free_event(event);
1392
1393 (void) adt_end_session(ah);
1394 }
1395
1396 /*
1397 * Log the exit time and status of the zone's init process into
1398 * {zonepath}/lastexited. If the zone shutdown normally, the exit status will
1399 * be -1, otherwise it will be the exit status as described in wait.3c.
1400 * If the zone is configured to restart init, then nothing will be logged if
1401 * init exits unexpectedly (the kernel will never upcall in this case).
1402 */
1403 static void
1404 log_init_exit(int status)
1405 {
1406 char p[MAXPATHLEN];
1407 char buf[128];
1408 struct timeval t;
1409 int fd;
1410
1411 if (snprintf(p, sizeof (p), "%s/lastexited", zonepath) > sizeof (p))
1412 return;
1413 if (gettimeofday(&t, NULL) != 0)
1414 return;
1415 if (snprintf(buf, sizeof (buf), "%ld.%ld %d\n", t.tv_sec, t.tv_usec,
1416 status) > sizeof (buf))
1417 return;
1418 if ((fd = open(p, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0)
1419 return;
1420
1421 (void) write(fd, buf, strlen(buf));
1422
1423 (void) close(fd);
1424 }
1425
1426 /*
1427 * The main routine for the door server that deals with zone state transitions.
1428 */
1429 /* ARGSUSED */
1430 static void
1431 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1432 uint_t n_desc)
1433 {
1434 ucred_t *uc = NULL;
1435 const priv_set_t *eset;
1436
1437 zone_state_t zstate;
1438 zone_cmd_t cmd;
1439 int init_status;
1440 zone_cmd_arg_t *zargp;
1441
1442 boolean_t kernelcall;
1443
1444 int rval = -1;
1445 uint64_t uniqid;
1446 zoneid_t zoneid = -1;
1447 zlog_t zlog;
1448 zlog_t *zlogp;
1449 zone_cmd_rval_t *rvalp;
1450 size_t rlen = getpagesize(); /* conservative */
1451 fs_callback_t cb;
1452 brand_handle_t bh;
1453 boolean_t wait_shut = B_FALSE;
1454
1455 /* LINTED E_BAD_PTR_CAST_ALIGN */
1456 zargp = (zone_cmd_arg_t *)args;
1457
1458 /*
1459 * When we get the door unref message, we've fdetach'd the door, and
1460 * it is time for us to shut down zoneadmd.
1461 */
1462 if (zargp == DOOR_UNREF_DATA) {
1463 /*
1464 * See comment at end of main() for info on the last rites.
1465 */
1466 exit(0);
1467 }
1468
1469 if (zargp == NULL) {
1470 (void) door_return(NULL, 0, 0, 0);
1471 }
1472
1473 rvalp = alloca(rlen);
1474 bzero(rvalp, rlen);
1475 zlog.logfile = NULL;
1476 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1477 zlog.buf = rvalp->errbuf;
1478 zlog.log = zlog.buf;
1479 /* defer initialization of zlog.locale until after credential check */
1480 zlogp = &zlog;
1481
1482 if (alen != sizeof (zone_cmd_arg_t)) {
1483 /*
1484 * This really shouldn't be happening.
1485 */
1486 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1487 "unexpected (expected %d bytes)", alen,
1488 sizeof (zone_cmd_arg_t));
1489 goto out;
1490 }
1491 cmd = zargp->cmd;
1492 init_status = zargp->status;
1493
1494 if (door_ucred(&uc) != 0) {
1495 zerror(&logsys, B_TRUE, "door_ucred");
1496 goto out;
1497 }
1498 eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1499 if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1500 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1501 ucred_geteuid(uc) != 0)) {
1502 zerror(&logsys, B_FALSE, "insufficient privileges");
1503 goto out;
1504 }
1505
1506 kernelcall = ucred_getpid(uc) == 0;
1507
1508 /*
1509 * This is safe because we only use a zlog_t throughout the
1510 * duration of a door call; i.e., by the time the pointer
1511 * might become invalid, the door call would be over.
1512 */
1513 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
1514
1515 (void) mutex_lock(&lock);
1516
1517 /*
1518 * Once we start to really die off, we don't want more connections.
1519 */
1520 if (in_death_throes) {
1521 (void) mutex_unlock(&lock);
1522 ucred_free(uc);
1523 (void) door_return(NULL, 0, 0, 0);
1524 thr_exit(NULL);
1525 }
1526
1527 /*
1528 * Check for validity of command.
1529 */
1530 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT &&
1531 cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT &&
1532 cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT &&
1533 cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) {
1534 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd);
1535 goto out;
1536 }
1537
1538 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
1539 /*
1540 * Can't happen
1541 */
1542 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
1543 cmd);
1544 goto out;
1545 }
1546 /*
1547 * We ignore the possibility of someone calling zone_create(2)
1548 * explicitly; all requests must come through zoneadmd.
1549 */
1550 if (zone_get_state(zone_name, &zstate) != Z_OK) {
1551 /*
1552 * Something terribly wrong happened
1553 */
1554 zerror(&logsys, B_FALSE, "unable to determine state of zone");
1555 goto out;
1556 }
1557
1558 if (kernelcall) {
1559 /*
1560 * Kernel-initiated requests may lose their validity if the
1561 * zone_t the kernel was referring to has gone away.
1562 */
1563 if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
1564 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
1565 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
1566 /*
1567 * We're not talking about the same zone. The request
1568 * must have arrived too late. Return error.
1569 */
1570 rval = -1;
1571 goto out;
1572 }
1573 zlogp = &logsys; /* Log errors to syslog */
1574 }
1575
1576 /*
1577 * If we are being asked to forcibly mount or boot a zone, we
1578 * pretend that an INCOMPLETE zone is actually INSTALLED.
1579 */
1580 if (zstate == ZONE_STATE_INCOMPLETE &&
1581 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1582 zstate = ZONE_STATE_INSTALLED;
1583
1584 switch (zstate) {
1585 case ZONE_STATE_CONFIGURED:
1586 case ZONE_STATE_INCOMPLETE:
1587 /*
1588 * Not our area of expertise; we just print a nice message
1589 * and die off.
1590 */
1591 zerror(zlogp, B_FALSE,
1592 "%s operation is invalid for zones in state '%s'",
1593 z_cmd_name(cmd), zone_state_str(zstate));
1594 break;
1595
1596 case ZONE_STATE_INSTALLED:
1597 switch (cmd) {
1598 case Z_READY:
1599 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
1600 if (rval == 0)
1601 eventstream_write(Z_EVT_ZONE_READIED);
1602 zcons_statechanged();
1603 break;
1604 case Z_BOOT:
1605 case Z_FORCEBOOT:
1606 eventstream_write(Z_EVT_ZONE_BOOTING);
1607 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1608 == 0) {
1609 rval = zone_bootup(zlogp, zargp->bootbuf,
1610 zstate);
1611 }
1612 audit_put_record(zlogp, uc, rval, "boot");
1613 zcons_statechanged();
1614 if (rval != 0) {
1615 bringup_failure_recovery = B_TRUE;
1616 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1617 zstate);
1618 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1619 }
1620 break;
1621 case Z_SHUTDOWN:
1622 case Z_HALT:
1623 if (kernelcall) /* Invalid; can't happen */
1624 abort();
1625 /*
1626 * We could have two clients racing to halt this
1627 * zone; the second client loses, but his request
1628 * doesn't fail, since the zone is now in the desired
1629 * state.
1630 */
1631 zerror(zlogp, B_FALSE, "zone is already halted");
1632 rval = 0;
1633 break;
1634 case Z_REBOOT:
1635 if (kernelcall) /* Invalid; can't happen */
1636 abort();
1637 zerror(zlogp, B_FALSE, "%s operation is invalid "
1638 "for zones in state '%s'", z_cmd_name(cmd),
1639 zone_state_str(zstate));
1640 rval = -1;
1641 break;
1642 case Z_NOTE_UNINSTALLING:
1643 if (kernelcall) /* Invalid; can't happen */
1644 abort();
1645 /*
1646 * Tell the console to print out a message about this.
1647 * Once it does, we will be in_death_throes.
1648 */
1649 eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1650 break;
1651 case Z_MOUNT:
1652 case Z_FORCEMOUNT:
1653 if (kernelcall) /* Invalid; can't happen */
1654 abort();
1655 if (!zone_isnative && !zone_iscluster &&
1656 !zone_islabeled) {
1657 /*
1658 * -U mounts the zone without lofs mounting
1659 * zone file systems back into the scratch
1660 * zone. This is required when mounting
1661 * non-native branded zones.
1662 */
1663 (void) strlcpy(zargp->bootbuf, "-U",
1664 BOOTARGS_MAX);
1665 }
1666
1667 rval = zone_ready(zlogp,
1668 strcmp(zargp->bootbuf, "-U") == 0 ?
1669 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
1670 if (rval != 0)
1671 break;
1672
1673 eventstream_write(Z_EVT_ZONE_READIED);
1674
1675 /*
1676 * Get a handle to the default brand info.
1677 * We must always use the default brand file system
1678 * list when mounting the zone.
1679 */
1680 if ((bh = brand_open(default_brand)) == NULL) {
1681 rval = -1;
1682 break;
1683 }
1684
1685 /*
1686 * Get the list of filesystems to mount from
1687 * the brand configuration. These mounts are done
1688 * via a thread that will enter the zone, so they
1689 * are done from within the context of the zone.
1690 */
1691 cb.zlogp = zlogp;
1692 cb.zoneid = zone_id;
1693 cb.mount_cmd = B_TRUE;
1694 rval = brand_platform_iter_mounts(bh,
1695 mount_early_fs, &cb);
1696
1697 brand_close(bh);
1698
1699 /*
1700 * Ordinarily, /dev/fd would be mounted inside the zone
1701 * by svc:/system/filesystem/usr:default, but since
1702 * we're not booting the zone, we need to do this
1703 * manually.
1704 */
1705 if (rval == 0)
1706 rval = mount_early_fs(&cb,
1707 "fd", "/dev/fd", "fd", NULL);
1708 break;
1709 case Z_UNMOUNT:
1710 if (kernelcall) /* Invalid; can't happen */
1711 abort();
1712 zerror(zlogp, B_FALSE, "zone is already unmounted");
1713 rval = 0;
1714 break;
1715 }
1716 break;
1717
1718 case ZONE_STATE_READY:
1719 switch (cmd) {
1720 case Z_READY:
1721 /*
1722 * We could have two clients racing to ready this
1723 * zone; the second client loses, but his request
1724 * doesn't fail, since the zone is now in the desired
1725 * state.
1726 */
1727 zerror(zlogp, B_FALSE, "zone is already ready");
1728 rval = 0;
1729 break;
1730 case Z_BOOT:
1731 (void) strlcpy(boot_args, zargp->bootbuf,
1732 sizeof (boot_args));
1733 eventstream_write(Z_EVT_ZONE_BOOTING);
1734 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1735 audit_put_record(zlogp, uc, rval, "boot");
1736 zcons_statechanged();
1737 if (rval != 0) {
1738 bringup_failure_recovery = B_TRUE;
1739 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1740 zstate);
1741 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1742 }
1743 boot_args[0] = '\0';
1744 break;
1745 case Z_HALT:
1746 if (kernelcall) /* Invalid; can't happen */
1747 abort();
1748 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1749 != 0)
1750 break;
1751 zcons_statechanged();
1752 eventstream_write(Z_EVT_ZONE_HALTED);
1753 break;
1754 case Z_SHUTDOWN:
1755 case Z_REBOOT:
1756 case Z_NOTE_UNINSTALLING:
1757 case Z_MOUNT:
1758 case Z_UNMOUNT:
1759 if (kernelcall) /* Invalid; can't happen */
1760 abort();
1761 zerror(zlogp, B_FALSE, "%s operation is invalid "
1762 "for zones in state '%s'", z_cmd_name(cmd),
1763 zone_state_str(zstate));
1764 rval = -1;
1765 break;
1766 }
1767 break;
1768
1769 case ZONE_STATE_MOUNTED:
1770 switch (cmd) {
1771 case Z_UNMOUNT:
1772 if (kernelcall) /* Invalid; can't happen */
1773 abort();
1774 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
1775 if (rval == 0) {
1776 eventstream_write(Z_EVT_ZONE_HALTED);
1777 (void) sema_post(&scratch_sem);
1778 }
1779 break;
1780 default:
1781 if (kernelcall) /* Invalid; can't happen */
1782 abort();
1783 zerror(zlogp, B_FALSE, "%s operation is invalid "
1784 "for zones in state '%s'", z_cmd_name(cmd),
1785 zone_state_str(zstate));
1786 rval = -1;
1787 break;
1788 }
1789 break;
1790
1791 case ZONE_STATE_RUNNING:
1792 case ZONE_STATE_SHUTTING_DOWN:
1793 case ZONE_STATE_DOWN:
1794 switch (cmd) {
1795 case Z_READY:
1796 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1797 != 0)
1798 break;
1799 zcons_statechanged();
1800 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
1801 eventstream_write(Z_EVT_ZONE_READIED);
1802 else
1803 eventstream_write(Z_EVT_ZONE_HALTED);
1804 break;
1805 case Z_BOOT:
1806 /*
1807 * We could have two clients racing to boot this
1808 * zone; the second client loses, but his request
1809 * doesn't fail, since the zone is now in the desired
1810 * state.
1811 */
1812 zerror(zlogp, B_FALSE, "zone is already booted");
1813 rval = 0;
1814 break;
1815 case Z_HALT:
1816 if (kernelcall) {
1817 log_init_exit(init_status);
1818 } else {
1819 log_init_exit(-1);
1820 }
1821 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1822 != 0)
1823 break;
1824 eventstream_write(Z_EVT_ZONE_HALTED);
1825 zcons_statechanged();
1826 break;
1827 case Z_REBOOT:
1828 (void) strlcpy(boot_args, zargp->bootbuf,
1829 sizeof (boot_args));
1830 eventstream_write(Z_EVT_ZONE_REBOOTING);
1831 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1832 != 0) {
1833 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1834 boot_args[0] = '\0';
1835 break;
1836 }
1837 zcons_statechanged();
1838 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) !=
1839 0) {
1840 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1841 boot_args[0] = '\0';
1842 break;
1843 }
1844 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1845 audit_put_record(zlogp, uc, rval, "reboot");
1846 if (rval != 0) {
1847 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1848 zstate);
1849 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1850 }
1851 boot_args[0] = '\0';
1852 break;
1853 case Z_SHUTDOWN:
1854 if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
1855 wait_shut = B_TRUE;
1856 }
1857 break;
1858 case Z_NOTE_UNINSTALLING:
1859 case Z_MOUNT:
1860 case Z_UNMOUNT:
1861 zerror(zlogp, B_FALSE, "%s operation is invalid "
1862 "for zones in state '%s'", z_cmd_name(cmd),
1863 zone_state_str(zstate));
1864 rval = -1;
1865 break;
1866 }
1867 break;
1868 default:
1869 abort();
1870 }
1871
1872 /*
1873 * Because the state of the zone may have changed, we make sure
1874 * to wake the console poller, which is in charge of initiating
1875 * the shutdown procedure as necessary.
1876 */
1877 eventstream_write(Z_EVT_NULL);
1878
1879 out:
1880 (void) mutex_unlock(&lock);
1881
1882 /* Wait for the Z_SHUTDOWN commands to complete */
1883 if (wait_shut)
1884 rval = zone_wait_shutdown(zlogp);
1885
1886 if (kernelcall) {
1887 rvalp = NULL;
1888 rlen = 0;
1889 } else {
1890 rvalp->rval = rval;
1891 }
1892 if (uc != NULL)
1893 ucred_free(uc);
1894 (void) door_return((char *)rvalp, rlen, NULL, 0);
1895 thr_exit(NULL);
1896 }
1897
1898 static int
1899 setup_door(zlog_t *zlogp)
1900 {
1901 if ((zone_door = door_create(server, NULL,
1902 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
1903 zerror(zlogp, B_TRUE, "%s failed", "door_create");
1904 return (-1);
1905 }
1906 (void) fdetach(zone_door_path);
1907
1908 if (fattach(zone_door, zone_door_path) != 0) {
1909 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
1910 (void) door_revoke(zone_door);
1911 (void) fdetach(zone_door_path);
1912 zone_door = -1;
1913 return (-1);
1914 }
1915 return (0);
1916 }
1917
1918 /*
1919 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
1920 * is where zoneadmd itself will check to see that another instance of
1921 * zoneadmd isn't already controlling this zone.
1922 *
1923 * The idea here is that we want to open the path to which we will
1924 * attach our door, lock it, and then make sure that no-one has beat us
1925 * to fattach(3c)ing onto it.
1926 *
1927 * fattach(3c) is really a mount, so there are actually two possible
1928 * vnodes we could be dealing with. Our strategy is as follows:
1929 *
1930 * - If the file we opened is a regular file (common case):
1931 * There is no fattach(3c)ed door, so we have a chance of becoming
1932 * the managing zoneadmd. We attempt to lock the file: if it is
1933 * already locked, that means someone else raced us here, so we
1934 * lose and give up. zoneadm(1m) will try to contact the zoneadmd
1935 * that beat us to it.
1936 *
1937 * - If the file we opened is a namefs file:
1938 * This means there is already an established door fattach(3c)'ed
1939 * to the rendezvous path. We've lost the race, so we give up.
1940 * Note that in this case we also try to grab the file lock, and
1941 * will succeed in acquiring it since the vnode locked by the
1942 * "winning" zoneadmd was a regular one, and the one we locked was
1943 * the fattach(3c)'ed door node. At any rate, no harm is done, and
1944 * we just return to zoneadm(1m) which knows to retry.
1945 */
1946 static int
1947 make_daemon_exclusive(zlog_t *zlogp)
1948 {
1949 int doorfd = -1;
1950 int err, ret = -1;
1951 struct stat st;
1952 struct flock flock;
1953 zone_state_t zstate;
1954
1955 top:
1956 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1957 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1958 zonecfg_strerror(err));
1959 goto out;
1960 }
1961 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
1962 S_IREAD|S_IWRITE)) < 0) {
1963 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
1964 goto out;
1965 }
1966 if (fstat(doorfd, &st) < 0) {
1967 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
1968 goto out;
1969 }
1970 /*
1971 * Lock the file to synchronize with other zoneadmd
1972 */
1973 flock.l_type = F_WRLCK;
1974 flock.l_whence = SEEK_SET;
1975 flock.l_start = (off_t)0;
1976 flock.l_len = (off_t)0;
1977 if (fcntl(doorfd, F_SETLK, &flock) < 0) {
1978 /*
1979 * Someone else raced us here and grabbed the lock file
1980 * first. A warning here is inappropriate since nothing
1981 * went wrong.
1982 */
1983 goto out;
1984 }
1985
1986 if (strcmp(st.st_fstype, "namefs") == 0) {
1987 struct door_info info;
1988
1989 /*
1990 * There is already something fattach()'ed to this file.
1991 * Lets see what the door is up to.
1992 */
1993 if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
1994 /*
1995 * Another zoneadmd process seems to be in
1996 * control of the situation and we don't need to
1997 * be here. A warning here is inappropriate
1998 * since nothing went wrong.
1999 *
2000 * If the door has been revoked, the zoneadmd
2001 * process currently managing the zone is going
2002 * away. We'll return control to zoneadm(1m)
2003 * which will try again (by which time zoneadmd
2004 * will hopefully have exited).
2005 */
2006 goto out;
2007 }
2008
2009 /*
2010 * If we got this far, there's a fattach(3c)'ed door
2011 * that belongs to a process that has exited, which can
2012 * happen if the previous zoneadmd died unexpectedly.
2013 *
2014 * Let user know that something is amiss, but that we can
2015 * recover; if the zone is in the installed state, then don't
2016 * message, since having a running zoneadmd isn't really
2017 * expected/needed. We want to keep occurences of this message
2018 * limited to times when zoneadmd is picking back up from a
2019 * zoneadmd that died while the zone was in some non-trivial
2020 * state.
2021 */
2022 if (zstate > ZONE_STATE_INSTALLED) {
2023 static zoneid_t zid;
2024
2025 zerror(zlogp, B_FALSE,
2026 "zone '%s': WARNING: zone is in state '%s', but "
2027 "zoneadmd does not appear to be available; "
2028 "restarted zoneadmd to recover.",
2029 zone_name, zone_state_str(zstate));
2030
2031 /*
2032 * Startup a thread to perform the zfd logging/tty svc
2033 * and a thread to perform memory capping for the
2034 * zone. zlogp won't be valid for much longer so use
2035 * logsys.
2036 */
2037 if ((zid = getzoneidbyname(zone_name)) != -1) {
2038 create_log_thread(&logsys, zid);
2039 create_mcap_thread(&logsys, zid);
2040 }
2041
2042 /* recover the global configuration snapshot */
2043 if (snap_hndl == NULL) {
2044 if ((snap_hndl = zonecfg_init_handle())
2045 == NULL ||
2046 zonecfg_create_snapshot(zone_name)
2047 != Z_OK ||
2048 zonecfg_get_snapshot_handle(zone_name,
2049 snap_hndl) != Z_OK) {
2050 zerror(zlogp, B_FALSE, "recovering "
2051 "zone configuration handle");
2052 goto out;
2053 }
2054 }
2055 }
2056
2057 (void) fdetach(zone_door_path);
2058 (void) close(doorfd);
2059 goto top;
2060 }
2061 ret = 0;
2062 out:
2063 (void) close(doorfd);
2064 return (ret);
2065 }
2066
2067 /*
2068 * Setup the brand's pre and post state change callbacks, as well as the
2069 * query callback, if any of these exist.
2070 */
2071 static int
2072 brand_callback_init(brand_handle_t bh, char *zone_name)
2073 {
2074 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
2075 sizeof (pre_statechg_hook));
2076
2077 if (brand_get_prestatechange(bh, zone_name, zonepath,
2078 pre_statechg_hook + EXEC_LEN,
2079 sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
2080 return (-1);
2081
2082 if (strlen(pre_statechg_hook) <= EXEC_LEN)
2083 pre_statechg_hook[0] = '\0';
2084
2085 (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
2086 sizeof (post_statechg_hook));
2087
2088 if (brand_get_poststatechange(bh, zone_name, zonepath,
2089 post_statechg_hook + EXEC_LEN,
2090 sizeof (post_statechg_hook) - EXEC_LEN) != 0)
2091 return (-1);
2092
2093 if (strlen(post_statechg_hook) <= EXEC_LEN)
2094 post_statechg_hook[0] = '\0';
2095
2096 (void) strlcpy(query_hook, EXEC_PREFIX,
2097 sizeof (query_hook));
2098
2099 if (brand_get_query(bh, zone_name, zonepath, query_hook + EXEC_LEN,
2100 sizeof (query_hook) - EXEC_LEN) != 0)
2101 return (-1);
2102
2103 if (strlen(query_hook) <= EXEC_LEN)
2104 query_hook[0] = '\0';
2105
2106 return (0);
2107 }
2108
2109 int
2110 main(int argc, char *argv[])
2111 {
2112 int opt;
2113 zoneid_t zid;
2114 priv_set_t *privset;
2115 zone_state_t zstate;
2116 char parents_locale[MAXPATHLEN];
2117 brand_handle_t bh;
2118 int err;
2119
2120 pid_t pid;
2121 sigset_t blockset;
2122 sigset_t block_cld;
2123
2124 struct {
2125 sema_t sem;
2126 int status;
2127 zlog_t log;
2128 } *shstate;
2129 size_t shstatelen = getpagesize();
2130
2131 zlog_t errlog;
2132 zlog_t *zlogp;
2133
2134 int ctfd;
2135
2136 progname = get_execbasename(argv[0]);
2137
2138 /*
2139 * Make sure stderr is unbuffered
2140 */
2141 (void) setbuffer(stderr, NULL, 0);
2142
2143 /*
2144 * Get out of the way of mounted filesystems, since we will daemonize
2145 * soon.
2146 */
2147 (void) chdir("/");
2148
2149 /*
2150 * Use the default system umask per PSARC 1998/110 rather than
2151 * anything that may have been set by the caller.
2152 */
2153 (void) umask(CMASK);
2154
2155 /*
2156 * Initially we want to use our parent's locale.
2157 */
2158 (void) setlocale(LC_ALL, "");
2159 (void) textdomain(TEXT_DOMAIN);
2160 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
2161 sizeof (parents_locale));
2162
2163 /*
2164 * This zlog_t is used for writing to stderr
2165 */
2166 errlog.logfile = stderr;
2167 errlog.buflen = errlog.loglen = 0;
2168 errlog.buf = errlog.log = NULL;
2169 errlog.locale = parents_locale;
2170
2171 /*
2172 * We start off writing to stderr until we're ready to daemonize.
2173 */
2174 zlogp = &errlog;
2175
2176 /*
2177 * Process options.
2178 */
2179 while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
2180 switch (opt) {
2181 case 'R':
2182 zonecfg_set_root(optarg);
2183 break;
2184 case 'z':
2185 zone_name = optarg;
2186 break;
2187 default:
2188 usage();
2189 }
2190 }
2191
2192 if (zone_name == NULL)
2193 usage();
2194
2195 /*
2196 * Because usage() prints directly to stderr, it has gettext()
2197 * wrapping, which depends on the locale. But since zerror() calls
2198 * localize() which tweaks the locale, it is not safe to call zerror()
2199 * until after the last call to usage(). Fortunately, the last call
2200 * to usage() is just above and the first call to zerror() is just
2201 * below. Don't mess this up.
2202 */
2203 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
2204 zerror(zlogp, B_FALSE, "cannot manage the %s zone",
2205 GLOBAL_ZONENAME);
2206 return (1);
2207 }
2208
2209 if (zone_get_id(zone_name, &zid) != 0) {
2210 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
2211 zonecfg_strerror(Z_NO_ZONE));
2212 return (1);
2213 }
2214
2215 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
2216 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
2217 zonecfg_strerror(err));
2218 return (1);
2219 }
2220 if (zstate < ZONE_STATE_INCOMPLETE) {
2221 zerror(zlogp, B_FALSE,
2222 "cannot manage a zone which is in state '%s'",
2223 zone_state_str(zstate));
2224 return (1);
2225 }
2226
2227 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
2228 zerror(zlogp, B_FALSE, "unable to determine zone path");
2229 return (-1);
2230 }
2231
2232 if (zonecfg_default_brand(default_brand,
2233 sizeof (default_brand)) != Z_OK) {
2234 zerror(zlogp, B_FALSE, "unable to determine default brand");
2235 return (1);
2236 }
2237
2238 /* Get a handle to the brand info for this zone */
2239 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name))
2240 != Z_OK) {
2241 zerror(zlogp, B_FALSE, "unable to determine zone brand");
2242 return (1);
2243 }
2244 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0);
2245 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0);
2246
2247 /*
2248 * In the alternate root environment, the only supported
2249 * operations are mount and unmount. In this case, just treat
2250 * the zone as native if it is cluster. Cluster zones can be
2251 * native for the purpose of LU or upgrade, and the cluster
2252 * brand may not exist in the miniroot (such as in net install
2253 * upgrade).
2254 */
2255 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) {
2256 zone_iscluster = B_TRUE;
2257 if (zonecfg_in_alt_root()) {
2258 (void) strlcpy(brand_name, default_brand,
2259 sizeof (brand_name));
2260 }
2261 } else {
2262 zone_iscluster = B_FALSE;
2263 }
2264
2265 if ((bh = brand_open(brand_name)) == NULL) {
2266 zerror(zlogp, B_FALSE, "unable to open zone brand");
2267 return (1);
2268 }
2269
2270 /* Get state change brand hooks. */
2271 if (brand_callback_init(bh, zone_name) == -1) {
2272 zerror(zlogp, B_TRUE,
2273 "failed to initialize brand state change hooks");
2274 brand_close(bh);
2275 return (1);
2276 }
2277
2278 brand_close(bh);
2279
2280 /*
2281 * Check that we have all privileges. It would be nice to pare
2282 * this down, but this is at least a first cut.
2283 */
2284 if ((privset = priv_allocset()) == NULL) {
2285 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
2286 return (1);
2287 }
2288
2289 if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2290 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
2291 priv_freeset(privset);
2292 return (1);
2293 }
2294
2295 if (priv_isfullset(privset) == B_FALSE) {
2296 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
2297 "run this command (all privs required)");
2298 priv_freeset(privset);
2299 return (1);
2300 }
2301 priv_freeset(privset);
2302
2303 if (mkzonedir(zlogp) != 0)
2304 return (1);
2305
2306 /*
2307 * Pre-fork: setup shared state
2308 */
2309 if ((shstate = (void *)mmap(NULL, shstatelen,
2310 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
2311 MAP_FAILED) {
2312 zerror(zlogp, B_TRUE, "%s failed", "mmap");
2313 return (1);
2314 }
2315 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
2316 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
2317 (void) munmap((char *)shstate, shstatelen);
2318 return (1);
2319 }
2320 shstate->log.logfile = NULL;
2321 shstate->log.buflen = shstatelen - sizeof (*shstate);
2322 shstate->log.loglen = shstate->log.buflen;
2323 shstate->log.buf = (char *)shstate + sizeof (*shstate);
2324 shstate->log.log = shstate->log.buf;
2325 shstate->log.locale = parents_locale;
2326 shstate->status = -1;
2327
2328 /*
2329 * We need a SIGCHLD handler so the sema_wait() below will wake
2330 * up if the child dies without doing a sema_post().
2331 */
2332 (void) sigset(SIGCHLD, sigchld);
2333 /*
2334 * We must mask SIGCHLD until after we've coped with the fork
2335 * sufficiently to deal with it; otherwise we can race and
2336 * receive the signal before pid has been initialized
2337 * (yes, this really happens).
2338 */
2339 (void) sigemptyset(&block_cld);
2340 (void) sigaddset(&block_cld, SIGCHLD);
2341 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2342
2343 /*
2344 * The parent only needs stderr after the fork, so close other fd's
2345 * that we inherited from zoneadm so that the parent doesn't have those
2346 * open while waiting. The child will close the rest after the fork.
2347 */
2348 closefrom(3);
2349
2350 if ((ctfd = init_template()) == -1) {
2351 zerror(zlogp, B_TRUE, "failed to create contract");
2352 return (1);
2353 }
2354
2355 /*
2356 * Do not let another thread localize a message while we are forking.
2357 */
2358 (void) mutex_lock(&msglock);
2359 pid = fork();
2360 (void) mutex_unlock(&msglock);
2361
2362 /*
2363 * In all cases (parent, child, and in the event of an error) we
2364 * don't want to cause creation of contracts on subsequent fork()s.
2365 */
2366 (void) ct_tmpl_clear(ctfd);
2367 (void) close(ctfd);
2368
2369 if (pid == -1) {
2370 zerror(zlogp, B_TRUE, "could not fork");
2371 return (1);
2372
2373 } else if (pid > 0) { /* parent */
2374 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2375 /*
2376 * This marks a window of vulnerability in which we receive
2377 * the SIGCLD before falling into sema_wait (normally we would
2378 * get woken up from sema_wait with EINTR upon receipt of
2379 * SIGCLD). So we may need to use some other scheme like
2380 * sema_posting in the sigcld handler.
2381 * blech
2382 */
2383 (void) sema_wait(&shstate->sem);
2384 (void) sema_destroy(&shstate->sem);
2385 if (shstate->status != 0)
2386 (void) waitpid(pid, NULL, WNOHANG);
2387 /*
2388 * It's ok if we die with SIGPIPE. It's not like we could have
2389 * done anything about it.
2390 */
2391 (void) fprintf(stderr, "%s", shstate->log.buf);
2392 _exit(shstate->status == 0 ? 0 : 1);
2393 }
2394
2395 /*
2396 * The child charges on.
2397 */
2398 (void) sigset(SIGCHLD, SIG_DFL);
2399 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2400
2401 /*
2402 * SIGPIPE can be delivered if we write to a socket for which the
2403 * peer endpoint is gone. That can lead to too-early termination
2404 * of zoneadmd, and that's not good eats.
2405 */
2406 (void) sigset(SIGPIPE, SIG_IGN);
2407 /*
2408 * Stop using stderr
2409 */
2410 zlogp = &shstate->log;
2411
2412 /*
2413 * We don't need stdout/stderr from now on.
2414 */
2415 closefrom(0);
2416
2417 /*
2418 * Initialize the syslog zlog_t. This needs to be done after
2419 * the call to closefrom().
2420 */
2421 logsys.buf = logsys.log = NULL;
2422 logsys.buflen = logsys.loglen = 0;
2423 logsys.logfile = NULL;
2424 logsys.locale = DEFAULT_LOCALE;
2425
2426 openlog("zoneadmd", LOG_PID, LOG_DAEMON);
2427
2428 /*
2429 * The eventstream is used to publish state changes in the zone
2430 * from the door threads to the console I/O poller.
2431 */
2432 if (eventstream_init() == -1) {
2433 zerror(zlogp, B_TRUE, "unable to create eventstream");
2434 goto child_out;
2435 }
2436
2437 (void) snprintf(zone_door_path, sizeof (zone_door_path),
2438 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);
2439
2440 /*
2441 * See if another zoneadmd is running for this zone. If not, then we
2442 * can now modify system state.
2443 */
2444 if (make_daemon_exclusive(zlogp) == -1)
2445 goto child_out;
2446
2447
2448 /*
2449 * Create/join a new session; we need to be careful of what we do with
2450 * the console from now on so we don't end up being the session leader
2451 * for the terminal we're going to be handing out.
2452 */
2453 (void) setsid();
2454
2455 /*
2456 * This thread shouldn't be receiving any signals; in particular,
2457 * SIGCHLD should be received by the thread doing the fork().
2458 */
2459 (void) sigfillset(&blockset);
2460 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
2461
2462 /*
2463 * Setup the console device and get ready to serve the console;
2464 * once this has completed, we're ready to let console clients
2465 * make an attempt to connect (they will block until
2466 * serve_console_sock() below gets called, and any pending
2467 * connection is accept()ed).
2468 */
2469 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0)
2470 goto child_out;
2471
2472 /*
2473 * Take the lock now, so that when the door server gets going, we
2474 * are guaranteed that it won't take a request until we are sure
2475 * that everything is completely set up. See the child_out: label
2476 * below to see why this matters.
2477 */
2478 (void) mutex_lock(&lock);
2479
2480 /* Init semaphore for scratch zones. */
2481 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
2482 zerror(zlogp, B_TRUE,
2483 "failed to initialize semaphore for scratch zone");
2484 goto child_out;
2485 }
2486
2487 /* open the dladm handle */
2488 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) {
2489 zerror(zlogp, B_FALSE, "failed to open dladm handle");
2490 goto child_out;
2491 }
2492
2493 /*
2494 * Note: door setup must occur *after* the console is setup.
2495 * This is so that as zlogin tests the door to see if zoneadmd
2496 * is ready yet, we know that the console will get serviced
2497 * once door_info() indicates that the door is "up".
2498 */
2499 if (setup_door(zlogp) == -1)
2500 goto child_out;
2501
2502 /*
2503 * Things seem OK so far; tell the parent process that we're done
2504 * with setup tasks. This will cause the parent to exit, signalling
2505 * to zoneadm, zlogin, or whatever forked it that we are ready to
2506 * service requests.
2507 */
2508 shstate->status = 0;
2509 (void) sema_post(&shstate->sem);
2510 (void) munmap((char *)shstate, shstatelen);
2511 shstate = NULL;
2512
2513 (void) mutex_unlock(&lock);
2514
2515 /*
2516 * zlogp is now invalid, so reset it to the syslog logger.
2517 */
2518 zlogp = &logsys;
2519
2520 /*
2521 * Now that we are free of any parents, switch to the default locale.
2522 */
2523 (void) setlocale(LC_ALL, DEFAULT_LOCALE);
2524
2525 /*
2526 * At this point the setup portion of main() is basically done, so
2527 * we reuse this thread to manage the zone console. When
2528 * serve_console() has returned, we are past the point of no return
2529 * in the life of this zoneadmd.
2530 */
2531 if (zonecfg_in_alt_root()) {
2532 /*
2533 * This is just awful, but mounted scratch zones don't (and
2534 * can't) have consoles. We just wait for unmount instead.
2535 */
2536 while (sema_wait(&scratch_sem) == EINTR)
2537 ;
2538 } else {
2539 serve_console(zlogp);
2540 assert(in_death_throes);
2541 }
2542
2543 /*
2544 * This is the next-to-last part of the exit interlock. Upon calling
2545 * fdetach(), the door will go unreferenced; once any
2546 * outstanding requests (like the door thread doing Z_HALT) are
2547 * done, the door will get an UNREF notification; when it handles
2548 * the UNREF, the door server will cause the exit. It's possible
2549 * that fdetach() can fail because the file is in use, in which
2550 * case we'll retry the operation.
2551 */
2552 assert(!MUTEX_HELD(&lock));
2553 for (;;) {
2554 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY))
2555 break;
2556 yield();
2557 }
2558
2559 for (;;)
2560 (void) pause();
2561
2562 child_out:
2563 assert(pid == 0);
2564 if (shstate != NULL) {
2565 shstate->status = -1;
2566 (void) sema_post(&shstate->sem);
2567 (void) munmap((char *)shstate, shstatelen);
2568 }
2569
2570 /*
2571 * This might trigger an unref notification, but if so,
2572 * we are still holding the lock, so our call to exit will
2573 * ultimately win the race and will publish the right exit
2574 * code.
2575 */
2576 if (zone_door != -1) {
2577 assert(MUTEX_HELD(&lock));
2578 (void) door_revoke(zone_door);
2579 (void) fdetach(zone_door_path);
2580 }
2581
2582 if (dld_handle != NULL)
2583 dladm_close(dld_handle);
2584
2585 return (1); /* return from main() forcibly exits an MT process */
2586 }