Print this page
OS-5330 zoneadm mounting an lx or joyent branded zone fails
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
(NOTE: Manual port, because of divergence from SmartOS.)
OS-3831 lxbrand /proc/cmdline should reflect zone boot arguments
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
Remove most KEBE comments and accompanying unused code or variables/fields.
Merge cleanup from previous six commits
OS-200 need a better mechanism for storing persistent zone_did
OS-2564 zone boot failed: could not start zoneadmd
OS-1763 mount of /etc/svc/volatile failed: Device busy
OS-511 make zonecfg device resource extensible, like the net resource
OS-224 add more zonecfg net properties
Reduce lint
Add zfd.c to zoneadmd's Makefile, a bit more not-yet ifdef-out.
zoneadmd mismerge (we don't support debug yet)
OS-4932 zoneadm boot args not passed to lx init
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-4781 would like to be able to add CT_PR_EV_EXIT to fatal event set of current contract
OS-4253 lxbrand ubuntu 15.04 won't boot because /sbin/init is a symlink
OS-3524 in order to support interaction with docker containers, need to be able to connect to stdio for init from GZ
OS-3525 in order to support 'docker logs' need to be able to get stdio from zone to log file
OS-3429 Expose zone's init exit status
OS-3342 dlmgmtd needs to be mindful of lock ordering
OS-2608 dlmgmtd needs to record zone identifiers
OS-3492 zone_free asserts to its destruction when dlmgmtd has fallen
OS-3494 zoneadmd tears down networking too soon when boot fails
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-3077 restarted zoneadmd uses invalid zlogp
OS-3075 zone long boot args aren't passed through
OS-11 rcapd behaves poorly when under extreme load
@@ -20,10 +20,11 @@
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
/*
* zoneadmd manages zones; one zoneadmd process is launched for each
* non-global zone on the system. This daemon juggles four jobs:
@@ -66,10 +67,11 @@
#include <sys/param.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
+#include <sys/time.h>
#include <bsm/adt.h>
#include <bsm/adt_event.h>
#include <alloca.h>
@@ -106,10 +108,12 @@
#include <zonestat_impl.h>
#include "zoneadmd.h"
static char *progname;
char *zone_name; /* zone which we are managing */
+zone_dochandle_t snap_hndl; /* handle for snapshot created when ready */
+char zonepath[MAXNAMELEN];
char pool_name[MAXNAMELEN];
char default_brand[MAXNAMELEN];
char brand_name[MAXNAMELEN];
boolean_t zone_isnative;
boolean_t zone_iscluster;
@@ -139,10 +143,13 @@
#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
#endif
#define DEFAULT_LOCALE "C"
+#define RSRC_NET "net"
+#define RSRC_DEV "device"
+
static const char *
z_cmd_name(zone_cmd_t zcmd)
{
/* This list needs to match the enum in sys/zone.h */
static const char *zcmdstr[] = {
@@ -255,38 +262,47 @@
zlogp->loglen -= copylen;
}
}
/*
- * Emit a warning for any boot arguments which are unrecognized. Since
- * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
+ * Append src to dest, modifying dest in the process. Prefix src with
+ * a space character if dest is a non-empty string.
+ */
+static void
+strnappend(char *dest, size_t n, const char *src)
+{
+ (void) snprintf(dest, n, "%s%s%s", dest,
+ dest[0] == '\0' ? "" : " ", src);
+}
+
+/*
+ * Since illumos boot arguments are getopt(3c) compatible (see kernel(1m)), we
* put the arguments into an argv style array, use getopt to process them,
- * and put the resultant argument string back into outargs.
+ * and put the resultant argument string back into outargs. Non-native brands
+ * may support alternate forms of boot arguments so we must handle that as well.
*
* During the filtering, we pull out any arguments which are truly "boot"
* arguments, leaving only those which are to be passed intact to the
* progenitor process. The one we support at the moment is -i, which
* indicates to the kernel which program should be launched as 'init'.
*
- * A return of Z_INVAL indicates specifically that the arguments are
- * not valid; this is a non-fatal error. Except for Z_OK, all other return
- * values are treated as fatal.
+ * Except for Z_OK, all other return values are treated as fatal.
*/
static int
filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
- char *init_file, char *badarg)
+ char *init_file)
{
int argc = 0, argc_save;
int i;
int err;
char *arg, *lasts, **argv = NULL, **argv_save;
char zonecfg_args[BOOTARGS_MAX];
char scratchargs[BOOTARGS_MAX], *sargs;
+ char scratchopt[3];
char c;
bzero(outargs, BOOTARGS_MAX);
- bzero(badarg, BOOTARGS_MAX);
/*
* If the user didn't specify transient boot arguments, check
* to see if there were any specified in the zone configuration,
* and use them if applicable.
@@ -345,26 +361,34 @@
}
i++;
}
/*
- * We preserve compatibility with the Solaris system boot behavior,
+ * We preserve compatibility with the illumos system boot behavior,
* which allows:
*
* # reboot kernel/unix -s -m verbose
*
- * In this example, kernel/unix tells the booter what file to
- * boot. We don't want reboot in a zone to be gratuitously different,
- * so we silently ignore the boot file, if necessary.
+ * In this example, kernel/unix tells the booter what file to boot. The
+ * original intent of this was that we didn't want reboot in a zone to
+ * be gratuitously different, so we would silently ignore the boot
+ * file, if necessary. However, this usage is archaic and has never
+ * been common, since it is impossible to boot a zone onto a different
+ * kernel. Ignoring the first argument breaks for non-native brands
+ * which pass boot arguments in a different style. e.g.
+ * systemd.log_level=debug
+ * Thus, for backward compatibility we only ignore the first argument
+ * if it appears to be in the illumos form and attempting to specify a
+ * kernel.
*/
if (argv[0] == NULL)
goto done;
assert(argv[0][0] != ' ');
assert(argv[0][0] != '\t');
- if (argv[0][0] != '-' && argv[0][0] != '\0') {
+ if (strncmp(argv[0], "kernel/", 7) == 0) {
argv = &argv[1];
argc--;
}
optind = 0;
@@ -383,46 +407,40 @@
/* This has already been processed by zoneadm */
break;
case 'm':
case 's':
/* These pass through unmolested */
- (void) snprintf(outargs, BOOTARGS_MAX,
- "%s -%c %s ", outargs, c, optarg ? optarg : "");
+ (void) snprintf(scratchopt, sizeof (scratchopt),
+ "-%c", c);
+ strnappend(outargs, BOOTARGS_MAX, scratchopt);
+ if (optarg != NULL)
+ strnappend(outargs, BOOTARGS_MAX, optarg);
break;
case '?':
/*
- * We warn about unknown arguments but pass them
- * along anyway-- if someone wants to develop their
- * own init replacement, they can pass it whatever
- * args they want.
+ * If a brand has its own init, we need to pass along
+ * whatever the user provides. We use the entire
+ * unknown string here so that we correctly handle
+ * unknown long options (e.g. --debug).
*/
- err = Z_INVAL;
- (void) snprintf(outargs, BOOTARGS_MAX,
- "%s -%c", outargs, optopt);
- (void) snprintf(badarg, BOOTARGS_MAX,
- "%s -%c", badarg, optopt);
+ strnappend(outargs, BOOTARGS_MAX, argv[optind - 1]);
break;
}
}
/*
- * For Solaris Zones we warn about and discard non-option arguments.
- * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar
- * to the kernel, we concat up all the other remaining boot args.
- * and warn on them as a group.
+ * We need to pass along everything else since we don't know what
+ * the brand's init is expecting. For example, an argument list like:
+ * --confdir /foo --debug
+ * will cause the getopt parsing to stop at '/foo' but we need to pass
+ * that on, along with the '--debug'. This does mean that we require
+ * any of our known options (-ifms) to preceed the brand-specific ones.
*/
- if (optind < argc) {
- err = Z_INVAL;
while (optind < argc) {
- (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s",
- badarg, strlen(badarg) > 0 ? " " : "",
- argv[optind]);
+ strnappend(outargs, BOOTARGS_MAX, argv[optind]);
optind++;
}
- zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot "
- "arguments `%s'.", badarg);
- }
done:
for (i = 0; i < argc_save; i++) {
if (argv_save[i] != NULL)
free(argv_save[i]);
@@ -535,11 +553,12 @@
static int
zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
{
int err;
- if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
+ if (!ALT_MOUNT(mount_cmd) &&
+ brand_prestatechg(zlogp, zstate, Z_READY) != 0)
return (-1);
if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
zonecfg_strerror(err));
@@ -559,20 +578,22 @@
zerror(zlogp, B_FALSE, "destroying snapshot: %s",
zonecfg_strerror(err));
goto bad;
}
- if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
+ if (!ALT_MOUNT(mount_cmd) &&
+ brand_poststatechg(zlogp, zstate, Z_READY) != 0)
goto bad;
return (0);
bad:
/*
* If something goes wrong, we up the zones's state to the target
* state, READY, and then invoke the hook as if we're halting.
*/
+ if (!ALT_MOUNT(mount_cmd))
(void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
return (-1);
}
int
@@ -621,19 +642,12 @@
int rv;
ctid_t ct;
/* determine the zone rootpath */
if (mount_cmd) {
- char zonepath[MAXPATHLEN];
char luroot[MAXPATHLEN];
- if (zone_get_zonepath(zone_name,
- zonepath, sizeof (zonepath)) != Z_OK) {
- zerror(zlogp, B_FALSE, "unable to determine zone path");
- return (-1);
- }
-
(void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
resolve_lofs(zlogp, luroot, sizeof (luroot));
(void) strlcpy(rootpath, luroot, sizeof (rootpath));
} else {
if (zone_get_rootpath(zone_name,
@@ -684,10 +698,12 @@
} else if (child == 0) { /* child */
char opt_buf[MAX_MNTOPT_STR];
int optlen = 0;
int mflag = MS_DATA;
+ int i;
+ int ret;
(void) ct_tmpl_clear(tmpl_fd);
/*
* Even though there are no procs running in the zone, we
* do this for paranoia's sake.
@@ -711,14 +727,31 @@
(void) strlcpy(opt_buf, opt, sizeof (opt_buf));
opt = opt_buf;
optlen = MAX_MNTOPT_STR;
mflag = MS_OPTIONSTR;
}
- if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0)
- _exit(errno);
- _exit(0);
+
+ /*
+ * There is an obscure race condition which can cause mount
+ * to return EBUSY. This happens for example on the mount
+ * of the zone's /etc/svc/volatile file system if there is
+ * a GZ process running svcs -Z, which will touch the
+ * mountpoint, just as we're trying to do the mount. To cope
+ * with this, we retry up to 3 times to let this transient
+ * process get out of the way.
+ */
+ for (i = 0; i < 3; i++) {
+ ret = 0;
+ if (mount(spec, dir, mflag, fstype, NULL, 0, opt,
+ optlen) != 0)
+ ret = errno;
+ if (ret != EBUSY)
+ break;
+ (void) sleep(1);
}
+ _exit(ret);
+ }
/* parent */
if (contract_latest(&ct) == -1)
ct = -1;
(void) ct_tmpl_clear(tmpl_fd);
@@ -737,10 +770,119 @@
return (0);
}
/*
+ * env variable name format
+ * _ZONECFG;{resource name};{identifying attr. name};{property name}
+ */
+static void
+set_zonecfg_env(char *rsrc, char *attr, char *name, char *val)
+{
+ char *p;
+ /* Enough for maximal name, rsrc + attr, & slop for ZONECFG & _'s */
+ char nm[2 * MAXNAMELEN + 32];
+
+ if (attr == NULL)
+ (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s", rsrc,
+ name);
+ else
+ (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s_%s", rsrc,
+ attr, name);
+
+ p = nm;
+ while ((p = strchr(p, '-')) != NULL)
+ *p++ = '_';
+
+ (void) setenv(nm, val, 1);
+}
+
+/*
+ * Export zonecfg network and device properties into environment for the boot
+ * and state change hooks.
+ * If debug is true, export the brand hook debug env. variable as well.
+ *
+ * We could export more of the config in the future, as necessary.
+ */
+static int
+setup_subproc_env()
+{
+ int res;
+ zone_dochandle_t handle;
+ struct zone_nwiftab ntab;
+ struct zone_devtab dtab;
+ char net_resources[MAXNAMELEN * 2];
+ char dev_resources[MAXNAMELEN * 2];
+
+ if ((handle = zonecfg_init_handle()) == NULL)
+ exit(Z_NOMEM);
+
+ if ((res = zonecfg_get_handle(zone_name, handle)) != Z_OK)
+ goto done;
+
+ if ((res = zonecfg_setnwifent(handle)) != Z_OK)
+ goto done;
+
+ while (zonecfg_getnwifent(handle, &ntab) == Z_OK) {
+ struct zone_res_attrtab *rap;
+ char *phys;
+
+ phys = ntab.zone_nwif_physical;
+
+ (void) strlcat(net_resources, phys, sizeof (net_resources));
+ (void) strlcat(net_resources, " ", sizeof (net_resources));
+
+ set_zonecfg_env(RSRC_NET, phys, "physical", phys);
+
+ set_zonecfg_env(RSRC_NET, phys, "address",
+ ntab.zone_nwif_address);
+ set_zonecfg_env(RSRC_NET, phys, "allowed-address",
+ ntab.zone_nwif_allowed_address);
+ set_zonecfg_env(RSRC_NET, phys, "defrouter",
+ ntab.zone_nwif_defrouter);
+ set_zonecfg_env(RSRC_NET, phys, "global-nic",
+ ntab.zone_nwif_gnic);
+ set_zonecfg_env(RSRC_NET, phys, "mac-addr", ntab.zone_nwif_mac);
+ set_zonecfg_env(RSRC_NET, phys, "vlan-id",
+ ntab.zone_nwif_vlan_id);
+
+ for (rap = ntab.zone_nwif_attrp; rap != NULL;
+ rap = rap->zone_res_attr_next)
+ set_zonecfg_env(RSRC_NET, phys, rap->zone_res_attr_name,
+ rap->zone_res_attr_value);
+ }
+
+ (void) zonecfg_endnwifent(handle);
+
+ if ((res = zonecfg_setdevent(handle)) != Z_OK)
+ goto done;
+
+ while (zonecfg_getdevent(handle, &dtab) == Z_OK) {
+ struct zone_res_attrtab *rap;
+ char *match;
+
+ match = dtab.zone_dev_match;
+
+ (void) strlcat(dev_resources, match, sizeof (dev_resources));
+ (void) strlcat(dev_resources, " ", sizeof (dev_resources));
+
+ for (rap = dtab.zone_dev_attrp; rap != NULL;
+ rap = rap->zone_res_attr_next)
+ set_zonecfg_env(RSRC_DEV, match,
+ rap->zone_res_attr_name, rap->zone_res_attr_value);
+ }
+
+ (void) zonecfg_enddevent(handle);
+
+ res = Z_OK;
+
+done:
+ zonecfg_fini_handle(handle);
+ return (res);
+}
+
+/*
* If retstr is not NULL, the output of the subproc is returned in the str,
* otherwise it is output using zerror(). Any memory allocated for retstr
* should be freed by the caller.
*/
int
@@ -761,10 +903,15 @@
rd_cnt = 0;
} else {
inbuf = buf;
}
+ if (setup_subproc_env() != Z_OK) {
+ zerror(zlogp, B_FALSE, "failed to setup environment");
+ return (-1);
+ }
+
file = popen(cmdbuf, "r");
if (file == NULL) {
zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
return (-1);
}
@@ -800,26 +947,52 @@
return (-1);
}
return (WEXITSTATUS(status));
}
+/*
+ * Get the app-svc-dependent flag for this zone's init process. This is a
+ * zone-specific attr which controls the type of contract we create for the
+ * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal
+ * set, so that when any service which is in the same contract exits, the init
+ * application will be terminated.
+ *
+ * We use the global "snap_hndl", so no parameters get passed here.
+ */
+static boolean_t
+is_app_svc_dep(void)
+{
+ struct zone_attrtab a;
+
+ bzero(&a, sizeof (a));
+ (void) strlcpy(a.zone_attr_name, "app-svc-dependent",
+ sizeof (a.zone_attr_name));
+
+ if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
+ strcmp(a.zone_attr_value, "true") == 0) {
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
static int
zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
{
zoneid_t zoneid;
struct stat st;
- char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
+ char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
char nbootargs[BOOTARGS_MAX];
char cmdbuf[MAXPATHLEN];
fs_callback_t cb;
brand_handle_t bh;
zone_iptype_t iptype;
- boolean_t links_loaded = B_FALSE;
dladm_status_t status;
char errmsg[DLADM_STRSIZE];
int err;
boolean_t restart_init;
+ boolean_t app_svc_dep;
if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
return (-1);
if ((zoneid = getzoneidbyname(zone_name)) == -1) {
@@ -850,17 +1023,12 @@
}
/*
* Get the brand's boot callback if it exists.
*/
- if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
- zerror(zlogp, B_FALSE, "unable to determine zone path");
- brand_close(bh);
- goto bad;
- }
(void) strcpy(cmdbuf, EXEC_PREFIX);
- if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
+ if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
sizeof (cmdbuf) - EXEC_LEN) != 0) {
zerror(zlogp, B_FALSE,
"unable to determine branded zone's boot callback");
brand_close(bh);
goto bad;
@@ -875,35 +1043,45 @@
}
/* See if this zone's brand should restart init if it dies. */
restart_init = brand_restartinit(bh);
+ /*
+ * See if we need to setup contract dependencies between the zone's
+ * primary application and any of its services.
+ */
+ app_svc_dep = is_app_svc_dep();
+
brand_close(bh);
- err = filter_bootargs(zlogp, bootargs, nbootargs, init_file,
- bad_boot_arg);
- if (err == Z_INVAL)
- eventstream_write(Z_EVT_ZONE_BADARGS);
- else if (err != Z_OK)
+ err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
+ if (err != Z_OK)
goto bad;
assert(init_file[0] != '\0');
- /* Try to anticipate possible problems: Make sure init is executable. */
- if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
+ /*
+ * Try to anticipate possible problems: If possible, make sure init is
+ * executable.
+ */
+ if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) {
zerror(zlogp, B_FALSE, "unable to determine zone root");
goto bad;
}
- (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file);
+ (void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file);
- if (stat(initpath, &st) == -1) {
+ if (lstat(initpath, &st) == -1) {
zerror(zlogp, B_TRUE, "could not stat %s", initpath);
goto bad;
}
- if ((st.st_mode & S_IXUSR) == 0) {
+ /*
+ * If a symlink, we'll have to wait and resolve when we boot,
+ * otherwise check the executable bits now.
+ */
+ if ((st.st_mode & S_IFMT) != S_IFLNK && (st.st_mode & S_IXUSR) == 0) {
zerror(zlogp, B_FALSE, "%s is not executable", initpath);
goto bad;
}
/*
@@ -917,11 +1095,10 @@
if (status != DLADM_STATUS_OK) {
zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
" %s", dladm_status2str(status, errmsg));
goto bad;
}
- links_loaded = B_TRUE;
}
/*
* If there is a brand 'boot' callback, execute it now to give the
* brand one last chance to do any additional setup before the zone
@@ -947,10 +1124,16 @@
NULL, 0) == -1) {
zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
goto bad;
}
+ if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT,
+ (void *)B_TRUE, sizeof (boolean_t)) == -1) {
+ zerror(zlogp, B_TRUE, "could not set zone app-die");
+ goto bad;
+ }
+
/*
* Inform zonestatd of a new zone so that it can install a door for
* the zone to contact it.
*/
notify_zonestatd(zone_id);
@@ -961,44 +1144,58 @@
}
if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
goto bad;
+ /* Startup a thread to perform zfd logging/tty svc for the zone. */
+ create_log_thread(zlogp, zone_id);
+
+ /* Startup a thread to perform memory capping for the zone. */
+ create_mcap_thread(zlogp, zone_id);
+
return (0);
bad:
/*
* If something goes wrong, we up the zones's state to the target
* state, RUNNING, and then invoke the hook as if we're halting.
*/
(void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
- if (links_loaded)
- (void) dladm_zone_halt(dld_handle, zoneid);
+
return (-1);
}
static int
zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
{
int err;
- if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
+ if (unmount_cmd == B_FALSE &&
+ brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
return (-1);
+ /* Shutting down, stop the memcap thread */
+ destroy_mcap_thread();
+
if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
if (!bringup_failure_recovery)
zerror(zlogp, B_FALSE, "unable to destroy zone");
+ destroy_log_thread();
return (-1);
}
+ /* Shut down is done, stop the log thread */
+ destroy_log_thread();
+
+ if (unmount_cmd == B_FALSE &&
+ brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
+ return (-1);
+
if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
zerror(zlogp, B_FALSE, "destroying snapshot: %s",
zonecfg_strerror(err));
- if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
- return (-1);
-
return (0);
}
static int
zone_graceful_shutdown(zlog_t *zlogp)
@@ -1005,11 +1202,10 @@
{
zoneid_t zoneid;
pid_t child;
char cmdbuf[MAXPATHLEN];
brand_handle_t bh = NULL;
- char zpath[MAXPATHLEN];
ctid_t ct;
int tmpl_fd;
int child_status;
if (shutdown_in_progress) {
@@ -1026,22 +1222,16 @@
if ((bh = brand_open(brand_name)) == NULL) {
zerror(zlogp, B_FALSE, "unable to determine zone brand");
return (-1);
}
- if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
- zerror(zlogp, B_FALSE, "unable to determine zone path");
- brand_close(bh);
- return (-1);
- }
-
/*
* If there is a brand 'shutdown' callback, execute it now to give the
* brand a chance to cleanup any custom configuration.
*/
(void) strcpy(cmdbuf, EXEC_PREFIX);
- if (brand_get_shutdown(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
+ if (brand_get_shutdown(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
(void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
}
brand_close(bh);
@@ -1175,10 +1365,40 @@
(void) adt_end_session(ah);
}
/*
+ * Log the exit time and status of the zone's init process into
+ * {zonepath}/lastexited. If the zone shutdown normally, the exit status will
+ * be -1, otherwise it will be the exit status as described in wait.3c.
+ * If the zone is configured to restart init, then nothing will be logged if
+ * init exits unexpectedly (the kernel will never upcall in this case).
+ */
+static void
+log_init_exit(int status)
+{
+ char p[MAXPATHLEN];
+ char buf[128];
+ struct timeval t;
+ int fd;
+
+ if (snprintf(p, sizeof (p), "%s/lastexited", zonepath) > sizeof (p))
+ return;
+ if (gettimeofday(&t, NULL) != 0)
+ return;
+ if (snprintf(buf, sizeof (buf), "%ld.%ld %d\n", t.tv_sec, t.tv_usec,
+ status) > sizeof (buf))
+ return;
+ if ((fd = open(p, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0)
+ return;
+
+ (void) write(fd, buf, strlen(buf));
+
+ (void) close(fd);
+}
+
+/*
* The main routine for the door server that deals with zone state transitions.
*/
/* ARGSUSED */
static void
server(void *cookie, char *args, size_t alen, door_desc_t *dp,
@@ -1187,10 +1407,11 @@
ucred_t *uc = NULL;
const priv_set_t *eset;
zone_state_t zstate;
zone_cmd_t cmd;
+ int init_status;
zone_cmd_arg_t *zargp;
boolean_t kernelcall;
int rval = -1;
@@ -1239,10 +1460,11 @@
"unexpected (expected %d bytes)", alen,
sizeof (zone_cmd_arg_t));
goto out;
}
cmd = zargp->cmd;
+ init_status = zargp->status;
if (door_ucred(&uc) != 0) {
zerror(&logsys, B_TRUE, "door_ucred");
goto out;
}
@@ -1348,10 +1570,11 @@
switch (cmd) {
case Z_READY:
rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
if (rval == 0)
eventstream_write(Z_EVT_ZONE_READIED);
+ zcons_statechanged();
break;
case Z_BOOT:
case Z_FORCEBOOT:
eventstream_write(Z_EVT_ZONE_BOOTING);
if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
@@ -1358,10 +1581,11 @@
== 0) {
rval = zone_bootup(zlogp, zargp->bootbuf,
zstate);
}
audit_put_record(zlogp, uc, rval, "boot");
+ zcons_statechanged();
if (rval != 0) {
bringup_failure_recovery = B_TRUE;
(void) zone_halt(zlogp, B_FALSE, B_FALSE,
zstate);
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
@@ -1480,10 +1704,11 @@
(void) strlcpy(boot_args, zargp->bootbuf,
sizeof (boot_args));
eventstream_write(Z_EVT_ZONE_BOOTING);
rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
audit_put_record(zlogp, uc, rval, "boot");
+ zcons_statechanged();
if (rval != 0) {
bringup_failure_recovery = B_TRUE;
(void) zone_halt(zlogp, B_FALSE, B_TRUE,
zstate);
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
@@ -1494,10 +1719,11 @@
if (kernelcall) /* Invalid; can't happen */
abort();
if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
!= 0)
break;
+ zcons_statechanged();
eventstream_write(Z_EVT_ZONE_HALTED);
break;
case Z_SHUTDOWN:
case Z_REBOOT:
case Z_NOTE_UNINSTALLING:
@@ -1541,10 +1767,11 @@
switch (cmd) {
case Z_READY:
if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
!= 0)
break;
+ zcons_statechanged();
if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
eventstream_write(Z_EVT_ZONE_READIED);
else
eventstream_write(Z_EVT_ZONE_HALTED);
break;
@@ -1557,14 +1784,20 @@
*/
zerror(zlogp, B_FALSE, "zone is already booted");
rval = 0;
break;
case Z_HALT:
+ if (kernelcall) {
+ log_init_exit(init_status);
+ } else {
+ log_init_exit(-1);
+ }
if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
!= 0)
break;
eventstream_write(Z_EVT_ZONE_HALTED);
+ zcons_statechanged();
break;
case Z_REBOOT:
(void) strlcpy(boot_args, zargp->bootbuf,
sizeof (boot_args));
eventstream_write(Z_EVT_ZONE_REBOOTING);
@@ -1572,12 +1805,13 @@
!= 0) {
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
boot_args[0] = '\0';
break;
}
- if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
- != 0) {
+ zcons_statechanged();
+ if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) !=
+ 0) {
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
boot_args[0] = '\0';
break;
}
rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
@@ -1757,17 +1991,44 @@
* limited to times when zoneadmd is picking back up from a
* zoneadmd that died while the zone was in some non-trivial
* state.
*/
if (zstate > ZONE_STATE_INSTALLED) {
+ static zoneid_t zid;
+
zerror(zlogp, B_FALSE,
"zone '%s': WARNING: zone is in state '%s', but "
"zoneadmd does not appear to be available; "
"restarted zoneadmd to recover.",
zone_name, zone_state_str(zstate));
+
+ /*
+ * Startup a thread to perform the zfd logging/tty svc
+ * and a thread to perform memory capping for the
+ * zone. zlogp won't be valid for much longer so use
+ * logsys.
+ */
+ if ((zid = getzoneidbyname(zone_name)) != -1) {
+ create_log_thread(&logsys, zid);
+ create_mcap_thread(&logsys, zid);
}
+ /* recover the global configuration snapshot */
+ if (snap_hndl == NULL) {
+ if ((snap_hndl = zonecfg_init_handle())
+ == NULL ||
+ zonecfg_create_snapshot(zone_name)
+ != Z_OK ||
+ zonecfg_get_snapshot_handle(zone_name,
+ snap_hndl) != Z_OK) {
+ zerror(zlogp, B_FALSE, "recovering "
+ "zone configuration handle");
+ goto out;
+ }
+ }
+ }
+
(void) fdetach(zone_door_path);
(void) close(doorfd);
goto top;
}
ret = 0;
@@ -1781,19 +2042,14 @@
* query callback, if any of these exist.
*/
static int
brand_callback_init(brand_handle_t bh, char *zone_name)
{
- char zpath[MAXPATHLEN];
-
- if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK)
- return (-1);
-
(void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
sizeof (pre_statechg_hook));
- if (brand_get_prestatechange(bh, zone_name, zpath,
+ if (brand_get_prestatechange(bh, zone_name, zonepath,
pre_statechg_hook + EXEC_LEN,
sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
return (-1);
if (strlen(pre_statechg_hook) <= EXEC_LEN)
@@ -1800,11 +2056,11 @@
pre_statechg_hook[0] = '\0';
(void) strlcpy(post_statechg_hook, EXEC_PREFIX,
sizeof (post_statechg_hook));
- if (brand_get_poststatechange(bh, zone_name, zpath,
+ if (brand_get_poststatechange(bh, zone_name, zonepath,
post_statechg_hook + EXEC_LEN,
sizeof (post_statechg_hook) - EXEC_LEN) != 0)
return (-1);
if (strlen(post_statechg_hook) <= EXEC_LEN)
@@ -1811,11 +2067,11 @@
post_statechg_hook[0] = '\0';
(void) strlcpy(query_hook, EXEC_PREFIX,
sizeof (query_hook));
- if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN,
+ if (brand_get_query(bh, zone_name, zonepath, query_hook + EXEC_LEN,
sizeof (query_hook) - EXEC_LEN) != 0)
return (-1);
if (strlen(query_hook) <= EXEC_LEN)
query_hook[0] = '\0';
@@ -1939,10 +2195,15 @@
"cannot manage a zone which is in state '%s'",
zone_state_str(zstate));
return (1);
}
+ if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
+ zerror(zlogp, B_FALSE, "unable to determine zone path");
+ return (-1);
+ }
+
if (zonecfg_default_brand(default_brand,
sizeof (default_brand)) != Z_OK) {
zerror(zlogp, B_FALSE, "unable to determine default brand");
return (1);
}