Print this page
OS-5330 zoneadm mounting an lx or joyent branded zone fails
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
(NOTE: Manual port, because of divergence from SmartOS.)
OS-3831 lxbrand /proc/cmdline should reflect zone boot arguments
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
Remove most KEBE comments and accompanying unused code or variables/fields.
Merge cleanup from previous six commits
OS-200 need a better mechanism for storing persistent zone_did
OS-2564 zone boot failed: could not start zoneadmd
OS-1763 mount of /etc/svc/volatile failed: Device busy
OS-511 make zonecfg device resource extensible, like the net resource
OS-224 add more zonecfg net properties
Reduce lint
Add zfd.c to zoneadmd's Makefile, a bit more not-yet ifdef-out.
zoneadmd mismerge (we don't support debug yet)
OS-4932 zoneadm boot args not passed to lx init
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-4781 would like to be able to add CT_PR_EV_EXIT to fatal event set of current contract
OS-4253 lxbrand ubuntu 15.04 won't boot because /sbin/init is a symlink
OS-3524 in order to support interaction with docker containers, need to be able to connect to stdio for init from GZ
OS-3525 in order to support 'docker logs' need to be able to get stdio from zone to log file
OS-3429 Expose zone's init exit status
OS-3342 dlmgmtd needs to be mindful of lock ordering
OS-2608 dlmgmtd needs to record zone identifiers
OS-3492 zone_free asserts to its destruction when dlmgmtd has fallen
OS-3494 zoneadmd tears down networking too soon when boot fails
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-3077 restarted zoneadmd uses invalid zlogp
OS-3075 zone long boot args aren't passed through
OS-11 rcapd behaves poorly when under extreme load
        
*** 20,29 ****
--- 20,30 ----
   */
  
  /*
   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
   * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+  * Copyright 2016 Joyent, Inc.
   */
  
  /*
   * zoneadmd manages zones; one zoneadmd process is launched for each
   * non-global zone on the system.  This daemon juggles four jobs:
*** 66,75 ****
--- 67,77 ----
  #include <sys/param.h>
  #include <sys/mman.h>
  #include <sys/types.h>
  #include <sys/stat.h>
  #include <sys/sysmacros.h>
+ #include <sys/time.h>
  
  #include <bsm/adt.h>
  #include <bsm/adt_event.h>
  
  #include <alloca.h>
*** 106,115 ****
--- 108,119 ----
  #include <zonestat_impl.h>
  #include "zoneadmd.h"
  
  static char *progname;
  char *zone_name;        /* zone which we are managing */
+ zone_dochandle_t snap_hndl;     /* handle for snapshot created when ready */
+ char zonepath[MAXNAMELEN];
  char pool_name[MAXNAMELEN];
  char default_brand[MAXNAMELEN];
  char brand_name[MAXNAMELEN];
  boolean_t zone_isnative;
  boolean_t zone_iscluster;
*** 139,148 ****
--- 143,155 ----
  #define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it wasn't */
  #endif
  
  #define DEFAULT_LOCALE  "C"
  
+ #define RSRC_NET        "net"
+ #define RSRC_DEV        "device"
+ 
  static const char *
  z_cmd_name(zone_cmd_t zcmd)
  {
          /* This list needs to match the enum in sys/zone.h */
          static const char *zcmdstr[] = {
*** 255,292 ****
                  zlogp->loglen -= copylen;
          }
  }
  
  /*
!  * Emit a warning for any boot arguments which are unrecognized.  Since
!  * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
   * put the arguments into an argv style array, use getopt to process them,
!  * and put the resultant argument string back into outargs.
   *
   * During the filtering, we pull out any arguments which are truly "boot"
   * arguments, leaving only those which are to be passed intact to the
   * progenitor process.  The one we support at the moment is -i, which
   * indicates to the kernel which program should be launched as 'init'.
   *
!  * A return of Z_INVAL indicates specifically that the arguments are
!  * not valid; this is a non-fatal error.  Except for Z_OK, all other return
!  * values are treated as fatal.
   */
  static int
  filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
!     char *init_file, char *badarg)
  {
          int argc = 0, argc_save;
          int i;
          int err;
          char *arg, *lasts, **argv = NULL, **argv_save;
          char zonecfg_args[BOOTARGS_MAX];
          char scratchargs[BOOTARGS_MAX], *sargs;
          char c;
  
          bzero(outargs, BOOTARGS_MAX);
-         bzero(badarg, BOOTARGS_MAX);
  
          /*
           * If the user didn't specify transient boot arguments, check
           * to see if there were any specified in the zone configuration,
           * and use them if applicable.
--- 262,308 ----
                  zlogp->loglen -= copylen;
          }
  }
  
  /*
!  * Append src to dest, modifying dest in the process. Prefix src with
!  * a space character if dest is a non-empty string. n is the size of the
!  * dest buffer; the result is truncated to fit. dest and src must not
!  * overlap.
!  *
!  * This is built from two strlcat() calls rather than an snprintf() that
!  * lists dest as both the output buffer and a "%s" argument, since
!  * formatting into a buffer that is also a source is undefined behavior.
!  */
! static void
! strnappend(char *dest, size_t n, const char *src)
! {
!         (void) strlcat(dest, dest[0] == '\0' ? "" : " ", n);
!         (void) strlcat(dest, src, n);
! }
! 
! /*
!  * Since illumos boot arguments are getopt(3c) compatible (see kernel(1m)), we
   * put the arguments into an argv style array, use getopt to process them,
!  * and put the resultant argument string back into outargs. Non-native brands
!  * may support alternate forms of boot arguments so we must handle that as well.
   *
   * During the filtering, we pull out any arguments which are truly "boot"
   * arguments, leaving only those which are to be passed intact to the
   * progenitor process.  The one we support at the moment is -i, which
   * indicates to the kernel which program should be launched as 'init'.
   *
!  * Except for Z_OK, all other return values are treated as fatal.
   */
  static int
  filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
!     char *init_file)
  {
          int argc = 0, argc_save;
          int i;
          int err;
          char *arg, *lasts, **argv = NULL, **argv_save;
          char zonecfg_args[BOOTARGS_MAX];
          char scratchargs[BOOTARGS_MAX], *sargs;
+         char scratchopt[3];
          char c;
  
          bzero(outargs, BOOTARGS_MAX);
  
          /*
           * If the user didn't specify transient boot arguments, check
           * to see if there were any specified in the zone configuration,
           * and use them if applicable.
*** 345,370 ****
                  }
                  i++;
          }
  
          /*
!          * We preserve compatibility with the Solaris system boot behavior,
           * which allows:
           *
           *      # reboot kernel/unix -s -m verbose
           *
!          * In this example, kernel/unix tells the booter what file to
!          * boot.  We don't want reboot in a zone to be gratuitously different,
!          * so we silently ignore the boot file, if necessary.
           */
          if (argv[0] == NULL)
                  goto done;
  
          assert(argv[0][0] != ' ');
          assert(argv[0][0] != '\t');
  
!         if (argv[0][0] != '-' && argv[0][0] != '\0') {
                  argv = &argv[1];
                  argc--;
          }
  
          optind = 0;
--- 361,394 ----
                  }
                  i++;
          }
  
          /*
!          * We preserve compatibility with the illumos system boot behavior,
           * which allows:
           *
           *      # reboot kernel/unix -s -m verbose
           *
!          * In this example, kernel/unix tells the booter what file to boot. The
!          * original intent of this was that we didn't want reboot in a zone to
!          * be gratuitously different, so we would silently ignore the boot
!          * file, if necessary. However, this usage is archaic and has never
!          * been common, since it is impossible to boot a zone onto a different
!          * kernel. Ignoring the first argument breaks for non-native brands
!          * which pass boot arguments in a different style. e.g.
!          *      systemd.log_level=debug
!          * Thus, for backward compatibility we only ignore the first argument
!          * if it appears to be in the illumos form and attempting to specify a
!          * kernel.
           */
          if (argv[0] == NULL)
                  goto done;
  
          assert(argv[0][0] != ' ');
          assert(argv[0][0] != '\t');
  
!         if (strncmp(argv[0], "kernel/", 7) == 0) {
                  argv = &argv[1];
                  argc--;
          }
  
          optind = 0;
*** 383,428 ****
                          /* This has already been processed by zoneadm */
                          break;
                  case 'm':
                  case 's':
                          /* These pass through unmolested */
!                         (void) snprintf(outargs, BOOTARGS_MAX,
!                             "%s -%c %s ", outargs, c, optarg ? optarg : "");
                          break;
                  case '?':
                          /*
!                          * We warn about unknown arguments but pass them
!                          * along anyway-- if someone wants to develop their
!                          * own init replacement, they can pass it whatever
!                          * args they want.
                           */
!                         err = Z_INVAL;
!                         (void) snprintf(outargs, BOOTARGS_MAX,
!                             "%s -%c", outargs, optopt);
!                         (void) snprintf(badarg, BOOTARGS_MAX,
!                             "%s -%c", badarg, optopt);
                          break;
                  }
          }
  
          /*
!          * For Solaris Zones we warn about and discard non-option arguments.
!          * Hence 'boot foo bar baz gub' --> 'boot'.  However, to be similar
!          * to the kernel, we concat up all the other remaining boot args.
!          * and warn on them as a group.
           */
-         if (optind < argc) {
-                 err = Z_INVAL;
                  while (optind < argc) {
!                         (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s",
!                             badarg, strlen(badarg) > 0 ? " " : "",
!                             argv[optind]);
                          optind++;
                  }
-                 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot "
-                     "arguments `%s'.", badarg);
-         }
  
  done:
          for (i = 0; i < argc_save; i++) {
                  if (argv_save[i] != NULL)
                          free(argv_save[i]);
--- 407,446 ----
                          /* This has already been processed by zoneadm */
                          break;
                  case 'm':
                  case 's':
                          /* These pass through unmolested */
!                         (void) snprintf(scratchopt, sizeof (scratchopt),
!                             "-%c", c);
!                         strnappend(outargs, BOOTARGS_MAX, scratchopt);
!                         if (optarg != NULL)
!                                 strnappend(outargs, BOOTARGS_MAX, optarg);
                          break;
                  case '?':
                          /*
!                          * If a brand has its own init, we need to pass along
!                          * whatever the user provides. We use the entire
!                          * unknown string here so that we correctly handle
!                          * unknown long options (e.g. --debug).
                           */
!                         strnappend(outargs, BOOTARGS_MAX, argv[optind - 1]);
                          break;
                  }
          }
  
          /*
!          * We need to pass along everything else since we don't know what
!          * the brand's init is expecting. For example, an argument list like:
!          *   --confdir /foo --debug
!          * will cause the getopt parsing to stop at '/foo' but we need to pass
!          * that on, along with the '--debug'. This does mean that we require
!          * any of our known options (-ifms) to precede the brand-specific ones.
           */
          while (optind < argc) {
!                 strnappend(outargs, BOOTARGS_MAX, argv[optind]);
                  optind++;
          }
  
  done:
          for (i = 0; i < argc_save; i++) {
                  if (argv_save[i] != NULL)
                          free(argv_save[i]);
*** 535,545 ****
  static int
  zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
  {
          int err;
  
!         if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
                  return (-1);
  
          if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
                  zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
                      zonecfg_strerror(err));
--- 553,564 ----
  static int
  zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
  {
          int err;
  
!         if (!ALT_MOUNT(mount_cmd) &&
!             brand_prestatechg(zlogp, zstate, Z_READY) != 0)
                  return (-1);
  
          if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
                  zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
                      zonecfg_strerror(err));
*** 559,578 ****
                          zerror(zlogp, B_FALSE, "destroying snapshot: %s",
                              zonecfg_strerror(err));
                  goto bad;
          }
  
!         if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
                  goto bad;
  
          return (0);
  
  bad:
          /*
           * If something goes wrong, we up the zones's state to the target
           * state, READY, and then invoke the hook as if we're halting.
           */
          (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
          return (-1);
  }
  
  int
--- 578,599 ----
                          zerror(zlogp, B_FALSE, "destroying snapshot: %s",
                              zonecfg_strerror(err));
                  goto bad;
          }
  
!         if (!ALT_MOUNT(mount_cmd) &&
!             brand_poststatechg(zlogp, zstate, Z_READY) != 0)
                  goto bad;
  
          return (0);
  
  bad:
          /*
           * If something goes wrong, we up the zones's state to the target
           * state, READY, and then invoke the hook as if we're halting.
           */
+         if (!ALT_MOUNT(mount_cmd))
                  (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
          return (-1);
  }
  
  int
*** 621,639 ****
          int rv;
          ctid_t ct;
  
          /* determine the zone rootpath */
          if (mount_cmd) {
-                 char zonepath[MAXPATHLEN];
                  char luroot[MAXPATHLEN];
  
-                 if (zone_get_zonepath(zone_name,
-                     zonepath, sizeof (zonepath)) != Z_OK) {
-                         zerror(zlogp, B_FALSE, "unable to determine zone path");
-                         return (-1);
-                 }
- 
                  (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
                  resolve_lofs(zlogp, luroot, sizeof (luroot));
                  (void) strlcpy(rootpath, luroot, sizeof (rootpath));
          } else {
                  if (zone_get_rootpath(zone_name,
--- 642,653 ----
*** 684,693 ****
--- 698,709 ----
  
          } else if (child == 0) {        /* child */
                  char opt_buf[MAX_MNTOPT_STR];
                  int optlen = 0;
                  int mflag = MS_DATA;
+                 int i;
+                 int ret;
  
                  (void) ct_tmpl_clear(tmpl_fd);
                  /*
                   * Even though there are no procs running in the zone, we
                   * do this for paranoia's sake.
*** 711,724 ****
                          (void) strlcpy(opt_buf, opt, sizeof (opt_buf));
                          opt = opt_buf;
                          optlen = MAX_MNTOPT_STR;
                          mflag = MS_OPTIONSTR;
                  }
!                 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0)
!                         _exit(errno);
!                 _exit(0);
          }
  
          /* parent */
          if (contract_latest(&ct) == -1)
                  ct = -1;
          (void) ct_tmpl_clear(tmpl_fd);
--- 727,757 ----
                          (void) strlcpy(opt_buf, opt, sizeof (opt_buf));
                          opt = opt_buf;
                          optlen = MAX_MNTOPT_STR;
                          mflag = MS_OPTIONSTR;
                  }
! 
!                 /*
!                  * There is an obscure race condition which can cause mount
!                  * to return EBUSY. This happens for example on the mount
!                  * of the zone's /etc/svc/volatile file system if there is
!                  * a GZ process running svcs -Z, which will touch the
!                  * mountpoint, just as we're trying to do the mount. To cope
!                  * with this, we retry up to 3 times to let this transient
!                  * process get out of the way.
!                  */
!                 for (i = 0; i < 3; i++) {
!                         ret = 0;
!                         if (mount(spec, dir, mflag, fstype, NULL, 0, opt,
!                             optlen) != 0)
!                                 ret = errno;
!                         if (ret != EBUSY)
!                                 break;
!                         (void) sleep(1);
                  }
+                 _exit(ret);
+         }
  
          /* parent */
          if (contract_latest(&ct) == -1)
                  ct = -1;
          (void) ct_tmpl_clear(tmpl_fd);
*** 737,746 ****
--- 770,888 ----
  
          return (0);
  }
  
  /*
+  * Set one _ZONECFG_* environment variable in this process.
+  *
+  * env variable name format
+  *      _ZONECFG_{resource name}_{identifying attr. name}_{property name}
+  * or, when attr is NULL,
+  *      _ZONECFG_{resource name}_{property name}
+  *
+  * Every '-' in the assembled name is rewritten to '_' before the variable
+  * is set, e.g. the "allowed-address" property of net resource "net0" is
+  * exported as _ZONECFG_net_net0_allowed_address. Any existing value is
+  * overwritten. The result of setenv() is not checked.
+  */
+ static void
+ set_zonecfg_env(char *rsrc, char *attr, char *name, char *val)
+ {
+         char *p;
+         /* Enough for maximal name, rsrc + attr, & slop for ZONECFG & _'s */
+         char nm[2 * MAXNAMELEN + 32];
+ 
+         if (attr == NULL)
+                 (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s", rsrc,
+                     name);
+         else
+                 (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s_%s", rsrc,
+                     attr, name);
+ 
+         p = nm;
+         while ((p = strchr(p, '-')) != NULL)
+                 *p++ = '_';
+ 
+         (void) setenv(nm, val, 1);
+ }
+ 
+ /*
+  * Export zonecfg network and device properties into environment for the boot
+  * and state change hooks.
+  *
+  * Returns Z_OK on success, otherwise the error from the zonecfg call that
+  * failed. Exits the process with Z_NOMEM if no handle can be allocated.
+  *
+  * We could export more of the config in the future, as necessary.
+  */
+ static int
+ setup_subproc_env(void)
+ {
+         int res;
+         zone_dochandle_t handle;
+         struct zone_nwiftab ntab;
+         struct zone_devtab dtab;
+         /*
+          * These lists are built with strlcat(), which needs a valid (here,
+          * empty) string to append to, so they must be initialized.
+          * NOTE(review): nothing visible here consumes the finished lists;
+          * confirm whether they were meant to be exported as well.
+          */
+         char net_resources[MAXNAMELEN * 2] = "";
+         char dev_resources[MAXNAMELEN * 2] = "";
+ 
+         if ((handle = zonecfg_init_handle()) == NULL)
+                 exit(Z_NOMEM);
+ 
+         if ((res = zonecfg_get_handle(zone_name, handle)) != Z_OK)
+                 goto done;
+ 
+         if ((res = zonecfg_setnwifent(handle)) != Z_OK)
+                 goto done;
+ 
+         while (zonecfg_getnwifent(handle, &ntab) == Z_OK) {
+                 struct zone_res_attrtab *rap;
+                 char *phys;
+ 
+                 phys = ntab.zone_nwif_physical;
+ 
+                 (void) strlcat(net_resources, phys, sizeof (net_resources));
+                 (void) strlcat(net_resources, " ", sizeof (net_resources));
+ 
+                 set_zonecfg_env(RSRC_NET, phys, "physical", phys);
+ 
+                 set_zonecfg_env(RSRC_NET, phys, "address",
+                     ntab.zone_nwif_address);
+                 set_zonecfg_env(RSRC_NET, phys, "allowed-address",
+                     ntab.zone_nwif_allowed_address);
+                 set_zonecfg_env(RSRC_NET, phys, "defrouter",
+                     ntab.zone_nwif_defrouter);
+                 set_zonecfg_env(RSRC_NET, phys, "global-nic",
+                     ntab.zone_nwif_gnic);
+                 set_zonecfg_env(RSRC_NET, phys, "mac-addr", ntab.zone_nwif_mac);
+                 set_zonecfg_env(RSRC_NET, phys, "vlan-id",
+                     ntab.zone_nwif_vlan_id);
+ 
+                 for (rap = ntab.zone_nwif_attrp; rap != NULL;
+                     rap = rap->zone_res_attr_next)
+                         set_zonecfg_env(RSRC_NET, phys, rap->zone_res_attr_name,
+                             rap->zone_res_attr_value);
+         }
+ 
+         (void) zonecfg_endnwifent(handle);
+ 
+         if ((res = zonecfg_setdevent(handle)) != Z_OK)
+                 goto done;
+ 
+         while (zonecfg_getdevent(handle, &dtab) == Z_OK) {
+                 struct zone_res_attrtab *rap;
+                 char *match;
+ 
+                 match = dtab.zone_dev_match;
+ 
+                 (void) strlcat(dev_resources, match, sizeof (dev_resources));
+                 (void) strlcat(dev_resources, " ", sizeof (dev_resources));
+ 
+                 for (rap = dtab.zone_dev_attrp; rap != NULL;
+                     rap = rap->zone_res_attr_next)
+                         set_zonecfg_env(RSRC_DEV, match,
+                             rap->zone_res_attr_name, rap->zone_res_attr_value);
+         }
+ 
+         (void) zonecfg_enddevent(handle);
+ 
+         res = Z_OK;
+ 
+ done:
+         zonecfg_fini_handle(handle);
+         return (res);
+ }
+ 
+ /*
   * If retstr is not NULL, the output of the subproc is returned in the str,
   * otherwise it is output using zerror().  Any memory allocated for retstr
   * should be freed by the caller.
   */
  int
*** 761,770 ****
--- 903,917 ----
                  rd_cnt = 0;
          } else {
                  inbuf = buf;
          }
  
+         if (setup_subproc_env() != Z_OK) {
+                 zerror(zlogp, B_FALSE, "failed to setup environment");
+                 return (-1);
+         }
+ 
          file = popen(cmdbuf, "r");
          if (file == NULL) {
                  zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
                  return (-1);
          }
*** 800,825 ****
                  return (-1);
          }
          return (WEXITSTATUS(status));
  }
  
  static int
  zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
  {
          zoneid_t zoneid;
          struct stat st;
!         char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
          char nbootargs[BOOTARGS_MAX];
          char cmdbuf[MAXPATHLEN];
          fs_callback_t cb;
          brand_handle_t bh;
          zone_iptype_t iptype;
-         boolean_t links_loaded = B_FALSE;
          dladm_status_t status;
          char errmsg[DLADM_STRSIZE];
          int err;
          boolean_t restart_init;
  
          if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
                  return (-1);
  
          if ((zoneid = getzoneidbyname(zone_name)) == -1) {
--- 947,998 ----
                  return (-1);
          }
          return (WEXITSTATUS(status));
  }
  
+ /*
+  * Get the app-svc-dependent flag for this zone's init process. This is a
+  * zone-specific attr which controls the type of contract we create for the
+  * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal
+  * set, so that when any service which is in the same contract exits, the init
+  * application will be terminated.
+  *
+  * We use the global "snap_hndl", so no parameters get passed here.
+  *
+  * Returns B_TRUE only if the "app-svc-dependent" attr is found in snap_hndl
+  * and its value is exactly "true"; a failed lookup or any other value gives
+  * B_FALSE.
+  */
+ static boolean_t
+ is_app_svc_dep(void)
+ {
+         struct zone_attrtab a;
+ 
+         bzero(&a, sizeof (a));
+         (void) strlcpy(a.zone_attr_name, "app-svc-dependent",
+             sizeof (a.zone_attr_name));
+ 
+         if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
+             strcmp(a.zone_attr_value, "true") == 0) {
+                 return (B_TRUE);
+         }
+ 
+         return (B_FALSE);
+ }
+ 
  static int
  zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
  {
          zoneid_t zoneid;
          struct stat st;
!         char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
          char nbootargs[BOOTARGS_MAX];
          char cmdbuf[MAXPATHLEN];
          fs_callback_t cb;
          brand_handle_t bh;
          zone_iptype_t iptype;
          dladm_status_t status;
          char errmsg[DLADM_STRSIZE];
          int err;
          boolean_t restart_init;
+         boolean_t app_svc_dep;
  
          if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
                  return (-1);
  
          if ((zoneid = getzoneidbyname(zone_name)) == -1) {
*** 850,866 ****
          }
  
          /*
           * Get the brand's boot callback if it exists.
           */
-         if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
-                 zerror(zlogp, B_FALSE, "unable to determine zone path");
-                 brand_close(bh);
-                 goto bad;
-         }
          (void) strcpy(cmdbuf, EXEC_PREFIX);
!         if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
              sizeof (cmdbuf) - EXEC_LEN) != 0) {
                  zerror(zlogp, B_FALSE,
                      "unable to determine branded zone's boot callback");
                  brand_close(bh);
                  goto bad;
--- 1023,1034 ----
          }
  
          /*
           * Get the brand's boot callback if it exists.
           */
          (void) strcpy(cmdbuf, EXEC_PREFIX);
!         if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
              sizeof (cmdbuf) - EXEC_LEN) != 0) {
                  zerror(zlogp, B_FALSE,
                      "unable to determine branded zone's boot callback");
                  brand_close(bh);
                  goto bad;
*** 875,909 ****
          }
  
          /* See if this zone's brand should restart init if it dies. */
          restart_init = brand_restartinit(bh);
  
          brand_close(bh);
  
!         err = filter_bootargs(zlogp, bootargs, nbootargs, init_file,
!             bad_boot_arg);
!         if (err == Z_INVAL)
!                 eventstream_write(Z_EVT_ZONE_BADARGS);
!         else if (err != Z_OK)
                  goto bad;
  
          assert(init_file[0] != '\0');
  
!         /* Try to anticipate possible problems: Make sure init is executable. */
!         if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
                  zerror(zlogp, B_FALSE, "unable to determine zone root");
                  goto bad;
          }
  
!         (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file);
  
!         if (stat(initpath, &st) == -1) {
                  zerror(zlogp, B_TRUE, "could not stat %s", initpath);
                  goto bad;
          }
  
!         if ((st.st_mode & S_IXUSR) == 0) {
                  zerror(zlogp, B_FALSE, "%s is not executable", initpath);
                  goto bad;
          }
  
          /*
--- 1043,1087 ----
          }
  
          /* See if this zone's brand should restart init if it dies. */
          restart_init = brand_restartinit(bh);
  
+         /*
+          * See if we need to setup contract dependencies between the zone's
+          * primary application and any of its services.
+          */
+         app_svc_dep = is_app_svc_dep();
+ 
          brand_close(bh);
  
!         err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
!         if (err != Z_OK)
                  goto bad;
  
          assert(init_file[0] != '\0');
  
!         /*
!          * Try to anticipate possible problems: If possible, make sure init is
!          * executable.
!          */
!         if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) {
                  zerror(zlogp, B_FALSE, "unable to determine zone root");
                  goto bad;
          }
  
!         (void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file);
  
!         if (lstat(initpath, &st) == -1) {
                  zerror(zlogp, B_TRUE, "could not stat %s", initpath);
                  goto bad;
          }
  
!         /*
!          * If a symlink, we'll have to wait and resolve when we boot,
!          * otherwise check the executable bits now.
!          */
!         if ((st.st_mode & S_IFMT) != S_IFLNK && (st.st_mode & S_IXUSR) == 0) {
                  zerror(zlogp, B_FALSE, "%s is not executable", initpath);
                  goto bad;
          }
  
          /*
*** 917,927 ****
                  if (status != DLADM_STATUS_OK) {
                          zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
                              " %s", dladm_status2str(status, errmsg));
                          goto bad;
                  }
-                 links_loaded = B_TRUE;
          }
  
          /*
           * If there is a brand 'boot' callback, execute it now to give the
           * brand one last chance to do any additional setup before the zone
--- 1095,1104 ----
*** 947,956 ****
--- 1124,1139 ----
              NULL, 0) == -1) {
                  zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
                  goto bad;
          }
  
+         if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT,
+             (void *)B_TRUE, sizeof (boolean_t)) == -1) {
+                 zerror(zlogp, B_TRUE, "could not set zone app-die");
+                 goto bad;
+         }
+ 
          /*
           * Inform zonestatd of a new zone so that it can install a door for
           * the zone to contact it.
           */
          notify_zonestatd(zone_id);
*** 961,1004 ****
          }
  
          if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
                  goto bad;
  
          return (0);
  
  bad:
          /*
           * If something goes wrong, we up the zones's state to the target
           * state, RUNNING, and then invoke the hook as if we're halting.
           */
          (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
!         if (links_loaded)
!                 (void) dladm_zone_halt(dld_handle, zoneid);
          return (-1);
  }
  
  static int
  zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
  {
          int err;
  
!         if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
                  return (-1);
  
          if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
                  if (!bringup_failure_recovery)
                          zerror(zlogp, B_FALSE, "unable to destroy zone");
                  return (-1);
          }
  
          if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
                  zerror(zlogp, B_FALSE, "destroying snapshot: %s",
                      zonecfg_strerror(err));
  
-         if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
-                 return (-1);
- 
          return (0);
  }
  
  static int
  zone_graceful_shutdown(zlog_t *zlogp)
--- 1144,1201 ----
          }
  
          if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
                  goto bad;
  
+         /* Startup a thread to perform zfd logging/tty svc for the zone. */
+         create_log_thread(zlogp, zone_id);
+ 
+         /* Startup a thread to perform memory capping for the zone. */
+         create_mcap_thread(zlogp, zone_id);
+ 
          return (0);
  
  bad:
          /*
           * If something goes wrong, we up the zones's state to the target
           * state, RUNNING, and then invoke the hook as if we're halting.
           */
          (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
! 
          return (-1);
  }
  
  static int
  zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
  {
          int err;
  
!         if (unmount_cmd == B_FALSE && /* no brand hooks on unmount */
!             brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
                  return (-1);
  
+         /* Stop the memory-capping thread before tearing the zone down. */
+         destroy_mcap_thread();
+ 
          if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
                  if (!bringup_failure_recovery)
                          zerror(zlogp, B_FALSE, "unable to destroy zone");
+                 destroy_log_thread();
                  return (-1);
          }
  
+         /* Teardown succeeded; the log thread is no longer needed. */
+         destroy_log_thread();
+ 
+         if (unmount_cmd == B_FALSE && /* no brand hooks on unmount */
+             brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
+                 return (-1);
+ 
          if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
                  zerror(zlogp, B_FALSE, "destroying snapshot: %s",
                      zonecfg_strerror(err));
  
          return (0);
  }
  
  static int
  zone_graceful_shutdown(zlog_t *zlogp)
*** 1005,1015 ****
  {
          zoneid_t zoneid;
          pid_t child;
          char cmdbuf[MAXPATHLEN];
          brand_handle_t bh = NULL;
-         char zpath[MAXPATHLEN];
          ctid_t ct;
          int tmpl_fd;
          int child_status;
  
          if (shutdown_in_progress) {
--- 1202,1211 ----
*** 1026,1047 ****
          if ((bh = brand_open(brand_name)) == NULL) {
                  zerror(zlogp, B_FALSE, "unable to determine zone brand");
                  return (-1);
          }
  
-         if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
-                 zerror(zlogp, B_FALSE, "unable to determine zone path");
-                 brand_close(bh);
-                 return (-1);
-         }
- 
          /*
           * If there is a brand 'shutdown' callback, execute it now to give the
           * brand a chance to cleanup any custom configuration.
           */
          (void) strcpy(cmdbuf, EXEC_PREFIX);
!         if (brand_get_shutdown(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
              sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
                  (void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
          }
          brand_close(bh);
  
--- 1222,1237 ----
          if ((bh = brand_open(brand_name)) == NULL) {
                  zerror(zlogp, B_FALSE, "unable to determine zone brand");
                  return (-1);
          }
  
          /*
           * If there is a brand 'shutdown' callback, execute it now to give the
           * brand a chance to cleanup any custom configuration.
           */
          (void) strcpy(cmdbuf, EXEC_PREFIX);
!         if (brand_get_shutdown(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
              sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
                  (void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
          }
          brand_close(bh);
  
*** 1175,1184 ****
--- 1365,1404 ----
  
          (void) adt_end_session(ah);
  }
  
  /*
+  * Log the exit time and status of the zone's init process into
+  * {zonepath}/lastexited. If the zone shut down normally, the exit status will
+  * be -1, otherwise it will be the exit status as described in wait.3c.
+  * If the zone is configured to restart init, then nothing will be logged if
+  * init exits unexpectedly (the kernel will never upcall in this case).
+  */
+ static void
+ log_init_exit(int status)
+ {
+         char p[MAXPATHLEN];
+         char buf[128];
+         struct timeval t;
+         int fd;
+ 
+         if (snprintf(p, sizeof (p), "%s/lastexited", zonepath) >= sizeof (p))
+                 return; /* path did not fit; never open a truncated name */
+         if (gettimeofday(&t, NULL) != 0)
+                 return;
+         if (snprintf(buf, sizeof (buf), "%ld.%ld %d\n", t.tv_sec, t.tv_usec,
+             status) >= sizeof (buf))
+                 return; /* record did not fit; do not write a partial one */
+         if ((fd = open(p, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0)
+                 return;
+ 
+         (void) write(fd, buf, strlen(buf));
+ 
+         (void) close(fd);
+ }
+ 
+ /*
   * The main routine for the door server that deals with zone state transitions.
   */
  /* ARGSUSED */
  static void
  server(void *cookie, char *args, size_t alen, door_desc_t *dp,
*** 1187,1196 ****
--- 1407,1417 ----
          ucred_t *uc = NULL;
          const priv_set_t *eset;
  
          zone_state_t zstate;
          zone_cmd_t cmd;
+         int init_status;
          zone_cmd_arg_t *zargp;
  
          boolean_t kernelcall;
  
          int rval = -1;
*** 1239,1248 ****
--- 1460,1470 ----
                      "unexpected (expected %d bytes)", alen,
                      sizeof (zone_cmd_arg_t));
                  goto out;
          }
          cmd = zargp->cmd;
+         init_status = zargp->status;
  
          if (door_ucred(&uc) != 0) {
                  zerror(&logsys, B_TRUE, "door_ucred");
                  goto out;
          }
*** 1348,1357 ****
--- 1570,1580 ----
                  switch (cmd) {
                  case Z_READY:
                          rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
                          if (rval == 0)
                                  eventstream_write(Z_EVT_ZONE_READIED);
+                         zcons_statechanged();
                          break;
                  case Z_BOOT:
                  case Z_FORCEBOOT:
                          eventstream_write(Z_EVT_ZONE_BOOTING);
                          if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
*** 1358,1367 ****
--- 1581,1591 ----
                              == 0) {
                                  rval = zone_bootup(zlogp, zargp->bootbuf,
                                      zstate);
                          }
                          audit_put_record(zlogp, uc, rval, "boot");
+                         zcons_statechanged();
                          if (rval != 0) {
                                  bringup_failure_recovery = B_TRUE;
                                  (void) zone_halt(zlogp, B_FALSE, B_FALSE,
                                      zstate);
                                  eventstream_write(Z_EVT_ZONE_BOOTFAILED);
*** 1480,1489 ****
--- 1704,1714 ----
                          (void) strlcpy(boot_args, zargp->bootbuf,
                              sizeof (boot_args));
                          eventstream_write(Z_EVT_ZONE_BOOTING);
                          rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
                          audit_put_record(zlogp, uc, rval, "boot");
+                         zcons_statechanged();
                          if (rval != 0) {
                                  bringup_failure_recovery = B_TRUE;
                                  (void) zone_halt(zlogp, B_FALSE, B_TRUE,
                                      zstate);
                                  eventstream_write(Z_EVT_ZONE_BOOTFAILED);
*** 1494,1503 ****
--- 1719,1729 ----
                          if (kernelcall) /* Invalid; can't happen */
                                  abort();
                          if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
                              != 0)
                                  break;
+                         zcons_statechanged();
                          eventstream_write(Z_EVT_ZONE_HALTED);
                          break;
                  case Z_SHUTDOWN:
                  case Z_REBOOT:
                  case Z_NOTE_UNINSTALLING:
*** 1541,1550 ****
--- 1767,1777 ----
                  switch (cmd) {
                  case Z_READY:
                          if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
                              != 0)
                                  break;
+                         zcons_statechanged();
                          if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
                                  eventstream_write(Z_EVT_ZONE_READIED);
                          else
                                  eventstream_write(Z_EVT_ZONE_HALTED);
                          break;
*** 1557,1570 ****
--- 1784,1803 ----
                           */
                          zerror(zlogp, B_FALSE, "zone is already booted");
                          rval = 0;
                          break;
                  case Z_HALT:
+                         if (kernelcall) {
+                                 log_init_exit(init_status);
+                         } else {
+                                 log_init_exit(-1);
+                         }
                          if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
                              != 0)
                                  break;
                          eventstream_write(Z_EVT_ZONE_HALTED);
+                         zcons_statechanged();
                          break;
                  case Z_REBOOT:
                          (void) strlcpy(boot_args, zargp->bootbuf,
                              sizeof (boot_args));
                          eventstream_write(Z_EVT_ZONE_REBOOTING);
*** 1572,1583 ****
                              != 0) {
                                  eventstream_write(Z_EVT_ZONE_BOOTFAILED);
                                  boot_args[0] = '\0';
                                  break;
                          }
!                         if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
!                             != 0) {
                                  eventstream_write(Z_EVT_ZONE_BOOTFAILED);
                                  boot_args[0] = '\0';
                                  break;
                          }
                          rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
--- 1805,1817 ----
                              != 0) {
                                  eventstream_write(Z_EVT_ZONE_BOOTFAILED);
                                  boot_args[0] = '\0';
                                  break;
                          }
!                         zcons_statechanged();
!                         if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) !=
!                             0) {
                                  eventstream_write(Z_EVT_ZONE_BOOTFAILED);
                                  boot_args[0] = '\0';
                                  break;
                          }
                          rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
*** 1757,1773 ****
--- 1991,2034 ----
                   * limited to times when zoneadmd is picking back up from a
                   * zoneadmd that died while the zone was in some non-trivial
                   * state.
                   */
                  if (zstate > ZONE_STATE_INSTALLED) {
+                         static zoneid_t zid;
+ 
                          zerror(zlogp, B_FALSE,
                              "zone '%s': WARNING: zone is in state '%s', but "
                              "zoneadmd does not appear to be available; "
                              "restarted zoneadmd to recover.",
                              zone_name, zone_state_str(zstate));
+ 
+                         /*
+                          * Startup a thread to perform the zfd logging/tty svc
+                          * and a thread to perform memory capping for the
+                          * zone. zlogp won't be valid for much longer so use
+                          * logsys.
+                          */
+                         if ((zid = getzoneidbyname(zone_name)) != -1) {
+                                 create_log_thread(&logsys, zid);
+                                 create_mcap_thread(&logsys, zid);
                          }
  
+                         /* recover the global configuration snapshot */
+                         if (snap_hndl == NULL) {
+                                 if ((snap_hndl = zonecfg_init_handle())
+                                     == NULL ||
+                                     zonecfg_create_snapshot(zone_name)
+                                     != Z_OK ||
+                                     zonecfg_get_snapshot_handle(zone_name,
+                                     snap_hndl) != Z_OK) {
+                                         zerror(zlogp, B_FALSE, "recovering "
+                                             "zone configuration handle");
+                                         goto out;
+                                 }
+                         }
+                 }
+ 
                  (void) fdetach(zone_door_path);
                  (void) close(doorfd);
                  goto top;
          }
          ret = 0;
*** 1781,1799 ****
   * query callback, if any of these exist.
   */
  static int
  brand_callback_init(brand_handle_t bh, char *zone_name)
  {
-         char zpath[MAXPATHLEN];
- 
-         if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK)
-                 return (-1);
- 
          (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
              sizeof (pre_statechg_hook));
  
!         if (brand_get_prestatechange(bh, zone_name, zpath,
              pre_statechg_hook + EXEC_LEN,
              sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
                  return (-1);
  
          if (strlen(pre_statechg_hook) <= EXEC_LEN)
--- 2042,2055 ----
   * query callback, if any of these exist.
   */
  static int
  brand_callback_init(brand_handle_t bh, char *zone_name)
  {
          (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
              sizeof (pre_statechg_hook));
  
!         if (brand_get_prestatechange(bh, zone_name, zonepath,
              pre_statechg_hook + EXEC_LEN,
              sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
                  return (-1);
  
          if (strlen(pre_statechg_hook) <= EXEC_LEN)
*** 1800,1810 ****
                  pre_statechg_hook[0] = '\0';
  
          (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
              sizeof (post_statechg_hook));
  
!         if (brand_get_poststatechange(bh, zone_name, zpath,
              post_statechg_hook + EXEC_LEN,
              sizeof (post_statechg_hook) - EXEC_LEN) != 0)
                  return (-1);
  
          if (strlen(post_statechg_hook) <= EXEC_LEN)
--- 2056,2066 ----
                  pre_statechg_hook[0] = '\0';
  
          (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
              sizeof (post_statechg_hook));
  
!         if (brand_get_poststatechange(bh, zone_name, zonepath,
              post_statechg_hook + EXEC_LEN,
              sizeof (post_statechg_hook) - EXEC_LEN) != 0)
                  return (-1);
  
          if (strlen(post_statechg_hook) <= EXEC_LEN)
*** 1811,1821 ****
                  post_statechg_hook[0] = '\0';
  
          (void) strlcpy(query_hook, EXEC_PREFIX,
              sizeof (query_hook));
  
!         if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN,
              sizeof (query_hook) - EXEC_LEN) != 0)
                  return (-1);
  
          if (strlen(query_hook) <= EXEC_LEN)
                  query_hook[0] = '\0';
--- 2067,2077 ----
                  post_statechg_hook[0] = '\0';
  
          (void) strlcpy(query_hook, EXEC_PREFIX,
              sizeof (query_hook));
  
!         if (brand_get_query(bh, zone_name, zonepath, query_hook + EXEC_LEN,
              sizeof (query_hook) - EXEC_LEN) != 0)
                  return (-1);
  
          if (strlen(query_hook) <= EXEC_LEN)
                  query_hook[0] = '\0';
*** 1939,1948 ****
--- 2195,2209 ----
                      "cannot manage a zone which is in state '%s'",
                      zone_state_str(zstate));
                  return (1);
          }
  
+         if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
+                 zerror(zlogp, B_FALSE, "unable to determine zone path");
+                 return (-1);
+         }
+ 
          if (zonecfg_default_brand(default_brand,
              sizeof (default_brand)) != Z_OK) {
                  zerror(zlogp, B_FALSE, "unable to determine default brand");
                  return (1);
          }