Print this page
    
14019 Allow more control over zone init exit actions (fix mismerge)
14019 Allow more control over zone init exit actions
Portions contributed by: Joshua M. Clulow <jmc@joyent.com>
Portions contributed by: Andy Fiddaman <andy@omnios.org>
Reviewed by: C Fraire <cfraire@me.com>
Reviewed by: Gordon Ross <Gordon.W.Ross@gmail.com>
Approved by: Robert Mustacchi <rm@fingolfin.org>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/cmd/zoneadmd/zoneadmd.c
          +++ new/usr/src/cmd/zoneadmd/zoneadmd.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
  25   25   * Copyright 2021 Joyent, Inc.
  26   26   * Copyright (c) 2016 by Delphix. All rights reserved.
  27   27   */
  28   28  
  29   29  /*
  30   30   * zoneadmd manages zones; one zoneadmd process is launched for each
  31   31   * non-global zone on the system.  This daemon juggles four jobs:
  32   32   *
  33   33   * - Implement setup and teardown of the zone "virtual platform": mount and
  34   34   *   unmount filesystems; create and destroy network interfaces; communicate
  35   35   *   with devfsadmd to lay out devices for the zone; instantiate the zone
  36   36   *   console device; configure process runtime attributes such as resource
  37   37   *   controls, pool bindings, fine-grained privileges.
  38   38   *
  39   39   * - Launch the zone's init(1M) process.
  40   40   *
  41   41   * - Implement a door server; clients (like zoneadm) connect to the door
  42   42   *   server and request zone state changes.  The kernel is also a client of
  43   43   *   this door server.  A request to halt or reboot the zone which originates
  44   44   *   *inside* the zone results in a door upcall from the kernel into zoneadmd.
  45   45   *
  46   46   *   One minor problem is that messages emitted by zoneadmd need to be passed
  47   47   *   back to the zoneadm process making the request.  These messages need to
  48   48   *   be rendered in the client's locale; so, this is passed in as part of the
  49   49   *   request.  The exception is the kernel upcall to zoneadmd, in which case
  50   50   *   messages are syslog'd.
  51   51   *
  52   52   *   To make all of this work, the Makefile adds -a to xgettext to extract *all*
  53   53   *   strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
  54   54   *   strings which do not need to be translated.
  55   55   *
  56   56   * - Act as a console server for zlogin -C processes; see comments in zcons.c
  57   57   *   for more information about the zone console architecture.
  58   58   *
  59   59   * DESIGN NOTES
  60   60   *
  61   61   * Restart:
  62   62   *   A chief design constraint of zoneadmd is that it should be restartable in
  63   63   *   the case that the administrator kills it off, or it suffers a fatal error,
  64   64   *   without the running zone being impacted; this is akin to being able to
  65   65   *   reboot the service processor of a server without affecting the OS instance.
  66   66   */
  67   67  
  68   68  #include <sys/param.h>
  69   69  #include <sys/mman.h>
  70   70  #include <sys/types.h>
  71   71  #include <sys/stat.h>
  72   72  #include <sys/sysmacros.h>
  73   73  #include <sys/time.h>
  74   74  
  75   75  #include <bsm/adt.h>
  76   76  #include <bsm/adt_event.h>
  77   77  
  78   78  #include <alloca.h>
  79   79  #include <assert.h>
  80   80  #include <errno.h>
  81   81  #include <door.h>
  82   82  #include <fcntl.h>
  83   83  #include <locale.h>
  84   84  #include <signal.h>
  85   85  #include <stdarg.h>
  86   86  #include <stdio.h>
  87   87  #include <stdlib.h>
  88   88  #include <string.h>
  89   89  #include <strings.h>
  90   90  #include <synch.h>
  91   91  #include <syslog.h>
  92   92  #include <thread.h>
  93   93  #include <unistd.h>
  94   94  #include <wait.h>
  95   95  #include <limits.h>
  96   96  #include <zone.h>
  97   97  #include <libbrand.h>
  98   98  #include <sys/brand.h>
  99   99  #include <libcontract.h>
 100  100  #include <libcontract_priv.h>
 101  101  #include <sys/brand.h>
 102  102  #include <sys/contract/process.h>
 103  103  #include <sys/ctfs.h>
 104  104  #include <libdladm.h>
 105  105  #include <sys/dls_mgmt.h>
 106  106  #include <libscf.h>
 107  107  #include <uuid/uuid.h>
 108  108  #include <libppt.h>
 109  109  
 110  110  #include <libzonecfg.h>
 111  111  #include <zonestat_impl.h>
 112  112  #include "zoneadmd.h"
 113  113  
/* Program name; printed in the usage message. */
static char *progname;
char *zone_name;        /* zone which we are managing */
zone_dochandle_t snap_hndl;     /* handle for snapshot created when ready */
char zonepath[MAXNAMELEN];
char pool_name[MAXNAMELEN];
char default_brand[MAXNAMELEN];
char brand_name[MAXNAMELEN];
boolean_t zone_isnative;
boolean_t zone_iscluster;
boolean_t zone_islabeled;
boolean_t shutdown_in_progress;
/* Zone ID; assigned from vplat_create() in zone_ready(). */
static zoneid_t zone_id;
/* Looked up with zone_get_did() in zone_ready() while it is still 0. */
static zoneid_t zone_did = 0;
dladm_handle_t dld_handle = NULL;

/*
 * Brand hook command strings.  The state change hooks are run by
 * brand_prestatechg() and brand_poststatechg(); an empty string means
 * that no hook is run.
 */
char pre_statechg_hook[2 * MAXPATHLEN];
char post_statechg_hook[2 * MAXPATHLEN];
char query_hook[2 * MAXPATHLEN];

zlog_t logsys;                  /* log to syslog */
zlog_t logplat;                 /* log to platform.log */

mutex_t lock = DEFAULTMUTEX;    /* to serialize stuff */
/* Held by localize_msg() around its setlocale() calls. */
mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */

static sema_t scratch_sem;      /* for scratch zones */

static char     zone_door_path[MAXPATHLEN];
static int      zone_door = -1;

boolean_t in_death_throes = B_FALSE;    /* daemon is dying */
boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */

/* Passed to logstream_write() by zerror(). */
static int platloghdl = -1;     /* Handle for <zonepath>/logs/platform.log */
 148  148  
 149  149  #if !defined(TEXT_DOMAIN)               /* should be defined by cc -D */
 150  150  #define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it wasn't */
 151  151  #endif
 152  152  
 153  153  #define DEFAULT_LOCALE  "C"
 154  154  
 155  155  #define RSRC_NET        "net"
 156  156  #define RSRC_DEV        "device"
 157  157  
 158  158  static const char *
 159  159  z_cmd_name(zone_cmd_t zcmd)
 160  160  {
 161  161          /* This list needs to match the enum in sys/zone.h */
 162  162          static const char *zcmdstr[] = {
 163  163                  "ready", "boot", "forceboot", "reboot", "halt",
 164  164                  "note_uninstalling", "mount", "forcemount", "unmount",
 165  165                  "shutdown"
 166  166          };
 167  167  
 168  168          if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr))
 169  169                  return ("unknown");
 170  170          else
 171  171                  return (zcmdstr[(int)zcmd]);
 172  172  }
 173  173  
 174  174  static char *
 175  175  get_execbasename(char *execfullname)
 176  176  {
 177  177          char *last_slash, *execbasename;
 178  178  
 179  179          /* guard against '/' at end of command invocation */
 180  180          for (;;) {
 181  181                  last_slash = strrchr(execfullname, '/');
 182  182                  if (last_slash == NULL) {
 183  183                          execbasename = execfullname;
 184  184                          break;
 185  185                  } else {
 186  186                          execbasename = last_slash + 1;
 187  187                          if (*execbasename == '\0') {
 188  188                                  *last_slash = '\0';
 189  189                                  continue;
 190  190                          }
 191  191                          break;
 192  192                  }
 193  193          }
 194  194          return (execbasename);
 195  195  }
 196  196  
 197  197  static void
 198  198  usage(void)
 199  199  {
 200  200          (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
 201  201          (void) fprintf(stderr,
 202  202              gettext("\tNote: %s should not be run directly.\n"), progname);
 203  203          exit(2);
 204  204  }
 205  205  
 206  206  /* ARGSUSED */
 207  207  static void
 208  208  sigchld(int sig)
 209  209  {
 210  210  }
 211  211  
 212  212  char *
 213  213  localize_msg(char *locale, const char *msg)
 214  214  {
 215  215          char *out;
 216  216  
 217  217          (void) mutex_lock(&msglock);
 218  218          (void) setlocale(LC_MESSAGES, locale);
 219  219          out = gettext(msg);
 220  220          (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
 221  221          (void) mutex_unlock(&msglock);
 222  222          return (out);
 223  223  }
 224  224  
 225  225  /* PRINTFLIKE3 */
 226  226  void
 227  227  zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
 228  228  {
 229  229          va_list alist;
 230  230          char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
 231  231          char *bp, *bp_nozone;
 232  232          int saved_errno = errno;
 233  233  
 234  234          if (zlogp == &logsys)
 235  235                  (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", zone_name);
 236  236          else
 237  237                  buf[0] = '\0';
 238  238          bp = bp_nozone = &(buf[strlen(buf)]);
 239  239  
 240  240          /*
 241  241           * In theory, the locale pointer should be set to either "C" or a
 242  242           * char array, so it should never be NULL
 243  243           */
 244  244          assert(zlogp->locale != NULL);
 245  245          /* Locale is per process, but we are multi-threaded... */
 246  246          fmt = localize_msg(zlogp->locale, fmt);
 247  247  
 248  248          va_start(alist, fmt);
 249  249          (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
 250  250          va_end(alist);
 251  251          bp = &(buf[strlen(buf)]);
 252  252          if (use_strerror)
 253  253                  (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
 254  254                      strerror(saved_errno));
 255  255  
 256  256          (void) strlcat(buf, "\n", sizeof (buf));
 257  257  
 258  258          /*
 259  259           * If we don't have the platform log, we are in a child process, and
 260  260           * should log to stderr (which is a pipe) instead of the file.
 261  261           */
 262  262          if (logging_poisoned) {
 263  263                  (void) fprintf(stderr, "%s", buf);
 264  264  
 265  265                  if (zlogp != &logsys && zlogp->logfile == stderr)
 266  266                          return;
 267  267          } else {
 268  268                  logstream_write(platloghdl, bp_nozone, strlen(bp_nozone));
 269  269  
 270  270                  if (zlogp == &logplat)
 271  271                          return;
 272  272          }
 273  273  
 274  274          if (zlogp == &logsys) {
 275  275                  bp = strrchr(buf, '\n');
 276  276                  if (bp != NULL && bp[1] == '\0') {
 277  277                          *bp = '\0';
 278  278                  }
 279  279                  (void) syslog(LOG_ERR, "%s", buf);
 280  280          } else if (zlogp->logfile != NULL) {
 281  281                  (void) fprintf(zlogp->logfile, "%s", buf);
 282  282          } else {
 283  283                  size_t buflen;
 284  284                  size_t copylen;
 285  285  
 286  286                  buflen = snprintf(zlogp->log, zlogp->loglen, "%s", buf);
 287  287                  copylen = MIN(buflen, zlogp->loglen);
 288  288                  zlogp->log += copylen;
 289  289                  zlogp->loglen -= copylen;
 290  290          }
 291  291  }
 292  292  
 293  293  /*
 294  294   * Append src to dest, modifying dest in the process. Prefix src with
 295  295   * a space character if dest is a non-empty string. Assumes dest is already
 296  296   * properly \0-terminated OR overruns destsize.
 297  297   */
 298  298  static void
 299  299  strnappend(char *dest, size_t destsize, const char *src)
 300  300  {
 301  301          size_t startpoint = strnlen(dest, destsize);
 302  302  
 303  303          if (startpoint >= destsize - 1) {
 304  304                  /* We've run out of room.  Record something?! */
 305  305                  return;
 306  306          }
 307  307  
 308  308          if (startpoint > 0) {
 309  309                  /* Add the space per the function's intro comment. */
 310  310                  dest[startpoint] = ' ';
 311  311                  startpoint++;
 312  312          }
 313  313  
 314  314          /* Arguably we should check here too... */
 315  315          (void) strlcpy(dest + startpoint, src, destsize - startpoint);
 316  316  }
 317  317  
 318  318  /*
 319  319   * Since illumos boot arguments are getopt(3c) compatible (see kernel(1m)), we
 320  320   * put the arguments into an argv style array, use getopt to process them,
 321  321   * and put the resultant argument string back into outargs. Non-native brands
 322  322   * may support alternate forms of boot arguments so we must handle that as well.
 323  323   *
 324  324   * During the filtering, we pull out any arguments which are truly "boot"
 325  325   * arguments, leaving only those which are to be passed intact to the
 326  326   * progenitor process.  The one we support at the moment is -i, which
 327  327   * indicates to the kernel which program should be launched as 'init'.
 328  328   *
 329  329   * Except for Z_OK, all other return values are treated as fatal.
 330  330   */
 331  331  static int
 332  332  filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
 333  333      char *init_file)
 334  334  {
 335  335          int argc = 0, argc_save;
 336  336          int i;
 337  337          int err = Z_OK;
 338  338          char *arg, *lasts, **argv = NULL, **argv_save;
 339  339          char zonecfg_args[BOOTARGS_MAX];
 340  340          char scratchargs[BOOTARGS_MAX], *sargs;
 341  341          char scratchopt[3];
 342  342          char c;
 343  343  
 344  344          bzero(outargs, BOOTARGS_MAX);
 345  345  
 346  346          /*
 347  347           * If the user didn't specify transient boot arguments, check
 348  348           * to see if there were any specified in the zone configuration,
 349  349           * and use them if applicable.
 350  350           */
 351  351          if (inargs == NULL || inargs[0] == '\0')  {
 352  352                  bzero(zonecfg_args, sizeof (zonecfg_args));
 353  353                  (void) zonecfg_get_bootargs(snap_hndl, zonecfg_args,
 354  354                      sizeof (zonecfg_args));
 355  355                  inargs = zonecfg_args;
 356  356          }
 357  357  
 358  358          if (strlen(inargs) >= BOOTARGS_MAX) {
 359  359                  zerror(zlogp, B_FALSE, "boot argument string too long");
 360  360                  return (Z_INVAL);
 361  361          }
 362  362  
 363  363          (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
 364  364          sargs = scratchargs;
 365  365          while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
 366  366                  sargs = NULL;
 367  367                  argc++;
 368  368          }
 369  369  
 370  370          if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
 371  371                  zerror(zlogp, B_FALSE, "memory allocation failed");
 372  372                  return (Z_NOMEM);
 373  373          }
 374  374  
 375  375          argv_save = argv;
 376  376          argc_save = argc;
 377  377  
 378  378          (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
 379  379          sargs = scratchargs;
 380  380          i = 0;
 381  381          while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
 382  382                  sargs = NULL;
 383  383                  if ((argv[i] = strdup(arg)) == NULL) {
 384  384                          err = Z_NOMEM;
 385  385                          zerror(zlogp, B_FALSE, "memory allocation failed");
 386  386                          goto done;
 387  387                  }
 388  388                  i++;
 389  389          }
 390  390  
 391  391          /*
 392  392           * We preserve compatibility with the illumos system boot behavior,
 393  393           * which allows:
 394  394           *
 395  395           *      # reboot kernel/unix -s -m verbose
 396  396           *
 397  397           * In this example, kernel/unix tells the booter what file to boot. The
 398  398           * original intent of this was that we didn't want reboot in a zone to
 399  399           * be gratuitously different, so we would silently ignore the boot
 400  400           * file, if necessary. However, this usage is archaic and has never
 401  401           * been common, since it is impossible to boot a zone onto a different
 402  402           * kernel. Ignoring the first argument breaks for non-native brands
 403  403           * which pass boot arguments in a different style. e.g.
 404  404           *      systemd.log_level=debug
 405  405           * Thus, for backward compatibility we only ignore the first argument
 406  406           * if it appears to be in the illumos form and attempting to specify a
 407  407           * kernel.
 408  408           */
 409  409          if (argv[0] == NULL)
 410  410                  goto done;
 411  411  
 412  412          assert(argv[0][0] != ' ');
 413  413          assert(argv[0][0] != '\t');
 414  414  
 415  415          if (strncmp(argv[0], "kernel/", 7) == 0) {
 416  416                  argv = &argv[1];
 417  417                  argc--;
 418  418          }
 419  419  
 420  420          optind = 0;
 421  421          opterr = 0;
 422  422          err = Z_OK;
 423  423          while ((c = getopt(argc, argv, "fi:m:s")) != -1) {
 424  424                  switch (c) {
 425  425                  case 'i':
 426  426                          /*
 427  427                           * -i is handled by the runtime and is not passed
 428  428                           * along to userland
 429  429                           */
 430  430                          (void) strlcpy(init_file, optarg, MAXPATHLEN);
 431  431                          break;
 432  432                  case 'f':
 433  433                          /* This has already been processed by zoneadm */
 434  434                          break;
 435  435                  case 'm':
 436  436                  case 's':
 437  437                          /* These pass through unmolested */
 438  438                          (void) snprintf(scratchopt, sizeof (scratchopt),
 439  439                              "-%c", c);
 440  440                          strnappend(outargs, BOOTARGS_MAX, scratchopt);
 441  441                          if (optarg != NULL)
 442  442                                  strnappend(outargs, BOOTARGS_MAX, optarg);
 443  443                          break;
 444  444                  case '?':
 445  445                          /*
 446  446                           * If a brand has its own init, we need to pass along
 447  447                           * whatever the user provides. We use the entire
 448  448                           * unknown string here so that we correctly handle
 449  449                           * unknown long options (e.g. --debug).
 450  450                           */
 451  451                          strnappend(outargs, BOOTARGS_MAX, argv[optind - 1]);
 452  452                          break;
 453  453                  }
 454  454          }
 455  455  
 456  456          /*
 457  457           * We need to pass along everything else since we don't know what
 458  458           * the brand's init is expecting. For example, an argument list like:
 459  459           *   --confdir /foo --debug
 460  460           * will cause the getopt parsing to stop at '/foo' but we need to pass
 461  461           * that on, along with the '--debug'. This does mean that we require
 462  462           * any of our known options (-ifms) to preceed the brand-specific ones.
 463  463           */
 464  464          while (optind < argc) {
 465  465                  strnappend(outargs, BOOTARGS_MAX, argv[optind]);
 466  466                  optind++;
 467  467          }
 468  468  
 469  469  done:
 470  470          for (i = 0; i < argc_save; i++) {
 471  471                  if (argv_save[i] != NULL)
 472  472                          free(argv_save[i]);
 473  473          }
 474  474          free(argv_save);
 475  475          return (err);
 476  476  }
 477  477  
 478  478  
 479  479  static int
 480  480  mkzonedir(zlog_t *zlogp)
 481  481  {
 482  482          struct stat st;
 483  483          /*
 484  484           * We must create and lock everyone but root out of ZONES_TMPDIR
 485  485           * since anyone can open any UNIX domain socket, regardless of
 486  486           * its file system permissions.  Sigh...
 487  487           */
 488  488          if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
 489  489                  zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
 490  490                  return (-1);
 491  491          }
 492  492          /* paranoia */
 493  493          if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
 494  494                  zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
 495  495                  return (-1);
 496  496          }
 497  497          (void) chmod(ZONES_TMPDIR, S_IRWXU);
 498  498          return (0);
 499  499  }
 500  500  
 501  501  /*
 502  502   * Run the brand's pre-state change callback, if it exists.
 503  503   */
 504  504  static int
 505  505  brand_prestatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug)
 506  506  {
 507  507          char cmdbuf[2 * MAXPATHLEN];
 508  508          const char *altroot;
 509  509  
 510  510          if (pre_statechg_hook[0] == '\0')
 511  511                  return (0);
 512  512  
 513  513          altroot = zonecfg_get_root();
 514  514          if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
 515  515              state, cmd, altroot) > sizeof (cmdbuf))
 516  516                  return (-1);
 517  517  
 518  518          if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0)
 519  519                  return (-1);
 520  520  
 521  521          return (0);
 522  522  }
 523  523  
 524  524  /*
 525  525   * Run the brand's post-state change callback, if it exists.
 526  526   */
 527  527  static int
 528  528  brand_poststatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug)
 529  529  {
 530  530          char cmdbuf[2 * MAXPATHLEN];
 531  531          const char *altroot;
 532  532  
 533  533          if (post_statechg_hook[0] == '\0')
 534  534                  return (0);
 535  535  
 536  536          altroot = zonecfg_get_root();
 537  537          if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
 538  538              state, cmd, altroot) > sizeof (cmdbuf))
 539  539                  return (-1);
 540  540  
 541  541          if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0)
 542  542                  return (-1);
 543  543  
 544  544          return (0);
 545  545  }
 546  546  
 547  547  /*
 548  548   * Notify zonestatd of the new zone.  If zonestatd is not running, this
 549  549   * will do nothing.
 550  550   */
 551  551  static void
 552  552  notify_zonestatd(zoneid_t zoneid)
 553  553  {
 554  554          int cmd[2];
 555  555          int fd;
 556  556          door_arg_t params;
 557  557  
 558  558          fd = open(ZS_DOOR_PATH, O_RDONLY);
 559  559          if (fd < 0)
 560  560                  return;
 561  561  
 562  562          cmd[0] = ZSD_CMD_NEW_ZONE;
 563  563          cmd[1] = zoneid;
 564  564          params.data_ptr = (char *)&cmd;
 565  565          params.data_size = sizeof (cmd);
 566  566          params.desc_ptr = NULL;
 567  567          params.desc_num = 0;
 568  568          params.rbuf = NULL;
 569  569          params.rsize = 0;
 570  570          (void) door_call(fd, ¶ms);
 571  571          (void) close(fd);
 572  572  }
 573  573  
 574  574  /*
 575  575   * Bring a zone up to the pre-boot "ready" stage.  The mount_cmd argument is
 576  576   * 'true' if this is being invoked as part of the processing for the "mount"
 577  577   * subcommand.
 578  578   *
 579  579   * If a scratch zone mount (ALT_MOUNT) is being performed then do not
 580  580   * call the state change hooks.
 581  581   */
 582  582  static int
 583  583  zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate, boolean_t debug)
 584  584  {
 585  585          int err;
 586  586          boolean_t snapped = B_FALSE;
 587  587  
 588  588          if ((snap_hndl = zonecfg_init_handle()) == NULL) {
 589  589                  zerror(zlogp, B_TRUE, "getting zone configuration handle");
 590  590                  goto bad;
 591  591          }
 592  592          if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
 593  593                  zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
 594  594                      zonecfg_strerror(err));
 595  595                  goto bad;
 596  596          }
 597  597          snapped = B_TRUE;
 598  598  
 599  599          if (zonecfg_get_snapshot_handle(zone_name, snap_hndl) != Z_OK) {
 600  600                  zerror(zlogp, B_FALSE, "invalid configuration snapshot");
 601  601                  goto bad;
 602  602          }
 603  603  
 604  604          if (zone_did == 0)
 605  605                  zone_did = zone_get_did(zone_name);
 606  606  
 607  607          if (!ALT_MOUNT(mount_cmd) &&
 608  608              brand_prestatechg(zlogp, zstate, Z_READY, debug) != 0)
 609  609                  goto bad;
 610  610  
 611  611          if ((zone_id = vplat_create(zlogp, mount_cmd, zone_did)) == -1)
 612  612                  goto bad;
 613  613  
 614  614          if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
 615  615                  bringup_failure_recovery = B_TRUE;
 616  616                  (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE,
 617  617                      debug);
 618  618                  goto bad;
 619  619          }
 620  620  
 621  621          if (!ALT_MOUNT(mount_cmd) &&
 622  622              brand_poststatechg(zlogp, zstate, Z_READY, debug) != 0)
 623  623                  goto bad;
 624  624  
 625  625          return (0);
 626  626  
 627  627  bad:
 628  628          /*
 629  629           * If something goes wrong, we up the zones's state to the target
 630  630           * state, READY, and then invoke the hook as if we're halting.
 631  631           */
 632  632          if (!ALT_MOUNT(mount_cmd))
 633  633                  (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT,
 634  634                      debug);
 635  635  
 636  636          if (snapped)
 637  637                  if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
 638  638                          zerror(zlogp, B_FALSE, "destroying snapshot: %s",
 639  639                              zonecfg_strerror(err));
 640  640          zonecfg_fini_handle(snap_hndl);
 641  641          snap_hndl = NULL;
 642  642          return (-1);
 643  643  }
 644  644  
 645  645  int
 646  646  init_template(void)
 647  647  {
 648  648          int fd;
 649  649          int err = 0;
 650  650  
 651  651          fd = open64(CTFS_ROOT "/process/template", O_RDWR);
 652  652          if (fd == -1)
 653  653                  return (-1);
 654  654  
 655  655          /*
 656  656           * For now, zoneadmd doesn't do anything with the contract.
 657  657           * Deliver no events, don't inherit, and allow it to be orphaned.
 658  658           */
 659  659          err |= ct_tmpl_set_critical(fd, 0);
 660  660          err |= ct_tmpl_set_informative(fd, 0);
 661  661          err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
 662  662          err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
 663  663          if (err || ct_tmpl_activate(fd)) {
 664  664                  (void) close(fd);
 665  665                  return (-1);
 666  666          }
 667  667  
 668  668          return (fd);
 669  669  }
 670  670  
 671  671  typedef struct fs_callback {
 672  672          zlog_t          *zlogp;
 673  673          zoneid_t        zoneid;
 674  674          boolean_t       mount_cmd;
 675  675  } fs_callback_t;
 676  676  
 677  677  static int
 678  678  mount_early_fs(void *data, const char *spec, const char *dir,
 679  679      const char *fstype, const char *opt)
 680  680  {
 681  681          zlog_t *zlogp = ((fs_callback_t *)data)->zlogp;
 682  682          zoneid_t zoneid = ((fs_callback_t *)data)->zoneid;
 683  683          boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd;
 684  684          char rootpath[MAXPATHLEN];
 685  685          pid_t child;
 686  686          int child_status;
 687  687          int tmpl_fd;
 688  688          int rv;
 689  689          ctid_t ct;
 690  690  
 691  691          /* determine the zone rootpath */
 692  692          if (mount_cmd) {
 693  693                  char luroot[MAXPATHLEN];
 694  694  
 695  695                  (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
 696  696                  resolve_lofs(zlogp, luroot, sizeof (luroot));
 697  697                  (void) strlcpy(rootpath, luroot, sizeof (rootpath));
 698  698          } else {
 699  699                  if (zone_get_rootpath(zone_name,
 700  700                      rootpath, sizeof (rootpath)) != Z_OK) {
 701  701                          zerror(zlogp, B_FALSE, "unable to determine zone root");
 702  702                          return (-1);
 703  703                  }
 704  704          }
 705  705  
 706  706          if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) {
 707  707                  zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
 708  708                      rootpath, dir);
 709  709                  return (-1);
 710  710          } else if (rv > 0) {
 711  711                  /* The mount point path doesn't exist, create it now. */
 712  712                  if (make_one_dir(zlogp, rootpath, dir,
 713  713                      DEFAULT_DIR_MODE, DEFAULT_DIR_USER,
 714  714                      DEFAULT_DIR_GROUP) != 0) {
 715  715                          zerror(zlogp, B_FALSE, "failed to create mount point");
 716  716                          return (-1);
 717  717                  }
 718  718  
 719  719                  /*
 720  720                   * Now this might seem weird, but we need to invoke
 721  721                   * valid_mount_path() again.  Why?  Because it checks
 722  722                   * to make sure that the mount point path is canonical,
 723  723                   * which it can only do if the path exists, so now that
 724  724                   * we've created the path we have to verify it again.
 725  725                   */
 726  726                  if ((rv = valid_mount_path(zlogp, rootpath, spec, dir,
 727  727                      fstype)) < 0) {
 728  728                          zerror(zlogp, B_FALSE,
 729  729                              "%s%s is not a valid mount point", rootpath, dir);
 730  730                          return (-1);
 731  731                  }
 732  732          }
 733  733  
 734  734          if ((tmpl_fd = init_template()) == -1) {
 735  735                  zerror(zlogp, B_TRUE, "failed to create contract");
 736  736                  return (-1);
 737  737          }
 738  738  
 739  739          if ((child = fork()) == -1) {
 740  740                  (void) ct_tmpl_clear(tmpl_fd);
 741  741                  (void) close(tmpl_fd);
 742  742                  zerror(zlogp, B_TRUE, "failed to fork");
 743  743                  return (-1);
 744  744  
 745  745          } else if (child == 0) {        /* child */
 746  746                  char opt_buf[MAX_MNTOPT_STR];
 747  747                  int optlen = 0;
 748  748                  int mflag = MS_DATA;
 749  749                  int i;
 750  750                  int ret;
 751  751  
 752  752                  (void) ct_tmpl_clear(tmpl_fd);
 753  753                  /*
 754  754                   * Even though there are no procs running in the zone, we
 755  755                   * do this for paranoia's sake.
 756  756                   */
 757  757                  (void) closefrom(0);
 758  758  
 759  759                  if (zone_enter(zoneid) == -1) {
 760  760                          _exit(errno);
 761  761                  }
 762  762                  if (opt != NULL) {
 763  763                          /*
 764  764                           * The mount() system call is incredibly annoying.
 765  765                           * If options are specified, we need to copy them
 766  766                           * into a temporary buffer since the mount() system
 767  767                           * call will overwrite the options string.  It will
 768  768                           * also fail if the new option string it wants to
 769  769                           * write is bigger than the one we passed in, so
 770  770                           * you must pass in a buffer of the maximum possible
 771  771                           * option string length.  sigh.
 772  772                           */
 773  773                          (void) strlcpy(opt_buf, opt, sizeof (opt_buf));
 774  774                          opt = opt_buf;
 775  775                          optlen = MAX_MNTOPT_STR;
 776  776                          mflag = MS_OPTIONSTR;
 777  777                  }
 778  778  
 779  779                  /*
 780  780                   * There is an obscure race condition which can cause mount
 781  781                   * to return EBUSY. This happens for example on the mount
 782  782                   * of the zone's /etc/svc/volatile file system if there is
 783  783                   * a GZ process running svcs -Z, which will touch the
 784  784                   * mountpoint, just as we're trying to do the mount. To cope
 785  785                   * with this, we retry up to 3 times to let this transient
 786  786                   * process get out of the way.
 787  787                   */
 788  788                  for (i = 0; i < 3; i++) {
 789  789                          ret = 0;
 790  790                          if (mount(spec, dir, mflag, fstype, NULL, 0, opt,
 791  791                              optlen) != 0)
 792  792                                  ret = errno;
 793  793                          if (ret != EBUSY)
 794  794                                  break;
 795  795                          (void) sleep(1);
 796  796                  }
 797  797                  _exit(ret);
 798  798          }
 799  799  
 800  800          /* parent */
 801  801          if (contract_latest(&ct) == -1)
 802  802                  ct = -1;
 803  803          (void) ct_tmpl_clear(tmpl_fd);
 804  804          (void) close(tmpl_fd);
 805  805          if (waitpid(child, &child_status, 0) != child) {
 806  806                  /* unexpected: we must have been signalled */
 807  807                  (void) contract_abandon_id(ct);
 808  808                  return (-1);
 809  809          }
 810  810          (void) contract_abandon_id(ct);
 811  811          if (WEXITSTATUS(child_status) != 0) {
 812  812                  errno = WEXITSTATUS(child_status);
 813  813                  zerror(zlogp, B_TRUE, "mount of %s failed", dir);
 814  814                  return (-1);
 815  815          }
 816  816  
 817  817          return (0);
 818  818  }
 819  819  
 820  820  /*
 821  821   * Replace characters other than [A-Za-z0-9_] with '_' so that the string is a
 822  822   * valid environment variable name.
 823  823   */
 824  824  static void
 825  825  sanitize_env_var_name(char *var)
 826  826  {
 827  827          for (char *p = var; *p != '\0'; p++) {
 828  828                  if (!isalnum(*p)) {
 829  829                          *p = '_';
 830  830                  }
 831  831          }
 832  832  }
 833  833  
 834  834  /*
 835  835   * env variable name format
 836  836   *      _ZONECFG_{resource name}_{identifying attr. name}_{property name}
 837  837   * Any dashes (-) in the property names are replaced with underscore (_).
 838  838   */
 839  839  static void
 840  840  set_zonecfg_env(char *rsrc, char *attr, char *name, char *val)
 841  841  {
 842  842          /* Enough for maximal name, rsrc + attr, & slop for ZONECFG & _'s */
 843  843          char nm[2 * MAXNAMELEN + 32];
 844  844  
 845  845          if (attr == NULL)
 846  846                  (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s", rsrc,
 847  847                      name);
 848  848          else
 849  849                  (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s_%s", rsrc,
 850  850                      attr, name);
 851  851  
 852  852          sanitize_env_var_name(nm);
 853  853  
 854  854          (void) setenv(nm, val, 1);
 855  855  }
 856  856  
 857  857  /*
 858  858   * Resolve a device:match value to a path.  This is only different for PPT
 859  859   * devices, where we expect the match property to be a /devices/... path, and
 860  860   * configured for PPT already.
 861  861   */
 862  862  int
 863  863  resolve_device_match(zlog_t *zlogp, struct zone_devtab *dtab,
 864  864      char *path, size_t len)
 865  865  {
 866  866          struct zone_res_attrtab *rap;
 867  867  
 868  868          for (rap = dtab->zone_dev_attrp; rap != NULL;
 869  869              rap = rap->zone_res_attr_next) {
 870  870                  if (strcmp(rap->zone_res_attr_name, "model") == 0 &&
 871  871                      strcmp(rap->zone_res_attr_value, "passthru") == 0)
 872  872                          break;
 873  873          }
 874  874  
 875  875          if (rap == NULL) {
 876  876                  if (strlcpy(path, dtab->zone_dev_match, len) >= len)
 877  877                          return (Z_INVAL);
 878  878                  return (Z_OK);
 879  879          }
 880  880  
 881  881          if (strncmp(dtab->zone_dev_match, "/devices",
 882  882              strlen("/devices")) != 0) {
 883  883                  zerror(zlogp, B_FALSE, "invalid passthru match value '%s'",
 884  884                      dtab->zone_dev_match);
 885  885                  return (Z_INVAL);
 886  886          }
 887  887  
 888  888          if (ppt_devpath_to_dev(dtab->zone_dev_match, path, len) != 0) {
 889  889                  zerror(zlogp, B_TRUE, "failed to resolve passthru device %s",
 890  890                      dtab->zone_dev_match);
 891  891                  return (Z_INVAL);
 892  892          }
 893  893  
 894  894          return (Z_OK);
 895  895  }
 896  896  
 897  897  /*
 898  898   * Export various zonecfg properties into environment for the boot and state
 899  899   * change hooks.
 900  900   *
 901  901   * If debug is true, _ZONEADMD_brand_debug is set to 1, else it is set to an
 902  902   * empty string.  Brand hooks consider any non-empty string as an indication
 903  903   * that debug output is requested.
 904  904   *
 905  905   * We could export more of the config in the future, as necessary.  A better
 906  906   * solution would be to make it so brand-specific behavior is handled by
 907  907   * brand-specific callbacks written in C.  Then the normal libzonecfg interfaces
 908  908   * can be used for accessing any parts of the configuration that are needed.
 909  909   *
 910  910   * All of the environment variables set by this function are specific to
 911  911   * SmartOS.
 912  912   */
 913  913  static int
 914  914  setup_subproc_env(zlog_t *zlogp, boolean_t debug)
 915  915  {
 916  916          int res;
 917  917          struct zone_nwiftab ntab;
 918  918          struct zone_devtab dtab;
 919  919          struct zone_attrtab atab;
 920  920          char net_resources[MAXNAMELEN * 2];
 921  921          char dev_resources[MAXNAMELEN * 2];
 922  922          char didstr[16];
 923  923          char uuidstr[UUID_PRINTABLE_STRING_LENGTH];
 924  924          uuid_t uuid;
 925  925  
 926  926          /* snap_hndl is null when called through the set_brand_env code path */
 927  927          if (snap_hndl == NULL)
 928  928                  return (Z_OK);
 929  929  
 930  930          if ((res = zonecfg_get_uuid(zone_name, uuid)) != Z_OK)
 931  931                  return (res);
 932  932  
 933  933          uuid_unparse(uuid, uuidstr);
 934  934          (void) setenv("_ZONECFG_uuid", uuidstr, 1);
 935  935  
 936  936          (void) snprintf(didstr, sizeof (didstr), "%d", zone_did);
 937  937          (void) setenv("_ZONECFG_did", didstr, 1);
 938  938  
 939  939          /*
 940  940           * "net" resources are exported because zoneadmd does not handle
 941  941           * automatic configuration of vnics and so that the bhyve boot hook
 942  942           * can generate the argument list for the brand's init program.  At such
 943  943           * a time as vnic creation is handled in zoneadmd and brand callbacks
 944  944           * can be executed as part of the zoneadmd process this should be
 945  945           * removed.
 946  946           */
 947  947          net_resources[0] = '\0';
 948  948          if ((res = zonecfg_setnwifent(snap_hndl)) != Z_OK)
 949  949                  goto done;
 950  950  
 951  951          while (zonecfg_getnwifent(snap_hndl, &ntab) == Z_OK) {
 952  952                  struct zone_res_attrtab *rap;
 953  953                  char *phys;
 954  954  
 955  955                  phys = ntab.zone_nwif_physical;
 956  956  
 957  957                  (void) strlcat(net_resources, phys, sizeof (net_resources));
 958  958                  (void) strlcat(net_resources, " ", sizeof (net_resources));
 959  959  
 960  960                  set_zonecfg_env(RSRC_NET, phys, "physical", phys);
 961  961  
 962  962                  set_zonecfg_env(RSRC_NET, phys, "address",
 963  963                      ntab.zone_nwif_address);
 964  964                  set_zonecfg_env(RSRC_NET, phys, "allowed-address",
 965  965                      ntab.zone_nwif_allowed_address);
 966  966                  set_zonecfg_env(RSRC_NET, phys, "defrouter",
 967  967                      ntab.zone_nwif_defrouter);
 968  968                  set_zonecfg_env(RSRC_NET, phys, "global-nic",
 969  969                      ntab.zone_nwif_gnic);
 970  970                  set_zonecfg_env(RSRC_NET, phys, "mac-addr", ntab.zone_nwif_mac);
 971  971                  set_zonecfg_env(RSRC_NET, phys, "vlan-id",
 972  972                      ntab.zone_nwif_vlan_id);
 973  973  
 974  974                  for (rap = ntab.zone_nwif_attrp; rap != NULL;
 975  975                      rap = rap->zone_res_attr_next)
 976  976                          set_zonecfg_env(RSRC_NET, phys, rap->zone_res_attr_name,
 977  977                              rap->zone_res_attr_value);
 978  978                  nwifent_free_attrs(&ntab);
 979  979          }
 980  980  
 981  981          (void) setenv("_ZONECFG_net_resources", net_resources, 1);
 982  982  
 983  983          (void) zonecfg_endnwifent(snap_hndl);
 984  984  
 985  985          /*
 986  986           * "device" resources are exported because the bhyve boot brand callback
 987  987           * needs them to generate the argument list for the brand's init
 988  988           * program.  At such a time as brand callbacks can be executed as part
 989  989           * of the zoneadmd process, this should be removed.
 990  990           *
 991  991           * The bhyve brand only supports disk-like and ppt devices and does not
 992  992           * support regular expressions.
 993  993           */
 994  994          if ((res = zonecfg_setdevent(snap_hndl)) != Z_OK)
 995  995                  goto done;
 996  996  
 997  997          dev_resources[0] = '\0';
 998  998          while (zonecfg_getdevent(snap_hndl, &dtab) == Z_OK) {
 999  999                  char *match = dtab.zone_dev_match;
1000 1000                  struct zone_res_attrtab *rap;
1001 1001                  char path[MAXPATHLEN];
1002 1002  
1003 1003                  res = resolve_device_match(zlogp, &dtab, path, sizeof (path));
1004 1004                  if (res != Z_OK)
1005 1005                          goto done;
1006 1006  
1007 1007                  /*
1008 1008                   * Even if not modified, the match path will be mangled in the
1009 1009                   * environment variable name, so we always store the value here.
1010 1010                   */
1011 1011                  set_zonecfg_env(RSRC_DEV, match, "path", path);
1012 1012  
1013 1013                  for (rap = dtab.zone_dev_attrp; rap != NULL;
1014 1014                      rap = rap->zone_res_attr_next) {
1015 1015                          set_zonecfg_env(RSRC_DEV, match,
1016 1016                              rap->zone_res_attr_name, rap->zone_res_attr_value);
1017 1017                  }
1018 1018  
1019 1019                  /*
1020 1020                   * _ZONECFG_device_resources will contain a space separated list
1021 1021                   * of devices that have _ZONECFG_device_<device>* environment
1022 1022                   * variables.  So that each element of the list matches up with
1023 1023                   * <device>, each list item needs to be sanitized in the same
1024 1024                   * way that environment variable names are sanitized.
1025 1025                   */
1026 1026                  sanitize_env_var_name(match);
1027 1027                  (void) strlcat(dev_resources, match, sizeof (dev_resources));
1028 1028                  (void) strlcat(dev_resources, " ", sizeof (dev_resources));
1029 1029          }
1030 1030          (void) zonecfg_enddevent(snap_hndl);
1031 1031  
1032 1032          (void) setenv("_ZONECFG_device_resources", dev_resources, 1);
1033 1033  
1034 1034          /*
1035 1035           * "attr" resources are exported because the bhyve brand's boot hook
1036 1036           * needs access to the "ram", "cpu", "bootrom", etc. to form the
1037 1037           * argument list for the brand's init program.  Once the bhyve brand is
1038 1038           * configured via proper resources and properties, this should be
1039 1039           * removed.
1040 1040           */
1041 1041          if ((res = zonecfg_setattrent(snap_hndl)) != Z_OK)
1042 1042                  goto done;
1043 1043  
1044 1044          while (zonecfg_getattrent(snap_hndl, &atab) == Z_OK) {
1045 1045                  set_zonecfg_env("attr", NULL, atab.zone_attr_name,
1046 1046                      atab.zone_attr_value);
1047 1047          }
1048 1048  
1049 1049          (void) zonecfg_endattrent(snap_hndl);
1050 1050  
1051 1051          if (debug)
1052 1052                  (void) setenv("_ZONEADMD_brand_debug", "1", 1);
1053 1053          else
1054 1054                  (void) setenv("_ZONEADMD_brand_debug", "", 1);
1055 1055  
1056 1056          res = Z_OK;
1057 1057  
1058 1058  done:
1059 1059          return (res);
1060 1060  }
1061 1061  
1062 1062  void
1063 1063  nwifent_free_attrs(struct zone_nwiftab *np)
1064 1064  {
1065 1065          struct zone_res_attrtab *rap;
1066 1066  
1067 1067          for (rap = np->zone_nwif_attrp; rap != NULL; ) {
1068 1068                  struct zone_res_attrtab *tp = rap;
1069 1069  
1070 1070                  rap = rap->zone_res_attr_next;
1071 1071                  free(tp);
1072 1072          }
1073 1073  }
1074 1074  
1075 1075  /*
1076 1076   * If retstr is not NULL, the output of the subproc is returned in the str,
1077 1077   * otherwise it is output using zerror().  Any memory allocated for retstr
1078 1078   * should be freed by the caller.
1079 1079   */
1080 1080  int
1081 1081  do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr, boolean_t debug)
1082 1082  {
1083 1083          char buf[1024];         /* arbitrary large amount */
1084 1084          char *inbuf;
1085 1085          FILE *file;
1086 1086          int status;
1087 1087          int rd_cnt;
1088 1088          int fds[2];
1089 1089          pid_t child;
1090 1090  
1091 1091          if (retstr != NULL) {
1092 1092                  if ((*retstr = malloc(1024)) == NULL) {
1093 1093                          zerror(zlogp, B_FALSE, "out of memory");
1094 1094                          return (-1);
1095 1095                  }
1096 1096                  inbuf = *retstr;
1097 1097                  rd_cnt = 0;
1098 1098          } else {
1099 1099                  inbuf = buf;
1100 1100          }
1101 1101  
1102 1102          if (pipe(fds) != 0) {
1103 1103                  zerror(zlogp, B_TRUE, "failed to create pipe for subprocess");
1104 1104                  return (-1);
1105 1105          }
1106 1106  
1107 1107          if ((child = fork()) == 0) {
1108 1108                  int in;
1109 1109  
1110 1110                  /*
1111 1111                   * SIGINT is currently ignored.  It probably shouldn't be so
1112 1112                   * hard to kill errant children, so we revert to SIG_DFL.
1113 1113                   * SIGHUP and SIGUSR1 are used to perform log rotation.  We
1114 1114                   * leave those as-is because we don't want a 'pkill -HUP
1115 1115                   * zoneadmd' to kill this child process before exec().  On
1116 1116                   * exec(), SIGHUP and SIGUSR1 will become SIG_DFL.
1117 1117                   */
1118 1118                  (void) sigset(SIGINT, SIG_DFL);
1119 1119  
1120 1120                  /*
1121 1121                   * Set up a pipe for the child to log to.
1122 1122                   */
1123 1123                  if (dup2(fds[1], STDERR_FILENO) == -1) {
1124 1124                          (void) snprintf(buf, sizeof (buf),
1125 1125                              "subprocess failed to dup2(STDERR_FILENO): %s\n",
1126 1126                              strerror(errno));
1127 1127                          (void) write(fds[1], buf, strlen(buf));
1128 1128                          _exit(127);
1129 1129                  }
1130 1130                  if (dup2(fds[1], STDOUT_FILENO) == -1) {
1131 1131                          perror("subprocess failed to dup2(STDOUT_FILENO)");
1132 1132                          _exit(127);
1133 1133                  }
1134 1134                  /*
1135 1135                   * Some naughty children may try to read from stdin.  Be sure
1136 1136                   * that the first file that a child opens doesn't get stdin's
1137 1137                   * file descriptor.
1138 1138                   */
1139 1139                  if ((in = open("/dev/null", O_RDONLY)) == -1 ||
1140 1140                      dup2(in, STDIN_FILENO) == -1) {
1141 1141                          zerror(zlogp, B_TRUE,
1142 1142                              "subprocess failed to set up STDIN_FILENO");
1143 1143                          _exit(127);
1144 1144                  }
1145 1145                  closefrom(STDERR_FILENO + 1);
1146 1146  
1147 1147                  if (setup_subproc_env(zlogp, debug) != Z_OK) {
1148 1148                          zerror(zlogp, B_FALSE, "failed to setup environment");
1149 1149                          _exit(127);
1150 1150                  }
1151 1151  
1152 1152                  (void) execl("/bin/sh", "sh", "-c", cmdbuf, NULL);
1153 1153  
1154 1154                  zerror(zlogp, B_TRUE, "subprocess execl failed");
1155 1155                  _exit(127);
1156 1156          } else if (child == -1) {
1157 1157                  zerror(zlogp, B_TRUE, "failed to create subprocess for '%s'",
1158 1158                      cmdbuf);
1159 1159                  (void) close(fds[0]);
1160 1160                  (void) close(fds[1]);
1161 1161                  return (-1);
1162 1162          }
1163 1163  
1164 1164          (void) close(fds[1]);
1165 1165  
1166 1166          file = fdopen(fds[0], "r");
1167 1167          while (fgets(inbuf, 1024, file) != NULL) {
1168 1168                  if (retstr == NULL) {
1169 1169                          if (zlogp != &logsys) {
1170 1170                                  int last = strlen(inbuf) - 1;
1171 1171  
1172 1172                                  if (inbuf[last] == '\n')
1173 1173                                          inbuf[last] = '\0';
1174 1174                                  zerror(zlogp, B_FALSE, "%s", inbuf);
1175 1175                          }
1176 1176                  } else {
1177 1177                          char *p;
1178 1178  
1179 1179                          rd_cnt += 1024 - 1;
1180 1180                          if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
1181 1181                                  zerror(zlogp, B_FALSE, "out of memory");
1182 1182                                  break;
1183 1183                          }
1184 1184  
1185 1185                          *retstr = p;
1186 1186                          inbuf = *retstr + rd_cnt;
1187 1187                  }
1188 1188          }
1189 1189  
1190 1190          while (fclose(file) != 0) {
1191 1191                  assert(errno == EINTR);
1192 1192          }
1193 1193          while (waitpid(child, &status, 0) == -1) {
1194 1194                  if (errno != EINTR) {
1195 1195                          zerror(zlogp, B_TRUE,
1196 1196                              "failed to get exit status of '%s'", cmdbuf);
1197 1197                          return (-1);
1198 1198                  }
1199 1199          }
1200 1200  
1201 1201          if (WIFSIGNALED(status)) {
1202 1202                  zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
1203 1203                      "signal %d", cmdbuf, WTERMSIG(status));
1204 1204                  return (-1);
1205 1205          }
1206 1206          assert(WIFEXITED(status));
1207 1207          if (WEXITSTATUS(status) == ZEXIT_EXEC) {
1208 1208                  zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
1209 1209                  return (-1);
1210 1210          }
1211 1211          return (WEXITSTATUS(status));
1212 1212  }
1213 1213  
1214 1214  /*
1215 1215   * Get the path for this zone's init(1M) (or equivalent) process. First look
1216 1216   * for a zone-specific init-name attr, then get it from the brand.
1217 1217   */
1218 1218  static int
1219 1219  get_initname(brand_handle_t bh, char *initname, int len)
1220 1220  {
1221 1221          struct zone_attrtab a;
1222 1222  
1223 1223          bzero(&a, sizeof (a));
1224 1224          (void) strlcpy(a.zone_attr_name, "init-name",
1225 1225              sizeof (a.zone_attr_name));
1226 1226  
1227 1227          if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
1228 1228                  (void) strlcpy(initname, a.zone_attr_value, len);
1229 1229                  return (0);
1230 1230          }
1231 1231  
1232 1232          return (brand_get_initname(bh, initname, len));
1233 1233  }
1234 1234  
1235 1235  /*
1236 1236   * Get the restart-init flag for this zone's init(1M) (or equivalent) process.
1237 1237   * First look for a zone-specific restart-init attr, then get it from the brand.
1238 1238   */
1239 1239  static boolean_t
1240 1240  restartinit(brand_handle_t bh)
1241 1241  {
1242 1242          struct zone_attrtab a;
1243 1243  
1244 1244          bzero(&a, sizeof (a));
1245 1245          (void) strlcpy(a.zone_attr_name, "restart-init",
1246 1246              sizeof (a.zone_attr_name));
1247 1247  
1248 1248          if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
1249 1249                  if (strcmp(a.zone_attr_value, "false") == 0)
1250 1250                          return (B_FALSE);
1251 1251                  return (B_TRUE);
1252 1252          }
1253 1253  
1254 1254          return (brand_restartinit(bh));
1255 1255  }
1256 1256  
1257 1257  /*
1258 1258   * Get the app-svc-dependent flag for this zone's init process. This is a
1259 1259   * zone-specific attr which controls the type of contract we create for the
1260 1260   * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal
1261 1261   * set, so that when any service which is in the same contract exits, the init
1262 1262   * application will be terminated.
1263 1263   */
1264 1264  static boolean_t
1265 1265  is_app_svc_dep(void)
1266 1266  {
1267 1267          struct zone_attrtab a;
1268 1268  
1269 1269          bzero(&a, sizeof (a));
1270 1270          (void) strlcpy(a.zone_attr_name, "app-svc-dependent",
1271 1271              sizeof (a.zone_attr_name));
1272 1272  
1273 1273          if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
1274 1274              strcmp(a.zone_attr_value, "true") == 0) {
1275 1275                  return (B_TRUE);
1276 1276          }
1277 1277  
1278 1278          return (B_FALSE);
1279 1279  }
1280 1280  
1281 1281  static int
1282 1282  zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate, boolean_t debug)
1283 1283  {
1284 1284          zoneid_t zoneid;
  
    | 
      ↓ open down ↓ | 
    1284 lines elided | 
    
      ↑ open up ↑ | 
  
1285 1285          struct stat st;
1286 1286          char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
1287 1287          char nbootargs[BOOTARGS_MAX];
1288 1288          char cmdbuf[MAXPATHLEN];
1289 1289          fs_callback_t cb;
1290 1290          brand_handle_t bh;
1291 1291          zone_iptype_t iptype;
1292 1292          dladm_status_t status;
1293 1293          char errmsg[DLADM_STRSIZE];
1294 1294          int err;
1295      -        boolean_t restart_init;
1296 1295          boolean_t app_svc_dep;
     1296 +        boolean_t restart_init, restart_init0, restart_initreboot;
1297 1297  
1298 1298          if (brand_prestatechg(zlogp, zstate, Z_BOOT, debug) != 0)
1299 1299                  return (-1);
1300 1300  
1301 1301          if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1302 1302                  zerror(zlogp, B_TRUE, "unable to get zoneid");
1303 1303                  goto bad;
1304 1304          }
1305 1305  
1306 1306          cb.zlogp = zlogp;
1307 1307          cb.zoneid = zoneid;
1308 1308          cb.mount_cmd = B_FALSE;
1309 1309  
1310 1310          /* Get a handle to the brand info for this zone */
1311 1311          if ((bh = brand_open(brand_name)) == NULL) {
1312 1312                  zerror(zlogp, B_FALSE, "unable to determine zone brand");
1313 1313                  goto bad;
1314 1314          }
1315 1315  
1316 1316          /*
1317 1317           * Get the list of filesystems to mount from the brand
1318 1318           * configuration.  These mounts are done via a thread that will
1319 1319           * enter the zone, so they are done from within the context of the
1320 1320           * zone.
1321 1321           */
1322 1322          if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
1323 1323                  zerror(zlogp, B_FALSE, "unable to mount filesystems");
1324 1324                  brand_close(bh);
1325 1325                  goto bad;
1326 1326          }
1327 1327  
1328 1328          /*
1329 1329           * Get the brand's boot callback if it exists.
1330 1330           */
1331 1331          (void) strcpy(cmdbuf, EXEC_PREFIX);
1332 1332          if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
1333 1333              sizeof (cmdbuf) - EXEC_LEN) != 0) {
1334 1334                  zerror(zlogp, B_FALSE,
1335 1335                      "unable to determine branded zone's boot callback");
1336 1336                  brand_close(bh);
1337 1337                  goto bad;
  
    | 
      ↓ open down ↓ | 
    31 lines elided | 
    
      ↑ open up ↑ | 
  
1338 1338          }
1339 1339  
1340 1340          /* Get the path for this zone's init(1M) (or equivalent) process.  */
1341 1341          if (get_initname(bh, init_file, MAXPATHLEN) != 0) {
1342 1342                  zerror(zlogp, B_FALSE,
1343 1343                      "unable to determine zone's init(1M) location");
1344 1344                  brand_close(bh);
1345 1345                  goto bad;
1346 1346          }
1347 1347  
1348      -        /* See if we should restart init if it dies. */
     1348 +        /* See if this zone's brand should restart init if it dies. */
1349 1349          restart_init = restartinit(bh);
     1350 +        restart_init0 = brand_restartinit0(bh);
     1351 +        restart_initreboot = brand_restartinitreboot(bh);
1350 1352  
1351 1353          /*
1352 1354           * See if we need to setup contract dependencies between the zone's
1353 1355           * primary application and any of its services.
1354 1356           */
1355 1357          app_svc_dep = is_app_svc_dep();
1356 1358  
1357 1359          brand_close(bh);
1358 1360  
1359 1361          err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
1360 1362          if (err != Z_OK)
1361 1363                  goto bad;
1362 1364  
1363 1365          assert(init_file[0] != '\0');
1364 1366  
1365 1367          /*
1366 1368           * Try to anticipate possible problems: If possible, make sure init is
1367 1369           * executable.
1368 1370           */
1369 1371          if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) {
1370 1372                  zerror(zlogp, B_FALSE, "unable to determine zone root");
1371 1373                  goto bad;
1372 1374          }
1373 1375  
1374 1376          (void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file);
1375 1377  
1376 1378          if (lstat(initpath, &st) == -1) {
1377 1379                  zerror(zlogp, B_TRUE, "could not stat %s", initpath);
1378 1380                  goto bad;
1379 1381          }
1380 1382  
1381 1383          /* LINTED: E_NOP_IF_STMT */
1382 1384          if ((st.st_mode & S_IFMT) == S_IFLNK) {
1383 1385                  /* symlink, we'll have to wait and resolve when we boot */
1384 1386          } else if ((st.st_mode & S_IXUSR) == 0) {
1385 1387                  zerror(zlogp, B_FALSE, "%s is not executable", initpath);
1386 1388                  goto bad;
1387 1389          }
1388 1390  
1389 1391          /*
1390 1392           * Exclusive stack zones interact with the dlmgmtd running in the
1391 1393           * global zone.  dladm_zone_boot() tells dlmgmtd that this zone is
1392 1394           * booting, and loads its datalinks from the zone's datalink
1393 1395           * configuration file.
1394 1396           */
1395 1397          if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
1396 1398                  status = dladm_zone_boot(dld_handle, zoneid);
1397 1399                  if (status != DLADM_STATUS_OK) {
1398 1400                          zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
1399 1401                              " %s", dladm_status2str(status, errmsg));
1400 1402                          goto bad;
1401 1403                  }
1402 1404          }
1403 1405  
1404 1406          /*
1405 1407           * If there is a brand 'boot' callback, execute it now to give the
1406 1408           * brand one last chance to do any additional setup before the zone
1407 1409           * is booted.
1408 1410           */
1409 1411          if ((strlen(cmdbuf) > EXEC_LEN) &&
1410 1412              (do_subproc(zlogp, cmdbuf, NULL, debug) != Z_OK)) {
1411 1413                  zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
1412 1414                  goto bad;
1413 1415          }
1414 1416  
1415 1417          if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
1416 1418                  zerror(zlogp, B_TRUE, "could not set zone boot file");
1417 1419                  goto bad;
1418 1420          }
1419 1421  
  
    | 
      ↓ open down ↓ | 
    60 lines elided | 
    
      ↑ open up ↑ | 
  
1420 1422          if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
1421 1423                  zerror(zlogp, B_TRUE, "could not set zone boot arguments");
1422 1424                  goto bad;
1423 1425          }
1424 1426  
1425 1427          if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART,
1426 1428              NULL, 0) == -1) {
1427 1429                  zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
1428 1430                  goto bad;
1429 1431          }
     1432 +        if (restart_init0 && zone_setattr(zoneid, ZONE_ATTR_INITRESTART0,
     1433 +            NULL, 0) == -1) {
     1434 +                zerror(zlogp, B_TRUE,
     1435 +                    "could not set zone init-restart-on-exit-0");
     1436 +                goto bad;
     1437 +        }
     1438 +        if (restart_initreboot && zone_setattr(zoneid, ZONE_ATTR_INITREBOOT,
     1439 +            NULL, 0) == -1) {
     1440 +                zerror(zlogp, B_TRUE, "could not set zone reboot-on-init-exit");
     1441 +                goto bad;
     1442 +        }
1430 1443  
1431 1444          if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT,
1432 1445              (void *)B_TRUE, sizeof (boolean_t)) == -1) {
1433 1446                  zerror(zlogp, B_TRUE, "could not set zone app-die");
1434 1447                  goto bad;
1435 1448          }
1436 1449  
1437 1450          /*
1438 1451           * Inform zonestatd of a new zone so that it can install a door for
1439 1452           * the zone to contact it.
1440 1453           */
1441 1454          notify_zonestatd(zone_id);
1442 1455  
1443 1456          /* Startup a thread to perform zfd logging/tty svc for the zone. */
1444 1457          create_log_thread(zlogp);
1445 1458  
1446 1459          if (zone_boot(zoneid) == -1) {
1447 1460                  zerror(zlogp, B_TRUE, "unable to boot zone");
1448 1461                  destroy_log_thread(zlogp);
1449 1462                  goto bad;
1450 1463          }
1451 1464  
1452 1465          if (brand_poststatechg(zlogp, zstate, Z_BOOT, debug) != 0) {
1453 1466                  destroy_log_thread(zlogp);
1454 1467                  goto bad;
1455 1468          }
1456 1469  
1457 1470          return (0);
1458 1471  
1459 1472  bad:
1460 1473          /*
1461 1474           * If something goes wrong, we up the zones's state to the target
1462 1475           * state, RUNNING, and then invoke the hook as if we're halting.
1463 1476           */
1464 1477          (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT, debug);
1465 1478  
1466 1479          return (-1);
1467 1480  }
1468 1481  
1469 1482  static int
1470 1483  zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate,
1471 1484      boolean_t debug)
1472 1485  {
1473 1486          int err;
1474 1487  
1475 1488          /*
1476 1489           * If performing a scratch zone unmount then do not call the
1477 1490           * state change hooks.
1478 1491           */
1479 1492          if (unmount_cmd == B_FALSE &&
1480 1493              brand_prestatechg(zlogp, zstate, Z_HALT, debug) != 0)
1481 1494                  return (-1);
1482 1495  
1483 1496          if (vplat_teardown(zlogp, unmount_cmd, rebooting, debug) != 0) {
1484 1497                  if (!bringup_failure_recovery)
1485 1498                          zerror(zlogp, B_FALSE, "unable to destroy zone");
1486 1499                  destroy_log_thread(zlogp);
1487 1500                  return (-1);
1488 1501          }
1489 1502  
1490 1503          /* Shut down is done, stop the log thread */
1491 1504          destroy_log_thread(zlogp);
1492 1505  
1493 1506          if (unmount_cmd == B_FALSE &&
1494 1507              brand_poststatechg(zlogp, zstate, Z_HALT, debug) != 0)
1495 1508                  return (-1);
1496 1509  
1497 1510          if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
1498 1511                  zerror(zlogp, B_FALSE, "destroying snapshot: %s",
1499 1512                      zonecfg_strerror(err));
1500 1513  
1501 1514          zonecfg_fini_handle(snap_hndl);
1502 1515          snap_hndl = NULL;
1503 1516  
1504 1517          return (0);
1505 1518  }
1506 1519  
1507 1520  static int
1508 1521  zone_graceful_shutdown(zlog_t *zlogp)
1509 1522  {
1510 1523          zoneid_t zoneid;
1511 1524          pid_t child;
1512 1525          char cmdbuf[MAXPATHLEN];
1513 1526          brand_handle_t bh = NULL;
1514 1527          ctid_t ct;
1515 1528          int tmpl_fd;
1516 1529          int child_status;
1517 1530  
1518 1531          if (shutdown_in_progress) {
1519 1532                  zerror(zlogp, B_FALSE, "shutdown already in progress");
1520 1533                  return (-1);
1521 1534          }
1522 1535  
1523 1536          if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1524 1537                  zerror(zlogp, B_TRUE, "unable to get zoneid");
1525 1538                  return (-1);
1526 1539          }
1527 1540  
1528 1541          /* Get a handle to the brand info for this zone */
1529 1542          if ((bh = brand_open(brand_name)) == NULL) {
1530 1543                  zerror(zlogp, B_FALSE, "unable to determine zone brand");
1531 1544                  return (-1);
1532 1545          }
1533 1546  
1534 1547          /*
1535 1548           * If there is a brand 'shutdown' callback, execute it now to give the
1536 1549           * brand a chance to cleanup any custom configuration.
1537 1550           */
1538 1551          (void) strcpy(cmdbuf, EXEC_PREFIX);
1539 1552          if (brand_get_shutdown(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
1540 1553              sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
1541 1554                  (void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
1542 1555          }
1543 1556          brand_close(bh);
1544 1557  
1545 1558          if ((tmpl_fd = init_template()) == -1) {
1546 1559                  zerror(zlogp, B_TRUE, "failed to create contract");
1547 1560                  return (-1);
1548 1561          }
1549 1562  
1550 1563          if ((child = fork()) == -1) {
1551 1564                  (void) ct_tmpl_clear(tmpl_fd);
1552 1565                  (void) close(tmpl_fd);
1553 1566                  zerror(zlogp, B_TRUE, "failed to fork");
1554 1567                  return (-1);
1555 1568          } else if (child == 0) {
1556 1569                  (void) ct_tmpl_clear(tmpl_fd);
1557 1570                  if (zone_enter(zoneid) == -1) {
1558 1571                          _exit(errno);
1559 1572                  }
1560 1573                  _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL));
1561 1574          }
1562 1575  
1563 1576          if (contract_latest(&ct) == -1)
1564 1577                  ct = -1;
1565 1578          (void) ct_tmpl_clear(tmpl_fd);
1566 1579          (void) close(tmpl_fd);
1567 1580  
1568 1581          if (waitpid(child, &child_status, 0) != child) {
1569 1582                  /* unexpected: we must have been signalled */
1570 1583                  (void) contract_abandon_id(ct);
1571 1584                  return (-1);
1572 1585          }
1573 1586  
1574 1587          (void) contract_abandon_id(ct);
1575 1588          if (WEXITSTATUS(child_status) != 0) {
1576 1589                  errno = WEXITSTATUS(child_status);
1577 1590                  zerror(zlogp, B_FALSE, "unable to shutdown zone");
1578 1591                  return (-1);
1579 1592          }
1580 1593  
1581 1594          shutdown_in_progress = B_TRUE;
1582 1595  
1583 1596          return (0);
1584 1597  }
1585 1598  
1586 1599  static int
1587 1600  zone_wait_shutdown(zlog_t *zlogp)
1588 1601  {
1589 1602          zone_state_t zstate;
1590 1603          uint64_t *tm = NULL;
1591 1604          scf_simple_prop_t *prop = NULL;
1592 1605          int timeout;
1593 1606          int tries;
1594 1607          int rc = -1;
1595 1608  
1596 1609          /* Get default stop timeout from SMF framework */
1597 1610          timeout = SHUTDOWN_WAIT;
1598 1611          if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop",
1599 1612              SCF_PROPERTY_TIMEOUT)) != NULL) {
1600 1613                  if ((tm = scf_simple_prop_next_count(prop)) != NULL) {
1601 1614                          if (tm != 0)
1602 1615                                  timeout = *tm;
1603 1616                  }
1604 1617                  scf_simple_prop_free(prop);
1605 1618          }
1606 1619  
1607 1620          /* allow time for zone to shutdown cleanly */
1608 1621          for (tries = 0; tries < timeout; tries ++) {
1609 1622                  (void) sleep(1);
1610 1623                  if (zone_get_state(zone_name, &zstate) == Z_OK &&
1611 1624                      zstate == ZONE_STATE_INSTALLED) {
1612 1625                          rc = 0;
1613 1626                          break;
1614 1627                  }
1615 1628          }
1616 1629  
1617 1630          if (rc != 0)
1618 1631                  zerror(zlogp, B_FALSE, "unable to shutdown zone");
1619 1632  
1620 1633          shutdown_in_progress = B_FALSE;
1621 1634  
1622 1635          return (rc);
1623 1636  }
1624 1637  
1625 1638  
1626 1639  
1627 1640  /*
1628 1641   * Generate AUE_zone_state for a command that boots a zone.
1629 1642   */
1630 1643  static void
1631 1644  audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
1632 1645      char *new_state)
1633 1646  {
1634 1647          adt_session_data_t      *ah;
1635 1648          adt_event_data_t        *event;
1636 1649          int                     pass_fail, fail_reason;
1637 1650  
1638 1651          if (!adt_audit_enabled())
1639 1652                  return;
1640 1653  
1641 1654          if (return_val == 0) {
1642 1655                  pass_fail = ADT_SUCCESS;
1643 1656                  fail_reason = ADT_SUCCESS;
1644 1657          } else {
1645 1658                  pass_fail = ADT_FAILURE;
1646 1659                  fail_reason = ADT_FAIL_VALUE_PROGRAM;
1647 1660          }
1648 1661  
1649 1662          if (adt_start_session(&ah, NULL, 0)) {
1650 1663                  zerror(zlogp, B_TRUE, gettext("audit failure."));
1651 1664                  return;
1652 1665          }
1653 1666          if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
1654 1667                  zerror(zlogp, B_TRUE, gettext("audit failure."));
1655 1668                  (void) adt_end_session(ah);
1656 1669                  return;
1657 1670          }
1658 1671  
1659 1672          event = adt_alloc_event(ah, ADT_zone_state);
1660 1673          if (event == NULL) {
1661 1674                  zerror(zlogp, B_TRUE, gettext("audit failure."));
1662 1675                  (void) adt_end_session(ah);
1663 1676                  return;
1664 1677          }
1665 1678          event->adt_zone_state.zonename = zone_name;
1666 1679          event->adt_zone_state.new_state = new_state;
1667 1680  
1668 1681          if (adt_put_event(event, pass_fail, fail_reason))
1669 1682                  zerror(zlogp, B_TRUE, gettext("audit failure."));
1670 1683  
1671 1684          adt_free_event(event);
1672 1685  
1673 1686          (void) adt_end_session(ah);
1674 1687  }
1675 1688  
1676 1689  /*
1677 1690   * Log the exit time and status of the zone's init process into
1678 1691   * {zonepath}/lastexited. If the zone shutdown normally, the exit status will
1679 1692   * be -1, otherwise it will be the exit status as described in wait.3c.
1680 1693   * If the zone is configured to restart init, then nothing will be logged if
1681 1694   * init exits unexpectedly (the kernel will never upcall in this case).
1682 1695   */
1683 1696  static void
1684 1697  log_init_exit(int status)
1685 1698  {
1686 1699          char p[MAXPATHLEN];
1687 1700          char buf[128];
1688 1701          struct timeval t;
1689 1702          int fd;
1690 1703  
1691 1704          if (snprintf(p, sizeof (p), "%s/lastexited", zonepath) > sizeof (p))
1692 1705                  return;
1693 1706          if (gettimeofday(&t, NULL) != 0)
1694 1707                  return;
1695 1708          if (snprintf(buf, sizeof (buf), "%ld.%ld %d\n", t.tv_sec, t.tv_usec,
1696 1709              status) > sizeof (buf))
1697 1710                  return;
1698 1711          if ((fd = open(p, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0)
1699 1712                  return;
1700 1713  
1701 1714          (void) write(fd, buf, strlen(buf));
1702 1715  
1703 1716          (void) close(fd);
1704 1717  }
1705 1718  
1706 1719  /*
1707 1720   * The main routine for the door server that deals with zone state transitions.
1708 1721   */
1709 1722  /* ARGSUSED */
1710 1723  static void
1711 1724  server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1712 1725      uint_t n_desc)
1713 1726  {
1714 1727          ucred_t *uc = NULL;
1715 1728          const priv_set_t *eset;
1716 1729  
1717 1730          zone_state_t zstate;
1718 1731          zone_cmd_t cmd;
1719 1732          boolean_t debug;
1720 1733          int init_status;
1721 1734          zone_cmd_arg_t *zargp;
1722 1735  
1723 1736          boolean_t kernelcall = B_TRUE;
1724 1737  
1725 1738          int rval = -1;
1726 1739          uint64_t uniqid;
1727 1740          zoneid_t zoneid = -1;
1728 1741          zlog_t zlog;
1729 1742          zlog_t *zlogp;
1730 1743          zone_cmd_rval_t *rvalp;
1731 1744          size_t rlen = getpagesize(); /* conservative */
1732 1745          fs_callback_t cb;
1733 1746          brand_handle_t bh;
1734 1747          boolean_t wait_shut = B_FALSE;
1735 1748  
1736 1749          /* LINTED E_BAD_PTR_CAST_ALIGN */
1737 1750          zargp = (zone_cmd_arg_t *)args;
1738 1751  
1739 1752          /*
1740 1753           * When we get the door unref message, we've fdetach'd the door, and
1741 1754           * it is time for us to shut down zoneadmd.
1742 1755           */
1743 1756          if (zargp == DOOR_UNREF_DATA) {
1744 1757                  logstream_close(platloghdl, B_TRUE);
1745 1758  
1746 1759                  /*
1747 1760                   * See comment at end of main() for info on the last rites.
1748 1761                   */
1749 1762                  exit(0);
1750 1763          }
1751 1764  
1752 1765          if (zargp == NULL) {
1753 1766                  (void) door_return(NULL, 0, 0, 0);
1754 1767          }
1755 1768  
1756 1769          rvalp = alloca(rlen);
1757 1770          bzero(rvalp, rlen);
1758 1771          zlog.logfile = NULL;
1759 1772          zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1760 1773          zlog.buf = rvalp->errbuf;
1761 1774          zlog.log = zlog.buf;
1762 1775          /* defer initialization of zlog.locale until after credential check */
1763 1776          zlogp = &zlog;
1764 1777  
1765 1778          if (alen != sizeof (zone_cmd_arg_t)) {
1766 1779                  /*
1767 1780                   * This really shouldn't be happening.
1768 1781                   */
1769 1782                  zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1770 1783                      "unexpected (expected %d bytes)", alen,
1771 1784                      sizeof (zone_cmd_arg_t));
1772 1785                  goto out;
1773 1786          }
1774 1787          cmd = zargp->cmd;
1775 1788          debug = zargp->debug;
1776 1789          init_status = zargp->status;
1777 1790  
1778 1791          if (door_ucred(&uc) != 0) {
1779 1792                  zerror(&logsys, B_TRUE, "door_ucred");
1780 1793                  goto out;
1781 1794          }
1782 1795          eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1783 1796          if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1784 1797              (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1785 1798              ucred_geteuid(uc) != 0)) {
1786 1799                  zerror(&logsys, B_FALSE, "insufficient privileges");
1787 1800                  goto out;
1788 1801          }
1789 1802  
1790 1803          kernelcall = ucred_getpid(uc) == 0;
1791 1804  
1792 1805          /*
1793 1806           * This is safe because we only use a zlog_t throughout the
1794 1807           * duration of a door call; i.e., by the time the pointer
1795 1808           * might become invalid, the door call would be over.
1796 1809           */
1797 1810          zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
1798 1811  
1799 1812          (void) mutex_lock(&lock);
1800 1813  
1801 1814          /*
1802 1815           * Once we start to really die off, we don't want more connections.
1803 1816           */
1804 1817          if (in_death_throes) {
1805 1818                  (void) mutex_unlock(&lock);
1806 1819                  ucred_free(uc);
1807 1820                  (void) door_return(NULL, 0, 0, 0);
1808 1821                  thr_exit(NULL);
1809 1822          }
1810 1823  
1811 1824          /*
1812 1825           * Check for validity of command.
1813 1826           */
1814 1827          if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT &&
1815 1828              cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT &&
1816 1829              cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT &&
1817 1830              cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) {
1818 1831                  zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd);
1819 1832                  goto out;
1820 1833          }
1821 1834  
1822 1835          if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
1823 1836                  /*
1824 1837                   * Can't happen
1825 1838                   */
1826 1839                  zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
1827 1840                      cmd);
1828 1841                  goto out;
1829 1842          }
1830 1843          /*
1831 1844           * We ignore the possibility of someone calling zone_create(2)
1832 1845           * explicitly; all requests must come through zoneadmd.
1833 1846           */
1834 1847          if (zone_get_state(zone_name, &zstate) != Z_OK) {
1835 1848                  /*
1836 1849                   * Something terribly wrong happened
1837 1850                   */
1838 1851                  zerror(&logsys, B_FALSE, "unable to determine state of zone");
1839 1852                  goto out;
1840 1853          }
1841 1854  
1842 1855          if (kernelcall) {
1843 1856                  /*
1844 1857                   * Kernel-initiated requests may lose their validity if the
1845 1858                   * zone_t the kernel was referring to has gone away.
1846 1859                   */
1847 1860                  if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
1848 1861                      zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
1849 1862                      sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
1850 1863                          /*
1851 1864                           * We're not talking about the same zone. The request
1852 1865                           * must have arrived too late.  Return error.
1853 1866                           */
1854 1867                          rval = -1;
1855 1868                          goto out;
1856 1869                  }
1857 1870                  zlogp = &logplat;       /* Log errors to platform.log */
1858 1871          }
1859 1872  
1860 1873          /*
1861 1874           * If we are being asked to forcibly mount or boot a zone, we
1862 1875           * pretend that an INCOMPLETE zone is actually INSTALLED.
1863 1876           */
1864 1877          if (zstate == ZONE_STATE_INCOMPLETE &&
1865 1878              (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1866 1879                  zstate = ZONE_STATE_INSTALLED;
1867 1880  
1868 1881          switch (zstate) {
1869 1882          case ZONE_STATE_CONFIGURED:
1870 1883          case ZONE_STATE_INCOMPLETE:
1871 1884                  /*
1872 1885                   * Not our area of expertise; we just print a nice message
1873 1886                   * and die off.
1874 1887                   */
1875 1888                  zerror(zlogp, B_FALSE,
1876 1889                      "%s operation is invalid for zones in state '%s'",
1877 1890                      z_cmd_name(cmd), zone_state_str(zstate));
1878 1891                  break;
1879 1892  
1880 1893          case ZONE_STATE_INSTALLED:
1881 1894                  switch (cmd) {
1882 1895                  case Z_READY:
1883 1896                          rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, debug);
1884 1897                          if (rval == 0)
1885 1898                                  eventstream_write(Z_EVT_ZONE_READIED);
1886 1899                          zcons_statechanged();
1887 1900                          break;
1888 1901                  case Z_BOOT:
1889 1902                  case Z_FORCEBOOT:
1890 1903                          eventstream_write(Z_EVT_ZONE_BOOTING);
1891 1904                          if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
1892 1905                              debug)) == 0) {
1893 1906                                  rval = zone_bootup(zlogp, zargp->bootbuf,
1894 1907                                      zstate, debug);
1895 1908                          }
1896 1909                          audit_put_record(zlogp, uc, rval, "boot");
1897 1910                          zcons_statechanged();
1898 1911                          if (rval != 0) {
1899 1912                                  bringup_failure_recovery = B_TRUE;
1900 1913                                  (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1901 1914                                      zstate, debug);
1902 1915                                  eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1903 1916                          }
1904 1917                          break;
1905 1918                  case Z_SHUTDOWN:
1906 1919                  case Z_HALT:
1907 1920                          if (kernelcall) /* Invalid; can't happen */
1908 1921                                  abort();
1909 1922                          /*
1910 1923                           * We could have two clients racing to halt this
1911 1924                           * zone; the second client loses, but its request
1912 1925                           * doesn't fail, since the zone is now in the desired
1913 1926                           * state.
1914 1927                           */
1915 1928                          zerror(zlogp, B_FALSE, "zone is already halted");
1916 1929                          rval = 0;
1917 1930                          break;
1918 1931                  case Z_REBOOT:
1919 1932                          if (kernelcall) /* Invalid; can't happen */
1920 1933                                  abort();
1921 1934                          zerror(zlogp, B_FALSE, "%s operation is invalid "
1922 1935                              "for zones in state '%s'", z_cmd_name(cmd),
1923 1936                              zone_state_str(zstate));
1924 1937                          rval = -1;
1925 1938                          break;
1926 1939                  case Z_NOTE_UNINSTALLING:
1927 1940                          if (kernelcall) /* Invalid; can't happen */
1928 1941                                  abort();
1929 1942                          /*
1930 1943                           * Tell the console to print out a message about this.
1931 1944                           * Once it does, we will be in_death_throes.
1932 1945                           */
1933 1946                          eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1934 1947                          break;
1935 1948                  case Z_MOUNT:
1936 1949                  case Z_FORCEMOUNT:
1937 1950                          if (kernelcall) /* Invalid; can't happen */
1938 1951                                  abort();
1939 1952                          if (!zone_isnative && !zone_iscluster &&
1940 1953                              !zone_islabeled) {
1941 1954                                  /*
1942 1955                                   * -U mounts the zone without lofs mounting
1943 1956                                   * zone file systems back into the scratch
1944 1957                                   * zone.  This is required when mounting
1945 1958                                   * non-native branded zones.
1946 1959                                   */
1947 1960                                  (void) strlcpy(zargp->bootbuf, "-U",
1948 1961                                      BOOTARGS_MAX);
1949 1962                          }
1950 1963  
1951 1964                          rval = zone_ready(zlogp,
1952 1965                              strcmp(zargp->bootbuf, "-U") == 0 ?
1953 1966                              Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate, debug);
1954 1967                          if (rval != 0)
1955 1968                                  break;
1956 1969  
1957 1970                          eventstream_write(Z_EVT_ZONE_READIED);
1958 1971  
1959 1972                          /*
1960 1973                           * Get a handle to the default brand info.
1961 1974                           * We must always use the default brand file system
1962 1975                           * list when mounting the zone.
1963 1976                           */
1964 1977                          if ((bh = brand_open(default_brand)) == NULL) {
1965 1978                                  rval = -1;
1966 1979                                  break;
1967 1980                          }
1968 1981  
1969 1982                          /*
1970 1983                           * Get the list of filesystems to mount from
1971 1984                           * the brand configuration.  These mounts are done
1972 1985                           * via a thread that will enter the zone, so they
1973 1986                           * are done from within the context of the zone.
1974 1987                           */
1975 1988                          cb.zlogp = zlogp;
1976 1989                          cb.zoneid = zone_id;
1977 1990                          cb.mount_cmd = B_TRUE;
1978 1991                          rval = brand_platform_iter_mounts(bh,
1979 1992                              mount_early_fs, &cb);
1980 1993  
1981 1994                          brand_close(bh);
1982 1995  
1983 1996                          /*
1984 1997                           * Ordinarily, /dev/fd would be mounted inside the zone
1985 1998                           * by svc:/system/filesystem/usr:default, but since
1986 1999                           * we're not booting the zone, we need to do this
1987 2000                           * manually.
1988 2001                           */
1989 2002                          if (rval == 0)
1990 2003                                  rval = mount_early_fs(&cb,
1991 2004                                      "fd", "/dev/fd", "fd", NULL);
1992 2005                          break;
1993 2006                  case Z_UNMOUNT:
1994 2007                          if (kernelcall) /* Invalid; can't happen */
1995 2008                                  abort();
1996 2009                          zerror(zlogp, B_FALSE, "zone is already unmounted");
1997 2010                          rval = 0;
1998 2011                          break;
1999 2012                  }
2000 2013                  break;
2001 2014  
2002 2015          case ZONE_STATE_READY:
2003 2016                  switch (cmd) {
2004 2017                  case Z_READY:
2005 2018                          /*
2006 2019                           * We could have two clients racing to ready this
2007 2020                           * zone; the second client loses, but its request
2008 2021                           * doesn't fail, since the zone is now in the desired
2009 2022                           * state.
2010 2023                           */
2011 2024                          zerror(zlogp, B_FALSE, "zone is already ready");
2012 2025                          rval = 0;
2013 2026                          break;
2014 2027                  case Z_BOOT:
2015 2028                  case Z_FORCEBOOT:
2016 2029                          (void) strlcpy(boot_args, zargp->bootbuf,
2017 2030                              sizeof (boot_args));
2018 2031                          eventstream_write(Z_EVT_ZONE_BOOTING);
2019 2032                          rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
2020 2033                              debug);
2021 2034                          audit_put_record(zlogp, uc, rval, "boot");
2022 2035                          zcons_statechanged();
2023 2036                          if (rval != 0) {
2024 2037                                  bringup_failure_recovery = B_TRUE;
2025 2038                                  (void) zone_halt(zlogp, B_FALSE, B_TRUE,
2026 2039                                      zstate, debug);
2027 2040                                  eventstream_write(Z_EVT_ZONE_BOOTFAILED);
2028 2041                          }
2029 2042                          boot_args[0] = '\0';
2030 2043                          break;
2031 2044                  case Z_HALT:
2032 2045                          if (kernelcall) /* Invalid; can't happen */
2033 2046                                  abort();
2034 2047                          if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
2035 2048                              debug)) != 0)
2036 2049                                  break;
2037 2050                          zcons_statechanged();
2038 2051                          eventstream_write(Z_EVT_ZONE_HALTED);
2039 2052                          break;
2040 2053                  case Z_SHUTDOWN:
2041 2054                  case Z_REBOOT:
2042 2055                  case Z_NOTE_UNINSTALLING:
2043 2056                  case Z_MOUNT:
2044 2057                  case Z_FORCEMOUNT:
2045 2058                  case Z_UNMOUNT:
2046 2059                          if (kernelcall) /* Invalid; can't happen */
2047 2060                                  abort();
2048 2061                          zerror(zlogp, B_FALSE, "%s operation is invalid "
2049 2062                              "for zones in state '%s'", z_cmd_name(cmd),
2050 2063                              zone_state_str(zstate));
2051 2064                          rval = -1;
2052 2065                          break;
2053 2066                  }
2054 2067                  break;
2055 2068  
2056 2069          case ZONE_STATE_MOUNTED:
2057 2070                  switch (cmd) {
2058 2071                  case Z_UNMOUNT:
2059 2072                          if (kernelcall) /* Invalid; can't happen */
2060 2073                                  abort();
2061 2074                          rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate, debug);
2062 2075                          if (rval == 0) {
2063 2076                                  eventstream_write(Z_EVT_ZONE_HALTED);
2064 2077                                  (void) sema_post(&scratch_sem);
2065 2078                          }
2066 2079                          break;
2067 2080                  default:
2068 2081                          if (kernelcall) /* Invalid; can't happen */
2069 2082                                  abort();
2070 2083                          zerror(zlogp, B_FALSE, "%s operation is invalid "
2071 2084                              "for zones in state '%s'", z_cmd_name(cmd),
2072 2085                              zone_state_str(zstate));
2073 2086                          rval = -1;
2074 2087                          break;
2075 2088                  }
2076 2089                  break;
2077 2090  
2078 2091          case ZONE_STATE_RUNNING:
2079 2092          case ZONE_STATE_SHUTTING_DOWN:
2080 2093          case ZONE_STATE_DOWN:
2081 2094                  switch (cmd) {
2082 2095                  case Z_READY:
2083 2096                          if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
2084 2097                              debug)) != 0)
2085 2098                                  break;
2086 2099                          zcons_statechanged();
2087 2100                          if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
2088 2101                              debug)) == 0)
2089 2102                                  eventstream_write(Z_EVT_ZONE_READIED);
2090 2103                          else
2091 2104                                  eventstream_write(Z_EVT_ZONE_HALTED);
2092 2105                          break;
2093 2106                  case Z_BOOT:
2094 2107                  case Z_FORCEBOOT:
2095 2108                          /*
2096 2109                           * We could have two clients racing to boot this
2097 2110                           * zone; the second client loses, but its request
2098 2111                           * doesn't fail, since the zone is now in the desired
2099 2112                           * state.
2100 2113                           */
2101 2114                          zerror(zlogp, B_FALSE, "zone is already booted");
2102 2115                          rval = 0;
2103 2116                          break;
2104 2117                  case Z_HALT:
2105 2118                          if (kernelcall) {
2106 2119                                  log_init_exit(init_status);
2107 2120                          } else {
2108 2121                                  log_init_exit(-1);
2109 2122                          }
2110 2123                          if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
2111 2124                              debug)) != 0)
2112 2125                                  break;
2113 2126                          eventstream_write(Z_EVT_ZONE_HALTED);
2114 2127                          zcons_statechanged();
2115 2128                          break;
2116 2129                  case Z_REBOOT:
2117 2130                          (void) strlcpy(boot_args, zargp->bootbuf,
2118 2131                              sizeof (boot_args));
2119 2132                          eventstream_write(Z_EVT_ZONE_REBOOTING);
2120 2133                          if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
2121 2134                              debug)) != 0) {
2122 2135                                  eventstream_write(Z_EVT_ZONE_BOOTFAILED);
2123 2136                                  boot_args[0] = '\0';
2124 2137                                  break;
2125 2138                          }
2126 2139                          zcons_statechanged();
2127 2140                          if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
2128 2141                              debug)) != 0) {
2129 2142                                  eventstream_write(Z_EVT_ZONE_BOOTFAILED);
2130 2143                                  boot_args[0] = '\0';
2131 2144                                  break;
2132 2145                          }
2133 2146                          rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
2134 2147                              debug);
2135 2148                          audit_put_record(zlogp, uc, rval, "reboot");
2136 2149                          if (rval != 0) {
2137 2150                                  (void) zone_halt(zlogp, B_FALSE, B_TRUE,
2138 2151                                      zstate, debug);
2139 2152                                  eventstream_write(Z_EVT_ZONE_BOOTFAILED);
2140 2153                          }
2141 2154                          boot_args[0] = '\0';
2142 2155                          break;
2143 2156                  case Z_SHUTDOWN:
2144 2157                          if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
2145 2158                                  wait_shut = B_TRUE;
2146 2159                          }
2147 2160                          break;
2148 2161                  case Z_NOTE_UNINSTALLING:
2149 2162                  case Z_MOUNT:
2150 2163                  case Z_FORCEMOUNT:
2151 2164                  case Z_UNMOUNT:
2152 2165                          zerror(zlogp, B_FALSE, "%s operation is invalid "
2153 2166                              "for zones in state '%s'", z_cmd_name(cmd),
2154 2167                              zone_state_str(zstate));
2155 2168                          rval = -1;
2156 2169                          break;
2157 2170                  }
2158 2171                  break;
2159 2172          default:
2160 2173                  abort();
2161 2174          }
2162 2175  
2163 2176          /*
2164 2177           * Because the state of the zone may have changed, we make sure
2165 2178           * to wake the console poller, which is in charge of initiating
2166 2179           * the shutdown procedure as necessary.
2167 2180           */
2168 2181          eventstream_write(Z_EVT_NULL);
2169 2182  
2170 2183  out:
2171 2184          (void) mutex_unlock(&lock);
2172 2185  
2173 2186          /* Wait for the Z_SHUTDOWN commands to complete */
2174 2187          if (wait_shut)
2175 2188                  rval = zone_wait_shutdown(zlogp);
2176 2189  
2177 2190          if (kernelcall) {
2178 2191                  rvalp = NULL;
2179 2192                  rlen = 0;
2180 2193          } else {
2181 2194                  rvalp->rval = rval;
2182 2195          }
2183 2196          if (uc != NULL)
2184 2197                  ucred_free(uc);
2185 2198          (void) door_return((char *)rvalp, rlen, NULL, 0);
2186 2199          thr_exit(NULL);
2187 2200  }
2188 2201  
2189 2202  static int
2190 2203  setup_door(zlog_t *zlogp)
2191 2204  {
2192 2205          if ((zone_door = door_create(server, NULL,
2193 2206              DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
2194 2207                  zerror(zlogp, B_TRUE, "%s failed", "door_create");
2195 2208                  return (-1);
2196 2209          }
2197 2210          (void) fdetach(zone_door_path);
2198 2211  
2199 2212          if (fattach(zone_door, zone_door_path) != 0) {
2200 2213                  zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
2201 2214                  (void) door_revoke(zone_door);
2202 2215                  (void) fdetach(zone_door_path);
2203 2216                  zone_door = -1;
2204 2217                  return (-1);
2205 2218          }
2206 2219          return (0);
2207 2220  }
2208 2221  
2209 2222  /*
2210 2223   * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
2211 2224   * is where zoneadmd itself will check to see that another instance of
2212 2225   * zoneadmd isn't already controlling this zone.
2213 2226   *
2214 2227   * The idea here is that we want to open the path to which we will
2215 2228   * attach our door, lock it, and then make sure that no-one has beat us
2216 2229   * to fattach(3c)ing onto it.
2217 2230   *
2218 2231   * fattach(3c) is really a mount, so there are actually two possible
2219 2232   * vnodes we could be dealing with.  Our strategy is as follows:
2220 2233   *
2221 2234   * - If the file we opened is a regular file (common case):
2222 2235   *      There is no fattach(3c)ed door, so we have a chance of becoming
2223 2236   *      the managing zoneadmd. We attempt to lock the file: if it is
2224 2237   *      already locked, that means someone else raced us here, so we
2225 2238   *      lose and give up.  zoneadm(1m) will try to contact the zoneadmd
2226 2239   *      that beat us to it.
2227 2240   *
2228 2241   * - If the file we opened is a namefs file:
2229 2242   *      This means there is already an established door fattach(3c)'ed
2230 2243   *      to the rendezvous path.  We've lost the race, so we give up.
2231 2244   *      Note that in this case we also try to grab the file lock, and
2232 2245   *      will succeed in acquiring it since the vnode locked by the
2233 2246   *      "winning" zoneadmd was a regular one, and the one we locked was
2234 2247   *      the fattach(3c)'ed door node.  At any rate, no harm is done, and
2235 2248   *      we just return to zoneadm(1m) which knows to retry.
2236 2249   */
2237 2250  static int
2238 2251  make_daemon_exclusive(zlog_t *zlogp)
2239 2252  {
2240 2253          int doorfd = -1;
2241 2254          int err, ret = -1;
2242 2255          struct stat st;
2243 2256          struct flock flock;
2244 2257          zone_state_t zstate;
2245 2258  
2246 2259  top:
2247 2260          if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
2248 2261                  zerror(zlogp, B_FALSE, "failed to get zone state: %s",
2249 2262                      zonecfg_strerror(err));
2250 2263                  goto out;
2251 2264          }
2252 2265          if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
2253 2266              S_IREAD|S_IWRITE)) < 0) {
2254 2267                  zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
2255 2268                  goto out;
2256 2269          }
2257 2270          if (fstat(doorfd, &st) < 0) {
2258 2271                  zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
2259 2272                  goto out;
2260 2273          }
2261 2274          /*
2262 2275           * Lock the file to synchronize with other zoneadmd
2263 2276           */
2264 2277          flock.l_type = F_WRLCK;
2265 2278          flock.l_whence = SEEK_SET;
2266 2279          flock.l_start = (off_t)0;
2267 2280          flock.l_len = (off_t)0;
2268 2281          if (fcntl(doorfd, F_SETLK, &flock) < 0) {
2269 2282                  /*
2270 2283                   * Someone else raced us here and grabbed the lock file
2271 2284                   * first.  A warning here is inappropriate since nothing
2272 2285                   * went wrong.
2273 2286                   */
2274 2287                  goto out;
2275 2288          }
2276 2289  
2277 2290          if (strcmp(st.st_fstype, "namefs") == 0) {
2278 2291                  struct door_info info;
2279 2292  
2280 2293                  /*
2281 2294                   * There is already something fattach()'ed to this file.
2282 2295                   * Lets see what the door is up to.
2283 2296                   */
2284 2297                  if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
2285 2298                          /*
2286 2299                           * Another zoneadmd process seems to be in
2287 2300                           * control of the situation and we don't need to
2288 2301                           * be here.  A warning here is inappropriate
2289 2302                           * since nothing went wrong.
2290 2303                           *
2291 2304                           * If the door has been revoked, the zoneadmd
2292 2305                           * process currently managing the zone is going
2293 2306                           * away.  We'll return control to zoneadm(1m)
2294 2307                           * which will try again (by which time zoneadmd
2295 2308                           * will hopefully have exited).
2296 2309                           */
2297 2310                          goto out;
2298 2311                  }
2299 2312  
2300 2313                  /*
2301 2314                   * If we got this far, there's a fattach(3c)'ed door
2302 2315                   * that belongs to a process that has exited, which can
2303 2316                   * happen if the previous zoneadmd died unexpectedly.
2304 2317                   *
2305 2318                   * Let user know that something is amiss, but that we can
2306 2319                   * recover; if the zone is in the installed state, then don't
2307 2320                   * message, since having a running zoneadmd isn't really
2308 2321                   * expected/needed.  We want to keep occurences of this message
2309 2322                   * limited to times when zoneadmd is picking back up from a
2310 2323                   * zoneadmd that died while the zone was in some non-trivial
2311 2324                   * state.
2312 2325                   */
2313 2326                  if (zstate > ZONE_STATE_INSTALLED) {
2314 2327                          zerror(zlogp, B_FALSE,
2315 2328                              "zone '%s': WARNING: zone is in state '%s', but "
2316 2329                              "zoneadmd does not appear to be available; "
2317 2330                              "restarted zoneadmd to recover.",
2318 2331                              zone_name, zone_state_str(zstate));
2319 2332  
2320 2333                          /*
2321 2334                           * Startup a thread to perform the zfd logging/tty svc
2322 2335                           * for the zone. zlogp won't be valid for much longer
2323 2336                           * so use logplat.
2324 2337                           */
2325 2338                          if (getzoneidbyname(zone_name) != -1) {
2326 2339                                  create_log_thread(&logplat);
2327 2340                          }
2328 2341  
2329 2342                          /* recover the global configuration snapshot */
2330 2343                          if (snap_hndl == NULL) {
2331 2344                                  if ((snap_hndl = zonecfg_init_handle())
2332 2345                                      == NULL ||
2333 2346                                      zonecfg_create_snapshot(zone_name)
2334 2347                                      != Z_OK ||
2335 2348                                      zonecfg_get_snapshot_handle(zone_name,
2336 2349                                      snap_hndl) != Z_OK) {
2337 2350                                          zerror(zlogp, B_FALSE, "recovering "
2338 2351                                              "zone configuration handle");
2339 2352                                          goto out;
2340 2353                                  }
2341 2354                          }
2342 2355                  }
2343 2356  
2344 2357                  (void) fdetach(zone_door_path);
2345 2358                  (void) close(doorfd);
2346 2359                  goto top;
2347 2360          }
2348 2361          ret = 0;
2349 2362  out:
2350 2363          (void) close(doorfd);
2351 2364          return (ret);
2352 2365  }
2353 2366  
2354 2367  /*
2355 2368   * Run the query hook with the 'env' parameter.  It should return a
2356 2369   * string of tab-delimited key-value pairs, each of which should be set
2357 2370   * in the environment.
2358 2371   *
2359 2372   * Because the env_vars string values become part of the environment, the
2360 2373   * string is static and we don't free it.
2361 2374   *
2362 2375   * This function is always called before zoneadmd forks and makes itself
2363 2376   * exclusive, so it is possible there could more than one instance of zoneadmd
2364 2377   * running in parallel at this point. Thus, we have no zonecfg snapshot and
2365 2378   * shouldn't take one yet (i.e. snap_hndl is NULL). Thats ok, since we don't
2366 2379   * need any zonecfg info to query for a brand-specific env value.
2367 2380   */
2368 2381  static int
2369 2382  set_brand_env(zlog_t *zlogp)
2370 2383  {
2371 2384          int ret = 0;
2372 2385          static char *env_vars = NULL;
2373 2386          char buf[2 * MAXPATHLEN];
2374 2387  
2375 2388          if (query_hook[0] == '\0' || env_vars != NULL)
2376 2389                  return (0);
2377 2390  
2378 2391          if (snprintf(buf, sizeof (buf), "%s env", query_hook) > sizeof (buf))
2379 2392                  return (-1);
2380 2393  
2381 2394          if (do_subproc(zlogp, buf, &env_vars, B_FALSE) != 0)
2382 2395                  return (-1);
2383 2396  
2384 2397          if (env_vars != NULL) {
2385 2398                  char *sp;
2386 2399  
2387 2400                  sp = strtok(env_vars, "\t");
2388 2401                  while (sp != NULL) {
2389 2402                          if (putenv(sp) != 0) {
2390 2403                                  ret = -1;
2391 2404                                  break;
2392 2405                          }
2393 2406                          sp = strtok(NULL, "\t");
2394 2407                  }
2395 2408          }
2396 2409  
2397 2410          return (ret);
2398 2411  }
2399 2412  
2400 2413  /*
2401 2414   * Setup the brand's pre and post state change callbacks, as well as the
2402 2415   * query callback, if any of these exist.
2403 2416   */
2404 2417  static int
2405 2418  brand_callback_init(brand_handle_t bh, char *zone_name)
2406 2419  {
2407 2420          (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
2408 2421              sizeof (pre_statechg_hook));
2409 2422  
2410 2423          if (brand_get_prestatechange(bh, zone_name, zonepath,
2411 2424              pre_statechg_hook + EXEC_LEN,
2412 2425              sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
2413 2426                  return (-1);
2414 2427  
2415 2428          if (strlen(pre_statechg_hook) <= EXEC_LEN)
2416 2429                  pre_statechg_hook[0] = '\0';
2417 2430  
2418 2431          (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
2419 2432              sizeof (post_statechg_hook));
2420 2433  
2421 2434          if (brand_get_poststatechange(bh, zone_name, zonepath,
2422 2435              post_statechg_hook + EXEC_LEN,
2423 2436              sizeof (post_statechg_hook) - EXEC_LEN) != 0)
2424 2437                  return (-1);
2425 2438  
2426 2439          if (strlen(post_statechg_hook) <= EXEC_LEN)
2427 2440                  post_statechg_hook[0] = '\0';
2428 2441  
2429 2442          (void) strlcpy(query_hook, EXEC_PREFIX,
2430 2443              sizeof (query_hook));
2431 2444  
2432 2445          if (brand_get_query(bh, zone_name, zonepath, query_hook + EXEC_LEN,
2433 2446              sizeof (query_hook) - EXEC_LEN) != 0)
2434 2447                  return (-1);
2435 2448  
2436 2449          if (strlen(query_hook) <= EXEC_LEN)
2437 2450                  query_hook[0] = '\0';
2438 2451  
2439 2452          return (0);
2440 2453  }
2441 2454  
2442 2455  int
2443 2456  main(int argc, char *argv[])
2444 2457  {
2445 2458          int opt;
2446 2459          zoneid_t zid;
2447 2460          priv_set_t *privset;
2448 2461          zone_state_t zstate;
2449 2462          char parents_locale[MAXPATHLEN];
2450 2463          brand_handle_t bh;
2451 2464          int err;
2452 2465  
2453 2466          pid_t pid;
2454 2467          sigset_t blockset;
2455 2468          sigset_t block_cld;
2456 2469  
2457 2470          struct {
2458 2471                  sema_t sem;
2459 2472                  int status;
2460 2473                  zlog_t log;
2461 2474          } *shstate;
2462 2475          size_t shstatelen = getpagesize();
2463 2476  
2464 2477          zlog_t errlog;
2465 2478          zlog_t *zlogp;
2466 2479  
2467 2480          int ctfd;
2468 2481  
2469 2482          progname = get_execbasename(argv[0]);
2470 2483  
2471 2484          /*
2472 2485           * Make sure stderr is unbuffered
2473 2486           */
2474 2487          (void) setbuffer(stderr, NULL, 0);
2475 2488  
2476 2489          /*
2477 2490           * Get out of the way of mounted filesystems, since we will daemonize
2478 2491           * soon.
2479 2492           */
2480 2493          (void) chdir("/");
2481 2494  
2482 2495          /*
2483 2496           * Use the default system umask per PSARC 1998/110 rather than
2484 2497           * anything that may have been set by the caller.
2485 2498           */
2486 2499          (void) umask(CMASK);
2487 2500  
2488 2501          /*
2489 2502           * Initially we want to use our parent's locale.
2490 2503           */
2491 2504          (void) setlocale(LC_ALL, "");
2492 2505          (void) textdomain(TEXT_DOMAIN);
2493 2506          (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
2494 2507              sizeof (parents_locale));
2495 2508  
2496 2509          /*
2497 2510           * This zlog_t is used for writing to stderr
2498 2511           */
2499 2512          errlog.logfile = stderr;
2500 2513          errlog.buflen = errlog.loglen = 0;
2501 2514          errlog.buf = errlog.log = NULL;
2502 2515          errlog.locale = parents_locale;
2503 2516  
2504 2517          /*
2505 2518           * We start off writing to stderr until we're ready to daemonize.
2506 2519           */
2507 2520          zlogp = &errlog;
2508 2521  
2509 2522          /*
2510 2523           * Process options.
2511 2524           */
2512 2525          while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
2513 2526                  switch (opt) {
2514 2527                  case 'R':
2515 2528                          zonecfg_set_root(optarg);
2516 2529                          break;
2517 2530                  case 'z':
2518 2531                          zone_name = optarg;
2519 2532                          break;
2520 2533                  default:
2521 2534                          usage();
2522 2535                  }
2523 2536          }
2524 2537  
2525 2538          if (zone_name == NULL)
2526 2539                  usage();
2527 2540  
2528 2541          /*
2529 2542           * Because usage() prints directly to stderr, it has gettext()
2530 2543           * wrapping, which depends on the locale.  But since zerror() calls
2531 2544           * localize() which tweaks the locale, it is not safe to call zerror()
2532 2545           * until after the last call to usage().  Fortunately, the last call
2533 2546           * to usage() is just above and the first call to zerror() is just
2534 2547           * below.  Don't mess this up.
2535 2548           */
2536 2549          if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
2537 2550                  zerror(zlogp, B_FALSE, "cannot manage the %s zone",
2538 2551                      GLOBAL_ZONENAME);
2539 2552                  return (1);
2540 2553          }
2541 2554  
2542 2555          if (zone_get_id(zone_name, &zid) != 0) {
2543 2556                  zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
2544 2557                      zonecfg_strerror(Z_NO_ZONE));
2545 2558                  return (1);
2546 2559          }
2547 2560  
2548 2561          if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
2549 2562                  zerror(zlogp, B_FALSE, "failed to get zone state: %s",
2550 2563                      zonecfg_strerror(err));
2551 2564                  return (1);
2552 2565          }
2553 2566          if (zstate < ZONE_STATE_INCOMPLETE) {
2554 2567                  zerror(zlogp, B_FALSE,
2555 2568                      "cannot manage a zone which is in state '%s'",
2556 2569                      zone_state_str(zstate));
2557 2570                  return (1);
2558 2571          }
2559 2572  
2560 2573          if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
2561 2574                  zerror(zlogp, B_FALSE, "unable to determine zone path");
2562 2575                  return (-1);
2563 2576          }
2564 2577  
2565 2578          if (zonecfg_default_brand(default_brand,
2566 2579              sizeof (default_brand)) != Z_OK) {
2567 2580                  zerror(zlogp, B_FALSE, "unable to determine default brand");
2568 2581                  return (1);
2569 2582          }
2570 2583  
2571 2584          /* Get a handle to the brand info for this zone */
2572 2585          if (zone_get_brand(zone_name, brand_name, sizeof (brand_name))
2573 2586              != Z_OK) {
2574 2587                  zerror(zlogp, B_FALSE, "unable to determine zone brand");
2575 2588                  return (1);
2576 2589          }
2577 2590          zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0);
2578 2591          zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0);
2579 2592  
2580 2593          /*
2581 2594           * In the alternate root environment, the only supported
2582 2595           * operations are mount and unmount.  In this case, just treat
2583 2596           * the zone as native if it is cluster.  Cluster zones can be
2584 2597           * native for the purpose of LU or upgrade, and the cluster
2585 2598           * brand may not exist in the miniroot (such as in net install
2586 2599           * upgrade).
2587 2600           */
2588 2601          if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) {
2589 2602                  zone_iscluster = B_TRUE;
2590 2603                  if (zonecfg_in_alt_root()) {
2591 2604                          (void) strlcpy(brand_name, default_brand,
2592 2605                              sizeof (brand_name));
2593 2606                  }
2594 2607          } else {
2595 2608                  zone_iscluster = B_FALSE;
2596 2609          }
2597 2610  
2598 2611          if ((bh = brand_open(brand_name)) == NULL) {
2599 2612                  zerror(zlogp, B_FALSE, "unable to open zone brand");
2600 2613                  return (1);
2601 2614          }
2602 2615  
2603 2616          /* Get state change brand hooks. */
2604 2617          if (brand_callback_init(bh, zone_name) == -1) {
2605 2618                  zerror(zlogp, B_TRUE,
2606 2619                      "failed to initialize brand state change hooks");
2607 2620                  brand_close(bh);
2608 2621                  return (1);
2609 2622          }
2610 2623  
2611 2624          brand_close(bh);
2612 2625  
2613 2626          /*
2614 2627           * Check that we have all privileges.  It would be nice to pare
2615 2628           * this down, but this is at least a first cut.
2616 2629           */
2617 2630          if ((privset = priv_allocset()) == NULL) {
2618 2631                  zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
2619 2632                  return (1);
2620 2633          }
2621 2634  
2622 2635          if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2623 2636                  zerror(zlogp, B_TRUE, "%s failed", "getppriv");
2624 2637                  priv_freeset(privset);
2625 2638                  return (1);
2626 2639          }
2627 2640  
2628 2641          if (priv_isfullset(privset) == B_FALSE) {
2629 2642                  zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
2630 2643                      "run this command (all privs required)");
2631 2644                  priv_freeset(privset);
2632 2645                  return (1);
2633 2646          }
2634 2647          priv_freeset(privset);
2635 2648  
2636 2649          if (set_brand_env(zlogp) != 0) {
2637 2650                  zerror(zlogp, B_FALSE, "Unable to setup brand's environment");
2638 2651                  return (1);
2639 2652          }
2640 2653  
2641 2654          if (mkzonedir(zlogp) != 0)
2642 2655                  return (1);
2643 2656  
2644 2657          /*
2645 2658           * Pre-fork: setup shared state
2646 2659           */
2647 2660          if ((shstate = (void *)mmap(NULL, shstatelen,
2648 2661              PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
2649 2662              MAP_FAILED) {
2650 2663                  zerror(zlogp, B_TRUE, "%s failed", "mmap");
2651 2664                  return (1);
2652 2665          }
2653 2666          if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
2654 2667                  zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
2655 2668                  (void) munmap((char *)shstate, shstatelen);
2656 2669                  return (1);
2657 2670          }
2658 2671          shstate->log.logfile = NULL;
2659 2672          shstate->log.buflen = shstatelen - sizeof (*shstate);
2660 2673          shstate->log.loglen = shstate->log.buflen;
2661 2674          shstate->log.buf = (char *)shstate + sizeof (*shstate);
2662 2675          shstate->log.log = shstate->log.buf;
2663 2676          shstate->log.locale = parents_locale;
2664 2677          shstate->status = -1;
2665 2678  
2666 2679          /*
2667 2680           * We need a SIGCHLD handler so the sema_wait() below will wake
2668 2681           * up if the child dies without doing a sema_post().
2669 2682           */
2670 2683          (void) sigset(SIGCHLD, sigchld);
2671 2684          /*
2672 2685           * We must mask SIGCHLD until after we've coped with the fork
2673 2686           * sufficiently to deal with it; otherwise we can race and
2674 2687           * receive the signal before pid has been initialized
2675 2688           * (yes, this really happens).
2676 2689           */
2677 2690          (void) sigemptyset(&block_cld);
2678 2691          (void) sigaddset(&block_cld, SIGCHLD);
2679 2692          (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2680 2693  
2681 2694          /*
2682 2695           * The parent only needs stderr after the fork, so close other fd's
2683 2696           * that we inherited from zoneadm so that the parent doesn't have those
2684 2697           * open while waiting. The child will close the rest after the fork.
2685 2698           */
2686 2699          closefrom(3);
2687 2700  
2688 2701          if ((ctfd = init_template()) == -1) {
2689 2702                  zerror(zlogp, B_TRUE, "failed to create contract");
2690 2703                  return (1);
2691 2704          }
2692 2705  
2693 2706          /*
2694 2707           * Do not let another thread localize a message while we are forking.
2695 2708           */
2696 2709          (void) mutex_lock(&msglock);
2697 2710          pid = fork();
2698 2711          (void) mutex_unlock(&msglock);
2699 2712  
2700 2713          /*
2701 2714           * In all cases (parent, child, and in the event of an error) we
2702 2715           * don't want to cause creation of contracts on subsequent fork()s.
2703 2716           */
2704 2717          (void) ct_tmpl_clear(ctfd);
2705 2718          (void) close(ctfd);
2706 2719  
2707 2720          if (pid == -1) {
2708 2721                  zerror(zlogp, B_TRUE, "could not fork");
2709 2722                  return (1);
2710 2723  
2711 2724          } else if (pid > 0) { /* parent */
2712 2725                  (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2713 2726                  /*
2714 2727                   * This marks a window of vulnerability in which we receive
2715 2728                   * the SIGCLD before falling into sema_wait (normally we would
2716 2729                   * get woken up from sema_wait with EINTR upon receipt of
2717 2730                   * SIGCLD).  So we may need to use some other scheme like
2718 2731                   * sema_posting in the sigcld handler.
2719 2732                   * blech
2720 2733                   */
2721 2734                  (void) sema_wait(&shstate->sem);
2722 2735                  (void) sema_destroy(&shstate->sem);
2723 2736                  if (shstate->status != 0)
2724 2737                          (void) waitpid(pid, NULL, WNOHANG);
2725 2738                  /*
2726 2739                   * It's ok if we die with SIGPIPE.  It's not like we could have
2727 2740                   * done anything about it.
2728 2741                   */
2729 2742                  (void) fprintf(stderr, "%s", shstate->log.buf);
2730 2743                  _exit(shstate->status == 0 ? 0 : 1);
2731 2744          }
2732 2745  
2733 2746          /*
2734 2747           * The child charges on.
2735 2748           */
2736 2749          (void) sigset(SIGCHLD, SIG_DFL);
2737 2750          (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2738 2751  
2739 2752          /*
2740 2753           * SIGPIPE can be delivered if we write to a socket for which the
2741 2754           * peer endpoint is gone.  That can lead to too-early termination
2742 2755           * of zoneadmd, and that's not good eats.
2743 2756           */
2744 2757          (void) sigset(SIGPIPE, SIG_IGN);
2745 2758          /*
2746 2759           * Stop using stderr
2747 2760           */
2748 2761          zlogp = &shstate->log;
2749 2762  
2750 2763          /*
2751 2764           * We don't need stdin/stdout/stderr from now on.
2752 2765           */
2753 2766          closefrom(0);
2754 2767  
2755 2768          /*
2756 2769           * Initialize the syslog zlog_t.  This needs to be done after
2757 2770           * the call to closefrom().
2758 2771           */
2759 2772          logsys.buf = logsys.log = NULL;
2760 2773          logsys.buflen = logsys.loglen = 0;
2761 2774          logsys.logfile = NULL;
2762 2775          logsys.locale = DEFAULT_LOCALE;
2763 2776  
2764 2777          openlog("zoneadmd", LOG_PID, LOG_DAEMON);
2765 2778  
2766 2779          /*
2767 2780           * Allow logging to <zonepath>/logs/<file>.
2768 2781           */
2769 2782          logstream_init(zlogp);
2770 2783          platloghdl = logstream_open("platform.log", "zoneadmd", 0);
2771 2784  
2772 2785          /* logplat looks the same as logsys, but logs to platform.log */
2773 2786          logplat = logsys;
2774 2787  
2775 2788          /*
2776 2789           * The eventstream is used to publish state changes in the zone
2777 2790           * from the door threads to the console I/O poller.
2778 2791           */
2779 2792          if (eventstream_init() == -1) {
2780 2793                  zerror(zlogp, B_TRUE, "unable to create eventstream");
2781 2794                  goto child_out;
2782 2795          }
2783 2796  
2784 2797          (void) snprintf(zone_door_path, sizeof (zone_door_path),
2785 2798              "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);
2786 2799  
2787 2800          /*
2788 2801           * See if another zoneadmd is running for this zone.  If not, then we
2789 2802           * can now modify system state.
2790 2803           */
2791 2804          if (make_daemon_exclusive(zlogp) == -1)
2792 2805                  goto child_out;
2793 2806  
2794 2807          /*
2795 2808           * Create/join a new session; we need to be careful of what we do with
2796 2809           * the console from now on so we don't end up being the session leader
2797 2810           * for the terminal we're going to be handing out.
2798 2811           */
2799 2812          (void) setsid();
2800 2813  
2801 2814          /*
2802 2815           * This thread shouldn't be receiving any signals; in particular,
2803 2816           * SIGCHLD should be received by the thread doing the fork().  The
2804 2817           * exceptions are SIGHUP and SIGUSR1 for log rotation, set up by
2805 2818           * logstream_init().
2806 2819           */
2807 2820          (void) sigfillset(&blockset);
2808 2821          (void) sigdelset(&blockset, SIGHUP);
2809 2822          (void) sigdelset(&blockset, SIGUSR1);
2810 2823          (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
2811 2824  
2812 2825          /*
2813 2826           * Setup the console device and get ready to serve the console;
2814 2827           * once this has completed, we're ready to let console clients
2815 2828           * make an attempt to connect (they will block until
2816 2829           * serve_console_sock() below gets called, and any pending
2817 2830           * connection is accept()ed).
2818 2831           */
2819 2832          if (!zonecfg_in_alt_root() && init_console(zlogp) < 0)
2820 2833                  goto child_out;
2821 2834  
2822 2835          /*
2823 2836           * Take the lock now, so that when the door server gets going, we
2824 2837           * are guaranteed that it won't take a request until we are sure
2825 2838           * that everything is completely set up.  See the child_out: label
2826 2839           * below to see why this matters.
2827 2840           */
2828 2841          (void) mutex_lock(&lock);
2829 2842  
2830 2843          /* Init semaphore for scratch zones. */
2831 2844          if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
2832 2845                  zerror(zlogp, B_TRUE,
2833 2846                      "failed to initialize semaphore for scratch zone");
2834 2847                  goto child_out;
2835 2848          }
2836 2849  
2837 2850          /* open the dladm handle */
2838 2851          if (dladm_open(&dld_handle) != DLADM_STATUS_OK) {
2839 2852                  zerror(zlogp, B_FALSE, "failed to open dladm handle");
2840 2853                  goto child_out;
2841 2854          }
2842 2855  
2843 2856          /*
2844 2857           * Note: door setup must occur *after* the console is setup.
2845 2858           * This is so that as zlogin tests the door to see if zoneadmd
2846 2859           * is ready yet, we know that the console will get serviced
2847 2860           * once door_info() indicates that the door is "up".
2848 2861           */
2849 2862          if (setup_door(zlogp) == -1)
2850 2863                  goto child_out;
2851 2864  
2852 2865          /*
2853 2866           * Things seem OK so far; tell the parent process that we're done
2854 2867           * with setup tasks.  This will cause the parent to exit, signalling
2855 2868           * to zoneadm, zlogin, or whatever forked it that we are ready to
2856 2869           * service requests.
2857 2870           */
2858 2871          shstate->status = 0;
2859 2872          (void) sema_post(&shstate->sem);
2860 2873          (void) munmap((char *)shstate, shstatelen);
2861 2874          shstate = NULL;
2862 2875  
2863 2876          (void) mutex_unlock(&lock);
2864 2877  
2865 2878          /*
2866 2879           * zlogp is now invalid, so reset it to the syslog logger.
2867 2880           */
2868 2881          zlogp = &logsys;
2869 2882  
2870 2883          /*
2871 2884           * Now that we are free of any parents, switch to the default locale.
2872 2885           */
2873 2886          (void) setlocale(LC_ALL, DEFAULT_LOCALE);
2874 2887  
2875 2888          /*
2876 2889           * At this point the setup portion of main() is basically done, so
2877 2890           * we reuse this thread to manage the zone console.  When
2878 2891           * serve_console() has returned, we are past the point of no return
2879 2892           * in the life of this zoneadmd.
2880 2893           */
2881 2894          if (zonecfg_in_alt_root()) {
2882 2895                  /*
2883 2896                   * This is just awful, but mounted scratch zones don't (and
2884 2897                   * can't) have consoles.  We just wait for unmount instead.
2885 2898                   */
2886 2899                  while (sema_wait(&scratch_sem) == EINTR)
2887 2900                          ;
2888 2901          } else {
2889 2902                  serve_console(zlogp);
2890 2903                  assert(in_death_throes);
2891 2904          }
2892 2905  
2893 2906          /*
2894 2907           * This is the next-to-last part of the exit interlock.  Upon calling
2895 2908           * fdetach(), the door will go unreferenced; once any
2896 2909           * outstanding requests (like the door thread doing Z_HALT) are
2897 2910           * done, the door will get an UNREF notification; when it handles
2898 2911           * the UNREF, the door server will cause the exit.  It's possible
2899 2912           * that fdetach() can fail because the file is in use, in which
2900 2913           * case we'll retry the operation.
2901 2914           */
2902 2915          assert(!MUTEX_HELD(&lock));
2903 2916          for (;;) {
2904 2917                  if ((fdetach(zone_door_path) == 0) || (errno != EBUSY))
2905 2918                          break;
2906 2919                  yield();
2907 2920          }
2908 2921  
2909 2922          for (;;)
2910 2923                  (void) pause();
2911 2924  
2912 2925  child_out:
2913 2926          assert(pid == 0);
2914 2927  
2915 2928          shstate->status = -1;
2916 2929          (void) sema_post(&shstate->sem);
2917 2930          (void) munmap((char *)shstate, shstatelen);
2918 2931  
2919 2932          /*
2920 2933           * This might trigger an unref notification, but if so,
2921 2934           * we are still holding the lock, so our call to exit will
2922 2935           * ultimately win the race and will publish the right exit
2923 2936           * code.
2924 2937           */
2925 2938          if (zone_door != -1) {
2926 2939                  assert(MUTEX_HELD(&lock));
2927 2940                  (void) door_revoke(zone_door);
2928 2941                  (void) fdetach(zone_door_path);
2929 2942          }
2930 2943  
2931 2944          if (dld_handle != NULL)
2932 2945                  dladm_close(dld_handle);
2933 2946  
2934 2947          return (1); /* return from main() forcibly exits an MT process */
2935 2948  }
  
    | 
      ↓ open down ↓ | 
    1496 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX