Print this page
    
Merge cleanup from previous six commits
OS-2564 zone boot failed: could not start zoneadmd
OS-4166 zlogin to zfd needs TIOCSWINSZ support
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-3075 zone long boot args aren't passed through
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/cmd/zoneadmd/zcons.c
          +++ new/usr/src/cmd/zoneadmd/zcons.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  
    | 
      ↓ open down ↓ | 
    14 lines elided | 
    
      ↑ open up ↑ | 
  
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25      - * Copyright 2012 Joyent, Inc.  All rights reserved.
       25 + * Copyright 2015 Joyent, Inc.
  26   26   * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
  27   27   */
  28   28  
  29   29  /*
  30   30   * Console support for zones requires a significant infrastructure.  The
  31   31   * core pieces are contained in this file, but other portions of note
  32   32   * are in the zlogin(1M) command, the zcons(7D) driver, and in the
  33   33   * devfsadm(1M) misc_link generator.
  34   34   *
  35   35   * Care is taken to make the console behave in an "intuitive" fashion for
  36   36   * administrators.  Essentially, we try as much as possible to mimic the
  37   37   * experience of using a system via a tip line and system controller.
  38   38   *
  39   39   * The zone console architecture looks like this:
  40   40   *
  41   41   *                                      Global Zone | Non-Global Zone
  42   42   *                        .--------------.          |
  43   43   *        .-----------.   | zoneadmd -z  |          | .--------. .---------.
  44   44   *        | zlogin -C |   |     myzone   |          | | ttymon | | syslogd |
  45   45   *        `-----------'   `--------------'          | `--------' `---------'
  46   46   *                  |       |       | |             |      |       |
  47   47   *  User            |       |       | |             |      V       V
  48   48   * - - - - - - - - -|- - - -|- - - -|-|- - - - - - -|- - /dev/zconsole - - -
  49   49   *  Kernel          V       V       | |                        |
  50   50   *               [AF_UNIX Socket]   | `--------. .-------------'
  51   51   *                                  |          | |
  52   52   *                                  |          V V
  53   53   *                                  |     +-----------+
  54   54   *                                  |     |  ldterm,  |
  55   55   *                                  |     |   etc.    |
  56   56   *                                  |     +-----------+
  57   57   *                                  |     +-[Anchor]--+
  58   58   *                                  |     |   ptem    |
  59   59   *                                  V     +-----------+
  60   60   *                           +---master---+---slave---+
  61   61   *                           |                        |
  62   62   *                           |      zcons driver      |
  63   63   *                           |    zonename="myzone"   |
  64   64   *                           +------------------------+
  65   65   *
  66   66   * There are basically two major tasks which the console subsystem in
  67   67   * zoneadmd accomplishes:
  68   68   *
  69   69   * - Setup and teardown of zcons driver instances.  One zcons instance
  70   70   *   is maintained per zone; we take advantage of the libdevice APIs
  71   71   *   to online new instances of zcons as needed.  Care is taken to
  72   72   *   prune and manage these appropriately; see init_console_dev() and
  73   73   *   destroy_console_dev().  The end result is the creation of the
  74   74   *   zcons(7D) instance and an open file descriptor to the master side.
  75   75   *   zcons instances are associated with zones via their zonename device
  76   76   *   property.  This allows the console instance to persist across
  77   77   *   reboots, and while the zone is halted.
  78   78   *
  79   79   * - Acting as a server for 'zlogin -C' instances.  When zlogin -C is
  80   80   *   run, zlogin connects to zoneadmd via unix domain socket.  zoneadmd
  81   81   *   functions as a two-way proxy for console I/O, relaying user input
  82   82   *   to the master side of the console, and relaying output from the
  83   83   *   zone to the user.
  84   84   */
  85   85  
  86   86  #include <sys/types.h>
  87   87  #include <sys/socket.h>
  88   88  #include <sys/stat.h>
  89   89  #include <sys/termios.h>
  90   90  #include <sys/zcons.h>
  91   91  #include <sys/mkdev.h>
  92   92  
  93   93  #include <assert.h>
  94   94  #include <ctype.h>
  95   95  #include <errno.h>
  96   96  #include <fcntl.h>
  97   97  #include <stdarg.h>
  98   98  #include <stdio.h>
  99   99  #include <stdlib.h>
 100  100  #include <strings.h>
 101  101  #include <stropts.h>
 102  102  #include <thread.h>
 103  103  #include <ucred.h>
 104  104  #include <unistd.h>
 105  105  #include <zone.h>
 106  106  
 107  107  #include <libdevinfo.h>
 108  108  #include <libdevice.h>
 109  109  #include <libzonecfg.h>
 110  110  
  
    | 
      ↓ open down ↓ | 
    75 lines elided | 
    
      ↑ open up ↑ | 
  
 111  111  #include <syslog.h>
 112  112  #include <sys/modctl.h>
 113  113  
 114  114  #include "zoneadmd.h"
 115  115  
 116  116  #define ZCONSNEX_DEVTREEPATH    "/pseudo/zconsnex@1"
 117  117  #define ZCONSNEX_FILEPATH       "/devices/pseudo/zconsnex@1"
 118  118  
 119  119  #define CONSOLE_SOCKPATH        ZONES_TMPDIR "/%s.console_sock"
 120  120  
      121 +#define ZCONS_RETRY             10
      122 +
 121  123  static int      serverfd = -1;  /* console server unix domain socket fd */
 122  124  char boot_args[BOOTARGS_MAX];
 123      -char bad_boot_arg[BOOTARGS_MAX];
 124  125  
 125  126  /*
 126  127   * The eventstream is a simple one-directional flow of messages from the
 127  128   * door server to the console subsystem, implemented with a pipe.
 128  129   * It is used to wake up the console poller when it needs to take action,
 129  130   * message the user, die off, etc.
 130  131   */
 131  132  static int eventstream[2];
 132  133  
      134 +/* flag used to cope with race creating master zcons devlink */
      135 +static boolean_t master_zcons_failed = B_FALSE;
      136 +/* flag to track if we've seen a state change when there is no master zcons */
      137 +static boolean_t state_changed = B_FALSE;
 133  138  
 134      -
 135  139  int
 136  140  eventstream_init()
 137  141  {
 138  142          if (pipe(eventstream) == -1)
 139  143                  return (-1);
 140  144          return (0);
 141  145  }
 142  146  
 143  147  void
 144  148  eventstream_write(zone_evt_t evt)
 145  149  {
 146  150          (void) write(eventstream[0], &evt, sizeof (evt));
 147  151  }
 148  152  
 149  153  static zone_evt_t
 150  154  eventstream_read(void)
 151  155  {
 152  156          zone_evt_t evt = Z_EVT_NULL;
 153  157  
 154  158          (void) read(eventstream[1], &evt, sizeof (evt));
 155  159          return (evt);
 156  160  }
 157  161  
 158  162  /*
 159  163   * count_console_devs() and its helper count_cb() do a walk of the
 160  164   * subtree of the device tree where zone console nodes are represented.
 161  165   * The goal is to count zone console instances already set up for a zone
 162  166   * with the given name.  More than 1 is anomalous, and our caller will
 163  167   * have to deal with that if we find that's the case.
 164  168   *
 165  169   * Note: this algorithm is a linear search of nodes in the zconsnex subtree
 166  170   * of the device tree, and could be a scalability problem, but I don't see
 167  171   * how to avoid it.
 168  172   */
 169  173  
 170  174  /*
 171  175   * cb_data is shared by count_cb and destroy_cb for simplicity.
 172  176   */
 173  177  struct cb_data {
 174  178          zlog_t *zlogp;
 175  179          int found;
 176  180          int killed;
 177  181  };
 178  182  
 179  183  static int
 180  184  count_cb(di_node_t node, void *arg)
 181  185  {
 182  186          struct cb_data *cb = (struct cb_data *)arg;
 183  187          char *prop_data;
 184  188  
 185  189          if (di_prop_lookup_strings(DDI_DEV_T_ANY, node, "zonename",
 186  190              &prop_data) != -1) {
 187  191                  assert(prop_data != NULL);
 188  192                  if (strcmp(prop_data, zone_name) == 0) {
 189  193                          cb->found++;
 190  194                          return (DI_WALK_CONTINUE);
 191  195                  }
 192  196          }
 193  197          return (DI_WALK_CONTINUE);
 194  198  }
 195  199  
 196  200  static int
 197  201  count_console_devs(zlog_t *zlogp)
 198  202  {
 199  203          di_node_t root;
 200  204          struct cb_data cb;
 201  205  
 202  206          bzero(&cb, sizeof (cb));
 203  207          cb.zlogp = zlogp;
 204  208  
 205  209          if ((root = di_init(ZCONSNEX_DEVTREEPATH, DINFOCPYALL)) ==
 206  210              DI_NODE_NIL) {
 207  211                  zerror(zlogp, B_TRUE, "%s failed", "di_init");
 208  212                  return (-1);
 209  213          }
 210  214  
 211  215          (void) di_walk_node(root, DI_WALK_CLDFIRST, (void *)&cb, count_cb);
 212  216          di_fini(root);
 213  217          return (cb.found);
 214  218  }
 215  219  
 216  220  /*
 217  221   * destroy_console_devs() and its helper destroy_cb() tear down any console
 218  222   * instances associated with this zone.  If things went very wrong, we
 219  223   * might have more than one console instance hanging around.  This routine
 220  224   * hunts down and tries to remove all of them.  Of course, if the console
 221  225   * is open, the instance will not detach, which is a potential issue.
 222  226   */
 223  227  static int
 224  228  destroy_cb(di_node_t node, void *arg)
 225  229  {
 226  230          struct cb_data *cb = (struct cb_data *)arg;
 227  231          char *prop_data;
 228  232          char *tmp;
 229  233          char devpath[MAXPATHLEN];
 230  234          devctl_hdl_t hdl;
 231  235  
 232  236          if (di_prop_lookup_strings(DDI_DEV_T_ANY, node, "zonename",
 233  237              &prop_data) == -1)
 234  238                  return (DI_WALK_CONTINUE);
 235  239  
 236  240          assert(prop_data != NULL);
 237  241          if (strcmp(prop_data, zone_name) != 0) {
 238  242                  /* this is the console for a different zone */
 239  243                  return (DI_WALK_CONTINUE);
 240  244          }
 241  245  
 242  246          cb->found++;
 243  247          tmp = di_devfs_path(node);
 244  248          (void) snprintf(devpath, sizeof (devpath), "/devices/%s", tmp);
 245  249          di_devfs_path_free(tmp);
 246  250  
 247  251          if ((hdl = devctl_device_acquire(devpath, 0)) == NULL) {
 248  252                  zerror(cb->zlogp, B_TRUE, "WARNING: console %s found, "
 249  253                      "but it could not be controlled.", devpath);
 250  254                  return (DI_WALK_CONTINUE);
 251  255          }
 252  256          if (devctl_device_remove(hdl) == 0) {
 253  257                  cb->killed++;
 254  258          } else {
 255  259                  zerror(cb->zlogp, B_TRUE, "WARNING: console %s found, "
 256  260                      "but it could not be removed.", devpath);
 257  261          }
 258  262          devctl_release(hdl);
 259  263          return (DI_WALK_CONTINUE);
 260  264  }
 261  265  
 262  266  static int
 263  267  destroy_console_devs(zlog_t *zlogp)
 264  268  {
 265  269          char conspath[MAXPATHLEN];
 266  270          di_node_t root;
 267  271          struct cb_data cb;
 268  272          int masterfd;
 269  273          int slavefd;
 270  274  
 271  275          /*
 272  276           * Signal the master side to release its handle on the slave side by
 273  277           * issuing a ZC_RELEASESLAVE ioctl.
 274  278           */
 275  279          (void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
 276  280              zone_name, ZCONS_MASTER_NAME);
 277  281          if ((masterfd = open(conspath, O_RDWR | O_NOCTTY)) != -1) {
 278  282                  (void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
 279  283                      zone_name, ZCONS_SLAVE_NAME);
 280  284                  if ((slavefd = open(conspath, O_RDWR | O_NOCTTY)) != -1) {
 281  285                          if (ioctl(masterfd, ZC_RELEASESLAVE,
 282  286                              (caddr_t)(intptr_t)slavefd) != 0)
 283  287                                  zerror(zlogp, B_TRUE, "WARNING: error while "
 284  288                                      "releasing slave handle of zone console for"
 285  289                                      " %s", zone_name);
 286  290                          (void) close(slavefd);
 287  291                  } else {
 288  292                          zerror(zlogp, B_TRUE, "WARNING: could not open slave "
 289  293                              "side of zone console for %s to release slave "
 290  294                              "handle", zone_name);
 291  295                  }
 292  296                  (void) close(masterfd);
 293  297          } else {
 294  298                  zerror(zlogp, B_TRUE, "WARNING: could not open master side of "
 295  299                      "zone console for %s to release slave handle", zone_name);
 296  300          }
 297  301  
 298  302          bzero(&cb, sizeof (cb));
 299  303          cb.zlogp = zlogp;
 300  304  
 301  305          if ((root = di_init(ZCONSNEX_DEVTREEPATH, DINFOCPYALL)) ==
 302  306              DI_NODE_NIL) {
 303  307                  zerror(zlogp, B_TRUE, "%s failed", "di_init");
 304  308                  return (-1);
 305  309          }
 306  310  
 307  311          (void) di_walk_node(root, DI_WALK_CLDFIRST, (void *)&cb, destroy_cb);
 308  312          if (cb.found > 1) {
 309  313                  zerror(zlogp, B_FALSE, "WARNING: multiple zone console "
 310  314                      "instances detected for zone '%s'; %d of %d "
 311  315                      "successfully removed.",
 312  316                      zone_name, cb.killed, cb.found);
 313  317          }
 314  318  
 315  319          di_fini(root);
 316  320          return (0);
 317  321  }
 318  322  
 319  323  /*
 320  324   * init_console_dev() drives the device-tree configuration of the zone
 321  325   * console device.  The general strategy is to use the libdevice (devctl)
 322  326   * interfaces to instantiate a new zone console node.  We do a lot of
 323  327   * sanity checking, and are careful to reuse a console if one exists.
 324  328   *
 325  329   * Once the device is in the device tree, we kick devfsadm via di_init_devs()
 326  330   * to ensure that the appropriate symlinks (to the master and slave console
 327  331   * devices) are placed in /dev in the global zone.
 328  332   */
 329  333  static int
 330  334  init_console_dev(zlog_t *zlogp)
 331  335  {
 332  336          char conspath[MAXPATHLEN];
 333  337          devctl_hdl_t bus_hdl = NULL;
 334  338          devctl_hdl_t dev_hdl = NULL;
 335  339          devctl_ddef_t ddef_hdl = NULL;
 336  340          di_devlink_handle_t dl = NULL;
 337  341          int rv = -1;
 338  342          int ndevs;
 339  343          int masterfd;
 340  344          int slavefd;
 341  345          int i;
 342  346  
 343  347          /*
 344  348           * Don't re-setup console if it is working and ready already; just
 345  349           * skip ahead to making devlinks, which we do for sanity's sake.
 346  350           */
 347  351          ndevs = count_console_devs(zlogp);
 348  352          if (ndevs == 1) {
 349  353                  goto devlinks;
 350  354          } else if (ndevs > 1 || ndevs == -1) {
 351  355                  /*
 352  356                   * For now, this seems like a reasonable but harsh punishment.
 353  357                   * If needed, we could try to get clever and delete all but
 354  358                   * the console which is pointed at by the current symlink.
 355  359                   */
 356  360                  if (destroy_console_devs(zlogp) == -1) {
 357  361                          goto error;
 358  362                  }
 359  363          }
 360  364  
 361  365          /*
 362  366           * Time to make the consoles!
 363  367           */
 364  368          if ((bus_hdl = devctl_bus_acquire(ZCONSNEX_FILEPATH, 0)) == NULL) {
 365  369                  zerror(zlogp, B_TRUE, "%s failed", "devctl_bus_acquire");
 366  370                  goto error;
 367  371          }
 368  372          if ((ddef_hdl = devctl_ddef_alloc("zcons", 0)) == NULL) {
 369  373                  zerror(zlogp, B_TRUE, "failed to allocate ddef handle");
 370  374                  goto error;
 371  375          }
 372  376          /*
 373  377           * Set three properties on this node; the first is the name of the
 374  378           * zone; the second is a flag which lets pseudo know that it is
 375  379           * OK to automatically allocate an instance # for this device;
 376  380           * the third tells the device framework not to auto-detach this
 377  381           * node-- we need the node to still be there when we ask devfsadmd
 378  382           * to make links, and when we need to open it.
 379  383           */
 380  384          if (devctl_ddef_string(ddef_hdl, "zonename", zone_name) == -1) {
 381  385                  zerror(zlogp, B_TRUE, "failed to create zonename property");
 382  386                  goto error;
 383  387          }
 384  388          if (devctl_ddef_int(ddef_hdl, "auto-assign-instance", 1) == -1) {
 385  389                  zerror(zlogp, B_TRUE, "failed to create auto-assign-instance "
 386  390                      "property");
 387  391                  goto error;
 388  392          }
 389  393          if (devctl_ddef_int(ddef_hdl, "ddi-no-autodetach", 1) == -1) {
 390  394                  zerror(zlogp, B_TRUE, "failed to create ddi-no-auto-detach "
 391  395                      "property");
 392  396                  goto error;
 393  397          }
 394  398          if (devctl_bus_dev_create(bus_hdl, ddef_hdl, 0, &dev_hdl) == -1) {
 395  399                  zerror(zlogp, B_TRUE, "failed to create console node");
 396  400                  goto error;
 397  401          }
 398  402  
 399  403  devlinks:
 400  404          if ((dl = di_devlink_init("zcons", DI_MAKE_LINK)) != NULL) {
  
    | 
      ↓ open down ↓ | 
    256 lines elided | 
    
      ↑ open up ↑ | 
  
 401  405                  (void) di_devlink_fini(&dl);
 402  406          } else {
 403  407                  zerror(zlogp, B_TRUE, "failed to create devlinks");
 404  408                  goto error;
 405  409          }
 406  410  
 407  411          /*
 408  412           * Open the master side of the console and issue the ZC_HOLDSLAVE ioctl,
 409  413           * which will cause the master to retain a reference to the slave.
 410  414           * This prevents ttymon from blowing through the slave's STREAMS anchor.
      415 +         *
      416 +         * In very rare cases the open returns ENOENT if devfs doesn't have
      417 +         * everything set up yet due to heavy zone startup load. Wait for
      418 +         * 1 sec. and retry a few times. Even if we can't set up the zone's
      419 +         * console, we still go ahead and boot the zone.
 411  420           */
 412  421          (void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
 413  422              zone_name, ZCONS_MASTER_NAME);
 414  423          if ((masterfd = open(conspath, O_RDWR | O_NOCTTY)) == -1) {
 415  424                  zerror(zlogp, B_TRUE, "ERROR: could not open master side of "
 416  425                      "zone console for %s to acquire slave handle", zone_name);
 417      -                goto error;
      426 +                master_zcons_failed = B_TRUE;
 418  427          }
 419  428          (void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
 420  429              zone_name, ZCONS_SLAVE_NAME);
 421      -        if ((slavefd = open(conspath, O_RDWR | O_NOCTTY)) == -1) {
      430 +        for (i = 0; i < ZCONS_RETRY; i++) {
      431 +                slavefd = open(conspath, O_RDWR | O_NOCTTY);
      432 +                if (slavefd >= 0 || errno != ENOENT)
      433 +                        break;
      434 +                (void) sleep(1);
      435 +        }
      436 +        if (slavefd == -1)
 422  437                  zerror(zlogp, B_TRUE, "ERROR: could not open slave side of zone"
 423  438                      " console for %s to acquire slave handle", zone_name);
 424      -                (void) close(masterfd);
 425      -                goto error;
 426      -        }
      439 +
 427  440          /*
 428  441           * This ioctl can occasionally return ENXIO if devfs doesn't have
 429  442           * everything plumbed up yet due to heavy zone startup load. Wait for
 430  443           * 1 sec. and retry a few times before we fail to boot the zone.
 431  444           */
 432      -        for (i = 0; i < 5; i++) {
 433      -                if (ioctl(masterfd, ZC_HOLDSLAVE, (caddr_t)(intptr_t)slavefd)
 434      -                    == 0) {
 435      -                        rv = 0;
 436      -                        break;
 437      -                } else if (errno != ENXIO) {
 438      -                        break;
      445 +        if (masterfd != -1 && slavefd != -1) {
      446 +                for (i = 0; i < ZCONS_RETRY; i++) {
      447 +                        if (ioctl(masterfd, ZC_HOLDSLAVE,
      448 +                            (caddr_t)(intptr_t)slavefd) == 0) {
      449 +                                rv = 0;
      450 +                                break;
      451 +                        } else if (errno != ENXIO) {
      452 +                                break;
      453 +                        }
      454 +                        (void) sleep(1);
 439  455                  }
 440      -                (void) sleep(1);
      456 +                if (rv != 0)
      457 +                        zerror(zlogp, B_TRUE, "ERROR: error while acquiring "
      458 +                            "slave handle of zone console for %s", zone_name);
 441  459          }
 442      -        if (rv != 0)
 443      -                zerror(zlogp, B_TRUE, "ERROR: error while acquiring slave "
 444      -                    "handle of zone console for %s", zone_name);
 445  460  
 446      -        (void) close(slavefd);
 447      -        (void) close(masterfd);
      461 +        if (slavefd != -1)
      462 +                (void) close(slavefd);
      463 +        if (masterfd != -1)
      464 +                (void) close(masterfd);
 448  465  
 449  466  error:
 450  467          if (ddef_hdl)
 451  468                  devctl_ddef_free(ddef_hdl);
 452  469          if (bus_hdl)
 453  470                  devctl_release(bus_hdl);
 454  471          if (dev_hdl)
 455  472                  devctl_release(dev_hdl);
 456  473          return (rv);
 457  474  }
 458  475  
 459  476  static int
 460  477  init_console_sock(zlog_t *zlogp)
 461  478  {
 462  479          int servfd;
 463  480          struct sockaddr_un servaddr;
 464  481  
 465  482          bzero(&servaddr, sizeof (servaddr));
 466  483          servaddr.sun_family = AF_UNIX;
 467  484          (void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
 468  485              CONSOLE_SOCKPATH, zone_name);
 469  486  
 470  487          if ((servfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
 471  488                  zerror(zlogp, B_TRUE, "console setup: could not create socket");
 472  489                  return (-1);
 473  490          }
 474  491          (void) unlink(servaddr.sun_path);
 475  492  
 476  493          if (bind(servfd, (struct sockaddr *)&servaddr,
 477  494              sizeof (servaddr)) == -1) {
 478  495                  zerror(zlogp, B_TRUE,
 479  496                      "console setup: could not bind to socket");
 480  497                  goto out;
 481  498          }
 482  499  
 483  500          if (listen(servfd, 4) == -1) {
 484  501                  zerror(zlogp, B_TRUE,
 485  502                      "console setup: could not listen on socket");
 486  503                  goto out;
 487  504          }
 488  505          return (servfd);
 489  506  
 490  507  out:
 491  508          (void) unlink(servaddr.sun_path);
 492  509          (void) close(servfd);
 493  510          return (-1);
 494  511  }
 495  512  
 496  513  static void
 497  514  destroy_console_sock(int servfd)
 498  515  {
 499  516          char path[MAXPATHLEN];
 500  517  
 501  518          (void) snprintf(path, sizeof (path), CONSOLE_SOCKPATH, zone_name);
 502  519          (void) unlink(path);
 503  520          (void) shutdown(servfd, SHUT_RDWR);
 504  521          (void) close(servfd);
 505  522  }
 506  523  
 507  524  /*
 508  525   * Read the "ident" string from the client's descriptor; this routine also
 509  526   * tolerates being called with pid=NULL, for times when you want to "eat"
  
    | 
      ↓ open down ↓ | 
    52 lines elided | 
    
      ↑ open up ↑ | 
  
 510  527   * the ident string from a client without saving it.
 511  528   */
 512  529  static int
 513  530  get_client_ident(int clifd, pid_t *pid, char *locale, size_t locale_len,
 514  531      int *disconnect)
 515  532  {
 516  533          char buf[BUFSIZ], *bufp;
 517  534          size_t buflen = sizeof (buf);
 518  535          char c = '\0';
 519  536          int i = 0, r;
      537 +        ucred_t *cred = NULL;
 520  538  
 521  539          /* "eat up the ident string" case, for simplicity */
 522  540          if (pid == NULL) {
 523  541                  assert(locale == NULL && locale_len == 0);
 524  542                  while (read(clifd, &c, 1) == 1) {
 525  543                          if (c == '\n')
 526  544                                  return (0);
 527  545                  }
 528  546          }
 529  547  
 530  548          bzero(buf, sizeof (buf));
 531  549          while ((buflen > 1) && (r = read(clifd, &c, 1)) == 1) {
 532  550                  buflen--;
 533  551                  if (c == '\n')
 534  552                          break;
 535  553  
 536  554                  buf[i] = c;
 537  555                  i++;
 538  556          }
 539  557          if (r == -1)
 540  558                  return (-1);
 541  559  
 542  560          /*
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
 543  561           * We've filled the buffer, but still haven't seen \n.  Keep eating
 544  562           * until we find it; we don't expect this to happen, but this is
 545  563           * defensive.
 546  564           */
 547  565          if (c != '\n') {
 548  566                  while ((r = read(clifd, &c, sizeof (c))) > 0)
 549  567                          if (c == '\n')
 550  568                                  break;
 551  569          }
 552  570  
      571 +        if (getpeerucred(clifd, &cred) == 0) {
      572 +                *pid = ucred_getpid((const ucred_t *)cred);
      573 +                ucred_free(cred);
      574 +        } else {
      575 +                return (-1);
      576 +        }
      577 +
 553  578          /*
 554  579           * Parse buffer for message of the form:
 555      -         * IDENT <pid> <locale> <disconnect flag>
      580 +         * IDENT <locale> <disconnect flag>
 556  581           */
 557  582          bufp = buf;
 558  583          if (strncmp(bufp, "IDENT ", 6) != 0)
 559  584                  return (-1);
 560  585          bufp += 6;
 561  586          errno = 0;
 562      -        *pid = strtoll(bufp, &bufp, 10);
 563      -        if (errno != 0)
 564      -                return (-1);
 565  587  
 566  588          while (*bufp != '\0' && isspace(*bufp))
 567  589                  bufp++;
 568  590          buflen = strlen(bufp) - 1;
 569  591          *disconnect = atoi(&bufp[buflen]);
 570  592          bufp[buflen - 1] = '\0';
 571  593          (void) strlcpy(locale, bufp, locale_len);
 572  594  
 573  595          return (0);
 574  596  }
 575  597  
 576  598  static int
 577  599  accept_client(int servfd, pid_t *pid, char *locale, size_t locale_len,
 578  600      int *disconnect)
 579  601  {
 580  602          int connfd;
 581  603          struct sockaddr_un cliaddr;
 582  604          socklen_t clilen;
 583  605  
 584  606          clilen = sizeof (cliaddr);
 585  607          connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen);
 586  608          if (connfd == -1)
 587  609                  return (-1);
 588  610          if (get_client_ident(connfd, pid, locale, locale_len,
 589  611              disconnect) == -1) {
 590  612                  (void) shutdown(connfd, SHUT_RDWR);
 591  613                  (void) close(connfd);
 592  614                  return (-1);
 593  615          }
 594  616          (void) write(connfd, "OK\n", 3);
 595  617          return (connfd);
 596  618  }
 597  619  
 598  620  static void
 599  621  reject_client(int servfd, pid_t clientpid)
 600  622  {
 601  623          int connfd;
 602  624          struct sockaddr_un cliaddr;
 603  625          socklen_t clilen;
 604  626          char nak[MAXPATHLEN];
 605  627  
 606  628          clilen = sizeof (cliaddr);
 607  629          connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen);
 608  630  
 609  631          /*
 610  632           * After hearing its ident string, tell the client to get lost.
 611  633           */
 612  634          if (get_client_ident(connfd, NULL, NULL, 0, NULL) == 0) {
 613  635                  (void) snprintf(nak, sizeof (nak), "%lu\n",
 614  636                      clientpid);
 615  637                  (void) write(connfd, nak, strlen(nak));
 616  638          }
 617  639          (void) shutdown(connfd, SHUT_RDWR);
 618  640          (void) close(connfd);
 619  641  }
 620  642  
 621  643  static void
 622  644  event_message(int clifd, char *clilocale, zone_evt_t evt, int dflag)
 623  645  {
 624  646          char *str, *lstr = NULL;
 625  647          char lmsg[BUFSIZ];
 626  648          char outbuf[BUFSIZ];
 627  649  
 628  650          if (clifd == -1)
 629  651                  return;
 630  652  
 631  653          switch (evt) {
 632  654          case Z_EVT_ZONE_BOOTING:
 633  655                  if (*boot_args == '\0') {
 634  656                          str = "NOTICE: Zone booting up";
 635  657                          break;
 636  658                  }
 637  659                  /*LINTED*/
 638  660                  (void) snprintf(lmsg, sizeof (lmsg), localize_msg(clilocale,
 639  661                      "NOTICE: Zone booting up with arguments: %s"), boot_args);
 640  662                  lstr = lmsg;
 641  663                  break;
 642  664          case Z_EVT_ZONE_READIED:
 643  665                  str = "NOTICE: Zone readied";
 644  666                  break;
 645  667          case Z_EVT_ZONE_HALTED:
 646  668                  if (dflag)
 647  669                          str = "NOTICE: Zone halted.  Disconnecting...";
 648  670                  else
 649  671                          str = "NOTICE: Zone halted";
 650  672                  break;
 651  673          case Z_EVT_ZONE_REBOOTING:
 652  674                  if (*boot_args == '\0') {
 653  675                          str = "NOTICE: Zone rebooting";
 654  676                          break;
 655  677                  }
 656  678                  /*LINTED*/
 657  679                  (void) snprintf(lmsg, sizeof (lmsg), localize_msg(clilocale,
 658  680                      "NOTICE: Zone rebooting with arguments: %s"), boot_args);
 659  681                  lstr = lmsg;
  
    | 
      ↓ open down ↓ | 
    85 lines elided | 
    
      ↑ open up ↑ | 
  
 660  682                  break;
 661  683          case Z_EVT_ZONE_UNINSTALLING:
 662  684                  str = "NOTICE: Zone is being uninstalled.  Disconnecting...";
 663  685                  break;
 664  686          case Z_EVT_ZONE_BOOTFAILED:
 665  687                  if (dflag)
 666  688                          str = "NOTICE: Zone boot failed.  Disconnecting...";
 667  689                  else
 668  690                          str = "NOTICE: Zone boot failed";
 669  691                  break;
 670      -        case Z_EVT_ZONE_BADARGS:
 671      -                /*LINTED*/
 672      -                (void) snprintf(lmsg, sizeof (lmsg),
 673      -                    localize_msg(clilocale,
 674      -                    "WARNING: Ignoring invalid boot arguments: %s"),
 675      -                    bad_boot_arg);
 676      -                lstr = lmsg;
 677      -                break;
 678  692          default:
 679  693                  return;
 680  694          }
 681  695  
 682  696          if (lstr == NULL)
 683  697                  lstr = localize_msg(clilocale, str);
 684  698          (void) snprintf(outbuf, sizeof (outbuf), "\r\n[%s]\r\n", lstr);
 685  699          (void) write(clifd, outbuf, strlen(outbuf));
 686  700  }
 687  701  
 688  702  /*
 689  703   * Check to see if the client at the other end of the socket is still
 690  704   * alive; we know it is not if it throws EPIPE at us when we try to write
 691  705   * an otherwise harmless 0-length message to it.
 692  706   */
 693  707  static int
 694  708  test_client(int clifd)
 695  709  {
 696  710          if ((write(clifd, "", 0) == -1) && errno == EPIPE)
 697  711                  return (-1);
 698  712          return (0);
 699  713  }
 700  714  
 701  715  /*
 702  716   * This routine drives the console I/O loop.  It polls for input from the
 703  717   * master side of the console (output to the console), and from the client
 704  718   * (input from the console user).  Additionally, it polls on the server fd,
 705  719   * and disconnects any clients that might try to hook up with the zone while
 706  720   * the console is in use.
 707  721   *
 708  722   * When the client first calls us up, it is expected to send a line giving
 709  723   * its "identity"; this consists of the string 'IDENT <pid> <locale>'.
 710  724   * This is so that we can report that the console is busy along with
 711  725   * some diagnostics about who has it busy; the locale is used so that
 712  726   * asynchronous messages about zone state (like the NOTICE: zone halted
 713  727   * messages) can be output in the user's locale.
 714  728   */
 715  729  static void
 716  730  do_console_io(zlog_t *zlogp, int consfd, int servfd)
 717  731  {
 718  732          struct pollfd pollfds[4];
 719  733          char ibuf[BUFSIZ];
 720  734          int cc, ret;
 721  735          int clifd = -1;
 722  736          int pollerr = 0;
 723  737          char clilocale[MAXPATHLEN];
 724  738          pid_t clipid = 0;
 725  739          int disconnect = 0;
 726  740  
 727  741          /* console side, watch for read events */
 728  742          pollfds[0].fd = consfd;
 729  743          pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND |
 730  744              POLLPRI | POLLERR | POLLHUP | POLLNVAL;
 731  745  
 732  746          /* client side, watch for read events */
 733  747          pollfds[1].fd = clifd;
 734  748          pollfds[1].events = pollfds[0].events;
 735  749  
 736  750          /* the server socket; watch for events (new connections) */
 737  751          pollfds[2].fd = servfd;
 738  752          pollfds[2].events = pollfds[0].events;
 739  753  
 740  754          /* the eventstram; watch for events (e.g.: zone halted) */
 741  755          pollfds[3].fd = eventstream[1];
 742  756          pollfds[3].events = pollfds[0].events;
 743  757  
 744  758          for (;;) {
 745  759                  pollfds[0].revents = pollfds[1].revents = 0;
 746  760                  pollfds[2].revents = pollfds[3].revents = 0;
 747  761  
 748  762                  ret = poll(pollfds,
 749  763                      sizeof (pollfds) / sizeof (struct pollfd), -1);
 750  764                  if (ret == -1 && errno != EINTR) {
 751  765                          zerror(zlogp, B_TRUE, "poll failed");
 752  766                          /* we are hosed, close connection */
 753  767                          break;
 754  768                  }
 755  769  
 756  770                  /* event from console side */
 757  771                  if (pollfds[0].revents) {
 758  772                          if (pollfds[0].revents &
 759  773                              (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 760  774                                  errno = 0;
 761  775                                  cc = read(consfd, ibuf, BUFSIZ);
 762  776                                  if (cc <= 0 && (errno != EINTR) &&
 763  777                                      (errno != EAGAIN))
 764  778                                          break;
 765  779                                  /*
 766  780                                   * Lose I/O if no one is listening
 767  781                                   */
 768  782                                  if (clifd != -1 && cc > 0)
 769  783                                          (void) write(clifd, ibuf, cc);
 770  784                          } else {
 771  785                                  pollerr = pollfds[0].revents;
 772  786                                  zerror(zlogp, B_FALSE,
 773  787                                      "closing connection with (console) "
 774  788                                      "pollerr %d\n", pollerr);
 775  789                                  break;
 776  790                          }
 777  791                  }
 778  792  
 779  793                  /* event from client side */
 780  794                  if (pollfds[1].revents) {
 781  795                          if (pollfds[1].revents &
 782  796                              (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 783  797                                  errno = 0;
 784  798                                  cc = read(clifd, ibuf, BUFSIZ);
 785  799                                  if (cc <= 0 && (errno != EINTR) &&
 786  800                                      (errno != EAGAIN))
 787  801                                          break;
 788  802                                  (void) write(consfd, ibuf, cc);
 789  803                          } else {
 790  804                                  pollerr = pollfds[1].revents;
 791  805                                  zerror(zlogp, B_FALSE,
 792  806                                      "closing connection with (client) "
 793  807                                      "pollerr %d\n", pollerr);
 794  808                                  break;
 795  809                          }
 796  810                  }
 797  811  
 798  812                  /* event from server socket */
 799  813                  if (pollfds[2].revents &&
 800  814                      (pollfds[2].revents & (POLLIN | POLLRDNORM))) {
 801  815                          if (clifd != -1) {
 802  816                                  /*
 803  817                                   * Test the client to see if it is really
 804  818                                   * still alive.  If it has died but we
 805  819                                   * haven't yet detected that, we might
 806  820                                   * deny a legitimate connect attempt.  If it
 807  821                                   * is dead, we break out; once we tear down
 808  822                                   * the old connection, the new connection
 809  823                                   * will happen.
 810  824                                   */
 811  825                                  if (test_client(clifd) == -1) {
 812  826                                          break;
 813  827                                  }
 814  828                                  /* we're already handling a client */
 815  829                                  reject_client(servfd, clipid);
 816  830  
 817  831  
 818  832                          } else if ((clifd = accept_client(servfd, &clipid,
 819  833                              clilocale, sizeof (clilocale),
 820  834                              &disconnect)) != -1) {
 821  835                                  pollfds[1].fd = clifd;
 822  836  
 823  837                          } else {
 824  838                                  break;
 825  839                          }
 826  840                  }
 827  841  
 828  842                  /*
 829  843                   * Watch for events on the eventstream.  This is how we get
 830  844                   * notified of the zone halting, etc.  It provides us a
 831  845                   * "wakeup" from poll when important things happen, which
 832  846                   * is good.
 833  847                   */
 834  848                  if (pollfds[3].revents) {
 835  849                          int evt = eventstream_read();
 836  850                          /*
 837  851                           * After we drain out the event, if we aren't servicing
 838  852                           * a console client, we hop back out to our caller,
 839  853                           * which will check to see if it is time to shutdown
 840  854                           * the daemon, or if we should take another console
 841  855                           * service lap.
 842  856                           */
 843  857                          if (clifd == -1) {
 844  858                                  break;
 845  859                          }
 846  860                          event_message(clifd, clilocale, evt, disconnect);
 847  861                          /*
 848  862                           * Special handling for the message that the zone is
 849  863                           * uninstalling; we boot the client, then break out
 850  864                           * of this function.  When we return to the
 851  865                           * serve_console loop, we will see that the zone is
 852  866                           * in a state < READY, and so zoneadmd will shutdown.
 853  867                           */
 854  868                          if (evt == Z_EVT_ZONE_UNINSTALLING) {
 855  869                                  break;
 856  870                          }
 857  871                          /*
 858  872                           * Diconnect if -C and -d options were specified and
 859  873                           * zone was halted or failed to boot.
 860  874                           */
 861  875                          if ((evt == Z_EVT_ZONE_HALTED ||
 862  876                              evt == Z_EVT_ZONE_BOOTFAILED) && disconnect) {
 863  877                                  break;
 864  878                          }
 865  879                  }
 866  880  
 867  881          }
 868  882  
 869  883          if (clifd != -1) {
 870  884                  (void) shutdown(clifd, SHUT_RDWR);
  
    | 
      ↓ open down ↓ | 
    183 lines elided | 
    
      ↑ open up ↑ | 
  
 871  885                  (void) close(clifd);
 872  886          }
 873  887  }
 874  888  
 875  889  int
 876  890  init_console(zlog_t *zlogp)
 877  891  {
 878  892          if (init_console_dev(zlogp) == -1) {
 879  893                  zerror(zlogp, B_FALSE,
 880  894                      "console setup: device initialization failed");
 881      -                return (-1);
 882  895          }
 883  896  
 884  897          if ((serverfd = init_console_sock(zlogp)) == -1) {
 885  898                  zerror(zlogp, B_FALSE,
 886  899                      "console setup: socket initialization failed");
 887  900                  return (-1);
 888  901          }
 889  902          return (0);
 890  903  }
 891  904  
 892  905  /*
      906 + * Maintain a simple flag that tracks if we have seen at least one state
      907 + * change. This is currently only used to handle the special case where we are
      908 + * running without a console device, which is what normally drives shutdown.
      909 + */
      910 +void
      911 +zcons_statechanged()
      912 +{
      913 +        state_changed = B_TRUE;
      914 +}
      915 +
      916 +/*
 893  917   * serve_console() is the master loop for driving console I/O.  It is also the
 894  918   * routine which is ultimately responsible for "pulling the plug" on zoneadmd
 895  919   * when it realizes that the daemon should shut down.
 896  920   *
 897  921   * The rules for shutdown are: there must be no console client, and the zone
 898  922   * state must be < ready.  However, we need to give things a chance to actually
 899  923   * get going when the daemon starts up-- otherwise the daemon would immediately
 900  924   * exit on startup if the zone was in the installed state, so we first drop
 901  925   * into the do_console_io() loop in order to give *something* a chance to
 902  926   * happen.
 903  927   */
 904  928  void
 905  929  serve_console(zlog_t *zlogp)
 906  930  {
 907  931          int masterfd;
 908  932          zone_state_t zstate;
 909  933          char conspath[MAXPATHLEN];
      934 +        static boolean_t cons_warned = B_FALSE;
 910  935  
 911  936          (void) snprintf(conspath, sizeof (conspath),
 912  937              "/dev/zcons/%s/%s", zone_name, ZCONS_MASTER_NAME);
 913  938  
 914  939          for (;;) {
 915  940                  masterfd = open(conspath, O_RDWR|O_NONBLOCK|O_NOCTTY);
 916  941                  if (masterfd == -1) {
      942 +                        if (master_zcons_failed) {
      943 +                                /*
      944 +                                 * If we don't have a console and the zone is
      945 +                                 * not shutting down, there may have been a
      946 +                                 * race/failure with devfs while creating the
      947 +                                 * console. In this case we want to leave the
      948 +                                 * zone up, even without a console, so
      949 +                                 * periodically recheck.
      950 +                                 */
      951 +                                int i;
      952 +
      953 +                                /*
      954 +                                 * In the normal flow of this loop, we use
      955 +                                 * do_console_io to give things a chance to get
      956 +                                 * going first. However, in this case we can't
      957 +                                 * use that, so we have to wait for at least
      958 +                                 * one state change before checking the state.
      959 +                                 */
      960 +                                for (i = 0; i < 60; i++) {
      961 +                                        if (state_changed)
      962 +                                                break;
      963 +                                        (void) sleep(1);
      964 +                                }
      965 +
      966 +                                if (i < 60 && zone_get_state(zone_name,
      967 +                                    &zstate) == Z_OK &&
      968 +                                    (zstate == ZONE_STATE_READY ||
      969 +                                    zstate == ZONE_STATE_RUNNING)) {
      970 +                                        if (!cons_warned) {
      971 +                                                zerror(zlogp, B_FALSE,
      972 +                                                    "WARNING: missing zone "
      973 +                                                    "console for %s",
      974 +                                                    zone_name);
      975 +                                                cons_warned = B_TRUE;
      976 +                                        }
      977 +                                        (void) sleep(ZCONS_RETRY);
      978 +                                        continue;
      979 +                                }
      980 +                        }
      981 +
 917  982                          zerror(zlogp, B_TRUE, "failed to open console master");
 918  983                          (void) mutex_lock(&lock);
 919  984                          goto death;
 920  985                  }
 921  986  
 922  987                  /*
 923  988                   * Setting RPROTDIS on the stream means that the control
 924  989                   * portion of messages received (which we don't care about)
 925  990                   * will be discarded by the stream head.  If we allowed such
 926  991                   * messages, we wouldn't be able to use read(2), as it fails
 927  992                   * (EBADMSG) when a message with a control element is received.
 928  993                   */
 929  994                  if (ioctl(masterfd, I_SRDOPT, RNORM|RPROTDIS) == -1) {
 930  995                          zerror(zlogp, B_TRUE, "failed to set options on "
 931  996                              "console master");
 932  997                          (void) mutex_lock(&lock);
 933  998                          goto death;
 934  999                  }
 935 1000  
 936 1001                  do_console_io(zlogp, masterfd, serverfd);
 937 1002  
 938 1003                  /*
 939 1004                   * We would prefer not to do this, but hostile zone processes
 940 1005                   * can cause the stream to become tainted, and reads will
 941 1006                   * fail.  So, in case something has gone seriously ill,
 942 1007                   * we dismantle the stream and reopen the console when we
 943 1008                   * take another lap.
 944 1009                   */
 945 1010                  (void) close(masterfd);
 946 1011  
 947 1012                  (void) mutex_lock(&lock);
 948 1013                  /*
 949 1014                   * We need to set death_throes (see below) atomically with
 950 1015                   * respect to noticing that (a) we have no console client and
 951 1016                   * (b) the zone is not installed.  Otherwise we could get a
 952 1017                   * request to boot during this time.  Once we set death_throes,
 953 1018                   * any incoming door stuff will be turned away.
 954 1019                   */
 955 1020                  if (zone_get_state(zone_name, &zstate) == Z_OK) {
 956 1021                          if (zstate < ZONE_STATE_READY)
 957 1022                                  goto death;
 958 1023                  } else {
 959 1024                          zerror(zlogp, B_FALSE,
 960 1025                              "unable to determine state of zone");
 961 1026                          goto death;
 962 1027                  }
 963 1028                  /*
 964 1029                   * Even if zone_get_state() fails, stay conservative, and
 965 1030                   * take another lap.
 966 1031                   */
 967 1032                  (void) mutex_unlock(&lock);
 968 1033          }
 969 1034  
 970 1035  death:
 971 1036          assert(MUTEX_HELD(&lock));
 972 1037          in_death_throes = B_TRUE;
 973 1038          (void) mutex_unlock(&lock);
 974 1039  
 975 1040          destroy_console_sock(serverfd);
 976 1041          (void) destroy_console_devs(zlogp);
 977 1042  }
  
    | 
      ↓ open down ↓ | 
    51 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX