Print this page
Merge cleanup from previous six commits
OS-2564 zone boot failed: could not start zoneadmd

*** 116,125 **** --- 116,127 ---- #define ZCONSNEX_DEVTREEPATH "/pseudo/zconsnex@1" #define ZCONSNEX_FILEPATH "/devices/pseudo/zconsnex@1" #define CONSOLE_SOCKPATH ZONES_TMPDIR "/%s.console_sock" + #define ZCONS_RETRY 10 + static int serverfd = -1; /* console server unix domain socket fd */ char boot_args[BOOTARGS_MAX]; /* * The eventstream is a simple one-directional flow of messages from the
*** 127,138 **** * It is used to wake up the console poller when it needs to take action, * message the user, die off, etc. */ static int eventstream[2]; - int eventstream_init() { if (pipe(eventstream) == -1) return (-1); --- 129,143 ---- * It is used to wake up the console poller when it needs to take action, * message the user, die off, etc. */ static int eventstream[2]; + /* flag used to cope with race creating master zcons devlink */ + static boolean_t master_zcons_failed = B_FALSE; + /* flag to track if we've seen a state change when there is no master zcons */ + static boolean_t state_changed = B_FALSE; int eventstream_init() { if (pipe(eventstream) == -1) return (-1);
*** 405,450 **** /* * Open the master side of the console and issue the ZC_HOLDSLAVE ioctl, * which will cause the master to retain a reference to the slave. * This prevents ttymon from blowing through the slave's STREAMS anchor. */ (void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s", zone_name, ZCONS_MASTER_NAME); if ((masterfd = open(conspath, O_RDWR | O_NOCTTY)) == -1) { zerror(zlogp, B_TRUE, "ERROR: could not open master side of " "zone console for %s to acquire slave handle", zone_name); ! goto error; } (void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s", zone_name, ZCONS_SLAVE_NAME); ! if ((slavefd = open(conspath, O_RDWR | O_NOCTTY)) == -1) { zerror(zlogp, B_TRUE, "ERROR: could not open slave side of zone" " console for %s to acquire slave handle", zone_name); ! (void) close(masterfd); ! goto error; ! } /* * This ioctl can occasionally return ENXIO if devfs doesn't have * everything plumbed up yet due to heavy zone startup load. Wait for * 1 sec. and retry a few times before we fail to boot the zone. */ ! for (i = 0; i < 5; i++) { ! if (ioctl(masterfd, ZC_HOLDSLAVE, (caddr_t)(intptr_t)slavefd) ! == 0) { rv = 0; break; } else if (errno != ENXIO) { break; } (void) sleep(1); } if (rv != 0) ! zerror(zlogp, B_TRUE, "ERROR: error while acquiring slave " ! "handle of zone console for %s", zone_name); (void) close(slavefd); (void) close(masterfd); error: if (ddef_hdl) devctl_ddef_free(ddef_hdl); --- 410,468 ---- /* * Open the master side of the console and issue the ZC_HOLDSLAVE ioctl, * which will cause the master to retain a reference to the slave. * This prevents ttymon from blowing through the slave's STREAMS anchor. + * + * In very rare cases the open returns ENOENT if devfs doesn't have + * everything setup yet due to heavy zone startup load. Wait for + * 1 sec. and retry a few times. Even if we can't setup the zone's + * console, we still go ahead and boot the zone. */ (void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s", zone_name, ZCONS_MASTER_NAME); if ((masterfd = open(conspath, O_RDWR | O_NOCTTY)) == -1) { zerror(zlogp, B_TRUE, "ERROR: could not open master side of " "zone console for %s to acquire slave handle", zone_name); ! master_zcons_failed = B_TRUE; } (void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s", zone_name, ZCONS_SLAVE_NAME); ! for (i = 0; i < ZCONS_RETRY; i++) { ! slavefd = open(conspath, O_RDWR | O_NOCTTY); ! if (slavefd >= 0 || errno != ENOENT) ! break; ! (void) sleep(1); ! } ! if (slavefd == -1) zerror(zlogp, B_TRUE, "ERROR: could not open slave side of zone" " console for %s to acquire slave handle", zone_name); ! /* * This ioctl can occasionally return ENXIO if devfs doesn't have * everything plumbed up yet due to heavy zone startup load. Wait for * 1 sec. and retry a few times before we fail to boot the zone. */ ! if (masterfd != -1 && slavefd != -1) { ! for (i = 0; i < ZCONS_RETRY; i++) { ! if (ioctl(masterfd, ZC_HOLDSLAVE, ! (caddr_t)(intptr_t)slavefd) == 0) { rv = 0; break; } else if (errno != ENXIO) { break; } (void) sleep(1); } if (rv != 0) ! zerror(zlogp, B_TRUE, "ERROR: error while acquiring " ! "slave handle of zone console for %s", zone_name); ! } + if (slavefd != -1) (void) close(slavefd); + if (masterfd != -1) (void) close(masterfd); error: if (ddef_hdl) devctl_ddef_free(ddef_hdl);
*** 872,882 **** init_console(zlog_t *zlogp) { if (init_console_dev(zlogp) == -1) { zerror(zlogp, B_FALSE, "console setup: device initialization failed"); - return (-1); } if ((serverfd = init_console_sock(zlogp)) == -1) { zerror(zlogp, B_FALSE, "console setup: socket initialization failed"); --- 890,899 ----
*** 884,893 **** --- 901,921 ---- } return (0); } /* + * Maintain a simple flag that tracks if we have seen at least one state + * change. This is currently only used to handle the special case where we are + * running without a console device, which is what normally drives shutdown. + */ + void + zcons_statechanged() + { + state_changed = B_TRUE; + } + + /* * serve_console() is the master loop for driving console I/O. It is also the * routine which is ultimately responsible for "pulling the plug" on zoneadmd * when it realizes that the daemon should shut down. * * The rules for shutdown are: there must be no console client, and the zone
*** 901,917 **** --- 929,986 ---- serve_console(zlog_t *zlogp) { int masterfd; zone_state_t zstate; char conspath[MAXPATHLEN]; + static boolean_t cons_warned = B_FALSE; (void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s", zone_name, ZCONS_MASTER_NAME); for (;;) { masterfd = open(conspath, O_RDWR|O_NONBLOCK|O_NOCTTY); if (masterfd == -1) { + if (master_zcons_failed) { + /* + * If we don't have a console and the zone is + * not shutting down, there may have been a + * race/failure with devfs while creating the + * console. In this case we want to leave the + * zone up, even without a console, so + * periodically recheck. + */ + int i; + + /* + * In the normal flow of this loop, we use + * do_console_io to give things a chance to get + * going first. However, in this case we can't + * use that, so we have to wait for at least + * one state change before checking the state. + */ + for (i = 0; i < 60; i++) { + if (state_changed) + break; + (void) sleep(1); + } + + if (i < 60 && zone_get_state(zone_name, + &zstate) == Z_OK && + (zstate == ZONE_STATE_READY || + zstate == ZONE_STATE_RUNNING)) { + if (!cons_warned) { + zerror(zlogp, B_FALSE, + "WARNING: missing zone " + "console for %s", + zone_name); + cons_warned = B_TRUE; + } + (void) sleep(ZCONS_RETRY); + continue; + } + } + zerror(zlogp, B_TRUE, "failed to open console master"); (void) mutex_lock(&lock); goto death; }