Print this page
Merge cleanup from previous six commits
OS-2564 zone boot failed: could not start zoneadmd
*** 116,125 ****
--- 116,127 ----
#define ZCONSNEX_DEVTREEPATH "/pseudo/zconsnex@1"
#define ZCONSNEX_FILEPATH "/devices/pseudo/zconsnex@1"
#define CONSOLE_SOCKPATH ZONES_TMPDIR "/%s.console_sock"
+ #define ZCONS_RETRY 10
+
static int serverfd = -1; /* console server unix domain socket fd */
char boot_args[BOOTARGS_MAX];
/*
* The eventstream is a simple one-directional flow of messages from the
*** 127,138 ****
* It is used to wake up the console poller when it needs to take action,
* message the user, die off, etc.
*/
static int eventstream[2];
-
int
eventstream_init()
{
if (pipe(eventstream) == -1)
return (-1);
--- 129,143 ----
* It is used to wake up the console poller when it needs to take action,
* message the user, die off, etc.
*/
static int eventstream[2];
+ /* flag used to cope with race creating master zcons devlink */
+ static boolean_t master_zcons_failed = B_FALSE;
+ /* flag to track if we've seen a state change when there is no master zcons */
+ static boolean_t state_changed = B_FALSE;
int
eventstream_init()
{
if (pipe(eventstream) == -1)
return (-1);
*** 405,450 ****
/*
* Open the master side of the console and issue the ZC_HOLDSLAVE ioctl,
* which will cause the master to retain a reference to the slave.
* This prevents ttymon from blowing through the slave's STREAMS anchor.
*/
(void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
zone_name, ZCONS_MASTER_NAME);
if ((masterfd = open(conspath, O_RDWR | O_NOCTTY)) == -1) {
zerror(zlogp, B_TRUE, "ERROR: could not open master side of "
"zone console for %s to acquire slave handle", zone_name);
! goto error;
}
(void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
zone_name, ZCONS_SLAVE_NAME);
! if ((slavefd = open(conspath, O_RDWR | O_NOCTTY)) == -1) {
zerror(zlogp, B_TRUE, "ERROR: could not open slave side of zone"
" console for %s to acquire slave handle", zone_name);
! (void) close(masterfd);
! goto error;
! }
/*
* This ioctl can occasionally return ENXIO if devfs doesn't have
* everything plumbed up yet due to heavy zone startup load. Wait for
* 1 sec. and retry a few times before we fail to boot the zone.
*/
! for (i = 0; i < 5; i++) {
! if (ioctl(masterfd, ZC_HOLDSLAVE, (caddr_t)(intptr_t)slavefd)
! == 0) {
rv = 0;
break;
} else if (errno != ENXIO) {
break;
}
(void) sleep(1);
}
if (rv != 0)
! zerror(zlogp, B_TRUE, "ERROR: error while acquiring slave "
! "handle of zone console for %s", zone_name);
(void) close(slavefd);
(void) close(masterfd);
error:
if (ddef_hdl)
devctl_ddef_free(ddef_hdl);
--- 410,468 ----
/*
* Open the master side of the console and issue the ZC_HOLDSLAVE ioctl,
* which will cause the master to retain a reference to the slave.
* This prevents ttymon from blowing through the slave's STREAMS anchor.
+ *
+ * In very rare cases the open returns ENOENT if devfs doesn't have
+ * everything set up yet due to heavy zone startup load. Wait for
+ * 1 sec. and retry a few times. Even if we can't set up the zone's
+ * console, we still go ahead and boot the zone.
*/
(void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
zone_name, ZCONS_MASTER_NAME);
if ((masterfd = open(conspath, O_RDWR | O_NOCTTY)) == -1) {
zerror(zlogp, B_TRUE, "ERROR: could not open master side of "
"zone console for %s to acquire slave handle", zone_name);
! master_zcons_failed = B_TRUE;
}
(void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
zone_name, ZCONS_SLAVE_NAME);
! for (i = 0; i < ZCONS_RETRY; i++) {
! slavefd = open(conspath, O_RDWR | O_NOCTTY);
! if (slavefd >= 0 || errno != ENOENT)
! break;
! (void) sleep(1);
! }
! if (slavefd == -1)
zerror(zlogp, B_TRUE, "ERROR: could not open slave side of zone"
" console for %s to acquire slave handle", zone_name);
!
/*
* This ioctl can occasionally return ENXIO if devfs doesn't have
* everything plumbed up yet due to heavy zone startup load. Wait for
* 1 sec. and retry a few times before giving up on holding the slave.
*/
! if (masterfd != -1 && slavefd != -1) {
! for (i = 0; i < ZCONS_RETRY; i++) {
! if (ioctl(masterfd, ZC_HOLDSLAVE,
! (caddr_t)(intptr_t)slavefd) == 0) {
rv = 0;
break;
} else if (errno != ENXIO) {
break;
}
(void) sleep(1);
}
if (rv != 0)
! zerror(zlogp, B_TRUE, "ERROR: error while acquiring "
! "slave handle of zone console for %s", zone_name);
! }
+ if (slavefd != -1)
(void) close(slavefd);
+ if (masterfd != -1)
(void) close(masterfd);
error:
if (ddef_hdl)
devctl_ddef_free(ddef_hdl);
*** 872,882 ****
init_console(zlog_t *zlogp)
{
if (init_console_dev(zlogp) == -1) {
zerror(zlogp, B_FALSE,
"console setup: device initialization failed");
- return (-1);
}
if ((serverfd = init_console_sock(zlogp)) == -1) {
zerror(zlogp, B_FALSE,
"console setup: socket initialization failed");
--- 890,899 ----
*** 884,893 ****
--- 901,921 ----
}
return (0);
}
/*
+ * Maintain a simple flag that tracks if we have seen at least one state
+ * change. This is currently only used to handle the special case where we are
+ * running without the console device that normally drives shutdown.
+ */
+ void
+ zcons_statechanged()
+ {
+ state_changed = B_TRUE;
+ }
+
+ /*
* serve_console() is the master loop for driving console I/O. It is also the
* routine which is ultimately responsible for "pulling the plug" on zoneadmd
* when it realizes that the daemon should shut down.
*
* The rules for shutdown are: there must be no console client, and the zone
*** 901,917 ****
--- 929,986 ----
serve_console(zlog_t *zlogp)
{
int masterfd;
zone_state_t zstate;
char conspath[MAXPATHLEN];
+ static boolean_t cons_warned = B_FALSE;
(void) snprintf(conspath, sizeof (conspath),
"/dev/zcons/%s/%s", zone_name, ZCONS_MASTER_NAME);
for (;;) {
masterfd = open(conspath, O_RDWR|O_NONBLOCK|O_NOCTTY);
if (masterfd == -1) {
+ if (master_zcons_failed) {
+ /*
+ * If we don't have a console and the zone is
+ * not shutting down, there may have been a
+ * race/failure with devfs while creating the
+ * console. In this case we want to leave the
+ * zone up, even without a console, so
+ * periodically recheck.
+ */
+ int i;
+
+ /*
+ * In the normal flow of this loop, we use
+ * do_console_io to give things a chance to get
+ * going first. However, in this case we can't
+ * use that, so we have to wait for at least
+ * one state change before checking the state.
+ */
+ for (i = 0; i < 60; i++) {
+ if (state_changed)
+ break;
+ (void) sleep(1);
+ }
+
+ if (i < 60 && zone_get_state(zone_name,
+ &zstate) == Z_OK &&
+ (zstate == ZONE_STATE_READY ||
+ zstate == ZONE_STATE_RUNNING)) {
+ if (!cons_warned) {
+ zerror(zlogp, B_FALSE,
+ "WARNING: missing zone "
+ "console for %s",
+ zone_name);
+ cons_warned = B_TRUE;
+ }
+ (void) sleep(ZCONS_RETRY);
+ continue;
+ }
+ }
+
zerror(zlogp, B_TRUE, "failed to open console master");
(void) mutex_lock(&lock);
goto death;
}