Print this page
Merge cleanup from previous six commits
OS-2564 zone boot failed: could not start zoneadmd
@@ -116,10 +116,12 @@
#define ZCONSNEX_DEVTREEPATH "/pseudo/zconsnex@1"
#define ZCONSNEX_FILEPATH "/devices/pseudo/zconsnex@1"
#define CONSOLE_SOCKPATH ZONES_TMPDIR "/%s.console_sock"
+#define ZCONS_RETRY 10
+
static int serverfd = -1; /* console server unix domain socket fd */
char boot_args[BOOTARGS_MAX];
/*
* The eventstream is a simple one-directional flow of messages from the
@@ -127,12 +129,15 @@
* It is used to wake up the console poller when it needs to take action,
* message the user, die off, etc.
*/
static int eventstream[2];
+/* flag used to cope with race creating master zcons devlink */
+static boolean_t master_zcons_failed = B_FALSE;
+/* flag to track if we've seen a state change when there is no master zcons */
+static boolean_t state_changed = B_FALSE;
-
int
eventstream_init()
{
if (pipe(eventstream) == -1)
return (-1);
@@ -405,46 +410,59 @@
/*
* Open the master side of the console and issue the ZC_HOLDSLAVE ioctl,
* which will cause the master to retain a reference to the slave.
* This prevents ttymon from blowing through the slave's STREAMS anchor.
+ *
+ * In very rare cases the open returns ENOENT if devfs doesn't have
+ * everything setup yet due to heavy zone startup load. Wait for
+ * 1 sec. and retry a few times. Even if we can't setup the zone's
+ * console, we still go ahead and boot the zone.
*/
(void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
zone_name, ZCONS_MASTER_NAME);
if ((masterfd = open(conspath, O_RDWR | O_NOCTTY)) == -1) {
zerror(zlogp, B_TRUE, "ERROR: could not open master side of "
"zone console for %s to acquire slave handle", zone_name);
- goto error;
+ master_zcons_failed = B_TRUE;
}
(void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
zone_name, ZCONS_SLAVE_NAME);
- if ((slavefd = open(conspath, O_RDWR | O_NOCTTY)) == -1) {
+ for (i = 0; i < ZCONS_RETRY; i++) {
+ slavefd = open(conspath, O_RDWR | O_NOCTTY);
+ if (slavefd >= 0 || errno != ENOENT)
+ break;
+ (void) sleep(1);
+ }
+ if (slavefd == -1)
zerror(zlogp, B_TRUE, "ERROR: could not open slave side of zone"
" console for %s to acquire slave handle", zone_name);
- (void) close(masterfd);
- goto error;
- }
+
/*
* This ioctl can occasionally return ENXIO if devfs doesn't have
* everything plumbed up yet due to heavy zone startup load. Wait for
* 1 sec. and retry a few times before we fail to boot the zone.
*/
- for (i = 0; i < 5; i++) {
- if (ioctl(masterfd, ZC_HOLDSLAVE, (caddr_t)(intptr_t)slavefd)
- == 0) {
+ if (masterfd != -1 && slavefd != -1) {
+ for (i = 0; i < ZCONS_RETRY; i++) {
+ if (ioctl(masterfd, ZC_HOLDSLAVE,
+ (caddr_t)(intptr_t)slavefd) == 0) {
rv = 0;
break;
} else if (errno != ENXIO) {
break;
}
(void) sleep(1);
}
if (rv != 0)
- zerror(zlogp, B_TRUE, "ERROR: error while acquiring slave "
- "handle of zone console for %s", zone_name);
+ zerror(zlogp, B_TRUE, "ERROR: error while acquiring "
+ "slave handle of zone console for %s", zone_name);
+ }
+ if (slavefd != -1)
(void) close(slavefd);
+ if (masterfd != -1)
(void) close(masterfd);
error:
if (ddef_hdl)
devctl_ddef_free(ddef_hdl);
@@ -872,11 +890,10 @@
init_console(zlog_t *zlogp)
{
if (init_console_dev(zlogp) == -1) {
zerror(zlogp, B_FALSE,
"console setup: device initialization failed");
- return (-1);
}
if ((serverfd = init_console_sock(zlogp)) == -1) {
zerror(zlogp, B_FALSE,
"console setup: socket initialization failed");
@@ -884,10 +901,21 @@
}
return (0);
}
/*
+ * Maintain a simple flag that tracks if we have seen at least one state
+ * change. This is currently only used to handle the special case where we are
+ * running without a console device, which is what normally drives shutdown.
+ */
+void
+zcons_statechanged()
+{
+ state_changed = B_TRUE;
+}
+
+/*
* serve_console() is the master loop for driving console I/O. It is also the
* routine which is ultimately responsible for "pulling the plug" on zoneadmd
* when it realizes that the daemon should shut down.
*
* The rules for shutdown are: there must be no console client, and the zone
@@ -901,17 +929,58 @@
serve_console(zlog_t *zlogp)
{
int masterfd;
zone_state_t zstate;
char conspath[MAXPATHLEN];
+ static boolean_t cons_warned = B_FALSE;
(void) snprintf(conspath, sizeof (conspath),
"/dev/zcons/%s/%s", zone_name, ZCONS_MASTER_NAME);
for (;;) {
masterfd = open(conspath, O_RDWR|O_NONBLOCK|O_NOCTTY);
if (masterfd == -1) {
+ if (master_zcons_failed) {
+ /*
+ * If we don't have a console and the zone is
+ * not shutting down, there may have been a
+ * race/failure with devfs while creating the
+ * console. In this case we want to leave the
+ * zone up, even without a console, so
+ * periodically recheck.
+ */
+ int i;
+
+ /*
+ * In the normal flow of this loop, we use
+ * do_console_io to give things a chance to get
+ * going first. However, in this case we can't
+ * use that, so we have to wait for at least
+ * one state change before checking the state.
+ */
+ for (i = 0; i < 60; i++) {
+ if (state_changed)
+ break;
+ (void) sleep(1);
+ }
+
+ if (i < 60 && zone_get_state(zone_name,
+ &zstate) == Z_OK &&
+ (zstate == ZONE_STATE_READY ||
+ zstate == ZONE_STATE_RUNNING)) {
+ if (!cons_warned) {
+ zerror(zlogp, B_FALSE,
+ "WARNING: missing zone "
+ "console for %s",
+ zone_name);
+ cons_warned = B_TRUE;
+ }
+ (void) sleep(ZCONS_RETRY);
+ continue;
+ }
+ }
+
zerror(zlogp, B_TRUE, "failed to open console master");
(void) mutex_lock(&lock);
goto death;
}