Print this page
OS-5330 zoneadm mounting an lx or joyent branded zone fails
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
(NOTE: Manual port, because of divergence from SmartOS.)
Network interfaces need to be configured in /native/dev for LX.
Mismerged snap_hndl (should be handle) blocked lipkg zone boot
(NOTE: There are other instances of snap_hndl we pulled in from
illumos-joyent that may need to be nuked too.)
OS-1571 Placate gcc -Wparentheses
Reviewed by: Robert Mustacchi <rm@joyent.com>
OS-5292 zoneadmd should infer zone.max-processes
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-4915 want FX high priority zone configuration option
OS-4925 ps pri shows misleading value for zone in RT scheduling class
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-4254 libbrand token substitution incomplete for mount entries
OS-3776 project rctls should be in sync with zone rctls
OS-3524 in order to support interaction with docker containers, need to be able to connect to stdio for init from GZ
OS-3525 in order to support 'docker logs' need to be able to get stdio from zone to log file
OS-399 zone phys. mem. cap should be a rctl and have associated kstat
@@ -19,12 +19,12 @@
* CDDL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent Inc. All rights reserved.
* Copyright (c) 2015 by Delphix. All rights reserved.
+ * Copyright 2016, Joyent Inc.
*/
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
@@ -134,12 +134,10 @@
MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES
#define DFSTYPES "/etc/dfs/fstypes"
#define MAXTNZLEN 2048
-#define ALT_MOUNT(mount_cmd) ((mount_cmd) != Z_MNT_BOOT)
-
/* a reasonable estimate for the number of lwps per process */
#define LWPS_PER_PROCESS 10
/* for routing socket */
static int rts_seqno = 0;
@@ -159,10 +157,23 @@
static m_label_t *zid_label = NULL;
static priv_set_t *zprivs = NULL;
static const char *DFLT_FS_ALLOWED = "hsfs,smbfs,nfs,nfs3,nfs4,nfsdyn";
+typedef struct zone_proj_rctl_map {	/* pairs a zone rctl with a project rctl */
+ char *zpr_zone_rctl;	/* zone rctl name; NULL marks end of table */
+ char *zpr_project_rctl;	/* project rctl given the same values */
+} zone_proj_rctl_map_t;
+
+static zone_proj_rctl_map_t zone_proj_rctl_map[] = {	/* searched by zone_proj_rctl() */
+ {"zone.max-msg-ids", "project.max-msg-ids"},
+ {"zone.max-sem-ids", "project.max-sem-ids"},
+ {"zone.max-shm-ids", "project.max-shm-ids"},
+ {"zone.max-shm-memory", "project.max-shm-memory"},
+ {NULL, NULL}	/* sentinel: lookups stop at the first NULL zone rctl name */
+};
+
/* from libsocket, not in any header file */
extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
/* from zoneadmd */
extern char query_hook[];
@@ -1148,16 +1159,16 @@
/* Add user-specified devices and directories */
if ((handle = zonecfg_init_handle()) == NULL) {
zerror(zlogp, B_FALSE, "can't initialize zone handle");
goto cleanup;
}
- if (err = zonecfg_get_handle(zone_name, handle)) {
+ if ((err = zonecfg_get_handle(zone_name, handle)) != 0) {
zerror(zlogp, B_FALSE, "can't get handle for zone "
"%s: %s", zone_name, zonecfg_strerror(err));
goto cleanup;
}
- if (err = zonecfg_setdevent(handle)) {
+ if ((err = zonecfg_setdevent(handle)) != 0) {
zerror(zlogp, B_FALSE, "%s: %s", zone_name,
zonecfg_strerror(err));
goto cleanup;
}
while (zonecfg_getdevent(handle, &ztab) == Z_OK) {
@@ -1669,11 +1680,10 @@
static int
mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
{
char rootpath[MAXPATHLEN];
- char zonepath[MAXPATHLEN];
char brand[MAXNAMELEN];
char luroot[MAXPATHLEN];
int i, num_fs = 0;
struct zone_fstab *fs_ptr = NULL;
zone_dochandle_t handle = NULL;
@@ -1688,15 +1698,10 @@
zone_state_str(ZONE_STATE_READY),
zone_state_str(ZONE_STATE_MOUNTED));
goto bad;
}
- if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
- zerror(zlogp, B_TRUE, "unable to determine zone path");
- goto bad;
- }
-
if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
zerror(zlogp, B_TRUE, "unable to determine zone root");
goto bad;
}
@@ -1793,27 +1798,44 @@
goto bad;
qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare);
for (i = 0; i < num_fs; i++) {
- if (ALT_MOUNT(mount_cmd) &&
- strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) {
+ if (ALT_MOUNT(mount_cmd)) {
+ if (strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) {
size_t slen = strlen(rootpath) - 2;
/*
- * By default we'll try to mount /dev as /a/dev
- * but /dev is special and always goes at the top
- * so strip the trailing '/a' from the rootpath.
+ * By default we'll try to mount /dev
+ * as /a/dev but /dev is special and
+ * always goes at the top so strip the
+ * trailing '/a' from the rootpath.
*/
assert(strcmp(&rootpath[slen], "/a") == 0);
rootpath[slen] = '\0';
- if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd)
- != 0)
+ if (mount_one(zlogp, &fs_ptr[i], rootpath,
+ mount_cmd) != 0)
goto bad;
rootpath[slen] = '/';
continue;
+ } else if (strcmp(brand_name, default_brand) != 0) {
+ /*
+ * If mounting non-native brand, skip
+ * mounting global mounts and
+ * filesystem entries since they are
+ * only needed for native pkg upgrade
+ * tools.
+ *
+ * The only exception right now is
+ * /dev (handled above), which is
+ * needed in the luroot in order to
+ * zlogin -S into the zone.
+ */
+ continue;
}
+ }
+
if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd) != 0)
goto bad;
}
if (ALT_MOUNT(mount_cmd) &&
!build_mounted_post_var(zlogp, mount_cmd, rootpath, luroot))
@@ -2886,10 +2908,29 @@
free(new);
}
}
/*
+ * For IP networking, we need to use the illumos-native device tree. For most
+ * zones, this is $ZONEROOT/dev. For LX ones, it's $ZONEROOT/native/dev.
+ * Return the appropriate post-$ZONEROOT path.
+ */
+static char *
+get_brand_dev(void)
+{
+	/*
+	 * Only the lx brand keeps the illumos-native device tree under
+	 * /native; every other brand uses /dev directly beneath the zone
+	 * root.  Each path is spelled out in full so that neither depends
+	 * on a hand-counted offset into the other.
+	 *
+	 * The returned strings are literals: callers must treat them as
+	 * read-only and must not free them.
+	 */
+	if (strcmp(brand_name, "lx") == 0)
+		return ("/native/dev");
+
+	return ("/dev");
+}
+
+/*
* Add the kernel access control information for the interface names.
* If anything goes wrong, we log a general error message, attempt to tear down
* whatever we set up, and return an error.
*/
static int
@@ -2931,11 +2972,11 @@
zerror(zlogp, B_TRUE,
"unable to determine dev root");
return (-1);
}
(void) snprintf(path, sizeof (path), "%s%s", rootpath,
- "/dev");
+ get_brand_dev());
if (di_prof_init(path, &prof) != 0) {
(void) zonecfg_endnwifent(handle);
zonecfg_fini_handle(handle);
zerror(zlogp, B_TRUE,
"failed to initialize profile");
@@ -3315,10 +3356,23 @@
free(privname);
zonecfg_fini_handle(handle);
return (error);
}
+/*
+ * Find the project rctl paired with the zone rctl called 'name'.
+ * Returns the project rctl name recorded in zone_proj_rctl_map, or NULL
+ * when the table holds no entry for 'name'.
+ */
+static char *
+zone_proj_rctl(const char *name)
+{
+	int idx = 0;
+
+	/* The table ends at the first entry whose zone rctl name is NULL. */
+	while (zone_proj_rctl_map[idx].zpr_zone_rctl != NULL) {
+		if (strcmp(name, zone_proj_rctl_map[idx].zpr_zone_rctl) != 0) {
+			idx++;
+			continue;
+		}
+
+		return (zone_proj_rctl_map[idx].zpr_project_rctl);
+	}
+
+	return (NULL);
+}
+
static int
get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
{
nvlist_t *nvl = NULL;
char *nvl_packed = NULL;
@@ -3329,10 +3383,11 @@
zone_dochandle_t handle;
struct zone_rctltab rctltab;
rctlblk_t *rctlblk = NULL;
uint64_t maxlwps;
uint64_t maxprocs;
+ int rproc, rlwp;
*bufp = NULL;
*bufsizep = 0;
if ((handle = zonecfg_init_handle()) == NULL) {
@@ -3351,24 +3406,33 @@
goto out;
}
/*
* Allow the administrator to control both the maximum number of
- * process table slots and the maximum number of lwps with just the
- * max-processes property. If only the max-processes property is set,
- * we add a max-lwps property with a limit derived from max-processes.
+ * process table slots, and the maximum number of lwps, with a single
+ * max-processes or max-lwps property. If only the max-processes
+ * property is set, we add a max-lwps property with a limit derived
+ * from max-processes. If only the max-lwps property is set, we add a
+ * max-processes property with the same limit as max-lwps.
*/
- if (zonecfg_get_aliased_rctl(handle, ALIAS_MAXPROCS, &maxprocs)
- == Z_OK &&
- zonecfg_get_aliased_rctl(handle, ALIAS_MAXLWPS, &maxlwps)
- == Z_NO_ENTRY) {
- if (zonecfg_set_aliased_rctl(handle, ALIAS_MAXLWPS,
+ rproc = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXPROCS, &maxprocs);
+ rlwp = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXLWPS, &maxlwps);
+ if (rproc == Z_OK && rlwp == Z_NO_ENTRY) {
+ if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXLWPS,
maxprocs * LWPS_PER_PROCESS) != Z_OK) {
zerror(zlogp, B_FALSE, "unable to set max-lwps alias");
goto out;
}
+ } else if (rlwp == Z_OK && rproc == Z_NO_ENTRY) {
+ /* no scaling for max-proc value */
+ if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXPROCS,
+ maxlwps) != Z_OK) {
+ zerror(zlogp, B_FALSE,
+ "unable to set max-processes alias");
+ goto out;
}
+ }
if (zonecfg_setrctlent(handle) != Z_OK) {
zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
goto out;
}
@@ -3379,10 +3443,11 @@
}
while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) {
struct zone_rctlvaltab *rctlval;
uint_t i, count;
const char *name = rctltab.zone_rctl_name;
+ char *proj_nm;
/* zoneadm should have already warned about unknown rctls. */
if (!zonecfg_is_rctl(name)) {
zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
rctltab.zone_rctl_valptr = NULL;
@@ -3445,10 +3510,30 @@
goto out;
}
}
zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
rctltab.zone_rctl_valptr = NULL;
+
+ /*
+ * With no action on our part we will start zsched with the
+ * project rctl values for our (zoneadmd) current project. For
+ * brands running a variant of Illumos, that's not a problem
+ * since they will setup their own projects, but for a
+ * non-native brand like lx, where there are no projects, we
+ * want to start things up with the same project rctls as the
+ * corresponding zone rctls, since nothing within the zone will
+ * ever change the project rctls.
+ */
+ if ((proj_nm = zone_proj_rctl(name)) != NULL) {
+ if (nvlist_add_nvlist_array(nvl, proj_nm, nvlv, count)
+ != 0) {
+ zerror(zlogp, B_FALSE,
+ "nvlist_add_nvlist_arrays failed");
+ goto out;
+ }
+ }
+
if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count)
!= 0) {
zerror(zlogp, B_FALSE, "%s failed",
"nvlist_add_nvlist_array");
goto out;
@@ -3706,21 +3791,15 @@
{
int error = -1;
zfs_handle_t *zhp;
libzfs_handle_t *hdl;
m_label_t ds_sl;
- char zonepath[MAXPATHLEN];
char ds_hexsl[MAXNAMELEN];
if (!is_system_labeled())
return (0);
- if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
- zerror(zlogp, B_TRUE, "unable to determine zone path");
- return (-1);
- }
-
if (!is_zonepath_zfs(zonepath))
return (0);
if ((hdl = libzfs_init()) == NULL) {
zerror(zlogp, B_FALSE, "opening ZFS library");
@@ -4387,19 +4466,17 @@
}
return (B_FALSE);
}
/*
- * Set memory cap and pool info for the zone's resource management
- * configuration.
+ * Set pool info for the zone's resource management configuration.
*/
static int
setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
{
int res;
uint64_t tmp;
- struct zone_mcaptab mcap;
char sched[MAXNAMELEN];
zone_dochandle_t handle = NULL;
char pool_err[128];
if ((handle = zonecfg_init_handle()) == NULL) {
@@ -4411,42 +4488,46 @@
zerror(zlogp, B_FALSE, "invalid configuration");
zonecfg_fini_handle(handle);
return (res);
}
- /*
- * If a memory cap is configured, set the cap in the kernel using
- * zone_setattr() and make sure the rcapd SMF service is enabled.
- */
- if (zonecfg_getmcapent(handle, &mcap) == Z_OK) {
- uint64_t num;
- char smf_err[128];
-
- num = (uint64_t)strtoull(mcap.zone_physmem_cap, NULL, 10);
- if (zone_setattr(zoneid, ZONE_ATTR_PHYS_MCAP, &num, 0) == -1) {
- zerror(zlogp, B_TRUE, "could not set zone memory cap");
- zonecfg_fini_handle(handle);
- return (Z_INVAL);
- }
-
- if (zonecfg_enable_rcapd(smf_err, sizeof (smf_err)) != Z_OK) {
- zerror(zlogp, B_FALSE, "enabling system/rcap service "
- "failed: %s", smf_err);
- zonecfg_fini_handle(handle);
- return (Z_INVAL);
- }
- }
-
/* Get the scheduling class set in the zone configuration. */
if (zonecfg_get_sched_class(handle, sched, sizeof (sched)) == Z_OK &&
strlen(sched) > 0) {
if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, sched,
strlen(sched)) == -1)
zerror(zlogp, B_TRUE, "WARNING: unable to set the "
"default scheduling class");
- } else if (zonecfg_get_aliased_rctl(handle, ALIAS_SHARES, &tmp)
+ if (strcmp(sched, "FX") == 0) {
+ /*
+ * When FX is specified then by default all processes
+ * will start at the lowest priority level (0) and
+ * stay there. We support an optional attr which
+ * indicates that all the processes should be "high
+ * priority". We set this on the zone so that starting
+ * init will set the priority high.
+ */
+ struct zone_attrtab a;
+
+ bzero(&a, sizeof (a));
+ (void) strlcpy(a.zone_attr_name, "fixed-hi-prio",
+ sizeof (a.zone_attr_name));
+
+ if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
+ strcmp(a.zone_attr_value, "true") == 0) {
+ boolean_t hi = B_TRUE;
+
+ if (zone_setattr(zoneid,
+ ZONE_ATTR_SCHED_FIXEDHI, (void *)hi,
+ sizeof (hi)) == -1)
+ zerror(zlogp, B_TRUE, "WARNING: unable "
+ "to set high priority");
+ }
+ }
+
+ } else if (zonecfg_get_aliased_rctl(snap_hndl, ALIAS_SHARES, &tmp)
== Z_OK) {
/*
* If the zone has the zone.cpu-shares rctl set then we want to
* use the Fair Share Scheduler (FSS) for processes in the
* zone. Check what scheduling class the zone would be running
@@ -4989,11 +5070,11 @@
}
int
vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid)
{
- char zonepath[MAXPATHLEN];
+ char zpath[MAXPATHLEN];
if (mount_cmd == Z_MNT_BOOT && validate_datasets(zlogp) != 0) {
lofs_discard_mnttab();
return (-1);
}
@@ -5000,19 +5081,15 @@
/*
* Before we try to mount filesystems we need to create the
* attribute backing store for /dev
*/
- if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
- lofs_discard_mnttab();
- return (-1);
- }
- resolve_lofs(zlogp, zonepath, sizeof (zonepath));
+ (void) strlcpy(zpath, zonepath, sizeof (zpath));
+ resolve_lofs(zlogp, zpath, sizeof (zpath));
/* Make /dev directory owned by root, grouped sys */
- if (make_one_dir(zlogp, zonepath, "/dev", DEFAULT_DIR_MODE,
- 0, 3) != 0) {
+ if (make_one_dir(zlogp, zpath, "/dev", DEFAULT_DIR_MODE, 0, 3) != 0) {
lofs_discard_mnttab();
return (-1);
}
if (mount_filesystems(zlogp, mount_cmd) != 0) {
@@ -5124,11 +5201,10 @@
{
char *kzone;
zoneid_t zoneid;
int res;
char pool_err[128];
- char zpath[MAXPATHLEN];
char cmdbuf[MAXPATHLEN];
brand_handle_t bh = NULL;
dladm_status_t status;
char errmsg[DLADM_STRSIZE];
ushort_t flags;
@@ -5180,16 +5256,10 @@
if (zone_shutdown(zoneid) != 0) {
zerror(zlogp, B_TRUE, "unable to shutdown zone");
goto error;
}
- /* Get the zonepath of this zone */
- if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
- zerror(zlogp, B_FALSE, "unable to determine zone path");
- goto error;
- }
-
/* Get a handle to the brand info for this zone */
if ((bh = brand_open(brand_name)) == NULL) {
zerror(zlogp, B_FALSE, "unable to determine zone brand");
return (-1);
}
@@ -5196,11 +5266,11 @@
/*
* If there is a brand 'halt' callback, execute it now to give the
* brand a chance to cleanup any custom configuration.
*/
(void) strcpy(cmdbuf, EXEC_PREFIX);
- if (brand_get_halt(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
+ if (brand_get_halt(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
sizeof (cmdbuf) - EXEC_LEN) < 0) {
brand_close(bh);
zerror(zlogp, B_FALSE, "unable to determine branded zone's "
"halt callback.");
goto error;