Print this page
OS-5330 zoneadm mounting an lx or joyent branded zone fails
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
(NOTE: Manual port, because of divergence from SmartOS.)
Network interfaces need to be configured in /native/dev for LX.
Mismerged snap_hndl (should be handle) blocked lipkg zone boot
(NOTE:  There are other instances of snap_hndl we pulled in from
        illumos-joyent that may need to be nuked too.)
OS-1571 Placate gcc -Wparentheses
Reviewed by: Robert Mustacchi <rm@joyent.com>
OS-5292 zoneadmd should infer zone.max-processes
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-4915 want FX high priority zone configuration option
OS-4925 ps pri shows misleading value for zone in RT scheduling class
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
OS-4254 libbrand token substitution incomplete for mount entries
OS-3776 project rctls should be in sync with zone rctls
OS-3524 in order to support interaction with docker containers, need to be able to connect to stdio for init from GZ
OS-3525 in order to support 'docker logs' need to be able to get stdio from zone to log file
OS-399 zone phys. mem. cap should be a rctl and have associated kstat
        
@@ -19,12 +19,12 @@
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent Inc. All rights reserved.
  * Copyright (c) 2015 by Delphix. All rights reserved.
+ * Copyright 2016, Joyent Inc.
  */
 
 /*
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  */
@@ -134,12 +134,10 @@
         MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES
 
 #define DFSTYPES        "/etc/dfs/fstypes"
 #define MAXTNZLEN       2048
 
-#define ALT_MOUNT(mount_cmd)    ((mount_cmd) != Z_MNT_BOOT)
-
 /* a reasonable estimate for the number of lwps per process */
 #define LWPS_PER_PROCESS        10
 
 /* for routing socket */
 static int rts_seqno = 0;
@@ -159,10 +157,23 @@
 static m_label_t *zid_label = NULL;
 static priv_set_t *zprivs = NULL;
 
 static const char *DFLT_FS_ALLOWED = "hsfs,smbfs,nfs,nfs3,nfs4,nfsdyn";
 
+/*
+ * Pairs a zone-wide rctl name with the project rctl name that
+ * zone_proj_rctl() returns for it.
+ */
+typedef struct zone_proj_rctl_map {
+        char *zpr_zone_rctl;            /* zone rctl name (lookup key) */
+        char *zpr_project_rctl;         /* matching project rctl name */
+} zone_proj_rctl_map_t;
+
+/*
+ * Zone rctls whose values get_rctls() also adds under the corresponding
+ * project rctl name.  The {NULL, NULL} entry terminates the table;
+ * zone_proj_rctl() stops scanning when it reaches it.
+ */
+static zone_proj_rctl_map_t zone_proj_rctl_map[] = {
+        {"zone.max-msg-ids",    "project.max-msg-ids"},
+        {"zone.max-sem-ids",    "project.max-sem-ids"},
+        {"zone.max-shm-ids",    "project.max-shm-ids"},
+        {"zone.max-shm-memory", "project.max-shm-memory"},
+        {NULL,                  NULL}
+};
+
 /* from libsocket, not in any header file */
 extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
 
 /* from zoneadmd */
 extern char query_hook[];
@@ -1148,16 +1159,16 @@
         /* Add user-specified devices and directories */
         if ((handle = zonecfg_init_handle()) == NULL) {
                 zerror(zlogp, B_FALSE, "can't initialize zone handle");
                 goto cleanup;
         }
-        if (err = zonecfg_get_handle(zone_name, handle)) {
+        if ((err = zonecfg_get_handle(zone_name, handle)) != 0) {
                 zerror(zlogp, B_FALSE, "can't get handle for zone "
                     "%s: %s", zone_name, zonecfg_strerror(err));
                 goto cleanup;
         }
-        if (err = zonecfg_setdevent(handle)) {
+        if ((err = zonecfg_setdevent(handle)) != 0) {
                 zerror(zlogp, B_FALSE, "%s: %s", zone_name,
                     zonecfg_strerror(err));
                 goto cleanup;
         }
         while (zonecfg_getdevent(handle, &ztab) == Z_OK) {
@@ -1669,11 +1680,10 @@
 
 static int
 mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
 {
         char rootpath[MAXPATHLEN];
-        char zonepath[MAXPATHLEN];
         char brand[MAXNAMELEN];
         char luroot[MAXPATHLEN];
         int i, num_fs = 0;
         struct zone_fstab *fs_ptr = NULL;
         zone_dochandle_t handle = NULL;
@@ -1688,15 +1698,10 @@
                     zone_state_str(ZONE_STATE_READY),
                     zone_state_str(ZONE_STATE_MOUNTED));
                 goto bad;
         }
 
-        if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
-                zerror(zlogp, B_TRUE, "unable to determine zone path");
-                goto bad;
-        }
-
         if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
                 zerror(zlogp, B_TRUE, "unable to determine zone root");
                 goto bad;
         }
 
@@ -1793,27 +1798,44 @@
                 goto bad;
 
         qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare);
 
         for (i = 0; i < num_fs; i++) {
-                if (ALT_MOUNT(mount_cmd) &&
-                    strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) {
+                if (ALT_MOUNT(mount_cmd)) {
+                        if (strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) {
                         size_t slen = strlen(rootpath) - 2;
 
                         /*
-                         * By default we'll try to mount /dev as /a/dev
-                         * but /dev is special and always goes at the top
-                         * so strip the trailing '/a' from the rootpath.
+                                 * By default we'll try to mount /dev
+                                 * as /a/dev but /dev is special and
+                                 * always goes at the top so strip the
+                                 * trailing '/a' from the rootpath.
                          */
                         assert(strcmp(&rootpath[slen], "/a") == 0);
                         rootpath[slen] = '\0';
-                        if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd)
-                            != 0)
+                                if (mount_one(zlogp, &fs_ptr[i], rootpath,
+                                    mount_cmd) != 0)
                                 goto bad;
                         rootpath[slen] = '/';
                         continue;
+                        } else if (strcmp(brand_name, default_brand) != 0) {
+                                /*
+                                 * If mounting non-native brand, skip
+                                 * mounting global mounts and
+                                 * filesystem entries since they are
+                                 * only needed for native pkg upgrade
+                                 * tools.
+                                 *
+                                 * The only exception right now is
+                                 * /dev (handled above), which is
+                                 * needed in the luroot in order to
+                                 * zlogin -S into the zone.
+                                 */
+                                continue;
                 }
+                }
+
                 if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd) != 0)
                         goto bad;
         }
         if (ALT_MOUNT(mount_cmd) &&
             !build_mounted_post_var(zlogp, mount_cmd, rootpath, luroot))
@@ -2886,10 +2908,29 @@
                 free(new);
         }
 }
 
 /*
+ * For IP networking, we need to use the illumos-native device tree.  For most
+ * zones, this is $ZONEROOT/dev.  For LX ones, it's $ZONEROOT/native/dev.
+ * Return the appropriate post-$ZONEROOT path.
+ *
+ * The result is chosen by comparing the global brand_name against "lx".
+ * It always points at a string constant, so callers must neither modify
+ * nor free it.
+ */
+static char *
+get_brand_dev(void)
+{
+        /* LX zones are the exception... */
+        if (strcmp(brand_name, "lx") == 0)
+                return ("/native/dev");
+
+        /*
+         * Spell the default out as its own literal instead of pointing
+         * a hand-counted number of bytes into the LX path, so the two
+         * strings cannot drift out of sync.
+         */
+        return ("/dev");
+}
+
+/*
  * Add the kernel access control information for the interface names.
  * If anything goes wrong, we log a general error message, attempt to tear down
  * whatever we set up, and return an error.
  */
 static int
@@ -2931,11 +2972,11 @@
                                 zerror(zlogp, B_TRUE,
                                     "unable to determine dev root");
                                 return (-1);
                         }
                         (void) snprintf(path, sizeof (path), "%s%s", rootpath,
-                            "/dev");
+                            get_brand_dev());
                         if (di_prof_init(path, &prof) != 0) {
                                 (void) zonecfg_endnwifent(handle);
                                 zonecfg_fini_handle(handle);
                                 zerror(zlogp, B_TRUE,
                                     "failed to initialize profile");
@@ -3315,10 +3356,23 @@
         free(privname);
         zonecfg_fini_handle(handle);
         return (error);
 }
 
+/*
+ * Look up the project rctl paired with the given zone rctl name in
+ * zone_proj_rctl_map.  Returns the project rctl name, or NULL when the
+ * table has no entry for that zone rctl.
+ */
+static char *
+zone_proj_rctl(const char *name)
+{
+        zone_proj_rctl_map_t *ent;
+
+        /* The table ends with an entry whose zone rctl name is NULL. */
+        for (ent = zone_proj_rctl_map; ent->zpr_zone_rctl != NULL; ent++) {
+                if (strcmp(name, ent->zpr_zone_rctl) != 0)
+                        continue;
+
+                return (ent->zpr_project_rctl);
+        }
+
+        return (NULL);
+}
+
 static int
 get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
 {
         nvlist_t *nvl = NULL;
         char *nvl_packed = NULL;
@@ -3329,10 +3383,11 @@
         zone_dochandle_t handle;
         struct zone_rctltab rctltab;
         rctlblk_t *rctlblk = NULL;
         uint64_t maxlwps;
         uint64_t maxprocs;
+        int rproc, rlwp;
 
         *bufp = NULL;
         *bufsizep = 0;
 
         if ((handle = zonecfg_init_handle()) == NULL) {
@@ -3351,24 +3406,33 @@
                 goto out;
         }
 
         /*
          * Allow the administrator to control both the maximum number of
-         * process table slots and the maximum number of lwps with just the
-         * max-processes property.  If only the max-processes property is set,
-         * we add a max-lwps property with a limit derived from max-processes.
+         * process table slots, and the maximum number of lwps, with a single
+         * max-processes or max-lwps property. If only the max-processes
+         * property is set, we add a max-lwps property with a limit derived
+         * from max-processes. If only the max-lwps property is set, we add a
+         * max-processes property with the same limit as max-lwps.
          */
-        if (zonecfg_get_aliased_rctl(handle, ALIAS_MAXPROCS, &maxprocs)
-            == Z_OK &&
-            zonecfg_get_aliased_rctl(handle, ALIAS_MAXLWPS, &maxlwps)
-            == Z_NO_ENTRY) {
-                if (zonecfg_set_aliased_rctl(handle, ALIAS_MAXLWPS,
+        rproc = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXPROCS, &maxprocs);
+        rlwp = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXLWPS, &maxlwps);
+        if (rproc == Z_OK && rlwp == Z_NO_ENTRY) {
+                if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXLWPS,
                     maxprocs * LWPS_PER_PROCESS) != Z_OK) {
                         zerror(zlogp, B_FALSE, "unable to set max-lwps alias");
                         goto out;
                 }
+        } else if (rlwp == Z_OK && rproc == Z_NO_ENTRY) {
+                /* no scaling for max-proc value */
+                if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXPROCS,
+                    maxlwps) != Z_OK) {
+                        zerror(zlogp, B_FALSE,
+                            "unable to set max-processes alias");
+                        goto out;
         }
+        }
 
         if (zonecfg_setrctlent(handle) != Z_OK) {
                 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
                 goto out;
         }
@@ -3379,10 +3443,11 @@
         }
         while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) {
                 struct zone_rctlvaltab *rctlval;
                 uint_t i, count;
                 const char *name = rctltab.zone_rctl_name;
+                char *proj_nm;
 
                 /* zoneadm should have already warned about unknown rctls. */
                 if (!zonecfg_is_rctl(name)) {
                         zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
                         rctltab.zone_rctl_valptr = NULL;
@@ -3445,10 +3510,30 @@
                                 goto out;
                         }
                 }
                 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
                 rctltab.zone_rctl_valptr = NULL;
+
+                /*
+                 * With no action on our part we will start zsched with the
+                 * project rctl values for our (zoneadmd) current project. For
+                 * brands running a variant of Illumos, that's not a problem
+                 * since they will setup their own projects, but for a
+                 * non-native brand like lx, where there are no projects, we
+                 * want to start things up with the same project rctls as the
+                 * corresponding zone rctls, since nothing within the zone will
+                 * ever change the project rctls.
+                 */
+                if ((proj_nm = zone_proj_rctl(name)) != NULL) {
+                        if (nvlist_add_nvlist_array(nvl, proj_nm, nvlv, count)
+                            != 0) {
+                                zerror(zlogp, B_FALSE,
+                                    "nvlist_add_nvlist_arrays failed");
+                                goto out;
+                        }
+                }
+
                 if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count)
                     != 0) {
                         zerror(zlogp, B_FALSE, "%s failed",
                             "nvlist_add_nvlist_array");
                         goto out;
@@ -3706,21 +3791,15 @@
 {
         int             error = -1;
         zfs_handle_t    *zhp;
         libzfs_handle_t *hdl;
         m_label_t       ds_sl;
-        char            zonepath[MAXPATHLEN];
         char            ds_hexsl[MAXNAMELEN];
 
         if (!is_system_labeled())
                 return (0);
 
-        if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
-                zerror(zlogp, B_TRUE, "unable to determine zone path");
-                return (-1);
-        }
-
         if (!is_zonepath_zfs(zonepath))
                 return (0);
 
         if ((hdl = libzfs_init()) == NULL) {
                 zerror(zlogp, B_FALSE, "opening ZFS library");
@@ -4387,19 +4466,17 @@
         }
         return (B_FALSE);
 }
 
 /*
- * Set memory cap and pool info for the zone's resource management
- * configuration.
+ * Set pool info for the zone's resource management configuration.
  */
 static int
 setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
 {
         int res;
         uint64_t tmp;
-        struct zone_mcaptab mcap;
         char sched[MAXNAMELEN];
         zone_dochandle_t handle = NULL;
         char pool_err[128];
 
         if ((handle = zonecfg_init_handle()) == NULL) {
@@ -4411,42 +4488,46 @@
                 zerror(zlogp, B_FALSE, "invalid configuration");
                 zonecfg_fini_handle(handle);
                 return (res);
         }
 
-        /*
-         * If a memory cap is configured, set the cap in the kernel using
-         * zone_setattr() and make sure the rcapd SMF service is enabled.
-         */
-        if (zonecfg_getmcapent(handle, &mcap) == Z_OK) {
-                uint64_t num;
-                char smf_err[128];
-
-                num = (uint64_t)strtoull(mcap.zone_physmem_cap, NULL, 10);
-                if (zone_setattr(zoneid, ZONE_ATTR_PHYS_MCAP, &num, 0) == -1) {
-                        zerror(zlogp, B_TRUE, "could not set zone memory cap");
-                        zonecfg_fini_handle(handle);
-                        return (Z_INVAL);
-                }
-
-                if (zonecfg_enable_rcapd(smf_err, sizeof (smf_err)) != Z_OK) {
-                        zerror(zlogp, B_FALSE, "enabling system/rcap service "
-                            "failed: %s", smf_err);
-                        zonecfg_fini_handle(handle);
-                        return (Z_INVAL);
-                }
-        }
-
         /* Get the scheduling class set in the zone configuration. */
         if (zonecfg_get_sched_class(handle, sched, sizeof (sched)) == Z_OK &&
             strlen(sched) > 0) {
                 if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, sched,
                     strlen(sched)) == -1)
                         zerror(zlogp, B_TRUE, "WARNING: unable to set the "
                             "default scheduling class");
 
-        } else if (zonecfg_get_aliased_rctl(handle, ALIAS_SHARES, &tmp)
+                if (strcmp(sched, "FX") == 0) {
+                        /*
+                         * When FX is specified then by default all processes
+                         * will start at the lowest priority level (0) and
+                         * stay there. We support an optional attr which
+                         * indicates that all the processes should be "high
+                         * priority". We set this on the zone so that starting
+                         * init will set the priority high.
+                         */
+                        struct zone_attrtab a;
+
+                        bzero(&a, sizeof (a));
+                        (void) strlcpy(a.zone_attr_name, "fixed-hi-prio",
+                            sizeof (a.zone_attr_name));
+
+                        if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
+                            strcmp(a.zone_attr_value, "true") == 0) {
+                                boolean_t hi = B_TRUE;
+
+                                if (zone_setattr(zoneid,
+                                    ZONE_ATTR_SCHED_FIXEDHI, (void *)hi,
+                                    sizeof (hi)) == -1)
+                                        zerror(zlogp, B_TRUE, "WARNING: unable "
+                                            "to set high priority");
+                        }
+                }
+
+        } else if (zonecfg_get_aliased_rctl(snap_hndl, ALIAS_SHARES, &tmp)
             == Z_OK) {
                 /*
                  * If the zone has the zone.cpu-shares rctl set then we want to
                  * use the Fair Share Scheduler (FSS) for processes in the
                  * zone.  Check what scheduling class the zone would be running
@@ -4989,11 +5070,11 @@
 }
 
 int
 vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid)
 {
-        char zonepath[MAXPATHLEN];
+        char zpath[MAXPATHLEN];
 
         if (mount_cmd == Z_MNT_BOOT && validate_datasets(zlogp) != 0) {
                 lofs_discard_mnttab();
                 return (-1);
         }
@@ -5000,19 +5081,15 @@
 
         /*
          * Before we try to mount filesystems we need to create the
          * attribute backing store for /dev
          */
-        if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
-                lofs_discard_mnttab();
-                return (-1);
-        }
-        resolve_lofs(zlogp, zonepath, sizeof (zonepath));
+        (void) strlcpy(zpath, zonepath, sizeof (zpath));
+        resolve_lofs(zlogp, zpath, sizeof (zpath));
 
         /* Make /dev directory owned by root, grouped sys */
-        if (make_one_dir(zlogp, zonepath, "/dev", DEFAULT_DIR_MODE,
-            0, 3) != 0) {
+        if (make_one_dir(zlogp, zpath, "/dev", DEFAULT_DIR_MODE, 0, 3) != 0) {
                 lofs_discard_mnttab();
                 return (-1);
         }
 
         if (mount_filesystems(zlogp, mount_cmd) != 0) {
@@ -5124,11 +5201,10 @@
 {
         char *kzone;
         zoneid_t zoneid;
         int res;
         char pool_err[128];
-        char zpath[MAXPATHLEN];
         char cmdbuf[MAXPATHLEN];
         brand_handle_t bh = NULL;
         dladm_status_t status;
         char errmsg[DLADM_STRSIZE];
         ushort_t flags;
@@ -5180,16 +5256,10 @@
         if (zone_shutdown(zoneid) != 0) {
                 zerror(zlogp, B_TRUE, "unable to shutdown zone");
                 goto error;
         }
 
-        /* Get the zonepath of this zone */
-        if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
-                zerror(zlogp, B_FALSE, "unable to determine zone path");
-                goto error;
-        }
-
         /* Get a handle to the brand info for this zone */
         if ((bh = brand_open(brand_name)) == NULL) {
                 zerror(zlogp, B_FALSE, "unable to determine zone brand");
                 return (-1);
         }
@@ -5196,11 +5266,11 @@
         /*
          * If there is a brand 'halt' callback, execute it now to give the
          * brand a chance to cleanup any custom configuration.
          */
         (void) strcpy(cmdbuf, EXEC_PREFIX);
-        if (brand_get_halt(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
+        if (brand_get_halt(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
             sizeof (cmdbuf) - EXEC_LEN) < 0) {
                 brand_close(bh);
                 zerror(zlogp, B_FALSE, "unable to determine branded zone's "
                     "halt callback.");
                 goto error;