4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2013, Joyent Inc. All rights reserved.
  25  * Copyright (c) 2015 by Delphix. All rights reserved.
  26  */
  27 
  28 /*
  29  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  30  */
  31 
  32 /*
  33  * This module contains functions used to bring up and tear down the
  34  * Virtual Platform: [un]mounting file-systems, [un]plumbing network
  35  * interfaces, [un]configuring devices, establishing resource controls,
  36  * and creating/destroying the zone in the kernel.  These actions, on
  37  * the way up, ready the zone; on the way down, they halt the zone.
  38  * See the much longer block comment at the beginning of zoneadmd.c
  39  * for a bigger picture of how the whole program functions.
  40  *
  41  * This module also has primary responsibility for the layout of "scratch
  42  * zones."  These are mounted, but inactive, zones that are used during
  43  * operating system upgrade and potentially other administrative action.  The
  44  * scratch zone environment is similar to the miniroot environment.  The zone's
  45  * actual root is mounted read-write on /a, and the standard paths (/usr,
 
 
 119 #include <libbrand.h>
 120 #include <sys/brand.h>
 121 #include <libzonecfg.h>
 122 #include <synch.h>
 123 
 124 #include "zoneadmd.h"
 125 #include <tsol/label.h>
 126 #include <libtsnet.h>
 127 #include <sys/priv.h>
 128 #include <libinetutil.h>
 129 
 130 #define V4_ADDR_LEN     32
 131 #define V6_ADDR_LEN     128
 132 
 133 #define RESOURCE_DEFAULT_OPTS \
 134         MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES
 135 
 136 #define DFSTYPES        "/etc/dfs/fstypes"
 137 #define MAXTNZLEN       2048
 138 
 139 #define ALT_MOUNT(mount_cmd)    ((mount_cmd) != Z_MNT_BOOT)
 140 
 141 /* a reasonable estimate for the number of lwps per process */
 142 #define LWPS_PER_PROCESS        10
 143 
 144 /* for routing socket */
 145 static int rts_seqno = 0;
 146 
 147 /* mangled zone name when mounting in an alternate root environment */
 148 static char kernzone[ZONENAME_MAX];
 149 
 150 /* array of cached mount entries for resolve_lofs */
 151 static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max;
 152 
 153 /* for Trusted Extensions */
 154 static tsol_zcent_t *get_zone_label(zlog_t *, priv_set_t *);
 155 static int tsol_mounts(zlog_t *, char *, char *);
 156 static void tsol_unmounts(zlog_t *, char *);
 157 
 158 static m_label_t *zlabel = NULL;
 159 static m_label_t *zid_label = NULL;
 160 static priv_set_t *zprivs = NULL;
 161 
 162 static const char *DFLT_FS_ALLOWED = "hsfs,smbfs,nfs,nfs3,nfs4,nfsdyn";
 163 
 164 /* from libsocket, not in any header file */
 165 extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
 166 
 167 /* from zoneadmd */
 168 extern char query_hook[];
 169 
  170 /*
  171  * For each "net" resource configured in zonecfg, we track a zone_addr_list_t
  172  * node in a singly linked list that is sorted by linkid.  The list is built
  173  * as the xml configuration file is parsed, and the information contained in
  174  * each node is added to the kernel before the zone is booted, to be
  175  * retrieved and applied from within the exclusive-IP NGZ on boot.  A whole
  176  * list is released with free_ip_interface().
  177  */
  178 typedef struct zone_addr_list {
  179         struct zone_addr_list *za_next; /* next node; NULL ends the list */
  180         datalink_id_t za_linkid;        /* datalink_id_t of interface */
  181         struct zone_nwiftab za_nwiftab; /* address, defrouter properties */
  182 } zone_addr_list_t;
 183 
 
1133                 break;
1134         }
1135 
1136         if (brand_platform_iter_devices(bh, zone_name,
1137             mount_one_dev_device_cb, prof, curr_iptype) != 0) {
1138                 zerror(zlogp, B_TRUE, "failed to add standard device");
1139                 goto cleanup;
1140         }
1141 
1142         if (brand_platform_iter_link(bh,
1143             mount_one_dev_symlink_cb, prof) != 0) {
1144                 zerror(zlogp, B_TRUE, "failed to add standard symlink");
1145                 goto cleanup;
1146         }
1147 
1148         /* Add user-specified devices and directories */
1149         if ((handle = zonecfg_init_handle()) == NULL) {
1150                 zerror(zlogp, B_FALSE, "can't initialize zone handle");
1151                 goto cleanup;
1152         }
1153         if (err = zonecfg_get_handle(zone_name, handle)) {
1154                 zerror(zlogp, B_FALSE, "can't get handle for zone "
1155                     "%s: %s", zone_name, zonecfg_strerror(err));
1156                 goto cleanup;
1157         }
1158         if (err = zonecfg_setdevent(handle)) {
1159                 zerror(zlogp, B_FALSE, "%s: %s", zone_name,
1160                     zonecfg_strerror(err));
1161                 goto cleanup;
1162         }
1163         while (zonecfg_getdevent(handle, &ztab) == Z_OK) {
1164                 if (di_prof_add_dev(prof, ztab.zone_dev_match)) {
1165                         zerror(zlogp, B_TRUE, "failed to add "
1166                             "user-specified device");
1167                         goto cleanup;
1168                 }
1169         }
1170         (void) zonecfg_enddevent(handle);
1171 
1172         /* Send profile to kernel */
1173         if (di_prof_commit(prof)) {
1174                 zerror(zlogp, B_TRUE, "failed to commit profile");
1175                 goto cleanup;
1176         }
1177 
1178         retval = 0;
 
1654                  * for non-lofs mounts since they will have a device
1655                  * as a backing store and device paths must always be
1656                  * specified relative to the current boot environment.
1657                  */
1658                 fsp->zone_fs_special[0] = '\0';
1659                 if (strcmp(fsp->zone_fs_type, MNTTYPE_LOFS) == 0) {
1660                         (void) strlcat(fsp->zone_fs_special, zonecfg_get_root(),
1661                             sizeof (fsp->zone_fs_special));
1662                 }
1663                 (void) strlcat(fsp->zone_fs_special, fstab.zone_fs_special,
1664                     sizeof (fsp->zone_fs_special));
1665         }
1666         (void) zonecfg_endfsent(handle);
1667         return (0);
1668 }
1669 
1670 static int
1671 mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
1672 {
1673         char rootpath[MAXPATHLEN];
1674         char zonepath[MAXPATHLEN];
1675         char brand[MAXNAMELEN];
1676         char luroot[MAXPATHLEN];
1677         int i, num_fs = 0;
1678         struct zone_fstab *fs_ptr = NULL;
1679         zone_dochandle_t handle = NULL;
1680         zone_state_t zstate;
1681         brand_handle_t bh;
1682         plat_gmount_cb_data_t cb;
1683 
1684         if (zone_get_state(zone_name, &zstate) != Z_OK ||
1685             (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) {
1686                 zerror(zlogp, B_FALSE,
1687                     "zone must be in '%s' or '%s' state to mount file-systems",
1688                     zone_state_str(ZONE_STATE_READY),
1689                     zone_state_str(ZONE_STATE_MOUNTED));
1690                 goto bad;
1691         }
1692 
1693         if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
1694                 zerror(zlogp, B_TRUE, "unable to determine zone path");
1695                 goto bad;
1696         }
1697 
1698         if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
1699                 zerror(zlogp, B_TRUE, "unable to determine zone root");
1700                 goto bad;
1701         }
1702 
1703         if ((handle = zonecfg_init_handle()) == NULL) {
1704                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
1705                 goto bad;
1706         }
1707         if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK ||
1708             zonecfg_setfsent(handle) != Z_OK) {
1709                 zerror(zlogp, B_FALSE, "invalid configuration");
1710                 goto bad;
1711         }
1712 
1713         /*
1714          * If we are mounting the zone, then we must always use the default
1715          * brand global mounts.
1716          */
1717         if (ALT_MOUNT(mount_cmd)) {
 
1778          * we need <zoneroot>/lu/dev to be the /dev filesystem
1779          * for the zone and we don't want to have any /dev filesystem
1780          * mounted at <zoneroot>/lu/a/dev.  Since /dev is specified
1781          * as a normal zone filesystem by default we'll try to mount
1782          * it at <zoneroot>/lu/a/dev, so we have to detect this
1783          * case and instead mount it at <zoneroot>/lu/dev.
1784          *
1785          * All this work is done in three phases:
1786          *   1) Create and populate lu directory (build_mounted_pre_var()).
1787          *   2) Mount the required filesystems as per the zone configuration.
1788          *   3) Set up the rest of the scratch zone environment
1789          *      (build_mounted_post_var()).
1790          */
1791         if (ALT_MOUNT(mount_cmd) && !build_mounted_pre_var(zlogp,
1792             rootpath, sizeof (rootpath), zonepath, luroot, sizeof (luroot)))
1793                 goto bad;
1794 
1795         qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare);
1796 
1797         for (i = 0; i < num_fs; i++) {
1798                 if (ALT_MOUNT(mount_cmd) &&
1799                     strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) {
1800                         size_t slen = strlen(rootpath) - 2;
1801 
1802                         /*
1803                          * By default we'll try to mount /dev as /a/dev
1804                          * but /dev is special and always goes at the top
1805                          * so strip the trailing '/a' from the rootpath.
1806                          */
1807                         assert(strcmp(&rootpath[slen], "/a") == 0);
1808                         rootpath[slen] = '\0';
1809                         if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd)
1810                             != 0)
1811                                 goto bad;
1812                         rootpath[slen] = '/';
1813                         continue;
1814                 }
1815                 if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd) != 0)
1816                         goto bad;
1817         }
1818         if (ALT_MOUNT(mount_cmd) &&
1819             !build_mounted_post_var(zlogp, mount_cmd, rootpath, luroot))
1820                 goto bad;
1821 
1822         /*
1823          * For Trusted Extensions cross-mount each lower level /export/home
1824          */
1825         if (mount_cmd == Z_MNT_BOOT &&
1826             tsol_mounts(zlogp, zone_name, rootpath) != 0)
1827                 goto bad;
1828 
1829         free_fs_data(fs_ptr, num_fs);
1830 
1831         /*
1832          * Everything looks fine.
1833          */
1834         return (0);
 
2871         }
2872         /* insert new after ptr */
2873         new->za_next = next;
2874         ptr->za_next = new;
2875         return (old);
2876 }
2877 
2878 void
2879 free_ip_interface(zone_addr_list_t *zalist)
2880 {
2881         zone_addr_list_t *following;
2882 
2883         /* Save each successor before releasing the node that links to it. */
2884         for (; zalist != NULL; zalist = following) {
2885                 following = zalist->za_next;
2886                 free(zalist);
2887         }
2888 }
2889 
2890 /*
2891  * Add the kernel access control information for the interface names.
2892  * If anything goes wrong, we log a general error message, attempt to tear down
2893  * whatever we set up, and return an error.
2894  */
2895 static int
2896 configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
2897 {
2898         zone_dochandle_t handle;
2899         struct zone_nwiftab nwiftab;
2900         char rootpath[MAXPATHLEN];
2901         char path[MAXPATHLEN];
2902         datalink_id_t linkid;
2903         di_prof_t prof = NULL;
2904         boolean_t added = B_FALSE;
2905         zone_addr_list_t *zalist = NULL, *new;
2906 
2907         if ((handle = zonecfg_init_handle()) == NULL) {
2908                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
2909                 return (-1);
2910         }
 
2916 
2917         if (zonecfg_setnwifent(handle) != Z_OK) {
2918                 zonecfg_fini_handle(handle);
2919                 return (0);
2920         }
2921 
2922         for (;;) {
2923                 if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
2924                         break;
2925 
2926                 if (prof == NULL) {
2927                         if (zone_get_devroot(zone_name, rootpath,
2928                             sizeof (rootpath)) != Z_OK) {
2929                                 (void) zonecfg_endnwifent(handle);
2930                                 zonecfg_fini_handle(handle);
2931                                 zerror(zlogp, B_TRUE,
2932                                     "unable to determine dev root");
2933                                 return (-1);
2934                         }
2935                         (void) snprintf(path, sizeof (path), "%s%s", rootpath,
2936                             "/dev");
2937                         if (di_prof_init(path, &prof) != 0) {
2938                                 (void) zonecfg_endnwifent(handle);
2939                                 zonecfg_fini_handle(handle);
2940                                 zerror(zlogp, B_TRUE,
2941                                     "failed to initialize profile");
2942                                 return (-1);
2943                         }
2944                 }
2945 
2946                 /*
2947                  * Create the /dev entry for backward compatibility.
2948                  * Only create the /dev entry if it's not in use.
2949                  * Note that the zone still boots when the assigned
2950                  * interface is inaccessible, used by others, etc.
2951                  * Also, when vanity naming is used, some interfaces
2952                  * do not have corresponding /dev node names (for example,
2953                  * vanity named aggregations).  The /dev entry is not
2954                  * created in that case.  The /dev/net entry is always
2955                  * accessible.
2956                  */
 
3300                 break;
3301         case Z_PRIV_REQUIRED:
3302                 zerror(zlogp, B_FALSE, "required privilege \"%s\" is missing "
3303                     "from the zone's privilege set", privname);
3304                 break;
3305         case Z_PRIV_UNKNOWN:
3306                 zerror(zlogp, B_FALSE, "unknown privilege \"%s\" specified "
3307                     "in the zone's privilege set", privname);
3308                 break;
3309         default:
3310                 zerror(zlogp, B_FALSE, "failed to determine the zone's "
3311                     "privilege set");
3312                 break;
3313         }
3314 
3315         free(privname);
3316         zonecfg_fini_handle(handle);
3317         return (error);
3318 }
3319 
3320 static int
3321 get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
3322 {
3323         nvlist_t *nvl = NULL;
3324         char *nvl_packed = NULL;
3325         size_t nvl_size = 0;
3326         nvlist_t **nvlv = NULL;
3327         int rctlcount = 0;
3328         int error = -1;
3329         zone_dochandle_t handle;
3330         struct zone_rctltab rctltab;
3331         rctlblk_t *rctlblk = NULL;
3332         uint64_t maxlwps;
3333         uint64_t maxprocs;
3334 
3335         *bufp = NULL;
3336         *bufsizep = 0;
3337 
3338         if ((handle = zonecfg_init_handle()) == NULL) {
3339                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
3340                 return (-1);
3341         }
3342         if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
3343                 zerror(zlogp, B_FALSE, "invalid configuration");
3344                 zonecfg_fini_handle(handle);
3345                 return (-1);
3346         }
3347 
3348         rctltab.zone_rctl_valptr = NULL;
3349         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
3350                 zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc");
3351                 goto out;
3352         }
3353 
3354         /*
3355          * Allow the administrator to control both the maximum number of
3356          * process table slots and the maximum number of lwps with just the
3357          * max-processes property.  If only the max-processes property is set,
3358          * we add a max-lwps property with a limit derived from max-processes.
3359          */
3360         if (zonecfg_get_aliased_rctl(handle, ALIAS_MAXPROCS, &maxprocs)
3361             == Z_OK &&
3362             zonecfg_get_aliased_rctl(handle, ALIAS_MAXLWPS, &maxlwps)
3363             == Z_NO_ENTRY) {
3364                 if (zonecfg_set_aliased_rctl(handle, ALIAS_MAXLWPS,
3365                     maxprocs * LWPS_PER_PROCESS) != Z_OK) {
3366                         zerror(zlogp, B_FALSE, "unable to set max-lwps alias");
3367                         goto out;
3368                 }
3369         }
3370 
3371         if (zonecfg_setrctlent(handle) != Z_OK) {
3372                 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
3373                 goto out;
3374         }
3375 
3376         if ((rctlblk = malloc(rctlblk_size())) == NULL) {
3377                 zerror(zlogp, B_TRUE, "memory allocation failed");
3378                 goto out;
3379         }
3380         while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) {
3381                 struct zone_rctlvaltab *rctlval;
3382                 uint_t i, count;
3383                 const char *name = rctltab.zone_rctl_name;
3384 
3385                 /* zoneadm should have already warned about unknown rctls. */
3386                 if (!zonecfg_is_rctl(name)) {
3387                         zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
3388                         rctltab.zone_rctl_valptr = NULL;
3389                         continue;
3390                 }
3391                 count = 0;
3392                 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
3393                     rctlval = rctlval->zone_rctlval_next) {
3394                         count++;
3395                 }
3396                 if (count == 0) {       /* ignore */
3397                         continue;       /* Nothing to free */
3398                 }
3399                 if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL)
3400                         goto out;
3401                 i = 0;
3402                 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
3403                     rctlval = rctlval->zone_rctlval_next, i++) {
 
3430                                 zerror(zlogp, B_FALSE, "%s failed",
3431                                     "nvlist_add_uint64");
3432                                 goto out;
3433                         }
3434                         if (nvlist_add_uint64(nvlv[i], "limit",
3435                             rctlblk_get_value(rctlblk)) != 0) {
3436                                 zerror(zlogp, B_FALSE, "%s failed",
3437                                     "nvlist_add_uint64");
3438                                 goto out;
3439                         }
3440                         if (nvlist_add_uint64(nvlv[i], "action",
3441                             (uint_t)rctlblk_get_local_action(rctlblk, NULL))
3442                             != 0) {
3443                                 zerror(zlogp, B_FALSE, "%s failed",
3444                                     "nvlist_add_uint64");
3445                                 goto out;
3446                         }
3447                 }
3448                 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
3449                 rctltab.zone_rctl_valptr = NULL;
3450                 if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count)
3451                     != 0) {
3452                         zerror(zlogp, B_FALSE, "%s failed",
3453                             "nvlist_add_nvlist_array");
3454                         goto out;
3455                 }
3456                 for (i = 0; i < count; i++)
3457                         nvlist_free(nvlv[i]);
3458                 free(nvlv);
3459                 nvlv = NULL;
3460                 rctlcount++;
3461         }
3462         (void) zonecfg_endrctlent(handle);
3463 
3464         if (rctlcount == 0) {
3465                 error = 0;
3466                 goto out;
3467         }
3468         if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0)
3469             != 0) {
 
3691 
3692         if (buf1.f_fsid == buf2.f_fsid)
3693                 return (B_FALSE);
3694 
3695         return (B_TRUE);
3696 }
3697 
3698 /*
3699  * Verify the MAC label in the root dataset for the zone.
3700  * If the label exists, it must match the label configured for the zone.
3701  * Otherwise if there's no label on the dataset, create one here.
3702  */
3703 
3704 static int
3705 validate_rootds_label(zlog_t *zlogp, char *rootpath, m_label_t *zone_sl)
3706 {
3707         int             error = -1;
3708         zfs_handle_t    *zhp;
3709         libzfs_handle_t *hdl;
3710         m_label_t       ds_sl;
3711         char            zonepath[MAXPATHLEN];
3712         char            ds_hexsl[MAXNAMELEN];
3713 
3714         if (!is_system_labeled())
3715                 return (0);
3716 
3717         if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
3718                 zerror(zlogp, B_TRUE, "unable to determine zone path");
3719                 return (-1);
3720         }
3721 
3722         if (!is_zonepath_zfs(zonepath))
3723                 return (0);
3724 
3725         if ((hdl = libzfs_init()) == NULL) {
3726                 zerror(zlogp, B_FALSE, "opening ZFS library");
3727                 return (-1);
3728         }
3729 
3730         if ((zhp = zfs_path_to_zhandle(hdl, rootpath,
3731             ZFS_TYPE_FILESYSTEM)) == NULL) {
3732                 zerror(zlogp, B_FALSE, "cannot open ZFS dataset for path '%s'",
3733                     rootpath);
3734                 libzfs_fini(hdl);
3735                 return (-1);
3736         }
3737 
3738         /* Get the mlslabel property if it exists. */
3739         if ((zfs_prop_get(zhp, ZFS_PROP_MLSLABEL, ds_hexsl, MAXNAMELEN,
3740             NULL, NULL, 0, B_TRUE) != 0) ||
3741             (strcmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)) {
 
4372         if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
4373                 return (B_TRUE);
4374         for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; mnp++) {
4375                 if (mnp->mnt_fstype == NULL ||
4376                     strcmp(MNTTYPE_LOFS, mnp->mnt_fstype) != 0)
4377                         continue;
4378                 /* We're looking at a loopback mount.  Stat it. */
4379                 if (mnp->mnt_special != NULL &&
4380                     stat64(mnp->mnt_special, &zst) != -1 &&
4381                     rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) {
4382                         zerror(zlogp, B_FALSE,
4383                             "zone root %s is reachable through %s",
4384                             rootpath, mnp->mnt_mountp);
4385                         return (B_TRUE);
4386                 }
4387         }
4388         return (B_FALSE);
4389 }
4390 
4391 /*
4392  * Set memory cap and pool info for the zone's resource management
4393  * configuration.
4394  */
4395 static int
4396 setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
4397 {
4398         int res;
4399         uint64_t tmp;
4400         struct zone_mcaptab mcap;
4401         char sched[MAXNAMELEN];
4402         zone_dochandle_t handle = NULL;
4403         char pool_err[128];
4404 
4405         if ((handle = zonecfg_init_handle()) == NULL) {
4406                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
4407                 return (Z_BAD_HANDLE);
4408         }
4409 
4410         if ((res = zonecfg_get_snapshot_handle(zone_name, handle)) != Z_OK) {
4411                 zerror(zlogp, B_FALSE, "invalid configuration");
4412                 zonecfg_fini_handle(handle);
4413                 return (res);
4414         }
4415 
4416         /*
4417          * If a memory cap is configured, set the cap in the kernel using
4418          * zone_setattr() and make sure the rcapd SMF service is enabled.
4419          */
4420         if (zonecfg_getmcapent(handle, &mcap) == Z_OK) {
4421                 uint64_t num;
4422                 char smf_err[128];
4423 
4424                 num = (uint64_t)strtoull(mcap.zone_physmem_cap, NULL, 10);
4425                 if (zone_setattr(zoneid, ZONE_ATTR_PHYS_MCAP, &num, 0) == -1) {
4426                         zerror(zlogp, B_TRUE, "could not set zone memory cap");
4427                         zonecfg_fini_handle(handle);
4428                         return (Z_INVAL);
4429                 }
4430 
4431                 if (zonecfg_enable_rcapd(smf_err, sizeof (smf_err)) != Z_OK) {
4432                         zerror(zlogp, B_FALSE, "enabling system/rcap service "
4433                             "failed: %s", smf_err);
4434                         zonecfg_fini_handle(handle);
4435                         return (Z_INVAL);
4436                 }
4437         }
4438 
4439         /* Get the scheduling class set in the zone configuration. */
4440         if (zonecfg_get_sched_class(handle, sched, sizeof (sched)) == Z_OK &&
4441             strlen(sched) > 0) {
4442                 if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, sched,
4443                     strlen(sched)) == -1)
4444                         zerror(zlogp, B_TRUE, "WARNING: unable to set the "
4445                             "default scheduling class");
4446 
4447         } else if (zonecfg_get_aliased_rctl(handle, ALIAS_SHARES, &tmp)
4448             == Z_OK) {
4449                 /*
4450                  * If the zone has the zone.cpu-shares rctl set then we want to
4451                  * use the Fair Share Scheduler (FSS) for processes in the
4452                  * zone.  Check what scheduling class the zone would be running
4453                  * in by default so we can print a warning and modify the class
4454                  * if we wouldn't be using FSS.
4455                  */
4456                 char class_name[PC_CLNMSZ];
4457 
4458                 if (zonecfg_get_dflt_sched_class(handle, class_name,
4459                     sizeof (class_name)) != Z_OK) {
4460                         zerror(zlogp, B_FALSE, "WARNING: unable to determine "
4461                             "the zone's scheduling class");
4462 
4463                 } else if (strcmp("FSS", class_name) != 0) {
4464                         zerror(zlogp, B_FALSE, "WARNING: The zone.cpu-shares "
4465                             "rctl is set but\nFSS is not the default "
4466                             "scheduling class for\nthis zone.  FSS will be "
4467                             "used for processes\nin the zone but to get the "
 
4974                 fd = open(ZONE_INDEX_FILE, O_WRONLY|O_CREAT|O_TRUNC,
4975                     ZONE_INDEX_MODE);
4976                 if (fd != -1 && (zet = fdopen(fd, "w")) != NULL) {
4977                         (void) fchown(fd, ZONE_INDEX_UID, ZONE_INDEX_GID);
4978                         if (uuid_is_null(zep->zone_uuid))
4979                                 uuidstr[0] = '\0';
4980                         else
4981                                 uuid_unparse(zep->zone_uuid, uuidstr);
4982                         (void) fprintf(zet, "%s:%s:/:%s\n", zep->zone_name,
4983                             zone_state_str(zep->zone_state),
4984                             uuidstr);
4985                         (void) fclose(zet);
4986                 }
4987         }
4988         _exit(0);
4989 }
4990 
4991 int
4992 vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid)
4993 {
4994         char zonepath[MAXPATHLEN];
4995 
4996         if (mount_cmd == Z_MNT_BOOT && validate_datasets(zlogp) != 0) {
4997                 lofs_discard_mnttab();
4998                 return (-1);
4999         }
5000 
5001         /*
5002          * Before we try to mount filesystems we need to create the
5003          * attribute backing store for /dev
5004          */
5005         if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
5006                 lofs_discard_mnttab();
5007                 return (-1);
5008         }
5009         resolve_lofs(zlogp, zonepath, sizeof (zonepath));
5010 
5011         /* Make /dev directory owned by root, grouped sys */
5012         if (make_one_dir(zlogp, zonepath, "/dev", DEFAULT_DIR_MODE,
5013             0, 3) != 0) {
5014                 lofs_discard_mnttab();
5015                 return (-1);
5016         }
5017 
5018         if (mount_filesystems(zlogp, mount_cmd) != 0) {
5019                 lofs_discard_mnttab();
5020                 return (-1);
5021         }
5022 
5023         if (mount_cmd == Z_MNT_BOOT) {
5024                 zone_iptype_t iptype;
5025 
5026                 if (vplat_get_iptype(zlogp, &iptype) < 0) {
5027                         zerror(zlogp, B_TRUE, "unable to determine ip-type");
5028                         lofs_discard_mnttab();
5029                         return (-1);
5030                 }
5031 
5032                 switch (iptype) {
5033                 case ZS_SHARED:
 
5109                 if (zonecfg_lock_scratch(fp) != 0)
5110                         zerror(zlogp, B_TRUE, "cannot lock mapfile");
5111                 else if (zonecfg_delete_scratch(fp, kernzone) != 0)
5112                         zerror(zlogp, B_TRUE, "cannot delete map entry");
5113                 else
5114                         retv = 0;
5115                 zonecfg_close_scratch(fp);
5116                 return (retv);
5117         } else {
5118                 return (0);
5119         }
5120 }
5121 
5122 int
5123 vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
5124 {
5125         char *kzone;
5126         zoneid_t zoneid;
5127         int res;
5128         char pool_err[128];
5129         char zpath[MAXPATHLEN];
5130         char cmdbuf[MAXPATHLEN];
5131         brand_handle_t bh = NULL;
5132         dladm_status_t status;
5133         char errmsg[DLADM_STRSIZE];
5134         ushort_t flags;
5135 
5136         kzone = zone_name;
5137         if (zonecfg_in_alt_root()) {
5138                 FILE *fp;
5139 
5140                 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
5141                         zerror(zlogp, B_TRUE, "unable to open map file");
5142                         goto error;
5143                 }
5144                 if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
5145                     kernzone, sizeof (kernzone)) != 0) {
5146                         zerror(zlogp, B_FALSE, "unable to find scratch zone");
5147                         zonecfg_close_scratch(fp);
5148                         goto error;
5149                 }
 
5165         }
5166 
5167         if (remove_datalink_protect(zlogp, zoneid) != 0) {
5168                 zerror(zlogp, B_FALSE,
5169                     "unable clear datalink protect property");
5170                 goto error;
5171         }
5172 
5173         /*
5174          * The datalinks assigned to the zone will be removed from the NGZ as
5175          * part of zone_shutdown() so that we need to remove protect/pool etc.
5176          * before zone_shutdown(). Even if the shutdown itself fails, the zone
5177          * will not be able to violate any constraints applied because the
5178          * datalinks are no longer available to the zone.
5179          */
5180         if (zone_shutdown(zoneid) != 0) {
5181                 zerror(zlogp, B_TRUE, "unable to shutdown zone");
5182                 goto error;
5183         }
5184 
5185         /* Get the zonepath of this zone */
5186         if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
5187                 zerror(zlogp, B_FALSE, "unable to determine zone path");
5188                 goto error;
5189         }
5190 
5191         /* Get a handle to the brand info for this zone */
5192         if ((bh = brand_open(brand_name)) == NULL) {
5193                 zerror(zlogp, B_FALSE, "unable to determine zone brand");
5194                 return (-1);
5195         }
5196         /*
5197          * If there is a brand 'halt' callback, execute it now to give the
5198          * brand a chance to cleanup any custom configuration.
5199          */
5200         (void) strcpy(cmdbuf, EXEC_PREFIX);
5201         if (brand_get_halt(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
5202             sizeof (cmdbuf) - EXEC_LEN) < 0) {
5203                 brand_close(bh);
5204                 zerror(zlogp, B_FALSE, "unable to determine branded zone's "
5205                     "halt callback.");
5206                 goto error;
5207         }
5208         brand_close(bh);
5209 
5210         if ((strlen(cmdbuf) > EXEC_LEN) &&
5211             (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
5212                 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
5213                 goto error;
5214         }
5215 
5216         if (!unmount_cmd) {
5217                 zone_iptype_t iptype;
5218 
5219                 if (zone_getattr(zoneid, ZONE_ATTR_FLAGS, &flags,
5220                     sizeof (flags)) < 0) {
5221                         if (vplat_get_iptype(zlogp, &iptype) < 0) {
 
 | 
 
 
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2015 by Delphix. All rights reserved.
  25  * Copyright 2016, Joyent Inc.
  26  */
  27 
  28 /*
  29  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  30  */
  31 
  32 /*
  33  * This module contains functions used to bring up and tear down the
  34  * Virtual Platform: [un]mounting file-systems, [un]plumbing network
  35  * interfaces, [un]configuring devices, establishing resource controls,
  36  * and creating/destroying the zone in the kernel.  These actions, on
  37  * the way up, ready the zone; on the way down, they halt the zone.
  38  * See the much longer block comment at the beginning of zoneadmd.c
  39  * for a bigger picture of how the whole program functions.
  40  *
  41  * This module also has primary responsibility for the layout of "scratch
  42  * zones."  These are mounted, but inactive, zones that are used during
  43  * operating system upgrade and potentially other administrative action.  The
  44  * scratch zone environment is similar to the miniroot environment.  The zone's
  45  * actual root is mounted read-write on /a, and the standard paths (/usr,
 
 
 119 #include <libbrand.h>
 120 #include <sys/brand.h>
 121 #include <libzonecfg.h>
 122 #include <synch.h>
 123 
 124 #include "zoneadmd.h"
 125 #include <tsol/label.h>
 126 #include <libtsnet.h>
 127 #include <sys/priv.h>
 128 #include <libinetutil.h>
 129 
 130 #define V4_ADDR_LEN     32
 131 #define V6_ADDR_LEN     128
 132 
 133 #define RESOURCE_DEFAULT_OPTS \
 134         MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES
 135 
 136 #define DFSTYPES        "/etc/dfs/fstypes"
 137 #define MAXTNZLEN       2048
 138 
 139 /* a reasonable estimate for the number of lwps per process */
 140 #define LWPS_PER_PROCESS        10
 141 
 142 /* for routing socket */
 143 static int rts_seqno = 0;
 144 
 145 /* mangled zone name when mounting in an alternate root environment */
 146 static char kernzone[ZONENAME_MAX];
 147 
 148 /* array of cached mount entries for resolve_lofs */
 149 static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max;
 150 
 151 /* for Trusted Extensions */
 152 static tsol_zcent_t *get_zone_label(zlog_t *, priv_set_t *);
 153 static int tsol_mounts(zlog_t *, char *, char *);
 154 static void tsol_unmounts(zlog_t *, char *);
 155 
 156 static m_label_t *zlabel = NULL;
 157 static m_label_t *zid_label = NULL;
 158 static priv_set_t *zprivs = NULL;
 159 
 160 static const char *DFLT_FS_ALLOWED = "hsfs,smbfs,nfs,nfs3,nfs4,nfsdyn";
 161 
 162 typedef struct zone_proj_rctl_map {
 163         char *zpr_zone_rctl;
 164         char *zpr_project_rctl;
 165 } zone_proj_rctl_map_t;
 166 
 167 static zone_proj_rctl_map_t zone_proj_rctl_map[] = {
 168         {"zone.max-msg-ids",    "project.max-msg-ids"},
 169         {"zone.max-sem-ids",    "project.max-sem-ids"},
 170         {"zone.max-shm-ids",    "project.max-shm-ids"},
 171         {"zone.max-shm-memory", "project.max-shm-memory"},
 172         {NULL,                  NULL}
 173 };
 174 
 175 /* from libsocket, not in any header file */
 176 extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
 177 
 178 /* from zoneadmd */
 179 extern char query_hook[];
 180 
 181 /*
 182  * For each "net" resource configured in zonecfg, we track a zone_addr_list_t
 183  * node in a linked list that is sorted by linkid.  The list is constructed as
 184  * the xml configuration file is parsed, and the information
 185  * contained in each node is added to the kernel before the zone is
 186  * booted, to be retrieved and applied from within the exclusive-IP NGZ
 187  * on boot.
 188  */
 189 typedef struct zone_addr_list {
 190         struct zone_addr_list *za_next;
 191         datalink_id_t za_linkid;        /* datalink_id_t of interface */
 192         struct zone_nwiftab za_nwiftab; /* address, defrouter properties */
 193 } zone_addr_list_t;
 194 
 
1144                 break;
1145         }
1146 
1147         if (brand_platform_iter_devices(bh, zone_name,
1148             mount_one_dev_device_cb, prof, curr_iptype) != 0) {
1149                 zerror(zlogp, B_TRUE, "failed to add standard device");
1150                 goto cleanup;
1151         }
1152 
1153         if (brand_platform_iter_link(bh,
1154             mount_one_dev_symlink_cb, prof) != 0) {
1155                 zerror(zlogp, B_TRUE, "failed to add standard symlink");
1156                 goto cleanup;
1157         }
1158 
1159         /* Add user-specified devices and directories */
1160         if ((handle = zonecfg_init_handle()) == NULL) {
1161                 zerror(zlogp, B_FALSE, "can't initialize zone handle");
1162                 goto cleanup;
1163         }
1164         if ((err = zonecfg_get_handle(zone_name, handle)) != 0) {
1165                 zerror(zlogp, B_FALSE, "can't get handle for zone "
1166                     "%s: %s", zone_name, zonecfg_strerror(err));
1167                 goto cleanup;
1168         }
1169         if ((err = zonecfg_setdevent(handle)) != 0) {
1170                 zerror(zlogp, B_FALSE, "%s: %s", zone_name,
1171                     zonecfg_strerror(err));
1172                 goto cleanup;
1173         }
1174         while (zonecfg_getdevent(handle, &ztab) == Z_OK) {
1175                 if (di_prof_add_dev(prof, ztab.zone_dev_match)) {
1176                         zerror(zlogp, B_TRUE, "failed to add "
1177                             "user-specified device");
1178                         goto cleanup;
1179                 }
1180         }
1181         (void) zonecfg_enddevent(handle);
1182 
1183         /* Send profile to kernel */
1184         if (di_prof_commit(prof)) {
1185                 zerror(zlogp, B_TRUE, "failed to commit profile");
1186                 goto cleanup;
1187         }
1188 
1189         retval = 0;
 
1665                  * for non-lofs mounts since they will have a device
1666                  * as a backing store and device paths must always be
1667                  * specified relative to the current boot environment.
1668                  */
1669                 fsp->zone_fs_special[0] = '\0';
1670                 if (strcmp(fsp->zone_fs_type, MNTTYPE_LOFS) == 0) {
1671                         (void) strlcat(fsp->zone_fs_special, zonecfg_get_root(),
1672                             sizeof (fsp->zone_fs_special));
1673                 }
1674                 (void) strlcat(fsp->zone_fs_special, fstab.zone_fs_special,
1675                     sizeof (fsp->zone_fs_special));
1676         }
1677         (void) zonecfg_endfsent(handle);
1678         return (0);
1679 }
1680 
1681 static int
1682 mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
1683 {
1684         char rootpath[MAXPATHLEN];
1685         char brand[MAXNAMELEN];
1686         char luroot[MAXPATHLEN];
1687         int i, num_fs = 0;
1688         struct zone_fstab *fs_ptr = NULL;
1689         zone_dochandle_t handle = NULL;
1690         zone_state_t zstate;
1691         brand_handle_t bh;
1692         plat_gmount_cb_data_t cb;
1693 
1694         if (zone_get_state(zone_name, &zstate) != Z_OK ||
1695             (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) {
1696                 zerror(zlogp, B_FALSE,
1697                     "zone must be in '%s' or '%s' state to mount file-systems",
1698                     zone_state_str(ZONE_STATE_READY),
1699                     zone_state_str(ZONE_STATE_MOUNTED));
1700                 goto bad;
1701         }
1702 
1703         if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
1704                 zerror(zlogp, B_TRUE, "unable to determine zone root");
1705                 goto bad;
1706         }
1707 
1708         if ((handle = zonecfg_init_handle()) == NULL) {
1709                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
1710                 goto bad;
1711         }
1712         if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK ||
1713             zonecfg_setfsent(handle) != Z_OK) {
1714                 zerror(zlogp, B_FALSE, "invalid configuration");
1715                 goto bad;
1716         }
1717 
1718         /*
1719          * If we are mounting the zone, then we must always use the default
1720          * brand global mounts.
1721          */
1722         if (ALT_MOUNT(mount_cmd)) {
 
1783          * we need <zoneroot>/lu/dev to be the /dev filesystem
1784          * for the zone and we don't want to have any /dev filesystem
1785          * mounted at <zoneroot>/lu/a/dev.  Since /dev is specified
1786          * as a normal zone filesystem by default we'll try to mount
1787          * it at <zoneroot>/lu/a/dev, so we have to detect this
1788          * case and instead mount it at <zoneroot>/lu/dev.
1789          *
1790          * All this work is done in three phases:
1791          *   1) Create and populate lu directory (build_mounted_pre_var()).
1792          *   2) Mount the required filesystems as per the zone configuration.
1793          *   3) Set up the rest of the scratch zone environment
1794          *      (build_mounted_post_var()).
1795          */
1796         if (ALT_MOUNT(mount_cmd) && !build_mounted_pre_var(zlogp,
1797             rootpath, sizeof (rootpath), zonepath, luroot, sizeof (luroot)))
1798                 goto bad;
1799 
1800         qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare);
1801 
1802         for (i = 0; i < num_fs; i++) {
1803                 if (ALT_MOUNT(mount_cmd)) {
1804                         if (strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) {
1805                                 size_t slen = strlen(rootpath) - 2;
1806 
1807                                 /*
1808                                  * By default we'll try to mount /dev
1809                                  * as /a/dev but /dev is special and
1810                                  * always goes at the top so strip the
1811                                  * trailing '/a' from the rootpath.
1812                                  */
1813                                 assert(strcmp(&rootpath[slen], "/a") == 0);
1814                                 rootpath[slen] = '\0';
1815                                 if (mount_one(zlogp, &fs_ptr[i], rootpath,
1816                                     mount_cmd) != 0)
1817                                         goto bad;
1818                                 rootpath[slen] = '/';
1819                                 continue;
1820                         } else if (strcmp(brand_name, default_brand) != 0) {
1821                                 /*
1822                                  * If mounting non-native brand, skip
1823                                  * mounting global mounts and
1824                                  * filesystem entries since they are
1825                                  * only needed for native pkg upgrade
1826                                  * tools.
1827                                  *
1828                                  * The only exception right now is
1829                                  * /dev (handled above), which is
1830                                  * needed in the luroot in order to
1831                                  * zlogin -S into the zone.
1832                                  */
1833                                 continue;
1834                         }
1835                 }
1836 
1837                 if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd) != 0)
1838                         goto bad;
1839         }
1840         if (ALT_MOUNT(mount_cmd) &&
1841             !build_mounted_post_var(zlogp, mount_cmd, rootpath, luroot))
1842                 goto bad;
1843 
1844         /*
1845          * For Trusted Extensions cross-mount each lower level /export/home
1846          */
1847         if (mount_cmd == Z_MNT_BOOT &&
1848             tsol_mounts(zlogp, zone_name, rootpath) != 0)
1849                 goto bad;
1850 
1851         free_fs_data(fs_ptr, num_fs);
1852 
1853         /*
1854          * Everything looks fine.
1855          */
1856         return (0);
 
2893         }
2894         /* insert new after ptr */
2895         new->za_next = next;
2896         ptr->za_next = new;
2897         return (old);
2898 }
2899 
2900 void
2901 free_ip_interface(zone_addr_list_t *zalist)
2902 {
2903         zone_addr_list_t *ptr, *new;
2904 
2905         for (ptr = zalist; ptr != NULL; ) {
2906                 new = ptr;
2907                 ptr = ptr->za_next;
2908                 free(new);
2909         }
2910 }
2911 
2912 /*
2913  * For IP networking, we need to use the illumos-native device tree.  For most
2914  * zones, this is $ZONEROOT/dev.  For LX ones, it's $ZONEROOT/native/dev.
2915  * Return the appropriate post-$ZONEROOT path.
2916  */
2917 static char *
2918 get_brand_dev(void)
2919 {
2920         static char *lxpath = "/native/dev";
2921         /* Cheesy hard-coding of strlen("/native") */
2922         char *default_path = lxpath + 7;
2923 
2924         /* LX zones are the exception... */
2925         if (strcmp(brand_name, "lx") == 0)
2926                 return (lxpath);
2927 
2928         return (default_path);
2929 }
2930 
2931 /*
2932  * Add the kernel access control information for the interface names.
2933  * If anything goes wrong, we log a general error message, attempt to tear down
2934  * whatever we set up, and return an error.
2935  */
2936 static int
2937 configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
2938 {
2939         zone_dochandle_t handle;
2940         struct zone_nwiftab nwiftab;
2941         char rootpath[MAXPATHLEN];
2942         char path[MAXPATHLEN];
2943         datalink_id_t linkid;
2944         di_prof_t prof = NULL;
2945         boolean_t added = B_FALSE;
2946         zone_addr_list_t *zalist = NULL, *new;
2947 
2948         if ((handle = zonecfg_init_handle()) == NULL) {
2949                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
2950                 return (-1);
2951         }
 
2957 
2958         if (zonecfg_setnwifent(handle) != Z_OK) {
2959                 zonecfg_fini_handle(handle);
2960                 return (0);
2961         }
2962 
2963         for (;;) {
2964                 if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
2965                         break;
2966 
2967                 if (prof == NULL) {
2968                         if (zone_get_devroot(zone_name, rootpath,
2969                             sizeof (rootpath)) != Z_OK) {
2970                                 (void) zonecfg_endnwifent(handle);
2971                                 zonecfg_fini_handle(handle);
2972                                 zerror(zlogp, B_TRUE,
2973                                     "unable to determine dev root");
2974                                 return (-1);
2975                         }
2976                         (void) snprintf(path, sizeof (path), "%s%s", rootpath,
2977                             get_brand_dev());
2978                         if (di_prof_init(path, &prof) != 0) {
2979                                 (void) zonecfg_endnwifent(handle);
2980                                 zonecfg_fini_handle(handle);
2981                                 zerror(zlogp, B_TRUE,
2982                                     "failed to initialize profile");
2983                                 return (-1);
2984                         }
2985                 }
2986 
2987                 /*
2988                  * Create the /dev entry for backward compatibility.
2989                  * Only create the /dev entry if it's not in use.
2990                  * Note that the zone still boots when the assigned
2991                  * interface is inaccessible, used by others, etc.
2992                  * Also, when vanity naming is used, some interfaces do
2993                  * not have corresponding /dev node names (for example,
2994                  * vanity named aggregations).  The /dev entry is not
2995                  * created in that case.  The /dev/net entry is always
2996                  * accessible.
2997                  */
 
3341                 break;
3342         case Z_PRIV_REQUIRED:
3343                 zerror(zlogp, B_FALSE, "required privilege \"%s\" is missing "
3344                     "from the zone's privilege set", privname);
3345                 break;
3346         case Z_PRIV_UNKNOWN:
3347                 zerror(zlogp, B_FALSE, "unknown privilege \"%s\" specified "
3348                     "in the zone's privilege set", privname);
3349                 break;
3350         default:
3351                 zerror(zlogp, B_FALSE, "failed to determine the zone's "
3352                     "privilege set");
3353                 break;
3354         }
3355 
3356         free(privname);
3357         zonecfg_fini_handle(handle);
3358         return (error);
3359 }
3360 
3361 static char *
3362 zone_proj_rctl(const char *name)
3363 {
3364         int i;
3365 
3366         for (i = 0; zone_proj_rctl_map[i].zpr_zone_rctl != NULL; i++) {
3367                 if (strcmp(name, zone_proj_rctl_map[i].zpr_zone_rctl) == 0) {
3368                         return (zone_proj_rctl_map[i].zpr_project_rctl);
3369                 }
3370         }
3371         return (NULL);
3372 }
3373 
3374 static int
3375 get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
3376 {
3377         nvlist_t *nvl = NULL;
3378         char *nvl_packed = NULL;
3379         size_t nvl_size = 0;
3380         nvlist_t **nvlv = NULL;
3381         int rctlcount = 0;
3382         int error = -1;
3383         zone_dochandle_t handle;
3384         struct zone_rctltab rctltab;
3385         rctlblk_t *rctlblk = NULL;
3386         uint64_t maxlwps;
3387         uint64_t maxprocs;
3388         int rproc, rlwp;
3389 
3390         *bufp = NULL;
3391         *bufsizep = 0;
3392 
3393         if ((handle = zonecfg_init_handle()) == NULL) {
3394                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
3395                 return (-1);
3396         }
3397         if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
3398                 zerror(zlogp, B_FALSE, "invalid configuration");
3399                 zonecfg_fini_handle(handle);
3400                 return (-1);
3401         }
3402 
3403         rctltab.zone_rctl_valptr = NULL;
3404         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
3405                 zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc");
3406                 goto out;
3407         }
3408 
3409         /*
3410          * Allow the administrator to control both the maximum number of
3411          * process table slots, and the maximum number of lwps, with a single
3412          * max-processes or max-lwps property. If only the max-processes
3413          * property is set, we add a max-lwps property with a limit derived
3414          * from max-processes. If only the max-lwps property is set, we add a
3415          * max-processes property with the same limit as max-lwps.
3416          */
3417         rproc = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXPROCS, &maxprocs);
3418         rlwp = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXLWPS, &maxlwps);
3419         if (rproc == Z_OK && rlwp == Z_NO_ENTRY) {
3420                 if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXLWPS,
3421                     maxprocs * LWPS_PER_PROCESS) != Z_OK) {
3422                         zerror(zlogp, B_FALSE, "unable to set max-lwps alias");
3423                         goto out;
3424                 }
3425         } else if (rlwp == Z_OK && rproc == Z_NO_ENTRY) {
3426                 /* no scaling for max-proc value */
3427                 if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXPROCS,
3428                     maxlwps) != Z_OK) {
3429                         zerror(zlogp, B_FALSE,
3430                             "unable to set max-processes alias");
3431                         goto out;
3432                 }
3433         }
3434 
3435         if (zonecfg_setrctlent(handle) != Z_OK) {
3436                 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
3437                 goto out;
3438         }
3439 
3440         if ((rctlblk = malloc(rctlblk_size())) == NULL) {
3441                 zerror(zlogp, B_TRUE, "memory allocation failed");
3442                 goto out;
3443         }
3444         while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) {
3445                 struct zone_rctlvaltab *rctlval;
3446                 uint_t i, count;
3447                 const char *name = rctltab.zone_rctl_name;
3448                 char *proj_nm;
3449 
3450                 /* zoneadm should have already warned about unknown rctls. */
3451                 if (!zonecfg_is_rctl(name)) {
3452                         zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
3453                         rctltab.zone_rctl_valptr = NULL;
3454                         continue;
3455                 }
3456                 count = 0;
3457                 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
3458                     rctlval = rctlval->zone_rctlval_next) {
3459                         count++;
3460                 }
3461                 if (count == 0) {       /* ignore */
3462                         continue;       /* Nothing to free */
3463                 }
3464                 if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL)
3465                         goto out;
3466                 i = 0;
3467                 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
3468                     rctlval = rctlval->zone_rctlval_next, i++) {
 
3495                                 zerror(zlogp, B_FALSE, "%s failed",
3496                                     "nvlist_add_uint64");
3497                                 goto out;
3498                         }
3499                         if (nvlist_add_uint64(nvlv[i], "limit",
3500                             rctlblk_get_value(rctlblk)) != 0) {
3501                                 zerror(zlogp, B_FALSE, "%s failed",
3502                                     "nvlist_add_uint64");
3503                                 goto out;
3504                         }
3505                         if (nvlist_add_uint64(nvlv[i], "action",
3506                             (uint_t)rctlblk_get_local_action(rctlblk, NULL))
3507                             != 0) {
3508                                 zerror(zlogp, B_FALSE, "%s failed",
3509                                     "nvlist_add_uint64");
3510                                 goto out;
3511                         }
3512                 }
3513                 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
3514                 rctltab.zone_rctl_valptr = NULL;
3515 
3516                 /*
3517                  * With no action on our part we will start zsched with the
3518                  * project rctl values for our (zoneadmd) current project. For
3519                  * brands running a variant of Illumos, that's not a problem
3520                  * since they will setup their own projects, but for a
3521                  * non-native brand like lx, where there are no projects, we
3522                  * want to start things up with the same project rctls as the
3523                  * corresponding zone rctls, since nothing within the zone will
3524                  * ever change the project rctls.
3525                  */
3526                 if ((proj_nm = zone_proj_rctl(name)) != NULL) {
3527                         if (nvlist_add_nvlist_array(nvl, proj_nm, nvlv, count)
3528                             != 0) {
3529                                 zerror(zlogp, B_FALSE,
3530                                     "nvlist_add_nvlist_arrays failed");
3531                                 goto out;
3532                         }
3533                 }
3534 
3535                 if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count)
3536                     != 0) {
3537                         zerror(zlogp, B_FALSE, "%s failed",
3538                             "nvlist_add_nvlist_array");
3539                         goto out;
3540                 }
3541                 for (i = 0; i < count; i++)
3542                         nvlist_free(nvlv[i]);
3543                 free(nvlv);
3544                 nvlv = NULL;
3545                 rctlcount++;
3546         }
3547         (void) zonecfg_endrctlent(handle);
3548 
3549         if (rctlcount == 0) {
3550                 error = 0;
3551                 goto out;
3552         }
3553         if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0)
3554             != 0) {
 
3776 
3777         if (buf1.f_fsid == buf2.f_fsid)
3778                 return (B_FALSE);
3779 
3780         return (B_TRUE);
3781 }
3782 
3783 /*
3784  * Verify the MAC label in the root dataset for the zone.
3785  * If the label exists, it must match the label configured for the zone.
3786  * Otherwise if there's no label on the dataset, create one here.
3787  */
3788 
3789 static int
3790 validate_rootds_label(zlog_t *zlogp, char *rootpath, m_label_t *zone_sl)
3791 {
3792         int             error = -1;
3793         zfs_handle_t    *zhp;
3794         libzfs_handle_t *hdl;
3795         m_label_t       ds_sl;
3796         char            ds_hexsl[MAXNAMELEN];
3797 
3798         if (!is_system_labeled())
3799                 return (0);
3800 
3801         if (!is_zonepath_zfs(zonepath))
3802                 return (0);
3803 
3804         if ((hdl = libzfs_init()) == NULL) {
3805                 zerror(zlogp, B_FALSE, "opening ZFS library");
3806                 return (-1);
3807         }
3808 
3809         if ((zhp = zfs_path_to_zhandle(hdl, rootpath,
3810             ZFS_TYPE_FILESYSTEM)) == NULL) {
3811                 zerror(zlogp, B_FALSE, "cannot open ZFS dataset for path '%s'",
3812                     rootpath);
3813                 libzfs_fini(hdl);
3814                 return (-1);
3815         }
3816 
3817         /* Get the mlslabel property if it exists. */
3818         if ((zfs_prop_get(zhp, ZFS_PROP_MLSLABEL, ds_hexsl, MAXNAMELEN,
3819             NULL, NULL, 0, B_TRUE) != 0) ||
3820             (strcmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)) {
 
4451         if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
4452                 return (B_TRUE);
4453         for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; mnp++) {
4454                 if (mnp->mnt_fstype == NULL ||
4455                     strcmp(MNTTYPE_LOFS, mnp->mnt_fstype) != 0)
4456                         continue;
4457                 /* We're looking at a loopback mount.  Stat it. */
4458                 if (mnp->mnt_special != NULL &&
4459                     stat64(mnp->mnt_special, &zst) != -1 &&
4460                     rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) {
4461                         zerror(zlogp, B_FALSE,
4462                             "zone root %s is reachable through %s",
4463                             rootpath, mnp->mnt_mountp);
4464                         return (B_TRUE);
4465                 }
4466         }
4467         return (B_FALSE);
4468 }
4469 
4470 /*
4471  * Set pool info for the zone's resource management configuration.
4472  */
4473 static int
4474 setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
4475 {
4476         int res;
4477         uint64_t tmp;
4478         char sched[MAXNAMELEN];
4479         zone_dochandle_t handle = NULL;
4480         char pool_err[128];
4481 
4482         if ((handle = zonecfg_init_handle()) == NULL) {
4483                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
4484                 return (Z_BAD_HANDLE);
4485         }
4486 
4487         if ((res = zonecfg_get_snapshot_handle(zone_name, handle)) != Z_OK) {
4488                 zerror(zlogp, B_FALSE, "invalid configuration");
4489                 zonecfg_fini_handle(handle);
4490                 return (res);
4491         }
4492 
4493         /* Get the scheduling class set in the zone configuration. */
4494         if (zonecfg_get_sched_class(handle, sched, sizeof (sched)) == Z_OK &&
4495             strlen(sched) > 0) {
4496                 if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, sched,
4497                     strlen(sched)) == -1)
4498                         zerror(zlogp, B_TRUE, "WARNING: unable to set the "
4499                             "default scheduling class");
4500 
4501                 if (strcmp(sched, "FX") == 0) {
4502                         /*
4503                          * When FX is specified then by default all processes
4504                          * will start at the lowest priority level (0) and
4505                          * stay there. We support an optional attr which
4506                          * indicates that all the processes should be "high
4507                          * priority". We set this on the zone so that starting
4508                          * init will set the priority high.
4509                          */
4510                         struct zone_attrtab a;
4511 
4512                         bzero(&a, sizeof (a));
4513                         (void) strlcpy(a.zone_attr_name, "fixed-hi-prio",
4514                             sizeof (a.zone_attr_name));
4515 
4516                         if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
4517                             strcmp(a.zone_attr_value, "true") == 0) {
4518                                 boolean_t hi = B_TRUE;
4519 
4520                                 if (zone_setattr(zoneid,
4521                                     ZONE_ATTR_SCHED_FIXEDHI, (void *)hi,
4522                                     sizeof (hi)) == -1)
4523                                         zerror(zlogp, B_TRUE, "WARNING: unable "
4524                                             "to set high priority");
4525                         }
4526                 }
4527 
4528         } else if (zonecfg_get_aliased_rctl(snap_hndl, ALIAS_SHARES, &tmp)
4529             == Z_OK) {
4530                 /*
4531                  * If the zone has the zone.cpu-shares rctl set then we want to
4532                  * use the Fair Share Scheduler (FSS) for processes in the
4533                  * zone.  Check what scheduling class the zone would be running
4534                  * in by default so we can print a warning and modify the class
4535                  * if we wouldn't be using FSS.
4536                  */
4537                 char class_name[PC_CLNMSZ];
4538 
4539                 if (zonecfg_get_dflt_sched_class(handle, class_name,
4540                     sizeof (class_name)) != Z_OK) {
4541                         zerror(zlogp, B_FALSE, "WARNING: unable to determine "
4542                             "the zone's scheduling class");
4543 
4544                 } else if (strcmp("FSS", class_name) != 0) {
4545                         zerror(zlogp, B_FALSE, "WARNING: The zone.cpu-shares "
4546                             "rctl is set but\nFSS is not the default "
4547                             "scheduling class for\nthis zone.  FSS will be "
4548                             "used for processes\nin the zone but to get the "
 
5055                 fd = open(ZONE_INDEX_FILE, O_WRONLY|O_CREAT|O_TRUNC,
5056                     ZONE_INDEX_MODE);
5057                 if (fd != -1 && (zet = fdopen(fd, "w")) != NULL) {
5058                         (void) fchown(fd, ZONE_INDEX_UID, ZONE_INDEX_GID);
5059                         if (uuid_is_null(zep->zone_uuid))
5060                                 uuidstr[0] = '\0';
5061                         else
5062                                 uuid_unparse(zep->zone_uuid, uuidstr);
5063                         (void) fprintf(zet, "%s:%s:/:%s\n", zep->zone_name,
5064                             zone_state_str(zep->zone_state),
5065                             uuidstr);
5066                         (void) fclose(zet);
5067                 }
5068         }
5069         _exit(0);
5070 }
5071 
5072 int
5073 vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid)
5074 {
5075         char zpath[MAXPATHLEN];
5076 
5077         if (mount_cmd == Z_MNT_BOOT && validate_datasets(zlogp) != 0) {
5078                 lofs_discard_mnttab();
5079                 return (-1);
5080         }
5081 
5082         /*
5083          * Before we try to mount filesystems we need to create the
5084          * attribute backing store for /dev
5085          */
5086         (void) strlcpy(zpath, zonepath, sizeof (zpath));
5087         resolve_lofs(zlogp, zpath, sizeof (zpath));
5088 
5089         /* Make /dev directory owned by root, grouped sys */
5090         if (make_one_dir(zlogp, zpath, "/dev", DEFAULT_DIR_MODE, 0, 3) != 0) {
5091                 lofs_discard_mnttab();
5092                 return (-1);
5093         }
5094 
5095         if (mount_filesystems(zlogp, mount_cmd) != 0) {
5096                 lofs_discard_mnttab();
5097                 return (-1);
5098         }
5099 
5100         if (mount_cmd == Z_MNT_BOOT) {
5101                 zone_iptype_t iptype;
5102 
5103                 if (vplat_get_iptype(zlogp, &iptype) < 0) {
5104                         zerror(zlogp, B_TRUE, "unable to determine ip-type");
5105                         lofs_discard_mnttab();
5106                         return (-1);
5107                 }
5108 
5109                 switch (iptype) {
5110                 case ZS_SHARED:
 
5186                 if (zonecfg_lock_scratch(fp) != 0)
5187                         zerror(zlogp, B_TRUE, "cannot lock mapfile");
5188                 else if (zonecfg_delete_scratch(fp, kernzone) != 0)
5189                         zerror(zlogp, B_TRUE, "cannot delete map entry");
5190                 else
5191                         retv = 0;
5192                 zonecfg_close_scratch(fp);
5193                 return (retv);
5194         } else {
5195                 return (0);
5196         }
5197 }
5198 
5199 int
5200 vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
5201 {
5202         char *kzone;
5203         zoneid_t zoneid;
5204         int res;
5205         char pool_err[128];
5206         char cmdbuf[MAXPATHLEN];
5207         brand_handle_t bh = NULL;
5208         dladm_status_t status;
5209         char errmsg[DLADM_STRSIZE];
5210         ushort_t flags;
5211 
5212         kzone = zone_name;
5213         if (zonecfg_in_alt_root()) {
5214                 FILE *fp;
5215 
5216                 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
5217                         zerror(zlogp, B_TRUE, "unable to open map file");
5218                         goto error;
5219                 }
5220                 if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
5221                     kernzone, sizeof (kernzone)) != 0) {
5222                         zerror(zlogp, B_FALSE, "unable to find scratch zone");
5223                         zonecfg_close_scratch(fp);
5224                         goto error;
5225                 }
 
5241         }
5242 
5243         if (remove_datalink_protect(zlogp, zoneid) != 0) {
5244                 zerror(zlogp, B_FALSE,
5245                     "unable clear datalink protect property");
5246                 goto error;
5247         }
5248 
5249         /*
5250          * The datalinks assigned to the zone are removed from the NGZ as part
5251          * of zone_shutdown(), so we must remove protect/pool etc. before
5252          * calling zone_shutdown(). Even if the shutdown itself fails, the zone
5253          * will not be able to violate any constraints applied because the
5254          * datalinks are no longer available to the zone.
5255          */
5256         if (zone_shutdown(zoneid) != 0) {
5257                 zerror(zlogp, B_TRUE, "unable to shutdown zone");
5258                 goto error;
5259         }
5260 
5261         /* Get a handle to the brand info for this zone */
5262         if ((bh = brand_open(brand_name)) == NULL) {
5263                 zerror(zlogp, B_FALSE, "unable to determine zone brand");
5264                 return (-1);
5265         }
5266         /*
5267          * If there is a brand 'halt' callback, execute it now to give the
5268          * brand a chance to cleanup any custom configuration.
5269          */
5270         (void) strcpy(cmdbuf, EXEC_PREFIX);
5271         if (brand_get_halt(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
5272             sizeof (cmdbuf) - EXEC_LEN) < 0) {
5273                 brand_close(bh);
5274                 zerror(zlogp, B_FALSE, "unable to determine branded zone's "
5275                     "halt callback.");
5276                 goto error;
5277         }
5278         brand_close(bh);
5279 
5280         if ((strlen(cmdbuf) > EXEC_LEN) &&
5281             (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
5282                 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
5283                 goto error;
5284         }
5285 
5286         if (!unmount_cmd) {
5287                 zone_iptype_t iptype;
5288 
5289                 if (zone_getattr(zoneid, ZONE_ATTR_FLAGS, &flags,
5290                     sizeof (flags)) < 0) {
5291                         if (vplat_get_iptype(zlogp, &iptype) < 0) {
 
 |