4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2013, Joyent Inc. All rights reserved.
25 * Copyright (c) 2015 by Delphix. All rights reserved.
26 */
27
28 /*
29 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
30 */
31
32 /*
33 * This module contains functions used to bring up and tear down the
34 * Virtual Platform: [un]mounting file-systems, [un]plumbing network
35 * interfaces, [un]configuring devices, establishing resource controls,
36 * and creating/destroying the zone in the kernel. These actions, on
37 * the way up, ready the zone; on the way down, they halt the zone.
38 * See the much longer block comment at the beginning of zoneadmd.c
39 * for a bigger picture of how the whole program functions.
40 *
41 * This module also has primary responsibility for the layout of "scratch
42 * zones." These are mounted, but inactive, zones that are used during
43 * operating system upgrade and potentially other administrative action. The
44 * scratch zone environment is similar to the miniroot environment. The zone's
45 * actual root is mounted read-write on /a, and the standard paths (/usr,
119 #include <libbrand.h>
120 #include <sys/brand.h>
121 #include <libzonecfg.h>
122 #include <synch.h>
123
124 #include "zoneadmd.h"
125 #include <tsol/label.h>
126 #include <libtsnet.h>
127 #include <sys/priv.h>
128 #include <libinetutil.h>
129
130 #define V4_ADDR_LEN 32
131 #define V6_ADDR_LEN 128
132
133 #define RESOURCE_DEFAULT_OPTS \
134 MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES
135
136 #define DFSTYPES "/etc/dfs/fstypes"
137 #define MAXTNZLEN 2048
138
139 #define ALT_MOUNT(mount_cmd) ((mount_cmd) != Z_MNT_BOOT)
140
141 /* a reasonable estimate for the number of lwps per process */
142 #define LWPS_PER_PROCESS 10
143
144 /* for routing socket */
145 static int rts_seqno = 0;
146
147 /* mangled zone name when mounting in an alternate root environment */
148 static char kernzone[ZONENAME_MAX];
149
150 /* array of cached mount entries for resolve_lofs */
151 static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max;
152
153 /* for Trusted Extensions */
154 static tsol_zcent_t *get_zone_label(zlog_t *, priv_set_t *);
155 static int tsol_mounts(zlog_t *, char *, char *);
156 static void tsol_unmounts(zlog_t *, char *);
157
158 static m_label_t *zlabel = NULL;
159 static m_label_t *zid_label = NULL;
160 static priv_set_t *zprivs = NULL;
161
162 static const char *DFLT_FS_ALLOWED = "hsfs,smbfs,nfs,nfs3,nfs4,nfsdyn";
163
164 /* from libsocket, not in any header file */
165 extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
166
167 /* from zoneadmd */
168 extern char query_hook[];
169
170 /*
171 * For each "net" resource configured in zonecfg, we track a zone_addr_list_t
172 * node in a linked list that is sorted by linkid. The list is constructed as
173 * the xml configuration file is parsed, and the information
174 * contained in each node is added to the kernel before the zone is
175 * booted, to be retrieved and applied from within the exclusive-IP NGZ
176 * on boot.
177 */
178 typedef struct zone_addr_list {
179 struct zone_addr_list *za_next; /* next node in the list; NULL marks the end */
180 datalink_id_t za_linkid; /* datalink_id_t of interface; the list is sorted by it */
181 struct zone_nwiftab za_nwiftab; /* address, defrouter properties */
182 } zone_addr_list_t;
183
1133 break;
1134 }
1135
1136 if (brand_platform_iter_devices(bh, zone_name,
1137 mount_one_dev_device_cb, prof, curr_iptype) != 0) {
1138 zerror(zlogp, B_TRUE, "failed to add standard device");
1139 goto cleanup;
1140 }
1141
1142 if (brand_platform_iter_link(bh,
1143 mount_one_dev_symlink_cb, prof) != 0) {
1144 zerror(zlogp, B_TRUE, "failed to add standard symlink");
1145 goto cleanup;
1146 }
1147
1148 /* Add user-specified devices and directories */
1149 if ((handle = zonecfg_init_handle()) == NULL) {
1150 zerror(zlogp, B_FALSE, "can't initialize zone handle");
1151 goto cleanup;
1152 }
1153 if (err = zonecfg_get_handle(zone_name, handle)) {
1154 zerror(zlogp, B_FALSE, "can't get handle for zone "
1155 "%s: %s", zone_name, zonecfg_strerror(err));
1156 goto cleanup;
1157 }
1158 if (err = zonecfg_setdevent(handle)) {
1159 zerror(zlogp, B_FALSE, "%s: %s", zone_name,
1160 zonecfg_strerror(err));
1161 goto cleanup;
1162 }
1163 while (zonecfg_getdevent(handle, &ztab) == Z_OK) {
1164 if (di_prof_add_dev(prof, ztab.zone_dev_match)) {
1165 zerror(zlogp, B_TRUE, "failed to add "
1166 "user-specified device");
1167 goto cleanup;
1168 }
1169 }
1170 (void) zonecfg_enddevent(handle);
1171
1172 /* Send profile to kernel */
1173 if (di_prof_commit(prof)) {
1174 zerror(zlogp, B_TRUE, "failed to commit profile");
1175 goto cleanup;
1176 }
1177
1178 retval = 0;
1654 * for non-lofs mounts since they will have a device
1655 * as a backing store and device paths must always be
1656 * specified relative to the current boot environment.
1657 */
1658 fsp->zone_fs_special[0] = '\0';
1659 if (strcmp(fsp->zone_fs_type, MNTTYPE_LOFS) == 0) {
1660 (void) strlcat(fsp->zone_fs_special, zonecfg_get_root(),
1661 sizeof (fsp->zone_fs_special));
1662 }
1663 (void) strlcat(fsp->zone_fs_special, fstab.zone_fs_special,
1664 sizeof (fsp->zone_fs_special));
1665 }
1666 (void) zonecfg_endfsent(handle);
1667 return (0);
1668 }
1669
1670 static int
1671 mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
1672 {
1673 char rootpath[MAXPATHLEN];
1674 char zonepath[MAXPATHLEN];
1675 char brand[MAXNAMELEN];
1676 char luroot[MAXPATHLEN];
1677 int i, num_fs = 0;
1678 struct zone_fstab *fs_ptr = NULL;
1679 zone_dochandle_t handle = NULL;
1680 zone_state_t zstate;
1681 brand_handle_t bh;
1682 plat_gmount_cb_data_t cb;
1683
1684 if (zone_get_state(zone_name, &zstate) != Z_OK ||
1685 (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) {
1686 zerror(zlogp, B_FALSE,
1687 "zone must be in '%s' or '%s' state to mount file-systems",
1688 zone_state_str(ZONE_STATE_READY),
1689 zone_state_str(ZONE_STATE_MOUNTED));
1690 goto bad;
1691 }
1692
1693 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
1694 zerror(zlogp, B_TRUE, "unable to determine zone path");
1695 goto bad;
1696 }
1697
1698 if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
1699 zerror(zlogp, B_TRUE, "unable to determine zone root");
1700 goto bad;
1701 }
1702
1703 if ((handle = zonecfg_init_handle()) == NULL) {
1704 zerror(zlogp, B_TRUE, "getting zone configuration handle");
1705 goto bad;
1706 }
1707 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK ||
1708 zonecfg_setfsent(handle) != Z_OK) {
1709 zerror(zlogp, B_FALSE, "invalid configuration");
1710 goto bad;
1711 }
1712
1713 /*
1714 * If we are mounting the zone, then we must always use the default
1715 * brand global mounts.
1716 */
1717 if (ALT_MOUNT(mount_cmd)) {
1778 * we need <zoneroot>/lu/dev to be the /dev filesystem
1779 * for the zone and we don't want to have any /dev filesystem
1780 * mounted at <zoneroot>/lu/a/dev. Since /dev is specified
1781 * as a normal zone filesystem by default we'll try to mount
1782 * it at <zoneroot>/lu/a/dev, so we have to detect this
1783 * case and instead mount it at <zoneroot>/lu/dev.
1784 *
1785 * All this work is done in three phases:
1786 * 1) Create and populate lu directory (build_mounted_pre_var()).
1787 * 2) Mount the required filesystems as per the zone configuration.
1788 * 3) Set up the rest of the scratch zone environment
1789 * (build_mounted_post_var()).
1790 */
1791 if (ALT_MOUNT(mount_cmd) && !build_mounted_pre_var(zlogp,
1792 rootpath, sizeof (rootpath), zonepath, luroot, sizeof (luroot)))
1793 goto bad;
1794
1795 qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare);
1796
1797 for (i = 0; i < num_fs; i++) {
1798 if (ALT_MOUNT(mount_cmd) &&
1799 strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) {
1800 size_t slen = strlen(rootpath) - 2;
1801
1802 /*
1803 * By default we'll try to mount /dev as /a/dev
1804 * but /dev is special and always goes at the top
1805 * so strip the trailing '/a' from the rootpath.
1806 */
1807 assert(strcmp(&rootpath[slen], "/a") == 0);
1808 rootpath[slen] = '\0';
1809 if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd)
1810 != 0)
1811 goto bad;
1812 rootpath[slen] = '/';
1813 continue;
1814 }
1815 if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd) != 0)
1816 goto bad;
1817 }
1818 if (ALT_MOUNT(mount_cmd) &&
1819 !build_mounted_post_var(zlogp, mount_cmd, rootpath, luroot))
1820 goto bad;
1821
1822 /*
1823 * For Trusted Extensions cross-mount each lower level /export/home
1824 */
1825 if (mount_cmd == Z_MNT_BOOT &&
1826 tsol_mounts(zlogp, zone_name, rootpath) != 0)
1827 goto bad;
1828
1829 free_fs_data(fs_ptr, num_fs);
1830
1831 /*
1832 * Everything looks fine.
1833 */
1834 return (0);
2871 }
2872 /* insert new after ptr */
2873 new->za_next = next;
2874 ptr->za_next = new;
2875 return (old);
2876 }
2877
2878 void
2879 free_ip_interface(zone_addr_list_t *zalist)
2880 {
2881 zone_addr_list_t *ptr, *new;
2882
2883 for (ptr = zalist; ptr != NULL; ) {
2884 new = ptr;
2885 ptr = ptr->za_next;
2886 free(new);
2887 }
2888 }
2889
2890 /*
2891 * Add the kernel access control information for the interface names.
2892 * If anything goes wrong, we log a general error message, attempt to tear down
2893 * whatever we set up, and return an error.
2894 */
2895 static int
2896 configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
2897 {
2898 zone_dochandle_t handle;
2899 struct zone_nwiftab nwiftab;
2900 char rootpath[MAXPATHLEN];
2901 char path[MAXPATHLEN];
2902 datalink_id_t linkid;
2903 di_prof_t prof = NULL;
2904 boolean_t added = B_FALSE;
2905 zone_addr_list_t *zalist = NULL, *new;
2906
2907 if ((handle = zonecfg_init_handle()) == NULL) {
2908 zerror(zlogp, B_TRUE, "getting zone configuration handle");
2909 return (-1);
2910 }
2916
2917 if (zonecfg_setnwifent(handle) != Z_OK) {
2918 zonecfg_fini_handle(handle);
2919 return (0);
2920 }
2921
2922 for (;;) {
2923 if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
2924 break;
2925
2926 if (prof == NULL) {
2927 if (zone_get_devroot(zone_name, rootpath,
2928 sizeof (rootpath)) != Z_OK) {
2929 (void) zonecfg_endnwifent(handle);
2930 zonecfg_fini_handle(handle);
2931 zerror(zlogp, B_TRUE,
2932 "unable to determine dev root");
2933 return (-1);
2934 }
2935 (void) snprintf(path, sizeof (path), "%s%s", rootpath,
2936 "/dev");
2937 if (di_prof_init(path, &prof) != 0) {
2938 (void) zonecfg_endnwifent(handle);
2939 zonecfg_fini_handle(handle);
2940 zerror(zlogp, B_TRUE,
2941 "failed to initialize profile");
2942 return (-1);
2943 }
2944 }
2945
2946 /*
2947 * Create the /dev entry for backward compatibility.
2948 * Only create the /dev entry if it's not in use.
2949 * Note that the zone still boots when the assigned
2950 * interface is inaccessible, used by others, etc.
2951 * Also, when vanity naming is used, some interfaces do
2952 * not have corresponding /dev node names (for example,
2953 * vanity named aggregations). The /dev entry is not
2954 * created in that case. The /dev/net entry is always
2955 * accessible.
2956 */
3300 break;
3301 case Z_PRIV_REQUIRED:
3302 zerror(zlogp, B_FALSE, "required privilege \"%s\" is missing "
3303 "from the zone's privilege set", privname);
3304 break;
3305 case Z_PRIV_UNKNOWN:
3306 zerror(zlogp, B_FALSE, "unknown privilege \"%s\" specified "
3307 "in the zone's privilege set", privname);
3308 break;
3309 default:
3310 zerror(zlogp, B_FALSE, "failed to determine the zone's "
3311 "privilege set");
3312 break;
3313 }
3314
3315 free(privname);
3316 zonecfg_fini_handle(handle);
3317 return (error);
3318 }
3319
3320 static int
3321 get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
3322 {
3323 nvlist_t *nvl = NULL;
3324 char *nvl_packed = NULL;
3325 size_t nvl_size = 0;
3326 nvlist_t **nvlv = NULL;
3327 int rctlcount = 0;
3328 int error = -1;
3329 zone_dochandle_t handle;
3330 struct zone_rctltab rctltab;
3331 rctlblk_t *rctlblk = NULL;
3332 uint64_t maxlwps;
3333 uint64_t maxprocs;
3334
3335 *bufp = NULL;
3336 *bufsizep = 0;
3337
3338 if ((handle = zonecfg_init_handle()) == NULL) {
3339 zerror(zlogp, B_TRUE, "getting zone configuration handle");
3340 return (-1);
3341 }
3342 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
3343 zerror(zlogp, B_FALSE, "invalid configuration");
3344 zonecfg_fini_handle(handle);
3345 return (-1);
3346 }
3347
3348 rctltab.zone_rctl_valptr = NULL;
3349 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
3350 zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc");
3351 goto out;
3352 }
3353
3354 /*
3355 * Allow the administrator to control both the maximum number of
3356 * process table slots and the maximum number of lwps with just the
3357 * max-processes property. If only the max-processes property is set,
3358 * we add a max-lwps property with a limit derived from max-processes.
3359 */
3360 if (zonecfg_get_aliased_rctl(handle, ALIAS_MAXPROCS, &maxprocs)
3361 == Z_OK &&
3362 zonecfg_get_aliased_rctl(handle, ALIAS_MAXLWPS, &maxlwps)
3363 == Z_NO_ENTRY) {
3364 if (zonecfg_set_aliased_rctl(handle, ALIAS_MAXLWPS,
3365 maxprocs * LWPS_PER_PROCESS) != Z_OK) {
3366 zerror(zlogp, B_FALSE, "unable to set max-lwps alias");
3367 goto out;
3368 }
3369 }
3370
3371 if (zonecfg_setrctlent(handle) != Z_OK) {
3372 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
3373 goto out;
3374 }
3375
3376 if ((rctlblk = malloc(rctlblk_size())) == NULL) {
3377 zerror(zlogp, B_TRUE, "memory allocation failed");
3378 goto out;
3379 }
3380 while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) {
3381 struct zone_rctlvaltab *rctlval;
3382 uint_t i, count;
3383 const char *name = rctltab.zone_rctl_name;
3384
3385 /* zoneadm should have already warned about unknown rctls. */
3386 if (!zonecfg_is_rctl(name)) {
3387 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
3388 rctltab.zone_rctl_valptr = NULL;
3389 continue;
3390 }
3391 count = 0;
3392 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
3393 rctlval = rctlval->zone_rctlval_next) {
3394 count++;
3395 }
3396 if (count == 0) { /* ignore */
3397 continue; /* Nothing to free */
3398 }
3399 if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL)
3400 goto out;
3401 i = 0;
3402 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
3403 rctlval = rctlval->zone_rctlval_next, i++) {
3430 zerror(zlogp, B_FALSE, "%s failed",
3431 "nvlist_add_uint64");
3432 goto out;
3433 }
3434 if (nvlist_add_uint64(nvlv[i], "limit",
3435 rctlblk_get_value(rctlblk)) != 0) {
3436 zerror(zlogp, B_FALSE, "%s failed",
3437 "nvlist_add_uint64");
3438 goto out;
3439 }
3440 if (nvlist_add_uint64(nvlv[i], "action",
3441 (uint_t)rctlblk_get_local_action(rctlblk, NULL))
3442 != 0) {
3443 zerror(zlogp, B_FALSE, "%s failed",
3444 "nvlist_add_uint64");
3445 goto out;
3446 }
3447 }
3448 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
3449 rctltab.zone_rctl_valptr = NULL;
3450 if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count)
3451 != 0) {
3452 zerror(zlogp, B_FALSE, "%s failed",
3453 "nvlist_add_nvlist_array");
3454 goto out;
3455 }
3456 for (i = 0; i < count; i++)
3457 nvlist_free(nvlv[i]);
3458 free(nvlv);
3459 nvlv = NULL;
3460 rctlcount++;
3461 }
3462 (void) zonecfg_endrctlent(handle);
3463
3464 if (rctlcount == 0) {
3465 error = 0;
3466 goto out;
3467 }
3468 if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0)
3469 != 0) {
3691
3692 if (buf1.f_fsid == buf2.f_fsid)
3693 return (B_FALSE);
3694
3695 return (B_TRUE);
3696 }
3697
3698 /*
3699 * Verify the MAC label in the root dataset for the zone.
3700 * If the label exists, it must match the label configured for the zone.
3701 * Otherwise if there's no label on the dataset, create one here.
3702 */
3703
3704 static int
3705 validate_rootds_label(zlog_t *zlogp, char *rootpath, m_label_t *zone_sl)
3706 {
3707 int error = -1;
3708 zfs_handle_t *zhp;
3709 libzfs_handle_t *hdl;
3710 m_label_t ds_sl;
3711 char zonepath[MAXPATHLEN];
3712 char ds_hexsl[MAXNAMELEN];
3713
3714 if (!is_system_labeled())
3715 return (0);
3716
3717 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
3718 zerror(zlogp, B_TRUE, "unable to determine zone path");
3719 return (-1);
3720 }
3721
3722 if (!is_zonepath_zfs(zonepath))
3723 return (0);
3724
3725 if ((hdl = libzfs_init()) == NULL) {
3726 zerror(zlogp, B_FALSE, "opening ZFS library");
3727 return (-1);
3728 }
3729
3730 if ((zhp = zfs_path_to_zhandle(hdl, rootpath,
3731 ZFS_TYPE_FILESYSTEM)) == NULL) {
3732 zerror(zlogp, B_FALSE, "cannot open ZFS dataset for path '%s'",
3733 rootpath);
3734 libzfs_fini(hdl);
3735 return (-1);
3736 }
3737
3738 /* Get the mlslabel property if it exists. */
3739 if ((zfs_prop_get(zhp, ZFS_PROP_MLSLABEL, ds_hexsl, MAXNAMELEN,
3740 NULL, NULL, 0, B_TRUE) != 0) ||
3741 (strcmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)) {
4372 if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
4373 return (B_TRUE);
4374 for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; mnp++) {
4375 if (mnp->mnt_fstype == NULL ||
4376 strcmp(MNTTYPE_LOFS, mnp->mnt_fstype) != 0)
4377 continue;
4378 /* We're looking at a loopback mount. Stat it. */
4379 if (mnp->mnt_special != NULL &&
4380 stat64(mnp->mnt_special, &zst) != -1 &&
4381 rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) {
4382 zerror(zlogp, B_FALSE,
4383 "zone root %s is reachable through %s",
4384 rootpath, mnp->mnt_mountp);
4385 return (B_TRUE);
4386 }
4387 }
4388 return (B_FALSE);
4389 }
4390
4391 /*
4392 * Set memory cap and pool info for the zone's resource management
4393 * configuration.
4394 */
4395 static int
4396 setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
4397 {
4398 int res;
4399 uint64_t tmp;
4400 struct zone_mcaptab mcap;
4401 char sched[MAXNAMELEN];
4402 zone_dochandle_t handle = NULL;
4403 char pool_err[128];
4404
4405 if ((handle = zonecfg_init_handle()) == NULL) {
4406 zerror(zlogp, B_TRUE, "getting zone configuration handle");
4407 return (Z_BAD_HANDLE);
4408 }
4409
4410 if ((res = zonecfg_get_snapshot_handle(zone_name, handle)) != Z_OK) {
4411 zerror(zlogp, B_FALSE, "invalid configuration");
4412 zonecfg_fini_handle(handle);
4413 return (res);
4414 }
4415
4416 /*
4417 * If a memory cap is configured, set the cap in the kernel using
4418 * zone_setattr() and make sure the rcapd SMF service is enabled.
4419 */
4420 if (zonecfg_getmcapent(handle, &mcap) == Z_OK) {
4421 uint64_t num;
4422 char smf_err[128];
4423
4424 num = (uint64_t)strtoull(mcap.zone_physmem_cap, NULL, 10);
4425 if (zone_setattr(zoneid, ZONE_ATTR_PHYS_MCAP, &num, 0) == -1) {
4426 zerror(zlogp, B_TRUE, "could not set zone memory cap");
4427 zonecfg_fini_handle(handle);
4428 return (Z_INVAL);
4429 }
4430
4431 if (zonecfg_enable_rcapd(smf_err, sizeof (smf_err)) != Z_OK) {
4432 zerror(zlogp, B_FALSE, "enabling system/rcap service "
4433 "failed: %s", smf_err);
4434 zonecfg_fini_handle(handle);
4435 return (Z_INVAL);
4436 }
4437 }
4438
4439 /* Get the scheduling class set in the zone configuration. */
4440 if (zonecfg_get_sched_class(handle, sched, sizeof (sched)) == Z_OK &&
4441 strlen(sched) > 0) {
4442 if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, sched,
4443 strlen(sched)) == -1)
4444 zerror(zlogp, B_TRUE, "WARNING: unable to set the "
4445 "default scheduling class");
4446
4447 } else if (zonecfg_get_aliased_rctl(handle, ALIAS_SHARES, &tmp)
4448 == Z_OK) {
4449 /*
4450 * If the zone has the zone.cpu-shares rctl set then we want to
4451 * use the Fair Share Scheduler (FSS) for processes in the
4452 * zone. Check what scheduling class the zone would be running
4453 * in by default so we can print a warning and modify the class
4454 * if we wouldn't be using FSS.
4455 */
4456 char class_name[PC_CLNMSZ];
4457
4458 if (zonecfg_get_dflt_sched_class(handle, class_name,
4459 sizeof (class_name)) != Z_OK) {
4460 zerror(zlogp, B_FALSE, "WARNING: unable to determine "
4461 "the zone's scheduling class");
4462
4463 } else if (strcmp("FSS", class_name) != 0) {
4464 zerror(zlogp, B_FALSE, "WARNING: The zone.cpu-shares "
4465 "rctl is set but\nFSS is not the default "
4466 "scheduling class for\nthis zone. FSS will be "
4467 "used for processes\nin the zone but to get the "
4974 fd = open(ZONE_INDEX_FILE, O_WRONLY|O_CREAT|O_TRUNC,
4975 ZONE_INDEX_MODE);
4976 if (fd != -1 && (zet = fdopen(fd, "w")) != NULL) {
4977 (void) fchown(fd, ZONE_INDEX_UID, ZONE_INDEX_GID);
4978 if (uuid_is_null(zep->zone_uuid))
4979 uuidstr[0] = '\0';
4980 else
4981 uuid_unparse(zep->zone_uuid, uuidstr);
4982 (void) fprintf(zet, "%s:%s:/:%s\n", zep->zone_name,
4983 zone_state_str(zep->zone_state),
4984 uuidstr);
4985 (void) fclose(zet);
4986 }
4987 }
4988 _exit(0);
4989 }
4990
4991 int
4992 vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid)
4993 {
4994 char zonepath[MAXPATHLEN];
4995
4996 if (mount_cmd == Z_MNT_BOOT && validate_datasets(zlogp) != 0) {
4997 lofs_discard_mnttab();
4998 return (-1);
4999 }
5000
5001 /*
5002 * Before we try to mount filesystems we need to create the
5003 * attribute backing store for /dev
5004 */
5005 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
5006 lofs_discard_mnttab();
5007 return (-1);
5008 }
5009 resolve_lofs(zlogp, zonepath, sizeof (zonepath));
5010
5011 /* Make /dev directory owned by root, grouped sys */
5012 if (make_one_dir(zlogp, zonepath, "/dev", DEFAULT_DIR_MODE,
5013 0, 3) != 0) {
5014 lofs_discard_mnttab();
5015 return (-1);
5016 }
5017
5018 if (mount_filesystems(zlogp, mount_cmd) != 0) {
5019 lofs_discard_mnttab();
5020 return (-1);
5021 }
5022
5023 if (mount_cmd == Z_MNT_BOOT) {
5024 zone_iptype_t iptype;
5025
5026 if (vplat_get_iptype(zlogp, &iptype) < 0) {
5027 zerror(zlogp, B_TRUE, "unable to determine ip-type");
5028 lofs_discard_mnttab();
5029 return (-1);
5030 }
5031
5032 switch (iptype) {
5033 case ZS_SHARED:
5109 if (zonecfg_lock_scratch(fp) != 0)
5110 zerror(zlogp, B_TRUE, "cannot lock mapfile");
5111 else if (zonecfg_delete_scratch(fp, kernzone) != 0)
5112 zerror(zlogp, B_TRUE, "cannot delete map entry");
5113 else
5114 retv = 0;
5115 zonecfg_close_scratch(fp);
5116 return (retv);
5117 } else {
5118 return (0);
5119 }
5120 }
5121
5122 int
5123 vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
5124 {
5125 char *kzone;
5126 zoneid_t zoneid;
5127 int res;
5128 char pool_err[128];
5129 char zpath[MAXPATHLEN];
5130 char cmdbuf[MAXPATHLEN];
5131 brand_handle_t bh = NULL;
5132 dladm_status_t status;
5133 char errmsg[DLADM_STRSIZE];
5134 ushort_t flags;
5135
5136 kzone = zone_name;
5137 if (zonecfg_in_alt_root()) {
5138 FILE *fp;
5139
5140 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
5141 zerror(zlogp, B_TRUE, "unable to open map file");
5142 goto error;
5143 }
5144 if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
5145 kernzone, sizeof (kernzone)) != 0) {
5146 zerror(zlogp, B_FALSE, "unable to find scratch zone");
5147 zonecfg_close_scratch(fp);
5148 goto error;
5149 }
5165 }
5166
5167 if (remove_datalink_protect(zlogp, zoneid) != 0) {
5168 zerror(zlogp, B_FALSE,
5169 "unable clear datalink protect property");
5170 goto error;
5171 }
5172
5173 /*
5174 * The datalinks assigned to the zone will be removed from the NGZ as
5175 * part of zone_shutdown(), so we need to remove protect/pool etc.
5176 * before zone_shutdown(). Even if the shutdown itself fails, the zone
5177 * will not be able to violate any constraints applied because the
5178 * datalinks are no longer available to the zone.
5179 */
5180 if (zone_shutdown(zoneid) != 0) {
5181 zerror(zlogp, B_TRUE, "unable to shutdown zone");
5182 goto error;
5183 }
5184
5185 /* Get the zonepath of this zone */
5186 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
5187 zerror(zlogp, B_FALSE, "unable to determine zone path");
5188 goto error;
5189 }
5190
5191 /* Get a handle to the brand info for this zone */
5192 if ((bh = brand_open(brand_name)) == NULL) {
5193 zerror(zlogp, B_FALSE, "unable to determine zone brand");
5194 return (-1);
5195 }
5196 /*
5197 * If there is a brand 'halt' callback, execute it now to give the
5198 * brand a chance to cleanup any custom configuration.
5199 */
5200 (void) strcpy(cmdbuf, EXEC_PREFIX);
5201 if (brand_get_halt(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
5202 sizeof (cmdbuf) - EXEC_LEN) < 0) {
5203 brand_close(bh);
5204 zerror(zlogp, B_FALSE, "unable to determine branded zone's "
5205 "halt callback.");
5206 goto error;
5207 }
5208 brand_close(bh);
5209
5210 if ((strlen(cmdbuf) > EXEC_LEN) &&
5211 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
5212 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
5213 goto error;
5214 }
5215
5216 if (!unmount_cmd) {
5217 zone_iptype_t iptype;
5218
5219 if (zone_getattr(zoneid, ZONE_ATTR_FLAGS, &flags,
5220 sizeof (flags)) < 0) {
5221 if (vplat_get_iptype(zlogp, &iptype) < 0) {
|
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2015 by Delphix. All rights reserved.
25 * Copyright 2016, Joyent Inc.
26 */
27
28 /*
29 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
30 */
31
32 /*
33 * This module contains functions used to bring up and tear down the
34 * Virtual Platform: [un]mounting file-systems, [un]plumbing network
35 * interfaces, [un]configuring devices, establishing resource controls,
36 * and creating/destroying the zone in the kernel. These actions, on
37 * the way up, ready the zone; on the way down, they halt the zone.
38 * See the much longer block comment at the beginning of zoneadmd.c
39 * for a bigger picture of how the whole program functions.
40 *
41 * This module also has primary responsibility for the layout of "scratch
42 * zones." These are mounted, but inactive, zones that are used during
43 * operating system upgrade and potentially other administrative action. The
44 * scratch zone environment is similar to the miniroot environment. The zone's
45 * actual root is mounted read-write on /a, and the standard paths (/usr,
119 #include <libbrand.h>
120 #include <sys/brand.h>
121 #include <libzonecfg.h>
122 #include <synch.h>
123
124 #include "zoneadmd.h"
125 #include <tsol/label.h>
126 #include <libtsnet.h>
127 #include <sys/priv.h>
128 #include <libinetutil.h>
129
130 #define V4_ADDR_LEN 32
131 #define V6_ADDR_LEN 128
132
133 #define RESOURCE_DEFAULT_OPTS \
134 MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES
135
136 #define DFSTYPES "/etc/dfs/fstypes"
137 #define MAXTNZLEN 2048
138
139 /* a reasonable estimate for the number of lwps per process */
140 #define LWPS_PER_PROCESS 10
141
142 /* for routing socket */
143 static int rts_seqno = 0;
144
145 /* mangled zone name when mounting in an alternate root environment */
146 static char kernzone[ZONENAME_MAX];
147
148 /* array of cached mount entries for resolve_lofs */
149 static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max;
150
151 /* for Trusted Extensions */
152 static tsol_zcent_t *get_zone_label(zlog_t *, priv_set_t *);
153 static int tsol_mounts(zlog_t *, char *, char *);
154 static void tsol_unmounts(zlog_t *, char *);
155
156 static m_label_t *zlabel = NULL;
157 static m_label_t *zid_label = NULL;
158 static priv_set_t *zprivs = NULL;
159
160 static const char *DFLT_FS_ALLOWED = "hsfs,smbfs,nfs,nfs3,nfs4,nfsdyn";
161
/*
 * One entry pairing the name of a zone.* resource control with the name
 * of a project.* resource control.
 */
typedef struct zone_proj_rctl_map {
	char *zpr_zone_rctl;
	char *zpr_project_rctl;
} zone_proj_rctl_map_t;

/*
 * Table of zone/project resource control name pairs. The final entry has
 * both members set to NULL and serves as the end-of-table marker.
 */
static zone_proj_rctl_map_t zone_proj_rctl_map[] = {
	{
		.zpr_zone_rctl = "zone.max-msg-ids",
		.zpr_project_rctl = "project.max-msg-ids"
	},
	{
		.zpr_zone_rctl = "zone.max-sem-ids",
		.zpr_project_rctl = "project.max-sem-ids"
	},
	{
		.zpr_zone_rctl = "zone.max-shm-ids",
		.zpr_project_rctl = "project.max-shm-ids"
	},
	{
		.zpr_zone_rctl = "zone.max-shm-memory",
		.zpr_project_rctl = "project.max-shm-memory"
	},
	{
		.zpr_zone_rctl = NULL,
		.zpr_project_rctl = NULL
	}
};
174
175 /* from libsocket, not in any header file */
176 extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
177
178 /* from zoneadmd */
179 extern char query_hook[];
180
181 /*
182 * For each "net" resource configured in zonecfg, we track a zone_addr_list_t
183 * node in a linked list that is sorted by linkid. The list is constructed as
184 * the xml configuration file is parsed, and the information
185 * contained in each node is added to the kernel before the zone is
186 * booted, to be retrieved and applied from within the exclusive-IP NGZ
187 * on boot.
188 */
189 typedef struct zone_addr_list {
190 struct zone_addr_list *za_next; /* next node in the list; NULL marks the end */
191 datalink_id_t za_linkid; /* datalink_id_t of interface; the list is sorted by it */
192 struct zone_nwiftab za_nwiftab; /* address, defrouter properties */
193 } zone_addr_list_t;
194
1144 break;
1145 }
1146
1147 if (brand_platform_iter_devices(bh, zone_name,
1148 mount_one_dev_device_cb, prof, curr_iptype) != 0) {
1149 zerror(zlogp, B_TRUE, "failed to add standard device");
1150 goto cleanup;
1151 }
1152
1153 if (brand_platform_iter_link(bh,
1154 mount_one_dev_symlink_cb, prof) != 0) {
1155 zerror(zlogp, B_TRUE, "failed to add standard symlink");
1156 goto cleanup;
1157 }
1158
1159 /* Add user-specified devices and directories */
1160 if ((handle = zonecfg_init_handle()) == NULL) {
1161 zerror(zlogp, B_FALSE, "can't initialize zone handle");
1162 goto cleanup;
1163 }
1164 if ((err = zonecfg_get_handle(zone_name, handle)) != 0) {
1165 zerror(zlogp, B_FALSE, "can't get handle for zone "
1166 "%s: %s", zone_name, zonecfg_strerror(err));
1167 goto cleanup;
1168 }
1169 if ((err = zonecfg_setdevent(handle)) != 0) {
1170 zerror(zlogp, B_FALSE, "%s: %s", zone_name,
1171 zonecfg_strerror(err));
1172 goto cleanup;
1173 }
1174 while (zonecfg_getdevent(handle, &ztab) == Z_OK) {
1175 if (di_prof_add_dev(prof, ztab.zone_dev_match)) {
1176 zerror(zlogp, B_TRUE, "failed to add "
1177 "user-specified device");
1178 goto cleanup;
1179 }
1180 }
1181 (void) zonecfg_enddevent(handle);
1182
1183 /* Send profile to kernel */
1184 if (di_prof_commit(prof)) {
1185 zerror(zlogp, B_TRUE, "failed to commit profile");
1186 goto cleanup;
1187 }
1188
1189 retval = 0;
1665 * for non-lofs mounts since they will have a device
1666 * as a backing store and device paths must always be
1667 * specified relative to the current boot environment.
1668 */
1669 fsp->zone_fs_special[0] = '\0';
1670 if (strcmp(fsp->zone_fs_type, MNTTYPE_LOFS) == 0) {
1671 (void) strlcat(fsp->zone_fs_special, zonecfg_get_root(),
1672 sizeof (fsp->zone_fs_special));
1673 }
1674 (void) strlcat(fsp->zone_fs_special, fstab.zone_fs_special,
1675 sizeof (fsp->zone_fs_special));
1676 }
1677 (void) zonecfg_endfsent(handle);
1678 return (0);
1679 }
1680
1681 static int
1682 mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
1683 {
1684 char rootpath[MAXPATHLEN];
1685 char brand[MAXNAMELEN];
1686 char luroot[MAXPATHLEN];
1687 int i, num_fs = 0;
1688 struct zone_fstab *fs_ptr = NULL;
1689 zone_dochandle_t handle = NULL;
1690 zone_state_t zstate;
1691 brand_handle_t bh;
1692 plat_gmount_cb_data_t cb;
1693
1694 if (zone_get_state(zone_name, &zstate) != Z_OK ||
1695 (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) {
1696 zerror(zlogp, B_FALSE,
1697 "zone must be in '%s' or '%s' state to mount file-systems",
1698 zone_state_str(ZONE_STATE_READY),
1699 zone_state_str(ZONE_STATE_MOUNTED));
1700 goto bad;
1701 }
1702
1703 if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
1704 zerror(zlogp, B_TRUE, "unable to determine zone root");
1705 goto bad;
1706 }
1707
1708 if ((handle = zonecfg_init_handle()) == NULL) {
1709 zerror(zlogp, B_TRUE, "getting zone configuration handle");
1710 goto bad;
1711 }
1712 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK ||
1713 zonecfg_setfsent(handle) != Z_OK) {
1714 zerror(zlogp, B_FALSE, "invalid configuration");
1715 goto bad;
1716 }
1717
1718 /*
1719 * If we are mounting the zone, then we must always use the default
1720 * brand global mounts.
1721 */
1722 if (ALT_MOUNT(mount_cmd)) {
1783 * we need <zoneroot>/lu/dev to be the /dev filesystem
1784 * for the zone and we don't want to have any /dev filesystem
1785 * mounted at <zoneroot>/lu/a/dev. Since /dev is specified
1786 * as a normal zone filesystem by default we'll try to mount
1787 * it at <zoneroot>/lu/a/dev, so we have to detect this
1788 * case and instead mount it at <zoneroot>/lu/dev.
1789 *
1790 * All this work is done in three phases:
1791 * 1) Create and populate lu directory (build_mounted_pre_var()).
1792 * 2) Mount the required filesystems as per the zone configuration.
1793 * 3) Set up the rest of the scratch zone environment
1794 * (build_mounted_post_var()).
1795 */
1796 if (ALT_MOUNT(mount_cmd) && !build_mounted_pre_var(zlogp,
1797 rootpath, sizeof (rootpath), zonepath, luroot, sizeof (luroot)))
1798 goto bad;
1799
1800 qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare);
1801
1802 for (i = 0; i < num_fs; i++) {
1803 if (ALT_MOUNT(mount_cmd)) {
1804 if (strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) {
1805 size_t slen = strlen(rootpath) - 2;
1806
1807 /*
1808 * By default we'll try to mount /dev
1809 * as /a/dev but /dev is special and
1810 * always goes at the top so strip the
1811 * trailing '/a' from the rootpath.
1812 */
1813 assert(strcmp(&rootpath[slen], "/a") == 0);
1814 rootpath[slen] = '\0';
1815 if (mount_one(zlogp, &fs_ptr[i], rootpath,
1816 mount_cmd) != 0)
1817 goto bad;
1818 rootpath[slen] = '/';
1819 continue;
1820 } else if (strcmp(brand_name, default_brand) != 0) {
1821 /*
1822 * If mounting non-native brand, skip
1823 * mounting global mounts and
1824 * filesystem entries since they are
1825 * only needed for native pkg upgrade
1826 * tools.
1827 *
1828 * The only exception right now is
1829 * /dev (handled above), which is
1830 * needed in the luroot in order to
1831 * zlogin -S into the zone.
1832 */
1833 continue;
1834 }
1835 }
1836
1837 if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd) != 0)
1838 goto bad;
1839 }
1840 if (ALT_MOUNT(mount_cmd) &&
1841 !build_mounted_post_var(zlogp, mount_cmd, rootpath, luroot))
1842 goto bad;
1843
1844 /*
1845 * For Trusted Extensions cross-mount each lower level /export/home
1846 */
1847 if (mount_cmd == Z_MNT_BOOT &&
1848 tsol_mounts(zlogp, zone_name, rootpath) != 0)
1849 goto bad;
1850
1851 free_fs_data(fs_ptr, num_fs);
1852
1853 /*
1854 * Everything looks fine.
1855 */
1856 return (0);
2893 }
2894 /* insert new after ptr */
2895 new->za_next = next;
2896 ptr->za_next = new;
2897 return (old);
2898 }
2899
2900 void
2901 free_ip_interface(zone_addr_list_t *zalist)
2902 {
2903 zone_addr_list_t *ptr, *new;
2904
2905 for (ptr = zalist; ptr != NULL; ) {
2906 new = ptr;
2907 ptr = ptr->za_next;
2908 free(new);
2909 }
2910 }
2911
2912 /*
2913 * For IP networking, we need to use the illumos-native device tree. For most
2914 * zones, this is $ZONEROOT/dev. For LX ones, it's $ZONEROOT/native/dev.
2915 * Return the appropriate post-$ZONEROOT path.
2916 */
2917 static char *
2918 get_brand_dev(void)
2919 {
2920 static char *lxpath = "/native/dev";
2921 /* Cheesy hard-coding of strlen("/native") */
2922 char *default_path = lxpath + 7;
2923
2924 /* LX zones are the exception... */
2925 if (strcmp(brand_name, "lx") == 0)
2926 return (lxpath);
2927
2928 return (default_path);
2929 }
2930
2931 /*
2932 * Add the kernel access control information for the interface names.
2933 * If anything goes wrong, we log a general error message, attempt to tear down
2934 * whatever we set up, and return an error.
2935 */
2936 static int
2937 configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
2938 {
2939 zone_dochandle_t handle;
2940 struct zone_nwiftab nwiftab;
2941 char rootpath[MAXPATHLEN];
2942 char path[MAXPATHLEN];
2943 datalink_id_t linkid;
2944 di_prof_t prof = NULL;
2945 boolean_t added = B_FALSE;
2946 zone_addr_list_t *zalist = NULL, *new;
2947
2948 if ((handle = zonecfg_init_handle()) == NULL) {
2949 zerror(zlogp, B_TRUE, "getting zone configuration handle");
2950 return (-1);
2951 }
2957
2958 if (zonecfg_setnwifent(handle) != Z_OK) {
2959 zonecfg_fini_handle(handle);
2960 return (0);
2961 }
2962
2963 for (;;) {
2964 if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
2965 break;
2966
2967 if (prof == NULL) {
2968 if (zone_get_devroot(zone_name, rootpath,
2969 sizeof (rootpath)) != Z_OK) {
2970 (void) zonecfg_endnwifent(handle);
2971 zonecfg_fini_handle(handle);
2972 zerror(zlogp, B_TRUE,
2973 "unable to determine dev root");
2974 return (-1);
2975 }
2976 (void) snprintf(path, sizeof (path), "%s%s", rootpath,
2977 get_brand_dev());
2978 if (di_prof_init(path, &prof) != 0) {
2979 (void) zonecfg_endnwifent(handle);
2980 zonecfg_fini_handle(handle);
2981 zerror(zlogp, B_TRUE,
2982 "failed to initialize profile");
2983 return (-1);
2984 }
2985 }
2986
2987 /*
2988 * Create the /dev entry for backward compatibility.
2989 * Only create the /dev entry if it's not in use.
2990 * Note that the zone still boots when the assigned
2991 * interface is inaccessible, used by others, etc.
2992 * Also, when vanity naming is used, some interfaces do
2993 * not have corresponding /dev node names (for example,
2994 * vanity named aggregations). The /dev entry is not
2995 * created in that case. The /dev/net entry is always
2996 * accessible.
2997 */
3341 break;
3342 case Z_PRIV_REQUIRED:
3343 zerror(zlogp, B_FALSE, "required privilege \"%s\" is missing "
3344 "from the zone's privilege set", privname);
3345 break;
3346 case Z_PRIV_UNKNOWN:
3347 zerror(zlogp, B_FALSE, "unknown privilege \"%s\" specified "
3348 "in the zone's privilege set", privname);
3349 break;
3350 default:
3351 zerror(zlogp, B_FALSE, "failed to determine the zone's "
3352 "privilege set");
3353 break;
3354 }
3355
3356 free(privname);
3357 zonecfg_fini_handle(handle);
3358 return (error);
3359 }
3360
3361 static char *
3362 zone_proj_rctl(const char *name)
3363 {
3364 int i;
3365
3366 for (i = 0; zone_proj_rctl_map[i].zpr_zone_rctl != NULL; i++) {
3367 if (strcmp(name, zone_proj_rctl_map[i].zpr_zone_rctl) == 0) {
3368 return (zone_proj_rctl_map[i].zpr_project_rctl);
3369 }
3370 }
3371 return (NULL);
3372 }
3373
3374 static int
3375 get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
3376 {
3377 nvlist_t *nvl = NULL;
3378 char *nvl_packed = NULL;
3379 size_t nvl_size = 0;
3380 nvlist_t **nvlv = NULL;
3381 int rctlcount = 0;
3382 int error = -1;
3383 zone_dochandle_t handle;
3384 struct zone_rctltab rctltab;
3385 rctlblk_t *rctlblk = NULL;
3386 uint64_t maxlwps;
3387 uint64_t maxprocs;
3388 int rproc, rlwp;
3389
3390 *bufp = NULL;
3391 *bufsizep = 0;
3392
3393 if ((handle = zonecfg_init_handle()) == NULL) {
3394 zerror(zlogp, B_TRUE, "getting zone configuration handle");
3395 return (-1);
3396 }
3397 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
3398 zerror(zlogp, B_FALSE, "invalid configuration");
3399 zonecfg_fini_handle(handle);
3400 return (-1);
3401 }
3402
3403 rctltab.zone_rctl_valptr = NULL;
3404 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
3405 zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc");
3406 goto out;
3407 }
3408
3409 /*
3410 * Allow the administrator to control both the maximum number of
3411 * process table slots, and the maximum number of lwps, with a single
3412 * max-processes or max-lwps property. If only the max-processes
3413 * property is set, we add a max-lwps property with a limit derived
3414 * from max-processes. If only the max-lwps property is set, we add a
3415 * max-processes property with the same limit as max-lwps.
3416 */
3417 rproc = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXPROCS, &maxprocs);
3418 rlwp = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXLWPS, &maxlwps);
3419 if (rproc == Z_OK && rlwp == Z_NO_ENTRY) {
3420 if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXLWPS,
3421 maxprocs * LWPS_PER_PROCESS) != Z_OK) {
3422 zerror(zlogp, B_FALSE, "unable to set max-lwps alias");
3423 goto out;
3424 }
3425 } else if (rlwp == Z_OK && rproc == Z_NO_ENTRY) {
3426 /* no scaling for max-proc value */
3427 if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXPROCS,
3428 maxlwps) != Z_OK) {
3429 zerror(zlogp, B_FALSE,
3430 "unable to set max-processes alias");
3431 goto out;
3432 }
3433 }
3434
3435 if (zonecfg_setrctlent(handle) != Z_OK) {
3436 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
3437 goto out;
3438 }
3439
3440 if ((rctlblk = malloc(rctlblk_size())) == NULL) {
3441 zerror(zlogp, B_TRUE, "memory allocation failed");
3442 goto out;
3443 }
3444 while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) {
3445 struct zone_rctlvaltab *rctlval;
3446 uint_t i, count;
3447 const char *name = rctltab.zone_rctl_name;
3448 char *proj_nm;
3449
3450 /* zoneadm should have already warned about unknown rctls. */
3451 if (!zonecfg_is_rctl(name)) {
3452 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
3453 rctltab.zone_rctl_valptr = NULL;
3454 continue;
3455 }
3456 count = 0;
3457 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
3458 rctlval = rctlval->zone_rctlval_next) {
3459 count++;
3460 }
3461 if (count == 0) { /* ignore */
3462 continue; /* Nothing to free */
3463 }
3464 if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL)
3465 goto out;
3466 i = 0;
3467 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
3468 rctlval = rctlval->zone_rctlval_next, i++) {
3495 zerror(zlogp, B_FALSE, "%s failed",
3496 "nvlist_add_uint64");
3497 goto out;
3498 }
3499 if (nvlist_add_uint64(nvlv[i], "limit",
3500 rctlblk_get_value(rctlblk)) != 0) {
3501 zerror(zlogp, B_FALSE, "%s failed",
3502 "nvlist_add_uint64");
3503 goto out;
3504 }
3505 if (nvlist_add_uint64(nvlv[i], "action",
3506 (uint_t)rctlblk_get_local_action(rctlblk, NULL))
3507 != 0) {
3508 zerror(zlogp, B_FALSE, "%s failed",
3509 "nvlist_add_uint64");
3510 goto out;
3511 }
3512 }
3513 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
3514 rctltab.zone_rctl_valptr = NULL;
3515
3516 /*
3517 * With no action on our part we will start zsched with the
3518 * project rctl values for our (zoneadmd) current project. For
3519 * brands running a variant of Illumos, that's not a problem
3520 * since they will setup their own projects, but for a
3521 * non-native brand like lx, where there are no projects, we
3522 * want to start things up with the same project rctls as the
3523 * corresponding zone rctls, since nothing within the zone will
3524 * ever change the project rctls.
3525 */
3526 if ((proj_nm = zone_proj_rctl(name)) != NULL) {
3527 if (nvlist_add_nvlist_array(nvl, proj_nm, nvlv, count)
3528 != 0) {
3529 zerror(zlogp, B_FALSE,
3530 "nvlist_add_nvlist_arrays failed");
3531 goto out;
3532 }
3533 }
3534
3535 if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count)
3536 != 0) {
3537 zerror(zlogp, B_FALSE, "%s failed",
3538 "nvlist_add_nvlist_array");
3539 goto out;
3540 }
3541 for (i = 0; i < count; i++)
3542 nvlist_free(nvlv[i]);
3543 free(nvlv);
3544 nvlv = NULL;
3545 rctlcount++;
3546 }
3547 (void) zonecfg_endrctlent(handle);
3548
3549 if (rctlcount == 0) {
3550 error = 0;
3551 goto out;
3552 }
3553 if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0)
3554 != 0) {
3776
3777 if (buf1.f_fsid == buf2.f_fsid)
3778 return (B_FALSE);
3779
3780 return (B_TRUE);
3781 }
3782
3783 /*
3784 * Verify the MAC label in the root dataset for the zone.
3785 * If the label exists, it must match the label configured for the zone.
3786 * Otherwise if there's no label on the dataset, create one here.
3787 */
3788
3789 static int
3790 validate_rootds_label(zlog_t *zlogp, char *rootpath, m_label_t *zone_sl)
3791 {
3792 int error = -1;
3793 zfs_handle_t *zhp;
3794 libzfs_handle_t *hdl;
3795 m_label_t ds_sl;
3796 char ds_hexsl[MAXNAMELEN];
3797
3798 if (!is_system_labeled())
3799 return (0);
3800
3801 if (!is_zonepath_zfs(zonepath))
3802 return (0);
3803
3804 if ((hdl = libzfs_init()) == NULL) {
3805 zerror(zlogp, B_FALSE, "opening ZFS library");
3806 return (-1);
3807 }
3808
3809 if ((zhp = zfs_path_to_zhandle(hdl, rootpath,
3810 ZFS_TYPE_FILESYSTEM)) == NULL) {
3811 zerror(zlogp, B_FALSE, "cannot open ZFS dataset for path '%s'",
3812 rootpath);
3813 libzfs_fini(hdl);
3814 return (-1);
3815 }
3816
3817 /* Get the mlslabel property if it exists. */
3818 if ((zfs_prop_get(zhp, ZFS_PROP_MLSLABEL, ds_hexsl, MAXNAMELEN,
3819 NULL, NULL, 0, B_TRUE) != 0) ||
3820 (strcmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)) {
4451 if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
4452 return (B_TRUE);
4453 for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; mnp++) {
4454 if (mnp->mnt_fstype == NULL ||
4455 strcmp(MNTTYPE_LOFS, mnp->mnt_fstype) != 0)
4456 continue;
4457 /* We're looking at a loopback mount. Stat it. */
4458 if (mnp->mnt_special != NULL &&
4459 stat64(mnp->mnt_special, &zst) != -1 &&
4460 rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) {
4461 zerror(zlogp, B_FALSE,
4462 "zone root %s is reachable through %s",
4463 rootpath, mnp->mnt_mountp);
4464 return (B_TRUE);
4465 }
4466 }
4467 return (B_FALSE);
4468 }
4469
4470 /*
4471 * Set pool info for the zone's resource management configuration.
4472 */
4473 static int
4474 setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
4475 {
4476 int res;
4477 uint64_t tmp;
4478 char sched[MAXNAMELEN];
4479 zone_dochandle_t handle = NULL;
4480 char pool_err[128];
4481
4482 if ((handle = zonecfg_init_handle()) == NULL) {
4483 zerror(zlogp, B_TRUE, "getting zone configuration handle");
4484 return (Z_BAD_HANDLE);
4485 }
4486
4487 if ((res = zonecfg_get_snapshot_handle(zone_name, handle)) != Z_OK) {
4488 zerror(zlogp, B_FALSE, "invalid configuration");
4489 zonecfg_fini_handle(handle);
4490 return (res);
4491 }
4492
4493 /* Get the scheduling class set in the zone configuration. */
4494 if (zonecfg_get_sched_class(handle, sched, sizeof (sched)) == Z_OK &&
4495 strlen(sched) > 0) {
4496 if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, sched,
4497 strlen(sched)) == -1)
4498 zerror(zlogp, B_TRUE, "WARNING: unable to set the "
4499 "default scheduling class");
4500
4501 if (strcmp(sched, "FX") == 0) {
4502 /*
4503 * When FX is specified then by default all processes
4504 * will start at the lowest priority level (0) and
4505 * stay there. We support an optional attr which
4506 * indicates that all the processes should be "high
4507 * priority". We set this on the zone so that starting
4508 * init will set the priority high.
4509 */
4510 struct zone_attrtab a;
4511
4512 bzero(&a, sizeof (a));
4513 (void) strlcpy(a.zone_attr_name, "fixed-hi-prio",
4514 sizeof (a.zone_attr_name));
4515
4516 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
4517 strcmp(a.zone_attr_value, "true") == 0) {
4518 boolean_t hi = B_TRUE;
4519
4520 if (zone_setattr(zoneid,
4521 ZONE_ATTR_SCHED_FIXEDHI, (void *)hi,
4522 sizeof (hi)) == -1)
4523 zerror(zlogp, B_TRUE, "WARNING: unable "
4524 "to set high priority");
4525 }
4526 }
4527
4528 } else if (zonecfg_get_aliased_rctl(snap_hndl, ALIAS_SHARES, &tmp)
4529 == Z_OK) {
4530 /*
4531 * If the zone has the zone.cpu-shares rctl set then we want to
4532 * use the Fair Share Scheduler (FSS) for processes in the
4533 * zone. Check what scheduling class the zone would be running
4534 * in by default so we can print a warning and modify the class
4535 * if we wouldn't be using FSS.
4536 */
4537 char class_name[PC_CLNMSZ];
4538
4539 if (zonecfg_get_dflt_sched_class(handle, class_name,
4540 sizeof (class_name)) != Z_OK) {
4541 zerror(zlogp, B_FALSE, "WARNING: unable to determine "
4542 "the zone's scheduling class");
4543
4544 } else if (strcmp("FSS", class_name) != 0) {
4545 zerror(zlogp, B_FALSE, "WARNING: The zone.cpu-shares "
4546 "rctl is set but\nFSS is not the default "
4547 "scheduling class for\nthis zone. FSS will be "
4548 "used for processes\nin the zone but to get the "
5055 fd = open(ZONE_INDEX_FILE, O_WRONLY|O_CREAT|O_TRUNC,
5056 ZONE_INDEX_MODE);
5057 if (fd != -1 && (zet = fdopen(fd, "w")) != NULL) {
5058 (void) fchown(fd, ZONE_INDEX_UID, ZONE_INDEX_GID);
5059 if (uuid_is_null(zep->zone_uuid))
5060 uuidstr[0] = '\0';
5061 else
5062 uuid_unparse(zep->zone_uuid, uuidstr);
5063 (void) fprintf(zet, "%s:%s:/:%s\n", zep->zone_name,
5064 zone_state_str(zep->zone_state),
5065 uuidstr);
5066 (void) fclose(zet);
5067 }
5068 }
5069 _exit(0);
5070 }
5071
5072 int
5073 vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid)
5074 {
5075 char zpath[MAXPATHLEN];
5076
5077 if (mount_cmd == Z_MNT_BOOT && validate_datasets(zlogp) != 0) {
5078 lofs_discard_mnttab();
5079 return (-1);
5080 }
5081
5082 /*
5083 * Before we try to mount filesystems we need to create the
5084 * attribute backing store for /dev
5085 */
5086 (void) strlcpy(zpath, zonepath, sizeof (zpath));
5087 resolve_lofs(zlogp, zpath, sizeof (zpath));
5088
5089 /* Make /dev directory owned by root, grouped sys */
5090 if (make_one_dir(zlogp, zpath, "/dev", DEFAULT_DIR_MODE, 0, 3) != 0) {
5091 lofs_discard_mnttab();
5092 return (-1);
5093 }
5094
5095 if (mount_filesystems(zlogp, mount_cmd) != 0) {
5096 lofs_discard_mnttab();
5097 return (-1);
5098 }
5099
5100 if (mount_cmd == Z_MNT_BOOT) {
5101 zone_iptype_t iptype;
5102
5103 if (vplat_get_iptype(zlogp, &iptype) < 0) {
5104 zerror(zlogp, B_TRUE, "unable to determine ip-type");
5105 lofs_discard_mnttab();
5106 return (-1);
5107 }
5108
5109 switch (iptype) {
5110 case ZS_SHARED:
5186 if (zonecfg_lock_scratch(fp) != 0)
5187 zerror(zlogp, B_TRUE, "cannot lock mapfile");
5188 else if (zonecfg_delete_scratch(fp, kernzone) != 0)
5189 zerror(zlogp, B_TRUE, "cannot delete map entry");
5190 else
5191 retv = 0;
5192 zonecfg_close_scratch(fp);
5193 return (retv);
5194 } else {
5195 return (0);
5196 }
5197 }
5198
5199 int
5200 vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
5201 {
5202 char *kzone;
5203 zoneid_t zoneid;
5204 int res;
5205 char pool_err[128];
5206 char cmdbuf[MAXPATHLEN];
5207 brand_handle_t bh = NULL;
5208 dladm_status_t status;
5209 char errmsg[DLADM_STRSIZE];
5210 ushort_t flags;
5211
5212 kzone = zone_name;
5213 if (zonecfg_in_alt_root()) {
5214 FILE *fp;
5215
5216 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
5217 zerror(zlogp, B_TRUE, "unable to open map file");
5218 goto error;
5219 }
5220 if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
5221 kernzone, sizeof (kernzone)) != 0) {
5222 zerror(zlogp, B_FALSE, "unable to find scratch zone");
5223 zonecfg_close_scratch(fp);
5224 goto error;
5225 }
5241 }
5242
5243 if (remove_datalink_protect(zlogp, zoneid) != 0) {
5244 zerror(zlogp, B_FALSE,
5245 "unable clear datalink protect property");
5246 goto error;
5247 }
5248
5249 /*
5250 * The datalinks assigned to the zone will be removed from the NGZ as
5251 * part of zone_shutdown(), so we need to remove protect/pool etc.
5252 * before zone_shutdown(). Even if the shutdown itself fails, the zone
5253 * will not be able to violate any constraints applied because the
5254 * datalinks are no longer available to the zone.
5255 */
5256 if (zone_shutdown(zoneid) != 0) {
5257 zerror(zlogp, B_TRUE, "unable to shutdown zone");
5258 goto error;
5259 }
5260
5261 /* Get a handle to the brand info for this zone */
5262 if ((bh = brand_open(brand_name)) == NULL) {
5263 zerror(zlogp, B_FALSE, "unable to determine zone brand");
5264 return (-1);
5265 }
5266 /*
5267 * If there is a brand 'halt' callback, execute it now to give the
5268 * brand a chance to cleanup any custom configuration.
5269 */
5270 (void) strcpy(cmdbuf, EXEC_PREFIX);
5271 if (brand_get_halt(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
5272 sizeof (cmdbuf) - EXEC_LEN) < 0) {
5273 brand_close(bh);
5274 zerror(zlogp, B_FALSE, "unable to determine branded zone's "
5275 "halt callback.");
5276 goto error;
5277 }
5278 brand_close(bh);
5279
5280 if ((strlen(cmdbuf) > EXEC_LEN) &&
5281 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
5282 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
5283 goto error;
5284 }
5285
5286 if (!unmount_cmd) {
5287 zone_iptype_t iptype;
5288
5289 if (zone_getattr(zoneid, ZONE_ATTR_FLAGS, &flags,
5290 sizeof (flags)) < 0) {
5291 if (vplat_get_iptype(zlogp, &iptype) < 0) {
|