Print this page




   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2016, Joyent Inc.
  25  * Copyright (c) 2015 by Delphix. All rights reserved.

  26  */
  27 
  28 /*
  29  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  30  */
  31 
  32 /*
  33  * This module contains functions used to bring up and tear down the
  34  * Virtual Platform: [un]mounting file-systems, [un]plumbing network
  35  * interfaces, [un]configuring devices, establishing resource controls,
  36  * and creating/destroying the zone in the kernel.  These actions, on
  37  * the way up, ready the zone; on the way down, they halt the zone.
  38  * See the much longer block comment at the beginning of zoneadmd.c
  39  * for a bigger picture of how the whole program functions.
  40  *
  41  * This module also has primary responsibility for the layout of "scratch
  42  * zones."  These are mounted, but inactive, zones that are used during
  43  * operating system upgrade and potentially other administrative action.  The
  44  * scratch zone environment is similar to the miniroot environment.  The zone's
  45  * actual root is mounted read-write on /a, and the standard paths (/usr,


 119 #include <libbrand.h>
 120 #include <sys/brand.h>
 121 #include <libzonecfg.h>
 122 #include <synch.h>
 123 
 124 #include "zoneadmd.h"
 125 #include <tsol/label.h>
 126 #include <libtsnet.h>
 127 #include <sys/priv.h>
 128 #include <libinetutil.h>
 129 
 130 #define V4_ADDR_LEN     32
 131 #define V6_ADDR_LEN     128
 132 
 133 #define RESOURCE_DEFAULT_OPTS \
 134         MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES
 135 
 136 #define DFSTYPES        "/etc/dfs/fstypes"
 137 #define MAXTNZLEN       2048
 138 
 139 /* Number of times to retry unmounting if it fails */
 140 #define UMOUNT_RETRIES  30
 141 
 142 /* a reasonable estimate for the number of lwps per process */
 143 #define LWPS_PER_PROCESS        10
 144 
 145 /* for routing socket */
 146 static int rts_seqno = 0;
 147 
 148 /* mangled zone name when mounting in an alternate root environment */
 149 static char kernzone[ZONENAME_MAX];
 150 
 151 /* array of cached mount entries for resolve_lofs */
 152 static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max;
 153 
 154 /* for Trusted Extensions */
 155 static tsol_zcent_t *get_zone_label(zlog_t *, priv_set_t *);
 156 static int tsol_mounts(zlog_t *, char *, char *);
 157 static void tsol_unmounts(zlog_t *, char *);
 158 
 159 static m_label_t *zlabel = NULL;
 160 static m_label_t *zid_label = NULL;
 161 static priv_set_t *zprivs = NULL;


 163 static const char *DFLT_FS_ALLOWED = "hsfs,smbfs,nfs,nfs3,nfs4,nfsdyn";
 164 
 165 typedef struct zone_proj_rctl_map {
 166         char *zpr_zone_rctl;
 167         char *zpr_project_rctl;
 168 } zone_proj_rctl_map_t;
 169 
 170 static zone_proj_rctl_map_t zone_proj_rctl_map[] = {
 171         {"zone.max-msg-ids",    "project.max-msg-ids"},
 172         {"zone.max-sem-ids",    "project.max-sem-ids"},
 173         {"zone.max-shm-ids",    "project.max-shm-ids"},
 174         {"zone.max-shm-memory", "project.max-shm-memory"},
 175         {NULL,                  NULL}
 176 };
 177 
 178 /* from libsocket, not in any header file */
 179 extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
 180 
 181 /* from zoneadmd */
 182 extern char query_hook[];
 183 extern char post_statechg_hook[];
 184 
 185 /*
 186  * For each "net" resource configured in zonecfg, we track a zone_addr_list_t
 187  * node in a linked list that is sorted by linkid.  The list is constructed as
 188  * the xml configuration file is parsed, and the information
 189  * contained in each node is added to the kernel before the zone is
 190  * booted, to be retrieved and applied from within the exclusive-IP NGZ
 191  * on boot.
 192  */
 193 typedef struct zone_addr_list {
 194         struct zone_addr_list *za_next;
 195         datalink_id_t za_linkid;        /* datalink_id_t of interface */
 196         struct zone_nwiftab za_nwiftab; /* address, defrouter properties */
 197 } zone_addr_list_t;
 198 
 199 /*
 200  * An optimization for build_mnttable: reallocate (and potentially copy the
 201  * data) only once every N times through the loop.
 202  */
 203 #define MNTTAB_HUNK     32
 204 
 205 /* some handy macros */
 206 #define SIN(s)  ((struct sockaddr_in *)s)
 207 #define SIN6(s) ((struct sockaddr_in6 *)s)
 208 
 209 /*
 210  * Private autofs system call
 211  */
 212 extern int _autofssys(int, void *);
 213 
 214 static int
 215 autofs_cleanup(zoneid_t zoneid)
 216 {
 217         /*
 218          * Ask autofs to unmount all trigger nodes in the given zone.
 219          */
 220         return (_autofssys(AUTOFS_UNMOUNTALL, (void *)((uintptr_t)zoneid)));
 221 }
 222 
 223 static void
 224 free_mnttable(struct mnttab *mnt_array, uint_t nelem)
 225 {
 226         uint_t i;
 227 
 228         if (mnt_array == NULL)
 229                 return;
 230         for (i = 0; i < nelem; i++) {
 231                 free(mnt_array[i].mnt_mountp);
 232                 free(mnt_array[i].mnt_fstype);
 233                 free(mnt_array[i].mnt_special);
 234                 free(mnt_array[i].mnt_mntopts);
 235                 assert(mnt_array[i].mnt_time == NULL);
 236         }
 237         free(mnt_array);
 238 }
 239 
 240 /*


 591         for (i = 0; remote_fstypes[i] != NULL; i++) {
 592                 if (strcmp(remote_fstypes[i], fstype) == 0)
 593                         return (B_TRUE);
 594         }
 595         return (B_FALSE);
 596 }
 597 
 598 /*
 599  * This converts a zone root path (normally of the form .../root) to a Live
 600  * Upgrade scratch zone root (of the form .../lu).
 601  */
 602 static void
 603 root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved)
 604 {
 605         if (!isresolved && zonecfg_in_alt_root())
 606                 resolve_lofs(zlogp, zroot, zrootlen);
 607         (void) strcpy(strrchr(zroot, '/') + 1, "lu");
 608 }
 609 
 610 /*
 611  * Perform brand-specific cleanup if we are unable to unmount a FS.
 612  */
 613 static void
 614 brand_umount_cleanup(zlog_t *zlogp, char *path)
 615 {
 616         char cmdbuf[2 * MAXPATHLEN];
 617 
 618         if (post_statechg_hook[0] == '\0')
 619                 return;
 620 
 621         if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
 622             ZONE_STATE_DOWN, Z_UNMOUNT, path) > sizeof (cmdbuf))
 623                 return;
 624 
 625         (void) do_subproc(zlogp, cmdbuf, NULL, B_FALSE);
 626 }
 627 
 628 /*
 629  * The general strategy for unmounting filesystems is as follows:
 630  *
 631  * - Remote filesystems may be dead, and attempting to contact them as
 632  * part of a regular unmount may hang forever; we want to always try to
 633  * forcibly unmount such filesystems and only fall back to regular
 634  * unmounts if the filesystem doesn't support forced unmounts.
 635  *
 636  * - We don't want to unnecessarily corrupt metadata on local
 637  * filesystems (ie UFS), so we want to start off with graceful unmounts,
 638  * and only escalate to doing forced unmounts if we get stuck.
 639  *
 640  * We start off walking backwards through the mount table.  This doesn't
 641  * give us strict ordering but ensures that we try to unmount submounts
 642  * first.  We thus limit the number of failed umount2(2) calls.
 643  *
 644  * The mechanism for determining if we're stuck is to count the number
 645  * of failed unmounts each iteration through the mount table.  This
 646  * gives us an upper bound on the number of filesystems which remain
 647  * mounted (autofs trigger nodes are dealt with separately).  If at the
 648  * end of one unmount+autofs_cleanup cycle we still have the same number
 649  * of mounts that we started out with, we're stuck and try a forced
 650  * unmount.  If that fails (filesystem doesn't support forced unmounts)
 651  * then we bail and are unable to teardown the zone.  If it succeeds,
 652  * we're no longer stuck so we continue with our policy of trying
  653  * graceful unmounts first.
 654  *
 655  * Zone must be down (ie, no processes or threads active).
 656  */
 657 static int
 658 unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
 659 {
 660         int error = 0;
 661         int fail = 0;
 662         FILE *mnttab;
 663         struct mnttab *mnts;
 664         uint_t nmnt;
 665         char zroot[MAXPATHLEN + 1];
 666         size_t zrootlen;
 667         uint_t oldcount = UINT_MAX;
 668         boolean_t stuck = B_FALSE;
 669         char **remote_fstypes = NULL;
 670 
 671         if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
 672                 zerror(zlogp, B_FALSE, "unable to determine zone root");
 673                 return (-1);
 674         }
 675         if (unmount_cmd)
 676                 root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
 677 
 678         (void) strcat(zroot, "/");
 679         zrootlen = strlen(zroot);
 680 
 681         /*


 729                         path = mnp->mnt_mountp;
 730                         unmounted = B_FALSE;
 731                         /*
 732                          * Try forced unmount first for remote filesystems.
 733                          *
 734                          * Not all remote filesystems support forced unmounts,
 735                          * so if this fails (ENOTSUP) we'll continue on
 736                          * and try a regular unmount.
 737                          */
 738                         if (is_remote_fstype(mnp->mnt_fstype, remote_fstypes)) {
 739                                 if (umount2(path, MS_FORCE) == 0)
 740                                         unmounted = B_TRUE;
 741                         }
 742                         /*
 743                          * Try forced unmount if we're stuck.
 744                          */
 745                         if (stuck) {
 746                                 if (umount2(path, MS_FORCE) == 0) {
 747                                         unmounted = B_TRUE;
 748                                         stuck = B_FALSE;
 749                                         fail = 0;
 750                                 } else {
 751                                         /*
 752                                          * We may hit a failure here if there
 753                                          * is an app in the GZ with an open
 754                                          * pipe into the zone (commonly into
 755                                          * the zone's /var/run).  This type
 756                                          * of app will notice the closed
 757                                          * connection and cleanup, but it may
 758                                          * take a while and we have no easy
 759                                          * way to notice that.  To deal with
 760                                          * this case, we will wait and retry
 761                                          * a few times before we give up.
 762                                          */
 763                                         fail++;
 764                                         if (fail < (UMOUNT_RETRIES - 1)) {
 765                                                 zerror(zlogp, B_FALSE,
 766                                                     "unable to unmount '%s', "
 767                                                     "retrying in 2 seconds",
 768                                                     path);
 769                                                 (void) sleep(2);
 770                                         } else if (fail > UMOUNT_RETRIES) {
 771                                                 error++;
 772                                                 zerror(zlogp, B_FALSE,
 773                                                     "unmount of '%s' failed",
 774                                                     path);
 775                                                 free_mnttable(mnts, nmnt);
 776                                                 goto out;
 777                                         } else {
 778                                                 /* Try the hook 2 times */
 779                                                 brand_umount_cleanup(zlogp,
 780                                                     path);
 781                                         }
 782                                 }
 783                         }
 784                         /*
 785                          * Try regular unmounts for everything else.
 786                          */
 787                         if (!unmounted && umount2(path, 0) != 0)
 788                                 newcount++;
 789                 }
 790                 free_mnttable(mnts, nmnt);
 791 
 792                 if (newcount == 0)
 793                         break;
 794                 if (newcount >= oldcount) {
 795                         /*
 796                          * Last round didn't unmount anything; we're stuck and
 797                          * should start trying forced unmounts.
 798                          */
 799                         stuck = B_TRUE;
 800                 }
 801                 oldcount = newcount;
 802 
 803                 /*


1099 mount_one_dev_device_cb(void *arg, const char *match, const char *name)
1100 {
1101         di_prof_t prof = arg;
1102 
1103         if (name == NULL)
1104                 return (di_prof_add_dev(prof, match));
1105         return (di_prof_add_map(prof, match, name));
1106 }
1107 
1108 static int
1109 mount_one_dev_symlink_cb(void *arg, const char *source, const char *target)
1110 {
1111         di_prof_t prof = arg;
1112 
1113         return (di_prof_add_symlink(prof, source, target));
1114 }
1115 
1116 int
1117 vplat_get_iptype(zlog_t *zlogp, zone_iptype_t *iptypep)
1118 {
1119         if (zonecfg_get_iptype(snap_hndl, iptypep) != Z_OK) {











1120                 zerror(zlogp, B_FALSE, "invalid ip-type configuration");

1121                 return (-1);
1122         }

1123         return (0);
1124 }
1125 
1126 /*
1127  * Apply the standard lists of devices/symlinks/mappings and the user-specified
1128  * list of devices (via zonecfg) to the /dev filesystem.  The filesystem will
1129  * use these as a profile/filter to determine what exists in /dev.
1130  */
1131 static int
1132 mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd)
1133 {
1134         char                    brand[MAXNAMELEN];

1135         brand_handle_t          bh = NULL;
1136         struct zone_devtab      ztab;
1137         di_prof_t               prof = NULL;
1138         int                     err;
1139         int                     retval = -1;
1140         zone_iptype_t           iptype;
1141         const char              *curr_iptype = NULL;
1142 
1143         if (di_prof_init(devpath, &prof)) {
1144                 zerror(zlogp, B_TRUE, "failed to initialize profile");
1145                 goto cleanup;
1146         }
1147 
1148         /*
1149          * Get a handle to the brand info for this zone.
1150          * If we are mounting the zone, then we must always use the default
1151          * brand device mounts.
1152          */
1153         if (ALT_MOUNT(mount_cmd)) {
1154                 (void) strlcpy(brand, default_brand, sizeof (brand));
1155         } else {
1156                 (void) strlcpy(brand, brand_name, sizeof (brand));
1157         }
1158 
1159         if ((bh = brand_open(brand)) == NULL) {
1160                 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1161                 goto cleanup;
1162         }
1163 
1164         if (vplat_get_iptype(zlogp, &iptype) < 0) {
1165                 zerror(zlogp, B_TRUE, "unable to determine ip-type");
1166                 goto cleanup;
1167         }
1168         switch (iptype) {
1169         case ZS_SHARED:
1170                 curr_iptype = "shared";
1171                 break;
1172         case ZS_EXCLUSIVE:
1173                 curr_iptype = "exclusive";
1174                 break;
1175         }
1176         if (curr_iptype == NULL)
1177                 abort();
1178 
1179         if (brand_platform_iter_devices(bh, zone_name,
1180             mount_one_dev_device_cb, prof, curr_iptype) != 0) {
1181                 zerror(zlogp, B_TRUE, "failed to add standard device");
1182                 goto cleanup;
1183         }
1184 
1185         if (brand_platform_iter_link(bh,
1186             mount_one_dev_symlink_cb, prof) != 0) {
1187                 zerror(zlogp, B_TRUE, "failed to add standard symlink");
1188                 goto cleanup;
1189         }
1190 
1191         /* Add user-specified devices and directories */
1192         if ((err = zonecfg_setdevent(snap_hndl)) != 0) {









1193                 zerror(zlogp, B_FALSE, "%s: %s", zone_name,
1194                     zonecfg_strerror(err));
1195                 goto cleanup;
1196         }
1197         while (zonecfg_getdevent(snap_hndl, &ztab) == Z_OK) {
1198                 if (di_prof_add_dev(prof, ztab.zone_dev_match)) {
1199                         zerror(zlogp, B_TRUE, "failed to add "
1200                             "user-specified device");
1201                         goto cleanup;
1202                 }
1203         }
1204         (void) zonecfg_enddevent(snap_hndl);
1205 
1206         /* Send profile to kernel */
1207         if (di_prof_commit(prof)) {
1208                 zerror(zlogp, B_TRUE, "failed to commit profile");
1209                 goto cleanup;
1210         }
1211 
1212         retval = 0;
1213 
1214 cleanup:
1215         if (bh != NULL)
1216                 brand_close(bh);


1217         if (prof)
1218                 di_prof_fini(prof);
1219         return (retval);
1220 }
1221 
1222 static int
1223 mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath,
1224     zone_mnt_t mount_cmd)
1225 {
1226         char path[MAXPATHLEN];
1227         char optstr[MAX_MNTOPT_STR];
1228         zone_fsopt_t *optptr;
1229         int rv;
1230 
1231         if ((rv = valid_mount_path(zlogp, rootpath, fsptr->zone_fs_special,
1232             fsptr->zone_fs_dir, fsptr->zone_fs_type)) < 0) {
1233                 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
1234                     rootpath, fsptr->zone_fs_dir);
1235                 return (-1);
1236         } else if (rv > 0) {


1690                 fsp->zone_fs_special[0] = '\0';
1691                 if (strcmp(fsp->zone_fs_type, MNTTYPE_LOFS) == 0) {
1692                         (void) strlcat(fsp->zone_fs_special, zonecfg_get_root(),
1693                             sizeof (fsp->zone_fs_special));
1694                 }
1695                 (void) strlcat(fsp->zone_fs_special, fstab.zone_fs_special,
1696                     sizeof (fsp->zone_fs_special));
1697         }
1698         (void) zonecfg_endfsent(handle);
1699         return (0);
1700 }
1701 
1702 static int
1703 mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
1704 {
1705         char rootpath[MAXPATHLEN];
1706         char brand[MAXNAMELEN];
1707         char luroot[MAXPATHLEN];
1708         int i, num_fs = 0;
1709         struct zone_fstab *fs_ptr = NULL;

1710         zone_state_t zstate;
1711         brand_handle_t bh;
1712         plat_gmount_cb_data_t cb;
1713 
1714         if (zone_get_state(zone_name, &zstate) != Z_OK ||
1715             (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) {
1716                 zerror(zlogp, B_FALSE,
1717                     "zone must be in '%s' or '%s' state to mount file-systems",
1718                     zone_state_str(ZONE_STATE_READY),
1719                     zone_state_str(ZONE_STATE_MOUNTED));
1720                 goto bad;
1721         }
1722 
1723         if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
1724                 zerror(zlogp, B_TRUE, "unable to determine zone root");
1725                 goto bad;
1726         }
1727 
1728         if (zonecfg_setfsent(snap_hndl) != Z_OK) {





1729                 zerror(zlogp, B_FALSE, "invalid configuration");
1730                 goto bad;
1731         }
1732 
1733         /*
1734          * If we are mounting the zone, then we must always use the default
1735          * brand global mounts.
1736          */
1737         if (ALT_MOUNT(mount_cmd)) {
1738                 (void) strlcpy(brand, default_brand, sizeof (brand));
1739         } else {
1740                 (void) strlcpy(brand, brand_name, sizeof (brand));
1741         }
1742 
1743         /* Get a handle to the brand info for this zone */
1744         if ((bh = brand_open(brand)) == NULL) {
1745                 zerror(zlogp, B_FALSE, "unable to determine zone brand");

1746                 return (-1);
1747         }
1748 
1749         /*
1750          * Get the list of global filesystems to mount from the brand
1751          * configuration.
1752          */
1753         cb.pgcd_zlogp = zlogp;
1754         cb.pgcd_fs_tab = &fs_ptr;
1755         cb.pgcd_num_fs = &num_fs;
1756         if (brand_platform_iter_gmounts(bh, zone_name, zonepath,
1757             plat_gmount_cb, &cb) != 0) {
1758                 zerror(zlogp, B_FALSE, "unable to mount filesystems");
1759                 brand_close(bh);

1760                 return (-1);
1761         }
1762         brand_close(bh);
1763 
1764         /*
1765          * Iterate through the rest of the filesystems. Sort them all,
1766          * then mount them in sorted order. This is to make sure the
1767          * higher level directories (e.g., /usr) get mounted before
1768          * any beneath them (e.g., /usr/local).
1769          */
1770         if (mount_filesystems_fsent(snap_hndl, zlogp, &fs_ptr, &num_fs,
1771             mount_cmd) != 0)
1772                 goto bad;
1773 



1774         /*
1775          * Normally when we mount a zone all the zone filesystems
1776          * get mounted relative to rootpath, which is usually
1777          * <zonepath>/root.  But when mounting a zone for administration
1778          * purposes via the zone "mount" state, build_mounted_pre_var()
1779          * updates rootpath to be <zonepath>/lu/a so we'll mount all
1780          * the zones filesystems there instead.
1781          *
1782          * build_mounted_pre_var() and build_mounted_post_var() will
1783          * also do some extra work to create directories and lofs mount
1784          * a bunch of global zone file system paths into <zonepath>/lu.
1785          *
1786          * This allows us to be able to enter the zone (now rooted at
1787          * <zonepath>/lu) and run the upgrade/patch tools that are in the
1788          * global zone and have them upgrade the to-be-modified zone's
1789          * files mounted on /a.  (Which mirrors the existing standard
1790          * upgrade environment.)
1791          *
1792          * There is of course one catch.  When doing the upgrade
1793          * we need <zoneroot>/lu/dev to be the /dev filesystem


1849         }
1850         if (ALT_MOUNT(mount_cmd) &&
1851             !build_mounted_post_var(zlogp, mount_cmd, rootpath, luroot))
1852                 goto bad;
1853 
1854         /*
1855          * For Trusted Extensions cross-mount each lower level /export/home
1856          */
1857         if (mount_cmd == Z_MNT_BOOT &&
1858             tsol_mounts(zlogp, zone_name, rootpath) != 0)
1859                 goto bad;
1860 
1861         free_fs_data(fs_ptr, num_fs);
1862 
1863         /*
1864          * Everything looks fine.
1865          */
1866         return (0);
1867 
1868 bad:


1869         free_fs_data(fs_ptr, num_fs);
1870         return (-1);
1871 }
1872 
1873 /* caller makes sure neither parameter is NULL */
1874 static int
1875 addr2netmask(char *prefixstr, int maxprefixlen, uchar_t *maskstr)
1876 {
1877         int prefixlen;
1878 
1879         prefixlen = atoi(prefixstr);
1880         if (prefixlen < 0 || prefixlen > maxprefixlen)
1881                 return (1);
1882         while (prefixlen > 0) {
1883                 if (prefixlen >= 8) {
1884                         *maskstr++ = 0xFF;
1885                         prefixlen -= 8;
1886                         continue;
1887                 }
1888                 *maskstr |= 1 << (8 - prefixlen);


2204                 zerror(zlogp, B_TRUE, "could not get socket");
2205                 return (-1);
2206         }
2207 
2208         /*
2209          * This is a similar kind of "hack" like in addif() to get around
2210          * the problem of SIOCLIFADDIF.  The problem is that this ioctl
2211          * does not include the netmask when adding a logical interface.
2212          * To get around this problem, we first add the logical interface
2213          * with a 0 address.  After that, we set the netmask if provided.
2214          * Finally we set the interface address.
2215          */
2216         laddr = lifr.lifr_addr;
2217         (void) strlcpy(lifr.lifr_name, nwiftabptr->zone_nwif_physical,
2218             sizeof (lifr.lifr_name));
2219         (void) memset(&lifr.lifr_addr, 0, sizeof (lifr.lifr_addr));
2220 
2221         if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) {
2222                 /*
2223                  * Here, we know that the interface can't be brought up.



2224                  */



2225                 (void) close(s);
2226                 return (Z_OK);
2227         }
2228 
2229         /* Preserve literal IPv4 address for later potential printing. */
2230         if (af == AF_INET)
2231                 (void) inet_ntop(AF_INET, &in4, addrstr4, INET_ADDRSTRLEN);
2232 
2233         lifr.lifr_zoneid = zone_id;
2234         if (ioctl(s, SIOCSLIFZONE, (caddr_t)&lifr) < 0) {
2235                 zerror(zlogp, B_TRUE, "%s: could not place network interface "
2236                     "into zone", lifr.lifr_name);
2237                 goto bad;
2238         }
2239 
2240         /*
2241          * Loopback interface will use the default netmask assigned, if no
2242          * netmask is found.
2243          */
2244         if (strcmp(nwiftabptr->zone_nwif_physical, "lo0") == 0) {


2417 
2418         (void) close(s);
2419         return (Z_OK);
2420 bad:
2421         (void) ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifr);
2422         (void) close(s);
2423         return (-1);
2424 }
2425 
2426 /*
2427  * Sets up network interfaces based on information from the zone configuration.
2428  * IPv4 and IPv6 loopback interfaces are set up "for free", modeling the global
2429  * system.
2430  *
2431  * If anything goes wrong, we log a general error message, attempt to tear down
2432  * whatever we set up, and return an error.
2433  */
2434 static int
2435 configure_shared_network_interfaces(zlog_t *zlogp)
2436 {

2437         struct zone_nwiftab nwiftab, loopback_iftab;
2438         zoneid_t zoneid;
2439 
2440         if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) {
2441                 zerror(zlogp, B_TRUE, "unable to get zoneid");
2442                 return (-1);
2443         }
2444 
2445         if (zonecfg_setnwifent(snap_hndl) == Z_OK) {









2446                 for (;;) {
2447                         if (zonecfg_getnwifent(snap_hndl, &nwiftab) != Z_OK)
2448                                 break;
2449                         nwifent_free_attrs(&nwiftab);
2450                         if (configure_one_interface(zlogp, zoneid, &nwiftab) !=
2451                             Z_OK) {
2452                                 (void) zonecfg_endnwifent(snap_hndl);

2453                                 return (-1);
2454                         }
2455                 }
2456                 (void) zonecfg_endnwifent(snap_hndl);
2457         }

2458         if (is_system_labeled()) {
2459                 /*
2460                  * Labeled zones share the loopback interface
2461                  * so it is not plumbed for shared stack instances.
2462                  */
2463                 return (0);
2464         }
2465         (void) strlcpy(loopback_iftab.zone_nwif_physical, "lo0",
2466             sizeof (loopback_iftab.zone_nwif_physical));
2467         (void) strlcpy(loopback_iftab.zone_nwif_address, "127.0.0.1",
2468             sizeof (loopback_iftab.zone_nwif_address));
2469         loopback_iftab.zone_nwif_defrouter[0] = '\0';
2470         if (configure_one_interface(zlogp, zoneid, &loopback_iftab) != Z_OK)
2471                 return (-1);
2472 
2473         /* Always plumb up the IPv6 loopback interface. */
2474         (void) strlcpy(loopback_iftab.zone_nwif_address, "::1/128",
2475             sizeof (loopback_iftab.zone_nwif_address));
2476         if (configure_one_interface(zlogp, zoneid, &loopback_iftab) != Z_OK)
2477                 return (-1);


2884         }
2885         /* insert new after ptr */
2886         new->za_next = next;
2887         ptr->za_next = new;
2888         return (old);
2889 }
2890 
2891 void
2892 free_ip_interface(zone_addr_list_t *zalist)
2893 {
2894         zone_addr_list_t *ptr, *new;
2895 
2896         for (ptr = zalist; ptr != NULL; ) {
2897                 new = ptr;
2898                 ptr = ptr->za_next;
2899                 free(new);
2900         }
2901 }
2902 
2903 /*



















2904  * Add the kernel access control information for the interface names.
2905  * If anything goes wrong, we log a general error message, attempt to tear down
2906  * whatever we set up, and return an error.
2907  */
2908 static int
2909 configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
2910 {

2911         struct zone_nwiftab nwiftab;
2912         char rootpath[MAXPATHLEN];
2913         char path[MAXPATHLEN];
2914         datalink_id_t linkid;
2915         di_prof_t prof = NULL;
2916         boolean_t added = B_FALSE;
2917         zone_addr_list_t *zalist = NULL, *new;
2918 
2919         if (zonecfg_setnwifent(snap_hndl) != Z_OK)











2920                 return (0);

2921 
2922         for (;;) {
2923                 if (zonecfg_getnwifent(snap_hndl, &nwiftab) != Z_OK)
2924                         break;
2925 
2926                 nwifent_free_attrs(&nwiftab);
2927                 if (prof == NULL) {
2928                         if (zone_get_devroot(zone_name, rootpath,
2929                             sizeof (rootpath)) != Z_OK) {
2930                                 (void) zonecfg_endnwifent(snap_hndl);

2931                                 zerror(zlogp, B_TRUE,
2932                                     "unable to determine dev root");
2933                                 return (-1);
2934                         }
2935                         (void) snprintf(path, sizeof (path), "%s%s", rootpath,
2936                             "/dev");
2937                         if (di_prof_init(path, &prof) != 0) {
2938                                 (void) zonecfg_endnwifent(snap_hndl);

2939                                 zerror(zlogp, B_TRUE,
2940                                     "failed to initialize profile");
2941                                 return (-1);
2942                         }
2943                 }
2944 
2945                 /*
2946                  * Create the /dev entry for backward compatibility.
2947                  * Only create the /dev entry if it's not in use.
2948                  * Note that the zone still boots when the assigned
2949                  * interface is inaccessible, used by others, etc.
2950                  * Also, when vanity naming is used, some interface do
2951                  * do not have corresponding /dev node names (for example,
2952                  * vanity named aggregations).  The /dev entry is not
2953                  * created in that case.  The /dev/net entry is always
2954                  * accessible.
2955                  */
2956                 if (dladm_name2info(dld_handle, nwiftab.zone_nwif_physical,
2957                     &linkid, NULL, NULL, NULL) == DLADM_STATUS_OK &&
2958                     add_datalink(zlogp, zone_name, linkid,
2959                     nwiftab.zone_nwif_physical) == 0) {
2960                         added = B_TRUE;
2961                 } else {
2962                         /*
2963                          * Failed to add network device, but the brand hook
2964                          * might be doing this for us, so keep silent.
2965                          */
2966                         continue;
2967                 }
2968                 /* set up the new IP interface, and add them all later */
2969                 new = malloc(sizeof (*new));
2970                 if (new == NULL) {
2971                         zerror(zlogp, B_TRUE, "no memory for %s",
2972                             nwiftab.zone_nwif_physical);

2973                         free_ip_interface(zalist);
2974                 }
2975                 bzero(new, sizeof (*new));
2976                 new->za_nwiftab = nwiftab;
2977                 new->za_linkid = linkid;
2978                 zalist = add_ip_interface(zalist, new);
2979         }
2980         if (zalist != NULL) {
2981                 if ((errno = add_net(zlogp, zoneid, zalist)) != 0) {
2982                         (void) zonecfg_endnwifent(snap_hndl);

2983                         zerror(zlogp, B_TRUE, "failed to add address");
2984                         free_ip_interface(zalist);
2985                         return (-1);
2986                 }
2987                 free_ip_interface(zalist);
2988         }
2989         (void) zonecfg_endnwifent(snap_hndl);

2990 
2991         if (prof != NULL && added) {
2992                 if (di_prof_commit(prof) != 0) {
2993                         zerror(zlogp, B_TRUE, "failed to commit profile");
2994                         return (-1);
2995                 }
2996         }
2997         if (prof != NULL)
2998                 di_prof_fini(prof);
2999 
3000         return (0);
3001 }
3002 
3003 static int
3004 remove_datalink_pool(zlog_t *zlogp, zoneid_t zoneid)
3005 {
3006         ushort_t flags;
3007         zone_iptype_t iptype;
3008         int i, dlnum = 0;
3009         datalink_id_t *dllink, *dllinks = NULL;


3105 
3106         if ((dllinks = malloc(dlnum * sizeof (datalink_id_t))) == NULL) {
3107                 zerror(zlogp, B_TRUE, "memory allocation failed");
3108                 return (-1);
3109         }
3110         if (zone_list_datalink(zoneid, &dlnum, dllinks) != 0) {
3111                 zerror(zlogp, B_TRUE, "unable to list network interfaces");
3112                 free(dllinks);
3113                 return (-1);
3114         }
3115 
3116         for (i = 0, dllink = dllinks; i < dlnum; i++, dllink++) {
3117                 char dlerr[DLADM_STRSIZE];
3118 
3119                 dlstatus = dladm_set_linkprop(dld_handle, *dllink,
3120                     "protection", NULL, 0, DLADM_OPT_ACTIVE);
3121                 if (dlstatus == DLADM_STATUS_NOTFOUND) {
3122                         /* datalink does not belong to the GZ */
3123                         continue;
3124                 }
3125                 if (dlstatus != DLADM_STATUS_OK)
3126                         zerror(zlogp, B_FALSE,
3127                             "clear 'protection' link property: %s",
3128                             dladm_status2str(dlstatus, dlerr));
3129 


3130                 dlstatus = dladm_set_linkprop(dld_handle, *dllink,
3131                     "allowed-ips", NULL, 0, DLADM_OPT_ACTIVE);
3132                 if (dlstatus != DLADM_STATUS_OK)
3133                         zerror(zlogp, B_FALSE,
3134                             "clear 'allowed-ips' link property: %s",
3135                             dladm_status2str(dlstatus, dlerr));


3136         }

3137         free(dllinks);
3138         return (0);
3139 }
3140 
3141 static int






















3142 tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid,
3143     const struct sockaddr_storage *local, const struct sockaddr_storage *remote)
3144 {
3145         int fd;
3146         struct strioctl ioc;
3147         tcp_ioc_abort_conn_t conn;
3148         int error;
3149 
3150         conn.ac_local = *local;
3151         conn.ac_remote = *remote;
3152         conn.ac_start = TCPS_SYN_SENT;
3153         conn.ac_end = TCPS_TIME_WAIT;
3154         conn.ac_zoneid = zoneid;
3155 
3156         ioc.ic_cmd = TCP_IOC_ABORT_CONN;
3157         ioc.ic_timout = -1; /* infinite timeout */
3158         ioc.ic_len = sizeof (conn);
3159         ioc.ic_dp = (char *)&conn;
3160 
3161         if ((fd = open("/dev/tcp", O_RDONLY)) < 0) {


3203         local6 = (struct sockaddr_in6 *)&l;
3204         local6->sin6_family = AF_INET6;
3205         local6->sin6_port = 0;
3206         local6->sin6_addr = in6addr_any;
3207 
3208         bzero(&r, sizeof (*remote6));
3209         remote6 = (struct sockaddr_in6 *)&r;
3210         remote6->sin6_family = AF_INET6;
3211         remote6->sin6_port = 0;
3212         remote6->sin6_addr = in6addr_any;
3213 
3214         if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0)
3215                 return (error);
3216         return (0);
3217 }
3218 
3219 static int
3220 get_privset(zlog_t *zlogp, priv_set_t *privs, zone_mnt_t mount_cmd)
3221 {
3222         int error = -1;

3223         char *privname = NULL;
3224 










3225         if (ALT_MOUNT(mount_cmd)) {
3226                 zone_iptype_t   iptype;
3227                 const char      *curr_iptype = NULL;
3228 
3229                 if (zonecfg_get_iptype(snap_hndl, &iptype) != Z_OK) {
3230                         zerror(zlogp, B_TRUE, "unable to determine ip-type");

3231                         return (-1);
3232                 }
3233 
3234                 switch (iptype) {
3235                 case ZS_SHARED:
3236                         curr_iptype = "shared";
3237                         break;
3238                 case ZS_EXCLUSIVE:
3239                         curr_iptype = "exclusive";
3240                         break;
3241                 }
3242 
3243                 if (zonecfg_default_privset(privs, curr_iptype) == Z_OK)

3244                         return (0);
3245 
3246                 zerror(zlogp, B_FALSE,
3247                     "failed to determine the zone's default privilege set");

3248                 return (-1);
3249         }
3250 
3251         switch (zonecfg_get_privset(snap_hndl, privs, &privname)) {
3252         case Z_OK:
3253                 error = 0;
3254                 break;
3255         case Z_PRIV_PROHIBITED:
3256                 zerror(zlogp, B_FALSE, "privilege \"%s\" is not permitted "
3257                     "within the zone's privilege set", privname);
3258                 break;
3259         case Z_PRIV_REQUIRED:
3260                 zerror(zlogp, B_FALSE, "required privilege \"%s\" is missing "
3261                     "from the zone's privilege set", privname);
3262                 break;
3263         case Z_PRIV_UNKNOWN:
3264                 zerror(zlogp, B_FALSE, "unknown privilege \"%s\" specified "
3265                     "in the zone's privilege set", privname);
3266                 break;
3267         default:
3268                 zerror(zlogp, B_FALSE, "failed to determine the zone's "
3269                     "privilege set");
3270                 break;
3271         }
3272 
3273         free(privname);

3274         return (error);
3275 }
3276 
3277 static char *
3278 zone_proj_rctl(const char *name)
3279 {
3280         int i;
3281 
3282         for (i = 0; zone_proj_rctl_map[i].zpr_zone_rctl != NULL; i++) {
3283                 if (strcmp(name, zone_proj_rctl_map[i].zpr_zone_rctl) == 0) {
3284                         return (zone_proj_rctl_map[i].zpr_project_rctl);
3285                 }
3286         }
3287         return (NULL);
3288 }
3289 
3290 static int
3291 get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
3292 {
3293         nvlist_t *nvl = NULL;
3294         char *nvl_packed = NULL;
3295         size_t nvl_size = 0;
3296         nvlist_t **nvlv = NULL;
3297         int rctlcount = 0;
3298         int error = -1;

3299         struct zone_rctltab rctltab;
3300         rctlblk_t *rctlblk = NULL;
3301         uint64_t maxlwps;
3302         uint64_t maxprocs;
3303         int rproc, rlwp;
3304 
3305         *bufp = NULL;
3306         *bufsizep = 0;
3307 










3308         rctltab.zone_rctl_valptr = NULL;
3309         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
3310                 zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc");
3311                 goto out;
3312         }
3313 
3314         /*
3315          * Allow the administrator to control both the maximum number of
3316          * process table slots, and the maximum number of lwps, with a single
3317          * max-processes or max-lwps property. If only the max-processes
3318          * property is set, we add a max-lwps property with a limit derived
3319          * from max-processes. If only the max-lwps property is set, we add a
3320          * max-processes property with the same limit as max-lwps.
3321          */
3322         rproc = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXPROCS, &maxprocs);
3323         rlwp = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXLWPS, &maxlwps);
3324         if (rproc == Z_OK && rlwp == Z_NO_ENTRY) {
3325                 if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXLWPS,
3326                     maxprocs * LWPS_PER_PROCESS) != Z_OK) {
3327                         zerror(zlogp, B_FALSE, "unable to set max-lwps alias");
3328                         goto out;
3329                 }
3330         } else if (rlwp == Z_OK && rproc == Z_NO_ENTRY) {
3331                 /* no scaling for max-proc value */
3332                 if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXPROCS,
3333                     maxlwps) != Z_OK) {
3334                         zerror(zlogp, B_FALSE,
3335                             "unable to set max-processes alias");
3336                         goto out;
3337                 }
3338         }
3339 
3340         if (zonecfg_setrctlent(snap_hndl) != Z_OK) {
3341                 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
3342                 goto out;
3343         }
3344 
3345         if ((rctlblk = malloc(rctlblk_size())) == NULL) {
3346                 zerror(zlogp, B_TRUE, "memory allocation failed");
3347                 goto out;
3348         }
3349         while (zonecfg_getrctlent(snap_hndl, &rctltab) == Z_OK) {
3350                 struct zone_rctlvaltab *rctlval;
3351                 uint_t i, count;
3352                 const char *name = rctltab.zone_rctl_name;
3353                 char *proj_nm;
3354 
3355                 /* zoneadm should have already warned about unknown rctls. */
3356                 if (!zonecfg_is_rctl(name)) {
3357                         zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
3358                         rctltab.zone_rctl_valptr = NULL;
3359                         continue;
3360                 }
3361                 count = 0;
3362                 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
3363                     rctlval = rctlval->zone_rctlval_next) {
3364                         count++;
3365                 }
3366                 if (count == 0) {       /* ignore */
3367                         continue;       /* Nothing to free */
3368                 }
3369                 if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL)


3432                         if (nvlist_add_nvlist_array(nvl, proj_nm, nvlv, count)
3433                             != 0) {
3434                                 zerror(zlogp, B_FALSE,
3435                                     "nvlist_add_nvlist_arrays failed");
3436                                 goto out;
3437                         }
3438                 }
3439 
3440                 if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count)
3441                     != 0) {
3442                         zerror(zlogp, B_FALSE, "%s failed",
3443                             "nvlist_add_nvlist_array");
3444                         goto out;
3445                 }
3446                 for (i = 0; i < count; i++)
3447                         nvlist_free(nvlv[i]);
3448                 free(nvlv);
3449                 nvlv = NULL;
3450                 rctlcount++;
3451         }
3452         (void) zonecfg_endrctlent(snap_hndl);
3453 
3454         if (rctlcount == 0) {
3455                 error = 0;
3456                 goto out;
3457         }
3458         if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0)
3459             != 0) {
3460                 zerror(zlogp, B_FALSE, "%s failed", "nvlist_pack");
3461                 goto out;
3462         }
3463 
3464         error = 0;
3465         *bufp = nvl_packed;
3466         *bufsizep = nvl_size;
3467 
3468 out:
3469         free(rctlblk);
3470         zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
3471         if (error && nvl_packed != NULL)
3472                 free(nvl_packed);
3473         nvlist_free(nvl);
3474         if (nvlv != NULL)
3475                 free(nvlv);


3476         return (error);
3477 }
3478 
3479 static int
3480 get_implicit_datasets(zlog_t *zlogp, char **retstr)
3481 {
3482         char cmdbuf[2 * MAXPATHLEN];
3483 
3484         if (query_hook[0] == '\0')
3485                 return (0);
3486 
3487         if (snprintf(cmdbuf, sizeof (cmdbuf), "%s datasets", query_hook)
3488             > sizeof (cmdbuf))
3489                 return (-1);
3490 
3491         if (do_subproc(zlogp, cmdbuf, retstr, B_FALSE) != 0)
3492                 return (-1);
3493 
3494         return (0);
3495 }
3496 
3497 static int
3498 get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
3499 {

3500         struct zone_dstab dstab;
3501         size_t total, offset, len;
3502         int error = -1;
3503         char *str = NULL;
3504         char *implicit_datasets = NULL;
3505         int implicit_len = 0;
3506 
3507         *bufp = NULL;
3508         *bufsizep = 0;
3509 










3510         if (get_implicit_datasets(zlogp, &implicit_datasets) != 0) {
3511                 zerror(zlogp, B_FALSE, "getting implicit datasets failed");
3512                 goto out;
3513         }
3514 
3515         if (zonecfg_setdsent(snap_hndl) != Z_OK) {
3516                 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
3517                 goto out;
3518         }
3519 
3520         total = 0;
3521         while (zonecfg_getdsent(snap_hndl, &dstab) == Z_OK)
3522                 total += strlen(dstab.zone_dataset_name) + 1;
3523         (void) zonecfg_enddsent(snap_hndl);
3524 
3525         if (implicit_datasets != NULL)
3526                 implicit_len = strlen(implicit_datasets);
3527         if (implicit_len > 0)
3528                 total += implicit_len + 1;
3529 
3530         if (total == 0) {
3531                 error = 0;
3532                 goto out;
3533         }
3534 
3535         if ((str = malloc(total)) == NULL) {
3536                 zerror(zlogp, B_TRUE, "memory allocation failed");
3537                 goto out;
3538         }
3539 
3540         if (zonecfg_setdsent(snap_hndl) != Z_OK) {
3541                 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
3542                 goto out;
3543         }
3544         offset = 0;
3545         while (zonecfg_getdsent(snap_hndl, &dstab) == Z_OK) {
3546                 len = strlen(dstab.zone_dataset_name);
3547                 (void) strlcpy(str + offset, dstab.zone_dataset_name,
3548                     total - offset);
3549                 offset += len;
3550                 if (offset < total - 1)
3551                         str[offset++] = ',';
3552         }
3553         (void) zonecfg_enddsent(snap_hndl);
3554 
3555         if (implicit_len > 0)
3556                 (void) strlcpy(str + offset, implicit_datasets, total - offset);
3557 
3558         error = 0;
3559         *bufp = str;
3560         *bufsizep = total;
3561 
3562 out:
3563         if (error != 0 && str != NULL)
3564                 free(str);


3565         if (implicit_datasets != NULL)
3566                 free(implicit_datasets);
3567 
3568         return (error);
3569 }
3570 
3571 static int
3572 validate_datasets(zlog_t *zlogp)
3573 {

3574         struct zone_dstab dstab;
3575         zfs_handle_t *zhp;
3576         libzfs_handle_t *hdl;
3577 
3578         if (zonecfg_setdsent(snap_hndl) != Z_OK) {




3579                 zerror(zlogp, B_FALSE, "invalid configuration");

3580                 return (-1);
3581         }
3582 






3583         if ((hdl = libzfs_init()) == NULL) {
3584                 zerror(zlogp, B_FALSE, "opening ZFS library");

3585                 return (-1);
3586         }
3587 
3588         while (zonecfg_getdsent(snap_hndl, &dstab) == Z_OK) {
3589 
3590                 if ((zhp = zfs_open(hdl, dstab.zone_dataset_name,
3591                     ZFS_TYPE_FILESYSTEM)) == NULL) {
3592                         zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'",
3593                             dstab.zone_dataset_name);

3594                         libzfs_fini(hdl);
3595                         return (-1);
3596                 }
3597 
3598                 /*
3599                  * Automatically set the 'zoned' property.  We check the value
3600                  * first because we'll get EPERM if it is already set.
3601                  */
3602                 if (!zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
3603                     zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_ZONED),
3604                     "on") != 0) {
3605                         zerror(zlogp, B_FALSE, "cannot set 'zoned' "
3606                             "property for ZFS dataset '%s'\n",
3607                             dstab.zone_dataset_name);

3608                         zfs_close(zhp);
3609                         libzfs_fini(hdl);
3610                         return (-1);
3611                 }
3612 
3613                 zfs_close(zhp);
3614         }
3615         (void) zonecfg_enddsent(snap_hndl);
3616 

3617         libzfs_fini(hdl);
3618 
3619         return (0);
3620 }
3621 
3622 /*
3623  * Return true if the path is its own zfs file system.  We determine this
3624  * by stat-ing the path to see if it is zfs and stat-ing the parent to see
3625  * if it is a different fs.
3626  */
3627 boolean_t
3628 is_zonepath_zfs(char *zonepath)
3629 {
3630         int res;
3631         char *path;
3632         char *parent;
3633         struct statvfs64 buf1, buf2;
3634 
3635         if (statvfs64(zonepath, &buf1) != 0)
3636                 return (B_FALSE);


4333                     stat64(mnp->mnt_special, &zst) != -1 &&
4334                     rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) {
4335                         zerror(zlogp, B_FALSE,
4336                             "zone root %s is reachable through %s",
4337                             rootpath, mnp->mnt_mountp);
4338                         return (B_TRUE);
4339                 }
4340         }
4341         return (B_FALSE);
4342 }
4343 
4344 /*
4345  * Set pool info for the zone's resource management configuration.
4346  */
4347 static int
4348 setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
4349 {
4350         int res;
4351         uint64_t tmp;
4352         char sched[MAXNAMELEN];

4353         char pool_err[128];
4354 











4355         /* Get the scheduling class set in the zone configuration. */
4356         if (zonecfg_get_sched_class(snap_hndl, sched, sizeof (sched)) == Z_OK &&
4357             strlen(sched) > 0) {
4358                 if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, sched,
4359                     strlen(sched)) == -1)
4360                         zerror(zlogp, B_TRUE, "WARNING: unable to set the "
4361                             "default scheduling class");
4362 
4363                 if (strcmp(sched, "FX") == 0) {
4364                         /*
4365                          * When FX is specified then by default all processes
4366                          * will start at the lowest priority level (0) and
4367                          * stay there. We support an optional attr which
4368                          * indicates that all the processes should be "high
4369                          * priority". We set this on the zone so that starting
4370                          * init will set the priority high.
4371                          */
4372                         struct zone_attrtab a;
4373 
4374                         bzero(&a, sizeof (a));
4375                         (void) strlcpy(a.zone_attr_name, "fixed-hi-prio",
4376                             sizeof (a.zone_attr_name));


4381 
4382                                 if (zone_setattr(zoneid,
4383                                     ZONE_ATTR_SCHED_FIXEDHI, (void *)hi,
4384                                     sizeof (hi)) == -1)
4385                                         zerror(zlogp, B_TRUE, "WARNING: unable "
4386                                             "to set high priority");
4387                         }
4388                 }
4389 
4390         } else if (zonecfg_get_aliased_rctl(snap_hndl, ALIAS_SHARES, &tmp)
4391             == Z_OK) {
4392                 /*
4393                  * If the zone has the zone.cpu-shares rctl set then we want to
4394                  * use the Fair Share Scheduler (FSS) for processes in the
4395                  * zone.  Check what scheduling class the zone would be running
4396                  * in by default so we can print a warning and modify the class
4397                  * if we wouldn't be using FSS.
4398                  */
4399                 char class_name[PC_CLNMSZ];
4400 
4401                 if (zonecfg_get_dflt_sched_class(snap_hndl, class_name,
4402                     sizeof (class_name)) != Z_OK) {
4403                         zerror(zlogp, B_FALSE, "WARNING: unable to determine "
4404                             "the zone's scheduling class");
4405 
4406                 } else if (strcmp("FSS", class_name) != 0) {
4407                         zerror(zlogp, B_FALSE, "WARNING: The zone.cpu-shares "
4408                             "rctl is set but\nFSS is not the default "
4409                             "scheduling class for\nthis zone.  FSS will be "
4410                             "used for processes\nin the zone but to get the "
4411                             "full benefit of FSS,\nit should be the default "
4412                             "scheduling class.\nSee dispadmin(1M) for more "
4413                             "details.");
4414 
4415                         if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, "FSS",
4416                             strlen("FSS")) == -1)
4417                                 zerror(zlogp, B_TRUE, "WARNING: unable to set "
4418                                     "zone scheduling class to FSS");
4419                 }
4420         }
4421 
4422         /*
4423          * The next few blocks of code attempt to set up temporary pools as
4424          * well as persistent pools.  In all cases we call the functions
4425          * unconditionally.  Within each function the code will check if the
4426          * zone is actually configured for a temporary pool or persistent pool
4427          * and just return if there is nothing to do.
4428          *
4429          * If we are rebooting we want to attempt to reuse any temporary pool
4430          * that was previously set up.  zonecfg_bind_tmp_pool() will do the
4431          * right thing in all cases (reuse or create) based on the current
4432          * zonecfg.
4433          */
4434         if ((res = zonecfg_bind_tmp_pool(snap_hndl, zoneid, pool_err,
4435             sizeof (pool_err))) != Z_OK) {
4436                 if (res == Z_POOL || res == Z_POOL_CREATE || res == Z_POOL_BIND)
4437                         zerror(zlogp, B_FALSE, "%s: %s\ndedicated-cpu setting "
4438                             "cannot be instantiated", zonecfg_strerror(res),
4439                             pool_err);
4440                 else
4441                         zerror(zlogp, B_FALSE, "could not bind zone to "
4442                             "temporary pool: %s", zonecfg_strerror(res));

4443                 return (Z_POOL_BIND);
4444         }
4445 
4446         /*
4447          * Check if we need to warn about poold not being enabled.
4448          */
4449         if (zonecfg_warn_poold(snap_hndl)) {
4450                 zerror(zlogp, B_FALSE, "WARNING: A range of dedicated-cpus has "
4451                     "been specified\nbut the dynamic pool service is not "
4452                     "enabled.\nThe system will not dynamically adjust the\n"
4453                     "processor allocation within the specified range\n"
4454                     "until svc:/system/pools/dynamic is enabled.\n"
4455                     "See poold(1M).");
4456         }
4457 
4458         /* The following is a warning, not an error. */
4459         if ((res = zonecfg_bind_pool(snap_hndl, zoneid, pool_err,
4460             sizeof (pool_err))) != Z_OK) {
4461                 if (res == Z_POOL_BIND)
4462                         zerror(zlogp, B_FALSE, "WARNING: unable to bind to "
4463                             "pool '%s'; using default pool.", pool_err);
4464                 else if (res == Z_POOL)
4465                         zerror(zlogp, B_FALSE, "WARNING: %s: %s",
4466                             zonecfg_strerror(res), pool_err);
4467                 else
4468                         zerror(zlogp, B_FALSE, "WARNING: %s",
4469                             zonecfg_strerror(res));
4470         }
4471 
4472         /* Update saved pool name in case it has changed */
4473         (void) zonecfg_get_poolname(snap_hndl, zone_name, pool_name,
4474             sizeof (pool_name));
4475 

4476         return (Z_OK);
4477 }
4478 
4479 static void
4480 report_prop_err(zlog_t *zlogp, const char *name, const char *value, int res)
4481 {
4482         switch (res) {
4483         case Z_TOO_BIG:
4484                 zerror(zlogp, B_FALSE, "%s property value is too large.", name);
4485                 break;
4486 
4487         case Z_INVALID_PROPERTY:
4488                 zerror(zlogp, B_FALSE, "%s property value \"%s\" is not valid",
4489                     name, value);
4490                 break;
4491 
4492         default:
4493                 zerror(zlogp, B_TRUE, "fetching property %s: %d", name, res);
4494                 break;
4495         }


4556         } else {
4557                 /* Has a value, append the defaults */
4558                 if (strlcat(fsallowed, ",", len) >= len ||
4559                     strlcat(fsallowed, DFLT_FS_ALLOWED, len) >= len) {
4560                         report_prop_err(zlogp, "fs-allowed", fsallowed,
4561                             Z_TOO_BIG);
4562                         return (Z_TOO_BIG);
4563                 }
4564         }
4565 
4566         if (zone_setattr(zoneid, ZONE_ATTR_FS_ALLOWED, fsallowedp, len) != 0) {
4567                 zerror(zlogp, B_TRUE,
4568                     "fs-allowed couldn't be set: %s: %d", fsallowedp, res);
4569                 return (Z_SYSTEM);
4570         }
4571 
4572         return (Z_OK);
4573 }
4574 
4575 static int
4576 setup_zone_attrs(zlog_t *zlogp, zoneid_t zoneid)
4577 {

4578         int res = Z_OK;
4579 
4580         if ((res = setup_zone_hostid(snap_hndl, zlogp, zoneid)) != Z_OK)





4581                 goto out;

4582 
4583         if ((res = setup_zone_fs_allowed(snap_hndl, zlogp, zoneid)) != Z_OK)
4584                 goto out;
4585 



4586 out:

4587         return (res);
4588 }
4589 
4590 /*
4591  * The zone_did is a persistent debug ID.  Each zone should have a unique ID
4592  * in the kernel.  This is used for things like DTrace which want to monitor
4593  * zones across reboots.  They can't use the zoneid since that changes on
4594  * each boot.
4595  */
4596 zoneid_t
4597 vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zone_did)
4598 {
4599         zoneid_t rval = -1;
4600         priv_set_t *privs;
4601         char rootpath[MAXPATHLEN];
4602         char *rctlbuf = NULL;
4603         size_t rctlbufsz = 0;
4604         char *zfsbuf = NULL;
4605         size_t zfsbufsz = 0;
4606         zoneid_t zoneid = -1;
4607         int xerr;
4608         char *kzone;
4609         FILE *fp = NULL;
4610         tsol_zcent_t *zcent = NULL;
4611         int match = 0;
4612         int doi = 0;
4613         int flags = -1;
4614         zone_iptype_t iptype;
4615 
4616         if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
4617                 zerror(zlogp, B_TRUE, "unable to determine zone root");
4618                 return (-1);
4619         }
4620         if (zonecfg_in_alt_root())
4621                 resolve_lofs(zlogp, rootpath, sizeof (rootpath));
4622 
4623         if (vplat_get_iptype(zlogp, &iptype) < 0) {
4624                 zerror(zlogp, B_TRUE, "unable to determine ip-type");
4625                 return (-1);
4626         }
4627         switch (iptype) {
4628         case ZS_SHARED:
4629                 flags = 0;
4630                 break;
4631         case ZS_EXCLUSIVE:
4632                 flags = ZCF_NET_EXCL;
4633                 break;
4634         }
4635         if (flags == -1)
4636                 abort();
4637 
4638         if ((privs = priv_allocset()) == NULL) {
4639                 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
4640                 return (-1);
4641         }
4642         priv_emptyset(privs);
4643         if (get_privset(zlogp, privs, mount_cmd) != 0)
4644                 goto error;
4645 
4646         if (mount_cmd == Z_MNT_BOOT &&
4647             get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) {
4648                 zerror(zlogp, B_FALSE, "Unable to get list of rctls");
4649                 goto error;
4650         }
4651 
4652         if (get_datasets(zlogp, &zfsbuf, &zfsbufsz) != 0) {
4653                 zerror(zlogp, B_FALSE, "Unable to get list of ZFS datasets");
4654                 goto error;
4655         }
4656 


4720                     NULL, 0) == 0) {
4721                         zerror(zlogp, B_FALSE, "scratch zone already running");
4722                         goto error;
4723                 }
4724                 /* This is the preferred name */
4725                 (void) snprintf(kernzone, sizeof (kernzone), "SUNWlu-%s",
4726                     zone_name);
4727                 srandom(getpid());
4728                 while (zonecfg_reverse_scratch(fp, kernzone, NULL, 0, NULL,
4729                     0) == 0) {
4730                         /* This is just an arbitrary name; note "." usage */
4731                         (void) snprintf(kernzone, sizeof (kernzone),
4732                             "SUNWlu.%08lX%08lX", random(), random());
4733                 }
4734                 kzone = kernzone;
4735         }
4736 
4737         xerr = 0;
4738         if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf,
4739             rctlbufsz, zfsbuf, zfsbufsz, &xerr, match, doi, zlabel,
4740             flags, zone_did)) == -1) {
4741                 if (xerr == ZE_AREMOUNTS) {
4742                         if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) {
4743                                 zerror(zlogp, B_FALSE,
4744                                     "An unknown file-system is mounted on "
4745                                     "a subdirectory of %s", rootpath);
4746                         } else {
4747 
4748                                 zerror(zlogp, B_FALSE,
4749                                     "These file-systems are mounted on "
4750                                     "subdirectories of %s:", rootpath);
4751                                 (void) zonecfg_find_mounts(rootpath,
4752                                     prtmount, zlogp);
4753                         }
4754                 } else if (xerr == ZE_CHROOTED) {
4755                         zerror(zlogp, B_FALSE, "%s: "
4756                             "cannot create a zone from a chrooted "
4757                             "environment", "zone_create");
4758                 } else if (xerr == ZE_LABELINUSE) {
4759                         char zonename[ZONENAME_MAX];
4760                         (void) getzonenamebyid(getzoneidbylabel(zlabel),


4766                 }
4767                 goto error;
4768         }
4769 
4770         if (zonecfg_in_alt_root() &&
4771             zonecfg_add_scratch(fp, zone_name, kernzone,
4772             zonecfg_get_root()) == -1) {
4773                 zerror(zlogp, B_TRUE, "cannot add mapfile entry");
4774                 goto error;
4775         }
4776 
4777         /*
4778          * The following actions are not performed when merely mounting a zone
4779          * for administrative use.
4780          */
4781         if (mount_cmd == Z_MNT_BOOT) {
4782                 brand_handle_t bh;
4783                 struct brand_attr attr;
4784                 char modname[MAXPATHLEN];
4785 
4786                 if (setup_zone_attrs(zlogp, zoneid) != Z_OK)
4787                         goto error;
4788 
4789                 if ((bh = brand_open(brand_name)) == NULL) {
4790                         zerror(zlogp, B_FALSE,
4791                             "unable to determine brand name");
4792                         goto error;
4793                 }
4794 
4795                 if (!is_system_labeled() &&
4796                     (strcmp(brand_name, LABELED_BRAND_NAME) == 0)) {
4797                         brand_close(bh);
4798                         zerror(zlogp, B_FALSE,
4799                             "cannot boot labeled zone on unlabeled system");
4800                         goto error;
4801                 }
4802 
4803                 /*
4804                  * If this brand requires any kernel support, now is the time to
4805                  * get it loaded and initialized.
4806                  */


4824                                 goto error;
4825                         }
4826                 }
4827 
4828                 if (setup_zone_rm(zlogp, zone_name, zoneid) != Z_OK)
4829                         goto error;
4830 
4831                 set_mlps(zlogp, zoneid, zcent);
4832         }
4833 
4834         rval = zoneid;
4835         zoneid = -1;
4836 
4837 error:
4838         if (zoneid != -1) {
4839                 (void) zone_shutdown(zoneid);
4840                 (void) zone_destroy(zoneid);
4841         }
4842         if (rctlbuf != NULL)
4843                 free(rctlbuf);
4844         if (zfsbuf != NULL)
4845                 free(zfsbuf);
4846         priv_freeset(privs);
4847         if (fp != NULL)
4848                 zonecfg_close_scratch(fp);
4849         lofs_discard_mnttab();
4850         if (zcent != NULL)
4851                 tsol_freezcent(zcent);
4852         return (rval);
4853 }
4854 
4855 /*
4856  * Enter the zone and write a /etc/zones/index file there.  This allows
4857  * libzonecfg (and thus zoneadm) to report the UUID and potentially other zone
4858  * details from inside the zone.
4859  */
4860 static void
4861 write_index_file(zoneid_t zoneid)
4862 {
4863         FILE *zef;
4864         FILE *zet;
4865         struct zoneent *zep;


4964                         lofs_discard_mnttab();
4965                         return (-1);
4966                 }
4967 
4968                 switch (iptype) {
4969                 case ZS_SHARED:
4970                         /* Always do this to make lo0 get configured */
4971                         if (configure_shared_network_interfaces(zlogp) != 0) {
4972                                 lofs_discard_mnttab();
4973                                 return (-1);
4974                         }
4975                         break;
4976                 case ZS_EXCLUSIVE:
4977                         if (configure_exclusive_network_interfaces(zlogp,
4978                             zoneid) !=
4979                             0) {
4980                                 lofs_discard_mnttab();
4981                                 return (-1);
4982                         }
4983                         break;
4984                 default:
4985                         abort();
4986                 }
4987         }
4988 
4989         write_index_file(zoneid);
4990 
4991         lofs_discard_mnttab();
4992         return (0);
4993 }
4994 
4995 static int
4996 lu_root_teardown(zlog_t *zlogp)
4997 {
4998         char zroot[MAXPATHLEN];
4999 
5000         if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
5001                 zerror(zlogp, B_FALSE, "unable to determine zone root");
5002                 return (-1);
5003         }
5004         root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
5005 


5041 
5042                 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
5043                         zerror(zlogp, B_TRUE, "cannot open mapfile");
5044                         return (-1);
5045                 }
5046                 retv = -1;
5047                 if (zonecfg_lock_scratch(fp) != 0)
5048                         zerror(zlogp, B_TRUE, "cannot lock mapfile");
5049                 else if (zonecfg_delete_scratch(fp, kernzone) != 0)
5050                         zerror(zlogp, B_TRUE, "cannot delete map entry");
5051                 else
5052                         retv = 0;
5053                 zonecfg_close_scratch(fp);
5054                 return (retv);
5055         } else {
5056                 return (0);
5057         }
5058 }
5059 
5060 int
5061 vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting,
5062     boolean_t debug)
5063 {
5064         char *kzone;
5065         zoneid_t zoneid;
5066         int res;
5067         char pool_err[128];
5068         char cmdbuf[MAXPATHLEN];
5069         brand_handle_t bh = NULL;
5070         dladm_status_t status;
5071         char errmsg[DLADM_STRSIZE];
5072         ushort_t flags;
5073 
5074         kzone = zone_name;
5075         if (zonecfg_in_alt_root()) {
5076                 FILE *fp;
5077 
5078                 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
5079                         zerror(zlogp, B_TRUE, "unable to open map file");
5080                         goto error;
5081                 }
5082                 if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
5083                     kernzone, sizeof (kernzone)) != 0) {
5084                         zerror(zlogp, B_FALSE, "unable to find scratch zone");
5085                         zonecfg_close_scratch(fp);
5086                         goto error;
5087                 }
5088                 zonecfg_close_scratch(fp);
5089                 kzone = kernzone;
5090         }
5091 
5092         if ((zoneid = getzoneidbyname(kzone)) == ZONE_ID_UNDEFINED) {
5093                 if (!bringup_failure_recovery)
5094                         zerror(zlogp, B_TRUE, "unable to get zoneid");
5095                 if (unmount_cmd)
5096                         (void) lu_root_teardown(zlogp);
5097                 goto error;
5098         }
5099 
5100         if (remove_datalink_pool(zlogp, zoneid) != 0)
5101                 zerror(zlogp, B_FALSE, "unable clear datalink pool property");


5102 
5103         if (remove_datalink_protect(zlogp, zoneid) != 0)
5104                 zerror(zlogp, B_FALSE,
5105                     "unable clear datalink protect property");


5106 
5107         /*
5108          * The datalinks assigned to the zone will be removed from the NGZ as
5109          * part of zone_shutdown() so that we need to remove protect/pool etc.
5110          * before zone_shutdown(). Even if the shutdown itself fails, the zone
5111          * will not be able to violate any constraints applied because the
5112          * datalinks are no longer available to the zone.
5113          */
5114         if (zone_shutdown(zoneid) != 0) {
5115                 zerror(zlogp, B_TRUE, "unable to shutdown zone");
5116                 goto error;
5117         }
5118 
5119         /* Get a handle to the brand info for this zone */
5120         if ((bh = brand_open(brand_name)) == NULL) {
5121                 zerror(zlogp, B_FALSE, "unable to determine zone brand");
5122                 return (-1);
5123         }
5124         /*
5125          * If there is a brand 'halt' callback, execute it now to give the
5126          * brand a chance to cleanup any custom configuration.
5127          */
5128         (void) strcpy(cmdbuf, EXEC_PREFIX);
5129         if (brand_get_halt(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
5130             sizeof (cmdbuf) - EXEC_LEN) < 0) {
5131                 brand_close(bh);
5132                 zerror(zlogp, B_FALSE, "unable to determine branded zone's "
5133                     "halt callback.");
5134                 goto error;
5135         }
5136         brand_close(bh);
5137 
5138         if ((strlen(cmdbuf) > EXEC_LEN) &&
5139             (do_subproc(zlogp, cmdbuf, NULL, debug) != Z_OK)) {
5140                 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
5141                 goto error;
5142         }
5143 
5144         if (!unmount_cmd) {
5145                 zone_iptype_t iptype;
5146 
5147                 if (zone_getattr(zoneid, ZONE_ATTR_FLAGS, &flags,
5148                     sizeof (flags)) < 0) {
5149                         if (vplat_get_iptype(zlogp, &iptype) < 0) {
5150                                 zerror(zlogp, B_TRUE, "unable to determine "
5151                                     "ip-type");
5152                                 goto error;
5153                         }
5154                 } else {
5155                         if (flags & ZF_NET_EXCL)
5156                                 iptype = ZS_EXCLUSIVE;
5157                         else
5158                                 iptype = ZS_SHARED;
5159                 }
5160 
5161                 switch (iptype) {
5162                 case ZS_SHARED:
5163                         if (unconfigure_shared_network_interfaces(zlogp,
5164                             zoneid) != 0) {
5165                                 zerror(zlogp, B_FALSE, "unable to unconfigure "
5166                                     "network interfaces in zone");
5167                                 goto error;
5168                         }
5169                         break;
5170                 case ZS_EXCLUSIVE:






5171                         status = dladm_zone_halt(dld_handle, zoneid);
5172                         if (status != DLADM_STATUS_OK) {
5173                                 zerror(zlogp, B_FALSE, "unable to notify "
5174                                     "dlmgmtd of zone halt: %s",
5175                                     dladm_status2str(status, errmsg));
5176                         }
5177                         break;
5178                 }
5179         }
5180 
5181         if (!unmount_cmd && tcp_abort_connections(zlogp, zoneid) != 0) {
5182                 zerror(zlogp, B_TRUE, "unable to abort TCP connections");
5183                 goto error;
5184         }
5185 
5186         if (unmount_filesystems(zlogp, zoneid, unmount_cmd) != 0) {
5187                 zerror(zlogp, B_FALSE,
5188                     "unable to unmount file systems in zone");
5189                 goto error;
5190         }
5191 
5192         /*
5193          * If we are rebooting then we normally don't want to destroy an
5194          * existing temporary pool at this point so that we can just reuse it
5195          * when the zone boots back up.  However, it is also possible we were
5196          * running with a temporary pool and the zone configuration has been
5197          * modified to no longer use a temporary pool.  In that case we need
5198          * to destroy the temporary pool now.  This case looks like the case
5199          * where we never had a temporary pool configured but
5200          * zonecfg_destroy_tmp_pool will do the right thing either way.
5201          */
5202         if (!unmount_cmd) {
5203                 boolean_t destroy_tmp_pool = B_TRUE;
5204 
5205                 if (rebooting) {
5206                         struct zone_psettab pset_tab;

5207 
5208                         if (zonecfg_lookup_pset(snap_hndl, &pset_tab) == Z_OK)


5209                                 destroy_tmp_pool = B_FALSE;


5210                 }
5211 
5212                 if (destroy_tmp_pool) {
5213                         if ((res = zonecfg_destroy_tmp_pool(zone_name, pool_err,
5214                             sizeof (pool_err))) != Z_OK) {
5215                                 if (res == Z_POOL)
5216                                         zerror(zlogp, B_FALSE, pool_err);
5217                         }
5218                 }
5219         }
5220 
5221         remove_mlps(zlogp, zoneid);
5222 
5223         if (zone_destroy(zoneid) != 0) {
5224                 zerror(zlogp, B_TRUE, "unable to destroy zone");
5225                 goto error;
5226         }
5227 
5228         /*
5229          * Special teardown for alternate boot environments: remove the tmpfs


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.

  24  * Copyright (c) 2015 by Delphix. All rights reserved.
  25  * Copyright 2016, Joyent Inc.
  26  */
  27 
  28 /*
  29  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  30  */
  31 
  32 /*
  33  * This module contains functions used to bring up and tear down the
  34  * Virtual Platform: [un]mounting file-systems, [un]plumbing network
  35  * interfaces, [un]configuring devices, establishing resource controls,
  36  * and creating/destroying the zone in the kernel.  These actions, on
  37  * the way up, ready the zone; on the way down, they halt the zone.
  38  * See the much longer block comment at the beginning of zoneadmd.c
  39  * for a bigger picture of how the whole program functions.
  40  *
  41  * This module also has primary responsibility for the layout of "scratch
  42  * zones."  These are mounted, but inactive, zones that are used during
  43  * operating system upgrade and potentially other administrative action.  The
  44  * scratch zone environment is similar to the miniroot environment.  The zone's
  45  * actual root is mounted read-write on /a, and the standard paths (/usr,


 119 #include <libbrand.h>
 120 #include <sys/brand.h>
 121 #include <libzonecfg.h>
 122 #include <synch.h>
 123 
 124 #include "zoneadmd.h"
 125 #include <tsol/label.h>
 126 #include <libtsnet.h>
 127 #include <sys/priv.h>
 128 #include <libinetutil.h>
 129 
 130 #define V4_ADDR_LEN     32
 131 #define V6_ADDR_LEN     128
 132 
 133 #define RESOURCE_DEFAULT_OPTS \
 134         MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES
 135 
 136 #define DFSTYPES        "/etc/dfs/fstypes"
 137 #define MAXTNZLEN       2048
 138 



 139 /* a reasonable estimate for the number of lwps per process */
 140 #define LWPS_PER_PROCESS        10
 141 
 142 /* for routing socket */
 143 static int rts_seqno = 0;
 144 
 145 /* mangled zone name when mounting in an alternate root environment */
 146 static char kernzone[ZONENAME_MAX];
 147 
 148 /* array of cached mount entries for resolve_lofs */
 149 static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max;
 150 
 151 /* for Trusted Extensions */
 152 static tsol_zcent_t *get_zone_label(zlog_t *, priv_set_t *);
 153 static int tsol_mounts(zlog_t *, char *, char *);
 154 static void tsol_unmounts(zlog_t *, char *);
 155 
 156 static m_label_t *zlabel = NULL;
 157 static m_label_t *zid_label = NULL;
 158 static priv_set_t *zprivs = NULL;


 160 static const char *DFLT_FS_ALLOWED = "hsfs,smbfs,nfs,nfs3,nfs4,nfsdyn";
 161 
 162 typedef struct zone_proj_rctl_map {
 163         char *zpr_zone_rctl;
 164         char *zpr_project_rctl;
 165 } zone_proj_rctl_map_t;
 166 
 167 static zone_proj_rctl_map_t zone_proj_rctl_map[] = {
 168         {"zone.max-msg-ids",    "project.max-msg-ids"},
 169         {"zone.max-sem-ids",    "project.max-sem-ids"},
 170         {"zone.max-shm-ids",    "project.max-shm-ids"},
 171         {"zone.max-shm-memory", "project.max-shm-memory"},
 172         {NULL,                  NULL}
 173 };
 174 
 175 /* from libsocket, not in any header file */
 176 extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
 177 
 178 /* from zoneadmd */
 179 extern char query_hook[];

 180 
 181 /*
 182  * For each "net" resource configured in zonecfg, we track a zone_addr_list_t
 183  * node in a linked list that is sorted by linkid.  The list is constructed as
 184  * the xml configuration file is parsed, and the information
 185  * contained in each node is added to the kernel before the zone is
 186  * booted, to be retrieved and applied from within the exclusive-IP NGZ
 187  * on boot.
 188  */
 189 typedef struct zone_addr_list {
 190         struct zone_addr_list *za_next;
 191         datalink_id_t za_linkid;        /* datalink_id_t of interface */
 192         struct zone_nwiftab za_nwiftab; /* address, defrouter properties */
 193 } zone_addr_list_t;
 194 
 195 /*
 196  * An optimization for build_mnttable: reallocate (and potentially copy the
 197  * data) only once every N times through the loop.
 198  */
 199 #define MNTTAB_HUNK     32
 200 
 201 /* some handy macros */
 202 #define SIN(s)  ((struct sockaddr_in *)s)
 203 #define SIN6(s) ((struct sockaddr_in6 *)s)
 204 
 205 /*
 206  * Private autofs system call
 207  */
 208 extern int _autofssys(int, void *);
 209 
 210 static int
 211 autofs_cleanup(zoneid_t zoneid)
 212 {
 213         /*
 214          * Ask autofs to unmount all trigger nodes in the given zone.
 215          */
 216         return (_autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid));
 217 }
 218 
 219 static void
 220 free_mnttable(struct mnttab *mnt_array, uint_t nelem)
 221 {
 222         uint_t i;
 223 
 224         if (mnt_array == NULL)
 225                 return;
 226         for (i = 0; i < nelem; i++) {
 227                 free(mnt_array[i].mnt_mountp);
 228                 free(mnt_array[i].mnt_fstype);
 229                 free(mnt_array[i].mnt_special);
 230                 free(mnt_array[i].mnt_mntopts);
 231                 assert(mnt_array[i].mnt_time == NULL);
 232         }
 233         free(mnt_array);
 234 }
 235 
 236 /*


 587         for (i = 0; remote_fstypes[i] != NULL; i++) {
 588                 if (strcmp(remote_fstypes[i], fstype) == 0)
 589                         return (B_TRUE);
 590         }
 591         return (B_FALSE);
 592 }
 593 
 594 /*
 595  * This converts a zone root path (normally of the form .../root) to a Live
 596  * Upgrade scratch zone root (of the form .../lu).
 597  */
 598 static void
 599 root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved)
 600 {
 601         if (!isresolved && zonecfg_in_alt_root())
 602                 resolve_lofs(zlogp, zroot, zrootlen);
 603         (void) strcpy(strrchr(zroot, '/') + 1, "lu");
 604 }
 605 
 606 /*


















 607  * The general strategy for unmounting filesystems is as follows:
 608  *
 609  * - Remote filesystems may be dead, and attempting to contact them as
 610  * part of a regular unmount may hang forever; we want to always try to
 611  * forcibly unmount such filesystems and only fall back to regular
 612  * unmounts if the filesystem doesn't support forced unmounts.
 613  *
 614  * - We don't want to unnecessarily corrupt metadata on local
 615  * filesystems (ie UFS), so we want to start off with graceful unmounts,
 616  * and only escalate to doing forced unmounts if we get stuck.
 617  *
 618  * We start off walking backwards through the mount table.  This doesn't
 619  * give us strict ordering but ensures that we try to unmount submounts
 620  * first.  We thus limit the number of failed umount2(2) calls.
 621  *
 622  * The mechanism for determining if we're stuck is to count the number
 623  * of failed unmounts each iteration through the mount table.  This
 624  * gives us an upper bound on the number of filesystems which remain
 625  * mounted (autofs trigger nodes are dealt with separately).  If at the
 626  * end of one unmount+autofs_cleanup cycle we still have the same number
 627  * of mounts that we started out with, we're stuck and try a forced
 628  * unmount.  If that fails (filesystem doesn't support forced unmounts)
 629  * then we bail and are unable to teardown the zone.  If it succeeds,
 630  * we're no longer stuck so we continue with our policy of trying
 631  * graceful unmounts first.
 632  *
 633  * Zone must be down (ie, no processes or threads active).
 634  */
 635 static int
 636 unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
 637 {
 638         int error = 0;

 639         FILE *mnttab;
 640         struct mnttab *mnts;
 641         uint_t nmnt;
 642         char zroot[MAXPATHLEN + 1];
 643         size_t zrootlen;
 644         uint_t oldcount = UINT_MAX;
 645         boolean_t stuck = B_FALSE;
 646         char **remote_fstypes = NULL;
 647 
 648         if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
 649                 zerror(zlogp, B_FALSE, "unable to determine zone root");
 650                 return (-1);
 651         }
 652         if (unmount_cmd)
 653                 root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
 654 
 655         (void) strcat(zroot, "/");
 656         zrootlen = strlen(zroot);
 657 
 658         /*


 706                         path = mnp->mnt_mountp;
 707                         unmounted = B_FALSE;
 708                         /*
 709                          * Try forced unmount first for remote filesystems.
 710                          *
 711                          * Not all remote filesystems support forced unmounts,
 712                          * so if this fails (ENOTSUP) we'll continue on
 713                          * and try a regular unmount.
 714                          */
 715                         if (is_remote_fstype(mnp->mnt_fstype, remote_fstypes)) {
 716                                 if (umount2(path, MS_FORCE) == 0)
 717                                         unmounted = B_TRUE;
 718                         }
 719                         /*
 720                          * Try forced unmount if we're stuck.
 721                          */
 722                         if (stuck) {
 723                                 if (umount2(path, MS_FORCE) == 0) {
 724                                         unmounted = B_TRUE;
 725                                         stuck = B_FALSE;

 726                                 } else {
 727                                         /*
 728                                          * The first failure indicates a
 729                                          * mount we won't be able to get
 730                                          * rid of automatically, so we
 731                                          * bail.






 732                                          */








 733                                         error++;
 734                                         zerror(zlogp, B_FALSE,
 735                                             "unable to unmount '%s'", path);

 736                                         free_mnttable(mnts, nmnt);
 737                                         goto out;




 738                                 }
 739                         }

 740                         /*
 741                          * Try regular unmounts for everything else.
 742                          */
 743                         if (!unmounted && umount2(path, 0) != 0)
 744                                 newcount++;
 745                 }
 746                 free_mnttable(mnts, nmnt);
 747 
 748                 if (newcount == 0)
 749                         break;
 750                 if (newcount >= oldcount) {
 751                         /*
 752                          * Last round didn't unmount anything; we're stuck and
 753                          * should start trying forced unmounts.
 754                          */
 755                         stuck = B_TRUE;
 756                 }
 757                 oldcount = newcount;
 758 
 759                 /*


1055 mount_one_dev_device_cb(void *arg, const char *match, const char *name)
1056 {
1057         di_prof_t prof = arg;
1058 
1059         if (name == NULL)
1060                 return (di_prof_add_dev(prof, match));
1061         return (di_prof_add_map(prof, match, name));
1062 }
1063 
1064 static int
1065 mount_one_dev_symlink_cb(void *arg, const char *source, const char *target)
1066 {
1067         di_prof_t prof = arg;
1068 
1069         return (di_prof_add_symlink(prof, source, target));
1070 }
1071 
1072 int
1073 vplat_get_iptype(zlog_t *zlogp, zone_iptype_t *iptypep)
1074 {
1075         zone_dochandle_t handle;
1076 
1077         if ((handle = zonecfg_init_handle()) == NULL) {
1078                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
1079                 return (-1);
1080         }
1081         if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
1082                 zerror(zlogp, B_FALSE, "invalid configuration");
1083                 zonecfg_fini_handle(handle);
1084                 return (-1);
1085         }
1086         if (zonecfg_get_iptype(handle, iptypep) != Z_OK) {
1087                 zerror(zlogp, B_FALSE, "invalid ip-type configuration");
1088                 zonecfg_fini_handle(handle);
1089                 return (-1);
1090         }
1091         zonecfg_fini_handle(handle);
1092         return (0);
1093 }
1094 
1095 /*
1096  * Apply the standard lists of devices/symlinks/mappings and the user-specified
1097  * list of devices (via zonecfg) to the /dev filesystem.  The filesystem will
1098  * use these as a profile/filter to determine what exists in /dev.
1099  */
1100 static int
1101 mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd)
1102 {
1103         char                    brand[MAXNAMELEN];
1104         zone_dochandle_t        handle = NULL;
1105         brand_handle_t          bh = NULL;
1106         struct zone_devtab      ztab;
1107         di_prof_t               prof = NULL;
1108         int                     err;
1109         int                     retval = -1;
1110         zone_iptype_t           iptype;
1111         const char              *curr_iptype;
1112 
1113         if (di_prof_init(devpath, &prof)) {
1114                 zerror(zlogp, B_TRUE, "failed to initialize profile");
1115                 goto cleanup;
1116         }
1117 
1118         /*
1119          * Get a handle to the brand info for this zone.
1120          * If we are mounting the zone, then we must always use the default
1121          * brand device mounts.
1122          */
1123         if (ALT_MOUNT(mount_cmd)) {
1124                 (void) strlcpy(brand, default_brand, sizeof (brand));
1125         } else {
1126                 (void) strlcpy(brand, brand_name, sizeof (brand));
1127         }
1128 
1129         if ((bh = brand_open(brand)) == NULL) {
1130                 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1131                 goto cleanup;
1132         }
1133 
1134         if (vplat_get_iptype(zlogp, &iptype) < 0) {
1135                 zerror(zlogp, B_TRUE, "unable to determine ip-type");
1136                 goto cleanup;
1137         }
1138         switch (iptype) {
1139         case ZS_SHARED:
1140                 curr_iptype = "shared";
1141                 break;
1142         case ZS_EXCLUSIVE:
1143                 curr_iptype = "exclusive";
1144                 break;
1145         }


1146 
1147         if (brand_platform_iter_devices(bh, zone_name,
1148             mount_one_dev_device_cb, prof, curr_iptype) != 0) {
1149                 zerror(zlogp, B_TRUE, "failed to add standard device");
1150                 goto cleanup;
1151         }
1152 
1153         if (brand_platform_iter_link(bh,
1154             mount_one_dev_symlink_cb, prof) != 0) {
1155                 zerror(zlogp, B_TRUE, "failed to add standard symlink");
1156                 goto cleanup;
1157         }
1158 
1159         /* Add user-specified devices and directories */
1160         if ((handle = zonecfg_init_handle()) == NULL) {
1161                 zerror(zlogp, B_FALSE, "can't initialize zone handle");
1162                 goto cleanup;
1163         }
1164         if ((err = zonecfg_get_handle(zone_name, handle)) != 0) {
1165                 zerror(zlogp, B_FALSE, "can't get handle for zone "
1166                     "%s: %s", zone_name, zonecfg_strerror(err));
1167                 goto cleanup;
1168         }
1169         if ((err = zonecfg_setdevent(handle)) != 0) {
1170                 zerror(zlogp, B_FALSE, "%s: %s", zone_name,
1171                     zonecfg_strerror(err));
1172                 goto cleanup;
1173         }
1174         while (zonecfg_getdevent(handle, &ztab) == Z_OK) {
1175                 if (di_prof_add_dev(prof, ztab.zone_dev_match)) {
1176                         zerror(zlogp, B_TRUE, "failed to add "
1177                             "user-specified device");
1178                         goto cleanup;
1179                 }
1180         }
1181         (void) zonecfg_enddevent(handle);
1182 
1183         /* Send profile to kernel */
1184         if (di_prof_commit(prof)) {
1185                 zerror(zlogp, B_TRUE, "failed to commit profile");
1186                 goto cleanup;
1187         }
1188 
1189         retval = 0;
1190 
1191 cleanup:
1192         if (bh != NULL)
1193                 brand_close(bh);
1194         if (handle != NULL)
1195                 zonecfg_fini_handle(handle);
1196         if (prof)
1197                 di_prof_fini(prof);
1198         return (retval);
1199 }
1200 
1201 static int
1202 mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath,
1203     zone_mnt_t mount_cmd)
1204 {
1205         char path[MAXPATHLEN];
1206         char optstr[MAX_MNTOPT_STR];
1207         zone_fsopt_t *optptr;
1208         int rv;
1209 
1210         if ((rv = valid_mount_path(zlogp, rootpath, fsptr->zone_fs_special,
1211             fsptr->zone_fs_dir, fsptr->zone_fs_type)) < 0) {
1212                 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
1213                     rootpath, fsptr->zone_fs_dir);
1214                 return (-1);
1215         } else if (rv > 0) {


1669                 fsp->zone_fs_special[0] = '\0';
1670                 if (strcmp(fsp->zone_fs_type, MNTTYPE_LOFS) == 0) {
1671                         (void) strlcat(fsp->zone_fs_special, zonecfg_get_root(),
1672                             sizeof (fsp->zone_fs_special));
1673                 }
1674                 (void) strlcat(fsp->zone_fs_special, fstab.zone_fs_special,
1675                     sizeof (fsp->zone_fs_special));
1676         }
1677         (void) zonecfg_endfsent(handle);
1678         return (0);
1679 }
1680 
1681 static int
1682 mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
1683 {
1684         char rootpath[MAXPATHLEN];
1685         char brand[MAXNAMELEN];
1686         char luroot[MAXPATHLEN];
1687         int i, num_fs = 0;
1688         struct zone_fstab *fs_ptr = NULL;
1689         zone_dochandle_t handle = NULL;
1690         zone_state_t zstate;
1691         brand_handle_t bh;
1692         plat_gmount_cb_data_t cb;
1693 
1694         if (zone_get_state(zone_name, &zstate) != Z_OK ||
1695             (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) {
1696                 zerror(zlogp, B_FALSE,
1697                     "zone must be in '%s' or '%s' state to mount file-systems",
1698                     zone_state_str(ZONE_STATE_READY),
1699                     zone_state_str(ZONE_STATE_MOUNTED));
1700                 goto bad;
1701         }
1702 
1703         if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
1704                 zerror(zlogp, B_TRUE, "unable to determine zone root");
1705                 goto bad;
1706         }
1707 
1708         if ((handle = zonecfg_init_handle()) == NULL) {
1709                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
1710                 goto bad;
1711         }
1712         if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK ||
1713             zonecfg_setfsent(handle) != Z_OK) {
1714                 zerror(zlogp, B_FALSE, "invalid configuration");
1715                 goto bad;
1716         }
1717 
1718         /*
1719          * If we are mounting the zone, then we must always use the default
1720          * brand global mounts.
1721          */
1722         if (ALT_MOUNT(mount_cmd)) {
1723                 (void) strlcpy(brand, default_brand, sizeof (brand));
1724         } else {
1725                 (void) strlcpy(brand, brand_name, sizeof (brand));
1726         }
1727 
1728         /* Get a handle to the brand info for this zone */
1729         if ((bh = brand_open(brand)) == NULL) {
1730                 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1731                 zonecfg_fini_handle(handle);
1732                 return (-1);
1733         }
1734 
1735         /*
1736          * Get the list of global filesystems to mount from the brand
1737          * configuration.
1738          */
1739         cb.pgcd_zlogp = zlogp;
1740         cb.pgcd_fs_tab = &fs_ptr;
1741         cb.pgcd_num_fs = &num_fs;
1742         if (brand_platform_iter_gmounts(bh, zone_name, zonepath,
1743             plat_gmount_cb, &cb) != 0) {
1744                 zerror(zlogp, B_FALSE, "unable to mount filesystems");
1745                 brand_close(bh);
1746                 zonecfg_fini_handle(handle);
1747                 return (-1);
1748         }
1749         brand_close(bh);
1750 
1751         /*
1752          * Iterate through the rest of the filesystems. Sort them all,
1753          * then mount them in sorted order. This is to make sure the
1754          * higher level directories (e.g., /usr) get mounted before
1755          * any beneath them (e.g., /usr/local).
1756          */
1757         if (mount_filesystems_fsent(handle, zlogp, &fs_ptr, &num_fs,
1758             mount_cmd) != 0)
1759                 goto bad;
1760 
1761         zonecfg_fini_handle(handle);
1762         handle = NULL;
1763 
1764         /*
1765          * Normally when we mount a zone all the zone filesystems
1766          * get mounted relative to rootpath, which is usually
1767          * <zonepath>/root.  But when mounting a zone for administration
1768          * purposes via the zone "mount" state, build_mounted_pre_var()
1769          * updates rootpath to be <zonepath>/lu/a so we'll mount all
1770          * the zone's filesystems there instead.
1771          *
1772          * build_mounted_pre_var() and build_mounted_post_var() will
1773          * also do some extra work to create directories and lofs mount
1774          * a bunch of global zone file system paths into <zonepath>/lu.
1775          *
1776          * This allows us to be able to enter the zone (now rooted at
1777          * <zonepath>/lu) and run the upgrade/patch tools that are in the
1778          * global zone and have them upgrade the to-be-modified zone's
1779          * files mounted on /a.  (Which mirrors the existing standard
1780          * upgrade environment.)
1781          *
1782          * There is of course one catch.  When doing the upgrade
1783          * we need <zoneroot>/lu/dev to be the /dev filesystem


1839         }
1840         if (ALT_MOUNT(mount_cmd) &&
1841             !build_mounted_post_var(zlogp, mount_cmd, rootpath, luroot))
1842                 goto bad;
1843 
1844         /*
1845          * For Trusted Extensions cross-mount each lower level /export/home
1846          */
1847         if (mount_cmd == Z_MNT_BOOT &&
1848             tsol_mounts(zlogp, zone_name, rootpath) != 0)
1849                 goto bad;
1850 
1851         free_fs_data(fs_ptr, num_fs);
1852 
1853         /*
1854          * Everything looks fine.
1855          */
1856         return (0);
1857 
1858 bad:
1859         if (handle != NULL)
1860                 zonecfg_fini_handle(handle);
1861         free_fs_data(fs_ptr, num_fs);
1862         return (-1);
1863 }
1864 
1865 /* caller makes sure neither parameter is NULL */
1866 static int
1867 addr2netmask(char *prefixstr, int maxprefixlen, uchar_t *maskstr)
1868 {
1869         int prefixlen;
1870 
1871         prefixlen = atoi(prefixstr);
1872         if (prefixlen < 0 || prefixlen > maxprefixlen)
1873                 return (1);
1874         while (prefixlen > 0) {
1875                 if (prefixlen >= 8) {
1876                         *maskstr++ = 0xFF;
1877                         prefixlen -= 8;
1878                         continue;
1879                 }
1880                 *maskstr |= 1 << (8 - prefixlen);


2196                 zerror(zlogp, B_TRUE, "could not get socket");
2197                 return (-1);
2198         }
2199 
2200         /*
2201          * This is a similar kind of "hack" like in addif() to get around
2202          * the problem of SIOCLIFADDIF.  The problem is that this ioctl
2203          * does not include the netmask when adding a logical interface.
2204          * To get around this problem, we first add the logical interface
2205          * with a 0 address.  After that, we set the netmask if provided.
2206          * Finally we set the interface address.
2207          */
2208         laddr = lifr.lifr_addr;
2209         (void) strlcpy(lifr.lifr_name, nwiftabptr->zone_nwif_physical,
2210             sizeof (lifr.lifr_name));
2211         (void) memset(&lifr.lifr_addr, 0, sizeof (lifr.lifr_addr));
2212 
2213         if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) {
2214                 /*
2215                  * Here, we know that the interface can't be brought up.
2216                  * A similar warning message was already printed out to
2217                  * the console by zoneadm(1M) so instead we log the
2218                  * message to syslog and continue.
2219                  */
2220                 zerror(&logsys, B_TRUE, "WARNING: skipping network interface "
2221                     "'%s' which may not be present/plumbed in the "
2222                     "global zone.", lifr.lifr_name);
2223                 (void) close(s);
2224                 return (Z_OK);
2225         }
2226 
2227         /* Preserve literal IPv4 address for later potential printing. */
2228         if (af == AF_INET)
2229                 (void) inet_ntop(AF_INET, &in4, addrstr4, INET_ADDRSTRLEN);
2230 
2231         lifr.lifr_zoneid = zone_id;
2232         if (ioctl(s, SIOCSLIFZONE, (caddr_t)&lifr) < 0) {
2233                 zerror(zlogp, B_TRUE, "%s: could not place network interface "
2234                     "into zone", lifr.lifr_name);
2235                 goto bad;
2236         }
2237 
2238         /*
2239          * Loopback interface will use the default netmask assigned, if no
2240          * netmask is found.
2241          */
2242         if (strcmp(nwiftabptr->zone_nwif_physical, "lo0") == 0) {


2415 
2416         (void) close(s);
2417         return (Z_OK);
2418 bad:
2419         (void) ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifr);
2420         (void) close(s);
2421         return (-1);
2422 }
2423 
2424 /*
2425  * Sets up network interfaces based on information from the zone configuration.
2426  * IPv4 and IPv6 loopback interfaces are set up "for free", modeling the global
2427  * system.
2428  *
2429  * If anything goes wrong, we log a general error message, attempt to tear down
2430  * whatever we set up, and return an error.
2431  */
2432 static int
2433 configure_shared_network_interfaces(zlog_t *zlogp)
2434 {
2435         zone_dochandle_t handle;
2436         struct zone_nwiftab nwiftab, loopback_iftab;
2437         zoneid_t zoneid;
2438 
2439         if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) {
2440                 zerror(zlogp, B_TRUE, "unable to get zoneid");
2441                 return (-1);
2442         }
2443 
2444         if ((handle = zonecfg_init_handle()) == NULL) {
2445                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
2446                 return (-1);
2447         }
2448         if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
2449                 zerror(zlogp, B_FALSE, "invalid configuration");
2450                 zonecfg_fini_handle(handle);
2451                 return (-1);
2452         }
2453         if (zonecfg_setnwifent(handle) == Z_OK) {
2454                 for (;;) {
2455                         if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
2456                                 break;

2457                         if (configure_one_interface(zlogp, zoneid, &nwiftab) !=
2458                             Z_OK) {
2459                                 (void) zonecfg_endnwifent(handle);
2460                                 zonecfg_fini_handle(handle);
2461                                 return (-1);
2462                         }
2463                 }
2464                 (void) zonecfg_endnwifent(handle);
2465         }
2466         zonecfg_fini_handle(handle);
2467         if (is_system_labeled()) {
2468                 /*
2469                  * Labeled zones share the loopback interface
2470                  * so it is not plumbed for shared stack instances.
2471                  */
2472                 return (0);
2473         }
2474         (void) strlcpy(loopback_iftab.zone_nwif_physical, "lo0",
2475             sizeof (loopback_iftab.zone_nwif_physical));
2476         (void) strlcpy(loopback_iftab.zone_nwif_address, "127.0.0.1",
2477             sizeof (loopback_iftab.zone_nwif_address));
2478         loopback_iftab.zone_nwif_defrouter[0] = '\0';
2479         if (configure_one_interface(zlogp, zoneid, &loopback_iftab) != Z_OK)
2480                 return (-1);
2481 
2482         /* Always plumb up the IPv6 loopback interface. */
2483         (void) strlcpy(loopback_iftab.zone_nwif_address, "::1/128",
2484             sizeof (loopback_iftab.zone_nwif_address));
2485         if (configure_one_interface(zlogp, zoneid, &loopback_iftab) != Z_OK)
2486                 return (-1);


2893         }
2894         /* insert new after ptr */
2895         new->za_next = next;
2896         ptr->za_next = new;
2897         return (old);
2898 }
2899 
2900 void
2901 free_ip_interface(zone_addr_list_t *zalist)
2902 {
2903         zone_addr_list_t *ptr, *new;
2904 
2905         for (ptr = zalist; ptr != NULL; ) {
2906                 new = ptr;
2907                 ptr = ptr->za_next;
2908                 free(new);
2909         }
2910 }
2911 
2912 /*
2913  * For IP networking, we need to use the illumos-native device tree.  For most
2914  * zones, this is $ZONEROOT/dev.  For LX ones, it's $ZONEROOT/native/dev.
2915  * Return the appropriate post-$ZONEROOT path.
2916  */
2917 static char *
2918 get_brand_dev(void)
2919 {
2920         static char *lxpath = "/native/dev";
2921         /* Cheesy hard-coding of strlen("/native") */
2922         char *default_path = lxpath + 7;
2923 
2924         /* LX zones are the exception... */
2925         if (strcmp(brand_name, "lx") == 0)
2926                 return (lxpath);
2927 
2928         return (default_path);
2929 }
2930 
2931 /*
2932  * Add the kernel access control information for the interface names.
2933  * If anything goes wrong, we log a general error message, attempt to tear down
2934  * whatever we set up, and return an error.
2935  */
2936 static int
2937 configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
2938 {
2939         zone_dochandle_t handle;
2940         struct zone_nwiftab nwiftab;
2941         char rootpath[MAXPATHLEN];
2942         char path[MAXPATHLEN];
2943         datalink_id_t linkid;
2944         di_prof_t prof = NULL;
2945         boolean_t added = B_FALSE;
2946         zone_addr_list_t *zalist = NULL, *new;
2947 
2948         if ((handle = zonecfg_init_handle()) == NULL) {
2949                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
2950                 return (-1);
2951         }
2952         if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
2953                 zerror(zlogp, B_FALSE, "invalid configuration");
2954                 zonecfg_fini_handle(handle);
2955                 return (-1);
2956         }
2957 
2958         if (zonecfg_setnwifent(handle) != Z_OK) {
2959                 zonecfg_fini_handle(handle);
2960                 return (0);
2961         }
2962 
2963         for (;;) {
2964                 if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
2965                         break;
2966 

2967                 if (prof == NULL) {
2968                         if (zone_get_devroot(zone_name, rootpath,
2969                             sizeof (rootpath)) != Z_OK) {
2970                                 (void) zonecfg_endnwifent(handle);
2971                                 zonecfg_fini_handle(handle);
2972                                 zerror(zlogp, B_TRUE,
2973                                     "unable to determine dev root");
2974                                 return (-1);
2975                         }
2976                         (void) snprintf(path, sizeof (path), "%s%s", rootpath,
2977                             get_brand_dev());
2978                         if (di_prof_init(path, &prof) != 0) {
2979                                 (void) zonecfg_endnwifent(handle);
2980                                 zonecfg_fini_handle(handle);
2981                                 zerror(zlogp, B_TRUE,
2982                                     "failed to initialize profile");
2983                                 return (-1);
2984                         }
2985                 }
2986 
2987                 /*
2988                  * Create the /dev entry for backward compatibility.
2989                  * Only create the /dev entry if it's not in use.
2990                  * Note that the zone still boots when the assigned
2991                  * interface is inaccessible, used by others, etc.
2992                  * Also, when vanity naming is used, some interface do
2993                  * do not have corresponding /dev node names (for example,
2994                  * vanity named aggregations).  The /dev entry is not
2995                  * created in that case.  The /dev/net entry is always
2996                  * accessible.
2997                  */
2998                 if (dladm_name2info(dld_handle, nwiftab.zone_nwif_physical,
2999                     &linkid, NULL, NULL, NULL) == DLADM_STATUS_OK &&
3000                     add_datalink(zlogp, zone_name, linkid,
3001                     nwiftab.zone_nwif_physical) == 0) {
3002                         added = B_TRUE;
3003                 } else {
3004                         (void) zonecfg_endnwifent(handle);
3005                         zonecfg_fini_handle(handle);
3006                         zerror(zlogp, B_TRUE, "failed to add network device");
3007                         return (-1);

3008                 }
3009                 /* set up the new IP interface, and add them all later */
3010                 new = malloc(sizeof (*new));
3011                 if (new == NULL) {
3012                         zerror(zlogp, B_TRUE, "no memory for %s",
3013                             nwiftab.zone_nwif_physical);
3014                         zonecfg_fini_handle(handle);
3015                         free_ip_interface(zalist);
3016                 }
3017                 bzero(new, sizeof (*new));
3018                 new->za_nwiftab = nwiftab;
3019                 new->za_linkid = linkid;
3020                 zalist = add_ip_interface(zalist, new);
3021         }
3022         if (zalist != NULL) {
3023                 if ((errno = add_net(zlogp, zoneid, zalist)) != 0) {
3024                         (void) zonecfg_endnwifent(handle);
3025                         zonecfg_fini_handle(handle);
3026                         zerror(zlogp, B_TRUE, "failed to add address");
3027                         free_ip_interface(zalist);
3028                         return (-1);
3029                 }
3030                 free_ip_interface(zalist);
3031         }
3032         (void) zonecfg_endnwifent(handle);
3033         zonecfg_fini_handle(handle);
3034 
3035         if (prof != NULL && added) {
3036                 if (di_prof_commit(prof) != 0) {
3037                         zerror(zlogp, B_TRUE, "failed to commit profile");
3038                         return (-1);
3039                 }
3040         }
3041         if (prof != NULL)
3042                 di_prof_fini(prof);
3043 
3044         return (0);
3045 }
3046 
3047 static int
3048 remove_datalink_pool(zlog_t *zlogp, zoneid_t zoneid)
3049 {
3050         ushort_t flags;
3051         zone_iptype_t iptype;
3052         int i, dlnum = 0;
3053         datalink_id_t *dllink, *dllinks = NULL;


3149 
3150         if ((dllinks = malloc(dlnum * sizeof (datalink_id_t))) == NULL) {
3151                 zerror(zlogp, B_TRUE, "memory allocation failed");
3152                 return (-1);
3153         }
3154         if (zone_list_datalink(zoneid, &dlnum, dllinks) != 0) {
3155                 zerror(zlogp, B_TRUE, "unable to list network interfaces");
3156                 free(dllinks);
3157                 return (-1);
3158         }
3159 
3160         for (i = 0, dllink = dllinks; i < dlnum; i++, dllink++) {
3161                 char dlerr[DLADM_STRSIZE];
3162 
3163                 dlstatus = dladm_set_linkprop(dld_handle, *dllink,
3164                     "protection", NULL, 0, DLADM_OPT_ACTIVE);
3165                 if (dlstatus == DLADM_STATUS_NOTFOUND) {
3166                         /* datalink does not belong to the GZ */
3167                         continue;
3168                 }
3169                 if (dlstatus != DLADM_STATUS_OK) {
3170                         zerror(zlogp, B_FALSE,

3171                             dladm_status2str(dlstatus, dlerr));
3172                         free(dllinks);
3173                         return (-1);
3174                 }
3175                 dlstatus = dladm_set_linkprop(dld_handle, *dllink,
3176                     "allowed-ips", NULL, 0, DLADM_OPT_ACTIVE);
3177                 if (dlstatus != DLADM_STATUS_OK) {
3178                         zerror(zlogp, B_FALSE,

3179                             dladm_status2str(dlstatus, dlerr));
3180                         free(dllinks);
3181                         return (-1);
3182                 }
3183         }
3184         free(dllinks);
3185         return (0);
3186 }
3187 
3188 static int
3189 unconfigure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
3190 {
3191         int dlnum = 0;
3192 
3193         /*
3194          * The kernel shutdown callback for the dls module should have removed
3195          * all datalinks from this zone.  If any remain, then there's a
3196          * problem.
3197          */
3198         if (zone_list_datalink(zoneid, &dlnum, NULL) != 0) {
3199                 zerror(zlogp, B_TRUE, "unable to list network interfaces");
3200                 return (-1);
3201         }
3202         if (dlnum != 0) {
3203                 zerror(zlogp, B_FALSE,
3204                     "datalinks remain in zone after shutdown");
3205                 return (-1);
3206         }
3207         return (0);
3208 }
3209 
3210 static int
3211 tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid,
3212     const struct sockaddr_storage *local, const struct sockaddr_storage *remote)
3213 {
3214         int fd;
3215         struct strioctl ioc;
3216         tcp_ioc_abort_conn_t conn;
3217         int error;
3218 
3219         conn.ac_local = *local;
3220         conn.ac_remote = *remote;
3221         conn.ac_start = TCPS_SYN_SENT;
3222         conn.ac_end = TCPS_TIME_WAIT;
3223         conn.ac_zoneid = zoneid;
3224 
3225         ioc.ic_cmd = TCP_IOC_ABORT_CONN;
3226         ioc.ic_timout = -1; /* infinite timeout */
3227         ioc.ic_len = sizeof (conn);
3228         ioc.ic_dp = (char *)&conn;
3229 
3230         if ((fd = open("/dev/tcp", O_RDONLY)) < 0) {


3272         local6 = (struct sockaddr_in6 *)&l;
3273         local6->sin6_family = AF_INET6;
3274         local6->sin6_port = 0;
3275         local6->sin6_addr = in6addr_any;
3276 
3277         bzero(&r, sizeof (*remote6));
3278         remote6 = (struct sockaddr_in6 *)&r;
3279         remote6->sin6_family = AF_INET6;
3280         remote6->sin6_port = 0;
3281         remote6->sin6_addr = in6addr_any;
3282 
3283         if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0)
3284                 return (error);
3285         return (0);
3286 }
3287 
3288 static int
3289 get_privset(zlog_t *zlogp, priv_set_t *privs, zone_mnt_t mount_cmd)
3290 {
3291         int error = -1;
3292         zone_dochandle_t handle;
3293         char *privname = NULL;
3294 
3295         if ((handle = zonecfg_init_handle()) == NULL) {
3296                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
3297                 return (-1);
3298         }
3299         if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
3300                 zerror(zlogp, B_FALSE, "invalid configuration");
3301                 zonecfg_fini_handle(handle);
3302                 return (-1);
3303         }
3304 
3305         if (ALT_MOUNT(mount_cmd)) {
3306                 zone_iptype_t   iptype;
3307                 const char      *curr_iptype;
3308 
3309                 if (zonecfg_get_iptype(handle, &iptype) != Z_OK) {
3310                         zerror(zlogp, B_TRUE, "unable to determine ip-type");
3311                         zonecfg_fini_handle(handle);
3312                         return (-1);
3313                 }
3314 
3315                 switch (iptype) {
3316                 case ZS_SHARED:
3317                         curr_iptype = "shared";
3318                         break;
3319                 case ZS_EXCLUSIVE:
3320                         curr_iptype = "exclusive";
3321                         break;
3322                 }
3323 
3324                 if (zonecfg_default_privset(privs, curr_iptype) == Z_OK) {
3325                         zonecfg_fini_handle(handle);
3326                         return (0);
3327                 }
3328                 zerror(zlogp, B_FALSE,
3329                     "failed to determine the zone's default privilege set");
3330                 zonecfg_fini_handle(handle);
3331                 return (-1);
3332         }
3333 
3334         switch (zonecfg_get_privset(handle, privs, &privname)) {
3335         case Z_OK:
3336                 error = 0;
3337                 break;
3338         case Z_PRIV_PROHIBITED:
3339                 zerror(zlogp, B_FALSE, "privilege \"%s\" is not permitted "
3340                     "within the zone's privilege set", privname);
3341                 break;
3342         case Z_PRIV_REQUIRED:
3343                 zerror(zlogp, B_FALSE, "required privilege \"%s\" is missing "
3344                     "from the zone's privilege set", privname);
3345                 break;
3346         case Z_PRIV_UNKNOWN:
3347                 zerror(zlogp, B_FALSE, "unknown privilege \"%s\" specified "
3348                     "in the zone's privilege set", privname);
3349                 break;
3350         default:
3351                 zerror(zlogp, B_FALSE, "failed to determine the zone's "
3352                     "privilege set");
3353                 break;
3354         }
3355 
3356         free(privname);
3357         zonecfg_fini_handle(handle);
3358         return (error);
3359 }
3360 
3361 static char *
3362 zone_proj_rctl(const char *name)
3363 {
3364         int i;
3365 
3366         for (i = 0; zone_proj_rctl_map[i].zpr_zone_rctl != NULL; i++) {
3367                 if (strcmp(name, zone_proj_rctl_map[i].zpr_zone_rctl) == 0) {
3368                         return (zone_proj_rctl_map[i].zpr_project_rctl);
3369                 }
3370         }
3371         return (NULL);
3372 }
3373 
3374 static int
3375 get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
3376 {
3377         nvlist_t *nvl = NULL;
3378         char *nvl_packed = NULL;
3379         size_t nvl_size = 0;
3380         nvlist_t **nvlv = NULL;
3381         int rctlcount = 0;
3382         int error = -1;
3383         zone_dochandle_t handle;
3384         struct zone_rctltab rctltab;
3385         rctlblk_t *rctlblk = NULL;
3386         uint64_t maxlwps;
3387         uint64_t maxprocs;
3388         int rproc, rlwp;
3389 
3390         *bufp = NULL;
3391         *bufsizep = 0;
3392 
3393         if ((handle = zonecfg_init_handle()) == NULL) {
3394                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
3395                 return (-1);
3396         }
3397         if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
3398                 zerror(zlogp, B_FALSE, "invalid configuration");
3399                 zonecfg_fini_handle(handle);
3400                 return (-1);
3401         }
3402 
3403         rctltab.zone_rctl_valptr = NULL;
3404         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
3405                 zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc");
3406                 goto out;
3407         }
3408 
3409         /*
3410          * Allow the administrator to control both the maximum number of
3411          * process table slots, and the maximum number of lwps, with a single
3412          * max-processes or max-lwps property. If only the max-processes
3413          * property is set, we add a max-lwps property with a limit derived
3414          * from max-processes. If only the max-lwps property is set, we add a
3415          * max-processes property with the same limit as max-lwps.
3416          */
3417         rproc = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXPROCS, &maxprocs);
3418         rlwp = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXLWPS, &maxlwps);
3419         if (rproc == Z_OK && rlwp == Z_NO_ENTRY) {
3420                 if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXLWPS,
3421                     maxprocs * LWPS_PER_PROCESS) != Z_OK) {
3422                         zerror(zlogp, B_FALSE, "unable to set max-lwps alias");
3423                         goto out;
3424                 }
3425         } else if (rlwp == Z_OK && rproc == Z_NO_ENTRY) {
3426                 /* no scaling for max-proc value */
3427                 if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXPROCS,
3428                     maxlwps) != Z_OK) {
3429                         zerror(zlogp, B_FALSE,
3430                             "unable to set max-processes alias");
3431                         goto out;
3432                 }
3433         }
3434 
3435         if (zonecfg_setrctlent(handle) != Z_OK) {
3436                 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
3437                 goto out;
3438         }
3439 
3440         if ((rctlblk = malloc(rctlblk_size())) == NULL) {
3441                 zerror(zlogp, B_TRUE, "memory allocation failed");
3442                 goto out;
3443         }
3444         while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) {
3445                 struct zone_rctlvaltab *rctlval;
3446                 uint_t i, count;
3447                 const char *name = rctltab.zone_rctl_name;
3448                 char *proj_nm;
3449 
3450                 /* zoneadm should have already warned about unknown rctls. */
3451                 if (!zonecfg_is_rctl(name)) {
3452                         zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
3453                         rctltab.zone_rctl_valptr = NULL;
3454                         continue;
3455                 }
3456                 count = 0;
3457                 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
3458                     rctlval = rctlval->zone_rctlval_next) {
3459                         count++;
3460                 }
3461                 if (count == 0) {       /* ignore */
3462                         continue;       /* Nothing to free */
3463                 }
3464                 if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL)


3527                         if (nvlist_add_nvlist_array(nvl, proj_nm, nvlv, count)
3528                             != 0) {
3529                                 zerror(zlogp, B_FALSE,
3530                                     "nvlist_add_nvlist_arrays failed");
3531                                 goto out;
3532                         }
3533                 }
3534 
3535                 if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count)
3536                     != 0) {
3537                         zerror(zlogp, B_FALSE, "%s failed",
3538                             "nvlist_add_nvlist_array");
3539                         goto out;
3540                 }
3541                 for (i = 0; i < count; i++)
3542                         nvlist_free(nvlv[i]);
3543                 free(nvlv);
3544                 nvlv = NULL;
3545                 rctlcount++;
3546         }
3547         (void) zonecfg_endrctlent(handle);
3548 
3549         if (rctlcount == 0) {
3550                 error = 0;
3551                 goto out;
3552         }
3553         if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0)
3554             != 0) {
3555                 zerror(zlogp, B_FALSE, "%s failed", "nvlist_pack");
3556                 goto out;
3557         }
3558 
3559         error = 0;
3560         *bufp = nvl_packed;
3561         *bufsizep = nvl_size;
3562 
3563 out:
3564         free(rctlblk);
3565         zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
3566         if (error && nvl_packed != NULL)
3567                 free(nvl_packed);
3568         nvlist_free(nvl);
3569         if (nvlv != NULL)
3570                 free(nvlv);
3571         if (handle != NULL)
3572                 zonecfg_fini_handle(handle);
3573         return (error);
3574 }
3575 
3576 static int
3577 get_implicit_datasets(zlog_t *zlogp, char **retstr)
3578 {
3579         char cmdbuf[2 * MAXPATHLEN];
3580 
3581         if (query_hook[0] == '\0')
3582                 return (0);
3583 
3584         if (snprintf(cmdbuf, sizeof (cmdbuf), "%s datasets", query_hook)
3585             > sizeof (cmdbuf))
3586                 return (-1);
3587 
3588         if (do_subproc(zlogp, cmdbuf, retstr) != 0)
3589                 return (-1);
3590 
3591         return (0);
3592 }
3593 
3594 static int
3595 get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
3596 {
3597         zone_dochandle_t handle;
3598         struct zone_dstab dstab;
3599         size_t total, offset, len;
3600         int error = -1;
3601         char *str = NULL;
3602         char *implicit_datasets = NULL;
3603         int implicit_len = 0;
3604 
3605         *bufp = NULL;
3606         *bufsizep = 0;
3607 
3608         if ((handle = zonecfg_init_handle()) == NULL) {
3609                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
3610                 return (-1);
3611         }
3612         if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
3613                 zerror(zlogp, B_FALSE, "invalid configuration");
3614                 zonecfg_fini_handle(handle);
3615                 return (-1);
3616         }
3617 
3618         if (get_implicit_datasets(zlogp, &implicit_datasets) != 0) {
3619                 zerror(zlogp, B_FALSE, "getting implicit datasets failed");
3620                 goto out;
3621         }
3622 
3623         if (zonecfg_setdsent(handle) != Z_OK) {
3624                 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
3625                 goto out;
3626         }
3627 
3628         total = 0;
3629         while (zonecfg_getdsent(handle, &dstab) == Z_OK)
3630                 total += strlen(dstab.zone_dataset_name) + 1;
3631         (void) zonecfg_enddsent(handle);
3632 
3633         if (implicit_datasets != NULL)
3634                 implicit_len = strlen(implicit_datasets);
3635         if (implicit_len > 0)
3636                 total += implicit_len + 1;
3637 
3638         if (total == 0) {
3639                 error = 0;
3640                 goto out;
3641         }
3642 
3643         if ((str = malloc(total)) == NULL) {
3644                 zerror(zlogp, B_TRUE, "memory allocation failed");
3645                 goto out;
3646         }
3647 
3648         if (zonecfg_setdsent(handle) != Z_OK) {
3649                 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
3650                 goto out;
3651         }
3652         offset = 0;
3653         while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
3654                 len = strlen(dstab.zone_dataset_name);
3655                 (void) strlcpy(str + offset, dstab.zone_dataset_name,
3656                     total - offset);
3657                 offset += len;
3658                 if (offset < total - 1)
3659                         str[offset++] = ',';
3660         }
3661         (void) zonecfg_enddsent(handle);
3662 
3663         if (implicit_len > 0)
3664                 (void) strlcpy(str + offset, implicit_datasets, total - offset);
3665 
3666         error = 0;
3667         *bufp = str;
3668         *bufsizep = total;
3669 
3670 out:
3671         if (error != 0 && str != NULL)
3672                 free(str);
3673         if (handle != NULL)
3674                 zonecfg_fini_handle(handle);
3675         if (implicit_datasets != NULL)
3676                 free(implicit_datasets);
3677 
3678         return (error);
3679 }
3680 
3681 static int
3682 validate_datasets(zlog_t *zlogp)
3683 {
3684         zone_dochandle_t handle;
3685         struct zone_dstab dstab;
3686         zfs_handle_t *zhp;
3687         libzfs_handle_t *hdl;
3688 
3689         if ((handle = zonecfg_init_handle()) == NULL) {
3690                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
3691                 return (-1);
3692         }
3693         if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
3694                 zerror(zlogp, B_FALSE, "invalid configuration");
3695                 zonecfg_fini_handle(handle);
3696                 return (-1);
3697         }
3698 
3699         if (zonecfg_setdsent(handle) != Z_OK) {
3700                 zerror(zlogp, B_FALSE, "invalid configuration");
3701                 zonecfg_fini_handle(handle);
3702                 return (-1);
3703         }
3704 
3705         if ((hdl = libzfs_init()) == NULL) {
3706                 zerror(zlogp, B_FALSE, "opening ZFS library");
3707                 zonecfg_fini_handle(handle);
3708                 return (-1);
3709         }
3710 
3711         while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
3712 
3713                 if ((zhp = zfs_open(hdl, dstab.zone_dataset_name,
3714                     ZFS_TYPE_FILESYSTEM)) == NULL) {
3715                         zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'",
3716                             dstab.zone_dataset_name);
3717                         zonecfg_fini_handle(handle);
3718                         libzfs_fini(hdl);
3719                         return (-1);
3720                 }
3721 
3722                 /*
3723                  * Automatically set the 'zoned' property.  We check the value
3724                  * first because we'll get EPERM if it is already set.
3725                  */
3726                 if (!zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
3727                     zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_ZONED),
3728                     "on") != 0) {
3729                         zerror(zlogp, B_FALSE, "cannot set 'zoned' "
3730                             "property for ZFS dataset '%s'\n",
3731                             dstab.zone_dataset_name);
3732                         zonecfg_fini_handle(handle);
3733                         zfs_close(zhp);
3734                         libzfs_fini(hdl);
3735                         return (-1);
3736                 }
3737 
3738                 zfs_close(zhp);
3739         }
3740         (void) zonecfg_enddsent(handle);
3741 
3742         zonecfg_fini_handle(handle);
3743         libzfs_fini(hdl);
3744 
3745         return (0);
3746 }
3747 
3748 /*
3749  * Return true if the path is its own zfs file system.  We determine this
3750  * by stat-ing the path to see if it is zfs and stat-ing the parent to see
3751  * if it is a different fs.
3752  */
3753 boolean_t
3754 is_zonepath_zfs(char *zonepath)
3755 {
3756         int res;
3757         char *path;
3758         char *parent;
3759         struct statvfs64 buf1, buf2;
3760 
3761         if (statvfs64(zonepath, &buf1) != 0)
3762                 return (B_FALSE);


4459                     stat64(mnp->mnt_special, &zst) != -1 &&
4460                     rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) {
4461                         zerror(zlogp, B_FALSE,
4462                             "zone root %s is reachable through %s",
4463                             rootpath, mnp->mnt_mountp);
4464                         return (B_TRUE);
4465                 }
4466         }
4467         return (B_FALSE);
4468 }
4469 
4470 /*
4471  * Set pool info for the zone's resource management configuration.
4472  */
4473 static int
4474 setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
4475 {
4476         int res;
4477         uint64_t tmp;
4478         char sched[MAXNAMELEN];
4479         zone_dochandle_t handle = NULL;
4480         char pool_err[128];
4481 
4482         if ((handle = zonecfg_init_handle()) == NULL) {
4483                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
4484                 return (Z_BAD_HANDLE);
4485         }
4486 
4487         if ((res = zonecfg_get_snapshot_handle(zone_name, handle)) != Z_OK) {
4488                 zerror(zlogp, B_FALSE, "invalid configuration");
4489                 zonecfg_fini_handle(handle);
4490                 return (res);
4491         }
4492 
4493         /* Get the scheduling class set in the zone configuration. */
4494         if (zonecfg_get_sched_class(handle, sched, sizeof (sched)) == Z_OK &&
4495             strlen(sched) > 0) {
4496                 if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, sched,
4497                     strlen(sched)) == -1)
4498                         zerror(zlogp, B_TRUE, "WARNING: unable to set the "
4499                             "default scheduling class");
4500 
4501                 if (strcmp(sched, "FX") == 0) {
4502                         /*
4503                          * When FX is specified then by default all processes
4504                          * will start at the lowest priority level (0) and
4505                          * stay there. We support an optional attr which
4506                          * indicates that all the processes should be "high
4507                          * priority". We set this on the zone so that starting
4508                          * init will set the priority high.
4509                          */
4510                         struct zone_attrtab a;
4511 
4512                         bzero(&a, sizeof (a));
4513                         (void) strlcpy(a.zone_attr_name, "fixed-hi-prio",
4514                             sizeof (a.zone_attr_name));


4519 
4520                                 if (zone_setattr(zoneid,
4521                                     ZONE_ATTR_SCHED_FIXEDHI, (void *)hi,
4522                                     sizeof (hi)) == -1)
4523                                         zerror(zlogp, B_TRUE, "WARNING: unable "
4524                                             "to set high priority");
4525                         }
4526                 }
4527 
4528         } else if (zonecfg_get_aliased_rctl(snap_hndl, ALIAS_SHARES, &tmp)
4529             == Z_OK) {
4530                 /*
4531                  * If the zone has the zone.cpu-shares rctl set then we want to
4532                  * use the Fair Share Scheduler (FSS) for processes in the
4533                  * zone.  Check what scheduling class the zone would be running
4534                  * in by default so we can print a warning and modify the class
4535                  * if we wouldn't be using FSS.
4536                  */
4537                 char class_name[PC_CLNMSZ];
4538 
4539                 if (zonecfg_get_dflt_sched_class(handle, class_name,
4540                     sizeof (class_name)) != Z_OK) {
4541                         zerror(zlogp, B_FALSE, "WARNING: unable to determine "
4542                             "the zone's scheduling class");
4543 
4544                 } else if (strcmp("FSS", class_name) != 0) {
4545                         zerror(zlogp, B_FALSE, "WARNING: The zone.cpu-shares "
4546                             "rctl is set but\nFSS is not the default "
4547                             "scheduling class for\nthis zone.  FSS will be "
4548                             "used for processes\nin the zone but to get the "
4549                             "full benefit of FSS,\nit should be the default "
4550                             "scheduling class.\nSee dispadmin(1M) for more "
4551                             "details.");
4552 
4553                         if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, "FSS",
4554                             strlen("FSS")) == -1)
4555                                 zerror(zlogp, B_TRUE, "WARNING: unable to set "
4556                                     "zone scheduling class to FSS");
4557                 }
4558         }
4559 
4560         /*
4561          * The next few blocks of code attempt to set up temporary pools as
4562          * well as persistent pools.  In all cases we call the functions
4563          * unconditionally.  Within each function the code will check if the
4564          * zone is actually configured for a temporary pool or persistent pool
4565          * and just return if there is nothing to do.
4566          *
4567          * If we are rebooting we want to attempt to reuse any temporary pool
4568          * that was previously set up.  zonecfg_bind_tmp_pool() will do the
4569          * right thing in all cases (reuse or create) based on the current
4570          * zonecfg.
4571          */
4572         if ((res = zonecfg_bind_tmp_pool(handle, zoneid, pool_err,
4573             sizeof (pool_err))) != Z_OK) {
4574                 if (res == Z_POOL || res == Z_POOL_CREATE || res == Z_POOL_BIND)
4575                         zerror(zlogp, B_FALSE, "%s: %s\ndedicated-cpu setting "
4576                             "cannot be instantiated", zonecfg_strerror(res),
4577                             pool_err);
4578                 else
4579                         zerror(zlogp, B_FALSE, "could not bind zone to "
4580                             "temporary pool: %s", zonecfg_strerror(res));
4581                 zonecfg_fini_handle(handle);
4582                 return (Z_POOL_BIND);
4583         }
4584 
4585         /*
4586          * Check if we need to warn about poold not being enabled.
4587          */
4588         if (zonecfg_warn_poold(handle)) {
4589                 zerror(zlogp, B_FALSE, "WARNING: A range of dedicated-cpus has "
4590                     "been specified\nbut the dynamic pool service is not "
4591                     "enabled.\nThe system will not dynamically adjust the\n"
4592                     "processor allocation within the specified range\n"
4593                     "until svc:/system/pools/dynamic is enabled.\n"
4594                     "See poold(1M).");
4595         }
4596 
4597         /* The following is a warning, not an error. */
4598         if ((res = zonecfg_bind_pool(handle, zoneid, pool_err,
4599             sizeof (pool_err))) != Z_OK) {
4600                 if (res == Z_POOL_BIND)
4601                         zerror(zlogp, B_FALSE, "WARNING: unable to bind to "
4602                             "pool '%s'; using default pool.", pool_err);
4603                 else if (res == Z_POOL)
4604                         zerror(zlogp, B_FALSE, "WARNING: %s: %s",
4605                             zonecfg_strerror(res), pool_err);
4606                 else
4607                         zerror(zlogp, B_FALSE, "WARNING: %s",
4608                             zonecfg_strerror(res));
4609         }
4610 
4611         /* Update saved pool name in case it has changed */
4612         (void) zonecfg_get_poolname(handle, zone_name, pool_name,
4613             sizeof (pool_name));
4614 
4615         zonecfg_fini_handle(handle);
4616         return (Z_OK);
4617 }
4618 
4619 static void
4620 report_prop_err(zlog_t *zlogp, const char *name, const char *value, int res)
4621 {
4622         switch (res) {
4623         case Z_TOO_BIG:
4624                 zerror(zlogp, B_FALSE, "%s property value is too large.", name);
4625                 break;
4626 
4627         case Z_INVALID_PROPERTY:
4628                 zerror(zlogp, B_FALSE, "%s property value \"%s\" is not valid",
4629                     name, value);
4630                 break;
4631 
4632         default:
4633                 zerror(zlogp, B_TRUE, "fetching property %s: %d", name, res);
4634                 break;
4635         }


4696         } else {
4697                 /* Has a value, append the defaults */
4698                 if (strlcat(fsallowed, ",", len) >= len ||
4699                     strlcat(fsallowed, DFLT_FS_ALLOWED, len) >= len) {
4700                         report_prop_err(zlogp, "fs-allowed", fsallowed,
4701                             Z_TOO_BIG);
4702                         return (Z_TOO_BIG);
4703                 }
4704         }
4705 
4706         if (zone_setattr(zoneid, ZONE_ATTR_FS_ALLOWED, fsallowedp, len) != 0) {
4707                 zerror(zlogp, B_TRUE,
4708                     "fs-allowed couldn't be set: %s: %d", fsallowedp, res);
4709                 return (Z_SYSTEM);
4710         }
4711 
4712         return (Z_OK);
4713 }
4714 
4715 static int
4716 setup_zone_attrs(zlog_t *zlogp, char *zone_namep, zoneid_t zoneid)
4717 {
4718         zone_dochandle_t handle;
4719         int res = Z_OK;
4720 
4721         if ((handle = zonecfg_init_handle()) == NULL) {
4722                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
4723                 return (Z_BAD_HANDLE);
4724         }
4725         if ((res = zonecfg_get_snapshot_handle(zone_namep, handle)) != Z_OK) {
4726                 zerror(zlogp, B_FALSE, "invalid configuration");
4727                 goto out;
4728         }
4729 
4730         if ((res = setup_zone_hostid(handle, zlogp, zoneid)) != Z_OK)
4731                 goto out;
4732 
4733         if ((res = setup_zone_fs_allowed(handle, zlogp, zoneid)) != Z_OK)
4734                 goto out;
4735 
4736 out:
4737         zonecfg_fini_handle(handle);
4738         return (res);
4739 }
4740 






4741 zoneid_t
4742 vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd)
4743 {
4744         zoneid_t rval = -1;
4745         priv_set_t *privs;
4746         char rootpath[MAXPATHLEN];
4747         char *rctlbuf = NULL;
4748         size_t rctlbufsz = 0;
4749         char *zfsbuf = NULL;
4750         size_t zfsbufsz = 0;
4751         zoneid_t zoneid = -1;
4752         int xerr;
4753         char *kzone;
4754         FILE *fp = NULL;
4755         tsol_zcent_t *zcent = NULL;
4756         int match = 0;
4757         int doi = 0;
4758         int flags;
4759         zone_iptype_t iptype;
4760 
4761         if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
4762                 zerror(zlogp, B_TRUE, "unable to determine zone root");
4763                 return (-1);
4764         }
4765         if (zonecfg_in_alt_root())
4766                 resolve_lofs(zlogp, rootpath, sizeof (rootpath));
4767 
4768         if (vplat_get_iptype(zlogp, &iptype) < 0) {
4769                 zerror(zlogp, B_TRUE, "unable to determine ip-type");
4770                 return (-1);
4771         }
4772         switch (iptype) {
4773         case ZS_SHARED:
4774                 flags = 0;
4775                 break;
4776         case ZS_EXCLUSIVE:
4777                 flags = ZCF_NET_EXCL;
4778                 break;
4779         }


4780 
4781         if ((privs = priv_allocset()) == NULL) {
4782                 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
4783                 return (-1);
4784         }
4785         priv_emptyset(privs);
4786         if (get_privset(zlogp, privs, mount_cmd) != 0)
4787                 goto error;
4788 
4789         if (mount_cmd == Z_MNT_BOOT &&
4790             get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) {
4791                 zerror(zlogp, B_FALSE, "Unable to get list of rctls");
4792                 goto error;
4793         }
4794 
4795         if (get_datasets(zlogp, &zfsbuf, &zfsbufsz) != 0) {
4796                 zerror(zlogp, B_FALSE, "Unable to get list of ZFS datasets");
4797                 goto error;
4798         }
4799 


4863                     NULL, 0) == 0) {
4864                         zerror(zlogp, B_FALSE, "scratch zone already running");
4865                         goto error;
4866                 }
4867                 /* This is the preferred name */
4868                 (void) snprintf(kernzone, sizeof (kernzone), "SUNWlu-%s",
4869                     zone_name);
4870                 srandom(getpid());
4871                 while (zonecfg_reverse_scratch(fp, kernzone, NULL, 0, NULL,
4872                     0) == 0) {
4873                         /* This is just an arbitrary name; note "." usage */
4874                         (void) snprintf(kernzone, sizeof (kernzone),
4875                             "SUNWlu.%08lX%08lX", random(), random());
4876                 }
4877                 kzone = kernzone;
4878         }
4879 
4880         xerr = 0;
4881         if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf,
4882             rctlbufsz, zfsbuf, zfsbufsz, &xerr, match, doi, zlabel,
4883             flags)) == -1) {
4884                 if (xerr == ZE_AREMOUNTS) {
4885                         if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) {
4886                                 zerror(zlogp, B_FALSE,
4887                                     "An unknown file-system is mounted on "
4888                                     "a subdirectory of %s", rootpath);
4889                         } else {
4890 
4891                                 zerror(zlogp, B_FALSE,
4892                                     "These file-systems are mounted on "
4893                                     "subdirectories of %s:", rootpath);
4894                                 (void) zonecfg_find_mounts(rootpath,
4895                                     prtmount, zlogp);
4896                         }
4897                 } else if (xerr == ZE_CHROOTED) {
4898                         zerror(zlogp, B_FALSE, "%s: "
4899                             "cannot create a zone from a chrooted "
4900                             "environment", "zone_create");
4901                 } else if (xerr == ZE_LABELINUSE) {
4902                         char zonename[ZONENAME_MAX];
4903                         (void) getzonenamebyid(getzoneidbylabel(zlabel),


4909                 }
4910                 goto error;
4911         }
4912 
4913         if (zonecfg_in_alt_root() &&
4914             zonecfg_add_scratch(fp, zone_name, kernzone,
4915             zonecfg_get_root()) == -1) {
4916                 zerror(zlogp, B_TRUE, "cannot add mapfile entry");
4917                 goto error;
4918         }
4919 
4920         /*
4921          * The following actions are not performed when merely mounting a zone
4922          * for administrative use.
4923          */
4924         if (mount_cmd == Z_MNT_BOOT) {
4925                 brand_handle_t bh;
4926                 struct brand_attr attr;
4927                 char modname[MAXPATHLEN];
4928 
4929                 if (setup_zone_attrs(zlogp, zone_name, zoneid) != Z_OK)
4930                         goto error;
4931 
4932                 if ((bh = brand_open(brand_name)) == NULL) {
4933                         zerror(zlogp, B_FALSE,
4934                             "unable to determine brand name");
4935                         goto error;
4936                 }
4937 
4938                 if (!is_system_labeled() &&
4939                     (strcmp(brand_name, LABELED_BRAND_NAME) == 0)) {
4940                         brand_close(bh);
4941                         zerror(zlogp, B_FALSE,
4942                             "cannot boot labeled zone on unlabeled system");
4943                         goto error;
4944                 }
4945 
4946                 /*
4947                  * If this brand requires any kernel support, now is the time to
4948                  * get it loaded and initialized.
4949                  */


4967                                 goto error;
4968                         }
4969                 }
4970 
4971                 if (setup_zone_rm(zlogp, zone_name, zoneid) != Z_OK)
4972                         goto error;
4973 
4974                 set_mlps(zlogp, zoneid, zcent);
4975         }
4976 
4977         rval = zoneid;
4978         zoneid = -1;
4979 
4980 error:
4981         if (zoneid != -1) {
4982                 (void) zone_shutdown(zoneid);
4983                 (void) zone_destroy(zoneid);
4984         }
4985         if (rctlbuf != NULL)
4986                 free(rctlbuf);


4987         priv_freeset(privs);
4988         if (fp != NULL)
4989                 zonecfg_close_scratch(fp);
4990         lofs_discard_mnttab();
4991         if (zcent != NULL)
4992                 tsol_freezcent(zcent);
4993         return (rval);
4994 }
4995 
4996 /*
4997  * Enter the zone and write a /etc/zones/index file there.  This allows
4998  * libzonecfg (and thus zoneadm) to report the UUID and potentially other zone
4999  * details from inside the zone.
5000  */
5001 static void
5002 write_index_file(zoneid_t zoneid)
5003 {
5004         FILE *zef;
5005         FILE *zet;
5006         struct zoneent *zep;


5105                         lofs_discard_mnttab();
5106                         return (-1);
5107                 }
5108 
5109                 switch (iptype) {
5110                 case ZS_SHARED:
5111                         /* Always do this to make lo0 get configured */
5112                         if (configure_shared_network_interfaces(zlogp) != 0) {
5113                                 lofs_discard_mnttab();
5114                                 return (-1);
5115                         }
5116                         break;
5117                 case ZS_EXCLUSIVE:
5118                         if (configure_exclusive_network_interfaces(zlogp,
5119                             zoneid) !=
5120                             0) {
5121                                 lofs_discard_mnttab();
5122                                 return (-1);
5123                         }
5124                         break;


5125                 }
5126         }
5127 
5128         write_index_file(zoneid);
5129 
5130         lofs_discard_mnttab();
5131         return (0);
5132 }
5133 
5134 static int
5135 lu_root_teardown(zlog_t *zlogp)
5136 {
5137         char zroot[MAXPATHLEN];
5138 
5139         if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
5140                 zerror(zlogp, B_FALSE, "unable to determine zone root");
5141                 return (-1);
5142         }
5143         root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
5144 


5180 
5181                 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
5182                         zerror(zlogp, B_TRUE, "cannot open mapfile");
5183                         return (-1);
5184                 }
5185                 retv = -1;
5186                 if (zonecfg_lock_scratch(fp) != 0)
5187                         zerror(zlogp, B_TRUE, "cannot lock mapfile");
5188                 else if (zonecfg_delete_scratch(fp, kernzone) != 0)
5189                         zerror(zlogp, B_TRUE, "cannot delete map entry");
5190                 else
5191                         retv = 0;
5192                 zonecfg_close_scratch(fp);
5193                 return (retv);
5194         } else {
5195                 return (0);
5196         }
5197 }
5198 
5199 int
5200 vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)

5201 {
5202         char *kzone;
5203         zoneid_t zoneid;
5204         int res;
5205         char pool_err[128];
5206         char cmdbuf[MAXPATHLEN];
5207         brand_handle_t bh = NULL;
5208         dladm_status_t status;
5209         char errmsg[DLADM_STRSIZE];
5210         ushort_t flags;
5211 
5212         kzone = zone_name;
5213         if (zonecfg_in_alt_root()) {
5214                 FILE *fp;
5215 
5216                 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
5217                         zerror(zlogp, B_TRUE, "unable to open map file");
5218                         goto error;
5219                 }
5220                 if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
5221                     kernzone, sizeof (kernzone)) != 0) {
5222                         zerror(zlogp, B_FALSE, "unable to find scratch zone");
5223                         zonecfg_close_scratch(fp);
5224                         goto error;
5225                 }
5226                 zonecfg_close_scratch(fp);
5227                 kzone = kernzone;
5228         }
5229 
5230         if ((zoneid = getzoneidbyname(kzone)) == ZONE_ID_UNDEFINED) {
5231                 if (!bringup_failure_recovery)
5232                         zerror(zlogp, B_TRUE, "unable to get zoneid");
5233                 if (unmount_cmd)
5234                         (void) lu_root_teardown(zlogp);
5235                 goto error;
5236         }
5237 
5238         if (remove_datalink_pool(zlogp, zoneid) != 0) {
5239                 zerror(zlogp, B_FALSE, "unable clear datalink pool property");
5240                 goto error;
5241         }
5242 
5243         if (remove_datalink_protect(zlogp, zoneid) != 0) {
5244                 zerror(zlogp, B_FALSE,
5245                     "unable clear datalink protect property");
5246                 goto error;
5247         }
5248 
5249         /*
5250          * The datalinks assigned to the zone will be removed from the NGZ as
5251          * part of zone_shutdown() so that we need to remove protect/pool etc.
5252          * before zone_shutdown(). Even if the shutdown itself fails, the zone
5253          * will not be able to violate any constraints applied because the
5254          * datalinks are no longer available to the zone.
5255          */
5256         if (zone_shutdown(zoneid) != 0) {
5257                 zerror(zlogp, B_TRUE, "unable to shutdown zone");
5258                 goto error;
5259         }
5260 
5261         /* Get a handle to the brand info for this zone */
5262         if ((bh = brand_open(brand_name)) == NULL) {
5263                 zerror(zlogp, B_FALSE, "unable to determine zone brand");
5264                 return (-1);
5265         }
5266         /*
5267          * If there is a brand 'halt' callback, execute it now to give the
5268          * brand a chance to cleanup any custom configuration.
5269          */
5270         (void) strcpy(cmdbuf, EXEC_PREFIX);
5271         if (brand_get_halt(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
5272             sizeof (cmdbuf) - EXEC_LEN) < 0) {
5273                 brand_close(bh);
5274                 zerror(zlogp, B_FALSE, "unable to determine branded zone's "
5275                     "halt callback.");
5276                 goto error;
5277         }
5278         brand_close(bh);
5279 
5280         if ((strlen(cmdbuf) > EXEC_LEN) &&
5281             (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
5282                 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
5283                 goto error;
5284         }
5285 
5286         if (!unmount_cmd) {
5287                 zone_iptype_t iptype;
5288 
5289                 if (zone_getattr(zoneid, ZONE_ATTR_FLAGS, &flags,
5290                     sizeof (flags)) < 0) {
5291                         if (vplat_get_iptype(zlogp, &iptype) < 0) {
5292                                 zerror(zlogp, B_TRUE, "unable to determine "
5293                                     "ip-type");
5294                                 goto error;
5295                         }
5296                 } else {
5297                         if (flags & ZF_NET_EXCL)
5298                                 iptype = ZS_EXCLUSIVE;
5299                         else
5300                                 iptype = ZS_SHARED;
5301                 }
5302 
5303                 switch (iptype) {
5304                 case ZS_SHARED:
5305                         if (unconfigure_shared_network_interfaces(zlogp,
5306                             zoneid) != 0) {
5307                                 zerror(zlogp, B_FALSE, "unable to unconfigure "
5308                                     "network interfaces in zone");
5309                                 goto error;
5310                         }
5311                         break;
5312                 case ZS_EXCLUSIVE:
5313                         if (unconfigure_exclusive_network_interfaces(zlogp,
5314                             zoneid) != 0) {
5315                                 zerror(zlogp, B_FALSE, "unable to unconfigure "
5316                                     "network interfaces in zone");
5317                                 goto error;
5318                         }
5319                         status = dladm_zone_halt(dld_handle, zoneid);
5320                         if (status != DLADM_STATUS_OK) {
5321                                 zerror(zlogp, B_FALSE, "unable to notify "
5322                                     "dlmgmtd of zone halt: %s",
5323                                     dladm_status2str(status, errmsg));
5324                         }
5325                         break;
5326                 }
5327         }
5328 
5329         if (!unmount_cmd && tcp_abort_connections(zlogp, zoneid) != 0) {
5330                 zerror(zlogp, B_TRUE, "unable to abort TCP connections");
5331                 goto error;
5332         }
5333 
5334         if (unmount_filesystems(zlogp, zoneid, unmount_cmd) != 0) {
5335                 zerror(zlogp, B_FALSE,
5336                     "unable to unmount file systems in zone");
5337                 goto error;
5338         }
5339 
5340         /*
5341          * If we are rebooting then we normally don't want to destroy an
5342          * existing temporary pool at this point so that we can just reuse it
5343          * when the zone boots back up.  However, it is also possible we were
5344          * running with a temporary pool and the zone configuration has been
5345          * modified to no longer use a temporary pool.  In that case we need
5346          * to destroy the temporary pool now.  This case looks like the case
5347          * where we never had a temporary pool configured but
5348          * zonecfg_destroy_tmp_pool will do the right thing either way.
5349          */
5350         if (!unmount_cmd) {
5351                 boolean_t destroy_tmp_pool = B_TRUE;
5352 
5353                 if (rebooting) {
5354                         struct zone_psettab pset_tab;
5355                         zone_dochandle_t handle;
5356 
5357                         if ((handle = zonecfg_init_handle()) != NULL &&
5358                             zonecfg_get_handle(zone_name, handle) == Z_OK &&
5359                             zonecfg_lookup_pset(handle, &pset_tab) == Z_OK)
5360                                 destroy_tmp_pool = B_FALSE;
5361 
5362                         zonecfg_fini_handle(handle);
5363                 }
5364 
5365                 if (destroy_tmp_pool) {
5366                         if ((res = zonecfg_destroy_tmp_pool(zone_name, pool_err,
5367                             sizeof (pool_err))) != Z_OK) {
5368                                 if (res == Z_POOL)
5369                                         zerror(zlogp, B_FALSE, pool_err);
5370                         }
5371                 }
5372         }
5373 
5374         remove_mlps(zlogp, zoneid);
5375 
5376         if (zone_destroy(zoneid) != 0) {
5377                 zerror(zlogp, B_TRUE, "unable to destroy zone");
5378                 goto error;
5379         }
5380 
5381         /*
5382          * Special teardown for alternate boot environments: remove the tmpfs