103 #include <libdladm.h>
 104 #include <sys/dls_mgmt.h>
 105 #include <libscf.h>
 106 
 107 #include <libzonecfg.h>
 108 #include <zonestat_impl.h>
 109 #include "zoneadmd.h"
 110 
 111 static char *progname;
 112 char *zone_name;        /* zone which we are managing */
 113 zone_dochandle_t snap_hndl;     /* handle for snapshot created when ready */
 114 char zonepath[MAXNAMELEN];
 115 char pool_name[MAXNAMELEN];
 116 char default_brand[MAXNAMELEN];
 117 char brand_name[MAXNAMELEN];
 118 boolean_t zone_isnative;
 119 boolean_t zone_iscluster;
 120 boolean_t zone_islabeled;
 121 boolean_t shutdown_in_progress;
 122 static zoneid_t zone_id;
 123 static zoneid_t zone_did = 0;
 124 dladm_handle_t dld_handle = NULL;
 125 
 126 char pre_statechg_hook[2 * MAXPATHLEN];
 127 char post_statechg_hook[2 * MAXPATHLEN];
 128 char query_hook[2 * MAXPATHLEN];
 129 
 130 zlog_t logsys;
 131 
 132 mutex_t lock = DEFAULTMUTEX;    /* to serialize stuff */
 133 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */
 134 
 135 static sema_t scratch_sem;      /* for scratch zones */
 136 
 137 static char     zone_door_path[MAXPATHLEN];
 138 static int      zone_door = -1;
 139 
 140 boolean_t in_death_throes = B_FALSE;    /* daemon is dying */
 141 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */
 142 
 143 #if !defined(TEXT_DOMAIN)               /* should be defined by cc -D */
 144 #define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it wasn't */
 145 #endif
 146 
 147 #define DEFAULT_LOCALE  "C"
 
 
 277 
 278 /*
 279  * Since illumos boot arguments are getopt(3c) compatible (see kernel(1m)), we
 280  * put the arguments into an argv style array, use getopt to process them,
 281  * and put the resultant argument string back into outargs. Non-native brands
 282  * may support alternate forms of boot arguments so we must handle that as well.
 283  *
 284  * During the filtering, we pull out any arguments which are truly "boot"
 285  * arguments, leaving only those which are to be passed intact to the
 286  * progenitor process.  The one we support at the moment is -i, which
 287  * indicates to the kernel which program should be launched as 'init'.
 288  *
 289  * Except for Z_OK, all other return values are treated as fatal.
 290  */
 291 static int
 292 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
 293     char *init_file)
 294 {
 295         int argc = 0, argc_save;
 296         int i;
 297         int err = Z_OK;
 298         char *arg, *lasts, **argv = NULL, **argv_save;
 299         char zonecfg_args[BOOTARGS_MAX];
 300         char scratchargs[BOOTARGS_MAX], *sargs;
 301         char scratchopt[3];
 302         char c;
 303 
 304         bzero(outargs, BOOTARGS_MAX);
 305 
 306         /*
 307          * If the user didn't specify transient boot arguments, check
 308          * to see if there were any specified in the zone configuration,
 309          * and use them if applicable.
 310          */
 311         if (inargs == NULL || inargs[0] == '\0')  {
 312                 bzero(zonecfg_args, sizeof (zonecfg_args));
 313                 (void) zonecfg_get_bootargs(snap_hndl, zonecfg_args,
 314                     sizeof (zonecfg_args));
 315                 inargs = zonecfg_args;
 316         }
 317 
 318         if (strlen(inargs) >= BOOTARGS_MAX) {
 319                 zerror(zlogp, B_FALSE, "boot argument string too long");
 320                 return (Z_INVAL);
 321         }
 322 
 323         (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
 324         sargs = scratchargs;
 325         while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
 326                 sargs = NULL;
 327                 argc++;
 328         }
 329 
 330         if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
 331                 zerror(zlogp, B_FALSE, "memory allocation failed");
 332                 return (Z_NOMEM);
 333         }
 334 
 335         argv_save = argv;
 
 445          * since anyone can open any UNIX domain socket, regardless of
 446          * its file system permissions.  Sigh...
 447          */
 448         if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
 449                 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
 450                 return (-1);
 451         }
 452         /* paranoia */
 453         if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
 454                 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
 455                 return (-1);
 456         }
 457         (void) chmod(ZONES_TMPDIR, S_IRWXU);
 458         return (0);
 459 }
 460 
 461 /*
 462  * Run the brand's pre-state change callback, if it exists.
 463  */
 464 static int
 465 brand_prestatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug)
 466 {
 467         char cmdbuf[2 * MAXPATHLEN];
 468         const char *altroot;
 469 
 470         if (pre_statechg_hook[0] == '\0')
 471                 return (0);
 472 
 473         altroot = zonecfg_get_root();
 474         if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
 475             state, cmd, altroot) > sizeof (cmdbuf))
 476                 return (-1);
 477 
 478         if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0)
 479                 return (-1);
 480 
 481         return (0);
 482 }
 483 
 484 /*
 485  * Run the brand's post-state change callback, if it exists.
 486  */
 487 static int
 488 brand_poststatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug)
 489 {
 490         char cmdbuf[2 * MAXPATHLEN];
 491         const char *altroot;
 492 
 493         if (post_statechg_hook[0] == '\0')
 494                 return (0);
 495 
 496         altroot = zonecfg_get_root();
 497         if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
 498             state, cmd, altroot) > sizeof (cmdbuf))
 499                 return (-1);
 500 
 501         if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0)
 502                 return (-1);
 503 
 504         return (0);
 505 }
 506 
 507 /*
 508  * Notify zonestatd of the new zone.  If zonestatd is not running, this
 509  * will do nothing.
 510  */
 511 static void
 512 notify_zonestatd(zoneid_t zoneid)
 513 {
 514         int cmd[2];
 515         int fd;
 516         door_arg_t params;
 517 
 518         fd = open(ZS_DOOR_PATH, O_RDONLY);
 519         if (fd < 0)
 520                 return;
 521 
 522         cmd[0] = ZSD_CMD_NEW_ZONE;
 523         cmd[1] = zoneid;
 524         params.data_ptr = (char *)&cmd;
 525         params.data_size = sizeof (cmd);
 526         params.desc_ptr = NULL;
 527         params.desc_num = 0;
 528         params.rbuf = NULL;
 529         params.rsize = NULL;
 530         (void) door_call(fd, ¶ms);
 531         (void) close(fd);
 532 }
 533 
 534 /*
 535  * Bring a zone up to the pre-boot "ready" stage.  The mount_cmd argument is
 536  * 'true' if this is being invoked as part of the processing for the "mount"
 537  * subcommand.
 538  *
 539  * If a scratch zone mount (ALT_MOUNT) is being performed then do not
 540  * call the state change hooks.
 541  */
 542 static int
 543 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate, boolean_t debug)
 544 {
 545         int err;
 546         boolean_t snapped = B_FALSE;
 547 
 548         if ((snap_hndl = zonecfg_init_handle()) == NULL) {
 549                 zerror(zlogp, B_TRUE, "getting zone configuration handle");
 550                 goto bad;
 551         }
 552         if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
 553                 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
 554                     zonecfg_strerror(err));
 555                 goto bad;
 556         }
 557         snapped = B_TRUE;
 558 
 559         if (zonecfg_get_snapshot_handle(zone_name, snap_hndl) != Z_OK) {
 560                 zerror(zlogp, B_FALSE, "invalid configuration snapshot");
 561                 goto bad;
 562         }
 563 
 564         if (zone_did == 0)
 565                 zone_did = zone_get_did(zone_name);
 566 
 567         if (!ALT_MOUNT(mount_cmd) &&
 568             brand_prestatechg(zlogp, zstate, Z_READY, debug) != 0)
 569                 goto bad;
 570 
 571         if ((zone_id = vplat_create(zlogp, mount_cmd, zone_did)) == -1)
 572                 goto bad;
 573 
 574         if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
 575                 bringup_failure_recovery = B_TRUE;
 576                 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE,
 577                     debug);
 578                 goto bad;
 579         }
 580 
 581         if (!ALT_MOUNT(mount_cmd) &&
 582             brand_poststatechg(zlogp, zstate, Z_READY, debug) != 0)
 583                 goto bad;
 584 
 585         return (0);
 586 
 587 bad:
 588         /*
 589          * If something goes wrong, we up the zones's state to the target
 590          * state, READY, and then invoke the hook as if we're halting.
 591          */
 592         if (!ALT_MOUNT(mount_cmd))
 593                 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT,
 594                     debug);
 595 
 596         if (snapped)
 597                 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
 598                         zerror(zlogp, B_FALSE, "destroying snapshot: %s",
 599                             zonecfg_strerror(err));
 600         zonecfg_fini_handle(snap_hndl);
 601         snap_hndl = NULL;
 602         return (-1);
 603 }
 604 
 605 int
 606 init_template(void)
 607 {
 608         int fd;
 609         int err = 0;
 610 
 611         fd = open64(CTFS_ROOT "/process/template", O_RDWR);
 612         if (fd == -1)
 613                 return (-1);
 614 
 615         /*
 616          * For now, zoneadmd doesn't do anything with the contract.
 617          * Deliver no events, don't inherit, and allow it to be orphaned.
 618          */
 619         err |= ct_tmpl_set_critical(fd, 0);
 620         err |= ct_tmpl_set_informative(fd, 0);
 621         err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
 
 762                 ct = -1;
 763         (void) ct_tmpl_clear(tmpl_fd);
 764         (void) close(tmpl_fd);
 765         if (waitpid(child, &child_status, 0) != child) {
 766                 /* unexpected: we must have been signalled */
 767                 (void) contract_abandon_id(ct);
 768                 return (-1);
 769         }
 770         (void) contract_abandon_id(ct);
 771         if (WEXITSTATUS(child_status) != 0) {
 772                 errno = WEXITSTATUS(child_status);
 773                 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
 774                 return (-1);
 775         }
 776 
 777         return (0);
 778 }
 779 
 780 /*
 781  * env variable name format
 782  *      _ZONECFG_{resource name}_{identifying attr. name}_{property name}
 783  * Any dashes (-) in the property names are replaced with underscore (_).
 784  */
 785 static void
 786 set_zonecfg_env(char *rsrc, char *attr, char *name, char *val)
 787 {
 788         char *p;
 789         /* Enough for maximal name, rsrc + attr, & slop for ZONECFG & _'s */
 790         char nm[2 * MAXNAMELEN + 32];
 791 
 792         if (attr == NULL)
 793                 (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s", rsrc,
 794                     name);
 795         else
 796                 (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s_%s", rsrc,
 797                     attr, name);
 798 
 799         p = nm;
 800         while ((p = strchr(p, '-')) != NULL)
 801                 *p++ = '_';
 802 
 803         (void) setenv(nm, val, 1);
 804 }
 805 
 806 /*
 807  * Export zonecfg network and device properties into environment for the boot
 808  * and state change hooks.
 809  * If debug is true, export the brand hook debug env. variable as well.
 810  *
 811  * We could export more of the config in the future, as necessary.
 812  */
 813 static int
 814 setup_subproc_env(boolean_t debug)
 815 {
 816         int res;
 817         struct zone_nwiftab ntab;
 818         struct zone_devtab dtab;
 819         struct zone_attrtab atab;
 820         char net_resources[MAXNAMELEN * 2];
 821         char dev_resources[MAXNAMELEN * 2];
 822 
 823         /* snap_hndl is null when called through the set_brand_env code path */
 824         if (snap_hndl == NULL)
 825                 return (Z_OK);
 826 
 827         net_resources[0] = '\0';
 828         if ((res = zonecfg_setnwifent(snap_hndl)) != Z_OK)
 829                 goto done;
 830 
 831         while (zonecfg_getnwifent(snap_hndl, &ntab) == Z_OK) {
 832                 struct zone_res_attrtab *rap;
 833                 char *phys;
 834 
 835                 phys = ntab.zone_nwif_physical;
 836 
 837                 (void) strlcat(net_resources, phys, sizeof (net_resources));
 838                 (void) strlcat(net_resources, " ", sizeof (net_resources));
 839 
 840                 set_zonecfg_env(RSRC_NET, phys, "physical", phys);
 841 
 842                 set_zonecfg_env(RSRC_NET, phys, "address",
 843                     ntab.zone_nwif_address);
 844                 set_zonecfg_env(RSRC_NET, phys, "allowed-address",
 845                     ntab.zone_nwif_allowed_address);
 846                 set_zonecfg_env(RSRC_NET, phys, "defrouter",
 847                     ntab.zone_nwif_defrouter);
 848                 set_zonecfg_env(RSRC_NET, phys, "global-nic",
 849                     ntab.zone_nwif_gnic);
 850                 set_zonecfg_env(RSRC_NET, phys, "mac-addr", ntab.zone_nwif_mac);
 851                 set_zonecfg_env(RSRC_NET, phys, "vlan-id",
 852                     ntab.zone_nwif_vlan_id);
 853 
 854                 for (rap = ntab.zone_nwif_attrp; rap != NULL;
 855                     rap = rap->zone_res_attr_next)
 856                         set_zonecfg_env(RSRC_NET, phys, rap->zone_res_attr_name,
 857                             rap->zone_res_attr_value);
 858                 nwifent_free_attrs(&ntab);
 859         }
 860 
 861         (void) setenv("_ZONECFG_net_resources", net_resources, 1);
 862 
 863         (void) zonecfg_endnwifent(snap_hndl);
 864 
 865         if ((res = zonecfg_setdevent(snap_hndl)) != Z_OK)
 866                 goto done;
 867 
 868         while (zonecfg_getdevent(snap_hndl, &dtab) == Z_OK) {
 869                 struct zone_res_attrtab *rap;
 870                 char *match;
 871 
 872                 match = dtab.zone_dev_match;
 873 
 874                 (void) strlcat(dev_resources, match, sizeof (dev_resources));
 875                 (void) strlcat(dev_resources, " ", sizeof (dev_resources));
 876 
 877                 for (rap = dtab.zone_dev_attrp; rap != NULL;
 878                     rap = rap->zone_res_attr_next)
 879                         set_zonecfg_env(RSRC_DEV, match,
 880                             rap->zone_res_attr_name, rap->zone_res_attr_value);
 881         }
 882 
 883         (void) zonecfg_enddevent(snap_hndl);
 884 
 885         if ((res = zonecfg_setattrent(snap_hndl)) != Z_OK)
 886                 goto done;
 887 
 888         while (zonecfg_getattrent(snap_hndl, &atab) == Z_OK) {
 889                 set_zonecfg_env("attr", NULL, atab.zone_attr_name,
 890                     atab.zone_attr_value);
 891         }
 892 
 893         (void) zonecfg_endattrent(snap_hndl);
 894 
 895         if (debug)
 896                 (void) setenv("_ZONEADMD_brand_debug", "1", 1);
 897         else
 898                 (void) setenv("_ZONEADMD_brand_debug", "", 1);
 899 
 900         res = Z_OK;
 901 
 902 done:
 903         return (res);
 904 }
 905 
 906 void
 907 nwifent_free_attrs(struct zone_nwiftab *np)
 908 {
 909         struct zone_res_attrtab *rap;
 910 
 911         for (rap = np->zone_nwif_attrp; rap != NULL; ) {
 912                 struct zone_res_attrtab *tp = rap;
 913 
 914                 rap = rap->zone_res_attr_next;
 915                 free(tp);
 916         }
 917 }
 918 
 919 /*
 920  * If retstr is not NULL, the output of the subproc is returned in the str,
 921  * otherwise it is output using zerror().  Any memory allocated for retstr
 922  * should be freed by the caller.
 923  */
 924 int
 925 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr, boolean_t debug)
 926 {
 927         char buf[1024];         /* arbitrary large amount */
 928         char *inbuf;
 929         FILE *file;
 930         int status;
 931         int rd_cnt;
 932 
 933         if (retstr != NULL) {
 934                 if ((*retstr = malloc(1024)) == NULL) {
 935                         zerror(zlogp, B_FALSE, "out of memory");
 936                         return (-1);
 937                 }
 938                 inbuf = *retstr;
 939                 rd_cnt = 0;
 940         } else {
 941                 inbuf = buf;
 942         }
 943 
 944         if (setup_subproc_env(debug) != Z_OK) {
 945                 zerror(zlogp, B_FALSE, "failed to setup environment");
 946                 return (-1);
 947         }
 948 
 949         file = popen(cmdbuf, "r");
 950         if (file == NULL) {
 951                 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
 952                 return (-1);
 953         }
 954 
 955         while (fgets(inbuf, 1024, file) != NULL) {
 956                 if (retstr == NULL) {
 957                         if (zlogp != &logsys) {
 958                                 int last = strlen(inbuf) - 1;
 959 
 960                                 if (inbuf[last] == '\n')
 961                                         inbuf[last] = '\0';
 962                                 zerror(zlogp, B_FALSE, "%s", inbuf);
 963                         }
 964                 } else {
 965                         char *p;
 966 
 967                         rd_cnt += 1024 - 1;
 968                         if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
 969                                 zerror(zlogp, B_FALSE, "out of memory");
 970                                 (void) pclose(file);
 971                                 return (-1);
 972                         }
 973 
 974                         *retstr = p;
 975                         inbuf = *retstr + rd_cnt;
 976                 }
 977         }
 978         status = pclose(file);
 979 
 980         if (WIFSIGNALED(status)) {
 981                 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
 982                     "signal %d", cmdbuf, WTERMSIG(status));
 983                 return (-1);
 984         }
 985         assert(WIFEXITED(status));
 986         if (WEXITSTATUS(status) == ZEXIT_EXEC) {
 987                 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
 988                 return (-1);
 989         }
 990         return (WEXITSTATUS(status));
 991 }
 992 
 993 /*
 994  * Get the path for this zone's init(1M) (or equivalent) process. First look
 995  * for a zone-specific init-name attr, then get it from the brand.
 996  */
 997 static int
 998 get_initname(brand_handle_t bh, char *initname, int len)
 999 {
1000         struct zone_attrtab a;
1001 
1002         bzero(&a, sizeof (a));
1003         (void) strlcpy(a.zone_attr_name, "init-name",
1004             sizeof (a.zone_attr_name));
1005 
1006         if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
1007                 (void) strlcpy(initname, a.zone_attr_value, len);
1008                 return (0);
1009         }
1010 
1011         return (brand_get_initname(bh, initname, len));
1012 }
1013 
1014 /*
1015  * Get the restart-init flag for this zone's init(1M) (or equivalent) process.
1016  * First look for a zone-specific restart-init attr, then get it from the brand.
1017  */
1018 static boolean_t
1019 restartinit(brand_handle_t bh)
1020 {
1021         struct zone_attrtab a;
1022 
1023         bzero(&a, sizeof (a));
1024         (void) strlcpy(a.zone_attr_name, "restart-init",
1025             sizeof (a.zone_attr_name));
1026 
1027         if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
1028                 if (strcmp(a.zone_attr_value, "false") == 0)
1029                         return (B_FALSE);
1030                 return (B_TRUE);
1031         }
1032 
1033         return (brand_restartinit(bh));
1034 }
1035 
1036 /*
1037  * Get the app-svc-dependent flag for this zone's init process. This is a
1038  * zone-specific attr which controls the type of contract we create for the
1039  * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal
1040  * set, so that when any service which is in the same contract exits, the init
1041  * application will be terminated.
1042  */
1043 static boolean_t
1044 is_app_svc_dep(brand_handle_t bh)
1045 {
1046         struct zone_attrtab a;
1047 
1048         bzero(&a, sizeof (a));
1049         (void) strlcpy(a.zone_attr_name, "app-svc-dependent",
1050             sizeof (a.zone_attr_name));
1051 
1052         if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
1053             strcmp(a.zone_attr_value, "true") == 0) {
1054                 return (B_TRUE);
1055         }
1056 
1057         return (B_FALSE);
1058 }
1059 
1060 static int
1061 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate, boolean_t debug)
1062 {
1063         zoneid_t zoneid;
1064         struct stat st;
1065         char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
1066         char nbootargs[BOOTARGS_MAX];
1067         char cmdbuf[MAXPATHLEN];
1068         fs_callback_t cb;
1069         brand_handle_t bh;
1070         zone_iptype_t iptype;
1071         dladm_status_t status;
1072         char errmsg[DLADM_STRSIZE];
1073         int err;
1074         boolean_t restart_init;
1075         boolean_t app_svc_dep;
1076 
1077         if (brand_prestatechg(zlogp, zstate, Z_BOOT, debug) != 0)
1078                 return (-1);
1079 
1080         if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1081                 zerror(zlogp, B_TRUE, "unable to get zoneid");
1082                 goto bad;
1083         }
1084 
1085         cb.zlogp = zlogp;
1086         cb.zoneid = zoneid;
1087         cb.mount_cmd = B_FALSE;
1088 
1089         /* Get a handle to the brand info for this zone */
1090         if ((bh = brand_open(brand_name)) == NULL) {
1091                 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1092                 goto bad;
1093         }
1094 
1095         /*
1096          * Get the list of filesystems to mount from the brand
1097          * configuration.  These mounts are done via a thread that will
 
1100          */
1101         if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
1102                 zerror(zlogp, B_FALSE, "unable to mount filesystems");
1103                 brand_close(bh);
1104                 goto bad;
1105         }
1106 
1107         /*
1108          * Get the brand's boot callback if it exists.
1109          */
1110         (void) strcpy(cmdbuf, EXEC_PREFIX);
1111         if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
1112             sizeof (cmdbuf) - EXEC_LEN) != 0) {
1113                 zerror(zlogp, B_FALSE,
1114                     "unable to determine branded zone's boot callback");
1115                 brand_close(bh);
1116                 goto bad;
1117         }
1118 
1119         /* Get the path for this zone's init(1M) (or equivalent) process.  */
1120         if (get_initname(bh, init_file, MAXPATHLEN) != 0) {
1121                 zerror(zlogp, B_FALSE,
1122                     "unable to determine zone's init(1M) location");
1123                 brand_close(bh);
1124                 goto bad;
1125         }
1126 
1127         /* See if we should restart init if it dies. */
1128         restart_init = restartinit(bh);
1129 
1130         /*
1131          * See if we need to setup contract dependencies between the zone's
1132          * primary application and any of its services.
1133          */
1134         app_svc_dep = is_app_svc_dep(bh);
1135 
1136         brand_close(bh);
1137 
1138         err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
1139         if (err != Z_OK)
1140                 goto bad;
1141 
1142         assert(init_file[0] != '\0');
1143 
1144         /*
1145          * Try to anticipate possible problems: If possible, make sure init is
1146          * executable.
1147          */
1148         if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) {
1149                 zerror(zlogp, B_FALSE, "unable to determine zone root");
1150                 goto bad;
1151         }
1152 
1153         (void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file);
1154 
1155         if (lstat(initpath, &st) == -1) {
1156                 zerror(zlogp, B_TRUE, "could not stat %s", initpath);
1157                 goto bad;
1158         }
1159 
1160         if ((st.st_mode & S_IFMT) == S_IFLNK) {
1161                 /* symlink, we'll have to wait and resolve when we boot */
1162         } else if ((st.st_mode & S_IXUSR) == 0) {
1163                 zerror(zlogp, B_FALSE, "%s is not executable", initpath);
1164                 goto bad;
1165         }
1166 
1167         /*
1168          * Exclusive stack zones interact with the dlmgmtd running in the
1169          * global zone.  dladm_zone_boot() tells dlmgmtd that this zone is
1170          * booting, and loads its datalinks from the zone's datalink
1171          * configuration file.
1172          */
1173         if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
1174                 status = dladm_zone_boot(dld_handle, zoneid);
1175                 if (status != DLADM_STATUS_OK) {
1176                         zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
1177                             " %s", dladm_status2str(status, errmsg));
1178                         goto bad;
1179                 }
1180         }
1181 
1182         /*
1183          * If there is a brand 'boot' callback, execute it now to give the
1184          * brand one last chance to do any additional setup before the zone
1185          * is booted.
1186          */
1187         if ((strlen(cmdbuf) > EXEC_LEN) &&
1188             (do_subproc(zlogp, cmdbuf, NULL, debug) != Z_OK)) {
1189                 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
1190                 goto bad;
1191         }
1192 
1193         if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
1194                 zerror(zlogp, B_TRUE, "could not set zone boot file");
1195                 goto bad;
1196         }
1197 
1198         if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
1199                 zerror(zlogp, B_TRUE, "could not set zone boot arguments");
1200                 goto bad;
1201         }
1202 
1203         if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART,
1204             NULL, 0) == -1) {
1205                 zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
1206                 goto bad;
1207         }
1208 
1209         if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT,
1210             (void *)B_TRUE, sizeof (boolean_t)) == -1) {
1211                 zerror(zlogp, B_TRUE, "could not set zone app-die");
1212                 goto bad;
1213         }
1214 
1215         /*
1216          * Inform zonestatd of a new zone so that it can install a door for
1217          * the zone to contact it.
1218          */
1219         notify_zonestatd(zone_id);
1220 
1221         if (zone_boot(zoneid) == -1) {
1222                 zerror(zlogp, B_TRUE, "unable to boot zone");
1223                 goto bad;
1224         }
1225 
1226         if (brand_poststatechg(zlogp, zstate, Z_BOOT, debug) != 0)
1227                 goto bad;
1228 
1229         /* Startup a thread to perform zfd logging/tty svc for the zone. */
1230         create_log_thread(zlogp, zone_id);
1231 
1232         /* Startup a thread to perform memory capping for the zone. */
1233         create_mcap_thread(zlogp, zone_id);
1234 
1235         return (0);
1236 
1237 bad:
1238         /*
1239          * If something goes wrong, we up the zones's state to the target
1240          * state, RUNNING, and then invoke the hook as if we're halting.
1241          */
1242         (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT, debug);
1243 
1244         return (-1);
1245 }
1246 
1247 static int
1248 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate,
1249     boolean_t debug)
1250 {
1251         int err;
1252 
1253         /*
1254          * If performing a scratch zone unmount then do not call the
1255          * state change hooks.
1256          */
1257         if (unmount_cmd == B_FALSE &&
1258             brand_prestatechg(zlogp, zstate, Z_HALT, debug) != 0)
1259                 return (-1);
1260 
1261         /* Shutting down, stop the memcap thread */
1262         destroy_mcap_thread();
1263 
1264         if (vplat_teardown(zlogp, unmount_cmd, rebooting, debug) != 0) {
1265                 if (!bringup_failure_recovery)
1266                         zerror(zlogp, B_FALSE, "unable to destroy zone");
1267                 destroy_log_thread();
1268                 return (-1);
1269         }
1270 
1271         /* Shut down is done, stop the log thread */
1272         destroy_log_thread();
1273 
1274         if (unmount_cmd == B_FALSE &&
1275             brand_poststatechg(zlogp, zstate, Z_HALT, debug) != 0)
1276                 return (-1);
1277 
1278         if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
1279                 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
1280                     zonecfg_strerror(err));
1281 
1282         zonecfg_fini_handle(snap_hndl);
1283         snap_hndl = NULL;
1284 
1285         return (0);
1286 }
1287 
1288 static int
1289 zone_graceful_shutdown(zlog_t *zlogp)
1290 {
1291         zoneid_t zoneid;
1292         pid_t child;
1293         char cmdbuf[MAXPATHLEN];
1294         brand_handle_t bh = NULL;
1295         ctid_t ct;
1296         int tmpl_fd;
1297         int child_status;
1298 
1299         if (shutdown_in_progress) {
1300                 zerror(zlogp, B_FALSE, "shutdown already in progress");
1301                 return (-1);
1302         }
1303 
1304         if ((zoneid = getzoneidbyname(zone_name)) == -1) {
 
1480                 return;
1481 
1482         (void) write(fd, buf, strlen(buf));
1483 
1484         (void) close(fd);
1485 }
1486 
1487 /*
1488  * The main routine for the door server that deals with zone state transitions.
1489  */
1490 /* ARGSUSED */
1491 static void
1492 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1493     uint_t n_desc)
1494 {
1495         ucred_t *uc = NULL;
1496         const priv_set_t *eset;
1497 
1498         zone_state_t zstate;
1499         zone_cmd_t cmd;
1500         boolean_t debug;
1501         int init_status;
1502         zone_cmd_arg_t *zargp;
1503 
1504         boolean_t kernelcall = B_TRUE;
1505 
1506         int rval = -1;
1507         uint64_t uniqid;
1508         zoneid_t zoneid = -1;
1509         zlog_t zlog;
1510         zlog_t *zlogp;
1511         zone_cmd_rval_t *rvalp;
1512         size_t rlen = getpagesize(); /* conservative */
1513         fs_callback_t cb;
1514         brand_handle_t bh;
1515         boolean_t wait_shut = B_FALSE;
1516 
1517         /* LINTED E_BAD_PTR_CAST_ALIGN */
1518         zargp = (zone_cmd_arg_t *)args;
1519 
1520         /*
1521          * When we get the door unref message, we've fdetach'd the door, and
1522          * it is time for us to shut down zoneadmd.
1523          */
1524         if (zargp == DOOR_UNREF_DATA) {
 
1534 
1535         rvalp = alloca(rlen);
1536         bzero(rvalp, rlen);
1537         zlog.logfile = NULL;
1538         zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1539         zlog.buf = rvalp->errbuf;
1540         zlog.log = zlog.buf;
1541         /* defer initialization of zlog.locale until after credential check */
1542         zlogp = &zlog;
1543 
1544         if (alen != sizeof (zone_cmd_arg_t)) {
1545                 /*
1546                  * This really shouldn't be happening.
1547                  */
1548                 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1549                     "unexpected (expected %d bytes)", alen,
1550                     sizeof (zone_cmd_arg_t));
1551                 goto out;
1552         }
1553         cmd = zargp->cmd;
1554         debug = zargp->debug;
1555         init_status = zargp->status;
1556 
1557         if (door_ucred(&uc) != 0) {
1558                 zerror(&logsys, B_TRUE, "door_ucred");
1559                 goto out;
1560         }
1561         eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1562         if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1563             (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1564             ucred_geteuid(uc) != 0)) {
1565                 zerror(&logsys, B_FALSE, "insufficient privileges");
1566                 goto out;
1567         }
1568 
1569         kernelcall = ucred_getpid(uc) == 0;
1570 
1571         /*
1572          * This is safe because we only use a zlog_t throughout the
1573          * duration of a door call; i.e., by the time the pointer
1574          * might become invalid, the door call would be over.
 
1642          */
1643         if (zstate == ZONE_STATE_INCOMPLETE &&
1644             (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1645                 zstate = ZONE_STATE_INSTALLED;
1646 
1647         switch (zstate) {
1648         case ZONE_STATE_CONFIGURED:
1649         case ZONE_STATE_INCOMPLETE:
1650                 /*
1651                  * Not our area of expertise; we just print a nice message
1652                  * and die off.
1653                  */
1654                 zerror(zlogp, B_FALSE,
1655                     "%s operation is invalid for zones in state '%s'",
1656                     z_cmd_name(cmd), zone_state_str(zstate));
1657                 break;
1658 
1659         case ZONE_STATE_INSTALLED:
1660                 switch (cmd) {
1661                 case Z_READY:
1662                         rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, debug);
1663                         if (rval == 0)
1664                                 eventstream_write(Z_EVT_ZONE_READIED);
1665                         zcons_statechanged();
1666                         break;
1667                 case Z_BOOT:
1668                 case Z_FORCEBOOT:
1669                         eventstream_write(Z_EVT_ZONE_BOOTING);
1670                         if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
1671                             debug)) == 0) {
1672                                 rval = zone_bootup(zlogp, zargp->bootbuf,
1673                                     zstate, debug);
1674                         }
1675                         audit_put_record(zlogp, uc, rval, "boot");
1676                         zcons_statechanged();
1677                         if (rval != 0) {
1678                                 bringup_failure_recovery = B_TRUE;
1679                                 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1680                                     zstate, debug);
1681                                 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1682                         }
1683                         break;
1684                 case Z_SHUTDOWN:
1685                 case Z_HALT:
1686                         if (kernelcall) /* Invalid; can't happen */
1687                                 abort();
1688                         /*
1689                          * We could have two clients racing to halt this
1690                          * zone; the second client loses, but his request
1691                          * doesn't fail, since the zone is now in the desired
1692                          * state.
1693                          */
1694                         zerror(zlogp, B_FALSE, "zone is already halted");
1695                         rval = 0;
1696                         break;
1697                 case Z_REBOOT:
1698                         if (kernelcall) /* Invalid; can't happen */
1699                                 abort();
1700                         zerror(zlogp, B_FALSE, "%s operation is invalid "
 
1712                         eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1713                         break;
1714                 case Z_MOUNT:
1715                 case Z_FORCEMOUNT:
1716                         if (kernelcall) /* Invalid; can't happen */
1717                                 abort();
1718                         if (!zone_isnative && !zone_iscluster &&
1719                             !zone_islabeled) {
1720                                 /*
1721                                  * -U mounts the zone without lofs mounting
1722                                  * zone file systems back into the scratch
1723                                  * zone.  This is required when mounting
1724                                  * non-native branded zones.
1725                                  */
1726                                 (void) strlcpy(zargp->bootbuf, "-U",
1727                                     BOOTARGS_MAX);
1728                         }
1729 
1730                         rval = zone_ready(zlogp,
1731                             strcmp(zargp->bootbuf, "-U") == 0 ?
1732                             Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate, debug);
1733                         if (rval != 0)
1734                                 break;
1735 
1736                         eventstream_write(Z_EVT_ZONE_READIED);
1737 
1738                         /*
1739                          * Get a handle to the default brand info.
1740                          * We must always use the default brand file system
1741                          * list when mounting the zone.
1742                          */
1743                         if ((bh = brand_open(default_brand)) == NULL) {
1744                                 rval = -1;
1745                                 break;
1746                         }
1747 
1748                         /*
1749                          * Get the list of filesystems to mount from
1750                          * the brand configuration.  These mounts are done
1751                          * via a thread that will enter the zone, so they
1752                          * are done from within the context of the zone.
 
1774                                 abort();
1775                         zerror(zlogp, B_FALSE, "zone is already unmounted");
1776                         rval = 0;
1777                         break;
1778                 }
1779                 break;
1780 
1781         case ZONE_STATE_READY:
1782                 switch (cmd) {
1783                 case Z_READY:
1784                         /*
1785                          * We could have two clients racing to ready this
1786                          * zone; the second client loses, but his request
1787                          * doesn't fail, since the zone is now in the desired
1788                          * state.
1789                          */
1790                         zerror(zlogp, B_FALSE, "zone is already ready");
1791                         rval = 0;
1792                         break;
1793                 case Z_BOOT:
1794                 case Z_FORCEBOOT:
1795                         (void) strlcpy(boot_args, zargp->bootbuf,
1796                             sizeof (boot_args));
1797                         eventstream_write(Z_EVT_ZONE_BOOTING);
1798                         rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
1799                             debug);
1800                         audit_put_record(zlogp, uc, rval, "boot");
1801                         zcons_statechanged();
1802                         if (rval != 0) {
1803                                 bringup_failure_recovery = B_TRUE;
1804                                 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1805                                     zstate, debug);
1806                                 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1807                         }
1808                         boot_args[0] = '\0';
1809                         break;
1810                 case Z_HALT:
1811                         if (kernelcall) /* Invalid; can't happen */
1812                                 abort();
1813                         if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
1814                             debug)) != 0)
1815                                 break;
1816                         zcons_statechanged();
1817                         eventstream_write(Z_EVT_ZONE_HALTED);
1818                         break;
1819                 case Z_SHUTDOWN:
1820                 case Z_REBOOT:
1821                 case Z_NOTE_UNINSTALLING:
1822                 case Z_MOUNT:
1823                 case Z_FORCEMOUNT:
1824                 case Z_UNMOUNT:
1825                         if (kernelcall) /* Invalid; can't happen */
1826                                 abort();
1827                         zerror(zlogp, B_FALSE, "%s operation is invalid "
1828                             "for zones in state '%s'", z_cmd_name(cmd),
1829                             zone_state_str(zstate));
1830                         rval = -1;
1831                         break;
1832                 }
1833                 break;
1834 
1835         case ZONE_STATE_MOUNTED:
1836                 switch (cmd) {
1837                 case Z_UNMOUNT:
1838                         if (kernelcall) /* Invalid; can't happen */
1839                                 abort();
1840                         rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate, debug);
1841                         if (rval == 0) {
1842                                 eventstream_write(Z_EVT_ZONE_HALTED);
1843                                 (void) sema_post(&scratch_sem);
1844                         }
1845                         break;
1846                 default:
1847                         if (kernelcall) /* Invalid; can't happen */
1848                                 abort();
1849                         zerror(zlogp, B_FALSE, "%s operation is invalid "
1850                             "for zones in state '%s'", z_cmd_name(cmd),
1851                             zone_state_str(zstate));
1852                         rval = -1;
1853                         break;
1854                 }
1855                 break;
1856 
1857         case ZONE_STATE_RUNNING:
1858         case ZONE_STATE_SHUTTING_DOWN:
1859         case ZONE_STATE_DOWN:
1860                 switch (cmd) {
1861                 case Z_READY:
1862                         if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
1863                             debug)) != 0)
1864                                 break;
1865                         zcons_statechanged();
1866                         if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
1867                             debug)) == 0)
1868                                 eventstream_write(Z_EVT_ZONE_READIED);
1869                         else
1870                                 eventstream_write(Z_EVT_ZONE_HALTED);
1871                         break;
1872                 case Z_BOOT:
1873                 case Z_FORCEBOOT:
1874                         /*
1875                          * We could have two clients racing to boot this
1876                          * zone; the second client loses, but his request
1877                          * doesn't fail, since the zone is now in the desired
1878                          * state.
1879                          */
1880                         zerror(zlogp, B_FALSE, "zone is already booted");
1881                         rval = 0;
1882                         break;
1883                 case Z_HALT:
1884                         if (kernelcall) {
1885                                 log_init_exit(init_status);
1886                         } else {
1887                                 log_init_exit(-1);
1888                         }
1889                         if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
1890                             debug)) != 0)
1891                                 break;
1892                         eventstream_write(Z_EVT_ZONE_HALTED);
1893                         zcons_statechanged();
1894                         break;
1895                 case Z_REBOOT:
1896                         (void) strlcpy(boot_args, zargp->bootbuf,
1897                             sizeof (boot_args));
1898                         eventstream_write(Z_EVT_ZONE_REBOOTING);
1899                         if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
1900                             debug)) != 0) {
1901                                 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1902                                 boot_args[0] = '\0';
1903                                 break;
1904                         }
1905                         zcons_statechanged();
1906                         if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
1907                             debug)) != 0) {
1908                                 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1909                                 boot_args[0] = '\0';
1910                                 break;
1911                         }
1912                         rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
1913                             debug);
1914                         audit_put_record(zlogp, uc, rval, "reboot");
1915                         if (rval != 0) {
1916                                 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1917                                     zstate, debug);
1918                                 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1919                         }
1920                         boot_args[0] = '\0';
1921                         break;
1922                 case Z_SHUTDOWN:
1923                         if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
1924                                 wait_shut = B_TRUE;
1925                         }
1926                         break;
1927                 case Z_NOTE_UNINSTALLING:
1928                 case Z_MOUNT:
1929                 case Z_FORCEMOUNT:
1930                 case Z_UNMOUNT:
1931                         zerror(zlogp, B_FALSE, "%s operation is invalid "
1932                             "for zones in state '%s'", z_cmd_name(cmd),
1933                             zone_state_str(zstate));
1934                         rval = -1;
1935                         break;
1936                 }
1937                 break;
1938         default:
1939                 abort();
1940         }
1941 
1942         /*
1943          * Because the state of the zone may have changed, we make sure
1944          * to wake the console poller, which is in charge of initiating
1945          * the shutdown procedure as necessary.
1946          */
1947         eventstream_write(Z_EVT_NULL);
1948 
1949 out:
 
2118                                     zonecfg_get_snapshot_handle(zone_name,
2119                                     snap_hndl) != Z_OK) {
2120                                         zerror(zlogp, B_FALSE, "recovering "
2121                                             "zone configuration handle");
2122                                         goto out;
2123                                 }
2124                         }
2125                 }
2126 
2127                 (void) fdetach(zone_door_path);
2128                 (void) close(doorfd);
2129                 goto top;
2130         }
2131         ret = 0;
2132 out:
2133         (void) close(doorfd);
2134         return (ret);
2135 }
2136 
2137 /*
2138  * Run the query hook with the 'env' parameter.  It should return a
2139  * string of tab-delimited key-value pairs, each of which should be set
2140  * in the environment.
2141  *
2142  * Because the env_vars string values become part of the environment, the
2143  * string is static and we don't free it.
2144  *
2145  * This function is always called before zoneadmd forks and makes itself
2146  * exclusive, so it is possible there could more than one instance of zoneadmd
2147  * running in parallel at this point. Thus, we have no zonecfg snapshot and
2148  * shouldn't take one yet (i.e. snap_hndl is NULL). Thats ok, since we don't
2149  * need any zonecfg info to query for a brand-specific env value.
2150  */
2151 static int
2152 set_brand_env(zlog_t *zlogp)
2153 {
2154         int ret = 0;
2155         static char *env_vars = NULL;
2156         char buf[2 * MAXPATHLEN];
2157 
2158         if (query_hook[0] == '\0' || env_vars != NULL)
2159                 return (0);
2160 
2161         if (snprintf(buf, sizeof (buf), "%s env", query_hook) > sizeof (buf))
2162                 return (-1);
2163 
2164         if (do_subproc(zlogp, buf, &env_vars, B_FALSE) != 0)
2165                 return (-1);
2166 
2167         if (env_vars != NULL) {
2168                 char *sp;
2169 
2170                 sp = strtok(env_vars, "\t");
2171                 while (sp != NULL) {
2172                         if (putenv(sp) != 0) {
2173                                 ret = -1;
2174                                 break;
2175                         }
2176                         sp = strtok(NULL, "\t");
2177                 }
2178         }
2179 
2180         return (ret);
2181 }
2182 
2183 /*
2184  * Setup the brand's pre and post state change callbacks, as well as the
2185  * query callback, if any of these exist.
2186  */
2187 static int
2188 brand_callback_init(brand_handle_t bh, char *zone_name)
2189 {
2190         (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
2191             sizeof (pre_statechg_hook));
2192 
2193         if (brand_get_prestatechange(bh, zone_name, zonepath,
2194             pre_statechg_hook + EXEC_LEN,
2195             sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
2196                 return (-1);
2197 
2198         if (strlen(pre_statechg_hook) <= EXEC_LEN)
2199                 pre_statechg_hook[0] = '\0';
2200 
2201         (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
2202             sizeof (post_statechg_hook));
2203 
 
2399          */
2400         if ((privset = priv_allocset()) == NULL) {
2401                 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
2402                 return (1);
2403         }
2404 
2405         if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2406                 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
2407                 priv_freeset(privset);
2408                 return (1);
2409         }
2410 
2411         if (priv_isfullset(privset) == B_FALSE) {
2412                 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
2413                     "run this command (all privs required)");
2414                 priv_freeset(privset);
2415                 return (1);
2416         }
2417         priv_freeset(privset);
2418 
2419         if (set_brand_env(zlogp) != 0) {
2420                 zerror(zlogp, B_FALSE, "Unable to setup brand's environment");
2421                 return (1);
2422         }
2423 
2424         if (mkzonedir(zlogp) != 0)
2425                 return (1);
2426 
2427         /*
2428          * Pre-fork: setup shared state
2429          */
2430         if ((shstate = (void *)mmap(NULL, shstatelen,
2431             PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
2432             MAP_FAILED) {
2433                 zerror(zlogp, B_TRUE, "%s failed", "mmap");
2434                 return (1);
2435         }
2436         if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
2437                 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
2438                 (void) munmap((char *)shstate, shstatelen);
2439                 return (1);
2440         }
2441         shstate->log.logfile = NULL;
2442         shstate->log.buflen = shstatelen - sizeof (*shstate);
2443         shstate->log.loglen = shstate->log.buflen;
 
 | 
 
 
 103 #include <libdladm.h>
 104 #include <sys/dls_mgmt.h>
 105 #include <libscf.h>
 106 
 107 #include <libzonecfg.h>
 108 #include <zonestat_impl.h>
 109 #include "zoneadmd.h"
 110 
 111 static char *progname;
 112 char *zone_name;        /* zone which we are managing */
 113 zone_dochandle_t snap_hndl;     /* handle for snapshot created when ready */
 114 char zonepath[MAXNAMELEN];
 115 char pool_name[MAXNAMELEN];
 116 char default_brand[MAXNAMELEN];
 117 char brand_name[MAXNAMELEN];
 118 boolean_t zone_isnative;
 119 boolean_t zone_iscluster;
 120 boolean_t zone_islabeled;
 121 boolean_t shutdown_in_progress;
 122 static zoneid_t zone_id;
 123 dladm_handle_t dld_handle = NULL;
 124 
 125 static char pre_statechg_hook[2 * MAXPATHLEN];
 126 static char post_statechg_hook[2 * MAXPATHLEN];
 127 char query_hook[2 * MAXPATHLEN];
 128 
 129 zlog_t logsys;
 130 
 131 mutex_t lock = DEFAULTMUTEX;    /* to serialize stuff */
 132 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */
 133 
 134 static sema_t scratch_sem;      /* for scratch zones */
 135 
 136 static char     zone_door_path[MAXPATHLEN];
 137 static int      zone_door = -1;
 138 
 139 boolean_t in_death_throes = B_FALSE;    /* daemon is dying */
 140 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */
 141 
 142 #if !defined(TEXT_DOMAIN)               /* should be defined by cc -D */
 143 #define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it wasn't */
 144 #endif
 145 
 146 #define DEFAULT_LOCALE  "C"
 
 
 276 
 277 /*
 278  * Since illumos boot arguments are getopt(3c) compatible (see kernel(1m)), we
 279  * put the arguments into an argv style array, use getopt to process them,
 280  * and put the resultant argument string back into outargs. Non-native brands
 281  * may support alternate forms of boot arguments so we must handle that as well.
 282  *
 283  * During the filtering, we pull out any arguments which are truly "boot"
 284  * arguments, leaving only those which are to be passed intact to the
 285  * progenitor process.  The one we support at the moment is -i, which
 286  * indicates to the kernel which program should be launched as 'init'.
 287  *
 288  * Except for Z_OK, all other return values are treated as fatal.
 289  */
 290 static int
 291 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
 292     char *init_file)
 293 {
 294         int argc = 0, argc_save;
 295         int i;
 296         int err;
 297         char *arg, *lasts, **argv = NULL, **argv_save;
 298         char zonecfg_args[BOOTARGS_MAX];
 299         char scratchargs[BOOTARGS_MAX], *sargs;
 300         char scratchopt[3];
 301         char c;
 302 
 303         bzero(outargs, BOOTARGS_MAX);
 304 
 305         /*
 306          * If the user didn't specify transient boot arguments, check
 307          * to see if there were any specified in the zone configuration,
 308          * and use them if applicable.
 309          */
 310         if (inargs == NULL || inargs[0] == '\0')  {
 311                 zone_dochandle_t handle;
 312                 if ((handle = zonecfg_init_handle()) == NULL) {
 313                         zerror(zlogp, B_TRUE,
 314                             "getting zone configuration handle");
 315                         return (Z_BAD_HANDLE);
 316                 }
 317                 err = zonecfg_get_snapshot_handle(zone_name, handle);
 318                 if (err != Z_OK) {
 319                         zerror(zlogp, B_FALSE,
 320                             "invalid configuration snapshot");
 321                         zonecfg_fini_handle(handle);
 322                         return (Z_BAD_HANDLE);
 323                 }
 324 
 325                 bzero(zonecfg_args, sizeof (zonecfg_args));
 326                 (void) zonecfg_get_bootargs(handle, zonecfg_args,
 327                     sizeof (zonecfg_args));
 328                 inargs = zonecfg_args;
 329                 zonecfg_fini_handle(handle);
 330         }
 331 
 332         if (strlen(inargs) >= BOOTARGS_MAX) {
 333                 zerror(zlogp, B_FALSE, "boot argument string too long");
 334                 return (Z_INVAL);
 335         }
 336 
 337         (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
 338         sargs = scratchargs;
 339         while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
 340                 sargs = NULL;
 341                 argc++;
 342         }
 343 
 344         if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
 345                 zerror(zlogp, B_FALSE, "memory allocation failed");
 346                 return (Z_NOMEM);
 347         }
 348 
 349         argv_save = argv;
 
 459          * since anyone can open any UNIX domain socket, regardless of
 460          * its file system permissions.  Sigh...
 461          */
 462         if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
 463                 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
 464                 return (-1);
 465         }
 466         /* paranoia */
 467         if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
 468                 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
 469                 return (-1);
 470         }
 471         (void) chmod(ZONES_TMPDIR, S_IRWXU);
 472         return (0);
 473 }
 474 
 475 /*
 476  * Run the brand's pre-state change callback, if it exists.
 477  */
 478 static int
 479 brand_prestatechg(zlog_t *zlogp, int state, int cmd)
 480 {
 481         char cmdbuf[2 * MAXPATHLEN];
 482         const char *altroot;
 483 
 484         if (pre_statechg_hook[0] == '\0')
 485                 return (0);
 486 
 487         altroot = zonecfg_get_root();
 488         if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
 489             state, cmd, altroot) > sizeof (cmdbuf))
 490                 return (-1);
 491 
 492         if (do_subproc(zlogp, cmdbuf, NULL) != 0)
 493                 return (-1);
 494 
 495         return (0);
 496 }
 497 
 498 /*
 499  * Run the brand's post-state change callback, if it exists.
 500  */
 501 static int
 502 brand_poststatechg(zlog_t *zlogp, int state, int cmd)
 503 {
 504         char cmdbuf[2 * MAXPATHLEN];
 505         const char *altroot;
 506 
 507         if (post_statechg_hook[0] == '\0')
 508                 return (0);
 509 
 510         altroot = zonecfg_get_root();
 511         if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
 512             state, cmd, altroot) > sizeof (cmdbuf))
 513                 return (-1);
 514 
 515         if (do_subproc(zlogp, cmdbuf, NULL) != 0)
 516                 return (-1);
 517 
 518         return (0);
 519 }
 520 
 521 /*
 522  * Notify zonestatd of the new zone.  If zonestatd is not running, this
 523  * will do nothing.
 524  */
 525 static void
 526 notify_zonestatd(zoneid_t zoneid)
 527 {
 528         int cmd[2];
 529         int fd;
 530         door_arg_t params;
 531 
 532         fd = open(ZS_DOOR_PATH, O_RDONLY);
 533         if (fd < 0)
 534                 return;
 535 
 536         cmd[0] = ZSD_CMD_NEW_ZONE;
 537         cmd[1] = zoneid;
 538         params.data_ptr = (char *)&cmd;
 539         params.data_size = sizeof (cmd);
 540         params.desc_ptr = NULL;
 541         params.desc_num = 0;
 542         params.rbuf = NULL;
 543         params.rsize = NULL;
 544         (void) door_call(fd, ¶ms);
 545         (void) close(fd);
 546 }
 547 
 548 /*
 549  * Bring a zone up to the pre-boot "ready" stage.  The mount_cmd argument is
 550  * 'true' if this is being invoked as part of the processing for the "mount"
 551  * subcommand.
 552  */
 553 static int
 554 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
 555 {
 556         int err;
 557 
 558         if (!ALT_MOUNT(mount_cmd) &&
 559             brand_prestatechg(zlogp, zstate, Z_READY) != 0)
 560                 return (-1);
 561 
 562         if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
 563                 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
 564                     zonecfg_strerror(err));
 565                 goto bad;
 566         }
 567 
 568         if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
 569                 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
 570                         zerror(zlogp, B_FALSE, "destroying snapshot: %s",
 571                             zonecfg_strerror(err));
 572                 goto bad;
 573         }
 574         if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
 575                 bringup_failure_recovery = B_TRUE;
 576                 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
 577                 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
 578                         zerror(zlogp, B_FALSE, "destroying snapshot: %s",
 579                             zonecfg_strerror(err));
 580                 goto bad;
 581         }
 582 
 583         if (!ALT_MOUNT(mount_cmd) &&
 584             brand_poststatechg(zlogp, zstate, Z_READY) != 0)
 585                 goto bad;
 586 
 587         return (0);
 588 
 589 bad:
 590         /*
 591          * If something goes wrong, we up the zones's state to the target
 592          * state, READY, and then invoke the hook as if we're halting.
 593          */
 594         if (!ALT_MOUNT(mount_cmd))
 595                 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
 596         return (-1);
 597 }
 598 
 599 int
 600 init_template(void)
 601 {
 602         int fd;
 603         int err = 0;
 604 
 605         fd = open64(CTFS_ROOT "/process/template", O_RDWR);
 606         if (fd == -1)
 607                 return (-1);
 608 
 609         /*
 610          * For now, zoneadmd doesn't do anything with the contract.
 611          * Deliver no events, don't inherit, and allow it to be orphaned.
 612          */
 613         err |= ct_tmpl_set_critical(fd, 0);
 614         err |= ct_tmpl_set_informative(fd, 0);
 615         err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
 
 756                 ct = -1;
 757         (void) ct_tmpl_clear(tmpl_fd);
 758         (void) close(tmpl_fd);
 759         if (waitpid(child, &child_status, 0) != child) {
 760                 /* unexpected: we must have been signalled */
 761                 (void) contract_abandon_id(ct);
 762                 return (-1);
 763         }
 764         (void) contract_abandon_id(ct);
 765         if (WEXITSTATUS(child_status) != 0) {
 766                 errno = WEXITSTATUS(child_status);
 767                 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
 768                 return (-1);
 769         }
 770 
 771         return (0);
 772 }
 773 
 774 /*
 775  * env variable name format
 776  *      _ZONECFG;{resource name};{identifying attr. name};{property name}
 777  */
 778 static void
 779 set_zonecfg_env(char *rsrc, char *attr, char *name, char *val)
 780 {
 781         char *p;
 782         /* Enough for maximal name, rsrc + attr, & slop for ZONECFG & _'s */
 783         char nm[2 * MAXNAMELEN + 32];
 784 
 785         if (attr == NULL)
 786                 (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s", rsrc,
 787                     name);
 788         else
 789                 (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s_%s", rsrc,
 790                     attr, name);
 791 
 792         p = nm;
 793         while ((p = strchr(p, '-')) != NULL)
 794                 *p++ = '_';
 795 
 796         (void) setenv(nm, val, 1);
 797 }
 798 
 799 /*
 800  * Export zonecfg network and device properties into environment for the boot
 801  * and state change hooks.
 802  * If debug is true, export the brand hook debug env. variable as well.
 803  *
 804  * We could export more of the config in the future, as necessary.
 805  */
 806 static int
 807 setup_subproc_env()
 808 {
 809         int res;
 810         zone_dochandle_t handle;
 811         struct zone_nwiftab ntab;
 812         struct zone_devtab dtab;
 813         char net_resources[MAXNAMELEN * 2];
 814         char dev_resources[MAXNAMELEN * 2];
 815 
 816         if ((handle = zonecfg_init_handle()) == NULL)
 817                 exit(Z_NOMEM);
 818 
 819         if ((res = zonecfg_get_handle(zone_name, handle)) != Z_OK)
 820                 goto done;
 821 
 822         if ((res = zonecfg_setnwifent(handle)) != Z_OK)
 823                 goto done;
 824 
 825         while (zonecfg_getnwifent(handle, &ntab) == Z_OK) {
 826                 struct zone_res_attrtab *rap;
 827                 char *phys;
 828 
 829                 phys = ntab.zone_nwif_physical;
 830 
 831                 (void) strlcat(net_resources, phys, sizeof (net_resources));
 832                 (void) strlcat(net_resources, " ", sizeof (net_resources));
 833 
 834                 set_zonecfg_env(RSRC_NET, phys, "physical", phys);
 835 
 836                 set_zonecfg_env(RSRC_NET, phys, "address",
 837                     ntab.zone_nwif_address);
 838                 set_zonecfg_env(RSRC_NET, phys, "allowed-address",
 839                     ntab.zone_nwif_allowed_address);
 840                 set_zonecfg_env(RSRC_NET, phys, "defrouter",
 841                     ntab.zone_nwif_defrouter);
 842                 set_zonecfg_env(RSRC_NET, phys, "global-nic",
 843                     ntab.zone_nwif_gnic);
 844                 set_zonecfg_env(RSRC_NET, phys, "mac-addr", ntab.zone_nwif_mac);
 845                 set_zonecfg_env(RSRC_NET, phys, "vlan-id",
 846                     ntab.zone_nwif_vlan_id);
 847 
 848                 for (rap = ntab.zone_nwif_attrp; rap != NULL;
 849                     rap = rap->zone_res_attr_next)
 850                         set_zonecfg_env(RSRC_NET, phys, rap->zone_res_attr_name,
 851                             rap->zone_res_attr_value);
 852         }
 853 
 854         (void) zonecfg_endnwifent(handle);
 855 
 856         if ((res = zonecfg_setdevent(handle)) != Z_OK)
 857                 goto done;
 858 
 859         while (zonecfg_getdevent(handle, &dtab) == Z_OK) {
 860                 struct zone_res_attrtab *rap;
 861                 char *match;
 862 
 863                 match = dtab.zone_dev_match;
 864 
 865                 (void) strlcat(dev_resources, match, sizeof (dev_resources));
 866                 (void) strlcat(dev_resources, " ", sizeof (dev_resources));
 867 
 868                 for (rap = dtab.zone_dev_attrp; rap != NULL;
 869                     rap = rap->zone_res_attr_next)
 870                         set_zonecfg_env(RSRC_DEV, match,
 871                             rap->zone_res_attr_name, rap->zone_res_attr_value);
 872         }
 873 
 874         (void) zonecfg_enddevent(handle);
 875 
 876         res = Z_OK;
 877 
 878 done:
 879         zonecfg_fini_handle(handle);
 880         return (res);
 881 }
 882 
 883 /*
 884  * If retstr is not NULL, the output of the subproc is returned in the str,
 885  * otherwise it is output using zerror().  Any memory allocated for retstr
 886  * should be freed by the caller.
 887  */
 888 int
 889 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
 890 {
 891         char buf[1024];         /* arbitrary large amount */
 892         char *inbuf;
 893         FILE *file;
 894         int status;
 895         int rd_cnt;
 896 
 897         if (retstr != NULL) {
 898                 if ((*retstr = malloc(1024)) == NULL) {
 899                         zerror(zlogp, B_FALSE, "out of memory");
 900                         return (-1);
 901                 }
 902                 inbuf = *retstr;
 903                 rd_cnt = 0;
 904         } else {
 905                 inbuf = buf;
 906         }
 907 
 908         if (setup_subproc_env() != Z_OK) {
 909                 zerror(zlogp, B_FALSE, "failed to setup environment");
 910                 return (-1);
 911         }
 912 
 913         file = popen(cmdbuf, "r");
 914         if (file == NULL) {
 915                 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
 916                 return (-1);
 917         }
 918 
 919         while (fgets(inbuf, 1024, file) != NULL) {
 920                 if (retstr == NULL) {
 921                         if (zlogp != &logsys)
 922                                 zerror(zlogp, B_FALSE, "%s", inbuf);
 923                 } else {
 924                         char *p;
 925 
 926                         rd_cnt += 1024 - 1;
 927                         if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
 928                                 zerror(zlogp, B_FALSE, "out of memory");
 929                                 (void) pclose(file);
 930                                 return (-1);
 931                         }
 932 
 933                         *retstr = p;
 934                         inbuf = *retstr + rd_cnt;
 935                 }
 936         }
 937         status = pclose(file);
 938 
 939         if (WIFSIGNALED(status)) {
 940                 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
 941                     "signal %d", cmdbuf, WTERMSIG(status));
 942                 return (-1);
 943         }
 944         assert(WIFEXITED(status));
 945         if (WEXITSTATUS(status) == ZEXIT_EXEC) {
 946                 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
 947                 return (-1);
 948         }
 949         return (WEXITSTATUS(status));
 950 }
 951 
 952 /*
 953  * Get the app-svc-dependent flag for this zone's init process. This is a
 954  * zone-specific attr which controls the type of contract we create for the
 955  * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal
 956  * set, so that when any service which is in the same contract exits, the init
 957  * application will be terminated.
 958  *
 959  * We use the global "snap_hndl", so no parameters get passed here.
 960  */
 961 static boolean_t
 962 is_app_svc_dep(void)
 963 {
 964         struct zone_attrtab a;
 965 
 966         bzero(&a, sizeof (a));
 967         (void) strlcpy(a.zone_attr_name, "app-svc-dependent",
 968             sizeof (a.zone_attr_name));
 969 
 970         if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
 971             strcmp(a.zone_attr_value, "true") == 0) {
 972                 return (B_TRUE);
 973         }
 974 
 975         return (B_FALSE);
 976 }
 977 
 978 static int
 979 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
 980 {
 981         zoneid_t zoneid;
 982         struct stat st;
 983         char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
 984         char nbootargs[BOOTARGS_MAX];
 985         char cmdbuf[MAXPATHLEN];
 986         fs_callback_t cb;
 987         brand_handle_t bh;
 988         zone_iptype_t iptype;
 989         dladm_status_t status;
 990         char errmsg[DLADM_STRSIZE];
 991         int err;
 992         boolean_t restart_init;
 993         boolean_t app_svc_dep;
 994 
 995         if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
 996                 return (-1);
 997 
 998         if ((zoneid = getzoneidbyname(zone_name)) == -1) {
 999                 zerror(zlogp, B_TRUE, "unable to get zoneid");
1000                 goto bad;
1001         }
1002 
1003         cb.zlogp = zlogp;
1004         cb.zoneid = zoneid;
1005         cb.mount_cmd = B_FALSE;
1006 
1007         /* Get a handle to the brand info for this zone */
1008         if ((bh = brand_open(brand_name)) == NULL) {
1009                 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1010                 goto bad;
1011         }
1012 
1013         /*
1014          * Get the list of filesystems to mount from the brand
1015          * configuration.  These mounts are done via a thread that will
 
1018          */
1019         if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
1020                 zerror(zlogp, B_FALSE, "unable to mount filesystems");
1021                 brand_close(bh);
1022                 goto bad;
1023         }
1024 
1025         /*
1026          * Get the brand's boot callback if it exists.
1027          */
1028         (void) strcpy(cmdbuf, EXEC_PREFIX);
1029         if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
1030             sizeof (cmdbuf) - EXEC_LEN) != 0) {
1031                 zerror(zlogp, B_FALSE,
1032                     "unable to determine branded zone's boot callback");
1033                 brand_close(bh);
1034                 goto bad;
1035         }
1036 
1037         /* Get the path for this zone's init(1M) (or equivalent) process.  */
1038         if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
1039                 zerror(zlogp, B_FALSE,
1040                     "unable to determine zone's init(1M) location");
1041                 brand_close(bh);
1042                 goto bad;
1043         }
1044 
1045         /* See if this zone's brand should restart init if it dies. */
1046         restart_init = brand_restartinit(bh);
1047 
1048         /*
1049          * See if we need to setup contract dependencies between the zone's
1050          * primary application and any of its services.
1051          */
1052         app_svc_dep = is_app_svc_dep();
1053 
1054         brand_close(bh);
1055 
1056         err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
1057         if (err != Z_OK)
1058                 goto bad;
1059 
1060         assert(init_file[0] != '\0');
1061 
1062         /*
1063          * Try to anticipate possible problems: If possible, make sure init is
1064          * executable.
1065          */
1066         if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) {
1067                 zerror(zlogp, B_FALSE, "unable to determine zone root");
1068                 goto bad;
1069         }
1070 
1071         (void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file);
1072 
1073         if (lstat(initpath, &st) == -1) {
1074                 zerror(zlogp, B_TRUE, "could not stat %s", initpath);
1075                 goto bad;
1076         }
1077 
1078         /*
1079          * If a symlink, we'll have to wait and resolve when we boot,
1080          * otherwise check the executable bits now.
1081          */
1082         if ((st.st_mode & S_IFMT) != S_IFLNK && (st.st_mode & S_IXUSR) == 0) {
1083                 zerror(zlogp, B_FALSE, "%s is not executable", initpath);
1084                 goto bad;
1085         }
1086 
1087         /*
1088          * Exclusive stack zones interact with the dlmgmtd running in the
1089          * global zone.  dladm_zone_boot() tells dlmgmtd that this zone is
1090          * booting, and loads its datalinks from the zone's datalink
1091          * configuration file.
1092          */
1093         if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
1094                 status = dladm_zone_boot(dld_handle, zoneid);
1095                 if (status != DLADM_STATUS_OK) {
1096                         zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
1097                             " %s", dladm_status2str(status, errmsg));
1098                         goto bad;
1099                 }
1100         }
1101 
1102         /*
1103          * If there is a brand 'boot' callback, execute it now to give the
1104          * brand one last chance to do any additional setup before the zone
1105          * is booted.
1106          */
1107         if ((strlen(cmdbuf) > EXEC_LEN) &&
1108             (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
1109                 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
1110                 goto bad;
1111         }
1112 
1113         if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
1114                 zerror(zlogp, B_TRUE, "could not set zone boot file");
1115                 goto bad;
1116         }
1117 
1118         if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
1119                 zerror(zlogp, B_TRUE, "could not set zone boot arguments");
1120                 goto bad;
1121         }
1122 
1123         if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART,
1124             NULL, 0) == -1) {
1125                 zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
1126                 goto bad;
1127         }
1128 
1129         if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT,
1130             (void *)B_TRUE, sizeof (boolean_t)) == -1) {
1131                 zerror(zlogp, B_TRUE, "could not set zone app-die");
1132                 goto bad;
1133         }
1134 
1135         /*
1136          * Inform zonestatd of a new zone so that it can install a door for
1137          * the zone to contact it.
1138          */
1139         notify_zonestatd(zone_id);
1140 
1141         if (zone_boot(zoneid) == -1) {
1142                 zerror(zlogp, B_TRUE, "unable to boot zone");
1143                 goto bad;
1144         }
1145 
1146         if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
1147                 goto bad;
1148 
1149         /* Startup a thread to perform zfd logging/tty svc for the zone. */
1150         create_log_thread(zlogp, zone_id);
1151 
1152         /* Startup a thread to perform memory capping for the zone. */
1153         create_mcap_thread(zlogp, zone_id);
1154 
1155         return (0);
1156 
1157 bad:
1158         /*
1159          * If something goes wrong, we up the zones's state to the target
1160          * state, RUNNING, and then invoke the hook as if we're halting.
1161          */
1162         (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
1163 
1164         return (-1);
1165 }
1166 
1167 static int
1168 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
1169 {
1170         int err;
1171 
1172         if (unmount_cmd == B_FALSE &&
1173             brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
1174                 return (-1);
1175 
1176         /* Shutting down, stop the memcap thread */
1177         destroy_mcap_thread();
1178 
1179         if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
1180                 if (!bringup_failure_recovery)
1181                         zerror(zlogp, B_FALSE, "unable to destroy zone");
1182                 destroy_log_thread();
1183                 return (-1);
1184         }
1185 
1186         /* Shut down is done, stop the log thread */
1187         destroy_log_thread();
1188 
1189         if (unmount_cmd == B_FALSE &&
1190             brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
1191                 return (-1);
1192 
1193         if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
1194                 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
1195                     zonecfg_strerror(err));
1196 
1197         return (0);
1198 }
1199 
1200 static int
1201 zone_graceful_shutdown(zlog_t *zlogp)
1202 {
1203         zoneid_t zoneid;
1204         pid_t child;
1205         char cmdbuf[MAXPATHLEN];
1206         brand_handle_t bh = NULL;
1207         ctid_t ct;
1208         int tmpl_fd;
1209         int child_status;
1210 
1211         if (shutdown_in_progress) {
1212                 zerror(zlogp, B_FALSE, "shutdown already in progress");
1213                 return (-1);
1214         }
1215 
1216         if ((zoneid = getzoneidbyname(zone_name)) == -1) {
 
1392                 return;
1393 
1394         (void) write(fd, buf, strlen(buf));
1395 
1396         (void) close(fd);
1397 }
1398 
1399 /*
1400  * The main routine for the door server that deals with zone state transitions.
1401  */
1402 /* ARGSUSED */
1403 static void
1404 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1405     uint_t n_desc)
1406 {
1407         ucred_t *uc = NULL;
1408         const priv_set_t *eset;
1409 
1410         zone_state_t zstate;
1411         zone_cmd_t cmd;
1412         int init_status;
1413         zone_cmd_arg_t *zargp;
1414 
1415         boolean_t kernelcall;
1416 
1417         int rval = -1;
1418         uint64_t uniqid;
1419         zoneid_t zoneid = -1;
1420         zlog_t zlog;
1421         zlog_t *zlogp;
1422         zone_cmd_rval_t *rvalp;
1423         size_t rlen = getpagesize(); /* conservative */
1424         fs_callback_t cb;
1425         brand_handle_t bh;
1426         boolean_t wait_shut = B_FALSE;
1427 
1428         /* LINTED E_BAD_PTR_CAST_ALIGN */
1429         zargp = (zone_cmd_arg_t *)args;
1430 
1431         /*
1432          * When we get the door unref message, we've fdetach'd the door, and
1433          * it is time for us to shut down zoneadmd.
1434          */
1435         if (zargp == DOOR_UNREF_DATA) {
 
1445 
1446         rvalp = alloca(rlen);
1447         bzero(rvalp, rlen);
1448         zlog.logfile = NULL;
1449         zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1450         zlog.buf = rvalp->errbuf;
1451         zlog.log = zlog.buf;
1452         /* defer initialization of zlog.locale until after credential check */
1453         zlogp = &zlog;
1454 
1455         if (alen != sizeof (zone_cmd_arg_t)) {
1456                 /*
1457                  * This really shouldn't be happening.
1458                  */
1459                 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1460                     "unexpected (expected %d bytes)", alen,
1461                     sizeof (zone_cmd_arg_t));
1462                 goto out;
1463         }
1464         cmd = zargp->cmd;
1465         init_status = zargp->status;
1466 
1467         if (door_ucred(&uc) != 0) {
1468                 zerror(&logsys, B_TRUE, "door_ucred");
1469                 goto out;
1470         }
1471         eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1472         if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1473             (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1474             ucred_geteuid(uc) != 0)) {
1475                 zerror(&logsys, B_FALSE, "insufficient privileges");
1476                 goto out;
1477         }
1478 
1479         kernelcall = ucred_getpid(uc) == 0;
1480 
1481         /*
1482          * This is safe because we only use a zlog_t throughout the
1483          * duration of a door call; i.e., by the time the pointer
1484          * might become invalid, the door call would be over.
 
1552          */
1553         if (zstate == ZONE_STATE_INCOMPLETE &&
1554             (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1555                 zstate = ZONE_STATE_INSTALLED;
1556 
1557         switch (zstate) {
1558         case ZONE_STATE_CONFIGURED:
1559         case ZONE_STATE_INCOMPLETE:
1560                 /*
1561                  * Not our area of expertise; we just print a nice message
1562                  * and die off.
1563                  */
1564                 zerror(zlogp, B_FALSE,
1565                     "%s operation is invalid for zones in state '%s'",
1566                     z_cmd_name(cmd), zone_state_str(zstate));
1567                 break;
1568 
1569         case ZONE_STATE_INSTALLED:
1570                 switch (cmd) {
1571                 case Z_READY:
1572                         rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
1573                         if (rval == 0)
1574                                 eventstream_write(Z_EVT_ZONE_READIED);
1575                         zcons_statechanged();
1576                         break;
1577                 case Z_BOOT:
1578                 case Z_FORCEBOOT:
1579                         eventstream_write(Z_EVT_ZONE_BOOTING);
1580                         if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1581                             == 0) {
1582                                 rval = zone_bootup(zlogp, zargp->bootbuf,
1583                                     zstate);
1584                         }
1585                         audit_put_record(zlogp, uc, rval, "boot");
1586                         zcons_statechanged();
1587                         if (rval != 0) {
1588                                 bringup_failure_recovery = B_TRUE;
1589                                 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1590                                     zstate);
1591                                 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1592                         }
1593                         break;
1594                 case Z_SHUTDOWN:
1595                 case Z_HALT:
1596                         if (kernelcall) /* Invalid; can't happen */
1597                                 abort();
1598                         /*
1599                          * We could have two clients racing to halt this
1600                          * zone; the second client loses, but his request
1601                          * doesn't fail, since the zone is now in the desired
1602                          * state.
1603                          */
1604                         zerror(zlogp, B_FALSE, "zone is already halted");
1605                         rval = 0;
1606                         break;
1607                 case Z_REBOOT:
1608                         if (kernelcall) /* Invalid; can't happen */
1609                                 abort();
1610                         zerror(zlogp, B_FALSE, "%s operation is invalid "
 
1622                         eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1623                         break;
1624                 case Z_MOUNT:
1625                 case Z_FORCEMOUNT:
1626                         if (kernelcall) /* Invalid; can't happen */
1627                                 abort();
1628                         if (!zone_isnative && !zone_iscluster &&
1629                             !zone_islabeled) {
1630                                 /*
1631                                  * -U mounts the zone without lofs mounting
1632                                  * zone file systems back into the scratch
1633                                  * zone.  This is required when mounting
1634                                  * non-native branded zones.
1635                                  */
1636                                 (void) strlcpy(zargp->bootbuf, "-U",
1637                                     BOOTARGS_MAX);
1638                         }
1639 
1640                         rval = zone_ready(zlogp,
1641                             strcmp(zargp->bootbuf, "-U") == 0 ?
1642                             Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
1643                         if (rval != 0)
1644                                 break;
1645 
1646                         eventstream_write(Z_EVT_ZONE_READIED);
1647 
1648                         /*
1649                          * Get a handle to the default brand info.
1650                          * We must always use the default brand file system
1651                          * list when mounting the zone.
1652                          */
1653                         if ((bh = brand_open(default_brand)) == NULL) {
1654                                 rval = -1;
1655                                 break;
1656                         }
1657 
1658                         /*
1659                          * Get the list of filesystems to mount from
1660                          * the brand configuration.  These mounts are done
1661                          * via a thread that will enter the zone, so they
1662                          * are done from within the context of the zone.
 
1684                                 abort();
1685                         zerror(zlogp, B_FALSE, "zone is already unmounted");
1686                         rval = 0;
1687                         break;
1688                 }
1689                 break;
1690 
1691         case ZONE_STATE_READY:
1692                 switch (cmd) {
1693                 case Z_READY:
1694                         /*
1695                          * We could have two clients racing to ready this
1696                          * zone; the second client loses, but his request
1697                          * doesn't fail, since the zone is now in the desired
1698                          * state.
1699                          */
1700                         zerror(zlogp, B_FALSE, "zone is already ready");
1701                         rval = 0;
1702                         break;
1703                 case Z_BOOT:
1704                         (void) strlcpy(boot_args, zargp->bootbuf,
1705                             sizeof (boot_args));
1706                         eventstream_write(Z_EVT_ZONE_BOOTING);
1707                         rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1708                         audit_put_record(zlogp, uc, rval, "boot");
1709                         zcons_statechanged();
1710                         if (rval != 0) {
1711                                 bringup_failure_recovery = B_TRUE;
1712                                 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1713                                     zstate);
1714                                 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1715                         }
1716                         boot_args[0] = '\0';
1717                         break;
1718                 case Z_HALT:
1719                         if (kernelcall) /* Invalid; can't happen */
1720                                 abort();
1721                         if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1722                             != 0)
1723                                 break;
1724                         zcons_statechanged();
1725                         eventstream_write(Z_EVT_ZONE_HALTED);
1726                         break;
1727                 case Z_SHUTDOWN:
1728                 case Z_REBOOT:
1729                 case Z_NOTE_UNINSTALLING:
1730                 case Z_MOUNT:
1731                 case Z_UNMOUNT:
1732                         if (kernelcall) /* Invalid; can't happen */
1733                                 abort();
1734                         zerror(zlogp, B_FALSE, "%s operation is invalid "
1735                             "for zones in state '%s'", z_cmd_name(cmd),
1736                             zone_state_str(zstate));
1737                         rval = -1;
1738                         break;
1739                 }
1740                 break;
1741 
1742         case ZONE_STATE_MOUNTED:
1743                 switch (cmd) {
1744                 case Z_UNMOUNT:
1745                         if (kernelcall) /* Invalid; can't happen */
1746                                 abort();
1747                         rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
1748                         if (rval == 0) {
1749                                 eventstream_write(Z_EVT_ZONE_HALTED);
1750                                 (void) sema_post(&scratch_sem);
1751                         }
1752                         break;
1753                 default:
1754                         if (kernelcall) /* Invalid; can't happen */
1755                                 abort();
1756                         zerror(zlogp, B_FALSE, "%s operation is invalid "
1757                             "for zones in state '%s'", z_cmd_name(cmd),
1758                             zone_state_str(zstate));
1759                         rval = -1;
1760                         break;
1761                 }
1762                 break;
1763 
1764         case ZONE_STATE_RUNNING:
1765         case ZONE_STATE_SHUTTING_DOWN:
1766         case ZONE_STATE_DOWN:
1767                 switch (cmd) {
1768                 case Z_READY:
1769                         if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1770                             != 0)
1771                                 break;
1772                         zcons_statechanged();
1773                         if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
1774                                 eventstream_write(Z_EVT_ZONE_READIED);
1775                         else
1776                                 eventstream_write(Z_EVT_ZONE_HALTED);
1777                         break;
1778                 case Z_BOOT:
1779                         /*
1780                          * We could have two clients racing to boot this
1781                          * zone; the second client loses, but his request
1782                          * doesn't fail, since the zone is now in the desired
1783                          * state.
1784                          */
1785                         zerror(zlogp, B_FALSE, "zone is already booted");
1786                         rval = 0;
1787                         break;
1788                 case Z_HALT:
1789                         if (kernelcall) {
1790                                 log_init_exit(init_status);
1791                         } else {
1792                                 log_init_exit(-1);
1793                         }
1794                         if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1795                             != 0)
1796                                 break;
1797                         eventstream_write(Z_EVT_ZONE_HALTED);
1798                         zcons_statechanged();
1799                         break;
1800                 case Z_REBOOT:
1801                         (void) strlcpy(boot_args, zargp->bootbuf,
1802                             sizeof (boot_args));
1803                         eventstream_write(Z_EVT_ZONE_REBOOTING);
1804                         if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1805                             != 0) {
1806                                 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1807                                 boot_args[0] = '\0';
1808                                 break;
1809                         }
1810                         zcons_statechanged();
1811                         if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) !=
1812                             0) {
1813                                 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1814                                 boot_args[0] = '\0';
1815                                 break;
1816                         }
1817                         rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1818                         audit_put_record(zlogp, uc, rval, "reboot");
1819                         if (rval != 0) {
1820                                 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1821                                     zstate);
1822                                 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1823                         }
1824                         boot_args[0] = '\0';
1825                         break;
1826                 case Z_SHUTDOWN:
1827                         if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
1828                                 wait_shut = B_TRUE;
1829                         }
1830                         break;
1831                 case Z_NOTE_UNINSTALLING:
1832                 case Z_MOUNT:
1833                 case Z_UNMOUNT:
1834                         zerror(zlogp, B_FALSE, "%s operation is invalid "
1835                             "for zones in state '%s'", z_cmd_name(cmd),
1836                             zone_state_str(zstate));
1837                         rval = -1;
1838                         break;
1839                 }
1840                 break;
1841         default:
1842                 abort();
1843         }
1844 
1845         /*
1846          * Because the state of the zone may have changed, we make sure
1847          * to wake the console poller, which is in charge of initiating
1848          * the shutdown procedure as necessary.
1849          */
1850         eventstream_write(Z_EVT_NULL);
1851 
1852 out:
 
2021                                     zonecfg_get_snapshot_handle(zone_name,
2022                                     snap_hndl) != Z_OK) {
2023                                         zerror(zlogp, B_FALSE, "recovering "
2024                                             "zone configuration handle");
2025                                         goto out;
2026                                 }
2027                         }
2028                 }
2029 
2030                 (void) fdetach(zone_door_path);
2031                 (void) close(doorfd);
2032                 goto top;
2033         }
2034         ret = 0;
2035 out:
2036         (void) close(doorfd);
2037         return (ret);
2038 }
2039 
2040 /*
2041  * Set up the brand's pre and post state change callbacks, as well as the
2042  * query callback, if any of these exist.
2043  */
2044 static int
2045 brand_callback_init(brand_handle_t bh, char *zone_name)
2046 {
2047         (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
2048             sizeof (pre_statechg_hook));
2049 
2050         if (brand_get_prestatechange(bh, zone_name, zonepath,
2051             pre_statechg_hook + EXEC_LEN,
2052             sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
2053                 return (-1);
2054 
2055         if (strlen(pre_statechg_hook) <= EXEC_LEN)
2056                 pre_statechg_hook[0] = '\0';
2057 
2058         (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
2059             sizeof (post_statechg_hook));
2060 
 
2256          */
2257         if ((privset = priv_allocset()) == NULL) {
2258                 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
2259                 return (1);
2260         }
2261 
2262         if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2263                 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
2264                 priv_freeset(privset);
2265                 return (1);
2266         }
2267 
2268         if (priv_isfullset(privset) == B_FALSE) {
2269                 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
2270                     "run this command (all privs required)");
2271                 priv_freeset(privset);
2272                 return (1);
2273         }
2274         priv_freeset(privset);
2275 
2276         if (mkzonedir(zlogp) != 0)
2277                 return (1);
2278 
2279         /*
2280          * Pre-fork: set up shared state
2281          */
2282         if ((shstate = (void *)mmap(NULL, shstatelen,
2283             PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
2284             MAP_FAILED) {
2285                 zerror(zlogp, B_TRUE, "%s failed", "mmap");
2286                 return (1);
2287         }
2288         if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
2289                 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
2290                 (void) munmap((char *)shstate, shstatelen);
2291                 return (1);
2292         }
2293         shstate->log.logfile = NULL;
2294         shstate->log.buflen = shstatelen - sizeof (*shstate);
2295         shstate->log.loglen = shstate->log.buflen;
 
 |