103 #include <libdladm.h>
104 #include <sys/dls_mgmt.h>
105 #include <libscf.h>
106
107 #include <libzonecfg.h>
108 #include <zonestat_impl.h>
109 #include "zoneadmd.h"
110
111 static char *progname;
112 char *zone_name; /* zone which we are managing */
113 zone_dochandle_t snap_hndl; /* handle for snapshot created when ready */
114 char zonepath[MAXNAMELEN];
115 char pool_name[MAXNAMELEN];
116 char default_brand[MAXNAMELEN];
117 char brand_name[MAXNAMELEN];
118 boolean_t zone_isnative;
119 boolean_t zone_iscluster;
120 boolean_t zone_islabeled;
121 boolean_t shutdown_in_progress;
122 static zoneid_t zone_id;
123 static zoneid_t zone_did = 0;
124 dladm_handle_t dld_handle = NULL;
125
126 char pre_statechg_hook[2 * MAXPATHLEN];
127 char post_statechg_hook[2 * MAXPATHLEN];
128 char query_hook[2 * MAXPATHLEN];
129
130 zlog_t logsys;
131
132 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */
133 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */
134
135 static sema_t scratch_sem; /* for scratch zones */
136
137 static char zone_door_path[MAXPATHLEN];
138 static int zone_door = -1;
139
140 boolean_t in_death_throes = B_FALSE; /* daemon is dying */
141 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */
142
143 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
144 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
145 #endif
146
147 #define DEFAULT_LOCALE "C"
277
278 /*
279 * Since illumos boot arguments are getopt(3c) compatible (see kernel(1m)), we
280 * put the arguments into an argv style array, use getopt to process them,
281 * and put the resultant argument string back into outargs. Non-native brands
282 * may support alternate forms of boot arguments so we must handle that as well.
283 *
284 * During the filtering, we pull out any arguments which are truly "boot"
285 * arguments, leaving only those which are to be passed intact to the
286 * progenitor process. The one we support at the moment is -i, which
287 * indicates to the kernel which program should be launched as 'init'.
288 *
289 * Except for Z_OK, all other return values are treated as fatal.
290 */
291 static int
292 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
293 char *init_file)
294 {
295 int argc = 0, argc_save;
296 int i;
297 int err = Z_OK;
298 char *arg, *lasts, **argv = NULL, **argv_save;
299 char zonecfg_args[BOOTARGS_MAX];
300 char scratchargs[BOOTARGS_MAX], *sargs;
301 char scratchopt[3];
302 char c;
303
304 bzero(outargs, BOOTARGS_MAX);
305
306 /*
307 * If the user didn't specify transient boot arguments, check
308 * to see if there were any specified in the zone configuration,
309 * and use them if applicable.
310 */
311 if (inargs == NULL || inargs[0] == '\0') {
312 bzero(zonecfg_args, sizeof (zonecfg_args));
313 (void) zonecfg_get_bootargs(snap_hndl, zonecfg_args,
314 sizeof (zonecfg_args));
315 inargs = zonecfg_args;
316 }
317
318 if (strlen(inargs) >= BOOTARGS_MAX) {
319 zerror(zlogp, B_FALSE, "boot argument string too long");
320 return (Z_INVAL);
321 }
322
323 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
324 sargs = scratchargs;
325 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
326 sargs = NULL;
327 argc++;
328 }
329
330 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
331 zerror(zlogp, B_FALSE, "memory allocation failed");
332 return (Z_NOMEM);
333 }
334
335 argv_save = argv;
445 * since anyone can open any UNIX domain socket, regardless of
446 * its file system permissions. Sigh...
447 */
448 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
449 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
450 return (-1);
451 }
452 /* paranoia */
453 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
454 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
455 return (-1);
456 }
457 (void) chmod(ZONES_TMPDIR, S_IRWXU);
458 return (0);
459 }
460
461 /*
462 * Run the brand's pre-state change callback, if it exists.
463 */
464 static int
465 brand_prestatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug)
466 {
467 char cmdbuf[2 * MAXPATHLEN];
468 const char *altroot;
469
470 if (pre_statechg_hook[0] == '\0')
471 return (0);
472
473 altroot = zonecfg_get_root();
474 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
475 state, cmd, altroot) > sizeof (cmdbuf))
476 return (-1);
477
478 if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0)
479 return (-1);
480
481 return (0);
482 }
483
484 /*
485 * Run the brand's post-state change callback, if it exists.
486 */
487 static int
488 brand_poststatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug)
489 {
490 char cmdbuf[2 * MAXPATHLEN];
491 const char *altroot;
492
493 if (post_statechg_hook[0] == '\0')
494 return (0);
495
496 altroot = zonecfg_get_root();
497 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
498 state, cmd, altroot) > sizeof (cmdbuf))
499 return (-1);
500
501 if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0)
502 return (-1);
503
504 return (0);
505 }
506
507 /*
508 * Notify zonestatd of the new zone. If zonestatd is not running, this
509 * will do nothing.
510 */
511 static void
512 notify_zonestatd(zoneid_t zoneid)
513 {
514 int cmd[2];
515 int fd;
516 door_arg_t params;
517
518 fd = open(ZS_DOOR_PATH, O_RDONLY);
519 if (fd < 0)
520 return;
521
522 cmd[0] = ZSD_CMD_NEW_ZONE;
523 cmd[1] = zoneid;
524 params.data_ptr = (char *)&cmd;
525 params.data_size = sizeof (cmd);
526 params.desc_ptr = NULL;
527 params.desc_num = 0;
528 params.rbuf = NULL;
529 params.rsize = NULL;
530 (void) door_call(fd, ¶ms);
531 (void) close(fd);
532 }
533
534 /*
535 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is
536 * 'true' if this is being invoked as part of the processing for the "mount"
537 * subcommand.
538 *
539 * If a scratch zone mount (ALT_MOUNT) is being performed then do not
540 * call the state change hooks.
541 */
542 static int
543 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate, boolean_t debug)
544 {
545 int err;
546 boolean_t snapped = B_FALSE;
547
548 if ((snap_hndl = zonecfg_init_handle()) == NULL) {
549 zerror(zlogp, B_TRUE, "getting zone configuration handle");
550 goto bad;
551 }
552 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
553 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
554 zonecfg_strerror(err));
555 goto bad;
556 }
557 snapped = B_TRUE;
558
559 if (zonecfg_get_snapshot_handle(zone_name, snap_hndl) != Z_OK) {
560 zerror(zlogp, B_FALSE, "invalid configuration snapshot");
561 goto bad;
562 }
563
564 if (zone_did == 0)
565 zone_did = zone_get_did(zone_name);
566
567 if (!ALT_MOUNT(mount_cmd) &&
568 brand_prestatechg(zlogp, zstate, Z_READY, debug) != 0)
569 goto bad;
570
571 if ((zone_id = vplat_create(zlogp, mount_cmd, zone_did)) == -1)
572 goto bad;
573
574 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
575 bringup_failure_recovery = B_TRUE;
576 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE,
577 debug);
578 goto bad;
579 }
580
581 if (!ALT_MOUNT(mount_cmd) &&
582 brand_poststatechg(zlogp, zstate, Z_READY, debug) != 0)
583 goto bad;
584
585 return (0);
586
587 bad:
588 /*
589 * If something goes wrong, we up the zones's state to the target
590 * state, READY, and then invoke the hook as if we're halting.
591 */
592 if (!ALT_MOUNT(mount_cmd))
593 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT,
594 debug);
595
596 if (snapped)
597 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
598 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
599 zonecfg_strerror(err));
600 zonecfg_fini_handle(snap_hndl);
601 snap_hndl = NULL;
602 return (-1);
603 }
604
605 int
606 init_template(void)
607 {
608 int fd;
609 int err = 0;
610
611 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
612 if (fd == -1)
613 return (-1);
614
615 /*
616 * For now, zoneadmd doesn't do anything with the contract.
617 * Deliver no events, don't inherit, and allow it to be orphaned.
618 */
619 err |= ct_tmpl_set_critical(fd, 0);
620 err |= ct_tmpl_set_informative(fd, 0);
621 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
762 ct = -1;
763 (void) ct_tmpl_clear(tmpl_fd);
764 (void) close(tmpl_fd);
765 if (waitpid(child, &child_status, 0) != child) {
766 /* unexpected: we must have been signalled */
767 (void) contract_abandon_id(ct);
768 return (-1);
769 }
770 (void) contract_abandon_id(ct);
771 if (WEXITSTATUS(child_status) != 0) {
772 errno = WEXITSTATUS(child_status);
773 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
774 return (-1);
775 }
776
777 return (0);
778 }
779
/*
 * Export one zonecfg property into the environment.
 *
 * The variable name has the form
 *	_ZONECFG_{resource name}_{identifying attr. name}_{property name}
 * where the identifying attribute component is left out when attr is NULL.
 * Every dash (-) in the assembled name is replaced with an underscore (_)
 * before the variable is set; an existing variable of that name is
 * overwritten.
 */
static void
set_zonecfg_env(char *rsrc, char *attr, char *name, char *val)
{
	/* Two maximal names plus slop for the _ZONECFG prefix and the _'s. */
	char envname[2 * MAXNAMELEN + 32];
	char *cp;

	if (attr != NULL) {
		(void) snprintf(envname, sizeof (envname), "_ZONECFG_%s_%s_%s",
		    rsrc, attr, name);
	} else {
		(void) snprintf(envname, sizeof (envname), "_ZONECFG_%s_%s",
		    rsrc, name);
	}

	for (cp = envname; *cp != '\0'; cp++) {
		if (*cp == '-')
			*cp = '_';
	}

	(void) setenv(envname, val, 1);
}
805
806 /*
807 * Export zonecfg network and device properties into environment for the boot
808 * and state change hooks.
809 * If debug is true, export the brand hook debug env. variable as well.
810 *
811 * We could export more of the config in the future, as necessary.
812 */
813 static int
814 setup_subproc_env(boolean_t debug)
815 {
816 int res;
817 struct zone_nwiftab ntab;
818 struct zone_devtab dtab;
819 struct zone_attrtab atab;
820 char net_resources[MAXNAMELEN * 2];
821 char dev_resources[MAXNAMELEN * 2];
822
823 /* snap_hndl is null when called through the set_brand_env code path */
824 if (snap_hndl == NULL)
825 return (Z_OK);
826
827 net_resources[0] = '\0';
828 if ((res = zonecfg_setnwifent(snap_hndl)) != Z_OK)
829 goto done;
830
831 while (zonecfg_getnwifent(snap_hndl, &ntab) == Z_OK) {
832 struct zone_res_attrtab *rap;
833 char *phys;
834
835 phys = ntab.zone_nwif_physical;
836
837 (void) strlcat(net_resources, phys, sizeof (net_resources));
838 (void) strlcat(net_resources, " ", sizeof (net_resources));
839
840 set_zonecfg_env(RSRC_NET, phys, "physical", phys);
841
842 set_zonecfg_env(RSRC_NET, phys, "address",
843 ntab.zone_nwif_address);
844 set_zonecfg_env(RSRC_NET, phys, "allowed-address",
845 ntab.zone_nwif_allowed_address);
846 set_zonecfg_env(RSRC_NET, phys, "defrouter",
847 ntab.zone_nwif_defrouter);
848 set_zonecfg_env(RSRC_NET, phys, "global-nic",
849 ntab.zone_nwif_gnic);
850 set_zonecfg_env(RSRC_NET, phys, "mac-addr", ntab.zone_nwif_mac);
851 set_zonecfg_env(RSRC_NET, phys, "vlan-id",
852 ntab.zone_nwif_vlan_id);
853
854 for (rap = ntab.zone_nwif_attrp; rap != NULL;
855 rap = rap->zone_res_attr_next)
856 set_zonecfg_env(RSRC_NET, phys, rap->zone_res_attr_name,
857 rap->zone_res_attr_value);
858 nwifent_free_attrs(&ntab);
859 }
860
861 (void) setenv("_ZONECFG_net_resources", net_resources, 1);
862
863 (void) zonecfg_endnwifent(snap_hndl);
864
865 if ((res = zonecfg_setdevent(snap_hndl)) != Z_OK)
866 goto done;
867
868 while (zonecfg_getdevent(snap_hndl, &dtab) == Z_OK) {
869 struct zone_res_attrtab *rap;
870 char *match;
871
872 match = dtab.zone_dev_match;
873
874 (void) strlcat(dev_resources, match, sizeof (dev_resources));
875 (void) strlcat(dev_resources, " ", sizeof (dev_resources));
876
877 for (rap = dtab.zone_dev_attrp; rap != NULL;
878 rap = rap->zone_res_attr_next)
879 set_zonecfg_env(RSRC_DEV, match,
880 rap->zone_res_attr_name, rap->zone_res_attr_value);
881 }
882
883 (void) zonecfg_enddevent(snap_hndl);
884
885 if ((res = zonecfg_setattrent(snap_hndl)) != Z_OK)
886 goto done;
887
888 while (zonecfg_getattrent(snap_hndl, &atab) == Z_OK) {
889 set_zonecfg_env("attr", NULL, atab.zone_attr_name,
890 atab.zone_attr_value);
891 }
892
893 (void) zonecfg_endattrent(snap_hndl);
894
895 if (debug)
896 (void) setenv("_ZONEADMD_brand_debug", "1", 1);
897 else
898 (void) setenv("_ZONEADMD_brand_debug", "", 1);
899
900 res = Z_OK;
901
902 done:
903 return (res);
904 }
905
906 void
907 nwifent_free_attrs(struct zone_nwiftab *np)
908 {
909 struct zone_res_attrtab *rap;
910
911 for (rap = np->zone_nwif_attrp; rap != NULL; ) {
912 struct zone_res_attrtab *tp = rap;
913
914 rap = rap->zone_res_attr_next;
915 free(tp);
916 }
917 }
918
919 /*
920 * If retstr is not NULL, the output of the subproc is returned in the str,
921 * otherwise it is output using zerror(). Any memory allocated for retstr
922 * should be freed by the caller.
923 */
924 int
925 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr, boolean_t debug)
926 {
927 char buf[1024]; /* arbitrary large amount */
928 char *inbuf;
929 FILE *file;
930 int status;
931 int rd_cnt;
932
933 if (retstr != NULL) {
934 if ((*retstr = malloc(1024)) == NULL) {
935 zerror(zlogp, B_FALSE, "out of memory");
936 return (-1);
937 }
938 inbuf = *retstr;
939 rd_cnt = 0;
940 } else {
941 inbuf = buf;
942 }
943
944 if (setup_subproc_env(debug) != Z_OK) {
945 zerror(zlogp, B_FALSE, "failed to setup environment");
946 return (-1);
947 }
948
949 file = popen(cmdbuf, "r");
950 if (file == NULL) {
951 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
952 return (-1);
953 }
954
955 while (fgets(inbuf, 1024, file) != NULL) {
956 if (retstr == NULL) {
957 if (zlogp != &logsys) {
958 int last = strlen(inbuf) - 1;
959
960 if (inbuf[last] == '\n')
961 inbuf[last] = '\0';
962 zerror(zlogp, B_FALSE, "%s", inbuf);
963 }
964 } else {
965 char *p;
966
967 rd_cnt += 1024 - 1;
968 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
969 zerror(zlogp, B_FALSE, "out of memory");
970 (void) pclose(file);
971 return (-1);
972 }
973
974 *retstr = p;
975 inbuf = *retstr + rd_cnt;
976 }
977 }
978 status = pclose(file);
979
980 if (WIFSIGNALED(status)) {
981 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
982 "signal %d", cmdbuf, WTERMSIG(status));
983 return (-1);
984 }
985 assert(WIFEXITED(status));
986 if (WEXITSTATUS(status) == ZEXIT_EXEC) {
987 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
988 return (-1);
989 }
990 return (WEXITSTATUS(status));
991 }
992
993 /*
994 * Get the path for this zone's init(1M) (or equivalent) process. First look
995 * for a zone-specific init-name attr, then get it from the brand.
996 */
997 static int
998 get_initname(brand_handle_t bh, char *initname, int len)
999 {
1000 struct zone_attrtab a;
1001
1002 bzero(&a, sizeof (a));
1003 (void) strlcpy(a.zone_attr_name, "init-name",
1004 sizeof (a.zone_attr_name));
1005
1006 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
1007 (void) strlcpy(initname, a.zone_attr_value, len);
1008 return (0);
1009 }
1010
1011 return (brand_get_initname(bh, initname, len));
1012 }
1013
1014 /*
1015 * Get the restart-init flag for this zone's init(1M) (or equivalent) process.
1016 * First look for a zone-specific restart-init attr, then get it from the brand.
1017 */
1018 static boolean_t
1019 restartinit(brand_handle_t bh)
1020 {
1021 struct zone_attrtab a;
1022
1023 bzero(&a, sizeof (a));
1024 (void) strlcpy(a.zone_attr_name, "restart-init",
1025 sizeof (a.zone_attr_name));
1026
1027 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
1028 if (strcmp(a.zone_attr_value, "false") == 0)
1029 return (B_FALSE);
1030 return (B_TRUE);
1031 }
1032
1033 return (brand_restartinit(bh));
1034 }
1035
1036 /*
1037 * Get the app-svc-dependent flag for this zone's init process. This is a
1038 * zone-specific attr which controls the type of contract we create for the
1039 * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal
1040 * set, so that when any service which is in the same contract exits, the init
1041 * application will be terminated.
1042 */
1043 static boolean_t
1044 is_app_svc_dep(brand_handle_t bh)
1045 {
1046 struct zone_attrtab a;
1047
1048 bzero(&a, sizeof (a));
1049 (void) strlcpy(a.zone_attr_name, "app-svc-dependent",
1050 sizeof (a.zone_attr_name));
1051
1052 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
1053 strcmp(a.zone_attr_value, "true") == 0) {
1054 return (B_TRUE);
1055 }
1056
1057 return (B_FALSE);
1058 }
1059
1060 static int
1061 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate, boolean_t debug)
1062 {
1063 zoneid_t zoneid;
1064 struct stat st;
1065 char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
1066 char nbootargs[BOOTARGS_MAX];
1067 char cmdbuf[MAXPATHLEN];
1068 fs_callback_t cb;
1069 brand_handle_t bh;
1070 zone_iptype_t iptype;
1071 dladm_status_t status;
1072 char errmsg[DLADM_STRSIZE];
1073 int err;
1074 boolean_t restart_init;
1075 boolean_t app_svc_dep;
1076
1077 if (brand_prestatechg(zlogp, zstate, Z_BOOT, debug) != 0)
1078 return (-1);
1079
1080 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1081 zerror(zlogp, B_TRUE, "unable to get zoneid");
1082 goto bad;
1083 }
1084
1085 cb.zlogp = zlogp;
1086 cb.zoneid = zoneid;
1087 cb.mount_cmd = B_FALSE;
1088
1089 /* Get a handle to the brand info for this zone */
1090 if ((bh = brand_open(brand_name)) == NULL) {
1091 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1092 goto bad;
1093 }
1094
1095 /*
1096 * Get the list of filesystems to mount from the brand
1097 * configuration. These mounts are done via a thread that will
1100 */
1101 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
1102 zerror(zlogp, B_FALSE, "unable to mount filesystems");
1103 brand_close(bh);
1104 goto bad;
1105 }
1106
1107 /*
1108 * Get the brand's boot callback if it exists.
1109 */
1110 (void) strcpy(cmdbuf, EXEC_PREFIX);
1111 if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
1112 sizeof (cmdbuf) - EXEC_LEN) != 0) {
1113 zerror(zlogp, B_FALSE,
1114 "unable to determine branded zone's boot callback");
1115 brand_close(bh);
1116 goto bad;
1117 }
1118
1119 /* Get the path for this zone's init(1M) (or equivalent) process. */
1120 if (get_initname(bh, init_file, MAXPATHLEN) != 0) {
1121 zerror(zlogp, B_FALSE,
1122 "unable to determine zone's init(1M) location");
1123 brand_close(bh);
1124 goto bad;
1125 }
1126
1127 /* See if we should restart init if it dies. */
1128 restart_init = restartinit(bh);
1129
1130 /*
1131 * See if we need to setup contract dependencies between the zone's
1132 * primary application and any of its services.
1133 */
1134 app_svc_dep = is_app_svc_dep(bh);
1135
1136 brand_close(bh);
1137
1138 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
1139 if (err != Z_OK)
1140 goto bad;
1141
1142 assert(init_file[0] != '\0');
1143
1144 /*
1145 * Try to anticipate possible problems: If possible, make sure init is
1146 * executable.
1147 */
1148 if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) {
1149 zerror(zlogp, B_FALSE, "unable to determine zone root");
1150 goto bad;
1151 }
1152
1153 (void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file);
1154
1155 if (lstat(initpath, &st) == -1) {
1156 zerror(zlogp, B_TRUE, "could not stat %s", initpath);
1157 goto bad;
1158 }
1159
1160 if ((st.st_mode & S_IFMT) == S_IFLNK) {
1161 /* symlink, we'll have to wait and resolve when we boot */
1162 } else if ((st.st_mode & S_IXUSR) == 0) {
1163 zerror(zlogp, B_FALSE, "%s is not executable", initpath);
1164 goto bad;
1165 }
1166
1167 /*
1168 * Exclusive stack zones interact with the dlmgmtd running in the
1169 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is
1170 * booting, and loads its datalinks from the zone's datalink
1171 * configuration file.
1172 */
1173 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
1174 status = dladm_zone_boot(dld_handle, zoneid);
1175 if (status != DLADM_STATUS_OK) {
1176 zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
1177 " %s", dladm_status2str(status, errmsg));
1178 goto bad;
1179 }
1180 }
1181
1182 /*
1183 * If there is a brand 'boot' callback, execute it now to give the
1184 * brand one last chance to do any additional setup before the zone
1185 * is booted.
1186 */
1187 if ((strlen(cmdbuf) > EXEC_LEN) &&
1188 (do_subproc(zlogp, cmdbuf, NULL, debug) != Z_OK)) {
1189 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
1190 goto bad;
1191 }
1192
1193 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
1194 zerror(zlogp, B_TRUE, "could not set zone boot file");
1195 goto bad;
1196 }
1197
1198 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
1199 zerror(zlogp, B_TRUE, "could not set zone boot arguments");
1200 goto bad;
1201 }
1202
1203 if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART,
1204 NULL, 0) == -1) {
1205 zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
1206 goto bad;
1207 }
1208
1209 if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT,
1210 (void *)B_TRUE, sizeof (boolean_t)) == -1) {
1211 zerror(zlogp, B_TRUE, "could not set zone app-die");
1212 goto bad;
1213 }
1214
1215 /*
1216 * Inform zonestatd of a new zone so that it can install a door for
1217 * the zone to contact it.
1218 */
1219 notify_zonestatd(zone_id);
1220
1221 if (zone_boot(zoneid) == -1) {
1222 zerror(zlogp, B_TRUE, "unable to boot zone");
1223 goto bad;
1224 }
1225
1226 if (brand_poststatechg(zlogp, zstate, Z_BOOT, debug) != 0)
1227 goto bad;
1228
1229 /* Startup a thread to perform zfd logging/tty svc for the zone. */
1230 create_log_thread(zlogp, zone_id);
1231
1232 /* Startup a thread to perform memory capping for the zone. */
1233 create_mcap_thread(zlogp, zone_id);
1234
1235 return (0);
1236
1237 bad:
1238 /*
1239 * If something goes wrong, we up the zones's state to the target
1240 * state, RUNNING, and then invoke the hook as if we're halting.
1241 */
1242 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT, debug);
1243
1244 return (-1);
1245 }
1246
1247 static int
1248 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate,
1249 boolean_t debug)
1250 {
1251 int err;
1252
1253 /*
1254 * If performing a scratch zone unmount then do not call the
1255 * state change hooks.
1256 */
1257 if (unmount_cmd == B_FALSE &&
1258 brand_prestatechg(zlogp, zstate, Z_HALT, debug) != 0)
1259 return (-1);
1260
1261 /* Shutting down, stop the memcap thread */
1262 destroy_mcap_thread();
1263
1264 if (vplat_teardown(zlogp, unmount_cmd, rebooting, debug) != 0) {
1265 if (!bringup_failure_recovery)
1266 zerror(zlogp, B_FALSE, "unable to destroy zone");
1267 destroy_log_thread();
1268 return (-1);
1269 }
1270
1271 /* Shut down is done, stop the log thread */
1272 destroy_log_thread();
1273
1274 if (unmount_cmd == B_FALSE &&
1275 brand_poststatechg(zlogp, zstate, Z_HALT, debug) != 0)
1276 return (-1);
1277
1278 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
1279 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
1280 zonecfg_strerror(err));
1281
1282 zonecfg_fini_handle(snap_hndl);
1283 snap_hndl = NULL;
1284
1285 return (0);
1286 }
1287
1288 static int
1289 zone_graceful_shutdown(zlog_t *zlogp)
1290 {
1291 zoneid_t zoneid;
1292 pid_t child;
1293 char cmdbuf[MAXPATHLEN];
1294 brand_handle_t bh = NULL;
1295 ctid_t ct;
1296 int tmpl_fd;
1297 int child_status;
1298
1299 if (shutdown_in_progress) {
1300 zerror(zlogp, B_FALSE, "shutdown already in progress");
1301 return (-1);
1302 }
1303
1304 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1480 return;
1481
1482 (void) write(fd, buf, strlen(buf));
1483
1484 (void) close(fd);
1485 }
1486
1487 /*
1488 * The main routine for the door server that deals with zone state transitions.
1489 */
1490 /* ARGSUSED */
1491 static void
1492 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1493 uint_t n_desc)
1494 {
1495 ucred_t *uc = NULL;
1496 const priv_set_t *eset;
1497
1498 zone_state_t zstate;
1499 zone_cmd_t cmd;
1500 boolean_t debug;
1501 int init_status;
1502 zone_cmd_arg_t *zargp;
1503
1504 boolean_t kernelcall = B_TRUE;
1505
1506 int rval = -1;
1507 uint64_t uniqid;
1508 zoneid_t zoneid = -1;
1509 zlog_t zlog;
1510 zlog_t *zlogp;
1511 zone_cmd_rval_t *rvalp;
1512 size_t rlen = getpagesize(); /* conservative */
1513 fs_callback_t cb;
1514 brand_handle_t bh;
1515 boolean_t wait_shut = B_FALSE;
1516
1517 /* LINTED E_BAD_PTR_CAST_ALIGN */
1518 zargp = (zone_cmd_arg_t *)args;
1519
1520 /*
1521 * When we get the door unref message, we've fdetach'd the door, and
1522 * it is time for us to shut down zoneadmd.
1523 */
1524 if (zargp == DOOR_UNREF_DATA) {
1534
1535 rvalp = alloca(rlen);
1536 bzero(rvalp, rlen);
1537 zlog.logfile = NULL;
1538 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1539 zlog.buf = rvalp->errbuf;
1540 zlog.log = zlog.buf;
1541 /* defer initialization of zlog.locale until after credential check */
1542 zlogp = &zlog;
1543
1544 if (alen != sizeof (zone_cmd_arg_t)) {
1545 /*
1546 * This really shouldn't be happening.
1547 */
1548 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1549 "unexpected (expected %d bytes)", alen,
1550 sizeof (zone_cmd_arg_t));
1551 goto out;
1552 }
1553 cmd = zargp->cmd;
1554 debug = zargp->debug;
1555 init_status = zargp->status;
1556
1557 if (door_ucred(&uc) != 0) {
1558 zerror(&logsys, B_TRUE, "door_ucred");
1559 goto out;
1560 }
1561 eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1562 if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1563 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1564 ucred_geteuid(uc) != 0)) {
1565 zerror(&logsys, B_FALSE, "insufficient privileges");
1566 goto out;
1567 }
1568
1569 kernelcall = ucred_getpid(uc) == 0;
1570
1571 /*
1572 * This is safe because we only use a zlog_t throughout the
1573 * duration of a door call; i.e., by the time the pointer
1574 * might become invalid, the door call would be over.
1642 */
1643 if (zstate == ZONE_STATE_INCOMPLETE &&
1644 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1645 zstate = ZONE_STATE_INSTALLED;
1646
1647 switch (zstate) {
1648 case ZONE_STATE_CONFIGURED:
1649 case ZONE_STATE_INCOMPLETE:
1650 /*
1651 * Not our area of expertise; we just print a nice message
1652 * and die off.
1653 */
1654 zerror(zlogp, B_FALSE,
1655 "%s operation is invalid for zones in state '%s'",
1656 z_cmd_name(cmd), zone_state_str(zstate));
1657 break;
1658
1659 case ZONE_STATE_INSTALLED:
1660 switch (cmd) {
1661 case Z_READY:
1662 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, debug);
1663 if (rval == 0)
1664 eventstream_write(Z_EVT_ZONE_READIED);
1665 zcons_statechanged();
1666 break;
1667 case Z_BOOT:
1668 case Z_FORCEBOOT:
1669 eventstream_write(Z_EVT_ZONE_BOOTING);
1670 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
1671 debug)) == 0) {
1672 rval = zone_bootup(zlogp, zargp->bootbuf,
1673 zstate, debug);
1674 }
1675 audit_put_record(zlogp, uc, rval, "boot");
1676 zcons_statechanged();
1677 if (rval != 0) {
1678 bringup_failure_recovery = B_TRUE;
1679 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1680 zstate, debug);
1681 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1682 }
1683 break;
1684 case Z_SHUTDOWN:
1685 case Z_HALT:
1686 if (kernelcall) /* Invalid; can't happen */
1687 abort();
1688 /*
1689 * We could have two clients racing to halt this
1690 * zone; the second client loses, but his request
1691 * doesn't fail, since the zone is now in the desired
1692 * state.
1693 */
1694 zerror(zlogp, B_FALSE, "zone is already halted");
1695 rval = 0;
1696 break;
1697 case Z_REBOOT:
1698 if (kernelcall) /* Invalid; can't happen */
1699 abort();
1700 zerror(zlogp, B_FALSE, "%s operation is invalid "
1712 eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1713 break;
1714 case Z_MOUNT:
1715 case Z_FORCEMOUNT:
1716 if (kernelcall) /* Invalid; can't happen */
1717 abort();
1718 if (!zone_isnative && !zone_iscluster &&
1719 !zone_islabeled) {
1720 /*
1721 * -U mounts the zone without lofs mounting
1722 * zone file systems back into the scratch
1723 * zone. This is required when mounting
1724 * non-native branded zones.
1725 */
1726 (void) strlcpy(zargp->bootbuf, "-U",
1727 BOOTARGS_MAX);
1728 }
1729
1730 rval = zone_ready(zlogp,
1731 strcmp(zargp->bootbuf, "-U") == 0 ?
1732 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate, debug);
1733 if (rval != 0)
1734 break;
1735
1736 eventstream_write(Z_EVT_ZONE_READIED);
1737
1738 /*
1739 * Get a handle to the default brand info.
1740 * We must always use the default brand file system
1741 * list when mounting the zone.
1742 */
1743 if ((bh = brand_open(default_brand)) == NULL) {
1744 rval = -1;
1745 break;
1746 }
1747
1748 /*
1749 * Get the list of filesystems to mount from
1750 * the brand configuration. These mounts are done
1751 * via a thread that will enter the zone, so they
1752 * are done from within the context of the zone.
1774 abort();
1775 zerror(zlogp, B_FALSE, "zone is already unmounted");
1776 rval = 0;
1777 break;
1778 }
1779 break;
1780
1781 case ZONE_STATE_READY:
1782 switch (cmd) {
1783 case Z_READY:
1784 /*
1785 * We could have two clients racing to ready this
1786 * zone; the second client loses, but his request
1787 * doesn't fail, since the zone is now in the desired
1788 * state.
1789 */
1790 zerror(zlogp, B_FALSE, "zone is already ready");
1791 rval = 0;
1792 break;
1793 case Z_BOOT:
1794 case Z_FORCEBOOT:
1795 (void) strlcpy(boot_args, zargp->bootbuf,
1796 sizeof (boot_args));
1797 eventstream_write(Z_EVT_ZONE_BOOTING);
1798 rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
1799 debug);
1800 audit_put_record(zlogp, uc, rval, "boot");
1801 zcons_statechanged();
1802 if (rval != 0) {
1803 bringup_failure_recovery = B_TRUE;
1804 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1805 zstate, debug);
1806 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1807 }
1808 boot_args[0] = '\0';
1809 break;
1810 case Z_HALT:
1811 if (kernelcall) /* Invalid; can't happen */
1812 abort();
1813 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
1814 debug)) != 0)
1815 break;
1816 zcons_statechanged();
1817 eventstream_write(Z_EVT_ZONE_HALTED);
1818 break;
1819 case Z_SHUTDOWN:
1820 case Z_REBOOT:
1821 case Z_NOTE_UNINSTALLING:
1822 case Z_MOUNT:
1823 case Z_FORCEMOUNT:
1824 case Z_UNMOUNT:
1825 if (kernelcall) /* Invalid; can't happen */
1826 abort();
1827 zerror(zlogp, B_FALSE, "%s operation is invalid "
1828 "for zones in state '%s'", z_cmd_name(cmd),
1829 zone_state_str(zstate));
1830 rval = -1;
1831 break;
1832 }
1833 break;
1834
1835 case ZONE_STATE_MOUNTED:
1836 switch (cmd) {
1837 case Z_UNMOUNT:
1838 if (kernelcall) /* Invalid; can't happen */
1839 abort();
1840 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate, debug);
1841 if (rval == 0) {
1842 eventstream_write(Z_EVT_ZONE_HALTED);
1843 (void) sema_post(&scratch_sem);
1844 }
1845 break;
1846 default:
1847 if (kernelcall) /* Invalid; can't happen */
1848 abort();
1849 zerror(zlogp, B_FALSE, "%s operation is invalid "
1850 "for zones in state '%s'", z_cmd_name(cmd),
1851 zone_state_str(zstate));
1852 rval = -1;
1853 break;
1854 }
1855 break;
1856
1857 case ZONE_STATE_RUNNING:
1858 case ZONE_STATE_SHUTTING_DOWN:
1859 case ZONE_STATE_DOWN:
1860 switch (cmd) {
1861 case Z_READY:
1862 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
1863 debug)) != 0)
1864 break;
1865 zcons_statechanged();
1866 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
1867 debug)) == 0)
1868 eventstream_write(Z_EVT_ZONE_READIED);
1869 else
1870 eventstream_write(Z_EVT_ZONE_HALTED);
1871 break;
1872 case Z_BOOT:
1873 case Z_FORCEBOOT:
1874 /*
1875 * We could have two clients racing to boot this
1876 * zone; the second client loses, but his request
1877 * doesn't fail, since the zone is now in the desired
1878 * state.
1879 */
1880 zerror(zlogp, B_FALSE, "zone is already booted");
1881 rval = 0;
1882 break;
1883 case Z_HALT:
1884 if (kernelcall) {
1885 log_init_exit(init_status);
1886 } else {
1887 log_init_exit(-1);
1888 }
1889 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
1890 debug)) != 0)
1891 break;
1892 eventstream_write(Z_EVT_ZONE_HALTED);
1893 zcons_statechanged();
1894 break;
1895 case Z_REBOOT:
1896 (void) strlcpy(boot_args, zargp->bootbuf,
1897 sizeof (boot_args));
1898 eventstream_write(Z_EVT_ZONE_REBOOTING);
1899 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
1900 debug)) != 0) {
1901 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1902 boot_args[0] = '\0';
1903 break;
1904 }
1905 zcons_statechanged();
1906 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
1907 debug)) != 0) {
1908 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1909 boot_args[0] = '\0';
1910 break;
1911 }
1912 rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
1913 debug);
1914 audit_put_record(zlogp, uc, rval, "reboot");
1915 if (rval != 0) {
1916 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1917 zstate, debug);
1918 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1919 }
1920 boot_args[0] = '\0';
1921 break;
1922 case Z_SHUTDOWN:
1923 if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
1924 wait_shut = B_TRUE;
1925 }
1926 break;
1927 case Z_NOTE_UNINSTALLING:
1928 case Z_MOUNT:
1929 case Z_FORCEMOUNT:
1930 case Z_UNMOUNT:
1931 zerror(zlogp, B_FALSE, "%s operation is invalid "
1932 "for zones in state '%s'", z_cmd_name(cmd),
1933 zone_state_str(zstate));
1934 rval = -1;
1935 break;
1936 }
1937 break;
1938 default:
1939 abort();
1940 }
1941
1942 /*
1943 * Because the state of the zone may have changed, we make sure
1944 * to wake the console poller, which is in charge of initiating
1945 * the shutdown procedure as necessary.
1946 */
1947 eventstream_write(Z_EVT_NULL);
1948
1949 out:
2118 zonecfg_get_snapshot_handle(zone_name,
2119 snap_hndl) != Z_OK) {
2120 zerror(zlogp, B_FALSE, "recovering "
2121 "zone configuration handle");
2122 goto out;
2123 }
2124 }
2125 }
2126
2127 (void) fdetach(zone_door_path);
2128 (void) close(doorfd);
2129 goto top;
2130 }
2131 ret = 0;
2132 out:
2133 (void) close(doorfd);
2134 return (ret);
2135 }
2136
2137 /*
2138 * Run the query hook with the 'env' parameter. It should return a
2139 * string of tab-delimited key-value pairs, each of which should be set
2140 * in the environment.
2141 *
2142 * Because the env_vars string values become part of the environment, the
2143 * string is static and we don't free it.
2144 *
2145 * This function is always called before zoneadmd forks and makes itself
2146 * exclusive, so it is possible there could more than one instance of zoneadmd
2147 * running in parallel at this point. Thus, we have no zonecfg snapshot and
2148 * shouldn't take one yet (i.e. snap_hndl is NULL). Thats ok, since we don't
2149 * need any zonecfg info to query for a brand-specific env value.
2150 */
2151 static int
2152 set_brand_env(zlog_t *zlogp)
2153 {
2154 int ret = 0;
2155 static char *env_vars = NULL;
2156 char buf[2 * MAXPATHLEN];
2157
2158 if (query_hook[0] == '\0' || env_vars != NULL)
2159 return (0);
2160
2161 if (snprintf(buf, sizeof (buf), "%s env", query_hook) > sizeof (buf))
2162 return (-1);
2163
2164 if (do_subproc(zlogp, buf, &env_vars, B_FALSE) != 0)
2165 return (-1);
2166
2167 if (env_vars != NULL) {
2168 char *sp;
2169
2170 sp = strtok(env_vars, "\t");
2171 while (sp != NULL) {
2172 if (putenv(sp) != 0) {
2173 ret = -1;
2174 break;
2175 }
2176 sp = strtok(NULL, "\t");
2177 }
2178 }
2179
2180 return (ret);
2181 }
2182
2183 /*
2184 * Setup the brand's pre and post state change callbacks, as well as the
2185 * query callback, if any of these exist.
2186 */
2187 static int
2188 brand_callback_init(brand_handle_t bh, char *zone_name)
2189 {
2190 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
2191 sizeof (pre_statechg_hook));
2192
2193 if (brand_get_prestatechange(bh, zone_name, zonepath,
2194 pre_statechg_hook + EXEC_LEN,
2195 sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
2196 return (-1);
2197
2198 if (strlen(pre_statechg_hook) <= EXEC_LEN)
2199 pre_statechg_hook[0] = '\0';
2200
2201 (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
2202 sizeof (post_statechg_hook));
2203
2399 */
2400 if ((privset = priv_allocset()) == NULL) {
2401 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
2402 return (1);
2403 }
2404
2405 if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2406 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
2407 priv_freeset(privset);
2408 return (1);
2409 }
2410
2411 if (priv_isfullset(privset) == B_FALSE) {
2412 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
2413 "run this command (all privs required)");
2414 priv_freeset(privset);
2415 return (1);
2416 }
2417 priv_freeset(privset);
2418
2419 if (set_brand_env(zlogp) != 0) {
2420 zerror(zlogp, B_FALSE, "Unable to setup brand's environment");
2421 return (1);
2422 }
2423
2424 if (mkzonedir(zlogp) != 0)
2425 return (1);
2426
2427 /*
2428 * Pre-fork: setup shared state
2429 */
2430 if ((shstate = (void *)mmap(NULL, shstatelen,
2431 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
2432 MAP_FAILED) {
2433 zerror(zlogp, B_TRUE, "%s failed", "mmap");
2434 return (1);
2435 }
2436 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
2437 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
2438 (void) munmap((char *)shstate, shstatelen);
2439 return (1);
2440 }
2441 shstate->log.logfile = NULL;
2442 shstate->log.buflen = shstatelen - sizeof (*shstate);
2443 shstate->log.loglen = shstate->log.buflen;
|
103 #include <libdladm.h>
104 #include <sys/dls_mgmt.h>
105 #include <libscf.h>
106
107 #include <libzonecfg.h>
108 #include <zonestat_impl.h>
109 #include "zoneadmd.h"
110
/* File-scope state shared by the zone state-transition routines below. */
111 static char *progname;
112 char *zone_name; /* zone which we are managing */
113 zone_dochandle_t snap_hndl; /* handle for snapshot created when ready */
114 char zonepath[MAXNAMELEN];
115 char pool_name[MAXNAMELEN];
116 char default_brand[MAXNAMELEN];
117 char brand_name[MAXNAMELEN];
118 boolean_t zone_isnative;
119 boolean_t zone_iscluster;
120 boolean_t zone_islabeled;
121 boolean_t shutdown_in_progress;
/* Assigned from vplat_create() in zone_ready(). */
122 static zoneid_t zone_id;
/* Passed to libdladm calls such as dladm_zone_boot(). */
123 dladm_handle_t dld_handle = NULL;
124
/*
 * Brand hook command lines.  brand_prestatechg() and brand_poststatechg()
 * treat an empty string as "no hook" and otherwise run the string with the
 * state, command and alternate root appended.
 */
125 static char pre_statechg_hook[2 * MAXPATHLEN];
126 static char post_statechg_hook[2 * MAXPATHLEN];
127 char query_hook[2 * MAXPATHLEN];
128
129 zlog_t logsys;
130
131 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */
132 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */
133
134 static sema_t scratch_sem; /* for scratch zones */
135
136 static char zone_door_path[MAXPATHLEN];
137 static int zone_door = -1;
138
139 boolean_t in_death_throes = B_FALSE; /* daemon is dying */
140 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */
141
142 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
143 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
144 #endif
145
146 #define DEFAULT_LOCALE "C"
276
277 /*
278 * Since illumos boot arguments are getopt(3c) compatible (see kernel(1m)), we
279 * put the arguments into an argv style array, use getopt to process them,
280 * and put the resultant argument string back into outargs. Non-native brands
281 * may support alternate forms of boot arguments so we must handle that as well.
282 *
283 * During the filtering, we pull out any arguments which are truly "boot"
284 * arguments, leaving only those which are to be passed intact to the
285 * progenitor process. The one we support at the moment is -i, which
286 * indicates to the kernel which program should be launched as 'init'.
287 *
288 * Except for Z_OK, all other return values are treated as fatal.
289 */
290 static int
291 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
292 char *init_file)
293 {
294 int argc = 0, argc_save;
295 int i;
296 int err;
297 char *arg, *lasts, **argv = NULL, **argv_save;
298 char zonecfg_args[BOOTARGS_MAX];
299 char scratchargs[BOOTARGS_MAX], *sargs;
300 char scratchopt[3];
301 char c;
302
303 bzero(outargs, BOOTARGS_MAX);
304
305 /*
306 * If the user didn't specify transient boot arguments, check
307 * to see if there were any specified in the zone configuration,
308 * and use them if applicable.
309 */
310 if (inargs == NULL || inargs[0] == '\0') {
311 zone_dochandle_t handle;
312 if ((handle = zonecfg_init_handle()) == NULL) {
313 zerror(zlogp, B_TRUE,
314 "getting zone configuration handle");
315 return (Z_BAD_HANDLE);
316 }
317 err = zonecfg_get_snapshot_handle(zone_name, handle);
318 if (err != Z_OK) {
319 zerror(zlogp, B_FALSE,
320 "invalid configuration snapshot");
321 zonecfg_fini_handle(handle);
322 return (Z_BAD_HANDLE);
323 }
324
325 bzero(zonecfg_args, sizeof (zonecfg_args));
326 (void) zonecfg_get_bootargs(handle, zonecfg_args,
327 sizeof (zonecfg_args));
328 inargs = zonecfg_args;
329 zonecfg_fini_handle(handle);
330 }
331
332 if (strlen(inargs) >= BOOTARGS_MAX) {
333 zerror(zlogp, B_FALSE, "boot argument string too long");
334 return (Z_INVAL);
335 }
336
337 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
338 sargs = scratchargs;
339 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
340 sargs = NULL;
341 argc++;
342 }
343
344 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
345 zerror(zlogp, B_FALSE, "memory allocation failed");
346 return (Z_NOMEM);
347 }
348
349 argv_save = argv;
459 * since anyone can open any UNIX domain socket, regardless of
460 * its file system permissions. Sigh...
461 */
462 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
463 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
464 return (-1);
465 }
466 /* paranoia */
467 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
468 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
469 return (-1);
470 }
471 (void) chmod(ZONES_TMPDIR, S_IRWXU);
472 return (0);
473 }
474
475 /*
476 * Run the brand's pre-state change callback, if it exists.
477 */
478 static int
479 brand_prestatechg(zlog_t *zlogp, int state, int cmd)
480 {
481 char cmdbuf[2 * MAXPATHLEN];
482 const char *altroot;
483
484 if (pre_statechg_hook[0] == '\0')
485 return (0);
486
487 altroot = zonecfg_get_root();
488 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
489 state, cmd, altroot) > sizeof (cmdbuf))
490 return (-1);
491
492 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
493 return (-1);
494
495 return (0);
496 }
497
498 /*
499 * Run the brand's post-state change callback, if it exists.
500 */
501 static int
502 brand_poststatechg(zlog_t *zlogp, int state, int cmd)
503 {
504 char cmdbuf[2 * MAXPATHLEN];
505 const char *altroot;
506
507 if (post_statechg_hook[0] == '\0')
508 return (0);
509
510 altroot = zonecfg_get_root();
511 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
512 state, cmd, altroot) > sizeof (cmdbuf))
513 return (-1);
514
515 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
516 return (-1);
517
518 return (0);
519 }
520
521 /*
522 * Notify zonestatd of the new zone. If zonestatd is not running, this
523 * will do nothing.
524 */
525 static void
526 notify_zonestatd(zoneid_t zoneid)
527 {
528 int cmd[2];
529 int fd;
530 door_arg_t params;
531
532 fd = open(ZS_DOOR_PATH, O_RDONLY);
533 if (fd < 0)
534 return;
535
536 cmd[0] = ZSD_CMD_NEW_ZONE;
537 cmd[1] = zoneid;
538 params.data_ptr = (char *)&cmd;
539 params.data_size = sizeof (cmd);
540 params.desc_ptr = NULL;
541 params.desc_num = 0;
542 params.rbuf = NULL;
543 params.rsize = NULL;
544 (void) door_call(fd, ¶ms);
545 (void) close(fd);
546 }
547
548 /*
549 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is
550 * 'true' if this is being invoked as part of the processing for the "mount"
551 * subcommand.
552 */
553 static int
554 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
555 {
556 int err;
557
558 if (!ALT_MOUNT(mount_cmd) &&
559 brand_prestatechg(zlogp, zstate, Z_READY) != 0)
560 return (-1);
561
562 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
563 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
564 zonecfg_strerror(err));
565 goto bad;
566 }
567
568 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
569 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
570 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
571 zonecfg_strerror(err));
572 goto bad;
573 }
574 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
575 bringup_failure_recovery = B_TRUE;
576 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
577 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
578 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
579 zonecfg_strerror(err));
580 goto bad;
581 }
582
583 if (!ALT_MOUNT(mount_cmd) &&
584 brand_poststatechg(zlogp, zstate, Z_READY) != 0)
585 goto bad;
586
587 return (0);
588
589 bad:
590 /*
591 * If something goes wrong, we up the zones's state to the target
592 * state, READY, and then invoke the hook as if we're halting.
593 */
594 if (!ALT_MOUNT(mount_cmd))
595 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
596 return (-1);
597 }
598
599 int
600 init_template(void)
601 {
602 int fd;
603 int err = 0;
604
605 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
606 if (fd == -1)
607 return (-1);
608
609 /*
610 * For now, zoneadmd doesn't do anything with the contract.
611 * Deliver no events, don't inherit, and allow it to be orphaned.
612 */
613 err |= ct_tmpl_set_critical(fd, 0);
614 err |= ct_tmpl_set_informative(fd, 0);
615 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
756 ct = -1;
757 (void) ct_tmpl_clear(tmpl_fd);
758 (void) close(tmpl_fd);
759 if (waitpid(child, &child_status, 0) != child) {
760 /* unexpected: we must have been signalled */
761 (void) contract_abandon_id(ct);
762 return (-1);
763 }
764 (void) contract_abandon_id(ct);
765 if (WEXITSTATUS(child_status) != 0) {
766 errno = WEXITSTATUS(child_status);
767 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
768 return (-1);
769 }
770
771 return (0);
772 }
773
/*
 * Export one zonecfg property into the environment.
 *
 * The variable name is built as
 *	_ZONECFG_{resource name}_{identifying attr. name}_{property name}
 * with the identifying attribute part left out when attr is NULL.  Every
 * '-' in the resulting name is replaced by '_'.  An existing variable of
 * the same name is overwritten.
 */
static void
set_zonecfg_env(char *rsrc, char *attr, char *name, char *val)
{
	/* Enough for maximal name, rsrc + attr, & slop for ZONECFG & _'s */
	char nm[2 * MAXNAMELEN + 32];
	char *cp;

	if (attr != NULL) {
		(void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s_%s", rsrc,
		    attr, name);
	} else {
		(void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s", rsrc,
		    name);
	}

	for (cp = nm; *cp != '\0'; cp++) {
		if (*cp == '-')
			*cp = '_';
	}

	(void) setenv(nm, val, 1);
}
798
799 /*
800 * Export zonecfg network and device properties into environment for the boot
801 * and state change hooks.
802 * If debug is true, export the brand hook debug env. variable as well.
803 *
804 * We could export more of the config in the future, as necessary.
805 */
806 static int
807 setup_subproc_env()
808 {
809 int res;
810 zone_dochandle_t handle;
811 struct zone_nwiftab ntab;
812 struct zone_devtab dtab;
813 char net_resources[MAXNAMELEN * 2];
814 char dev_resources[MAXNAMELEN * 2];
815
816 if ((handle = zonecfg_init_handle()) == NULL)
817 exit(Z_NOMEM);
818
819 if ((res = zonecfg_get_handle(zone_name, handle)) != Z_OK)
820 goto done;
821
822 if ((res = zonecfg_setnwifent(handle)) != Z_OK)
823 goto done;
824
825 while (zonecfg_getnwifent(handle, &ntab) == Z_OK) {
826 struct zone_res_attrtab *rap;
827 char *phys;
828
829 phys = ntab.zone_nwif_physical;
830
831 (void) strlcat(net_resources, phys, sizeof (net_resources));
832 (void) strlcat(net_resources, " ", sizeof (net_resources));
833
834 set_zonecfg_env(RSRC_NET, phys, "physical", phys);
835
836 set_zonecfg_env(RSRC_NET, phys, "address",
837 ntab.zone_nwif_address);
838 set_zonecfg_env(RSRC_NET, phys, "allowed-address",
839 ntab.zone_nwif_allowed_address);
840 set_zonecfg_env(RSRC_NET, phys, "defrouter",
841 ntab.zone_nwif_defrouter);
842 set_zonecfg_env(RSRC_NET, phys, "global-nic",
843 ntab.zone_nwif_gnic);
844 set_zonecfg_env(RSRC_NET, phys, "mac-addr", ntab.zone_nwif_mac);
845 set_zonecfg_env(RSRC_NET, phys, "vlan-id",
846 ntab.zone_nwif_vlan_id);
847
848 for (rap = ntab.zone_nwif_attrp; rap != NULL;
849 rap = rap->zone_res_attr_next)
850 set_zonecfg_env(RSRC_NET, phys, rap->zone_res_attr_name,
851 rap->zone_res_attr_value);
852 }
853
854 (void) zonecfg_endnwifent(handle);
855
856 if ((res = zonecfg_setdevent(handle)) != Z_OK)
857 goto done;
858
859 while (zonecfg_getdevent(handle, &dtab) == Z_OK) {
860 struct zone_res_attrtab *rap;
861 char *match;
862
863 match = dtab.zone_dev_match;
864
865 (void) strlcat(dev_resources, match, sizeof (dev_resources));
866 (void) strlcat(dev_resources, " ", sizeof (dev_resources));
867
868 for (rap = dtab.zone_dev_attrp; rap != NULL;
869 rap = rap->zone_res_attr_next)
870 set_zonecfg_env(RSRC_DEV, match,
871 rap->zone_res_attr_name, rap->zone_res_attr_value);
872 }
873
874 (void) zonecfg_enddevent(handle);
875
876 res = Z_OK;
877
878 done:
879 zonecfg_fini_handle(handle);
880 return (res);
881 }
882
883 /*
884 * If retstr is not NULL, the output of the subproc is returned in the str,
885 * otherwise it is output using zerror(). Any memory allocated for retstr
886 * should be freed by the caller.
887 */
888 int
889 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
890 {
891 char buf[1024]; /* arbitrary large amount */
892 char *inbuf;
893 FILE *file;
894 int status;
895 int rd_cnt;
896
897 if (retstr != NULL) {
898 if ((*retstr = malloc(1024)) == NULL) {
899 zerror(zlogp, B_FALSE, "out of memory");
900 return (-1);
901 }
902 inbuf = *retstr;
903 rd_cnt = 0;
904 } else {
905 inbuf = buf;
906 }
907
908 if (setup_subproc_env() != Z_OK) {
909 zerror(zlogp, B_FALSE, "failed to setup environment");
910 return (-1);
911 }
912
913 file = popen(cmdbuf, "r");
914 if (file == NULL) {
915 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
916 return (-1);
917 }
918
919 while (fgets(inbuf, 1024, file) != NULL) {
920 if (retstr == NULL) {
921 if (zlogp != &logsys)
922 zerror(zlogp, B_FALSE, "%s", inbuf);
923 } else {
924 char *p;
925
926 rd_cnt += 1024 - 1;
927 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
928 zerror(zlogp, B_FALSE, "out of memory");
929 (void) pclose(file);
930 return (-1);
931 }
932
933 *retstr = p;
934 inbuf = *retstr + rd_cnt;
935 }
936 }
937 status = pclose(file);
938
939 if (WIFSIGNALED(status)) {
940 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
941 "signal %d", cmdbuf, WTERMSIG(status));
942 return (-1);
943 }
944 assert(WIFEXITED(status));
945 if (WEXITSTATUS(status) == ZEXIT_EXEC) {
946 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
947 return (-1);
948 }
949 return (WEXITSTATUS(status));
950 }
951
952 /*
953 * Get the app-svc-dependent flag for this zone's init process. This is a
954 * zone-specific attr which controls the type of contract we create for the
955 * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal
956 * set, so that when any service which is in the same contract exits, the init
957 * application will be terminated.
958 *
959 * We use the global "snap_hndl", so no parameters get passed here.
960 */
961 static boolean_t
962 is_app_svc_dep(void)
963 {
964 struct zone_attrtab a;
965
966 bzero(&a, sizeof (a));
967 (void) strlcpy(a.zone_attr_name, "app-svc-dependent",
968 sizeof (a.zone_attr_name));
969
970 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
971 strcmp(a.zone_attr_value, "true") == 0) {
972 return (B_TRUE);
973 }
974
975 return (B_FALSE);
976 }
977
978 static int
979 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
980 {
981 zoneid_t zoneid;
982 struct stat st;
983 char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
984 char nbootargs[BOOTARGS_MAX];
985 char cmdbuf[MAXPATHLEN];
986 fs_callback_t cb;
987 brand_handle_t bh;
988 zone_iptype_t iptype;
989 dladm_status_t status;
990 char errmsg[DLADM_STRSIZE];
991 int err;
992 boolean_t restart_init;
993 boolean_t app_svc_dep;
994
995 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
996 return (-1);
997
998 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
999 zerror(zlogp, B_TRUE, "unable to get zoneid");
1000 goto bad;
1001 }
1002
1003 cb.zlogp = zlogp;
1004 cb.zoneid = zoneid;
1005 cb.mount_cmd = B_FALSE;
1006
1007 /* Get a handle to the brand info for this zone */
1008 if ((bh = brand_open(brand_name)) == NULL) {
1009 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1010 goto bad;
1011 }
1012
1013 /*
1014 * Get the list of filesystems to mount from the brand
1015 * configuration. These mounts are done via a thread that will
1018 */
1019 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
1020 zerror(zlogp, B_FALSE, "unable to mount filesystems");
1021 brand_close(bh);
1022 goto bad;
1023 }
1024
1025 /*
1026 * Get the brand's boot callback if it exists.
1027 */
1028 (void) strcpy(cmdbuf, EXEC_PREFIX);
1029 if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
1030 sizeof (cmdbuf) - EXEC_LEN) != 0) {
1031 zerror(zlogp, B_FALSE,
1032 "unable to determine branded zone's boot callback");
1033 brand_close(bh);
1034 goto bad;
1035 }
1036
1037 /* Get the path for this zone's init(1M) (or equivalent) process. */
1038 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
1039 zerror(zlogp, B_FALSE,
1040 "unable to determine zone's init(1M) location");
1041 brand_close(bh);
1042 goto bad;
1043 }
1044
1045 /* See if this zone's brand should restart init if it dies. */
1046 restart_init = brand_restartinit(bh);
1047
1048 /*
1049 * See if we need to setup contract dependencies between the zone's
1050 * primary application and any of its services.
1051 */
1052 app_svc_dep = is_app_svc_dep();
1053
1054 brand_close(bh);
1055
1056 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
1057 if (err != Z_OK)
1058 goto bad;
1059
1060 assert(init_file[0] != '\0');
1061
1062 /*
1063 * Try to anticipate possible problems: If possible, make sure init is
1064 * executable.
1065 */
1066 if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) {
1067 zerror(zlogp, B_FALSE, "unable to determine zone root");
1068 goto bad;
1069 }
1070
1071 (void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file);
1072
1073 if (lstat(initpath, &st) == -1) {
1074 zerror(zlogp, B_TRUE, "could not stat %s", initpath);
1075 goto bad;
1076 }
1077
1078 /*
1079 * If a symlink, we'll have to wait and resolve when we boot,
1080 * otherwise check the executable bits now.
1081 */
1082 if ((st.st_mode & S_IFMT) != S_IFLNK && (st.st_mode & S_IXUSR) == 0) {
1083 zerror(zlogp, B_FALSE, "%s is not executable", initpath);
1084 goto bad;
1085 }
1086
1087 /*
1088 * Exclusive stack zones interact with the dlmgmtd running in the
1089 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is
1090 * booting, and loads its datalinks from the zone's datalink
1091 * configuration file.
1092 */
1093 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
1094 status = dladm_zone_boot(dld_handle, zoneid);
1095 if (status != DLADM_STATUS_OK) {
1096 zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
1097 " %s", dladm_status2str(status, errmsg));
1098 goto bad;
1099 }
1100 }
1101
1102 /*
1103 * If there is a brand 'boot' callback, execute it now to give the
1104 * brand one last chance to do any additional setup before the zone
1105 * is booted.
1106 */
1107 if ((strlen(cmdbuf) > EXEC_LEN) &&
1108 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
1109 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
1110 goto bad;
1111 }
1112
1113 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
1114 zerror(zlogp, B_TRUE, "could not set zone boot file");
1115 goto bad;
1116 }
1117
1118 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
1119 zerror(zlogp, B_TRUE, "could not set zone boot arguments");
1120 goto bad;
1121 }
1122
1123 if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART,
1124 NULL, 0) == -1) {
1125 zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
1126 goto bad;
1127 }
1128
1129 if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT,
1130 (void *)B_TRUE, sizeof (boolean_t)) == -1) {
1131 zerror(zlogp, B_TRUE, "could not set zone app-die");
1132 goto bad;
1133 }
1134
1135 /*
1136 * Inform zonestatd of a new zone so that it can install a door for
1137 * the zone to contact it.
1138 */
1139 notify_zonestatd(zone_id);
1140
1141 if (zone_boot(zoneid) == -1) {
1142 zerror(zlogp, B_TRUE, "unable to boot zone");
1143 goto bad;
1144 }
1145
1146 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
1147 goto bad;
1148
1149 /* Startup a thread to perform zfd logging/tty svc for the zone. */
1150 create_log_thread(zlogp, zone_id);
1151
1152 /* Startup a thread to perform memory capping for the zone. */
1153 create_mcap_thread(zlogp, zone_id);
1154
1155 return (0);
1156
1157 bad:
1158 /*
1159 * If something goes wrong, we up the zones's state to the target
1160 * state, RUNNING, and then invoke the hook as if we're halting.
1161 */
1162 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
1163
1164 return (-1);
1165 }
1166
1167 static int
1168 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
1169 {
1170 int err;
1171
1172 if (unmount_cmd == B_FALSE &&
1173 brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
1174 return (-1);
1175
1176 /* Shutting down, stop the memcap thread */
1177 destroy_mcap_thread();
1178
1179 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
1180 if (!bringup_failure_recovery)
1181 zerror(zlogp, B_FALSE, "unable to destroy zone");
1182 destroy_log_thread();
1183 return (-1);
1184 }
1185
1186 /* Shut down is done, stop the log thread */
1187 destroy_log_thread();
1188
1189 if (unmount_cmd == B_FALSE &&
1190 brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
1191 return (-1);
1192
1193 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
1194 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
1195 zonecfg_strerror(err));
1196
1197 return (0);
1198 }
1199
1200 static int
1201 zone_graceful_shutdown(zlog_t *zlogp)
1202 {
1203 zoneid_t zoneid;
1204 pid_t child;
1205 char cmdbuf[MAXPATHLEN];
1206 brand_handle_t bh = NULL;
1207 ctid_t ct;
1208 int tmpl_fd;
1209 int child_status;
1210
1211 if (shutdown_in_progress) {
1212 zerror(zlogp, B_FALSE, "shutdown already in progress");
1213 return (-1);
1214 }
1215
1216 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1392 return;
1393
1394 (void) write(fd, buf, strlen(buf));
1395
1396 (void) close(fd);
1397 }
1398
1399 /*
1400 * The main routine for the door server that deals with zone state transitions.
1401 */
1402 /* ARGSUSED */
1403 static void
1404 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1405 uint_t n_desc)
1406 {
1407 ucred_t *uc = NULL;
1408 const priv_set_t *eset;
1409
1410 zone_state_t zstate;
1411 zone_cmd_t cmd;
1412 int init_status;
1413 zone_cmd_arg_t *zargp;
1414
1415 boolean_t kernelcall;
1416
1417 int rval = -1;
1418 uint64_t uniqid;
1419 zoneid_t zoneid = -1;
1420 zlog_t zlog;
1421 zlog_t *zlogp;
1422 zone_cmd_rval_t *rvalp;
1423 size_t rlen = getpagesize(); /* conservative */
1424 fs_callback_t cb;
1425 brand_handle_t bh;
1426 boolean_t wait_shut = B_FALSE;
1427
1428 /* LINTED E_BAD_PTR_CAST_ALIGN */
1429 zargp = (zone_cmd_arg_t *)args;
1430
1431 /*
1432 * When we get the door unref message, we've fdetach'd the door, and
1433 * it is time for us to shut down zoneadmd.
1434 */
1435 if (zargp == DOOR_UNREF_DATA) {
1445
1446 rvalp = alloca(rlen);
1447 bzero(rvalp, rlen);
1448 zlog.logfile = NULL;
1449 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1450 zlog.buf = rvalp->errbuf;
1451 zlog.log = zlog.buf;
1452 /* defer initialization of zlog.locale until after credential check */
1453 zlogp = &zlog;
1454
1455 if (alen != sizeof (zone_cmd_arg_t)) {
1456 /*
1457 * This really shouldn't be happening.
1458 */
1459 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1460 "unexpected (expected %d bytes)", alen,
1461 sizeof (zone_cmd_arg_t));
1462 goto out;
1463 }
1464 cmd = zargp->cmd;
1465 init_status = zargp->status;
1466
1467 if (door_ucred(&uc) != 0) {
1468 zerror(&logsys, B_TRUE, "door_ucred");
1469 goto out;
1470 }
1471 eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1472 if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1473 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1474 ucred_geteuid(uc) != 0)) {
1475 zerror(&logsys, B_FALSE, "insufficient privileges");
1476 goto out;
1477 }
1478
1479 kernelcall = ucred_getpid(uc) == 0;
1480
1481 /*
1482 * This is safe because we only use a zlog_t throughout the
1483 * duration of a door call; i.e., by the time the pointer
1484 * might become invalid, the door call would be over.
1552 */
1553 if (zstate == ZONE_STATE_INCOMPLETE &&
1554 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1555 zstate = ZONE_STATE_INSTALLED;
1556
1557 switch (zstate) {
1558 case ZONE_STATE_CONFIGURED:
1559 case ZONE_STATE_INCOMPLETE:
1560 /*
1561 * Not our area of expertise; we just print a nice message
1562 * and die off.
1563 */
1564 zerror(zlogp, B_FALSE,
1565 "%s operation is invalid for zones in state '%s'",
1566 z_cmd_name(cmd), zone_state_str(zstate));
1567 break;
1568
1569 case ZONE_STATE_INSTALLED:
1570 switch (cmd) {
1571 case Z_READY:
1572 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
1573 if (rval == 0)
1574 eventstream_write(Z_EVT_ZONE_READIED);
1575 zcons_statechanged();
1576 break;
1577 case Z_BOOT:
1578 case Z_FORCEBOOT:
1579 eventstream_write(Z_EVT_ZONE_BOOTING);
1580 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1581 == 0) {
1582 rval = zone_bootup(zlogp, zargp->bootbuf,
1583 zstate);
1584 }
1585 audit_put_record(zlogp, uc, rval, "boot");
1586 zcons_statechanged();
1587 if (rval != 0) {
1588 bringup_failure_recovery = B_TRUE;
1589 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1590 zstate);
1591 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1592 }
1593 break;
1594 case Z_SHUTDOWN:
1595 case Z_HALT:
1596 if (kernelcall) /* Invalid; can't happen */
1597 abort();
1598 /*
1599 * We could have two clients racing to halt this
1600 * zone; the second client loses, but his request
1601 * doesn't fail, since the zone is now in the desired
1602 * state.
1603 */
1604 zerror(zlogp, B_FALSE, "zone is already halted");
1605 rval = 0;
1606 break;
1607 case Z_REBOOT:
1608 if (kernelcall) /* Invalid; can't happen */
1609 abort();
1610 zerror(zlogp, B_FALSE, "%s operation is invalid "
1622 eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1623 break;
1624 case Z_MOUNT:
1625 case Z_FORCEMOUNT:
1626 if (kernelcall) /* Invalid; can't happen */
1627 abort();
1628 if (!zone_isnative && !zone_iscluster &&
1629 !zone_islabeled) {
1630 /*
1631 * -U mounts the zone without lofs mounting
1632 * zone file systems back into the scratch
1633 * zone. This is required when mounting
1634 * non-native branded zones.
1635 */
1636 (void) strlcpy(zargp->bootbuf, "-U",
1637 BOOTARGS_MAX);
1638 }
1639
1640 rval = zone_ready(zlogp,
1641 strcmp(zargp->bootbuf, "-U") == 0 ?
1642 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
1643 if (rval != 0)
1644 break;
1645
1646 eventstream_write(Z_EVT_ZONE_READIED);
1647
1648 /*
1649 * Get a handle to the default brand info.
1650 * We must always use the default brand file system
1651 * list when mounting the zone.
1652 */
1653 if ((bh = brand_open(default_brand)) == NULL) {
1654 rval = -1;
1655 break;
1656 }
1657
1658 /*
1659 * Get the list of filesystems to mount from
1660 * the brand configuration. These mounts are done
1661 * via a thread that will enter the zone, so they
1662 * are done from within the context of the zone.
1684 abort();
1685 zerror(zlogp, B_FALSE, "zone is already unmounted");
1686 rval = 0;
1687 break;
1688 }
1689 break;
1690
1691 case ZONE_STATE_READY:
1692 switch (cmd) {
1693 case Z_READY:
1694 /*
1695 * We could have two clients racing to ready this
1696 * zone; the second client loses, but his request
1697 * doesn't fail, since the zone is now in the desired
1698 * state.
1699 */
1700 zerror(zlogp, B_FALSE, "zone is already ready");
1701 rval = 0;
1702 break;
1703 case Z_BOOT:
1704 (void) strlcpy(boot_args, zargp->bootbuf,
1705 sizeof (boot_args));
1706 eventstream_write(Z_EVT_ZONE_BOOTING);
1707 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1708 audit_put_record(zlogp, uc, rval, "boot");
1709 zcons_statechanged();
1710 if (rval != 0) {
1711 bringup_failure_recovery = B_TRUE;
1712 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1713 zstate);
1714 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1715 }
1716 boot_args[0] = '\0';
1717 break;
1718 case Z_HALT:
1719 if (kernelcall) /* Invalid; can't happen */
1720 abort();
1721 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1722 != 0)
1723 break;
1724 zcons_statechanged();
1725 eventstream_write(Z_EVT_ZONE_HALTED);
1726 break;
1727 case Z_SHUTDOWN:
1728 case Z_REBOOT:
1729 case Z_NOTE_UNINSTALLING:
1730 case Z_MOUNT:
1731 case Z_UNMOUNT:
1732 if (kernelcall) /* Invalid; can't happen */
1733 abort();
1734 zerror(zlogp, B_FALSE, "%s operation is invalid "
1735 "for zones in state '%s'", z_cmd_name(cmd),
1736 zone_state_str(zstate));
1737 rval = -1;
1738 break;
1739 }
1740 break;
1741
1742 case ZONE_STATE_MOUNTED:
1743 switch (cmd) {
1744 case Z_UNMOUNT:
1745 if (kernelcall) /* Invalid; can't happen */
1746 abort();
1747 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
1748 if (rval == 0) {
1749 eventstream_write(Z_EVT_ZONE_HALTED);
1750 (void) sema_post(&scratch_sem);
1751 }
1752 break;
1753 default:
1754 if (kernelcall) /* Invalid; can't happen */
1755 abort();
1756 zerror(zlogp, B_FALSE, "%s operation is invalid "
1757 "for zones in state '%s'", z_cmd_name(cmd),
1758 zone_state_str(zstate));
1759 rval = -1;
1760 break;
1761 }
1762 break;
1763
1764 case ZONE_STATE_RUNNING:
1765 case ZONE_STATE_SHUTTING_DOWN:
1766 case ZONE_STATE_DOWN:
1767 switch (cmd) {
1768 case Z_READY:
1769 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1770 != 0)
1771 break;
1772 zcons_statechanged();
1773 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
1774 eventstream_write(Z_EVT_ZONE_READIED);
1775 else
1776 eventstream_write(Z_EVT_ZONE_HALTED);
1777 break;
1778 case Z_BOOT:
1779 /*
1780 * We could have two clients racing to boot this
1781 * zone; the second client loses, but his request
1782 * doesn't fail, since the zone is now in the desired
1783 * state.
1784 */
1785 zerror(zlogp, B_FALSE, "zone is already booted");
1786 rval = 0;
1787 break;
1788 case Z_HALT:
1789 if (kernelcall) {
1790 log_init_exit(init_status);
1791 } else {
1792 log_init_exit(-1);
1793 }
1794 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1795 != 0)
1796 break;
1797 eventstream_write(Z_EVT_ZONE_HALTED);
1798 zcons_statechanged();
1799 break;
1800 case Z_REBOOT:
1801 (void) strlcpy(boot_args, zargp->bootbuf,
1802 sizeof (boot_args));
1803 eventstream_write(Z_EVT_ZONE_REBOOTING);
1804 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1805 != 0) {
1806 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1807 boot_args[0] = '\0';
1808 break;
1809 }
1810 zcons_statechanged();
1811 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) !=
1812 0) {
1813 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1814 boot_args[0] = '\0';
1815 break;
1816 }
1817 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1818 audit_put_record(zlogp, uc, rval, "reboot");
1819 if (rval != 0) {
1820 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1821 zstate);
1822 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1823 }
1824 boot_args[0] = '\0';
1825 break;
1826 case Z_SHUTDOWN:
1827 if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
1828 wait_shut = B_TRUE;
1829 }
1830 break;
1831 case Z_NOTE_UNINSTALLING:
1832 case Z_MOUNT:
1833 case Z_UNMOUNT:
1834 zerror(zlogp, B_FALSE, "%s operation is invalid "
1835 "for zones in state '%s'", z_cmd_name(cmd),
1836 zone_state_str(zstate));
1837 rval = -1;
1838 break;
1839 }
1840 break;
1841 default:
1842 abort();
1843 }
1844
1845 /*
1846 * Because the state of the zone may have changed, we make sure
1847 * to wake the console poller, which is in charge of initiating
1848 * the shutdown procedure as necessary.
1849 */
1850 eventstream_write(Z_EVT_NULL);
1851
1852 out:
2021 zonecfg_get_snapshot_handle(zone_name,
2022 snap_hndl) != Z_OK) {
2023 zerror(zlogp, B_FALSE, "recovering "
2024 "zone configuration handle");
2025 goto out;
2026 }
2027 }
2028 }
2029
2030 (void) fdetach(zone_door_path);
2031 (void) close(doorfd);
2032 goto top;
2033 }
2034 ret = 0;
2035 out:
2036 (void) close(doorfd);
2037 return (ret);
2038 }
2039
2040 /*
2041 * Set up the brand's pre and post state change callbacks, as well as the
2042 * query callback, if any of these exist.
2043 */
2044 static int
2045 brand_callback_init(brand_handle_t bh, char *zone_name)
2046 {
2047 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
2048 sizeof (pre_statechg_hook));
2049
2050 if (brand_get_prestatechange(bh, zone_name, zonepath,
2051 pre_statechg_hook + EXEC_LEN,
2052 sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
2053 return (-1);
2054
2055 if (strlen(pre_statechg_hook) <= EXEC_LEN)
2056 pre_statechg_hook[0] = '\0';
2057
2058 (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
2059 sizeof (post_statechg_hook));
2060
2256 */
2257 if ((privset = priv_allocset()) == NULL) {
2258 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
2259 return (1);
2260 }
2261
2262 if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2263 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
2264 priv_freeset(privset);
2265 return (1);
2266 }
2267
2268 if (priv_isfullset(privset) == B_FALSE) {
2269 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
2270 "run this command (all privs required)");
2271 priv_freeset(privset);
2272 return (1);
2273 }
2274 priv_freeset(privset);
2275
2276 if (mkzonedir(zlogp) != 0)
2277 return (1);
2278
2279 /*
2280 * Pre-fork: setup shared state
2281 */
2282 if ((shstate = (void *)mmap(NULL, shstatelen,
2283 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
2284 MAP_FAILED) {
2285 zerror(zlogp, B_TRUE, "%s failed", "mmap");
2286 return (1);
2287 }
2288 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
2289 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
2290 (void) munmap((char *)shstate, shstatelen);
2291 return (1);
2292 }
2293 shstate->log.logfile = NULL;
2294 shstate->log.buflen = shstatelen - sizeof (*shstate);
2295 shstate->log.loglen = shstate->log.buflen;
|