Print this page
Merge cleanup from previous six commits
OS-200 need a better mechanism for storing persistent zone_did
OS-2564 zone boot failed: could not start zoneadmd
OS-1763 mount of /etc/svc/volatile failed: Device busy
OS-511 make zonecfg device resource extensible, like the net resource
OS-224 add more zonecfg net properties
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/cmd/zoneadmd/zoneadmd.c
+++ new/usr/src/cmd/zoneadmd/zoneadmd.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright 2015, Joyent, Inc. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * zoneadmd manages zones; one zoneadmd process is launched for each
30 30 * non-global zone on the system. This daemon juggles four jobs:
31 31 *
32 32 * - Implement setup and teardown of the zone "virtual platform": mount and
33 33 * unmount filesystems; create and destroy network interfaces; communicate
34 34 * with devfsadmd to lay out devices for the zone; instantiate the zone
35 35 * console device; configure process runtime attributes such as resource
36 36 * controls, pool bindings, fine-grained privileges.
37 37 *
38 38 * - Launch the zone's init(1M) process.
39 39 *
40 40 * - Implement a door server; clients (like zoneadm) connect to the door
41 41 * server and request zone state changes. The kernel is also a client of
42 42 * this door server. A request to halt or reboot the zone which originates
43 43 * *inside* the zone results in a door upcall from the kernel into zoneadmd.
44 44 *
45 45 * One minor problem is that messages emitted by zoneadmd need to be passed
46 46 * back to the zoneadm process making the request. These messages need to
47 47 * be rendered in the client's locale; so, this is passed in as part of the
48 48 * request. The exception is the kernel upcall to zoneadmd, in which case
49 49 * messages are syslog'd.
50 50 *
51 51 * To make all of this work, the Makefile adds -a to xgettext to extract *all*
52 52 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
53 53 * strings which do not need to be translated.
54 54 *
55 55 * - Act as a console server for zlogin -C processes; see comments in zcons.c
56 56 * for more information about the zone console architecture.
57 57 *
58 58 * DESIGN NOTES
59 59 *
60 60 * Restart:
61 61 * A chief design constraint of zoneadmd is that it should be restartable in
62 62 * the case that the administrator kills it off, or it suffers a fatal error,
63 63 * without the running zone being impacted; this is akin to being able to
64 64 * reboot the service processor of a server without affecting the OS instance.
65 65 */
66 66
67 67 #include <sys/param.h>
68 68 #include <sys/mman.h>
69 69 #include <sys/types.h>
70 70 #include <sys/stat.h>
71 71 #include <sys/sysmacros.h>
72 72 #include <sys/time.h>
73 73
74 74 #include <bsm/adt.h>
75 75 #include <bsm/adt_event.h>
76 76
77 77 #include <alloca.h>
78 78 #include <assert.h>
79 79 #include <errno.h>
80 80 #include <door.h>
81 81 #include <fcntl.h>
82 82 #include <locale.h>
83 83 #include <signal.h>
84 84 #include <stdarg.h>
85 85 #include <stdio.h>
86 86 #include <stdlib.h>
87 87 #include <string.h>
88 88 #include <strings.h>
89 89 #include <synch.h>
90 90 #include <syslog.h>
91 91 #include <thread.h>
92 92 #include <unistd.h>
93 93 #include <wait.h>
94 94 #include <limits.h>
95 95 #include <zone.h>
96 96 #include <libbrand.h>
97 97 #include <sys/brand.h>
98 98 #include <libcontract.h>
99 99 #include <libcontract_priv.h>
100 100 #include <sys/brand.h>
101 101 #include <sys/contract/process.h>
102 102 #include <sys/ctfs.h>
103 103 #include <libdladm.h>
104 104 #include <sys/dls_mgmt.h>
105 105 #include <libscf.h>
106 106
107 107 #include <libzonecfg.h>
108 108 #include <zonestat_impl.h>
109 109 #include "zoneadmd.h"
110 110
111 111 static char *progname;
112 112 char *zone_name; /* zone which we are managing */
113 113 zone_dochandle_t snap_hndl; /* handle for snapshot created when ready */
114 114 char zonepath[MAXNAMELEN];
115 115 char pool_name[MAXNAMELEN];
116 116 char default_brand[MAXNAMELEN];
117 117 char brand_name[MAXNAMELEN];
118 118 boolean_t zone_isnative;
119 119 boolean_t zone_iscluster;
120 120 boolean_t zone_islabeled;
121 121 boolean_t shutdown_in_progress;
122 122 static zoneid_t zone_id;
123 123 dladm_handle_t dld_handle = NULL;
124 124
125 125 static char pre_statechg_hook[2 * MAXPATHLEN];
126 126 static char post_statechg_hook[2 * MAXPATHLEN];
127 127 char query_hook[2 * MAXPATHLEN];
128 128
129 129 zlog_t logsys;
130 130
131 131 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */
132 132 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */
133 133
134 134 static sema_t scratch_sem; /* for scratch zones */
135 135
136 136 static char zone_door_path[MAXPATHLEN];
137 137 static int zone_door = -1;
|
↓ open down ↓ |
137 lines elided |
↑ open up ↑ |
138 138
139 139 boolean_t in_death_throes = B_FALSE; /* daemon is dying */
140 140 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */
141 141
142 142 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
143 143 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
144 144 #endif
145 145
146 146 #define DEFAULT_LOCALE "C"
147 147
148 +#define RSRC_NET "net"
149 +#define RSRC_DEV "device"
150 +
148 151 static const char *
149 152 z_cmd_name(zone_cmd_t zcmd)
150 153 {
151 154 /* This list needs to match the enum in sys/zone.h */
152 155 static const char *zcmdstr[] = {
153 156 "ready", "boot", "forceboot", "reboot", "halt",
154 157 "note_uninstalling", "mount", "forcemount", "unmount",
155 158 "shutdown"
156 159 };
157 160
158 161 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr))
159 162 return ("unknown");
160 163 else
161 164 return (zcmdstr[(int)zcmd]);
162 165 }
163 166
/*
 * Return a pointer to the last path component of execfullname.  Trailing
 * '/' characters are removed by overwriting them with NULs in the caller's
 * buffer, so the argument must be writable.
 */
static char *
get_execbasename(char *execfullname)
{
	char *slash;

	while ((slash = strrchr(execfullname, '/')) != NULL) {
		/* A non-empty component follows the last slash: done. */
		if (slash[1] != '\0')
			return (slash + 1);

		/* guard against '/' at end of command invocation */
		*slash = '\0';
	}

	/* No slash left: the whole (possibly shortened) string is the name. */
	return (execfullname);
}
186 189
187 190 static void
188 191 usage(void)
189 192 {
190 193 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
191 194 (void) fprintf(stderr,
192 195 gettext("\tNote: %s should not be run directly.\n"), progname);
193 196 exit(2);
194 197 }
195 198
/*
 * SIGCHLD handler; intentionally does nothing.
 */
/* ARGSUSED */
static void
sigchld(int sig)
{
	(void) sig;
}
201 204
202 205 char *
203 206 localize_msg(char *locale, const char *msg)
204 207 {
205 208 char *out;
206 209
207 210 (void) mutex_lock(&msglock);
208 211 (void) setlocale(LC_MESSAGES, locale);
209 212 out = gettext(msg);
210 213 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
211 214 (void) mutex_unlock(&msglock);
212 215 return (out);
213 216 }
214 217
215 218 /* PRINTFLIKE3 */
216 219 void
217 220 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
218 221 {
219 222 va_list alist;
220 223 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
221 224 char *bp;
222 225 int saved_errno = errno;
223 226
224 227 if (zlogp == NULL)
225 228 return;
226 229 if (zlogp == &logsys)
227 230 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ",
228 231 zone_name);
229 232 else
230 233 buf[0] = '\0';
231 234 bp = &(buf[strlen(buf)]);
232 235
233 236 /*
234 237 * In theory, the locale pointer should be set to either "C" or a
235 238 * char array, so it should never be NULL
236 239 */
237 240 assert(zlogp->locale != NULL);
238 241 /* Locale is per process, but we are multi-threaded... */
239 242 fmt = localize_msg(zlogp->locale, fmt);
240 243
241 244 va_start(alist, fmt);
242 245 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
243 246 va_end(alist);
244 247 bp = &(buf[strlen(buf)]);
245 248 if (use_strerror)
246 249 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
247 250 strerror(saved_errno));
248 251 if (zlogp == &logsys) {
249 252 (void) syslog(LOG_ERR, "%s", buf);
250 253 } else if (zlogp->logfile != NULL) {
251 254 (void) fprintf(zlogp->logfile, "%s\n", buf);
252 255 } else {
253 256 size_t buflen;
254 257 size_t copylen;
255 258
256 259 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf);
257 260 copylen = MIN(buflen, zlogp->loglen);
258 261 zlogp->log += copylen;
259 262 zlogp->loglen -= copylen;
260 263 }
261 264 }
262 265
263 266 /*
264 267 * Since Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
265 268 * put the arguments into an argv style array, use getopt to process them,
266 269 * and put the resultant argument string back into outargs. Non-Solaris brands
267 270 * may support alternate forms of boot arguments so we must handle that as well.
268 271 *
269 272 * During the filtering, we pull out any arguments which are truly "boot"
270 273 * arguments, leaving only those which are to be passed intact to the
271 274 * progenitor process. The one we support at the moment is -i, which
272 275 * indicates to the kernel which program should be launched as 'init'.
273 276 *
274 277 * Except for Z_OK, all other return values are treated as fatal.
275 278 */
276 279 static int
277 280 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
278 281 char *init_file)
279 282 {
280 283 int argc = 0, argc_save;
281 284 int i;
282 285 int err;
283 286 char *arg, *lasts, **argv = NULL, **argv_save;
284 287 char zonecfg_args[BOOTARGS_MAX];
285 288 char scratchargs[BOOTARGS_MAX], *sargs;
286 289 char c;
287 290
288 291 bzero(outargs, BOOTARGS_MAX);
289 292
290 293 /*
291 294 * If the user didn't specify transient boot arguments, check
292 295 * to see if there were any specified in the zone configuration,
293 296 * and use them if applicable.
294 297 */
295 298 if (inargs == NULL || inargs[0] == '\0') {
296 299 zone_dochandle_t handle;
297 300 if ((handle = zonecfg_init_handle()) == NULL) {
298 301 zerror(zlogp, B_TRUE,
299 302 "getting zone configuration handle");
300 303 return (Z_BAD_HANDLE);
301 304 }
302 305 err = zonecfg_get_snapshot_handle(zone_name, handle);
303 306 if (err != Z_OK) {
304 307 zerror(zlogp, B_FALSE,
305 308 "invalid configuration snapshot");
306 309 zonecfg_fini_handle(handle);
307 310 return (Z_BAD_HANDLE);
308 311 }
309 312
310 313 bzero(zonecfg_args, sizeof (zonecfg_args));
311 314 (void) zonecfg_get_bootargs(handle, zonecfg_args,
312 315 sizeof (zonecfg_args));
313 316 inargs = zonecfg_args;
314 317 zonecfg_fini_handle(handle);
315 318 }
316 319
317 320 if (strlen(inargs) >= BOOTARGS_MAX) {
318 321 zerror(zlogp, B_FALSE, "boot argument string too long");
319 322 return (Z_INVAL);
320 323 }
321 324
322 325 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
323 326 sargs = scratchargs;
324 327 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
325 328 sargs = NULL;
326 329 argc++;
327 330 }
328 331
329 332 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
330 333 zerror(zlogp, B_FALSE, "memory allocation failed");
331 334 return (Z_NOMEM);
332 335 }
333 336
334 337 argv_save = argv;
335 338 argc_save = argc;
336 339
337 340 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
338 341 sargs = scratchargs;
339 342 i = 0;
340 343 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
341 344 sargs = NULL;
342 345 if ((argv[i] = strdup(arg)) == NULL) {
343 346 err = Z_NOMEM;
344 347 zerror(zlogp, B_FALSE, "memory allocation failed");
345 348 goto done;
346 349 }
347 350 i++;
348 351 }
349 352
350 353 /*
351 354 * We preserve compatibility with the illumos system boot behavior,
352 355 * which allows:
353 356 *
354 357 * # reboot kernel/unix -s -m verbose
355 358 *
356 359 * In this example, kernel/unix tells the booter what file to boot. The
357 360 * original intent of this was that we didn't want reboot in a zone to
358 361 * be gratuitously different, so we would silently ignore the boot
359 362 * file, if necessary. However, this usage is archaic and has never
360 363 * been common, since it is impossible to boot a zone onto a different
361 364 * kernel. Ignoring the first argument breaks for non-native brands
362 365 * which pass boot arguments in a different style. e.g.
363 366 * systemd.log_level=debug
364 367 * Thus, for backward compatibility we only ignore the first argument
365 368 * if it appears to be in the illumos form and attempting to specify a
366 369 * kernel.
367 370 */
368 371 if (argv[0] == NULL)
369 372 goto done;
370 373
371 374 assert(argv[0][0] != ' ');
372 375 assert(argv[0][0] != '\t');
373 376
374 377 if (strncmp(argv[0], "kernel/", 7) == 0) {
375 378 argv = &argv[1];
376 379 argc--;
377 380 }
378 381
379 382 optind = 0;
380 383 opterr = 0;
381 384 err = Z_OK;
382 385 while ((c = getopt(argc, argv, "fi:m:s")) != -1) {
383 386 switch (c) {
384 387 case 'i':
385 388 /*
386 389 * -i is handled by the runtime and is not passed
387 390 * along to userland
388 391 */
389 392 (void) strlcpy(init_file, optarg, MAXPATHLEN);
390 393 break;
391 394 case 'f':
392 395 /* This has already been processed by zoneadm */
393 396 break;
394 397 case 'm':
395 398 case 's':
396 399 /* These pass through unmolested */
397 400 (void) snprintf(outargs, BOOTARGS_MAX,
398 401 "%s -%c %s ", outargs, c, optarg ? optarg : "");
399 402 break;
400 403 case '?':
401 404 /*
402 405 * If a brand has its own init, we need to pass along
403 406 * whatever the user provides. We use the entire
404 407 * unknown string here so that we correctly handle
405 408 * unknown long options (e.g. --debug).
406 409 */
407 410 (void) snprintf(outargs, BOOTARGS_MAX,
408 411 "%s %s", outargs, argv[optind - 1]);
409 412 break;
410 413 }
411 414 }
412 415
413 416 /*
414 417 * We need to pass along everything else since we don't know what
415 418 * the brand's init is expecting. For example, an argument list like:
416 419 * --confdir /foo --debug
417 420 * will cause the getopt parsing to stop at '/foo' but we need to pass
418 421 * that on, along with the '--debug'. This does mean that we require
419 422 * any of our known options (-ifms) to preceed the brand-specific ones.
420 423 */
421 424 while (optind < argc) {
422 425 (void) snprintf(outargs, BOOTARGS_MAX, "%s %s", outargs,
423 426 argv[optind]);
424 427 optind++;
425 428 }
426 429
427 430 done:
428 431 for (i = 0; i < argc_save; i++) {
429 432 if (argv_save[i] != NULL)
430 433 free(argv_save[i]);
431 434 }
432 435 free(argv_save);
433 436 return (err);
434 437 }
435 438
436 439
437 440 static int
438 441 mkzonedir(zlog_t *zlogp)
439 442 {
440 443 struct stat st;
441 444 /*
442 445 * We must create and lock everyone but root out of ZONES_TMPDIR
443 446 * since anyone can open any UNIX domain socket, regardless of
444 447 * its file system permissions. Sigh...
445 448 */
446 449 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
447 450 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
448 451 return (-1);
449 452 }
450 453 /* paranoia */
451 454 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
452 455 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
453 456 return (-1);
454 457 }
455 458 (void) chmod(ZONES_TMPDIR, S_IRWXU);
456 459 return (0);
457 460 }
458 461
459 462 /*
460 463 * Run the brand's pre-state change callback, if it exists.
461 464 */
462 465 static int
463 466 brand_prestatechg(zlog_t *zlogp, int state, int cmd)
464 467 {
465 468 char cmdbuf[2 * MAXPATHLEN];
466 469 const char *altroot;
467 470
468 471 if (pre_statechg_hook[0] == '\0')
469 472 return (0);
470 473
471 474 altroot = zonecfg_get_root();
472 475 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
473 476 state, cmd, altroot) > sizeof (cmdbuf))
474 477 return (-1);
475 478
476 479 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
477 480 return (-1);
478 481
479 482 return (0);
480 483 }
481 484
482 485 /*
483 486 * Run the brand's post-state change callback, if it exists.
484 487 */
485 488 static int
486 489 brand_poststatechg(zlog_t *zlogp, int state, int cmd)
487 490 {
488 491 char cmdbuf[2 * MAXPATHLEN];
489 492 const char *altroot;
490 493
491 494 if (post_statechg_hook[0] == '\0')
492 495 return (0);
493 496
494 497 altroot = zonecfg_get_root();
495 498 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
496 499 state, cmd, altroot) > sizeof (cmdbuf))
497 500 return (-1);
498 501
499 502 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
500 503 return (-1);
501 504
502 505 return (0);
503 506 }
504 507
505 508 /*
506 509 * Notify zonestatd of the new zone. If zonestatd is not running, this
507 510 * will do nothing.
508 511 */
509 512 static void
510 513 notify_zonestatd(zoneid_t zoneid)
511 514 {
512 515 int cmd[2];
513 516 int fd;
514 517 door_arg_t params;
515 518
516 519 fd = open(ZS_DOOR_PATH, O_RDONLY);
517 520 if (fd < 0)
518 521 return;
519 522
520 523 cmd[0] = ZSD_CMD_NEW_ZONE;
521 524 cmd[1] = zoneid;
522 525 params.data_ptr = (char *)&cmd;
523 526 params.data_size = sizeof (cmd);
524 527 params.desc_ptr = NULL;
525 528 params.desc_num = 0;
526 529 params.rbuf = NULL;
527 530 params.rsize = NULL;
528 531 (void) door_call(fd, ¶ms);
529 532 (void) close(fd);
530 533 }
531 534
532 535 /*
533 536 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is
534 537 * 'true' if this is being invoked as part of the processing for the "mount"
535 538 * subcommand.
536 539 */
537 540 static int
538 541 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
539 542 {
540 543 int err;
541 544
542 545 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
543 546 return (-1);
544 547
545 548 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
546 549 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
547 550 zonecfg_strerror(err));
548 551 goto bad;
549 552 }
550 553
551 554 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
552 555 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
553 556 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
554 557 zonecfg_strerror(err));
555 558 goto bad;
556 559 }
557 560 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
558 561 bringup_failure_recovery = B_TRUE;
559 562 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
560 563 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
561 564 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
562 565 zonecfg_strerror(err));
563 566 goto bad;
564 567 }
565 568
566 569 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
567 570 goto bad;
568 571
569 572 return (0);
570 573
571 574 bad:
572 575 /*
573 576 * If something goes wrong, we up the zones's state to the target
574 577 * state, READY, and then invoke the hook as if we're halting.
575 578 */
576 579 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
577 580 return (-1);
578 581 }
579 582
580 583 int
581 584 init_template(void)
582 585 {
583 586 int fd;
584 587 int err = 0;
585 588
586 589 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
587 590 if (fd == -1)
588 591 return (-1);
589 592
590 593 /*
591 594 * For now, zoneadmd doesn't do anything with the contract.
592 595 * Deliver no events, don't inherit, and allow it to be orphaned.
593 596 */
594 597 err |= ct_tmpl_set_critical(fd, 0);
595 598 err |= ct_tmpl_set_informative(fd, 0);
596 599 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
597 600 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
598 601 if (err || ct_tmpl_activate(fd)) {
599 602 (void) close(fd);
600 603 return (-1);
601 604 }
602 605
603 606 return (fd);
604 607 }
605 608
606 609 typedef struct fs_callback {
607 610 zlog_t *zlogp;
608 611 zoneid_t zoneid;
609 612 boolean_t mount_cmd;
610 613 } fs_callback_t;
611 614
612 615 static int
613 616 mount_early_fs(void *data, const char *spec, const char *dir,
614 617 const char *fstype, const char *opt)
615 618 {
616 619 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp;
617 620 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid;
618 621 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd;
619 622 char rootpath[MAXPATHLEN];
620 623 pid_t child;
621 624 int child_status;
622 625 int tmpl_fd;
623 626 int rv;
624 627 ctid_t ct;
625 628
626 629 /* determine the zone rootpath */
627 630 if (mount_cmd) {
628 631 char luroot[MAXPATHLEN];
629 632
630 633 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
631 634 resolve_lofs(zlogp, luroot, sizeof (luroot));
632 635 (void) strlcpy(rootpath, luroot, sizeof (rootpath));
633 636 } else {
634 637 if (zone_get_rootpath(zone_name,
635 638 rootpath, sizeof (rootpath)) != Z_OK) {
636 639 zerror(zlogp, B_FALSE, "unable to determine zone root");
637 640 return (-1);
638 641 }
639 642 }
640 643
641 644 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) {
642 645 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
643 646 rootpath, dir);
644 647 return (-1);
645 648 } else if (rv > 0) {
646 649 /* The mount point path doesn't exist, create it now. */
647 650 if (make_one_dir(zlogp, rootpath, dir,
648 651 DEFAULT_DIR_MODE, DEFAULT_DIR_USER,
649 652 DEFAULT_DIR_GROUP) != 0) {
650 653 zerror(zlogp, B_FALSE, "failed to create mount point");
651 654 return (-1);
652 655 }
653 656
654 657 /*
655 658 * Now this might seem weird, but we need to invoke
656 659 * valid_mount_path() again. Why? Because it checks
657 660 * to make sure that the mount point path is canonical,
658 661 * which it can only do if the path exists, so now that
659 662 * we've created the path we have to verify it again.
660 663 */
661 664 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir,
662 665 fstype)) < 0) {
663 666 zerror(zlogp, B_FALSE,
664 667 "%s%s is not a valid mount point", rootpath, dir);
665 668 return (-1);
666 669 }
667 670 }
668 671
669 672 if ((tmpl_fd = init_template()) == -1) {
670 673 zerror(zlogp, B_TRUE, "failed to create contract");
671 674 return (-1);
672 675 }
673 676
|
↓ open down ↓ |
516 lines elided |
↑ open up ↑ |
674 677 if ((child = fork()) == -1) {
675 678 (void) ct_tmpl_clear(tmpl_fd);
676 679 (void) close(tmpl_fd);
677 680 zerror(zlogp, B_TRUE, "failed to fork");
678 681 return (-1);
679 682
680 683 } else if (child == 0) { /* child */
681 684 char opt_buf[MAX_MNTOPT_STR];
682 685 int optlen = 0;
683 686 int mflag = MS_DATA;
687 + int i;
688 + int ret;
684 689
685 690 (void) ct_tmpl_clear(tmpl_fd);
686 691 /*
687 692 * Even though there are no procs running in the zone, we
688 693 * do this for paranoia's sake.
689 694 */
690 695 (void) closefrom(0);
691 696
692 697 if (zone_enter(zoneid) == -1) {
693 698 _exit(errno);
694 699 }
695 700 if (opt != NULL) {
696 701 /*
697 702 * The mount() system call is incredibly annoying.
698 703 * If options are specified, we need to copy them
699 704 * into a temporary buffer since the mount() system
700 705 * call will overwrite the options string. It will
|
↓ open down ↓ |
7 lines elided |
↑ open up ↑ |
701 706 * also fail if the new option string it wants to
702 707 * write is bigger than the one we passed in, so
703 708 * you must pass in a buffer of the maximum possible
704 709 * option string length. sigh.
705 710 */
706 711 (void) strlcpy(opt_buf, opt, sizeof (opt_buf));
707 712 opt = opt_buf;
708 713 optlen = MAX_MNTOPT_STR;
709 714 mflag = MS_OPTIONSTR;
710 715 }
711 - if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0)
712 - _exit(errno);
713 - _exit(0);
716 +
717 + /*
718 + * There is an obscure race condition which can cause mount
719 + * to return EBUSY. This happens for example on the mount
720 + * of the zone's /etc/svc/volatile file system if there is
721 + * a GZ process running svcs -Z, which will touch the
722 + * mountpoint, just as we're trying to do the mount. To cope
723 + * with this, we retry up to 3 times to let this transient
724 + * process get out of the way.
725 + */
726 + for (i = 0; i < 3; i++) {
727 + ret = 0;
728 + if (mount(spec, dir, mflag, fstype, NULL, 0, opt,
729 + optlen) != 0)
730 + ret = errno;
731 + if (ret != EBUSY)
732 + break;
733 + (void) sleep(1);
734 + }
735 + _exit(ret);
714 736 }
715 737
716 738 /* parent */
717 739 if (contract_latest(&ct) == -1)
718 740 ct = -1;
719 741 (void) ct_tmpl_clear(tmpl_fd);
720 742 (void) close(tmpl_fd);
721 743 if (waitpid(child, &child_status, 0) != child) {
722 744 /* unexpected: we must have been signalled */
723 745 (void) contract_abandon_id(ct);
724 746 return (-1);
725 747 }
726 748 (void) contract_abandon_id(ct);
|
↓ open down ↓ |
3 lines elided |
↑ open up ↑ |
727 749 if (WEXITSTATUS(child_status) != 0) {
728 750 errno = WEXITSTATUS(child_status);
729 751 zerror(zlogp, B_TRUE, "mount of %s failed", dir);
730 752 return (-1);
731 753 }
732 754
733 755 return (0);
734 756 }
735 757
/*
 * Export one zonecfg property into the environment.
 *
 * env variable name format
 *	_ZONECFG_{resource name}_{identifying attr. value}_{property name}
 * or, when attr is NULL,
 *	_ZONECFG_{resource name}_{property name}
 * Every '-' in the resulting name is replaced by '_'.
 *
 * A NULL val is exported as the empty string.  If the name does not fit in
 * the local buffer nothing is exported, rather than setting a variable with
 * a truncated name.  A failing setenv() is ignored.
 */
static void
set_zonecfg_env(const char *rsrc, const char *attr, const char *name,
    const char *val)
{
	char *p;
	int len;
	/* Enough for maximal name, rsrc + attr, & slop for ZONECFG & _'s */
	char nm[2 * MAXNAMELEN + 32];

	if (attr == NULL)
		len = snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s", rsrc,
		    name);
	else
		len = snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s_%s", rsrc,
		    attr, name);

	if (len < 0 || (size_t)len >= sizeof (nm))
		return;

	p = nm;
	while ((p = strchr(p, '-')) != NULL)
		*p++ = '_';

	(void) setenv(nm, (val != NULL) ? val : "", 1);
}
782 +
783 +/*
784 + * Export zonecfg network and device properties into environment for the boot
785 + * and state change hooks.
786 + * If debug is true, export the brand hook debug env. variable as well.
787 + *
788 + * We could export more of the config in the future, as necessary.
789 + */
790 +static int
791 +setup_subproc_env()
792 +{
793 + int res;
794 + zone_dochandle_t handle;
795 + struct zone_nwiftab ntab;
796 + struct zone_devtab dtab;
797 + char net_resources[MAXNAMELEN * 2];
798 + char dev_resources[MAXNAMELEN * 2];
799 +
800 + if ((handle = zonecfg_init_handle()) == NULL)
801 + exit(Z_NOMEM);
802 +
803 + if ((res = zonecfg_get_handle(zone_name, handle)) != Z_OK)
804 + goto done;
805 +
806 + if ((res = zonecfg_setnwifent(handle)) != Z_OK)
807 + goto done;
808 +
809 + while (zonecfg_getnwifent(handle, &ntab) == Z_OK) {
810 + struct zone_res_attrtab *rap;
811 + char *phys;
812 +
813 + phys = ntab.zone_nwif_physical;
814 +
815 + (void) strlcat(net_resources, phys, sizeof (net_resources));
816 + (void) strlcat(net_resources, " ", sizeof (net_resources));
817 +
818 + set_zonecfg_env(RSRC_NET, phys, "physical", phys);
819 +
820 + set_zonecfg_env(RSRC_NET, phys, "address",
821 + ntab.zone_nwif_address);
822 + set_zonecfg_env(RSRC_NET, phys, "allowed-address",
823 + ntab.zone_nwif_allowed_address);
824 + set_zonecfg_env(RSRC_NET, phys, "defrouter",
825 + ntab.zone_nwif_defrouter);
826 + set_zonecfg_env(RSRC_NET, phys, "global-nic",
827 + ntab.zone_nwif_gnic);
828 + set_zonecfg_env(RSRC_NET, phys, "mac-addr", ntab.zone_nwif_mac);
829 + set_zonecfg_env(RSRC_NET, phys, "vlan-id",
830 + ntab.zone_nwif_vlan_id);
831 +
832 + for (rap = ntab.zone_nwif_attrp; rap != NULL;
833 + rap = rap->zone_res_attr_next)
834 + set_zonecfg_env(RSRC_NET, phys, rap->zone_res_attr_name,
835 + rap->zone_res_attr_value);
836 + }
837 +
838 + (void) zonecfg_endnwifent(handle);
839 +
840 + if ((res = zonecfg_setdevent(handle)) != Z_OK)
841 + goto done;
842 +
843 + while (zonecfg_getdevent(handle, &dtab) == Z_OK) {
844 + struct zone_res_attrtab *rap;
845 + char *match;
846 +
847 + match = dtab.zone_dev_match;
848 +
849 + (void) strlcat(dev_resources, match, sizeof (dev_resources));
850 + (void) strlcat(dev_resources, " ", sizeof (dev_resources));
851 +
852 + for (rap = dtab.zone_dev_attrp; rap != NULL;
853 + rap = rap->zone_res_attr_next)
854 + set_zonecfg_env(RSRC_DEV, match,
855 + rap->zone_res_attr_name, rap->zone_res_attr_value);
856 + }
857 +
858 + (void) zonecfg_enddevent(handle);
859 +
860 + res = Z_OK;
861 +
862 +done:
863 + zonecfg_fini_handle(handle);
864 + return (res);
865 +}
866 +
867 +/*
737 868 * If retstr is not NULL, the output of the subproc is returned in the str,
738 869 * otherwise it is output using zerror(). Any memory allocated for retstr
739 870 * should be freed by the caller.
740 871 */
741 872 int
742 873 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
743 874 {
744 875 char buf[1024]; /* arbitrary large amount */
745 876 char *inbuf;
746 877 FILE *file;
747 878 int status;
748 879 int rd_cnt;
749 880
750 881 if (retstr != NULL) {
|
↓ open down ↓ |
4 lines elided |
↑ open up ↑ |
751 882 if ((*retstr = malloc(1024)) == NULL) {
752 883 zerror(zlogp, B_FALSE, "out of memory");
753 884 return (-1);
754 885 }
755 886 inbuf = *retstr;
756 887 rd_cnt = 0;
757 888 } else {
758 889 inbuf = buf;
759 890 }
760 891
892 + if (setup_subproc_env() != Z_OK) {
893 + zerror(zlogp, B_FALSE, "failed to setup environment");
894 + return (-1);
895 + }
896 +
761 897 file = popen(cmdbuf, "r");
762 898 if (file == NULL) {
763 899 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
764 900 return (-1);
765 901 }
766 902
767 903 while (fgets(inbuf, 1024, file) != NULL) {
768 904 if (retstr == NULL) {
769 905 if (zlogp != &logsys)
770 906 zerror(zlogp, B_FALSE, "%s", inbuf);
771 907 } else {
772 908 char *p;
773 909
774 910 rd_cnt += 1024 - 1;
775 911 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
776 912 zerror(zlogp, B_FALSE, "out of memory");
777 913 (void) pclose(file);
778 914 return (-1);
779 915 }
780 916
781 917 *retstr = p;
782 918 inbuf = *retstr + rd_cnt;
783 919 }
784 920 }
785 921 status = pclose(file);
786 922
787 923 if (WIFSIGNALED(status)) {
788 924 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
789 925 "signal %d", cmdbuf, WTERMSIG(status));
790 926 return (-1);
791 927 }
792 928 assert(WIFEXITED(status));
793 929 if (WEXITSTATUS(status) == ZEXIT_EXEC) {
794 930 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
795 931 return (-1);
796 932 }
797 933 return (WEXITSTATUS(status));
798 934 }
799 935
#if 0	/* XXX KEBE SAYS not yet */
/*
 * Get the path for this zone's init(1M) (or equivalent) process.  A
 * zone-specific "init-name" attr in the snapshot takes precedence;
 * otherwise the brand is asked.
 */
static int
get_initname(brand_handle_t bh, char *initname, int len)
{
	struct zone_attrtab attr;

	bzero(&attr, sizeof (attr));
	(void) strlcpy(attr.zone_attr_name, "init-name",
	    sizeof (attr.zone_attr_name));

	if (zonecfg_lookup_attr(snap_hndl, &attr) != Z_OK)
		return (brand_get_initname(bh, initname, len));

	(void) strlcpy(initname, attr.zone_attr_value, len);
	return (0);
}

/*
 * Get the restart-init flag for this zone's init(1M) (or equivalent)
 * process.  A zone-specific "restart-init" attr in the snapshot takes
 * precedence (any value other than "false" means true); otherwise the
 * brand is asked.
 */
static boolean_t
restartinit(brand_handle_t bh)
{
	struct zone_attrtab attr;

	bzero(&attr, sizeof (attr));
	(void) strlcpy(attr.zone_attr_name, "restart-init",
	    sizeof (attr.zone_attr_name));

	if (zonecfg_lookup_attr(snap_hndl, &attr) != Z_OK)
		return (brand_restartinit(bh));

	return (strcmp(attr.zone_attr_value, "false") == 0 ?
	    B_FALSE : B_TRUE);
}
#endif	/* XXX KEBE */
844 980
845 981 /*
846 982 * Get the app-svc-dependent flag for this zone's init process. This is a
847 983 * zone-specific attr which controls the type of contract we create for the
848 984 * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal
849 985 * set, so that when any service which is in the same contract exits, the init
850 986 * application will be terminated.
851 987 *
852 988 * We use the global "snap_hndl", so no parameters get passed here.
853 989 */
854 990 static boolean_t
855 991 is_app_svc_dep(void)
856 992 {
857 993 struct zone_attrtab a;
858 994
859 995 bzero(&a, sizeof (a));
860 996 (void) strlcpy(a.zone_attr_name, "app-svc-dependent",
861 997 sizeof (a.zone_attr_name));
862 998
863 999 if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
864 1000 strcmp(a.zone_attr_value, "true") == 0) {
865 1001 return (B_TRUE);
866 1002 }
867 1003
868 1004 return (B_FALSE);
869 1005 }
870 1006
871 1007 static int
872 1008 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
873 1009 {
874 1010 zoneid_t zoneid;
875 1011 struct stat st;
876 1012 char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
877 1013 char nbootargs[BOOTARGS_MAX];
878 1014 char cmdbuf[MAXPATHLEN];
879 1015 fs_callback_t cb;
880 1016 brand_handle_t bh;
881 1017 zone_iptype_t iptype;
882 1018 dladm_status_t status;
883 1019 char errmsg[DLADM_STRSIZE];
884 1020 int err;
885 1021 boolean_t restart_init;
886 1022 boolean_t app_svc_dep;
887 1023
888 1024 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
889 1025 return (-1);
890 1026
891 1027 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
892 1028 zerror(zlogp, B_TRUE, "unable to get zoneid");
893 1029 goto bad;
894 1030 }
895 1031
896 1032 cb.zlogp = zlogp;
897 1033 cb.zoneid = zoneid;
898 1034 cb.mount_cmd = B_FALSE;
899 1035
900 1036 /* Get a handle to the brand info for this zone */
901 1037 if ((bh = brand_open(brand_name)) == NULL) {
902 1038 zerror(zlogp, B_FALSE, "unable to determine zone brand");
903 1039 goto bad;
904 1040 }
905 1041
906 1042 /*
907 1043 * Get the list of filesystems to mount from the brand
908 1044 * configuration. These mounts are done via a thread that will
909 1045 * enter the zone, so they are done from within the context of the
910 1046 * zone.
911 1047 */
912 1048 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
913 1049 zerror(zlogp, B_FALSE, "unable to mount filesystems");
914 1050 brand_close(bh);
915 1051 goto bad;
916 1052 }
917 1053
918 1054 /*
919 1055 * Get the brand's boot callback if it exists.
920 1056 */
921 1057 (void) strcpy(cmdbuf, EXEC_PREFIX);
922 1058 if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
923 1059 sizeof (cmdbuf) - EXEC_LEN) != 0) {
924 1060 zerror(zlogp, B_FALSE,
925 1061 "unable to determine branded zone's boot callback");
926 1062 brand_close(bh);
927 1063 goto bad;
928 1064 }
929 1065
930 1066 /* Get the path for this zone's init(1M) (or equivalent) process. */
931 1067 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
932 1068 zerror(zlogp, B_FALSE,
933 1069 "unable to determine zone's init(1M) location");
934 1070 brand_close(bh);
935 1071 goto bad;
936 1072 }
937 1073
938 1074 /* See if this zone's brand should restart init if it dies. */
939 1075 restart_init = brand_restartinit(bh);
940 1076
941 1077 /*
942 1078 * See if we need to setup contract dependencies between the zone's
943 1079 * primary application and any of its services.
944 1080 */
945 1081 app_svc_dep = is_app_svc_dep();
946 1082
947 1083 brand_close(bh);
948 1084
949 1085 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
950 1086 if (err != Z_OK)
951 1087 goto bad;
952 1088
953 1089 assert(init_file[0] != '\0');
954 1090
955 1091 /*
956 1092 * Try to anticipate possible problems: If possible, make sure init is
957 1093 * executable.
958 1094 */
959 1095 if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) {
960 1096 zerror(zlogp, B_FALSE, "unable to determine zone root");
961 1097 goto bad;
962 1098 }
963 1099
964 1100 (void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file);
965 1101
966 1102 if (lstat(initpath, &st) == -1) {
967 1103 zerror(zlogp, B_TRUE, "could not stat %s", initpath);
968 1104 goto bad;
969 1105 }
970 1106
971 1107 /*
972 1108 * If a symlink, we'll have to wait and resolve when we boot,
973 1109 * otherwise check the executable bits now.
974 1110 */
975 1111 if ((st.st_mode & S_IFMT) != S_IFLNK && (st.st_mode & S_IXUSR) == 0) {
976 1112 zerror(zlogp, B_FALSE, "%s is not executable", initpath);
977 1113 goto bad;
978 1114 }
979 1115
980 1116 /*
981 1117 * Exclusive stack zones interact with the dlmgmtd running in the
982 1118 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is
983 1119 * booting, and loads its datalinks from the zone's datalink
984 1120 * configuration file.
985 1121 */
986 1122 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
987 1123 status = dladm_zone_boot(dld_handle, zoneid);
988 1124 if (status != DLADM_STATUS_OK) {
989 1125 zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
990 1126 " %s", dladm_status2str(status, errmsg));
991 1127 goto bad;
992 1128 }
993 1129 }
994 1130
995 1131 /*
996 1132 * If there is a brand 'boot' callback, execute it now to give the
997 1133 * brand one last chance to do any additional setup before the zone
998 1134 * is booted.
999 1135 */
1000 1136 if ((strlen(cmdbuf) > EXEC_LEN) &&
1001 1137 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
1002 1138 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
1003 1139 goto bad;
1004 1140 }
1005 1141
1006 1142 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
1007 1143 zerror(zlogp, B_TRUE, "could not set zone boot file");
1008 1144 goto bad;
1009 1145 }
1010 1146
1011 1147 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
1012 1148 zerror(zlogp, B_TRUE, "could not set zone boot arguments");
1013 1149 goto bad;
1014 1150 }
1015 1151
1016 1152 if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART,
1017 1153 NULL, 0) == -1) {
1018 1154 zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
1019 1155 goto bad;
1020 1156 }
1021 1157
1022 1158 if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT,
1023 1159 (void *)B_TRUE, sizeof (boolean_t)) == -1) {
1024 1160 zerror(zlogp, B_TRUE, "could not set zone app-die");
1025 1161 goto bad;
1026 1162 }
1027 1163
1028 1164 /*
1029 1165 * Inform zonestatd of a new zone so that it can install a door for
1030 1166 * the zone to contact it.
1031 1167 */
1032 1168 notify_zonestatd(zone_id);
1033 1169
1034 1170 if (zone_boot(zoneid) == -1) {
1035 1171 zerror(zlogp, B_TRUE, "unable to boot zone");
1036 1172 goto bad;
1037 1173 }
1038 1174
1039 1175 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
1040 1176 goto bad;
1041 1177
1042 1178 /* Startup a thread to perform zfd logging/tty svc for the zone. */
1043 1179 create_log_thread(zlogp, zone_id);
1044 1180
1045 1181 /* Startup a thread to perform memory capping for the zone. */
1046 1182 create_mcap_thread(zlogp, zone_id);
1047 1183
1048 1184 return (0);
1049 1185
1050 1186 bad:
1051 1187 /*
1052 1188 * If something goes wrong, we up the zones's state to the target
1053 1189 * state, RUNNING, and then invoke the hook as if we're halting.
1054 1190 */
1055 1191 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
1056 1192
1057 1193 return (-1);
1058 1194 }
1059 1195
1060 1196 static int
1061 1197 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
1062 1198 {
1063 1199 int err;
1064 1200
1065 1201 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
1066 1202 return (-1);
1067 1203
1068 1204 /* Shutting down, stop the memcap thread */
1069 1205 destroy_mcap_thread();
1070 1206
1071 1207 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
1072 1208 if (!bringup_failure_recovery)
1073 1209 zerror(zlogp, B_FALSE, "unable to destroy zone");
1074 1210 destroy_log_thread();
1075 1211 return (-1);
1076 1212 }
1077 1213
1078 1214 /* Shut down is done, stop the log thread */
1079 1215 destroy_log_thread();
1080 1216
1081 1217 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
1082 1218 return (-1);
1083 1219
1084 1220 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
1085 1221 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
1086 1222 zonecfg_strerror(err));
1087 1223
1088 1224 return (0);
1089 1225 }
1090 1226
1091 1227 static int
1092 1228 zone_graceful_shutdown(zlog_t *zlogp)
1093 1229 {
1094 1230 zoneid_t zoneid;
1095 1231 pid_t child;
1096 1232 char cmdbuf[MAXPATHLEN];
1097 1233 brand_handle_t bh = NULL;
1098 1234 ctid_t ct;
1099 1235 int tmpl_fd;
1100 1236 int child_status;
1101 1237
1102 1238 if (shutdown_in_progress) {
1103 1239 zerror(zlogp, B_FALSE, "shutdown already in progress");
1104 1240 return (-1);
1105 1241 }
1106 1242
1107 1243 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1108 1244 zerror(zlogp, B_TRUE, "unable to get zoneid");
1109 1245 return (-1);
1110 1246 }
1111 1247
1112 1248 /* Get a handle to the brand info for this zone */
1113 1249 if ((bh = brand_open(brand_name)) == NULL) {
1114 1250 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1115 1251 return (-1);
1116 1252 }
1117 1253
1118 1254 /*
1119 1255 * If there is a brand 'shutdown' callback, execute it now to give the
1120 1256 * brand a chance to cleanup any custom configuration.
1121 1257 */
1122 1258 (void) strcpy(cmdbuf, EXEC_PREFIX);
1123 1259 if (brand_get_shutdown(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
1124 1260 sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
1125 1261 (void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
1126 1262 }
1127 1263 brand_close(bh);
1128 1264
1129 1265 if ((tmpl_fd = init_template()) == -1) {
1130 1266 zerror(zlogp, B_TRUE, "failed to create contract");
1131 1267 return (-1);
1132 1268 }
1133 1269
1134 1270 if ((child = fork()) == -1) {
1135 1271 (void) ct_tmpl_clear(tmpl_fd);
1136 1272 (void) close(tmpl_fd);
1137 1273 zerror(zlogp, B_TRUE, "failed to fork");
1138 1274 return (-1);
1139 1275 } else if (child == 0) {
1140 1276 (void) ct_tmpl_clear(tmpl_fd);
1141 1277 if (zone_enter(zoneid) == -1) {
1142 1278 _exit(errno);
1143 1279 }
1144 1280 _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL));
1145 1281 }
1146 1282
1147 1283 if (contract_latest(&ct) == -1)
1148 1284 ct = -1;
1149 1285 (void) ct_tmpl_clear(tmpl_fd);
1150 1286 (void) close(tmpl_fd);
1151 1287
1152 1288 if (waitpid(child, &child_status, 0) != child) {
1153 1289 /* unexpected: we must have been signalled */
1154 1290 (void) contract_abandon_id(ct);
1155 1291 return (-1);
1156 1292 }
1157 1293
1158 1294 (void) contract_abandon_id(ct);
1159 1295 if (WEXITSTATUS(child_status) != 0) {
1160 1296 errno = WEXITSTATUS(child_status);
1161 1297 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1162 1298 return (-1);
1163 1299 }
1164 1300
1165 1301 shutdown_in_progress = B_TRUE;
1166 1302
1167 1303 return (0);
1168 1304 }
1169 1305
1170 1306 static int
1171 1307 zone_wait_shutdown(zlog_t *zlogp)
1172 1308 {
1173 1309 zone_state_t zstate;
1174 1310 uint64_t *tm = NULL;
1175 1311 scf_simple_prop_t *prop = NULL;
1176 1312 int timeout;
1177 1313 int tries;
1178 1314 int rc = -1;
1179 1315
1180 1316 /* Get default stop timeout from SMF framework */
1181 1317 timeout = SHUTDOWN_WAIT;
1182 1318 if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop",
1183 1319 SCF_PROPERTY_TIMEOUT)) != NULL) {
1184 1320 if ((tm = scf_simple_prop_next_count(prop)) != NULL) {
1185 1321 if (tm != 0)
1186 1322 timeout = *tm;
1187 1323 }
1188 1324 scf_simple_prop_free(prop);
1189 1325 }
1190 1326
1191 1327 /* allow time for zone to shutdown cleanly */
1192 1328 for (tries = 0; tries < timeout; tries ++) {
1193 1329 (void) sleep(1);
1194 1330 if (zone_get_state(zone_name, &zstate) == Z_OK &&
1195 1331 zstate == ZONE_STATE_INSTALLED) {
1196 1332 rc = 0;
1197 1333 break;
1198 1334 }
1199 1335 }
1200 1336
1201 1337 if (rc != 0)
1202 1338 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1203 1339
1204 1340 shutdown_in_progress = B_FALSE;
1205 1341
1206 1342 return (rc);
1207 1343 }
1208 1344
1209 1345
1210 1346
1211 1347 /*
1212 1348 * Generate AUE_zone_state for a command that boots a zone.
1213 1349 */
1214 1350 static void
1215 1351 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
1216 1352 char *new_state)
1217 1353 {
1218 1354 adt_session_data_t *ah;
1219 1355 adt_event_data_t *event;
1220 1356 int pass_fail, fail_reason;
1221 1357
1222 1358 if (!adt_audit_enabled())
1223 1359 return;
1224 1360
1225 1361 if (return_val == 0) {
1226 1362 pass_fail = ADT_SUCCESS;
1227 1363 fail_reason = ADT_SUCCESS;
1228 1364 } else {
1229 1365 pass_fail = ADT_FAILURE;
1230 1366 fail_reason = ADT_FAIL_VALUE_PROGRAM;
1231 1367 }
1232 1368
1233 1369 if (adt_start_session(&ah, NULL, 0)) {
1234 1370 zerror(zlogp, B_TRUE, gettext("audit failure."));
1235 1371 return;
1236 1372 }
1237 1373 if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
1238 1374 zerror(zlogp, B_TRUE, gettext("audit failure."));
1239 1375 (void) adt_end_session(ah);
1240 1376 return;
1241 1377 }
1242 1378
1243 1379 event = adt_alloc_event(ah, ADT_zone_state);
1244 1380 if (event == NULL) {
1245 1381 zerror(zlogp, B_TRUE, gettext("audit failure."));
1246 1382 (void) adt_end_session(ah);
1247 1383 return;
1248 1384 }
1249 1385 event->adt_zone_state.zonename = zone_name;
1250 1386 event->adt_zone_state.new_state = new_state;
1251 1387
1252 1388 if (adt_put_event(event, pass_fail, fail_reason))
1253 1389 zerror(zlogp, B_TRUE, gettext("audit failure."));
1254 1390
1255 1391 adt_free_event(event);
1256 1392
1257 1393 (void) adt_end_session(ah);
1258 1394 }
1259 1395
1260 1396 /*
1261 1397 * Log the exit time and status of the zone's init process into
1262 1398 * {zonepath}/lastexited. If the zone shutdown normally, the exit status will
1263 1399 * be -1, otherwise it will be the exit status as described in wait.3c.
1264 1400 * If the zone is configured to restart init, then nothing will be logged if
1265 1401 * init exits unexpectedly (the kernel will never upcall in this case).
1266 1402 */
1267 1403 static void
1268 1404 log_init_exit(int status)
1269 1405 {
1270 1406 char p[MAXPATHLEN];
1271 1407 char buf[128];
1272 1408 struct timeval t;
1273 1409 int fd;
1274 1410
1275 1411 if (snprintf(p, sizeof (p), "%s/lastexited", zonepath) > sizeof (p))
1276 1412 return;
1277 1413 if (gettimeofday(&t, NULL) != 0)
1278 1414 return;
1279 1415 if (snprintf(buf, sizeof (buf), "%ld.%ld %d\n", t.tv_sec, t.tv_usec,
1280 1416 status) > sizeof (buf))
1281 1417 return;
1282 1418 if ((fd = open(p, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0)
1283 1419 return;
1284 1420
1285 1421 (void) write(fd, buf, strlen(buf));
1286 1422
1287 1423 (void) close(fd);
1288 1424 }
1289 1425
1290 1426 /*
1291 1427 * The main routine for the door server that deals with zone state transitions.
1292 1428 */
1293 1429 /* ARGSUSED */
1294 1430 static void
1295 1431 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1296 1432 uint_t n_desc)
1297 1433 {
1298 1434 ucred_t *uc = NULL;
1299 1435 const priv_set_t *eset;
1300 1436
1301 1437 zone_state_t zstate;
1302 1438 zone_cmd_t cmd;
1303 1439 int init_status;
1304 1440 zone_cmd_arg_t *zargp;
1305 1441
1306 1442 boolean_t kernelcall;
1307 1443
1308 1444 int rval = -1;
1309 1445 uint64_t uniqid;
1310 1446 zoneid_t zoneid = -1;
1311 1447 zlog_t zlog;
1312 1448 zlog_t *zlogp;
1313 1449 zone_cmd_rval_t *rvalp;
1314 1450 size_t rlen = getpagesize(); /* conservative */
1315 1451 fs_callback_t cb;
1316 1452 brand_handle_t bh;
1317 1453 boolean_t wait_shut = B_FALSE;
1318 1454
1319 1455 /* LINTED E_BAD_PTR_CAST_ALIGN */
1320 1456 zargp = (zone_cmd_arg_t *)args;
1321 1457
1322 1458 /*
1323 1459 * When we get the door unref message, we've fdetach'd the door, and
1324 1460 * it is time for us to shut down zoneadmd.
1325 1461 */
1326 1462 if (zargp == DOOR_UNREF_DATA) {
1327 1463 /*
1328 1464 * See comment at end of main() for info on the last rites.
1329 1465 */
1330 1466 exit(0);
1331 1467 }
1332 1468
1333 1469 if (zargp == NULL) {
1334 1470 (void) door_return(NULL, 0, 0, 0);
1335 1471 }
1336 1472
1337 1473 rvalp = alloca(rlen);
1338 1474 bzero(rvalp, rlen);
1339 1475 zlog.logfile = NULL;
1340 1476 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1341 1477 zlog.buf = rvalp->errbuf;
1342 1478 zlog.log = zlog.buf;
1343 1479 /* defer initialization of zlog.locale until after credential check */
1344 1480 zlogp = &zlog;
1345 1481
1346 1482 if (alen != sizeof (zone_cmd_arg_t)) {
1347 1483 /*
1348 1484 * This really shouldn't be happening.
1349 1485 */
1350 1486 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1351 1487 "unexpected (expected %d bytes)", alen,
1352 1488 sizeof (zone_cmd_arg_t));
1353 1489 goto out;
1354 1490 }
1355 1491 cmd = zargp->cmd;
1356 1492 init_status = zargp->status;
1357 1493
1358 1494 if (door_ucred(&uc) != 0) {
1359 1495 zerror(&logsys, B_TRUE, "door_ucred");
1360 1496 goto out;
1361 1497 }
1362 1498 eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1363 1499 if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1364 1500 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1365 1501 ucred_geteuid(uc) != 0)) {
1366 1502 zerror(&logsys, B_FALSE, "insufficient privileges");
1367 1503 goto out;
1368 1504 }
1369 1505
1370 1506 kernelcall = ucred_getpid(uc) == 0;
1371 1507
1372 1508 /*
1373 1509 * This is safe because we only use a zlog_t throughout the
1374 1510 * duration of a door call; i.e., by the time the pointer
1375 1511 * might become invalid, the door call would be over.
1376 1512 */
1377 1513 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
1378 1514
1379 1515 (void) mutex_lock(&lock);
1380 1516
1381 1517 /*
1382 1518 * Once we start to really die off, we don't want more connections.
1383 1519 */
1384 1520 if (in_death_throes) {
1385 1521 (void) mutex_unlock(&lock);
1386 1522 ucred_free(uc);
1387 1523 (void) door_return(NULL, 0, 0, 0);
1388 1524 thr_exit(NULL);
1389 1525 }
1390 1526
1391 1527 /*
1392 1528 * Check for validity of command.
1393 1529 */
1394 1530 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT &&
1395 1531 cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT &&
1396 1532 cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT &&
1397 1533 cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) {
1398 1534 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd);
1399 1535 goto out;
1400 1536 }
1401 1537
1402 1538 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
1403 1539 /*
1404 1540 * Can't happen
1405 1541 */
1406 1542 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
1407 1543 cmd);
1408 1544 goto out;
1409 1545 }
1410 1546 /*
1411 1547 * We ignore the possibility of someone calling zone_create(2)
1412 1548 * explicitly; all requests must come through zoneadmd.
1413 1549 */
1414 1550 if (zone_get_state(zone_name, &zstate) != Z_OK) {
1415 1551 /*
1416 1552 * Something terribly wrong happened
1417 1553 */
1418 1554 zerror(&logsys, B_FALSE, "unable to determine state of zone");
1419 1555 goto out;
1420 1556 }
1421 1557
1422 1558 if (kernelcall) {
1423 1559 /*
1424 1560 * Kernel-initiated requests may lose their validity if the
1425 1561 * zone_t the kernel was referring to has gone away.
1426 1562 */
1427 1563 if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
1428 1564 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
1429 1565 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
1430 1566 /*
1431 1567 * We're not talking about the same zone. The request
1432 1568 * must have arrived too late. Return error.
1433 1569 */
1434 1570 rval = -1;
1435 1571 goto out;
1436 1572 }
1437 1573 zlogp = &logsys; /* Log errors to syslog */
1438 1574 }
1439 1575
1440 1576 /*
1441 1577 * If we are being asked to forcibly mount or boot a zone, we
1442 1578 * pretend that an INCOMPLETE zone is actually INSTALLED.
1443 1579 */
1444 1580 if (zstate == ZONE_STATE_INCOMPLETE &&
1445 1581 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1446 1582 zstate = ZONE_STATE_INSTALLED;
1447 1583
1448 1584 switch (zstate) {
1449 1585 case ZONE_STATE_CONFIGURED:
1450 1586 case ZONE_STATE_INCOMPLETE:
1451 1587 /*
1452 1588 * Not our area of expertise; we just print a nice message
1453 1589 * and die off.
1454 1590 */
1455 1591 zerror(zlogp, B_FALSE,
|
↓ open down ↓ |
685 lines elided |
↑ open up ↑ |
1456 1592 "%s operation is invalid for zones in state '%s'",
1457 1593 z_cmd_name(cmd), zone_state_str(zstate));
1458 1594 break;
1459 1595
1460 1596 case ZONE_STATE_INSTALLED:
1461 1597 switch (cmd) {
1462 1598 case Z_READY:
1463 1599 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
1464 1600 if (rval == 0)
1465 1601 eventstream_write(Z_EVT_ZONE_READIED);
1602 + zcons_statechanged();
1466 1603 break;
1467 1604 case Z_BOOT:
1468 1605 case Z_FORCEBOOT:
1469 1606 eventstream_write(Z_EVT_ZONE_BOOTING);
1470 1607 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1471 1608 == 0) {
1472 1609 rval = zone_bootup(zlogp, zargp->bootbuf,
1473 1610 zstate);
1474 1611 }
1475 1612 audit_put_record(zlogp, uc, rval, "boot");
1613 + zcons_statechanged();
1476 1614 if (rval != 0) {
1477 1615 bringup_failure_recovery = B_TRUE;
1478 1616 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1479 1617 zstate);
1480 1618 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1481 1619 }
1482 1620 break;
1483 1621 case Z_SHUTDOWN:
1484 1622 case Z_HALT:
1485 1623 if (kernelcall) /* Invalid; can't happen */
1486 1624 abort();
1487 1625 /*
1488 1626 * We could have two clients racing to halt this
1489 1627 * zone; the second client loses, but his request
1490 1628 * doesn't fail, since the zone is now in the desired
1491 1629 * state.
1492 1630 */
1493 1631 zerror(zlogp, B_FALSE, "zone is already halted");
1494 1632 rval = 0;
1495 1633 break;
1496 1634 case Z_REBOOT:
1497 1635 if (kernelcall) /* Invalid; can't happen */
1498 1636 abort();
1499 1637 zerror(zlogp, B_FALSE, "%s operation is invalid "
1500 1638 "for zones in state '%s'", z_cmd_name(cmd),
1501 1639 zone_state_str(zstate));
1502 1640 rval = -1;
1503 1641 break;
1504 1642 case Z_NOTE_UNINSTALLING:
1505 1643 if (kernelcall) /* Invalid; can't happen */
1506 1644 abort();
1507 1645 /*
1508 1646 * Tell the console to print out a message about this.
1509 1647 * Once it does, we will be in_death_throes.
1510 1648 */
1511 1649 eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1512 1650 break;
1513 1651 case Z_MOUNT:
1514 1652 case Z_FORCEMOUNT:
1515 1653 if (kernelcall) /* Invalid; can't happen */
1516 1654 abort();
1517 1655 if (!zone_isnative && !zone_iscluster &&
1518 1656 !zone_islabeled) {
1519 1657 /*
1520 1658 * -U mounts the zone without lofs mounting
1521 1659 * zone file systems back into the scratch
1522 1660 * zone. This is required when mounting
1523 1661 * non-native branded zones.
1524 1662 */
1525 1663 (void) strlcpy(zargp->bootbuf, "-U",
1526 1664 BOOTARGS_MAX);
1527 1665 }
1528 1666
1529 1667 rval = zone_ready(zlogp,
1530 1668 strcmp(zargp->bootbuf, "-U") == 0 ?
1531 1669 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
1532 1670 if (rval != 0)
1533 1671 break;
1534 1672
1535 1673 eventstream_write(Z_EVT_ZONE_READIED);
1536 1674
1537 1675 /*
1538 1676 * Get a handle to the default brand info.
1539 1677 * We must always use the default brand file system
1540 1678 * list when mounting the zone.
1541 1679 */
1542 1680 if ((bh = brand_open(default_brand)) == NULL) {
1543 1681 rval = -1;
1544 1682 break;
1545 1683 }
1546 1684
1547 1685 /*
1548 1686 * Get the list of filesystems to mount from
1549 1687 * the brand configuration. These mounts are done
1550 1688 * via a thread that will enter the zone, so they
1551 1689 * are done from within the context of the zone.
1552 1690 */
1553 1691 cb.zlogp = zlogp;
1554 1692 cb.zoneid = zone_id;
1555 1693 cb.mount_cmd = B_TRUE;
1556 1694 rval = brand_platform_iter_mounts(bh,
1557 1695 mount_early_fs, &cb);
1558 1696
1559 1697 brand_close(bh);
1560 1698
1561 1699 /*
1562 1700 * Ordinarily, /dev/fd would be mounted inside the zone
1563 1701 * by svc:/system/filesystem/usr:default, but since
1564 1702 * we're not booting the zone, we need to do this
1565 1703 * manually.
1566 1704 */
1567 1705 if (rval == 0)
1568 1706 rval = mount_early_fs(&cb,
1569 1707 "fd", "/dev/fd", "fd", NULL);
1570 1708 break;
1571 1709 case Z_UNMOUNT:
1572 1710 if (kernelcall) /* Invalid; can't happen */
1573 1711 abort();
1574 1712 zerror(zlogp, B_FALSE, "zone is already unmounted");
1575 1713 rval = 0;
1576 1714 break;
1577 1715 }
1578 1716 break;
1579 1717
1580 1718 case ZONE_STATE_READY:
1581 1719 switch (cmd) {
1582 1720 case Z_READY:
1583 1721 /*
1584 1722 * We could have two clients racing to ready this
1585 1723 * zone; the second client loses, but his request
1586 1724 * doesn't fail, since the zone is now in the desired
1587 1725 * state.
|
↓ open down ↓ |
102 lines elided |
↑ open up ↑ |
1588 1726 */
1589 1727 zerror(zlogp, B_FALSE, "zone is already ready");
1590 1728 rval = 0;
1591 1729 break;
1592 1730 case Z_BOOT:
1593 1731 (void) strlcpy(boot_args, zargp->bootbuf,
1594 1732 sizeof (boot_args));
1595 1733 eventstream_write(Z_EVT_ZONE_BOOTING);
1596 1734 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1597 1735 audit_put_record(zlogp, uc, rval, "boot");
1736 + zcons_statechanged();
1598 1737 if (rval != 0) {
1599 1738 bringup_failure_recovery = B_TRUE;
1600 1739 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1601 1740 zstate);
1602 1741 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1603 1742 }
1604 1743 boot_args[0] = '\0';
1605 1744 break;
1606 1745 case Z_HALT:
1607 1746 if (kernelcall) /* Invalid; can't happen */
1608 1747 abort();
1609 1748 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1610 1749 != 0)
1611 1750 break;
1751 + zcons_statechanged();
1612 1752 eventstream_write(Z_EVT_ZONE_HALTED);
1613 1753 break;
1614 1754 case Z_SHUTDOWN:
1615 1755 case Z_REBOOT:
1616 1756 case Z_NOTE_UNINSTALLING:
1617 1757 case Z_MOUNT:
1618 1758 case Z_UNMOUNT:
1619 1759 if (kernelcall) /* Invalid; can't happen */
1620 1760 abort();
1621 1761 zerror(zlogp, B_FALSE, "%s operation is invalid "
1622 1762 "for zones in state '%s'", z_cmd_name(cmd),
1623 1763 zone_state_str(zstate));
1624 1764 rval = -1;
1625 1765 break;
1626 1766 }
1627 1767 break;
1628 1768
1629 1769 case ZONE_STATE_MOUNTED:
1630 1770 switch (cmd) {
1631 1771 case Z_UNMOUNT:
1632 1772 if (kernelcall) /* Invalid; can't happen */
1633 1773 abort();
1634 1774 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
1635 1775 if (rval == 0) {
1636 1776 eventstream_write(Z_EVT_ZONE_HALTED);
1637 1777 (void) sema_post(&scratch_sem);
1638 1778 }
1639 1779 break;
1640 1780 default:
1641 1781 if (kernelcall) /* Invalid; can't happen */
1642 1782 abort();
1643 1783 zerror(zlogp, B_FALSE, "%s operation is invalid "
1644 1784 "for zones in state '%s'", z_cmd_name(cmd),
1645 1785 zone_state_str(zstate));
1646 1786 rval = -1;
1647 1787 break;
1648 1788 }
|
↓ open down ↓ |
27 lines elided |
↑ open up ↑ |
1649 1789 break;
1650 1790
1651 1791 case ZONE_STATE_RUNNING:
1652 1792 case ZONE_STATE_SHUTTING_DOWN:
1653 1793 case ZONE_STATE_DOWN:
1654 1794 switch (cmd) {
1655 1795 case Z_READY:
1656 1796 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1657 1797 != 0)
1658 1798 break;
1799 + zcons_statechanged();
1659 1800 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
1660 1801 eventstream_write(Z_EVT_ZONE_READIED);
1661 1802 else
1662 1803 eventstream_write(Z_EVT_ZONE_HALTED);
1663 1804 break;
1664 1805 case Z_BOOT:
1665 1806 /*
1666 1807 * We could have two clients racing to boot this
1667 1808 * zone; the second client loses, but his request
1668 1809 * doesn't fail, since the zone is now in the desired
1669 1810 * state.
1670 1811 */
1671 1812 zerror(zlogp, B_FALSE, "zone is already booted");
1672 1813 rval = 0;
1673 1814 break;
|
↓ open down ↓ |
5 lines elided |
↑ open up ↑ |
1674 1815 case Z_HALT:
1675 1816 if (kernelcall) {
1676 1817 log_init_exit(init_status);
1677 1818 } else {
1678 1819 log_init_exit(-1);
1679 1820 }
1680 1821 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1681 1822 != 0)
1682 1823 break;
1683 1824 eventstream_write(Z_EVT_ZONE_HALTED);
1825 + zcons_statechanged();
1684 1826 break;
1685 1827 case Z_REBOOT:
1686 1828 (void) strlcpy(boot_args, zargp->bootbuf,
1687 1829 sizeof (boot_args));
1688 1830 eventstream_write(Z_EVT_ZONE_REBOOTING);
1689 1831 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1690 1832 != 0) {
1691 1833 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1692 1834 boot_args[0] = '\0';
1693 1835 break;
1694 1836 }
1695 - if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1696 - != 0) {
1837 + zcons_statechanged();
1838 + if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) !=
1839 + 0) {
1697 1840 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1698 1841 boot_args[0] = '\0';
1699 1842 break;
1700 1843 }
1701 1844 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1702 1845 audit_put_record(zlogp, uc, rval, "reboot");
1703 1846 if (rval != 0) {
1704 1847 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1705 1848 zstate);
1706 1849 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1707 1850 }
1708 1851 boot_args[0] = '\0';
1709 1852 break;
1710 1853 case Z_SHUTDOWN:
1711 1854 if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
1712 1855 wait_shut = B_TRUE;
1713 1856 }
1714 1857 break;
1715 1858 case Z_NOTE_UNINSTALLING:
1716 1859 case Z_MOUNT:
1717 1860 case Z_UNMOUNT:
1718 1861 zerror(zlogp, B_FALSE, "%s operation is invalid "
1719 1862 "for zones in state '%s'", z_cmd_name(cmd),
1720 1863 zone_state_str(zstate));
1721 1864 rval = -1;
1722 1865 break;
1723 1866 }
1724 1867 break;
1725 1868 default:
1726 1869 abort();
1727 1870 }
1728 1871
1729 1872 /*
1730 1873 * Because the state of the zone may have changed, we make sure
1731 1874 * to wake the console poller, which is in charge of initiating
1732 1875 * the shutdown procedure as necessary.
1733 1876 */
1734 1877 eventstream_write(Z_EVT_NULL);
1735 1878
1736 1879 out:
1737 1880 (void) mutex_unlock(&lock);
1738 1881
1739 1882 /* Wait for the Z_SHUTDOWN commands to complete */
1740 1883 if (wait_shut)
1741 1884 rval = zone_wait_shutdown(zlogp);
1742 1885
1743 1886 if (kernelcall) {
1744 1887 rvalp = NULL;
1745 1888 rlen = 0;
1746 1889 } else {
1747 1890 rvalp->rval = rval;
1748 1891 }
1749 1892 if (uc != NULL)
1750 1893 ucred_free(uc);
1751 1894 (void) door_return((char *)rvalp, rlen, NULL, 0);
1752 1895 thr_exit(NULL);
1753 1896 }
1754 1897
1755 1898 static int
1756 1899 setup_door(zlog_t *zlogp)
1757 1900 {
1758 1901 if ((zone_door = door_create(server, NULL,
1759 1902 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
1760 1903 zerror(zlogp, B_TRUE, "%s failed", "door_create");
1761 1904 return (-1);
1762 1905 }
1763 1906 (void) fdetach(zone_door_path);
1764 1907
1765 1908 if (fattach(zone_door, zone_door_path) != 0) {
1766 1909 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
1767 1910 (void) door_revoke(zone_door);
1768 1911 (void) fdetach(zone_door_path);
1769 1912 zone_door = -1;
1770 1913 return (-1);
1771 1914 }
1772 1915 return (0);
1773 1916 }
1774 1917
1775 1918 /*
1776 1919 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
1777 1920 * is where zoneadmd itself will check to see that another instance of
1778 1921 * zoneadmd isn't already controlling this zone.
1779 1922 *
1780 1923 * The idea here is that we want to open the path to which we will
1781 1924 * attach our door, lock it, and then make sure that no-one has beat us
1782 1925 * to fattach(3c)ing onto it.
1783 1926 *
1784 1927 * fattach(3c) is really a mount, so there are actually two possible
1785 1928 * vnodes we could be dealing with. Our strategy is as follows:
1786 1929 *
1787 1930 * - If the file we opened is a regular file (common case):
1788 1931 * There is no fattach(3c)ed door, so we have a chance of becoming
1789 1932 * the managing zoneadmd. We attempt to lock the file: if it is
1790 1933 * already locked, that means someone else raced us here, so we
1791 1934 * lose and give up. zoneadm(1m) will try to contact the zoneadmd
1792 1935 * that beat us to it.
1793 1936 *
1794 1937 * - If the file we opened is a namefs file:
1795 1938 * This means there is already an established door fattach(3c)'ed
1796 1939 * to the rendezvous path. We've lost the race, so we give up.
1797 1940 * Note that in this case we also try to grab the file lock, and
1798 1941 * will succeed in acquiring it since the vnode locked by the
1799 1942 * "winning" zoneadmd was a regular one, and the one we locked was
1800 1943 * the fattach(3c)'ed door node. At any rate, no harm is done, and
1801 1944 * we just return to zoneadm(1m) which knows to retry.
1802 1945 */
1803 1946 static int
1804 1947 make_daemon_exclusive(zlog_t *zlogp)
1805 1948 {
1806 1949 int doorfd = -1;
1807 1950 int err, ret = -1;
1808 1951 struct stat st;
1809 1952 struct flock flock;
1810 1953 zone_state_t zstate;
1811 1954
1812 1955 top:
1813 1956 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1814 1957 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1815 1958 zonecfg_strerror(err));
1816 1959 goto out;
1817 1960 }
1818 1961 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
1819 1962 S_IREAD|S_IWRITE)) < 0) {
1820 1963 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
1821 1964 goto out;
1822 1965 }
1823 1966 if (fstat(doorfd, &st) < 0) {
1824 1967 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
1825 1968 goto out;
1826 1969 }
1827 1970 /*
1828 1971 * Lock the file to synchronize with other zoneadmd
1829 1972 */
1830 1973 flock.l_type = F_WRLCK;
1831 1974 flock.l_whence = SEEK_SET;
1832 1975 flock.l_start = (off_t)0;
1833 1976 flock.l_len = (off_t)0;
1834 1977 if (fcntl(doorfd, F_SETLK, &flock) < 0) {
1835 1978 /*
1836 1979 * Someone else raced us here and grabbed the lock file
1837 1980 * first. A warning here is inappropriate since nothing
1838 1981 * went wrong.
1839 1982 */
1840 1983 goto out;
1841 1984 }
1842 1985
1843 1986 if (strcmp(st.st_fstype, "namefs") == 0) {
1844 1987 struct door_info info;
1845 1988
1846 1989 /*
1847 1990 * There is already something fattach()'ed to this file.
1848 1991 * Lets see what the door is up to.
1849 1992 */
1850 1993 if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
1851 1994 /*
1852 1995 * Another zoneadmd process seems to be in
1853 1996 * control of the situation and we don't need to
1854 1997 * be here. A warning here is inappropriate
1855 1998 * since nothing went wrong.
1856 1999 *
1857 2000 * If the door has been revoked, the zoneadmd
1858 2001 * process currently managing the zone is going
1859 2002 * away. We'll return control to zoneadm(1m)
1860 2003 * which will try again (by which time zoneadmd
1861 2004 * will hopefully have exited).
1862 2005 */
1863 2006 goto out;
1864 2007 }
1865 2008
1866 2009 /*
1867 2010 * If we got this far, there's a fattach(3c)'ed door
1868 2011 * that belongs to a process that has exited, which can
1869 2012 * happen if the previous zoneadmd died unexpectedly.
1870 2013 *
1871 2014 * Let user know that something is amiss, but that we can
1872 2015 * recover; if the zone is in the installed state, then don't
1873 2016 * message, since having a running zoneadmd isn't really
1874 2017 * expected/needed. We want to keep occurences of this message
1875 2018 * limited to times when zoneadmd is picking back up from a
1876 2019 * zoneadmd that died while the zone was in some non-trivial
1877 2020 * state.
1878 2021 */
1879 2022 if (zstate > ZONE_STATE_INSTALLED) {
1880 2023 static zoneid_t zid;
1881 2024
1882 2025 zerror(zlogp, B_FALSE,
1883 2026 "zone '%s': WARNING: zone is in state '%s', but "
1884 2027 "zoneadmd does not appear to be available; "
1885 2028 "restarted zoneadmd to recover.",
1886 2029 zone_name, zone_state_str(zstate));
1887 2030
1888 2031 /*
1889 2032 * Startup a thread to perform the zfd logging/tty svc
1890 2033 * and a thread to perform memory capping for the
1891 2034 * zone. zlogp won't be valid for much longer so use
1892 2035 * logsys.
1893 2036 */
1894 2037 if ((zid = getzoneidbyname(zone_name)) != -1) {
1895 2038 create_log_thread(&logsys, zid);
1896 2039 create_mcap_thread(&logsys, zid);
1897 2040 }
1898 2041
1899 2042 /* recover the global configuration snapshot */
1900 2043 if (snap_hndl == NULL) {
1901 2044 if ((snap_hndl = zonecfg_init_handle())
1902 2045 == NULL ||
1903 2046 zonecfg_create_snapshot(zone_name)
1904 2047 != Z_OK ||
1905 2048 zonecfg_get_snapshot_handle(zone_name,
1906 2049 snap_hndl) != Z_OK) {
1907 2050 zerror(zlogp, B_FALSE, "recovering "
1908 2051 "zone configuration handle");
1909 2052 goto out;
1910 2053 }
1911 2054 }
1912 2055 }
1913 2056
1914 2057 (void) fdetach(zone_door_path);
1915 2058 (void) close(doorfd);
1916 2059 goto top;
1917 2060 }
1918 2061 ret = 0;
1919 2062 out:
1920 2063 (void) close(doorfd);
1921 2064 return (ret);
1922 2065 }
1923 2066
1924 2067 /*
1925 2068 * Setup the brand's pre and post state change callbacks, as well as the
1926 2069 * query callback, if any of these exist.
1927 2070 */
1928 2071 static int
1929 2072 brand_callback_init(brand_handle_t bh, char *zone_name)
1930 2073 {
1931 2074 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
1932 2075 sizeof (pre_statechg_hook));
1933 2076
1934 2077 if (brand_get_prestatechange(bh, zone_name, zonepath,
1935 2078 pre_statechg_hook + EXEC_LEN,
1936 2079 sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
1937 2080 return (-1);
1938 2081
1939 2082 if (strlen(pre_statechg_hook) <= EXEC_LEN)
1940 2083 pre_statechg_hook[0] = '\0';
1941 2084
1942 2085 (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
1943 2086 sizeof (post_statechg_hook));
1944 2087
1945 2088 if (brand_get_poststatechange(bh, zone_name, zonepath,
1946 2089 post_statechg_hook + EXEC_LEN,
1947 2090 sizeof (post_statechg_hook) - EXEC_LEN) != 0)
1948 2091 return (-1);
1949 2092
1950 2093 if (strlen(post_statechg_hook) <= EXEC_LEN)
1951 2094 post_statechg_hook[0] = '\0';
1952 2095
1953 2096 (void) strlcpy(query_hook, EXEC_PREFIX,
1954 2097 sizeof (query_hook));
1955 2098
1956 2099 if (brand_get_query(bh, zone_name, zonepath, query_hook + EXEC_LEN,
1957 2100 sizeof (query_hook) - EXEC_LEN) != 0)
1958 2101 return (-1);
1959 2102
1960 2103 if (strlen(query_hook) <= EXEC_LEN)
1961 2104 query_hook[0] = '\0';
1962 2105
1963 2106 return (0);
1964 2107 }
1965 2108
/*
 * zoneadmd entry point.
 *
 * Parses the -R (alternate root) and -z (zone name) options, validates the
 * zone (not the global zone, known id, state at least INCOMPLETE, zonepath
 * and brand resolvable), loads the brand hooks, verifies that the full
 * privilege set is held, and then forks.
 *
 * The parent waits on a semaphore placed in a shared anonymous mapping,
 * prints whatever the child logged into that mapping to stderr, and exits
 * with 0 if the child reported status 0 and 1 otherwise.
 *
 * The child closes all descriptors, initializes the event stream, makes
 * itself the exclusive daemon for the zone, sets up the console and the
 * door, reports success to the parent, and then serves the console (or, in
 * an alternate root, waits on scratch_sem).  Afterwards it detaches the
 * door path and pauses forever.  Any setup failure in the child goes
 * through child_out, which reports status -1 to the parent.
 *
 * Returns 1 on the failure paths visible here, except for the zonepath
 * lookup failure, which returns -1.
 */
1966 2109 int
1967 2110 main(int argc, char *argv[])
1968 2111 {
1969 2112 int opt;
1970 2113 zoneid_t zid;
1971 2114 priv_set_t *privset;
1972 2115 zone_state_t zstate;
1973 2116 char parents_locale[MAXPATHLEN];
1974 2117 brand_handle_t bh;
1975 2118 int err;
1976 2119
1977 2120 pid_t pid;
1978 2121 sigset_t blockset;
1979 2122 sigset_t block_cld;
1980 2123
/*
 * State shared between parent and child across the fork; it lives at the
 * start of the MAP_SHARED mapping created below.
 */
1981 2124 struct {
1982 2125 sema_t sem; /* posted by the child when setup succeeds or fails */
1983 2126 int status; /* 0 on success; anything else makes the parent exit 1 */
1984 2127 zlog_t log; /* child's log; its buffer is the rest of the mapping */
1985 2128 } *shstate;
1986 2129 size_t shstatelen = getpagesize();
1987 2130
1988 2131 zlog_t errlog;
1989 2132 zlog_t *zlogp;
1990 2133
1991 2134 int ctfd;
1992 2135
1993 2136 progname = get_execbasename(argv[0]);
1994 2137
1995 2138 /*
1996 2139 * Make sure stderr is unbuffered
1997 2140 */
1998 2141 (void) setbuffer(stderr, NULL, 0);
1999 2142
2000 2143 /*
2001 2144 * Get out of the way of mounted filesystems, since we will daemonize
2002 2145 * soon.
2003 2146 */
2004 2147 (void) chdir("/");
2005 2148
2006 2149 /*
2007 2150 * Use the default system umask per PSARC 1998/110 rather than
2008 2151 * anything that may have been set by the caller.
2009 2152 */
2010 2153 (void) umask(CMASK);
2011 2154
2012 2155 /*
2013 2156 * Initially we want to use our parent's locale.
2014 2157 */
2015 2158 (void) setlocale(LC_ALL, "");
2016 2159 (void) textdomain(TEXT_DOMAIN);
2017 2160 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
2018 2161 sizeof (parents_locale));
2019 2162
2020 2163 /*
2021 2164 * This zlog_t is used for writing to stderr
2022 2165 */
2023 2166 errlog.logfile = stderr;
2024 2167 errlog.buflen = errlog.loglen = 0;
2025 2168 errlog.buf = errlog.log = NULL;
2026 2169 errlog.locale = parents_locale;
2027 2170
2028 2171 /*
2029 2172 * We start off writing to stderr until we're ready to daemonize.
2030 2173 */
2031 2174 zlogp = &errlog;
2032 2175
2033 2176 /*
2034 2177 * Process options.
2035 2178 */
2036 2179 while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
2037 2180 switch (opt) {
2038 2181 case 'R':
2039 2182 zonecfg_set_root(optarg);
2040 2183 break;
2041 2184 case 'z':
2042 2185 zone_name = optarg;
2043 2186 break;
2044 2187 default:
2045 2188 usage();
2046 2189 }
2047 2190 }
2048 2191
/*
 * NOTE(review): usage() presumably does not return, since zone_name is
 * passed to strcmp() immediately below -- confirm.
 */
2049 2192 if (zone_name == NULL)
2050 2193 usage();
2051 2194
2052 2195 /*
2053 2196 * Because usage() prints directly to stderr, it has gettext()
2054 2197 * wrapping, which depends on the locale. But since zerror() calls
2055 2198 * localize() which tweaks the locale, it is not safe to call zerror()
2056 2199 * until after the last call to usage(). Fortunately, the last call
2057 2200 * to usage() is just above and the first call to zerror() is just
2058 2201 * below. Don't mess this up.
2059 2202 */
2060 2203 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
2061 2204 zerror(zlogp, B_FALSE, "cannot manage the %s zone",
2062 2205 GLOBAL_ZONENAME);
2063 2206 return (1);
2064 2207 }
2065 2208
2066 2209 if (zone_get_id(zone_name, &zid) != 0) {
2067 2210 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
2068 2211 zonecfg_strerror(Z_NO_ZONE));
2069 2212 return (1);
2070 2213 }
2071 2214
2072 2215 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
2073 2216 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
2074 2217 zonecfg_strerror(err));
2075 2218 return (1);
2076 2219 }
2077 2220 if (zstate < ZONE_STATE_INCOMPLETE) {
2078 2221 zerror(zlogp, B_FALSE,
2079 2222 "cannot manage a zone which is in state '%s'",
2080 2223 zone_state_str(zstate));
2081 2224 return (1);
2082 2225 }
2083 2226
2084 2227 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
2085 2228 zerror(zlogp, B_FALSE, "unable to determine zone path");
2086 2229 return (-1); /* NOTE(review): every other pre-fork failure returns 1; -1 looks unintended -- confirm before changing the exit status */
2087 2230 }
2088 2231
2089 2232 if (zonecfg_default_brand(default_brand,
2090 2233 sizeof (default_brand)) != Z_OK) {
2091 2234 zerror(zlogp, B_FALSE, "unable to determine default brand");
2092 2235 return (1);
2093 2236 }
2094 2237
2095 2238 /* Get a handle to the brand info for this zone */
2096 2239 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name))
2097 2240 != Z_OK) {
2098 2241 zerror(zlogp, B_FALSE, "unable to determine zone brand");
2099 2242 return (1);
2100 2243 }
2101 2244 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0);
2102 2245 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0);
2103 2246
2104 2247 /*
2105 2248 * In the alternate root environment, the only supported
2106 2249 * operations are mount and unmount. In this case, just treat
2107 2250 * the zone as native if it is cluster. Cluster zones can be
2108 2251 * native for the purpose of LU or upgrade, and the cluster
2109 2252 * brand may not exist in the miniroot (such as in net install
2110 2253 * upgrade).
2111 2254 */
2112 2255 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) {
2113 2256 zone_iscluster = B_TRUE;
2114 2257 if (zonecfg_in_alt_root()) {
2115 2258 (void) strlcpy(brand_name, default_brand,
2116 2259 sizeof (brand_name));
2117 2260 }
2118 2261 } else {
2119 2262 zone_iscluster = B_FALSE;
2120 2263 }
2121 2264
2122 2265 if ((bh = brand_open(brand_name)) == NULL) {
2123 2266 zerror(zlogp, B_FALSE, "unable to open zone brand");
2124 2267 return (1);
2125 2268 }
2126 2269
2127 2270 /* Get state change brand hooks. */
2128 2271 if (brand_callback_init(bh, zone_name) == -1) {
2129 2272 zerror(zlogp, B_TRUE,
2130 2273 "failed to initialize brand state change hooks");
2131 2274 brand_close(bh);
2132 2275 return (1);
2133 2276 }
2134 2277
2135 2278 brand_close(bh);
2136 2279
2137 2280 /*
2138 2281 * Check that we have all privileges. It would be nice to pare
2139 2282 * this down, but this is at least a first cut.
2140 2283 */
2141 2284 if ((privset = priv_allocset()) == NULL) {
2142 2285 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
2143 2286 return (1);
2144 2287 }
2145 2288
2146 2289 if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2147 2290 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
2148 2291 priv_freeset(privset);
2149 2292 return (1);
2150 2293 }
2151 2294
2152 2295 if (priv_isfullset(privset) == B_FALSE) {
2153 2296 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
2154 2297 "run this command (all privs required)");
2155 2298 priv_freeset(privset);
2156 2299 return (1);
2157 2300 }
2158 2301 priv_freeset(privset);
2159 2302
2160 2303 if (mkzonedir(zlogp) != 0)
2161 2304 return (1);
2162 2305
2163 2306 /*
2164 2307 * Pre-fork: setup shared state
2165 2308 */
2166 2309 if ((shstate = (void *)mmap(NULL, shstatelen,
2167 2310 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
2168 2311 MAP_FAILED) {
2169 2312 zerror(zlogp, B_TRUE, "%s failed", "mmap");
2170 2313 return (1);
2171 2314 }
2172 2315 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
2173 2316 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
2174 2317 (void) munmap((char *)shstate, shstatelen);
2175 2318 return (1);
2176 2319 }
/*
 * The log buffer is the part of the shared mapping that follows the
 * shstate structure itself.  status starts out as -1 (failure).
 */
2177 2320 shstate->log.logfile = NULL;
2178 2321 shstate->log.buflen = shstatelen - sizeof (*shstate);
2179 2322 shstate->log.loglen = shstate->log.buflen;
2180 2323 shstate->log.buf = (char *)shstate + sizeof (*shstate);
2181 2324 shstate->log.log = shstate->log.buf;
2182 2325 shstate->log.locale = parents_locale;
2183 2326 shstate->status = -1;
2184 2327
2185 2328 /*
2186 2329 * We need a SIGCHLD handler so the sema_wait() below will wake
2187 2330 * up if the child dies without doing a sema_post().
2188 2331 */
2189 2332 (void) sigset(SIGCHLD, sigchld);
2190 2333 /*
2191 2334 * We must mask SIGCHLD until after we've coped with the fork
2192 2335 * sufficiently to deal with it; otherwise we can race and
2193 2336 * receive the signal before pid has been initialized
2194 2337 * (yes, this really happens).
2195 2338 */
2196 2339 (void) sigemptyset(&block_cld);
2197 2340 (void) sigaddset(&block_cld, SIGCHLD);
2198 2341 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2199 2342
2200 2343 /*
2201 2344 * The parent only needs stderr after the fork, so close other fd's
2202 2345 * that we inherited from zoneadm so that the parent doesn't have those
2203 2346 * open while waiting. The child will close the rest after the fork.
2204 2347 */
2205 2348 closefrom(3);
2206 2349
2207 2350 if ((ctfd = init_template()) == -1) {
2208 2351 zerror(zlogp, B_TRUE, "failed to create contract");
2209 2352 return (1);
2210 2353 }
2211 2354
2212 2355 /*
2213 2356 * Do not let another thread localize a message while we are forking.
2214 2357 */
2215 2358 (void) mutex_lock(&msglock);
2216 2359 pid = fork();
2217 2360 (void) mutex_unlock(&msglock);
2218 2361
2219 2362 /*
2220 2363 * In all cases (parent, child, and in the event of an error) we
2221 2364 * don't want to cause creation of contracts on subsequent fork()s.
2222 2365 */
2223 2366 (void) ct_tmpl_clear(ctfd);
2224 2367 (void) close(ctfd);
2225 2368
2226 2369 if (pid == -1) {
2227 2370 zerror(zlogp, B_TRUE, "could not fork");
2228 2371 return (1);
2229 2372
2230 2373 } else if (pid > 0) { /* parent */
2231 2374 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2232 2375 /*
2233 2376 * This marks a window of vulnerability in which we receive
2234 2377 * the SIGCLD before falling into sema_wait (normally we would
2235 2378 * get woken up from sema_wait with EINTR upon receipt of
2236 2379 * SIGCLD). So we may need to use some other scheme like
2237 2380 * sema_posting in the sigcld handler.
2238 2381 * blech
2239 2382 */
2240 2383 (void) sema_wait(&shstate->sem);
2241 2384 (void) sema_destroy(&shstate->sem);
2242 2385 if (shstate->status != 0)
2243 2386 (void) waitpid(pid, NULL, WNOHANG);
2244 2387 /*
2245 2388 * It's ok if we die with SIGPIPE. It's not like we could have
2246 2389 * done anything about it.
2247 2390 */
2248 2391 (void) fprintf(stderr, "%s", shstate->log.buf);
2249 2392 _exit(shstate->status == 0 ? 0 : 1);
2250 2393 }
2251 2394
2252 2395 /*
2253 2396 * The child charges on.
2254 2397 */
2255 2398 (void) sigset(SIGCHLD, SIG_DFL);
2256 2399 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2257 2400
2258 2401 /*
2259 2402 * SIGPIPE can be delivered if we write to a socket for which the
2260 2403 * peer endpoint is gone. That can lead to too-early termination
2261 2404 * of zoneadmd, and that's not good eats.
2262 2405 */
2263 2406 (void) sigset(SIGPIPE, SIG_IGN);
2264 2407 /*
2265 2408 * Stop using stderr
2266 2409 */
2267 2410 zlogp = &shstate->log;
2268 2411
2269 2412 /*
2270 2413 * We don't need stdout/stderr from now on.
2271 2414 */
2272 2415 closefrom(0);
2273 2416
2274 2417 /*
2275 2418 * Initialize the syslog zlog_t. This needs to be done after
2276 2419 * the call to closefrom().
2277 2420 */
2278 2421 logsys.buf = logsys.log = NULL;
2279 2422 logsys.buflen = logsys.loglen = 0;
2280 2423 logsys.logfile = NULL;
2281 2424 logsys.locale = DEFAULT_LOCALE;
2282 2425
2283 2426 openlog("zoneadmd", LOG_PID, LOG_DAEMON);
2284 2427
2285 2428 /*
2286 2429 * The eventstream is used to publish state changes in the zone
2287 2430 * from the door threads to the console I/O poller.
2288 2431 */
2289 2432 if (eventstream_init() == -1) {
2290 2433 zerror(zlogp, B_TRUE, "unable to create eventstream");
2291 2434 goto child_out;
2292 2435 }
2293 2436
2294 2437 (void) snprintf(zone_door_path, sizeof (zone_door_path),
2295 2438 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);
2296 2439
2297 2440 /*
2298 2441 * See if another zoneadmd is running for this zone. If not, then we
2299 2442 * can now modify system state.
2300 2443 */
2301 2444 if (make_daemon_exclusive(zlogp) == -1)
2302 2445 goto child_out;
2303 2446
2304 2447
2305 2448 /*
2306 2449 * Create/join a new session; we need to be careful of what we do with
2307 2450 * the console from now on so we don't end up being the session leader
2308 2451 * for the terminal we're going to be handing out.
2309 2452 */
2310 2453 (void) setsid();
2311 2454
2312 2455 /*
2313 2456 * This thread shouldn't be receiving any signals; in particular,
2314 2457 * SIGCHLD should be received by the thread doing the fork().
2315 2458 */
2316 2459 (void) sigfillset(&blockset);
2317 2460 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
2318 2461
2319 2462 /*
2320 2463 * Setup the console device and get ready to serve the console;
2321 2464 * once this has completed, we're ready to let console clients
2322 2465 * make an attempt to connect (they will block until
2323 2466 * serve_console_sock() below gets called, and any pending
2324 2467 * connection is accept()ed).
2325 2468 */
2326 2469 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0)
2327 2470 goto child_out;
2328 2471
2329 2472 /*
2330 2473 * Take the lock now, so that when the door server gets going, we
2331 2474 * are guaranteed that it won't take a request until we are sure
2332 2475 * that everything is completely set up. See the child_out: label
2333 2476 * below to see why this matters.
2334 2477 */
2335 2478 (void) mutex_lock(&lock);
2336 2479
2337 2480 /* Init semaphore for scratch zones. */
2338 2481 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
2339 2482 zerror(zlogp, B_TRUE,
2340 2483 "failed to initialize semaphore for scratch zone");
2341 2484 goto child_out;
2342 2485 }
2343 2486
2344 2487 /* open the dladm handle */
2345 2488 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) {
2346 2489 zerror(zlogp, B_FALSE, "failed to open dladm handle");
2347 2490 goto child_out;
2348 2491 }
2349 2492
2350 2493 /*
2351 2494 * Note: door setup must occur *after* the console is setup.
2352 2495 * This is so that as zlogin tests the door to see if zoneadmd
2353 2496 * is ready yet, we know that the console will get serviced
2354 2497 * once door_info() indicates that the door is "up".
2355 2498 */
2356 2499 if (setup_door(zlogp) == -1)
2357 2500 goto child_out;
2358 2501
2359 2502 /*
2360 2503 * Things seem OK so far; tell the parent process that we're done
2361 2504 * with setup tasks. This will cause the parent to exit, signalling
2362 2505 * to zoneadm, zlogin, or whatever forked it that we are ready to
2363 2506 * service requests.
2364 2507 */
2365 2508 shstate->status = 0;
2366 2509 (void) sema_post(&shstate->sem);
2367 2510 (void) munmap((char *)shstate, shstatelen);
2368 2511 shstate = NULL; /* child_out tests this to decide whether the parent still needs a status */
2369 2512
2370 2513 (void) mutex_unlock(&lock);
2371 2514
2372 2515 /*
2373 2516 * zlogp is now invalid, so reset it to the syslog logger.
2374 2517 */
2375 2518 zlogp = &logsys;
2376 2519
2377 2520 /*
2378 2521 * Now that we are free of any parents, switch to the default locale.
2379 2522 */
2380 2523 (void) setlocale(LC_ALL, DEFAULT_LOCALE);
2381 2524
2382 2525 /*
2383 2526 * At this point the setup portion of main() is basically done, so
2384 2527 * we reuse this thread to manage the zone console. When
2385 2528 * serve_console() has returned, we are past the point of no return
2386 2529 * in the life of this zoneadmd.
2387 2530 */
2388 2531 if (zonecfg_in_alt_root()) {
2389 2532 /*
2390 2533 * This is just awful, but mounted scratch zones don't (and
2391 2534 * can't) have consoles. We just wait for unmount instead.
2392 2535 */
2393 2536 while (sema_wait(&scratch_sem) == EINTR)
2394 2537 ;
2395 2538 } else {
2396 2539 serve_console(zlogp);
2397 2540 assert(in_death_throes);
2398 2541 }
2399 2542
2400 2543 /*
2401 2544 * This is the next-to-last part of the exit interlock. Upon calling
2402 2545 * fdetach(), the door will go unreferenced; once any
2403 2546 * outstanding requests (like the door thread doing Z_HALT) are
2404 2547 * done, the door will get an UNREF notification; when it handles
2405 2548 * the UNREF, the door server will cause the exit. It's possible
2406 2549 * that fdetach() can fail because the file is in use, in which
2407 2550 * case we'll retry the operation.
2408 2551 */
2409 2552 assert(!MUTEX_HELD(&lock));
2410 2553 for (;;) {
2411 2554 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY))
2412 2555 break;
2413 2556 yield();
2414 2557 }
2415 2558
/* Nothing left for this thread to do; per the comment above, the exit is driven from the door server. */
2416 2559 for (;;)
2417 2560 (void) pause();
2418 2561
/*
 * Child-side failure path: report status -1 to the waiting parent (if it
 * has not been released yet), tear down the door and the dladm handle,
 * and return 1.
 */
2419 2562 child_out:
2420 2563 assert(pid == 0);
2421 2564 if (shstate != NULL) {
2422 2565 shstate->status = -1;
2423 2566 (void) sema_post(&shstate->sem);
2424 2567 (void) munmap((char *)shstate, shstatelen);
2425 2568 }
2426 2569
2427 2570 /*
2428 2571 * This might trigger an unref notification, but if so,
2429 2572 * we are still holding the lock, so our call to exit will
2430 2573 * ultimately win the race and will publish the right exit
2431 2574 * code.
2432 2575 */
2433 2576 if (zone_door != -1) {
2434 2577 assert(MUTEX_HELD(&lock));
2435 2578 (void) door_revoke(zone_door);
2436 2579 (void) fdetach(zone_door_path);
2437 2580 }
2438 2581
2439 2582 if (dld_handle != NULL)
2440 2583 dladm_close(dld_handle);
2441 2584
2442 2585 return (1); /* return from main() forcibly exits an MT process */
2443 2586 }
|
↓ open down ↓ |
737 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX