1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2016 Joyent, Inc. All rights reserved.
26 */
27
28 /*
29 * Zone file descriptor support is used as a mechanism for a process inside the
30 * zone to log messages to the GZ zoneadmd and also as a way to interact
31 * directly with the process (via zlogin -I). The zfd thread is modeled on
32 * the zcons thread so see the comment header in zcons.c for a general overview.
33 * Unlike with zcons, which has a single endpoint within the zone and a single
34 * endpoint used by zoneadmd, we setup multiple endpoints within the zone.
35 *
36 * The mode, which is controlled by the zone attribute "zlog-mode" is somewhat
37 * of a misnomer since its purpose has evolved. The attribute currently
38 * can have six values which are used to control:
39 * - how the zfd devices are used inside the zone
40 * - if the output on the device(s) is also teed into another stream within
41 * the zone
42 * - if we do logging in the GZ
43 * See the comment on get_mode_logmax() in this file, and the comment in
44 * uts/common/io/zfd.c for more details.
45 *
46 * Internally the zfd_mode_t struct holds the number of stdio devs (1 or 3),
47 * the number of additional devs corresponding to the zone attr value and the
48 * GZ logging flag.
49 *
50 * Note that although the mode indicates the number of devices needed, we always
51 * create all possible zfd devices for simplicity.
52 */
53
54 #include <sys/types.h>
55 #include <sys/socket.h>
56 #include <sys/stat.h>
57 #include <sys/termios.h>
58 #include <sys/zfd.h>
59 #include <sys/mkdev.h>
60
61 #include <assert.h>
62 #include <ctype.h>
63 #include <errno.h>
64 #include <fcntl.h>
65 #include <stdarg.h>
66 #include <stdio.h>
67 #include <stdlib.h>
68 #include <strings.h>
69 #include <stropts.h>
70 #include <thread.h>
71 #include <ucred.h>
72 #include <unistd.h>
73 #include <zone.h>
74 #include <signal.h>
75 #include <wchar.h>
76
77 #include <libdevinfo.h>
78 #include <libdevice.h>
79 #include <libzonecfg.h>
80
81 #include <syslog.h>
82 #include <sys/modctl.h>
83
84 #include "zoneadmd.h"
85
86 static zlog_t *zlogp;
87 static int shutting_down = 0;
88 static thread_t logger_tid;
89 static int logfd = -1;
90 static size_t log_sz = 0;
91 static size_t log_rot_sz = 0;
92
93 static void rotate_log();
94
95 /*
96 * The eventstream is a simple one-directional flow of messages implemented
97 * with a pipe. It is used to wake up the poller when it needs to shutdown.
98 */
99 static int eventstream[2] = {-1, -1};
100
101 #define LOGNAME "stdio.log"
102 #define ZLOG_MODE "zlog-mode"
103 #define LOG_MAXSZ "zlog-max-size"
104 #define ZFDNEX_DEVTREEPATH "/pseudo/zfdnex@2"
105 #define ZFDNEX_FILEPATH "/devices/pseudo/zfdnex@2"
106 #define SERVER_SOCKPATH ZONES_TMPDIR "/%s.server_%s"
107 #define ZTTY_RETRY 5
108
109 #define NUM_ZFD_DEVS 5
110
111 typedef struct zfd_mode {
112 uint_t zmode_n_stddevs;
113 uint_t zmode_n_addl_devs;
114 boolean_t zmode_gzlogging;
115 } zfd_mode_t;
116 static zfd_mode_t mode;
117
118 /*
119 * cb_data is only used by destroy_cb.
120 */
121 struct cb_data {
122 zlog_t *zlogp;
123 int killed;
124 };
125
126 /*
127 * destroy_zfd_devs() and its helper destroy_cb() tears down any zfd instances
128 * associated with this zone. If things went very wrong, we might have an
129 * incorrect number of instances hanging around. This routine hunts down and
130 * tries to remove all of them. Of course, if the fd is open, the instance will
131 * not detach, which is a potential issue.
132 */
133 static int
134 destroy_cb(di_node_t node, void *arg)
135 {
136 struct cb_data *cb = (struct cb_data *)arg;
137 char *prop_data;
138 char *tmp;
139 char devpath[MAXPATHLEN];
140 devctl_hdl_t hdl;
141
142 if (di_prop_lookup_strings(DDI_DEV_T_ANY, node, "zfd_zname",
143 &prop_data) == -1)
144 return (DI_WALK_CONTINUE);
145
146 assert(prop_data != NULL);
147 if (strcmp(prop_data, zone_name) != 0) {
148 /* this is a zfd for a different zone */
149 return (DI_WALK_CONTINUE);
150 }
151
152 tmp = di_devfs_path(node);
153 (void) snprintf(devpath, sizeof (devpath), "/devices/%s", tmp);
154 di_devfs_path_free(tmp);
155
156 if ((hdl = devctl_device_acquire(devpath, 0)) == NULL) {
157 zerror(cb->zlogp, B_TRUE, "WARNING: zfd %s found, "
158 "but it could not be controlled.", devpath);
159 return (DI_WALK_CONTINUE);
160 }
161 if (devctl_device_remove(hdl) == 0) {
162 cb->killed++;
163 } else {
164 zerror(cb->zlogp, B_TRUE, "WARNING: zfd %s found, "
165 "but it could not be removed.", devpath);
166 }
167 devctl_release(hdl);
168 return (DI_WALK_CONTINUE);
169 }
170
171 static int
172 destroy_zfd_devs(zlog_t *zlogp)
173 {
174 di_node_t root;
175 struct cb_data cb;
176
177 bzero(&cb, sizeof (cb));
178 cb.zlogp = zlogp;
179
180 if ((root = di_init(ZFDNEX_DEVTREEPATH, DINFOCPYALL)) == DI_NODE_NIL) {
181 zerror(zlogp, B_TRUE, "di_init failed");
182 return (-1);
183 }
184
185 (void) di_walk_node(root, DI_WALK_CLDFIRST, (void *)&cb, destroy_cb);
186
187 di_fini(root);
188 return (0);
189 }
190
191 static void
192 make_tty(zlog_t *zlogp, int id)
193 {
194 int i;
195 int fd = -1;
196 char stdpath[MAXPATHLEN];
197
198 /*
199 * Open the master side of the dev and issue the ZFD_MAKETTY ioctl,
200 * which will cause the the various tty-related streams modules to be
201 * pushed when the slave opens the device.
202 *
203 * In very rare cases the open returns ENOENT if devfs doesn't have
204 * everything setup yet due to heavy zone startup load. Wait for
205 * 1 sec. and retry a few times. Even if we can't setup tty mode
206 * we still move on.
207 */
208 (void) snprintf(stdpath, sizeof (stdpath), "/dev/zfd/%s/master/%d",
209 zone_name, id);
210
211 for (i = 0; !shutting_down && i < ZTTY_RETRY; i++) {
212 fd = open(stdpath, O_RDWR | O_NOCTTY);
213 if (fd >= 0 || errno != ENOENT)
214 break;
215 (void) sleep(1);
216 }
217 if (fd == -1) {
218 zerror(zlogp, B_TRUE, "ERROR: could not open zfd %d for "
219 "zone %s to set tty mode", id, zone_name);
220 } else {
221 /*
222 * This ioctl can occasionally return ENXIO if devfs doesn't
223 * have everything plumbed up yet due to heavy zone startup
224 * load. Wait for 1 sec. and retry a few times before we give
225 * up.
226 */
227 for (i = 0; !shutting_down && i < ZTTY_RETRY; i++) {
228 if (ioctl(fd, ZFD_MAKETTY) == 0) {
229 break;
230 } else if (errno != ENXIO) {
231 break;
232 }
233 (void) sleep(1);
234 }
235 }
236
237 if (fd != -1)
238 (void) close(fd);
239 }
240
241 /*
242 * init_zfd_devs() drives the device-tree configuration of the zone fd devices.
243 * The general strategy is to use the libdevice (devctl) interfaces to
244 * instantiate all of new zone fd nodes. We do a lot of sanity checking, and
245 * are careful to reuse a dev if one exists.
246 *
247 * Once the devices are in the device tree, we kick devfsadm via
248 * di_devlink_init() to ensure that the appropriate symlinks (to the master and
249 * slave fd devices) are placed in /dev in the global zone.
250 */
251 static int
252 init_zfd_dev(zlog_t *zlogp, devctl_hdl_t bus_hdl, int id)
253 {
254 int rv = -1;
255 devctl_ddef_t ddef_hdl = NULL;
256 devctl_hdl_t dev_hdl = NULL;
257
258 if ((ddef_hdl = devctl_ddef_alloc("zfd", 0)) == NULL) {
259 zerror(zlogp, B_TRUE, "failed to allocate ddef handle");
260 goto error;
261 }
262
263 /*
264 * Set four properties on this node; the name of the zone, the dev name
265 * seen inside the zone, a flag which lets pseudo know that it is OK to
266 * automatically allocate an instance # for this device, and the last
267 * one tells the device framework not to auto-detach this node - we
268 * need the node to still be there when we ask devfsadmd to make links,
269 * and when we need to open it.
270 */
271 if (devctl_ddef_string(ddef_hdl, "zfd_zname", zone_name) == -1) {
272 zerror(zlogp, B_TRUE, "failed to create zfd_zname property");
273 goto error;
274 }
275 if (devctl_ddef_int(ddef_hdl, "zfd_id", id) == -1) {
276 zerror(zlogp, B_TRUE, "failed to create zfd_id property");
277 goto error;
278 }
279 if (devctl_ddef_int(ddef_hdl, "auto-assign-instance", 1) == -1) {
280 zerror(zlogp, B_TRUE, "failed to create auto-assign-instance "
281 "property");
282 goto error;
283 }
284 if (devctl_ddef_int(ddef_hdl, "ddi-no-autodetach", 1) == -1) {
285 zerror(zlogp, B_TRUE, "failed to create ddi-no-auto-detach "
286 "property");
287 goto error;
288 }
289 if (devctl_bus_dev_create(bus_hdl, ddef_hdl, 0, &dev_hdl) == -1) {
290 zerror(zlogp, B_TRUE, "failed to create zfd node");
291 goto error;
292 }
293 rv = 0;
294
295 error:
296 if (ddef_hdl)
297 devctl_ddef_free(ddef_hdl);
298 if (dev_hdl)
299 devctl_release(dev_hdl);
300 return (rv);
301 }
302
303 static int
304 init_zfd_devs(zlog_t *zlogp, zfd_mode_t *mode)
305 {
306 devctl_hdl_t bus_hdl = NULL;
307 di_devlink_handle_t dl = NULL;
308 int rv = -1;
309 int i;
310
311 /*
312 * Time to make the devices.
313 */
314 if ((bus_hdl = devctl_bus_acquire(ZFDNEX_FILEPATH, 0)) == NULL) {
315 zerror(zlogp, B_TRUE, "devctl_bus_acquire failed");
316 goto error;
317 }
318
319 for (i = 0; i < NUM_ZFD_DEVS; i++) {
320 if (init_zfd_dev(zlogp, bus_hdl, i) != 0)
321 goto error;
322 }
323
324 if ((dl = di_devlink_init("zfd", DI_MAKE_LINK)) == NULL) {
325 zerror(zlogp, B_TRUE, "failed to create devlinks");
326 goto error;
327 }
328
329 (void) di_devlink_fini(&dl);
330 rv = 0;
331
332 if (mode->zmode_n_stddevs == 1) {
333 /* We want the primary stream to look like a tty. */
334 make_tty(zlogp, 0);
335 }
336
337 error:
338 if (bus_hdl)
339 devctl_release(bus_hdl);
340 return (rv);
341 }
342
343 static int
344 init_server_sock(zlog_t *zlogp, int *servfd, char *nm)
345 {
346 int resfd = -1;
347 struct sockaddr_un servaddr;
348
349 bzero(&servaddr, sizeof (servaddr));
350 servaddr.sun_family = AF_UNIX;
351 (void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
352 SERVER_SOCKPATH, zone_name, nm);
353
354 if ((resfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
355 zerror(zlogp, B_TRUE, "server setup: could not create socket");
356 goto err;
357 }
358 (void) unlink(servaddr.sun_path);
359
360 if (bind(resfd, (struct sockaddr *)&servaddr, sizeof (servaddr))
361 == -1) {
362 zerror(zlogp, B_TRUE,
363 "server setup: could not bind to socket");
364 goto err;
365 }
366
367 if (listen(resfd, 4) == -1) {
368 zerror(zlogp, B_TRUE,
369 "server setup: could not listen on socket");
370 goto err;
371 }
372
373 *servfd = resfd;
374 return (0);
375
376 err:
377 (void) unlink(servaddr.sun_path);
378 if (resfd != -1)
379 (void) close(resfd);
380 return (-1);
381 }
382
383 static void
384 destroy_server_sock(int servfd, char *nm)
385 {
386 char path[MAXPATHLEN];
387
388 (void) snprintf(path, sizeof (path), SERVER_SOCKPATH, zone_name, nm);
389 (void) unlink(path);
390 (void) shutdown(servfd, SHUT_RDWR);
391 (void) close(servfd);
392 }
393
394 /*
395 * Read the "ident" string from the client's descriptor; this routine also
396 * tolerates being called with pid=NULL, for times when you want to "eat"
397 * the ident string from a client without saving it.
398 */
399 static int
400 get_client_ident(int clifd, pid_t *pid, char *locale, size_t locale_len,
401 uint_t *flagsp)
402 {
403 char buf[BUFSIZ], *bufp;
404 size_t buflen = sizeof (buf);
405 char c = '\0';
406 int i = 0, r;
407 ucred_t *cred = NULL;
408
409 /* "eat up the ident string" case, for simplicity */
410 if (pid == NULL) {
411 assert(locale == NULL && locale_len == 0);
412 while (read(clifd, &c, 1) == 1) {
413 if (c == '\n')
414 return (0);
415 }
416 }
417
418 bzero(buf, sizeof (buf));
419 while ((buflen > 1) && (r = read(clifd, &c, 1)) == 1) {
420 buflen--;
421 if (c == '\n')
422 break;
423
424 buf[i] = c;
425 i++;
426 }
427 if (r == -1)
428 return (-1);
429
430 /*
431 * We've filled the buffer, but still haven't seen \n. Keep eating
432 * until we find it; we don't expect this to happen, but this is
433 * defensive.
434 */
435 if (c != '\n') {
436 while ((r = read(clifd, &c, sizeof (c))) > 0)
437 if (c == '\n')
438 break;
439 }
440
441 /*
442 * Parse buffer for message of the form:
443 * IDENT <locale> <flags>
444 */
445 bufp = buf;
446 if (strncmp(bufp, "IDENT ", 6) != 0)
447 return (-1);
448 bufp += 6;
449
450 if (getpeerucred(clifd, &cred) == 0) {
451 *pid = ucred_getpid((const ucred_t *)cred);
452 ucred_free(cred);
453 } else {
454 return (-1);
455 }
456
457 while (*bufp != '\0' && isspace(*bufp))
458 bufp++;
459 buflen = strlen(bufp) - 1;
460 bufp[buflen - 1] = '\0';
461 (void) strlcpy(locale, bufp, locale_len);
462
463 *flagsp = atoi(&bufp[buflen]);
464
465 return (0);
466 }
467
468 static int
469 accept_client(int servfd, pid_t *pid, char *locale, size_t locale_len,
470 uint_t *flagsp)
471 {
472 int connfd;
473 struct sockaddr_un cliaddr;
474 socklen_t clilen;
475 int flags;
476
477 clilen = sizeof (cliaddr);
478 connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen);
479 if (connfd == -1)
480 return (-1);
481 if (pid != NULL) {
482 if (get_client_ident(connfd, pid, locale, locale_len, flagsp)
483 == -1) {
484 (void) shutdown(connfd, SHUT_RDWR);
485 (void) close(connfd);
486 return (-1);
487 }
488 (void) write(connfd, "OK\n", 3);
489 }
490
491 flags = fcntl(connfd, F_GETFL, 0);
492 if (flags != -1)
493 (void) fcntl(connfd, F_SETFL, flags | O_NONBLOCK | FD_CLOEXEC);
494
495 return (connfd);
496 }
497
498 static void
499 reject_client(int servfd, pid_t clientpid)
500 {
501 int connfd;
502 struct sockaddr_un cliaddr;
503 socklen_t clilen;
504 char nak[MAXPATHLEN];
505
506 clilen = sizeof (cliaddr);
507 connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen);
508
509 /*
510 * After getting its ident string, tell client to get lost.
511 */
512 if (get_client_ident(connfd, NULL, NULL, 0, NULL) == 0) {
513 (void) snprintf(nak, sizeof (nak), "%lu\n",
514 clientpid);
515 (void) write(connfd, nak, strlen(nak));
516 }
517 (void) shutdown(connfd, SHUT_RDWR);
518 (void) close(connfd);
519 }
520
521 static int
522 accept_socket(int servfd, pid_t verpid)
523 {
524 int connfd;
525 struct sockaddr_un cliaddr;
526 socklen_t clilen = sizeof (cliaddr);
527 ucred_t *cred = NULL;
528 pid_t rpid = -1;
529 int flags;
530
531 connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen);
532 if (connfd == -1)
533 return (-1);
534
535 /* Confirm connecting process is who we expect */
536 if (getpeerucred(connfd, &cred) == 0) {
537 rpid = ucred_getpid((const ucred_t *)cred);
538 ucred_free(cred);
539 }
540 if (rpid == -1 || rpid != verpid) {
541 (void) shutdown(connfd, SHUT_RDWR);
542 (void) close(connfd);
543 return (-1);
544 }
545
546 flags = fcntl(connfd, F_GETFL, 0);
547 if (flags != -1)
548 (void) fcntl(connfd, F_SETFL, flags | O_NONBLOCK | FD_CLOEXEC);
549
550 return (connfd);
551 }
552
/*
 * Read one control command (terminated by '\n' or '\0') from 'sockfd' and
 * act on it. Two commands are understood:
 *
 *	TIOCSWINSZ <rows> <cols>	set the window size on 'stdoutfd'
 *	SETFLAGS <n>			store <n> in *flags
 *
 * "OK\n" is written back to 'sockfd' when the command was carried out;
 * anything else (empty line, unknown command, malformed or out-of-range
 * number, failed ioctl) is answered with "FAIL\n" and *flags is left alone.
 */
static void
ctlcmd_process(int sockfd, int stdoutfd, unsigned int *flags)
{
	char buf[BUFSIZ];
	int len;

	for (len = 0; len < BUFSIZ - 1; len++) {
		char c;

		if (read(sockfd, &c, 1) != 1 || c == '\n' || c == '\0')
			break;
		buf[len] = c;
	}
	if (len == 0)
		goto fail;
	/* terminate right after the last byte stored; len <= BUFSIZ - 1 */
	buf[len] = '\0';

	if (strncmp(buf, "TIOCSWINSZ ", 11) == 0) {
		struct winsize ws = { 0 };
		char *cur = buf + 11;
		char *end;
		long rows, cols;

		errno = 0;
		rows = strtol(cur, &end, 10);
		/* need digits, no overflow, and a separator to follow */
		if (end == cur || errno != 0 || *end == '\0')
			goto fail;

		cur = end + 1;		/* step over the separator */
		cols = strtol(cur, &end, 10);
		if (end == cur || errno != 0)
			goto fail;

		/* reject values that do not survive the narrowing */
		ws.ws_row = (unsigned short)rows;
		ws.ws_col = (unsigned short)cols;
		if ((long)ws.ws_row != rows || (long)ws.ws_col != cols)
			goto fail;

		if (ioctl(stdoutfd, TIOCSWINSZ, &ws) != 0)
			goto fail;
		(void) write(sockfd, "OK\n", 3);
		return;
	}

	if (strncmp(buf, "SETFLAGS ", 9) == 0) {
		char *cur = buf + 9;
		char *end;
		unsigned long val;

		errno = 0;
		val = strtoul(cur, &end, 10);
		if (end == cur || errno != 0)
			goto fail;
		*flags = (unsigned int)val;
		(void) write(sockfd, "OK\n", 3);
		return;
	}

fail:
	(void) write(sockfd, "FAIL\n", 5);
}
603
/*
 * Probe whether the client at the other end of 'clifd' is still there by
 * writing an otherwise harmless 0-length message to it. Only a write that
 * fails with EPIPE counts as "gone" (-1); every other outcome, including
 * other write errors, is reported as alive (0).
 */
static int
test_client(int clifd)
{
	ssize_t rc = write(clifd, "", 0);

	if (rc != -1)
		return (0);

	return ((errno == EPIPE) ? -1 : 0);
}
616
/*
 * Copy the first 'slen' bytes of 'sbuf' into 'dbuf' with JSON string escapes
 * applied. 'dbuf' is always NUL-terminated (when dlen > 0) and never written
 * beyond 'dlen' bytes: since the destination can be larger than the source,
 * the output is truncated at the last escape that fits completely.
 *
 * The input is decoded with mbrtowc() in the current locale:
 *  - \ " \b \f \n \r \t get their two-character escapes;
 *  - other control characters (embedded NULs included) and characters in the
 *    range 0x80-0xffff are written as \uXXXX;
 *  - 0x20-0x7f are copied as is;
 *  - characters above 0xffff are dropped;
 *  - bytes that do not form a valid multibyte character are replaced by
 *    \ufffd (an incomplete sequence at the end of the input is replaced by a
 *    single \ufffd).
 *
 * The input buffer itself is not modified.
 */
static void
escape_json(char *sbuf, int slen, char *dbuf, int dlen)
{
	mbstate_t mbs;
	int in = 0;		/* bytes of sbuf consumed */
	int out = 0;		/* bytes of dbuf produced; always < dlen */

	if (dlen <= 0)
		return;

	(void) memset(&mbs, 0, sizeof (mbs));

	while (in < slen) {
		const char *piece;
		char ubuf[8];	/* room for "\uXXXX" and its terminator */
		int plen;
		wchar_t wc;
		size_t sz;

		sz = mbrtowc(&wc, sbuf + in, (size_t)(slen - in), &mbs);
		if (sz == (size_t)-1) {
			/* invalid sequence: replace one byte, resync */
			wc = 0xfffd;
			sz = 1;
			(void) memset(&mbs, 0, sizeof (mbs));
		} else if (sz == (size_t)-2) {
			/* input ends in the middle of a character */
			wc = 0xfffd;
			sz = (size_t)(slen - in);
			(void) memset(&mbs, 0, sizeof (mbs));
		} else if (sz == 0) {
			/* embedded NUL: wc is 0 and one byte was used */
			sz = 1;
		}

		switch (wc) {
		case '\\':
			piece = "\\\\";
			plen = 2;
			break;

		case '"':
			piece = "\\\"";
			plen = 2;
			break;

		case '\b':
			piece = "\\b";
			plen = 2;
			break;

		case '\f':
			piece = "\\f";
			plen = 2;
			break;

		case '\n':
			piece = "\\n";
			plen = 2;
			break;

		case '\r':
			piece = "\\r";
			plen = 2;
			break;

		case '\t':
			piece = "\\t";
			plen = 2;
			break;

		default:
			piece = ubuf;
			if ((wc >= 0x00 && wc <= 0x1f) ||
			    (wc > 0x7f && wc <= 0xffff)) {
				plen = snprintf(ubuf, sizeof (ubuf), "\\u%04x",
				    (unsigned int)(0xffff & wc));
			} else if (wc >= 0x20 && wc <= 0x7f) {
				ubuf[0] = (char)(0xff & wc);
				plen = 1;
			} else {
				/* outside the range we can express: drop */
				plen = 0;
			}
			break;
		}

		/* keep one byte for the terminator; never split an escape */
		if (plen >= dlen - out)
			break;

		(void) memcpy(dbuf + out, piece, (size_t)plen);
		out += plen;
		in += (int)sz;
	}

	dbuf[out] = '\0';
}
691
692 /*
693 * We output to the log file as json.
694 * ex. for string 'msg\n' on the zone's stdout:
695 * {"log":"msg\n","stream":"stdout","time":"2014-10-24T20:12:11.101973117Z"}
696 *
697 * We use ns in the last field of the timestamp for compatability.
698 *
699 * We keep track of the size of the log file and rotate it when we exceed
700 * the log size limit (if one is set).
701 */
702 static void
703 wr_log_msg(char *buf, int len, int from)
704 {
705 struct timeval tv;
706 int olen;
707 char ts[64];
708 char nbuf[BUFSIZ * 2];
709 char obuf[BUFSIZ * 2];
710 static boolean_t log_wr_err = B_FALSE;
711
712 if (logfd == -1)
713 return;
714
715 escape_json(buf, len, nbuf, sizeof (nbuf));
716
717 if (gettimeofday(&tv, NULL) != 0)
718 return;
719 (void) strftime(ts, sizeof (ts), "%FT%T", gmtime(&tv.tv_sec));
720
721 olen = snprintf(obuf, sizeof (obuf),
722 "{\"log\":\"%s\",\"stream\":\"%s\",\"time\":\"%s.%ldZ\"}\n",
723 nbuf, (from == 1) ? "stdout" : "stderr", ts, tv.tv_usec * 1000);
724
725 if (write(logfd, obuf, olen) != olen) {
726 if (!log_wr_err) {
727 zerror(zlogp, B_TRUE, "log file write error");
728 log_wr_err = B_TRUE;
729 }
730 return;
731 }
732
733 log_sz += olen;
734 if (log_rot_sz > 0 && log_sz >= log_rot_sz)
735 rotate_log();
736 }
737
738 /*
739 * We want to sleep for a little while but need to be responsive if the zone is
740 * halting. We poll/sleep on the event stream so we can notice if we're halting.
741 * Return true if halting, otherwise false.
742 */
743 static boolean_t
744 halt_sleep(int slptime)
745 {
746 struct pollfd evfd[1];
747
748 evfd[0].fd = eventstream[1];
749 evfd[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
750
751 if (poll(evfd, 1, slptime) > 0) {
752 /* zone halting */
753 return (B_TRUE);
754 }
755 return (B_FALSE);
756 }
757
758 /*
759 * This routine drives the logging and interactive I/O loop. It polls for
760 * input from the zone side of the fd (output to stdout/stderr), and from the
761 * client (input to the zone's stdin). Additionally, it polls on the server
762 * fd, and disconnects any clients that might try to hook up with the zone
763 * while the fd's are in use.
764 *
765 * Data from the zone's stdout and stderr is formatted in json and written to
766 * the log file whether an interactive client is connected or not.
767 *
768 * When the client first calls us up, it is expected to send a line giving its
769 * "identity"; this consists of the string 'IDENT <pid> <locale>'. This is so
770 * that we can report that the fd's are busy, along with some diagnostics
771 * about who has them busy; the locale is ignore here but kept for compatability
772 * with the zlogin code when running on the zone's console.
773 *
774 * We need to handle the case where there is no server within the zone (or
775 * the server gets stuck) and data that we're writing to the zone server's
776 * stdin fills the pipe. Because of the way the zfd device works writes can
777 * flow into the stream and simply be dropped, if there is no server, or writes
778 * could return -1 with EAGAIN if the server is stuck. Since we ignore errors
779 * on the write to stdin, we won't get blocked in that case but we'd like to
780 * avoid dropping initial input if the server within the zone hasn't started
781 * yet. To handle this we wait to read initial input until we detect that there
782 * is a server inside the zone. We have to poll for this so that we can
783 * re-run the ioctl to notice when a server shows up. This poll/wait is handled
784 * by halt_sleep() so that we can be responsive if the zone wants to halt.
785 * We only do this check to avoid dropping initial input so it is possible for
786 * the server within the zone to go away later. At that point zfd will just
787 * drop any new input flowing into the stream.
788 */
789 static void
790 do_zfd_io(int gzctlfd, int gzservfd, int gzerrfd, int stdinfd, int stdoutfd,
791 int stderrfd)
792 {
793 struct pollfd pollfds[8];
794 char ibuf[BUFSIZ + 1];
795 int cc, ret;
796 int ctlfd = -1;
797 int clifd = -1;
798 int clierrfd = -1;
799 int pollerr = 0;
800 char clilocale[MAXPATHLEN];
801 pid_t clipid = 0;
802 uint_t flags = 0;
803 boolean_t stdin_ready = B_FALSE;
804 int slptime = 250; /* initial poll sleep time in ms */
805
806 /* client control socket, watch for read events */
807 pollfds[0].fd = ctlfd;
808 pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND |
809 POLLPRI | POLLERR | POLLHUP | POLLNVAL;
810
811 /* client socket, watch for read events */
812 pollfds[1].fd = clifd;
813 pollfds[1].events = pollfds[0].events;
814
815 /* stdout, watch for read events */
816 pollfds[2].fd = stdoutfd;
817 pollfds[2].events = pollfds[0].events;
818
819 /* stderr, watch for read events */
820 pollfds[3].fd = stderrfd;
821 pollfds[3].events = pollfds[0].events;
822
823 /* the server control socket; watch for new connections */
824 pollfds[4].fd = gzctlfd;
825 pollfds[4].events = POLLIN | POLLRDNORM;
826
827 /* the server stdin/out socket; watch for new connections */
828 pollfds[5].fd = gzservfd;
829 pollfds[5].events = POLLIN | POLLRDNORM;
830
831 /* the server stderr socket; watch for new connections */
832 pollfds[6].fd = gzerrfd;
833 pollfds[6].events = POLLIN | POLLRDNORM;
834
835 /* the eventstream; any input means the zone is halting */
836 pollfds[7].fd = eventstream[1];
837 pollfds[7].events = pollfds[0].events;
838
839 while (!shutting_down) {
840 pollfds[0].revents = pollfds[1].revents = 0;
841 pollfds[2].revents = pollfds[3].revents = 0;
842 pollfds[4].revents = pollfds[5].revents = 0;
843 pollfds[6].revents = pollfds[7].revents = 0;
844
845 ret = poll(pollfds, 8, -1);
846 if (ret == -1 && errno != EINTR) {
847 zerror(zlogp, B_TRUE, "poll failed");
848 /* we are hosed, close connection */
849 break;
850 }
851
852 /* control events from client */
853 if (pollfds[0].revents &
854 (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
855 /* process control message */
856 ctlcmd_process(ctlfd, stdoutfd, &flags);
857 } else if (pollfds[0].revents) {
858 /* bail if any error occurs */
859 pollerr = pollfds[0].revents;
860 zerror(zlogp, B_FALSE, "closing connection "
861 "with control channel, pollerr %d\n", pollerr);
862 break;
863 }
864
865 /* event from client side */
866 if (pollfds[1].revents) {
867 if (stdin_ready) {
868 if (pollfds[1].revents & (POLLIN |
869 POLLRDNORM | POLLRDBAND | POLLPRI)) {
870 errno = 0;
871 cc = read(clifd, ibuf, BUFSIZ);
872 if (cc > 0) {
873 /*
874 * See comment for this
875 * function on what happens if
876 * there is no reader in the
877 * zone. EOF is handled below.
878 */
879 (void) write(stdinfd, ibuf, cc);
880 }
881 } else if (pollfds[1].revents & (POLLERR |
882 POLLNVAL)) {
883 pollerr = pollfds[1].revents;
884 zerror(zlogp, B_FALSE,
885 "closing connection "
886 "with client, pollerr %d\n",
887 pollerr);
888 break;
889 }
890
891 if (pollfds[1].revents & POLLHUP) {
892 if (flags & ZLOGIN_ZFD_EOF) {
893 /*
894 * Let the client know. We've
895 * already serviced any pending
896 * regular input. Let the
897 * stream clear since the EOF
898 * ioctl jumps to the head.
899 */
900 (void) ioctl(stdinfd, I_FLUSH);
901 if (halt_sleep(250))
902 break;
903 (void) ioctl(stdinfd, ZFD_EOF);
904 }
905 break;
906 }
907 } else {
908 if (ioctl(stdinfd, ZFD_HAS_SLAVE) == 0) {
909 stdin_ready = B_TRUE;
910 } else {
911 /*
912 * There is nothing in the zone to read
913 * our input. Presumably the user
914 * providing input expects something to
915 * show up, but that is no guarantee.
916 * Since we haven't serviced the pending
917 * input poll yet, we don't want to
918 * immediately loop around but we also
919 * need to be responsive if the zone is
920 * halting.
921 */
922 if (halt_sleep(slptime))
923 break;
924
925 if (slptime < 5000)
926 slptime += 250;
927 }
928 }
929 }
930
931 /* event from the zone's stdout */
932 if (pollfds[2].revents) {
933 if (pollfds[2].revents &
934 (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
935 errno = 0;
936 cc = read(stdoutfd, ibuf, BUFSIZ);
937 if (cc <= 0 && (errno != EINTR) &&
938 (errno != EAGAIN))
939 break;
940 if (cc > 0) {
941 wr_log_msg(ibuf, cc, 1);
942
943 /*
944 * Lose output if no one is listening,
945 * otherwise pass it on.
946 */
947 if (clifd != -1)
948 (void) write(clifd, ibuf, cc);
949 }
950 } else {
951 pollerr = pollfds[2].revents;
952 zerror(zlogp, B_FALSE,
953 "closing connection with stdout zfd, "
954 "pollerr %d\n", pollerr);
955 break;
956 }
957 }
958
959 /* event from the zone's stderr */
960 if (pollfds[3].revents) {
961 if (pollfds[3].revents &
962 (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
963 errno = 0;
964 cc = read(stderrfd, ibuf, BUFSIZ);
965 if (cc <= 0 && (errno != EINTR) &&
966 (errno != EAGAIN))
967 break;
968 if (cc > 0) {
969 wr_log_msg(ibuf, cc, 2);
970
971 /*
972 * Lose output if no one is listening,
973 * otherwise pass it on.
974 */
975 if (clierrfd != -1)
976 (void) write(clierrfd, ibuf,
977 cc);
978 }
979 } else {
980 pollerr = pollfds[3].revents;
981 zerror(zlogp, B_FALSE,
982 "closing connection with stderr zfd, "
983 "pollerr %d\n", pollerr);
984 break;
985 }
986 }
987
988 /* connect event from server control socket */
989 if (pollfds[4].revents) {
990 if (ctlfd != -1) {
991 /*
992 * Test the client to see if it is really
993 * still alive. If it has died but we
994 * haven't yet detected that, we might
995 * deny a legitimate connect attempt. If it
996 * is dead, we break out; once we tear down
997 * the old connection, the new connection
998 * will happen.
999 */
1000 if (test_client(ctlfd) == -1) {
1001 break;
1002 }
1003 /* we're already handling a client */
1004 reject_client(gzctlfd, clipid);
1005 } else {
1006 ctlfd = accept_client(gzctlfd, &clipid,
1007 clilocale, sizeof (clilocale), &flags);
1008 if (ctlfd != -1) {
1009 pollfds[0].fd = ctlfd;
1010 } else {
1011 break;
1012 }
1013 }
1014 }
1015
1016 /* connect event from server stdin/out socket */
1017 if (pollfds[5].revents) {
1018 if (ctlfd == -1) {
1019 /*
1020 * This shouldn't happen since the client is
1021 * expected to connect on the control socket
1022 * first. If we see this, tear everything down
1023 * and start over.
1024 */
1025 zerror(zlogp, B_FALSE, "GZ zfd stdin/stdout "
1026 "connection attempt with no GZ control\n");
1027 break;
1028 }
1029 assert(clifd == -1);
1030 if ((clifd = accept_socket(gzservfd, clipid)) != -1) {
1031 /* No need to watch for other new connections */
1032 pollfds[5].fd = -1;
1033 /* Client input is of interest, though */
1034 pollfds[1].fd = clifd;
1035 } else {
1036 break;
1037 }
1038 }
1039
1040 /* connection event from server stderr socket */
1041 if (pollfds[6].revents) {
1042 if (ctlfd == -1) {
1043 /*
1044 * Same conditions apply to stderr as stdin/out.
1045 */
1046 zerror(zlogp, B_FALSE, "GZ zfd stderr "
1047 "connection attempt with no GZ control\n");
1048 break;
1049 }
1050 assert(clierrfd == -1);
1051 if ((clierrfd = accept_socket(gzerrfd, clipid)) != -1) {
1052 /* No need to watch for other new connections */
1053 pollfds[6].fd = -1;
1054 } else {
1055 break;
1056 }
1057 }
1058
1059 /*
1060 * Watch for events on the eventstream. This is how we get
1061 * notified of the zone halting, etc. It provides us a
1062 * "wakeup" from poll when important things happen, which
1063 * is good.
1064 */
1065 if (pollfds[7].revents) {
1066 break;
1067 }
1068 }
1069
1070 if (clifd != -1) {
1071 (void) shutdown(clifd, SHUT_RDWR);
1072 (void) close(clifd);
1073 }
1074
1075 if (clierrfd != -1) {
1076 (void) shutdown(clierrfd, SHUT_RDWR);
1077 (void) close(clierrfd);
1078 }
1079 }
1080
1081 static int
1082 open_fd(zlog_t *zlogp, int id, int rw)
1083 {
1084 int fd;
1085 int flag = O_NONBLOCK | O_NOCTTY | O_CLOEXEC;
1086 int retried = 0;
1087 char stdpath[MAXPATHLEN];
1088
1089 (void) snprintf(stdpath, sizeof (stdpath), "/dev/zfd/%s/master/%d",
1090 zone_name, id);
1091 flag |= rw;
1092
1093 while (!shutting_down) {
1094 if ((fd = open(stdpath, flag)) != -1) {
1095 /*
1096 * Setting RPROTDIS on the stream means that the
1097 * control portion of messages received (which we don't
1098 * care about) will be discarded by the stream head. If
1099 * we allowed such messages, we wouldn't be able to use
1100 * read(2), as it fails (EBADMSG) when a message with a
1101 * control element is received.
1102 */
1103 if (ioctl(fd, I_SRDOPT, RNORM|RPROTDIS) == -1) {
1104 zerror(zlogp, B_TRUE,
1105 "failed to set options on zfd");
1106 return (-1);
1107 }
1108 return (fd);
1109 }
1110
1111 if (retried++ > 60)
1112 break;
1113
1114 (void) sleep(1);
1115 }
1116
1117 zerror(zlogp, B_TRUE, "failed to open zfd");
1118 return (-1);
1119 }
1120
1121 static void
1122 open_logfile()
1123 {
1124 char logpath[MAXPATHLEN];
1125
1126 logfd = -1;
1127 log_sz = 0;
1128
1129 (void) snprintf(logpath, sizeof (logpath), "%s/logs", zonepath);
1130 (void) mkdir(logpath, 0700);
1131
1132 (void) snprintf(logpath, sizeof (logpath), "%s/logs/%s", zonepath,
1133 LOGNAME);
1134
1135 if ((logfd = open(logpath, O_WRONLY | O_APPEND | O_CREAT,
1136 0600)) == -1) {
1137 zerror(zlogp, B_TRUE, "failed to open log file");
1138 } else {
1139 struct stat64 sb;
1140
1141 if (fstat64(logfd, &sb) == 0)
1142 log_sz = sb.st_size;
1143 }
1144 }
1145
1146 static void
1147 rotate_log()
1148 {
1149 time_t t;
1150 struct tm gtm;
1151 char onm[MAXPATHLEN], rnm[MAXPATHLEN];
1152
1153 if ((t = time(NULL)) == (time_t)-1 || gmtime_r(&t, >m) == NULL) {
1154 zerror(zlogp, B_TRUE, "failed to format time");
1155 return;
1156 }
1157
1158 (void) snprintf(rnm, sizeof (rnm),
1159 "%s/logs/%s.%d%02d%02dT%02d%02d%02dZ",
1160 zonepath, LOGNAME, gtm.tm_year + 1900, gtm.tm_mon + 1, gtm.tm_mday,
1161 gtm.tm_hour, gtm.tm_min, gtm.tm_sec);
1162 (void) snprintf(onm, sizeof (onm), "%s/logs/%s", zonepath, LOGNAME);
1163
1164 (void) close(logfd);
1165 if (rename(onm, rnm) != 0)
1166 zerror(zlogp, B_TRUE, "failed to rotate log file");
1167 open_logfile();
1168 }
1169
1170
1171 /* ARGSUSED */
1172 void
1173 hup_handler(int i)
1174 {
1175 if (logfd != -1) {
1176 (void) close(logfd);
1177 open_logfile();
1178 }
1179 }
1180
1181 /*
1182 * Body of the worker thread to log the zfd's stdout and stderr to a log file
1183 * and to perform interactive IO to the stdin, stdout and stderr zfd's.
1184 *
1185 * The stdin, stdout and stderr are from the perspective of the process inside
1186 * the zone, so the zoneadmd view is opposite (i.e. we write to the stdin fd
1187 * and read from the stdout/stderr fds).
1188 */
1189 static void
1190 srvr(void *modearg)
1191 {
1192 zfd_mode_t *mode = (zfd_mode_t *)modearg;
1193 int gzctlfd = -1;
1194 int gzoutfd = -1;
1195 int stdinfd = -1;
1196 int stdoutfd = -1;
1197 sigset_t blockset;
1198 int gzerrfd = -1;
1199 int stderrfd = -1;
1200 int flags;
1201 int len;
1202 char ibuf[BUFSIZ + 1];
1203
1204 if (!shutting_down && mode->zmode_gzlogging)
1205 open_logfile();
1206
1207 /*
1208 * This thread should receive SIGHUP so that it can close the log
1209 * file, and reopen it, during log rotation.
1210 */
1211 (void) sigset(SIGHUP, hup_handler);
1212 (void) sigfillset(&blockset);
1213 (void) sigdelset(&blockset, SIGHUP);
1214 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
1215
1216 if (!shutting_down) {
1217 if (pipe(eventstream) != 0) {
1218 zerror(zlogp, B_TRUE, "failed to open logger control "
1219 "pipe");
1220 return;
1221 }
1222 }
1223
1224 while (!shutting_down) {
1225 if (init_server_sock(zlogp, &gzctlfd, "ctl") == -1) {
1226 zerror(zlogp, B_FALSE,
1227 "server setup: control socket init failed");
1228 goto death;
1229 }
1230 if (init_server_sock(zlogp, &gzoutfd, "out") == -1) {
1231 zerror(zlogp, B_FALSE,
1232 "server setup: stdout socket init failed");
1233 goto death;
1234 }
1235 if (init_server_sock(zlogp, &gzerrfd, "err") == -1) {
1236 zerror(zlogp, B_FALSE,
1237 "server setup: stderr socket init failed");
1238 goto death;
1239 }
1240
1241 if (mode->zmode_n_stddevs == 1) {
1242 if ((stdinfd = open_fd(zlogp, 0, O_RDWR)) == -1) {
1243 goto death;
1244 }
1245 stdoutfd = stdinfd;
1246 } else {
1247 if ((stdinfd = open_fd(zlogp, 0, O_WRONLY)) == -1 ||
1248 (stdoutfd = open_fd(zlogp, 1, O_RDONLY)) == -1 ||
1249 (stderrfd = open_fd(zlogp, 2, O_RDONLY)) == -1) {
1250 goto death;
1251 }
1252 }
1253
1254 do_zfd_io(gzctlfd, gzoutfd, gzerrfd, stdinfd, stdoutfd,
1255 stderrfd);
1256 death:
1257 destroy_server_sock(gzctlfd, "ctl");
1258 destroy_server_sock(gzoutfd, "out");
1259 destroy_server_sock(gzerrfd, "err");
1260
1261 /* when shutting down, leave open until drained */
1262 if (!shutting_down) {
1263 (void) close(stdinfd);
1264 if (mode->zmode_n_stddevs == 3) {
1265 (void) close(stdoutfd);
1266 (void) close(stderrfd);
1267 }
1268 }
1269 }
1270
1271 /*
1272 * Attempt to drain remaining log output from the zone prior to closing
1273 * the file descriptors. This helps ensure that complete logs are
1274 * captured during shutdown.
1275 */
1276 flags = fcntl(stdoutfd, F_GETFL, 0);
1277 if (fcntl(stdoutfd, F_SETFL, flags | O_NONBLOCK) != -1) {
1278 while ((len = read(stdoutfd, ibuf, BUFSIZ)) > 0)
1279 wr_log_msg(ibuf, len, 1);
1280 }
1281 (void) close(stdoutfd);
1282
1283 if (mode->zmode_n_stddevs > 1) {
1284 (void) close(stdinfd);
1285 flags = fcntl(stderrfd, F_GETFL, 0);
1286 if (fcntl(stderrfd, F_SETFL, flags | O_NONBLOCK) != -1) {
1287 while ((len = read(stderrfd, ibuf, BUFSIZ)) > 0)
1288 wr_log_msg(ibuf, len, 2);
1289 }
1290 (void) close(stderrfd);
1291 }
1292
1293
1294 (void) close(eventstream[0]);
1295 eventstream[0] = -1;
1296 (void) close(eventstream[1]);
1297 eventstream[1] = -1;
1298 if (logfd != -1)
1299 (void) close(logfd);
1300 }
1301
1302 /*
1303 * The meaning of the original legacy values for the zlog-mode evolved over
1304 * time, to the point where the old names no longer made sense. The current
1305 * values are simply positional letters used to indicate various capabilities.
1306 * The following table shows the meaning of the mode values, along with the
1307 * legacy name which we continue to support for compatability. Any future
1308 * capability can add a letter to the left and '-' is implied for existing
1309 * strings.
1310 *
1311 * zlog-mode gz log - tty - ngz log
1312 * --------- ------ --- -------
1313 * gt- (int) y y n
1314 * g-- (log) y n n
1315 * gtn (nlint) y y y
1316 * g-n (nolog) y n y
1317 * -t- n y n
1318 * --- n n n
1319 *
1320 * This function also obtains a maximum log size while it is reading the
1321 * zone configuration.
1322 */
1323 static void
1324 get_mode_logmax(zfd_mode_t *mode)
1325 {
1326 zone_dochandle_t handle;
1327 struct zone_attrtab attr;
1328
1329 bzero(mode, sizeof (zfd_mode_t));
1330
1331 if ((handle = zonecfg_init_handle()) == NULL)
1332 return;
1333
1334 if (zonecfg_get_handle(zone_name, handle) != Z_OK)
1335 goto done;
1336
1337 if (zonecfg_setattrent(handle) != Z_OK)
1338 goto done;
1339 while (zonecfg_getattrent(handle, &attr) == Z_OK) {
1340 if (strcmp(ZLOG_MODE, attr.zone_attr_name) == 0) {
1341 if (strcmp("g--", attr.zone_attr_value) == 0 ||
1342 strncmp("log", attr.zone_attr_value, 3) == 0) {
1343 mode->zmode_gzlogging = B_TRUE;
1344 mode->zmode_n_stddevs = 3;
1345 mode->zmode_n_addl_devs = 0;
1346 } else if (strcmp("g-n", attr.zone_attr_value) == 0 ||
1347 strncmp("nolog", attr.zone_attr_value, 5) == 0) {
1348 mode->zmode_gzlogging = B_TRUE;
1349 mode->zmode_n_stddevs = 3;
1350 mode->zmode_n_addl_devs = 2;
1351 } else if (strcmp("gt-", attr.zone_attr_value) == 0 ||
1352 strncmp("int", attr.zone_attr_value, 3) == 0) {
1353 mode->zmode_gzlogging = B_TRUE;
1354 mode->zmode_n_stddevs = 1;
1355 mode->zmode_n_addl_devs = 0;
1356 } else if (strcmp("gtn", attr.zone_attr_value) == 0 ||
1357 strncmp("nlint", attr.zone_attr_value, 5) == 0) {
1358 mode->zmode_gzlogging = B_TRUE;
1359 mode->zmode_n_stddevs = 1;
1360 mode->zmode_n_addl_devs = 1;
1361 } else if (strcmp("-t-", attr.zone_attr_value) == 0) {
1362 mode->zmode_gzlogging = B_FALSE;
1363 mode->zmode_n_stddevs = 1;
1364 mode->zmode_n_addl_devs = 0;
1365 } else if (strcmp("---", attr.zone_attr_value) == 0) {
1366 mode->zmode_gzlogging = B_FALSE;
1367 mode->zmode_n_stddevs = 3;
1368 mode->zmode_n_addl_devs = 0;
1369 }
1370
1371 } else if (strcmp(LOG_MAXSZ, attr.zone_attr_name) == 0) {
1372 char *p;
1373 long lval;
1374
1375 p = attr.zone_attr_value;
1376 lval = strtol(p, &p, 10);
1377 if (*p == '\0')
1378 log_rot_sz = (size_t)lval;
1379 }
1380 }
1381 (void) zonecfg_endattrent(handle);
1382
1383 done:
1384 zonecfg_fini_handle(handle);
1385 }
1386
1387 /* ARGSUSED -- "id" might be used in the future. */
1388 void
1389 create_log_thread(zlog_t *logp, zoneid_t id)
1390 {
1391 int res;
1392
1393 shutting_down = 0;
1394 zlogp = logp;
1395
1396 get_mode_logmax(&mode);
1397 if (mode.zmode_n_stddevs == 0)
1398 return;
1399
1400 if (init_zfd_devs(zlogp, &mode) == -1) {
1401 zerror(zlogp, B_FALSE,
1402 "zfd setup: device initialization failed");
1403 return;
1404 }
1405
1406 res = thr_create(NULL, 0, (void * (*)(void *))srvr, (void *)&mode, 0,
1407 &logger_tid);
1408 if (res != 0) {
1409 zerror(zlogp, B_FALSE, "error %d creating logger thread", res);
1410 logger_tid = 0;
1411 }
1412 }
1413
1414 void
1415 destroy_log_thread()
1416 {
1417 if (logger_tid != 0) {
1418 int stop = 1;
1419
1420 shutting_down = 1;
1421 /* break out of poll to shutdown */
1422 if (eventstream[0] != -1)
1423 (void) write(eventstream[0], &stop, sizeof (stop));
1424 (void) thr_join(logger_tid, NULL, NULL);
1425 logger_tid = 0;
1426 }
1427
1428 (void) destroy_zfd_devs(zlogp);
1429 }