1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
14 * Copyright 2016 Joyent, Inc. All rights reserved.
15 */
16
17 /*
18 * Zone File Descriptor Driver.
19 *
20 * This driver is derived from the zcons driver which is in turn derived from
21 * the pts/ptm drivers. The purpose is to expose file descriptors within the
22 * zone which are connected to zoneadmd and used for logging or an interactive
23 * connection to a process within the zone.
24 *
25 * Its implementation is straightforward. Each instance of the driver
26 * represents a global-zone/local-zone pair. Unlike the zcons device, zoneadmd
27 * uses these devices unidirectionally to provide stdin, stdout and stderr to
28 * the process within the zone.
29 *
30 * Instances of zfd are onlined as children of /pseudo/zfdnex@2/ by zoneadmd,
31 * using the devctl framework; thus the driver does not need to maintain any
32 * sort of "admin" node.
33 *
34 * The driver shuttles I/O from master side to slave side and back. In a break
35 * from the pts/ptm semantics, if one side is not open, I/O directed towards
36 * it will simply be discarded. This is so that if zoneadmd is not holding the
37 * master side fd open (i.e. it has died somehow), processes in the zone do not
38 * experience any errors and I/O to the fd does not cause the process to hang.
39 *
40 * The driver can also act as a multiplexer so that data written to the
41 * slave side within the zone is also redirected back to another zfd device
42 * inside the zone for consumption (i.e. it can be read). The intention is
43 * that a logging process within the zone can consume data that is being
44 * written by an application onto the primary stream. This is essentially
45 * a tee off of the primary stream into a log stream. This tee can also be
46 * configured to be flow controlled via an ioctl. Flow control happens on the
47 * primary stream and is used to ensure that the log stream receives all of
48 * the messages off the primary stream when consumption of the data off of
49 * the log stream gets behind. Configuring for flow control implies that the
50 * application writing to the primary stream will be blocked when the log
51 * consumer gets behind. Note that closing the log stream (e.g. when the zone
52 * halts) will cause the loss of all messages queued in the stream.
53 *
54 * The zone's zfd device configuration is driven by zoneadmd and a zone mode.
55 * The mode, which is controlled by the zone attribute "zlog-mode" is somewhat
56 * of a misnomer since its purpose has evolved. The attribute can have a
57 * variety of values, but the lowest two positions are used to control how many
58 * zfd devices are created inside the zone and if the primary stream is a tty.
59 *
60 * Here is a summary of how the 4 modes control what zfd devices are created
61 * and how they're used:
62 *
63 * t-: 1 stdio zdev (0) configured as a tty
64 * --: 3 stdio zdevs (0, 1, 2), not configured as a tty
65 * tn: 1 stdio zdev (0) configured as a tty, 1 additional zdev (1)
66 * -n: 3 stdio zdevs (0, 1, 2), not tty, 2 additional zdevs (3, 4)
67 *
68 * With the 't' flag set, stdin/out/err is multiplexed onto a single full-duplex
 * stream which is configured as a tty. That is, ptem, ldterm and ttcompat are
70 * autopushed onto the stream when the slave side is opened. There is only a
71 * single zfd dev (0) needed for the primary stream.
72 *
73 * When the 'n' flag is set, it is assumed that output logging will be done
74 * within the zone itself. In this configuration 1 or 2 additional zfd devices,
75 * depending on tty mode ('t' flag) are created within the zone. An application
76 * can then configure the zfd streams driver into a multiplexer. Output from
 * the stdout/stderr zfd(s) will be teed into the corresponding logging zfd(s)
78 * within the zone.
79 *
80 * The following is a diagram of how this works for a '-n' configuration:
81 *
82 *
83 * zoneadmd (for zlogin -I stdout)
84 * GZ: ^
85 * |
86 * --------------------------
87 * ^
88 * NGZ: |
89 * app >1 -> zfd1 -> zfd3 -> logger (for logger to consume app's stdout)
90 *
91 * There would be a similar path for the app's stderr into zfd4 for the logger
92 * to consume stderr.
93 */
94
95 #include <sys/types.h>
96 #include <sys/cmn_err.h>
97 #include <sys/conf.h>
98 #include <sys/cred.h>
99 #include <sys/ddi.h>
100 #include <sys/debug.h>
101 #include <sys/devops.h>
102 #include <sys/errno.h>
103 #include <sys/file.h>
104 #include <sys/kstr.h>
105 #include <sys/modctl.h>
106 #include <sys/param.h>
107 #include <sys/stat.h>
108 #include <sys/stream.h>
109 #include <sys/stropts.h>
110 #include <sys/strsun.h>
111 #include <sys/sunddi.h>
112 #include <sys/sysmacros.h>
113 #include <sys/systm.h>
114 #include <sys/types.h>
115 #include <sys/zfd.h>
116 #include <sys/vnode.h>
117 #include <sys/fs/snode.h>
118 #include <sys/zone.h>
119 #include <sys/sdt.h>
120
121 static kmutex_t zfd_mux_lock;
122
123 static int zfd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
124 static int zfd_attach(dev_info_t *, ddi_attach_cmd_t);
125 static int zfd_detach(dev_info_t *, ddi_detach_cmd_t);
126
127 static int zfd_open(queue_t *, dev_t *, int, int, cred_t *);
128 static int zfd_close(queue_t *, int, cred_t *);
129 static void zfd_wput(queue_t *, mblk_t *);
130 static void zfd_rsrv(queue_t *);
131 static void zfd_wsrv(queue_t *);
132
/*
 * Minor number layout: bit 0 selects the side of the fd (master or slave)
 * and the remaining upper bits hold the driver instance number.
 */
#define	ZFD_MASTER_MINOR	0
#define	ZFD_SLAVE_MINOR		1

/* Extract the instance number, or the side, from a dev_t. */
#define	ZFD_INSTANCE(x)		(getminor((x)) >> 1)
#define	ZFD_NODE(x)		(getminor((x)) & 0x1)

/*
 * Build the dev_t of the slave minor node that belongs to the instance
 * described by the given zfd_state_t pointer.
 */
#define	ZFD_STATE_TO_SLAVEDEV(x)					\
	(makedevice(ddi_driver_major((x)->zfd_devinfo),			\
	    (minor_t)((ddi_get_instance((x)->zfd_devinfo) << 1) |	\
	    ZFD_SLAVE_MINOR)))
151
/*
 * Debug tracing. When zfd_debug is non-zero the DBG/DBG1 macros emit their
 * message through cmn_err(CE_NOTE, ...); otherwise they do nothing.
 *
 * The bodies are wrapped in do { } while (0) so that each macro expands to
 * exactly one statement. A bare "if (...) stmt" expansion would silently
 * capture a following "else" when the macro is used as the body of an
 * unbraced if.
 */
int zfd_debug = 0;
#define	DBG(a)		\
	do { if (zfd_debug) cmn_err(CE_NOTE, a); } while (0)
#define	DBG1(a, b)	\
	do { if (zfd_debug) cmn_err(CE_NOTE, a, b); } while (0)
155
156 /*
157 * ZFD Pseudo Terminal Module: stream data structure definitions,
158 * based on zcons.
159 */
160 static struct module_info zfd_info = {
161 0x20FD, /* ZOFD - 8445 */
162 "zfd",
163 0, /* min packet size */
164 INFPSZ, /* max packet size - infinity */
165 2048, /* high water */
166 128 /* low water */
167 };
168
169 static struct qinit zfd_rinit = {
170 NULL,
171 (int (*)()) zfd_rsrv,
172 zfd_open,
173 zfd_close,
174 NULL,
175 &zfd_info,
176 NULL
177 };
178
179 static struct qinit zfd_winit = {
180 (int (*)()) zfd_wput,
181 (int (*)()) zfd_wsrv,
182 NULL,
183 NULL,
184 NULL,
185 &zfd_info,
186 NULL
187 };
188
189 static struct streamtab zfd_tab_info = {
190 &zfd_rinit,
191 &zfd_winit,
192 NULL,
193 NULL
194 };
195
196 #define ZFD_CONF_FLAG (D_MP | D_MTQPAIR | D_MTOUTPERIM | D_MTOCEXCL)
197
198 /*
199 * this will define (struct cb_ops cb_zfd_ops) and (struct dev_ops zfd_ops)
200 */
201 DDI_DEFINE_STREAM_OPS(zfd_ops, nulldev, nulldev, zfd_attach, zfd_detach, \
202 nodev, zfd_getinfo, ZFD_CONF_FLAG, &zfd_tab_info, \
203 ddi_quiesce_not_needed);
204
205 /*
206 * Module linkage information for the kernel.
207 */
208
209 static struct modldrv modldrv = {
210 &mod_driverops, /* Type of module (this is a pseudo driver) */
211 "Zone FD driver", /* description of module */
212 &zfd_ops /* driver ops */
213 };
214
215 static struct modlinkage modlinkage = {
216 MODREV_1,
217 &modldrv,
218 NULL
219 };
220
/*
 * Role of an instance in the tee multiplexer. ZFD_NO_MUX is zero, which is
 * also the value a freshly zeroed soft state starts with.
 */
typedef enum {
	ZFD_NO_MUX = 0,		/* not part of a multiplexer */
	ZFD_PRIMARY_STREAM = 1,	/* stream whose output is teed */
	ZFD_LOG_STREAM = 2	/* stream receiving the teed output */
} zfd_mux_type_t;
226
227 typedef struct zfd_state {
228 dev_info_t *zfd_devinfo; /* instance info */
229 queue_t *zfd_master_rdq; /* GZ read queue */
230 queue_t *zfd_slave_rdq; /* in-zone read queue */
231 int zfd_state; /* ZFD_STATE_MOPEN, ZFD_STATE_SOPEN */
232 int zfd_tty; /* ZFD_MAKETTY - strm mods will push */
233 boolean_t zfd_is_flowcon; /* primary stream flow stopped */
234 boolean_t zfd_allow_flowcon; /* use flow control */
235 zfd_mux_type_t zfd_muxt; /* state type: none, primary, log */
236 struct zfd_state *zfd_inst_pri; /* log state's primary ptr */
237 struct zfd_state *zfd_inst_log; /* primary state's log ptr */
238 } zfd_state_t;
239
240 #define ZFD_STATE_MOPEN 0x01
241 #define ZFD_STATE_SOPEN 0x02
242
243 static void *zfd_soft_state;
244
245 /*
246 * List of STREAMS modules that are autopushed onto a slave instance when its
247 * opened, but only if the ZFD_MAKETTY ioctl has first been received by the
248 * master.
249 */
250 static char *zfd_mods[] = {
251 "ptem",
252 "ldterm",
253 "ttcompat",
254 NULL
255 };
256
257 int
258 _init(void)
259 {
260 int err;
261
262 if ((err = ddi_soft_state_init(&zfd_soft_state, sizeof (zfd_state_t),
263 0)) != 0) {
264 return (err);
265 }
266
267 if ((err = mod_install(&modlinkage)) != 0)
268 ddi_soft_state_fini(zfd_soft_state);
269
270 mutex_init(&zfd_mux_lock, NULL, MUTEX_DEFAULT, NULL);
271 return (err);
272 }
273
274
275 int
276 _fini(void)
277 {
278 int err;
279
280 if ((err = mod_remove(&modlinkage)) != 0) {
281 return (err);
282 }
283
284 ddi_soft_state_fini(&zfd_soft_state);
285 mutex_destroy(&zfd_mux_lock);
286 return (0);
287 }
288
289 int
290 _info(struct modinfo *modinfop)
291 {
292 return (mod_info(&modlinkage, modinfop));
293 }
294
295 static int
296 zfd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
297 {
298 zfd_state_t *zfds;
299 int instance;
300 char masternm[ZFD_NAME_LEN], slavenm[ZFD_NAME_LEN];
301
302 if (cmd != DDI_ATTACH)
303 return (DDI_FAILURE);
304
305 instance = ddi_get_instance(dip);
306 if (ddi_soft_state_zalloc(zfd_soft_state, instance) != DDI_SUCCESS)
307 return (DDI_FAILURE);
308
309 (void) snprintf(masternm, sizeof (masternm), "%s%d", ZFD_MASTER_NAME,
310 instance);
311 (void) snprintf(slavenm, sizeof (slavenm), "%s%d", ZFD_SLAVE_NAME,
312 instance);
313
314 /*
315 * Create the master and slave minor nodes.
316 */
317 if ((ddi_create_minor_node(dip, slavenm, S_IFCHR,
318 instance << 1 | ZFD_SLAVE_MINOR, DDI_PSEUDO, 0) == DDI_FAILURE) ||
319 (ddi_create_minor_node(dip, masternm, S_IFCHR,
320 instance << 1 | ZFD_MASTER_MINOR, DDI_PSEUDO, 0) == DDI_FAILURE)) {
321 ddi_remove_minor_node(dip, NULL);
322 ddi_soft_state_free(zfd_soft_state, instance);
323 return (DDI_FAILURE);
324 }
325
326 VERIFY((zfds = ddi_get_soft_state(zfd_soft_state, instance)) != NULL);
327 zfds->zfd_devinfo = dip;
328 zfds->zfd_tty = 0;
329 zfds->zfd_muxt = ZFD_NO_MUX;
330 zfds->zfd_inst_log = NULL;
331 return (DDI_SUCCESS);
332 }
333
334 static int
335 zfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
336 {
337 zfd_state_t *zfds;
338 int instance;
339
340 if (cmd != DDI_DETACH)
341 return (DDI_FAILURE);
342
343 instance = ddi_get_instance(dip);
344 if ((zfds = ddi_get_soft_state(zfd_soft_state, instance)) == NULL)
345 return (DDI_FAILURE);
346
347 if ((zfds->zfd_state & ZFD_STATE_MOPEN) ||
348 (zfds->zfd_state & ZFD_STATE_SOPEN)) {
349 DBG1("zfd_detach: device (dip=%p) still open\n", (void *)dip);
350 return (DDI_FAILURE);
351 }
352
353 ddi_remove_minor_node(dip, NULL);
354 ddi_soft_state_free(zfd_soft_state, instance);
355
356 return (DDI_SUCCESS);
357 }
358
359 /*
360 * zfd_getinfo()
361 * getinfo(9e) entrypoint.
362 */
363 /*ARGSUSED*/
364 static int
365 zfd_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
366 {
367 zfd_state_t *zfds;
368 int instance = ZFD_INSTANCE((dev_t)arg);
369
370 switch (infocmd) {
371 case DDI_INFO_DEVT2DEVINFO:
372 if ((zfds = ddi_get_soft_state(zfd_soft_state,
373 instance)) == NULL)
374 return (DDI_FAILURE);
375 *result = zfds->zfd_devinfo;
376 return (DDI_SUCCESS);
377 case DDI_INFO_DEVT2INSTANCE:
378 *result = (void *)(uintptr_t)instance;
379 return (DDI_SUCCESS);
380 }
381 return (DDI_FAILURE);
382 }
383
384 /*
385 * Return the equivalent queue from the other side of the relationship.
386 * e.g.: given the slave's write queue, return the master's write queue.
387 */
388 static queue_t *
389 zfd_switch(queue_t *qp)
390 {
391 zfd_state_t *zfds = qp->q_ptr;
392 ASSERT(zfds != NULL);
393
394 if (qp == zfds->zfd_master_rdq)
395 return (zfds->zfd_slave_rdq);
396 else if (OTHERQ(qp) == zfds->zfd_master_rdq && zfds->zfd_slave_rdq
397 != NULL)
398 return (OTHERQ(zfds->zfd_slave_rdq));
399 else if (qp == zfds->zfd_slave_rdq)
400 return (zfds->zfd_master_rdq);
401 else if (OTHERQ(qp) == zfds->zfd_slave_rdq && zfds->zfd_master_rdq
402 != NULL)
403 return (OTHERQ(zfds->zfd_master_rdq));
404 else
405 return (NULL);
406 }
407
408 /*
409 * For debugging and outputting messages. Returns the name of the side of
410 * the relationship associated with this queue.
411 */
412 static const char *
413 zfd_side(queue_t *qp)
414 {
415 zfd_state_t *zfds = qp->q_ptr;
416 ASSERT(zfds != NULL);
417
418 if (qp == zfds->zfd_master_rdq ||
419 OTHERQ(qp) == zfds->zfd_master_rdq) {
420 return ("master");
421 }
422 ASSERT(qp == zfds->zfd_slave_rdq || OTHERQ(qp) == zfds->zfd_slave_rdq);
423 return ("slave");
424 }
425
426 /*ARGSUSED*/
427 static int
428 zfd_master_open(zfd_state_t *zfds,
429 queue_t *rqp, /* pointer to the read side queue */
430 dev_t *devp, /* pointer to stream tail's dev */
431 int oflag, /* the user open(2) supplied flags */
432 int sflag, /* open state flag */
433 cred_t *credp) /* credentials */
434 {
435 mblk_t *mop;
436 struct stroptions *sop;
437
438 /*
439 * Enforce exclusivity on the master side; the only consumer should
440 * be the zoneadmd for the zone.
441 */
442 if ((zfds->zfd_state & ZFD_STATE_MOPEN) != 0)
443 return (EBUSY);
444
445 if ((mop = allocb(sizeof (struct stroptions), BPRI_MED)) == NULL) {
446 DBG("zfd_master_open(): mop allocation failed\n");
447 return (ENOMEM);
448 }
449
450 zfds->zfd_state |= ZFD_STATE_MOPEN;
451
452 /*
453 * q_ptr stores driver private data; stash the soft state data on both
454 * read and write sides of the queue.
455 */
456 WR(rqp)->q_ptr = rqp->q_ptr = zfds;
457 qprocson(rqp);
458
459 /*
460 * Following qprocson(), the master side is fully plumbed into the
461 * STREAM and may send/receive messages. Setting zfds->zfd_master_rdq
462 * will allow the slave to send messages to us (the master).
463 * This cannot occur before qprocson() because the master is not
464 * ready to process them until that point.
465 */
466 zfds->zfd_master_rdq = rqp;
467
468 /*
469 * set up hi/lo water marks on stream head read queue and add
470 * controlling tty as needed.
471 */
472 mop->b_datap->db_type = M_SETOPTS;
473 mop->b_wptr += sizeof (struct stroptions);
474 sop = (struct stroptions *)(void *)mop->b_rptr;
475 if (oflag & FNOCTTY)
476 sop->so_flags = SO_HIWAT | SO_LOWAT;
477 else
478 sop->so_flags = SO_HIWAT | SO_LOWAT | SO_ISTTY;
479 sop->so_hiwat = 512;
480 sop->so_lowat = 256;
481 putnext(rqp, mop);
482
483 return (0);
484 }
485
486 /*ARGSUSED*/
487 static int
488 zfd_slave_open(zfd_state_t *zfds,
489 queue_t *rqp, /* pointer to the read side queue */
490 dev_t *devp, /* pointer to stream tail's dev */
491 int oflag, /* the user open(2) supplied flags */
492 int sflag, /* open state flag */
493 cred_t *credp) /* credentials */
494 {
495 mblk_t *mop;
496 struct stroptions *sop;
497 /*
498 * The slave side can be opened as many times as needed.
499 */
500 if ((zfds->zfd_state & ZFD_STATE_SOPEN) != 0) {
501 ASSERT((rqp != NULL) && (WR(rqp)->q_ptr == zfds));
502 return (0);
503 }
504
505 /* A log stream is read-only */
506 if (zfds->zfd_muxt == ZFD_LOG_STREAM &&
507 (oflag & (FREAD | FWRITE)) != FREAD)
508 return (EINVAL);
509
510 if (zfds->zfd_tty == 1) {
511 major_t major;
512 minor_t minor;
513 minor_t lastminor;
514 uint_t anchorindex;
515
516 /*
517 * Set up sad(7D) so that the necessary STREAMS modules will
518 * be in place. A wrinkle is that 'ptem' must be anchored
519 * in place (see streamio(7i)) because we always want the
520 * fd to have terminal semantics.
521 */
522 minor =
523 ddi_get_instance(zfds->zfd_devinfo) << 1 | ZFD_SLAVE_MINOR;
524 major = ddi_driver_major(zfds->zfd_devinfo);
525 lastminor = 0;
526 anchorindex = 1;
527 if (kstr_autopush(SET_AUTOPUSH, &major, &minor, &lastminor,
528 &anchorindex, zfd_mods) != 0) {
529 DBG("zfd_slave_open(): kstr_autopush() failed\n");
530 return (EIO);
531 }
532 }
533
534 if ((mop = allocb(sizeof (struct stroptions), BPRI_MED)) == NULL) {
535 DBG("zfd_slave_open(): mop allocation failed\n");
536 return (ENOMEM);
537 }
538
539 zfds->zfd_state |= ZFD_STATE_SOPEN;
540
541 /*
542 * q_ptr stores driver private data; stash the soft state data on both
543 * read and write sides of the queue.
544 */
545 WR(rqp)->q_ptr = rqp->q_ptr = zfds;
546
547 qprocson(rqp);
548
549 /*
550 * Must follow qprocson(), since we aren't ready to process until then.
551 */
552 zfds->zfd_slave_rdq = rqp;
553
554 /*
555 * set up hi/lo water marks on stream head read queue and add
556 * controlling tty as needed.
557 */
558 mop->b_datap->db_type = M_SETOPTS;
559 mop->b_wptr += sizeof (struct stroptions);
560 sop = (struct stroptions *)(void *)mop->b_rptr;
561 sop->so_flags = SO_HIWAT | SO_LOWAT | SO_ISTTY;
562 sop->so_hiwat = 512;
563 sop->so_lowat = 256;
564 putnext(rqp, mop);
565
566 return (0);
567 }
568
569 /*
570 * open(9e) entrypoint; checks sflag, and rejects anything unordinary.
571 */
572 static int
573 zfd_open(queue_t *rqp, /* pointer to the read side queue */
574 dev_t *devp, /* pointer to stream tail's dev */
575 int oflag, /* the user open(2) supplied flags */
576 int sflag, /* open state flag */
577 cred_t *credp) /* credentials */
578 {
579 int instance = ZFD_INSTANCE(*devp);
580 int ret;
581 zfd_state_t *zfds;
582
583 if (sflag != 0)
584 return (EINVAL);
585
586 if ((zfds = ddi_get_soft_state(zfd_soft_state, instance)) == NULL)
587 return (ENXIO);
588
589 switch (ZFD_NODE(*devp)) {
590 case ZFD_MASTER_MINOR:
591 ret = zfd_master_open(zfds, rqp, devp, oflag, sflag, credp);
592 break;
593 case ZFD_SLAVE_MINOR:
594 ret = zfd_slave_open(zfds, rqp, devp, oflag, sflag, credp);
595 /*
596 * If we just opened the log stream and flow control has
597 * been enabled, we want to make sure the primary stream can
598 * start flowing.
599 */
600 if (ret == 0 && zfds->zfd_muxt == ZFD_LOG_STREAM &&
601 zfds->zfd_inst_pri->zfd_allow_flowcon) {
602 zfds->zfd_inst_pri->zfd_is_flowcon = B_FALSE;
603 if (zfds->zfd_inst_pri->zfd_master_rdq != NULL)
604 qenable(RD(zfds->zfd_inst_pri->zfd_master_rdq));
605 }
606 break;
607 default:
608 ret = ENXIO;
609 break;
610 }
611
612 return (ret);
613 }
614
615 /*
616 * close(9e) entrypoint.
617 */
618 /*ARGSUSED1*/
619 static int
620 zfd_close(queue_t *rqp, int flag, cred_t *credp)
621 {
622 queue_t *wqp;
623 mblk_t *bp;
624 zfd_state_t *zfds;
625 major_t major;
626 minor_t minor;
627
628 zfds = (zfd_state_t *)rqp->q_ptr;
629
630 if (rqp == zfds->zfd_master_rdq) {
631 DBG("Closing master side");
632
633 zfds->zfd_master_rdq = NULL;
634 zfds->zfd_state &= ~ZFD_STATE_MOPEN;
635
636 /*
637 * qenable slave side write queue so that it can flush
638 * its messages as master's read queue is going away
639 */
640 if (zfds->zfd_slave_rdq != NULL) {
641 qenable(WR(zfds->zfd_slave_rdq));
642 }
643
644 qprocsoff(rqp);
645 WR(rqp)->q_ptr = rqp->q_ptr = NULL;
646
647 } else if (rqp == zfds->zfd_slave_rdq) {
648
649 DBG("Closing slave side");
650 zfds->zfd_state &= ~ZFD_STATE_SOPEN;
651 zfds->zfd_slave_rdq = NULL;
652
653 wqp = WR(rqp);
654 while ((bp = getq(wqp)) != NULL) {
655 if (zfds->zfd_master_rdq != NULL)
656 putnext(zfds->zfd_master_rdq, bp);
657 else if (bp->b_datap->db_type == M_IOCTL)
658 miocnak(wqp, bp, 0, 0);
659 else
660 freemsg(bp);
661 }
662
663 /*
664 * Qenable master side write queue so that it can flush its
665 * messages as slaves's read queue is going away.
666 */
667 if (zfds->zfd_master_rdq != NULL)
668 qenable(WR(zfds->zfd_master_rdq));
669
670 /*
671 * Qenable primary stream if necessary.
672 */
673 if (zfds->zfd_muxt == ZFD_LOG_STREAM &&
674 zfds->zfd_inst_pri->zfd_allow_flowcon) {
675 zfds->zfd_inst_pri->zfd_is_flowcon = B_FALSE;
676 if (zfds->zfd_inst_pri->zfd_master_rdq != NULL)
677 qenable(RD(zfds->zfd_inst_pri->zfd_master_rdq));
678 }
679
680 qprocsoff(rqp);
681 WR(rqp)->q_ptr = rqp->q_ptr = NULL;
682
683 if (zfds->zfd_tty == 1) {
684 /*
685 * Clear the sad configuration so that reopening
686 * doesn't fail to set up sad configuration.
687 */
688 major = ddi_driver_major(zfds->zfd_devinfo);
689 minor = ddi_get_instance(zfds->zfd_devinfo) << 1 |
690 ZFD_SLAVE_MINOR;
691 (void) kstr_autopush(CLR_AUTOPUSH, &major, &minor,
692 NULL, NULL, NULL);
693 }
694 }
695
696 return (0);
697 }
698
699 static void
700 handle_mflush(queue_t *qp, mblk_t *mp)
701 {
702 mblk_t *nmp;
703 DBG1("M_FLUSH on %s side", zfd_side(qp));
704
705 if (*mp->b_rptr & FLUSHW) {
706 DBG1("M_FLUSH, FLUSHW, %s side", zfd_side(qp));
707 flushq(qp, FLUSHDATA);
708 *mp->b_rptr &= ~FLUSHW;
709 if ((*mp->b_rptr & FLUSHR) == 0) {
710 /*
711 * FLUSHW only. Change to FLUSHR and putnext other side,
712 * then we are done.
713 */
714 *mp->b_rptr |= FLUSHR;
715 if (zfd_switch(RD(qp)) != NULL) {
716 putnext(zfd_switch(RD(qp)), mp);
717 return;
718 }
719 } else if ((zfd_switch(RD(qp)) != NULL) &&
720 (nmp = copyb(mp)) != NULL) {
721 /*
722 * It is a FLUSHRW; we copy the mblk and send
723 * it to the other side, since we still need to use
724 * the mblk in FLUSHR processing, below.
725 */
726 putnext(zfd_switch(RD(qp)), nmp);
727 }
728 }
729
730 if (*mp->b_rptr & FLUSHR) {
731 DBG("qreply(qp) turning FLUSHR around\n");
732 qreply(qp, mp);
733 return;
734 }
735 freemsg(mp);
736 }
737
738 /*
739 * Evaluate the various conditionals to determine if we're teeing into a log
740 * stream and if the primary stream should be flow controlled. This function
741 * can set the zfd_is_flowcon flag as a side effect.
742 *
743 * When teeing with flow control, we always queue the teed msg here and if
744 * the queue is getting full, we set zfd_is_flowcon. The primary stream will
745 * always queue when zfd_is_flowcon and will also not be served when
746 * zfd_is_flowcon is set. This causes backpressure on the primary stream
747 * until the teed queue can drain.
748 */
749 static void
750 zfd_tee_handler(zfd_state_t *zfds, unsigned char type, mblk_t *mp)
751 {
752 queue_t *log_qp;
753 zfd_state_t *log_zfds;
754 mblk_t *lmp;
755
756 if (zfds->zfd_muxt != ZFD_PRIMARY_STREAM)
757 return;
758
759 if (type != M_DATA)
760 return;
761
762 log_zfds = zfds->zfd_inst_log;
763 if (log_zfds == NULL)
764 return;
765
766 ASSERT(log_zfds->zfd_muxt == ZFD_LOG_STREAM);
767
768 if ((log_zfds->zfd_state & ZFD_STATE_SOPEN) == 0) {
769 if (zfds->zfd_allow_flowcon)
770 zfds->zfd_is_flowcon = B_TRUE;
771 return;
772 }
773
774 /* The zfd_slave_rdq is null until the log dev is opened in the zone */
775 log_qp = RD(log_zfds->zfd_slave_rdq);
776 DTRACE_PROBE2(zfd__tee__check, void *, log_qp, void *, zfds);
777
778 if (!zfds->zfd_allow_flowcon) {
779 /*
780 * We're not supposed to tee with flow control and the tee is
781 * full so we skip teeing into the log stream.
782 */
783 if ((log_qp->q_flag & QFULL) != 0)
784 return;
785 }
786
787 /*
788 * Tee the message into the log stream.
789 */
790 lmp = dupmsg(mp);
791 if (lmp == NULL) {
792 if (zfds->zfd_allow_flowcon)
793 zfds->zfd_is_flowcon = B_TRUE;
794 return;
795 }
796
797 if (log_qp->q_first == NULL && bcanputnext(log_qp, lmp->b_band)) {
798 putnext(log_qp, lmp);
799 } else {
800 if (putq(log_qp, lmp) == 0) {
801 /* The logger queue is full, free the msg. */
802 freemsg(lmp);
803 }
804 /*
805 * If we're supposed to tee with flow control and the tee is
806 * over the high water mark then we want the primary stream to
807 * stop flowing. We'll stop queueing the primary stream after
808 * the log stream has drained.
809 */
810 if (zfds->zfd_allow_flowcon &&
811 log_qp->q_count > log_qp->q_hiwat) {
812 zfds->zfd_is_flowcon = B_TRUE;
813 }
814 }
815 }
816
817 /*
818 * wput(9E) is symmetric for master and slave sides, so this handles both
819 * without splitting the codepath. (The only exception to this is the
820 * processing of zfd ioctls, which is restricted to the master side.)
821 *
822 * zfd_wput() looks at the other side; if there is no process holding that
823 * side open, it frees the message. This prevents processes from hanging
824 * if no one is holding open the fd. Otherwise, it putnext's high
825 * priority messages, putnext's normal messages if possible, and otherwise
826 * enqueues the messages; in the case that something is enqueued, wsrv(9E)
827 * will take care of eventually shuttling I/O to the other side.
828 *
829 * When configured as a multiplexer, then anything written to the stream
830 * from inside the zone is also teed off to the corresponding log stream
831 * for consumption within the zone (i.e. the log stream can be read, but never
832 * written to, by an application inside the zone).
833 */
834 static void
835 zfd_wput(queue_t *qp, mblk_t *mp)
836 {
837 unsigned char type = mp->b_datap->db_type;
838 zfd_state_t *zfds;
839 struct iocblk *iocbp;
840 boolean_t must_queue = B_FALSE;
841
842 ASSERT(qp->q_ptr);
843
844 DBG1("entering zfd_wput, %s side", zfd_side(qp));
845
846 /*
847 * Process zfd ioctl messages if qp is the master side's write queue.
848 */
849 zfds = (zfd_state_t *)qp->q_ptr;
850
851 if (type == M_IOCTL) {
852 iocbp = (struct iocblk *)(void *)mp->b_rptr;
853
854 switch (iocbp->ioc_cmd) {
855 case ZFD_MAKETTY:
856 zfds->zfd_tty = 1;
857 miocack(qp, mp, 0, 0);
858 return;
859 case ZFD_EOF:
860 if (zfds->zfd_slave_rdq != NULL)
861 (void) putnextctl(zfds->zfd_slave_rdq,
862 M_HANGUP);
863 miocack(qp, mp, 0, 0);
864 return;
865 case ZFD_HAS_SLAVE:
866 if ((zfds->zfd_state & ZFD_STATE_SOPEN) != 0) {
867 miocack(qp, mp, 0, 0);
868 } else {
869 miocack(qp, mp, 0, ENOTTY);
870 }
871 return;
872 case ZFD_MUX: {
873 /*
874 * Setup the multiplexer configuration for the two
875 * streams.
876 *
877 * We expect to be called on the stream that will
878 * become the log stream and be passed one data block
879 * with the minor number of the slave side of the
880 * primary stream.
881 */
882 int to;
883 int instance;
884 zfd_state_t *prim_zfds;
885
886 if (iocbp->ioc_count != TRANSPARENT ||
887 mp->b_cont == NULL) {
888 miocack(qp, mp, 0, EINVAL);
889 return;
890 }
891
892 /* Get the primary slave minor device number */
893 to = *(int *)mp->b_cont->b_rptr;
894 instance = ZFD_INSTANCE(to);
895
896 if ((prim_zfds = ddi_get_soft_state(zfd_soft_state,
897 instance)) == NULL) {
898 miocack(qp, mp, 0, EINVAL);
899 return;
900 }
901
902 /* Disallow changing primary/log once set. */
903 mutex_enter(&zfd_mux_lock);
904 if (zfds->zfd_muxt != ZFD_NO_MUX ||
905 prim_zfds->zfd_muxt != ZFD_NO_MUX) {
906 mutex_exit(&zfd_mux_lock);
907 miocack(qp, mp, 0, EINVAL);
908 return;
909 }
910
911 zfds->zfd_muxt = ZFD_LOG_STREAM;
912 zfds->zfd_inst_pri = prim_zfds;
913 prim_zfds->zfd_muxt = ZFD_PRIMARY_STREAM;
914 prim_zfds->zfd_inst_log = zfds;
915 mutex_exit(&zfd_mux_lock);
916 DTRACE_PROBE2(zfd__mux__link, void *, prim_zfds,
917 void *, zfds);
918
919 miocack(qp, mp, 0, 0);
920 return;
921 }
922 case ZFD_MUX_FLOWCON: {
923 /*
924 * We expect this ioctl to be issued against the
925 * log stream. We don't use the primary stream since
926 * there can be other streams modules pushed onto that
927 * stream which would interfere with the ioctl.
928 */
929 int val;
930 zfd_state_t *prim_zfds;
931
932 if (iocbp->ioc_count != TRANSPARENT ||
933 mp->b_cont == NULL) {
934 miocack(qp, mp, 0, EINVAL);
935 return;
936 }
937
938 if (zfds->zfd_muxt != ZFD_LOG_STREAM) {
939 miocack(qp, mp, 0, EINVAL);
940 return;
941 }
942 prim_zfds = zfds->zfd_inst_pri;
943
944 /* Get the flow control setting */
945 val = *(int *)mp->b_cont->b_rptr;
946 if (val != 0 && val != 1) {
947 miocack(qp, mp, 0, EINVAL);
948 return;
949 }
950
951 prim_zfds->zfd_allow_flowcon = (boolean_t)val;
952 if (!prim_zfds->zfd_allow_flowcon)
953 prim_zfds->zfd_is_flowcon = B_FALSE;
954
955 DTRACE_PROBE1(zfd__mux__flowcon, void *, prim_zfds);
956 miocack(qp, mp, 0, 0);
957 return;
958 }
959 default:
960 break;
961 }
962 }
963
964 /* if on the write side, may need to tee */
965 if (zfds->zfd_slave_rdq != NULL && qp == WR(zfds->zfd_slave_rdq)) {
966 /* tee output to any attached log stream */
967 zfd_tee_handler(zfds, type, mp);
968
969 /* high-priority msgs are not subject to flow control */
970 if (zfds->zfd_is_flowcon && type == M_DATA)
971 must_queue = B_TRUE;
972 }
973
974 if (zfd_switch(RD(qp)) == NULL) {
975 DBG1("wput to %s side (no one listening)", zfd_side(qp));
976 switch (type) {
977 case M_FLUSH:
978 handle_mflush(qp, mp);
979 break;
980 case M_IOCTL:
981 miocnak(qp, mp, 0, 0);
982 break;
983 default:
984 freemsg(mp);
985 break;
986 }
987 return;
988 }
989
990 if (type >= QPCTL) {
991 DBG1("(hipri) wput, %s side", zfd_side(qp));
992 switch (type) {
993 case M_READ: /* supposedly from ldterm? */
994 DBG("zfd_wput: tossing M_READ\n");
995 freemsg(mp);
996 break;
997 case M_FLUSH:
998 handle_mflush(qp, mp);
999 break;
1000 default:
1001 /*
1002 * Put this to the other side.
1003 */
1004 ASSERT(zfd_switch(RD(qp)) != NULL);
1005 putnext(zfd_switch(RD(qp)), mp);
1006 break;
1007 }
1008 DBG1("done (hipri) wput, %s side", zfd_side(qp));
1009 return;
1010 }
1011
1012 /*
1013 * If the primary stream has been stopped for flow control then
1014 * enqueue the msg, otherwise only putnext if there isn't already
1015 * something in the queue. If we don't do this then things would wind
1016 * up out of order.
1017 */
1018 if (!must_queue && qp->q_first == NULL &&
1019 bcanputnext(RD(zfd_switch(qp)), mp->b_band)) {
1020 putnext(RD(zfd_switch(qp)), mp);
1021 } else {
1022 /*
1023 * zfd_wsrv expects msgs queued on the primary queue. Those
1024 * will be handled by zfd_wsrv after zfd_rsrv performs the
1025 * qenable on the proper queue.
1026 */
1027 (void) putq(qp, mp);
1028 }
1029
1030 DBG1("done wput, %s side", zfd_side(qp));
1031 }
1032
1033 /*
1034 * Read server
1035 *
1036 * For primary stream:
1037 * Under normal execution rsrv(9E) is symmetric for master and slave, so
1038 * zfd_rsrv() can handle both without splitting up the codepath. We do this by
1039 * enabling the write side of the partner. This triggers the partner to send
1040 * messages queued on its write side to this queue's read side.
1041 *
1042 * For log stream:
1043 * Internally we've queued up the msgs that we've teed off to the log stream
1044 * so when we're invoked we need to pass these along.
1045 */
1046 static void
1047 zfd_rsrv(queue_t *qp)
1048 {
1049 zfd_state_t *zfds;
1050 zfds = (zfd_state_t *)qp->q_ptr;
1051
1052 /*
1053 * log stream server
1054 */
1055 if (zfds->zfd_muxt == ZFD_LOG_STREAM && zfds->zfd_slave_rdq != NULL) {
1056 queue_t *log_qp;
1057 mblk_t *mp;
1058
1059 log_qp = RD(zfds->zfd_slave_rdq);
1060
1061 if ((zfds->zfd_state & ZFD_STATE_SOPEN) != 0) {
1062 zfd_state_t *pzfds = zfds->zfd_inst_pri;
1063
1064 while ((mp = getq(qp)) != NULL) {
1065 if (bcanputnext(log_qp, mp->b_band)) {
1066 putnext(log_qp, mp);
1067 } else {
1068 (void) putbq(log_qp, mp);
1069 break;
1070 }
1071 }
1072
1073 if (log_qp->q_count < log_qp->q_lowat) {
1074 DTRACE_PROBE(zfd__flow__on);
1075 pzfds->zfd_is_flowcon = B_FALSE;
1076 if (pzfds->zfd_master_rdq != NULL)
1077 qenable(RD(pzfds->zfd_master_rdq));
1078 }
1079 } else {
1080 /* No longer open, drain the queue */
1081 while ((mp = getq(qp)) != NULL) {
1082 freemsg(mp);
1083 }
1084 flushq(qp, FLUSHALL);
1085 }
1086 return;
1087 }
1088
1089 /*
1090 * Care must be taken here, as either of the master or slave side
1091 * qptr could be NULL.
1092 */
1093 ASSERT(qp == zfds->zfd_master_rdq || qp == zfds->zfd_slave_rdq);
1094 if (zfd_switch(qp) == NULL) {
1095 DBG("zfd_rsrv: other side isn't listening\n");
1096 return;
1097 }
1098 qenable(WR(zfd_switch(qp)));
1099 }
1100
1101 /*
1102 * Write server
1103 *
1104 * This routine is symmetric for master and slave, so it handles both without
1105 * splitting up the codepath.
1106 *
1107 * If there are messages on this queue that can be sent to the other, send
1108 * them via putnext(). Else, if queued messages cannot be sent, leave them
1109 * on this queue.
1110 */
1111 static void
1112 zfd_wsrv(queue_t *qp)
1113 {
1114 queue_t *swq;
1115 mblk_t *mp;
1116 zfd_state_t *zfds = (zfd_state_t *)qp->q_ptr;
1117
1118 ASSERT(zfds != NULL);
1119
1120 /*
1121 * Partner has no read queue, so take the data, and throw it away.
1122 */
1123 if (zfd_switch(RD(qp)) == NULL) {
1124 DBG("zfd_wsrv: other side isn't listening");
1125 while ((mp = getq(qp)) != NULL) {
1126 if (mp->b_datap->db_type == M_IOCTL)
1127 miocnak(qp, mp, 0, 0);
1128 else
1129 freemsg(mp);
1130 }
1131 flushq(qp, FLUSHALL);
1132 return;
1133 }
1134
1135 swq = RD(zfd_switch(qp));
1136
1137 /*
1138 * while there are messages on this write queue...
1139 */
1140 while (!zfds->zfd_is_flowcon && (mp = getq(qp)) != NULL) {
1141 /*
1142 * Due to the way zfd_wput is implemented, we should never
1143 * see a high priority control message here.
1144 */
1145 ASSERT(mp->b_datap->db_type < QPCTL);
1146
1147 if (bcanputnext(swq, mp->b_band)) {
1148 putnext(swq, mp);
1149 } else {
1150 (void) putbq(qp, mp);
1151 break;
1152 }
1153 }
1154 }