1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  14  * Copyright 2016 Joyent, Inc.  All rights reserved.
  15  */
  16 
  17 /*
  18  * Zone File Descriptor Driver.
  19  *
  20  * This driver is derived from the zcons driver which is in turn derived from
  21  * the pts/ptm drivers. The purpose is to expose file descriptors within the
  22  * zone which are connected to zoneadmd and used for logging or an interactive
  23  * connection to a process within the zone.
  24  *
  25  * Its implementation is straightforward. Each instance of the driver
  26  * represents a global-zone/local-zone pair. Unlike the zcons device, zoneadmd
  27  * uses these devices unidirectionally to provide stdin, stdout and stderr to
  28  * the process within the zone.
  29  *
  30  * Instances of zfd are onlined as children of /pseudo/zfdnex@2/ by zoneadmd,
  31  * using the devctl framework; thus the driver does not need to maintain any
  32  * sort of "admin" node.
  33  *
  34  * The driver shuttles I/O from master side to slave side and back.  In a break
  35  * from the pts/ptm semantics, if one side is not open, I/O directed towards
  36  * it will simply be discarded. This is so that if zoneadmd is not holding the
  37  * master side fd open (i.e. it has died somehow), processes in the zone do not
  38  * experience any errors and I/O to the fd does not cause the process to hang.
  39  *
  40  * The driver can also act as a multiplexer so that data written to the
  41  * slave side within the zone is also redirected back to another zfd device
  42  * inside the zone for consumption (i.e. it can be read). The intention is
  43  * that a logging process within the zone can consume data that is being
  44  * written by an application onto the primary stream. This is essentially
  45  * a tee off of the primary stream into a log stream. This tee can also be
  46  * configured to be flow controlled via an ioctl. Flow control happens on the
  47  * primary stream and is used to ensure that the log stream receives all of
  48  * the messages off the primary stream when consumption of the data off of
  49  * the log stream gets behind. Configuring for flow control implies that the
  50  * application writing to the primary stream will be blocked when the log
  51  * consumer gets behind. Note that closing the log stream (e.g. when the zone
  52  * halts) will cause the loss of all messages queued in the stream.
  53  *
  54  * The zone's zfd device configuration is driven by zoneadmd and a zone mode.
  55  * The mode, which is controlled by the zone attribute "zlog-mode" is somewhat
  56  * of a misnomer since its purpose has evolved. The attribute can have a
  57  * variety of values, but the lowest two positions are used to control how many
  58  * zfd devices are created inside the zone and if the primary stream is a tty.
  59  *
  60  * Here is a summary of how the 4 modes control what zfd devices are created
  61  * and how they're used:
  62  *
  63  *    t-:  1 stdio zdev  (0) configured as a tty
  64  *    --:  3 stdio zdevs (0, 1, 2), not configured as a tty
  65  *    tn:  1 stdio zdev  (0) configured as a tty, 1 additional zdev (1)
  66  *    -n:  3 stdio zdevs (0, 1, 2), not tty, 2 additional zdevs (3, 4)
  67  *
  68  * With the 't' flag set, stdin/out/err is multiplexed onto a single full-duplex
  69  * stream which is configured as a tty. That is, ptem, ldterm and ttcompat are
  70  * autopushed onto the stream when the slave side is opened. There is only a
  71  * single zfd dev (0) needed for the primary stream.
  72  *
  73  * When the 'n' flag is set, it is assumed that output logging will be done
  74  * within the zone itself. In this configuration 1 or 2 additional zfd devices,
  75  * depending on tty mode ('t' flag) are created within the zone. An application
  76  * can then configure the zfd streams driver into a multiplexer. Output from
  77  * the stdout/stderr zfd(s) will be teed into the corresponding logging zfd(s)
  78  * within the zone.
  79  *
  80  * The following is a diagram of how this works for a '-n' configuration:
  81  *
  82  *
  83  *              zoneadmd (for zlogin -I stdout)
  84  * GZ:             ^
  85  *                 |
  86  *     --------------------------
  87  *                 ^
  88  * NGZ:            |
  89  *      app >1 -> zfd1 -> zfd3 -> logger (for logger to consume app's stdout)
  90  *
  91  * There would be a similar path for the app's stderr into zfd4 for the logger
  92  * to consume stderr.
  93  */
  94 
  95 #include <sys/types.h>
  96 #include <sys/cmn_err.h>
  97 #include <sys/conf.h>
  98 #include <sys/cred.h>
  99 #include <sys/ddi.h>
 100 #include <sys/debug.h>
 101 #include <sys/devops.h>
 102 #include <sys/errno.h>
 103 #include <sys/file.h>
 104 #include <sys/kstr.h>
 105 #include <sys/modctl.h>
 106 #include <sys/param.h>
 107 #include <sys/stat.h>
 108 #include <sys/stream.h>
 109 #include <sys/stropts.h>
 110 #include <sys/strsun.h>
 111 #include <sys/sunddi.h>
 112 #include <sys/sysmacros.h>
 113 #include <sys/systm.h>
 114 #include <sys/types.h>
 115 #include <sys/zfd.h>
 116 #include <sys/vnode.h>
 117 #include <sys/fs/snode.h>
 118 #include <sys/zone.h>
 119 #include <sys/sdt.h>
 120 
/*
 * Driver-global lock; set up in _init() and torn down in _fini().
 * NOTE(review): presumably serializes changes to the primary/log mux
 * configuration -- no user of the lock is visible in this part of the
 * file, confirm against the ZFD_MUX ioctl handling.
 */
static kmutex_t zfd_mux_lock;

/* Autoconfiguration entry points, wired up by DDI_DEFINE_STREAM_OPS(). */
static int zfd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int zfd_attach(dev_info_t *, ddi_attach_cmd_t);
static int zfd_detach(dev_info_t *, ddi_detach_cmd_t);

/* STREAMS entry points, wired up by zfd_rinit and zfd_winit. */
static int zfd_open(queue_t *, dev_t *, int, int, cred_t *);
static int zfd_close(queue_t *, int, cred_t *);
static void zfd_wput(queue_t *, mblk_t *);
static void zfd_rsrv(queue_t *);
static void zfd_wsrv(queue_t *);
 132 
 133 /*
 134  * The instance number is encoded in the dev_t in the minor number; the lowest
 135  * bit of the minor number is used to track the master vs. slave side of the
 136  * fd. The rest of the bits in the minor number are the instance.
 137  */
#define ZFD_MASTER_MINOR                0       /* low minor bit clear */
#define ZFD_SLAVE_MINOR         1       /* low minor bit set */

/* Split a dev_t's minor number into its instance and node (side) parts. */
#define ZFD_INSTANCE(x)         (getminor((x)) >> 1)
#define ZFD_NODE(x)             (getminor((x)) & 0x01)

/*
 * This macro converts a zfd_state_t pointer to the associated slave minor
 * node's dev_t.  It is the inverse of the encoding above: the instance is
 * shifted up by one bit and the slave bit is or'ed in.
 */
#define ZFD_STATE_TO_SLAVEDEV(x)        \
        (makedevice(ddi_driver_major((x)->zfd_devinfo), \
        (minor_t)(ddi_get_instance((x)->zfd_devinfo) << 1 | ZFD_SLAVE_MINOR)))
 151 
 152 int zfd_debug = 0;
 153 #define DBG(a)          if (zfd_debug) cmn_err(CE_NOTE, a)
 154 #define DBG1(a, b)      if (zfd_debug) cmn_err(CE_NOTE, a, b)
 155 
 156 /*
 157  * ZFD Pseudo Terminal Module: stream data structure definitions,
 158  * based on zcons.
 159  */
 160 static struct module_info zfd_info = {
 161         0x20FD, /* ZOFD - 8445 */
 162         "zfd",
 163         0,              /* min packet size */
 164         INFPSZ,         /* max packet size - infinity */
 165         2048,           /* high water */
 166         128             /* low water */
 167 };
 168 
 169 static struct qinit zfd_rinit = {
 170         NULL,
 171         (int (*)()) zfd_rsrv,
 172         zfd_open,
 173         zfd_close,
 174         NULL,
 175         &zfd_info,
 176         NULL
 177 };
 178 
 179 static struct qinit zfd_winit = {
 180         (int (*)()) zfd_wput,
 181         (int (*)()) zfd_wsrv,
 182         NULL,
 183         NULL,
 184         NULL,
 185         &zfd_info,
 186         NULL
 187 };
 188 
 189 static struct streamtab zfd_tab_info = {
 190         &zfd_rinit,
 191         &zfd_winit,
 192         NULL,
 193         NULL
 194 };
 195 
/*
 * Driver flags handed to DDI_DEFINE_STREAM_OPS() below.
 * NOTE(review): D_MTOUTPERIM | D_MTOCEXCL presumably make open and close
 * exclusive at the outer perimeter, which the open/close code appears to
 * rely on -- confirm against the STREAMS multithreading documentation.
 */
#define ZFD_CONF_FLAG   (D_MP | D_MTQPAIR | D_MTOUTPERIM | D_MTOCEXCL)

/*
 * This will define (struct cb_ops cb_zfd_ops) and (struct dev_ops zfd_ops);
 * zfd_ops is what modldrv hands to the module framework.
 */
DDI_DEFINE_STREAM_OPS(zfd_ops, nulldev, nulldev, zfd_attach, zfd_detach, \
        nodev, zfd_getinfo, ZFD_CONF_FLAG, &zfd_tab_info, \
        ddi_quiesce_not_needed);
 204 
 205 /*
 206  * Module linkage information for the kernel.
 207  */
 208 
 209 static struct modldrv modldrv = {
 210         &mod_driverops,     /* Type of module (this is a pseudo driver) */
 211         "Zone FD driver",       /* description of module */
 212         &zfd_ops            /* driver ops */
 213 };
 214 
 215 static struct modlinkage modlinkage = {
 216         MODREV_1,
 217         &modldrv,
 218         NULL
 219 };
 220 
/*
 * Role an instance plays in the tee (multiplexer) configuration.
 */
typedef enum {
        ZFD_NO_MUX,             /* not part of a tee (set at attach) */
        ZFD_PRIMARY_STREAM,     /* M_DATA written here is teed to a log */
        ZFD_LOG_STREAM          /* receives the data teed from a primary */
} zfd_mux_type_t;
 226 
/*
 * Per-instance soft state.  Allocated zero-filled in zfd_attach() and
 * released in zfd_detach().  The two read-queue pointers are NULL while
 * the corresponding side is closed.
 */
typedef struct zfd_state {
        dev_info_t *zfd_devinfo;        /* instance info */
        queue_t *zfd_master_rdq;        /* GZ read queue */
        queue_t *zfd_slave_rdq;         /* in-zone read queue */
        int zfd_state;                  /* ZFD_STATE_MOPEN, ZFD_STATE_SOPEN */
        int zfd_tty;                    /* ZFD_MAKETTY - strm mods will push */
        boolean_t zfd_is_flowcon;       /* primary stream flow stopped */
        boolean_t zfd_allow_flowcon;    /* use flow control */
        zfd_mux_type_t zfd_muxt;        /* state type: none, primary, log */
        struct zfd_state *zfd_inst_pri; /* log state's primary ptr */
        struct zfd_state *zfd_inst_log; /* primary state's log ptr */
} zfd_state_t;
 239 
/* Bits kept in zfd_state_t.zfd_state: which sides are currently open. */
#define ZFD_STATE_MOPEN 0x01    /* master side is open */
#define ZFD_STATE_SOPEN 0x02    /* slave side is open */

/* Handle for the soft state table of zfd_state_t, indexed by instance. */
static void *zfd_soft_state;
 244 
 245 /*
 246  * List of STREAMS modules that are autopushed onto a slave instance when it's
 247  * opened, but only if the ZFD_MAKETTY ioctl has first been received by the
 248  * master.
 249  */
static char *zfd_mods[] = {
        "ptem",         /* zfd_slave_open() anchors here (anchorindex 1) */
        "ldterm",
        "ttcompat",
        NULL            /* list terminator */
};
 256 
 257 int
 258 _init(void)
 259 {
 260         int err;
 261 
 262         if ((err = ddi_soft_state_init(&zfd_soft_state, sizeof (zfd_state_t),
 263             0)) != 0) {
 264                 return (err);
 265         }
 266 
 267         if ((err = mod_install(&modlinkage)) != 0)
 268                 ddi_soft_state_fini(zfd_soft_state);
 269 
 270         mutex_init(&zfd_mux_lock, NULL, MUTEX_DEFAULT, NULL);
 271         return (err);
 272 }
 273 
 274 
 275 int
 276 _fini(void)
 277 {
 278         int err;
 279 
 280         if ((err = mod_remove(&modlinkage)) != 0) {
 281                 return (err);
 282         }
 283 
 284         ddi_soft_state_fini(&zfd_soft_state);
 285         mutex_destroy(&zfd_mux_lock);
 286         return (0);
 287 }
 288 
 289 int
 290 _info(struct modinfo *modinfop)
 291 {
 292         return (mod_info(&modlinkage, modinfop));
 293 }
 294 
 295 static int
 296 zfd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 297 {
 298         zfd_state_t *zfds;
 299         int instance;
 300         char masternm[ZFD_NAME_LEN], slavenm[ZFD_NAME_LEN];
 301 
 302         if (cmd != DDI_ATTACH)
 303                 return (DDI_FAILURE);
 304 
 305         instance = ddi_get_instance(dip);
 306         if (ddi_soft_state_zalloc(zfd_soft_state, instance) != DDI_SUCCESS)
 307                 return (DDI_FAILURE);
 308 
 309         (void) snprintf(masternm, sizeof (masternm), "%s%d", ZFD_MASTER_NAME,
 310             instance);
 311         (void) snprintf(slavenm, sizeof (slavenm), "%s%d", ZFD_SLAVE_NAME,
 312             instance);
 313 
 314         /*
 315          * Create the master and slave minor nodes.
 316          */
 317         if ((ddi_create_minor_node(dip, slavenm, S_IFCHR,
 318             instance << 1 | ZFD_SLAVE_MINOR, DDI_PSEUDO, 0) == DDI_FAILURE) ||
 319             (ddi_create_minor_node(dip, masternm, S_IFCHR,
 320             instance << 1 | ZFD_MASTER_MINOR, DDI_PSEUDO, 0) == DDI_FAILURE)) {
 321                 ddi_remove_minor_node(dip, NULL);
 322                 ddi_soft_state_free(zfd_soft_state, instance);
 323                 return (DDI_FAILURE);
 324         }
 325 
 326         VERIFY((zfds = ddi_get_soft_state(zfd_soft_state, instance)) != NULL);
 327         zfds->zfd_devinfo = dip;
 328         zfds->zfd_tty = 0;
 329         zfds->zfd_muxt = ZFD_NO_MUX;
 330         zfds->zfd_inst_log = NULL;
 331         return (DDI_SUCCESS);
 332 }
 333 
 334 static int
 335 zfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 336 {
 337         zfd_state_t *zfds;
 338         int instance;
 339 
 340         if (cmd != DDI_DETACH)
 341                 return (DDI_FAILURE);
 342 
 343         instance = ddi_get_instance(dip);
 344         if ((zfds = ddi_get_soft_state(zfd_soft_state, instance)) == NULL)
 345                 return (DDI_FAILURE);
 346 
 347         if ((zfds->zfd_state & ZFD_STATE_MOPEN) ||
 348             (zfds->zfd_state & ZFD_STATE_SOPEN)) {
 349                 DBG1("zfd_detach: device (dip=%p) still open\n", (void *)dip);
 350                 return (DDI_FAILURE);
 351         }
 352 
 353         ddi_remove_minor_node(dip, NULL);
 354         ddi_soft_state_free(zfd_soft_state, instance);
 355 
 356         return (DDI_SUCCESS);
 357 }
 358 
 359 /*
 360  * zfd_getinfo()
 361  *      getinfo(9e) entrypoint.
 362  */
 363 /*ARGSUSED*/
 364 static int
 365 zfd_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
 366 {
 367         zfd_state_t *zfds;
 368         int instance = ZFD_INSTANCE((dev_t)arg);
 369 
 370         switch (infocmd) {
 371         case DDI_INFO_DEVT2DEVINFO:
 372                 if ((zfds = ddi_get_soft_state(zfd_soft_state,
 373                     instance)) == NULL)
 374                         return (DDI_FAILURE);
 375                 *result = zfds->zfd_devinfo;
 376                 return (DDI_SUCCESS);
 377         case DDI_INFO_DEVT2INSTANCE:
 378                 *result = (void *)(uintptr_t)instance;
 379                 return (DDI_SUCCESS);
 380         }
 381         return (DDI_FAILURE);
 382 }
 383 
 384 /*
 385  * Return the equivalent queue from the other side of the relationship.
 386  * e.g.: given the slave's write queue, return the master's write queue.
 387  */
 388 static queue_t *
 389 zfd_switch(queue_t *qp)
 390 {
 391         zfd_state_t *zfds = qp->q_ptr;
 392         ASSERT(zfds != NULL);
 393 
 394         if (qp == zfds->zfd_master_rdq)
 395                 return (zfds->zfd_slave_rdq);
 396         else if (OTHERQ(qp) == zfds->zfd_master_rdq && zfds->zfd_slave_rdq
 397             != NULL)
 398                 return (OTHERQ(zfds->zfd_slave_rdq));
 399         else if (qp == zfds->zfd_slave_rdq)
 400                 return (zfds->zfd_master_rdq);
 401         else if (OTHERQ(qp) == zfds->zfd_slave_rdq && zfds->zfd_master_rdq
 402             != NULL)
 403                 return (OTHERQ(zfds->zfd_master_rdq));
 404         else
 405                 return (NULL);
 406 }
 407 
 408 /*
 409  * For debugging and outputting messages.  Returns the name of the side of
 410  * the relationship associated with this queue.
 411  */
 412 static const char *
 413 zfd_side(queue_t *qp)
 414 {
 415         zfd_state_t *zfds = qp->q_ptr;
 416         ASSERT(zfds != NULL);
 417 
 418         if (qp == zfds->zfd_master_rdq ||
 419             OTHERQ(qp) == zfds->zfd_master_rdq) {
 420                 return ("master");
 421         }
 422         ASSERT(qp == zfds->zfd_slave_rdq || OTHERQ(qp) == zfds->zfd_slave_rdq);
 423         return ("slave");
 424 }
 425 
 426 /*ARGSUSED*/
 427 static int
 428 zfd_master_open(zfd_state_t *zfds,
 429     queue_t     *rqp,   /* pointer to the read side queue */
 430     dev_t       *devp,  /* pointer to stream tail's dev */
 431     int         oflag,  /* the user open(2) supplied flags */
 432     int         sflag,  /* open state flag */
 433     cred_t      *credp) /* credentials */
 434 {
 435         mblk_t *mop;
 436         struct stroptions *sop;
 437 
 438         /*
 439          * Enforce exclusivity on the master side; the only consumer should
 440          * be the zoneadmd for the zone.
 441          */
 442         if ((zfds->zfd_state & ZFD_STATE_MOPEN) != 0)
 443                 return (EBUSY);
 444 
 445         if ((mop = allocb(sizeof (struct stroptions), BPRI_MED)) == NULL) {
 446                 DBG("zfd_master_open(): mop allocation failed\n");
 447                 return (ENOMEM);
 448         }
 449 
 450         zfds->zfd_state |= ZFD_STATE_MOPEN;
 451 
 452         /*
 453          * q_ptr stores driver private data; stash the soft state data on both
 454          * read and write sides of the queue.
 455          */
 456         WR(rqp)->q_ptr = rqp->q_ptr = zfds;
 457         qprocson(rqp);
 458 
 459         /*
 460          * Following qprocson(), the master side is fully plumbed into the
 461          * STREAM and may send/receive messages.  Setting zfds->zfd_master_rdq
 462          * will allow the slave to send messages to us (the master).
 463          * This cannot occur before qprocson() because the master is not
 464          * ready to process them until that point.
 465          */
 466         zfds->zfd_master_rdq = rqp;
 467 
 468         /*
 469          * set up hi/lo water marks on stream head read queue and add
 470          * controlling tty as needed.
 471          */
 472         mop->b_datap->db_type = M_SETOPTS;
 473         mop->b_wptr += sizeof (struct stroptions);
 474         sop = (struct stroptions *)(void *)mop->b_rptr;
 475         if (oflag & FNOCTTY)
 476                 sop->so_flags = SO_HIWAT | SO_LOWAT;
 477         else
 478                 sop->so_flags = SO_HIWAT | SO_LOWAT | SO_ISTTY;
 479         sop->so_hiwat = 512;
 480         sop->so_lowat = 256;
 481         putnext(rqp, mop);
 482 
 483         return (0);
 484 }
 485 
 486 /*ARGSUSED*/
 487 static int
 488 zfd_slave_open(zfd_state_t *zfds,
 489     queue_t     *rqp,   /* pointer to the read side queue */
 490     dev_t       *devp,  /* pointer to stream tail's dev */
 491     int         oflag,  /* the user open(2) supplied flags */
 492     int         sflag,  /* open state flag */
 493     cred_t      *credp) /* credentials */
 494 {
 495         mblk_t *mop;
 496         struct stroptions *sop;
 497         /*
 498          * The slave side can be opened as many times as needed.
 499          */
 500         if ((zfds->zfd_state & ZFD_STATE_SOPEN) != 0) {
 501                 ASSERT((rqp != NULL) && (WR(rqp)->q_ptr == zfds));
 502                 return (0);
 503         }
 504 
 505         /* A log stream is read-only */
 506         if (zfds->zfd_muxt == ZFD_LOG_STREAM &&
 507             (oflag & (FREAD | FWRITE)) != FREAD)
 508                 return (EINVAL);
 509 
 510         if (zfds->zfd_tty == 1) {
 511                 major_t major;
 512                 minor_t minor;
 513                 minor_t lastminor;
 514                 uint_t anchorindex;
 515 
 516                 /*
 517                  * Set up sad(7D) so that the necessary STREAMS modules will
 518                  * be in place.  A wrinkle is that 'ptem' must be anchored
 519                  * in place (see streamio(7i)) because we always want the
 520                  * fd to have terminal semantics.
 521                  */
 522                 minor =
 523                     ddi_get_instance(zfds->zfd_devinfo) << 1 | ZFD_SLAVE_MINOR;
 524                 major = ddi_driver_major(zfds->zfd_devinfo);
 525                 lastminor = 0;
 526                 anchorindex = 1;
 527                 if (kstr_autopush(SET_AUTOPUSH, &major, &minor, &lastminor,
 528                     &anchorindex, zfd_mods) != 0) {
 529                         DBG("zfd_slave_open(): kstr_autopush() failed\n");
 530                         return (EIO);
 531                 }
 532         }
 533 
 534         if ((mop = allocb(sizeof (struct stroptions), BPRI_MED)) == NULL) {
 535                 DBG("zfd_slave_open(): mop allocation failed\n");
 536                 return (ENOMEM);
 537         }
 538 
 539         zfds->zfd_state |= ZFD_STATE_SOPEN;
 540 
 541         /*
 542          * q_ptr stores driver private data; stash the soft state data on both
 543          * read and write sides of the queue.
 544          */
 545         WR(rqp)->q_ptr = rqp->q_ptr = zfds;
 546 
 547         qprocson(rqp);
 548 
 549         /*
 550          * Must follow qprocson(), since we aren't ready to process until then.
 551          */
 552         zfds->zfd_slave_rdq = rqp;
 553 
 554         /*
 555          * set up hi/lo water marks on stream head read queue and add
 556          * controlling tty as needed.
 557          */
 558         mop->b_datap->db_type = M_SETOPTS;
 559         mop->b_wptr += sizeof (struct stroptions);
 560         sop = (struct stroptions *)(void *)mop->b_rptr;
 561         sop->so_flags = SO_HIWAT | SO_LOWAT | SO_ISTTY;
 562         sop->so_hiwat = 512;
 563         sop->so_lowat = 256;
 564         putnext(rqp, mop);
 565 
 566         return (0);
 567 }
 568 
 569 /*
 570  * open(9e) entrypoint; checks sflag, and rejects anything unordinary.
 571  */
 572 static int
 573 zfd_open(queue_t *rqp,          /* pointer to the read side queue */
 574         dev_t   *devp,          /* pointer to stream tail's dev */
 575         int     oflag,          /* the user open(2) supplied flags */
 576         int     sflag,          /* open state flag */
 577         cred_t  *credp)         /* credentials */
 578 {
 579         int instance = ZFD_INSTANCE(*devp);
 580         int ret;
 581         zfd_state_t *zfds;
 582 
 583         if (sflag != 0)
 584                 return (EINVAL);
 585 
 586         if ((zfds = ddi_get_soft_state(zfd_soft_state, instance)) == NULL)
 587                 return (ENXIO);
 588 
 589         switch (ZFD_NODE(*devp)) {
 590         case ZFD_MASTER_MINOR:
 591                 ret = zfd_master_open(zfds, rqp, devp, oflag, sflag, credp);
 592                 break;
 593         case ZFD_SLAVE_MINOR:
 594                 ret = zfd_slave_open(zfds, rqp, devp, oflag, sflag, credp);
 595                 /*
 596                  * If we just opened the log stream and flow control has
 597                  * been enabled, we want to make sure the primary stream can
 598                  * start flowing.
 599                  */
 600                 if (ret == 0 && zfds->zfd_muxt == ZFD_LOG_STREAM &&
 601                     zfds->zfd_inst_pri->zfd_allow_flowcon) {
 602                         zfds->zfd_inst_pri->zfd_is_flowcon = B_FALSE;
 603                         if (zfds->zfd_inst_pri->zfd_master_rdq != NULL)
 604                                 qenable(RD(zfds->zfd_inst_pri->zfd_master_rdq));
 605                 }
 606                 break;
 607         default:
 608                 ret = ENXIO;
 609                 break;
 610         }
 611 
 612         return (ret);
 613 }
 614 
 615 /*
 616  * close(9e) entrypoint.
 617  */
 618 /*ARGSUSED1*/
 619 static int
 620 zfd_close(queue_t *rqp, int flag, cred_t *credp)
 621 {
 622         queue_t *wqp;
 623         mblk_t  *bp;
 624         zfd_state_t *zfds;
 625         major_t major;
 626         minor_t minor;
 627 
 628         zfds = (zfd_state_t *)rqp->q_ptr;
 629 
 630         if (rqp == zfds->zfd_master_rdq) {
 631                 DBG("Closing master side");
 632 
 633                 zfds->zfd_master_rdq = NULL;
 634                 zfds->zfd_state &= ~ZFD_STATE_MOPEN;
 635 
 636                 /*
 637                  * qenable slave side write queue so that it can flush
 638                  * its messages as master's read queue is going away
 639                  */
 640                 if (zfds->zfd_slave_rdq != NULL) {
 641                         qenable(WR(zfds->zfd_slave_rdq));
 642                 }
 643 
 644                 qprocsoff(rqp);
 645                 WR(rqp)->q_ptr = rqp->q_ptr = NULL;
 646 
 647         } else if (rqp == zfds->zfd_slave_rdq) {
 648 
 649                 DBG("Closing slave side");
 650                 zfds->zfd_state &= ~ZFD_STATE_SOPEN;
 651                 zfds->zfd_slave_rdq = NULL;
 652 
 653                 wqp = WR(rqp);
 654                 while ((bp = getq(wqp)) != NULL) {
 655                         if (zfds->zfd_master_rdq != NULL)
 656                                 putnext(zfds->zfd_master_rdq, bp);
 657                         else if (bp->b_datap->db_type == M_IOCTL)
 658                                 miocnak(wqp, bp, 0, 0);
 659                         else
 660                                 freemsg(bp);
 661                 }
 662 
 663                 /*
 664                  * Qenable master side write queue so that it can flush its
 665                  * messages as slaves's read queue is going away.
 666                  */
 667                 if (zfds->zfd_master_rdq != NULL)
 668                         qenable(WR(zfds->zfd_master_rdq));
 669 
 670                 /*
 671                  * Qenable primary stream if necessary.
 672                  */
 673                 if (zfds->zfd_muxt == ZFD_LOG_STREAM &&
 674                     zfds->zfd_inst_pri->zfd_allow_flowcon) {
 675                         zfds->zfd_inst_pri->zfd_is_flowcon = B_FALSE;
 676                         if (zfds->zfd_inst_pri->zfd_master_rdq != NULL)
 677                                 qenable(RD(zfds->zfd_inst_pri->zfd_master_rdq));
 678                 }
 679 
 680                 qprocsoff(rqp);
 681                 WR(rqp)->q_ptr = rqp->q_ptr = NULL;
 682 
 683                 if (zfds->zfd_tty == 1) {
 684                         /*
 685                          * Clear the sad configuration so that reopening
 686                          * doesn't fail to set up sad configuration.
 687                          */
 688                         major = ddi_driver_major(zfds->zfd_devinfo);
 689                         minor = ddi_get_instance(zfds->zfd_devinfo) << 1 |
 690                             ZFD_SLAVE_MINOR;
 691                         (void) kstr_autopush(CLR_AUTOPUSH, &major, &minor,
 692                             NULL, NULL, NULL);
 693                 }
 694         }
 695 
 696         return (0);
 697 }
 698 
 699 static void
 700 handle_mflush(queue_t *qp, mblk_t *mp)
 701 {
 702         mblk_t *nmp;
 703         DBG1("M_FLUSH on %s side", zfd_side(qp));
 704 
 705         if (*mp->b_rptr & FLUSHW) {
 706                 DBG1("M_FLUSH, FLUSHW, %s side", zfd_side(qp));
 707                 flushq(qp, FLUSHDATA);
 708                 *mp->b_rptr &= ~FLUSHW;
 709                 if ((*mp->b_rptr & FLUSHR) == 0) {
 710                         /*
 711                          * FLUSHW only. Change to FLUSHR and putnext other side,
 712                          * then we are done.
 713                          */
 714                         *mp->b_rptr |= FLUSHR;
 715                         if (zfd_switch(RD(qp)) != NULL) {
 716                                 putnext(zfd_switch(RD(qp)), mp);
 717                                 return;
 718                         }
 719                 } else if ((zfd_switch(RD(qp)) != NULL) &&
 720                     (nmp = copyb(mp)) != NULL) {
 721                         /*
 722                          * It is a FLUSHRW; we copy the mblk and send
 723                          * it to the other side, since we still need to use
 724                          * the mblk in FLUSHR processing, below.
 725                          */
 726                         putnext(zfd_switch(RD(qp)), nmp);
 727                 }
 728         }
 729 
 730         if (*mp->b_rptr & FLUSHR) {
 731                 DBG("qreply(qp) turning FLUSHR around\n");
 732                 qreply(qp, mp);
 733                 return;
 734         }
 735         freemsg(mp);
 736 }
 737 
 738 /*
 739  * Evaluate the various conditionals to determine if we're teeing into a log
 740  * stream and if the primary stream should be flow controlled. This function
 741  * can set the zfd_is_flowcon flag as a side effect.
 742  *
 743  * When teeing with flow control, we always queue the teed msg here and if
 744  * the queue is getting full, we set zfd_is_flowcon. The primary stream will
 745  * always queue when zfd_is_flowcon and will also not be served when
 746  * zfd_is_flowcon is set. This causes backpressure on the primary stream
 747  * until the teed queue can drain.
 748  */
 749 static void
 750 zfd_tee_handler(zfd_state_t *zfds, unsigned char type, mblk_t *mp)
 751 {
 752         queue_t *log_qp;
 753         zfd_state_t *log_zfds;
 754         mblk_t *lmp;
 755 
 756         if (zfds->zfd_muxt != ZFD_PRIMARY_STREAM)
 757                 return;
 758 
 759         if (type != M_DATA)
 760                 return;
 761 
 762         log_zfds = zfds->zfd_inst_log;
 763         if (log_zfds == NULL)
 764                 return;
 765 
 766         ASSERT(log_zfds->zfd_muxt == ZFD_LOG_STREAM);
 767 
 768         if ((log_zfds->zfd_state & ZFD_STATE_SOPEN) == 0) {
 769                 if (zfds->zfd_allow_flowcon)
 770                         zfds->zfd_is_flowcon = B_TRUE;
 771                 return;
 772         }
 773 
 774         /* The zfd_slave_rdq is null until the log dev is opened in the zone */
 775         log_qp = RD(log_zfds->zfd_slave_rdq);
 776         DTRACE_PROBE2(zfd__tee__check, void *, log_qp, void *, zfds);
 777 
 778         if (!zfds->zfd_allow_flowcon) {
 779                 /*
 780                  * We're not supposed to tee with flow control and the tee is
 781                  * full so we skip teeing into the log stream.
 782                  */
 783                 if ((log_qp->q_flag & QFULL) != 0)
 784                         return;
 785         }
 786 
 787         /*
 788          * Tee the message into the log stream.
 789          */
 790         lmp = dupmsg(mp);
 791         if (lmp == NULL) {
 792                 if (zfds->zfd_allow_flowcon)
 793                         zfds->zfd_is_flowcon = B_TRUE;
 794                 return;
 795         }
 796 
 797         if (log_qp->q_first == NULL && bcanputnext(log_qp, lmp->b_band)) {
 798                 putnext(log_qp, lmp);
 799         } else {
 800                 if (putq(log_qp, lmp) == 0) {
 801                         /* The logger queue is full, free the msg. */
 802                         freemsg(lmp);
 803                 }
 804                 /*
 805                  * If we're supposed to tee with flow control and the tee is
 806                  * over the high water mark then we want the primary stream to
 807                  * stop flowing. We'll stop queueing the primary stream after
 808                  * the log stream has drained.
 809                  */
 810                 if (zfds->zfd_allow_flowcon &&
 811                     log_qp->q_count > log_qp->q_hiwat) {
 812                         zfds->zfd_is_flowcon = B_TRUE;
 813                 }
 814         }
 815 }
 816 
 817 /*
 818  * wput(9E) is symmetric for master and slave sides, so this handles both
 819  * without splitting the codepath.  (The only exception to this is the
 820  * processing of zfd ioctls, which is restricted to the master side.)
 821  *
 822  * zfd_wput() looks at the other side; if there is no process holding that
 823  * side open, it frees the message.  This prevents processes from hanging
 824  * if no one is holding open the fd.  Otherwise, it putnext's high
 825  * priority messages, putnext's normal messages if possible, and otherwise
 826  * enqueues the messages; in the case that something is enqueued, wsrv(9E)
 827  * will take care of eventually shuttling I/O to the other side.
 828  *
 829  * When configured as a multiplexer, then anything written to the stream
 830  * from inside the zone is also teed off to the corresponding log stream
 831  * for consumption within the zone (i.e. the log stream can be read, but never
 832  * written to, by an application inside the zone).
 833  */
 834 static void
 835 zfd_wput(queue_t *qp, mblk_t *mp)
 836 {
 837         unsigned char type = mp->b_datap->db_type;
 838         zfd_state_t *zfds;
 839         struct iocblk *iocbp;
 840         boolean_t must_queue = B_FALSE;
 841 
 842         ASSERT(qp->q_ptr);
 843 
 844         DBG1("entering zfd_wput, %s side", zfd_side(qp));
 845 
 846         /*
 847          * Process zfd ioctl messages if qp is the master side's write queue.
 848          */
 849         zfds = (zfd_state_t *)qp->q_ptr;
 850 
 851         if (type == M_IOCTL) {
 852                 iocbp = (struct iocblk *)(void *)mp->b_rptr;
 853 
 854                 switch (iocbp->ioc_cmd) {
 855                 case ZFD_MAKETTY:
 856                         zfds->zfd_tty = 1;
 857                         miocack(qp, mp, 0, 0);
 858                         return;
 859                 case ZFD_EOF:
 860                         if (zfds->zfd_slave_rdq != NULL)
 861                                 (void) putnextctl(zfds->zfd_slave_rdq,
 862                                     M_HANGUP);
 863                         miocack(qp, mp, 0, 0);
 864                         return;
 865                 case ZFD_HAS_SLAVE:
 866                         if ((zfds->zfd_state & ZFD_STATE_SOPEN) != 0) {
 867                                 miocack(qp, mp, 0, 0);
 868                         } else {
 869                                 miocack(qp, mp, 0, ENOTTY);
 870                         }
 871                         return;
 872                 case ZFD_MUX: {
 873                         /*
 874                          * Setup the multiplexer configuration for the two
 875                          * streams.
 876                          *
 877                          * We expect to be called on the stream that will
 878                          * become the log stream and be passed one data block
 879                          * with the minor number of the slave side of the
 880                          * primary stream.
 881                          */
 882                         int to;
 883                         int instance;
 884                         zfd_state_t *prim_zfds;
 885 
 886                         if (iocbp->ioc_count != TRANSPARENT ||
 887                             mp->b_cont == NULL) {
 888                                 miocack(qp, mp, 0, EINVAL);
 889                                 return;
 890                         }
 891 
 892                         /* Get the primary slave minor device number */
 893                         ASSERT(IS_P2ALIGNED(mp->b_cont->b_rptr, 4));
 894                         /* LINTED - b_rptr will always be aligned. */
 895                         to = *(int *)mp->b_cont->b_rptr;
 896                         instance = ZFD_INSTANCE(to);
 897 
 898                         if ((prim_zfds = ddi_get_soft_state(zfd_soft_state,
 899                             instance)) == NULL) {
 900                                 miocack(qp, mp, 0, EINVAL);
 901                                 return;
 902                         }
 903 
 904                         /* Disallow changing primary/log once set. */
 905                         mutex_enter(&zfd_mux_lock);
 906                         if (zfds->zfd_muxt != ZFD_NO_MUX ||
 907                             prim_zfds->zfd_muxt != ZFD_NO_MUX) {
 908                                 mutex_exit(&zfd_mux_lock);
 909                                 miocack(qp, mp, 0, EINVAL);
 910                                 return;
 911                         }
 912 
 913                         zfds->zfd_muxt = ZFD_LOG_STREAM;
 914                         zfds->zfd_inst_pri = prim_zfds;
 915                         prim_zfds->zfd_muxt = ZFD_PRIMARY_STREAM;
 916                         prim_zfds->zfd_inst_log = zfds;
 917                         mutex_exit(&zfd_mux_lock);
 918                         DTRACE_PROBE2(zfd__mux__link, void *, prim_zfds,
 919                             void *, zfds);
 920 
 921                         miocack(qp, mp, 0, 0);
 922                         return;
 923                         }
 924                 case ZFD_MUX_FLOWCON: {
 925                         /*
 926                          * We expect this ioctl to be issued against the
 927                          * log stream. We don't use the primary stream since
 928                          * there can be other streams modules pushed onto that
 929                          * stream which would interfere with the ioctl.
 930                          */
 931                         int val;
 932                         zfd_state_t *prim_zfds;
 933 
 934                         if (iocbp->ioc_count != TRANSPARENT ||
 935                             mp->b_cont == NULL) {
 936                                 miocack(qp, mp, 0, EINVAL);
 937                                 return;
 938                         }
 939 
 940                         if (zfds->zfd_muxt != ZFD_LOG_STREAM) {
 941                                 miocack(qp, mp, 0, EINVAL);
 942                                 return;
 943                         }
 944                         prim_zfds = zfds->zfd_inst_pri;
 945 
 946                         /* Get the flow control setting */
 947                         ASSERT(IS_P2ALIGNED(mp->b_cont->b_rptr, 4));
 948                         /* LINTED - b_rptr will always be aligned. */
 949                         val = *(int *)mp->b_cont->b_rptr;
 950                         if (val != 0 && val != 1) {
 951                                 miocack(qp, mp, 0, EINVAL);
 952                                 return;
 953                         }
 954 
 955                         prim_zfds->zfd_allow_flowcon = (boolean_t)val;
 956                         if (!prim_zfds->zfd_allow_flowcon)
 957                                 prim_zfds->zfd_is_flowcon = B_FALSE;
 958 
 959                         DTRACE_PROBE1(zfd__mux__flowcon, void *, prim_zfds);
 960                         miocack(qp, mp, 0, 0);
 961                         return;
 962                         }
 963                 default:
 964                         break;
 965                 }
 966         }
 967 
 968         /* if on the write side, may need to tee */
 969         if (zfds->zfd_slave_rdq != NULL && qp == WR(zfds->zfd_slave_rdq)) {
 970                 /* tee output to any attached log stream */
 971                 zfd_tee_handler(zfds, type, mp);
 972 
 973                 /* high-priority msgs are not subject to flow control */
 974                 if (zfds->zfd_is_flowcon && type == M_DATA)
 975                         must_queue = B_TRUE;
 976         }
 977 
 978         if (zfd_switch(RD(qp)) == NULL) {
 979                 DBG1("wput to %s side (no one listening)", zfd_side(qp));
 980                 switch (type) {
 981                 case M_FLUSH:
 982                         handle_mflush(qp, mp);
 983                         break;
 984                 case M_IOCTL:
 985                         miocnak(qp, mp, 0, 0);
 986                         break;
 987                 default:
 988                         freemsg(mp);
 989                         break;
 990                 }
 991                 return;
 992         }
 993 
 994         if (type >= QPCTL) {
 995                 DBG1("(hipri) wput, %s side", zfd_side(qp));
 996                 switch (type) {
 997                 case M_READ:            /* supposedly from ldterm? */
 998                         DBG("zfd_wput: tossing M_READ\n");
 999                         freemsg(mp);
1000                         break;
1001                 case M_FLUSH:
1002                         handle_mflush(qp, mp);
1003                         break;
1004                 default:
1005                         /*
1006                          * Put this to the other side.
1007                          */
1008                         ASSERT(zfd_switch(RD(qp)) != NULL);
1009                         putnext(zfd_switch(RD(qp)), mp);
1010                         break;
1011                 }
1012                 DBG1("done (hipri) wput, %s side", zfd_side(qp));
1013                 return;
1014         }
1015 
1016         /*
1017          * If the primary stream has been stopped for flow control then
1018          * enqueue the msg, otherwise only putnext if there isn't already
1019          * something in the queue. If we don't do this then things would wind
1020          * up out of order.
1021          */
1022         if (!must_queue && qp->q_first == NULL &&
1023             bcanputnext(RD(zfd_switch(qp)), mp->b_band)) {
1024                 putnext(RD(zfd_switch(qp)), mp);
1025         } else {
1026                 /*
1027                  * zfd_wsrv expects msgs queued on the primary queue. Those
1028                  * will be handled by zfd_wsrv after zfd_rsrv performs the
1029                  * qenable on the proper queue.
1030                  */
1031                 (void) putq(qp, mp);
1032         }
1033 
1034         DBG1("done wput, %s side", zfd_side(qp));
1035 }
1036 
1037 /*
1038  * Read server
1039  *
1040  * For primary stream:
1041  * Under normal execution rsrv(9E) is symmetric for master and slave, so
1042  * zfd_rsrv() can handle both without splitting up the codepath. We do this by
1043  * enabling the write side of the partner.  This triggers the partner to send
1044  * messages queued on its write side to this queue's read side.
1045  *
1046  * For log stream:
1047  * Internally we've queued up the msgs that we've teed off to the log stream
1048  * so when we're invoked we need to pass these along.
1049  */
1050 static void
1051 zfd_rsrv(queue_t *qp)
1052 {
1053         zfd_state_t *zfds;
1054         zfds = (zfd_state_t *)qp->q_ptr;
1055 
1056         /*
1057          * log stream server
1058          */
1059         if (zfds->zfd_muxt == ZFD_LOG_STREAM && zfds->zfd_slave_rdq != NULL) {
1060                 queue_t *log_qp;
1061                 mblk_t *mp;
1062 
1063                 log_qp = RD(zfds->zfd_slave_rdq);
1064 
1065                 if ((zfds->zfd_state & ZFD_STATE_SOPEN) != 0) {
1066                         zfd_state_t *pzfds = zfds->zfd_inst_pri;
1067 
1068                         while ((mp = getq(qp)) != NULL) {
1069                                 if (bcanputnext(log_qp, mp->b_band)) {
1070                                         putnext(log_qp, mp);
1071                                 } else {
1072                                         (void) putbq(log_qp, mp);
1073                                         break;
1074                                 }
1075                         }
1076 
1077                         if (log_qp->q_count < log_qp->q_lowat) {
1078                                 DTRACE_PROBE(zfd__flow__on);
1079                                 pzfds->zfd_is_flowcon = B_FALSE;
1080                                 if (pzfds->zfd_master_rdq != NULL)
1081                                         qenable(RD(pzfds->zfd_master_rdq));
1082                         }
1083                 } else {
1084                         /* No longer open, drain the queue */
1085                         while ((mp = getq(qp)) != NULL) {
1086                                 freemsg(mp);
1087                         }
1088                         flushq(qp, FLUSHALL);
1089                 }
1090                 return;
1091         }
1092 
1093         /*
1094          * Care must be taken here, as either of the master or slave side
1095          * qptr could be NULL.
1096          */
1097         ASSERT(qp == zfds->zfd_master_rdq || qp == zfds->zfd_slave_rdq);
1098         if (zfd_switch(qp) == NULL) {
1099                 DBG("zfd_rsrv: other side isn't listening\n");
1100                 return;
1101         }
1102         qenable(WR(zfd_switch(qp)));
1103 }
1104 
1105 /*
1106  * Write server
1107  *
1108  * This routine is symmetric for master and slave, so it handles both without
1109  * splitting up the codepath.
1110  *
1111  * If there are messages on this queue that can be sent to the other, send
1112  * them via putnext(). Else, if queued messages cannot be sent, leave them
1113  * on this queue.
1114  */
1115 static void
1116 zfd_wsrv(queue_t *qp)
1117 {
1118         queue_t *swq;
1119         mblk_t *mp;
1120         zfd_state_t *zfds = (zfd_state_t *)qp->q_ptr;
1121 
1122         ASSERT(zfds != NULL);
1123 
1124         /*
1125          * Partner has no read queue, so take the data, and throw it away.
1126          */
1127         if (zfd_switch(RD(qp)) == NULL) {
1128                 DBG("zfd_wsrv: other side isn't listening");
1129                 while ((mp = getq(qp)) != NULL) {
1130                         if (mp->b_datap->db_type == M_IOCTL)
1131                                 miocnak(qp, mp, 0, 0);
1132                         else
1133                                 freemsg(mp);
1134                 }
1135                 flushq(qp, FLUSHALL);
1136                 return;
1137         }
1138 
1139         swq = RD(zfd_switch(qp));
1140 
1141         /*
1142          * while there are messages on this write queue...
1143          */
1144         while (!zfds->zfd_is_flowcon && (mp = getq(qp)) != NULL) {
1145                 /*
1146                  * Due to the way zfd_wput is implemented, we should never
1147                  * see a high priority control message here.
1148                  */
1149                 ASSERT(mp->b_datap->db_type < QPCTL);
1150 
1151                 if (bcanputnext(swq, mp->b_band)) {
1152                         putnext(swq, mp);
1153                 } else {
1154                         (void) putbq(qp, mp);
1155                         break;
1156                 }
1157         }
1158 }