Print this page
Reduce lint
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/io/zfd.c
+++ new/usr/src/uts/common/io/zfd.c
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11
12 12 /*
13 13 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
14 14 * Copyright 2016 Joyent, Inc. All rights reserved.
15 15 */
16 16
17 17 /*
18 18 * Zone File Descriptor Driver.
19 19 *
20 20 * This driver is derived from the zcons driver which is in turn derived from
21 21 * the pts/ptm drivers. The purpose is to expose file descriptors within the
22 22 * zone which are connected to zoneadmd and used for logging or an interactive
23 23 * connection to a process within the zone.
24 24 *
25 25 * Its implementation is straightforward. Each instance of the driver
26 26 * represents a global-zone/local-zone pair. Unlike the zcons device, zoneadmd
27 27 * uses these devices unidirectionally to provide stdin, stdout and stderr to
28 28 * the process within the zone.
29 29 *
30 30 * Instances of zfd are onlined as children of /pseudo/zfdnex@2/ by zoneadmd,
31 31 * using the devctl framework; thus the driver does not need to maintain any
32 32 * sort of "admin" node.
33 33 *
34 34 * The driver shuttles I/O from master side to slave side and back. In a break
35 35 * from the pts/ptm semantics, if one side is not open, I/O directed towards
36 36 * it will simply be discarded. This is so that if zoneadmd is not holding the
37 37 * master side fd open (i.e. it has died somehow), processes in the zone do not
38 38 * experience any errors and I/O to the fd does not cause the process to hang.
39 39 *
40 40 * The driver can also act as a multiplexer so that data written to the
41 41 * slave side within the zone is also redirected back to another zfd device
42 42 * inside the zone for consumption (i.e. it can be read). The intention is
43 43 * that a logging process within the zone can consume data that is being
44 44 * written by an application onto the primary stream. This is essentially
45 45 * a tee off of the primary stream into a log stream. This tee can also be
46 46 * configured to be flow controlled via an ioctl. Flow control happens on the
47 47 * primary stream and is used to ensure that the log stream receives all of
48 48 * the messages off the primary stream when consumption of the data off of
49 49 * the log stream gets behind. Configuring for flow control implies that the
50 50 * application writing to the primary stream will be blocked when the log
51 51 * consumer gets behind. Note that closing the log stream (e.g. when the zone
52 52 * halts) will cause the loss of all messages queued in the stream.
53 53 *
54 54 * The zone's zfd device configuration is driven by zoneadmd and a zone mode.
55 55 * The mode, which is controlled by the zone attribute "zlog-mode" is somewhat
56 56 * of a misnomer since its purpose has evolved. The attribute can have a
57 57 * variety of values, but the lowest two positions are used to control how many
58 58 * zfd devices are created inside the zone and if the primary stream is a tty.
59 59 *
60 60 * Here is a summary of how the 4 modes control what zfd devices are created
61 61 * and how they're used:
62 62 *
63 63 * t-: 1 stdio zdev (0) configured as a tty
64 64 * --: 3 stdio zdevs (0, 1, 2), not configured as a tty
65 65 * tn: 1 stdio zdev (0) configured as a tty, 1 additional zdev (1)
66 66 * -n: 3 stdio zdevs (0, 1, 2), not tty, 2 additional zdevs (3, 4)
67 67 *
68 68 * With the 't' flag set, stdin/out/err is multiplexed onto a single full-duplex
69 69 * stream which is configured as a tty. That is, ptem, ldterm and ttcompat are
70 70 * autopushed onto the stream when the slave side is opened. There is only a
71 71 * single zfd dev (0) needed for the primary stream.
72 72 *
73 73 * When the 'n' flag is set, it is assumed that output logging will be done
74 74 * within the zone itself. In this configuration 1 or 2 additional zfd devices,
75 75 * depending on tty mode ('t' flag) are created within the zone. An application
76 76 * can then configure the zfd streams driver into a multiplexer. Output from
77 77 * the stdout/stderr zfd(s) will be teed into the corresponding logging zfd(s)
78 78 * within the zone.
79 79 *
80 80 * The following is a diagram of how this works for a '-n' configuration:
81 81 *
82 82 *
83 83 * zoneadmd (for zlogin -I stdout)
84 84 * GZ: ^
85 85 * |
86 86 * --------------------------
87 87 * ^
88 88 * NGZ: |
89 89 * app >1 -> zfd1 -> zfd3 -> logger (for logger to consume app's stdout)
90 90 *
91 91 * There would be a similar path for the app's stderr into zfd4 for the logger
92 92 * to consume stderr.
93 93 */
94 94
95 95 #include <sys/types.h>
96 96 #include <sys/cmn_err.h>
97 97 #include <sys/conf.h>
98 98 #include <sys/cred.h>
99 99 #include <sys/ddi.h>
100 100 #include <sys/debug.h>
101 101 #include <sys/devops.h>
102 102 #include <sys/errno.h>
103 103 #include <sys/file.h>
104 104 #include <sys/kstr.h>
105 105 #include <sys/modctl.h>
106 106 #include <sys/param.h>
107 107 #include <sys/stat.h>
108 108 #include <sys/stream.h>
109 109 #include <sys/stropts.h>
110 110 #include <sys/strsun.h>
111 111 #include <sys/sunddi.h>
112 112 #include <sys/sysmacros.h>
113 113 #include <sys/systm.h>
114 114 #include <sys/types.h>
115 115 #include <sys/zfd.h>
116 116 #include <sys/vnode.h>
117 117 #include <sys/fs/snode.h>
118 118 #include <sys/zone.h>
119 119 #include <sys/sdt.h>
120 120
121 121 static kmutex_t zfd_mux_lock;
122 122
123 123 static int zfd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
124 124 static int zfd_attach(dev_info_t *, ddi_attach_cmd_t);
125 125 static int zfd_detach(dev_info_t *, ddi_detach_cmd_t);
126 126
127 127 static int zfd_open(queue_t *, dev_t *, int, int, cred_t *);
128 128 static int zfd_close(queue_t *, int, cred_t *);
129 129 static void zfd_wput(queue_t *, mblk_t *);
130 130 static void zfd_rsrv(queue_t *);
131 131 static void zfd_wsrv(queue_t *);
132 132
133 133 /*
134 134 * The instance number is encoded in the dev_t in the minor number; the lowest
135 135 * bit of the minor number is used to track the master vs. slave side of the
136 136 * fd. The rest of the bits in the minor number are the instance.
137 137 */
138 138 #define ZFD_MASTER_MINOR 0
139 139 #define ZFD_SLAVE_MINOR 1
140 140
141 141 #define ZFD_INSTANCE(x) (getminor((x)) >> 1)
142 142 #define ZFD_NODE(x) (getminor((x)) & 0x01)
143 143
144 144 /*
145 145 * This macro converts a zfd_state_t pointer to the associated slave minor
146 146 * node's dev_t.
147 147 */
148 148 #define ZFD_STATE_TO_SLAVEDEV(x) \
149 149 (makedevice(ddi_driver_major((x)->zfd_devinfo), \
150 150 (minor_t)(ddi_get_instance((x)->zfd_devinfo) << 1 | ZFD_SLAVE_MINOR)))
151 151
int zfd_debug = 0;

/*
 * Debug tracing helpers: emit a CE_NOTE message only when zfd_debug is
 * non-zero.  Each macro expands to a single void expression rather than a
 * bare "if" statement, so it can be used as the body of an unbraced
 * if/else without capturing the caller's "else".
 */
#define	DBG(a)		\
	((void)(zfd_debug ? (cmn_err(CE_NOTE, a), 0) : 0))
#define	DBG1(a, b)	\
	((void)(zfd_debug ? (cmn_err(CE_NOTE, a, b), 0) : 0))
155 155
156 156 /*
157 157 * ZFD Pseudo Terminal Module: stream data structure definitions,
158 158 * based on zcons.
159 159 */
160 160 static struct module_info zfd_info = {
161 161 0x20FD, /* ZOFD - 8445 */
162 162 "zfd",
163 163 0, /* min packet size */
164 164 INFPSZ, /* max packet size - infinity */
165 165 2048, /* high water */
166 166 128 /* low water */
167 167 };
168 168
169 169 static struct qinit zfd_rinit = {
170 170 NULL,
171 171 (int (*)()) zfd_rsrv,
172 172 zfd_open,
173 173 zfd_close,
174 174 NULL,
175 175 &zfd_info,
176 176 NULL
177 177 };
178 178
179 179 static struct qinit zfd_winit = {
180 180 (int (*)()) zfd_wput,
181 181 (int (*)()) zfd_wsrv,
182 182 NULL,
183 183 NULL,
184 184 NULL,
185 185 &zfd_info,
186 186 NULL
187 187 };
188 188
189 189 static struct streamtab zfd_tab_info = {
190 190 &zfd_rinit,
191 191 &zfd_winit,
192 192 NULL,
193 193 NULL
194 194 };
195 195
196 196 #define ZFD_CONF_FLAG (D_MP | D_MTQPAIR | D_MTOUTPERIM | D_MTOCEXCL)
197 197
198 198 /*
199 199 * this will define (struct cb_ops cb_zfd_ops) and (struct dev_ops zfd_ops)
200 200 */
201 201 DDI_DEFINE_STREAM_OPS(zfd_ops, nulldev, nulldev, zfd_attach, zfd_detach, \
202 202 nodev, zfd_getinfo, ZFD_CONF_FLAG, &zfd_tab_info, \
203 203 ddi_quiesce_not_needed);
204 204
205 205 /*
206 206 * Module linkage information for the kernel.
207 207 */
208 208
209 209 static struct modldrv modldrv = {
210 210 &mod_driverops, /* Type of module (this is a pseudo driver) */
211 211 "Zone FD driver", /* description of module */
212 212 &zfd_ops /* driver ops */
213 213 };
214 214
215 215 static struct modlinkage modlinkage = {
216 216 MODREV_1,
217 217 &modldrv,
218 218 NULL
219 219 };
220 220
221 221 typedef enum {
222 222 ZFD_NO_MUX,
223 223 ZFD_PRIMARY_STREAM,
224 224 ZFD_LOG_STREAM
225 225 } zfd_mux_type_t;
226 226
227 227 typedef struct zfd_state {
228 228 dev_info_t *zfd_devinfo; /* instance info */
229 229 queue_t *zfd_master_rdq; /* GZ read queue */
230 230 queue_t *zfd_slave_rdq; /* in-zone read queue */
231 231 int zfd_state; /* ZFD_STATE_MOPEN, ZFD_STATE_SOPEN */
232 232 int zfd_tty; /* ZFD_MAKETTY - strm mods will push */
233 233 boolean_t zfd_is_flowcon; /* primary stream flow stopped */
234 234 boolean_t zfd_allow_flowcon; /* use flow control */
235 235 zfd_mux_type_t zfd_muxt; /* state type: none, primary, log */
236 236 struct zfd_state *zfd_inst_pri; /* log state's primary ptr */
237 237 struct zfd_state *zfd_inst_log; /* primary state's log ptr */
238 238 } zfd_state_t;
239 239
240 240 #define ZFD_STATE_MOPEN 0x01
241 241 #define ZFD_STATE_SOPEN 0x02
242 242
243 243 static void *zfd_soft_state;
244 244
245 245 /*
246 246 * List of STREAMS modules that are autopushed onto a slave instance when it is
247 247 * opened, but only if the ZFD_MAKETTY ioctl has first been received by the
248 248 * master.
249 249 */
250 250 static char *zfd_mods[] = {
251 251 "ptem",
252 252 "ldterm",
253 253 "ttcompat",
254 254 NULL
255 255 };
256 256
257 257 int
258 258 _init(void)
259 259 {
260 260 int err;
261 261
262 262 if ((err = ddi_soft_state_init(&zfd_soft_state, sizeof (zfd_state_t),
263 263 0)) != 0) {
264 264 return (err);
265 265 }
266 266
267 267 if ((err = mod_install(&modlinkage)) != 0)
268 268 ddi_soft_state_fini(zfd_soft_state);
269 269
270 270 mutex_init(&zfd_mux_lock, NULL, MUTEX_DEFAULT, NULL);
271 271 return (err);
272 272 }
273 273
274 274
275 275 int
276 276 _fini(void)
277 277 {
278 278 int err;
279 279
280 280 if ((err = mod_remove(&modlinkage)) != 0) {
281 281 return (err);
282 282 }
283 283
284 284 ddi_soft_state_fini(&zfd_soft_state);
285 285 mutex_destroy(&zfd_mux_lock);
286 286 return (0);
287 287 }
288 288
289 289 int
290 290 _info(struct modinfo *modinfop)
291 291 {
292 292 return (mod_info(&modlinkage, modinfop));
293 293 }
294 294
295 295 static int
296 296 zfd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
297 297 {
298 298 zfd_state_t *zfds;
299 299 int instance;
300 300 char masternm[ZFD_NAME_LEN], slavenm[ZFD_NAME_LEN];
301 301
302 302 if (cmd != DDI_ATTACH)
303 303 return (DDI_FAILURE);
304 304
305 305 instance = ddi_get_instance(dip);
306 306 if (ddi_soft_state_zalloc(zfd_soft_state, instance) != DDI_SUCCESS)
307 307 return (DDI_FAILURE);
308 308
309 309 (void) snprintf(masternm, sizeof (masternm), "%s%d", ZFD_MASTER_NAME,
310 310 instance);
311 311 (void) snprintf(slavenm, sizeof (slavenm), "%s%d", ZFD_SLAVE_NAME,
312 312 instance);
313 313
314 314 /*
315 315 * Create the master and slave minor nodes.
316 316 */
317 317 if ((ddi_create_minor_node(dip, slavenm, S_IFCHR,
318 318 instance << 1 | ZFD_SLAVE_MINOR, DDI_PSEUDO, 0) == DDI_FAILURE) ||
319 319 (ddi_create_minor_node(dip, masternm, S_IFCHR,
320 320 instance << 1 | ZFD_MASTER_MINOR, DDI_PSEUDO, 0) == DDI_FAILURE)) {
321 321 ddi_remove_minor_node(dip, NULL);
322 322 ddi_soft_state_free(zfd_soft_state, instance);
323 323 return (DDI_FAILURE);
324 324 }
325 325
326 326 VERIFY((zfds = ddi_get_soft_state(zfd_soft_state, instance)) != NULL);
327 327 zfds->zfd_devinfo = dip;
328 328 zfds->zfd_tty = 0;
329 329 zfds->zfd_muxt = ZFD_NO_MUX;
330 330 zfds->zfd_inst_log = NULL;
331 331 return (DDI_SUCCESS);
332 332 }
333 333
334 334 static int
335 335 zfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
336 336 {
337 337 zfd_state_t *zfds;
338 338 int instance;
339 339
340 340 if (cmd != DDI_DETACH)
341 341 return (DDI_FAILURE);
342 342
343 343 instance = ddi_get_instance(dip);
344 344 if ((zfds = ddi_get_soft_state(zfd_soft_state, instance)) == NULL)
345 345 return (DDI_FAILURE);
346 346
347 347 if ((zfds->zfd_state & ZFD_STATE_MOPEN) ||
348 348 (zfds->zfd_state & ZFD_STATE_SOPEN)) {
349 349 DBG1("zfd_detach: device (dip=%p) still open\n", (void *)dip);
350 350 return (DDI_FAILURE);
351 351 }
352 352
353 353 ddi_remove_minor_node(dip, NULL);
354 354 ddi_soft_state_free(zfd_soft_state, instance);
355 355
356 356 return (DDI_SUCCESS);
357 357 }
358 358
359 359 /*
360 360 * zfd_getinfo()
361 361 * getinfo(9e) entrypoint.
362 362 */
363 363 /*ARGSUSED*/
364 364 static int
365 365 zfd_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
366 366 {
367 367 zfd_state_t *zfds;
368 368 int instance = ZFD_INSTANCE((dev_t)arg);
369 369
370 370 switch (infocmd) {
371 371 case DDI_INFO_DEVT2DEVINFO:
372 372 if ((zfds = ddi_get_soft_state(zfd_soft_state,
373 373 instance)) == NULL)
374 374 return (DDI_FAILURE);
375 375 *result = zfds->zfd_devinfo;
376 376 return (DDI_SUCCESS);
377 377 case DDI_INFO_DEVT2INSTANCE:
378 378 *result = (void *)(uintptr_t)instance;
379 379 return (DDI_SUCCESS);
380 380 }
381 381 return (DDI_FAILURE);
382 382 }
383 383
384 384 /*
385 385 * Return the equivalent queue from the other side of the relationship.
386 386 * e.g.: given the slave's write queue, return the master's write queue.
387 387 */
388 388 static queue_t *
389 389 zfd_switch(queue_t *qp)
390 390 {
391 391 zfd_state_t *zfds = qp->q_ptr;
392 392 ASSERT(zfds != NULL);
393 393
394 394 if (qp == zfds->zfd_master_rdq)
395 395 return (zfds->zfd_slave_rdq);
396 396 else if (OTHERQ(qp) == zfds->zfd_master_rdq && zfds->zfd_slave_rdq
397 397 != NULL)
398 398 return (OTHERQ(zfds->zfd_slave_rdq));
399 399 else if (qp == zfds->zfd_slave_rdq)
400 400 return (zfds->zfd_master_rdq);
401 401 else if (OTHERQ(qp) == zfds->zfd_slave_rdq && zfds->zfd_master_rdq
402 402 != NULL)
403 403 return (OTHERQ(zfds->zfd_master_rdq));
404 404 else
405 405 return (NULL);
406 406 }
407 407
408 408 /*
409 409 * For debugging and outputting messages. Returns the name of the side of
410 410 * the relationship associated with this queue.
411 411 */
412 412 static const char *
413 413 zfd_side(queue_t *qp)
414 414 {
415 415 zfd_state_t *zfds = qp->q_ptr;
416 416 ASSERT(zfds != NULL);
417 417
418 418 if (qp == zfds->zfd_master_rdq ||
419 419 OTHERQ(qp) == zfds->zfd_master_rdq) {
420 420 return ("master");
421 421 }
422 422 ASSERT(qp == zfds->zfd_slave_rdq || OTHERQ(qp) == zfds->zfd_slave_rdq);
423 423 return ("slave");
424 424 }
425 425
426 426 /*ARGSUSED*/
427 427 static int
428 428 zfd_master_open(zfd_state_t *zfds,
429 429 queue_t *rqp, /* pointer to the read side queue */
430 430 dev_t *devp, /* pointer to stream tail's dev */
431 431 int oflag, /* the user open(2) supplied flags */
432 432 int sflag, /* open state flag */
433 433 cred_t *credp) /* credentials */
434 434 {
435 435 mblk_t *mop;
436 436 struct stroptions *sop;
437 437
438 438 /*
439 439 * Enforce exclusivity on the master side; the only consumer should
440 440 * be the zoneadmd for the zone.
441 441 */
442 442 if ((zfds->zfd_state & ZFD_STATE_MOPEN) != 0)
443 443 return (EBUSY);
444 444
445 445 if ((mop = allocb(sizeof (struct stroptions), BPRI_MED)) == NULL) {
446 446 DBG("zfd_master_open(): mop allocation failed\n");
447 447 return (ENOMEM);
448 448 }
449 449
450 450 zfds->zfd_state |= ZFD_STATE_MOPEN;
451 451
452 452 /*
453 453 * q_ptr stores driver private data; stash the soft state data on both
454 454 * read and write sides of the queue.
455 455 */
456 456 WR(rqp)->q_ptr = rqp->q_ptr = zfds;
457 457 qprocson(rqp);
458 458
459 459 /*
460 460 * Following qprocson(), the master side is fully plumbed into the
461 461 * STREAM and may send/receive messages. Setting zfds->zfd_master_rdq
462 462 * will allow the slave to send messages to us (the master).
463 463 * This cannot occur before qprocson() because the master is not
464 464 * ready to process them until that point.
465 465 */
466 466 zfds->zfd_master_rdq = rqp;
467 467
468 468 /*
469 469 * set up hi/lo water marks on stream head read queue and add
470 470 * controlling tty as needed.
471 471 */
472 472 mop->b_datap->db_type = M_SETOPTS;
473 473 mop->b_wptr += sizeof (struct stroptions);
474 474 sop = (struct stroptions *)(void *)mop->b_rptr;
475 475 if (oflag & FNOCTTY)
476 476 sop->so_flags = SO_HIWAT | SO_LOWAT;
477 477 else
478 478 sop->so_flags = SO_HIWAT | SO_LOWAT | SO_ISTTY;
479 479 sop->so_hiwat = 512;
480 480 sop->so_lowat = 256;
481 481 putnext(rqp, mop);
482 482
483 483 return (0);
484 484 }
485 485
486 486 /*ARGSUSED*/
487 487 static int
488 488 zfd_slave_open(zfd_state_t *zfds,
489 489 queue_t *rqp, /* pointer to the read side queue */
490 490 dev_t *devp, /* pointer to stream tail's dev */
491 491 int oflag, /* the user open(2) supplied flags */
492 492 int sflag, /* open state flag */
493 493 cred_t *credp) /* credentials */
494 494 {
495 495 mblk_t *mop;
496 496 struct stroptions *sop;
497 497 /*
498 498 * The slave side can be opened as many times as needed.
499 499 */
500 500 if ((zfds->zfd_state & ZFD_STATE_SOPEN) != 0) {
501 501 ASSERT((rqp != NULL) && (WR(rqp)->q_ptr == zfds));
502 502 return (0);
503 503 }
504 504
505 505 /* A log stream is read-only */
506 506 if (zfds->zfd_muxt == ZFD_LOG_STREAM &&
507 507 (oflag & (FREAD | FWRITE)) != FREAD)
508 508 return (EINVAL);
509 509
510 510 if (zfds->zfd_tty == 1) {
511 511 major_t major;
512 512 minor_t minor;
513 513 minor_t lastminor;
514 514 uint_t anchorindex;
515 515
516 516 /*
517 517 * Set up sad(7D) so that the necessary STREAMS modules will
518 518 * be in place. A wrinkle is that 'ptem' must be anchored
519 519 * in place (see streamio(7i)) because we always want the
520 520 * fd to have terminal semantics.
521 521 */
522 522 minor =
523 523 ddi_get_instance(zfds->zfd_devinfo) << 1 | ZFD_SLAVE_MINOR;
524 524 major = ddi_driver_major(zfds->zfd_devinfo);
525 525 lastminor = 0;
526 526 anchorindex = 1;
527 527 if (kstr_autopush(SET_AUTOPUSH, &major, &minor, &lastminor,
528 528 &anchorindex, zfd_mods) != 0) {
529 529 DBG("zfd_slave_open(): kstr_autopush() failed\n");
530 530 return (EIO);
531 531 }
532 532 }
533 533
534 534 if ((mop = allocb(sizeof (struct stroptions), BPRI_MED)) == NULL) {
535 535 DBG("zfd_slave_open(): mop allocation failed\n");
536 536 return (ENOMEM);
537 537 }
538 538
539 539 zfds->zfd_state |= ZFD_STATE_SOPEN;
540 540
541 541 /*
542 542 * q_ptr stores driver private data; stash the soft state data on both
543 543 * read and write sides of the queue.
544 544 */
545 545 WR(rqp)->q_ptr = rqp->q_ptr = zfds;
546 546
547 547 qprocson(rqp);
548 548
549 549 /*
550 550 * Must follow qprocson(), since we aren't ready to process until then.
551 551 */
552 552 zfds->zfd_slave_rdq = rqp;
553 553
554 554 /*
555 555 * set up hi/lo water marks on stream head read queue and add
556 556 * controlling tty as needed.
557 557 */
558 558 mop->b_datap->db_type = M_SETOPTS;
559 559 mop->b_wptr += sizeof (struct stroptions);
560 560 sop = (struct stroptions *)(void *)mop->b_rptr;
561 561 sop->so_flags = SO_HIWAT | SO_LOWAT | SO_ISTTY;
562 562 sop->so_hiwat = 512;
563 563 sop->so_lowat = 256;
564 564 putnext(rqp, mop);
565 565
566 566 return (0);
567 567 }
568 568
569 569 /*
570 570 * open(9e) entrypoint; checks sflag, and rejects anything unordinary.
571 571 */
572 572 static int
573 573 zfd_open(queue_t *rqp, /* pointer to the read side queue */
574 574 dev_t *devp, /* pointer to stream tail's dev */
575 575 int oflag, /* the user open(2) supplied flags */
576 576 int sflag, /* open state flag */
577 577 cred_t *credp) /* credentials */
578 578 {
579 579 int instance = ZFD_INSTANCE(*devp);
580 580 int ret;
581 581 zfd_state_t *zfds;
582 582
583 583 if (sflag != 0)
584 584 return (EINVAL);
585 585
586 586 if ((zfds = ddi_get_soft_state(zfd_soft_state, instance)) == NULL)
587 587 return (ENXIO);
588 588
589 589 switch (ZFD_NODE(*devp)) {
590 590 case ZFD_MASTER_MINOR:
591 591 ret = zfd_master_open(zfds, rqp, devp, oflag, sflag, credp);
592 592 break;
593 593 case ZFD_SLAVE_MINOR:
594 594 ret = zfd_slave_open(zfds, rqp, devp, oflag, sflag, credp);
595 595 /*
596 596 * If we just opened the log stream and flow control has
597 597 * been enabled, we want to make sure the primary stream can
598 598 * start flowing.
599 599 */
600 600 if (ret == 0 && zfds->zfd_muxt == ZFD_LOG_STREAM &&
601 601 zfds->zfd_inst_pri->zfd_allow_flowcon) {
602 602 zfds->zfd_inst_pri->zfd_is_flowcon = B_FALSE;
603 603 if (zfds->zfd_inst_pri->zfd_master_rdq != NULL)
604 604 qenable(RD(zfds->zfd_inst_pri->zfd_master_rdq));
605 605 }
606 606 break;
607 607 default:
608 608 ret = ENXIO;
609 609 break;
610 610 }
611 611
612 612 return (ret);
613 613 }
614 614
615 615 /*
616 616 * close(9e) entrypoint.
617 617 */
618 618 /*ARGSUSED1*/
619 619 static int
620 620 zfd_close(queue_t *rqp, int flag, cred_t *credp)
621 621 {
622 622 queue_t *wqp;
623 623 mblk_t *bp;
624 624 zfd_state_t *zfds;
625 625 major_t major;
626 626 minor_t minor;
627 627
628 628 zfds = (zfd_state_t *)rqp->q_ptr;
629 629
630 630 if (rqp == zfds->zfd_master_rdq) {
631 631 DBG("Closing master side");
632 632
633 633 zfds->zfd_master_rdq = NULL;
634 634 zfds->zfd_state &= ~ZFD_STATE_MOPEN;
635 635
636 636 /*
637 637 * qenable slave side write queue so that it can flush
638 638 * its messages as master's read queue is going away
639 639 */
640 640 if (zfds->zfd_slave_rdq != NULL) {
641 641 qenable(WR(zfds->zfd_slave_rdq));
642 642 }
643 643
644 644 qprocsoff(rqp);
645 645 WR(rqp)->q_ptr = rqp->q_ptr = NULL;
646 646
647 647 } else if (rqp == zfds->zfd_slave_rdq) {
648 648
649 649 DBG("Closing slave side");
650 650 zfds->zfd_state &= ~ZFD_STATE_SOPEN;
651 651 zfds->zfd_slave_rdq = NULL;
652 652
653 653 wqp = WR(rqp);
654 654 while ((bp = getq(wqp)) != NULL) {
655 655 if (zfds->zfd_master_rdq != NULL)
656 656 putnext(zfds->zfd_master_rdq, bp);
657 657 else if (bp->b_datap->db_type == M_IOCTL)
658 658 miocnak(wqp, bp, 0, 0);
659 659 else
660 660 freemsg(bp);
661 661 }
662 662
663 663 /*
664 664 * Qenable master side write queue so that it can flush its
665 665 * messages as slaves's read queue is going away.
666 666 */
667 667 if (zfds->zfd_master_rdq != NULL)
668 668 qenable(WR(zfds->zfd_master_rdq));
669 669
670 670 /*
671 671 * Qenable primary stream if necessary.
672 672 */
673 673 if (zfds->zfd_muxt == ZFD_LOG_STREAM &&
674 674 zfds->zfd_inst_pri->zfd_allow_flowcon) {
675 675 zfds->zfd_inst_pri->zfd_is_flowcon = B_FALSE;
676 676 if (zfds->zfd_inst_pri->zfd_master_rdq != NULL)
677 677 qenable(RD(zfds->zfd_inst_pri->zfd_master_rdq));
678 678 }
679 679
680 680 qprocsoff(rqp);
681 681 WR(rqp)->q_ptr = rqp->q_ptr = NULL;
682 682
683 683 if (zfds->zfd_tty == 1) {
684 684 /*
685 685 * Clear the sad configuration so that reopening
686 686 * doesn't fail to set up sad configuration.
687 687 */
688 688 major = ddi_driver_major(zfds->zfd_devinfo);
689 689 minor = ddi_get_instance(zfds->zfd_devinfo) << 1 |
690 690 ZFD_SLAVE_MINOR;
691 691 (void) kstr_autopush(CLR_AUTOPUSH, &major, &minor,
692 692 NULL, NULL, NULL);
693 693 }
694 694 }
695 695
696 696 return (0);
697 697 }
698 698
699 699 static void
700 700 handle_mflush(queue_t *qp, mblk_t *mp)
701 701 {
702 702 mblk_t *nmp;
703 703 DBG1("M_FLUSH on %s side", zfd_side(qp));
704 704
705 705 if (*mp->b_rptr & FLUSHW) {
706 706 DBG1("M_FLUSH, FLUSHW, %s side", zfd_side(qp));
707 707 flushq(qp, FLUSHDATA);
708 708 *mp->b_rptr &= ~FLUSHW;
709 709 if ((*mp->b_rptr & FLUSHR) == 0) {
710 710 /*
711 711 * FLUSHW only. Change to FLUSHR and putnext other side,
712 712 * then we are done.
713 713 */
714 714 *mp->b_rptr |= FLUSHR;
715 715 if (zfd_switch(RD(qp)) != NULL) {
716 716 putnext(zfd_switch(RD(qp)), mp);
717 717 return;
718 718 }
719 719 } else if ((zfd_switch(RD(qp)) != NULL) &&
720 720 (nmp = copyb(mp)) != NULL) {
721 721 /*
722 722 * It is a FLUSHRW; we copy the mblk and send
723 723 * it to the other side, since we still need to use
724 724 * the mblk in FLUSHR processing, below.
725 725 */
726 726 putnext(zfd_switch(RD(qp)), nmp);
727 727 }
728 728 }
729 729
730 730 if (*mp->b_rptr & FLUSHR) {
731 731 DBG("qreply(qp) turning FLUSHR around\n");
732 732 qreply(qp, mp);
733 733 return;
734 734 }
735 735 freemsg(mp);
736 736 }
737 737
738 738 /*
739 739 * Evaluate the various conditionals to determine if we're teeing into a log
740 740 * stream and if the primary stream should be flow controlled. This function
741 741 * can set the zfd_is_flowcon flag as a side effect.
742 742 *
743 743 * When teeing with flow control, we always queue the teed msg here and if
744 744 * the queue is getting full, we set zfd_is_flowcon. The primary stream will
745 745 * always queue when zfd_is_flowcon and will also not be served when
746 746 * zfd_is_flowcon is set. This causes backpressure on the primary stream
747 747 * until the teed queue can drain.
748 748 */
749 749 static void
750 750 zfd_tee_handler(zfd_state_t *zfds, unsigned char type, mblk_t *mp)
751 751 {
752 752 queue_t *log_qp;
753 753 zfd_state_t *log_zfds;
754 754 mblk_t *lmp;
755 755
756 756 if (zfds->zfd_muxt != ZFD_PRIMARY_STREAM)
757 757 return;
758 758
759 759 if (type != M_DATA)
760 760 return;
761 761
762 762 log_zfds = zfds->zfd_inst_log;
763 763 if (log_zfds == NULL)
764 764 return;
765 765
766 766 ASSERT(log_zfds->zfd_muxt == ZFD_LOG_STREAM);
767 767
768 768 if ((log_zfds->zfd_state & ZFD_STATE_SOPEN) == 0) {
769 769 if (zfds->zfd_allow_flowcon)
770 770 zfds->zfd_is_flowcon = B_TRUE;
771 771 return;
772 772 }
773 773
774 774 /* The zfd_slave_rdq is null until the log dev is opened in the zone */
775 775 log_qp = RD(log_zfds->zfd_slave_rdq);
776 776 DTRACE_PROBE2(zfd__tee__check, void *, log_qp, void *, zfds);
777 777
778 778 if (!zfds->zfd_allow_flowcon) {
779 779 /*
780 780 * We're not supposed to tee with flow control and the tee is
781 781 * full so we skip teeing into the log stream.
782 782 */
783 783 if ((log_qp->q_flag & QFULL) != 0)
784 784 return;
785 785 }
786 786
787 787 /*
788 788 * Tee the message into the log stream.
789 789 */
790 790 lmp = dupmsg(mp);
791 791 if (lmp == NULL) {
792 792 if (zfds->zfd_allow_flowcon)
793 793 zfds->zfd_is_flowcon = B_TRUE;
794 794 return;
795 795 }
796 796
797 797 if (log_qp->q_first == NULL && bcanputnext(log_qp, lmp->b_band)) {
798 798 putnext(log_qp, lmp);
799 799 } else {
800 800 if (putq(log_qp, lmp) == 0) {
801 801 /* The logger queue is full, free the msg. */
802 802 freemsg(lmp);
803 803 }
804 804 /*
805 805 * If we're supposed to tee with flow control and the tee is
806 806 * over the high water mark then we want the primary stream to
807 807 * stop flowing. We'll stop queueing the primary stream after
808 808 * the log stream has drained.
809 809 */
810 810 if (zfds->zfd_allow_flowcon &&
811 811 log_qp->q_count > log_qp->q_hiwat) {
812 812 zfds->zfd_is_flowcon = B_TRUE;
813 813 }
814 814 }
815 815 }
816 816
817 817 /*
818 818 * wput(9E) is symmetric for master and slave sides, so this handles both
819 819 * without splitting the codepath. (The only exception to this is the
820 820 * processing of zfd ioctls, which is restricted to the master side.)
821 821 *
822 822 * zfd_wput() looks at the other side; if there is no process holding that
823 823 * side open, it frees the message. This prevents processes from hanging
824 824 * if no one is holding open the fd. Otherwise, it putnext's high
825 825 * priority messages, putnext's normal messages if possible, and otherwise
826 826 * enqueues the messages; in the case that something is enqueued, wsrv(9E)
827 827 * will take care of eventually shuttling I/O to the other side.
828 828 *
829 829 * When configured as a multiplexer, then anything written to the stream
830 830 * from inside the zone is also teed off to the corresponding log stream
831 831 * for consumption within the zone (i.e. the log stream can be read, but never
832 832 * written to, by an application inside the zone).
833 833 */
834 834 static void
835 835 zfd_wput(queue_t *qp, mblk_t *mp)
836 836 {
837 837 unsigned char type = mp->b_datap->db_type;
838 838 zfd_state_t *zfds;
839 839 struct iocblk *iocbp;
840 840 boolean_t must_queue = B_FALSE;
841 841
842 842 ASSERT(qp->q_ptr);
843 843
844 844 DBG1("entering zfd_wput, %s side", zfd_side(qp));
845 845
846 846 /*
847 847 * Process zfd ioctl messages if qp is the master side's write queue.
848 848 */
849 849 zfds = (zfd_state_t *)qp->q_ptr;
850 850
851 851 if (type == M_IOCTL) {
852 852 iocbp = (struct iocblk *)(void *)mp->b_rptr;
853 853
854 854 switch (iocbp->ioc_cmd) {
855 855 case ZFD_MAKETTY:
856 856 zfds->zfd_tty = 1;
857 857 miocack(qp, mp, 0, 0);
858 858 return;
859 859 case ZFD_EOF:
860 860 if (zfds->zfd_slave_rdq != NULL)
861 861 (void) putnextctl(zfds->zfd_slave_rdq,
862 862 M_HANGUP);
863 863 miocack(qp, mp, 0, 0);
864 864 return;
865 865 case ZFD_HAS_SLAVE:
866 866 if ((zfds->zfd_state & ZFD_STATE_SOPEN) != 0) {
867 867 miocack(qp, mp, 0, 0);
868 868 } else {
869 869 miocack(qp, mp, 0, ENOTTY);
870 870 }
871 871 return;
872 872 case ZFD_MUX: {
873 873 /*
874 874 * Setup the multiplexer configuration for the two
875 875 * streams.
876 876 *
877 877 * We expect to be called on the stream that will
878 878 * become the log stream and be passed one data block
879 879 * with the minor number of the slave side of the
880 880 * primary stream.
881 881 */
882 882 int to;
|
↓ open down ↓ |
882 lines elided |
↑ open up ↑ |
883 883 int instance;
884 884 zfd_state_t *prim_zfds;
885 885
886 886 if (iocbp->ioc_count != TRANSPARENT ||
887 887 mp->b_cont == NULL) {
888 888 miocack(qp, mp, 0, EINVAL);
889 889 return;
890 890 }
891 891
892 892 /* Get the primary slave minor device number */
893 + ASSERT(IS_P2ALIGNED(mp->b_cont->b_rptr, 4));
894 + /* LINTED - b_rptr will always be aligned. */
893 895 to = *(int *)mp->b_cont->b_rptr;
894 896 instance = ZFD_INSTANCE(to);
895 897
896 898 if ((prim_zfds = ddi_get_soft_state(zfd_soft_state,
897 899 instance)) == NULL) {
898 900 miocack(qp, mp, 0, EINVAL);
899 901 return;
900 902 }
901 903
902 904 /* Disallow changing primary/log once set. */
903 905 mutex_enter(&zfd_mux_lock);
904 906 if (zfds->zfd_muxt != ZFD_NO_MUX ||
905 907 prim_zfds->zfd_muxt != ZFD_NO_MUX) {
906 908 mutex_exit(&zfd_mux_lock);
907 909 miocack(qp, mp, 0, EINVAL);
908 910 return;
909 911 }
910 912
911 913 zfds->zfd_muxt = ZFD_LOG_STREAM;
912 914 zfds->zfd_inst_pri = prim_zfds;
913 915 prim_zfds->zfd_muxt = ZFD_PRIMARY_STREAM;
914 916 prim_zfds->zfd_inst_log = zfds;
915 917 mutex_exit(&zfd_mux_lock);
916 918 DTRACE_PROBE2(zfd__mux__link, void *, prim_zfds,
917 919 void *, zfds);
918 920
919 921 miocack(qp, mp, 0, 0);
920 922 return;
921 923 }
922 924 case ZFD_MUX_FLOWCON: {
923 925 /*
924 926 * We expect this ioctl to be issued against the
925 927 * log stream. We don't use the primary stream since
926 928 * there can be other streams modules pushed onto that
927 929 * stream which would interfere with the ioctl.
928 930 */
929 931 int val;
930 932 zfd_state_t *prim_zfds;
931 933
932 934 if (iocbp->ioc_count != TRANSPARENT ||
933 935 mp->b_cont == NULL) {
934 936 miocack(qp, mp, 0, EINVAL);
|
↓ open down ↓ |
32 lines elided |
↑ open up ↑ |
935 937 return;
936 938 }
937 939
938 940 if (zfds->zfd_muxt != ZFD_LOG_STREAM) {
939 941 miocack(qp, mp, 0, EINVAL);
940 942 return;
941 943 }
942 944 prim_zfds = zfds->zfd_inst_pri;
943 945
944 946 /* Get the flow control setting */
947 + ASSERT(IS_P2ALIGNED(mp->b_cont->b_rptr, 4));
948 + /* LINTED - b_rptr will always be aligned. */
945 949 val = *(int *)mp->b_cont->b_rptr;
946 950 if (val != 0 && val != 1) {
947 951 miocack(qp, mp, 0, EINVAL);
948 952 return;
949 953 }
950 954
951 955 prim_zfds->zfd_allow_flowcon = (boolean_t)val;
952 956 if (!prim_zfds->zfd_allow_flowcon)
953 957 prim_zfds->zfd_is_flowcon = B_FALSE;
954 958
955 959 DTRACE_PROBE1(zfd__mux__flowcon, void *, prim_zfds);
956 960 miocack(qp, mp, 0, 0);
957 961 return;
958 962 }
959 963 default:
960 964 break;
961 965 }
962 966 }
963 967
964 968 /* if on the write side, may need to tee */
965 969 if (zfds->zfd_slave_rdq != NULL && qp == WR(zfds->zfd_slave_rdq)) {
966 970 /* tee output to any attached log stream */
967 971 zfd_tee_handler(zfds, type, mp);
968 972
969 973 /* high-priority msgs are not subject to flow control */
970 974 if (zfds->zfd_is_flowcon && type == M_DATA)
971 975 must_queue = B_TRUE;
972 976 }
973 977
974 978 if (zfd_switch(RD(qp)) == NULL) {
975 979 DBG1("wput to %s side (no one listening)", zfd_side(qp));
976 980 switch (type) {
977 981 case M_FLUSH:
978 982 handle_mflush(qp, mp);
979 983 break;
980 984 case M_IOCTL:
981 985 miocnak(qp, mp, 0, 0);
982 986 break;
983 987 default:
984 988 freemsg(mp);
985 989 break;
986 990 }
987 991 return;
988 992 }
989 993
990 994 if (type >= QPCTL) {
991 995 DBG1("(hipri) wput, %s side", zfd_side(qp));
992 996 switch (type) {
993 997 case M_READ: /* supposedly from ldterm? */
994 998 DBG("zfd_wput: tossing M_READ\n");
995 999 freemsg(mp);
996 1000 break;
997 1001 case M_FLUSH:
998 1002 handle_mflush(qp, mp);
999 1003 break;
1000 1004 default:
1001 1005 /*
1002 1006 * Put this to the other side.
1003 1007 */
1004 1008 ASSERT(zfd_switch(RD(qp)) != NULL);
1005 1009 putnext(zfd_switch(RD(qp)), mp);
1006 1010 break;
1007 1011 }
1008 1012 DBG1("done (hipri) wput, %s side", zfd_side(qp));
1009 1013 return;
1010 1014 }
1011 1015
1012 1016 /*
1013 1017 * If the primary stream has been stopped for flow control then
1014 1018 * enqueue the msg, otherwise only putnext if there isn't already
1015 1019 * something in the queue. If we don't do this then things would wind
1016 1020 * up out of order.
1017 1021 */
1018 1022 if (!must_queue && qp->q_first == NULL &&
1019 1023 bcanputnext(RD(zfd_switch(qp)), mp->b_band)) {
1020 1024 putnext(RD(zfd_switch(qp)), mp);
1021 1025 } else {
1022 1026 /*
1023 1027 * zfd_wsrv expects msgs queued on the primary queue. Those
1024 1028 * will be handled by zfd_wsrv after zfd_rsrv performs the
1025 1029 * qenable on the proper queue.
1026 1030 */
1027 1031 (void) putq(qp, mp);
1028 1032 }
1029 1033
1030 1034 DBG1("done wput, %s side", zfd_side(qp));
1031 1035 }
1032 1036
1033 1037 /*
1034 1038 * Read server
1035 1039 *
1036 1040 * For primary stream:
1037 1041 * Under normal execution rsrv(9E) is symmetric for master and slave, so
1038 1042 * zfd_rsrv() can handle both without splitting up the codepath. We do this by
1039 1043 * enabling the write side of the partner. This triggers the partner to send
1040 1044 * messages queued on its write side to this queue's read side.
1041 1045 *
1042 1046 * For log stream:
1043 1047 * Internally we've queued up the msgs that we've teed off to the log stream
1044 1048 * so when we're invoked we need to pass these along.
 *
 * Arguments:
 *   qp - the read queue being serviced; qp->q_ptr is used as the
 *        zfd_state_t of the instance.
 *
 * The log stream path is taken only when the instance is marked
 * ZFD_LOG_STREAM and has a slave read queue. On that path, once the log
 * queue's q_count is below its q_lowat, flow control on the primary
 * instance (zfd_inst_pri) is released and the primary's master read queue
 * is re-enabled.
1045 1049 */
1046 1050 static void
1047 1051 zfd_rsrv(queue_t *qp)
1048 1052 {
1049 1053 zfd_state_t *zfds;
1050 1054 zfds = (zfd_state_t *)qp->q_ptr;
1051 1055
1052 1056 /*
1053 1057 * log stream server
1054 1058 */
1055 1059 if (zfds->zfd_muxt == ZFD_LOG_STREAM && zfds->zfd_slave_rdq != NULL) {
1056 1060 queue_t *log_qp;
1057 1061 mblk_t *mp;
1058 1062
1059 1063 log_qp = RD(zfds->zfd_slave_rdq);
1060 1064
1061 1065 if ((zfds->zfd_state & ZFD_STATE_SOPEN) != 0) {
/* Slave side is open: forward the queued (teed) messages. */
1062 1066 zfd_state_t *pzfds = zfds->zfd_inst_pri;
1063 1067
/*
 * Pass messages upstream until the queue is empty or the next queue
 * cannot accept more in this band; in the latter case the message is
 * put back and the loop stops.
 */
1064 1068 while ((mp = getq(qp)) != NULL) {
1065 1069 if (bcanputnext(log_qp, mp->b_band)) {
1066 1070 putnext(log_qp, mp);
1067 1071 } else {
/*
 * NOTE(review): the message was taken from qp but is put back on
 * log_qp. Presumably these are the same queue when servicing the log
 * stream's slave side -- confirm.
 */
1068 1072 (void) putbq(log_qp, mp);
1069 1073 break;
1070 1074 }
1071 1075 }
1072 1076
/*
 * Backlog is below the low-water mark: clear the primary's flow
 * control flag and, if the primary has a master read queue, schedule
 * it so processing can resume.
 */
1073 1077 if (log_qp->q_count < log_qp->q_lowat) {
1074 1078 DTRACE_PROBE(zfd__flow__on);
1075 1079 pzfds->zfd_is_flowcon = B_FALSE;
1076 1080 if (pzfds->zfd_master_rdq != NULL)
1077 1081 qenable(RD(pzfds->zfd_master_rdq));
1078 1082 }
1079 1083 } else {
1080 1084 /* No longer open, drain the queue */
1081 1085 while ((mp = getq(qp)) != NULL) {
1082 1086 freemsg(mp);
1083 1087 }
/* Final sweep after the getq() loop has already emptied the queue. */
1084 1088 flushq(qp, FLUSHALL);
1085 1089 }
/* Log stream handling is complete; skip the primary stream logic. */
1086 1090 return;
1087 1091 }
1088 1092
1089 1093 /*
1090 1094 * Care must be taken here, as either of the master or slave side
1091 1095 * qptr could be NULL.
1092 1096 */
1093 1097 ASSERT(qp == zfds->zfd_master_rdq || qp == zfds->zfd_slave_rdq);
1094 1098 if (zfd_switch(qp) == NULL) {
1095 1099 DBG("zfd_rsrv: other side isn't listening\n");
1096 1100 return;
1097 1101 }
/* Schedule the partner's write service routine to push its queued data. */
1098 1102 qenable(WR(zfd_switch(qp)));
1099 1103 }
1100 1104
1101 1105 /*
1102 1106 * Write server
1103 1107 *
1104 1108 * This routine is symmetric for master and slave, so it handles both without
1105 1109 * splitting up the codepath.
1106 1110 *
1107 1111 * If there are messages on this queue that can be sent to the other, send
1108 1112 * them via putnext(). Else, if queued messages cannot be sent, leave them
1109 1113 * on this queue.
 *
 * Arguments:
 *   qp - the write queue being serviced; qp->q_ptr is used as the
 *        zfd_state_t of the instance and must not be NULL.
 *
 * If the partner side has no read queue, every queued message is
 * discarded (M_IOCTL messages are nak'ed rather than silently freed).
 * While the instance's zfd_is_flowcon flag is set, nothing is dequeued
 * and the messages remain on this queue.
1110 1114 */
1111 1115 static void
1112 1116 zfd_wsrv(queue_t *qp)
1113 1117 {
1114 1118 queue_t *swq;
1115 1119 mblk_t *mp;
1116 1120 zfd_state_t *zfds = (zfd_state_t *)qp->q_ptr;
1117 1121
1118 1122 ASSERT(zfds != NULL);
1119 1123
1120 1124 /*
1121 1125 * Partner has no read queue, so take the data, and throw it away.
1122 1126 */
1123 1127 if (zfd_switch(RD(qp)) == NULL) {
1124 1128 DBG("zfd_wsrv: other side isn't listening");
1125 1129 while ((mp = getq(qp)) != NULL) {
/* An ioctl is answered with a nak instead of being dropped. */
1126 1130 if (mp->b_datap->db_type == M_IOCTL)
1127 1131 miocnak(qp, mp, 0, 0);
1128 1132 else
1129 1133 freemsg(mp);
1130 1134 }
/* Final sweep after the getq() loop has already emptied the queue. */
1131 1135 flushq(qp, FLUSHALL);
1132 1136 return;
1133 1137 }
1134 1138
/*
 * swq is the queue the messages are forwarded through.
 * NOTE(review): the NULL check above calls zfd_switch(RD(qp)) while this
 * lookup calls zfd_switch(qp) with the write queue itself; zfd_switch()
 * is not visible here -- confirm both forms resolve to the same partner.
 */
1135 1139 swq = RD(zfd_switch(qp));
1136 1140
1137 1141 /*
1138 1142 * while there are messages on this write queue...
1139 1143 */
1140 1144 while (!zfds->zfd_is_flowcon && (mp = getq(qp)) != NULL) {
1141 1145 /*
1142 1146 * Due to the way zfd_wput is implemented, we should never
1143 1147 * see a high priority control message here.
1144 1148 */
1145 1149 ASSERT(mp->b_datap->db_type < QPCTL);
1146 1150
/*
 * Forward if the destination can accept this band; otherwise put the
 * message back at the head of this queue and stop.
 */
1147 1151 if (bcanputnext(swq, mp->b_band)) {
1148 1152 putnext(swq, mp);
1149 1153 } else {
1150 1154 (void) putbq(qp, mp);
1151 1155 break;
1152 1156 }
1153 1157 }
1154 1158 }
|
↓ open down ↓ |
200 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX