3354 kernel crash in rpcsec_gss after using gsscred
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Carlos Neira <cneirabustos@gmail.com>
Approved by: Robert Mustacchi <rm@joyent.com>
NEX-771 Long RPC messages could confuse kernel and cause panic
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: Jan Kryl <jan.kryl@nexenta.com>
re #12783 rb4338 Flow control is needed in rpcmod when the NFS server is unable to keep up with the network
re #13613 rb4516 Tunables needs volatile keyword
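
Background for re #13613: the patch below adds the volatile qualifier to the
clnt_idle_timeout and svc_idle_timeout tunables so that the compiler must
reload them on every use; a value patched at run time (for example with
mdb -kw) could otherwise be invisible to code that cached the load. A minimal
sketch of the pattern, using hypothetical names rather than code from this
patch:

    #include <sys/types.h>

    extern void example_do_work(void);

    /* Hypothetical tunable, in milliseconds. */
    volatile clock_t example_idle_timeout = 5 * 60 * 1000L;

    void
    example_loop(void)
    {
            /*
             * volatile forces a fresh load of the tunable on each
             * iteration.  Without it, the compiler may legally hoist
             * the load out of the loop, so a live change to the
             * variable would never be observed here.
             */
            while (example_idle_timeout != 0)
                    example_do_work();
    }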
--- old/usr/src/uts/common/rpc/rpcmod.c
+++ new/usr/src/uts/common/rpc/rpcmod.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25 /*
26 26 * Copyright 2012 Milan Jurik. All rights reserved.
27 + * Copyright 2012 Marcel Telka <marcel@telka.sk>
27 28 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
29 + * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
28 30 */
29 31 /* Copyright (c) 1990 Mentat Inc. */
30 32
31 33 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
32 -/* All Rights Reserved */
34 +/* All Rights Reserved */
33 35
34 36 /*
35 37 * Kernel RPC filtering module
36 38 */
37 39
38 40 #include <sys/param.h>
39 41 #include <sys/types.h>
40 42 #include <sys/stream.h>
41 43 #include <sys/stropts.h>
42 44 #include <sys/strsubr.h>
43 45 #include <sys/tihdr.h>
44 46 #include <sys/timod.h>
45 47 #include <sys/tiuser.h>
46 48 #include <sys/debug.h>
47 49 #include <sys/signal.h>
48 50 #include <sys/pcb.h>
49 51 #include <sys/user.h>
50 52 #include <sys/errno.h>
51 53 #include <sys/cred.h>
52 54 #include <sys/policy.h>
53 55 #include <sys/inline.h>
54 56 #include <sys/cmn_err.h>
55 57 #include <sys/kmem.h>
56 58 #include <sys/file.h>
57 59 #include <sys/sysmacros.h>
58 60 #include <sys/systm.h>
59 61 #include <sys/t_lock.h>
60 62 #include <sys/ddi.h>
61 63 #include <sys/vtrace.h>
62 64 #include <sys/callb.h>
63 65 #include <sys/strsun.h>
64 66
65 67 #include <sys/strlog.h>
66 68 #include <rpc/rpc_com.h>
67 69 #include <inet/common.h>
68 70 #include <rpc/types.h>
69 71 #include <sys/time.h>
70 72 #include <rpc/xdr.h>
71 73 #include <rpc/auth.h>
72 74 #include <rpc/clnt.h>
73 75 #include <rpc/rpc_msg.h>
74 76 #include <rpc/clnt.h>
75 77 #include <rpc/svc.h>
76 78 #include <rpc/rpcsys.h>
77 79 #include <rpc/rpc_rdma.h>
78 80
79 81 /*
80 82 * This is the loadable module wrapper.
81 83 */
82 84 #include <sys/conf.h>
83 85 #include <sys/modctl.h>
84 86 #include <sys/syscall.h>
85 87
86 88 extern struct streamtab rpcinfo;
87 89
88 90 static struct fmodsw fsw = {
89 91 "rpcmod",
90 92 &rpcinfo,
91 93 D_NEW|D_MP,
92 94 };
93 95
94 96 /*
95 97 * Module linkage information for the kernel.
96 98 */
97 99
98 100 static struct modlstrmod modlstrmod = {
99 101 &mod_strmodops, "rpc interface str mod", &fsw
100 102 };
101 103
102 104 /*
103 105 * For the RPC system call.
104 106 */
105 107 static struct sysent rpcsysent = {
106 108 2,
107 109 SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD,
108 110 rpcsys
109 111 };
110 112
111 113 static struct modlsys modlsys = {
112 114 &mod_syscallops,
113 115 "RPC syscall",
114 116 &rpcsysent
115 117 };
116 118
117 119 #ifdef _SYSCALL32_IMPL
118 120 static struct modlsys modlsys32 = {
119 121 &mod_syscallops32,
120 122 "32-bit RPC syscall",
121 123 &rpcsysent
122 124 };
123 125 #endif /* _SYSCALL32_IMPL */
124 126
125 127 static struct modlinkage modlinkage = {
126 128 MODREV_1,
127 129 {
128 130 &modlsys,
129 131 #ifdef _SYSCALL32_IMPL
130 132 &modlsys32,
131 133 #endif
132 134 &modlstrmod,
133 135 NULL
134 136 }
135 137 };
136 138
137 139 int
138 140 _init(void)
139 141 {
140 142 int error = 0;
141 143 callb_id_t cid;
142 144 int status;
143 145
144 146 svc_init();
145 147 clnt_init();
146 148 cid = callb_add(connmgr_cpr_reset, 0, CB_CL_CPR_RPC, "rpc");
147 149
148 150 if (error = mod_install(&modlinkage)) {
149 151 /*
150 152 * Could not install module, cleanup previous
151 153 * initialization work.
152 154 */
153 155 clnt_fini();
154 156 if (cid != NULL)
155 157 (void) callb_delete(cid);
156 158
157 159 return (error);
158 160 }
159 161
160 162 /*
161 163 * Load up the RDMA plugins and initialize the stats. Even if the
162 164 * plugin loadup fails, the counters still get initialized as long
163 165 * as rpcmod was successfully installed.
164 166 */
165 167 rw_init(&rdma_lock, NULL, RW_DEFAULT, NULL);
166 168 mutex_init(&rdma_modload_lock, NULL, MUTEX_DEFAULT, NULL);
167 169
168 170 cv_init(&rdma_wait.svc_cv, NULL, CV_DEFAULT, NULL);
169 171 mutex_init(&rdma_wait.svc_lock, NULL, MUTEX_DEFAULT, NULL);
170 172
171 173 mt_kstat_init();
172 174
173 175 /*
174 176 * Get our identification into ldi. This is used for loading
175 177 * other modules, e.g. rpcib.
176 178 */
177 179 status = ldi_ident_from_mod(&modlinkage, &rpcmod_li);
178 180 if (status != 0) {
179 181 cmn_err(CE_WARN, "ldi_ident_from_mod fails with %d", status);
180 182 rpcmod_li = NULL;
181 183 }
182 184
183 185 return (error);
184 186 }
185 187
186 188 /*
187 189 * The unload entry point fails, because we advertise entry points into
188 190 * rpcmod from the rest of kRPC: rpcmod_release().
189 191 */
190 192 int
191 193 _fini(void)
192 194 {
193 195 return (EBUSY);
194 196 }
195 197
196 198 int
197 199 _info(struct modinfo *modinfop)
198 200 {
199 201 return (mod_info(&modlinkage, modinfop));
200 202 }
201 203
202 204 extern int nulldev();
203 205
204 206 #define RPCMOD_ID 2049
205 207
206 208 int rmm_open(queue_t *, dev_t *, int, int, cred_t *);
207 209 int rmm_close(queue_t *, int, cred_t *);
208 210
209 211 /*
210 212 * To save instructions, since STREAMS ignores the return value
211 213 * from these functions, they are defined as void here. Kind of icky, but...
212 214 */
213 215 void rmm_rput(queue_t *, mblk_t *);
214 216 void rmm_wput(queue_t *, mblk_t *);
215 217 void rmm_rsrv(queue_t *);
216 218 void rmm_wsrv(queue_t *);
217 219
218 220 int rpcmodopen(queue_t *, dev_t *, int, int, cred_t *);
219 221 int rpcmodclose(queue_t *, int, cred_t *);
220 222 void rpcmodrput(queue_t *, mblk_t *);
221 223 void rpcmodwput(queue_t *, mblk_t *);
222 224 void rpcmodrsrv();
223 225 void rpcmodwsrv(queue_t *);
224 226
225 227 static void rpcmodwput_other(queue_t *, mblk_t *);
226 228 static int mir_close(queue_t *q);
227 229 static int mir_open(queue_t *q, dev_t *devp, int flag, int sflag,
228 230 cred_t *credp);
229 231 static void mir_rput(queue_t *q, mblk_t *mp);
230 232 static void mir_rsrv(queue_t *q);
231 233 static void mir_wput(queue_t *q, mblk_t *mp);
232 234 static void mir_wsrv(queue_t *q);
233 235
234 236 static struct module_info rpcmod_info =
235 237 {RPCMOD_ID, "rpcmod", 0, INFPSZ, 256*1024, 1024};
236 238
237 239 static struct qinit rpcmodrinit = {
238 240 (int (*)())rmm_rput,
239 241 (int (*)())rmm_rsrv,
240 242 rmm_open,
241 243 rmm_close,
242 244 nulldev,
243 245 &rpcmod_info,
244 246 NULL
245 247 };
246 248
247 249 /*
248 250 * The write put procedure is simply putnext to conserve stack space.
249 251 * The write service procedure is not used to queue data, but instead to
250 252 * synchronize with flow control.
251 253 */
252 254 static struct qinit rpcmodwinit = {
253 255 (int (*)())rmm_wput,
254 256 (int (*)())rmm_wsrv,
255 257 rmm_open,
256 258 rmm_close,
257 259 nulldev,
258 260 &rpcmod_info,
259 261 NULL
260 262 };
261 263 struct streamtab rpcinfo = { &rpcmodrinit, &rpcmodwinit, NULL, NULL };
262 264
263 265 struct xprt_style_ops {
264 266 int (*xo_open)();
265 267 int (*xo_close)();
266 268 void (*xo_wput)();
267 269 void (*xo_wsrv)();
268 270 void (*xo_rput)();
269 271 void (*xo_rsrv)();
270 272 };
271 273
272 274 /*
273 275 * Read side has no service procedure.
274 276 */
275 277 static struct xprt_style_ops xprt_clts_ops = {
276 278 rpcmodopen,
277 279 rpcmodclose,
278 280 rpcmodwput,
279 281 rpcmodwsrv,
280 282 rpcmodrput,
281 283 NULL
282 284 };
283 285
284 286 static struct xprt_style_ops xprt_cots_ops = {
285 287 mir_open,
286 288 mir_close,
287 289 mir_wput,
288 290 mir_wsrv,
289 291 mir_rput,
290 292 mir_rsrv
291 293 };
292 294
293 295 /*
294 296 * Per rpcmod "slot" data structure. q->q_ptr points to one of these.
295 297 */
296 298 struct rpcm {
297 299 void *rm_krpc_cell; /* Reserved for use by kRPC */
298 300 struct xprt_style_ops *rm_ops;
299 301 int rm_type; /* Client or server side stream */
300 302 #define RM_CLOSING 0x1 /* somebody is trying to close slot */
301 303 uint_t rm_state; /* state of the slot. see above */
302 304 uint_t rm_ref; /* cnt of external references to slot */
303 305 kmutex_t rm_lock; /* mutex protecting above fields */
304 306 kcondvar_t rm_cwait; /* condition for closing */
305 307 zoneid_t rm_zoneid; /* zone which pushed rpcmod */
306 308 };
307 309
308 310 struct temp_slot {
309 311 void *cell;
310 312 struct xprt_style_ops *ops;
311 313 int type;
312 314 mblk_t *info_ack;
313 315 kmutex_t lock;
314 316 kcondvar_t wait;
315 317 };
316 318
317 319 typedef struct mir_s {
318 320 void *mir_krpc_cell; /* Reserved for kRPC use. This field */
319 321 /* must be first in the structure. */
320 322 struct xprt_style_ops *rm_ops;
321 323 int mir_type; /* Client or server side stream */
322 324
323 325 mblk_t *mir_head_mp; /* RPC msg in progress */
324 326 /*
325 327 * mir_head_mp points the first mblk being collected in
326 328 * the current RPC message. Record headers are removed
327 329 * before data is linked into mir_head_mp.
328 330 */
329 331 mblk_t *mir_tail_mp; /* Last mblk in mir_head_mp */
330 332 /*
331 333 * mir_tail_mp points to the last mblk in the message
332 334 * chain starting at mir_head_mp. It is only valid
333 335 * if mir_head_mp is non-NULL and is used to add new
334 336 * data blocks to the end of chain quickly.
335 337 */
336 338
337 339 int32_t mir_frag_len; /* Bytes seen in the current frag */
338 340 /*
339 341 * mir_frag_len starts at -4 for beginning of each fragment.
340 342 * When this length is negative, it indicates the number of
341 343 * bytes that rpcmod needs to complete the record marker
342 344 * header. When it is positive or zero, it holds the number
343 345 * of bytes that have arrived for the current fragment and
344 346 * are held in mir_head_mp.
345 347 */
346 348
347 349 int32_t mir_frag_header;
348 350 /*
349 351 * Fragment header as collected for the current fragment.
350 352 * It holds the last-fragment indicator and the number
351 353 * of bytes in the fragment.
352 354 */
353 355
354 356 unsigned int
355 357 mir_ordrel_pending : 1, /* Sent T_ORDREL_REQ */
356 358 mir_hold_inbound : 1, /* Hold inbound messages on server */
357 359 /* side until outbound flow control */
358 360 /* is relieved. */
359 361 mir_closing : 1, /* The stream is being closed */
360 362 mir_inrservice : 1, /* data queued or rd srv proc running */
361 363 mir_inwservice : 1, /* data queued or wr srv proc running */
362 364 mir_inwflushdata : 1, /* flush M_DATAs when srv runs */
363 365 /*
364 366 * On client streams, mir_clntreq is 0 or 1; it is set
365 367 * to 1 whenever a new request is sent out (mir_wput)
366 368 * and cleared when the timer fires (mir_timer). If
367 369 * the timer fires with this value equal to 0, then the
368 370 * stream is considered idle and kRPC is notified.
369 371 */
370 372 mir_clntreq : 1,
371 373 /*
372 374 * On server streams, stop accepting messages
373 375 */
374 376 mir_svc_no_more_msgs : 1,
375 377 mir_listen_stream : 1, /* listen end point */
376 378 mir_unused : 1, /* no longer used */
377 379 mir_timer_call : 1,
378 380 mir_junk_fill_thru_bit_31 : 21;
379 381
380 382 int mir_setup_complete; /* server has initialized everything */
381 383 timeout_id_t mir_timer_id; /* Timer for idle checks */
382 384 clock_t mir_idle_timeout; /* Allowed idle time before shutdown */
383 385 /*
384 386 * This value is copied from clnt_idle_timeout or
385 387 * svc_idle_timeout during the appropriate ioctl.
386 388 * Kept in milliseconds
387 389 */
388 390 clock_t mir_use_timestamp; /* updated on client with each use */
389 391 /*
390 392 * This value is set to lbolt
391 393 * every time a client stream sends or receives data.
392 394 * Even if the timer message arrives, we don't shutdown
393 395 * client unless:
394 396 * lbolt >= MSEC_TO_TICK(mir_idle_timeout)+mir_use_timestamp.
395 397 * This value is kept in HZ.
396 398 */
397 399
398 400 uint_t *mir_max_msg_sizep; /* Reference to sanity check size */
399 401 /*
400 402 * This pointer is set to &clnt_max_msg_size or
401 403 * &svc_max_msg_size during the appropriate ioctl.
402 404 */
403 405 zoneid_t mir_zoneid; /* zone which pushed rpcmod */
404 406 /* Server-side fields. */
405 407 int mir_ref_cnt; /* Reference count: server side only */
406 408 /* counts the number of references */
407 409 /* that a kernel RPC server thread */
408 410 /* (see svc_run()) has on this rpcmod */
409 411 /* slot. Effectively, it is the */
410 412 /* number of unprocessed messages */
411 413 /* that have been passed up to the */
412 414 /* kRPC layer */
413 415
414 416 mblk_t *mir_svc_pend_mp; /* Pending T_ORDREL_IND or */
415 417 /* T_DISCON_IND */
416 418
417 419 /*
418 420 * these fields are for both client and server, but for debugging,
419 421 * it is easier to have these last in the structure.
420 422 */
421 423 kmutex_t mir_mutex; /* Mutex and condvar for close */
422 424 kcondvar_t mir_condvar; /* synchronization. */
423 425 kcondvar_t mir_timer_cv; /* Timer routine sync. */
424 426 } mir_t;
425 427
426 428 void tmp_rput(queue_t *q, mblk_t *mp);
427 429
428 430 struct xprt_style_ops tmpops = {
429 431 NULL,
430 432 NULL,
431 433 putnext,
432 434 NULL,
433 435 tmp_rput,
434 436 NULL
435 437 };
436 438
437 439 void
438 440 tmp_rput(queue_t *q, mblk_t *mp)
439 441 {
440 442 struct temp_slot *t = (struct temp_slot *)(q->q_ptr);
441 443 struct T_info_ack *pptr;
442 444
443 445 switch (mp->b_datap->db_type) {
444 446 case M_PCPROTO:
445 447 pptr = (struct T_info_ack *)mp->b_rptr;
446 448 switch (pptr->PRIM_type) {
447 449 case T_INFO_ACK:
448 450 mutex_enter(&t->lock);
449 451 t->info_ack = mp;
450 452 cv_signal(&t->wait);
451 453 mutex_exit(&t->lock);
452 454 return;
453 455 default:
454 456 break;
455 457 }
456 458 default:
457 459 break;
458 460 }
459 461
460 462 /*
461 463 * Not an info-ack, so free it. This is ok because we should
462 464 * not be receiving data until the open finishes: rpcmod
463 465 * is pushed well before the end-point is bound to an address.
464 466 */
465 467 freemsg(mp);
466 468 }
467 469
468 470 int
469 471 rmm_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp)
470 472 {
471 473 mblk_t *bp;
472 474 struct temp_slot ts, *t;
473 475 struct T_info_ack *pptr;
474 476 int error = 0;
475 477
476 478 ASSERT(q != NULL);
477 479 /*
478 480 * Check for re-opens.
479 481 */
480 482 if (q->q_ptr) {
481 483 TRACE_1(TR_FAC_KRPC, TR_RPCMODOPEN_END,
482 484 "rpcmodopen_end:(%s)", "q->qptr");
483 485 return (0);
484 486 }
485 487
486 488 t = &ts;
487 489 bzero(t, sizeof (*t));
488 490 q->q_ptr = (void *)t;
489 491 WR(q)->q_ptr = (void *)t;
490 492
491 493 /*
492 494 * Allocate the required messages upfront.
493 495 */
494 496 if ((bp = allocb_cred(sizeof (struct T_info_req) +
495 497 sizeof (struct T_info_ack), crp, curproc->p_pid)) == NULL) {
496 498 return (ENOBUFS);
497 499 }
498 500
499 501 mutex_init(&t->lock, NULL, MUTEX_DEFAULT, NULL);
500 502 cv_init(&t->wait, NULL, CV_DEFAULT, NULL);
501 503
502 504 t->ops = &tmpops;
503 505
504 506 qprocson(q);
505 507 bp->b_datap->db_type = M_PCPROTO;
506 508 *(int32_t *)bp->b_wptr = (int32_t)T_INFO_REQ;
507 509 bp->b_wptr += sizeof (struct T_info_req);
508 510 putnext(WR(q), bp);
509 511
510 512 mutex_enter(&t->lock);
511 513 while (t->info_ack == NULL) {
512 514 if (cv_wait_sig(&t->wait, &t->lock) == 0) {
513 515 error = EINTR;
514 516 break;
515 517 }
516 518 }
517 519 mutex_exit(&t->lock);
518 520
519 521 if (error)
520 522 goto out;
521 523
522 524 pptr = (struct T_info_ack *)t->info_ack->b_rptr;
523 525
524 526 if (pptr->SERV_type == T_CLTS) {
525 527 if ((error = rpcmodopen(q, devp, flag, sflag, crp)) == 0)
526 528 ((struct rpcm *)q->q_ptr)->rm_ops = &xprt_clts_ops;
527 529 } else {
528 530 if ((error = mir_open(q, devp, flag, sflag, crp)) == 0)
529 531 ((mir_t *)q->q_ptr)->rm_ops = &xprt_cots_ops;
530 532 }
531 533
532 534 out:
533 535 if (error)
534 536 qprocsoff(q);
535 537
536 538 freemsg(t->info_ack);
537 539 mutex_destroy(&t->lock);
538 540 cv_destroy(&t->wait);
539 541
540 542 return (error);
541 543 }
542 544
543 545 void
544 546 rmm_rput(queue_t *q, mblk_t *mp)
545 547 {
546 548 (*((struct temp_slot *)q->q_ptr)->ops->xo_rput)(q, mp);
547 549 }
548 550
549 551 void
550 552 rmm_rsrv(queue_t *q)
551 553 {
552 554 (*((struct temp_slot *)q->q_ptr)->ops->xo_rsrv)(q);
553 555 }
554 556
555 557 void
556 558 rmm_wput(queue_t *q, mblk_t *mp)
557 559 {
558 560 (*((struct temp_slot *)q->q_ptr)->ops->xo_wput)(q, mp);
559 561 }
560 562
561 563 void
562 564 rmm_wsrv(queue_t *q)
563 565 {
564 566 (*((struct temp_slot *)q->q_ptr)->ops->xo_wsrv)(q);
565 567 }
566 568
567 569 int
568 570 rmm_close(queue_t *q, int flag, cred_t *crp)
569 571 {
570 572 return ((*((struct temp_slot *)q->q_ptr)->ops->xo_close)(q, flag, crp));
571 573 }
572 574
573 -static void rpcmod_release(queue_t *, mblk_t *, bool_t);
574 575 /*
575 576 * rpcmodopen - open routine gets called when the module gets pushed
576 577 * onto the stream.
577 578 */
578 579 /*ARGSUSED*/
579 580 int
580 581 rpcmodopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp)
581 582 {
582 583 struct rpcm *rmp;
583 584
584 - extern void (*rpc_rele)(queue_t *, mblk_t *, bool_t);
585 -
586 585 TRACE_0(TR_FAC_KRPC, TR_RPCMODOPEN_START, "rpcmodopen_start:");
587 586
588 587 /*
589 - * Initialize entry points to release a rpcmod slot (and an input
590 - * message if supplied) and to send an output message to the module
591 - * below rpcmod.
592 - */
593 - if (rpc_rele == NULL)
594 - rpc_rele = rpcmod_release;
595 -
596 - /*
597 588 * Only sufficiently privileged users can use this module, and it
598 589 * is assumed that they will use this module properly, and NOT send
599 590 * bulk data from downstream.
600 591 */
601 592 if (secpolicy_rpcmod_open(crp) != 0)
602 593 return (EPERM);
603 594
604 595 /*
605 596 * Allocate slot data structure.
606 597 */
607 598 rmp = kmem_zalloc(sizeof (*rmp), KM_SLEEP);
608 599
609 600 mutex_init(&rmp->rm_lock, NULL, MUTEX_DEFAULT, NULL);
610 601 cv_init(&rmp->rm_cwait, NULL, CV_DEFAULT, NULL);
611 602 rmp->rm_zoneid = rpc_zoneid();
612 603 /*
613 604 * slot type will be set by kRPC client and server ioctl's
614 605 */
615 606 rmp->rm_type = 0;
616 607
617 608 q->q_ptr = (void *)rmp;
618 609 WR(q)->q_ptr = (void *)rmp;
619 610
620 611 TRACE_1(TR_FAC_KRPC, TR_RPCMODOPEN_END, "rpcmodopen_end:(%s)", "end");
621 612 return (0);
622 613 }
623 614
624 615 /*
625 616 * rpcmodclose - This routine gets called when the module gets popped
626 617 * off of the stream.
627 618 */
628 619 /*ARGSUSED*/
629 620 int
630 621 rpcmodclose(queue_t *q, int flag, cred_t *crp)
631 622 {
632 623 struct rpcm *rmp;
633 624
634 625 ASSERT(q != NULL);
635 626 rmp = (struct rpcm *)q->q_ptr;
636 627
637 628 /*
638 629 * Mark our state as closing.
639 630 */
640 631 mutex_enter(&rmp->rm_lock);
641 632 rmp->rm_state |= RM_CLOSING;
642 633
643 634 /*
644 635 * Check and see if there are any messages on the queue. If so, send
645 636 * the messages, regardless of whether the downstream module is ready to
646 637 * accept data.
647 638 */
648 639 if (rmp->rm_type == RPC_SERVER) {
649 640 flushq(q, FLUSHDATA);
650 641
651 642 qenable(WR(q));
652 643
653 644 if (rmp->rm_ref) {
654 645 mutex_exit(&rmp->rm_lock);
655 646 /*
656 647 * call into SVC to clean the queue
657 648 */
658 649 svc_queueclean(q);
659 650 mutex_enter(&rmp->rm_lock);
660 651
661 652 /*
662 653 * Block while there are kRPC threads with a reference
663 654 * to this message.
664 655 */
665 656 while (rmp->rm_ref)
666 657 cv_wait(&rmp->rm_cwait, &rmp->rm_lock);
667 658 }
668 659
669 660 mutex_exit(&rmp->rm_lock);
670 661
671 662 /*
672 663 * It is now safe to remove this queue from the stream. No kRPC
673 664 * threads have a reference to the stream, and none ever will,
674 665 * because RM_CLOSING is set.
675 666 */
676 667 qprocsoff(q);
677 668
678 669 /* Notify kRPC that this stream is going away. */
679 670 svc_queueclose(q);
680 671 } else {
681 672 mutex_exit(&rmp->rm_lock);
682 673 qprocsoff(q);
683 674 }
684 675
685 676 q->q_ptr = NULL;
686 677 WR(q)->q_ptr = NULL;
687 678 mutex_destroy(&rmp->rm_lock);
688 679 cv_destroy(&rmp->rm_cwait);
689 680 kmem_free(rmp, sizeof (*rmp));
690 681 return (0);
691 682 }
692 683
693 684 /*
694 685 * rpcmodrput - Module read put procedure. This is called from
695 686 * the module, driver, or stream head downstream.
696 687 */
697 688 void
698 689 rpcmodrput(queue_t *q, mblk_t *mp)
699 690 {
700 691 struct rpcm *rmp;
701 692 union T_primitives *pptr;
702 693 int hdrsz;
703 694
704 695 TRACE_0(TR_FAC_KRPC, TR_RPCMODRPUT_START, "rpcmodrput_start:");
705 696
706 697 ASSERT(q != NULL);
707 698 rmp = (struct rpcm *)q->q_ptr;
708 699
709 700 if (rmp->rm_type == 0) {
710 701 freemsg(mp);
711 702 return;
712 703 }
713 704
714 705 switch (mp->b_datap->db_type) {
715 706 default:
716 707 putnext(q, mp);
717 708 break;
718 709
719 710 case M_PROTO:
720 711 case M_PCPROTO:
721 712 ASSERT((mp->b_wptr - mp->b_rptr) >= sizeof (int32_t));
722 713 pptr = (union T_primitives *)mp->b_rptr;
723 714
724 715 /*
725 716 * Forward this message to kRPC if it is data.
726 717 */
727 718 if (pptr->type == T_UNITDATA_IND) {
728 719 /*
729 720 * Check if the module is being popped.
730 721 */
731 722 mutex_enter(&rmp->rm_lock);
732 723 if (rmp->rm_state & RM_CLOSING) {
733 724 mutex_exit(&rmp->rm_lock);
734 725 putnext(q, mp);
735 726 break;
736 727 }
737 728
738 729 switch (rmp->rm_type) {
739 730 case RPC_CLIENT:
740 731 mutex_exit(&rmp->rm_lock);
741 732 hdrsz = mp->b_wptr - mp->b_rptr;
742 733
743 734 /*
744 735 * Make sure the header is sane.
745 736 */
746 737 if (hdrsz < TUNITDATAINDSZ ||
747 738 hdrsz < (pptr->unitdata_ind.OPT_length +
748 739 pptr->unitdata_ind.OPT_offset) ||
749 740 hdrsz < (pptr->unitdata_ind.SRC_length +
750 741 pptr->unitdata_ind.SRC_offset)) {
751 742 freemsg(mp);
752 743 return;
753 744 }
754 745
755 746 /*
756 747 * Call clnt_clts_dispatch_notify, so that it
757 748 * can pass the message to the proper caller.
758 749 * Don't discard the header just yet since the
759 750 * client may need the sender's address.
760 751 */
761 752 clnt_clts_dispatch_notify(mp, hdrsz,
762 753 rmp->rm_zoneid);
763 754 return;
764 755 case RPC_SERVER:
765 756 /*
766 757 * rm_krpc_cell is exclusively used by the kRPC
767 758 * CLTS server. Try to submit the message to
768 759 * kRPC. Since this is an unreliable channel, we
769 760 * can just free the message in case the kRPC
770 761 * does not accept new messages.
771 762 */
772 763 if (rmp->rm_krpc_cell &&
773 764 svc_queuereq(q, mp, TRUE)) {
774 765 /*
775 766 * Raise the reference count on this
776 767 * module to prevent it from being
777 768 * popped before kRPC generates the
778 769 * reply.
779 770 */
780 771 rmp->rm_ref++;
781 772 mutex_exit(&rmp->rm_lock);
782 773 } else {
783 774 mutex_exit(&rmp->rm_lock);
784 775 freemsg(mp);
785 776 }
786 777 return;
787 778 default:
788 779 mutex_exit(&rmp->rm_lock);
789 780 freemsg(mp);
790 781 return;
791 782 } /* end switch(rmp->rm_type) */
792 783 } else if (pptr->type == T_UDERROR_IND) {
793 784 mutex_enter(&rmp->rm_lock);
794 785 hdrsz = mp->b_wptr - mp->b_rptr;
795 786
796 787 /*
797 788 * Make sure the header is sane
798 789 */
799 790 if (hdrsz < TUDERRORINDSZ ||
800 791 hdrsz < (pptr->uderror_ind.OPT_length +
801 792 pptr->uderror_ind.OPT_offset) ||
802 793 hdrsz < (pptr->uderror_ind.DEST_length +
803 794 pptr->uderror_ind.DEST_offset)) {
804 795 mutex_exit(&rmp->rm_lock);
805 796 freemsg(mp);
806 797 return;
807 798 }
808 799
809 800 /*
810 801 * In the case where a unit data error has been
811 802 * received, all we need to do is clear the message from
812 803 * the queue.
813 804 */
814 805 mutex_exit(&rmp->rm_lock);
815 806 freemsg(mp);
816 807 RPCLOG(32, "rpcmodrput: unitdata error received at "
817 808 "%ld\n", gethrestime_sec());
818 809 return;
819 810 } /* end else if (pptr->type == T_UDERROR_IND) */
820 811
821 812 putnext(q, mp);
822 813 break;
823 814 } /* end switch (mp->b_datap->db_type) */
824 815
825 816 TRACE_0(TR_FAC_KRPC, TR_RPCMODRPUT_END,
826 817 "rpcmodrput_end:");
827 818 /*
828 819 * Return codes are not looked at by the STREAMS framework.
829 820 */
830 821 }
831 822
832 823 /*
833 824 * write put procedure
834 825 */
835 826 void
836 827 rpcmodwput(queue_t *q, mblk_t *mp)
837 828 {
838 829 struct rpcm *rmp;
839 830
840 831 ASSERT(q != NULL);
841 832
842 833 switch (mp->b_datap->db_type) {
843 834 case M_PROTO:
844 835 case M_PCPROTO:
845 836 break;
846 837 default:
847 838 rpcmodwput_other(q, mp);
848 839 return;
849 840 }
850 841
851 842 /*
852 843 * Check to see if we can send the message downstream.
853 844 */
854 845 if (canputnext(q)) {
855 846 putnext(q, mp);
856 847 return;
857 848 }
858 849
859 850 rmp = (struct rpcm *)q->q_ptr;
860 851 ASSERT(rmp != NULL);
861 852
862 853 /*
863 854 * The first canputnext failed. Try again except this time with the
864 855 * lock held, so that we can check the state of the stream to see if
865 856 * it is closing. If either of these conditions evaluates to true,
866 857 * then send the message.
867 858 */
868 859 mutex_enter(&rmp->rm_lock);
869 860 if (canputnext(q) || (rmp->rm_state & RM_CLOSING)) {
870 861 mutex_exit(&rmp->rm_lock);
871 862 putnext(q, mp);
872 863 } else {
873 864 /*
874 865 * canputnext failed again and the stream is not closing.
875 866 * Place the message on the queue and let the service
876 867 * procedure handle the message.
877 868 */
878 869 mutex_exit(&rmp->rm_lock);
879 870 (void) putq(q, mp);
880 871 }
881 872 }
882 873
883 874 static void
884 875 rpcmodwput_other(queue_t *q, mblk_t *mp)
885 876 {
886 877 struct rpcm *rmp;
887 878 struct iocblk *iocp;
888 879
889 880 rmp = (struct rpcm *)q->q_ptr;
890 881 ASSERT(rmp != NULL);
891 882
892 883 switch (mp->b_datap->db_type) {
893 884 case M_IOCTL:
894 885 iocp = (struct iocblk *)mp->b_rptr;
895 886 ASSERT(iocp != NULL);
896 887 switch (iocp->ioc_cmd) {
897 888 case RPC_CLIENT:
898 889 case RPC_SERVER:
899 890 mutex_enter(&rmp->rm_lock);
900 891 rmp->rm_type = iocp->ioc_cmd;
901 892 mutex_exit(&rmp->rm_lock);
902 893 mp->b_datap->db_type = M_IOCACK;
903 894 qreply(q, mp);
904 895 return;
905 896 default:
906 897 /*
907 898 * pass the ioctl downstream and hope someone
908 899 * down there knows how to handle it.
909 900 */
910 901 putnext(q, mp);
911 902 return;
912 903 }
913 904 default:
914 905 break;
915 906 }
916 907 /*
917 908 * This is something we definitely do not know how to handle, just
918 909 * pass the message downstream
919 910 */
920 911 putnext(q, mp);
921 912 }
922 913
923 914 /*
924 915 * Module write service procedure. This is called by downstream modules
925 916 * for back enabling during flow control.
926 917 */
927 918 void
928 919 rpcmodwsrv(queue_t *q)
929 920 {
930 921 struct rpcm *rmp;
931 922 mblk_t *mp = NULL;
932 923
933 924 rmp = (struct rpcm *)q->q_ptr;
934 925 ASSERT(rmp != NULL);
935 926
936 927 /*
937 928 * Get messages that may be queued and send them down stream
938 929 */
939 930 while ((mp = getq(q)) != NULL) {
940 931 /*
941 932 * Optimize the service procedure for the server-side, by
942 933 * avoiding a call to canputnext().
943 934 */
944 935 if (rmp->rm_type == RPC_SERVER || canputnext(q)) {
945 936 putnext(q, mp);
946 937 continue;
947 938 }
948 939 (void) putbq(q, mp);
949 940 return;
950 941 }
951 942 }
952 943
953 -/* ARGSUSED */
954 -static void
955 -rpcmod_release(queue_t *q, mblk_t *bp, bool_t enable)
944 +void
945 +rpcmod_hold(queue_t *q)
956 946 {
947 + struct rpcm *rmp = (struct rpcm *)q->q_ptr;
948 +
949 + mutex_enter(&rmp->rm_lock);
950 + rmp->rm_ref++;
951 + mutex_exit(&rmp->rm_lock);
952 +}
953 +
954 +void
955 +rpcmod_release(queue_t *q, mblk_t *bp,
956 + /* LINTED E_FUNC_ARG_UNUSED */
957 + bool_t enable __unused)
958 +{
957 959 struct rpcm *rmp;
958 960
959 961 /*
960 962 * For now, just free the message.
961 963 */
962 964 if (bp)
963 965 freemsg(bp);
964 966 rmp = (struct rpcm *)q->q_ptr;
965 967
966 968 mutex_enter(&rmp->rm_lock);
967 969 rmp->rm_ref--;
968 970
969 971 if (rmp->rm_ref == 0 && (rmp->rm_state & RM_CLOSING)) {
970 972 cv_broadcast(&rmp->rm_cwait);
971 973 }
972 974
973 975 mutex_exit(&rmp->rm_lock);
974 976 }
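
For orientation, a sketch (not patch code): rpcmod_hold() and rpcmod_release()
are now exported as a pair so that kRPC can pin an rpcmod slot and later drop
it, replacing the old rpc_rele function-pointer hook that this patch removes;
rpcmodclose() blocks on rm_cwait until the count drains. The same
refcount-plus-closing-flag teardown protocol, reduced to a self-contained
user-space analogue with hypothetical names:

    #include <pthread.h>

    typedef struct {
            pthread_mutex_t lock;
            pthread_cond_t  cwait;
            unsigned        ref;      /* outstanding references */
            int             closing;  /* set once teardown begins */
    } ex_slot_t;

    void
    ex_hold(ex_slot_t *s)
    {
            pthread_mutex_lock(&s->lock);
            s->ref++;
            pthread_mutex_unlock(&s->lock);
    }

    void
    ex_release(ex_slot_t *s)
    {
            pthread_mutex_lock(&s->lock);
            /* Wake the closer only when the last reference drops. */
            if (--s->ref == 0 && s->closing)
                    pthread_cond_broadcast(&s->cwait);
            pthread_mutex_unlock(&s->lock);
    }

    void
    ex_close(ex_slot_t *s)
    {
            pthread_mutex_lock(&s->lock);
            s->closing = 1;
            while (s->ref != 0)
                    pthread_cond_wait(&s->cwait, &s->lock);
            pthread_mutex_unlock(&s->lock);
    }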
975 977
976 978 /*
977 979 * This part of rpcmod is pushed on a connection-oriented transport for use
978 980 * by RPC. It serves to bypass the Stream head, implements
979 981 * the record marking protocol, and dispatches incoming RPC messages.
980 982 */
981 983
982 984 /* Default idle timer values */
983 985 #define MIR_CLNT_IDLE_TIMEOUT (5 * (60 * 1000L)) /* 5 minutes */
984 986 #define MIR_SVC_IDLE_TIMEOUT (6 * (60 * 1000L)) /* 6 minutes */
985 987 #define MIR_SVC_ORDREL_TIMEOUT (10 * (60 * 1000L)) /* 10 minutes */
986 988 #define MIR_LASTFRAG 0x80000000 /* Record marker */
987 989
988 990 #define MIR_SVC_QUIESCED(mir) \
989 991 (mir->mir_ref_cnt == 0 && mir->mir_inrservice == 0)
990 992
991 993 #define MIR_CLEAR_INRSRV(mir_ptr) { \
992 994 (mir_ptr)->mir_inrservice = 0; \
993 995 if ((mir_ptr)->mir_type == RPC_SERVER && \
994 996 (mir_ptr)->mir_closing) \
995 997 cv_signal(&(mir_ptr)->mir_condvar); \
996 998 }
997 999
998 1000 /*
999 1001 * Don't block service procedure (and mir_close) if
1000 1002 * we are in the process of closing.
1001 1003 */
1002 1004 #define MIR_WCANPUTNEXT(mir_ptr, write_q) \
1003 1005 (canputnext(write_q) || ((mir_ptr)->mir_svc_no_more_msgs == 1))
1004 1006
1005 1007 static int mir_clnt_dup_request(queue_t *q, mblk_t *mp);
1006 1008 static void mir_rput_proto(queue_t *q, mblk_t *mp);
1007 1009 static int mir_svc_policy_notify(queue_t *q, int event);
1008 -static void mir_svc_release(queue_t *wq, mblk_t *mp, bool_t);
1009 1010 static void mir_svc_start(queue_t *wq);
1010 1011 static void mir_svc_idle_start(queue_t *, mir_t *);
1011 1012 static void mir_svc_idle_stop(queue_t *, mir_t *);
1012 1013 static void mir_svc_start_close(queue_t *, mir_t *);
1013 1014 static void mir_clnt_idle_do_stop(queue_t *);
1014 1015 static void mir_clnt_idle_stop(queue_t *, mir_t *);
1015 1016 static void mir_clnt_idle_start(queue_t *, mir_t *);
1016 1017 static void mir_wput(queue_t *q, mblk_t *mp);
1017 1018 static void mir_wput_other(queue_t *q, mblk_t *mp);
1018 1019 static void mir_wsrv(queue_t *q);
1019 1020 static void mir_disconnect(queue_t *, mir_t *ir);
1020 1021 static int mir_check_len(queue_t *, mblk_t *);
1021 1022 static void mir_timer(void *);
1022 1023
1023 -extern void (*mir_rele)(queue_t *, mblk_t *, bool_t);
1024 1024 extern void (*mir_start)(queue_t *);
1025 1025 extern void (*clnt_stop_idle)(queue_t *);
1026 1026
1027 -clock_t clnt_idle_timeout = MIR_CLNT_IDLE_TIMEOUT;
1028 -clock_t svc_idle_timeout = MIR_SVC_IDLE_TIMEOUT;
1027 +volatile clock_t clnt_idle_timeout = MIR_CLNT_IDLE_TIMEOUT;
1028 +volatile clock_t svc_idle_timeout = MIR_SVC_IDLE_TIMEOUT;
1029 1029
1030 1030 /*
1031 1031 * Timeout for subsequent notifications of idle connection. This is
1032 1032 * typically used to clean up after a wedged orderly release.
1033 1033 */
1034 1034 clock_t svc_ordrel_timeout = MIR_SVC_ORDREL_TIMEOUT; /* milliseconds */
1035 1035
1036 1036 extern uint_t *clnt_max_msg_sizep;
1037 1037 extern uint_t *svc_max_msg_sizep;
1038 1038 uint_t clnt_max_msg_size = RPC_MAXDATASIZE;
1039 1039 uint_t svc_max_msg_size = RPC_MAXDATASIZE;
1040 1040 uint_t mir_krpc_cell_null;
1041 1041
1042 1042 static void
1043 1043 mir_timer_stop(mir_t *mir)
1044 1044 {
1045 1045 timeout_id_t tid;
1046 1046
1047 1047 ASSERT(MUTEX_HELD(&mir->mir_mutex));
1048 1048
1049 1049 /*
1050 1050 * Since the mir_mutex lock needs to be released to call
1051 1051 * untimeout(), we need to make sure that no other thread
1052 1052 * can start/stop the timer (changing mir_timer_id) during
1053 1053 * that time. The mir_timer_call bit and the mir_timer_cv
1054 1054 * condition variable are used to synchronize this. Setting
1055 1055 * mir_timer_call also tells mir_timer() (refer to the comments
1056 1056 * in mir_timer()) that it does not need to do anything.
1057 1057 */
1058 1058 while (mir->mir_timer_call)
1059 1059 cv_wait(&mir->mir_timer_cv, &mir->mir_mutex);
1060 1060 mir->mir_timer_call = B_TRUE;
1061 1061
1062 1062 if ((tid = mir->mir_timer_id) != 0) {
1063 1063 mir->mir_timer_id = 0;
1064 1064 mutex_exit(&mir->mir_mutex);
1065 1065 (void) untimeout(tid);
1066 1066 mutex_enter(&mir->mir_mutex);
1067 1067 }
1068 1068 mir->mir_timer_call = B_FALSE;
1069 1069 cv_broadcast(&mir->mir_timer_cv);
1070 1070 }
1071 1071
1072 1072 static void
1073 1073 mir_timer_start(queue_t *q, mir_t *mir, clock_t intrvl)
1074 1074 {
1075 1075 timeout_id_t tid;
1076 1076
1077 1077 ASSERT(MUTEX_HELD(&mir->mir_mutex));
1078 1078
1079 1079 while (mir->mir_timer_call)
1080 1080 cv_wait(&mir->mir_timer_cv, &mir->mir_mutex);
1081 1081 mir->mir_timer_call = B_TRUE;
1082 1082
1083 1083 if ((tid = mir->mir_timer_id) != 0) {
1084 1084 mutex_exit(&mir->mir_mutex);
1085 1085 (void) untimeout(tid);
1086 1086 mutex_enter(&mir->mir_mutex);
1087 1087 }
1088 1088 /* Only start the timer when it is not closing. */
1089 1089 if (!mir->mir_closing) {
1090 1090 mir->mir_timer_id = timeout(mir_timer, q,
1091 1091 MSEC_TO_TICK(intrvl));
1092 1092 }
1093 1093 mir->mir_timer_call = B_FALSE;
1094 1094 cv_broadcast(&mir->mir_timer_cv);
1095 1095 }
1096 1096
1097 1097 static int
1098 1098 mir_clnt_dup_request(queue_t *q, mblk_t *mp)
1099 1099 {
1100 1100 mblk_t *mp1;
1101 1101 uint32_t new_xid;
1102 1102 uint32_t old_xid;
1103 1103
1104 1104 ASSERT(MUTEX_HELD(&((mir_t *)q->q_ptr)->mir_mutex));
1105 1105 new_xid = BE32_TO_U32(&mp->b_rptr[4]);
1106 1106 /*
1107 1107 * This loop is a bit tacky -- it walks the STREAMS list of
1108 1108 * flow-controlled messages.
1109 1109 */
1110 1110 if ((mp1 = q->q_first) != NULL) {
1111 1111 do {
1112 1112 old_xid = BE32_TO_U32(&mp1->b_rptr[4]);
1113 1113 if (new_xid == old_xid)
1114 1114 return (1);
1115 1115 } while ((mp1 = mp1->b_next) != NULL);
1116 1116 }
1117 1117 return (0);
1118 1118 }
1119 1119
1120 1120 static int
1121 1121 mir_close(queue_t *q)
1122 1122 {
1123 1123 mir_t *mir = q->q_ptr;
1124 1124 mblk_t *mp;
1125 1125 bool_t queue_cleaned = FALSE;
1126 1126
1127 1127 RPCLOG(32, "rpcmod: mir_close of q 0x%p\n", (void *)q);
1128 1128 ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
1129 1129 mutex_enter(&mir->mir_mutex);
1130 1130 if ((mp = mir->mir_head_mp) != NULL) {
1131 1131 mir->mir_head_mp = NULL;
1132 1132 mir->mir_tail_mp = NULL;
1133 1133 freemsg(mp);
1134 1134 }
1135 1135 /*
1136 1136 * Set mir_closing so we get notified when MIR_SVC_QUIESCED()
1137 1137 * is TRUE, and so mir_timer_start() won't start the timer again.
1138 1138 */
1139 1139 mir->mir_closing = B_TRUE;
1140 1140 mir_timer_stop(mir);
1141 1141
1142 1142 if (mir->mir_type == RPC_SERVER) {
1143 1143 flushq(q, FLUSHDATA); /* Ditch anything waiting on read q */
1144 1144
1145 1145 /*
1146 1146 * This will prevent more requests from arriving and
1147 1147 * will force rpcmod to ignore flow control.
1148 1148 */
1149 1149 mir_svc_start_close(WR(q), mir);
1150 1150
1151 1151 while ((!MIR_SVC_QUIESCED(mir)) || mir->mir_inwservice == 1) {
1152 1152
1153 1153 if (mir->mir_ref_cnt && !mir->mir_inrservice &&
1154 1154 (queue_cleaned == FALSE)) {
1155 1155 /*
1156 1156 * call into SVC to clean the queue
1157 1157 */
1158 1158 mutex_exit(&mir->mir_mutex);
1159 1159 svc_queueclean(q);
1160 1160 queue_cleaned = TRUE;
1161 1161 mutex_enter(&mir->mir_mutex);
1162 1162 continue;
1163 1163 }
1164 1164
1165 1165 /*
1166 1166 * Bugid 1253810 - Force the write service
1167 1167 * procedure to send its messages, regardless
1168 1168 * of whether the downstream module is ready
1169 1169 * to accept data.
1170 1170 */
1171 1171 if (mir->mir_inwservice == 1)
1172 1172 qenable(WR(q));
1173 1173
1174 1174 cv_wait(&mir->mir_condvar, &mir->mir_mutex);
1175 1175 }
1176 1176
1177 1177 mutex_exit(&mir->mir_mutex);
1178 1178 qprocsoff(q);
1179 1179
1180 1180 /* Notify kRPC that this stream is going away. */
1181 1181 svc_queueclose(q);
1182 1182 } else {
1183 1183 mutex_exit(&mir->mir_mutex);
1184 1184 qprocsoff(q);
1185 1185 }
1186 1186
1187 1187 mutex_destroy(&mir->mir_mutex);
1188 1188 cv_destroy(&mir->mir_condvar);
1189 1189 cv_destroy(&mir->mir_timer_cv);
1190 1190 kmem_free(mir, sizeof (mir_t));
1191 1191 return (0);
1192 1192 }
1193 1193
1194 1194 /*
1195 1195 * This is server side only (RPC_SERVER).
1196 1196 *
1197 1197 * Exit idle mode.
1198 1198 */
1199 1199 static void
1200 1200 mir_svc_idle_stop(queue_t *q, mir_t *mir)
1201 1201 {
1202 1202 ASSERT(MUTEX_HELD(&mir->mir_mutex));
1203 1203 ASSERT((q->q_flag & QREADR) == 0);
1204 1204 ASSERT(mir->mir_type == RPC_SERVER);
1205 1205 RPCLOG(16, "rpcmod: mir_svc_idle_stop of q 0x%p\n", (void *)q);
1206 1206
1207 1207 mir_timer_stop(mir);
1208 1208 }
1209 1209
1210 1210 /*
1211 1211 * This is server side only (RPC_SERVER).
1212 1212 *
1213 1213 * Start idle processing, which will include setting idle timer if the
1214 1214 * stream is not being closed.
1215 1215 */
1216 1216 static void
1217 1217 mir_svc_idle_start(queue_t *q, mir_t *mir)
1218 1218 {
1219 1219 ASSERT(MUTEX_HELD(&mir->mir_mutex));
1220 1220 ASSERT((q->q_flag & QREADR) == 0);
1221 1221 ASSERT(mir->mir_type == RPC_SERVER);
1222 1222 RPCLOG(16, "rpcmod: mir_svc_idle_start q 0x%p\n", (void *)q);
1223 1223
1224 1224 /*
1225 1225 * Don't re-start idle timer if we are closing queues.
1226 1226 */
1227 1227 if (mir->mir_closing) {
1228 1228 RPCLOG(16, "mir_svc_idle_start - closing: 0x%p\n",
1229 1229 (void *)q);
1230 1230
1231 1231 /*
1232 1232 * We will call mir_svc_idle_start() whenever MIR_SVC_QUIESCED()
1233 1233 * is true. When it is true, and we are in the process of
1234 1234 * closing the stream, signal any thread waiting in
1235 1235 * mir_close().
1236 1236 */
1237 1237 if (mir->mir_inwservice == 0)
1238 1238 cv_signal(&mir->mir_condvar);
1239 1239
1240 1240 } else {
1241 1241 RPCLOG(16, "mir_svc_idle_start - reset %s timer\n",
1242 1242 mir->mir_ordrel_pending ? "ordrel" : "normal");
1243 1243 /*
1244 1244 * Normal condition, start the idle timer. If an orderly
1245 1245 * release has been sent, set the timeout to wait for the
1246 1246 * client to close its side of the connection. Otherwise,
1247 1247 * use the normal idle timeout.
1248 1248 */
1249 1249 mir_timer_start(q, mir, mir->mir_ordrel_pending ?
1250 1250 svc_ordrel_timeout : mir->mir_idle_timeout);
1251 1251 }
1252 1252 }
1253 1253
1254 1254 /* ARGSUSED */
1255 1255 static int
1256 1256 mir_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1257 1257 {
1258 1258 mir_t *mir;
1259 1259
1260 1260 RPCLOG(32, "rpcmod: mir_open of q 0x%p\n", (void *)q);
1261 1261 /* Set variables used directly by kRPC. */
1262 - if (!mir_rele)
1263 - mir_rele = mir_svc_release;
1264 1262 if (!mir_start)
1265 1263 mir_start = mir_svc_start;
1266 1264 if (!clnt_stop_idle)
1267 1265 clnt_stop_idle = mir_clnt_idle_do_stop;
1268 1266 if (!clnt_max_msg_sizep)
1269 1267 clnt_max_msg_sizep = &clnt_max_msg_size;
1270 1268 if (!svc_max_msg_sizep)
1271 1269 svc_max_msg_sizep = &svc_max_msg_size;
1272 1270
1273 1271 /* Allocate a zero'ed out mir structure for this stream. */
1274 1272 mir = kmem_zalloc(sizeof (mir_t), KM_SLEEP);
1275 1273
1276 1274 /*
1277 1275 * We set hold inbound here so that incoming messages will
1278 1276 * be held on the read-side queue until the stream is completely
1279 1277 * initialized with a RPC_CLIENT or RPC_SERVER ioctl. During
1280 1278 * the ioctl processing, the flag is cleared and any messages that
1281 1279 * arrived between the open and the ioctl are delivered to kRPC.
1282 1280 *
1283 1281 * Early data should never arrive on a client stream since
1284 1282 * servers only respond to our requests and we do not send any
1285 1283 * until after the stream is initialized. Early data is
1286 1284 * very common on a server stream where the client will start
1287 1285 * sending data as soon as the connection is made (and this
1288 1286 * is especially true with TCP where the protocol accepts the
1289 1287 * connection before nfsd or kRPC is notified about it).
1290 1288 */
1291 1289
1292 1290 mir->mir_hold_inbound = 1;
1293 1291
1294 1292 /*
1295 1293 * Start the record marker looking for a 4-byte header. When
1296 1294 * this length is negative, it indicates that rpcmod is looking
1297 1295 * for bytes to consume for the record marker header. When it
1298 1296 * is positive, it holds the number of bytes that have arrived
1299 1297 * for the current fragment and are being held in mir_head_mp.
1300 1298 */
1301 1299
1302 1300 mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
1303 1301
1304 1302 mir->mir_zoneid = rpc_zoneid();
1305 1303 mutex_init(&mir->mir_mutex, NULL, MUTEX_DEFAULT, NULL);
1306 1304 cv_init(&mir->mir_condvar, NULL, CV_DRIVER, NULL);
1307 1305 cv_init(&mir->mir_timer_cv, NULL, CV_DRIVER, NULL);
1308 1306
1309 1307 q->q_ptr = (char *)mir;
1310 1308 WR(q)->q_ptr = (char *)mir;
1311 1309
1312 1310 /*
1313 1311 * We noenable the read-side queue because we don't want it
1314 1312 * automatically enabled by putq. We enable it explicitly
1315 1313 * in mir_wsrv when appropriate. (See additional comments on
1316 1314 * flow control at the beginning of mir_rsrv.)
1317 1315 */
1318 1316 noenable(q);
1319 1317
1320 1318 qprocson(q);
1321 1319 return (0);
1322 1320 }
1323 1321
1324 1322 /*
1325 1323 * Read-side put routine for both the client and server side. Does the
1326 1324 * record marking for incoming RPC messages, and when complete, dispatches
1327 1325 * the message to either the client or server.
1328 1326 */
1329 1327 static void
1330 1328 mir_rput(queue_t *q, mblk_t *mp)
1331 1329 {
1332 1330 int excess;
1333 1331 int32_t frag_len, frag_header;
1334 1332 mblk_t *cont_mp, *head_mp, *tail_mp, *mp1;
1335 1333 mir_t *mir = q->q_ptr;
1336 1334 boolean_t stop_timer = B_FALSE;
1337 1335
1338 1336 ASSERT(mir != NULL);
1339 1337
1340 1338 /*
1341 1339 * If the stream has not been set up as a RPC_CLIENT or RPC_SERVER
1342 1340 * with the corresponding ioctl, then don't accept
1343 1341 * any inbound data. This should never happen for streams
1344 1342 * created by nfsd or client-side kRPC because they are careful
1345 1343 * to set the mode of the stream before doing anything else.
1346 1344 */
1347 1345 if (mir->mir_type == 0) {
1348 1346 freemsg(mp);
1349 1347 return;
1350 1348 }
1351 1349
1352 1350 ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
1353 1351
1354 1352 switch (mp->b_datap->db_type) {
1355 1353 case M_DATA:
1356 1354 break;
1357 1355 case M_PROTO:
1358 1356 case M_PCPROTO:
1359 1357 if (MBLKL(mp) < sizeof (t_scalar_t)) {
1360 1358 RPCLOG(1, "mir_rput: runt TPI message (%d bytes)\n",
1361 1359 (int)MBLKL(mp));
1362 1360 freemsg(mp);
1363 1361 return;
1364 1362 }
1365 1363 if (((union T_primitives *)mp->b_rptr)->type != T_DATA_IND) {
1366 1364 mir_rput_proto(q, mp);
1367 1365 return;
1368 1366 }
1369 1367
1370 1368 /* Throw away the T_DATA_IND block and continue with data. */
1371 1369 mp1 = mp;
1372 1370 mp = mp->b_cont;
1373 1371 freeb(mp1);
1374 1372 break;
1375 1373 case M_SETOPTS:
1376 1374 /*
1377 1375 * If a module on the stream is trying to set the Stream head's
1378 1376 * high water mark, then set our hiwater to the requested
1379 1377 * value. We are the "stream head" for all inbound
1380 1378 * data messages since messages are passed directly to kRPC.
1381 1379 */
1382 1380 if (MBLKL(mp) >= sizeof (struct stroptions)) {
1383 1381 struct stroptions *stropts;
1384 1382
1385 1383 stropts = (struct stroptions *)mp->b_rptr;
1386 1384 if ((stropts->so_flags & SO_HIWAT) &&
1387 1385 !(stropts->so_flags & SO_BAND)) {
1388 1386 (void) strqset(q, QHIWAT, 0, stropts->so_hiwat);
1389 1387 }
1390 1388 }
1391 1389 putnext(q, mp);
1392 1390 return;
1393 1391 case M_FLUSH:
1394 1392 RPCLOG(32, "mir_rput: ignoring M_FLUSH %x ", *mp->b_rptr);
1395 1393 RPCLOG(32, "on q 0x%p\n", (void *)q);
1396 1394 putnext(q, mp);
1397 1395 return;
1398 1396 default:
1399 1397 putnext(q, mp);
1400 1398 return;
1401 1399 }
1402 1400
1403 1401 mutex_enter(&mir->mir_mutex);
1404 1402
1405 1403 /*
1406 1404 * If this connection is closing, don't accept any new messages.
1407 1405 */
1408 1406 if (mir->mir_svc_no_more_msgs) {
1409 1407 ASSERT(mir->mir_type == RPC_SERVER);
1410 1408 mutex_exit(&mir->mir_mutex);
1411 1409 freemsg(mp);
1412 1410 return;
1413 1411 }
1414 1412
1415 1413 /* Get local copies for quicker access. */
1416 1414 frag_len = mir->mir_frag_len;
1417 1415 frag_header = mir->mir_frag_header;
1418 1416 head_mp = mir->mir_head_mp;
1419 1417 tail_mp = mir->mir_tail_mp;
1420 1418
1421 1419 /* Loop, processing each message block in the mp chain separately. */
1422 1420 do {
1423 1421 cont_mp = mp->b_cont;
1424 1422 mp->b_cont = NULL;
1425 1423
1426 1424 /*
1427 1425 * Drop zero-length mblks to prevent unbounded kernel memory
1428 1426 * consumption.
1429 1427 */
1430 1428 if (MBLKL(mp) == 0) {
1431 1429 freeb(mp);
1432 1430 continue;
1433 1431 }
1434 1432
1435 1433 /*
1436 1434 * If frag_len is negative, we're still in the process of
1437 1435 * building frag_header -- try to complete it with this mblk.
1438 1436 */
1439 1437 while (frag_len < 0 && mp->b_rptr < mp->b_wptr) {
1440 1438 frag_len++;
1441 1439 frag_header <<= 8;
1442 1440 frag_header += *mp->b_rptr++;
1443 1441 }
1444 1442
1445 1443 if (MBLKL(mp) == 0 && frag_len < 0) {
1446 1444 /*
1447 1445 * We consumed this mblk while trying to complete the
1448 1446 * fragment header. Free it and move on.
1449 1447 */
1450 1448 freeb(mp);
1451 1449 continue;
1452 1450 }
1453 1451
1454 1452 ASSERT(frag_len >= 0);
1455 1453
1456 1454 /*
1457 1455 * Now frag_header has the number of bytes in this fragment
1458 1456 * and we're just waiting to collect them all. Chain our
1459 1457 * latest mblk onto the list and see if we now have enough
1460 1458 * bytes to complete the fragment.
1461 1459 */
1462 1460 if (head_mp == NULL) {
1463 1461 ASSERT(tail_mp == NULL);
1464 1462 head_mp = tail_mp = mp;
1465 1463 } else {
1466 1464 tail_mp->b_cont = mp;
1467 1465 tail_mp = mp;
1468 1466 }
1469 1467
1470 1468 frag_len += MBLKL(mp);
1471 1469 excess = frag_len - (frag_header & ~MIR_LASTFRAG);
1472 1470 if (excess < 0) {
1473 1471 /*
1474 1472 * We still haven't received enough data to complete
1475 1473 * the fragment, so continue on to the next mblk.
1476 1474 */
1477 1475 continue;
1478 1476 }
1479 1477
1480 1478 /*
1481 1479 * We've got a complete fragment. If there are excess bytes,
1482 1480 * then they're part of the next fragment's header (of either
1483 1481 * this RPC message or the next RPC message). Split that part
1484 1482 * into its own mblk so that we can safely freeb() it when
1485 1483 * building frag_header above.
1486 1484 */
1487 1485 if (excess > 0) {
1488 1486 if ((mp1 = dupb(mp)) == NULL &&
1489 1487 (mp1 = copyb(mp)) == NULL) {
1490 1488 freemsg(head_mp);
1491 1489 freemsg(cont_mp);
1492 1490 RPCLOG0(1, "mir_rput: dupb/copyb failed\n");
1493 1491 mir->mir_frag_header = 0;
1494 1492 mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
1495 1493 mir->mir_head_mp = NULL;
1496 1494 mir->mir_tail_mp = NULL;
1497 1495 mir_disconnect(q, mir); /* drops mir_mutex */
1498 1496 return;
1499 1497 }
1500 1498
1501 1499 /*
1502 1500 * Relink the message chain so that the next mblk is
1503 1501 * the next fragment header, followed by the rest of
1504 1502 * the message chain.
1505 1503 */
1506 1504 mp1->b_cont = cont_mp;
1507 1505 cont_mp = mp1;
1508 1506
1509 1507 /*
1510 1508 * Data in the new mblk begins at the next fragment,
1511 1509 * and data in the old mblk ends at the next fragment.
1512 1510 */
1513 1511 mp1->b_rptr = mp1->b_wptr - excess;
1514 1512 mp->b_wptr -= excess;
1515 1513 }
1516 1514
1517 1515 /*
1518 1516 * Reset frag_len and frag_header for the next fragment.
1519 1517 */
1520 1518 frag_len = -(int32_t)sizeof (uint32_t);
1521 1519 if (!(frag_header & MIR_LASTFRAG)) {
1522 1520 /*
1523 1521 * The current fragment is complete, but more
1524 1522 * fragments need to be processed before we can
1525 1523 * pass along the RPC message headed at head_mp.
1526 1524 */
1527 1525 frag_header = 0;
1528 1526 continue;
1529 1527 }
1530 1528 frag_header = 0;
1531 1529
1532 1530 /*
1533 1531 * We've got a complete RPC message; pass it to the
1534 1532 * appropriate consumer.
1535 1533 */
1536 1534 switch (mir->mir_type) {
1537 1535 case RPC_CLIENT:
1538 1536 if (clnt_dispatch_notify(head_mp, mir->mir_zoneid)) {
1539 1537 /*
1540 1538 * Mark this stream as active. This marker
1541 1539 * is used in mir_timer().
1542 1540 */
1543 1541 mir->mir_clntreq = 1;
1544 1542 mir->mir_use_timestamp = ddi_get_lbolt();
1545 1543 } else {
1546 1544 freemsg(head_mp);
1547 1545 }
1548 1546 break;
1549 1547
1550 1548 case RPC_SERVER:
1551 1549 /*
1552 1550 * Check for flow control before passing the
1553 1551 * message to kRPC.
1554 1552 */
1555 1553 if (!mir->mir_hold_inbound) {
1556 1554 if (mir->mir_krpc_cell) {
1557 1555
1558 1556 if (mir_check_len(q, head_mp))
1559 1557 return;
1560 1558
1561 1559 if (q->q_first == NULL &&
1562 1560 svc_queuereq(q, head_mp, TRUE)) {
1563 1561 /*
1564 1562 * If the reference count is 0
1565 1563 * (not including this
1566 1564 * request), then the stream is
1567 1565 * transitioning from idle to
1568 1566 * non-idle. In this case, we
1569 1567 * cancel the idle timer.
1570 1568 */
1571 1569 if (mir->mir_ref_cnt++ == 0)
1572 1570 stop_timer = B_TRUE;
1573 1571 } else {
1574 1572 (void) putq(q, head_mp);
1575 1573 mir->mir_inrservice = B_TRUE;
1576 1574 }
1577 1575 } else {
1578 1576 /*
1579 1577 * Count # of times this happens. It
1580 1578 * should never happen, but experience
1581 1579 * shows otherwise.
1582 1580 */
1583 1581 mir_krpc_cell_null++;
1584 1582 freemsg(head_mp);
1585 1583 }
1586 1584 } else {
1587 1585 /*
1588 1586 * If the outbound side of the stream is
1589 1587 * flow controlled, then hold this message
1590 1588 * until client catches up. mir_hold_inbound
1591 1589 * is set in mir_wput and cleared in mir_wsrv.
1592 1590 */
1593 1591 (void) putq(q, head_mp);
1594 1592 mir->mir_inrservice = B_TRUE;
1595 1593 }
1596 1594 break;
1597 1595 default:
1598 1596 RPCLOG(1, "mir_rput: unknown mir_type %d\n",
1599 1597 mir->mir_type);
1600 1598 freemsg(head_mp);
1601 1599 break;
1602 1600 }
1603 1601
1604 1602 /*
1605 1603 * Reset the chain since we're starting on a new RPC message.
1606 1604 */
1607 1605 head_mp = tail_mp = NULL;
1608 1606 } while ((mp = cont_mp) != NULL);
1609 1607
1610 1608 /*
1611 1609 * Sanity check the message length; if it's too large mir_check_len()
1612 1610 * will shutdown the connection, drop mir_mutex, and return non-zero.
1613 1611 */
1614 1612 if (head_mp != NULL && mir->mir_setup_complete &&
1615 1613 mir_check_len(q, head_mp))
1616 1614 return;
1617 1615
1618 1616 /* Save our local copies back in the mir structure. */
1619 1617 mir->mir_frag_header = frag_header;
1620 1618 mir->mir_frag_len = frag_len;
1621 1619 mir->mir_head_mp = head_mp;
1622 1620 mir->mir_tail_mp = tail_mp;
1623 1621
1624 1622 /*
1625 1623 * The timer is stopped after the whole message chain is processed.
1626 1624 * The reason is that stopping the timer releases the mir_mutex
1627 1625 * lock temporarily. This means that the request can be serviced
1628 1626 * while we are still processing the message chain. This is not
1629 1627 * good. So we stop the timer here instead.
1630 1628 *
1631 1629 * Note that if the timer fires before we stop it, it will not
1632 1630 * do any harm as MIR_SVC_QUIESCED() is false and mir_timer()
1633 1631 * will just return.
1634 1632 */
1635 1633 if (stop_timer) {
1636 1634 RPCLOG(16, "mir_rput: stopping idle timer on 0x%p because "
1637 1635 "ref cnt going to non zero\n", (void *)WR(q));
1638 1636 mir_svc_idle_stop(WR(q), mir);
1639 1637 }
1640 1638 mutex_exit(&mir->mir_mutex);
1641 1639 }
1642 1640
1643 1641 static void
1644 1642 mir_rput_proto(queue_t *q, mblk_t *mp)
1645 1643 {
1646 1644 mir_t *mir = (mir_t *)q->q_ptr;
1647 1645 uint32_t type;
1648 1646 uint32_t reason = 0;
1649 1647
1650 1648 ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
1651 1649
1652 1650 type = ((union T_primitives *)mp->b_rptr)->type;
1653 1651 switch (mir->mir_type) {
1654 1652 case RPC_CLIENT:
1655 1653 switch (type) {
1656 1654 case T_DISCON_IND:
1657 1655 reason = ((struct T_discon_ind *)
1658 1656 (mp->b_rptr))->DISCON_reason;
1659 1657 /*FALLTHROUGH*/
1660 1658 case T_ORDREL_IND:
1661 1659 mutex_enter(&mir->mir_mutex);
1662 1660 if (mir->mir_head_mp) {
1663 1661 freemsg(mir->mir_head_mp);
1664 1662 mir->mir_head_mp = (mblk_t *)0;
1665 1663 mir->mir_tail_mp = (mblk_t *)0;
1666 1664 }
1667 1665 /*
1668 1666 * We are disconnecting, but not necessarily
1669 1667 * closing. By not closing, we will fail to
1670 1668 * pick up a possibly changed global timeout value,
1671 1669 * unless we store it now.
1672 1670 */
1673 1671 mir->mir_idle_timeout = clnt_idle_timeout;
1674 1672 mir_clnt_idle_stop(WR(q), mir);
1675 1673
1676 1674 /*
1677 1675 * Even though we are unconnected, we still
1678 1676 * leave the idle timer going on the client. The
1679 1677 * reason for this is that if we've disconnected due
1680 1678 * to a server-side disconnect, reset, or connection
1681 1679 * timeout, there is a possibility the client may
1682 1680 * retry the RPC request. This retry needs to be done on
1683 1681 * the same bound address for the server to interpret
1684 1682 * it as such. However, we don't want
1685 1683 * to wait forever for that possibility. If the
1686 1684 * end-point stays unconnected for mir_idle_timeout
1687 1685 * units of time, then that is a signal to the
1688 1686 * connection manager to give up waiting for the
1689 1687 * application (eg. NFS) to send a retry.
1690 1688 */
1691 1689 mir_clnt_idle_start(WR(q), mir);
1692 1690 mutex_exit(&mir->mir_mutex);
1693 1691 clnt_dispatch_notifyall(WR(q), type, reason);
1694 1692 freemsg(mp);
1695 1693 return;
1696 1694 case T_ERROR_ACK:
1697 1695 {
1698 1696 struct T_error_ack *terror;
1699 1697
1700 1698 terror = (struct T_error_ack *)mp->b_rptr;
1701 1699 RPCLOG(1, "mir_rput_proto T_ERROR_ACK for queue 0x%p",
1702 1700 (void *)q);
1703 1701 RPCLOG(1, " ERROR_prim: %s,",
1704 1702 rpc_tpiprim2name(terror->ERROR_prim));
1705 1703 RPCLOG(1, " TLI_error: %s,",
1706 1704 rpc_tpierr2name(terror->TLI_error));
1707 1705 RPCLOG(1, " UNIX_error: %d\n", terror->UNIX_error);
1708 1706 if (terror->ERROR_prim == T_DISCON_REQ) {
1709 1707 clnt_dispatch_notifyall(WR(q), type, reason);
1710 1708 freemsg(mp);
1711 1709 return;
1712 1710 } else {
1713 1711 if (clnt_dispatch_notifyconn(WR(q), mp))
1714 1712 return;
1715 1713 }
1716 1714 break;
1717 1715 }
1718 1716 case T_OK_ACK:
1719 1717 {
1720 1718 struct T_ok_ack *tok = (struct T_ok_ack *)mp->b_rptr;
1721 1719
1722 1720 if (tok->CORRECT_prim == T_DISCON_REQ) {
1723 1721 clnt_dispatch_notifyall(WR(q), type, reason);
1724 1722 freemsg(mp);
1725 1723 return;
1726 1724 } else {
1727 1725 if (clnt_dispatch_notifyconn(WR(q), mp))
1728 1726 return;
1729 1727 }
1730 1728 break;
1731 1729 }
1732 1730 case T_CONN_CON:
1733 1731 case T_INFO_ACK:
1734 1732 case T_OPTMGMT_ACK:
1735 1733 if (clnt_dispatch_notifyconn(WR(q), mp))
1736 1734 return;
1737 1735 break;
1738 1736 case T_BIND_ACK:
1739 1737 break;
1740 1738 default:
1741 1739 			RPCLOG(1, "mir_rput_proto: unexpected message %d "
1742 1740 "for kRPC client\n",
1743 1741 ((union T_primitives *)mp->b_rptr)->type);
1744 1742 break;
1745 1743 }
1746 1744 break;
1747 1745
1748 1746 case RPC_SERVER:
1749 1747 switch (type) {
1750 1748 case T_BIND_ACK:
1751 1749 {
1752 1750 struct T_bind_ack *tbind;
1753 1751
1754 1752 /*
1755 1753 * If this is a listening stream, then shut
1756 1754 * off the idle timer.
1757 1755 */
1758 1756 tbind = (struct T_bind_ack *)mp->b_rptr;
1759 1757 if (tbind->CONIND_number > 0) {
1760 1758 mutex_enter(&mir->mir_mutex);
1761 1759 mir_svc_idle_stop(WR(q), mir);
1762 1760
1763 1761 /*
1764 1762 * mark this as a listen endpoint
1765 1763 * for special handling.
1766 1764 */
1767 1765
1768 1766 mir->mir_listen_stream = 1;
1769 1767 mutex_exit(&mir->mir_mutex);
1770 1768 }
1771 1769 break;
1772 1770 }
1773 1771 case T_DISCON_IND:
1774 1772 case T_ORDREL_IND:
1775 1773 RPCLOG(16, "mir_rput_proto: got %s indication\n",
1776 1774 type == T_DISCON_IND ? "disconnect"
1777 1775 : "orderly release");
1778 1776
1779 1777 /*
1780 1778 		 * For a listen endpoint, just pass
1781 1779 * on the message.
1782 1780 */
1783 1781
1784 1782 if (mir->mir_listen_stream)
1785 1783 break;
1786 1784
1787 1785 mutex_enter(&mir->mir_mutex);
1788 1786
1789 1787 /*
1790 1788 * If client wants to break off connection, record
1791 1789 * that fact.
1792 1790 */
1793 1791 mir_svc_start_close(WR(q), mir);
1794 1792
1795 1793 /*
1796 1794 * If we are idle, then send the orderly release
1797 1795 * or disconnect indication to nfsd.
1798 1796 */
1799 1797 if (MIR_SVC_QUIESCED(mir)) {
1800 1798 mutex_exit(&mir->mir_mutex);
1801 1799 break;
1802 1800 }
1803 1801
1804 1802 RPCLOG(16, "mir_rput_proto: not idle, so "
1805 1803 "disconnect/ord rel indication not passed "
1806 1804 "upstream on 0x%p\n", (void *)q);
1807 1805
1808 1806 /*
1809 1807 		 * Hold the indication until we get idle.
1810 1808 * If there already is an indication stored,
1811 1809 * replace it if the new one is a disconnect. The
1812 1810 * reasoning is that disconnection takes less time
1813 1811 * to process, and once a client decides to
1814 1812 * disconnect, we should do that.
1815 1813 */
1816 1814 if (mir->mir_svc_pend_mp) {
1817 1815 if (type == T_DISCON_IND) {
1818 1816 RPCLOG(16, "mir_rput_proto: replacing"
1819 1817 " held disconnect/ord rel"
1820 1818 " indication with disconnect on"
1821 1819 " 0x%p\n", (void *)q);
1822 1820
1823 1821 freemsg(mir->mir_svc_pend_mp);
1824 1822 mir->mir_svc_pend_mp = mp;
1825 1823 } else {
1826 1824 RPCLOG(16, "mir_rput_proto: already "
1827 1825 "held a disconnect/ord rel "
1828 1826 "indication. freeing ord rel "
1829 1827 "ind on 0x%p\n", (void *)q);
1830 1828 freemsg(mp);
1831 1829 }
1832 1830 } else
1833 1831 mir->mir_svc_pend_mp = mp;
1834 1832
1835 1833 mutex_exit(&mir->mir_mutex);
1836 1834 return;
1837 1835
1838 1836 default:
1839 1837 /* nfsd handles server-side non-data messages. */
1840 1838 break;
1841 1839 }
1842 1840 break;
1843 1841
1844 1842 default:
1845 1843 break;
1846 1844 }
1847 1845
1848 1846 putnext(q, mp);
1849 1847 }
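
The held-indication logic above reduces to a small replacement policy: a disconnect supersedes whatever indication is already held. A minimal stand-alone sketch of that policy (the pend pointer plays the role of mir_svc_pend_mp; locking is omitted):

	/*
	 * Sketch: hold at most one TPI indication; a T_DISCON_IND
	 * replaces a held T_ORDREL_IND, and a redundant ordrel is dropped.
	 */
	static void
	hold_indication(mblk_t **pend, mblk_t *mp, t_scalar_t type)
	{
		if (*pend == NULL) {
			*pend = mp;		/* nothing held yet */
		} else if (type == T_DISCON_IND) {
			freemsg(*pend);		/* disconnect wins */
			*pend = mp;
		} else {
			freemsg(mp);		/* already holding one */
		}
	}
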
1850 1848
1851 1849 /*
1852 1850 * The server-side read queues are used to hold inbound messages while
1853 1851 * outbound flow control is exerted. When outbound flow control is
1854 1852 * relieved, mir_wsrv qenables the read-side queue. Read-side queues
1855 1853 * are not enabled by STREAMS and are explicitly noenable'ed in mir_open.
1856 1854 */
1857 1855 static void
1858 1856 mir_rsrv(queue_t *q)
1859 1857 {
1860 1858 mir_t *mir;
1861 1859 mblk_t *mp;
1862 1860 boolean_t stop_timer = B_FALSE;
1863 1861
1864 1862 mir = (mir_t *)q->q_ptr;
1865 1863 mutex_enter(&mir->mir_mutex);
1866 1864
1867 1865 mp = NULL;
1868 1866 switch (mir->mir_type) {
1869 1867 case RPC_SERVER:
1870 1868 if (mir->mir_ref_cnt == 0)
1871 1869 mir->mir_hold_inbound = 0;
1872 1870 if (mir->mir_hold_inbound)
1873 1871 break;
1874 1872
1875 1873 		while ((mp = getq(q)) != NULL) {
1876 1874 if (mir->mir_krpc_cell &&
1877 1875 (mir->mir_svc_no_more_msgs == 0)) {
1878 1876
1879 1877 if (mir_check_len(q, mp))
1880 1878 return;
1881 1879
1882 1880 if (svc_queuereq(q, mp, TRUE)) {
1883 1881 /*
1884 1882 * If we were idle, turn off idle timer
1885 1883 * since we aren't idle any more.
1886 1884 */
1887 1885 if (mir->mir_ref_cnt++ == 0)
1888 1886 stop_timer = B_TRUE;
1889 1887 } else {
1890 1888 (void) putbq(q, mp);
1891 1889 break;
1892 1890 }
1893 1891 } else {
1894 1892 /*
1895 1893 				 * Count # of times this happens. Should
1896 1894 				 * never happen, but experience shows otherwise.
1897 1895 */
1898 1896 if (mir->mir_krpc_cell == NULL)
1899 1897 mir_krpc_cell_null++;
1900 1898 freemsg(mp);
1901 1899 }
1902 1900 }
1903 1901 break;
1904 1902 case RPC_CLIENT:
1905 1903 break;
1906 1904 default:
1907 1905 RPCLOG(1, "mir_rsrv: unexpected mir_type %d\n", mir->mir_type);
1908 1906
1909 1907 if (q->q_first == NULL)
1910 1908 MIR_CLEAR_INRSRV(mir);
1911 1909
1912 1910 mutex_exit(&mir->mir_mutex);
1913 1911
1914 1912 return;
1915 1913 }
1916 1914
1917 1915 /*
1918 1916 * The timer is stopped after all the messages are processed.
1919 1917 * The reason is that stopping the timer releases the mir_mutex
1920 1918 * lock temporarily. This means that the request can be serviced
1921 1919 * while we are still processing the message queue. This is not
1922 1920 * good. So we stop the timer here instead.
1923 1921 */
1924 1922 if (stop_timer) {
1925 1923 RPCLOG(16, "mir_rsrv stopping idle timer on 0x%p because ref "
1926 1924 "cnt going to non zero\n", (void *)WR(q));
1927 1925 mir_svc_idle_stop(WR(q), mir);
1928 1926 }
1929 1927
1930 1928 if (q->q_first == NULL) {
1931 1929 mblk_t *cmp = NULL;
1932 1930
1933 1931 MIR_CLEAR_INRSRV(mir);
1934 1932
1935 1933 if (mir->mir_type == RPC_SERVER && MIR_SVC_QUIESCED(mir)) {
1936 1934 cmp = mir->mir_svc_pend_mp;
1937 1935 mir->mir_svc_pend_mp = NULL;
1938 1936 }
1939 1937
1940 1938 mutex_exit(&mir->mir_mutex);
1941 1939
1942 1940 if (cmp != NULL) {
1943 1941 RPCLOG(16, "mir_rsrv: line %d: sending a held "
1944 1942 "disconnect/ord rel indication upstream\n",
1945 1943 __LINE__);
1946 1944 putnext(q, cmp);
1947 1945 }
1948 1946
1949 1947 return;
1950 1948 }
1951 1949 mutex_exit(&mir->mir_mutex);
1952 1950 }
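
The drain loop in mir_rsrv() follows the standard STREAMS dequeue-or-requeue pattern. A minimal sketch, with handoff() standing in for svc_queuereq() (handoff is hypothetical; getq and putbq are the stock STREAMS routines):

	static void
	drain_or_requeue(queue_t *q)
	{
		mblk_t *mp;

		while ((mp = getq(q)) != NULL) {
			if (!handoff(q, mp)) {		/* consumer is busy */
				(void) putbq(q, mp);	/* requeue; retried when
							   the srv runs again */
				break;
			}
		}
	}
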
1953 1951
1954 1952 static int mir_svc_policy_fails;
1955 1953
1956 1954 /*
1957 1955 * Called to send an event code to nfsd/lockd so that it initiates
1958 1956 * connection close.
1959 1957 */
1960 1958 static int
1961 1959 mir_svc_policy_notify(queue_t *q, int event)
1962 1960 {
1963 1961 mblk_t *mp;
1964 1962 #ifdef DEBUG
1965 1963 mir_t *mir = (mir_t *)q->q_ptr;
1966 1964 ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
1967 1965 #endif
1968 1966 ASSERT(q->q_flag & QREADR);
1969 1967
1970 1968 /*
1971 1969 * Create an M_DATA message with the event code and pass it to the
1972 1970 * Stream head (nfsd or whoever created the stream will consume it).
1973 1971 */
1974 1972 mp = allocb(sizeof (int), BPRI_HI);
1975 1973
1976 1974 if (!mp) {
1977 1975
1978 1976 mir_svc_policy_fails++;
1979 1977 RPCLOG(16, "mir_svc_policy_notify: could not allocate event "
1980 1978 "%d\n", event);
1981 1979 return (ENOMEM);
1982 1980 }
1983 1981
1984 1982 U32_TO_BE32(event, mp->b_rptr);
1985 1983 mp->b_wptr = mp->b_rptr + sizeof (int);
1986 1984 putnext(q, mp);
1987 1985 return (0);
1988 1986 }
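
The event code is written big-endian by U32_TO_BE32, so a user-level consumer at the stream head decodes the four bytes explicitly. A sketch of such a decoder (the buffer is whatever the daemon read from the stream):

	#include <stdint.h>

	static uint32_t
	decode_event(const unsigned char *buf)
	{
		/* big-endian: most significant byte first */
		return (((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) |
		    ((uint32_t)buf[2] << 8) | (uint32_t)buf[3]);
	}
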
1989 1987
1990 1988 /*
1991 1989 * Server side: start the close phase. We want to get this rpcmod slot in an
1992 1990 * idle state before mir_close() is called.
1993 1991 */
1994 1992 static void
1995 1993 mir_svc_start_close(queue_t *wq, mir_t *mir)
1996 1994 {
1997 1995 ASSERT(MUTEX_HELD(&mir->mir_mutex));
1998 1996 ASSERT((wq->q_flag & QREADR) == 0);
1999 1997 ASSERT(mir->mir_type == RPC_SERVER);
2000 1998
2001 1999 /*
2002 2000 * Do not accept any more messages.
2003 2001 */
2004 2002 mir->mir_svc_no_more_msgs = 1;
2005 2003
2006 2004 /*
2007 2005 	 * The next two statements make the read service procedure
2008 2006 * free everything stuck in the streams read queue.
2009 2007 * It's not necessary because enabling the write queue will
2010 2008 * have the same effect, but why not speed the process along?
2011 2009 */
[738 lines elided]
2012 2010 mir->mir_hold_inbound = 0;
2013 2011 qenable(RD(wq));
2014 2012
2015 2013 /*
2016 2014 * Meanwhile force the write service procedure to send the
2017 2015 * responses downstream, regardless of flow control.
2018 2016 */
2019 2017 qenable(wq);
2020 2018 }
2021 2019
2020 +void
2021 +mir_svc_hold(queue_t *wq)
2022 +{
2023 + mir_t *mir = (mir_t *)wq->q_ptr;
2024 +
2025 + mutex_enter(&mir->mir_mutex);
2026 + mir->mir_ref_cnt++;
2027 + mutex_exit(&mir->mir_mutex);
2028 +}
2029 +
2022 2030 /*
2023 2031 * This routine is called directly by kRPC after a request is completed,
2024 2032 * whether a reply was sent or the request was dropped.
2025 2033 */
2026 -static void
2034 +void
2027 2035 mir_svc_release(queue_t *wq, mblk_t *mp, bool_t enable)
2028 2036 {
2029 2037 mir_t *mir = (mir_t *)wq->q_ptr;
2030 2038 mblk_t *cmp = NULL;
2031 2039
2032 2040 ASSERT((wq->q_flag & QREADR) == 0);
2033 2041 if (mp)
2034 2042 freemsg(mp);
2035 2043
2036 2044 if (enable)
2037 2045 qenable(RD(wq));
2038 2046
2039 2047 mutex_enter(&mir->mir_mutex);
2040 2048
2041 2049 /*
2042 2050 	 * If this is the last reference, pick up any held disconnect/ord rel indication.
2043 2051 */
2044 2052 if ((mir->mir_ref_cnt == 1) && (mir->mir_inrservice == 0)) {
2045 2053 cmp = mir->mir_svc_pend_mp;
2046 2054 mir->mir_svc_pend_mp = NULL;
2047 2055 }
2048 2056
2049 2057 if (cmp) {
2050 2058 RPCLOG(16, "mir_svc_release: sending a held "
2051 2059 "disconnect/ord rel indication upstream on queue 0x%p\n",
2052 2060 (void *)RD(wq));
2053 2061
2054 2062 mutex_exit(&mir->mir_mutex);
2055 2063
2056 2064 putnext(RD(wq), cmp);
2057 2065
2058 2066 mutex_enter(&mir->mir_mutex);
2059 2067 }
2060 2068
2061 2069 /*
2062 2070 * Start idle processing if this is the last reference.
2063 2071 */
2064 2072 if (mir->mir_ref_cnt == 1 && mir->mir_inrservice == 0) {
2065 2073
2066 2074 RPCLOG(16, "mir_svc_release starting idle timer on 0x%p "
2067 2075 "because ref cnt is zero\n", (void *) wq);
2068 2076
2069 2077 mir_svc_idle_start(wq, mir);
2070 2078 }
2071 2079
2072 2080 mir->mir_ref_cnt--;
2073 2081 ASSERT(mir->mir_ref_cnt >= 0);
2074 2082
2075 2083 /*
2076 2084 * Wake up the thread waiting to close.
2077 2085 */
2078 2086
2079 2087 if ((mir->mir_ref_cnt == 0) && mir->mir_closing)
2080 2088 cv_signal(&mir->mir_condvar);
2081 2089
2082 2090 mutex_exit(&mir->mir_mutex);
2083 2091 }
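
mir_svc_hold() and mir_svc_release() form a hold/release pair around each request handed to kRPC. A sketch of the intended pairing (the processing step in the middle is illustrative):

	mir_svc_hold(wq);		/* bump mir_ref_cnt before kRPC takes the request */
	/* ... kRPC services the request and sends or drops the reply ... */
	mir_svc_release(wq, mp, TRUE);	/* drop the ref; the last release restarts
					   the idle timer and wakes any closer */
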
2084 2092
2085 2093 /*
2086 2094 * This routine is called by server-side kRPC when it is ready to
2087 2095 * handle inbound messages on the stream.
2088 2096 */
2089 2097 static void
2090 2098 mir_svc_start(queue_t *wq)
2091 2099 {
2092 2100 mir_t *mir = (mir_t *)wq->q_ptr;
2093 2101
2094 2102 /*
2095 2103 * no longer need to take the mir_mutex because the
2096 2104 * mir_setup_complete field has been moved out of
2097 2105 	 * the bit field protected by the mir_mutex.
2098 2106 */
2099 2107
2100 2108 mir->mir_setup_complete = 1;
2101 2109 qenable(RD(wq));
2102 2110 }
2103 2111
2104 2112 /*
2105 2113 * client side wrapper for stopping timer with normal idle timeout.
2106 2114 */
2107 2115 static void
2108 2116 mir_clnt_idle_stop(queue_t *wq, mir_t *mir)
2109 2117 {
2110 2118 ASSERT(MUTEX_HELD(&mir->mir_mutex));
2111 2119 ASSERT((wq->q_flag & QREADR) == 0);
2112 2120 ASSERT(mir->mir_type == RPC_CLIENT);
2113 2121
2114 2122 mir_timer_stop(mir);
2115 2123 }
2116 2124
2117 2125 /*
2118 2126  * client side wrapper for starting timer with normal idle timeout.
2119 2127 */
2120 2128 static void
2121 2129 mir_clnt_idle_start(queue_t *wq, mir_t *mir)
2122 2130 {
2123 2131 ASSERT(MUTEX_HELD(&mir->mir_mutex));
2124 2132 ASSERT((wq->q_flag & QREADR) == 0);
2125 2133 ASSERT(mir->mir_type == RPC_CLIENT);
2126 2134
2127 2135 mir_timer_start(wq, mir, mir->mir_idle_timeout);
2128 2136 }
2129 2137
2130 2138 /*
2131 2139 * client side only. Forces rpcmod to stop sending T_ORDREL_REQs on
2132 2140 * end-points that aren't connected.
2133 2141 */
2134 2142 static void
2135 2143 mir_clnt_idle_do_stop(queue_t *wq)
2136 2144 {
2137 2145 mir_t *mir = (mir_t *)wq->q_ptr;
2138 2146
2139 2147 RPCLOG(1, "mir_clnt_idle_do_stop: wq 0x%p\n", (void *)wq);
2140 2148 ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
2141 2149 mutex_enter(&mir->mir_mutex);
2142 2150 mir_clnt_idle_stop(wq, mir);
2143 2151 mutex_exit(&mir->mir_mutex);
2144 2152 }
2145 2153
2146 2154 /*
2147 2155  * Timer handler. It handles idle timeouts and memory shortage problems.
2148 2156 */
2149 2157 static void
2150 2158 mir_timer(void *arg)
2151 2159 {
2152 2160 queue_t *wq = (queue_t *)arg;
2153 2161 mir_t *mir = (mir_t *)wq->q_ptr;
2154 2162 boolean_t notify;
2155 2163 clock_t now;
2156 2164
2157 2165 mutex_enter(&mir->mir_mutex);
2158 2166
2159 2167 /*
2160 2168 * mir_timer_call is set only when either mir_timer_[start|stop]
2161 2169 	 * is in progress. And mir_timer() can only be run while they
2162 2170 	 * are in progress if the timer is being stopped. So just
2163 2171 * return.
2164 2172 */
2165 2173 if (mir->mir_timer_call) {
2166 2174 mutex_exit(&mir->mir_mutex);
2167 2175 return;
2168 2176 }
2169 2177 mir->mir_timer_id = 0;
2170 2178
2171 2179 switch (mir->mir_type) {
2172 2180 case RPC_CLIENT:
2173 2181
2174 2182 /*
2175 2183 * For clients, the timer fires at clnt_idle_timeout
2176 2184 * intervals. If the activity marker (mir_clntreq) is
2177 2185 * zero, then the stream has been idle since the last
2178 2186 * timer event and we notify kRPC. If mir_clntreq is
2179 2187 * non-zero, then the stream is active and we just
2180 2188 * restart the timer for another interval. mir_clntreq
2181 2189 * is set to 1 in mir_wput for every request passed
2182 2190 * downstream.
2183 2191 *
2184 2192 		 * If this was a memory shortage timer, reset the idle
2185 2193 * timeout regardless; the mir_clntreq will not be a
2186 2194 * valid indicator.
2187 2195 *
2188 2196 * The timer is initially started in mir_wput during
2189 2197 * RPC_CLIENT ioctl processing.
2190 2198 *
2191 2199 * The timer interval can be changed for individual
2192 2200 * streams with the ND variable "mir_idle_timeout".
2193 2201 */
2194 2202 now = ddi_get_lbolt();
2195 2203 if (mir->mir_clntreq > 0 && mir->mir_use_timestamp +
2196 2204 MSEC_TO_TICK(mir->mir_idle_timeout) - now >= 0) {
2197 2205 clock_t tout;
2198 2206
2199 2207 tout = mir->mir_idle_timeout -
2200 2208 TICK_TO_MSEC(now - mir->mir_use_timestamp);
2201 2209 if (tout < 0)
2202 2210 tout = 1000;
2203 2211 #if 0
2204 2212 printf("mir_timer[%d < %d + %d]: reset client timer "
2205 2213 "to %d (ms)\n", TICK_TO_MSEC(now),
2206 2214 TICK_TO_MSEC(mir->mir_use_timestamp),
2207 2215 mir->mir_idle_timeout, tout);
2208 2216 #endif
2209 2217 mir->mir_clntreq = 0;
2210 2218 mir_timer_start(wq, mir, tout);
2211 2219 mutex_exit(&mir->mir_mutex);
2212 2220 return;
2213 2221 }
2214 2222 #if 0
2215 2223 printf("mir_timer[%d]: doing client timeout\n", now / hz);
2216 2224 #endif
2217 2225 /*
2218 2226 * We are disconnecting, but not necessarily
2219 2227 * closing. By not closing, we will fail to
2220 2228 * pick up a possibly changed global timeout value,
2221 2229 * unless we store it now.
2222 2230 */
2223 2231 mir->mir_idle_timeout = clnt_idle_timeout;
2224 2232 mir_clnt_idle_start(wq, mir);
2225 2233
2226 2234 mutex_exit(&mir->mir_mutex);
2227 2235 /*
2228 2236 * We pass T_ORDREL_REQ as an integer value
2229 2237 * to kRPC as the indication that the stream
2230 2238 * is idle. This is not a T_ORDREL_REQ message,
2231 2239 * it is just a convenient value since we call
2232 2240 * the same kRPC routine for T_ORDREL_INDs and
2233 2241 * T_DISCON_INDs.
2234 2242 */
2235 2243 clnt_dispatch_notifyall(wq, T_ORDREL_REQ, 0);
2236 2244 return;
2237 2245
2238 2246 case RPC_SERVER:
2239 2247
2240 2248 /*
2241 2249 * For servers, the timer is only running when the stream
2242 2250 * is really idle or memory is short. The timer is started
2243 2251 * by mir_wput when mir_type is set to RPC_SERVER and
2244 2252 * by mir_svc_idle_start whenever the stream goes idle
2245 2253 * (mir_ref_cnt == 0). The timer is cancelled in
2246 2254 * mir_rput whenever a new inbound request is passed to kRPC
2247 2255 * and the stream was previously idle.
2248 2256 *
2249 2257 * The timer interval can be changed for individual
2250 2258 * streams with the ND variable "mir_idle_timeout".
2251 2259 *
2252 2260 * If the stream is not idle do nothing.
2253 2261 */
2254 2262 if (!MIR_SVC_QUIESCED(mir)) {
2255 2263 mutex_exit(&mir->mir_mutex);
2256 2264 return;
2257 2265 }
2258 2266
2259 2267 notify = !mir->mir_inrservice;
2260 2268 mutex_exit(&mir->mir_mutex);
2261 2269
2262 2270 /*
2263 2271 		 * If there is no packet queued up in the read queue, the stream
2264 2272 * is really idle so notify nfsd to close it.
2265 2273 */
2266 2274 if (notify) {
2267 2275 RPCLOG(16, "mir_timer: telling stream head listener "
2268 2276 "to close stream (0x%p)\n", (void *) RD(wq));
2269 2277 (void) mir_svc_policy_notify(RD(wq), 1);
2270 2278 }
2271 2279 return;
2272 2280 default:
2273 2281 RPCLOG(1, "mir_timer: unexpected mir_type %d\n",
2274 2282 mir->mir_type);
2275 2283 mutex_exit(&mir->mir_mutex);
2276 2284 return;
2277 2285 }
2278 2286 }
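
The client arm of mir_timer() restarts the timer with whatever idle time remains since the last use. The arithmetic, pulled out into a stand-alone sketch (TICK_TO_MSEC is the stock tick-to-millisecond conversion macro; the 1000 ms clamp matches the code above):

	static clock_t
	remaining_idle_ms(clock_t now, clock_t last_use, clock_t idle_timeout_ms)
	{
		clock_t tout = idle_timeout_ms - TICK_TO_MSEC(now - last_use);

		return (tout < 0 ? 1000 : tout);
	}
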
2279 2287
2280 2288 /*
2281 2289 * Called by the RPC package to send either a call or a return, or a
2282 2290 * transport connection request. Adds the record marking header.
2283 2291 */
2284 2292 static void
2285 2293 mir_wput(queue_t *q, mblk_t *mp)
2286 2294 {
2287 2295 uint_t frag_header;
2288 2296 mir_t *mir = (mir_t *)q->q_ptr;
2289 2297 uchar_t *rptr = mp->b_rptr;
2290 2298
2291 2299 if (!mir) {
2292 2300 freemsg(mp);
2293 2301 return;
2294 2302 }
2295 2303
2296 2304 if (mp->b_datap->db_type != M_DATA) {
2297 2305 mir_wput_other(q, mp);
2298 2306 return;
2299 2307 }
2300 2308
2301 2309 if (mir->mir_ordrel_pending == 1) {
2302 2310 freemsg(mp);
2303 2311 RPCLOG(16, "mir_wput wq 0x%p: got data after T_ORDREL_REQ\n",
2304 2312 (void *)q);
2305 2313 return;
2306 2314 }
2307 2315
2308 2316 frag_header = (uint_t)DLEN(mp);
2309 2317 frag_header |= MIR_LASTFRAG;
2310 2318
2311 2319 /* Stick in the 4 byte record marking header. */
2312 2320 if ((rptr - mp->b_datap->db_base) < sizeof (uint32_t) ||
2313 2321 !IS_P2ALIGNED(mp->b_rptr, sizeof (uint32_t))) {
2314 2322 /*
2315 2323 * Since we know that M_DATA messages are created exclusively
2316 2324 * by kRPC, we expect that kRPC will leave room for our header
2317 2325 * and 4 byte align which is normal for XDR.
2318 2326 * If kRPC (or someone else) does not cooperate, then we
2319 2327 * just throw away the message.
2320 2328 */
2321 2329 RPCLOG(1, "mir_wput: kRPC did not leave space for record "
2322 2330 "fragment header (%d bytes left)\n",
2323 2331 (int)(rptr - mp->b_datap->db_base));
2324 2332 freemsg(mp);
2325 2333 return;
2326 2334 }
2327 2335 rptr -= sizeof (uint32_t);
2328 2336 *(uint32_t *)rptr = htonl(frag_header);
2329 2337 mp->b_rptr = rptr;
2330 2338
2331 2339 mutex_enter(&mir->mir_mutex);
2332 2340 if (mir->mir_type == RPC_CLIENT) {
2333 2341 /*
2334 2342 * For the client, set mir_clntreq to indicate that the
2335 2343 * connection is active.
2336 2344 */
2337 2345 mir->mir_clntreq = 1;
2338 2346 mir->mir_use_timestamp = ddi_get_lbolt();
2339 2347 }
2340 2348
2341 2349 /*
2342 2350 * If we haven't already queued some data and the downstream module
2343 2351 * can accept more data, send it on, otherwise we queue the message
2344 2352 * and take other actions depending on mir_type.
2345 2353 */
2346 2354 if (!mir->mir_inwservice && MIR_WCANPUTNEXT(mir, q)) {
2347 2355 mutex_exit(&mir->mir_mutex);
2348 2356
2349 2357 /*
2350 2358 * Now we pass the RPC message downstream.
2351 2359 */
2352 2360 putnext(q, mp);
2353 2361 return;
2354 2362 }
2355 2363
2356 2364 switch (mir->mir_type) {
2357 2365 case RPC_CLIENT:
2358 2366 /*
2359 2367 * Check for a previous duplicate request on the
2360 2368 * queue. If there is one, then we throw away
2361 2369 * the current message and let the previous one
2362 2370 * go through. If we can't find a duplicate, then
2363 2371 * send this one. This tap dance is an effort
2364 2372 * to reduce traffic and processing requirements
2365 2373 * under load conditions.
2366 2374 */
2367 2375 if (mir_clnt_dup_request(q, mp)) {
2368 2376 mutex_exit(&mir->mir_mutex);
2369 2377 freemsg(mp);
2370 2378 return;
2371 2379 }
2372 2380 break;
2373 2381 case RPC_SERVER:
2374 2382 /*
2375 2383 * Set mir_hold_inbound so that new inbound RPC
2376 2384 * messages will be held until the client catches
2377 2385 * up on the earlier replies. This flag is cleared
2378 2386 * in mir_wsrv after flow control is relieved;
2379 2387 * the read-side queue is also enabled at that time.
2380 2388 */
2381 2389 mir->mir_hold_inbound = 1;
2382 2390 break;
2383 2391 default:
2384 2392 RPCLOG(1, "mir_wput: unexpected mir_type %d\n", mir->mir_type);
2385 2393 break;
2386 2394 }
2387 2395 mir->mir_inwservice = 1;
2388 2396 (void) putq(q, mp);
2389 2397 mutex_exit(&mir->mir_mutex);
2390 2398 }
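
The record marking header prepended in mir_wput() is the standard RPC-over-TCP record mark: the high bit flags the last fragment and the low 31 bits carry the fragment length, sent big-endian. A sketch, assuming MIR_LASTFRAG is the conventional high bit (0x80000000):

	#define	LASTFRAG	0x80000000u	/* assumed analogue of MIR_LASTFRAG */

	static uint32_t
	make_record_mark(uint32_t frag_len, boolean_t last)
	{
		uint32_t rm = frag_len & 0x7fffffffu;	/* length: low 31 bits */

		if (last)
			rm |= LASTFRAG;			/* mark final fragment */
		return (htonl(rm));			/* big-endian on the wire */
	}
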
2391 2399
2392 2400 static void
2393 2401 mir_wput_other(queue_t *q, mblk_t *mp)
2394 2402 {
2395 2403 mir_t *mir = (mir_t *)q->q_ptr;
2396 2404 struct iocblk *iocp;
2397 2405 uchar_t *rptr = mp->b_rptr;
2398 2406 bool_t flush_in_svc = FALSE;
2399 2407
2400 2408 ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
2401 2409 switch (mp->b_datap->db_type) {
2402 2410 case M_IOCTL:
2403 2411 iocp = (struct iocblk *)rptr;
2404 2412 switch (iocp->ioc_cmd) {
2405 2413 case RPC_CLIENT:
2406 2414 mutex_enter(&mir->mir_mutex);
2407 2415 if (mir->mir_type != 0 &&
2408 2416 mir->mir_type != iocp->ioc_cmd) {
2409 2417 ioc_eperm:
2410 2418 mutex_exit(&mir->mir_mutex);
2411 2419 iocp->ioc_error = EPERM;
2412 2420 iocp->ioc_count = 0;
2413 2421 mp->b_datap->db_type = M_IOCACK;
2414 2422 qreply(q, mp);
2415 2423 return;
2416 2424 }
2417 2425
2418 2426 mir->mir_type = iocp->ioc_cmd;
2419 2427
2420 2428 /*
2421 2429 * Clear mir_hold_inbound which was set to 1 by
2422 2430 * mir_open. This flag is not used on client
2423 2431 * streams.
2424 2432 */
2425 2433 mir->mir_hold_inbound = 0;
2426 2434 mir->mir_max_msg_sizep = &clnt_max_msg_size;
2427 2435
2428 2436 /*
2429 2437 * Start the idle timer. See mir_timer() for more
2430 2438 * information on how client timers work.
2431 2439 */
2432 2440 mir->mir_idle_timeout = clnt_idle_timeout;
2433 2441 mir_clnt_idle_start(q, mir);
2434 2442 mutex_exit(&mir->mir_mutex);
2435 2443
2436 2444 mp->b_datap->db_type = M_IOCACK;
2437 2445 qreply(q, mp);
2438 2446 return;
2439 2447 case RPC_SERVER:
2440 2448 mutex_enter(&mir->mir_mutex);
2441 2449 if (mir->mir_type != 0 &&
2442 2450 mir->mir_type != iocp->ioc_cmd)
2443 2451 goto ioc_eperm;
2444 2452
2445 2453 /*
2446 2454 * We don't clear mir_hold_inbound here because
2447 2455 * mir_hold_inbound is used in the flow control
2448 2456 * model. If we cleared it here, then we'd commit
2449 2457 			 * a small violation of the model, where the transport
2450 2458 * might immediately block downstream flow.
2451 2459 */
2452 2460
2453 2461 mir->mir_type = iocp->ioc_cmd;
2454 2462 mir->mir_max_msg_sizep = &svc_max_msg_size;
2455 2463
2456 2464 /*
2457 2465 * Start the idle timer. See mir_timer() for more
2458 2466 * information on how server timers work.
2459 2467 *
2460 2468 * Note that it is important to start the idle timer
2461 2469 * here so that connections time out even if we
2462 2470 * never receive any data on them.
2463 2471 */
2464 2472 mir->mir_idle_timeout = svc_idle_timeout;
2465 2473 RPCLOG(16, "mir_wput_other starting idle timer on 0x%p "
2466 2474 "because we got RPC_SERVER ioctl\n", (void *)q);
2467 2475 mir_svc_idle_start(q, mir);
2468 2476 mutex_exit(&mir->mir_mutex);
2469 2477
2470 2478 mp->b_datap->db_type = M_IOCACK;
2471 2479 qreply(q, mp);
2472 2480 return;
2473 2481 default:
2474 2482 break;
2475 2483 }
2476 2484 break;
2477 2485
2478 2486 case M_PROTO:
2479 2487 if (mir->mir_type == RPC_CLIENT) {
2480 2488 /*
2481 2489 * We are likely being called from the context of a
2482 2490 * service procedure. So we need to enqueue. However
2483 2491 			 * enqueuing may put our message behind data messages.
2484 2492 * So flush the data first.
2485 2493 */
2486 2494 flush_in_svc = TRUE;
2487 2495 }
2488 2496 if ((mp->b_wptr - rptr) < sizeof (uint32_t) ||
2489 2497 !IS_P2ALIGNED(rptr, sizeof (uint32_t)))
2490 2498 break;
2491 2499
2492 2500 switch (((union T_primitives *)rptr)->type) {
2493 2501 case T_DATA_REQ:
2494 2502 /* Don't pass T_DATA_REQ messages downstream. */
2495 2503 freemsg(mp);
2496 2504 return;
2497 2505 case T_ORDREL_REQ:
2498 2506 RPCLOG(8, "mir_wput_other wq 0x%p: got T_ORDREL_REQ\n",
2499 2507 (void *)q);
2500 2508 mutex_enter(&mir->mir_mutex);
2501 2509 if (mir->mir_type != RPC_SERVER) {
2502 2510 /*
2503 2511 * We are likely being called from
2504 2512 * clnt_dispatch_notifyall(). Sending
2505 2513 * a T_ORDREL_REQ will result in
2506 2514 				 * some kind of _IND message being sent,
2507 2515 				 * which will be another call to
2508 2516 * clnt_dispatch_notifyall(). To keep the stack
2509 2517 * lean, queue this message.
2510 2518 */
2511 2519 mir->mir_inwservice = 1;
2512 2520 (void) putq(q, mp);
2513 2521 mutex_exit(&mir->mir_mutex);
2514 2522 return;
2515 2523 }
2516 2524
2517 2525 /*
2518 2526 * Mark the structure such that we don't accept any
2519 2527 * more requests from client. We could defer this
2520 2528 * until we actually send the orderly release
2521 2529 * request downstream, but all that does is delay
2522 2530 * the closing of this stream.
2523 2531 */
2524 2532 RPCLOG(16, "mir_wput_other wq 0x%p: got T_ORDREL_REQ "
2525 2533 " so calling mir_svc_start_close\n", (void *)q);
2526 2534
2527 2535 mir_svc_start_close(q, mir);
2528 2536
2529 2537 /*
2530 2538 * If we have sent down a T_ORDREL_REQ, don't send
2531 2539 * any more.
2532 2540 */
2533 2541 if (mir->mir_ordrel_pending) {
2534 2542 freemsg(mp);
2535 2543 mutex_exit(&mir->mir_mutex);
2536 2544 return;
2537 2545 }
2538 2546
2539 2547 /*
2540 2548 * If the stream is not idle, then we hold the
2541 2549 * orderly release until it becomes idle. This
2542 2550 * ensures that kRPC will be able to reply to
2543 2551 * all requests that we have passed to it.
2544 2552 *
2545 2553 * We also queue the request if there is data already
2546 2554 * queued, because we cannot allow the T_ORDREL_REQ
2547 2555 * to go before data. When we had a separate reply
2548 2556 * count, this was not a problem, because the
2549 2557 * reply count was reconciled when mir_wsrv()
2550 2558 * completed.
2551 2559 */
2552 2560 if (!MIR_SVC_QUIESCED(mir) ||
2553 2561 mir->mir_inwservice == 1) {
2554 2562 mir->mir_inwservice = 1;
2555 2563 (void) putq(q, mp);
2556 2564
2557 2565 RPCLOG(16, "mir_wput_other: queuing "
2558 2566 "T_ORDREL_REQ on 0x%p\n", (void *)q);
2559 2567
2560 2568 mutex_exit(&mir->mir_mutex);
2561 2569 return;
2562 2570 }
2563 2571
2564 2572 /*
2565 2573 * Mark the structure so that we know we sent
2566 2574 * an orderly release request, and reset the idle timer.
2567 2575 */
2568 2576 mir->mir_ordrel_pending = 1;
2569 2577
2570 2578 RPCLOG(16, "mir_wput_other: calling mir_svc_idle_start"
2571 2579 " on 0x%p because we got T_ORDREL_REQ\n",
2572 2580 (void *)q);
2573 2581
2574 2582 mir_svc_idle_start(q, mir);
2575 2583 mutex_exit(&mir->mir_mutex);
2576 2584
2577 2585 /*
2578 2586 * When we break, we will putnext the T_ORDREL_REQ.
2579 2587 */
2580 2588 break;
2581 2589
2582 2590 case T_CONN_REQ:
2583 2591 mutex_enter(&mir->mir_mutex);
2584 2592 if (mir->mir_head_mp != NULL) {
2585 2593 freemsg(mir->mir_head_mp);
2586 2594 mir->mir_head_mp = NULL;
2587 2595 mir->mir_tail_mp = NULL;
2588 2596 }
2589 2597 mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
2590 2598 /*
2591 2599 * Restart timer in case mir_clnt_idle_do_stop() was
2592 2600 * called.
2593 2601 */
2594 2602 mir->mir_idle_timeout = clnt_idle_timeout;
2595 2603 mir_clnt_idle_stop(q, mir);
2596 2604 mir_clnt_idle_start(q, mir);
2597 2605 mutex_exit(&mir->mir_mutex);
2598 2606 break;
2599 2607
2600 2608 default:
2601 2609 /*
2602 2610 * T_DISCON_REQ is one of the interesting default
2603 2611 * cases here. Ideally, an M_FLUSH is done before
2604 2612 * T_DISCON_REQ is done. However, that is somewhat
2605 2613 * cumbersome for clnt_cots.c to do. So we queue
2606 2614 * T_DISCON_REQ, and let the service procedure
2607 2615 * flush all M_DATA.
2608 2616 */
2609 2617 break;
2610 2618 }
2611 2619 		/* fallthru */
2612 2620 default:
2613 2621 if (mp->b_datap->db_type >= QPCTL) {
2614 2622 if (mp->b_datap->db_type == M_FLUSH) {
2615 2623 if (mir->mir_type == RPC_CLIENT &&
2616 2624 *mp->b_rptr & FLUSHW) {
2617 2625 RPCLOG(32, "mir_wput_other: flushing "
2618 2626 "wq 0x%p\n", (void *)q);
2619 2627 if (*mp->b_rptr & FLUSHBAND) {
2620 2628 flushband(q, *(mp->b_rptr + 1),
2621 2629 FLUSHDATA);
2622 2630 } else {
2623 2631 flushq(q, FLUSHDATA);
2624 2632 }
2625 2633 } else {
2626 2634 RPCLOG(32, "mir_wput_other: ignoring "
2627 2635 "M_FLUSH on wq 0x%p\n", (void *)q);
2628 2636 }
2629 2637 }
2630 2638 break;
2631 2639 }
2632 2640
2633 2641 mutex_enter(&mir->mir_mutex);
2634 2642 if (mir->mir_inwservice == 0 && MIR_WCANPUTNEXT(mir, q)) {
2635 2643 mutex_exit(&mir->mir_mutex);
2636 2644 break;
2637 2645 }
2638 2646 mir->mir_inwservice = 1;
2639 2647 mir->mir_inwflushdata = flush_in_svc;
2640 2648 (void) putq(q, mp);
2641 2649 mutex_exit(&mir->mir_mutex);
2642 2650 qenable(q);
2643 2651
2644 2652 return;
2645 2653 }
2646 2654 putnext(q, mp);
2647 2655 }
2648 2656
2649 2657 static void
2650 2658 mir_wsrv(queue_t *q)
2651 2659 {
2652 2660 mblk_t *mp;
2653 2661 mir_t *mir;
2654 2662 bool_t flushdata;
2655 2663
2656 2664 mir = (mir_t *)q->q_ptr;
2657 2665 mutex_enter(&mir->mir_mutex);
2658 2666
2659 2667 flushdata = mir->mir_inwflushdata;
2660 2668 mir->mir_inwflushdata = 0;
2661 2669
2662 2670 	while ((mp = getq(q)) != NULL) {
2663 2671 if (mp->b_datap->db_type == M_DATA) {
2664 2672 /*
2665 2673 * Do not send any more data if we have sent
2666 2674 * a T_ORDREL_REQ.
2667 2675 */
2668 2676 if (flushdata || mir->mir_ordrel_pending == 1) {
2669 2677 freemsg(mp);
2670 2678 continue;
2671 2679 }
2672 2680
2673 2681 /*
2674 2682 * Make sure that the stream can really handle more
2675 2683 * data.
2676 2684 */
2677 2685 if (!MIR_WCANPUTNEXT(mir, q)) {
2678 2686 (void) putbq(q, mp);
2679 2687 mutex_exit(&mir->mir_mutex);
2680 2688 return;
2681 2689 }
2682 2690
2683 2691 /*
2684 2692 * Now we pass the RPC message downstream.
2685 2693 */
2686 2694 mutex_exit(&mir->mir_mutex);
2687 2695 putnext(q, mp);
2688 2696 mutex_enter(&mir->mir_mutex);
2689 2697 continue;
2690 2698 }
2691 2699
2692 2700 /*
2693 2701 * This is not an RPC message, pass it downstream
2694 2702 * (ignoring flow control) if the server side is not sending a
2695 2703 * T_ORDREL_REQ downstream.
2696 2704 */
2697 2705 if (mir->mir_type != RPC_SERVER ||
2698 2706 ((union T_primitives *)mp->b_rptr)->type !=
2699 2707 T_ORDREL_REQ) {
2700 2708 mutex_exit(&mir->mir_mutex);
2701 2709 putnext(q, mp);
2702 2710 mutex_enter(&mir->mir_mutex);
2703 2711 continue;
2704 2712 }
2705 2713
2706 2714 if (mir->mir_ordrel_pending == 1) {
2707 2715 /*
2708 2716 * Don't send two T_ORDRELs
2709 2717 */
2710 2718 freemsg(mp);
2711 2719 continue;
2712 2720 }
2713 2721
2714 2722 /*
2715 2723 * Mark the structure so that we know we sent an orderly
2716 2724 		 * release request. We will check to see if the slot is idle at the
2717 2725 * end of this routine, and if so, reset the idle timer to
2718 2726 * handle orderly release timeouts.
2719 2727 */
2720 2728 mir->mir_ordrel_pending = 1;
2721 2729 RPCLOG(16, "mir_wsrv: sending ordrel req on q 0x%p\n",
2722 2730 (void *)q);
2723 2731 /*
2724 2732 * Send the orderly release downstream. If there are other
2725 2733 * pending replies we won't be able to send them. However,
2726 2734 * the only reason we should send the orderly release is if
2727 2735 * we were idle, or if an unusual event occurred.
2728 2736 */
2729 2737 mutex_exit(&mir->mir_mutex);
2730 2738 putnext(q, mp);
2731 2739 mutex_enter(&mir->mir_mutex);
2732 2740 }
2733 2741
2734 2742 if (q->q_first == NULL)
2735 2743 /*
2736 2744 * If we call mir_svc_idle_start() below, then
2737 2745 * clearing mir_inwservice here will also result in
2738 2746 * any thread waiting in mir_close() to be signaled.
2739 2747 */
2740 2748 mir->mir_inwservice = 0;
2741 2749
2742 2750 if (mir->mir_type != RPC_SERVER) {
2743 2751 mutex_exit(&mir->mir_mutex);
2744 2752 return;
2745 2753 }
2746 2754
2747 2755 /*
2748 2756 	 * If idle, we call mir_svc_idle_start to start the timer (or wake up
2749 2757 	 * a closing thread). Also make sure not to start the idle timer on the
2750 2758 * listener stream. This can cause nfsd to send an orderly release
2751 2759 * command on the listener stream.
2752 2760 */
2753 2761 if (MIR_SVC_QUIESCED(mir) && !(mir->mir_listen_stream)) {
2754 2762 RPCLOG(16, "mir_wsrv: calling mir_svc_idle_start on 0x%p "
2755 2763 "because mir slot is idle\n", (void *)q);
2756 2764 mir_svc_idle_start(q, mir);
2757 2765 }
2758 2766
2759 2767 /*
2760 2768 * If outbound flow control has been relieved, then allow new
2761 2769 * inbound requests to be processed.
2762 2770 */
2763 2771 if (mir->mir_hold_inbound) {
2764 2772 mir->mir_hold_inbound = 0;
2765 2773 qenable(RD(q));
2766 2774 }
2767 2775 mutex_exit(&mir->mir_mutex);
2768 2776 }
2769 2777
2770 2778 static void
2771 2779 mir_disconnect(queue_t *q, mir_t *mir)
2772 2780 {
2773 2781 ASSERT(MUTEX_HELD(&mir->mir_mutex));
2774 2782
2775 2783 switch (mir->mir_type) {
2776 2784 case RPC_CLIENT:
2777 2785 /*
2778 2786 * We are disconnecting, but not necessarily
2779 2787 * closing. By not closing, we will fail to
2780 2788 * pick up a possibly changed global timeout value,
2781 2789 * unless we store it now.
2782 2790 */
2783 2791 mir->mir_idle_timeout = clnt_idle_timeout;
2784 2792 mir_clnt_idle_start(WR(q), mir);
2785 2793 mutex_exit(&mir->mir_mutex);
2786 2794
2787 2795 /*
2788 2796 * T_DISCON_REQ is passed to kRPC as an integer value
2789 2797 * (this is not a TPI message). It is used as a
2790 2798 * convenient value to indicate a sanity check
2791 2799 * failure -- the same kRPC routine is also called
2792 2800 * for T_DISCON_INDs and T_ORDREL_INDs.
2793 2801 */
2794 2802 clnt_dispatch_notifyall(WR(q), T_DISCON_REQ, 0);
2795 2803 break;
2796 2804
2797 2805 case RPC_SERVER:
2798 2806 mir->mir_svc_no_more_msgs = 1;
2799 2807 mir_svc_idle_stop(WR(q), mir);
2800 2808 mutex_exit(&mir->mir_mutex);
2801 2809 RPCLOG(16, "mir_disconnect: telling "
2802 2810 "stream head listener to disconnect stream "
2803 2811 "(0x%p)\n", (void *) q);
2804 2812 (void) mir_svc_policy_notify(q, 2);
2805 2813 break;
2806 2814
2807 2815 default:
2808 2816 mutex_exit(&mir->mir_mutex);
2809 2817 break;
2810 2818 }
2811 2819 }
2812 2820
2813 2821 /*
2814 2822  * Sanity check the message length, and if it's too large, shut down the
2815 2823  * connection. Returns 1 if the connection is shut down; 0 otherwise.
2816 2824 */
2817 2825 static int
2818 2826 mir_check_len(queue_t *q, mblk_t *head_mp)
2819 2827 {
2820 2828 mir_t *mir = q->q_ptr;
2821 2829 uint_t maxsize = 0;
2822 2830 size_t msg_len = msgdsize(head_mp);
2823 2831
2824 2832 if (mir->mir_max_msg_sizep != NULL)
2825 2833 maxsize = *mir->mir_max_msg_sizep;
2826 2834
2827 2835 if (maxsize == 0 || msg_len <= maxsize)
2828 2836 return (0);
2829 2837
2830 2838 freemsg(head_mp);
2831 2839 mir->mir_head_mp = NULL;
2832 2840 mir->mir_tail_mp = NULL;
2833 2841 mir->mir_frag_header = 0;
2834 2842 mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
2835 2843 if (mir->mir_type != RPC_SERVER || mir->mir_setup_complete) {
2836 2844 cmn_err(CE_NOTE,
2837 2845 "kRPC: record fragment from %s of size(%lu) exceeds "
2838 2846 "maximum (%u). Disconnecting",
2839 2847 (mir->mir_type == RPC_CLIENT) ? "server" :
2840 2848 (mir->mir_type == RPC_SERVER) ? "client" :
2841 2849 "test tool", msg_len, maxsize);
2842 2850 }
2843 2851
2844 2852 mir_disconnect(q, mir);
2845 2853 return (1);
2846 2854 }
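
The length test itself is two comparisons; isolated below, with the zero-means-unlimited convention made explicit:

	static boolean_t
	msg_too_long(size_t msg_len, uint_t maxsize)
	{
		/* a maxsize of 0 disables the check entirely */
		return (maxsize != 0 && msg_len > maxsize);
	}
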
[810 lines elided]