1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  24  *  Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  25  */
  26 
  27 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  28 /*        All Rights Reserved   */
  29 
  30 /*
  31  * Portions of this source code were derived from Berkeley 4.3 BSD
  32  * under license from the Regents of the University of California.
  33  */
  34 
/*
 * svc_clts.c
 * Server side for connectionless RPC in the kernel.
 */
  40 
  41 #include <sys/param.h>
  42 #include <sys/types.h>
  43 #include <sys/sysmacros.h>
  44 #include <sys/file.h>
  45 #include <sys/stream.h>
  46 #include <sys/strsun.h>
  47 #include <sys/strsubr.h>
  48 #include <sys/tihdr.h>
  49 #include <sys/tiuser.h>
  50 #include <sys/t_kuser.h>
  51 #include <sys/fcntl.h>
  52 #include <sys/errno.h>
  53 #include <sys/kmem.h>
  54 #include <sys/systm.h>
  55 #include <sys/cmn_err.h>
  56 #include <sys/kstat.h>
  57 #include <sys/vtrace.h>
  58 #include <sys/debug.h>
  59 
  60 #include <rpc/types.h>
  61 #include <rpc/xdr.h>
  62 #include <rpc/auth.h>
  63 #include <rpc/clnt.h>
  64 #include <rpc/rpc_msg.h>
  65 #include <rpc/svc.h>
  66 #include <inet/ip.h>
  67 
  68 /*
  69  * Routines exported through ops vector.
  70  */
  71 static bool_t           svc_clts_krecv(SVCXPRT *, mblk_t *, struct rpc_msg *);
  72 static bool_t           svc_clts_ksend(SVCXPRT *, struct rpc_msg *);
  73 static bool_t           svc_clts_kgetargs(SVCXPRT *, xdrproc_t, caddr_t);
  74 static bool_t           svc_clts_kfreeargs(SVCXPRT *, xdrproc_t, caddr_t);
  75 static void             svc_clts_kdestroy(SVCMASTERXPRT *);
  76 static int              svc_clts_kdup(struct svc_req *, caddr_t, int,
  77                                 struct dupreq **, bool_t *);
  78 static void             svc_clts_kdupdone(struct dupreq *, caddr_t,
  79                                 void (*)(), int, int);
  80 static int32_t          *svc_clts_kgetres(SVCXPRT *, int);
  81 static void             svc_clts_kclone_destroy(SVCXPRT *);
  82 static void             svc_clts_kfreeres(SVCXPRT *);
  83 static void             svc_clts_kstart(SVCMASTERXPRT *);
  84 static void             svc_clts_kclone_xprt(SVCXPRT *, SVCXPRT *);
  85 static void             svc_clts_ktattrs(SVCXPRT *, int, void **);
  86 
  87 /*
  88  * Server transport operations vector.
  89  */
  90 struct svc_ops svc_clts_op = {
  91         svc_clts_krecv,         /* Get requests */
  92         svc_clts_kgetargs,      /* Deserialize arguments */
  93         svc_clts_ksend,         /* Send reply */
  94         svc_clts_kfreeargs,     /* Free argument data space */
  95         svc_clts_kdestroy,      /* Destroy transport handle */
  96         svc_clts_kdup,          /* Check entry in dup req cache */
  97         svc_clts_kdupdone,      /* Mark entry in dup req cache as done */
  98         svc_clts_kgetres,       /* Get pointer to response buffer */
  99         svc_clts_kfreeres,      /* Destroy pre-serialized response header */
 100         svc_clts_kclone_destroy, /* Destroy a clone xprt */
 101         svc_clts_kstart,        /* Tell `ready-to-receive' to rpcmod */
 102         svc_clts_kclone_xprt,   /* transport specific clone xprt function */
 103         svc_clts_ktattrs        /* Transport specific attributes. */
 104 };
 105 
 106 /*
 107  * Transport private data.
 108  * Kept in xprt->xp_p2buf.
 109  */
 110 struct udp_data {
 111         mblk_t  *ud_resp;                       /* buffer for response */
 112         mblk_t  *ud_inmp;                       /* mblk chain of request */
 113 };
 114 
 115 #define UD_MAXSIZE      8800
 116 #define UD_INITSIZE     2048
 117 
 118 /*
 119  * Connectionless server statistics
 120  */
 121 static const struct rpc_clts_server {
 122         kstat_named_t   rscalls;
 123         kstat_named_t   rsbadcalls;
 124         kstat_named_t   rsnullrecv;
 125         kstat_named_t   rsbadlen;
 126         kstat_named_t   rsxdrcall;
 127         kstat_named_t   rsdupchecks;
 128         kstat_named_t   rsdupreqs;
 129 } clts_rsstat_tmpl = {
 130         { "calls",      KSTAT_DATA_UINT64 },
 131         { "badcalls",   KSTAT_DATA_UINT64 },
 132         { "nullrecv",   KSTAT_DATA_UINT64 },
 133         { "badlen",     KSTAT_DATA_UINT64 },
 134         { "xdrcall",    KSTAT_DATA_UINT64 },
 135         { "dupchecks",  KSTAT_DATA_UINT64 },
 136         { "dupreqs",    KSTAT_DATA_UINT64 }
 137 };
 138 
 139 static uint_t clts_rsstat_ndata =
 140         sizeof (clts_rsstat_tmpl) / sizeof (kstat_named_t);
 141 
 142 #define CLONE2STATS(clone_xprt) \
 143         (struct rpc_clts_server *)(clone_xprt)->xp_master->xp_p2
 144 
 145 #define RSSTAT_INCR(stats, x)   \
 146         atomic_inc_64(&(stats)->x.value.ui64)
 147 
 148 /*
 149  * Create a transport record.
 150  * The transport record, output buffer, and private data structure
 151  * are allocated.  The output buffer is serialized into using xdrmem.
 152  * There is one transport record per user process which implements a
 153  * set of services.
 154  */
 155 /* ARGSUSED */
 156 int
 157 svc_clts_kcreate(file_t *fp, uint_t sendsz, struct T_info_ack *tinfo,
 158     SVCMASTERXPRT **nxprt)
 159 {
 160         SVCMASTERXPRT *xprt;
 161         struct rpcstat *rpcstat;
 162 
 163         if (nxprt == NULL)
 164                 return (EINVAL);
 165 
 166         rpcstat = zone_getspecific(rpcstat_zone_key, curproc->p_zone);
 167         ASSERT(rpcstat != NULL);
 168 
 169         xprt = kmem_zalloc(sizeof (*xprt), KM_SLEEP);
 170         xprt->xp_lcladdr.buf = kmem_zalloc(sizeof (sin6_t), KM_SLEEP);
 171         xprt->xp_p2 = (caddr_t)rpcstat->rpc_clts_server;
 172         xprt->xp_ops = &svc_clts_op;
 173         xprt->xp_msg_size = tinfo->TSDU_size;
 174 
 175         xprt->xp_rtaddr.buf = NULL;
 176         xprt->xp_rtaddr.maxlen = tinfo->ADDR_size;
 177         xprt->xp_rtaddr.len = 0;
 178 
 179         *nxprt = xprt;
 180 
 181         return (0);
 182 }
 183 
 184 /*
 185  * Destroy a transport record.
 186  * Frees the space allocated for a transport record.
 187  */
 188 static void
 189 svc_clts_kdestroy(SVCMASTERXPRT *xprt)
 190 {
 191         if (xprt->xp_netid)
 192                 kmem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1);
 193         if (xprt->xp_addrmask.maxlen)
 194                 kmem_free(xprt->xp_addrmask.buf, xprt->xp_addrmask.maxlen);
 195 
 196         mutex_destroy(&xprt->xp_req_lock);
 197         mutex_destroy(&xprt->xp_thread_lock);
 198 
 199         kmem_free(xprt->xp_lcladdr.buf, sizeof (sin6_t));
 200         kmem_free(xprt, sizeof (SVCMASTERXPRT));
 201 }
 202 
 203 /*
 204  * Transport-type specific part of svc_xprt_cleanup().
 205  * Frees the message buffer space allocated for a clone of a transport record
 206  */
 207 static void
 208 svc_clts_kclone_destroy(SVCXPRT *clone_xprt)
 209 {
 210         /* LINTED pointer alignment */
 211         struct udp_data *ud = (struct udp_data *)clone_xprt->xp_p2buf;
 212 
 213         if (ud->ud_resp) {
 214                 /*
 215                  * There should not be any left over results buffer.
 216                  */
 217                 ASSERT(ud->ud_resp->b_cont == NULL);
 218 
 219                 /*
 220                  * Free the T_UNITDATA_{REQ/IND} that svc_clts_krecv
 221                  * saved.
 222                  */
 223                 freeb(ud->ud_resp);
 224         }
 225         if (ud->ud_inmp)
 226                 freemsg(ud->ud_inmp);
 227 }
 228 
 229 /*
 230  * svc_tli_kcreate() calls this function at the end to tell
 231  * rpcmod that the transport is ready to receive requests.
 232  */
 233 /* ARGSUSED */
 234 static void
 235 svc_clts_kstart(SVCMASTERXPRT *xprt)
 236 {
 237 }
 238 
 239 static void
 240 svc_clts_kclone_xprt(SVCXPRT *src_xprt, SVCXPRT *dst_xprt)
 241 {
 242         struct udp_data *ud_src =
 243             (struct udp_data *)src_xprt->xp_p2buf;
 244         struct udp_data *ud_dst =
 245             (struct udp_data *)dst_xprt->xp_p2buf;
 246 
 247         if (ud_src->ud_resp)
 248                 ud_dst->ud_resp = dupb(ud_src->ud_resp);
 249 
 250 }
 251 
 252 static void
 253 svc_clts_ktattrs(SVCXPRT *clone_xprt, int attrflag, void **tattr)
 254 {
 255         *tattr = NULL;
 256 
 257         switch (attrflag) {
 258         case SVC_TATTR_ADDRMASK:
 259                 *tattr = (void *)&clone_xprt->xp_master->xp_addrmask;
 260         }
 261 }
 262 
 263 /*
 264  * Receive rpc requests.
 265  * Pulls a request in off the socket, checks if the packet is intact,
 266  * and deserializes the call packet.
 267  */
 268 static bool_t
 269 svc_clts_krecv(SVCXPRT *clone_xprt, mblk_t *mp, struct rpc_msg *msg)
 270 {
 271         /* LINTED pointer alignment */
 272         struct udp_data *ud = (struct udp_data *)clone_xprt->xp_p2buf;
 273         XDR *xdrs = &clone_xprt->xp_xdrin;
 274         struct rpc_clts_server *stats = CLONE2STATS(clone_xprt);
 275         union T_primitives *pptr;
 276         int hdrsz;
 277         cred_t *cr;
 278 
 279         TRACE_0(TR_FAC_KRPC, TR_SVC_CLTS_KRECV_START,
 280             "svc_clts_krecv_start:");
 281 
 282         RSSTAT_INCR(stats, rscalls);
 283 
 284         /*
 285          * The incoming request should start with an M_PROTO message.
 286          */
 287         if (mp->b_datap->db_type != M_PROTO) {
 288                 goto bad;
 289         }
 290 
 291         /*
 292          * The incoming request should be an T_UNITDTA_IND.  There
 293          * might be other messages coming up the stream, but we can
 294          * ignore them.
 295          */
 296         pptr = (union T_primitives *)mp->b_rptr;
 297         if (pptr->type != T_UNITDATA_IND) {
 298                 goto bad;
 299         }
 300         /*
 301          * Do some checking to make sure that the header at least looks okay.
 302          */
 303         hdrsz = (int)(mp->b_wptr - mp->b_rptr);
 304         if (hdrsz < TUNITDATAINDSZ ||
 305             hdrsz < (pptr->unitdata_ind.OPT_offset +
 306             pptr->unitdata_ind.OPT_length) ||
 307             hdrsz < (pptr->unitdata_ind.SRC_offset +
 308             pptr->unitdata_ind.SRC_length)) {
 309                 goto bad;
 310         }
 311 
 312         /*
 313          * Make sure that the transport provided a usable address.
 314          */
 315         if (pptr->unitdata_ind.SRC_length <= 0) {
 316                 goto bad;
 317         }
 318         /*
 319          * Point the remote transport address in the service_transport
 320          * handle at the address in the request.
 321          */
 322         clone_xprt->xp_rtaddr.buf = (char *)mp->b_rptr +
 323             pptr->unitdata_ind.SRC_offset;
 324         clone_xprt->xp_rtaddr.len = pptr->unitdata_ind.SRC_length;
 325 
 326         /*
 327          * Copy the local transport address in the service_transport
 328          * handle at the address in the request. We will have only
 329          * the local IP address in options.
 330          */
 331         ((sin_t *)(clone_xprt->xp_lcladdr.buf))->sin_family = AF_UNSPEC;
 332         if (pptr->unitdata_ind.OPT_length && pptr->unitdata_ind.OPT_offset) {
 333                 char *dstopt = (char *)mp->b_rptr +
 334                     pptr->unitdata_ind.OPT_offset;
 335                 struct T_opthdr *toh = (struct T_opthdr *)dstopt;
 336 
 337                 if (toh->level == IPPROTO_IPV6 && toh->status == 0 &&
 338                     toh->name == IPV6_PKTINFO) {
 339                         struct in6_pktinfo *pkti;
 340 
 341                         dstopt += sizeof (struct T_opthdr);
 342                         pkti = (struct in6_pktinfo *)dstopt;
 343                         ((sin6_t *)(clone_xprt->xp_lcladdr.buf))->sin6_addr
 344                             = pkti->ipi6_addr;
 345                         ((sin6_t *)(clone_xprt->xp_lcladdr.buf))->sin6_family
 346                             = AF_INET6;
 347                 } else if (toh->level == IPPROTO_IP && toh->status == 0 &&
 348                     toh->name == IP_RECVDSTADDR) {
 349                         dstopt += sizeof (struct T_opthdr);
 350                         ((sin_t *)(clone_xprt->xp_lcladdr.buf))->sin_addr
 351                             = *(struct in_addr *)dstopt;
 352                         ((sin_t *)(clone_xprt->xp_lcladdr.buf))->sin_family
 353                             = AF_INET;
 354                 }
 355         }
 356 
 357         /*
 358          * Save the first mblk which contains the T_unidata_ind in
 359          * ud_resp.  It will be used to generate the T_unitdata_req
 360          * during the reply.
 361          * We reuse any options in the T_unitdata_ind for the T_unitdata_req
 362          * since we must pass any SCM_UCRED across in order for TX to
 363          * work. We also make sure any cred_t is carried across.
 364          */
 365         if (ud->ud_resp) {
 366                 if (ud->ud_resp->b_cont != NULL) {
 367                         cmn_err(CE_WARN, "svc_clts_krecv: ud_resp %p, "
 368                             "b_cont %p", (void *)ud->ud_resp,
 369                             (void *)ud->ud_resp->b_cont);
 370                 }
 371                 freeb(ud->ud_resp);
 372         }
 373         /* Move any cred_t to the first mblk in the message */
 374         cr = msg_getcred(mp, NULL);
 375         if (cr != NULL)
 376                 mblk_setcred(mp, cr, NOPID);
 377 
 378         ud->ud_resp = mp;
 379         mp = mp->b_cont;
 380         ud->ud_resp->b_cont = NULL;
 381 
 382         xdrmblk_init(xdrs, mp, XDR_DECODE, 0);
 383 
 384         TRACE_0(TR_FAC_KRPC, TR_XDR_CALLMSG_START,
 385             "xdr_callmsg_start:");
 386         if (! xdr_callmsg(xdrs, msg)) {
 387                 XDR_DESTROY(xdrs);
 388                 TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END,
 389                     "xdr_callmsg_end:(%S)", "bad");
 390                 RSSTAT_INCR(stats, rsxdrcall);
 391                 goto bad;
 392         }
 393         TRACE_1(TR_FAC_KRPC, TR_XDR_CALLMSG_END,
 394             "xdr_callmsg_end:(%S)", "good");
 395 
 396         clone_xprt->xp_xid = msg->rm_xid;
 397         ud->ud_inmp = mp;
 398 
 399         TRACE_1(TR_FAC_KRPC, TR_SVC_CLTS_KRECV_END,
 400             "svc_clts_krecv_end:(%S)", "good");
 401         return (TRUE);
 402 
 403 bad:
 404         freemsg(mp);
 405         if (ud->ud_resp) {
 406                 /*
 407                  * There should not be any left over results buffer.
 408                  */
 409                 ASSERT(ud->ud_resp->b_cont == NULL);
 410                 freeb(ud->ud_resp);
 411                 ud->ud_resp = NULL;
 412         }
 413 
 414         RSSTAT_INCR(stats, rsbadcalls);
 415         TRACE_1(TR_FAC_KRPC, TR_SVC_CLTS_KRECV_END,
 416             "svc_clts_krecv_end:(%S)", "bad");
 417         return (FALSE);
 418 }
 419 
 420 /*
 421  * Send rpc reply.
 422  * Serialize the reply packet into the output buffer then
 423  * call t_ksndudata to send it.
 424  */
 425 static bool_t
 426 svc_clts_ksend(SVCXPRT *clone_xprt, struct rpc_msg *msg)
 427 {
 428         /* LINTED pointer alignment */
 429         struct udp_data *ud = (struct udp_data *)clone_xprt->xp_p2buf;
 430         XDR *xdrs = &clone_xprt->xp_xdrout;
 431         int stat = FALSE;
 432         mblk_t *mp;
 433         int msgsz;
 434         struct T_unitdata_req *udreq;
 435         xdrproc_t xdr_results;
 436         caddr_t xdr_location;
 437         bool_t has_args;
 438 
 439         TRACE_0(TR_FAC_KRPC, TR_SVC_CLTS_KSEND_START,
 440             "svc_clts_ksend_start:");
 441 
 442         ASSERT(ud->ud_resp != NULL);
 443 
 444         /*
 445          * If there is a result procedure specified in the reply message,
 446          * it will be processed in the xdr_replymsg and SVCAUTH_WRAP.
 447          * We need to make sure it won't be processed twice, so we null
 448          * it for xdr_replymsg here.
 449          */
 450         has_args = FALSE;
 451         if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
 452             msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
 453                 if ((xdr_results = msg->acpted_rply.ar_results.proc) != NULL) {
 454                         has_args = TRUE;
 455                         xdr_location = msg->acpted_rply.ar_results.where;
 456                         msg->acpted_rply.ar_results.proc = xdr_void;
 457                         msg->acpted_rply.ar_results.where = NULL;
 458                 }
 459         }
 460 
 461         if (ud->ud_resp->b_cont == NULL) {
 462                 /*
 463                  * Allocate an initial mblk for the response data.
 464                  */
 465                 while ((mp = allocb(UD_INITSIZE, BPRI_LO)) == NULL) {
 466                         if (strwaitbuf(UD_INITSIZE, BPRI_LO)) {
 467                                 TRACE_1(TR_FAC_KRPC, TR_SVC_CLTS_KSEND_END,
 468                                     "svc_clts_ksend_end:(%S)", "strwaitbuf");
 469                                 return (FALSE);
 470                         }
 471                 }
 472 
 473                 /*
 474                  * Initialize the XDR encode stream.  Additional mblks
 475                  * will be allocated if necessary.  They will be UD_MAXSIZE
 476                  * sized.
 477                  */
 478                 xdrmblk_init(xdrs, mp, XDR_ENCODE, UD_MAXSIZE);
 479 
 480                 /*
 481                  * Leave some space for protocol headers.
 482                  */
 483                 (void) XDR_SETPOS(xdrs, 512);
 484                 mp->b_rptr += 512;
 485 
 486                 msg->rm_xid = clone_xprt->xp_xid;
 487 
 488                 ud->ud_resp->b_cont = mp;
 489 
 490                 TRACE_0(TR_FAC_KRPC, TR_XDR_REPLYMSG_START,
 491                     "xdr_replymsg_start:");
 492                 if (!(xdr_replymsg(xdrs, msg) &&
 493                     (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs,
 494                     xdr_results, xdr_location)))) {
 495                         XDR_DESTROY(xdrs);
 496                         TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END,
 497                             "xdr_replymsg_end:(%S)", "bad");
 498                         RPCLOG0(1, "xdr_replymsg/SVCAUTH_WRAP failed\n");
 499                         goto out;
 500                 }
 501                 TRACE_1(TR_FAC_KRPC, TR_XDR_REPLYMSG_END,
 502                     "xdr_replymsg_end:(%S)", "good");
 503 
 504         } else if (!(xdr_replymsg_body(xdrs, msg) &&
 505             (!has_args || SVCAUTH_WRAP(&clone_xprt->xp_auth, xdrs,
 506             xdr_results, xdr_location)))) {
 507                 XDR_DESTROY(xdrs);
 508                 RPCLOG0(1, "xdr_replymsg_body/SVCAUTH_WRAP failed\n");
 509                 goto out;
 510         }
 511 
 512         XDR_DESTROY(xdrs);
 513 
 514         msgsz = (int)xmsgsize(ud->ud_resp->b_cont);
 515 
 516         if (msgsz <= 0 || (clone_xprt->xp_msg_size != -1 &&
 517             msgsz > clone_xprt->xp_msg_size)) {
 518 #ifdef  DEBUG
 519                 cmn_err(CE_NOTE,
 520 "KRPC: server response message of %d bytes; transport limits are [0, %d]",
 521                     msgsz, clone_xprt->xp_msg_size);
 522 #endif
 523                 goto out;
 524         }
 525 
 526         /*
 527          * Construct the T_unitdata_req.  We take advantage of the fact that
 528          * T_unitdata_ind looks just like T_unitdata_req, except for the
 529          * primitive type.  Reusing it means we preserve the SCM_UCRED, and
 530          * we must preserve it for TX to work.
 531          *
 532          * This has the side effect that we can also pass certain receive-side
 533          * options like IPV6_PKTINFO back down the send side.  This implies
 534          * that we can not ASSERT on a non-NULL db_credp when we have send-side
 535          * options in UDP.
 536          */
 537         ASSERT(MBLKL(ud->ud_resp) >= TUNITDATAREQSZ);
 538         udreq = (struct T_unitdata_req *)ud->ud_resp->b_rptr;
 539         ASSERT(udreq->PRIM_type == T_UNITDATA_IND);
 540         udreq->PRIM_type = T_UNITDATA_REQ;
 541 
 542         /*
 543          * If the local IPv4 transport address is known use it as a source
 544          * address for the outgoing UDP packet.
 545          */
 546         if (((sin_t *)(clone_xprt->xp_lcladdr.buf))->sin_family == AF_INET) {
 547                 struct T_opthdr *opthdr;
 548                 in_pktinfo_t *pktinfo;
 549                 size_t size;
 550 
 551                 if (udreq->DEST_length == 0)
 552                         udreq->OPT_offset = _TPI_ALIGN_TOPT(TUNITDATAREQSZ);
 553                 else
 554                         udreq->OPT_offset = _TPI_ALIGN_TOPT(udreq->DEST_offset +
 555                             udreq->DEST_length);
 556 
 557                 udreq->OPT_length = sizeof (struct T_opthdr) +
 558                     sizeof (in_pktinfo_t);
 559 
 560                 size = udreq->OPT_length + udreq->OPT_offset;
 561 
 562                 /* make sure we have enough space for the option data */
 563                 mp = reallocb(ud->ud_resp, size, 1);
 564                 if (mp == NULL)
 565                         goto out;
 566                 ud->ud_resp = mp;
 567                 udreq = (struct T_unitdata_req *)mp->b_rptr;
 568 
 569                 /* set desired option header */
 570                 opthdr = (struct T_opthdr *)(mp->b_rptr + udreq->OPT_offset);
 571                 opthdr->len = udreq->OPT_length;
 572                 opthdr->level = IPPROTO_IP;
 573                 opthdr->name = IP_PKTINFO;
 574 
 575                 /*
 576                  * 1. set source IP of outbound packet
 577                  * 2. value '0' for index means IP layer uses this as source
 578                  *    address
 579                  */
 580                 pktinfo = (in_pktinfo_t *)(opthdr + 1);
 581                 (void) memset(pktinfo, 0, sizeof (in_pktinfo_t));
 582                 pktinfo->ipi_spec_dst.s_addr =
 583                     ((sin_t *)(clone_xprt->xp_lcladdr.buf))->sin_addr.s_addr;
 584                 pktinfo->ipi_ifindex = 0;
 585 
 586                 /* adjust the end of active data */
 587                 mp->b_wptr = mp->b_rptr + size;
 588         }
 589 
 590         put(clone_xprt->xp_wq, ud->ud_resp);
 591         stat = TRUE;
 592         ud->ud_resp = NULL;
 593 
 594 out:
 595         if (stat == FALSE) {
 596                 freemsg(ud->ud_resp);
 597                 ud->ud_resp = NULL;
 598         }
 599 
 600         /*
 601          * This is completely disgusting.  If public is set it is
 602          * a pointer to a structure whose first field is the address
 603          * of the function to free that structure and any related
 604          * stuff.  (see rrokfree in nfs_xdr.c).
 605          */
 606         if (xdrs->x_public) {
 607                 /* LINTED pointer alignment */
 608                 (**((int (**)())xdrs->x_public))(xdrs->x_public);
 609         }
 610 
 611         TRACE_1(TR_FAC_KRPC, TR_SVC_CLTS_KSEND_END,
 612             "svc_clts_ksend_end:(%S)", "done");
 613         return (stat);
 614 }
 615 
 616 /*
 617  * Deserialize arguments.
 618  */
 619 static bool_t
 620 svc_clts_kgetargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args,
 621     caddr_t args_ptr)
 622 {
 623 
 624         /* LINTED pointer alignment */
 625         return (SVCAUTH_UNWRAP(&clone_xprt->xp_auth, &clone_xprt->xp_xdrin,
 626             xdr_args, args_ptr));
 627 
 628 }
 629 
 630 static bool_t
 631 svc_clts_kfreeargs(SVCXPRT *clone_xprt, xdrproc_t xdr_args,
 632     caddr_t args_ptr)
 633 {
 634         /* LINTED pointer alignment */
 635         struct udp_data *ud = (struct udp_data *)clone_xprt->xp_p2buf;
 636         XDR *xdrs = &clone_xprt->xp_xdrin;
 637         bool_t retval;
 638 
 639         if (args_ptr) {
 640                 xdrs->x_op = XDR_FREE;
 641                 retval = (*xdr_args)(xdrs, args_ptr);
 642         } else
 643                 retval = TRUE;
 644 
 645         XDR_DESTROY(xdrs);
 646 
 647         if (ud->ud_inmp) {
 648                 freemsg(ud->ud_inmp);
 649                 ud->ud_inmp = NULL;
 650         }
 651 
 652         return (retval);
 653 }
 654 
 655 static int32_t *
 656 svc_clts_kgetres(SVCXPRT *clone_xprt, int size)
 657 {
 658         /* LINTED pointer alignment */
 659         struct udp_data *ud = (struct udp_data *)clone_xprt->xp_p2buf;
 660         XDR *xdrs = &clone_xprt->xp_xdrout;
 661         mblk_t *mp;
 662         int32_t *buf;
 663         struct rpc_msg rply;
 664 
 665         /*
 666          * Allocate an initial mblk for the response data.
 667          */
 668         while ((mp = allocb(UD_INITSIZE, BPRI_LO)) == NULL) {
 669                 if (strwaitbuf(UD_INITSIZE, BPRI_LO)) {
 670                         return (NULL);
 671                 }
 672         }
 673 
 674         mp->b_cont = NULL;
 675 
 676         /*
 677          * Initialize the XDR encode stream.  Additional mblks
 678          * will be allocated if necessary.  They will be UD_MAXSIZE
 679          * sized.
 680          */
 681         xdrmblk_init(xdrs, mp, XDR_ENCODE, UD_MAXSIZE);
 682 
 683         /*
 684          * Leave some space for protocol headers.
 685          */
 686         (void) XDR_SETPOS(xdrs, 512);
 687         mp->b_rptr += 512;
 688 
 689         /*
 690          * Assume a successful RPC since most of them are.
 691          */
 692         rply.rm_xid = clone_xprt->xp_xid;
 693         rply.rm_direction = REPLY;
 694         rply.rm_reply.rp_stat = MSG_ACCEPTED;
 695         rply.acpted_rply.ar_verf = clone_xprt->xp_verf;
 696         rply.acpted_rply.ar_stat = SUCCESS;
 697 
 698         if (!xdr_replymsg_hdr(xdrs, &rply)) {
 699                 XDR_DESTROY(xdrs);
 700                 freeb(mp);
 701                 return (NULL);
 702         }
 703 
 704         buf = XDR_INLINE(xdrs, size);
 705 
 706         if (buf == NULL) {
 707                 XDR_DESTROY(xdrs);
 708                 freeb(mp);
 709         } else {
 710                 ud->ud_resp->b_cont = mp;
 711         }
 712 
 713         return (buf);
 714 }
 715 
 716 static void
 717 svc_clts_kfreeres(SVCXPRT *clone_xprt)
 718 {
 719         /* LINTED pointer alignment */
 720         struct udp_data *ud = (struct udp_data *)clone_xprt->xp_p2buf;
 721 
 722         if (ud->ud_resp == NULL || ud->ud_resp->b_cont == NULL)
 723                 return;
 724 
 725         XDR_DESTROY(&clone_xprt->xp_xdrout);
 726 
 727         /*
 728          * SVC_FREERES() is called whenever the server decides not to
 729          * send normal reply. Thus, we expect only one mblk to be allocated,
 730          * because we have not attempted any XDR encoding.
 731          * If we do any XDR encoding and we get an error, then SVC_REPLY()
 732          * will freemsg(ud->ud_resp);
 733          */
 734         ASSERT(ud->ud_resp->b_cont->b_cont == NULL);
 735         freeb(ud->ud_resp->b_cont);
 736         ud->ud_resp->b_cont = NULL;
 737 }
 738 
/*
 * The dup caching routines below provide a cache of non-failure
 * transaction IDs.  RPC service routines can use this to detect
 * retransmissions and re-send a non-failure response.
 */
 744 
 745 /*
 746  * MAXDUPREQS is the number of cached items.  It should be adjusted
 747  * to the service load so that there is likely to be a response entry
 748  * when the first retransmission comes in.
 749  */
 750 #define MAXDUPREQS      1024
 751 
 752 /*
 753  * This should be appropriately scaled to MAXDUPREQS.
 754  */
 755 #define DRHASHSZ        257
 756 
 757 #if ((DRHASHSZ & (DRHASHSZ - 1)) == 0)
 758 #define XIDHASH(xid)    ((xid) & (DRHASHSZ - 1))
 759 #else
 760 #define XIDHASH(xid)    ((xid) % DRHASHSZ)
 761 #endif
 762 #define DRHASH(dr)      XIDHASH((dr)->dr_xid)
 763 #define REQTOXID(req)   ((req)->rq_xprt->xp_xid)
 764 
 765 static int      ndupreqs = 0;
 766 int     maxdupreqs = MAXDUPREQS;
 767 static kmutex_t dupreq_lock;
 768 static struct dupreq *drhashtbl[DRHASHSZ];
 769 static int      drhashstat[DRHASHSZ];
 770 
 771 static void unhash(struct dupreq *);
 772 
 773 /*
 774  * drmru points to the head of a circular linked list in lru order.
 775  * drmru->dr_next == drlru
 776  */
 777 struct dupreq *drmru;
 778 
 779 /*
 780  * PSARC 2003/523 Contract Private Interface
 781  * svc_clts_kdup
 782  * Changes must be reviewed by Solaris File Sharing
 783  * Changes must be communicated to contract-2003-523@sun.com
 784  *
 785  * svc_clts_kdup searches the request cache and returns 0 if the
 786  * request is not found in the cache.  If it is found, then it
 787  * returns the state of the request (in progress or done) and
 788  * the status or attributes that were part of the original reply.
 789  *
 790  * If DUP_DONE (there is a duplicate) svc_clts_kdup copies over the
 791  * value of the response. In that case, also return in *dupcachedp
 792  * whether the response free routine is cached in the dupreq - in which case
 793  * the caller should not be freeing it, because it will be done later
 794  * in the svc_clts_kdup code when the dupreq is reused.
 795  */
 796 static int
 797 svc_clts_kdup(struct svc_req *req, caddr_t res, int size, struct dupreq **drpp,
 798         bool_t *dupcachedp)
 799 {
 800         struct rpc_clts_server *stats = CLONE2STATS(req->rq_xprt);
 801         struct dupreq *dr;
 802         uint32_t xid;
 803         uint32_t drhash;
 804         int status;
 805 
 806         xid = REQTOXID(req);
 807         mutex_enter(&dupreq_lock);
 808         RSSTAT_INCR(stats, rsdupchecks);
 809         /*
 810          * Check to see whether an entry already exists in the cache.
 811          */
 812         dr = drhashtbl[XIDHASH(xid)];
 813         while (dr != NULL) {
 814                 if (dr->dr_xid == xid &&
 815                     dr->dr_proc == req->rq_proc &&
 816                     dr->dr_prog == req->rq_prog &&
 817                     dr->dr_vers == req->rq_vers &&
 818                     dr->dr_addr.len == req->rq_xprt->xp_rtaddr.len &&
 819                     bcmp(dr->dr_addr.buf, req->rq_xprt->xp_rtaddr.buf,
 820                     dr->dr_addr.len) == 0) {
 821                         status = dr->dr_status;
 822                         if (status == DUP_DONE) {
 823                                 bcopy(dr->dr_resp.buf, res, size);
 824                                 if (dupcachedp != NULL)
 825                                         *dupcachedp = (dr->dr_resfree != NULL);
 826                         } else {
 827                                 dr->dr_status = DUP_INPROGRESS;
 828                                 *drpp = dr;
 829                         }
 830                         RSSTAT_INCR(stats, rsdupreqs);
 831                         mutex_exit(&dupreq_lock);
 832                         return (status);
 833                 }
 834                 dr = dr->dr_chain;
 835         }
 836 
 837         /*
 838          * There wasn't an entry, either allocate a new one or recycle
 839          * an old one.
 840          */
 841         if (ndupreqs < maxdupreqs) {
 842                 dr = kmem_alloc(sizeof (*dr), KM_NOSLEEP);
 843                 if (dr == NULL) {
 844                         mutex_exit(&dupreq_lock);
 845                         return (DUP_ERROR);
 846                 }
 847                 dr->dr_resp.buf = NULL;
 848                 dr->dr_resp.maxlen = 0;
 849                 dr->dr_addr.buf = NULL;
 850                 dr->dr_addr.maxlen = 0;
 851                 if (drmru) {
 852                         dr->dr_next = drmru->dr_next;
 853                         drmru->dr_next = dr;
 854                 } else {
 855                         dr->dr_next = dr;
 856                 }
 857                 ndupreqs++;
 858         } else {
 859                 dr = drmru->dr_next;
 860                 while (dr->dr_status == DUP_INPROGRESS) {
 861                         dr = dr->dr_next;
 862                         if (dr == drmru->dr_next) {
 863                                 cmn_err(CE_WARN, "svc_clts_kdup no slots free");
 864                                 mutex_exit(&dupreq_lock);
 865                                 return (DUP_ERROR);
 866                         }
 867                 }
 868                 unhash(dr);
 869                 if (dr->dr_resfree) {
 870                         (*dr->dr_resfree)(dr->dr_resp.buf);
 871                 }
 872         }
 873         dr->dr_resfree = NULL;
 874         drmru = dr;
 875 
 876         dr->dr_xid = REQTOXID(req);
 877         dr->dr_prog = req->rq_prog;
 878         dr->dr_vers = req->rq_vers;
 879         dr->dr_proc = req->rq_proc;
 880         if (dr->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) {
 881                 if (dr->dr_addr.buf != NULL)
 882                         kmem_free(dr->dr_addr.buf, dr->dr_addr.maxlen);
 883                 dr->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len;
 884                 dr->dr_addr.buf = kmem_alloc(dr->dr_addr.maxlen,
 885                     KM_NOSLEEP);
 886                 if (dr->dr_addr.buf == NULL) {
 887                         dr->dr_addr.maxlen = 0;
 888                         dr->dr_status = DUP_DROP;
 889                         mutex_exit(&dupreq_lock);
 890                         return (DUP_ERROR);
 891                 }
 892         }
 893         dr->dr_addr.len = req->rq_xprt->xp_rtaddr.len;
 894         bcopy(req->rq_xprt->xp_rtaddr.buf, dr->dr_addr.buf, dr->dr_addr.len);
 895         if (dr->dr_resp.maxlen < size) {
 896                 if (dr->dr_resp.buf != NULL)
 897                         kmem_free(dr->dr_resp.buf, dr->dr_resp.maxlen);
 898                 dr->dr_resp.maxlen = (unsigned int)size;
 899                 dr->dr_resp.buf = kmem_alloc(size, KM_NOSLEEP);
 900                 if (dr->dr_resp.buf == NULL) {
 901                         dr->dr_resp.maxlen = 0;
 902                         dr->dr_status = DUP_DROP;
 903                         mutex_exit(&dupreq_lock);
 904                         return (DUP_ERROR);
 905                 }
 906         }
 907         dr->dr_status = DUP_INPROGRESS;
 908 
 909         drhash = (uint32_t)DRHASH(dr);
 910         dr->dr_chain = drhashtbl[drhash];
 911         drhashtbl[drhash] = dr;
 912         drhashstat[drhash]++;
 913         mutex_exit(&dupreq_lock);
 914         *drpp = dr;
 915         return (DUP_NEW);
 916 }
 917 
 918 /*
 919  * PSARC 2003/523 Contract Private Interface
 920  * svc_clts_kdupdone
 921  * Changes must be reviewed by Solaris File Sharing
 922  * Changes must be communicated to contract-2003-523@sun.com
 923  *
 924  * svc_clts_kdupdone marks the request done (DUP_DONE or DUP_DROP)
 925  * and stores the response.
 926  */
 927 static void
 928 svc_clts_kdupdone(struct dupreq *dr, caddr_t res, void (*dis_resfree)(),
 929         int size, int status)
 930 {
 931 
 932         ASSERT(dr->dr_resfree == NULL);
 933         if (status == DUP_DONE) {
 934                 bcopy(res, dr->dr_resp.buf, size);
 935                 dr->dr_resfree = dis_resfree;
 936         }
 937         dr->dr_status = status;
 938 }
 939 
 940 /*
 941  * This routine expects that the mutex, dupreq_lock, is already held.
 942  */
 943 static void
 944 unhash(struct dupreq *dr)
 945 {
 946         struct dupreq *drt;
 947         struct dupreq *drtprev = NULL;
 948         uint32_t drhash;
 949 
 950         ASSERT(MUTEX_HELD(&dupreq_lock));
 951 
 952         drhash = (uint32_t)DRHASH(dr);
 953         drt = drhashtbl[drhash];
 954         while (drt != NULL) {
 955                 if (drt == dr) {
 956                         drhashstat[drhash]--;
 957                         if (drtprev == NULL) {
 958                                 drhashtbl[drhash] = drt->dr_chain;
 959                         } else {
 960                                 drtprev->dr_chain = drt->dr_chain;
 961                         }
 962                         return;
 963                 }
 964                 drtprev = drt;
 965                 drt = drt->dr_chain;
 966         }
 967 }
 968 
 969 void
 970 svc_clts_stats_init(zoneid_t zoneid, struct rpc_clts_server **statsp)
 971 {
 972         kstat_t *ksp;
 973         kstat_named_t *knp;
 974 
 975         knp = rpcstat_zone_init_common(zoneid, "unix", "rpc_clts_server",
 976             (const kstat_named_t *)&clts_rsstat_tmpl,
 977             sizeof (clts_rsstat_tmpl));
 978         /*
 979          * Backwards compatibility for old kstat clients
 980          */
 981         ksp = kstat_create_zone("unix", 0, "rpc_server", "rpc",
 982             KSTAT_TYPE_NAMED, clts_rsstat_ndata,
 983             KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid);
 984         if (ksp) {
 985                 ksp->ks_data = knp;
 986                 kstat_install(ksp);
 987         }
 988         *statsp = (struct rpc_clts_server *)knp;
 989 }
 990 
 991 void
 992 svc_clts_stats_fini(zoneid_t zoneid, struct rpc_clts_server **statsp)
 993 {
 994         rpcstat_zone_fini_common(zoneid, "unix", "rpc_clts_server");
 995         kstat_delete_byname_zone("unix", 0, "rpc_server", zoneid);
 996         kmem_free(*statsp, sizeof (clts_rsstat_tmpl));
 997 }
 998 
 999 void
1000 svc_clts_init()
1001 {
1002         /*
1003          * Check to make sure that the clts private data will fit into
1004          * the stack buffer allocated by svc_run.  The compiler should
1005          * remove this check, but it's a safety net if the udp_data
1006          * structure ever changes.
1007          */
1008         /*CONSTANTCONDITION*/
1009         ASSERT(sizeof (struct udp_data) <= SVC_P2LEN);
1010 
1011         mutex_init(&dupreq_lock, NULL, MUTEX_DEFAULT, NULL);
1012 }