1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2015, Joyent, Inc.  All rights reserved.
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/param.h>
  32 #include <sys/systm.h>
  33 #include <sys/sysmacros.h>
  34 #include <sys/debug.h>
  35 #include <sys/cmn_err.h>
  36 
  37 #include <sys/stropts.h>
  38 #include <sys/socket.h>
  39 #include <sys/socketvar.h>
  40 
  41 #define _SUN_TPI_VERSION        2
  42 #include <sys/tihdr.h>
  43 #include <sys/sockio.h>
  44 #include <sys/kmem_impl.h>
  45 
  46 #include <sys/strsubr.h>
  47 #include <sys/strsun.h>
  48 #include <sys/ddi.h>
  49 #include <netinet/in.h>
  50 #include <inet/ip.h>
  51 
  52 #include <fs/sockfs/sockcommon.h>
  53 #include <fs/sockfs/sockfilter_impl.h>
  54 
  55 #include <sys/socket_proto.h>
  56 
  57 #include <fs/sockfs/socktpi_impl.h>
  58 #include <fs/sockfs/sodirect.h>
  59 #include <sys/tihdr.h>
  60 #include <fs/sockfs/nl7c.h>
  61 
  62 extern int xnet_skip_checks;
  63 extern int xnet_check_print;
  64 
  65 static void so_queue_oob(struct sonode *, mblk_t *, size_t);
  66 
  67 
  68 /*ARGSUSED*/
  69 int
  70 so_accept_notsupp(struct sonode *lso, int fflag,
  71     struct cred *cr, struct sonode **nsop)
  72 {
  73         return (EOPNOTSUPP);
  74 }
  75 
  76 /*ARGSUSED*/
  77 int
  78 so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
  79 {
  80         return (EOPNOTSUPP);
  81 }
  82 
  83 /*ARGSUSED*/
  84 int
  85 so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa,
  86     socklen_t *len, struct cred *cr)
  87 {
  88         return (EOPNOTSUPP);
  89 }
  90 
  91 /*ARGSUSED*/
  92 int
  93 so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
  94     socklen_t *addrlen, boolean_t accept, struct cred *cr)
  95 {
  96         return (EOPNOTSUPP);
  97 }
  98 
  99 /*ARGSUSED*/
 100 int
 101 so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
 102 {
 103         return (EOPNOTSUPP);
 104 }
 105 
 106 /*ARGSUSED*/
 107 int
 108 so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
 109     struct cred *cr, mblk_t **mpp)
 110 {
 111         return (EOPNOTSUPP);
 112 }
 113 
 114 /*
 115  * Generic Socket Ops
 116  */
 117 
 118 /* ARGSUSED */
 119 int
 120 so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
 121 {
 122         return (socket_init_common(so, pso, flags, cr));
 123 }
 124 
/*
 * Bind socket `so' to the address `name' of length `namelen'.
 *
 * `flags' must be either _SOBIND_XPG4_2 or _SOBIND_SOCKBSD (asserted).
 * A NULL `name' skips all address validation and goes straight to the
 * filter/protocol bind.
 *
 * For AF_INET and AF_INET6 sockets the address length and family are
 * validated here before anything is passed down; other families are
 * handed to the protocol unchecked.  If nl7c is enabled, the address is
 * known to nl7c_lookup_addr() and the socket lives in the global zone,
 * the socket is converted with so_tpi_fallback() and the bind is re-issued
 * through SOP_BIND().
 *
 * Returns 0 on success or an errno value:
 *   EINVAL        SS_CANTSENDMORE is set and xnet_skip_checks is zero, or
 *                 the address length is wrong but the family matches
 *   EAFNOSUPPORT  address family does not match the socket's family
 *   otherwise the value produced by so_tpi_fallback(), sof_filter_bind()
 *   or the protocol's sd_bind downcall.
 */
int
so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
    int flags, struct cred *cr)
{
        int error;

        /*
         * NOTE(review): presumably this takes the fallback lock and, when
         * the socket has already fallen back, returns the result of the
         * SOP_BIND() expression directly -- confirm against the macro
         * definition.  Every path below pairs it with SO_UNBLOCK_FALLBACK().
         */
        SO_BLOCK_FALLBACK_SAFE(so, SOP_BIND(so, name, namelen, flags, cr));

        ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);

        /* X/Open requires this check */
        if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
                if (xnet_check_print) {
                        printf("sockfs: X/Open bind state check "
                            "caused EINVAL\n");
                }
                error = EINVAL;
                goto done;
        }

        /*
         * a bind to a NULL address is interpreted as unbind. So just
         * do the downcall.
         */
        if (name == NULL)
                goto dobind;

        switch (so->so_family) {
        case AF_INET:
                /*
                 * A wrong length is reported as EAFNOSUPPORT when the
                 * family is also wrong, and as EINVAL otherwise.
                 */
                if ((size_t)namelen != sizeof (sin_t)) {
                        error = name->sa_family != so->so_family ?
                            EAFNOSUPPORT : EINVAL;
                        eprintsoline(so, error);
                        goto done;
                }

                if ((flags & _SOBIND_XPG4_2) &&
                    (name->sa_family != so->so_family)) {
                        /*
                         * This check has to be made for X/Open
                         * sockets however application failures have
                         * been observed when it is applied to
                         * all sockets.
                         */
                        error = EAFNOSUPPORT;
                        eprintsoline(so, error);
                        goto done;
                }
                /*
                 * Force a zero sa_family to match so_family.
                 *
                 * Some programs like inetd(1M) don't set the
                 * family field. Other programs leave
                 * sin_family set to garbage - SunOS 4.X does
                 * not check the family field on a bind.
                 * We use the family field that
                 * was passed in to the socket() call.
                 *
                 * Note that the caller's sockaddr is modified in place.
                 */
                name->sa_family = so->so_family;
                break;

        case AF_INET6: {
#ifdef DEBUG
                sin6_t *sin6 = (sin6_t *)name;
#endif
                /* Same length/family error mapping as for AF_INET. */
                if ((size_t)namelen != sizeof (sin6_t)) {
                        error = name->sa_family != so->so_family ?
                            EAFNOSUPPORT : EINVAL;
                        eprintsoline(so, error);
                        goto done;
                }

                if (name->sa_family != so->so_family) {
                        /*
                         * With IPv6 we require the family to match
                         * unlike in IPv4.
                         */
                        error = EAFNOSUPPORT;
                        eprintsoline(so, error);
                        goto done;
                }
#ifdef DEBUG
                /*
                 * Verify that apps don't forget to clear
                 * sin6_scope_id etc
                 *
                 * These checks only warn; they do not fail the bind.
                 */
                if (sin6->sin6_scope_id != 0 &&
                    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "bind with uninitialized sin6_scope_id "
                            "(%d) on socket. Pid = %d\n",
                            (int)sin6->sin6_scope_id,
                            (int)curproc->p_pid);
                }
                if (sin6->__sin6_src_id != 0) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "bind with uninitialized __sin6_src_id "
                            "(%d) on socket. Pid = %d\n",
                            (int)sin6->__sin6_src_id,
                            (int)curproc->p_pid);
                }
#endif /* DEBUG */

                break;
        }
        default:
                /* Just pass the request to the protocol */
                goto dobind;
        }

        /*
         * First we check if either NCA or KSSL has been enabled for
         * the requested address, and if so, we fall back to TPI.
         * If neither of those two services are enabled, then we just
         * pass the request to the protocol.
         *
         * Note that KSSL can only be enabled on a socket if NCA is NOT
         * enabled for that socket, hence the else-statement below.
         *
         * NOTE(review): the code below only tests NL7C (NCA); there is no
         * KSSL check and no else-statement in this function, so the
         * KSSL part of this comment appears to be stale.
         */
        if (nl7c_enabled && ((so->so_family == AF_INET ||
            so->so_family == AF_INET6) &&
            nl7c_lookup_addr(name, namelen) != NULL)) {
                /*
                 * NL7C is not supported in non-global zones,
                 * we enforce this restriction here.
                 */
                if (so->so_zoneid == GLOBAL_ZONEID) {
                        /* NCA should be used, so fall back to TPI */
                        error = so_tpi_fallback(so, cr);
                        /*
                         * The fallback block is dropped before re-issuing
                         * the bind through SOP_BIND(), and this path
                         * returns directly rather than going through
                         * `done'.
                         */
                        SO_UNBLOCK_FALLBACK(so);
                        if (error)
                                return (error);
                        else
                                return (SOP_BIND(so, name, namelen, flags, cr));
                }
        }

dobind:
        /*
         * With no active filters, or when sof_filter_bind() returns a
         * negative value, the request goes to the protocol.  Any
         * non-negative filter result is returned to the caller as is.
         * The filter receives &namelen, so it may adjust the length that
         * is subsequently passed to the protocol.
         */
        if (so->so_filter_active == 0 ||
            (error = sof_filter_bind(so, name, &namelen, cr)) < 0) {
                error = (*so->so_downcalls->sd_bind)
                    (so->so_proto_handle, name, namelen, cr);
        }
done:
        SO_UNBLOCK_FALLBACK(so);

        return (error);
}
 273 
 274 int
 275 so_listen(struct sonode *so, int backlog, struct cred *cr)
 276 {
 277         int     error = 0;
 278 
 279         ASSERT(MUTEX_NOT_HELD(&so->so_lock));
 280         SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));
 281 
 282         if ((so)->so_filter_active == 0 ||
 283             (error = sof_filter_listen(so, &backlog, cr)) < 0)
 284                 error = (*so->so_downcalls->sd_listen)(so->so_proto_handle,
 285                     backlog, cr);
 286 
 287         SO_UNBLOCK_FALLBACK(so);
 288 
 289         return (error);
 290 }
 291 
 292 
 293 int
 294 so_connect(struct sonode *so, struct sockaddr *name,
 295     socklen_t namelen, int fflag, int flags, struct cred *cr)
 296 {
 297         int error = 0;
 298         sock_connid_t id;
 299 
 300         ASSERT(MUTEX_NOT_HELD(&so->so_lock));
 301         SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));
 302 
 303         /*
 304          * If there is a pending error, return error
 305          * This can happen if a non blocking operation caused an error.
 306          */
 307 
 308         if (so->so_error != 0 && (so->so_mode & SM_DEFERERR) == 0) {
 309                 mutex_enter(&so->so_lock);
 310                 error = sogeterr(so, B_TRUE);
 311                 mutex_exit(&so->so_lock);
 312                 if (error != 0)
 313                         goto done;
 314         }
 315 
 316         if (so->so_filter_active == 0 ||
 317             (error = sof_filter_connect(so, (struct sockaddr *)name,
 318             &namelen, cr)) < 0) {
 319                 error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
 320                     name, namelen, &id, cr);
 321 
 322                 if (error == EINPROGRESS)
 323                         error = so_wait_connected(so,
 324                             fflag & (FNONBLOCK|FNDELAY), id);
 325         }
 326 done:
 327         SO_UNBLOCK_FALLBACK(so);
 328         return (error);
 329 }
 330 
 331 /*ARGSUSED*/
 332 int
 333 so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
 334 {
 335         int error = 0;
 336         struct sonode *nso;
 337 
 338         *nsop = NULL;
 339 
 340         SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
 341         if ((so->so_state & SS_ACCEPTCONN) == 0) {
 342                 SO_UNBLOCK_FALLBACK(so);
 343                 return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
 344                     EOPNOTSUPP : EINVAL);
 345         }
 346 
 347         if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
 348             &nso)) == 0) {
 349                 ASSERT(nso != NULL);
 350 
 351                 /* finish the accept */
 352                 if ((so->so_filter_active > 0 &&
 353                     (error = sof_filter_accept(nso, cr)) > 0) ||
 354                     (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
 355                     nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) {
 356                         (void) socket_close(nso, 0, cr);
 357                         socket_destroy(nso);
 358                 } else {
 359                         *nsop = nso;
 360                 }
 361         }
 362 
 363         SO_UNBLOCK_FALLBACK(so);
 364         return (error);
 365 }
 366 
/*
 * Send the data described by `uiop' on socket `so'.
 *
 * The message flags are taken from msg->msg_flags.  The send is
 * non-blocking when MSG_DONTWAIT is set there or FNONBLOCK/FNDELAY is set
 * in uiop->uio_fmode.
 *
 * Returns 0 on success, otherwise an errno value:
 *   EOPNOTSUPP  control data was supplied without MSG_XPG4_2, or MSG_OOB
 *               was requested on a socket whose mode lacks SM_EXDATA
 *   EMSGSIZE    SM_ATOMIC socket and the request exceeds sopp_maxpsz
 *               (a sopp_maxpsz of -1 disables this check)
 *   EPIPE       SS_CANTSENDMORE is set
 *   otherwise whatever sogeterr(), so_snd_wait_qnotfull(), socopyinuio(),
 *   the outbound data filters or the protocol's send downcall report.
 */
int
so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
        int error, flags;
        boolean_t dontblock;
        ssize_t orig_resid;
        mblk_t  *mp;

        SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));

        flags = msg->msg_flags;
        error = 0;
        dontblock = (flags & MSG_DONTWAIT) ||
            (uiop->uio_fmode & (FNONBLOCK|FNDELAY));

        if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
                /*
                 * Old way of passing fd's is not supported
                 */
                SO_UNBLOCK_FALLBACK(so);
                return (EOPNOTSUPP);
        }

        /*
         * An atomic send must fit in a single protocol message; a
         * sopp_maxpsz of -1 means no limit is enforced here.
         */
        if ((so->so_mode & SM_ATOMIC) &&
            uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
            so->so_proto_props.sopp_maxpsz != -1) {
                SO_UNBLOCK_FALLBACK(so);
                return (EMSGSIZE);
        }

        /*
         * For atomic sends we will only do one iteration.
         */
        do {
                if (so->so_state & SS_CANTSENDMORE) {
                        error = EPIPE;
                        break;
                }

                /*
                 * Report a pending socket error unless the socket mode
                 * asks for errors to be deferred.
                 */
                if (so->so_error != 0 && (so->so_mode & SM_DEFERERR) == 0) {
                        mutex_enter(&so->so_lock);
                        error = sogeterr(so, B_TRUE);
                        mutex_exit(&so->so_lock);
                        if (error != 0)
                                break;
                }

                /*
                 * Send down OOB messages even if the send path is being
                 * flow controlled (assuming the protocol supports OOB data).
                 */
                if (flags & MSG_OOB) {
                        if ((so->so_mode & SM_EXDATA) == 0) {
                                error = EOPNOTSUPP;
                                break;
                        }
                } else if (SO_SND_FLOWCTRLD(so)) {
                        /*
                         * Need to wait until the protocol is ready to receive
                         * more data for transmission.
                         */
                        if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
                                break;
                }

                /*
                 * Time to send data to the protocol. We either copy the
                 * data into mblks or pass the uio directly to the protocol.
                 * We decide what to do based on the available down calls.
                 */
                if (so->so_downcalls->sd_send_uio != NULL) {
                        /*
                         * Note that the outbound data filters are not
                         * invoked on this path.
                         */
                        error = (*so->so_downcalls->sd_send_uio)
                            (so->so_proto_handle, uiop, msg, cr);
                        if (error != 0)
                                break;
                } else {
                        /* save the resid in case of failure */
                        orig_resid = uiop->uio_resid;

                        /*
                         * A NULL return ends the loop with whatever
                         * socopyinuio() stored in error.
                         */
                        if ((mp = socopyinuio(uiop,
                            so->so_proto_props.sopp_maxpsz,
                            so->so_proto_props.sopp_wroff,
                            so->so_proto_props.sopp_maxblk,
                            so->so_proto_props.sopp_tail, &error)) == NULL) {
                                break;
                        }
                        ASSERT(uiop->uio_resid >= 0);

                        /*
                         * A filter that returns NULL has taken the data.
                         * With no error we move on to the loop condition
                         * and, if data remains, the next chunk.
                         */
                        if (so->so_filter_active > 0 &&
                            ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr,
                            &error)) == NULL)) {
                                if (error != 0)
                                        break;
                                continue;
                        }
                        error = (*so->so_downcalls->sd_send)
                            (so->so_proto_handle, mp, msg, cr);
                        if (error != 0) {
                                /*
                                 * The send failed. We do not have to free the
                                 * mblks, because that is the protocol's
                                 * responsibility. However, uio_resid must
                                 * remain accurate, so adjust that here.
                                 */
                                uiop->uio_resid = orig_resid;
                                        break;
                        }
                }
        } while (uiop->uio_resid > 0);

        SO_UNBLOCK_FALLBACK(so);

        return (error);
}
 482 
 483 int
 484 so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag,
 485     struct cred *cr, mblk_t **mpp, sof_instance_t *fil,
 486     boolean_t fil_inject)
 487 {
 488         int error;
 489         boolean_t dontblock;
 490         size_t size;
 491         mblk_t *mp = *mpp;
 492 
 493         if (so->so_downcalls->sd_send == NULL)
 494                 return (EOPNOTSUPP);
 495 
 496         error = 0;
 497         dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
 498             (fflag & (FNONBLOCK|FNDELAY));
 499         size = msgdsize(mp);
 500 
 501         if ((so->so_mode & SM_ATOMIC) &&
 502             size > so->so_proto_props.sopp_maxpsz &&
 503             so->so_proto_props.sopp_maxpsz != -1) {
 504                 SO_UNBLOCK_FALLBACK(so);
 505                 return (EMSGSIZE);
 506         }
 507 
 508         while (mp != NULL) {
 509                 mblk_t *nmp, *last_mblk;
 510                 size_t mlen;
 511 
 512                 if (so->so_state & SS_CANTSENDMORE) {
 513                         error = EPIPE;
 514                         break;
 515                 }
 516                 if (so->so_error != 0 && (so->so_mode & SM_DEFERERR) == 0) {
 517                         mutex_enter(&so->so_lock);
 518                         error = sogeterr(so, B_TRUE);
 519                         mutex_exit(&so->so_lock);
 520                         if (error != 0)
 521                                 break;
 522                 }
 523                 /* Socket filters are not flow controlled */
 524                 if (SO_SND_FLOWCTRLD(so) && !fil_inject) {
 525                         /*
 526                          * Need to wait until the protocol is ready to receive
 527                          * more data for transmission.
 528                          */
 529                         if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
 530                                 break;
 531                 }
 532 
 533                 /*
 534                  * We only allow so_maxpsz of data to be sent down to
 535                  * the protocol at time.
 536                  */
 537                 mlen = MBLKL(mp);
 538                 nmp = mp->b_cont;
 539                 last_mblk = mp;
 540                 while (nmp != NULL) {
 541                         mlen += MBLKL(nmp);
 542                         if (mlen > so->so_proto_props.sopp_maxpsz) {
 543                                 last_mblk->b_cont = NULL;
 544                                 break;
 545                         }
 546                         last_mblk = nmp;
 547                         nmp = nmp->b_cont;
 548                 }
 549 
 550                 if (so->so_filter_active > 0 &&
 551                     (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg,
 552                     cr, &error)) == NULL) {
 553                         *mpp = mp = nmp;
 554                         if (error != 0)
 555                                 break;
 556                         continue;
 557                 }
 558                 error = (*so->so_downcalls->sd_send)
 559                     (so->so_proto_handle, mp, msg, cr);
 560                 if (error != 0) {
 561                         /*
 562                          * The send failed. The protocol will free the mblks
 563                          * that were sent down. Let the caller deal with the
 564                          * rest.
 565                          */
 566                         *mpp = nmp;
 567                         break;
 568                 }
 569 
 570                 *mpp = mp = nmp;
 571         }
 572         /* Let the filter know whether the protocol is flow controlled */
 573         if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so))
 574                 error = ENOSPC;
 575 
 576         return (error);
 577 }
 578 
 579 #pragma inline(so_sendmblk_impl)
 580 
 581 int
 582 so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
 583     struct cred *cr, mblk_t **mpp)
 584 {
 585         int error;
 586 
 587         SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));
 588 
 589         error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top,
 590             B_FALSE);
 591 
 592         SO_UNBLOCK_FALLBACK(so);
 593 
 594         return (error);
 595 }
 596 
 597 int
 598 so_shutdown(struct sonode *so, int how, struct cred *cr)
 599 {
 600         int error;
 601 
 602         SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));
 603 
 604         /*
 605          * SunOS 4.X has no check for datagram sockets.
 606          * 5.X checks that it is connected (ENOTCONN)
 607          * X/Open requires that we check the connected state.
 608          */
 609         if (!(so->so_state & SS_ISCONNECTED)) {
 610                 if (!xnet_skip_checks) {
 611                         error = ENOTCONN;
 612                         if (xnet_check_print) {
 613                                 printf("sockfs: X/Open shutdown check "
 614                                     "caused ENOTCONN\n");
 615                         }
 616                 }
 617                 goto done;
 618         }
 619 
 620         if (so->so_filter_active == 0 ||
 621             (error = sof_filter_shutdown(so, &how, cr)) < 0)
 622                 error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
 623                     how, cr));
 624 
 625         /*
 626          * Protocol agreed to shutdown. We need to flush the
 627          * receive buffer if the receive side is being shutdown.
 628          */
 629         if (error == 0 && how != SHUT_WR) {
 630                 mutex_enter(&so->so_lock);
 631                 /* wait for active reader to finish */
 632                 (void) so_lock_read(so, 0);
 633 
 634                 so_rcv_flush(so);
 635 
 636                 so_unlock_read(so);
 637                 mutex_exit(&so->so_lock);
 638         }
 639 
 640 done:
 641         SO_UNBLOCK_FALLBACK(so);
 642         return (error);
 643 }
 644 
 645 int
 646 so_getsockname(struct sonode *so, struct sockaddr *addr,
 647     socklen_t *addrlen, struct cred *cr)
 648 {
 649         int error;
 650 
 651         SO_BLOCK_FALLBACK_SAFE(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));
 652 
 653         if (so->so_filter_active == 0 ||
 654             (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0)
 655                 error = (*so->so_downcalls->sd_getsockname)
 656                     (so->so_proto_handle, addr, addrlen, cr);
 657 
 658         SO_UNBLOCK_FALLBACK(so);
 659         return (error);
 660 }
 661 
 662 int
 663 so_getpeername(struct sonode *so, struct sockaddr *addr,
 664     socklen_t *addrlen, boolean_t accept, struct cred *cr)
 665 {
 666         int error;
 667 
 668         SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));
 669 
 670         if (accept) {
 671                 error = (*so->so_downcalls->sd_getpeername)
 672                     (so->so_proto_handle, addr, addrlen, cr);
 673         } else if (!(so->so_state & SS_ISCONNECTED)) {
 674                 error = ENOTCONN;
 675         } else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
 676                 /* Added this check for X/Open */
 677                 error = EINVAL;
 678                 if (xnet_check_print) {
 679                         printf("sockfs: X/Open getpeername check => EINVAL\n");
 680                 }
 681         } else if (so->so_filter_active == 0 ||
 682             (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) {
 683                 error = (*so->so_downcalls->sd_getpeername)
 684                     (so->so_proto_handle, addr, addrlen, cr);
 685         }
 686 
 687         SO_UNBLOCK_FALLBACK(so);
 688         return (error);
 689 }
 690 
 691 int
 692 so_getsockopt(struct sonode *so, int level, int option_name,
 693     void *optval, socklen_t *optlenp, int flags, struct cred *cr)
 694 {
 695         int error = 0;
 696 
 697         if (level == SOL_FILTER)
 698                 return (sof_getsockopt(so, option_name, optval, optlenp, cr));
 699 
 700         SO_BLOCK_FALLBACK_SAFE(so,
 701             SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));
 702 
 703         if ((so->so_filter_active == 0 ||
 704             (error = sof_filter_getsockopt(so, level, option_name, optval,
 705             optlenp, cr)) < 0) &&
 706             (error = socket_getopt_common(so, level, option_name, optval,
 707             optlenp, flags)) < 0) {
 708                 error = (*so->so_downcalls->sd_getsockopt)
 709                     (so->so_proto_handle, level, option_name, optval, optlenp,
 710                     cr);
 711                 if (error ==  ENOPROTOOPT) {
 712                         if (level == SOL_SOCKET) {
 713                                 /*
 714                                  * If a protocol does not support a particular
 715                                  * socket option, set can fail (not allowed)
 716                                  * but get can not fail. This is the previous
 717                                  * sockfs bahvior.
 718                                  */
 719                                 switch (option_name) {
 720                                 case SO_LINGER:
 721                                         if (*optlenp < (t_uscalar_t)
 722                                             sizeof (struct linger)) {
 723                                                 error = EINVAL;
 724                                                 break;
 725                                         }
 726                                         error = 0;
 727                                         bzero(optval, sizeof (struct linger));
 728                                         *optlenp = sizeof (struct linger);
 729                                         break;
 730                                 case SO_RCVTIMEO:
 731                                 case SO_SNDTIMEO:
 732                                         if (*optlenp < (t_uscalar_t)
 733                                             sizeof (struct timeval)) {
 734                                                 error = EINVAL;
 735                                                 break;
 736                                         }
 737                                         error = 0;
 738                                         bzero(optval, sizeof (struct timeval));
 739                                         *optlenp = sizeof (struct timeval);
 740                                         break;
 741                                 case SO_SND_BUFINFO:
 742                                         if (*optlenp < (t_uscalar_t)
 743                                             sizeof (struct so_snd_bufinfo)) {
 744                                                 error = EINVAL;
 745                                                 break;
 746                                         }
 747                                         error = 0;
 748                                         bzero(optval,
 749                                             sizeof (struct so_snd_bufinfo));
 750                                         *optlenp =
 751                                             sizeof (struct so_snd_bufinfo);
 752                                         break;
 753                                 case SO_DEBUG:
 754                                 case SO_REUSEADDR:
 755                                 case SO_KEEPALIVE:
 756                                 case SO_DONTROUTE:
 757                                 case SO_BROADCAST:
 758                                 case SO_USELOOPBACK:
 759                                 case SO_OOBINLINE:
 760                                 case SO_DGRAM_ERRIND:
 761                                 case SO_SNDBUF:
 762                                 case SO_RCVBUF:
 763                                         error = 0;
 764                                         *((int32_t *)optval) = 0;
 765                                         *optlenp = sizeof (int32_t);
 766                                         break;
 767                                 default:
 768                                         break;
 769                                 }
 770                         }
 771                 }
 772         }
 773 
 774         SO_UNBLOCK_FALLBACK(so);
 775         return (error);
 776 }
 777 
 778 int
 779 so_setsockopt(struct sonode *so, int level, int option_name,
 780     const void *optval, socklen_t optlen, struct cred *cr)
 781 {
 782         int error = 0;
 783         struct timeval tl;
 784         const void *opt = optval;
 785 
 786         if (level == SOL_FILTER)
 787                 return (sof_setsockopt(so, option_name, optval, optlen, cr));
 788 
 789         SO_BLOCK_FALLBACK_SAFE(so,
 790             SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
 791 
 792         /* X/Open requires this check */
 793         if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
 794                 SO_UNBLOCK_FALLBACK(so);
 795                 if (xnet_check_print)
 796                         printf("sockfs: X/Open setsockopt check => EINVAL\n");
 797                 return (EINVAL);
 798         }
 799 
 800         if (so->so_filter_active > 0 &&
 801             (error = sof_filter_setsockopt(so, level, option_name,
 802             (void *)optval, &optlen, cr)) >= 0)
 803                 goto done;
 804 
 805         if (level == SOL_SOCKET) {
 806                 switch (option_name) {
 807                 case SO_RCVTIMEO:
 808                 case SO_SNDTIMEO: {
 809                         /*
 810                          * We pass down these two options to protocol in order
 811                          * to support some third part protocols which need to
 812                          * know them. For those protocols which don't care
 813                          * these two options, simply return 0.
 814                          */
 815                         clock_t t_usec;
 816 
 817                         if (get_udatamodel() == DATAMODEL_NONE ||
 818                             get_udatamodel() == DATAMODEL_NATIVE) {
 819                                 if (optlen != sizeof (struct timeval)) {
 820                                         error = EINVAL;
 821                                         goto done;
 822                                 }
 823                                 bcopy((struct timeval *)optval, &tl,
 824                                     sizeof (struct timeval));
 825                         } else {
 826                                 if (optlen != sizeof (struct timeval32)) {
 827                                         error = EINVAL;
 828                                         goto done;
 829                                 }
 830                                 TIMEVAL32_TO_TIMEVAL(&tl,
 831                                     (struct timeval32 *)optval);
 832                         }
 833                         opt = &tl;
 834                         optlen = sizeof (tl);
 835                         t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
 836                         mutex_enter(&so->so_lock);
 837                         if (option_name == SO_RCVTIMEO)
 838                                 so->so_rcvtimeo = drv_usectohz(t_usec);
 839                         else
 840                                 so->so_sndtimeo = drv_usectohz(t_usec);
 841                         mutex_exit(&so->so_lock);
 842                         break;
 843                 }
 844                 case SO_RCVBUF:
 845                         /*
 846                          * XXX XPG 4.2 applications retrieve SO_RCVBUF from
 847                          * sockfs since the transport might adjust the value
 848                          * and not return exactly what was set by the
 849                          * application.
 850                          */
 851                         so->so_xpg_rcvbuf = *(int32_t *)optval;
 852                         break;
 853                 }
 854         }
 855         error = (*so->so_downcalls->sd_setsockopt)
 856             (so->so_proto_handle, level, option_name, opt, optlen, cr);
 857 done:
 858         SO_UNBLOCK_FALLBACK(so);
 859         return (error);
 860 }
 861 
 862 int
 863 so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
 864     struct cred *cr, int32_t *rvalp)
 865 {
 866         int error = 0;
 867 
 868         SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));
 869 
 870         /*
 871          * If there is a pending error, return error
 872          * This can happen if a non blocking operation caused an error.
 873          */
 874         if (so->so_error != 0 && (so->so_mode & SM_DEFERERR) == 0) {
 875                 mutex_enter(&so->so_lock);
 876                 error = sogeterr(so, B_TRUE);
 877                 mutex_exit(&so->so_lock);
 878                 if (error != 0)
 879                         goto done;
 880         }
 881 
 882         /*
 883          * calling strioc can result in the socket falling back to TPI,
 884          * if that is supported.
 885          */
 886         if ((so->so_filter_active == 0 ||
 887             (error = sof_filter_ioctl(so, cmd, arg, mode,
 888             rvalp, cr)) < 0) &&
 889             (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
 890             (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
 891                 error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
 892                     cmd, arg, mode, rvalp, cr);
 893         }
 894 
 895 done:
 896         SO_UNBLOCK_FALLBACK(so);
 897 
 898         return (error);
 899 }
 900 
/*
 * so_poll - poll entry point for a socket.
 *
 * so       - socket being polled
 * events   - events the caller is interested in
 * anyyet   - non-zero suppresses handing back a pollhead when no event is
 *            pending (see the final block); also passed to sd_poll
 * reventsp - out: subset of 'events' that are currently pending
 * phpp     - out: set to the socket's pollhead when no event was found
 *            (and anyyet is zero) or when POLLET was requested
 *
 * Always returns 0.
 */
int
so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
        /* so_state is sampled once, without so_lock, for the checks below */
        int state = so->so_state, mask;
        *reventsp = 0;

        /*
         * In sockets the errors are represented as input/output events
         */
        if (so->so_error != 0 &&
            ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) {
                *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
                return (0);
        }

        /*
         * If the socket is in a state where it can send data
         * turn on POLLWRBAND and POLLOUT events.
         */
        if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) {
                /*
                 * out of band data is allowed even if the connection
                 * is flow controlled
                 */
                *reventsp |= POLLWRBAND & events;
                if (!SO_SND_FLOWCTRLD(so)) {
                        /*
                         * As long as there is buffer to send data
                         * turn on POLLOUT events
                         */
                        *reventsp |= POLLOUT & events;
                }
        }

        /*
         * Turn on POLLIN whenever there is data on the receive queue,
         * or the socket is in a state where no more data will be received.
         * Also, if the socket is accepting connections, flip the bit if
         * there is something on the queue.
         *
         * We do an initial check for events without holding locks. However,
         * if there are no events available, then we redo the check for POLLIN
         * events under the lock.
         */

        /* Pending connections */
        if (!list_is_empty(&so->so_acceptq_list))
                *reventsp |= (POLLIN|POLLRDNORM) & events;

        /*
         * If we're looking for POLLRDHUP, indicate it if we have sent the
         * last rx signal for the socket.
         */
        if ((events & POLLRDHUP) && (state & SS_SENTLASTREADSIG))
                *reventsp |= POLLRDHUP;

        /* Data */
        /* so_downcalls is null for sctp */
        if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
                /*
                 * The protocol implements poll itself: ask it about the
                 * SO_PROTO_POLLEV subset and keep only requested events.
                 */
                *reventsp |= (*so->so_downcalls->sd_poll)
                    (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
                    CRED()) & events;
                ASSERT((*reventsp & ~events) == 0);
                /* do not recheck events */
                events &= ~SO_PROTO_POLLEV;
        } else {
                if (SO_HAVE_DATA(so))
                        *reventsp |= (POLLIN|POLLRDNORM) & events;

                /* Urgent data */
                if ((state & SS_OOBPEND) != 0) {
                        *reventsp |= (POLLRDBAND | POLLPRI) & events;
                }

                /*
                 * If the socket has become disconnected, we set POLLHUP.
                 * Note that if we are in this state, we will have set POLLIN
                 * (SO_HAVE_DATA() is true on a disconnected socket), but not
                 * POLLOUT (SS_ISCONNECTED is false).  This is in keeping with
                 * the semantics of POLLHUP, which is defined to be mutually
                 * exclusive with respect to POLLOUT but not POLLIN.  We are
                 * therefore setting POLLHUP primarily for the benefit of
                 * those not polling on POLLIN, as they have no other way of
                 * knowing that the socket has been disconnected.
                 */
                mask = SS_SENTLASTREADSIG | SS_SENTLASTWRITESIG;

                /* both "last" signals sent and no longer connected */
                if ((state & (mask | SS_ISCONNECTED)) == mask)
                        *reventsp |= POLLHUP;
        }

        /*
         * Nothing found so far (and the caller has found nothing elsewhere),
         * or the caller asked for POLLET: hand back the pollhead, after
         * re-checking for readable state under so_lock.
         */
        if ((!*reventsp && !anyyet) || (events & POLLET)) {
                /* Check for read events again, but this time under lock */
                if (events & (POLLIN|POLLRDNORM)) {
                        mutex_enter(&so->so_lock);
                        if (SO_HAVE_DATA(so) ||
                            !list_is_empty(&so->so_acceptq_list)) {
                                /*
                                 * Readable after all.  Only a POLLET caller
                                 * gets the pollhead and SO_POLLEV_IN set in
                                 * this case.
                                 */
                                if (events & POLLET) {
                                        so->so_pollev |= SO_POLLEV_IN;
                                        *phpp = &so->so_poll_list;
                                }

                                mutex_exit(&so->so_lock);
                                *reventsp |= (POLLIN|POLLRDNORM) & events;

                                return (0);
                        } else {
                                /* still nothing to read: record interest */
                                so->so_pollev |= SO_POLLEV_IN;
                                mutex_exit(&so->so_lock);
                        }
                }
                *phpp = &so->so_poll_list;
        }
        return (0);
}
1017 
1018 /*
1019  * Generic Upcalls
1020  */
1021 void
1022 so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
1023     cred_t *peer_cred, pid_t peer_cpid)
1024 {
1025         struct sonode *so = (struct sonode *)sock_handle;
1026 
1027         mutex_enter(&so->so_lock);
1028         ASSERT(so->so_proto_handle != NULL);
1029 
1030         if (peer_cred != NULL) {
1031                 if (so->so_peercred != NULL)
1032                         crfree(so->so_peercred);
1033                 crhold(peer_cred);
1034                 so->so_peercred = peer_cred;
1035                 so->so_cpid = peer_cpid;
1036         }
1037 
1038         so->so_proto_connid = id;
1039         soisconnected(so);
1040         /*
1041          * Wake ones who're waiting for conn to become established.
1042          */
1043         so_notify_connected(so);
1044 }
1045 
1046 int
1047 so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
1048 {
1049         struct sonode *so = (struct sonode *)sock_handle;
1050         boolean_t connect_failed;
1051 
1052         mutex_enter(&so->so_lock);
1053 
1054         /*
1055          * If we aren't currently connected, then this isn't a disconnect but
1056          * rather a failure to connect.
1057          */
1058         connect_failed = !(so->so_state & SS_ISCONNECTED);
1059 
1060         so->so_proto_connid = id;
1061         soisdisconnected(so, error);
1062         so_notify_disconnected(so, connect_failed, error);
1063 
1064         return (0);
1065 }
1066 
1067 void
1068 so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
1069     uintptr_t arg)
1070 {
1071         struct sonode *so = (struct sonode *)sock_handle;
1072 
1073         switch (action) {
1074         case SOCK_OPCTL_SHUT_SEND:
1075                 mutex_enter(&so->so_lock);
1076                 socantsendmore(so);
1077                 so_notify_disconnecting(so);
1078                 break;
1079         case SOCK_OPCTL_SHUT_RECV: {
1080                 mutex_enter(&so->so_lock);
1081                 socantrcvmore(so);
1082                 so_notify_eof(so);
1083                 break;
1084         }
1085         case SOCK_OPCTL_ENAB_ACCEPT:
1086                 mutex_enter(&so->so_lock);
1087                 so->so_state |= SS_ACCEPTCONN;
1088                 so->so_backlog = (unsigned int)arg;
1089                 /*
1090                  * The protocol can stop generating newconn upcalls when
1091                  * the backlog is full, so to make sure the listener does
1092                  * not end up with a queue full of deferred connections
1093                  * we reduce the backlog by one. Thus the listener will
1094                  * start closing deferred connections before the backlog
1095                  * is full.
1096                  */
1097                 if (so->so_filter_active > 0)
1098                         so->so_backlog = MAX(1, so->so_backlog - 1);
1099                 mutex_exit(&so->so_lock);
1100                 break;
1101         default:
1102                 ASSERT(0);
1103                 break;
1104         }
1105 }
1106 
1107 void
1108 so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
1109 {
1110         struct sonode *so = (struct sonode *)sock_handle;
1111 
1112         if (qfull) {
1113                 so_snd_qfull(so);
1114         } else {
1115                 so_snd_qnotfull(so);
1116                 mutex_enter(&so->so_lock);
1117                 /* so_notify_writable drops so_lock */
1118                 so_notify_writable(so);
1119         }
1120 }
1121 
1122 sock_upper_handle_t
1123 so_newconn(sock_upper_handle_t parenthandle,
1124     sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
1125     struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
1126 {
1127         struct sonode   *so = (struct sonode *)parenthandle;
1128         struct sonode   *nso;
1129         int error;
1130 
1131         ASSERT(proto_handle != NULL);
1132 
1133         if ((so->so_state & SS_ACCEPTCONN) == 0 ||
1134             (so->so_acceptq_len >= so->so_backlog &&
1135             (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) {
1136                         return (NULL);
1137         }
1138 
1139         nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
1140             &error);
1141         if (nso == NULL)
1142                 return (NULL);
1143 
1144         if (peer_cred != NULL) {
1145                 crhold(peer_cred);
1146                 nso->so_peercred = peer_cred;
1147                 nso->so_cpid = peer_cpid;
1148         }
1149         nso->so_listener = so;
1150 
1151         /*
1152          * The new socket (nso), proto_handle and sock_upcallsp are all
1153          * valid at this point. But as soon as nso is placed in the accept
1154          * queue that can no longer be assumed (since an accept() thread may
1155          * pull it off the queue and close the socket).
1156          */
1157         *sock_upcallsp = &so_upcalls;
1158 
1159         mutex_enter(&so->so_acceptq_lock);
1160         if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) {
1161                 mutex_exit(&so->so_acceptq_lock);
1162                 ASSERT(nso->so_count == 1);
1163                 nso->so_count--;
1164                 nso->so_listener = NULL;
1165                 /* drop proto ref */
1166                 VN_RELE(SOTOV(nso));
1167                 socket_destroy(nso);
1168                 return (NULL);
1169         } else {
1170                 so->so_acceptq_len++;
1171                 if (nso->so_state & SS_FIL_DEFER) {
1172                         list_insert_tail(&so->so_acceptq_defer, nso);
1173                         mutex_exit(&so->so_acceptq_lock);
1174                 } else {
1175                         list_insert_tail(&so->so_acceptq_list, nso);
1176                         cv_signal(&so->so_acceptq_cv);
1177                         mutex_exit(&so->so_acceptq_lock);
1178                         mutex_enter(&so->so_lock);
1179                         so_notify_newconn(so);
1180                 }
1181 
1182                 return ((sock_upper_handle_t)nso);
1183         }
1184 }
1185 
1186 void
1187 so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
1188 {
1189         struct sonode *so;
1190 
1191         so = (struct sonode *)sock_handle;
1192 
1193         mutex_enter(&so->so_lock);
1194 
1195         if (soppp->sopp_flags & SOCKOPT_MAXBLK)
1196                 so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
1197         if (soppp->sopp_flags & SOCKOPT_WROFF)
1198                 so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
1199         if (soppp->sopp_flags & SOCKOPT_TAIL)
1200                 so->so_proto_props.sopp_tail = soppp->sopp_tail;
1201         if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
1202                 so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
1203         if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
1204                 so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
1205         if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
1206                 so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
1207         if (soppp->sopp_flags & SOCKOPT_MINPSZ)
1208                 so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
1209         if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
1210                 if (soppp->sopp_zcopyflag & ZCVMSAFE) {
1211                         so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
1212                         so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
1213                 } else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
1214                         so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
1215                         so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
1216                 }
1217 
1218                 if (soppp->sopp_zcopyflag & COPYCACHED) {
1219                         so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
1220                 }
1221         }
1222         if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
1223                 so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
1224         if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
1225                 so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
1226         if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
1227                 so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
1228         if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
1229                 so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
1230         if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
1231                 so->so_proto_props.sopp_loopback = soppp->sopp_loopback;
1232 
1233         mutex_exit(&so->so_lock);
1234 
1235         if (so->so_filter_active > 0) {
1236                 sof_instance_t *inst;
1237                 ssize_t maxblk;
1238                 ushort_t wroff, tail;
1239                 maxblk = so->so_proto_props.sopp_maxblk;
1240                 wroff = so->so_proto_props.sopp_wroff;
1241                 tail = so->so_proto_props.sopp_tail;
1242                 for (inst = so->so_filter_bottom; inst != NULL;
1243                     inst = inst->sofi_prev) {
1244                         if (SOF_INTERESTED(inst, mblk_prop)) {
1245                                 (*inst->sofi_ops->sofop_mblk_prop)(
1246                                     (sof_handle_t)inst, inst->sofi_cookie,
1247                                     &maxblk, &wroff, &tail);
1248                         }
1249                 }
1250                 mutex_enter(&so->so_lock);
1251                 so->so_proto_props.sopp_maxblk = maxblk;
1252                 so->so_proto_props.sopp_wroff = wroff;
1253                 so->so_proto_props.sopp_tail = tail;
1254                 mutex_exit(&so->so_lock);
1255         }
1256 #ifdef DEBUG
1257         soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
1258             SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
1259             SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
1260             SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
1261             SOCKOPT_LOOPBACK);
1262         ASSERT(soppp->sopp_flags == 0);
1263 #endif
1264 }
1265 
/*
 * so_queue_msg_impl - hand a message received by the protocol to the socket.
 *
 * so          - receiving socket
 * mp          - message to queue, or NULL when there is no mblk to enqueue
 * msg_size    - number of data bytes in mp (asserted equal to msgdsize(mp))
 * flags       - MSG_OOB marks out-of-band data
 * errorp      - out: 0, ENOSPC when the receive buffer has no space left,
 *               or EOPNOTSUPP when the socket is falling back
 * force_pushp - in/out, may be NULL: when supplied, *force_pushp says
 *               whether readers must be notified right away; it is set to
 *               B_TRUE when a notification was done
 * filter      - first filter instance to run the data through; later ones
 *               are reached via sofi_prev
 *
 * Returns the space left in the receive buffer, -1 when no space is left
 * or the socket is falling back (see *errorp), and 0 on the paths that
 * return early after notifying or dropping the message.
 */
/* ARGSUSED */
ssize_t
so_queue_msg_impl(struct sonode *so, mblk_t *mp,
    size_t msg_size, int flags, int *errorp,  boolean_t *force_pushp,
    sof_instance_t *filter)
{
        boolean_t force_push = B_TRUE;
        int space_left;
        sodirect_t *sodp = so->so_direct;

        ASSERT(errorp != NULL);
        *errorp = 0;
        if (mp == NULL) {
                /*
                 * No message to enqueue.  If the protocol has an
                 * sd_recv_uio downcall, this is only a notification
                 * that data is available.
                 */
                if (so->so_downcalls->sd_recv_uio != NULL) {
                        mutex_enter(&so->so_lock);
                        /* the notify functions will drop the lock */
                        if (flags & MSG_OOB)
                                so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
                        else
                                so_notify_data(so, msg_size);
                        return (0);
                }
                ASSERT(msg_size == 0);
                mutex_enter(&so->so_lock);
                goto space_check;
        }

        ASSERT(mp->b_next == NULL);
        ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
        ASSERT(msg_size == msgdsize(mp));

        if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
                /* The read pointer is not aligned correctly for TPI */
                zcmn_err(getzoneid(), CE_WARN,
                    "sockfs: Unaligned TPI message received. rptr = %p\n",
                    (void *)mp->b_rptr);
                /* the message is dropped; only the space check remains */
                freemsg(mp);
                mutex_enter(&so->so_lock);
                if (sodp != NULL)
                        SOD_UIOAFINI(sodp);
                goto space_check;
        }

        if (so->so_filter_active > 0) {
                /*
                 * Run the data through every filter interested in incoming
                 * data; a filter may replace mp and change msg_size.
                 */
                for (; filter != NULL; filter = filter->sofi_prev) {
                        if (!SOF_INTERESTED(filter, data_in))
                                continue;
                        mp = (*filter->sofi_ops->sofop_data_in)(
                            (sof_handle_t)filter, filter->sofi_cookie, mp,
                            flags, &msg_size);
                        ASSERT(msgdsize(mp) == msg_size);
                        DTRACE_PROBE2(filter__data, (sof_instance_t), filter,
                            (mblk_t *), mp);
                        /* Data was consumed/dropped, just do space check */
                        if (msg_size == 0) {
                                mutex_enter(&so->so_lock);
                                goto space_check;
                        }
                }
        }

        mutex_enter(&so->so_lock);
        if (so->so_krecv_cb != NULL) {
                /*
                 * A kernel receive callback is registered: the message is
                 * passed to it instead of being queued.  The callback is
                 * invoked without so_lock held.
                 */
                boolean_t cont;
                so_krecv_f func = so->so_krecv_cb;
                void *arg = so->so_krecv_arg;

                mutex_exit(&so->so_lock);
                cont = func(so, mp, msg_size, flags & MSG_OOB, arg);
                mutex_enter(&so->so_lock);
                if (cont == B_TRUE) {
                        space_left = so->so_rcvbuf;
                } else {
                        /* callback returned B_FALSE: report no space */
                        so->so_rcv_queued = so->so_rcvlowat;
                        *errorp = ENOSPC;
                        space_left = -1;
                }
                goto done_unlock;
        }
        mutex_exit(&so->so_lock);

        if (flags & MSG_OOB) {
                /*
                 * so_queue_oob() takes so_lock itself and finishes with a
                 * notify call, so the lock is taken again here for the
                 * space check.
                 */
                so_queue_oob(so, mp, msg_size);
                mutex_enter(&so->so_lock);
                goto space_check;
        }

        if (force_pushp != NULL)
                force_push = *force_pushp;

        mutex_enter(&so->so_lock);
        if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
                /* falling back: mp is not freed on this path */
                if (sodp != NULL)
                        SOD_DISABLE(sodp);
                mutex_exit(&so->so_lock);
                *errorp = EOPNOTSUPP;
                return (-1);
        }
        if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) {
                /* nobody can receive this data any more; drop it */
                freemsg(mp);
                if (sodp != NULL)
                        SOD_DISABLE(sodp);
                mutex_exit(&so->so_lock);
                return (0);
        }

        /* process the mblk via I/OAT if capable */
        if (sodp != NULL && sodp->sod_enabled) {
                if (DB_TYPE(mp) == M_DATA) {
                        sod_uioa_mblk_init(sodp, mp, msg_size);
                } else {
                        SOD_UIOAFINI(sodp);
                }
        }

        if (mp->b_next == NULL) {
                so_enqueue_msg(so, mp, msg_size);
        } else {
                /*
                 * mp heads a b_next chain: unlink and enqueue each message
                 * separately, sized with msgdsize().
                 */
                do {
                        mblk_t *nmp;

                        if ((nmp = mp->b_next) != NULL) {
                                mp->b_next = NULL;
                        }
                        so_enqueue_msg(so, mp, msgdsize(mp));
                        mp = nmp;
                } while (mp != NULL);
        }

        space_left = so->so_rcvbuf - so->so_rcv_queued;
        if (space_left <= 0) {
                /* receive buffer is full: flow control the sender */
                so->so_flowctrld = B_TRUE;
                *errorp = ENOSPC;
                space_left = -1;
        }

        if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
            so->so_rcv_queued >= so->so_rcv_wanted) {
                SOCKET_TIMER_CANCEL(so);
                /*
                 * so_notify_data will release the lock
                 */
                so_notify_data(so, so->so_rcv_queued);

                if (force_pushp != NULL)
                        *force_pushp = B_TRUE;
                goto done;
        } else if (so->so_rcv_timer_tid == 0) {
                /* Make sure the recv push timer is running */
                SOCKET_TIMER_START(so);
        }

done_unlock:
        mutex_exit(&so->so_lock);
done:
        return (space_left);

        /*
         * Reached with so_lock held when nothing was enqueued on the normal
         * path: only compute the remaining space and flow-control state.
         */
space_check:
        space_left = so->so_rcvbuf - so->so_rcv_queued;
        if (space_left <= 0) {
                so->so_flowctrld = B_TRUE;
                *errorp = ENOSPC;
                space_left = -1;
        }
        goto done_unlock;
}
1432 
1433 #pragma inline(so_queue_msg_impl)
1434 
1435 ssize_t
1436 so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
1437     size_t msg_size, int flags, int *errorp,  boolean_t *force_pushp)
1438 {
1439         struct sonode *so = (struct sonode *)sock_handle;
1440 
1441         return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp,
1442             so->so_filter_bottom));
1443 }
1444 
1445 /*
1446  * Set the offset of where the oob data is relative to the bytes in
1447  * queued. Also generate SIGURG
1448  */
1449 void
1450 so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
1451 {
1452         struct sonode *so;
1453 
1454         ASSERT(offset >= 0);
1455         so = (struct sonode *)sock_handle;
1456         mutex_enter(&so->so_lock);
1457         if (so->so_direct != NULL)
1458                 SOD_UIOAFINI(so->so_direct);
1459 
1460         /*
1461          * New urgent data on the way so forget about any old
1462          * urgent data.
1463          */
1464         so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
1465 
1466         /*
1467          * Record that urgent data is pending.
1468          */
1469         so->so_state |= SS_OOBPEND;
1470 
1471         if (so->so_oobmsg != NULL) {
1472                 dprintso(so, 1, ("sock: discarding old oob\n"));
1473                 freemsg(so->so_oobmsg);
1474                 so->so_oobmsg = NULL;
1475         }
1476 
1477         /*
1478          * set the offset where the urgent byte is
1479          */
1480         so->so_oobmark = so->so_rcv_queued + offset;
1481         if (so->so_oobmark == 0)
1482                 so->so_state |= SS_RCVATMARK;
1483         else
1484                 so->so_state &= ~SS_RCVATMARK;
1485 
1486         so_notify_oobsig(so);
1487 }
1488 
1489 /*
1490  * Queue the OOB byte
1491  */
1492 static void
1493 so_queue_oob(struct sonode *so, mblk_t *mp, size_t len)
1494 {
1495         mutex_enter(&so->so_lock);
1496         if (so->so_direct != NULL)
1497                 SOD_UIOAFINI(so->so_direct);
1498 
1499         ASSERT(mp != NULL);
1500         if (!IS_SO_OOB_INLINE(so)) {
1501                 so->so_oobmsg = mp;
1502                 so->so_state |= SS_HAVEOOBDATA;
1503         } else {
1504                 so_enqueue_msg(so, mp, len);
1505         }
1506 
1507         so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
1508 }
1509 
1510 int
1511 so_close(struct sonode *so, int flag, struct cred *cr)
1512 {
1513         int error;
1514 
1515         /*
1516          * No new data will be enqueued once the CLOSING flag is set.
1517          */
1518         mutex_enter(&so->so_lock);
1519         so->so_state |= SS_CLOSING;
1520         ASSERT(so_verify_oobstate(so));
1521         so_rcv_flush(so);
1522         mutex_exit(&so->so_lock);
1523 
1524         if (so->so_filter_active > 0)
1525                 sof_sonode_closing(so);
1526 
1527         if (so->so_state & SS_ACCEPTCONN) {
1528                 /*
1529                  * We grab and release the accept lock to ensure that any
1530                  * thread about to insert a socket in so_newconn completes
1531                  * before we flush the queue. Any thread calling so_newconn
1532                  * after we drop the lock will observe the SS_CLOSING flag,
1533                  * which will stop it from inserting the socket in the queue.
1534                  */
1535                 mutex_enter(&so->so_acceptq_lock);
1536                 mutex_exit(&so->so_acceptq_lock);
1537 
1538                 so_acceptq_flush(so, B_TRUE);
1539         }
1540 
1541         error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
1542         switch (error) {
1543         default:
1544                 /* Protocol made a synchronous close; remove proto ref */
1545                 VN_RELE(SOTOV(so));
1546                 break;
1547         case EINPROGRESS:
1548                 /*
1549                  * Protocol is in the process of closing, it will make a
1550                  * 'closed' upcall to remove the reference.
1551                  */
1552                 error = 0;
1553                 break;
1554         }
1555 
1556         return (error);
1557 }
1558 
1559 /*
1560  * Upcall made by the protocol when it's doing an asynchronous close. It
1561  * will drop the protocol's reference on the socket.
1562  */
1563 void
1564 so_closed(sock_upper_handle_t sock_handle)
1565 {
1566         struct sonode *so = (struct sonode *)sock_handle;
1567 
1568         VN_RELE(SOTOV(so));
1569 }
1570 
1571 void
1572 so_zcopy_notify(sock_upper_handle_t sock_handle)
1573 {
1574         struct sonode *so = (struct sonode *)sock_handle;
1575 
1576         mutex_enter(&so->so_lock);
1577         so->so_copyflag |= STZCNOTIFY;
1578         cv_broadcast(&so->so_copy_cv);
1579         mutex_exit(&so->so_lock);
1580 }
1581 
1582 void
1583 so_set_error(sock_upper_handle_t sock_handle, int error)
1584 {
1585         struct sonode *so = (struct sonode *)sock_handle;
1586 
1587         mutex_enter(&so->so_lock);
1588 
1589         soseterror(so, error);
1590 
1591         so_notify_error(so);
1592 }
1593 
1594 /*
1595  * so_recvmsg - read data from the socket
1596  *
1597  * There are two ways of obtaining data; either we ask the protocol to
1598  * copy directly into the supplied buffer, or we copy data from the
1599  * sonode's receive queue. The decision which one to use depends on
1600  * whether the protocol has a sd_recv_uio down call.
1601  */
1602 int
1603 so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
1604     struct cred *cr)
1605 {
1606         rval_t          rval;
1607         int             flags = 0;
1608         t_uscalar_t     controllen, namelen;
1609         int             error = 0;
1610         int ret;
1611         mblk_t          *mctlp = NULL;
1612         union T_primitives *tpr;
1613         void            *control;
1614         ssize_t         saved_resid;
1615         struct uio      *suiop;
1616 
1617         SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));
1618 
1619         if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
1620             (so->so_mode & SM_CONNREQUIRED)) {
1621                 SO_UNBLOCK_FALLBACK(so);
1622                 return (ENOTCONN);
1623         }
1624 
1625         mutex_enter(&so->so_lock);
1626         if (so->so_krecv_cb != NULL) {
1627                 mutex_exit(&so->so_lock);
1628                 return (EOPNOTSUPP);
1629         }
1630         mutex_exit(&so->so_lock);
1631 
1632         if (msg->msg_flags & MSG_PEEK)
1633                 msg->msg_flags &= ~MSG_WAITALL;
1634 
1635         if (so->so_mode & SM_ATOMIC)
1636                 msg->msg_flags |= MSG_TRUNC;
1637 
1638         if (msg->msg_flags & MSG_OOB) {
1639                 if ((so->so_mode & SM_EXDATA) == 0) {
1640                         error = EOPNOTSUPP;
1641                 } else if (so->so_downcalls->sd_recv_uio != NULL) {
1642                         error = (*so->so_downcalls->sd_recv_uio)
1643                             (so->so_proto_handle, uiop, msg, cr);
1644                 } else {
1645                         error = sorecvoob(so, msg, uiop, msg->msg_flags,
1646                             IS_SO_OOB_INLINE(so));
1647                 }
1648                 SO_UNBLOCK_FALLBACK(so);
1649                 return (error);
1650         }
1651 
1652         /*
1653          * If the protocol has the recv down call, then pass the request
1654          * down.
1655          */
1656         if (so->so_downcalls->sd_recv_uio != NULL) {
1657                 error = (*so->so_downcalls->sd_recv_uio)
1658                     (so->so_proto_handle, uiop, msg, cr);
1659                 SO_UNBLOCK_FALLBACK(so);
1660                 return (error);
1661         }
1662 
1663         /*
1664          * Reading data from the socket buffer
1665          */
1666         flags = msg->msg_flags;
1667         msg->msg_flags = 0;
1668 
1669         /*
1670          * Set msg_controllen and msg_namelen to zero here to make it
1671          * simpler in the cases that no control or name is returned.
1672          */
1673         controllen = msg->msg_controllen;
1674         namelen = msg->msg_namelen;
1675         msg->msg_controllen = 0;
1676         msg->msg_namelen = 0;
1677 
1678         mutex_enter(&so->so_lock);
1679         /* Set SOREADLOCKED */
1680         error = so_lock_read_intr(so,
1681             uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
1682         mutex_exit(&so->so_lock);
1683         if (error) {
1684                 SO_UNBLOCK_FALLBACK(so);
1685                 return (error);
1686         }
1687 
1688         suiop = sod_rcv_init(so, flags, &uiop);
1689 retry:
1690         saved_resid = uiop->uio_resid;
1691         error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
1692         if (error != 0) {
1693                 goto out;
1694         }
1695         /*
1696          * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
1697          * For non-datagrams MOREDATA is used to set MSG_EOR.
1698          */
1699         ASSERT(!(rval.r_val1 & MORECTL));
1700         if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
1701                 msg->msg_flags |= MSG_TRUNC;
1702         if (mctlp == NULL) {
1703                 dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));
1704 
1705                 mutex_enter(&so->so_lock);
1706                 /* Set MSG_EOR based on MOREDATA */
1707                 if (!(rval.r_val1 & MOREDATA)) {
1708                         if (so->so_state & SS_SAVEDEOR) {
1709                                 msg->msg_flags |= MSG_EOR;
1710                                 so->so_state &= ~SS_SAVEDEOR;
1711                         }
1712                 }
1713                 /*
1714                  * If some data was received (i.e. not EOF) and the
1715                  * read/recv* has not been satisfied wait for some more.
1716                  */
1717                 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1718                     uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1719                         mutex_exit(&so->so_lock);
1720                         flags |= MSG_NOMARK;
1721                         goto retry;
1722                 }
1723 
1724                 goto out_locked;
1725         }
1726         /* so_queue_msg has already verified length and alignment */
1727         tpr = (union T_primitives *)mctlp->b_rptr;
1728         dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
1729         switch (tpr->type) {
1730         case T_DATA_IND: {
1731                 /*
1732                  * Set msg_flags to MSG_EOR based on
1733                  * MORE_flag and MOREDATA.
1734                  */
1735                 mutex_enter(&so->so_lock);
1736                 so->so_state &= ~SS_SAVEDEOR;
1737                 if (!(tpr->data_ind.MORE_flag & 1)) {
1738                         if (!(rval.r_val1 & MOREDATA))
1739                                 msg->msg_flags |= MSG_EOR;
1740                         else
1741                                 so->so_state |= SS_SAVEDEOR;
1742                 }
1743                 freemsg(mctlp);
1744                 /*
1745                  * If some data was received (i.e. not EOF) and the
1746                  * read/recv* has not been satisfied wait for some more.
1747                  */
1748                 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1749                     uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1750                         mutex_exit(&so->so_lock);
1751                         flags |= MSG_NOMARK;
1752                         goto retry;
1753                 }
1754                 goto out_locked;
1755         }
1756         case T_UNITDATA_IND: {
1757                 void *addr;
1758                 t_uscalar_t addrlen;
1759                 void *abuf;
1760                 t_uscalar_t optlen;
1761                 void *opt;
1762 
1763                 if (namelen != 0) {
1764                         /* Caller wants source address */
1765                         addrlen = tpr->unitdata_ind.SRC_length;
1766                         addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
1767                             addrlen, 1);
1768                         if (addr == NULL) {
1769                                 freemsg(mctlp);
1770                                 error = EPROTO;
1771                                 eprintsoline(so, error);
1772                                 goto out;
1773                         }
1774                         ASSERT(so->so_family != AF_UNIX);
1775                 }
1776                 optlen = tpr->unitdata_ind.OPT_length;
1777                 if (optlen != 0) {
1778                         t_uscalar_t ncontrollen;
1779 
1780                         /*
1781                          * Extract any source address option.
1782                          * Determine how large cmsg buffer is needed.
1783                          */
1784                         opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
1785                             optlen, __TPI_ALIGN_SIZE);
1786 
1787                         if (opt == NULL) {
1788                                 freemsg(mctlp);
1789                                 error = EPROTO;
1790                                 eprintsoline(so, error);
1791                                 goto out;
1792                         }
1793                         if (so->so_family == AF_UNIX)
1794                                 so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
1795                         ncontrollen = so_cmsglen(mctlp, opt, optlen,
1796                             !(flags & MSG_XPG4_2));
1797                         if (controllen != 0)
1798                                 controllen = ncontrollen;
1799                         else if (ncontrollen != 0)
1800                                 msg->msg_flags |= MSG_CTRUNC;
1801                 } else {
1802                         controllen = 0;
1803                 }
1804 
1805                 if (namelen != 0) {
1806                         /*
1807                          * Return address to caller.
1808                          * Caller handles truncation if length
1809                          * exceeds msg_namelen.
1810                          * NOTE: AF_UNIX NUL termination is ensured by
1811                          * the sender's copyin_name().
1812                          */
1813                         abuf = kmem_alloc(addrlen, KM_SLEEP);
1814 
1815                         bcopy(addr, abuf, addrlen);
1816                         msg->msg_name = abuf;
1817                         msg->msg_namelen = addrlen;
1818                 }
1819 
1820                 if (controllen != 0) {
1821                         /*
1822                          * Return control msg to caller.
1823                          * Caller handles truncation if length
1824                          * exceeds msg_controllen.
1825                          */
1826                         control = kmem_zalloc(controllen, KM_SLEEP);
1827 
1828                         error = so_opt2cmsg(mctlp, opt, optlen,
1829                             !(flags & MSG_XPG4_2), control, controllen);
1830                         if (error) {
1831                                 freemsg(mctlp);
1832                                 if (msg->msg_namelen != 0)
1833                                         kmem_free(msg->msg_name,
1834                                             msg->msg_namelen);
1835                                 kmem_free(control, controllen);
1836                                 eprintsoline(so, error);
1837                                 goto out;
1838                         }
1839                         msg->msg_control = control;
1840                         msg->msg_controllen = controllen;
1841                 }
1842 
1843                 freemsg(mctlp);
1844                 goto out;
1845         }
1846         case T_OPTDATA_IND: {
1847                 struct T_optdata_req *tdr;
1848                 void *opt;
1849                 t_uscalar_t optlen;
1850 
1851                 tdr = (struct T_optdata_req *)mctlp->b_rptr;
1852                 optlen = tdr->OPT_length;
1853                 if (optlen != 0) {
1854                         t_uscalar_t ncontrollen;
1855                         /*
1856                          * Determine how large cmsg buffer is needed.
1857                          */
1858                         opt = sogetoff(mctlp,
1859                             tpr->optdata_ind.OPT_offset, optlen,
1860                             __TPI_ALIGN_SIZE);
1861 
1862                         if (opt == NULL) {
1863                                 freemsg(mctlp);
1864                                 error = EPROTO;
1865                                 eprintsoline(so, error);
1866                                 goto out;
1867                         }
1868 
1869                         ncontrollen = so_cmsglen(mctlp, opt, optlen,
1870                             !(flags & MSG_XPG4_2));
1871                         if (controllen != 0)
1872                                 controllen = ncontrollen;
1873                         else if (ncontrollen != 0)
1874                                 msg->msg_flags |= MSG_CTRUNC;
1875                 } else {
1876                         controllen = 0;
1877                 }
1878 
1879                 if (controllen != 0) {
1880                         /*
1881                          * Return control msg to caller.
1882                          * Caller handles truncation if length
1883                          * exceeds msg_controllen.
1884                          */
1885                         control = kmem_zalloc(controllen, KM_SLEEP);
1886 
1887                         error = so_opt2cmsg(mctlp, opt, optlen,
1888                             !(flags & MSG_XPG4_2), control, controllen);
1889                         if (error) {
1890                                 freemsg(mctlp);
1891                                 kmem_free(control, controllen);
1892                                 eprintsoline(so, error);
1893                                 goto out;
1894                         }
1895                         msg->msg_control = control;
1896                         msg->msg_controllen = controllen;
1897                 }
1898 
1899                 /*
1900                  * Set msg_flags to MSG_EOR based on
1901                  * DATA_flag and MOREDATA.
1902                  */
1903                 mutex_enter(&so->so_lock);
1904                 so->so_state &= ~SS_SAVEDEOR;
1905                 if (!(tpr->data_ind.MORE_flag & 1)) {
1906                         if (!(rval.r_val1 & MOREDATA))
1907                                 msg->msg_flags |= MSG_EOR;
1908                         else
1909                                 so->so_state |= SS_SAVEDEOR;
1910                 }
1911                 freemsg(mctlp);
1912                 /*
1913                  * If some data was received (i.e. not EOF) and the
1914                  * read/recv* has not been satisfied wait for some more.
1915                  * Not possible to wait if control info was received.
1916                  */
1917                 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1918                     controllen == 0 &&
1919                     uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1920                         mutex_exit(&so->so_lock);
1921                         flags |= MSG_NOMARK;
1922                         goto retry;
1923                 }
1924                 goto out_locked;
1925         }
1926         default:
1927                 cmn_err(CE_CONT, "so_recvmsg bad type %x \n",
1928                     tpr->type);
1929                 freemsg(mctlp);
1930                 error = EPROTO;
1931                 ASSERT(0);
1932         }
1933 out:
1934         mutex_enter(&so->so_lock);
1935 out_locked:
1936         ret = sod_rcv_done(so, suiop, uiop);
1937         if (ret != 0 && error == 0)
1938                 error = ret;
1939 
1940         so_unlock_read(so);     /* Clear SOREADLOCKED */
1941         mutex_exit(&so->so_lock);
1942 
1943         SO_UNBLOCK_FALLBACK(so);
1944 
1945         return (error);
1946 }
1947 
/*
 * Socket operations vector built from the so_* entry points (so_recvmsg()
 * among them). The initializer is positional, so each function must stay
 * in the slot named by its trailing comment; reordering entries would
 * silently wire an operation to the wrong routine.
 */
sonodeops_t so_sonodeops = {
        so_init,                /* sop_init     */
        so_accept,              /* sop_accept   */
        so_bind,                /* sop_bind     */
        so_listen,              /* sop_listen   */
        so_connect,             /* sop_connect  */
        so_recvmsg,             /* sop_recvmsg  */
        so_sendmsg,             /* sop_sendmsg  */
        so_sendmblk,            /* sop_sendmblk */
        so_getpeername,         /* sop_getpeername */
        so_getsockname,         /* sop_getsockname */
        so_shutdown,            /* sop_shutdown */
        so_getsockopt,          /* sop_getsockopt */
        so_setsockopt,          /* sop_setsockopt */
        so_ioctl,               /* sop_ioctl    */
        so_poll,                /* sop_poll     */
        so_close,               /* sop_close */
};
1966 
/*
 * Upcall vector made up of the so_* routines that the protocol calls
 * back into (e.g. so_closed() when an asynchronous close finishes,
 * so_zcopy_notify() and so_set_error()).
 *
 * The initializer is positional: the order of the entries must match
 * the member order of sock_upcalls_t, which is declared elsewhere.
 */
sock_upcalls_t so_upcalls = {
        so_newconn,
        so_connected,
        so_disconnected,
        so_opctl,
        so_queue_msg,
        so_set_prop,
        so_txq_full,
        so_signal_oob,
        so_zcopy_notify,
        so_set_error,
        so_closed
};