Print this page
    
OS-4865 lxbrand async socket errors catch programs off guard
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4213 lxbrand should be able to set TCP_DEFER_ACCEPT after other socket operations
OS-3893 sendfile compat checks shouldn't be done in so_sendmblk
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/sockfs/sockcommon_sops.c
          +++ new/usr/src/uts/common/fs/sockfs/sockcommon_sops.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  /*
  27   27   * Copyright (c) 2014, Joyent, Inc.  All rights reserved.
  28   28   */
  29   29  
  30   30  #include <sys/types.h>
  31   31  #include <sys/param.h>
  32   32  #include <sys/systm.h>
  33   33  #include <sys/sysmacros.h>
  34   34  #include <sys/debug.h>
  35   35  #include <sys/cmn_err.h>
  36   36  
  37   37  #include <sys/stropts.h>
  38   38  #include <sys/socket.h>
  39   39  #include <sys/socketvar.h>
  40   40  
  41   41  #define _SUN_TPI_VERSION        2
  42   42  #include <sys/tihdr.h>
  43   43  #include <sys/sockio.h>
  44   44  #include <sys/kmem_impl.h>
  45   45  
  46   46  #include <sys/strsubr.h>
  47   47  #include <sys/strsun.h>
  48   48  #include <sys/ddi.h>
  49   49  #include <netinet/in.h>
  50   50  #include <inet/ip.h>
  51   51  
  52   52  #include <fs/sockfs/sockcommon.h>
  53   53  #include <fs/sockfs/sockfilter_impl.h>
  54   54  
  55   55  #include <sys/socket_proto.h>
  56   56  
  57   57  #include <fs/sockfs/socktpi_impl.h>
  58   58  #include <fs/sockfs/sodirect.h>
  59   59  #include <sys/tihdr.h>
  60   60  #include <fs/sockfs/nl7c.h>
  61   61  
  62   62  extern int xnet_skip_checks;
  63   63  extern int xnet_check_print;
  64   64  
  65   65  static void so_queue_oob(struct sonode *, mblk_t *, size_t);
  66   66  
  67   67  
  68   68  /*ARGSUSED*/
  69   69  int
  70   70  so_accept_notsupp(struct sonode *lso, int fflag,
  71   71      struct cred *cr, struct sonode **nsop)
  72   72  {
  73   73          return (EOPNOTSUPP);
  74   74  }
  75   75  
  76   76  /*ARGSUSED*/
  77   77  int
  78   78  so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
  79   79  {
  80   80          return (EOPNOTSUPP);
  81   81  }
  82   82  
  83   83  /*ARGSUSED*/
  84   84  int
  85   85  so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa,
  86   86      socklen_t *len, struct cred *cr)
  87   87  {
  88   88          return (EOPNOTSUPP);
  89   89  }
  90   90  
  91   91  /*ARGSUSED*/
  92   92  int
  93   93  so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
  94   94      socklen_t *addrlen, boolean_t accept, struct cred *cr)
  95   95  {
  96   96          return (EOPNOTSUPP);
  97   97  }
  98   98  
  99   99  /*ARGSUSED*/
 100  100  int
 101  101  so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
 102  102  {
 103  103          return (EOPNOTSUPP);
 104  104  }
 105  105  
 106  106  /*ARGSUSED*/
 107  107  int
 108  108  so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
 109  109      struct cred *cr, mblk_t **mpp)
 110  110  {
 111  111          return (EOPNOTSUPP);
 112  112  }
 113  113  
 114  114  /*
 115  115   * Generic Socket Ops
 116  116   */
 117  117  
 118  118  /* ARGSUSED */
 119  119  int
 120  120  so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
  
    | 
      ↓ open down ↓ | 
    120 lines elided | 
    
      ↑ open up ↑ | 
  
 121  121  {
 122  122          return (socket_init_common(so, pso, flags, cr));
 123  123  }
 124  124  
 125  125  int
 126  126  so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
 127  127      int flags, struct cred *cr)
 128  128  {
 129  129          int error;
 130  130  
 131      -        SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));
      131 +        SO_BLOCK_FALLBACK_SAFE(so, SOP_BIND(so, name, namelen, flags, cr));
 132  132  
 133  133          ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);
 134  134  
 135  135          /* X/Open requires this check */
 136  136          if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
 137  137                  if (xnet_check_print) {
 138  138                          printf("sockfs: X/Open bind state check "
 139  139                              "caused EINVAL\n");
 140  140                  }
 141  141                  error = EINVAL;
 142  142                  goto done;
 143  143          }
 144  144  
 145  145          /*
 146  146           * a bind to a NULL address is interpreted as unbind. So just
 147  147           * do the downcall.
 148  148           */
 149  149          if (name == NULL)
 150  150                  goto dobind;
 151  151  
 152  152          switch (so->so_family) {
 153  153          case AF_INET:
 154  154                  if ((size_t)namelen != sizeof (sin_t)) {
 155  155                          error = name->sa_family != so->so_family ?
 156  156                              EAFNOSUPPORT : EINVAL;
 157  157                          eprintsoline(so, error);
 158  158                          goto done;
 159  159                  }
 160  160  
 161  161                  if ((flags & _SOBIND_XPG4_2) &&
 162  162                      (name->sa_family != so->so_family)) {
 163  163                          /*
 164  164                           * This check has to be made for X/Open
 165  165                           * sockets however application failures have
 166  166                           * been observed when it is applied to
 167  167                           * all sockets.
 168  168                           */
 169  169                          error = EAFNOSUPPORT;
 170  170                          eprintsoline(so, error);
 171  171                          goto done;
 172  172                  }
 173  173                  /*
 174  174                   * Force a zero sa_family to match so_family.
 175  175                   *
 176  176                   * Some programs like inetd(1M) don't set the
 177  177                   * family field. Other programs leave
 178  178                   * sin_family set to garbage - SunOS 4.X does
 179  179                   * not check the family field on a bind.
 180  180                   * We use the family field that
 181  181                   * was passed in to the socket() call.
 182  182                   */
 183  183                  name->sa_family = so->so_family;
 184  184                  break;
 185  185  
 186  186          case AF_INET6: {
 187  187  #ifdef DEBUG
 188  188                  sin6_t *sin6 = (sin6_t *)name;
 189  189  #endif
 190  190                  if ((size_t)namelen != sizeof (sin6_t)) {
 191  191                          error = name->sa_family != so->so_family ?
 192  192                              EAFNOSUPPORT : EINVAL;
 193  193                          eprintsoline(so, error);
 194  194                          goto done;
 195  195                  }
 196  196  
 197  197                  if (name->sa_family != so->so_family) {
 198  198                          /*
 199  199                           * With IPv6 we require the family to match
 200  200                           * unlike in IPv4.
 201  201                           */
 202  202                          error = EAFNOSUPPORT;
 203  203                          eprintsoline(so, error);
 204  204                          goto done;
 205  205                  }
 206  206  #ifdef DEBUG
 207  207                  /*
 208  208                   * Verify that apps don't forget to clear
 209  209                   * sin6_scope_id etc
 210  210                   */
 211  211                  if (sin6->sin6_scope_id != 0 &&
 212  212                      !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
 213  213                          zcmn_err(getzoneid(), CE_WARN,
 214  214                              "bind with uninitialized sin6_scope_id "
 215  215                              "(%d) on socket. Pid = %d\n",
 216  216                              (int)sin6->sin6_scope_id,
 217  217                              (int)curproc->p_pid);
 218  218                  }
 219  219                  if (sin6->__sin6_src_id != 0) {
 220  220                          zcmn_err(getzoneid(), CE_WARN,
 221  221                              "bind with uninitialized __sin6_src_id "
 222  222                              "(%d) on socket. Pid = %d\n",
 223  223                              (int)sin6->__sin6_src_id,
 224  224                              (int)curproc->p_pid);
 225  225                  }
 226  226  #endif /* DEBUG */
 227  227  
 228  228                  break;
 229  229          }
 230  230          default:
 231  231                  /* Just pass the request to the protocol */
 232  232                  goto dobind;
 233  233          }
 234  234  
 235  235          /*
 236  236           * First we check if either NCA or KSSL has been enabled for
 237  237           * the requested address, and if so, we fall back to TPI.
 238  238           * If neither of those two services are enabled, then we just
 239  239           * pass the request to the protocol.
 240  240           *
 241  241           * Note that KSSL can only be enabled on a socket if NCA is NOT
 242  242           * enabled for that socket, hence the else-statement below.
 243  243           */
 244  244          if (nl7c_enabled && ((so->so_family == AF_INET ||
 245  245              so->so_family == AF_INET6) &&
 246  246              nl7c_lookup_addr(name, namelen) != NULL)) {
 247  247                  /*
 248  248                   * NL7C is not supported in non-global zones,
 249  249                   * we enforce this restriction here.
 250  250                   */
 251  251                  if (so->so_zoneid == GLOBAL_ZONEID) {
 252  252                          /* NCA should be used, so fall back to TPI */
 253  253                          error = so_tpi_fallback(so, cr);
 254  254                          SO_UNBLOCK_FALLBACK(so);
 255  255                          if (error)
 256  256                                  return (error);
 257  257                          else
 258  258                                  return (SOP_BIND(so, name, namelen, flags, cr));
 259  259                  }
 260  260          }
 261  261  
 262  262  dobind:
 263  263          if (so->so_filter_active == 0 ||
 264  264              (error = sof_filter_bind(so, name, &namelen, cr)) < 0) {
 265  265                  error = (*so->so_downcalls->sd_bind)
 266  266                      (so->so_proto_handle, name, namelen, cr);
 267  267          }
 268  268  done:
 269  269          SO_UNBLOCK_FALLBACK(so);
 270  270  
 271  271          return (error);
 272  272  }
 273  273  
 274  274  int
 275  275  so_listen(struct sonode *so, int backlog, struct cred *cr)
 276  276  {
 277  277          int     error = 0;
 278  278  
 279  279          ASSERT(MUTEX_NOT_HELD(&so->so_lock));
 280  280          SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));
 281  281  
 282  282          if ((so)->so_filter_active == 0 ||
 283  283              (error = sof_filter_listen(so, &backlog, cr)) < 0)
 284  284                  error = (*so->so_downcalls->sd_listen)(so->so_proto_handle,
 285  285                      backlog, cr);
 286  286  
 287  287          SO_UNBLOCK_FALLBACK(so);
 288  288  
 289  289          return (error);
 290  290  }
 291  291  
 292  292  
 293  293  int
 294  294  so_connect(struct sonode *so, struct sockaddr *name,
 295  295      socklen_t namelen, int fflag, int flags, struct cred *cr)
 296  296  {
 297  297          int error = 0;
  
    | 
      ↓ open down ↓ | 
    156 lines elided | 
    
      ↑ open up ↑ | 
  
 298  298          sock_connid_t id;
 299  299  
 300  300          ASSERT(MUTEX_NOT_HELD(&so->so_lock));
 301  301          SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));
 302  302  
 303  303          /*
 304  304           * If there is a pending error, return error
 305  305           * This can happen if a non blocking operation caused an error.
 306  306           */
 307  307  
 308      -        if (so->so_error != 0) {
      308 +        if (so->so_error != 0 && (so->so_mode & SM_DEFERERR) == 0) {
 309  309                  mutex_enter(&so->so_lock);
 310  310                  error = sogeterr(so, B_TRUE);
 311  311                  mutex_exit(&so->so_lock);
 312  312                  if (error != 0)
 313  313                          goto done;
 314  314          }
 315  315  
 316  316          if (so->so_filter_active == 0 ||
 317  317              (error = sof_filter_connect(so, (struct sockaddr *)name,
 318  318              &namelen, cr)) < 0) {
 319  319                  error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
 320  320                      name, namelen, &id, cr);
 321  321  
 322  322                  if (error == EINPROGRESS)
 323  323                          error = so_wait_connected(so,
 324  324                              fflag & (FNONBLOCK|FNDELAY), id);
 325  325          }
 326  326  done:
 327  327          SO_UNBLOCK_FALLBACK(so);
 328  328          return (error);
 329  329  }
 330  330  
 331  331  /*ARGSUSED*/
 332  332  int
 333  333  so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
 334  334  {
 335  335          int error = 0;
 336  336          struct sonode *nso;
 337  337  
 338  338          *nsop = NULL;
 339  339  
 340  340          SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
 341  341          if ((so->so_state & SS_ACCEPTCONN) == 0) {
 342  342                  SO_UNBLOCK_FALLBACK(so);
 343  343                  return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
 344  344                      EOPNOTSUPP : EINVAL);
 345  345          }
 346  346  
 347  347          if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
 348  348              &nso)) == 0) {
 349  349                  ASSERT(nso != NULL);
 350  350  
 351  351                  /* finish the accept */
 352  352                  if ((so->so_filter_active > 0 &&
 353  353                      (error = sof_filter_accept(nso, cr)) > 0) ||
 354  354                      (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
 355  355                      nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) {
 356  356                          (void) socket_close(nso, 0, cr);
 357  357                          socket_destroy(nso);
 358  358                  } else {
 359  359                          *nsop = nso;
 360  360                  }
 361  361          }
 362  362  
 363  363          SO_UNBLOCK_FALLBACK(so);
 364  364          return (error);
 365  365  }
 366  366  
 367  367  int
 368  368  so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
 369  369      struct cred *cr)
 370  370  {
 371  371          int error, flags;
 372  372          boolean_t dontblock;
 373  373          ssize_t orig_resid;
 374  374          mblk_t  *mp;
 375  375  
 376  376          SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));
 377  377  
 378  378          flags = msg->msg_flags;
 379  379          error = 0;
 380  380          dontblock = (flags & MSG_DONTWAIT) ||
 381  381              (uiop->uio_fmode & (FNONBLOCK|FNDELAY));
 382  382  
 383  383          if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
 384  384                  /*
 385  385                   * Old way of passing fd's is not supported
 386  386                   */
 387  387                  SO_UNBLOCK_FALLBACK(so);
 388  388                  return (EOPNOTSUPP);
 389  389          }
 390  390  
 391  391          if ((so->so_mode & SM_ATOMIC) &&
 392  392              uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
 393  393              so->so_proto_props.sopp_maxpsz != -1) {
 394  394                  SO_UNBLOCK_FALLBACK(so);
 395  395                  return (EMSGSIZE);
 396  396          }
  
    | 
      ↓ open down ↓ | 
    78 lines elided | 
    
      ↑ open up ↑ | 
  
 397  397  
 398  398          /*
 399  399           * For atomic sends we will only do one iteration.
 400  400           */
 401  401          do {
 402  402                  if (so->so_state & SS_CANTSENDMORE) {
 403  403                          error = EPIPE;
 404  404                          break;
 405  405                  }
 406  406  
 407      -                if (so->so_error != 0) {
      407 +                if (so->so_error != 0 && (so->so_mode & SM_DEFERERR) == 0) {
 408  408                          mutex_enter(&so->so_lock);
 409  409                          error = sogeterr(so, B_TRUE);
 410  410                          mutex_exit(&so->so_lock);
 411  411                          if (error != 0)
 412  412                                  break;
 413  413                  }
 414  414  
 415  415                  /*
 416  416                   * Send down OOB messages even if the send path is being
 417  417                   * flow controlled (assuming the protocol supports OOB data).
 418  418                   */
 419  419                  if (flags & MSG_OOB) {
 420  420                          if ((so->so_mode & SM_EXDATA) == 0) {
 421  421                                  error = EOPNOTSUPP;
 422  422                                  break;
 423  423                          }
 424  424                  } else if (SO_SND_FLOWCTRLD(so)) {
 425  425                          /*
 426  426                           * Need to wait until the protocol is ready to receive
 427  427                           * more data for transmission.
 428  428                           */
 429  429                          if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
 430  430                                  break;
 431  431                  }
 432  432  
 433  433                  /*
 434  434                   * Time to send data to the protocol. We either copy the
 435  435                   * data into mblks or pass the uio directly to the protocol.
 436  436                   * We decide what to do based on the available down calls.
 437  437                   */
 438  438                  if (so->so_downcalls->sd_send_uio != NULL) {
 439  439                          error = (*so->so_downcalls->sd_send_uio)
 440  440                              (so->so_proto_handle, uiop, msg, cr);
 441  441                          if (error != 0)
 442  442                                  break;
 443  443                  } else {
 444  444                          /* save the resid in case of failure */
 445  445                          orig_resid = uiop->uio_resid;
 446  446  
 447  447                          if ((mp = socopyinuio(uiop,
 448  448                              so->so_proto_props.sopp_maxpsz,
 449  449                              so->so_proto_props.sopp_wroff,
 450  450                              so->so_proto_props.sopp_maxblk,
 451  451                              so->so_proto_props.sopp_tail, &error)) == NULL) {
 452  452                                  break;
 453  453                          }
 454  454                          ASSERT(uiop->uio_resid >= 0);
 455  455  
 456  456                          if (so->so_filter_active > 0 &&
 457  457                              ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr,
 458  458                              &error)) == NULL)) {
 459  459                                  if (error != 0)
 460  460                                          break;
 461  461                                  continue;
 462  462                          }
 463  463                          error = (*so->so_downcalls->sd_send)
 464  464                              (so->so_proto_handle, mp, msg, cr);
 465  465                          if (error != 0) {
 466  466                                  /*
 467  467                                   * The send failed. We do not have to free the
 468  468                                   * mblks, because that is the protocol's
 469  469                                   * responsibility. However, uio_resid must
 470  470                                   * remain accurate, so adjust that here.
 471  471                                   */
 472  472                                  uiop->uio_resid = orig_resid;
 473  473                                          break;
 474  474                          }
 475  475                  }
 476  476          } while (uiop->uio_resid > 0);
 477  477  
 478  478          SO_UNBLOCK_FALLBACK(so);
 479  479  
 480  480          return (error);
 481  481  }
 482  482  
 483  483  int
 484  484  so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag,
 485  485      struct cred *cr, mblk_t **mpp, sof_instance_t *fil,
 486  486      boolean_t fil_inject)
 487  487  {
 488  488          int error;
 489  489          boolean_t dontblock;
 490  490          size_t size;
 491  491          mblk_t *mp = *mpp;
 492  492  
 493  493          if (so->so_downcalls->sd_send == NULL)
 494  494                  return (EOPNOTSUPP);
 495  495  
 496  496          error = 0;
 497  497          dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
 498  498              (fflag & (FNONBLOCK|FNDELAY));
 499  499          size = msgdsize(mp);
 500  500  
 501  501          if ((so->so_mode & SM_ATOMIC) &&
 502  502              size > so->so_proto_props.sopp_maxpsz &&
 503  503              so->so_proto_props.sopp_maxpsz != -1) {
 504  504                  SO_UNBLOCK_FALLBACK(so);
 505  505                  return (EMSGSIZE);
  
    | 
      ↓ open down ↓ | 
    88 lines elided | 
    
      ↑ open up ↑ | 
  
 506  506          }
 507  507  
 508  508          while (mp != NULL) {
 509  509                  mblk_t *nmp, *last_mblk;
 510  510                  size_t mlen;
 511  511  
 512  512                  if (so->so_state & SS_CANTSENDMORE) {
 513  513                          error = EPIPE;
 514  514                          break;
 515  515                  }
 516      -                if (so->so_error != 0) {
      516 +                if (so->so_error != 0 && (so->so_mode & SM_DEFERERR) == 0) {
 517  517                          mutex_enter(&so->so_lock);
 518  518                          error = sogeterr(so, B_TRUE);
 519  519                          mutex_exit(&so->so_lock);
 520  520                          if (error != 0)
 521  521                                  break;
 522  522                  }
 523  523                  /* Socket filters are not flow controlled */
 524  524                  if (SO_SND_FLOWCTRLD(so) && !fil_inject) {
 525  525                          /*
 526  526                           * Need to wait until the protocol is ready to receive
 527  527                           * more data for transmission.
 528  528                           */
 529  529                          if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
 530  530                                  break;
 531  531                  }
 532  532  
 533  533                  /*
 534  534                   * We only allow so_maxpsz of data to be sent down to
 535  535                   * the protocol at time.
 536  536                   */
 537  537                  mlen = MBLKL(mp);
 538  538                  nmp = mp->b_cont;
 539  539                  last_mblk = mp;
 540  540                  while (nmp != NULL) {
 541  541                          mlen += MBLKL(nmp);
 542  542                          if (mlen > so->so_proto_props.sopp_maxpsz) {
 543  543                                  last_mblk->b_cont = NULL;
 544  544                                  break;
 545  545                          }
 546  546                          last_mblk = nmp;
 547  547                          nmp = nmp->b_cont;
 548  548                  }
 549  549  
 550  550                  if (so->so_filter_active > 0 &&
 551  551                      (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg,
 552  552                      cr, &error)) == NULL) {
 553  553                          *mpp = mp = nmp;
 554  554                          if (error != 0)
 555  555                                  break;
 556  556                          continue;
 557  557                  }
 558  558                  error = (*so->so_downcalls->sd_send)
 559  559                      (so->so_proto_handle, mp, msg, cr);
 560  560                  if (error != 0) {
 561  561                          /*
 562  562                           * The send failed. The protocol will free the mblks
 563  563                           * that were sent down. Let the caller deal with the
 564  564                           * rest.
 565  565                           */
 566  566                          *mpp = nmp;
 567  567                          break;
 568  568                  }
 569  569  
 570  570                  *mpp = mp = nmp;
 571  571          }
 572  572          /* Let the filter know whether the protocol is flow controlled */
 573  573          if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so))
 574  574                  error = ENOSPC;
 575  575  
 576  576          return (error);
 577  577  }
 578  578  
  
    | 
      ↓ open down ↓ | 
    52 lines elided | 
    
      ↑ open up ↑ | 
  
 579  579  #pragma inline(so_sendmblk_impl)
 580  580  
 581  581  int
 582  582  so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
 583  583      struct cred *cr, mblk_t **mpp)
 584  584  {
 585  585          int error;
 586  586  
 587  587          SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));
 588  588  
 589      -        if ((so->so_mode & SM_SENDFILESUPP) == 0) {
 590      -                SO_UNBLOCK_FALLBACK(so);
 591      -                return (EOPNOTSUPP);
 592      -        }
 593      -
 594  589          error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top,
 595  590              B_FALSE);
 596  591  
 597  592          SO_UNBLOCK_FALLBACK(so);
 598  593  
 599  594          return (error);
 600  595  }
 601  596  
 602  597  int
 603  598  so_shutdown(struct sonode *so, int how, struct cred *cr)
 604  599  {
 605  600          int error;
 606  601  
 607  602          SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));
 608  603  
 609  604          /*
 610  605           * SunOS 4.X has no check for datagram sockets.
 611  606           * 5.X checks that it is connected (ENOTCONN)
 612  607           * X/Open requires that we check the connected state.
 613  608           */
 614  609          if (!(so->so_state & SS_ISCONNECTED)) {
 615  610                  if (!xnet_skip_checks) {
 616  611                          error = ENOTCONN;
 617  612                          if (xnet_check_print) {
 618  613                                  printf("sockfs: X/Open shutdown check "
 619  614                                      "caused ENOTCONN\n");
 620  615                          }
 621  616                  }
 622  617                  goto done;
 623  618          }
 624  619  
 625  620          if (so->so_filter_active == 0 ||
 626  621              (error = sof_filter_shutdown(so, &how, cr)) < 0)
 627  622                  error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
 628  623                      how, cr));
 629  624  
 630  625          /*
 631  626           * Protocol agreed to shutdown. We need to flush the
 632  627           * receive buffer if the receive side is being shutdown.
 633  628           */
 634  629          if (error == 0 && how != SHUT_WR) {
 635  630                  mutex_enter(&so->so_lock);
 636  631                  /* wait for active reader to finish */
 637  632                  (void) so_lock_read(so, 0);
 638  633  
 639  634                  so_rcv_flush(so);
 640  635  
 641  636                  so_unlock_read(so);
 642  637                  mutex_exit(&so->so_lock);
 643  638          }
 644  639  
 645  640  done:
  
    | 
      ↓ open down ↓ | 
    42 lines elided | 
    
      ↑ open up ↑ | 
  
 646  641          SO_UNBLOCK_FALLBACK(so);
 647  642          return (error);
 648  643  }
 649  644  
 650  645  int
 651  646  so_getsockname(struct sonode *so, struct sockaddr *addr,
 652  647      socklen_t *addrlen, struct cred *cr)
 653  648  {
 654  649          int error;
 655  650  
 656      -        SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));
      651 +        SO_BLOCK_FALLBACK_SAFE(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));
 657  652  
 658  653          if (so->so_filter_active == 0 ||
 659  654              (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0)
 660  655                  error = (*so->so_downcalls->sd_getsockname)
 661  656                      (so->so_proto_handle, addr, addrlen, cr);
 662  657  
 663  658          SO_UNBLOCK_FALLBACK(so);
 664  659          return (error);
 665  660  }
 666  661  
 667  662  int
 668  663  so_getpeername(struct sonode *so, struct sockaddr *addr,
 669  664      socklen_t *addrlen, boolean_t accept, struct cred *cr)
 670  665  {
 671  666          int error;
 672  667  
 673  668          SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));
 674  669  
 675  670          if (accept) {
 676  671                  error = (*so->so_downcalls->sd_getpeername)
 677  672                      (so->so_proto_handle, addr, addrlen, cr);
 678  673          } else if (!(so->so_state & SS_ISCONNECTED)) {
 679  674                  error = ENOTCONN;
 680  675          } else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
 681  676                  /* Added this check for X/Open */
 682  677                  error = EINVAL;
 683  678                  if (xnet_check_print) {
 684  679                          printf("sockfs: X/Open getpeername check => EINVAL\n");
 685  680                  }
 686  681          } else if (so->so_filter_active == 0 ||
 687  682              (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) {
 688  683                  error = (*so->so_downcalls->sd_getpeername)
 689  684                      (so->so_proto_handle, addr, addrlen, cr);
 690  685          }
 691  686  
 692  687          SO_UNBLOCK_FALLBACK(so);
 693  688          return (error);
 694  689  }
  
    | 
      ↓ open down ↓ | 
    28 lines elided | 
    
      ↑ open up ↑ | 
  
 695  690  
 696  691  int
 697  692  so_getsockopt(struct sonode *so, int level, int option_name,
 698  693      void *optval, socklen_t *optlenp, int flags, struct cred *cr)
 699  694  {
 700  695          int error = 0;
 701  696  
 702  697          if (level == SOL_FILTER)
 703  698                  return (sof_getsockopt(so, option_name, optval, optlenp, cr));
 704  699  
 705      -        SO_BLOCK_FALLBACK(so,
      700 +        SO_BLOCK_FALLBACK_SAFE(so,
 706  701              SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));
 707  702  
 708  703          if ((so->so_filter_active == 0 ||
 709  704              (error = sof_filter_getsockopt(so, level, option_name, optval,
 710  705              optlenp, cr)) < 0) &&
 711  706              (error = socket_getopt_common(so, level, option_name, optval,
 712  707              optlenp, flags)) < 0) {
 713  708                  error = (*so->so_downcalls->sd_getsockopt)
 714  709                      (so->so_proto_handle, level, option_name, optval, optlenp,
 715  710                      cr);
 716  711                  if (error ==  ENOPROTOOPT) {
 717  712                          if (level == SOL_SOCKET) {
 718  713                                  /*
 719  714                                   * If a protocol does not support a particular
 720  715                                   * socket option, set can fail (not allowed)
 721  716                                   * but get cannot fail. This is the previous
 722  717                                   * sockfs behavior.
 723  718                                   */
 724  719                                  switch (option_name) {
 725  720                                  case SO_LINGER:
 726  721                                          if (*optlenp < (t_uscalar_t)
 727  722                                              sizeof (struct linger)) {
 728  723                                                  error = EINVAL;
 729  724                                                  break;
 730  725                                          }
 731  726                                          error = 0;
 732  727                                          bzero(optval, sizeof (struct linger));
 733  728                                          *optlenp = sizeof (struct linger);
 734  729                                          break;
 735  730                                  case SO_RCVTIMEO:
 736  731                                  case SO_SNDTIMEO:
 737  732                                          if (*optlenp < (t_uscalar_t)
 738  733                                              sizeof (struct timeval)) {
 739  734                                                  error = EINVAL;
 740  735                                                  break;
 741  736                                          }
 742  737                                          error = 0;
 743  738                                          bzero(optval, sizeof (struct timeval));
 744  739                                          *optlenp = sizeof (struct timeval);
 745  740                                          break;
 746  741                                  case SO_SND_BUFINFO:
 747  742                                          if (*optlenp < (t_uscalar_t)
 748  743                                              sizeof (struct so_snd_bufinfo)) {
 749  744                                                  error = EINVAL;
 750  745                                                  break;
 751  746                                          }
 752  747                                          error = 0;
 753  748                                          bzero(optval,
 754  749                                              sizeof (struct so_snd_bufinfo));
 755  750                                          *optlenp =
 756  751                                              sizeof (struct so_snd_bufinfo);
 757  752                                          break;
 758  753                                  case SO_DEBUG:
 759  754                                  case SO_REUSEADDR:
 760  755                                  case SO_KEEPALIVE:
 761  756                                  case SO_DONTROUTE:
 762  757                                  case SO_BROADCAST:
 763  758                                  case SO_USELOOPBACK:
 764  759                                  case SO_OOBINLINE:
 765  760                                  case SO_DGRAM_ERRIND:
 766  761                                  case SO_SNDBUF:
 767  762                                  case SO_RCVBUF:
 768  763                                          error = 0;
 769  764                                          *((int32_t *)optval) = 0;
 770  765                                          *optlenp = sizeof (int32_t);
 771  766                                          break;
 772  767                                  default:
 773  768                                          break;
 774  769                                  }
 775  770                          }
 776  771                  }
 777  772          }
 778  773  
 779  774          SO_UNBLOCK_FALLBACK(so);
 780  775          return (error);
 781  776  }
 782  777  
 783  778  int
  
    | 
      ↓ open down ↓ | 
    68 lines elided | 
    
      ↑ open up ↑ | 
  
 784  779  so_setsockopt(struct sonode *so, int level, int option_name,
 785  780      const void *optval, socklen_t optlen, struct cred *cr)
 786  781  {
 787  782          int error = 0;
 788  783          struct timeval tl;
 789  784          const void *opt = optval;
 790  785  
 791  786          if (level == SOL_FILTER)
 792  787                  return (sof_setsockopt(so, option_name, optval, optlen, cr));
 793  788  
 794      -        SO_BLOCK_FALLBACK(so,
      789 +        SO_BLOCK_FALLBACK_SAFE(so,
 795  790              SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
 796  791  
 797  792          /* X/Open requires this check */
 798  793          if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
 799  794                  SO_UNBLOCK_FALLBACK(so);
 800  795                  if (xnet_check_print)
 801  796                          printf("sockfs: X/Open setsockopt check => EINVAL\n");
 802  797                  return (EINVAL);
 803  798          }
 804  799  
 805  800          if (so->so_filter_active > 0 &&
 806  801              (error = sof_filter_setsockopt(so, level, option_name,
 807  802              (void *)optval, &optlen, cr)) >= 0)
 808  803                  goto done;
 809  804  
 810  805          if (level == SOL_SOCKET) {
 811  806                  switch (option_name) {
 812  807                  case SO_RCVTIMEO:
 813  808                  case SO_SNDTIMEO: {
 814  809                          /*
 815  810                           * We pass down these two options to the protocol in
 816  811                           * order to support some third-party protocols which
 817  812                           * need to know them. For those protocols which don't
 818  813                           * care about these two options, simply return 0.
 819  814                           */
 820  815                          clock_t t_usec;
 821  816  
 822  817                          if (get_udatamodel() == DATAMODEL_NONE ||
 823  818                              get_udatamodel() == DATAMODEL_NATIVE) {
 824  819                                  if (optlen != sizeof (struct timeval)) {
 825  820                                          error = EINVAL;
 826  821                                          goto done;
 827  822                                  }
 828  823                                  bcopy((struct timeval *)optval, &tl,
 829  824                                      sizeof (struct timeval));
 830  825                          } else {
 831  826                                  if (optlen != sizeof (struct timeval32)) {
 832  827                                          error = EINVAL;
 833  828                                          goto done;
 834  829                                  }
 835  830                                  TIMEVAL32_TO_TIMEVAL(&tl,
 836  831                                      (struct timeval32 *)optval);
 837  832                          }
 838  833                          opt = &tl;
 839  834                          optlen = sizeof (tl);
 840  835                          t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
 841  836                          mutex_enter(&so->so_lock);
 842  837                          if (option_name == SO_RCVTIMEO)
 843  838                                  so->so_rcvtimeo = drv_usectohz(t_usec);
 844  839                          else
 845  840                                  so->so_sndtimeo = drv_usectohz(t_usec);
 846  841                          mutex_exit(&so->so_lock);
 847  842                          break;
 848  843                  }
 849  844                  case SO_RCVBUF:
 850  845                          /*
 851  846                           * XXX XPG 4.2 applications retrieve SO_RCVBUF from
 852  847                           * sockfs since the transport might adjust the value
 853  848                           * and not return exactly what was set by the
 854  849                           * application.
 855  850                           */
 856  851                          so->so_xpg_rcvbuf = *(int32_t *)optval;
 857  852                          break;
 858  853                  }
 859  854          }
 860  855          error = (*so->so_downcalls->sd_setsockopt)
 861  856              (so->so_proto_handle, level, option_name, opt, optlen, cr);
 862  857  done:
 863  858          SO_UNBLOCK_FALLBACK(so);
 864  859          return (error);
 865  860  }
 866  861  
 867  862  int
 868  863  so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
  
    | 
      ↓ open down ↓ | 
    64 lines elided | 
    
      ↑ open up ↑ | 
  
 869  864      struct cred *cr, int32_t *rvalp)
 870  865  {
 871  866          int error = 0;
 872  867  
 873  868          SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));
 874  869  
 875  870          /*
 876  871           * If there is a pending error, return it.
 877  872           * This can happen if a non-blocking operation caused an error.
 878  873           */
 879      -        if (so->so_error != 0) {
      874 +        if (so->so_error != 0 && (so->so_mode & SM_DEFERERR) == 0) {
 880  875                  mutex_enter(&so->so_lock);
 881  876                  error = sogeterr(so, B_TRUE);
 882  877                  mutex_exit(&so->so_lock);
 883  878                  if (error != 0)
 884  879                          goto done;
 885  880          }
 886  881  
 887  882          /*
 888  883           * calling strioc can result in the socket falling back to TPI,
 889  884           * if that is supported.
 890  885           */
 891  886          if ((so->so_filter_active == 0 ||
 892  887              (error = sof_filter_ioctl(so, cmd, arg, mode,
 893  888              rvalp, cr)) < 0) &&
 894  889              (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
 895  890              (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
 896  891                  error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
 897  892                      cmd, arg, mode, rvalp, cr);
 898  893          }
 899  894  
 900  895  done:
 901  896          SO_UNBLOCK_FALLBACK(so);
 902  897  
 903  898          return (error);
 904  899  }
 905  900  
 906  901  int
 907  902  so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
 908  903      struct pollhead **phpp)
 909  904  {
 910  905          int state = so->so_state, mask;
 911  906          *reventsp = 0;
 912  907  
 913  908          /*
 914  909           * In sockets the errors are represented as input/output events
 915  910           */
 916  911          if (so->so_error != 0 &&
 917  912              ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) {
 918  913                  *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
 919  914                  return (0);
 920  915          }
 921  916  
 922  917          /*
 923  918           * If the socket is in a state where it can send data
 924  919           * turn on POLLWRBAND and POLLOUT events.
 925  920           */
 926  921          if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) {
 927  922                  /*
 928  923                   * out of band data is allowed even if the connection
 929  924                   * is flow controlled
 930  925                   */
 931  926                  *reventsp |= POLLWRBAND & events;
 932  927                  if (!SO_SND_FLOWCTRLD(so)) {
 933  928                          /*
 934  929                           * As long as there is buffer to send data
 935  930                           * turn on POLLOUT events
 936  931                           */
 937  932                          *reventsp |= POLLOUT & events;
 938  933                  }
 939  934          }
 940  935  
 941  936          /*
 942  937           * Turn on POLLIN whenever there is data on the receive queue,
 943  938           * or the socket is in a state where no more data will be received.
 944  939           * Also, if the socket is accepting connections, flip the bit if
 945  940           * there is something on the queue.
 946  941           *
 947  942           * We do an initial check for events without holding locks. However,
 948  943           * if there are no events available, then we redo the check for POLLIN
 949  944           * events under the lock.
 950  945           */
 951  946  
 952  947          /* Pending connections */
 953  948          if (!list_is_empty(&so->so_acceptq_list))
 954  949                  *reventsp |= (POLLIN|POLLRDNORM) & events;
 955  950  
 956  951          /*
 957  952           * If we're looking for POLLRDHUP, indicate it if we have sent the
 958  953           * last rx signal for the socket.
 959  954           */
 960  955          if ((events & POLLRDHUP) && (state & SS_SENTLASTREADSIG))
 961  956                  *reventsp |= POLLRDHUP;
 962  957  
 963  958          /* Data */
 964  959          /* so_downcalls is null for sctp */
 965  960          if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
 966  961                  *reventsp |= (*so->so_downcalls->sd_poll)
 967  962                      (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
 968  963                      CRED()) & events;
 969  964                  ASSERT((*reventsp & ~events) == 0);
 970  965                  /* do not recheck events */
 971  966                  events &= ~SO_PROTO_POLLEV;
 972  967          } else {
 973  968                  if (SO_HAVE_DATA(so))
 974  969                          *reventsp |= (POLLIN|POLLRDNORM) & events;
 975  970  
 976  971                  /* Urgent data */
 977  972                  if ((state & SS_OOBPEND) != 0) {
 978  973                          *reventsp |= (POLLRDBAND | POLLPRI) & events;
 979  974                  }
 980  975  
 981  976                  /*
 982  977                   * If the socket has become disconnected, we set POLLHUP.
 983  978                   * Note that if we are in this state, we will have set POLLIN
 984  979                   * (SO_HAVE_DATA() is true on a disconnected socket), but not
 985  980                   * POLLOUT (SS_ISCONNECTED is false).  This is in keeping with
 986  981                   * the semantics of POLLHUP, which is defined to be mutually
 987  982                   * exclusive with respect to POLLOUT but not POLLIN.  We are
 988  983                   * therefore setting POLLHUP primarily for the benefit of
 989  984                   * those not polling on POLLIN, as they have no other way of
 990  985                   * knowing that the socket has been disconnected.
 991  986                   */
 992  987                  mask = SS_SENTLASTREADSIG | SS_SENTLASTWRITESIG;
 993  988  
 994  989                  if ((state & (mask | SS_ISCONNECTED)) == mask)
 995  990                          *reventsp |= POLLHUP;
 996  991          }
 997  992  
 998  993          if ((!*reventsp && !anyyet) || (events & POLLET)) {
 999  994                  /* Check for read events again, but this time under lock */
1000  995                  if (events & (POLLIN|POLLRDNORM)) {
1001  996                          mutex_enter(&so->so_lock);
1002  997                          if (SO_HAVE_DATA(so) ||
1003  998                              !list_is_empty(&so->so_acceptq_list)) {
1004  999                                  if (events & POLLET) {
1005 1000                                          so->so_pollev |= SO_POLLEV_IN;
1006 1001                                          *phpp = &so->so_poll_list;
1007 1002                                  }
1008 1003  
1009 1004                                  mutex_exit(&so->so_lock);
1010 1005                                  *reventsp |= (POLLIN|POLLRDNORM) & events;
1011 1006  
1012 1007                                  return (0);
1013 1008                          } else {
1014 1009                                  so->so_pollev |= SO_POLLEV_IN;
1015 1010                                  mutex_exit(&so->so_lock);
1016 1011                          }
1017 1012                  }
1018 1013                  *phpp = &so->so_poll_list;
1019 1014          }
1020 1015          return (0);
1021 1016  }
1022 1017  
1023 1018  /*
1024 1019   * Generic Upcalls
1025 1020   */
1026 1021  void
1027 1022  so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
1028 1023      cred_t *peer_cred, pid_t peer_cpid)
1029 1024  {
1030 1025          struct sonode *so = (struct sonode *)sock_handle;
1031 1026  
1032 1027          mutex_enter(&so->so_lock);
1033 1028          ASSERT(so->so_proto_handle != NULL);
1034 1029  
1035 1030          if (peer_cred != NULL) {
1036 1031                  if (so->so_peercred != NULL)
1037 1032                          crfree(so->so_peercred);
1038 1033                  crhold(peer_cred);
1039 1034                  so->so_peercred = peer_cred;
1040 1035                  so->so_cpid = peer_cpid;
1041 1036          }
1042 1037  
1043 1038          so->so_proto_connid = id;
1044 1039          soisconnected(so);
1045 1040          /*
1046 1041           * Wake those who are waiting for the connection to be established.
1047 1042           */
1048 1043          so_notify_connected(so);
1049 1044  }
1050 1045  
1051 1046  int
1052 1047  so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
1053 1048  {
1054 1049          struct sonode *so = (struct sonode *)sock_handle;
1055 1050          boolean_t connect_failed;
1056 1051  
1057 1052          mutex_enter(&so->so_lock);
1058 1053  
1059 1054          /*
1060 1055           * If we aren't currently connected, then this isn't a disconnect but
1061 1056           * rather a failure to connect.
1062 1057           */
1063 1058          connect_failed = !(so->so_state & SS_ISCONNECTED);
1064 1059  
1065 1060          so->so_proto_connid = id;
1066 1061          soisdisconnected(so, error);
1067 1062          so_notify_disconnected(so, connect_failed, error);
1068 1063  
1069 1064          return (0);
1070 1065  }
1071 1066  
1072 1067  void
1073 1068  so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
1074 1069      uintptr_t arg)
1075 1070  {
1076 1071          struct sonode *so = (struct sonode *)sock_handle;
1077 1072  
1078 1073          switch (action) {
1079 1074          case SOCK_OPCTL_SHUT_SEND:
1080 1075                  mutex_enter(&so->so_lock);
1081 1076                  socantsendmore(so);
1082 1077                  so_notify_disconnecting(so);
1083 1078                  break;
1084 1079          case SOCK_OPCTL_SHUT_RECV: {
1085 1080                  mutex_enter(&so->so_lock);
1086 1081                  socantrcvmore(so);
1087 1082                  so_notify_eof(so);
1088 1083                  break;
1089 1084          }
1090 1085          case SOCK_OPCTL_ENAB_ACCEPT:
1091 1086                  mutex_enter(&so->so_lock);
1092 1087                  so->so_state |= SS_ACCEPTCONN;
1093 1088                  so->so_backlog = (unsigned int)arg;
1094 1089                  /*
1095 1090                   * The protocol can stop generating newconn upcalls when
1096 1091                   * the backlog is full, so to make sure the listener does
1097 1092                   * not end up with a queue full of deferred connections
1098 1093                   * we reduce the backlog by one. Thus the listener will
1099 1094                   * start closing deferred connections before the backlog
1100 1095                   * is full.
1101 1096                   */
1102 1097                  if (so->so_filter_active > 0)
1103 1098                          so->so_backlog = MAX(1, so->so_backlog - 1);
1104 1099                  mutex_exit(&so->so_lock);
1105 1100                  break;
1106 1101          default:
1107 1102                  ASSERT(0);
1108 1103                  break;
1109 1104          }
1110 1105  }
1111 1106  
1112 1107  void
1113 1108  so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
1114 1109  {
1115 1110          struct sonode *so = (struct sonode *)sock_handle;
1116 1111  
1117 1112          if (qfull) {
1118 1113                  so_snd_qfull(so);
1119 1114          } else {
1120 1115                  so_snd_qnotfull(so);
1121 1116                  mutex_enter(&so->so_lock);
1122 1117                  /* so_notify_writable drops so_lock */
1123 1118                  so_notify_writable(so);
1124 1119          }
1125 1120  }
1126 1121  
1127 1122  sock_upper_handle_t
1128 1123  so_newconn(sock_upper_handle_t parenthandle,
1129 1124      sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
1130 1125      struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
1131 1126  {
1132 1127          struct sonode   *so = (struct sonode *)parenthandle;
1133 1128          struct sonode   *nso;
1134 1129          int error;
1135 1130  
1136 1131          ASSERT(proto_handle != NULL);
1137 1132  
1138 1133          if ((so->so_state & SS_ACCEPTCONN) == 0 ||
1139 1134              (so->so_acceptq_len >= so->so_backlog &&
1140 1135              (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) {
1141 1136                          return (NULL);
1142 1137          }
1143 1138  
1144 1139          nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
1145 1140              &error);
1146 1141          if (nso == NULL)
1147 1142                  return (NULL);
1148 1143  
1149 1144          if (peer_cred != NULL) {
1150 1145                  crhold(peer_cred);
1151 1146                  nso->so_peercred = peer_cred;
1152 1147                  nso->so_cpid = peer_cpid;
1153 1148          }
1154 1149          nso->so_listener = so;
1155 1150  
1156 1151          /*
1157 1152           * The new socket (nso), proto_handle and sock_upcallsp are all
1158 1153           * valid at this point. But as soon as nso is placed in the accept
1159 1154           * queue that can no longer be assumed (since an accept() thread may
1160 1155           * pull it off the queue and close the socket).
1161 1156           */
1162 1157          *sock_upcallsp = &so_upcalls;
1163 1158  
1164 1159          mutex_enter(&so->so_acceptq_lock);
1165 1160          if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) {
1166 1161                  mutex_exit(&so->so_acceptq_lock);
1167 1162                  ASSERT(nso->so_count == 1);
1168 1163                  nso->so_count--;
1169 1164                  nso->so_listener = NULL;
1170 1165                  /* drop proto ref */
1171 1166                  VN_RELE(SOTOV(nso));
1172 1167                  socket_destroy(nso);
1173 1168                  return (NULL);
1174 1169          } else {
1175 1170                  so->so_acceptq_len++;
1176 1171                  if (nso->so_state & SS_FIL_DEFER) {
1177 1172                          list_insert_tail(&so->so_acceptq_defer, nso);
1178 1173                          mutex_exit(&so->so_acceptq_lock);
1179 1174                  } else {
1180 1175                          list_insert_tail(&so->so_acceptq_list, nso);
1181 1176                          cv_signal(&so->so_acceptq_cv);
1182 1177                          mutex_exit(&so->so_acceptq_lock);
1183 1178                          mutex_enter(&so->so_lock);
1184 1179                          so_notify_newconn(so);
1185 1180                  }
1186 1181  
1187 1182                  return ((sock_upper_handle_t)nso);
1188 1183          }
1189 1184  }
1190 1185  
1191 1186  void
1192 1187  so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
1193 1188  {
1194 1189          struct sonode *so;
1195 1190  
1196 1191          so = (struct sonode *)sock_handle;
1197 1192  
1198 1193          mutex_enter(&so->so_lock);
1199 1194  
1200 1195          if (soppp->sopp_flags & SOCKOPT_MAXBLK)
1201 1196                  so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
1202 1197          if (soppp->sopp_flags & SOCKOPT_WROFF)
1203 1198                  so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
1204 1199          if (soppp->sopp_flags & SOCKOPT_TAIL)
1205 1200                  so->so_proto_props.sopp_tail = soppp->sopp_tail;
1206 1201          if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
1207 1202                  so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
1208 1203          if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
1209 1204                  so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
1210 1205          if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
1211 1206                  so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
1212 1207          if (soppp->sopp_flags & SOCKOPT_MINPSZ)
1213 1208                  so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
1214 1209          if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
1215 1210                  if (soppp->sopp_zcopyflag & ZCVMSAFE) {
1216 1211                          so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
1217 1212                          so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
1218 1213                  } else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
1219 1214                          so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
1220 1215                          so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
1221 1216                  }
1222 1217  
1223 1218                  if (soppp->sopp_zcopyflag & COPYCACHED) {
1224 1219                          so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
1225 1220                  }
1226 1221          }
1227 1222          if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
1228 1223                  so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
1229 1224          if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
1230 1225                  so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
1231 1226          if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
1232 1227                  so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
1233 1228          if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
1234 1229                  so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
1235 1230          if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
1236 1231                  so->so_proto_props.sopp_loopback = soppp->sopp_loopback;
1237 1232  
1238 1233          mutex_exit(&so->so_lock);
1239 1234  
1240 1235          if (so->so_filter_active > 0) {
1241 1236                  sof_instance_t *inst;
1242 1237                  ssize_t maxblk;
1243 1238                  ushort_t wroff, tail;
1244 1239                  maxblk = so->so_proto_props.sopp_maxblk;
1245 1240                  wroff = so->so_proto_props.sopp_wroff;
1246 1241                  tail = so->so_proto_props.sopp_tail;
1247 1242                  for (inst = so->so_filter_bottom; inst != NULL;
1248 1243                      inst = inst->sofi_prev) {
1249 1244                          if (SOF_INTERESTED(inst, mblk_prop)) {
1250 1245                                  (*inst->sofi_ops->sofop_mblk_prop)(
1251 1246                                      (sof_handle_t)inst, inst->sofi_cookie,
1252 1247                                      &maxblk, &wroff, &tail);
1253 1248                          }
1254 1249                  }
1255 1250                  mutex_enter(&so->so_lock);
1256 1251                  so->so_proto_props.sopp_maxblk = maxblk;
1257 1252                  so->so_proto_props.sopp_wroff = wroff;
1258 1253                  so->so_proto_props.sopp_tail = tail;
1259 1254                  mutex_exit(&so->so_lock);
1260 1255          }
1261 1256  #ifdef DEBUG
1262 1257          soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
1263 1258              SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
1264 1259              SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
1265 1260              SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
1266 1261              SOCKOPT_LOOPBACK);
1267 1262          ASSERT(soppp->sopp_flags == 0);
1268 1263  #endif
1269 1264  }
1270 1265  
1271 1266  /* ARGSUSED */
1272 1267  ssize_t
1273 1268  so_queue_msg_impl(struct sonode *so, mblk_t *mp,
1274 1269      size_t msg_size, int flags, int *errorp,  boolean_t *force_pushp,
1275 1270      sof_instance_t *filter)
1276 1271  {
1277 1272          boolean_t force_push = B_TRUE;
1278 1273          int space_left;
1279 1274          sodirect_t *sodp = so->so_direct;
1280 1275  
1281 1276          ASSERT(errorp != NULL);
1282 1277          *errorp = 0;
1283 1278          if (mp == NULL) {
1284 1279                  if (so->so_downcalls->sd_recv_uio != NULL) {
1285 1280                          mutex_enter(&so->so_lock);
1286 1281                          /* the notify functions will drop the lock */
1287 1282                          if (flags & MSG_OOB)
1288 1283                                  so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
1289 1284                          else
1290 1285                                  so_notify_data(so, msg_size);
1291 1286                          return (0);
1292 1287                  }
1293 1288                  ASSERT(msg_size == 0);
1294 1289                  mutex_enter(&so->so_lock);
1295 1290                  goto space_check;
1296 1291          }
1297 1292  
1298 1293          ASSERT(mp->b_next == NULL);
1299 1294          ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
1300 1295          ASSERT(msg_size == msgdsize(mp));
1301 1296  
1302 1297          if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
1303 1298                  /* The read pointer is not aligned correctly for TPI */
1304 1299                  zcmn_err(getzoneid(), CE_WARN,
1305 1300                      "sockfs: Unaligned TPI message received. rptr = %p\n",
1306 1301                      (void *)mp->b_rptr);
1307 1302                  freemsg(mp);
1308 1303                  mutex_enter(&so->so_lock);
1309 1304                  if (sodp != NULL)
1310 1305                          SOD_UIOAFINI(sodp);
1311 1306                  goto space_check;
1312 1307          }
1313 1308  
1314 1309          if (so->so_filter_active > 0) {
1315 1310                  for (; filter != NULL; filter = filter->sofi_prev) {
1316 1311                          if (!SOF_INTERESTED(filter, data_in))
1317 1312                                  continue;
1318 1313                          mp = (*filter->sofi_ops->sofop_data_in)(
1319 1314                              (sof_handle_t)filter, filter->sofi_cookie, mp,
1320 1315                              flags, &msg_size);
1321 1316                          ASSERT(msgdsize(mp) == msg_size);
1322 1317                          DTRACE_PROBE2(filter__data, (sof_instance_t), filter,
1323 1318                              (mblk_t *), mp);
1324 1319                          /* Data was consumed/dropped, just do space check */
1325 1320                          if (msg_size == 0) {
1326 1321                                  mutex_enter(&so->so_lock);
1327 1322                                  goto space_check;
1328 1323                          }
1329 1324                  }
1330 1325          }
1331 1326  
1332 1327          if (flags & MSG_OOB) {
1333 1328                  so_queue_oob(so, mp, msg_size);
1334 1329                  mutex_enter(&so->so_lock);
1335 1330                  goto space_check;
1336 1331          }
1337 1332  
1338 1333          if (force_pushp != NULL)
1339 1334                  force_push = *force_pushp;
1340 1335  
1341 1336          mutex_enter(&so->so_lock);
1342 1337          if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
1343 1338                  if (sodp != NULL)
1344 1339                          SOD_DISABLE(sodp);
1345 1340                  mutex_exit(&so->so_lock);
1346 1341                  *errorp = EOPNOTSUPP;
1347 1342                  return (-1);
1348 1343          }
1349 1344          if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) {
1350 1345                  freemsg(mp);
1351 1346                  if (sodp != NULL)
1352 1347                          SOD_DISABLE(sodp);
1353 1348                  mutex_exit(&so->so_lock);
1354 1349                  return (0);
1355 1350          }
1356 1351  
1357 1352          /* process the mblk via I/OAT if capable */
1358 1353          if (sodp != NULL && sodp->sod_enabled) {
1359 1354                  if (DB_TYPE(mp) == M_DATA) {
1360 1355                          sod_uioa_mblk_init(sodp, mp, msg_size);
1361 1356                  } else {
1362 1357                          SOD_UIOAFINI(sodp);
1363 1358                  }
1364 1359          }
1365 1360  
1366 1361          if (mp->b_next == NULL) {
1367 1362                  so_enqueue_msg(so, mp, msg_size);
1368 1363          } else {
1369 1364                  do {
1370 1365                          mblk_t *nmp;
1371 1366  
1372 1367                          if ((nmp = mp->b_next) != NULL) {
1373 1368                                  mp->b_next = NULL;
1374 1369                          }
1375 1370                          so_enqueue_msg(so, mp, msgdsize(mp));
1376 1371                          mp = nmp;
1377 1372                  } while (mp != NULL);
1378 1373          }
1379 1374  
1380 1375          space_left = so->so_rcvbuf - so->so_rcv_queued;
1381 1376          if (space_left <= 0) {
1382 1377                  so->so_flowctrld = B_TRUE;
1383 1378                  *errorp = ENOSPC;
1384 1379                  space_left = -1;
1385 1380          }
1386 1381  
1387 1382          if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
1388 1383              so->so_rcv_queued >= so->so_rcv_wanted) {
1389 1384                  SOCKET_TIMER_CANCEL(so);
1390 1385                  /*
1391 1386                   * so_notify_data will release the lock
1392 1387                   */
1393 1388                  so_notify_data(so, so->so_rcv_queued);
1394 1389  
1395 1390                  if (force_pushp != NULL)
1396 1391                          *force_pushp = B_TRUE;
1397 1392                  goto done;
1398 1393          } else if (so->so_rcv_timer_tid == 0) {
1399 1394                  /* Make sure the recv push timer is running */
1400 1395                  SOCKET_TIMER_START(so);
1401 1396          }
1402 1397  
1403 1398  done_unlock:
1404 1399          mutex_exit(&so->so_lock);
1405 1400  done:
1406 1401          return (space_left);
1407 1402  
1408 1403  space_check:
1409 1404          space_left = so->so_rcvbuf - so->so_rcv_queued;
1410 1405          if (space_left <= 0) {
1411 1406                  so->so_flowctrld = B_TRUE;
1412 1407                  *errorp = ENOSPC;
1413 1408                  space_left = -1;
1414 1409          }
1415 1410          goto done_unlock;
1416 1411  }
1417 1412  
1418 1413  #pragma inline(so_queue_msg_impl)
1419 1414  
           /*
            * Message-queueing entry point exported through so_upcalls.
            *
            * Casts the opaque upper handle to its sonode and forwards every
            * argument unchanged to so_queue_msg_impl(), passing
            * so->so_filter_bottom as the last argument. The return value and
            * anything stored through errorp/force_pushp come straight from
            * so_queue_msg_impl().
            */
1420 1415  ssize_t
1421 1416  so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
1422 1417      size_t msg_size, int flags, int *errorp,  boolean_t *force_pushp)
1423 1418  {
1424 1419          struct sonode *so = (struct sonode *)sock_handle;
1425 1420  
1426 1421          return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp,
1427 1422              so->so_filter_bottom));
1428 1423  }
1429 1424  
1430 1425  /*
1431 1426   * Record where the urgent (OOB) data sits, expressed as an offset from the
1432 1427   * bytes currently queued for receive, and raise the OOB notification
            * (SIGURG) through so_notify_oobsig().
            *
            * Any previously recorded OOB state and any stored OOB message are
            * discarded first. `offset' must be non-negative (asserted).
            *
            * so_lock is entered here and is not released in this function.
            * NOTE(review): presumably so_notify_oobsig() drops it - confirm.
1433 1428   */
1434 1429  void
1435 1430  so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
1436 1431  {
1437 1432          struct sonode *so;
1438 1433  
1439 1434          ASSERT(offset >= 0);
1440 1435          so = (struct sonode *)sock_handle;
1441 1436          mutex_enter(&so->so_lock);
1442 1437          if (so->so_direct != NULL)
1443 1438                  SOD_UIOAFINI(so->so_direct);
1444 1439  
1445 1440          /*
1446 1441           * New urgent data on the way so forget about any old
1447 1442           * urgent data.
1448 1443           */
1449 1444          so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
1450 1445  
1451 1446          /*
1452 1447           * Record that urgent data is pending.
1453 1448           */
1454 1449          so->so_state |= SS_OOBPEND;
1455 1450  
1456 1451          if (so->so_oobmsg != NULL) {
1457 1452                  dprintso(so, 1, ("sock: discarding old oob\n"));
1458 1453                  freemsg(so->so_oobmsg);
1459 1454                  so->so_oobmsg = NULL;
1460 1455          }
1461 1456  
1462 1457          /*
1463 1458           * set the offset where the urgent byte is
1464 1459           */
1465 1460          so->so_oobmark = so->so_rcv_queued + offset;
                   /* A mark of zero means the reader is already at the mark. */
1466 1461          if (so->so_oobmark == 0)
1467 1462                  so->so_state |= SS_RCVATMARK;
1468 1463          else
1469 1464                  so->so_state &= ~SS_RCVATMARK;
1470 1465  
1471 1466          so_notify_oobsig(so);
1472 1467  }
1473 1468  
1474 1469  /*
1475 1470   * Queue the OOB byte
            *
            * When the socket is not in OOB-inline mode the message is stored in
            * so_oobmsg and SS_HAVEOOBDATA is set; otherwise the message is placed
            * on the regular receive queue with so_enqueue_msg(). In both cases
            * so_notify_oobdata() is then called.
            *
            * so_lock is entered here and not released in this function; the
            * caller in so_queue_msg_impl() re-enters so_lock right after this
            * returns, so the notify call is expected to have dropped it.
1476 1471   */
1477 1472  static void
1478 1473  so_queue_oob(struct sonode *so, mblk_t *mp, size_t len)
1479 1474  {
1480 1475          mutex_enter(&so->so_lock);
1481 1476          if (so->so_direct != NULL)
1482 1477                  SOD_UIOAFINI(so->so_direct);
1483 1478  
1484 1479          ASSERT(mp != NULL);
1485 1480          if (!IS_SO_OOB_INLINE(so)) {
1486 1481                  so->so_oobmsg = mp;
1487 1482                  so->so_state |= SS_HAVEOOBDATA;
1488 1483          } else {
1489 1484                  so_enqueue_msg(so, mp, len);
1490 1485          }
1491 1486  
1492 1487          so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
1493 1488  }
1494 1489  
           /*
            * Close a socket (installed as sop_close in so_sonodeops).
            *
            * Steps, in order:
            *  1. Under so_lock, set SS_CLOSING and flush the receive queue.
            *  2. If any filters are active, call sof_sonode_closing().
            *  3. For a listening socket (SS_ACCEPTCONN), flush the accept queue.
            *  4. Call the protocol's sd_close downcall with `flag' and `cr'.
            *
            * Returns 0 when the protocol reports EINPROGRESS (it will finish the
            * close asynchronously); otherwise returns whatever sd_close returned,
            * which includes 0 for a successful synchronous close.
            */
1495 1490  int
1496 1491  so_close(struct sonode *so, int flag, struct cred *cr)
1497 1492  {
1498 1493          int error;
1499 1494  
1500 1495          /*
1501 1496           * No new data will be enqueued once the CLOSING flag is set.
1502 1497           */
1503 1498          mutex_enter(&so->so_lock);
1504 1499          so->so_state |= SS_CLOSING;
1505 1500          ASSERT(so_verify_oobstate(so));
1506 1501          so_rcv_flush(so);
1507 1502          mutex_exit(&so->so_lock);
1508 1503  
1509 1504          if (so->so_filter_active > 0)
1510 1505                  sof_sonode_closing(so);
1511 1506  
1512 1507          if (so->so_state & SS_ACCEPTCONN) {
1513 1508                  /*
1514 1509                   * We grab and release the accept lock to ensure that any
1515 1510                   * thread about to insert a socket in so_newconn completes
1516 1511                   * before we flush the queue. Any thread calling so_newconn
1517 1512                   * after we drop the lock will observe the SS_CLOSING flag,
1518 1513                   * which will stop it from inserting the socket in the queue.
1519 1514                   */
1520 1515                  mutex_enter(&so->so_acceptq_lock);
1521 1516                  mutex_exit(&so->so_acceptq_lock);
1522 1517  
1523 1518                  so_acceptq_flush(so, B_TRUE);
1524 1519          }
1525 1520  
1526 1521          error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
                   /*
                    * `default' is listed first on purpose: it covers every return
                    * value other than EINPROGRESS, including 0.
                    */
1527 1522          switch (error) {
1528 1523          default:
1529 1524                  /* Protocol made a synchronous close; remove proto ref */
1530 1525                  VN_RELE(SOTOV(so));
1531 1526                  break;
1532 1527          case EINPROGRESS:
1533 1528                  /*
1534 1529                   * Protocol is in the process of closing, it will make a
1535 1530                   * 'closed' upcall to remove the reference.
1536 1531                   */
1537 1532                  error = 0;
1538 1533                  break;
1539 1534          }
1540 1535  
1541 1536          return (error);
1542 1537  }
1543 1538  
1544 1539  /*
1545 1540   * Upcall made by the protocol when it's doing an asynchronous close. It
1546 1541   * will drop the protocol's reference on the socket.
            *
            * This is the counterpart of the EINPROGRESS case in so_close(): the
            * VN_RELE() that so_close() performs for a synchronous close is done
            * here instead.
1547 1542   */
1548 1543  void
1549 1544  so_closed(sock_upper_handle_t sock_handle)
1550 1545  {
1551 1546          struct sonode *so = (struct sonode *)sock_handle;
1552 1547  
1553 1548          VN_RELE(SOTOV(so));
1554 1549  }
1555 1550  
           /*
            * Upcall (listed in so_upcalls) that, under so_lock, sets STZCNOTIFY in
            * so_copyflag and wakes every thread waiting on so_copy_cv.
            */
1556 1551  void
1557 1552  so_zcopy_notify(sock_upper_handle_t sock_handle)
1558 1553  {
1559 1554          struct sonode *so = (struct sonode *)sock_handle;
1560 1555  
1561 1556          mutex_enter(&so->so_lock);
1562 1557          so->so_copyflag |= STZCNOTIFY;
1563 1558          cv_broadcast(&so->so_copy_cv);
1564 1559          mutex_exit(&so->so_lock);
1565 1560  }
1566 1561  
           /*
            * Upcall (listed in so_upcalls) that records `error' on the socket with
            * soseterror() and then calls so_notify_error().
            *
            * so_lock is entered here and is not released in this function.
            * NOTE(review): presumably so_notify_error() drops it, like the other
            * so_notify_* routines used in this file - confirm.
            */
1567 1562  void
1568 1563  so_set_error(sock_upper_handle_t sock_handle, int error)
1569 1564  {
1570 1565          struct sonode *so = (struct sonode *)sock_handle;
1571 1566  
1572 1567          mutex_enter(&so->so_lock);
1573 1568  
1574 1569          soseterror(so, error);
1575 1570  
1576 1571          so_notify_error(so);
1577 1572  }
1578 1573  
1579 1574  /*
1580 1575   * so_recvmsg - read data from the socket
1581 1576   *
1582 1577   * There are two ways of obtaining data; either we ask the protocol to
1583 1578   * copy directly into the supplied buffer, or we copy data from the
1584 1579   * sonode's receive queue. The decision which one to use depends on
1585 1580   * whether the protocol has a sd_recv_uio down call.
            *
            * On entry msg->msg_flags holds the caller's MSG_* request flags and
            * msg->msg_namelen / msg->msg_controllen say whether a source address
            * and control data are wanted. On the receive-queue path msg_flags is
            * rewritten with result flags (MSG_TRUNC, MSG_EOR, MSG_CTRUNC), and
            * msg_name / msg_control are set to buffers allocated here with
            * kmem_alloc()/kmem_zalloc() when an address or control data is
            * returned.
            *
            * Returns 0 on success or an errno value (for example ENOTCONN,
            * EOPNOTSUPP or EPROTO, or whatever the protocol, so_lock_read_intr(),
            * so_dequeue_msg(), so_opt2cmsg() or sod_rcv_done() reported).
1586 1581   */
1587 1582  int
1588 1583  so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
1589 1584      struct cred *cr)
1590 1585  {
1591 1586          rval_t          rval;
1592 1587          int             flags = 0;
1593 1588          t_uscalar_t     controllen, namelen;
1594 1589          int             error = 0;
1595 1590          int ret;
1596 1591          mblk_t          *mctlp = NULL;
1597 1592          union T_primitives *tpr;
1598 1593          void            *control;
1599 1594          ssize_t         saved_resid;
1600 1595          struct uio      *suiop;
1601 1596  
1602 1597          SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));
1603 1598  
1604 1599          if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
1605 1600              (so->so_mode & SM_CONNREQUIRED)) {
1606 1601                  SO_UNBLOCK_FALLBACK(so);
1607 1602                  return (ENOTCONN);
1608 1603          }
1609 1604  
                   /* MSG_WAITALL is ignored when peeking. */
1610 1605          if (msg->msg_flags & MSG_PEEK)
1611 1606                  msg->msg_flags &= ~MSG_WAITALL;
1612 1607  
1613 1608          if (so->so_mode & SM_ATOMIC)
1614 1609                  msg->msg_flags |= MSG_TRUNC;
1615 1610  
1616 1611          if (msg->msg_flags & MSG_OOB) {
1617 1612                  if ((so->so_mode & SM_EXDATA) == 0) {
1618 1613                          error = EOPNOTSUPP;
1619 1614                  } else if (so->so_downcalls->sd_recv_uio != NULL) {
1620 1615                          error = (*so->so_downcalls->sd_recv_uio)
1621 1616                              (so->so_proto_handle, uiop, msg, cr);
1622 1617                  } else {
1623 1618                          error = sorecvoob(so, msg, uiop, msg->msg_flags,
1624 1619                              IS_SO_OOB_INLINE(so));
1625 1620                  }
1626 1621                  SO_UNBLOCK_FALLBACK(so);
1627 1622                  return (error);
1628 1623          }
1629 1624  
1630 1625          /*
1631 1626           * If the protocol has the recv down call, then pass the request
1632 1627           * down.
1633 1628           */
1634 1629          if (so->so_downcalls->sd_recv_uio != NULL) {
1635 1630                  error = (*so->so_downcalls->sd_recv_uio)
1636 1631                      (so->so_proto_handle, uiop, msg, cr);
1637 1632                  SO_UNBLOCK_FALLBACK(so);
1638 1633                  return (error);
1639 1634          }
1640 1635  
1641 1636          /*
1642 1637           * Reading data from the socket buffer
                    *
                    * From here on `flags' carries the caller's request flags and
                    * msg->msg_flags is used only for result flags.
1643 1638           */
1644 1639          flags = msg->msg_flags;
1645 1640          msg->msg_flags = 0;
1646 1641  
1647 1642          /*
1648 1643           * Set msg_controllen and msg_namelen to zero here to make it
1649 1644           * simpler in the cases that no control or name is returned.
1650 1645           */
1651 1646          controllen = msg->msg_controllen;
1652 1647          namelen = msg->msg_namelen;
1653 1648          msg->msg_controllen = 0;
1654 1649          msg->msg_namelen = 0;
1655 1650  
1656 1651          mutex_enter(&so->so_lock);
1657 1652          /* Set SOREADLOCKED */
1658 1653          error = so_lock_read_intr(so,
1659 1654              uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
1660 1655          mutex_exit(&so->so_lock);
1661 1656          if (error) {
1662 1657                  SO_UNBLOCK_FALLBACK(so);
1663 1658                  return (error);
1664 1659          }
1665 1660  
1666 1661          suiop = sod_rcv_init(so, flags, &uiop);
           /*
            * MSG_WAITALL loops back here (with MSG_NOMARK added to flags) while
            * some data was copied but the request is not yet satisfied.
            */
1667 1662  retry:
1668 1663          saved_resid = uiop->uio_resid;
1669 1664          error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
1670 1665          if (error != 0) {
1671 1666                  goto out;
1672 1667          }
1673 1668          /*
1674 1669           * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
1675 1670           * For non-datagrams MOREDATA is used to set MSG_EOR.
1676 1671           */
1677 1672          ASSERT(!(rval.r_val1 & MORECTL));
1678 1673          if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
1679 1674                  msg->msg_flags |= MSG_TRUNC;
1680 1675          if (mctlp == NULL) {
1681 1676                  dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));
1682 1677  
1683 1678                  mutex_enter(&so->so_lock);
1684 1679                  /* Set MSG_EOR based on MOREDATA */
1685 1680                  if (!(rval.r_val1 & MOREDATA)) {
1686 1681                          if (so->so_state & SS_SAVEDEOR) {
1687 1682                                  msg->msg_flags |= MSG_EOR;
1688 1683                                  so->so_state &= ~SS_SAVEDEOR;
1689 1684                          }
1690 1685                  }
1691 1686                  /*
1692 1687                   * If some data was received (i.e. not EOF) and the
1693 1688                   * read/recv* has not been satisfied wait for some more.
1694 1689                   */
1695 1690                  if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1696 1691                      uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1697 1692                          mutex_exit(&so->so_lock);
1698 1693                          flags |= MSG_NOMARK;
1699 1694                          goto retry;
1700 1695                  }
1701 1696  
1702 1697                  goto out_locked;
1703 1698          }
1704 1699          /* so_queue_msg has already verified length and alignment */
1705 1700          tpr = (union T_primitives *)mctlp->b_rptr;
1706 1701          dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
1707 1702          switch (tpr->type) {
1708 1703          case T_DATA_IND: {
1709 1704                  /*
1710 1705                   * Set msg_flags to MSG_EOR based on
1711 1706                   * MORE_flag and MOREDATA.
1712 1707                   */
1713 1708                  mutex_enter(&so->so_lock);
1714 1709                  so->so_state &= ~SS_SAVEDEOR;
1715 1710                  if (!(tpr->data_ind.MORE_flag & 1)) {
1716 1711                          if (!(rval.r_val1 & MOREDATA))
1717 1712                                  msg->msg_flags |= MSG_EOR;
1718 1713                          else
1719 1714                                  so->so_state |= SS_SAVEDEOR;
1720 1715                  }
1721 1716                  freemsg(mctlp);
1722 1717                  /*
1723 1718                   * If some data was received (i.e. not EOF) and the
1724 1719                   * read/recv* has not been satisfied wait for some more.
1725 1720                   */
1726 1721                  if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1727 1722                      uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1728 1723                          mutex_exit(&so->so_lock);
1729 1724                          flags |= MSG_NOMARK;
1730 1725                          goto retry;
1731 1726                  }
1732 1727                  goto out_locked;
1733 1728          }
1734 1729          case T_UNITDATA_IND: {
                           /*
                            * addr/addrlen are only assigned when namelen != 0 (or by
                            * so_getopt_srcaddr() for AF_UNIX), and opt only when
                            * optlen != 0; every later use is guarded the same way.
                            */
1735 1730                  void *addr;
1736 1731                  t_uscalar_t addrlen;
1737 1732                  void *abuf;
1738 1733                  t_uscalar_t optlen;
1739 1734                  void *opt;
1740 1735  
1741 1736                  if (namelen != 0) {
1742 1737                          /* Caller wants source address */
1743 1738                          addrlen = tpr->unitdata_ind.SRC_length;
1744 1739                          addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
1745 1740                              addrlen, 1);
1746 1741                          if (addr == NULL) {
1747 1742                                  freemsg(mctlp);
1748 1743                                  error = EPROTO;
1749 1744                                  eprintsoline(so, error);
1750 1745                                  goto out;
1751 1746                          }
1752 1747                          ASSERT(so->so_family != AF_UNIX);
1753 1748                  }
1754 1749                  optlen = tpr->unitdata_ind.OPT_length;
1755 1750                  if (optlen != 0) {
1756 1751                          t_uscalar_t ncontrollen;
1757 1752  
1758 1753                          /*
1759 1754                           * Extract any source address option.
1760 1755                           * Determine how large cmsg buffer is needed.
1761 1756                           */
1762 1757                          opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
1763 1758                              optlen, __TPI_ALIGN_SIZE);
1764 1759  
1765 1760                          if (opt == NULL) {
1766 1761                                  freemsg(mctlp);
1767 1762                                  error = EPROTO;
1768 1763                                  eprintsoline(so, error);
1769 1764                                  goto out;
1770 1765                          }
1771 1766                          if (so->so_family == AF_UNIX)
1772 1767                                  so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
1773 1768                          ncontrollen = so_cmsglen(mctlp, opt, optlen,
1774 1769                              !(flags & MSG_XPG4_2));
                                   /*
                                    * If the caller supplied a control buffer, size
                                    * the result by what the options need; otherwise
                                    * flag that control data was dropped.
                                    */
1775 1770                          if (controllen != 0)
1776 1771                                  controllen = ncontrollen;
1777 1772                          else if (ncontrollen != 0)
1778 1773                                  msg->msg_flags |= MSG_CTRUNC;
1779 1774                  } else {
1780 1775                          controllen = 0;
1781 1776                  }
1782 1777  
1783 1778                  if (namelen != 0) {
1784 1779                          /*
1785 1780                           * Return address to caller.
1786 1781                           * Caller handles truncation if length
1787 1782                           * exceeds msg_namelen.
1788 1783                           * NOTE: AF_UNIX NUL termination is ensured by
1789 1784                           * the sender's copyin_name().
1790 1785                           */
1791 1786                          abuf = kmem_alloc(addrlen, KM_SLEEP);
1792 1787  
1793 1788                          bcopy(addr, abuf, addrlen);
1794 1789                          msg->msg_name = abuf;
1795 1790                          msg->msg_namelen = addrlen;
1796 1791                  }
1797 1792  
1798 1793                  if (controllen != 0) {
1799 1794                          /*
1800 1795                           * Return control msg to caller.
1801 1796                           * Caller handles truncation if length
1802 1797                           * exceeds msg_controllen.
1803 1798                           */
1804 1799                          control = kmem_zalloc(controllen, KM_SLEEP);
1805 1800  
1806 1801                          error = so_opt2cmsg(mctlp, opt, optlen,
1807 1802                              !(flags & MSG_XPG4_2), control, controllen);
1808 1803                          if (error) {
                                           /*
                                            * NOTE(review): msg_name is freed below
                                            * but msg->msg_name and msg->msg_namelen
                                            * are left set, so they refer to freed
                                            * memory on this error return. Confirm
                                            * no caller looks at them on failure.
                                            */
1809 1804                                  freemsg(mctlp);
1810 1805                                  if (msg->msg_namelen != 0)
1811 1806                                          kmem_free(msg->msg_name,
1812 1807                                              msg->msg_namelen);
1813 1808                                  kmem_free(control, controllen);
1814 1809                                  eprintsoline(so, error);
1815 1810                                  goto out;
1816 1811                          }
1817 1812                          msg->msg_control = control;
1818 1813                          msg->msg_controllen = controllen;
1819 1814                  }
1820 1815  
1821 1816                  freemsg(mctlp);
1822 1817                  goto out;
1823 1818          }
1824 1819          case T_OPTDATA_IND: {
1825 1820                  struct T_optdata_req *tdr;
1826 1821                  void *opt;
1827 1822                  t_uscalar_t optlen;
1828 1823  
1829 1824                  tdr = (struct T_optdata_req *)mctlp->b_rptr;
1830 1825                  optlen = tdr->OPT_length;
1831 1826                  if (optlen != 0) {
1832 1827                          t_uscalar_t ncontrollen;
1833 1828                          /*
1834 1829                           * Determine how large cmsg buffer is needed.
1835 1830                           */
1836 1831                          opt = sogetoff(mctlp,
1837 1832                              tpr->optdata_ind.OPT_offset, optlen,
1838 1833                              __TPI_ALIGN_SIZE);
1839 1834  
1840 1835                          if (opt == NULL) {
1841 1836                                  freemsg(mctlp);
1842 1837                                  error = EPROTO;
1843 1838                                  eprintsoline(so, error);
1844 1839                                  goto out;
1845 1840                          }
1846 1841  
1847 1842                          ncontrollen = so_cmsglen(mctlp, opt, optlen,
1848 1843                              !(flags & MSG_XPG4_2));
1849 1844                          if (controllen != 0)
1850 1845                                  controllen = ncontrollen;
1851 1846                          else if (ncontrollen != 0)
1852 1847                                  msg->msg_flags |= MSG_CTRUNC;
1853 1848                  } else {
1854 1849                          controllen = 0;
1855 1850                  }
1856 1851  
1857 1852                  if (controllen != 0) {
1858 1853                          /*
1859 1854                           * Return control msg to caller.
1860 1855                           * Caller handles truncation if length
1861 1856                           * exceeds msg_controllen.
1862 1857                           */
1863 1858                          control = kmem_zalloc(controllen, KM_SLEEP);
1864 1859  
1865 1860                          error = so_opt2cmsg(mctlp, opt, optlen,
1866 1861                              !(flags & MSG_XPG4_2), control, controllen);
1867 1862                          if (error) {
1868 1863                                  freemsg(mctlp);
1869 1864                                  kmem_free(control, controllen);
1870 1865                                  eprintsoline(so, error);
1871 1866                                  goto out;
1872 1867                          }
1873 1868                          msg->msg_control = control;
1874 1869                          msg->msg_controllen = controllen;
1875 1870                  }
1876 1871  
1877 1872                  /*
1878 1873                   * Set msg_flags to MSG_EOR based on
1879 1874                   * DATA_flag and MOREDATA.
                            *
                            * NOTE(review): the code reads the flag through the
                            * data_ind view (MORE_flag) rather than an optdata
                            * DATA_flag member; presumably both sit at the same
                            * offset in the union - confirm.
1880 1875                   */
1881 1876                  mutex_enter(&so->so_lock);
1882 1877                  so->so_state &= ~SS_SAVEDEOR;
1883 1878                  if (!(tpr->data_ind.MORE_flag & 1)) {
1884 1879                          if (!(rval.r_val1 & MOREDATA))
1885 1880                                  msg->msg_flags |= MSG_EOR;
1886 1881                          else
1887 1882                                  so->so_state |= SS_SAVEDEOR;
1888 1883                  }
1889 1884                  freemsg(mctlp);
1890 1885                  /*
1891 1886                   * If some data was received (i.e. not EOF) and the
1892 1887                   * read/recv* has not been satisfied wait for some more.
1893 1888                   * Not possible to wait if control info was received.
1894 1889                   */
1895 1890                  if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1896 1891                      controllen == 0 &&
1897 1892                      uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1898 1893                          mutex_exit(&so->so_lock);
1899 1894                          flags |= MSG_NOMARK;
1900 1895                          goto retry;
1901 1896                  }
1902 1897                  goto out_locked;
1903 1898          }
1904 1899          default:
                           /*
                            * Unknown primitive: log it, drop the control message and
                            * fall out of the switch into `out' with EPROTO.
                            */
1905 1900                  cmn_err(CE_CONT, "so_recvmsg bad type %x \n",
1906 1901                      tpr->type);
1907 1902                  freemsg(mctlp);
1908 1903                  error = EPROTO;
1909 1904                  ASSERT(0);
1910 1905          }
           /*
            * Common exit. `out' is for paths that do not hold so_lock;
            * `out_locked' is for paths that already hold it.
            */
1911 1906  out:
1912 1907          mutex_enter(&so->so_lock);
1913 1908  out_locked:
1914 1909          ret = sod_rcv_done(so, suiop, uiop);
                   /* Keep the first error; only report sod_rcv_done()'s if none. */
1915 1910          if (ret != 0 && error == 0)
1916 1911                  error = ret;
1917 1912  
1918 1913          so_unlock_read(so);     /* Clear SOREADLOCKED */
1919 1914          mutex_exit(&so->so_lock);
1920 1915  
1921 1916          SO_UNBLOCK_FALLBACK(so);
1922 1917  
1923 1918          return (error);
1924 1919  }
1925 1920  
           /*
            * sonode operations vector filled with the so_* implementations.
            * The initializer is positional, so the entries must stay in the
            * member order of sonodeops_t; the trailing comment on each line names
            * the member it fills.
            */
1926 1921  sonodeops_t so_sonodeops = {
1927 1922          so_init,                /* sop_init     */
1928 1923          so_accept,              /* sop_accept   */
1929 1924          so_bind,                /* sop_bind     */
1930 1925          so_listen,              /* sop_listen   */
1931 1926          so_connect,             /* sop_connect  */
1932 1927          so_recvmsg,             /* sop_recvmsg  */
1933 1928          so_sendmsg,             /* sop_sendmsg  */
1934 1929          so_sendmblk,            /* sop_sendmblk */
1935 1930          so_getpeername,         /* sop_getpeername */
1936 1931          so_getsockname,         /* sop_getsockname */
1937 1932          so_shutdown,            /* sop_shutdown */
1938 1933          so_getsockopt,          /* sop_getsockopt */
1939 1934          so_setsockopt,          /* sop_setsockopt */
1940 1935          so_ioctl,               /* sop_ioctl    */
1941 1936          so_poll,                /* sop_poll     */
1942 1937          so_close,               /* sop_close */
1943 1938  };
1944 1939  
           /*
            * Upcall vector listing the so_* upcall routines (so_queue_msg,
            * so_signal_oob, so_set_error, so_closed, ...).
            * The initializer is positional, so the entries must stay in the
            * member order of sock_upcalls_t.
            * NOTE(review): presumably this is the table handed to protocols so
            * they can call back into sockfs - confirm against its users.
            */
1945 1940  sock_upcalls_t so_upcalls = {
1946 1941          so_newconn,
1947 1942          so_connected,
1948 1943          so_disconnected,
1949 1944          so_opctl,
1950 1945          so_queue_msg,
1951 1946          so_set_prop,
1952 1947          so_txq_full,
1953 1948          so_signal_oob,
1954 1949          so_zcopy_notify,
1955 1950          so_set_error,
1956 1951          so_closed
1957 1952  };
  
    | 
      ↓ open down ↓ | 
    1068 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX