Print this page
    
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/sockfs/sockcommon_sops.c
          +++ new/usr/src/uts/common/fs/sockfs/sockcommon_sops.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  
    | 
      ↓ open down ↓ | 
    16 lines elided | 
    
      ↑ open up ↑ | 
  
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  /*
  27      - * Copyright (c) 2015, Joyent, Inc.  All rights reserved.
       27 + * Copyright (c) 2014, Joyent, Inc.  All rights reserved.
  28   28   */
  29   29  
  30   30  #include <sys/types.h>
  31   31  #include <sys/param.h>
  32   32  #include <sys/systm.h>
  33   33  #include <sys/sysmacros.h>
  34   34  #include <sys/debug.h>
  35   35  #include <sys/cmn_err.h>
  36   36  
  37   37  #include <sys/stropts.h>
  38   38  #include <sys/socket.h>
  39   39  #include <sys/socketvar.h>
  40   40  
  41   41  #define _SUN_TPI_VERSION        2
  42   42  #include <sys/tihdr.h>
  43   43  #include <sys/sockio.h>
  44   44  #include <sys/kmem_impl.h>
  45   45  
  46   46  #include <sys/strsubr.h>
  47   47  #include <sys/strsun.h>
  48   48  #include <sys/ddi.h>
  49   49  #include <netinet/in.h>
  50   50  #include <inet/ip.h>
  51   51  
  52   52  #include <fs/sockfs/sockcommon.h>
  53   53  #include <fs/sockfs/sockfilter_impl.h>
  54   54  
  55   55  #include <sys/socket_proto.h>
  56   56  
  57   57  #include <fs/sockfs/socktpi_impl.h>
  58   58  #include <fs/sockfs/sodirect.h>
  59   59  #include <sys/tihdr.h>
  60   60  #include <fs/sockfs/nl7c.h>
  61   61  
  62   62  extern int xnet_skip_checks;
  63   63  extern int xnet_check_print;
  64   64  
  65   65  static void so_queue_oob(struct sonode *, mblk_t *, size_t);
  66   66  
  67   67  
  68   68  /*ARGSUSED*/
  69   69  int
  70   70  so_accept_notsupp(struct sonode *lso, int fflag,
  71   71      struct cred *cr, struct sonode **nsop)
  72   72  {
  73   73          return (EOPNOTSUPP);
  74   74  }
  75   75  
  76   76  /*ARGSUSED*/
  77   77  int
  78   78  so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
  79   79  {
  80   80          return (EOPNOTSUPP);
  81   81  }
  82   82  
  83   83  /*ARGSUSED*/
  84   84  int
  85   85  so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa,
  86   86      socklen_t *len, struct cred *cr)
  87   87  {
  88   88          return (EOPNOTSUPP);
  89   89  }
  90   90  
  91   91  /*ARGSUSED*/
  92   92  int
  93   93  so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
  94   94      socklen_t *addrlen, boolean_t accept, struct cred *cr)
  95   95  {
  96   96          return (EOPNOTSUPP);
  97   97  }
  98   98  
  99   99  /*ARGSUSED*/
 100  100  int
 101  101  so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
 102  102  {
 103  103          return (EOPNOTSUPP);
 104  104  }
 105  105  
 106  106  /*ARGSUSED*/
 107  107  int
 108  108  so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
 109  109      struct cred *cr, mblk_t **mpp)
 110  110  {
 111  111          return (EOPNOTSUPP);
 112  112  }
 113  113  
 114  114  /*
 115  115   * Generic Socket Ops
 116  116   */
 117  117  
 118  118  /* ARGSUSED */
 119  119  int
 120  120  so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
 121  121  {
 122  122          return (socket_init_common(so, pso, flags, cr));
 123  123  }
 124  124  
 125  125  int
 126  126  so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
 127  127      int flags, struct cred *cr)
 128  128  {
 129  129          int error;
 130  130  
 131  131          SO_BLOCK_FALLBACK_SAFE(so, SOP_BIND(so, name, namelen, flags, cr));
 132  132  
 133  133          ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);
 134  134  
 135  135          /* X/Open requires this check */
 136  136          if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
 137  137                  if (xnet_check_print) {
 138  138                          printf("sockfs: X/Open bind state check "
 139  139                              "caused EINVAL\n");
 140  140                  }
 141  141                  error = EINVAL;
 142  142                  goto done;
 143  143          }
 144  144  
 145  145          /*
 146  146           * a bind to a NULL address is interpreted as unbind. So just
 147  147           * do the downcall.
 148  148           */
 149  149          if (name == NULL)
 150  150                  goto dobind;
 151  151  
 152  152          switch (so->so_family) {
 153  153          case AF_INET:
 154  154                  if ((size_t)namelen != sizeof (sin_t)) {
 155  155                          error = name->sa_family != so->so_family ?
 156  156                              EAFNOSUPPORT : EINVAL;
 157  157                          eprintsoline(so, error);
 158  158                          goto done;
 159  159                  }
 160  160  
 161  161                  if ((flags & _SOBIND_XPG4_2) &&
 162  162                      (name->sa_family != so->so_family)) {
 163  163                          /*
 164  164                           * This check has to be made for X/Open
 165  165                           * sockets however application failures have
 166  166                           * been observed when it is applied to
 167  167                           * all sockets.
 168  168                           */
 169  169                          error = EAFNOSUPPORT;
 170  170                          eprintsoline(so, error);
 171  171                          goto done;
 172  172                  }
 173  173                  /*
 174  174                   * Force a zero sa_family to match so_family.
 175  175                   *
 176  176                   * Some programs like inetd(1M) don't set the
 177  177                   * family field. Other programs leave
 178  178                   * sin_family set to garbage - SunOS 4.X does
 179  179                   * not check the family field on a bind.
 180  180                   * We use the family field that
 181  181                   * was passed in to the socket() call.
 182  182                   */
 183  183                  name->sa_family = so->so_family;
 184  184                  break;
 185  185  
 186  186          case AF_INET6: {
 187  187  #ifdef DEBUG
 188  188                  sin6_t *sin6 = (sin6_t *)name;
 189  189  #endif
 190  190                  if ((size_t)namelen != sizeof (sin6_t)) {
 191  191                          error = name->sa_family != so->so_family ?
 192  192                              EAFNOSUPPORT : EINVAL;
 193  193                          eprintsoline(so, error);
 194  194                          goto done;
 195  195                  }
 196  196  
 197  197                  if (name->sa_family != so->so_family) {
 198  198                          /*
 199  199                           * With IPv6 we require the family to match
 200  200                           * unlike in IPv4.
 201  201                           */
 202  202                          error = EAFNOSUPPORT;
 203  203                          eprintsoline(so, error);
 204  204                          goto done;
 205  205                  }
 206  206  #ifdef DEBUG
 207  207                  /*
 208  208                   * Verify that apps don't forget to clear
 209  209                   * sin6_scope_id etc
 210  210                   */
 211  211                  if (sin6->sin6_scope_id != 0 &&
 212  212                      !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
 213  213                          zcmn_err(getzoneid(), CE_WARN,
 214  214                              "bind with uninitialized sin6_scope_id "
 215  215                              "(%d) on socket. Pid = %d\n",
 216  216                              (int)sin6->sin6_scope_id,
 217  217                              (int)curproc->p_pid);
 218  218                  }
 219  219                  if (sin6->__sin6_src_id != 0) {
 220  220                          zcmn_err(getzoneid(), CE_WARN,
 221  221                              "bind with uninitialized __sin6_src_id "
 222  222                              "(%d) on socket. Pid = %d\n",
 223  223                              (int)sin6->__sin6_src_id,
 224  224                              (int)curproc->p_pid);
 225  225                  }
 226  226  #endif /* DEBUG */
 227  227  
 228  228                  break;
 229  229          }
 230  230          default:
 231  231                  /* Just pass the request to the protocol */
 232  232                  goto dobind;
 233  233          }
 234  234  
 235  235          /*
 236  236           * First we check if either NCA or KSSL has been enabled for
 237  237           * the requested address, and if so, we fall back to TPI.
 238  238           * If neither of those two services are enabled, then we just
 239  239           * pass the request to the protocol.
 240  240           *
 241  241           * Note that KSSL can only be enabled on a socket if NCA is NOT
 242  242           * enabled for that socket, hence the else-statement below.
 243  243           */
 244  244          if (nl7c_enabled && ((so->so_family == AF_INET ||
 245  245              so->so_family == AF_INET6) &&
 246  246              nl7c_lookup_addr(name, namelen) != NULL)) {
 247  247                  /*
 248  248                   * NL7C is not supported in non-global zones,
 249  249                   * we enforce this restriction here.
 250  250                   */
 251  251                  if (so->so_zoneid == GLOBAL_ZONEID) {
 252  252                          /* NCA should be used, so fall back to TPI */
 253  253                          error = so_tpi_fallback(so, cr);
 254  254                          SO_UNBLOCK_FALLBACK(so);
 255  255                          if (error)
 256  256                                  return (error);
 257  257                          else
 258  258                                  return (SOP_BIND(so, name, namelen, flags, cr));
 259  259                  }
 260  260          }
 261  261  
 262  262  dobind:
 263  263          if (so->so_filter_active == 0 ||
 264  264              (error = sof_filter_bind(so, name, &namelen, cr)) < 0) {
 265  265                  error = (*so->so_downcalls->sd_bind)
 266  266                      (so->so_proto_handle, name, namelen, cr);
 267  267          }
 268  268  done:
 269  269          SO_UNBLOCK_FALLBACK(so);
 270  270  
 271  271          return (error);
 272  272  }
 273  273  
 274  274  int
 275  275  so_listen(struct sonode *so, int backlog, struct cred *cr)
 276  276  {
 277  277          int     error = 0;
 278  278  
 279  279          ASSERT(MUTEX_NOT_HELD(&so->so_lock));
 280  280          SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));
 281  281  
 282  282          if ((so)->so_filter_active == 0 ||
 283  283              (error = sof_filter_listen(so, &backlog, cr)) < 0)
 284  284                  error = (*so->so_downcalls->sd_listen)(so->so_proto_handle,
 285  285                      backlog, cr);
 286  286  
 287  287          SO_UNBLOCK_FALLBACK(so);
 288  288  
 289  289          return (error);
 290  290  }
 291  291  
 292  292  
 293  293  int
 294  294  so_connect(struct sonode *so, struct sockaddr *name,
 295  295      socklen_t namelen, int fflag, int flags, struct cred *cr)
 296  296  {
 297  297          int error = 0;
 298  298          sock_connid_t id;
 299  299  
 300  300          ASSERT(MUTEX_NOT_HELD(&so->so_lock));
 301  301          SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));
 302  302  
 303  303          /*
 304  304           * If there is a pending error, return error
 305  305           * This can happen if a non blocking operation caused an error.
 306  306           */
 307  307  
 308  308          if (so->so_error != 0 && (so->so_mode & SM_DEFERERR) == 0) {
 309  309                  mutex_enter(&so->so_lock);
 310  310                  error = sogeterr(so, B_TRUE);
 311  311                  mutex_exit(&so->so_lock);
 312  312                  if (error != 0)
 313  313                          goto done;
 314  314          }
 315  315  
 316  316          if (so->so_filter_active == 0 ||
 317  317              (error = sof_filter_connect(so, (struct sockaddr *)name,
 318  318              &namelen, cr)) < 0) {
 319  319                  error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
 320  320                      name, namelen, &id, cr);
 321  321  
 322  322                  if (error == EINPROGRESS)
 323  323                          error = so_wait_connected(so,
 324  324                              fflag & (FNONBLOCK|FNDELAY), id);
 325  325          }
 326  326  done:
 327  327          SO_UNBLOCK_FALLBACK(so);
 328  328          return (error);
 329  329  }
 330  330  
 331  331  /*ARGSUSED*/
 332  332  int
 333  333  so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
 334  334  {
 335  335          int error = 0;
 336  336          struct sonode *nso;
 337  337  
 338  338          *nsop = NULL;
 339  339  
 340  340          SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
 341  341          if ((so->so_state & SS_ACCEPTCONN) == 0) {
 342  342                  SO_UNBLOCK_FALLBACK(so);
 343  343                  return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
 344  344                      EOPNOTSUPP : EINVAL);
 345  345          }
 346  346  
 347  347          if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
 348  348              &nso)) == 0) {
 349  349                  ASSERT(nso != NULL);
 350  350  
 351  351                  /* finish the accept */
 352  352                  if ((so->so_filter_active > 0 &&
 353  353                      (error = sof_filter_accept(nso, cr)) > 0) ||
 354  354                      (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
 355  355                      nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) {
 356  356                          (void) socket_close(nso, 0, cr);
 357  357                          socket_destroy(nso);
 358  358                  } else {
 359  359                          *nsop = nso;
 360  360                  }
 361  361          }
 362  362  
 363  363          SO_UNBLOCK_FALLBACK(so);
 364  364          return (error);
 365  365  }
 366  366  
 367  367  int
 368  368  so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
 369  369      struct cred *cr)
 370  370  {
 371  371          int error, flags;
 372  372          boolean_t dontblock;
 373  373          ssize_t orig_resid;
 374  374          mblk_t  *mp;
 375  375  
 376  376          SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));
 377  377  
 378  378          flags = msg->msg_flags;
 379  379          error = 0;
 380  380          dontblock = (flags & MSG_DONTWAIT) ||
 381  381              (uiop->uio_fmode & (FNONBLOCK|FNDELAY));
 382  382  
 383  383          if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
 384  384                  /*
 385  385                   * Old way of passing fd's is not supported
 386  386                   */
 387  387                  SO_UNBLOCK_FALLBACK(so);
 388  388                  return (EOPNOTSUPP);
 389  389          }
 390  390  
 391  391          if ((so->so_mode & SM_ATOMIC) &&
 392  392              uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
 393  393              so->so_proto_props.sopp_maxpsz != -1) {
 394  394                  SO_UNBLOCK_FALLBACK(so);
 395  395                  return (EMSGSIZE);
 396  396          }
 397  397  
 398  398          /*
 399  399           * For atomic sends we will only do one iteration.
 400  400           */
 401  401          do {
 402  402                  if (so->so_state & SS_CANTSENDMORE) {
 403  403                          error = EPIPE;
 404  404                          break;
 405  405                  }
 406  406  
 407  407                  if (so->so_error != 0 && (so->so_mode & SM_DEFERERR) == 0) {
 408  408                          mutex_enter(&so->so_lock);
 409  409                          error = sogeterr(so, B_TRUE);
 410  410                          mutex_exit(&so->so_lock);
 411  411                          if (error != 0)
 412  412                                  break;
 413  413                  }
 414  414  
 415  415                  /*
 416  416                   * Send down OOB messages even if the send path is being
 417  417                   * flow controlled (assuming the protocol supports OOB data).
 418  418                   */
 419  419                  if (flags & MSG_OOB) {
 420  420                          if ((so->so_mode & SM_EXDATA) == 0) {
 421  421                                  error = EOPNOTSUPP;
 422  422                                  break;
 423  423                          }
 424  424                  } else if (SO_SND_FLOWCTRLD(so)) {
 425  425                          /*
 426  426                           * Need to wait until the protocol is ready to receive
 427  427                           * more data for transmission.
 428  428                           */
 429  429                          if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
 430  430                                  break;
 431  431                  }
 432  432  
 433  433                  /*
 434  434                   * Time to send data to the protocol. We either copy the
 435  435                   * data into mblks or pass the uio directly to the protocol.
 436  436                   * We decide what to do based on the available down calls.
 437  437                   */
 438  438                  if (so->so_downcalls->sd_send_uio != NULL) {
 439  439                          error = (*so->so_downcalls->sd_send_uio)
 440  440                              (so->so_proto_handle, uiop, msg, cr);
 441  441                          if (error != 0)
 442  442                                  break;
 443  443                  } else {
 444  444                          /* save the resid in case of failure */
 445  445                          orig_resid = uiop->uio_resid;
 446  446  
 447  447                          if ((mp = socopyinuio(uiop,
 448  448                              so->so_proto_props.sopp_maxpsz,
 449  449                              so->so_proto_props.sopp_wroff,
 450  450                              so->so_proto_props.sopp_maxblk,
 451  451                              so->so_proto_props.sopp_tail, &error)) == NULL) {
 452  452                                  break;
 453  453                          }
 454  454                          ASSERT(uiop->uio_resid >= 0);
 455  455  
 456  456                          if (so->so_filter_active > 0 &&
 457  457                              ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr,
 458  458                              &error)) == NULL)) {
 459  459                                  if (error != 0)
 460  460                                          break;
 461  461                                  continue;
 462  462                          }
 463  463                          error = (*so->so_downcalls->sd_send)
 464  464                              (so->so_proto_handle, mp, msg, cr);
 465  465                          if (error != 0) {
 466  466                                  /*
 467  467                                   * The send failed. We do not have to free the
 468  468                                   * mblks, because that is the protocol's
 469  469                                   * responsibility. However, uio_resid must
 470  470                                   * remain accurate, so adjust that here.
 471  471                                   */
 472  472                                  uiop->uio_resid = orig_resid;
 473  473                                          break;
 474  474                          }
 475  475                  }
 476  476          } while (uiop->uio_resid > 0);
 477  477  
 478  478          SO_UNBLOCK_FALLBACK(so);
 479  479  
 480  480          return (error);
 481  481  }
 482  482  
 483  483  int
 484  484  so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag,
 485  485      struct cred *cr, mblk_t **mpp, sof_instance_t *fil,
 486  486      boolean_t fil_inject)
 487  487  {
 488  488          int error;
 489  489          boolean_t dontblock;
 490  490          size_t size;
 491  491          mblk_t *mp = *mpp;
 492  492  
 493  493          if (so->so_downcalls->sd_send == NULL)
 494  494                  return (EOPNOTSUPP);
 495  495  
 496  496          error = 0;
 497  497          dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
 498  498              (fflag & (FNONBLOCK|FNDELAY));
 499  499          size = msgdsize(mp);
 500  500  
 501  501          if ((so->so_mode & SM_ATOMIC) &&
 502  502              size > so->so_proto_props.sopp_maxpsz &&
 503  503              so->so_proto_props.sopp_maxpsz != -1) {
 504  504                  SO_UNBLOCK_FALLBACK(so);
 505  505                  return (EMSGSIZE);
 506  506          }
 507  507  
 508  508          while (mp != NULL) {
 509  509                  mblk_t *nmp, *last_mblk;
 510  510                  size_t mlen;
 511  511  
 512  512                  if (so->so_state & SS_CANTSENDMORE) {
 513  513                          error = EPIPE;
 514  514                          break;
 515  515                  }
 516  516                  if (so->so_error != 0 && (so->so_mode & SM_DEFERERR) == 0) {
 517  517                          mutex_enter(&so->so_lock);
 518  518                          error = sogeterr(so, B_TRUE);
 519  519                          mutex_exit(&so->so_lock);
 520  520                          if (error != 0)
 521  521                                  break;
 522  522                  }
 523  523                  /* Socket filters are not flow controlled */
 524  524                  if (SO_SND_FLOWCTRLD(so) && !fil_inject) {
 525  525                          /*
 526  526                           * Need to wait until the protocol is ready to receive
 527  527                           * more data for transmission.
 528  528                           */
 529  529                          if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
 530  530                                  break;
 531  531                  }
 532  532  
 533  533                  /*
 534  534                   * We only allow so_maxpsz of data to be sent down to
 535  535                   * the protocol at time.
 536  536                   */
 537  537                  mlen = MBLKL(mp);
 538  538                  nmp = mp->b_cont;
 539  539                  last_mblk = mp;
 540  540                  while (nmp != NULL) {
 541  541                          mlen += MBLKL(nmp);
 542  542                          if (mlen > so->so_proto_props.sopp_maxpsz) {
 543  543                                  last_mblk->b_cont = NULL;
 544  544                                  break;
 545  545                          }
 546  546                          last_mblk = nmp;
 547  547                          nmp = nmp->b_cont;
 548  548                  }
 549  549  
 550  550                  if (so->so_filter_active > 0 &&
 551  551                      (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg,
 552  552                      cr, &error)) == NULL) {
 553  553                          *mpp = mp = nmp;
 554  554                          if (error != 0)
 555  555                                  break;
 556  556                          continue;
 557  557                  }
 558  558                  error = (*so->so_downcalls->sd_send)
 559  559                      (so->so_proto_handle, mp, msg, cr);
 560  560                  if (error != 0) {
 561  561                          /*
 562  562                           * The send failed. The protocol will free the mblks
 563  563                           * that were sent down. Let the caller deal with the
 564  564                           * rest.
 565  565                           */
 566  566                          *mpp = nmp;
 567  567                          break;
 568  568                  }
 569  569  
 570  570                  *mpp = mp = nmp;
 571  571          }
 572  572          /* Let the filter know whether the protocol is flow controlled */
 573  573          if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so))
 574  574                  error = ENOSPC;
 575  575  
 576  576          return (error);
 577  577  }
 578  578  
 579  579  #pragma inline(so_sendmblk_impl)
 580  580  
 581  581  int
 582  582  so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
 583  583      struct cred *cr, mblk_t **mpp)
 584  584  {
 585  585          int error;
 586  586  
 587  587          SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));
 588  588  
 589  589          error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top,
 590  590              B_FALSE);
 591  591  
 592  592          SO_UNBLOCK_FALLBACK(so);
 593  593  
 594  594          return (error);
 595  595  }
 596  596  
 597  597  int
 598  598  so_shutdown(struct sonode *so, int how, struct cred *cr)
 599  599  {
 600  600          int error;
 601  601  
 602  602          SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));
 603  603  
 604  604          /*
 605  605           * SunOS 4.X has no check for datagram sockets.
 606  606           * 5.X checks that it is connected (ENOTCONN)
 607  607           * X/Open requires that we check the connected state.
 608  608           */
 609  609          if (!(so->so_state & SS_ISCONNECTED)) {
 610  610                  if (!xnet_skip_checks) {
 611  611                          error = ENOTCONN;
 612  612                          if (xnet_check_print) {
 613  613                                  printf("sockfs: X/Open shutdown check "
 614  614                                      "caused ENOTCONN\n");
 615  615                          }
 616  616                  }
 617  617                  goto done;
 618  618          }
 619  619  
 620  620          if (so->so_filter_active == 0 ||
 621  621              (error = sof_filter_shutdown(so, &how, cr)) < 0)
 622  622                  error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
 623  623                      how, cr));
 624  624  
 625  625          /*
 626  626           * Protocol agreed to shutdown. We need to flush the
 627  627           * receive buffer if the receive side is being shutdown.
 628  628           */
 629  629          if (error == 0 && how != SHUT_WR) {
 630  630                  mutex_enter(&so->so_lock);
 631  631                  /* wait for active reader to finish */
 632  632                  (void) so_lock_read(so, 0);
 633  633  
 634  634                  so_rcv_flush(so);
 635  635  
 636  636                  so_unlock_read(so);
 637  637                  mutex_exit(&so->so_lock);
 638  638          }
 639  639  
 640  640  done:
 641  641          SO_UNBLOCK_FALLBACK(so);
 642  642          return (error);
 643  643  }
 644  644  
 645  645  int
 646  646  so_getsockname(struct sonode *so, struct sockaddr *addr,
 647  647      socklen_t *addrlen, struct cred *cr)
 648  648  {
 649  649          int error;
 650  650  
 651  651          SO_BLOCK_FALLBACK_SAFE(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));
 652  652  
 653  653          if (so->so_filter_active == 0 ||
 654  654              (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0)
 655  655                  error = (*so->so_downcalls->sd_getsockname)
 656  656                      (so->so_proto_handle, addr, addrlen, cr);
 657  657  
 658  658          SO_UNBLOCK_FALLBACK(so);
 659  659          return (error);
 660  660  }
 661  661  
 662  662  int
 663  663  so_getpeername(struct sonode *so, struct sockaddr *addr,
 664  664      socklen_t *addrlen, boolean_t accept, struct cred *cr)
 665  665  {
 666  666          int error;
 667  667  
 668  668          SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));
 669  669  
 670  670          if (accept) {
 671  671                  error = (*so->so_downcalls->sd_getpeername)
 672  672                      (so->so_proto_handle, addr, addrlen, cr);
 673  673          } else if (!(so->so_state & SS_ISCONNECTED)) {
 674  674                  error = ENOTCONN;
 675  675          } else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
 676  676                  /* Added this check for X/Open */
 677  677                  error = EINVAL;
 678  678                  if (xnet_check_print) {
 679  679                          printf("sockfs: X/Open getpeername check => EINVAL\n");
 680  680                  }
 681  681          } else if (so->so_filter_active == 0 ||
 682  682              (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) {
 683  683                  error = (*so->so_downcalls->sd_getpeername)
 684  684                      (so->so_proto_handle, addr, addrlen, cr);
 685  685          }
 686  686  
 687  687          SO_UNBLOCK_FALLBACK(so);
 688  688          return (error);
 689  689  }
 690  690  
 691  691  int
 692  692  so_getsockopt(struct sonode *so, int level, int option_name,
 693  693      void *optval, socklen_t *optlenp, int flags, struct cred *cr)
 694  694  {
                   /*
                    * Look up a socket option.  SOL_FILTER requests go straight to
                    * sof_getsockopt().  For every other level the lookup is
                    * offered, in order, to an active socket filter, to
                    * socket_getopt_common(), and to the protocol's sd_getsockopt
                    * downcall; a negative return from one of the first two is
                    * treated as "not handled here, try the next stage".  The
                    * resulting error value is returned to the caller.
                    */
 695  695          int error = 0;
 696  696  
 697  697          if (level == SOL_FILTER)
 698  698                  return (sof_getsockopt(so, option_name, optval, optlenp, cr));
 699  699  
                   /*
                    * NOTE(review): paired with SO_UNBLOCK_FALLBACK() at the end of
                    * the function; presumably hands the request to
                    * SOP_GETSOCKOPT() when the socket has fallen back - confirm
                    * against the macro definition.
                    */
 700  700          SO_BLOCK_FALLBACK_SAFE(so,
 701  701              SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));
 702  702  
 703  703          if ((so->so_filter_active == 0 ||
 704  704              (error = sof_filter_getsockopt(so, level, option_name, optval,
 705  705              optlenp, cr)) < 0) &&
 706  706              (error = socket_getopt_common(so, level, option_name, optval,
 707  707              optlenp, flags)) < 0) {
 708  708                  error = (*so->so_downcalls->sd_getsockopt)
 709  709                      (so->so_proto_handle, level, option_name, optval, optlenp,
 710  710                      cr);
 711  711                  if (error ==  ENOPROTOOPT) {
 712  712                          if (level == SOL_SOCKET) {
 713  713                                  /*
 714  714                                   * If a protocol does not support a particular
 715  715                                   * socket option, set can fail (not allowed)
 716  716                                   * but get can not fail. This is the previous
 717  717                                   * sockfs behavior.
 718  718                                   */
 719  719                                  switch (option_name) {
 720  720                                  case SO_LINGER:
 721  721                                          if (*optlenp < (t_uscalar_t)
 722  722                                              sizeof (struct linger)) {
 723  723                                                  error = EINVAL;
 724  724                                                  break;
 725  725                                          }
 726  726                                          error = 0;
 727  727                                          bzero(optval, sizeof (struct linger));
 728  728                                          *optlenp = sizeof (struct linger);
 729  729                                          break;
 730  730                                  case SO_RCVTIMEO:
 731  731                                  case SO_SNDTIMEO:
 732  732                                          if (*optlenp < (t_uscalar_t)
 733  733                                              sizeof (struct timeval)) {
 734  734                                                  error = EINVAL;
 735  735                                                  break;
 736  736                                          }
 737  737                                          error = 0;
 738  738                                          bzero(optval, sizeof (struct timeval));
 739  739                                          *optlenp = sizeof (struct timeval);
 740  740                                          break;
 741  741                                  case SO_SND_BUFINFO:
 742  742                                          if (*optlenp < (t_uscalar_t)
 743  743                                              sizeof (struct so_snd_bufinfo)) {
 744  744                                                  error = EINVAL;
 745  745                                                  break;
 746  746                                          }
 747  747                                          error = 0;
 748  748                                          bzero(optval,
 749  749                                              sizeof (struct so_snd_bufinfo));
 750  750                                          *optlenp =
 751  751                                              sizeof (struct so_snd_bufinfo);
 752  752                                          break;
 753  753                                  case SO_DEBUG:
 754  754                                  case SO_REUSEADDR:
 755  755                                  case SO_KEEPALIVE:
 756  756                                  case SO_DONTROUTE:
 757  757                                  case SO_BROADCAST:
 758  758                                  case SO_USELOOPBACK:
 759  759                                  case SO_OOBINLINE:
 760  760                                  case SO_DGRAM_ERRIND:
 761  761                                  case SO_SNDBUF:
 762  762                                  case SO_RCVBUF:
                                                   /*
                                                    * NOTE(review): unlike the cases
                                                    * above, *optlenp is not checked
                                                    * before the int32_t store -
                                                    * confirm callers always supply
                                                    * sizeof (int32_t) bytes.
                                                    */
 763  763                                          error = 0;
 764  764                                          *((int32_t *)optval) = 0;
 765  765                                          *optlenp = sizeof (int32_t);
 766  766                                          break;
 767  767                                  default:
                                                   /* leave ENOPROTOOPT in place */
 768  768                                          break;
 769  769                                  }
 770  770                          }
 771  771                  }
 772  772          }
 773  773  
 774  774          SO_UNBLOCK_FALLBACK(so);
 775  775          return (error);
 776  776  }
 777  777  
 778  778  int
 779  779  so_setsockopt(struct sonode *so, int level, int option_name,
 780  780      const void *optval, socklen_t optlen, struct cred *cr)
 781  781  {
                   /*
                    * Set a socket option.  SOL_FILTER requests go straight to
                    * sof_setsockopt().  Otherwise an active filter may consume the
                    * request (non-negative return); if it does not, SOL_SOCKET
                    * timeouts and SO_RCVBUF are recorded in the sonode and the
                    * option is then passed to the protocol's sd_setsockopt
                    * downcall.  The resulting error value is returned.
                    */
 782  782          int error = 0;
 783  783          struct timeval tl;
 784  784          const void *opt = optval;
 785  785  
 786  786          if (level == SOL_FILTER)
 787  787                  return (sof_setsockopt(so, option_name, optval, optlen, cr));
 788  788  
 789  789          SO_BLOCK_FALLBACK_SAFE(so,
 790  790              SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
 791  791  
 792  792          /* X/Open requires this check */
 793  793          if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
 794  794                  SO_UNBLOCK_FALLBACK(so);
 795  795                  if (xnet_check_print)
 796  796                          printf("sockfs: X/Open setsockopt check => EINVAL\n");
 797  797                  return (EINVAL);
 798  798          }
 799  799  
                   /* A non-negative return means the filter handled the request. */
 800  800          if (so->so_filter_active > 0 &&
 801  801              (error = sof_filter_setsockopt(so, level, option_name,
 802  802              (void *)optval, &optlen, cr)) >= 0)
 803  803                  goto done;
 804  804  
 805  805          if (level == SOL_SOCKET) {
 806  806                  switch (option_name) {
 807  807                  case SO_RCVTIMEO:
 808  808                  case SO_SNDTIMEO: {
 809  809                          /*
 810  810                           * We pass down these two options to protocol in order
 811  811                           * to support some third-party protocols which need to
 812  812                           * know them. For those protocols which don't care about
 813  813                           * these two options, simply return 0.
 814  814                           */
 815  815                          clock_t t_usec;
 816  816  
                                   /*
                                    * Build a native struct timeval in tl from the
                                    * caller's value; the expected size depends on the
                                    * caller's data model.
                                    */
 817  817                          if (get_udatamodel() == DATAMODEL_NONE ||
 818  818                              get_udatamodel() == DATAMODEL_NATIVE) {
 819  819                                  if (optlen != sizeof (struct timeval)) {
 820  820                                          error = EINVAL;
 821  821                                          goto done;
 822  822                                  }
 823  823                                  bcopy((struct timeval *)optval, &tl,
 824  824                                      sizeof (struct timeval));
 825  825                          } else {
 826  826                                  if (optlen != sizeof (struct timeval32)) {
 827  827                                          error = EINVAL;
 828  828                                          goto done;
 829  829                                  }
 830  830                                  TIMEVAL32_TO_TIMEVAL(&tl,
 831  831                                      (struct timeval32 *)optval);
 832  832                          }
                                   /*
                                    * From here on the downcall is handed the native
                                    * copy rather than the caller's buffer.
                                    */
 833  833                          opt = &tl;
 834  834                          optlen = sizeof (tl);
                                   /*
                                    * NOTE(review): tv_sec and tv_usec are not range
                                    * checked, so this arithmetic may overflow clock_t
                                    * for large values - confirm.
                                    */
 835  835                          t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
 836  836                          mutex_enter(&so->so_lock);
 837  837                          if (option_name == SO_RCVTIMEO)
 838  838                                  so->so_rcvtimeo = drv_usectohz(t_usec);
 839  839                          else
 840  840                                  so->so_sndtimeo = drv_usectohz(t_usec);
 841  841                          mutex_exit(&so->so_lock);
 842  842                          break;
 843  843                  }
 844  844                  case SO_RCVBUF:
 845  845                          /*
 846  846                           * XXX XPG 4.2 applications retrieve SO_RCVBUF from
 847  847                           * sockfs since the transport might adjust the value
 848  848                           * and not return exactly what was set by the
 849  849                           * application.
 850  850                           */
                                   /*
                                    * NOTE(review): optlen is not validated before
                                    * optval is read as an int32_t, and so_lock is not
                                    * held for this store - confirm both are intended.
                                    */
 851  851                          so->so_xpg_rcvbuf = *(int32_t *)optval;
 852  852                          break;
 853  853                  }
 854  854          }
 855  855          error = (*so->so_downcalls->sd_setsockopt)
 856  856              (so->so_proto_handle, level, option_name, opt, optlen, cr);
 857  857  done:
 858  858          SO_UNBLOCK_FALLBACK(so);
 859  859          return (error);
 860  860  }
 861  861  
 862  862  int
 863  863  so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
 864  864      struct cred *cr, int32_t *rvalp)
 865  865  {
                   /*
                    * Handle an ioctl on the socket.  A pending socket error is
                    * reported first (unless SM_DEFERERR is set in so_mode).
                    * Otherwise the request is offered in turn to an active filter,
                    * socket_ioctl_common(), socket_strioc_common() and finally the
                    * protocol's sd_ioctl downcall; a negative return from one
                    * handler moves on to the next.  The resulting error value is
                    * returned.
                    */
 866  866          int error = 0;
 867  867  
 868  868          SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));
 869  869  
 870  870          /*
 871  871           * If there is a pending error, return error
 872  872           * This can happen if a non blocking operation caused an error.
 873  873           */
 874  874          if (so->so_error != 0 && (so->so_mode & SM_DEFERERR) == 0) {
                           /*
                            * so_error was tested above without so_lock; the value is
                            * fetched again through sogeterr() with the lock held.
                            */
 875  875                  mutex_enter(&so->so_lock);
 876  876                  error = sogeterr(so, B_TRUE);
 877  877                  mutex_exit(&so->so_lock);
 878  878                  if (error != 0)
 879  879                          goto done;
 880  880          }
 881  881  
 882  882          /*
 883  883           * calling strioc can result in the socket falling back to TPI,
 884  884           * if that is supported.
 885  885           */
 886  886          if ((so->so_filter_active == 0 ||
 887  887              (error = sof_filter_ioctl(so, cmd, arg, mode,
 888  888              rvalp, cr)) < 0) &&
 889  889              (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
 890  890              (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
 891  891                  error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
 892  892                      cmd, arg, mode, rvalp, cr);
 893  893          }
 894  894  
 895  895  done:
 896  896          SO_UNBLOCK_FALLBACK(so);
 897  897  
 898  898          return (error);
 899  899  }
 900  900  
 901  901  int
 902  902  so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
 903  903      struct pollhead **phpp)
 904  904  {
                   /*
                    * Work out which of the requested `events' currently hold for
                    * this socket and store them in *reventsp.  so_state is sampled
                    * once on entry, without so_lock.  *phpp is set to the socket's
                    * poll list only on the paths at the end of the function.
                    * Always returns 0.
                    */
 905  905          int state = so->so_state, mask;
 906  906          *reventsp = 0;
 907  907  
 908  908          /*
 909  909           * In sockets the errors are represented as input/output events
 910  910           */
 911  911          if (so->so_error != 0 &&
 912  912              ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) {
 913  913                  *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
 914  914                  return (0);
 915  915          }
 916  916  
 917  917          /*
 918  918           * If the socket is in a state where it can send data
 919  919           * turn on POLLWRBAND and POLLOUT events.
 920  920           */
 921  921          if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) {
 922  922                  /*
 923  923                   * out of band data is allowed even if the connection
 924  924                   * is flow controlled
 925  925                   */
 926  926                  *reventsp |= POLLWRBAND & events;
 927  927                  if (!SO_SND_FLOWCTRLD(so)) {
 928  928                          /*
 929  929                           * As long as there is buffer to send data
 930  930                           * turn on POLLOUT events
 931  931                           */
 932  932                          *reventsp |= POLLOUT & events;
 933  933                  }
 934  934          }
 935  935  
 936  936          /*
 937  937           * Turn on POLLIN whenever there is data on the receive queue,
 938  938           * or the socket is in a state where no more data will be received.
 939  939           * Also, if the socket is accepting connections, flip the bit if
 940  940           * there is something on the queue.
 941  941           *
 942  942           * We do an initial check for events without holding locks. However,
 943  943           * if there are no event available, then we redo the check for POLLIN
 944  944           * events under the lock.
 945  945           */
 946  946  
 947  947          /* Pending connections */
 948  948          if (!list_is_empty(&so->so_acceptq_list))
 949  949                  *reventsp |= (POLLIN|POLLRDNORM) & events;
 950  950  
 951  951          /*
 952  952           * If we're looking for POLLRDHUP, indicate it if we have sent the
 953  953           * last rx signal for the socket.
 954  954           */
 955  955          if ((events & POLLRDHUP) && (state & SS_SENTLASTREADSIG))
 956  956                  *reventsp |= POLLRDHUP;
 957  957  
 958  958          /* Data */
 959  959          /* so_downcalls is null for sctp */
 960  960          if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
                           /*
                            * The protocol supplies its own poll routine: ask it about
                            * the SO_PROTO_POLLEV subset and keep only requested bits.
                            */
 961  961                  *reventsp |= (*so->so_downcalls->sd_poll)
 962  962                      (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
 963  963                      CRED()) & events;
 964  964                  ASSERT((*reventsp & ~events) == 0);
 965  965                  /* do not recheck events */
 966  966                  events &= ~SO_PROTO_POLLEV;
 967  967          } else {
 968  968                  if (SO_HAVE_DATA(so))
 969  969                          *reventsp |= (POLLIN|POLLRDNORM) & events;
 970  970  
 971  971                  /* Urgent data */
 972  972                  if ((state & SS_OOBPEND) != 0) {
 973  973                          *reventsp |= (POLLRDBAND | POLLPRI) & events;
 974  974                  }
 975  975  
 976  976                  /*
 977  977                   * If the socket has become disconnected, we set POLLHUP.
 978  978                   * Note that if we are in this state, we will have set POLLIN
 979  979                   * (SO_HAVE_DATA() is true on a disconnected socket), but not
 980  980                   * POLLOUT (SS_ISCONNECTED is false).  This is in keeping with
 981  981                   * the semantics of POLLHUP, which is defined to be mutually
 982  982                   * exclusive with respect to POLLOUT but not POLLIN.  We are
 983  983                   * therefore setting POLLHUP primarily for the benefit of
 984  984                   * those not polling on POLLIN, as they have no other way of
 985  985                   * knowing that the socket has been disconnected.
 986  986                   */
 987  987                  mask = SS_SENTLASTREADSIG | SS_SENTLASTWRITESIG;
 988  988  
                           /* Both "last signal" bits set and SS_ISCONNECTED clear. */
 989  989                  if ((state & (mask | SS_ISCONNECTED)) == mask)
 990  990                          *reventsp |= POLLHUP;
 991  991          }
 992  992  
                   /*
                    * Reached when nothing has been found and no earlier descriptor
                    * reported events (anyyet is zero), or when the caller asked
                    * for POLLET.  so_pollev is updated only with so_lock held.
                    */
 993  993          if ((!*reventsp && !anyyet) || (events & POLLET)) {
 994  994                  /* Check for read events again, but this time under lock */
 995  995                  if (events & (POLLIN|POLLRDNORM)) {
 996  996                          mutex_enter(&so->so_lock);
 997  997                          if (SO_HAVE_DATA(so) ||
 998  998                              !list_is_empty(&so->so_acceptq_list)) {
                                           /*
                                            * Readable after all.  Only a POLLET
                                            * caller gets SO_POLLEV_IN recorded and
                                            * the poll list handed back on this path.
                                            */
 999  999                                  if (events & POLLET) {
1000 1000                                          so->so_pollev |= SO_POLLEV_IN;
1001 1001                                          *phpp = &so->so_poll_list;
1002 1002                                  }
1003 1003  
1004 1004                                  mutex_exit(&so->so_lock);
1005 1005                                  *reventsp |= (POLLIN|POLLRDNORM) & events;
1006 1006  
1007 1007                                  return (0);
1008 1008                          } else {
                                           /* Still nothing to read: note the interest. */
1009 1009                                  so->so_pollev |= SO_POLLEV_IN;
1010 1010                                  mutex_exit(&so->so_lock);
1011 1011                          }
1012 1012                  }
1013 1013                  *phpp = &so->so_poll_list;
1014 1014          }
1015 1015          return (0);
1016 1016  }
1017 1017  
1018 1018  /*
1019 1019   * Generic Upcalls
1020 1020   */
1021 1021  void
1022 1022  so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
1023 1023      cred_t *peer_cred, pid_t peer_cpid)
1024 1024  {
                   /*
                    * Upcall: the connection identified by `id' has been
                    * established.  Records the peer credential and pid (when a
                    * credential is supplied) and the connection id, marks the
                    * socket connected and notifies waiters.
                    *
                    * NOTE(review): so_lock is entered here and not exited in this
                    * function; presumably so_notify_connected() drops it -
                    * confirm.
                    */
1025 1025          struct sonode *so = (struct sonode *)sock_handle;
1026 1026  
1027 1027          mutex_enter(&so->so_lock);
1028 1028          ASSERT(so->so_proto_handle != NULL);
1029 1029  
1030 1030          if (peer_cred != NULL) {
                           /* Release any previous credential and hold the new one. */
1031 1031                  if (so->so_peercred != NULL)
1032 1032                          crfree(so->so_peercred);
1033 1033                  crhold(peer_cred);
1034 1034                  so->so_peercred = peer_cred;
1035 1035                  so->so_cpid = peer_cpid;
1036 1036          }
1037 1037  
1038 1038          so->so_proto_connid = id;
1039 1039          soisconnected(so);
1040 1040          /*
1041 1041           * Wake ones who're waiting for conn to become established.
1042 1042           */
1043 1043          so_notify_connected(so);
1044 1044  }
1045 1045  
1046 1046  int
1047 1047  so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
1048 1048  {
                   /*
                    * Upcall: the connection identified by `id' has gone away with
                    * `error'.  Marks the socket disconnected and notifies waiters,
                    * telling them whether this was a failed connect rather than a
                    * disconnect.  Always returns 0.
                    *
                    * NOTE(review): so_lock is entered here and not exited in this
                    * function; presumably so_notify_disconnected() drops it -
                    * confirm.
                    */
1049 1049          struct sonode *so = (struct sonode *)sock_handle;
1050 1050          boolean_t connect_failed;
1051 1051  
1052 1052          mutex_enter(&so->so_lock);
1053 1053  
1054 1054          /*
1055 1055           * If we aren't currently connected, then this isn't a disconnect but
1056 1056           * rather a failure to connect.
1057 1057           */
1058 1058          connect_failed = !(so->so_state & SS_ISCONNECTED);
1059 1059  
1060 1060          so->so_proto_connid = id;
1061 1061          soisdisconnected(so, error);
1062 1062          so_notify_disconnected(so, connect_failed, error);
1063 1063  
1064 1064          return (0);
1065 1065  }
1066 1066  
1067 1067  void
1068 1068  so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
1069 1069      uintptr_t arg)
1070 1070  {
                   /*
                    * Upcall: apply a control action to the socket.  `arg' is used
                    * only by SOCK_OPCTL_ENAB_ACCEPT, where it carries the backlog.
                    * An unrecognised action trips ASSERT(0).
                    *
                    * NOTE(review): the two SHUT cases enter so_lock and do not exit
                    * it here; presumably the so_notify_*() call drops it - confirm.
                    */
1071 1071          struct sonode *so = (struct sonode *)sock_handle;
1072 1072  
1073 1073          switch (action) {
1074 1074          case SOCK_OPCTL_SHUT_SEND:
1075 1075                  mutex_enter(&so->so_lock);
1076 1076                  socantsendmore(so);
1077 1077                  so_notify_disconnecting(so);
1078 1078                  break;
1079 1079          case SOCK_OPCTL_SHUT_RECV: {
1080 1080                  mutex_enter(&so->so_lock);
1081 1081                  socantrcvmore(so);
1082 1082                  so_notify_eof(so);
1083 1083                  break;
1084 1084          }
1085 1085          case SOCK_OPCTL_ENAB_ACCEPT:
                           /* Mark the socket as accepting and record the backlog. */
1086 1086                  mutex_enter(&so->so_lock);
1087 1087                  so->so_state |= SS_ACCEPTCONN;
1088 1088                  so->so_backlog = (unsigned int)arg;
1089 1089                  /*
1090 1090                   * The protocol can stop generating newconn upcalls when
1091 1091                   * the backlog is full, so to make sure the listener does
1092 1092                   * not end up with a queue full of deferred connections
1093 1093                   * we reduce the backlog by one. Thus the listener will
1094 1094                   * start closing deferred connections before the backlog
1095 1095                   * is full.
1096 1096                   */
1097 1097                  if (so->so_filter_active > 0)
1098 1098                          so->so_backlog = MAX(1, so->so_backlog - 1);
1099 1099                  mutex_exit(&so->so_lock);
1100 1100                  break;
1101 1101          default:
1102 1102                  ASSERT(0);
1103 1103                  break;
1104 1104          }
1105 1105  }
1106 1106  
1107 1107  void
1108 1108  so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
1109 1109  {
                   /*
                    * Upcall: the protocol reports whether its transmit queue is
                    * full.  When it is no longer full, writers are also notified.
                    */
1110 1110          struct sonode *so = (struct sonode *)sock_handle;
1111 1111  
1112 1112          if (qfull) {
1113 1113                  so_snd_qfull(so);
1114 1114          } else {
1115 1115                  so_snd_qnotfull(so);
1116 1116                  mutex_enter(&so->so_lock);
1117 1117                  /* so_notify_writable drops so_lock */
1118 1118                  so_notify_writable(so);
1119 1119          }
1120 1120  }
1121 1121  
1122 1122  sock_upper_handle_t
1123 1123  so_newconn(sock_upper_handle_t parenthandle,
1124 1124      sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
1125 1125      struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
1126 1126  {
                   /*
                    * Upcall: a new connection has arrived on the listening socket
                    * `parenthandle'.  Creates a sonode for it with
                    * socket_newconn() and queues it on the listener.  Returns the
                    * new socket's handle, or NULL when the connection is refused:
                    * the listener is not accepting, its accept queue has reached
                    * the backlog, socket_newconn() failed, or the listener is
                    * closing or falling back.  *sock_upcallsp is set only once the
                    * new sonode exists.
                    */
1127 1127          struct sonode   *so = (struct sonode *)parenthandle;
1128 1128          struct sonode   *nso;
1129 1129          int error;
1130 1130  
1131 1131          ASSERT(proto_handle != NULL);
1132 1132  
                   /*
                    * Refuse if not listening, or if the queue is at its backlog
                    * and no room can be made: either no filter is active or
                    * sof_sonode_drop_deferred() returned zero.
                    */
1133 1133          if ((so->so_state & SS_ACCEPTCONN) == 0 ||
1134 1134              (so->so_acceptq_len >= so->so_backlog &&
1135 1135              (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) {
1136 1136                          return (NULL);
1137 1137          }
1138 1138  
1139 1139          nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
1140 1140              &error);
1141 1141          if (nso == NULL)
1142 1142                  return (NULL);
1143 1143  
1144 1144          if (peer_cred != NULL) {
1145 1145                  crhold(peer_cred);
1146 1146                  nso->so_peercred = peer_cred;
1147 1147                  nso->so_cpid = peer_cpid;
1148 1148          }
1149 1149          nso->so_listener = so;
1150 1150  
1151 1151          /*
1152 1152           * The new socket (nso), proto_handle and sock_upcallsp are all
1153 1153           * valid at this point. But as soon as nso is placed in the accept
1154 1154           * queue that can no longer be assumed (since an accept() thread may
1155 1155           * pull it off the queue and close the socket).
1156 1156           */
1157 1157          *sock_upcallsp = &so_upcalls;
1158 1158  
1159 1159          mutex_enter(&so->so_acceptq_lock);
1160 1160          if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) {
                           /*
                            * The listener can no longer take connections.  nso was
                            * never queued, so undo the setup above and destroy it.
                            */
1161 1161                  mutex_exit(&so->so_acceptq_lock);
1162 1162                  ASSERT(nso->so_count == 1);
1163 1163                  nso->so_count--;
1164 1164                  nso->so_listener = NULL;
1165 1165                  /* drop proto ref */
1166 1166                  VN_RELE(SOTOV(nso));
1167 1167                  socket_destroy(nso);
1168 1168                  return (NULL);
1169 1169          } else {
1170 1170                  so->so_acceptq_len++;
1171 1171                  if (nso->so_state & SS_FIL_DEFER) {
                                   /* Deferred: queued separately, no wakeup is sent. */
1172 1172                          list_insert_tail(&so->so_acceptq_defer, nso);
1173 1173                          mutex_exit(&so->so_acceptq_lock);
1174 1174                  } else {
1175 1175                          list_insert_tail(&so->so_acceptq_list, nso);
1176 1176                          cv_signal(&so->so_acceptq_cv);
1177 1177                          mutex_exit(&so->so_acceptq_lock);
                                   /*
                                    * NOTE(review): so_lock is entered and not exited
                                    * here; presumably so_notify_newconn() drops it -
                                    * confirm.
                                    */
1178 1178                          mutex_enter(&so->so_lock);
1179 1179                          so_notify_newconn(so);
1180 1180                  }
1181 1181  
1182 1182                  return ((sock_upper_handle_t)nso);
1183 1183          }
1184 1184  }
1185 1185  
1186 1186  void
1187 1187  so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
1188 1188  {
                   /*
                    * Upcall: copy the properties selected by soppp->sopp_flags
                    * into so->so_proto_props under so_lock, then let any active
                    * filters adjust the maxblk, wroff and tail values.  Properties
                    * whose flag is not set are left unchanged.
                    */
1189 1189          struct sonode *so;
1190 1190  
1191 1191          so = (struct sonode *)sock_handle;
1192 1192  
1193 1193          mutex_enter(&so->so_lock);
1194 1194  
1195 1195          if (soppp->sopp_flags & SOCKOPT_MAXBLK)
1196 1196                  so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
1197 1197          if (soppp->sopp_flags & SOCKOPT_WROFF)
1198 1198                  so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
1199 1199          if (soppp->sopp_flags & SOCKOPT_TAIL)
1200 1200                  so->so_proto_props.sopp_tail = soppp->sopp_tail;
1201 1201          if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
1202 1202                  so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
1203 1203          if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
1204 1204                  so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
1205 1205          if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
1206 1206                  so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
1207 1207          if (soppp->sopp_flags & SOCKOPT_MINPSZ)
1208 1208                  so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
1209 1209          if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
                           /*
                            * ZCVMSAFE and ZCVMUNSAFE map onto STZCVMSAFE and
                            * STZCVMUNSAFE, each clearing the other; ZCVMSAFE wins if
                            * both are given.  COPYCACHED only ever sets
                            * STRCOPYCACHED.
                            */
1210 1210                  if (soppp->sopp_zcopyflag & ZCVMSAFE) {
1211 1211                          so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
1212 1212                          so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
1213 1213                  } else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
1214 1214                          so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
1215 1215                          so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
1216 1216                  }
1217 1217  
1218 1218                  if (soppp->sopp_zcopyflag & COPYCACHED) {
1219 1219                          so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
1220 1220                  }
1221 1221          }
1222 1222          if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
1223 1223                  so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
1224 1224          if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
1225 1225                  so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
1226 1226          if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
1227 1227                  so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
1228 1228          if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
1229 1229                  so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
1230 1230          if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
1231 1231                  so->so_proto_props.sopp_loopback = soppp->sopp_loopback;
1232 1232  
1233 1233          mutex_exit(&so->so_lock);
1234 1234  
1235 1235          if (so->so_filter_active > 0) {
                           /*
                            * The filter callbacks run without so_lock held.  They
                            * are given pointers to local copies, and the results are
                            * stored back under the lock afterwards.
                            */
1236 1236                  sof_instance_t *inst;
1237 1237                  ssize_t maxblk;
1238 1238                  ushort_t wroff, tail;
1239 1239                  maxblk = so->so_proto_props.sopp_maxblk;
1240 1240                  wroff = so->so_proto_props.sopp_wroff;
1241 1241                  tail = so->so_proto_props.sopp_tail;
                           /* Walk from the bottom filter along the sofi_prev links. */
1242 1242                  for (inst = so->so_filter_bottom; inst != NULL;
1243 1243                      inst = inst->sofi_prev) {
1244 1244                          if (SOF_INTERESTED(inst, mblk_prop)) {
1245 1245                                  (*inst->sofi_ops->sofop_mblk_prop)(
1246 1246                                      (sof_handle_t)inst, inst->sofi_cookie,
1247 1247                                      &maxblk, &wroff, &tail);
1248 1248                          }
1249 1249                  }
1250 1250                  mutex_enter(&so->so_lock);
1251 1251                  so->so_proto_props.sopp_maxblk = maxblk;
1252 1252                  so->so_proto_props.sopp_wroff = wroff;
1253 1253                  so->so_proto_props.sopp_tail = tail;
1254 1254                  mutex_exit(&so->so_lock);
1255 1255          }
1256 1256  #ifdef DEBUG
                   /*
                    * Clear every flag handled above and assert that nothing is
                    * left, which catches flags this function does not know about.
                    * Note that this modifies the caller's soppp.
                    */
1257 1257          soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
1258 1258              SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
1259 1259              SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
1260 1260              SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
1261 1261              SOCKOPT_LOOPBACK);
1262 1262          ASSERT(soppp->sopp_flags == 0);
1263 1263  #endif
1264 1264  }
1265 1265  
1266 1266  /* ARGSUSED */
1267 1267  ssize_t
1268 1268  so_queue_msg_impl(struct sonode *so, mblk_t *mp,
1269 1269      size_t msg_size, int flags, int *errorp,  boolean_t *force_pushp,
1270 1270      sof_instance_t *filter)
1271 1271  {
1272 1272          boolean_t force_push = B_TRUE;
1273 1273          int space_left;
1274 1274          sodirect_t *sodp = so->so_direct;
1275 1275  
1276 1276          ASSERT(errorp != NULL);
1277 1277          *errorp = 0;
1278 1278          if (mp == NULL) {
1279 1279                  if (so->so_downcalls->sd_recv_uio != NULL) {
1280 1280                          mutex_enter(&so->so_lock);
1281 1281                          /* the notify functions will drop the lock */
1282 1282                          if (flags & MSG_OOB)
1283 1283                                  so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
1284 1284                          else
1285 1285                                  so_notify_data(so, msg_size);
1286 1286                          return (0);
1287 1287                  }
1288 1288                  ASSERT(msg_size == 0);
1289 1289                  mutex_enter(&so->so_lock);
1290 1290                  goto space_check;
1291 1291          }
1292 1292  
1293 1293          ASSERT(mp->b_next == NULL);
1294 1294          ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
1295 1295          ASSERT(msg_size == msgdsize(mp));
1296 1296  
1297 1297          if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
1298 1298                  /* The read pointer is not aligned correctly for TPI */
1299 1299                  zcmn_err(getzoneid(), CE_WARN,
1300 1300                      "sockfs: Unaligned TPI message received. rptr = %p\n",
1301 1301                      (void *)mp->b_rptr);
1302 1302                  freemsg(mp);
1303 1303                  mutex_enter(&so->so_lock);
1304 1304                  if (sodp != NULL)
1305 1305                          SOD_UIOAFINI(sodp);
1306 1306                  goto space_check;
1307 1307          }
1308 1308  
1309 1309          if (so->so_filter_active > 0) {
1310 1310                  for (; filter != NULL; filter = filter->sofi_prev) {
1311 1311                          if (!SOF_INTERESTED(filter, data_in))
1312 1312                                  continue;
1313 1313                          mp = (*filter->sofi_ops->sofop_data_in)(
1314 1314                              (sof_handle_t)filter, filter->sofi_cookie, mp,
1315 1315                              flags, &msg_size);
1316 1316                          ASSERT(msgdsize(mp) == msg_size);
  
    | 
      ↓ open down ↓ | 
    1279 lines elided | 
    
      ↑ open up ↑ | 
  
1317 1317                          DTRACE_PROBE2(filter__data, (sof_instance_t), filter,
1318 1318                              (mblk_t *), mp);
1319 1319                          /* Data was consumed/dropped, just do space check */
1320 1320                          if (msg_size == 0) {
1321 1321                                  mutex_enter(&so->so_lock);
1322 1322                                  goto space_check;
1323 1323                          }
1324 1324                  }
1325 1325          }
1326 1326  
1327      -        mutex_enter(&so->so_lock);
1328      -        if (so->so_krecv_cb != NULL) {
1329      -                boolean_t cont;
1330      -                so_krecv_f func = so->so_krecv_cb;
1331      -                void *arg = so->so_krecv_arg;
1332      -
1333      -                mutex_exit(&so->so_lock);
1334      -                cont = func(so, mp, msg_size, flags & MSG_OOB, arg);
1335      -                mutex_enter(&so->so_lock);
1336      -                if (cont == B_TRUE) {
1337      -                        space_left = so->so_rcvbuf;
1338      -                } else {
1339      -                        so->so_rcv_queued = so->so_rcvlowat;
1340      -                        *errorp = ENOSPC;
1341      -                        space_left = -1;
1342      -                }
1343      -                goto done_unlock;
1344      -        }
1345      -        mutex_exit(&so->so_lock);
1346      -
1347 1327          if (flags & MSG_OOB) {
1348 1328                  so_queue_oob(so, mp, msg_size);
1349 1329                  mutex_enter(&so->so_lock);
1350 1330                  goto space_check;
1351 1331          }
1352 1332  
1353 1333          if (force_pushp != NULL)
1354 1334                  force_push = *force_pushp;
1355 1335  
1356 1336          mutex_enter(&so->so_lock);
1357 1337          if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
1358 1338                  if (sodp != NULL)
1359 1339                          SOD_DISABLE(sodp);
1360 1340                  mutex_exit(&so->so_lock);
1361 1341                  *errorp = EOPNOTSUPP;
1362 1342                  return (-1);
1363 1343          }
1364 1344          if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) {
1365 1345                  freemsg(mp);
1366 1346                  if (sodp != NULL)
1367 1347                          SOD_DISABLE(sodp);
1368 1348                  mutex_exit(&so->so_lock);
1369 1349                  return (0);
1370 1350          }
1371 1351  
1372 1352          /* process the mblk via I/OAT if capable */
1373 1353          if (sodp != NULL && sodp->sod_enabled) {
1374 1354                  if (DB_TYPE(mp) == M_DATA) {
1375 1355                          sod_uioa_mblk_init(sodp, mp, msg_size);
1376 1356                  } else {
1377 1357                          SOD_UIOAFINI(sodp);
1378 1358                  }
1379 1359          }
1380 1360  
1381 1361          if (mp->b_next == NULL) {
1382 1362                  so_enqueue_msg(so, mp, msg_size);
1383 1363          } else {
1384 1364                  do {
1385 1365                          mblk_t *nmp;
1386 1366  
1387 1367                          if ((nmp = mp->b_next) != NULL) {
1388 1368                                  mp->b_next = NULL;
1389 1369                          }
1390 1370                          so_enqueue_msg(so, mp, msgdsize(mp));
1391 1371                          mp = nmp;
1392 1372                  } while (mp != NULL);
1393 1373          }
1394 1374  
1395 1375          space_left = so->so_rcvbuf - so->so_rcv_queued;
1396 1376          if (space_left <= 0) {
1397 1377                  so->so_flowctrld = B_TRUE;
1398 1378                  *errorp = ENOSPC;
1399 1379                  space_left = -1;
1400 1380          }
1401 1381  
1402 1382          if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
1403 1383              so->so_rcv_queued >= so->so_rcv_wanted) {
1404 1384                  SOCKET_TIMER_CANCEL(so);
1405 1385                  /*
1406 1386                   * so_notify_data will release the lock
1407 1387                   */
1408 1388                  so_notify_data(so, so->so_rcv_queued);
1409 1389  
1410 1390                  if (force_pushp != NULL)
1411 1391                          *force_pushp = B_TRUE;
1412 1392                  goto done;
1413 1393          } else if (so->so_rcv_timer_tid == 0) {
1414 1394                  /* Make sure the recv push timer is running */
1415 1395                  SOCKET_TIMER_START(so);
1416 1396          }
1417 1397  
1418 1398  done_unlock:
1419 1399          mutex_exit(&so->so_lock);
1420 1400  done:
1421 1401          return (space_left);
1422 1402  
1423 1403  space_check:
1424 1404          space_left = so->so_rcvbuf - so->so_rcv_queued;
1425 1405          if (space_left <= 0) {
1426 1406                  so->so_flowctrld = B_TRUE;
1427 1407                  *errorp = ENOSPC;
1428 1408                  space_left = -1;
1429 1409          }
1430 1410          goto done_unlock;
1431 1411  }
1432 1412  
1433 1413  #pragma inline(so_queue_msg_impl)
1434 1414  
           /*
            * so_queue_msg - queue a message on a socket (entry in so_upcalls).
            *
            * Converts the opaque upper handle into its sonode and forwards the
            * request to so_queue_msg_impl(), supplying so->so_filter_bottom as the
            * filter argument. mp, msg_size, flags, errorp and force_pushp are
            * passed through untouched, and the value returned by
            * so_queue_msg_impl() is returned as is.
            */
1435 1415  ssize_t
1436 1416  so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
1437 1417      size_t msg_size, int flags, int *errorp,  boolean_t *force_pushp)
1438 1418  {
1439 1419          struct sonode *so = (struct sonode *)sock_handle;
1440 1420  
1441 1421          return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp,
1442 1422              so->so_filter_bottom));
1443 1423  }
1444 1424  
1445 1425  /*
1446 1426   * Record where the urgent (OOB) data sits, expressed as an offset
1447 1427   * relative to the bytes currently queued. Also generate SIGURG.
            *
            * sock_handle is the opaque upper handle (really a struct sonode *);
            * offset must be non-negative (asserted). Any previously stored OOB
            * message is discarded.
            *
            * so_lock is acquired here and is not released in this function.
            * NOTE(review): presumably so_notify_oobsig() drops it - confirm.
1448 1428   */
1449 1429  void
1450 1430  so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
1451 1431  {
1452 1432          struct sonode *so;
1453 1433  
1454 1434          ASSERT(offset >= 0);
1455 1435          so = (struct sonode *)sock_handle;
1456 1436          mutex_enter(&so->so_lock);
1457 1437          if (so->so_direct != NULL)
1458 1438                  SOD_UIOAFINI(so->so_direct);
1459 1439  
1460 1440          /*
1461 1441           * New urgent data on the way so forget about any old
1462 1442           * urgent data.
1463 1443           */
1464 1444          so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
1465 1445  
1466 1446          /*
1467 1447           * Record that urgent data is pending.
1468 1448           */
1469 1449          so->so_state |= SS_OOBPEND;
1470 1450  
1471 1451          if (so->so_oobmsg != NULL) {
1472 1452                  dprintso(so, 1, ("sock: discarding old oob\n"));
1473 1453                  freemsg(so->so_oobmsg);
1474 1454                  so->so_oobmsg = NULL;
1475 1455          }
1476 1456  
1477 1457          /*
1478 1458           * set the offset where the urgent byte is
                    * (a mark of zero means the receiver is already at the mark)
1479 1459           */
1480 1460          so->so_oobmark = so->so_rcv_queued + offset;
1481 1461          if (so->so_oobmark == 0)
1482 1462                  so->so_state |= SS_RCVATMARK;
1483 1463          else
1484 1464                  so->so_state &= ~SS_RCVATMARK;
1485 1465  
                   /* Called with so_lock still held; no mutex_exit() follows. */
1486 1466          so_notify_oobsig(so);
1487 1467  }
1488 1468  
1489 1469  /*
1490 1470   * Queue the OOB byte
            *
            * When the socket is not in OOB-inline mode, mp is stored in
            * so->so_oobmsg and SS_HAVEOOBDATA is set; otherwise mp is placed on
            * the regular receive queue through so_enqueue_msg() with length len.
            *
            * so_lock is acquired here and is not released in this function.
            * NOTE(review): presumably so_notify_oobdata() drops it - confirm.
1491 1471   */
1492 1472  static void
1493 1473  so_queue_oob(struct sonode *so, mblk_t *mp, size_t len)
1494 1474  {
1495 1475          mutex_enter(&so->so_lock);
1496 1476          if (so->so_direct != NULL)
1497 1477                  SOD_UIOAFINI(so->so_direct);
1498 1478  
1499 1479          ASSERT(mp != NULL);
1500 1480          if (!IS_SO_OOB_INLINE(so)) {
1501 1481                  so->so_oobmsg = mp;
1502 1482                  so->so_state |= SS_HAVEOOBDATA;
1503 1483          } else {
1504 1484                  so_enqueue_msg(so, mp, len);
1505 1485          }
1506 1486  
                   /* Called with so_lock still held; no mutex_exit() follows. */
1507 1487          so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
1508 1488  }
1509 1489  
           /*
            * so_close - close a socket (the sop_close entry of so_sonodeops).
            *
            * Marks the socket SS_CLOSING and flushes the receive queue under
            * so_lock, notifies socket filters when so_filter_active > 0, flushes
            * the accept queue when the socket is a listener (SS_ACCEPTCONN), and
            * finally invokes the protocol's sd_close downcall with flag and cr.
            *
            * Returns the value produced by sd_close, except that EINPROGRESS is
            * reported to the caller as 0.
            */
1510 1490  int
1511 1491  so_close(struct sonode *so, int flag, struct cred *cr)
1512 1492  {
1513 1493          int error;
1514 1494  
1515 1495          /*
1516 1496           * No new data will be enqueued once the CLOSING flag is set.
1517 1497           */
1518 1498          mutex_enter(&so->so_lock);
1519 1499          so->so_state |= SS_CLOSING;
1520 1500          ASSERT(so_verify_oobstate(so));
1521 1501          so_rcv_flush(so);
1522 1502          mutex_exit(&so->so_lock);
1523 1503  
1524 1504          if (so->so_filter_active > 0)
1525 1505                  sof_sonode_closing(so);
1526 1506  
1527 1507          if (so->so_state & SS_ACCEPTCONN) {
1528 1508                  /*
1529 1509                   * We grab and release the accept lock to ensure that any
1530 1510                   * thread about to insert a socket in so_newconn completes
1531 1511                   * before we flush the queue. Any thread calling so_newconn
1532 1512                   * after we drop the lock will observe the SS_CLOSING flag,
1533 1513                   * which will stop it from inserting the socket in the queue.
1534 1514                   */
1535 1515                  mutex_enter(&so->so_acceptq_lock);
1536 1516                  mutex_exit(&so->so_acceptq_lock);
1537 1517  
1538 1518                  so_acceptq_flush(so, B_TRUE);
1539 1519          }
1540 1520  
1541 1521          error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
1542 1522          switch (error) {
                   /* Every result other than EINPROGRESS, including 0, lands here. */
1543 1523          default:
1544 1524                  /* Protocol made a synchronous close; remove proto ref */
1545 1525                  VN_RELE(SOTOV(so));
1546 1526                  break;
1547 1527          case EINPROGRESS:
1548 1528                  /*
1549 1529                   * Protocol is in the process of closing, it will make a
1550 1530                   * 'closed' upcall to remove the reference.
1551 1531                   */
1552 1532                  error = 0;
1553 1533                  break;
1554 1534          }
1555 1535  
1556 1536          return (error);
1557 1537  }
1558 1538  
1559 1539  /*
1560 1540   * Upcall made by the protocol when it's doing an asynchronous close. It
1561 1541   * will drop the protocol's reference on the socket.
            *
            * sock_handle is the opaque upper handle (really a struct sonode *);
            * the reference is dropped with VN_RELE() on the sonode's vnode. This
            * is the counterpart of the EINPROGRESS case in so_close().
1562 1542   */
1563 1543  void
1564 1544  so_closed(sock_upper_handle_t sock_handle)
1565 1545  {
1566 1546          struct sonode *so = (struct sonode *)sock_handle;
1567 1547  
1568 1548          VN_RELE(SOTOV(so));
1569 1549  }
1570 1550  
           /*
            * so_zcopy_notify - zero-copy notification upcall (entry in so_upcalls).
            *
            * Under so_lock, sets STZCNOTIFY in so->so_copyflag and wakes every
            * thread waiting on so->so_copy_cv.
            */
1571 1551  void
1572 1552  so_zcopy_notify(sock_upper_handle_t sock_handle)
1573 1553  {
1574 1554          struct sonode *so = (struct sonode *)sock_handle;
1575 1555  
1576 1556          mutex_enter(&so->so_lock);
1577 1557          so->so_copyflag |= STZCNOTIFY;
1578 1558          cv_broadcast(&so->so_copy_cv);
1579 1559          mutex_exit(&so->so_lock);
1580 1560  }
1581 1561  
           /*
            * so_set_error - error upcall (entry in so_upcalls).
            *
            * Takes so_lock, records error on the socket through soseterror() and
            * then calls so_notify_error().
            *
            * so_lock is not released in this function.
            * NOTE(review): presumably so_notify_error() drops it - confirm.
            */
1582 1562  void
1583 1563  so_set_error(sock_upper_handle_t sock_handle, int error)
1584 1564  {
1585 1565          struct sonode *so = (struct sonode *)sock_handle;
1586 1566  
1587 1567          mutex_enter(&so->so_lock);
1588 1568  
1589 1569          soseterror(so, error);
1590 1570  
1591 1571          so_notify_error(so);
1592 1572  }
1593 1573  
1594 1574  /*
1595 1575   * so_recvmsg - read data from the socket
1596 1576   *
1597 1577   * There are two ways of obtaining data; either we ask the protocol to
1598 1578   * copy directly into the supplied buffer, or we copy data from the
1599 1579   * sonode's receive queue. The decision which one to use depends on
1600 1580   * whether the protocol has a sd_recv_uio down call.
            *
            * On entry msg->msg_flags carries the caller's MSG_* request flags and
            * msg_namelen / msg_controllen say whether an address / control data
            * is wanted. On the receive-queue path msg_flags is rewritten with
            * result flags (MSG_TRUNC, MSG_EOR, MSG_CTRUNC), and msg_name /
            * msg_control may be set to buffers obtained from kmem_alloc() /
            * kmem_zalloc() of msg_namelen / msg_controllen bytes.
            * NOTE(review): presumably the caller frees those buffers - confirm.
            *
            * Returns 0 on success or an errno value, e.g. ENOTCONN for an
            * unconnected connection-oriented socket, EOPNOTSUPP for MSG_OOB on
            * a socket without SM_EXDATA, EPROTO for a malformed or unknown
            * control message.
1601 1581   */
1602 1582  int
1603 1583  so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
1604 1584      struct cred *cr)
1605 1585  {
1606 1586          rval_t          rval;
1607 1587          int             flags = 0;
1608 1588          t_uscalar_t     controllen, namelen;
1609 1589          int             error = 0;
1610 1590          int ret;
1611 1591          mblk_t          *mctlp = NULL;
1612 1592          union T_primitives *tpr;
1613 1593          void            *control;
1614 1594          ssize_t         saved_resid;
  
    | 
      ↓ open down ↓ | 
    258 lines elided | 
    
      ↑ open up ↑ | 
  
1615 1595          struct uio      *suiop;
1616 1596  
1617 1597          SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));
1618 1598  
1619 1599          if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
1620 1600              (so->so_mode & SM_CONNREQUIRED)) {
1621 1601                  SO_UNBLOCK_FALLBACK(so);
1622 1602                  return (ENOTCONN);
1623 1603          }
1624 1604  
1625      -        mutex_enter(&so->so_lock);
1626      -        if (so->so_krecv_cb != NULL) {
1627      -                mutex_exit(&so->so_lock);
1628      -                return (EOPNOTSUPP);
1629      -        }
1630      -        mutex_exit(&so->so_lock);
1631      -
1632 1605          if (msg->msg_flags & MSG_PEEK)
1633 1606                  msg->msg_flags &= ~MSG_WAITALL;
1634 1607  
1635 1608          if (so->so_mode & SM_ATOMIC)
1636 1609                  msg->msg_flags |= MSG_TRUNC;
1637 1610  
1638 1611          if (msg->msg_flags & MSG_OOB) {
1639 1612                  if ((so->so_mode & SM_EXDATA) == 0) {
1640 1613                          error = EOPNOTSUPP;
1641 1614                  } else if (so->so_downcalls->sd_recv_uio != NULL) {
1642 1615                          error = (*so->so_downcalls->sd_recv_uio)
1643 1616                              (so->so_proto_handle, uiop, msg, cr);
1644 1617                  } else {
1645 1618                          error = sorecvoob(so, msg, uiop, msg->msg_flags,
1646 1619                              IS_SO_OOB_INLINE(so));
1647 1620                  }
1648 1621                  SO_UNBLOCK_FALLBACK(so);
1649 1622                  return (error);
1650 1623          }
1651 1624  
1652 1625          /*
1653 1626           * If the protocol has the recv down call, then pass the request
1654 1627           * down.
1655 1628           */
1656 1629          if (so->so_downcalls->sd_recv_uio != NULL) {
1657 1630                  error = (*so->so_downcalls->sd_recv_uio)
1658 1631                      (so->so_proto_handle, uiop, msg, cr);
1659 1632                  SO_UNBLOCK_FALLBACK(so);
1660 1633                  return (error);
1661 1634          }
1662 1635  
1663 1636          /*
1664 1637           * Reading data from the socket buffer
                    * (from here on "flags" holds the request flags and
                    * msg->msg_flags accumulates the result flags)
1665 1638           */
1666 1639          flags = msg->msg_flags;
1667 1640          msg->msg_flags = 0;
1668 1641  
1669 1642          /*
1670 1643           * Set msg_controllen and msg_namelen to zero here to make it
1671 1644           * simpler in the cases that no control or name is returned.
1672 1645           */
1673 1646          controllen = msg->msg_controllen;
1674 1647          namelen = msg->msg_namelen;
1675 1648          msg->msg_controllen = 0;
1676 1649          msg->msg_namelen = 0;
1677 1650  
1678 1651          mutex_enter(&so->so_lock);
1679 1652          /* Set SOREADLOCKED */
1680 1653          error = so_lock_read_intr(so,
1681 1654              uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
1682 1655          mutex_exit(&so->so_lock);
1683 1656          if (error) {
1684 1657                  SO_UNBLOCK_FALLBACK(so);
1685 1658                  return (error);
1686 1659          }
1687 1660  
1688 1661          suiop = sod_rcv_init(so, flags, &uiop);
1689 1662  retry:
                   /* Remembered so the MSG_WAITALL checks can tell if data moved. */
1690 1663          saved_resid = uiop->uio_resid;
1691 1664          error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
1692 1665          if (error != 0) {
1693 1666                  goto out;
1694 1667          }
1695 1668          /*
1696 1669           * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
1697 1670           * For non-datagrams MOREDATA is used to set MSG_EOR.
1698 1671           */
1699 1672          ASSERT(!(rval.r_val1 & MORECTL));
1700 1673          if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
1701 1674                  msg->msg_flags |= MSG_TRUNC;
1702 1675          if (mctlp == NULL) {
1703 1676                  dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));
1704 1677  
1705 1678                  mutex_enter(&so->so_lock);
1706 1679                  /* Set MSG_EOR based on MOREDATA */
1707 1680                  if (!(rval.r_val1 & MOREDATA)) {
1708 1681                          if (so->so_state & SS_SAVEDEOR) {
1709 1682                                  msg->msg_flags |= MSG_EOR;
1710 1683                                  so->so_state &= ~SS_SAVEDEOR;
1711 1684                          }
1712 1685                  }
1713 1686                  /*
1714 1687                   * If some data was received (i.e. not EOF) and the
1715 1688                   * read/recv* has not been satisfied wait for some more.
1716 1689                   */
1717 1690                  if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1718 1691                      uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1719 1692                          mutex_exit(&so->so_lock);
1720 1693                          flags |= MSG_NOMARK;
1721 1694                          goto retry;
1722 1695                  }
1723 1696  
1724 1697                  goto out_locked;
1725 1698          }
1726 1699          /* so_queue_msg has already verified length and alignment */
1727 1700          tpr = (union T_primitives *)mctlp->b_rptr;
1728 1701          dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
1729 1702          switch (tpr->type) {
1730 1703          case T_DATA_IND: {
1731 1704                  /*
1732 1705                   * Set msg_flags to MSG_EOR based on
1733 1706                   * MORE_flag and MOREDATA.
1734 1707                   */
1735 1708                  mutex_enter(&so->so_lock);
1736 1709                  so->so_state &= ~SS_SAVEDEOR;
1737 1710                  if (!(tpr->data_ind.MORE_flag & 1)) {
1738 1711                          if (!(rval.r_val1 & MOREDATA))
1739 1712                                  msg->msg_flags |= MSG_EOR;
1740 1713                          else
1741 1714                                  so->so_state |= SS_SAVEDEOR;
1742 1715                  }
1743 1716                  freemsg(mctlp);
1744 1717                  /*
1745 1718                   * If some data was received (i.e. not EOF) and the
1746 1719                   * read/recv* has not been satisfied wait for some more.
1747 1720                   */
1748 1721                  if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1749 1722                      uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1750 1723                          mutex_exit(&so->so_lock);
1751 1724                          flags |= MSG_NOMARK;
1752 1725                          goto retry;
1753 1726                  }
1754 1727                  goto out_locked;
1755 1728          }
1756 1729          case T_UNITDATA_IND: {
1757 1730                  void *addr;
1758 1731                  t_uscalar_t addrlen;
1759 1732                  void *abuf;
1760 1733                  t_uscalar_t optlen;
1761 1734                  void *opt;
1762 1735  
1763 1736                  if (namelen != 0) {
1764 1737                          /* Caller wants source address */
1765 1738                          addrlen = tpr->unitdata_ind.SRC_length;
1766 1739                          addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
1767 1740                              addrlen, 1);
1768 1741                          if (addr == NULL) {
1769 1742                                  freemsg(mctlp);
1770 1743                                  error = EPROTO;
1771 1744                                  eprintsoline(so, error);
1772 1745                                  goto out;
1773 1746                          }
1774 1747                          ASSERT(so->so_family != AF_UNIX);
1775 1748                  }
1776 1749                  optlen = tpr->unitdata_ind.OPT_length;
1777 1750                  if (optlen != 0) {
1778 1751                          t_uscalar_t ncontrollen;
1779 1752  
1780 1753                          /*
1781 1754                           * Extract any source address option.
1782 1755                           * Determine how large cmsg buffer is needed.
1783 1756                           */
1784 1757                          opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
1785 1758                              optlen, __TPI_ALIGN_SIZE);
1786 1759  
1787 1760                          if (opt == NULL) {
1788 1761                                  freemsg(mctlp);
1789 1762                                  error = EPROTO;
1790 1763                                  eprintsoline(so, error);
1791 1764                                  goto out;
1792 1765                          }
1793 1766                          if (so->so_family == AF_UNIX)
1794 1767                                  so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
1795 1768                          ncontrollen = so_cmsglen(mctlp, opt, optlen,
1796 1769                              !(flags & MSG_XPG4_2));
                                   /*
                                    * Caller asked for control data: size the buffer
                                    * from the options. Otherwise flag truncation.
                                    */
1797 1770                          if (controllen != 0)
1798 1771                                  controllen = ncontrollen;
1799 1772                          else if (ncontrollen != 0)
1800 1773                                  msg->msg_flags |= MSG_CTRUNC;
1801 1774                  } else {
1802 1775                          controllen = 0;
1803 1776                  }
1804 1777  
1805 1778                  if (namelen != 0) {
1806 1779                          /*
1807 1780                           * Return address to caller.
1808 1781                           * Caller handles truncation if length
1809 1782                           * exceeds msg_namelen.
1810 1783                           * NOTE: AF_UNIX NUL termination is ensured by
1811 1784                           * the sender's copyin_name().
1812 1785                           */
1813 1786                          abuf = kmem_alloc(addrlen, KM_SLEEP);
1814 1787  
1815 1788                          bcopy(addr, abuf, addrlen);
1816 1789                          msg->msg_name = abuf;
1817 1790                          msg->msg_namelen = addrlen;
1818 1791                  }
1819 1792  
1820 1793                  if (controllen != 0) {
1821 1794                          /*
1822 1795                           * Return control msg to caller.
1823 1796                           * Caller handles truncation if length
1824 1797                           * exceeds msg_controllen.
1825 1798                           */
1826 1799                          control = kmem_zalloc(controllen, KM_SLEEP);
1827 1800  
1828 1801                          error = so_opt2cmsg(mctlp, opt, optlen,
1829 1802                              !(flags & MSG_XPG4_2), control, controllen);
1830 1803                          if (error) {
                                           /*
                                            * NOTE(review): msg_name is freed here
                                            * but msg->msg_name / msg_namelen keep
                                            * their values; presumably callers do
                                            * not touch them on error - confirm.
                                            */
1831 1804                                  freemsg(mctlp);
1832 1805                                  if (msg->msg_namelen != 0)
1833 1806                                          kmem_free(msg->msg_name,
1834 1807                                              msg->msg_namelen);
1835 1808                                  kmem_free(control, controllen);
1836 1809                                  eprintsoline(so, error);
1837 1810                                  goto out;
1838 1811                          }
1839 1812                          msg->msg_control = control;
1840 1813                          msg->msg_controllen = controllen;
1841 1814                  }
1842 1815  
1843 1816                  freemsg(mctlp);
1844 1817                  goto out;
1845 1818          }
1846 1819          case T_OPTDATA_IND: {
1847 1820                  struct T_optdata_req *tdr;
1848 1821                  void *opt;
1849 1822                  t_uscalar_t optlen;
1850 1823  
                           /*
                            * NOTE(review): the message is a T_OPTDATA_IND but is
                            * read through struct T_optdata_req; presumably both
                            * share the same layout - confirm.
                            */
1851 1824                  tdr = (struct T_optdata_req *)mctlp->b_rptr;
1852 1825                  optlen = tdr->OPT_length;
1853 1826                  if (optlen != 0) {
1854 1827                          t_uscalar_t ncontrollen;
1855 1828                          /*
1856 1829                           * Determine how large cmsg buffer is needed.
1857 1830                           */
1858 1831                          opt = sogetoff(mctlp,
1859 1832                              tpr->optdata_ind.OPT_offset, optlen,
1860 1833                              __TPI_ALIGN_SIZE);
1861 1834  
1862 1835                          if (opt == NULL) {
1863 1836                                  freemsg(mctlp);
1864 1837                                  error = EPROTO;
1865 1838                                  eprintsoline(so, error);
1866 1839                                  goto out;
1867 1840                          }
1868 1841  
1869 1842                          ncontrollen = so_cmsglen(mctlp, opt, optlen,
1870 1843                              !(flags & MSG_XPG4_2));
1871 1844                          if (controllen != 0)
1872 1845                                  controllen = ncontrollen;
1873 1846                          else if (ncontrollen != 0)
1874 1847                                  msg->msg_flags |= MSG_CTRUNC;
1875 1848                  } else {
1876 1849                          controllen = 0;
1877 1850                  }
1878 1851  
1879 1852                  if (controllen != 0) {
1880 1853                          /*
1881 1854                           * Return control msg to caller.
1882 1855                           * Caller handles truncation if length
1883 1856                           * exceeds msg_controllen.
1884 1857                           */
1885 1858                          control = kmem_zalloc(controllen, KM_SLEEP);
1886 1859  
1887 1860                          error = so_opt2cmsg(mctlp, opt, optlen,
1888 1861                              !(flags & MSG_XPG4_2), control, controllen);
1889 1862                          if (error) {
1890 1863                                  freemsg(mctlp);
1891 1864                                  kmem_free(control, controllen);
1892 1865                                  eprintsoline(so, error);
1893 1866                                  goto out;
1894 1867                          }
1895 1868                          msg->msg_control = control;
1896 1869                          msg->msg_controllen = controllen;
1897 1870                  }
1898 1871  
1899 1872                  /*
1900 1873                   * Set msg_flags to MSG_EOR based on
1901 1874                   * DATA_flag and MOREDATA.
                            * NOTE(review): the code below reads
                            * data_ind.MORE_flag rather than optdata_ind.DATA_flag;
                            * presumably the two fields overlay - confirm.
1902 1875                   */
1903 1876                  mutex_enter(&so->so_lock);
1904 1877                  so->so_state &= ~SS_SAVEDEOR;
1905 1878                  if (!(tpr->data_ind.MORE_flag & 1)) {
1906 1879                          if (!(rval.r_val1 & MOREDATA))
1907 1880                                  msg->msg_flags |= MSG_EOR;
1908 1881                          else
1909 1882                                  so->so_state |= SS_SAVEDEOR;
1910 1883                  }
1911 1884                  freemsg(mctlp);
1912 1885                  /*
1913 1886                   * If some data was received (i.e. not EOF) and the
1914 1887                   * read/recv* has not been satisfied wait for some more.
1915 1888                   * Not possible to wait if control info was received.
1916 1889                   */
1917 1890                  if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1918 1891                      controllen == 0 &&
1919 1892                      uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1920 1893                          mutex_exit(&so->so_lock);
1921 1894                          flags |= MSG_NOMARK;
1922 1895                          goto retry;
1923 1896                  }
1924 1897                  goto out_locked;
1925 1898          }
1926 1899          default:
1927 1900                  cmn_err(CE_CONT, "so_recvmsg bad type %x \n",
1928 1901                      tpr->type);
1929 1902                  freemsg(mctlp);
1930 1903                  error = EPROTO;
1931 1904                  ASSERT(0);
                           /* No break: control continues at the "out" label below. */
1932 1905          }
           /*
            * "out" is reached without so_lock held and acquires it;
            * "out_locked" is reached with so_lock already held.
            */
1933 1906  out:
1934 1907          mutex_enter(&so->so_lock);
1935 1908  out_locked:
1936 1909          ret = sod_rcv_done(so, suiop, uiop);
                   /* An earlier error takes precedence over sod_rcv_done()'s result. */
1937 1910          if (ret != 0 && error == 0)
1938 1911                  error = ret;
1939 1912  
1940 1913          so_unlock_read(so);     /* Clear SOREADLOCKED */
1941 1914          mutex_exit(&so->so_lock);
1942 1915  
1943 1916          SO_UNBLOCK_FALLBACK(so);
1944 1917  
1945 1918          return (error);
1946 1919  }
1947 1920  
           /*
            * Socket operations vector for this file's so_* implementations. The
            * initializer is positional, so the entries must stay in the member
            * order of sonodeops_t (each slot name is noted beside its entry).
            */
1948 1921  sonodeops_t so_sonodeops = {
1949 1922          so_init,                /* sop_init     */
1950 1923          so_accept,              /* sop_accept   */
1951 1924          so_bind,                /* sop_bind     */
1952 1925          so_listen,              /* sop_listen   */
1953 1926          so_connect,             /* sop_connect  */
1954 1927          so_recvmsg,             /* sop_recvmsg  */
1955 1928          so_sendmsg,             /* sop_sendmsg  */
1956 1929          so_sendmblk,            /* sop_sendmblk */
1957 1930          so_getpeername,         /* sop_getpeername */
1958 1931          so_getsockname,         /* sop_getsockname */
1959 1932          so_shutdown,            /* sop_shutdown */
1960 1933          so_getsockopt,          /* sop_getsockopt */
1961 1934          so_setsockopt,          /* sop_setsockopt */
1962 1935          so_ioctl,               /* sop_ioctl    */
1963 1936          so_poll,                /* sop_poll     */
1964 1937          so_close,               /* sop_close */
1965 1938  };
1966 1939  
           /*
            * Upcall vector collecting the so_* upcall handlers (so_queue_msg,
            * so_signal_oob, so_closed, ...). The initializer is positional, so
            * the entries must stay in the member order of sock_upcalls_t.
            */
1967 1940  sock_upcalls_t so_upcalls = {
1968 1941          so_newconn,
1969 1942          so_connected,
1970 1943          so_disconnected,
1971 1944          so_opctl,
1972 1945          so_queue_msg,
1973 1946          so_set_prop,
1974 1947          so_txq_full,
1975 1948          so_signal_oob,
1976 1949          so_zcopy_notify,
1977 1950          so_set_error,
1978 1951          so_closed
1979 1952  };
  
    | 
      ↓ open down ↓ | 
    338 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX