Print this page
    
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/inet/tcp/tcp_opt_data.c
          +++ new/usr/src/uts/common/inet/tcp/tcp_opt_data.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
  24   24   * Copyright 2016 Joyent, Inc.
  25   25   */
  26   26  
  27   27  #include <sys/types.h>
  28   28  #include <sys/stream.h>
  29   29  #define _SUN_TPI_VERSION 2
  30   30  #include <sys/tihdr.h>
  31   31  #include <sys/socket.h>
  32   32  #include <sys/xti_xtiopt.h>
  33   33  #include <sys/xti_inet.h>
  34   34  #include <sys/policy.h>
  35   35  
  36   36  #include <inet/common.h>
  37   37  #include <netinet/ip6.h>
  38   38  #include <inet/ip.h>
  39   39  
  40   40  #include <netinet/in.h>
  41   41  #include <netinet/tcp.h>
  42   42  #include <inet/optcom.h>
  43   43  #include <inet/proto_set.h>
  44   44  #include <inet/tcp_impl.h>
  45   45  
  46   46  static int      tcp_opt_default(queue_t *, int, int, uchar_t *);
  47   47  
  48   48  /*
  49   49   * Table of all known options handled on a TCP protocol stack.
  50   50   *
  51   51   * Note: This table contains options processed by both TCP and IP levels
  52   52   *       and is the superset of options that can be performed on a TCP over IP
  53   53   *       stack.
  54   54   */
  55   55  opdes_t tcp_opt_arr[] = {
  56   56  
  57   57  { SO_LINGER,    SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
  58   58          sizeof (struct linger), 0 },
  59   59  
  60   60  { SO_DEBUG,     SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  61   61  { SO_KEEPALIVE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  62   62  { SO_DONTROUTE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  63   63  { SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
  64   64          },
  65   65  { SO_BROADCAST, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  66   66  { SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  67   67  { SO_REUSEPORT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  68   68  { SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  69   69  { SO_TYPE,      SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
  70   70  { SO_SNDBUF,    SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  71   71  { SO_RCVBUF,    SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  72   72  { SO_SNDTIMEO,  SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
  73   73          sizeof (struct timeval), 0 },
  74   74  { SO_RCVTIMEO,  SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
  75   75          sizeof (struct timeval), 0 },
  76   76  { SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
  77   77          },
  78   78  { SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  79   79  { SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
  80   80          0 },
  81   81  { SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
  82   82          0 },
  83   83  { SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
  84   84          0 },
  85   85  { SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int),
  86   86          0 },
  87   87  { SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  88   88  
  89   89  { SO_DOMAIN,    SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
  90   90  
  91   91  { SO_PROTOTYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
  92   92  
  93   93  { TCP_NODELAY,  IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
  94   94          },
  95   95  { TCP_MAXSEG,   IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t),
  96   96          536 },
  97   97  
  98   98  { TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
  99   99          OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
 100  100  
 101  101  { TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
 102  102          OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
 103  103  
 104  104  { TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
 105  105          OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
 106  106  
 107  107  { TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
 108  108          OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
 109  109  
 110  110  { TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
 111  111          0 },
 112  112  
 113  113  { TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0,
 114  114          sizeof (int), 0 },
 115  115  
 116  116  { TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
 117  117          },
 118  118  
 119  119  { TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0,
 120  120          sizeof (int), 0 },
 121  121  
 122  122  { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
 123  123          sizeof (int), 0 },
 124  124  
 125  125  { TCP_KEEPIDLE, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 126  126  
 127  127  { TCP_KEEPCNT, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 128  128  
 129  129  { TCP_KEEPINTVL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 130  130  
 131  131  { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
 132  132          sizeof (int), 0 },
 133  133  
 134  134  { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 135  135  
 136  136  { TCP_RTO_INITIAL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
 137  137  
 138  138  { TCP_RTO_MIN, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
 139  139  
 140  140  { TCP_RTO_MAX, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
 141  141  
 142  142  { TCP_LINGER2, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 143  143  
 144  144  { IP_OPTIONS,   IPPROTO_IP, OA_RW, OA_RW, OP_NP,
 145  145          (OP_VARLEN|OP_NODEFAULT),
 146  146          IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
 147  147  { T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
 148  148          (OP_VARLEN|OP_NODEFAULT),
 149  149          IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
 150  150  
 151  151  { IP_TOS,       IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 152  152  { T_IP_TOS,     IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 153  153  { IP_TTL,       IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
 154  154          sizeof (int), -1 /* not initialized */ },
 155  155  
 156  156  { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
 157  157          sizeof (ipsec_req_t), -1 /* not initialized */ },
 158  158  
 159  159  { IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0,
 160  160          sizeof (int),   0 /* no ifindex */ },
 161  161  
 162  162  { IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0,
 163  163          sizeof (int), 0 },
 164  164  
 165  165  { IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
 166  166          sizeof (int), -1 /* not initialized */ },
 167  167  
 168  168  { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 169  169          sizeof (int),   0 /* no ifindex */ },
 170  170  
 171  171  { IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 172  172  
 173  173  { IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0,
 174  174          sizeof (in_addr_t),     -1 /* not initialized  */ },
 175  175  
 176  176  { IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0,
 177  177          sizeof (int), 0 },
 178  178  
 179  179  { IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
 180  180          (OP_NODEFAULT|OP_VARLEN),
 181  181          sizeof (struct in6_pktinfo), -1 /* not initialized */ },
 182  182  { IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
 183  183          OP_NODEFAULT,
 184  184          sizeof (sin6_t), -1 /* not initialized */ },
 185  185  { IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
 186  186          (OP_VARLEN|OP_NODEFAULT), 255*8,
 187  187          -1 /* not initialized */ },
 188  188  { IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
 189  189          (OP_VARLEN|OP_NODEFAULT), 255*8,
 190  190          -1 /* not initialized */ },
 191  191  { IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
 192  192          (OP_VARLEN|OP_NODEFAULT), 255*8,
 193  193          -1 /* not initialized */ },
 194  194  { IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
 195  195          (OP_VARLEN|OP_NODEFAULT), 255*8,
 196  196          -1 /* not initialized */ },
 197  197  { IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
 198  198          OP_NODEFAULT,
 199  199          sizeof (int), -1 /* not initialized */ },
 200  200  { IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
 201  201          OP_NODEFAULT,
 202  202          sizeof (struct ip6_mtuinfo), -1 /* not initialized */ },
 203  203  { IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 204  204          sizeof (int), 0 },
 205  205  { IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 206  206          sizeof (int), 0 },
 207  207  { IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 208  208          sizeof (int), 0 },
 209  209  
 210  210  /* Enable receipt of ancillary data */
 211  211  { IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 212  212          sizeof (int), 0 },
 213  213  { IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 214  214          sizeof (int), 0 },
 215  215  { IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 216  216          sizeof (int), 0 },
 217  217  { _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 218  218          sizeof (int), 0 },
 219  219  { IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 220  220          sizeof (int), 0 },
 221  221  { IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 222  222          sizeof (int), 0 },
 223  223  { IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 224  224          sizeof (int), 0 },
 225  225  { IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 226  226          sizeof (int), 0 },
 227  227  
 228  228  { IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
 229  229          sizeof (ipsec_req_t), -1 /* not initialized */ },
 230  230  { IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 231  231          sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT },
 232  232  };
 233  233  
 234  234  /*
 235  235   * Table of all supported levels
 236  236   * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
 237  237   * any supported options so we need this info separately.
 238  238   *
 239  239   * This is needed only for topmost tpi providers and is used only by
 240  240   * XTI interfaces.
 241  241   */
 242  242  optlevel_t      tcp_valid_levels_arr[] = {
 243  243          XTI_GENERIC,
 244  244          SOL_SOCKET,
 245  245          IPPROTO_TCP,
 246  246          IPPROTO_IP,
 247  247          IPPROTO_IPV6
 248  248  };
 249  249  
 250  250  
 251  251  #define TCP_OPT_ARR_CNT         A_CNT(tcp_opt_arr)
 252  252  #define TCP_VALID_LEVELS_CNT    A_CNT(tcp_valid_levels_arr)
 253  253  
 254  254  uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */
 255  255  
 256  256  /*
 257  257   * Initialize option database object for TCP
 258  258   *
 259  259   * This object represents database of options to search passed to
 260  260   * {sock,tpi}optcom_req() interface routine to take care of option
 261  261   * management and associated methods.
 262  262   */
 263  263  
 264  264  optdb_obj_t tcp_opt_obj = {
 265  265          tcp_opt_default,        /* TCP default value function pointer */
 266  266          tcp_tpi_opt_get,        /* TCP get function pointer */
 267  267          tcp_tpi_opt_set,        /* TCP set function pointer */
 268  268          TCP_OPT_ARR_CNT,        /* TCP option database count of entries */
 269  269          tcp_opt_arr,            /* TCP option database */
 270  270          TCP_VALID_LEVELS_CNT,   /* TCP valid level count of entries */
 271  271          tcp_valid_levels_arr    /* TCP valid level array */
 272  272  };
 273  273  
 274  274  static int tcp_max_init_cwnd = TCP_MAX_INIT_CWND;
 275  275  
 276  276  /*
 277  277   * Some TCP options can be "set" by requesting them in the option
 278  278   * buffer. This is needed for XTI feature test though we do not
 279  279   * allow it in general. We interpret that this mechanism is more
 280  280   * applicable to OSI protocols and need not be allowed in general.
 281  281   * This routine filters out options for which it is not allowed (most)
 282  282   * and lets through those (few) for which it is. [ The XTI interface
 283  283   * test suite specifics will imply that any XTI_GENERIC level XTI_* if
 284  284   * ever implemented will have to be allowed here ].
 285  285   */
 286  286  static boolean_t
 287  287  tcp_allow_connopt_set(int level, int name)
 288  288  {
 289  289  
 290  290          switch (level) {
 291  291          case IPPROTO_TCP:
 292  292                  switch (name) {
 293  293                  case TCP_NODELAY:
 294  294                          return (B_TRUE);
 295  295                  default:
 296  296                          return (B_FALSE);
 297  297                  }
 298  298                  /*NOTREACHED*/
 299  299          default:
 300  300                  return (B_FALSE);
 301  301          }
 302  302          /*NOTREACHED*/
 303  303  }
 304  304  
 305  305  /*
 306  306   * This routine gets default values of certain options whose default
 307  307   * values are maintained by protocol specific code
 308  308   */
 309  309  /* ARGSUSED */
 310  310  static int
 311  311  tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
 312  312  {
 313  313          int32_t *i1 = (int32_t *)ptr;
 314  314          tcp_stack_t     *tcps = Q_TO_TCP(q)->tcp_tcps;
 315  315  
 316  316          switch (level) {
 317  317          case IPPROTO_TCP:
 318  318                  switch (name) {
 319  319                  case TCP_NOTIFY_THRESHOLD:
 320  320                          *i1 = tcps->tcps_ip_notify_interval;
 321  321                          break;
 322  322                  case TCP_ABORT_THRESHOLD:
 323  323                          *i1 = tcps->tcps_ip_abort_interval;
 324  324                          break;
 325  325                  case TCP_CONN_NOTIFY_THRESHOLD:
 326  326                          *i1 = tcps->tcps_ip_notify_cinterval;
 327  327                          break;
 328  328                  case TCP_CONN_ABORT_THRESHOLD:
 329  329                          *i1 = tcps->tcps_ip_abort_cinterval;
 330  330                          break;
 331  331                  default:
 332  332                          return (-1);
 333  333                  }
 334  334                  break;
 335  335          case IPPROTO_IP:
 336  336                  switch (name) {
 337  337                  case IP_TTL:
 338  338                          *i1 = tcps->tcps_ipv4_ttl;
 339  339                          break;
 340  340                  default:
 341  341                          return (-1);
 342  342                  }
 343  343                  break;
 344  344          case IPPROTO_IPV6:
 345  345                  switch (name) {
 346  346                  case IPV6_UNICAST_HOPS:
 347  347                          *i1 = tcps->tcps_ipv6_hoplimit;
 348  348                          break;
 349  349                  default:
 350  350                          return (-1);
 351  351                  }
 352  352                  break;
 353  353          default:
 354  354                  return (-1);
 355  355          }
 356  356          return (sizeof (int));
 357  357  }
 358  358  
 359  359  /*
 360  360   * TCP routine to get the values of options.
 361  361   */
 362  362  int
 363  363  tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
 364  364  {
 365  365          int             *i1 = (int *)ptr;
 366  366          tcp_t           *tcp = connp->conn_tcp;
 367  367          conn_opt_arg_t  coas;
 368  368          int             retval;
 369  369  
 370  370          coas.coa_connp = connp;
 371  371          coas.coa_ixa = connp->conn_ixa;
 372  372          coas.coa_ipp = &connp->conn_xmit_ipp;
 373  373          coas.coa_ancillary = B_FALSE;
 374  374          coas.coa_changed = 0;
 375  375  
 376  376          switch (level) {
 377  377          case SOL_SOCKET:
 378  378                  switch (name) {
 379  379                  case SO_SND_COPYAVOID:
 380  380                          *i1 = tcp->tcp_snd_zcopy_on ?
 381  381                              SO_SND_COPYAVOID : 0;
 382  382                          return (sizeof (int));
 383  383                  case SO_ACCEPTCONN:
 384  384                          *i1 = (tcp->tcp_state == TCPS_LISTEN);
 385  385                          return (sizeof (int));
 386  386                  }
 387  387                  break;
 388  388          case IPPROTO_TCP:
 389  389                  switch (name) {
 390  390                  case TCP_NODELAY:
 391  391                          *i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0;
 392  392                          return (sizeof (int));
 393  393                  case TCP_MAXSEG:
 394  394                          *i1 = tcp->tcp_mss;
 395  395                          return (sizeof (int));
 396  396                  case TCP_NOTIFY_THRESHOLD:
 397  397                          *i1 = (int)tcp->tcp_first_timer_threshold;
 398  398                          return (sizeof (int));
 399  399                  case TCP_ABORT_THRESHOLD:
 400  400                          *i1 = tcp->tcp_second_timer_threshold;
 401  401                          return (sizeof (int));
 402  402                  case TCP_CONN_NOTIFY_THRESHOLD:
 403  403                          *i1 = tcp->tcp_first_ctimer_threshold;
 404  404                          return (sizeof (int));
 405  405                  case TCP_CONN_ABORT_THRESHOLD:
 406  406                          *i1 = tcp->tcp_second_ctimer_threshold;
 407  407                          return (sizeof (int));
 408  408                  case TCP_INIT_CWND:
 409  409                          *i1 = tcp->tcp_init_cwnd;
 410  410                          return (sizeof (int));
 411  411                  case TCP_KEEPALIVE_THRESHOLD:
 412  412                          *i1 = tcp->tcp_ka_interval;
 413  413                          return (sizeof (int));
 414  414  
 415  415                  /*
 416  416                   * TCP_KEEPIDLE expects value in seconds, but
 417  417                   * tcp_ka_interval is in milliseconds.
 418  418                   */
 419  419                  case TCP_KEEPIDLE:
 420  420                          *i1 = tcp->tcp_ka_interval / 1000;
 421  421                          return (sizeof (int));
 422  422                  case TCP_KEEPCNT:
 423  423                          *i1 = tcp->tcp_ka_cnt;
 424  424                          return (sizeof (int));
 425  425  
 426  426                  /*
 427  427                   * TCP_KEEPINTVL expects value in seconds, but
 428  428                   * tcp_ka_rinterval is in milliseconds.
 429  429                   */
 430  430                  case TCP_KEEPINTVL:
 431  431                          *i1 = tcp->tcp_ka_rinterval / 1000;
 432  432                          return (sizeof (int));
 433  433                  case TCP_KEEPALIVE_ABORT_THRESHOLD:
 434  434                          *i1 = tcp->tcp_ka_abort_thres;
 435  435                          return (sizeof (int));
 436  436                  case TCP_CORK:
 437  437                          *i1 = tcp->tcp_cork;
 438  438                          return (sizeof (int));
 439  439                  case TCP_RTO_INITIAL:
 440  440                          *i1 = tcp->tcp_rto_initial;
 441  441                          return (sizeof (uint32_t));
 442  442                  case TCP_RTO_MIN:
 443  443                          *i1 = tcp->tcp_rto_min;
 444  444                          return (sizeof (uint32_t));
 445  445                  case TCP_RTO_MAX:
 446  446                          *i1 = tcp->tcp_rto_max;
 447  447                          return (sizeof (uint32_t));
 448  448                  case TCP_LINGER2:
 449  449                          *i1 = tcp->tcp_fin_wait_2_flush_interval / SECONDS;
 450  450                          return (sizeof (int));
 451  451                  }
 452  452                  break;
 453  453          case IPPROTO_IP:
 454  454                  if (connp->conn_family != AF_INET)
 455  455                          return (-1);
 456  456                  switch (name) {
 457  457                  case IP_OPTIONS:
 458  458                  case T_IP_OPTIONS:
 459  459                          /* Caller ensures enough space */
 460  460                          return (ip_opt_get_user(connp, ptr));
 461  461                  default:
 462  462                          break;
 463  463                  }
 464  464                  break;
 465  465  
 466  466          case IPPROTO_IPV6:
 467  467                  /*
 468  468                   * IPPROTO_IPV6 options are only supported for sockets
 469  469                   * that are using IPv6 on the wire.
 470  470                   */
 471  471                  if (connp->conn_ipversion != IPV6_VERSION) {
 472  472                          return (-1);
 473  473                  }
 474  474                  switch (name) {
 475  475                  case IPV6_PATHMTU:
 476  476                          if (tcp->tcp_state < TCPS_ESTABLISHED)
 477  477                                  return (-1);
 478  478                          break;
 479  479                  }
 480  480                  break;
 481  481          }
 482  482          mutex_enter(&connp->conn_lock);
 483  483          retval = conn_opt_get(&coas, level, name, ptr);
 484  484          mutex_exit(&connp->conn_lock);
 485  485          return (retval);
 486  486  }
 487  487  
 488  488  /*
 489  489   * Set a TCP connection's participation in SO_REUSEPORT.  This operation is
 490  490   * performed under the protection of the squeue via tcp_setsockopt.
 491  491   * The manipulation of tcp_rg_bind, as part of this operation, is subject to
 492  492   * these constraints:
 493  493   * 1. Prior to bind(), tcp_rg_bind can be set/cleared in tcp_set_reuseport
 494  494   *    under the protection of the squeue.
 495  495   * 2. Once the connection has been bound, the tcp_rg_bind pointer must not be
 496  496   *    altered until such time as tcp_free() cleans up the connection.
 497  497   * 3. A connection undergoing bind, which matches to a connection participating
 498  498   *    in port-reuse, will switch its tcp_rg_bind pointer when it joins the
 499  499   *    group of an existing connection in tcp_bindi().
 500  500   */
 501  501  static int
 502  502  tcp_set_reuseport(conn_t *connp, boolean_t do_enable)
 503  503  {
 504  504          tcp_t *tcp = connp->conn_tcp;
 505  505          struct tcp_rg_s *rg;
 506  506  
 507  507          if (!IPCL_IS_NONSTR(connp)) {
 508  508                  if (do_enable) {
 509  509                          /*
 510  510                           * SO_REUSEPORT cannot be enabled on sockets which have
 511  511                           * fallen back to the STREAMS API.
 512  512                           */
 513  513                          return (EINVAL);
 514  514                  } else {
 515  515                          /*
 516  516                           * A connection with SO_REUSEPORT enabled should be
 517  517                           * prevented from falling back to STREAMS mode via
 518  518                           * logic in tcp_fallback.  It is legal, however, for
 519  519                           * fallen-back connections to affirm the disabled state
 520  520                           * of SO_REUSEPORT.
 521  521                           */
 522  522                          ASSERT(connp->conn_reuseport == 0);
 523  523                          return (0);
 524  524                  }
 525  525          }
 526  526          if (tcp->tcp_state <= TCPS_CLOSED) {
 527  527                  return (EINVAL);
 528  528          }
 529  529          if (connp->conn_reuseport == 0 && do_enable) {
 530  530                  /* disabled -> enabled */
 531  531                  if (tcp->tcp_rg_bind != NULL) {
 532  532                          tcp_rg_setactive(tcp->tcp_rg_bind, do_enable);
 533  533                  } else {
 534  534                          /*
 535  535                           * Connection state is not a concern when initially
 536  536                           * populating tcp_rg_bind.  Setting it to non-NULL on a
 537  537                           * bound or listening connection would only mean that
 538  538                           * new reused-port binds become a possibility.
 539  539                           */
 540  540                          if ((rg = tcp_rg_init(tcp)) == NULL) {
 541  541                                  return (ENOMEM);
 542  542                          }
 543  543                          tcp->tcp_rg_bind = rg;
 544  544                  }
 545  545                  connp->conn_reuseport = 1;
 546  546          } else if (connp->conn_reuseport != 0 && !do_enable) {
 547  547                  /* enabled -> disabled */
 548  548                  ASSERT(tcp->tcp_rg_bind != NULL);
 549  549                  if (tcp->tcp_state == TCPS_IDLE) {
 550  550                          /*
 551  551                           * If the connection has not been bound yet, discard
 552  552                           * the reuse group state.  Since disabling SO_REUSEPORT
 553  553                           * on a bound socket will _not_ prevent others from
 554  554                           * reusing the port, the presence of tcp_rg_bind is
 555  555                           * used to determine reuse availability, not
 556  556                           * conn_reuseport.
 557  557                           *
 558  558                           * This allows proper behavior for examples such as:
 559  559                           *
 560  560                           * setsockopt(fd1, ... SO_REUSEPORT, &on_val...);
 561  561                           * bind(fd1, &myaddr, ...);
 562  562                           * setsockopt(fd1, ... SO_REUSEPORT, &off_val...);
 563  563                           *
 564  564                           * setsockopt(fd2, ... SO_REUSEPORT, &on_val...);
 565  565                           * bind(fd2, &myaddr, ...); // <- SHOULD SUCCEED
 566  566                           *
 567  567                           */
 568  568                          rg = tcp->tcp_rg_bind;
 569  569                          tcp->tcp_rg_bind = NULL;
 570  570                          VERIFY(tcp_rg_remove(rg, tcp));
 571  571                          tcp_rg_destroy(rg);
 572  572                  } else {
 573  573                          /*
 574  574                           * If a connection has been bound, it's no longer safe
 575  575                           * to manipulate tcp_rg_bind until connection clean-up
 576  576                           * during tcp_free.  Just mark the member status of the
 577  577                           * connection as inactive.
 578  578                           */
 579  579                          tcp_rg_setactive(tcp->tcp_rg_bind, do_enable);
 580  580                  }
 581  581                  connp->conn_reuseport = 0;
 582  582          }
 583  583          return (0);
 584  584  }
 585  585  
 586  586  /*
 587  587   * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
 588  588   * Parameters are assumed to be verified by the caller.
 589  589   */
 590  590  /* ARGSUSED */
 591  591  int
 592  592  tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
 593  593      uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
 594  594      void *thisdg_attrs, cred_t *cr)
 595  595  {
 596  596          tcp_t   *tcp = connp->conn_tcp;
 597  597          int     *i1 = (int *)invalp;
 598  598          boolean_t onoff = (*i1 == 0) ? 0 : 1;
 599  599          boolean_t checkonly;
 600  600          int     reterr;
 601  601          tcp_stack_t     *tcps = tcp->tcp_tcps;
 602  602          conn_opt_arg_t  coas;
 603  603          uint32_t        val = *((uint32_t *)invalp);
 604  604  
 605  605          coas.coa_connp = connp;
 606  606          coas.coa_ixa = connp->conn_ixa;
 607  607          coas.coa_ipp = &connp->conn_xmit_ipp;
 608  608          coas.coa_ancillary = B_FALSE;
 609  609          coas.coa_changed = 0;
 610  610  
 611  611          switch (optset_context) {
 612  612          case SETFN_OPTCOM_CHECKONLY:
 613  613                  checkonly = B_TRUE;
 614  614                  /*
 615  615                   * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
 616  616                   * inlen != 0 implies value supplied and
 617  617                   *      we have to "pretend" to set it.
 618  618                   * inlen == 0 implies that there is no
 619  619                   *      value part in T_CHECK request and just validation
 620  620                   * done elsewhere should be enough, we just return here.
 621  621                   */
 622  622                  if (inlen == 0) {
 623  623                          *outlenp = 0;
 624  624                          return (0);
 625  625                  }
 626  626                  break;
 627  627          case SETFN_OPTCOM_NEGOTIATE:
 628  628                  checkonly = B_FALSE;
 629  629                  break;
 630  630          case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */
 631  631          case SETFN_CONN_NEGOTIATE:
 632  632                  checkonly = B_FALSE;
 633  633                  /*
 634  634                   * Negotiating local and "association-related" options
 635  635                   * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ)
 636  636                   * primitives is allowed by XTI, but we choose
 637  637                   * to not implement this style negotiation for Internet
 638  638                   * protocols (We interpret it is a must for OSI world but
 639  639                   * optional for Internet protocols) for all options.
 640  640                   * [ Will do only for the few options that enable test
 641  641                   * suites that our XTI implementation of this feature
 642  642                   * works for transports that do allow it ]
 643  643                   */
 644  644                  if (!tcp_allow_connopt_set(level, name)) {
 645  645                          *outlenp = 0;
 646  646                          return (EINVAL);
 647  647                  }
 648  648                  break;
 649  649          default:
 650  650                  /*
 651  651                   * We should never get here
 652  652                   */
 653  653                  *outlenp = 0;
 654  654                  return (EINVAL);
 655  655          }
 656  656  
 657  657          ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
 658  658              (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
 659  659  
 660  660          /*
 661  661           * For TCP, we should have no ancillary data sent down
 662  662           * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs
 663  663           * has to be NULL.
 664  664           */
 665  665          ASSERT(thisdg_attrs == NULL);
 666  666  
 667  667          /*
 668  668           * For fixed length options, no sanity check
 669  669           * of passed in length is done. It is assumed *_optcom_req()
 670  670           * routines do the right thing.
 671  671           */
 672  672          switch (level) {
 673  673          case SOL_SOCKET:
 674  674                  switch (name) {
 675  675                  case SO_KEEPALIVE:
 676  676                          if (checkonly) {
 677  677                                  /* check only case */
 678  678                                  break;
 679  679                          }
 680  680  
 681  681                          if (!onoff) {
 682  682                                  if (connp->conn_keepalive) {
 683  683                                          if (tcp->tcp_ka_tid != 0) {
 684  684                                                  (void) TCP_TIMER_CANCEL(tcp,
 685  685                                                      tcp->tcp_ka_tid);
 686  686                                                  tcp->tcp_ka_tid = 0;
 687  687                                          }
 688  688                                          connp->conn_keepalive = 0;
 689  689                                  }
 690  690                                  break;
 691  691                          }
 692  692                          if (!connp->conn_keepalive) {
 693  693                                  /* Crank up the keepalive timer */
 694  694                                  tcp->tcp_ka_last_intrvl = 0;
 695  695                                  tcp->tcp_ka_tid = TCP_TIMER(tcp,
 696  696                                      tcp_keepalive_timer, tcp->tcp_ka_interval);
 697  697                                  connp->conn_keepalive = 1;
 698  698                          }
 699  699                          break;
 700  700                  case SO_SNDBUF: {
 701  701                          if (*i1 > tcps->tcps_max_buf) {
 702  702                                  *outlenp = 0;
 703  703                                  return (ENOBUFS);
 704  704                          }
 705  705                          if (checkonly)
 706  706                                  break;
 707  707  
 708  708                          connp->conn_sndbuf = *i1;
 709  709                          if (tcps->tcps_snd_lowat_fraction != 0) {
 710  710                                  connp->conn_sndlowat = connp->conn_sndbuf /
 711  711                                      tcps->tcps_snd_lowat_fraction;
 712  712                          }
 713  713                          (void) tcp_maxpsz_set(tcp, B_TRUE);
 714  714                          /*
 715  715                           * If we are flow-controlled, recheck the condition.
 716  716                           * There are apps that increase SO_SNDBUF size when
 717  717                           * flow-controlled (EWOULDBLOCK), and expect the flow
 718  718                           * control condition to be lifted right away.
 719  719                           */
 720  720                          mutex_enter(&tcp->tcp_non_sq_lock);
 721  721                          if (tcp->tcp_flow_stopped &&
 722  722                              TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) {
 723  723                                  tcp_clrqfull(tcp);
 724  724                          }
 725  725                          mutex_exit(&tcp->tcp_non_sq_lock);
 726  726                          *outlenp = inlen;
 727  727                          return (0);
 728  728                  }
 729  729                  case SO_RCVBUF:
 730  730                          if (*i1 > tcps->tcps_max_buf) {
 731  731                                  *outlenp = 0;
 732  732                                  return (ENOBUFS);
 733  733                          }
 734  734                          /* Silently ignore zero */
 735  735                          if (!checkonly && *i1 != 0) {
 736  736                                  *i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss);
 737  737                                  (void) tcp_rwnd_set(tcp, *i1);
 738  738                          }
 739  739                          /*
 740  740                           * XXX should we return the rwnd here
 741  741                           * and tcp_opt_get ?
 742  742                           */
 743  743                          *outlenp = inlen;
 744  744                          return (0);
 745  745                  case SO_SND_COPYAVOID:
 746  746                          if (!checkonly) {
 747  747                                  if (tcp->tcp_loopback ||
 748  748                                      (onoff != 1) || !tcp_zcopy_check(tcp)) {
 749  749                                          *outlenp = 0;
 750  750                                          return (EOPNOTSUPP);
 751  751                                  }
 752  752                                  tcp->tcp_snd_zcopy_aware = 1;
 753  753                          }
 754  754                          *outlenp = inlen;
 755  755                          return (0);
 756  756                  case SO_REUSEPORT:
 757  757                          if (!checkonly) {
 758  758                                  return (tcp_set_reuseport(connp, *i1 != 0));
 759  759                          }
 760  760                          return (0);
 761  761                  }
 762  762                  break;
 763  763          case IPPROTO_TCP:
 764  764                  switch (name) {
 765  765                  case TCP_NODELAY:
 766  766                          if (!checkonly)
 767  767                                  tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss;
 768  768                          break;
 769  769                  case TCP_NOTIFY_THRESHOLD:
 770  770                          if (!checkonly)
 771  771                                  tcp->tcp_first_timer_threshold = *i1;
 772  772                          break;
 773  773                  case TCP_ABORT_THRESHOLD:
 774  774                          if (!checkonly)
 775  775                                  tcp->tcp_second_timer_threshold = *i1;
 776  776                          break;
 777  777                  case TCP_CONN_NOTIFY_THRESHOLD:
 778  778                          if (!checkonly)
 779  779                                  tcp->tcp_first_ctimer_threshold = *i1;
 780  780                          break;
 781  781                  case TCP_CONN_ABORT_THRESHOLD:
 782  782                          if (!checkonly)
 783  783                                  tcp->tcp_second_ctimer_threshold = *i1;
 784  784                          break;
 785  785                  case TCP_RECVDSTADDR:
 786  786                          if (tcp->tcp_state > TCPS_LISTEN) {
 787  787                                  *outlenp = 0;
 788  788                                  return (EOPNOTSUPP);
 789  789                          }
 790  790                          /* Setting done in conn_opt_set */
 791  791                          break;
 792  792                  case TCP_INIT_CWND:
 793  793                          if (checkonly)
 794  794                                  break;
 795  795  
 796  796                          /*
 797  797                           * Only allow socket with network configuration
 798  798                           * privilege to set the initial cwnd to be larger
 799  799                           * than allowed by RFC 3390.
 800  800                           */
 801  801                          if (val > MIN(4, MAX(2, 4380 / tcp->tcp_mss))) {
 802  802                                  if ((reterr = secpolicy_ip_config(cr, B_TRUE))
 803  803                                      != 0) {
 804  804                                          *outlenp = 0;
 805  805                                          return (reterr);
 806  806                                  }
 807  807                                  if (val > tcp_max_init_cwnd) {
 808  808                                          *outlenp = 0;
 809  809                                          return (EINVAL);
 810  810                                  }
 811  811                          }
 812  812  
 813  813                          tcp->tcp_init_cwnd = val;
 814  814  
 815  815                          /*
 816  816                           * If the socket is connected, AND no outbound data
 817  817                           * has been sent, reset the actual cwnd values.
 818  818                           */
 819  819                          if (tcp->tcp_state == TCPS_ESTABLISHED &&
 820  820                              tcp->tcp_iss == tcp->tcp_snxt - 1) {
 821  821                                  tcp->tcp_cwnd =
 822  822                                      MIN(tcp->tcp_rwnd, val * tcp->tcp_mss);
 823  823                          }
 824  824                          break;
 825  825  
 826  826                  /*
 827  827                   * TCP_KEEPIDLE is in seconds but TCP_KEEPALIVE_THRESHOLD
 828  828                   * is in milliseconds. TCP_KEEPIDLE is introduced for
 829  829                   * compatibility with other Unix flavors.
 830  830                   * We can fall through TCP_KEEPALIVE_THRESHOLD logic after
 831  831                   * converting the input to milliseconds.
 832  832                   */
 833  833                  case TCP_KEEPIDLE:
 834  834                          *i1 *= 1000;
 835  835                          /* FALLTHRU */
 836  836  
 837  837                  case TCP_KEEPALIVE_THRESHOLD:
 838  838                          if (checkonly)
 839  839                                  break;
 840  840  
 841  841                          if (*i1 < tcps->tcps_keepalive_interval_low ||
 842  842                              *i1 > tcps->tcps_keepalive_interval_high) {
 843  843                                  *outlenp = 0;
 844  844                                  return (EINVAL);
 845  845                          }
 846  846                          if (*i1 != tcp->tcp_ka_interval) {
 847  847                                  tcp->tcp_ka_interval = *i1;
 848  848                                  /*
 849  849                                   * Check if we need to restart the
 850  850                                   * keepalive timer.
 851  851                                   */
 852  852                                  if (tcp->tcp_ka_tid != 0) {
 853  853                                          ASSERT(connp->conn_keepalive);
 854  854                                          (void) TCP_TIMER_CANCEL(tcp,
 855  855                                              tcp->tcp_ka_tid);
 856  856                                          tcp->tcp_ka_last_intrvl = 0;
 857  857                                          tcp->tcp_ka_tid = TCP_TIMER(tcp,
 858  858                                              tcp_keepalive_timer,
 859  859                                              tcp->tcp_ka_interval);
 860  860                                  }
 861  861                          }
 862  862                          break;
 863  863  
 864  864                  /*
 865  865                   * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt.
 866  866                   * So setting TCP_KEEPCNT or TCP_KEEPINTVL can affect all the
 867  867                   * three members - tcp_ka_abort_thres, tcp_ka_rinterval and
 868  868                   * tcp_ka_cnt.
 869  869                   */
 870  870                  case TCP_KEEPCNT:
 871  871                          if (checkonly)
 872  872                                  break;
 873  873  
 874  874                          if (*i1 == 0) {
 875  875                                  return (EINVAL);
 876  876                          } else if (tcp->tcp_ka_rinterval == 0) {
 877  877                                  /*
 878  878                                   * When TCP_KEEPCNT is specified without first
 879  879                                   * specifying a TCP_KEEPINTVL, we infer an
 880  880                                   * interval based on a tunable specific to our
 881  881                                   * stack: the tcp_keepalive_abort_interval.
 882  882                                   * (Or the TCP_KEEPALIVE_ABORT_THRESHOLD, in
 883  883                                   * the unlikely event that that has been set.)
 884  884                                   * Given the abort interval's default value of
 885  885                                   * 480 seconds, low TCP_KEEPCNT values can
 886  886                                   * result in intervals that exceed the default
 887  887                                   * maximum RTO of 60 seconds.  Rather than
 888  888                                   * fail in these cases, we (implicitly) clamp
 889  889                                   * the interval at the maximum RTO; if the
 890  890                                   * TCP_KEEPCNT is shortly followed by a
 891  891                                   * TCP_KEEPINTVL (as we expect), the abort
 892  892                                   * threshold will be recalculated correctly --
 893  893                                   * and if a TCP_KEEPINTVL is not forthcoming,
 894  894                                   * keep-alive will at least operate reasonably
 895  895                                   * given the underconfigured state.
 896  896                                   */
 897  897                                  uint32_t interval;
 898  898  
 899  899                                  interval = tcp->tcp_ka_abort_thres / *i1;
 900  900  
 901  901                                  if (interval < tcp->tcp_rto_min)
 902  902                                          interval = tcp->tcp_rto_min;
 903  903  
 904  904                                  if (interval > tcp->tcp_rto_max)
 905  905                                          interval = tcp->tcp_rto_max;
 906  906  
 907  907                                  tcp->tcp_ka_rinterval = interval;
 908  908                          } else {
 909  909                                  if ((*i1 * tcp->tcp_ka_rinterval) <
 910  910                                      tcps->tcps_keepalive_abort_interval_low ||
 911  911                                      (*i1 * tcp->tcp_ka_rinterval) >
 912  912                                      tcps->tcps_keepalive_abort_interval_high)
 913  913                                          return (EINVAL);
 914  914                                  tcp->tcp_ka_abort_thres =
 915  915                                      (*i1 * tcp->tcp_ka_rinterval);
 916  916                          }
 917  917                          tcp->tcp_ka_cnt = *i1;
 918  918                          break;
 919  919                  case TCP_KEEPINTVL:
 920  920                          /*
 921  921                           * TCP_KEEPINTVL is specified in seconds, but
 922  922                           * tcp_ka_rinterval is in milliseconds.
 923  923                           */
 924  924  
 925  925                          if (checkonly)
 926  926                                  break;
 927  927  
 928  928                          if ((*i1 * 1000) < tcp->tcp_rto_min ||
 929  929                              (*i1 * 1000) > tcp->tcp_rto_max)
 930  930                                  return (EINVAL);
 931  931  
 932  932                          if (tcp->tcp_ka_cnt == 0) {
 933  933                                  tcp->tcp_ka_cnt =
 934  934                                      tcp->tcp_ka_abort_thres / (*i1 * 1000);
 935  935                          } else {
 936  936                                  if ((*i1 * tcp->tcp_ka_cnt * 1000) <
 937  937                                      tcps->tcps_keepalive_abort_interval_low ||
 938  938                                      (*i1 * tcp->tcp_ka_cnt * 1000) >
 939  939                                      tcps->tcps_keepalive_abort_interval_high)
 940  940                                          return (EINVAL);
 941  941                                  tcp->tcp_ka_abort_thres =
 942  942                                      (*i1 * tcp->tcp_ka_cnt * 1000);
 943  943                          }
 944  944                          tcp->tcp_ka_rinterval = *i1 * 1000;
 945  945                          break;
 946  946                  case TCP_KEEPALIVE_ABORT_THRESHOLD:
 947  947                          if (!checkonly) {
 948  948                                  if (*i1 <
 949  949                                      tcps->tcps_keepalive_abort_interval_low ||
 950  950                                      *i1 >
 951  951                                      tcps->tcps_keepalive_abort_interval_high) {
 952  952                                          *outlenp = 0;
 953  953                                          return (EINVAL);
 954  954                                  }
 955  955                                  tcp->tcp_ka_abort_thres = *i1;
 956  956                                  tcp->tcp_ka_cnt = 0;
 957  957                                  tcp->tcp_ka_rinterval = 0;
 958  958                          }
 959  959                          break;
 960  960                  case TCP_CORK:
 961  961                          if (!checkonly) {
 962  962                                  /*
 963  963                                   * if tcp->tcp_cork was set and is now
 964  964                                   * being unset, we have to make sure that
 965  965                                   * the remaining data gets sent out. Also
 966  966                                   * unset tcp->tcp_cork so that tcp_wput_data()
 967  967                                   * can send data even if it is less than mss
 968  968                                   */
 969  969                                  if (tcp->tcp_cork && onoff == 0 &&
 970  970                                      tcp->tcp_unsent > 0) {
 971  971                                          tcp->tcp_cork = B_FALSE;
 972  972                                          tcp_wput_data(tcp, NULL, B_FALSE);
 973  973                                  }
 974  974                                  tcp->tcp_cork = onoff;
 975  975                          }
 976  976                          break;
 977  977                  case TCP_RTO_INITIAL: {
 978  978                          clock_t rto;
 979  979  
 980  980                          if (checkonly || val == 0)
 981  981                                  break;
 982  982  
 983  983                          /*
 984  984                           * Sanity checks
 985  985                           *
 986  986                           * The initial RTO should be bounded by the minimum
 987  987                           * and maximum RTO.  And it should also be smaller
 988  988                           * than the connect attempt abort timeout.  Otherwise,
 989  989                           * the connection won't be aborted in a period
 990  990                           * reasonably close to that timeout.
 991  991                           */
 992  992                          if (val < tcp->tcp_rto_min || val > tcp->tcp_rto_max ||
 993  993                              val > tcp->tcp_second_ctimer_threshold ||
 994  994                              val < tcps->tcps_rexmit_interval_initial_low ||
 995  995                              val > tcps->tcps_rexmit_interval_initial_high) {
 996  996                                  *outlenp = 0;
 997  997                                  return (EINVAL);
 998  998                          }
 999  999                          tcp->tcp_rto_initial = val;
1000 1000  
1001 1001                          /*
1002 1002                           * If TCP has not sent anything, need to re-calculate
1003 1003                           * tcp_rto.  Otherwise, this option change does not
1004 1004                           * really affect anything.
1005 1005                           */
1006 1006                          if (tcp->tcp_state >= TCPS_SYN_SENT)
1007 1007                                  break;
1008 1008  
1009 1009                          tcp->tcp_rtt_sa = tcp->tcp_rto_initial << 2;
1010 1010                          tcp->tcp_rtt_sd = tcp->tcp_rto_initial >> 1;
1011 1011                          rto = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd +
1012 1012                              tcps->tcps_rexmit_interval_extra +
1013 1013                              (tcp->tcp_rtt_sa >> 5) +
1014 1014                              tcps->tcps_conn_grace_period;
1015 1015                          TCP_SET_RTO(tcp, rto);
1016 1016                          break;
1017 1017                  }
1018 1018                  case TCP_RTO_MIN:
1019 1019                          if (checkonly || val == 0)
1020 1020                                  break;
1021 1021  
1022 1022                          if (val < tcps->tcps_rexmit_interval_min_low ||
1023 1023                              val > tcps->tcps_rexmit_interval_min_high ||
1024 1024                              val > tcp->tcp_rto_max) {
1025 1025                                  *outlenp = 0;
1026 1026                                  return (EINVAL);
1027 1027                          }
1028 1028                          tcp->tcp_rto_min = val;
1029 1029                          if (tcp->tcp_rto < val)
1030 1030                                  tcp->tcp_rto = val;
1031 1031                          break;
1032 1032                  case TCP_RTO_MAX:
1033 1033                          if (checkonly || val == 0)
1034 1034                                  break;
1035 1035  
1036 1036                          /*
1037 1037                           * Sanity checks
1038 1038                           *
1039 1039                           * The maximum RTO should not be larger than the
1040 1040                           * connection abort timeout.  Otherwise, the
1041 1041                           * connection won't be aborted in a period reasonably
1042 1042                           * close to that timeout.
1043 1043                           */
1044 1044                          if (val < tcps->tcps_rexmit_interval_max_low ||
1045 1045                              val > tcps->tcps_rexmit_interval_max_high ||
1046 1046                              val < tcp->tcp_rto_min ||
1047 1047                              val > tcp->tcp_second_timer_threshold) {
1048 1048                                  *outlenp = 0;
1049 1049                                  return (EINVAL);
1050 1050                          }
1051 1051                          tcp->tcp_rto_max = val;
1052 1052                          if (tcp->tcp_rto > val)
1053 1053                                  tcp->tcp_rto = val;
1054 1054                          break;
1055 1055                  case TCP_LINGER2:
1056 1056                          if (checkonly || *i1 == 0)
1057 1057                                  break;
1058 1058  
1059 1059                          /*
1060 1060                           * Note that the option value's unit is seconds.  And
1061 1061                           * the value should be bigger than the private
1062 1062                           * parameter tcp_fin_wait_2_flush_interval's lower
1063 1063                           * bound and smaller than the current value of that
1064 1064                           * parameter.  It should be smaller than the current
1065 1065                           * value to avoid an app setting TCP_LINGER2 to a big
1066 1066                           * value, causing resources to be held up too long in
1067 1067                           * FIN-WAIT-2 state.
1068 1068                           */
1069 1069                          if (*i1 < 0 ||
1070 1070                              tcps->tcps_fin_wait_2_flush_interval_low/SECONDS >
1071 1071                              *i1 ||
1072 1072                              tcps->tcps_fin_wait_2_flush_interval/SECONDS <
1073 1073                              *i1) {
1074 1074                                  *outlenp = 0;
1075 1075                                  return (EINVAL);
1076 1076                          }
1077 1077                          tcp->tcp_fin_wait_2_flush_interval = *i1 * SECONDS;
1078 1078                          break;
1079 1079                  default:
1080 1080                          break;
1081 1081                  }
1082 1082                  break;
1083 1083          case IPPROTO_IP:
1084 1084                  switch (name) {
1085 1085                  case IP_SEC_OPT:
1086 1086                          /*
1087 1087                           * We should not allow policy setting after
1088 1088                           * we start listening for connections.
1089 1089                           */
1090 1090                          if (tcp->tcp_state == TCPS_LISTEN) {
1091 1091                                  return (EINVAL);
1092 1092                          }
1093 1093                          break;
1094 1094                  }
1095 1095                  break;
1096 1096          case IPPROTO_IPV6:
1097 1097                  /*
1098 1098                   * IPPROTO_IPV6 options are only supported for sockets
1099 1099                   * that are using IPv6 on the wire.
1100 1100                   */
1101 1101                  if (connp->conn_ipversion != IPV6_VERSION) {
1102 1102                          *outlenp = 0;
1103 1103                          return (EINVAL);
1104 1104                  }
1105 1105  
1106 1106                  switch (name) {
1107 1107                  case IPV6_RECVPKTINFO:
1108 1108                          if (!checkonly) {
1109 1109                                  /* Force it to be sent up with the next msg */
1110 1110                                  tcp->tcp_recvifindex = 0;
1111 1111                          }
1112 1112                          break;
1113 1113                  case IPV6_RECVTCLASS:
1114 1114                          if (!checkonly) {
1115 1115                                  /* Force it to be sent up with the next msg */
1116 1116                                  tcp->tcp_recvtclass = 0xffffffffU;
1117 1117                          }
1118 1118                          break;
1119 1119                  case IPV6_RECVHOPLIMIT:
1120 1120                          if (!checkonly) {
1121 1121                                  /* Force it to be sent up with the next msg */
1122 1122                                  tcp->tcp_recvhops = 0xffffffffU;
1123 1123                          }
1124 1124                          break;
1125 1125                  case IPV6_PKTINFO:
1126 1126                          /* This is an extra check for TCP */
1127 1127                          if (inlen == sizeof (struct in6_pktinfo)) {
1128 1128                                  struct in6_pktinfo *pkti;
1129 1129  
1130 1130                                  pkti = (struct in6_pktinfo *)invalp;
1131 1131                                  /*
1132 1132                                   * RFC 3542 states that ipi6_addr must be
1133 1133                                   * the unspecified address when setting the
1134 1134                                   * IPV6_PKTINFO sticky socket option on a
1135 1135                                   * TCP socket.
1136 1136                                   */
1137 1137                                  if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr))
1138 1138                                          return (EINVAL);
1139 1139                          }
1140 1140                          break;
1141 1141                  case IPV6_SEC_OPT:
1142 1142                          /*
1143 1143                           * We should not allow policy setting after
1144 1144                           * we start listening for connections.
1145 1145                           */
1146 1146                          if (tcp->tcp_state == TCPS_LISTEN) {
1147 1147                                  return (EINVAL);
1148 1148                          }
1149 1149                          break;
1150 1150                  }
1151 1151                  break;
1152 1152          }
1153 1153          reterr = conn_opt_set(&coas, level, name, inlen, invalp,
1154 1154              checkonly, cr);
1155 1155          if (reterr != 0) {
1156 1156                  *outlenp = 0;
1157 1157                  return (reterr);
1158 1158          }
1159 1159  
1160 1160          /*
1161 1161           * Common case of OK return with outval same as inval
1162 1162           */
1163 1163          if (invalp != outvalp) {
1164 1164                  /* don't trust bcopy for identical src/dst */
1165 1165                  (void) bcopy(invalp, outvalp, inlen);
1166 1166          }
1167 1167          *outlenp = inlen;
1168 1168  
1169 1169          if (coas.coa_changed & COA_HEADER_CHANGED) {
1170 1170                  /* If we are connected we rebuild the headers */
1171 1171                  if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1172 1172                      !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1173 1173                          reterr = tcp_build_hdrs(tcp);
1174 1174                          if (reterr != 0)
1175 1175                                  return (reterr);
1176 1176                  }
1177 1177          }
1178 1178          if (coas.coa_changed & COA_ROUTE_CHANGED) {
1179 1179                  in6_addr_t nexthop;
1180 1180  
1181 1181                  /*
1182 1182                   * If we are connected we re-cache the information.
1183 1183                   * We ignore errors to preserve BSD behavior.
1184 1184                   * Note that we don't redo IPsec policy lookup here
1185 1185                   * since the final destination (or source) didn't change.
1186 1186                   */
1187 1187                  ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
1188 1188                      &connp->conn_faddr_v6, &nexthop);
1189 1189  
1190 1190                  if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1191 1191                      !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1192 1192                          (void) ip_attr_connect(connp, connp->conn_ixa,
1193 1193                              &connp->conn_laddr_v6, &connp->conn_faddr_v6,
1194 1194                              &nexthop, connp->conn_fport, NULL, NULL,
1195 1195                              IPDF_VERIFY_DST);
1196 1196                  }
1197 1197          }
1198 1198          if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
1199 1199                  connp->conn_wq->q_hiwat = connp->conn_sndbuf;
1200 1200          }
1201 1201          if (coas.coa_changed & COA_WROFF_CHANGED) {
1202 1202                  connp->conn_wroff = connp->conn_ht_iphc_allocated +
1203 1203                      tcps->tcps_wroff_xtra;
1204 1204                  (void) proto_set_tx_wroff(connp->conn_rq, connp,
1205 1205                      connp->conn_wroff);
1206 1206          }
1207 1207          if (coas.coa_changed & COA_OOBINLINE_CHANGED) {
1208 1208                  if (IPCL_IS_NONSTR(connp))
1209 1209                          proto_set_rx_oob_opt(connp, onoff);
1210 1210          }
1211 1211          return (0);
1212 1212  }
  
    | 
      ↓ open down ↓ | 
    1212 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX