Print this page
    
NEX-3672 IDM module panics target when PDU has AHS length between 17 and 49
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/io/idm/idm_so.c
          +++ new/usr/src/uts/common/io/idm/idm_so.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  
    | 
      ↓ open down ↓ | 
    16 lines elided | 
    
      ↑ open up ↑ | 
  
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  /*
  26   26   * Copyright (c) 2013 by Delphix. All rights reserved.
       27 + * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  27   28   * Copyright (c) 2017, Joyent, Inc.  All rights reserved.
  28   29   */
  29   30  
  30   31  #include <sys/conf.h>
  31   32  #include <sys/stat.h>
  32   33  #include <sys/file.h>
  33   34  #include <sys/ddi.h>
  34   35  #include <sys/sunddi.h>
  35   36  #include <sys/modctl.h>
  36   37  #include <sys/priv.h>
  37   38  #include <sys/cpuvar.h>
  38   39  #include <sys/socket.h>
  39   40  #include <sys/strsubr.h>
  40   41  #include <sys/sysmacros.h>
  41   42  #include <sys/sdt.h>
  42   43  #include <netinet/tcp.h>
  43   44  #include <inet/tcp.h>
  44   45  #include <sys/socketvar.h>
  45   46  #include <sys/pathname.h>
  46   47  #include <sys/fs/snode.h>
  47   48  #include <sys/fs/dv_node.h>
  48   49  #include <sys/vnode.h>
  49   50  #include <netinet/in.h>
  50   51  #include <net/if.h>
  51   52  #include <sys/sockio.h>
  52   53  #include <sys/ksocket.h>
  53   54  #include <sys/filio.h>          /* FIONBIO */
  54   55  #include <sys/iscsi_protocol.h>
  55   56  #include <sys/idm/idm.h>
  56   57  #include <sys/idm/idm_so.h>
  57   58  #include <sys/idm/idm_text.h>
  58   59  
  59   60  #define IN_PROGRESS_DELAY       1
  60   61  
  61   62  /*
  62   63   * in6addr_any is currently all zeroes, but use the macro in case this
  63   64   * ever changes.
  64   65   */
  65   66  static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
  66   67  
  67   68  static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
  68   69  static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
  69   70  static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
  70   71  
  71   72  static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
  72   73  static void idm_so_conn_destroy_common(idm_conn_t *ic);
  73   74  static void idm_so_conn_connect_common(idm_conn_t *ic);
  74   75  
  75   76  static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
  76   77      boolean_t boot_conn);
  77   78  static void idm_set_postconnect_options(ksocket_t so);
  78   79  static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
  79   80  
  80   81  static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
  81   82  static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
  82   83      idm_buf_t *idb, uint32_t offset, uint32_t length);
  83   84  static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
  84   85  static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
  85   86      idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
  86   87  
  87   88  static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
  88   89      uint32_t ro, uint32_t dlength);
  89   90  
  90   91  static idm_status_t idm_so_handle_digest(idm_conn_t *it,
  91   92      nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
  92   93  
  93   94  static void idm_so_socket_set_nonblock(struct sonode *node);
  94   95  static void idm_so_socket_set_block(struct sonode *node);
  95   96  
  96   97  /*
  97   98   * Transport ops prototypes
  98   99   */
  99  100  static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
 100  101  static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
 101  102  static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
 102  103  static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
 103  104  static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
 104  105  static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
 105  106  static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
 106  107  static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
 107  108      nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
 108  109  static void idm_so_notice_key_values(idm_conn_t *it,
 109  110      nvlist_t *negotiated_nvl);
 110  111  static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
 111  112      nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
 112  113  static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
 113  114      idm_transport_caps_t *caps);
 114  115  static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
 115  116  static void idm_so_buf_free(idm_buf_t *idb);
 116  117  static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
 117  118  static void idm_so_buf_teardown(idm_buf_t *idb);
 118  119  static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
 119  120  static void idm_so_tgt_svc_destroy(idm_svc_t *is);
 120  121  static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
 121  122  static void idm_so_tgt_svc_offline(idm_svc_t *is);
 122  123  static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
 123  124  static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
 124  125  static void idm_so_conn_disconnect(idm_conn_t *ic);
 125  126  static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
 126  127  static void idm_so_ini_conn_destroy(idm_conn_t *ic);
 127  128  static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
 128  129  
 129  130  /*
 130  131   * IDM Native Sockets transport operations
 131  132   */
 132  133  static
 133  134  idm_transport_ops_t idm_so_transport_ops = {
 134  135          idm_so_tx,                      /* it_tx_pdu */
 135  136          idm_so_buf_tx_to_ini,           /* it_buf_tx_to_ini */
 136  137          idm_so_buf_rx_from_ini,         /* it_buf_rx_from_ini */
 137  138          idm_so_rx_datain,               /* it_rx_datain */
 138  139          idm_so_rx_rtt,                  /* it_rx_rtt */
 139  140          idm_so_rx_dataout,              /* it_rx_dataout */
 140  141          NULL,                           /* it_alloc_conn_rsrc */
 141  142          NULL,                           /* it_free_conn_rsrc */
 142  143          NULL,                           /* it_tgt_enable_datamover */
 143  144          NULL,                           /* it_ini_enable_datamover */
 144  145          NULL,                           /* it_conn_terminate */
 145  146          idm_so_free_task_rsrc,          /* it_free_task_rsrc */
 146  147          idm_so_negotiate_key_values,    /* it_negotiate_key_values */
 147  148          idm_so_notice_key_values,       /* it_notice_key_values */
 148  149          idm_so_conn_is_capable,         /* it_conn_is_capable */
 149  150          idm_so_buf_alloc,               /* it_buf_alloc */
 150  151          idm_so_buf_free,                /* it_buf_free */
 151  152          idm_so_buf_setup,               /* it_buf_setup */
 152  153          idm_so_buf_teardown,            /* it_buf_teardown */
 153  154          idm_so_tgt_svc_create,          /* it_tgt_svc_create */
 154  155          idm_so_tgt_svc_destroy,         /* it_tgt_svc_destroy */
 155  156          idm_so_tgt_svc_online,          /* it_tgt_svc_online */
 156  157          idm_so_tgt_svc_offline,         /* it_tgt_svc_offline */
 157  158          idm_so_tgt_conn_destroy,        /* it_tgt_conn_destroy */
 158  159          idm_so_tgt_conn_connect,        /* it_tgt_conn_connect */
 159  160          idm_so_conn_disconnect,         /* it_tgt_conn_disconnect */
 160  161          idm_so_ini_conn_create,         /* it_ini_conn_create */
 161  162          idm_so_ini_conn_destroy,        /* it_ini_conn_destroy */
 162  163          idm_so_ini_conn_connect,        /* it_ini_conn_connect */
 163  164          idm_so_conn_disconnect,         /* it_ini_conn_disconnect */
 164  165          idm_so_declare_key_values       /* it_declare_key_values */
 165  166  };
 166  167  
 167  168  kmutex_t        idm_so_timed_socket_mutex;
 168  169  
 169  170  int32_t idm_so_sndbuf = IDM_SNDBUF_SIZE;
 170  171  int32_t idm_so_rcvbuf = IDM_RCVBUF_SIZE;
 171  172  
 172  173  /*
 173  174   * idm_so_init()
 174  175   * Sockets transport initialization
 175  176   */
 176  177  void
 177  178  idm_so_init(idm_transport_t *it)
 178  179  {
 179  180          /* Cache for IDM Data and R2T Transmit PDU's */
 180  181          idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
 181  182              sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
 182  183              &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
 183  184  
 184  185          /* Cache for IDM Receive PDU's */
 185  186          idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
 186  187              sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
 187  188              &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
 188  189  
 189  190          /* 128k buffer cache */
 190  191          idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
 191  192              IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
 192  193  
 193  194          /* Set the sockets transport ops */
 194  195          it->it_ops = &idm_so_transport_ops;
 195  196  
 196  197          mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
 197  198  
 198  199  }
 199  200  
 200  201  /*
 201  202   * idm_so_fini()
 202  203   * Sockets transport teardown
 203  204   */
 204  205  void
 205  206  idm_so_fini(void)
 206  207  {
 207  208          kmem_cache_destroy(idm.idm_so_128k_buf_cache);
 208  209          kmem_cache_destroy(idm.idm_sotx_pdu_cache);
 209  210          kmem_cache_destroy(idm.idm_sorx_pdu_cache);
 210  211          mutex_destroy(&idm_so_timed_socket_mutex);
 211  212  }
 212  213  
 213  214  ksocket_t
 214  215  idm_socreate(int domain, int type, int protocol)
 215  216  {
 216  217          ksocket_t ks;
 217  218  
 218  219          if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
 219  220              CRED())) {
 220  221                  return (ks);
 221  222          } else {
 222  223                  return (NULL);
 223  224          }
 224  225  }
 225  226  
 226  227  /*
 227  228   * idm_soshutdown will disconnect the socket and prevent subsequent PDU
 228  229   * reception and transmission.  The sonode still exists but its state
 229  230   * gets modified to indicate it is no longer connected.  Calls to
 230  231   * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used
 231  232   * regain control of a thread stuck in idm_sorecv.
 232  233   */
 233  234  void
 234  235  idm_soshutdown(ksocket_t so)
 235  236  {
 236  237          (void) ksocket_shutdown(so, SHUT_RDWR, CRED());
 237  238  }
 238  239  
 239  240  /*
 240  241   * idm_sodestroy releases all resources associated with a socket previously
 241  242   * created with idm_socreate.  The socket must be shutdown using
 242  243   * idm_soshutdown before the socket is destroyed with idm_sodestroy,
 243  244   * otherwise undefined behavior will result.
 244  245   */
 245  246  void
 246  247  idm_sodestroy(ksocket_t ks)
 247  248  {
 248  249          (void) ksocket_close(ks, CRED());
 249  250  }
 250  251  
 251  252  /*
 252  253   * Function to compare two addresses in sockaddr_storage format
 253  254   */
 254  255  
 255  256  int
 256  257  idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
 257  258      const struct sockaddr_storage *cmp_ss2,
 258  259      boolean_t v4_mapped_as_v4,
 259  260      boolean_t compare_ports)
 260  261  {
 261  262          struct sockaddr_storage                 mapped_v4_ss1, mapped_v4_ss2;
 262  263          const struct sockaddr_storage           *ss1, *ss2;
 263  264          struct in_addr                          *in1, *in2;
 264  265          struct in6_addr                         *in61, *in62;
 265  266          int i;
 266  267  
 267  268          /*
 268  269           * Normalize V4-mapped IPv6 addresses into V4 format if
 269  270           * v4_mapped_as_v4 is B_TRUE.
 270  271           */
 271  272          ss1 = cmp_ss1;
 272  273          ss2 = cmp_ss2;
 273  274          if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
 274  275                  in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
 275  276                  if (IN6_IS_ADDR_V4MAPPED(in61)) {
 276  277                          bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
 277  278                          mapped_v4_ss1.ss_family = AF_INET;
 278  279                          ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
 279  280                              ((struct sockaddr_in *)ss1)->sin_port;
 280  281                          IN6_V4MAPPED_TO_INADDR(in61,
 281  282                              &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
 282  283                          ss1 = &mapped_v4_ss1;
 283  284                  }
 284  285          }
 285  286          ss2 = cmp_ss2;
 286  287          if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
 287  288                  in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
 288  289                  if (IN6_IS_ADDR_V4MAPPED(in62)) {
 289  290                          bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
 290  291                          mapped_v4_ss2.ss_family = AF_INET;
 291  292                          ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
 292  293                              ((struct sockaddr_in *)ss2)->sin_port;
 293  294                          IN6_V4MAPPED_TO_INADDR(in62,
 294  295                              &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
 295  296                          ss2 = &mapped_v4_ss2;
 296  297                  }
 297  298          }
 298  299  
 299  300          /*
 300  301           * Compare ports, then address family, then ip address
 301  302           */
 302  303          if (compare_ports &&
 303  304              (((struct sockaddr_in *)ss1)->sin_port !=
 304  305              ((struct sockaddr_in *)ss2)->sin_port)) {
 305  306                  if (((struct sockaddr_in *)ss1)->sin_port >
 306  307                      ((struct sockaddr_in *)ss2)->sin_port)
 307  308                          return (1);
 308  309                  else
 309  310                          return (-1);
 310  311          }
 311  312  
 312  313          /*
 313  314           * ports are the same
 314  315           */
 315  316          if (ss1->ss_family != ss2->ss_family) {
 316  317                  if (ss1->ss_family == AF_INET)
 317  318                          return (1);
 318  319                  else
 319  320                          return (-1);
 320  321          }
 321  322  
 322  323          /*
 323  324           * address families are the same
 324  325           */
 325  326          if (ss1->ss_family == AF_INET) {
 326  327                  in1 = &((struct sockaddr_in *)ss1)->sin_addr;
 327  328                  in2 = &((struct sockaddr_in *)ss2)->sin_addr;
 328  329  
 329  330                  if (in1->s_addr > in2->s_addr)
 330  331                          return (1);
 331  332                  else if (in1->s_addr < in2->s_addr)
 332  333                          return (-1);
 333  334                  else
 334  335                          return (0);
 335  336          } else if (ss1->ss_family == AF_INET6) {
 336  337                  in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
 337  338                  in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
 338  339  
 339  340                  for (i = 0; i < 4; i++) {
 340  341                          if (in61->s6_addr32[i] > in62->s6_addr32[i])
 341  342                                  return (1);
 342  343                          else if (in61->s6_addr32[i] < in62->s6_addr32[i])
 343  344                                  return (-1);
 344  345                  }
 345  346                  return (0);
 346  347          }
 347  348  
 348  349          return (1);
 349  350  }
 350  351  
 351  352  /*
 352  353   * IP address filter functions to flag addresses that should not
 353  354   * go out to initiators through discovery.
 354  355   */
 355  356  static boolean_t
 356  357  idm_v4_addr_okay(struct in_addr *in_addr)
 357  358  {
 358  359          in_addr_t addr = ntohl(in_addr->s_addr);
 359  360  
 360  361          if ((INADDR_NONE == addr) ||
 361  362              (IN_MULTICAST(addr)) ||
 362  363              ((addr >> IN_CLASSA_NSHIFT) == 0) ||
 363  364              ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
 364  365                  return (B_FALSE);
 365  366          }
 366  367          return (B_TRUE);
 367  368  }
 368  369  
 369  370  static boolean_t
 370  371  idm_v6_addr_okay(struct in6_addr *addr6)
 371  372  {
 372  373  
 373  374          if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
 374  375              (IN6_IS_ADDR_LOOPBACK(addr6)) ||
 375  376              (IN6_IS_ADDR_MULTICAST(addr6)) ||
 376  377              (IN6_IS_ADDR_V4MAPPED(addr6)) ||
 377  378              (IN6_IS_ADDR_V4COMPAT(addr6)) ||
 378  379              (IN6_IS_ADDR_LINKLOCAL(addr6))) {
 379  380                  return (B_FALSE);
 380  381          }
 381  382          return (B_TRUE);
 382  383  }
 383  384  
 384  385  /*
 385  386   * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
 386  387   * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
 387  388   */
 388  389  int
 389  390  idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
 390  391  {
 391  392          ksocket_t               so4, so6;
 392  393          struct lifnum           lifn;
 393  394          struct lifconf          lifc;
 394  395          struct lifreq           *lp;
 395  396          int                     rval;
 396  397          int                     numifs;
 397  398          int                     bufsize;
 398  399          void                    *buf;
 399  400          int                     i, j, n, rc;
 400  401          struct sockaddr_storage ss;
 401  402          struct sockaddr_in      *sin;
 402  403          struct sockaddr_in6     *sin6;
 403  404          idm_addr_t              *ip;
 404  405          idm_addr_list_t         *ipaddr = NULL;
 405  406          int                     size_ipaddr;
 406  407  
 407  408          *ipaddr_p = NULL;
 408  409          size_ipaddr = 0;
 409  410          buf = NULL;
 410  411  
 411  412          /* create an ipv4 and ipv6 UDP socket */
 412  413          if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
 413  414                  return (0);
 414  415          if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
 415  416                  idm_sodestroy(so6);
 416  417                  return (0);
 417  418          }
 418  419  
 419  420  
 420  421  retry_count:
 421  422          /* snapshot the current number of interfaces */
 422  423          lifn.lifn_family = PF_UNSPEC;
 423  424          lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
 424  425          lifn.lifn_count = 0;
 425  426          /* use vp6 for ioctls with unspecified families by default */
 426  427          if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
 427  428              != 0) {
 428  429                  goto cleanup;
 429  430          }
 430  431  
 431  432          numifs = lifn.lifn_count;
 432  433          if (numifs <= 0) {
 433  434                  goto cleanup;
 434  435          }
 435  436  
 436  437          /* allocate extra room in case more interfaces appear */
 437  438          numifs += 10;
 438  439  
 439  440          /* get the interface names and ip addresses */
 440  441          bufsize = numifs * sizeof (struct lifreq);
 441  442          buf = kmem_alloc(bufsize, KM_SLEEP);
 442  443  
 443  444          lifc.lifc_family = AF_UNSPEC;
 444  445          lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
 445  446          lifc.lifc_len = bufsize;
 446  447          lifc.lifc_buf = buf;
 447  448          rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
 448  449          if (rc != 0) {
 449  450                  goto cleanup;
 450  451          }
 451  452          /* if our extra room is used up, try again */
 452  453          if (bufsize <= lifc.lifc_len) {
 453  454                  kmem_free(buf, bufsize);
 454  455                  buf = NULL;
 455  456                  goto retry_count;
 456  457          }
 457  458          /* calc actual number of ifconfs */
 458  459          n = lifc.lifc_len / sizeof (struct lifreq);
 459  460  
 460  461          /* get ip address */
 461  462          if (n > 0) {
 462  463                  size_ipaddr = sizeof (idm_addr_list_t) +
 463  464                      (n - 1) * sizeof (idm_addr_t);
 464  465                  ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
 465  466          } else {
 466  467                  goto cleanup;
 467  468          }
 468  469  
 469  470          /*
 470  471           * Examine the array of interfaces and filter uninteresting ones
 471  472           */
 472  473          for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
 473  474  
 474  475                  /*
 475  476                   * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
 476  477                   */
 477  478                  ss = lp->lifr_addr;
 478  479                  /*
 479  480                   * fetch the flags using the socket of the correct family
 480  481                   */
 481  482                  switch (ss.ss_family) {
 482  483                  case AF_INET:
 483  484                          rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
 484  485                              &rval, CRED());
 485  486                          break;
 486  487                  case AF_INET6:
 487  488                          rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
 488  489                              &rval, CRED());
 489  490                          break;
 490  491                  default:
 491  492                          continue;
 492  493                  }
 493  494                  if (rc == 0) {
 494  495                          /*
 495  496                           * If we got the flags, skip uninteresting
 496  497                           * interfaces based on flags
 497  498                           */
 498  499                          if ((lp->lifr_flags & IFF_UP) != IFF_UP)
 499  500                                  continue;
 500  501                          if (lp->lifr_flags &
 501  502                              (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
 502  503                                  continue;
 503  504                  }
 504  505  
 505  506                  /* save ip address */
 506  507                  ip = &ipaddr->al_addrs[j];
 507  508                  switch (ss.ss_family) {
 508  509                  case AF_INET:
 509  510                          sin = (struct sockaddr_in *)&ss;
 510  511                          if (!idm_v4_addr_okay(&sin->sin_addr))
 511  512                                  continue;
 512  513                          ip->a_addr.i_addr.in4 = sin->sin_addr;
 513  514                          ip->a_addr.i_insize = sizeof (struct in_addr);
 514  515                          break;
 515  516                  case AF_INET6:
 516  517                          sin6 = (struct sockaddr_in6 *)&ss;
 517  518                          if (!idm_v6_addr_okay(&sin6->sin6_addr))
 518  519                                  continue;
 519  520                          ip->a_addr.i_addr.in6 = sin6->sin6_addr;
 520  521                          ip->a_addr.i_insize = sizeof (struct in6_addr);
 521  522                          break;
 522  523                  default:
 523  524                          continue;
 524  525                  }
 525  526                  j++;
 526  527          }
 527  528  
 528  529          if (j == 0) {
 529  530                  /* no valid ifaddr */
 530  531                  kmem_free(ipaddr, size_ipaddr);
 531  532                  size_ipaddr = 0;
 532  533                  ipaddr = NULL;
 533  534          } else {
 534  535                  ipaddr->al_out_cnt = j;
 535  536          }
 536  537  
 537  538  
 538  539  cleanup:
 539  540          idm_sodestroy(so6);
 540  541          idm_sodestroy(so4);
 541  542  
 542  543          if (buf != NULL)
 543  544                  kmem_free(buf, bufsize);
 544  545  
 545  546          *ipaddr_p = ipaddr;
 546  547          return (size_ipaddr);
 547  548  }
 548  549  
 549  550  int
 550  551  idm_sorecv(ksocket_t so, void *msg, size_t len)
 551  552  {
 552  553          iovec_t iov;
 553  554  
 554  555          ASSERT(so != NULL);
 555  556          ASSERT(len != 0);
 556  557  
 557  558          /*
 558  559           * Fill in iovec and receive data
 559  560           */
 560  561          iov.iov_base = msg;
 561  562          iov.iov_len = len;
 562  563  
 563  564          return (idm_iov_sorecv(so, &iov, 1, len));
 564  565  }
 565  566  
 566  567  /*
 567  568   * idm_sosendto - Sends a buffered data on a non-connected socket.
 568  569   *
 569  570   * This function puts the data provided on the wire by calling sosendmsg.
 570  571   * It will return only when all the data has been sent or if an error
 571  572   * occurs.
 572  573   *
 573  574   * Returns 0 for success, the socket errno value if sosendmsg fails, and
 574  575   * -1 if sosendmsg returns success but uio_resid != 0
 575  576   */
 576  577  int
 577  578  idm_sosendto(ksocket_t so, void *buff, size_t len,
 578  579      struct sockaddr *name, socklen_t namelen)
 579  580  {
 580  581          struct msghdr           msg;
 581  582          struct iovec            iov[1];
 582  583          int                     error;
 583  584          size_t                  sent = 0;
 584  585  
 585  586          iov[0].iov_base = buff;
 586  587          iov[0].iov_len  = len;
 587  588  
 588  589          /* Initialization of the message header. */
 589  590          bzero(&msg, sizeof (msg));
 590  591          msg.msg_iov     = iov;
 591  592          msg.msg_iovlen  = 1;
 592  593          msg.msg_name    = name;
 593  594          msg.msg_namelen = namelen;
 594  595  
 595  596          if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
 596  597                  /* Data sent */
 597  598                  if (sent == len) {
 598  599                          /* All data sent.  Success. */
 599  600                          return (0);
 600  601                  } else {
 601  602                          /* Not all data was sent.  Failure */
 602  603                          return (-1);
 603  604                  }
 604  605          }
 605  606  
 606  607          /* Send failed */
 607  608          return (error);
 608  609  }
 609  610  
 610  611  /*
 611  612   * idm_iov_sosend - Sends an iovec on a connection.
 612  613   *
 613  614   * This function puts the data provided on the wire by calling sosendmsg.
 614  615   * It will return only when all the data has been sent or if an error
 615  616   * occurs.
 616  617   *
 617  618   * Returns 0 for success, the socket errno value if sosendmsg fails, and
 618  619   * -1 if sosendmsg returns success but uio_resid != 0
 619  620   */
 620  621  int
 621  622  idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
 622  623  {
 623  624          struct msghdr           msg;
 624  625          int                     error;
 625  626          size_t                  sent = 0;
 626  627  
 627  628          ASSERT(iop != NULL);
 628  629  
 629  630          /* Initialization of the message header. */
 630  631          bzero(&msg, sizeof (msg));
 631  632          msg.msg_iov     = iop;
 632  633          msg.msg_iovlen  = iovlen;
 633  634  
 634  635          if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
 635  636              == 0) {
 636  637                  /* Data sent */
 637  638                  if (sent == total_len) {
 638  639                          /* All data sent.  Success. */
 639  640                          return (0);
 640  641                  } else {
 641  642                          /* Not all data was sent.  Failure */
 642  643                          return (-1);
 643  644                  }
 644  645          }
 645  646  
 646  647          /* Send failed */
 647  648          return (error);
 648  649  }
 649  650  
 650  651  /*
 651  652   * idm_iov_sorecv - Receives an iovec from a connection
 652  653   *
 653  654   * This function gets the data asked for from the socket.  It will return
 654  655   * only when all the requested data has been retrieved or if an error
 655  656   * occurs.
 656  657   *
 657  658   * Returns 0 for success, the socket errno value if sorecvmsg fails, and
 658  659   * -1 if sorecvmsg returns success but uio_resid != 0
 659  660   */
 660  661  int
 661  662  idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
 662  663  {
 663  664          struct msghdr           msg;
 664  665          int                     error;
 665  666          size_t                  recv;
 666  667          int                     flags;
 667  668  
 668  669          ASSERT(iop != NULL);
 669  670  
 670  671          /* Initialization of the message header. */
 671  672          bzero(&msg, sizeof (msg));
 672  673          msg.msg_iov     = iop;
 673  674          msg.msg_iovlen  = iovlen;
 674  675          flags           = MSG_WAITALL;
 675  676  
 676  677          if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
 677  678              == 0) {
 678  679                  /* Received data */
 679  680                  if (recv == total_len) {
 680  681                          /* All requested data received.  Success */
 681  682                          return (0);
 682  683                  } else {
 683  684                          /*
 684  685                           * Not all data was received.  The connection has
 685  686                           * probably failed.
 686  687                           */
 687  688                          return (-1);
 688  689                  }
 689  690          }
 690  691  
 691  692          /* Receive failed */
 692  693          return (error);
 693  694  }
 694  695  
 695  696  static void
 696  697  idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
 697  698  {
 698  699          int     conn_abort = 10000;
 699  700          int     conn_notify = 2000;
 700  701          int     abort = 30000;
 701  702  
 702  703          /* Pre-connect socket options */
 703  704          (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 704  705              TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
 705  706              CRED());
 706  707          if (boot_conn == B_FALSE) {
 707  708                  (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 708  709                      TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
 709  710                      CRED());
 710  711                  (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 711  712                      TCP_ABORT_THRESHOLD,
 712  713                      (char *)&abort, sizeof (int), CRED());
 713  714          }
 714  715  }
 715  716  
 716  717  static void
 717  718  idm_set_postconnect_options(ksocket_t ks)
 718  719  {
 719  720          const int       on = 1;
 720  721  
 721  722          /* Set connect options */
 722  723          (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
 723  724              (char *)&idm_so_rcvbuf, sizeof (idm_so_rcvbuf), CRED());
 724  725          (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
 725  726              (char *)&idm_so_sndbuf, sizeof (idm_so_sndbuf), CRED());
 726  727          (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
 727  728              (char *)&on, sizeof (on), CRED());
 728  729  }
 729  730  
 730  731  static uint32_t
 731  732  n2h24(const uchar_t *ptr)
 732  733  {
 733  734          return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
 734  735  }
 735  736  
 736  737  static boolean_t
 737  738  idm_dataseglenokay(idm_conn_t *ic, idm_pdu_t *pdu)
 738  739  {
 739  740          iscsi_hdr_t     *bhs;
 740  741  
 741  742          if (ic->ic_conn_type == CONN_TYPE_TGT &&
 742  743              pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
 743  744                  IDM_CONN_LOG(CE_WARN,
 744  745                      "idm_dataseglenokay: exceeded the max data segment length");
 745  746                  return (B_FALSE);
 746  747          }
 747  748  
 748  749          bhs = pdu->isp_hdr;
 749  750          /*
 750  751           * Filter out any RFC3720 data-size violations.
 751  752           */
 752  753          switch (IDM_PDU_OPCODE(pdu)) {
 753  754          case ISCSI_OP_SCSI_TASK_MGT_MSG:
 754  755          case ISCSI_OP_SCSI_TASK_MGT_RSP:
 755  756          case ISCSI_OP_RTT_RSP:
 756  757          case ISCSI_OP_LOGOUT_CMD:
 757  758                  /*
 758  759                   * Data-segment not allowed and additional headers not allowed.
 759  760                   * (both must be zero according to the RFC3720.)
 760  761                   */
 761  762                  if (bhs->hlength != 0 || pdu->isp_datalen != 0)
 762  763                          return (B_FALSE);
 763  764                  break;
 764  765          case ISCSI_OP_NOOP_OUT:
 765  766          case ISCSI_OP_LOGIN_CMD:
 766  767          case ISCSI_OP_TEXT_CMD:
 767  768          case ISCSI_OP_SNACK_CMD:
 768  769          case ISCSI_OP_NOOP_IN:
 769  770          case ISCSI_OP_SCSI_RSP:
 770  771          case ISCSI_OP_LOGIN_RSP:
 771  772          case ISCSI_OP_TEXT_RSP:
 772  773          case ISCSI_OP_SCSI_DATA_RSP:
 773  774          case ISCSI_OP_LOGOUT_RSP:
 774  775          case ISCSI_OP_ASYNC_EVENT:
 775  776          case ISCSI_OP_REJECT_MSG:
 776  777                  /*
 777  778                   * Additional headers not allowed.
 778  779                   * (must be zero according to RFC3720.)
 779  780                   */
 780  781                  if (bhs->hlength)
 781  782                          return (B_FALSE);
 782  783                  break;
 783  784          case ISCSI_OP_SCSI_CMD:
 784  785                  /*
 785  786                   * See RFC3720, section 10.3
 786  787                   *
 787  788                   * For pure read cmds, data-segment-length must be zero.
 788  789                   * For non-final transfers, data-size must be a whole number of
 789  790                   * 4-byte words.
 790  791                   * For any transfer, an expected byte count must be provided.
 791  792                   * For bidirectional transfers, an additional-header must be
 792  793                   * provided (for the read byte-count.)
 793  794                   */
 794  795                  if (pdu->isp_datalen) {
 795  796                          if ((bhs->flags & (ISCSI_FLAG_CMD_READ |
 796  797                              ISCSI_FLAG_CMD_WRITE)) == ISCSI_FLAG_CMD_READ)
 797  798                                  return (B_FALSE);
 798  799                          if ((bhs->flags & ISCSI_FLAG_FINAL) == 0 &&
 799  800                              ((pdu->isp_datalen & 0x3) != 0))
 800  801                                  return (B_FALSE);
 801  802                  }
 802  803                  if (bhs->flags & (ISCSI_FLAG_CMD_READ |
 803  804                      ISCSI_FLAG_CMD_WRITE)) {
 804  805                          iscsi_scsi_cmd_hdr_t *cmdhdr =
 805  806                              (iscsi_scsi_cmd_hdr_t *)bhs;
 806  807                          /*
 807  808                           * we're transferring some data, we must have a
 808  809                           * byte count
 809  810                           */
 810  811                          if (cmdhdr->data_length == 0)
 811  812                                  return (B_FALSE);
 812  813                  }
 813  814                  break;
 814  815          case ISCSI_OP_SCSI_DATA:
 815  816                  /*
 816  817                   * See RFC3720, section 10.7
 817  818                   *
 818  819                   * Additional headers aren't allowed, and the data-size must
 819  820                   * be a whole number of 4-byte words (unless the final bit
 820  821                   * is set.)
 821  822                   */
 822  823                  if (bhs->hlength)
 823  824                          return (B_FALSE);
 824  825                  if ((bhs->flags & ISCSI_FLAG_FINAL) == 0 &&
 825  826                      ((pdu->isp_datalen & 0x3) != 0))
 826  827                          return (B_FALSE);
 827  828                  break;
 828  829          default:
 829  830                  break;
 830  831          }
 831  832          return (B_TRUE);
 832  833  }
 833  834  
 834  835  static idm_status_t
 835  836  idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
 836  837  {
 837  838          iscsi_hdr_t     *bhs;
 838  839          uint32_t        hdr_digest_crc;
 839  840          uint32_t        crc_calculated;
 840  841          void            *new_hdr;
 841  842          int             ahslen = 0;
 842  843          int             total_len = 0;
 843  844          int             iovlen = 0;
 844  845          struct iovec    iov[2];
 845  846          idm_so_conn_t   *so_conn;
 846  847          int             rc;
 847  848  
 848  849          so_conn = ic->ic_transport_private;
 849  850  
 850  851          /*
 851  852           * Read BHS
  
    | 
      ↓ open down ↓ | 
    815 lines elided | 
    
      ↑ open up ↑ | 
  
 852  853           */
 853  854          bhs = pdu->isp_hdr;
 854  855          rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
 855  856          if (rc != IDM_STATUS_SUCCESS) {
 856  857                  return (IDM_STATUS_FAIL);
 857  858          }
 858  859  
 859  860          /*
 860  861           * Check actual AHS length against the amount available in the buffer
 861  862           */
      863 +        if ((IDM_PDU_OPCODE(pdu) != ISCSI_OP_SCSI_CMD) &&
      864 +            (bhs->hlength != 0)) {
      865 +                /* ---- hlength is only valid for SCSI Request ---- */
      866 +                return (IDM_STATUS_FAIL);
      867 +        }
 862  868          pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
 863  869              (bhs->hlength * sizeof (uint32_t));
 864  870          pdu->isp_datalen = n2h24(bhs->dlength);
 865  871  
 866  872          if (!idm_dataseglenokay(ic, pdu)) {
 867  873                  IDM_CONN_LOG(CE_WARN,
 868  874                      "idm_sorecvhdr: invalid data segment length");
 869  875                  return (IDM_STATUS_FAIL);
 870  876          }
 871      -        if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
      877 +        if (bhs->hlength > IDM_SORX_WIRE_AHSLEN) {
 872  878                  /* Allocate a new header segment and change the callback */
 873  879                  new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
 874  880                  bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
 875  881                  pdu->isp_hdr = new_hdr;
 876  882                  pdu->isp_flags |= IDM_PDU_ADDL_HDR;
 877  883  
 878  884                  /*
 879  885                   * This callback will restore the expected values after
 880  886                   * the RX PDU has been processed.
 881  887                   */
 882  888                  pdu->isp_callback = idm_sorx_addl_pdu_cb;
 883  889          }
 884  890  
 885  891          /*
 886  892           * Setup receipt of additional header and header digest (if enabled).
 887  893           */
 888  894          if (bhs->hlength > 0) {
 889  895                  iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
 890  896                  ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
 891  897                  iov[iovlen].iov_len = ahslen;
 892  898                  total_len += iov[iovlen].iov_len;
 893  899                  iovlen++;
 894  900          }
 895  901  
 896  902          if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
 897  903                  iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
 898  904                  iov[iovlen].iov_len = sizeof (hdr_digest_crc);
 899  905                  total_len += iov[iovlen].iov_len;
 900  906                  iovlen++;
 901  907          }
 902  908  
 903  909          if ((iovlen != 0) &&
 904  910              (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
 905  911              total_len) != 0)) {
 906  912                  return (IDM_STATUS_FAIL);
 907  913          }
 908  914  
 909  915          /*
 910  916           * Validate header digest if enabled
 911  917           */
 912  918          if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
 913  919                  crc_calculated = idm_crc32c(pdu->isp_hdr,
 914  920                      sizeof (iscsi_hdr_t) + ahslen);
 915  921                  if (crc_calculated != hdr_digest_crc) {
 916  922                          /* Invalid Header Digest */
 917  923                          return (IDM_STATUS_HEADER_DIGEST);
 918  924                  }
 919  925          }
 920  926  
 921  927          return (0);
 922  928  }
 923  929  
 924  930  /*
 925  931   * idm_so_ini_conn_create()
 926  932   * Allocate the sockets transport connection resources.
 927  933   */
 928  934  static idm_status_t
 929  935  idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
 930  936  {
 931  937          ksocket_t       so;
 932  938          idm_so_conn_t   *so_conn;
 933  939          idm_status_t    idmrc;
 934  940  
 935  941          so = idm_socreate(cr->cr_domain, cr->cr_type,
 936  942              cr->cr_protocol);
 937  943          if (so == NULL) {
 938  944                  return (IDM_STATUS_FAIL);
 939  945          }
 940  946  
 941  947          /* Bind the socket if configured to do so */
 942  948          if (cr->cr_bound) {
 943  949                  if (ksocket_bind(so, &cr->cr_bound_addr.sin,
 944  950                      SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
 945  951                          idm_sodestroy(so);
 946  952                          return (IDM_STATUS_FAIL);
 947  953                  }
 948  954          }
 949  955  
 950  956          idmrc = idm_so_conn_create_common(ic, so);
 951  957          if (idmrc != IDM_STATUS_SUCCESS) {
 952  958                  idm_soshutdown(so);
 953  959                  idm_sodestroy(so);
 954  960                  return (IDM_STATUS_FAIL);
 955  961          }
 956  962  
 957  963          so_conn = ic->ic_transport_private;
 958  964          /* Set up socket options */
 959  965          idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
 960  966  
 961  967          return (IDM_STATUS_SUCCESS);
 962  968  }
 963  969  
 964  970  /*
 965  971   * idm_so_ini_conn_destroy()
 966  972   * Tear down the sockets transport connection resources.
 967  973   */
 968  974  static void
 969  975  idm_so_ini_conn_destroy(idm_conn_t *ic)
 970  976  {
 971  977          idm_so_conn_destroy_common(ic);
 972  978  }
 973  979  
 974  980  /*
 975  981   * idm_so_ini_conn_connect()
 976  982   * Establish the connection referred to by the handle previously allocated via
 977  983   * idm_so_ini_conn_create().
 978  984   */
 979  985  static idm_status_t
 980  986  idm_so_ini_conn_connect(idm_conn_t *ic)
 981  987  {
 982  988          idm_so_conn_t   *so_conn;
 983  989          struct sonode   *node = NULL;
 984  990          int             rc;
 985  991          clock_t         lbolt, conn_login_max, conn_login_interval;
 986  992          boolean_t       nonblock;
 987  993  
 988  994          so_conn = ic->ic_transport_private;
 989  995          nonblock = ic->ic_conn_params.nonblock_socket;
 990  996          conn_login_max = ic->ic_conn_params.conn_login_max;
 991  997          conn_login_interval = ddi_get_lbolt() +
 992  998              SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
 993  999  
 994 1000          if (nonblock == B_TRUE) {
 995 1001                  node = ((struct sonode *)(so_conn->ic_so));
 996 1002                  /* Set the socket to non-blocking mode */
 997 1003                  idm_so_socket_set_nonblock(node);
 998 1004                  do {
 999 1005                          rc = ksocket_connect(so_conn->ic_so,
1000 1006                              &ic->ic_ini_dst_addr.sin,
1001 1007                              (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
1002 1008                              CRED());
1003 1009                          if (rc == 0 || rc == EISCONN) {
1004 1010                                  /* connected, or already connected */
1005 1011                                  rc = IDM_STATUS_SUCCESS;
1006 1012                                  break;
1007 1013                          }
1008 1014                          if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
1009 1015                              (rc == ECONNRESET)) {
1010 1016                                  /* connect timed out, was refused or reset */
1011 1017                                  break;
1012 1018                          }
1013 1019                          lbolt = ddi_get_lbolt();
1014 1020                          if (lbolt > conn_login_max) {
1015 1021                                  /*
1016 1022                                   * Connection retry timed out;
1017 1023                                   * failed to connect to target.
1018 1024                                   */
1019 1025                                  break;
1020 1026                          }
1021 1027                          if (lbolt < conn_login_interval) {
1022 1028                                  if ((rc == EINPROGRESS) || (rc == EALREADY)) {
1023 1029                                          /* TCP connect still in progress */
1024 1030                                          delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
1025 1031                                          continue;
1026 1032                                  } else {
1027 1033                                          delay(conn_login_interval - lbolt);
1028 1034                                  }
1029 1035                          }
1030 1036                          conn_login_interval = ddi_get_lbolt() +
1031 1037                              SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
1032 1038                  } while (rc != 0);
1033 1039                  /* on success, restore blocking mode */
1034 1040                  if (rc == IDM_STATUS_SUCCESS) {
1035 1041                          idm_so_socket_set_block(node);
1036 1042                  }
1037 1043          } else {
1038 1044                  rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
1039 1045                      (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
1040 1046          }
1041 1047  
1042 1048          if (rc != 0) {
1043 1049                  idm_soshutdown(so_conn->ic_so);
1044 1050                  return (IDM_STATUS_FAIL);
1045 1051          }
1046 1052  
1047 1053          idm_so_conn_connect_common(ic);
1048 1054  
1049 1055          idm_set_postconnect_options(so_conn->ic_so);
1050 1056  
1051 1057          return (IDM_STATUS_SUCCESS);
1052 1058  }
1053 1059  
1054 1060  idm_status_t
1055 1061  idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
1056 1062  {
1057 1063          idm_status_t    idmrc;
1058 1064  
1059 1065          idm_set_postconnect_options(new_so);
1060 1066          idmrc = idm_so_conn_create_common(ic, new_so);
1061 1067  
1062 1068          return (idmrc);
1063 1069  }
1064 1070  
1065 1071  static void
1066 1072  idm_so_tgt_conn_destroy(idm_conn_t *ic)
1067 1073  {
1068 1074          idm_so_conn_destroy_common(ic);
1069 1075  }
1070 1076  
1071 1077  /*
1072 1078   * idm_so_tgt_conn_connect()
1073 1079   * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
1074 1080   * is invoked from the SM as a result of an inbound connection request.
1075 1081   */
1076 1082  static idm_status_t
1077 1083  idm_so_tgt_conn_connect(idm_conn_t *ic)
1078 1084  {
1079 1085          idm_so_conn_connect_common(ic);
1080 1086  
1081 1087          return (IDM_STATUS_SUCCESS);
1082 1088  }
1083 1089  
1084 1090  static idm_status_t
1085 1091  idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
1086 1092  {
1087 1093          idm_so_conn_t   *so_conn;
1088 1094  
1089 1095          so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
1090 1096          so_conn->ic_so = new_so;
1091 1097  
1092 1098          ic->ic_transport_private = so_conn;
1093 1099          ic->ic_transport_hdrlen = 0;
1094 1100  
1095 1101          /* Set the scoreboarding flag on this connection */
1096 1102          ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
1097 1103          ic->ic_conn_params.max_recv_dataseglen =
1098 1104              ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1099 1105          ic->ic_conn_params.max_xmit_dataseglen =
1100 1106              ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1101 1107  
1102 1108          /*
1103 1109           * Initialize tx thread mutex and list
1104 1110           */
1105 1111          mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1106 1112          cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1107 1113          list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1108 1114              offsetof(idm_pdu_t, idm_tx_link));
1109 1115  
1110 1116          return (IDM_STATUS_SUCCESS);
1111 1117  }
1112 1118  
1113 1119  static void
1114 1120  idm_so_conn_destroy_common(idm_conn_t *ic)
1115 1121  {
1116 1122          idm_so_conn_t   *so_conn = ic->ic_transport_private;
1117 1123  
1118 1124          ic->ic_transport_private = NULL;
1119 1125          idm_sodestroy(so_conn->ic_so);
1120 1126          list_destroy(&so_conn->ic_tx_list);
1121 1127          mutex_destroy(&so_conn->ic_tx_mutex);
1122 1128          cv_destroy(&so_conn->ic_tx_cv);
1123 1129  
1124 1130          kmem_free(so_conn, sizeof (idm_so_conn_t));
1125 1131  }
1126 1132  
1127 1133  static void
1128 1134  idm_so_conn_connect_common(idm_conn_t *ic)
1129 1135  {
1130 1136          idm_so_conn_t   *so_conn;
1131 1137          struct sockaddr_in6     t_addr;
1132 1138          socklen_t       t_addrlen = 0;
1133 1139  
1134 1140          so_conn = ic->ic_transport_private;
1135 1141          bzero(&t_addr, sizeof (struct sockaddr_in6));
1136 1142          t_addrlen = sizeof (struct sockaddr_in6);
1137 1143  
1138 1144          /* Set the local and remote addresses in the idm conn handle */
1139 1145          (void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
1140 1146              &t_addrlen, CRED());
1141 1147          bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
1142 1148          (void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
1143 1149              &t_addrlen, CRED());
1144 1150          bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
1145 1151  
1146 1152          mutex_enter(&ic->ic_mutex);
1147 1153          so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
1148 1154              &p0, TS_RUN, minclsyspri);
1149 1155          so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
1150 1156              &p0, TS_RUN, minclsyspri);
1151 1157  
1152 1158          while (so_conn->ic_rx_thread_did == 0 ||
1153 1159              so_conn->ic_tx_thread_did == 0)
1154 1160                  cv_wait(&ic->ic_cv, &ic->ic_mutex);
1155 1161          mutex_exit(&ic->ic_mutex);
1156 1162  }
1157 1163  
1158 1164  /*
1159 1165   * idm_so_conn_disconnect()
1160 1166   * Shut down the socket connection and stop the TX and RX threads
1161 1167   */
1162 1168  static void
1163 1169  idm_so_conn_disconnect(idm_conn_t *ic)
1164 1170  {
1165 1171          idm_so_conn_t   *so_conn;
1166 1172  
1167 1173          so_conn = ic->ic_transport_private;
1168 1174  
1169 1175          mutex_enter(&ic->ic_mutex);
1170 1176          so_conn->ic_rx_thread_running = B_FALSE;
1171 1177          so_conn->ic_tx_thread_running = B_FALSE;
1172 1178          /* We need to wakeup the TX thread */
1173 1179          mutex_enter(&so_conn->ic_tx_mutex);
1174 1180          cv_signal(&so_conn->ic_tx_cv);
1175 1181          mutex_exit(&so_conn->ic_tx_mutex);
1176 1182          mutex_exit(&ic->ic_mutex);
1177 1183  
1178 1184          /* This should wakeup the RX thread if it is sleeping */
1179 1185          idm_soshutdown(so_conn->ic_so);
1180 1186  
1181 1187          thread_join(so_conn->ic_tx_thread_did);
1182 1188          thread_join(so_conn->ic_rx_thread_did);
1183 1189  }
1184 1190  
1185 1191  /*
1186 1192   * idm_so_tgt_svc_create()
1187 1193   * Establish a service on an IP address and port.  idm_svc_req_t contains
1188 1194   * the service parameters.
1189 1195   */
1190 1196  /*ARGSUSED*/
1191 1197  static idm_status_t
1192 1198  idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1193 1199  {
1194 1200          idm_so_svc_t            *so_svc;
1195 1201  
1196 1202          so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1197 1203  
1198 1204          /* Set the new sockets service in svc handle */
1199 1205          is->is_so_svc = (void *)so_svc;
1200 1206  
1201 1207          return (IDM_STATUS_SUCCESS);
1202 1208  }
1203 1209  
1204 1210  /*
1205 1211   * idm_so_tgt_svc_destroy()
1206 1212   * Teardown sockets resources allocated in idm_so_tgt_svc_create()
1207 1213   */
1208 1214  static void
1209 1215  idm_so_tgt_svc_destroy(idm_svc_t *is)
1210 1216  {
1211 1217          /* the socket will have been torn down; free the service */
1212 1218          kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
1213 1219  }
1214 1220  
1215 1221  /*
1216 1222   * idm_so_tgt_svc_online()
1217 1223   * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1218 1224   */
1219 1225  
1220 1226  static idm_status_t
1221 1227  idm_so_tgt_svc_online(idm_svc_t *is)
1222 1228  {
1223 1229          idm_so_svc_t            *so_svc;
1224 1230          idm_svc_req_t           *sr = &is->is_svc_req;
1225 1231          struct sockaddr_in6     sin6_ip;
1226 1232          const uint32_t          on = 1;
1227 1233          const uint32_t          off = 0;
1228 1234  
1229 1235          mutex_enter(&is->is_mutex);
1230 1236          so_svc = (idm_so_svc_t *)is->is_so_svc;
1231 1237  
1232 1238          /*
1233 1239           * Try creating an IPv6 socket first
1234 1240           */
1235 1241          if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1236 1242                  mutex_exit(&is->is_mutex);
1237 1243                  return (IDM_STATUS_FAIL);
1238 1244          } else {
1239 1245                  bzero(&sin6_ip, sizeof (sin6_ip));
1240 1246                  sin6_ip.sin6_family = AF_INET6;
1241 1247                  sin6_ip.sin6_port = htons(sr->sr_port);
1242 1248                  sin6_ip.sin6_addr = in6addr_any;
1243 1249  
1244 1250                  (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1245 1251                      SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
1246 1252                  /*
1247 1253                   * Turn off SO_MAC_EXEMPT so future sobinds succeed
1248 1254                   */
1249 1255                  (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1250 1256                      SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
1251 1257  
1252 1258                  if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1253 1259                      sizeof (sin6_ip), CRED()) != 0) {
1254 1260                          mutex_exit(&is->is_mutex);
1255 1261                          idm_sodestroy(so_svc->is_so);
1256 1262                          return (IDM_STATUS_FAIL);
1257 1263                  }
1258 1264          }
1259 1265  
1260 1266          idm_set_postconnect_options(so_svc->is_so);
1261 1267  
1262 1268          if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
1263 1269                  mutex_exit(&is->is_mutex);
1264 1270                  idm_soshutdown(so_svc->is_so);
1265 1271                  idm_sodestroy(so_svc->is_so);
1266 1272                  return (IDM_STATUS_FAIL);
1267 1273          }
1268 1274  
1269 1275          /* Launch a watch thread */
1270 1276          so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1271 1277              is, 0, &p0, TS_RUN, minclsyspri);
1272 1278  
1273 1279          if (so_svc->is_thread == NULL) {
1274 1280                  /* Failure to launch; teardown the socket */
1275 1281                  mutex_exit(&is->is_mutex);
1276 1282                  idm_soshutdown(so_svc->is_so);
1277 1283                  idm_sodestroy(so_svc->is_so);
1278 1284                  return (IDM_STATUS_FAIL);
1279 1285          }
1280 1286          ksocket_hold(so_svc->is_so);
1281 1287          /* Wait for the port watcher thread to start */
1282 1288          while (!so_svc->is_thread_running)
1283 1289                  cv_wait(&is->is_cv, &is->is_mutex);
1284 1290          mutex_exit(&is->is_mutex);
1285 1291  
1286 1292          return (IDM_STATUS_SUCCESS);
1287 1293  }
1288 1294  
1289 1295  /*
1290 1296   * idm_so_tgt_svc_offline
1291 1297   *
1292 1298   * Stop listening on the IP address and port identified by idm_svc_t.
1293 1299   */
1294 1300  static void
1295 1301  idm_so_tgt_svc_offline(idm_svc_t *is)
1296 1302  {
1297 1303          idm_so_svc_t            *so_svc;
1298 1304          mutex_enter(&is->is_mutex);
1299 1305          so_svc = (idm_so_svc_t *)is->is_so_svc;
1300 1306          so_svc->is_thread_running = B_FALSE;
1301 1307          mutex_exit(&is->is_mutex);
1302 1308  
1303 1309          /*
1304 1310           * Teardown socket
1305 1311           */
1306 1312          idm_sodestroy(so_svc->is_so);
1307 1313  
1308 1314          /*
1309 1315           * Now we expect the port watcher thread to terminate
1310 1316           */
1311 1317          thread_join(so_svc->is_thread_did);
1312 1318  }
1313 1319  
1314 1320  /*
1315 1321   * Watch thread for target service connection establishment.
1316 1322   */
1317 1323  void
1318 1324  idm_so_svc_port_watcher(void *arg)
1319 1325  {
1320 1326          idm_svc_t               *svc = arg;
1321 1327          ksocket_t               new_so;
1322 1328          idm_conn_t              *ic;
1323 1329          idm_status_t            idmrc;
1324 1330          idm_so_svc_t            *so_svc;
1325 1331          int                     rc;
1326 1332          const uint32_t          off = 0;
1327 1333          struct sockaddr_in6     t_addr;
1328 1334          socklen_t               t_addrlen;
1329 1335  
1330 1336          bzero(&t_addr, sizeof (struct sockaddr_in6));
1331 1337          t_addrlen = sizeof (struct sockaddr_in6);
1332 1338          mutex_enter(&svc->is_mutex);
1333 1339  
1334 1340          so_svc = svc->is_so_svc;
1335 1341          so_svc->is_thread_running = B_TRUE;
1336 1342          so_svc->is_thread_did = so_svc->is_thread->t_did;
1337 1343  
1338 1344          cv_signal(&svc->is_cv);
1339 1345  
1340 1346          IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
1341 1347              svc->is_svc_req.sr_port);
1342 1348  
1343 1349          while (so_svc->is_thread_running) {
1344 1350                  mutex_exit(&svc->is_mutex);
1345 1351  
1346 1352                  if ((rc = ksocket_accept(so_svc->is_so,
1347 1353                      (struct sockaddr *)&t_addr, &t_addrlen,
1348 1354                      &new_so, CRED())) != 0) {
1349 1355                          mutex_enter(&svc->is_mutex);
1350 1356                          if (rc != ECONNABORTED && rc != EINTR) {
1351 1357                                  IDM_SVC_LOG(CE_NOTE, "idm_so_svc_port_watcher:"
1352 1358                                      " ksocket_accept failed %d", rc);
1353 1359                          }
1354 1360                          /*
1355 1361                           * Unclean shutdown of this thread is not handled;
1356 1362                           * wait for !is_thread_running.
1357 1363                           */
1358 1364                          continue;
1359 1365                  }
1360 1366                  /*
1361 1367                   * Turn off SO_MAC_EXEMPT so future sobinds succeed
1362 1368                   */
1363 1369                  (void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
1364 1370                      (char *)&off, sizeof (off), CRED());
1365 1371  
1366 1372                  idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
1367 1373                      &ic);
1368 1374                  if (idmrc != IDM_STATUS_SUCCESS) {
1369 1375                          /* Drop connection */
1370 1376                          idm_soshutdown(new_so);
1371 1377                          idm_sodestroy(new_so);
1372 1378                          mutex_enter(&svc->is_mutex);
1373 1379                          continue;
1374 1380                  }
1375 1381  
1376 1382                  idmrc = idm_so_tgt_conn_create(ic, new_so);
1377 1383                  if (idmrc != IDM_STATUS_SUCCESS) {
1378 1384                          idm_svc_conn_destroy(ic);
1379 1385                          idm_soshutdown(new_so);
1380 1386                          idm_sodestroy(new_so);
1381 1387                          mutex_enter(&svc->is_mutex);
1382 1388                          continue;
1383 1389                  }
1384 1390  
1385 1391                  /*
1386 1392                   * Kick the state machine.  At CS_S3_XPT_UP the state machine
1387 1393                   * will notify the client (target) about the new connection.
1388 1394                   */
1389 1395                  idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
1390 1396  
1391 1397                  mutex_enter(&svc->is_mutex);
1392 1398          }
1393 1399          ksocket_rele(so_svc->is_so);
1394 1400          so_svc->is_thread_running = B_FALSE;
1395 1401          mutex_exit(&svc->is_mutex);
1396 1402  
1397 1403          IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
1398 1404              svc->is_svc_req.sr_port);
1399 1405  
1400 1406          thread_exit();
1401 1407  }
1402 1408  
1403 1409  /*
1404 1410   * idm_so_free_task_rsrc() stops any ongoing processing of the task and
1405 1411   * frees resources associated with the task.
1406 1412   *
1407 1413   * It's not clear that this should return idm_status_t.  What do we do
1408 1414   * if it fails?
1409 1415   */
1410 1416  static idm_status_t
1411 1417  idm_so_free_task_rsrc(idm_task_t *idt)
1412 1418  {
1413 1419          idm_buf_t       *idb, *next_idb;
1414 1420  
1415 1421          /*
1416 1422           * There is nothing to cleanup on initiator connections
1417 1423           */
1418 1424          if (IDM_CONN_ISINI(idt->idt_ic))
1419 1425                  return (IDM_STATUS_SUCCESS);
1420 1426  
1421 1427          /*
1422 1428           * If this is a target connection, call idm_buf_rx_from_ini_done for
1423 1429           * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
1424 1430           *
1425 1431           * In addition, remove any buffers associated with this task from
1426 1432           * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
1427 1433           * items don't actually get removed from that list (and completion
1428 1434           * routines called) until idm_task_cleanup.
1429 1435           */
1430 1436          mutex_enter(&idt->idt_mutex);
1431 1437  
1432 1438          for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
1433 1439                  next_idb = list_next(&idt->idt_outbufv, idb);
1434 1440                  if (idb->idb_in_transport) {
1435 1441                          /*
1436 1442                           * idm_buf_rx_from_ini_done releases idt->idt_mutex
1437 1443                           */
1438 1444                          DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1439 1445                              uintptr_t, idb->idb_buf,
1440 1446                              uint32_t, idb->idb_bufoffset,
1441 1447                              uint64_t, 0, uint32_t, 0, uint32_t, 0,
1442 1448                              uint32_t, idb->idb_xfer_len,
1443 1449                              int, XFER_BUF_RX_FROM_INI);
1444 1450                          idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1445 1451                          mutex_enter(&idt->idt_mutex);
1446 1452                  }
1447 1453          }
1448 1454  
1449 1455          for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
1450 1456                  next_idb = list_next(&idt->idt_inbufv, idb);
1451 1457                  /*
1452 1458                   * We want to remove these items from the tx_list as well,
1453 1459                   * but knowing it's in the idt_inbufv list is not a guarantee
1454 1460                   * that it's in the tx_list.  If it's on the tx list then
1455 1461                   * let idm_sotx_thread() clean it up.
1456 1462                   */
1457 1463                  if (idb->idb_in_transport && !idb->idb_tx_thread) {
1458 1464                          /*
1459 1465                           * idm_buf_tx_to_ini_done releases idt->idt_mutex
1460 1466                           */
1461 1467                          DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1462 1468                              uintptr_t, idb->idb_buf,
1463 1469                              uint32_t, idb->idb_bufoffset,
1464 1470                              uint64_t, 0, uint32_t, 0, uint32_t, 0,
1465 1471                              uint32_t, idb->idb_xfer_len,
1466 1472                              int, XFER_BUF_TX_TO_INI);
1467 1473                          idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
1468 1474                          mutex_enter(&idt->idt_mutex);
1469 1475                  }
1470 1476          }
1471 1477  
1472 1478          mutex_exit(&idt->idt_mutex);
1473 1479  
1474 1480          return (IDM_STATUS_SUCCESS);
1475 1481  }
1476 1482  
1477 1483  /*
1478 1484   * idm_so_negotiate_key_values() validates the key values for this connection
1479 1485   */
1480 1486  /* ARGSUSED */
1481 1487  static kv_status_t
1482 1488  idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
1483 1489      nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
1484 1490  {
1485 1491          /* All parameters are negotiated at the iscsit level */
1486 1492          return (KV_HANDLED);
1487 1493  }
1488 1494  
1489 1495  /*
1490 1496   * idm_so_notice_key_values() activates the negotiated key values for
1491 1497   * this connection.
1492 1498   */
1493 1499  static void
1494 1500  idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1495 1501  {
1496 1502          char                    *nvp_name;
1497 1503          nvpair_t                *nvp;
1498 1504          nvpair_t                *next_nvp;
1499 1505          int                     nvrc;
1500 1506          idm_status_t            idm_status;
1501 1507          const idm_kv_xlate_t    *ikvx;
1502 1508          uint64_t                num_val;
1503 1509  
1504 1510          for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1505 1511              nvp != NULL; nvp = next_nvp) {
1506 1512                  next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1507 1513                  nvp_name = nvpair_name(nvp);
1508 1514  
1509 1515                  ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1510 1516                  switch (ikvx->ik_key_id) {
1511 1517                  case KI_HEADER_DIGEST:
1512 1518                  case KI_DATA_DIGEST:
1513 1519                          idm_status = idm_so_handle_digest(it, nvp, ikvx);
1514 1520                          ASSERT(idm_status == 0);
1515 1521  
1516 1522                          /* Remove processed item from negotiated_nvl list */
1517 1523                          nvrc = nvlist_remove_all(
1518 1524                              negotiated_nvl, ikvx->ik_key_name);
1519 1525                          ASSERT(nvrc == 0);
1520 1526                          break;
1521 1527                  case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1522 1528                          /*
1523 1529                           * Just pass the value down to idm layer.
1524 1530                           * No need to remove it from negotiated_nvl list here.
1525 1531                           */
1526 1532                          nvrc = nvpair_value_uint64(nvp, &num_val);
1527 1533                          ASSERT(nvrc == 0);
1528 1534                          it->ic_conn_params.max_xmit_dataseglen =
1529 1535                              (uint32_t)num_val;
1530 1536                          break;
1531 1537                  default:
1532 1538                          break;
1533 1539                  }
1534 1540          }
1535 1541  }
1536 1542  
1537 1543  /*
1538 1544   * idm_so_declare_key_values() declares the key values for this connection
1539 1545   */
1540 1546  /* ARGSUSED */
1541 1547  static kv_status_t
1542 1548  idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1543 1549      nvlist_t *outgoing_nvl)
1544 1550  {
1545 1551          char                    *nvp_name;
1546 1552          nvpair_t                *nvp;
1547 1553          nvpair_t                *next_nvp;
1548 1554          kv_status_t             kvrc;
1549 1555          int                     nvrc = 0;
1550 1556          const idm_kv_xlate_t    *ikvx;
1551 1557          uint64_t                num_val;
1552 1558  
1553 1559          for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1554 1560              nvp != NULL && nvrc == 0; nvp = next_nvp) {
1555 1561                  next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1556 1562                  nvp_name = nvpair_name(nvp);
1557 1563  
1558 1564                  ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1559 1565                  switch (ikvx->ik_key_id) {
1560 1566                  case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1561 1567                          if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1562 1568                                  break;
1563 1569                          }
1564 1570                          if (outgoing_nvl &&
1565 1571                              (nvrc = nvlist_add_uint64(outgoing_nvl,
1566 1572                              nvp_name, num_val)) != 0) {
1567 1573                                  break;
1568 1574                          }
1569 1575                          it->ic_conn_params.max_recv_dataseglen =
1570 1576                              (uint32_t)num_val;
1571 1577                          break;
1572 1578                  default:
1573 1579                          break;
1574 1580                  }
1575 1581          }
1576 1582          kvrc = idm_nvstat_to_kvstat(nvrc);
1577 1583          return (kvrc);
1578 1584  }
1579 1585  
1580 1586  static idm_status_t
1581 1587  idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1582 1588      const idm_kv_xlate_t *ikvx)
1583 1589  {
1584 1590          int                     nvrc;
1585 1591          char                    *digest_choice_string;
1586 1592  
1587 1593          nvrc = nvpair_value_string(digest_choice,
1588 1594              &digest_choice_string);
1589 1595          ASSERT(nvrc == 0);
1590 1596          if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1591 1597                  switch (ikvx->ik_key_id) {
1592 1598                  case KI_HEADER_DIGEST:
1593 1599                          it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1594 1600                          break;
1595 1601                  case KI_DATA_DIGEST:
1596 1602                          it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1597 1603                          break;
1598 1604                  default:
1599 1605                          ASSERT(0);
1600 1606                          break;
1601 1607                  }
1602 1608          } else if (strcasecmp(digest_choice_string, "none") == 0) {
1603 1609                  switch (ikvx->ik_key_id) {
1604 1610                  case KI_HEADER_DIGEST:
1605 1611                          it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1606 1612                          break;
1607 1613                  case KI_DATA_DIGEST:
1608 1614                          it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1609 1615                          break;
1610 1616                  default:
1611 1617                          ASSERT(0);
1612 1618                          break;
1613 1619                  }
1614 1620          } else {
1615 1621                  ASSERT(0);
1616 1622          }
1617 1623  
1618 1624          return (IDM_STATUS_SUCCESS);
1619 1625  }
1620 1626  
1621 1627  
1622 1628  /*
1623 1629   * idm_so_conn_is_capable() verifies that the passed connection is provided
1624 1630   * for by the sockets interface.
1625 1631   */
1626 1632  /* ARGSUSED */
1627 1633  static boolean_t
1628 1634  idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1629 1635  {
1630 1636          return (B_TRUE);
1631 1637  }
1632 1638  
1633 1639  /*
1634 1640   * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1635 1641   * idm_sorecv_scsidata() function invoked earlier actually reads the data
1636 1642   * off the socket into the appropriate buffers.
1637 1643   */
1638 1644  static void
1639 1645  idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1640 1646  {
1641 1647          iscsi_data_hdr_t        *bhs;
1642 1648          idm_task_t              *idt;
1643 1649          idm_buf_t               *idb;
1644 1650          uint32_t                datasn;
1645 1651          size_t                  offset;
1646 1652          iscsi_hdr_t             *ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1647 1653          iscsi_data_rsp_hdr_t    *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1648 1654  
1649 1655          ASSERT(ic != NULL);
1650 1656          ASSERT(pdu != NULL);
1651 1657          ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP);
1652 1658  
1653 1659          bhs     = (iscsi_data_hdr_t *)pdu->isp_hdr;
1654 1660          datasn  = ntohl(bhs->datasn);
1655 1661          offset  = ntohl(bhs->offset);
1656 1662  
1657 1663          /*
1658 1664           * Look up the task corresponding to the initiator task tag
1659 1665           * to get the buffers affiliated with the task.
1660 1666           */
1661 1667          idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1662 1668          if (idt == NULL) {
1663 1669                  IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1664 1670                  idm_pdu_rx_protocol_error(ic, pdu);
1665 1671                  return;
1666 1672          }
1667 1673  
1668 1674          idb = pdu->isp_sorx_buf;
1669 1675          if (idb == NULL) {
1670 1676                  IDM_CONN_LOG(CE_WARN,
1671 1677                      "idm_so_rx_datain: failed to find buffer");
1672 1678                  idm_task_rele(idt);
1673 1679                  idm_pdu_rx_protocol_error(ic, pdu);
1674 1680                  return;
1675 1681          }
1676 1682  
1677 1683          /*
1678 1684           * DataSN values should be sequential and should not have any gaps or
1679 1685           * repetitions. Check the DataSN with the one stored in the task.
1680 1686           */
1681 1687          if (datasn == idt->idt_exp_datasn) {
1682 1688                  idt->idt_exp_datasn++; /* keep track of DataSN received */
1683 1689          } else {
1684 1690                  IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1685 1691                  idm_task_rele(idt);
1686 1692                  idm_pdu_rx_protocol_error(ic, pdu);
1687 1693                  return;
1688 1694          }
1689 1695  
1690 1696          /*
1691 1697           * PDUs in a sequence should be in continuously increasing
1692 1698           * address offset
1693 1699           */
1694 1700          if (offset != idb->idb_exp_offset) {
1695 1701                  IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1696 1702                  idm_task_rele(idt);
1697 1703                  idm_pdu_rx_protocol_error(ic, pdu);
1698 1704                  return;
1699 1705          }
1700 1706          /* Expected next relative buffer offset */
1701 1707          idb->idb_exp_offset += n2h24(bhs->dlength);
1702 1708          idt->idt_rx_bytes += n2h24(bhs->dlength);
1703 1709  
1704 1710          idm_task_rele(idt);
1705 1711  
1706 1712          /*
1707 1713           * For now call scsi_rsp which will process the data rsp
1708 1714           * Revisit, need to provide an explicit client entry point for
1709 1715           * phase collapse completions.
1710 1716           */
1711 1717          if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) &&
1712 1718              (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1713 1719                  (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1714 1720          }
1715 1721  
1716 1722          idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1717 1723  }
1718 1724  
1719 1725  /*
1720 1726   * The idm_so_rx_dataout() function is used by the iSCSI target to read
1721 1727   * data from the Data-Out PDU sent by the iSCSI initiator.
1722 1728   *
1723 1729   * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1724 1730   * task to get the buffers associated with the PDU. A PDU might span buffers.
1725 1731   * The data is then read into the respective buffer.
1726 1732   */
1727 1733  static void
1728 1734  idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1729 1735  {
1730 1736  
1731 1737          iscsi_data_hdr_t        *bhs;
1732 1738          idm_task_t              *idt;
1733 1739          idm_buf_t               *idb;
1734 1740          size_t                  offset;
1735 1741  
1736 1742          ASSERT(ic != NULL);
1737 1743          ASSERT(pdu != NULL);
1738 1744          ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA);
1739 1745  
1740 1746          bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1741 1747          offset = ntohl(bhs->offset);
1742 1748  
1743 1749          /*
1744 1750           * Look up the task corresponding to the initiator task tag
1745 1751           * to get the buffers affiliated with the task.
1746 1752           */
1747 1753          idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1748 1754          if (idt == NULL) {
1749 1755                  IDM_CONN_LOG(CE_WARN,
1750 1756                      "idm_so_rx_dataout: failed to find task");
1751 1757                  idm_pdu_rx_protocol_error(ic, pdu);
1752 1758                  return;
1753 1759          }
1754 1760  
1755 1761          idb = pdu->isp_sorx_buf;
1756 1762          if (idb == NULL) {
1757 1763                  IDM_CONN_LOG(CE_WARN,
1758 1764                      "idm_so_rx_dataout: failed to find buffer");
1759 1765                  idm_task_rele(idt);
1760 1766                  idm_pdu_rx_protocol_error(ic, pdu);
1761 1767                  return;
1762 1768          }
1763 1769  
1764 1770          /* Keep track of data transferred - check data offsets */
1765 1771          if (offset != idb->idb_exp_offset) {
1766 1772                  IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1767 1773                      "%ld, %d", offset, idb->idb_exp_offset);
1768 1774                  idm_task_rele(idt);
1769 1775                  idm_pdu_rx_protocol_error(ic, pdu);
1770 1776                  return;
1771 1777          }
1772 1778          /* Expected next relative offset */
1773 1779          idb->idb_exp_offset += ntoh24(bhs->dlength);
1774 1780          idt->idt_rx_bytes += n2h24(bhs->dlength);
1775 1781  
1776 1782          /*
1777 1783           * Call the buffer callback when the transfer is complete
1778 1784           *
1779 1785           * The connection state machine should only abort tasks after
1780 1786           * shutting down the connection so we are assured that there
1781 1787           * won't be a simultaneous attempt to abort this task at the
1782 1788           * same time as we are processing this PDU (due to a connection
1783 1789           * state change).
1784 1790           */
1785 1791          if (bhs->flags & ISCSI_FLAG_FINAL) {
1786 1792                  /*
1787 1793                   * We have gotten the last data-message for the current
1788 1794                   * transfer.  idb_xfer_len represents the data that the
1789 1795                   * command intended to transfer, it does not represent the
1790 1796                   * actual number of bytes transferred. If we have not
1791 1797                   * transferred the expected number of bytes something is
1792 1798                   * wrong.
1793 1799                   *
1794 1800                   * We have two options, when there is a mismatch, we can
1795 1801                   * regard the transfer as invalid -- or we can modify our
1796 1802                   * notion of "xfer_len." In order to be as stringent as
1797 1803                   * possible, here we regard this transfer as in error; and
1798 1804                   * bail out.
1799 1805                   */
1800 1806                  if (idb->idb_buflen == idb->idb_xfer_len &&
1801 1807                      idb->idb_buflen !=
1802 1808                      (idb->idb_exp_offset - idb->idb_bufoffset)) {
1803 1809                          printf("idm_so_rx_dataout: incomplete transfer, "
1804 1810                              "protocol err");
1805 1811                          IDM_CONN_LOG(CE_NOTE,
1806 1812                              "idm_so_rx_dataout: incomplete transfer: %ld, %d",
1807 1813                              offset, (int)(idb->idb_exp_offset - offset));
1808 1814                          idm_task_rele(idt);
1809 1815                          idm_pdu_rx_protocol_error(ic, pdu);
1810 1816                          return;
1811 1817                  }
1812 1818                  /*
1813 1819                   * We only want to call idm_buf_rx_from_ini_done once
1814 1820                   * per transfer.  It's possible that this task has
1815 1821                   * already been aborted in which case
1816 1822                   * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1817 1823                   * for each buffer with idb_in_transport==B_TRUE.  To
1818 1824                   * close this window and ensure that this doesn't happen,
1819 1825                   * we'll clear idb->idb_in_transport now while holding
1820 1826                   * the task mutex.   This is only really an issue for
1821 1827                   * SCSI task abort -- if tasks were being aborted because
1822 1828                   * of a connection state change the state machine would
1823 1829                   * have already stopped the receive thread.
1824 1830                   */
1825 1831                  mutex_enter(&idt->idt_mutex);
1826 1832  
1827 1833                  /*
1828 1834                   * Release the task hold here (obtained in idm_task_find)
1829 1835                   * because the task may complete synchronously during
1830 1836                   * idm_buf_rx_from_ini_done.  Since we still have an active
1831 1837                   * buffer we know there is at least one additional hold on idt.
1832 1838                   */
1833 1839                  idm_task_rele(idt);
1834 1840  
1835 1841                  /*
1836 1842                   * idm_buf_rx_from_ini_done releases idt->idt_mutex
1837 1843                   */
1838 1844                  DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1839 1845                      uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
1840 1846                      uint64_t, 0, uint32_t, 0, uint32_t, 0,
1841 1847                      uint32_t, idb->idb_xfer_len,
1842 1848                      int, XFER_BUF_RX_FROM_INI);
1843 1849                  idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1844 1850                  idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1845 1851                  return;
1846 1852          }
1847 1853  
1848 1854          idm_task_rele(idt);
1849 1855          idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1850 1856  }
1851 1857  
1852 1858  /*
1853 1859   * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1854 1860   * the R2T PDU sent by the iSCSI target indicating that it is ready to
1855 1861   * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1856 1862   * and looks up the task in the task tree using the itt to get the output
1857 1863   * buffers associated the task. The R2T PDU contains the offset of the
1858 1864   * requested data and the data length. This function then constructs a
1859 1865   * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1860 1866   * PDU is associated with the R2T by the Target Transfer Tag  (ttt).
1861 1867   */
1862 1868  
1863 1869  static void
1864 1870  idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1865 1871  {
1866 1872          idm_task_t              *idt;
1867 1873          idm_buf_t               *idb;
1868 1874          iscsi_rtt_hdr_t         *rtt_hdr;
1869 1875          uint32_t                data_offset;
1870 1876          uint32_t                data_length;
1871 1877  
1872 1878          ASSERT(ic != NULL);
1873 1879          ASSERT(pdu != NULL);
1874 1880  
1875 1881          rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1876 1882          data_offset = ntohl(rtt_hdr->data_offset);
1877 1883          data_length = ntohl(rtt_hdr->data_length);
1878 1884          idt     = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1879 1885  
1880 1886          if (idt == NULL) {
1881 1887                  IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1882 1888                  idm_pdu_rx_protocol_error(ic, pdu);
1883 1889                  return;
1884 1890          }
1885 1891  
1886 1892          /* Find the buffer bound to the task by the iSCSI initiator */
1887 1893          mutex_enter(&idt->idt_mutex);
1888 1894          idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1889 1895          if (idb == NULL) {
1890 1896                  mutex_exit(&idt->idt_mutex);
1891 1897                  idm_task_rele(idt);
1892 1898                  IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1893 1899                  idm_pdu_rx_protocol_error(ic, pdu);
1894 1900                  return;
1895 1901          }
1896 1902  
1897 1903          /* return buffer contains this data */
1898 1904          if (data_offset + data_length > idb->idb_buflen) {
1899 1905                  /* Overflow */
1900 1906                  mutex_exit(&idt->idt_mutex);
1901 1907                  idm_task_rele(idt);
1902 1908                  IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1903 1909                      "buffer");
1904 1910                  idm_pdu_rx_protocol_error(ic, pdu);
1905 1911                  return;
1906 1912          }
1907 1913  
1908 1914          idt->idt_r2t_ttt = rtt_hdr->ttt;
1909 1915          idt->idt_exp_datasn = 0;
1910 1916  
1911 1917          idm_so_send_rtt_data(ic, idt, idb, data_offset,
1912 1918              ntohl(rtt_hdr->data_length));
1913 1919          /*
1914 1920           * the idt_mutex is released in idm_so_send_rtt_data
1915 1921           */
1916 1922  
1917 1923          idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1918 1924          idm_task_rele(idt);
1919 1925  
1920 1926  }
1921 1927  
1922 1928  idm_status_t
1923 1929  idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1924 1930  {
1925 1931          uint8_t         pad[ISCSI_PAD_WORD_LEN];
1926 1932          int             pad_len;
1927 1933          uint32_t        data_digest_crc;
1928 1934          uint32_t        crc_calculated;
1929 1935          int             total_len;
1930 1936          idm_so_conn_t   *so_conn;
1931 1937  
1932 1938          so_conn = ic->ic_transport_private;
1933 1939  
1934 1940          pad_len = ((ISCSI_PAD_WORD_LEN -
1935 1941              (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1936 1942              (ISCSI_PAD_WORD_LEN - 1));
1937 1943  
1938 1944          ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1939 1945  
1940 1946          total_len = pdu->isp_datalen;
1941 1947  
1942 1948          if (pad_len) {
1943 1949                  pdu->isp_iov[pdu->isp_iovlen].iov_base  = (char *)&pad;
1944 1950                  pdu->isp_iov[pdu->isp_iovlen].iov_len   = pad_len;
1945 1951                  total_len               += pad_len;
1946 1952                  pdu->isp_iovlen++;
1947 1953          }
1948 1954  
1949 1955          /* setup data digest */
1950 1956          if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1951 1957                  pdu->isp_iov[pdu->isp_iovlen].iov_base =
1952 1958                      (char *)&data_digest_crc;
1953 1959                  pdu->isp_iov[pdu->isp_iovlen].iov_len =
1954 1960                      sizeof (data_digest_crc);
1955 1961                  total_len               += sizeof (data_digest_crc);
1956 1962                  pdu->isp_iovlen++;
1957 1963          }
1958 1964  
1959 1965          pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1960 1966  
1961 1967          if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1962 1968              pdu->isp_iovlen, total_len) != 0) {
1963 1969                  return (IDM_STATUS_IO);
1964 1970          }
1965 1971  
1966 1972          if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1967 1973                  crc_calculated = idm_crc32c(pdu->isp_data,
1968 1974                      pdu->isp_datalen);
1969 1975                  if (pad_len) {
1970 1976                          crc_calculated = idm_crc32c_continued((char *)&pad,
1971 1977                              pad_len, crc_calculated);
1972 1978                  }
1973 1979                  if (crc_calculated != data_digest_crc) {
1974 1980                          IDM_CONN_LOG(CE_WARN,
1975 1981                              "idm_sorecvdata: "
1976 1982                              "CRC error: actual 0x%x, calc 0x%x",
1977 1983                              data_digest_crc, crc_calculated);
1978 1984  
1979 1985                          /* Invalid Data Digest */
1980 1986                          return (IDM_STATUS_DATA_DIGEST);
1981 1987                  }
1982 1988          }
1983 1989  
1984 1990          return (IDM_STATUS_SUCCESS);
1985 1991  }
1986 1992  
1987 1993  /*
1988 1994   * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1989 1995   * Data-type PDU header must be read into the idm_pdu_t structure prior to
1990 1996   * calling this function.
1991 1997   */
1992 1998  idm_status_t
1993 1999  idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1994 2000  {
1995 2001          iscsi_data_hdr_t        *bhs;
1996 2002          idm_task_t              *task;
1997 2003          uint32_t                offset;
1998 2004          uint8_t                 opcode;
1999 2005          uint32_t                dlength;
2000 2006          list_t                  *buflst;
2001 2007          uint32_t                xfer_bytes;
2002 2008          idm_status_t            status;
2003 2009  
2004 2010          ASSERT(ic != NULL);
2005 2011          ASSERT(pdu != NULL);
2006 2012  
2007 2013          bhs     = (iscsi_data_hdr_t *)pdu->isp_hdr;
2008 2014  
2009 2015          offset  = ntohl(bhs->offset);
2010 2016          opcode  = IDM_PDU_OPCODE(pdu);
2011 2017          dlength = n2h24(bhs->dlength);
2012 2018  
2013 2019          ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
2014 2020              (opcode == ISCSI_OP_SCSI_DATA));
2015 2021  
2016 2022          /*
2017 2023           * Successful lookup implicitly gets a "hold" on the task.  This
2018 2024           * hold must be released before leaving this function.  At one
2019 2025           * point we were caching this task context and retaining the hold
2020 2026           * but it turned out to be very difficult to release the hold properly.
2021 2027           * The task can be aborted and the connection shutdown between this
2022 2028           * call and the subsequent expected call to idm_so_rx_datain/
2023 2029           * idm_so_rx_dataout (in which case those functions are not called).
2024 2030           * Releasing the hold in the PDU callback doesn't work well either
2025 2031           * because the whole task may be completed by then at which point
2026 2032           * it is too late to release the hold -- for better or worse this
2027 2033           * code doesn't wait on the refcnts during normal operation.
2028 2034           * idm_task_find() is very fast and it is not a huge burden if we
2029 2035           * have to do it twice.
2030 2036           */
2031 2037          task = idm_task_find(ic, bhs->itt, bhs->ttt);
2032 2038          if (task == NULL) {
2033 2039                  IDM_CONN_LOG(CE_WARN,
2034 2040                      "idm_sorecv_scsidata: could not find task");
2035 2041                  return (IDM_STATUS_FAIL);
2036 2042          }
2037 2043  
2038 2044          mutex_enter(&task->idt_mutex);
2039 2045          buflst  = (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
2040 2046              &task->idt_inbufv : &task->idt_outbufv;
2041 2047          pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
2042 2048          mutex_exit(&task->idt_mutex);
2043 2049  
2044 2050          if (pdu->isp_sorx_buf == NULL) {
2045 2051                  idm_task_rele(task);
2046 2052                  IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
2047 2053                      "buffer for offset %x opcode=%x",
2048 2054                      offset, opcode);
2049 2055                  return (IDM_STATUS_FAIL);
2050 2056          }
2051 2057  
2052 2058          xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
2053 2059          ASSERT(xfer_bytes != 0);
2054 2060          if (xfer_bytes != dlength) {
2055 2061                  idm_task_rele(task);
2056 2062                  /*
2057 2063                   * Buffer overflow, connection error.  The PDU data is still
2058 2064                   * sitting in the socket so we can't use the connection
2059 2065                   * again until that data is drained.
2060 2066                   */
2061 2067                  return (IDM_STATUS_FAIL);
2062 2068          }
2063 2069  
2064 2070          status = idm_sorecvdata(ic, pdu);
2065 2071  
2066 2072          idm_task_rele(task);
2067 2073  
2068 2074          return (status);
2069 2075  }
2070 2076  
2071 2077  static uint32_t
2072 2078  idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
2073 2079  {
2074 2080          uint32_t        buf_ro = ro - idb->idb_bufoffset;
2075 2081          uint32_t        xfer_len = min(dlength, idb->idb_buflen - buf_ro);
2076 2082  
2077 2083          ASSERT(ro >= idb->idb_bufoffset);
2078 2084  
2079 2085          pdu->isp_iov[pdu->isp_iovlen].iov_base  =
2080 2086              (caddr_t)idb->idb_buf + buf_ro;
2081 2087          pdu->isp_iov[pdu->isp_iovlen].iov_len   = xfer_len;
2082 2088          pdu->isp_iovlen++;
2083 2089  
2084 2090          return (xfer_len);
2085 2091  }
2086 2092  
2087 2093  int
2088 2094  idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
2089 2095  {
2090 2096          pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
2091 2097          ASSERT(pdu->isp_data != NULL);
2092 2098  
2093 2099          pdu->isp_databuflen = pdu->isp_datalen;
2094 2100          pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
2095 2101          pdu->isp_iov[0].iov_len = pdu->isp_datalen;
2096 2102          pdu->isp_iovlen = 1;
2097 2103          /*
2098 2104           * Since we are associating a new data buffer with this received
2099 2105           * PDU we need to set a specific callback to free the data
2100 2106           * after the PDU is processed.
2101 2107           */
2102 2108          pdu->isp_flags |= IDM_PDU_ADDL_DATA;
2103 2109          pdu->isp_callback = idm_sorx_addl_pdu_cb;
2104 2110  
2105 2111          return (idm_sorecvdata(ic, pdu));
2106 2112  }
2107 2113  
2108 2114  void
2109 2115  idm_sorx_thread(void *arg)
2110 2116  {
2111 2117          boolean_t       conn_failure = B_FALSE;
2112 2118          idm_conn_t      *ic = (idm_conn_t *)arg;
2113 2119          idm_so_conn_t   *so_conn;
2114 2120          idm_pdu_t       *pdu;
2115 2121          idm_status_t    rc;
2116 2122  
2117 2123          idm_conn_hold(ic);
2118 2124  
2119 2125          mutex_enter(&ic->ic_mutex);
2120 2126  
2121 2127          so_conn = ic->ic_transport_private;
2122 2128          so_conn->ic_rx_thread_running = B_TRUE;
2123 2129          so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
2124 2130          cv_signal(&ic->ic_cv);
2125 2131  
2126 2132          while (so_conn->ic_rx_thread_running) {
2127 2133                  mutex_exit(&ic->ic_mutex);
2128 2134  
2129 2135                  /*
2130 2136                   * Get PDU with default header size (large enough for
2131 2137                   * BHS plus any anticipated AHS).  PDU from
2132 2138                   * the cache will have all values set correctly
2133 2139                   * for sockets RX including callback.
2134 2140                   */
2135 2141                  pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
2136 2142                  pdu->isp_ic = ic;
2137 2143                  pdu->isp_flags = 0;
2138 2144                  pdu->isp_transport_hdrlen = 0;
2139 2145  
2140 2146                  if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
2141 2147                          /*
2142 2148                           * Call idm_pdu_complete so that we call the callback
2143 2149                           * and ensure any memory allocated in idm_sorecvhdr
2144 2150                           * gets freed up.
2145 2151                           */
2146 2152                          idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2147 2153  
2148 2154                          /*
2149 2155                           * If ic_rx_thread_running is still set then
2150 2156                           * this is some kind of connection problem
2151 2157                           * on the socket.  In this case we want to
2152 2158                           * generate an event.  Otherwise some other
2153 2159                           * thread closed the socket due to another
2154 2160                           * issue in which case we don't need to
2155 2161                           * generate an event.
2156 2162                           */
2157 2163                          mutex_enter(&ic->ic_mutex);
2158 2164                          if (so_conn->ic_rx_thread_running) {
2159 2165                                  conn_failure = B_TRUE;
2160 2166                                  so_conn->ic_rx_thread_running = B_FALSE;
2161 2167                          }
2162 2168  
2163 2169                          continue;
2164 2170                  }
2165 2171  
2166 2172                  /*
2167 2173                   * Header has been read and validated.  Now we need
2168 2174                   * to read the PDU data payload (if present).  SCSI data
2169 2175                   * need to be transferred from the socket directly into
2170 2176                   * the associated transfer buffer for the SCSI task.
2171 2177                   */
2172 2178                  if (pdu->isp_datalen != 0) {
2173 2179                          if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
2174 2180                              (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
2175 2181                                  rc = idm_sorecv_scsidata(ic, pdu);
2176 2182                                  /*
2177 2183                                   * All SCSI errors are fatal to the
2178 2184                                   * connection right now since we have no
2179 2185                                   * place to put the data.  What we need
2180 2186                                   * is some kind of sink to dispose of unwanted
2181 2187                                   * SCSI data.  For example an invalid task tag
2182 2188                                   * should not kill the connection (although
2183 2189                                   * we may want to drop the connection).
2184 2190                                   */
2185 2191                          } else {
2186 2192                                  /*
2187 2193                                   * Not data PDUs so allocate a buffer for the
2188 2194                                   * data segment and read the remaining data.
2189 2195                                   */
2190 2196                                  rc = idm_sorecv_nonscsidata(ic, pdu);
2191 2197                          }
2192 2198                          if (rc != 0) {
2193 2199                                  /*
2194 2200                                   * Call idm_pdu_complete so that we call the
2195 2201                                   * callback and ensure any memory allocated
2196 2202                                   * in idm_sorecvhdr gets freed up.
2197 2203                                   */
2198 2204                                  idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2199 2205  
2200 2206                                  /*
2201 2207                                   * If ic_rx_thread_running is still set then
2202 2208                                   * this is some kind of connection problem
2203 2209                                   * on the socket.  In this case we want to
2204 2210                                   * generate an event.  Otherwise some other
2205 2211                                   * thread closed the socket due to another
2206 2212                                   * issue in which case we don't need to
2207 2213                                   * generate an event.
2208 2214                                   */
2209 2215                                  mutex_enter(&ic->ic_mutex);
2210 2216                                  if (so_conn->ic_rx_thread_running) {
2211 2217                                          conn_failure = B_TRUE;
2212 2218                                          so_conn->ic_rx_thread_running = B_FALSE;
2213 2219                                  }
2214 2220                                  continue;
2215 2221                          }
2216 2222                  }
2217 2223  
2218 2224                  /*
2219 2225                   * Process RX PDU
2220 2226                   */
2221 2227                  idm_pdu_rx(ic, pdu);
2222 2228  
2223 2229                  mutex_enter(&ic->ic_mutex);
2224 2230          }
2225 2231  
2226 2232          mutex_exit(&ic->ic_mutex);
2227 2233  
2228 2234          /*
2229 2235           * If we dropped out of the RX processing loop because of
2230 2236           * a socket problem or other connection failure (including
2231 2237           * digest errors) then we need to generate a state machine
2232 2238           * event to shut the connection down.
2233 2239           * If the state machine is already in, for example, INIT_ERROR, this
2234 2240           * event will get dropped, and the TX thread will never be notified
2235 2241           * to shut down.  To be safe, we'll just notify it here.
2236 2242           */
2237 2243          if (conn_failure) {
2238 2244                  if (so_conn->ic_tx_thread_running) {
2239 2245                          so_conn->ic_tx_thread_running = B_FALSE;
2240 2246                          mutex_enter(&so_conn->ic_tx_mutex);
2241 2247                          cv_signal(&so_conn->ic_tx_cv);
2242 2248                          mutex_exit(&so_conn->ic_tx_mutex);
2243 2249                  }
2244 2250  
2245 2251                  idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
2246 2252          }
2247 2253  
2248 2254          idm_conn_rele(ic);
2249 2255  
2250 2256          thread_exit();
2251 2257  }
2252 2258  
2253 2259  /*
2254 2260   * idm_so_tx
2255 2261   *
2256 2262   * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2257 2263   * point.  By definition, it is supposed to be fast.  So, simply queue
2258 2264   * the entry and return.  The real work is done by idm_i_so_tx() via
2259 2265   * idm_sotx_thread().
2260 2266   */
2261 2267  
2262 2268  static void
2263 2269  idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2264 2270  {
2265 2271          idm_so_conn_t *so_conn = ic->ic_transport_private;
2266 2272  
2267 2273          ASSERT(pdu->isp_ic == ic);
2268 2274          mutex_enter(&so_conn->ic_tx_mutex);
2269 2275  
2270 2276          if (!so_conn->ic_tx_thread_running) {
2271 2277                  mutex_exit(&so_conn->ic_tx_mutex);
2272 2278                  idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2273 2279                  return;
2274 2280          }
2275 2281  
2276 2282          list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2277 2283          cv_signal(&so_conn->ic_tx_cv);
2278 2284          mutex_exit(&so_conn->ic_tx_mutex);
2279 2285  }
2280 2286  
2281 2287  static idm_status_t
2282 2288  idm_i_so_tx(idm_pdu_t *pdu)
2283 2289  {
2284 2290          idm_conn_t      *ic = pdu->isp_ic;
2285 2291          idm_status_t    status = IDM_STATUS_SUCCESS;
2286 2292          uint8_t         pad[ISCSI_PAD_WORD_LEN];
2287 2293          int             pad_len;
2288 2294          uint32_t        hdr_digest_crc;
2289 2295          uint32_t        data_digest_crc = 0;
2290 2296          int             total_len = 0;
2291 2297          int             iovlen = 0;
2292 2298          struct iovec    iov[6];
2293 2299          idm_so_conn_t   *so_conn;
2294 2300  
2295 2301          so_conn = ic->ic_transport_private;
2296 2302  
2297 2303          /* Setup BHS */
2298 2304          iov[iovlen].iov_base    = (caddr_t)pdu->isp_hdr;
2299 2305          iov[iovlen].iov_len     = pdu->isp_hdrlen;
2300 2306          total_len               += iov[iovlen].iov_len;
2301 2307          iovlen++;
2302 2308  
2303 2309          /* Setup header digest */
2304 2310          if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2305 2311              (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2306 2312                  hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2307 2313  
2308 2314                  iov[iovlen].iov_base    = (caddr_t)&hdr_digest_crc;
2309 2315                  iov[iovlen].iov_len     = sizeof (hdr_digest_crc);
2310 2316                  total_len               += iov[iovlen].iov_len;
2311 2317                  iovlen++;
2312 2318          }
2313 2319  
2314 2320          /* Setup the data */
2315 2321          if (pdu->isp_datalen) {
2316 2322                  idm_task_t              *idt;
2317 2323                  idm_buf_t               *idb;
2318 2324                  iscsi_data_hdr_t        *ihp;
2319 2325                  ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2320 2326                  /* Write of immediate data */
2321 2327                  if (ic->ic_ffp &&
2322 2328                      (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_CMD ||
2323 2329                      IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA)) {
2324 2330                          idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2325 2331                          if (idt) {
2326 2332                                  mutex_enter(&idt->idt_mutex);
2327 2333                                  idb = idm_buf_find(&idt->idt_outbufv, 0);
2328 2334                                  mutex_exit(&idt->idt_mutex);
2329 2335                                  /*
2330 2336                                   * If the initiator call to idm_buf_alloc
2331 2337                                   * failed then we can get to this point
2332 2338                                   * without a bound buffer.  The associated
2333 2339                                   * connection failure will clean things up
2334 2340                                   * later.  It would be nice to come up with
2335 2341                                   * a cleaner way to handle this.  In
2336 2342                                   * particular it seems absurd to look up
2337 2343                                   * the task and the buffer just to update
2338 2344                                   * this counter.
2339 2345                                   */
2340 2346                                  if (idb)
2341 2347                                          idb->idb_xfer_len += pdu->isp_datalen;
2342 2348                                  idm_task_rele(idt);
2343 2349                          }
2344 2350                  }
2345 2351  
2346 2352                  iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2347 2353                  iov[iovlen].iov_len  = pdu->isp_datalen;
2348 2354                  total_len += iov[iovlen].iov_len;
2349 2355                  iovlen++;
2350 2356          }
2351 2357  
2352 2358          /* Setup the data pad if necessary */
2353 2359          pad_len = ((ISCSI_PAD_WORD_LEN -
2354 2360              (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2355 2361              (ISCSI_PAD_WORD_LEN - 1));
2356 2362  
2357 2363          if (pad_len) {
2358 2364                  bzero(pad, sizeof (pad));
2359 2365                  iov[iovlen].iov_base = (void *)&pad;
2360 2366                  iov[iovlen].iov_len  = pad_len;
2361 2367                  total_len               += iov[iovlen].iov_len;
2362 2368                  iovlen++;
2363 2369          }
2364 2370  
2365 2371          /*
2366 2372           * Setup the data digest if enabled.  Data-digest is not sent
2367 2373           * for login-phase PDUs.
2368 2374           */
2369 2375          if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2370 2376              ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2371 2377              (pdu->isp_datalen || pad_len)) {
2372 2378                  /*
2373 2379                   * RFC3720/10.2.3: A zero-length Data Segment also
2374 2380                   * implies a zero-length data digest.
2375 2381                   */
2376 2382                  if (pdu->isp_datalen) {
2377 2383                          data_digest_crc = idm_crc32c(pdu->isp_data,
2378 2384                              pdu->isp_datalen);
2379 2385                  }
2380 2386                  if (pad_len) {
2381 2387                          data_digest_crc = idm_crc32c_continued(&pad,
2382 2388                              pad_len, data_digest_crc);
2383 2389                  }
2384 2390  
2385 2391                  iov[iovlen].iov_base    = (caddr_t)&data_digest_crc;
2386 2392                  iov[iovlen].iov_len     = sizeof (data_digest_crc);
2387 2393                  total_len               += iov[iovlen].iov_len;
2388 2394                  iovlen++;
2389 2395          }
2390 2396  
2391 2397          /* Transmit the PDU */
2392 2398          if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2393 2399              total_len) != 0) {
2394 2400                  /* Set error status */
2395 2401                  IDM_CONN_LOG(CE_WARN,
2396 2402                      "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2397 2403                      "data: %p", (void *) so_conn->ic_so, (void *) ic,
2398 2404                      (void *) pdu->isp_data);
2399 2405                  status = IDM_STATUS_IO;
2400 2406          }
2401 2407  
2402 2408          /*
2403 2409           * Success does not mean that the PDU actually reached the
2404 2410           * remote node since it could get dropped along the way.
2405 2411           */
2406 2412          idm_pdu_complete(pdu, status);
2407 2413  
2408 2414          return (status);
2409 2415  }
2410 2416  
2411 2417  /*
2412 2418   * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2413 2419   * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2414 2420   * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2415 2421   * A target can invoke this function multiple times for a single read command
2416 2422   * (identified by the same ITT) to split the input into several sequences.
2417 2423   *
2418 2424   * DataSN starts with 0 for the first data PDU of an input command and advances
2419 2425   * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2420 2426   * which is set to 1 for the last data PDU of a sequence.
2421 2427   * If the initiator supports phase collapse, the status bit must be set along
2422 2428   * with the F bit to indicate that the status is shipped together with the last
2423 2429   * Data-In PDU.
2424 2430   *
2425 2431   * The data PDUs within a sequence will be sent in order with the buffer offset
2426 2432   * in increasing order. i.e. initiator and target must have negotiated the
2427 2433   * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2428 2434   *
2429 2435   * Caller holds idt->idt_mutex
2430 2436   */
2431 2437  static idm_status_t
2432 2438  idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2433 2439  {
2434 2440          idm_so_conn_t   *so_conn = idb->idb_ic->ic_transport_private;
2435 2441          idm_pdu_t       tmppdu;
2436 2442  
2437 2443          ASSERT(mutex_owned(&idt->idt_mutex));
2438 2444  
2439 2445          /*
2440 2446           * Put the idm_buf_t on the tx queue.  It will be transmitted by
2441 2447           * idm_sotx_thread.
2442 2448           */
2443 2449          mutex_enter(&so_conn->ic_tx_mutex);
2444 2450  
2445 2451          DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2446 2452              uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2447 2453              uint64_t, 0, uint32_t, 0, uint32_t, 0,
2448 2454              uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2449 2455  
2450 2456          if (!so_conn->ic_tx_thread_running) {
2451 2457                  mutex_exit(&so_conn->ic_tx_mutex);
2452 2458                  /*
2453 2459                   * Don't release idt->idt_mutex since we're supposed to hold
2454 2460                   * in when calling idm_buf_tx_to_ini_done
2455 2461                   */
2456 2462                  DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2457 2463                      uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2458 2464                      uint64_t, 0, uint32_t, 0, uint32_t, 0,
2459 2465                      uint32_t, idb->idb_xfer_len,
2460 2466                      int, XFER_BUF_TX_TO_INI);
2461 2467                  idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2462 2468                  return (IDM_STATUS_FAIL);
2463 2469          }
2464 2470  
2465 2471          /*
2466 2472           * Build a template for the data PDU headers we will use so that
2467 2473           * the SN values will stay consistent with other PDU's we are
2468 2474           * transmitting like R2T and SCSI status.
2469 2475           */
2470 2476          bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2471 2477          tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2472 2478          (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2473 2479              ISCSI_OP_SCSI_DATA_RSP);
2474 2480          idb->idb_tx_thread = B_TRUE;
2475 2481          list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2476 2482          cv_signal(&so_conn->ic_tx_cv);
2477 2483          mutex_exit(&so_conn->ic_tx_mutex);
2478 2484          mutex_exit(&idt->idt_mutex);
2479 2485  
2480 2486          /*
2481 2487           * Returning success here indicates the transfer was successfully
2482 2488           * dispatched -- it does not mean that the transfer completed
2483 2489           * successfully.
2484 2490           */
2485 2491          return (IDM_STATUS_SUCCESS);
2486 2492  }
2487 2493  
2488 2494  /*
2489 2495   * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2490 2496   * data blocks it is ready to receive from the initiator in response to a WRITE
2491 2497   * SCSI command. The target iSCSI layer passes the information about the desired
2492 2498   * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2493 2499   * offset and datalen are passed via the 'idb' argument.
2494 2500   *
2495 2501   * Scope for Prototype build:
2496 2502   * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2497 2503   * negotiated the "InitialR2T" to "Yes".
2498 2504   *
2499 2505   * Caller holds idt->idt_mutex
2500 2506   */
2501 2507  static idm_status_t
2502 2508  idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2503 2509  {
2504 2510          idm_pdu_t               *pdu;
2505 2511          iscsi_rtt_hdr_t         *rtt;
2506 2512  
2507 2513          ASSERT(mutex_owned(&idt->idt_mutex));
2508 2514  
2509 2515          DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2510 2516              uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2511 2517              uint64_t, 0, uint32_t, 0, uint32_t, 0,
2512 2518              uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2513 2519  
2514 2520          pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2515 2521          pdu->isp_ic = idt->idt_ic;
2516 2522          pdu->isp_flags = IDM_PDU_SET_STATSN;
2517 2523          bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2518 2524  
2519 2525          /* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
2520 2526          (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2521 2527  
2522 2528          /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2523 2529          rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2524 2530  
2525 2531          rtt->opcode             = ISCSI_OP_RTT_RSP;
2526 2532          rtt->flags              = ISCSI_FLAG_FINAL;
2527 2533          rtt->data_offset        = htonl(idb->idb_bufoffset);
2528 2534          rtt->data_length        = htonl(idb->idb_xfer_len);
2529 2535          rtt->rttsn              = htonl(idt->idt_exp_rttsn++);
2530 2536  
2531 2537          /* Keep track of buffer offsets */
2532 2538          idb->idb_exp_offset     = idb->idb_bufoffset;
2533 2539          mutex_exit(&idt->idt_mutex);
2534 2540  
2535 2541          /*
2536 2542           * Transmit the PDU.
2537 2543           */
2538 2544          idm_pdu_tx(pdu);
2539 2545  
2540 2546          return (IDM_STATUS_SUCCESS);
2541 2547  }
2542 2548  
2543 2549  static idm_status_t
2544 2550  idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2545 2551  {
2546 2552          if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2547 2553                  idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2548 2554                      KM_NOSLEEP);
2549 2555                  idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2550 2556          } else {
2551 2557                  idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2552 2558                  idb->idb_buf_private = NULL;
2553 2559          }
2554 2560  
2555 2561          if (idb->idb_buf == NULL) {
2556 2562                  IDM_CONN_LOG(CE_NOTE,
2557 2563                      "idm_so_buf_alloc: failed buffer allocation");
2558 2564                  return (IDM_STATUS_FAIL);
2559 2565          }
2560 2566  
2561 2567          return (IDM_STATUS_SUCCESS);
2562 2568  }
2563 2569  
2564 2570  /* ARGSUSED */
2565 2571  static idm_status_t
2566 2572  idm_so_buf_setup(idm_buf_t *idb)
2567 2573  {
2568 2574          /* Ensure bufalloc'd flag is unset */
2569 2575          idb->idb_bufalloc = B_FALSE;
2570 2576  
2571 2577          return (IDM_STATUS_SUCCESS);
2572 2578  }
2573 2579  
2574 2580  /* ARGSUSED */
2575 2581  static void
2576 2582  idm_so_buf_teardown(idm_buf_t *idb)
2577 2583  {
2578 2584          /* nothing to do here */
2579 2585  }
2580 2586  
2581 2587  static void
2582 2588  idm_so_buf_free(idm_buf_t *idb)
2583 2589  {
2584 2590          if (idb->idb_buf_private == NULL) {
2585 2591                  kmem_free(idb->idb_buf, idb->idb_buflen);
2586 2592          } else {
2587 2593                  kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2588 2594          }
2589 2595  }
2590 2596  
2591 2597  static void
2592 2598  idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2593 2599      uint32_t offset, uint32_t length)
2594 2600  {
2595 2601          idm_so_conn_t   *so_conn = ic->ic_transport_private;
2596 2602          idm_pdu_t       tmppdu;
2597 2603          idm_buf_t       *rtt_buf;
2598 2604  
2599 2605          ASSERT(mutex_owned(&idt->idt_mutex));
2600 2606  
2601 2607          /*
2602 2608           * Allocate a buffer to represent the RTT transfer.  We could further
2603 2609           * optimize this by allocating the buffers internally from an rtt
2604 2610           * specific buffer cache since this is socket-specific code but for
2605 2611           * now we will keep it simple.
2606 2612           */
2607 2613          rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2608 2614          if (rtt_buf == NULL) {
2609 2615                  /*
2610 2616                   * If we're in FFP then the failure was likely a resource
2611 2617                   * allocation issue and we should close the connection by
2612 2618                   * sending a CE_TRANSPORT_FAIL event.
2613 2619                   *
2614 2620                   * If we're not in FFP then idm_buf_alloc will always
2615 2621                   * fail and the state is transitioning to "complete" anyway
2616 2622                   * so we won't bother to send an event.
2617 2623                   */
2618 2624                  mutex_enter(&ic->ic_state_mutex);
2619 2625                  if (ic->ic_ffp)
2620 2626                          idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2621 2627                              NULL, CT_NONE);
2622 2628                  mutex_exit(&ic->ic_state_mutex);
2623 2629                  mutex_exit(&idt->idt_mutex);
2624 2630                  return;
2625 2631          }
2626 2632  
2627 2633          rtt_buf->idb_buf_cb = NULL;
2628 2634          rtt_buf->idb_cb_arg = NULL;
2629 2635          rtt_buf->idb_bufoffset = offset;
2630 2636          rtt_buf->idb_xfer_len = length;
2631 2637          rtt_buf->idb_ic = idt->idt_ic;
2632 2638          rtt_buf->idb_task_binding = idt;
2633 2639  
2634 2640          /*
2635 2641           * The new buffer (if any) represents an additional
2636 2642           * reference on the task
2637 2643           */
2638 2644          idm_task_hold(idt);
2639 2645          mutex_exit(&idt->idt_mutex);
2640 2646  
2641 2647          /*
2642 2648           * Put the idm_buf_t on the tx queue.  It will be transmitted by
2643 2649           * idm_sotx_thread.
2644 2650           */
2645 2651          mutex_enter(&so_conn->ic_tx_mutex);
2646 2652  
2647 2653          if (!so_conn->ic_tx_thread_running) {
2648 2654                  idm_buf_free(rtt_buf);
2649 2655                  mutex_exit(&so_conn->ic_tx_mutex);
2650 2656                  idm_task_rele(idt);
2651 2657                  return;
2652 2658          }
2653 2659  
2654 2660          /*
2655 2661           * Build a template for the data PDU headers we will use so that
2656 2662           * the SN values will stay consistent with other PDU's we are
2657 2663           * transmitting like R2T and SCSI status.
2658 2664           */
2659 2665          bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2660 2666          tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2661 2667          (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2662 2668              ISCSI_OP_SCSI_DATA);
2663 2669          rtt_buf->idb_tx_thread = B_TRUE;
2664 2670          rtt_buf->idb_in_transport = B_TRUE;
2665 2671          list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2666 2672          cv_signal(&so_conn->ic_tx_cv);
2667 2673          mutex_exit(&so_conn->ic_tx_mutex);
2668 2674  }
2669 2675  
2670 2676  static void
2671 2677  idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
2672 2678  {
2673 2679          /*
2674 2680           * Don't worry about status -- we assume any error handling
2675 2681           * is performed by the caller (idm_sotx_thread).
2676 2682           */
2677 2683          idb->idb_in_transport = B_FALSE;
2678 2684          idm_task_rele(idt);
2679 2685          idm_buf_free(idb);
2680 2686  }
2681 2687  
2682 2688  static idm_status_t
2683 2689  idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
2684 2690      uint32_t buf_region_offset, uint32_t buf_region_length)
2685 2691  {
2686 2692          idm_conn_t              *ic;
2687 2693          uint32_t                max_dataseglen;
2688 2694          size_t                  remainder, chunk;
2689 2695          uint32_t                data_offset = buf_region_offset;
2690 2696          iscsi_data_hdr_t        *bhs;
2691 2697          idm_pdu_t               *pdu;
2692 2698          idm_status_t            tx_status;
2693 2699  
2694 2700          ASSERT(mutex_owned(&idt->idt_mutex));
2695 2701  
2696 2702          ic = idt->idt_ic;
2697 2703  
2698 2704          max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
2699 2705          remainder = buf_region_length;
2700 2706  
2701 2707          while (remainder) {
2702 2708                  if (idt->idt_state != TASK_ACTIVE) {
2703 2709                          ASSERT((idt->idt_state != TASK_IDLE) &&
2704 2710                              (idt->idt_state != TASK_COMPLETE));
2705 2711                          return (IDM_STATUS_ABORTED);
2706 2712                  }
2707 2713  
2708 2714                  /* check to see if we need to chunk the data */
2709 2715                  if (remainder > max_dataseglen) {
2710 2716                          chunk = max_dataseglen;
2711 2717                  } else {
2712 2718                          chunk = remainder;
2713 2719                  }
2714 2720  
2715 2721                  /* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2716 2722                  pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2717 2723                  pdu->isp_ic = ic;
2718 2724                  pdu->isp_flags = 0;     /* initialize isp_flags */
2719 2725  
2720 2726                  /*
2721 2727                   * We've already built a build a header template
2722 2728                   * to use during the transfer.  Use this template so that
2723 2729                   * the SN values stay consistent with any unrelated PDU's
2724 2730                   * being transmitted.
2725 2731                   */
2726 2732                  bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2727 2733                      sizeof (iscsi_hdr_t));
2728 2734  
2729 2735                  /*
2730 2736                   * Set DataSN, data offset, and flags in BHS
2731 2737                   * For the prototype build, A = 0, S = 0, U = 0
2732 2738                   */
2733 2739                  bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2734 2740  
2735 2741                  bhs->datasn             = htonl(idt->idt_exp_datasn++);
2736 2742  
2737 2743                  hton24(bhs->dlength, chunk);
2738 2744                  bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2739 2745  
2740 2746                  /* setup data */
2741 2747                  pdu->isp_data   =  (uint8_t *)idb->idb_buf + data_offset;
2742 2748                  pdu->isp_datalen = (uint_t)chunk;
2743 2749  
2744 2750                  if (chunk == remainder) {
2745 2751                          bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2746 2752                          /* Piggyback the status with the last data PDU */
2747 2753                          if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
2748 2754                                  pdu->isp_flags |= IDM_PDU_SET_STATSN |
2749 2755                                      IDM_PDU_ADVANCE_STATSN;
2750 2756                                  (*idt->idt_ic->ic_conn_ops.icb_update_statsn)
2751 2757                                      (idt, pdu);
2752 2758                                  idt->idt_flags |=
2753 2759                                      IDM_TASK_PHASECOLLAPSE_SUCCESS;
2754 2760  
2755 2761                          }
2756 2762                  }
2757 2763  
2758 2764                  remainder       -= chunk;
2759 2765                  data_offset     += chunk;
2760 2766  
2761 2767                  /* Instrument the data-send DTrace probe. */
2762 2768                  if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
2763 2769                          DTRACE_ISCSI_2(data__send,
2764 2770                              idm_conn_t *, idt->idt_ic,
2765 2771                              iscsi_data_rsp_hdr_t *,
2766 2772                              (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
2767 2773                  }
2768 2774  
2769 2775                  /*
2770 2776                   * Now that we're done working with idt_exp_datasn,
2771 2777                   * idt->idt_state and idb->idb_bufoffset we can release
2772 2778                   * the task lock -- don't want to hold it across the
2773 2779                   * call to idm_i_so_tx since we could block.
2774 2780                   */
2775 2781                  mutex_exit(&idt->idt_mutex);
2776 2782  
2777 2783                  /*
2778 2784                   * Transmit the PDU.  Call the internal routine directly
2779 2785                   * as there is already implicit ordering.
2780 2786                   */
2781 2787                  if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
2782 2788                          mutex_enter(&idt->idt_mutex);
2783 2789                          return (tx_status);
2784 2790                  }
2785 2791  
2786 2792                  mutex_enter(&idt->idt_mutex);
2787 2793                  idt->idt_tx_bytes += chunk;
2788 2794          }
2789 2795  
2790 2796          return (IDM_STATUS_SUCCESS);
2791 2797  }
2792 2798  
2793 2799  /*
2794 2800   * TX PDU cache
2795 2801   */
2796 2802  /* ARGSUSED */
2797 2803  int
2798 2804  idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2799 2805  {
2800 2806          idm_pdu_t       *pdu = hdl;
2801 2807  
2802 2808          bzero(pdu, sizeof (idm_pdu_t));
2803 2809          pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2804 2810          pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2805 2811          pdu->isp_callback = idm_sotx_cache_pdu_cb;
2806 2812          pdu->isp_magic = IDM_PDU_MAGIC;
2807 2813          bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2808 2814  
2809 2815          return (0);
2810 2816  }
2811 2817  
2812 2818  /* ARGSUSED */
2813 2819  void
2814 2820  idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2815 2821  {
2816 2822          /* reset values between use */
2817 2823          pdu->isp_datalen = 0;
2818 2824  
2819 2825          kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
2820 2826  }
2821 2827  
2822 2828  /*
2823 2829   * RX PDU cache
2824 2830   */
2825 2831  /* ARGSUSED */
2826 2832  int
2827 2833  idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2828 2834  {
2829 2835          idm_pdu_t       *pdu = hdl;
2830 2836  
2831 2837          bzero(pdu, sizeof (idm_pdu_t));
2832 2838          pdu->isp_magic = IDM_PDU_MAGIC;
2833 2839          pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2834 2840          pdu->isp_callback = idm_sorx_cache_pdu_cb;
2835 2841  
2836 2842          return (0);
2837 2843  }
2838 2844  
2839 2845  /* ARGSUSED */
2840 2846  static void
2841 2847  idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2842 2848  {
2843 2849          pdu->isp_iovlen = 0;
2844 2850          pdu->isp_sorx_buf = 0;
2845 2851          kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
2846 2852  }
2847 2853  
2848 2854  static void
2849 2855  idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2850 2856  {
2851 2857          /*
2852 2858           * We had to modify our cached RX PDU with a longer header buffer
2853 2859           * and/or a longer data buffer.  Release the new buffers and fix
2854 2860           * the fields back to what we would expect for a cached RX PDU.
2855 2861           */
2856 2862          if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2857 2863                  kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2858 2864          }
2859 2865          if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2860 2866                  kmem_free(pdu->isp_data, pdu->isp_datalen);
2861 2867          }
2862 2868          pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2863 2869          pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2864 2870          pdu->isp_data = NULL;
2865 2871          pdu->isp_datalen = 0;
2866 2872          pdu->isp_sorx_buf = 0;
2867 2873          pdu->isp_callback = idm_sorx_cache_pdu_cb;
2868 2874          idm_sorx_cache_pdu_cb(pdu, status);
2869 2875  }
2870 2876  
2871 2877  /*
2872 2878   * This thread is only active when I/O is queued for transmit
2873 2879   * because the socket is busy.
2874 2880   */
2875 2881  void
2876 2882  idm_sotx_thread(void *arg)
2877 2883  {
2878 2884          idm_conn_t      *ic = arg;
2879 2885          idm_tx_obj_t    *object, *next;
2880 2886          idm_so_conn_t   *so_conn;
2881 2887          idm_status_t    status = IDM_STATUS_SUCCESS;
2882 2888  
2883 2889          idm_conn_hold(ic);
2884 2890  
2885 2891          mutex_enter(&ic->ic_mutex);
2886 2892          so_conn = ic->ic_transport_private;
2887 2893          so_conn->ic_tx_thread_running = B_TRUE;
2888 2894          so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
2889 2895          cv_signal(&ic->ic_cv);
2890 2896          mutex_exit(&ic->ic_mutex);
2891 2897  
2892 2898          mutex_enter(&so_conn->ic_tx_mutex);
2893 2899  
2894 2900          while (so_conn->ic_tx_thread_running) {
2895 2901                  while (list_is_empty(&so_conn->ic_tx_list)) {
2896 2902                          DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
2897 2903                          cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
2898 2904                          DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
2899 2905  
2900 2906                          if (!so_conn->ic_tx_thread_running) {
2901 2907                                  goto tx_bail;
2902 2908                          }
2903 2909                  }
2904 2910  
2905 2911                  object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2906 2912                  list_remove(&so_conn->ic_tx_list, object);
2907 2913                  mutex_exit(&so_conn->ic_tx_mutex);
2908 2914  
2909 2915                  switch (object->idm_tx_obj_magic) {
2910 2916                  case IDM_PDU_MAGIC: {
2911 2917                          idm_pdu_t *pdu = (idm_pdu_t *)object;
2912 2918                          DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
2913 2919                              idm_pdu_t *, (idm_pdu_t *)object);
2914 2920  
2915 2921                          if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
2916 2922                                  /* No IDM task */
2917 2923                                  (ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
2918 2924                          }
2919 2925                          status = idm_i_so_tx((idm_pdu_t *)object);
2920 2926                          break;
2921 2927                  }
2922 2928                  case IDM_BUF_MAGIC: {
2923 2929                          idm_buf_t *idb = (idm_buf_t *)object;
2924 2930                          idm_task_t *idt = idb->idb_task_binding;
2925 2931  
2926 2932                          DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
2927 2933                              idm_buf_t *, idb);
2928 2934  
2929 2935                          mutex_enter(&idt->idt_mutex);
2930 2936                          status = idm_so_send_buf_region(idt,
2931 2937                              idb, 0, idb->idb_xfer_len);
2932 2938  
2933 2939                          /*
2934 2940                           * TX thread owns the buffer so we expect it to
2935 2941                           * be "in transport"
2936 2942                           */
2937 2943                          ASSERT(idb->idb_in_transport);
2938 2944                          if (IDM_CONN_ISTGT(ic)) {
2939 2945                                  /*
2940 2946                                   * idm_buf_tx_to_ini_done releases
2941 2947                                   * idt->idt_mutex
2942 2948                                   */
2943 2949                                  DTRACE_ISCSI_8(xfer__done,
2944 2950                                      idm_conn_t *, idt->idt_ic,
2945 2951                                      uintptr_t, idb->idb_buf,
2946 2952                                      uint32_t, idb->idb_bufoffset,
2947 2953                                      uint64_t, 0, uint32_t, 0, uint32_t, 0,
2948 2954                                      uint32_t, idb->idb_xfer_len,
2949 2955                                      int, XFER_BUF_TX_TO_INI);
2950 2956                                  idm_buf_tx_to_ini_done(idt, idb, status);
2951 2957                          } else {
2952 2958                                  idm_so_send_rtt_data_done(idt, idb);
2953 2959                                  mutex_exit(&idt->idt_mutex);
2954 2960                          }
2955 2961                          break;
2956 2962                  }
2957 2963  
2958 2964                  default:
2959 2965                          IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
2960 2966                              "(0x%08x)", object->idm_tx_obj_magic);
2961 2967                          status = IDM_STATUS_FAIL;
2962 2968                  }
2963 2969  
2964 2970                  mutex_enter(&so_conn->ic_tx_mutex);
2965 2971  
2966 2972                  if (status != IDM_STATUS_SUCCESS) {
2967 2973                          so_conn->ic_tx_thread_running = B_FALSE;
2968 2974                          idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
2969 2975                  }
2970 2976          }
2971 2977  
2972 2978          /*
2973 2979           * Before we leave, we need to abort every item remaining in the
2974 2980           * TX list.
2975 2981           */
2976 2982  
2977 2983  tx_bail:
2978 2984          object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2979 2985  
2980 2986          while (object != NULL) {
2981 2987                  next = list_next(&so_conn->ic_tx_list, object);
2982 2988  
2983 2989                  list_remove(&so_conn->ic_tx_list, object);
2984 2990                  switch (object->idm_tx_obj_magic) {
2985 2991                  case IDM_PDU_MAGIC:
2986 2992                          idm_pdu_complete((idm_pdu_t *)object,
2987 2993                              IDM_STATUS_ABORTED);
2988 2994                          break;
2989 2995  
2990 2996                  case IDM_BUF_MAGIC: {
2991 2997                          idm_buf_t *idb = (idm_buf_t *)object;
2992 2998                          idm_task_t *idt = idb->idb_task_binding;
2993 2999                          mutex_exit(&so_conn->ic_tx_mutex);
2994 3000                          mutex_enter(&idt->idt_mutex);
2995 3001                          /*
2996 3002                           * TX thread owns the buffer so we expect it to
2997 3003                           * be "in transport"
2998 3004                           */
2999 3005                          ASSERT(idb->idb_in_transport);
3000 3006                          if (IDM_CONN_ISTGT(ic)) {
3001 3007                                  /*
3002 3008                                   * idm_buf_tx_to_ini_done releases
3003 3009                                   * idt->idt_mutex
3004 3010                                   */
3005 3011                                  DTRACE_ISCSI_8(xfer__done,
3006 3012                                      idm_conn_t *, idt->idt_ic,
3007 3013                                      uintptr_t, idb->idb_buf,
3008 3014                                      uint32_t, idb->idb_bufoffset,
3009 3015                                      uint64_t, 0, uint32_t, 0, uint32_t, 0,
3010 3016                                      uint32_t, idb->idb_xfer_len,
3011 3017                                      int, XFER_BUF_TX_TO_INI);
3012 3018                                  idm_buf_tx_to_ini_done(idt, idb,
3013 3019                                      IDM_STATUS_ABORTED);
3014 3020                          } else {
3015 3021                                  idm_so_send_rtt_data_done(idt, idb);
3016 3022                                  mutex_exit(&idt->idt_mutex);
3017 3023                          }
3018 3024                          mutex_enter(&so_conn->ic_tx_mutex);
3019 3025                          break;
3020 3026                  }
3021 3027                  default:
3022 3028                          IDM_CONN_LOG(CE_WARN,
3023 3029                              "idm_sotx_thread: Unexpected magic "
3024 3030                              "(0x%08x)", object->idm_tx_obj_magic);
3025 3031                  }
3026 3032  
3027 3033                  object = next;
3028 3034          }
3029 3035  
3030 3036          mutex_exit(&so_conn->ic_tx_mutex);
3031 3037          idm_conn_rele(ic);
3032 3038          thread_exit();
3033 3039          /*NOTREACHED*/
3034 3040  }
3035 3041  
3036 3042  static void
3037 3043  idm_so_socket_set_nonblock(struct sonode *node)
3038 3044  {
3039 3045          (void) VOP_SETFL(node->so_vnode, node->so_flag,
3040 3046              (node->so_state | FNONBLOCK), CRED(), NULL);
3041 3047  }
3042 3048  
3043 3049  static void
3044 3050  idm_so_socket_set_block(struct sonode *node)
3045 3051  {
3046 3052          (void) VOP_SETFL(node->so_vnode, node->so_flag,
3047 3053              (node->so_state & (~FNONBLOCK)), CRED(), NULL);
3048 3054  }
3049 3055  
3050 3056  
3051 3057  /*
3052 3058   * Called by kernel sockets when the connection has been accepted or
3053 3059   * rejected. In early volo, a "disconnect" callback was sent instead of
3054 3060   * "connectfailed", so we check for both.
3055 3061   */
3056 3062  /* ARGSUSED */
3057 3063  void
3058 3064  idm_so_timed_socket_connect_cb(ksocket_t ks,
3059 3065      ksocket_callback_event_t ev, void *arg, uintptr_t info)
3060 3066  {
3061 3067          idm_so_timed_socket_t   *itp = arg;
3062 3068          ASSERT(itp != NULL);
3063 3069          ASSERT(ev == KSOCKET_EV_CONNECTED ||
3064 3070              ev == KSOCKET_EV_CONNECTFAILED ||
3065 3071              ev == KSOCKET_EV_DISCONNECTED);
3066 3072  
3067 3073          mutex_enter(&idm_so_timed_socket_mutex);
3068 3074          itp->it_callback_called = B_TRUE;
3069 3075          if (ev == KSOCKET_EV_CONNECTED) {
3070 3076                  itp->it_socket_error_code = 0;
3071 3077          } else {
3072 3078                  /* Make sure the error code is non-zero on error */
3073 3079                  if (info == 0)
3074 3080                          info = ECONNRESET;
3075 3081                  itp->it_socket_error_code = (int)info;
3076 3082          }
3077 3083          cv_signal(&itp->it_cv);
3078 3084          mutex_exit(&idm_so_timed_socket_mutex);
3079 3085  }
3080 3086  
3081 3087  int
3082 3088  idm_so_timed_socket_connect(ksocket_t ks,
3083 3089      struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
3084 3090  {
3085 3091          clock_t                 conn_login_max;
3086 3092          int                     rc, nonblocking, rval;
3087 3093          idm_so_timed_socket_t   it;
3088 3094          ksocket_callbacks_t     ks_cb;
3089 3095  
3090 3096          conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
3091 3097  
3092 3098          /*
3093 3099           * Set to non-block socket mode, with callback on connect
3094 3100           * Early volo used "disconnected" instead of "connectfailed",
3095 3101           * so set callback to look for both.
3096 3102           */
3097 3103          bzero(&it, sizeof (it));
3098 3104          ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
3099 3105              KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
3100 3106          ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
3101 3107          ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
3102 3108          ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
3103 3109          cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
3104 3110          rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
3105 3111          if (rc != 0)
3106 3112                  return (rc);
3107 3113  
3108 3114          /* Set to non-blocking mode */
3109 3115          nonblocking = 1;
3110 3116          rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3111 3117              CRED());
3112 3118          if (rc != 0)
3113 3119                  goto cleanup;
3114 3120  
3115 3121          bzero(&it, sizeof (it));
3116 3122          for (;;) {
3117 3123                  /*
3118 3124                   * Warning -- in a loopback scenario, the call to
3119 3125                   * the connect_cb can occur inside the call to
3120 3126                   * ksocket_connect. Do not hold the mutex around the
3121 3127                   * call to ksocket_connect.
3122 3128                   */
3123 3129                  rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
3124 3130                  if (rc == 0 || rc == EISCONN) {
3125 3131                          /* socket success or already success */
3126 3132                          rc = 0;
3127 3133                          break;
3128 3134                  }
3129 3135                  if ((rc != EINPROGRESS) && (rc != EALREADY)) {
3130 3136                          break;
3131 3137                  }
3132 3138  
3133 3139                  /* TCP connect still in progress. See if out of time. */
3134 3140                  if (ddi_get_lbolt() > conn_login_max) {
3135 3141                          /*
3136 3142                           * Connection retry timeout,
3137 3143                           * failed connect to target.
3138 3144                           */
3139 3145                          rc = ETIMEDOUT;
3140 3146                          break;
3141 3147                  }
3142 3148  
3143 3149                  /*
3144 3150                   * TCP connect still in progress.  Sleep until callback.
3145 3151                   * Do NOT go to sleep if the callback already occurred!
3146 3152                   */
3147 3153                  mutex_enter(&idm_so_timed_socket_mutex);
3148 3154                  if (!it.it_callback_called) {
3149 3155                          (void) cv_timedwait(&it.it_cv,
3150 3156                              &idm_so_timed_socket_mutex, conn_login_max);
3151 3157                  }
3152 3158                  if (it.it_callback_called) {
3153 3159                          rc = it.it_socket_error_code;
3154 3160                          mutex_exit(&idm_so_timed_socket_mutex);
3155 3161                          break;
3156 3162                  }
3157 3163                  /* If timer expires, go call ksocket_connect one last time. */
3158 3164                  mutex_exit(&idm_so_timed_socket_mutex);
3159 3165          }
3160 3166  
3161 3167          /* resume blocking mode */
3162 3168          nonblocking = 0;
3163 3169          (void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3164 3170              CRED());
3165 3171  cleanup:
3166 3172          (void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
3167 3173          cv_destroy(&it.it_cv);
3168 3174          if (rc != 0) {
3169 3175                  idm_soshutdown(ks);
3170 3176          }
3171 3177          return (rc);
3172 3178  }
3173 3179  
3174 3180  
3175 3181  void
3176 3182  idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
3177 3183  {
3178 3184          int                     dp_addr_size;
3179 3185          struct sockaddr_in      *sin;
3180 3186          struct sockaddr_in6     *sin6;
3181 3187  
3182 3188          /* Build sockaddr_storage for this portal (idm_addr_t) */
3183 3189          bzero(sa, sizeof (*sa));
3184 3190          dp_addr_size = dportal->a_addr.i_insize;
3185 3191          if (dp_addr_size == sizeof (struct in_addr)) {
3186 3192                  /* IPv4 */
3187 3193                  sa->ss_family = AF_INET;
3188 3194                  sin = (struct sockaddr_in *)sa;
3189 3195                  sin->sin_port = htons(dportal->a_port);
3190 3196                  bcopy(&dportal->a_addr.i_addr.in4,
3191 3197                      &sin->sin_addr, sizeof (struct in_addr));
3192 3198          } else if (dp_addr_size == sizeof (struct in6_addr)) {
3193 3199                  /* IPv6 */
3194 3200                  sa->ss_family = AF_INET6;
3195 3201                  sin6 = (struct sockaddr_in6 *)sa;
3196 3202                  sin6->sin6_port = htons(dportal->a_port);
3197 3203                  bcopy(&dportal->a_addr.i_addr.in6,
3198 3204                      &sin6->sin6_addr, sizeof (struct in6_addr));
3199 3205          } else {
3200 3206                  ASSERT(0);
3201 3207          }
3202 3208  }
3203 3209  
3204 3210  
/*
 * Return a human-readable form of a sockaddr_storage, in the form
 * [ip-address].port.  This is used in calls to logging functions.
 * If several calls to idm_sa_ntop are made within the same invocation
 * of a logging function, then each one needs its own buf.
 *
 * sa   - address to format; AF_INET and AF_INET6 are understood.
 * buf  - caller-supplied output buffer.
 * size - size of buf in bytes.
 *
 * Always returns buf.  If the address family is not recognised, the
 * address cannot be converted, or buf is too small to be sure of
 * holding the address together with a five-digit port, buf receives
 * the placeholder "[0].-1" instead.
 */
const char *
idm_sa_ntop(const struct sockaddr_storage *sa,
    char *buf, size_t size)
{
	static const char bogus_ip[] = "[0].-1";
	char tmp[INET6_ADDRSTRLEN];
	const void *addr;
	unsigned int port;

	/* Pick out the address and port for the family in use. */
	switch (sa->ss_family) {
	case AF_INET6: {
		const struct sockaddr_in6 *in6 =
		    (const struct sockaddr_in6 *)sa;

		addr = &in6->sin6_addr;
		port = ntohs(in6->sin6_port);
		break;
	}
	case AF_INET: {
		const struct sockaddr_in *in = (const struct sockaddr_in *)sa;

		addr = &in->sin_addr;
		port = ntohs(in->sin_port);
		break;
	}
	default:
		goto err;
	}

	/* Do not look at tmp unless the conversion actually succeeded. */
	if (inet_ntop(sa->ss_family, addr, tmp, sizeof (tmp)) == NULL)
		goto err;

	/* Require room for the brackets, dot, widest port and the NUL. */
	if (strlen(tmp) + sizeof ("[].65535") > size)
		goto err;

	(void) snprintf(buf, size, "[%s].%u", tmp, port);
	return (buf);

err:
	(void) snprintf(buf, size, "%s", bogus_ip);
	return (buf);
}
  
    | 
      ↓ open down ↓ | 
    2368 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX