1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright (c) 2013 by Delphix. All rights reserved.
  27  * Copyright (c) 2017, Joyent, Inc.  All rights reserved.
  28  */
  29 
  30 #include <sys/conf.h>
  31 #include <sys/stat.h>
  32 #include <sys/file.h>
  33 #include <sys/ddi.h>
  34 #include <sys/sunddi.h>
  35 #include <sys/modctl.h>
  36 #include <sys/priv.h>
  37 #include <sys/cpuvar.h>
  38 #include <sys/socket.h>
  39 #include <sys/strsubr.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/sdt.h>
  42 #include <netinet/tcp.h>
  43 #include <inet/tcp.h>
  44 #include <sys/socketvar.h>
  45 #include <sys/pathname.h>
  46 #include <sys/fs/snode.h>
  47 #include <sys/fs/dv_node.h>
  48 #include <sys/vnode.h>
  49 #include <netinet/in.h>
  50 #include <net/if.h>
  51 #include <sys/sockio.h>
  52 #include <sys/ksocket.h>
  53 #include <sys/filio.h>            /* FIONBIO */
  54 #include <sys/iscsi_protocol.h>
  55 #include <sys/idm/idm.h>
  56 #include <sys/idm/idm_so.h>
  57 #include <sys/idm/idm_text.h>
  58 
/*
 * NOTE(review): no use of IN_PROGRESS_DELAY is visible in this part of the
 * file and its units are not stated here -- confirm against the code that
 * consumes it before changing the value.
 */
#define IN_PROGRESS_DELAY       1

/*
 * File-local copy of the IPv6 unspecified ("any") address.
 *
 * in6addr_any is currently all zeroes, but use the macro in case this
 * ever changes.
 */
static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
  66 
/*
 * Forward declarations of file-local (static) helpers.
 */

/* PDU routines taking a (pdu, status) pair */
static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);

/* Connection create/destroy/connect code with a "_common" suffix */
static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
static void idm_so_conn_destroy_common(idm_conn_t *ic);
static void idm_so_conn_connect_common(idm_conn_t *ic);

/* Socket option setup and PDU transmit */
static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
    boolean_t boot_conn);
static void idm_set_postconnect_options(ksocket_t so);
static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);

/* Data receive / send helpers */
static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
    idm_buf_t *idb, uint32_t offset, uint32_t length);
static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
    idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);

static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
    uint32_t ro, uint32_t dlength);

/* Digest key/value handling */
static idm_status_t idm_so_handle_digest(idm_conn_t *it,
    nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);

/* Switch a socket node between non-blocking and blocking mode */
static void idm_so_socket_set_nonblock(struct sonode *node);
static void idm_so_socket_set_block(struct sonode *node);
  95 
/*
 * Transport ops prototypes
 *
 * Forward declarations of the static functions that are installed in the
 * idm_so_transport_ops table in this file.
 */
static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
    nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
static void idm_so_notice_key_values(idm_conn_t *it,
    nvlist_t *negotiated_nvl);
static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
    nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
    idm_transport_caps_t *caps);
static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
static void idm_so_buf_free(idm_buf_t *idb);
static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
static void idm_so_buf_teardown(idm_buf_t *idb);
static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
static void idm_so_tgt_svc_destroy(idm_svc_t *is);
static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
static void idm_so_tgt_svc_offline(idm_svc_t *is);
static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
static void idm_so_conn_disconnect(idm_conn_t *ic);
static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
static void idm_so_ini_conn_destroy(idm_conn_t *ic);
static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
 128 
/*
 * IDM Native Sockets transport operations
 *
 * The initializer is positional: each entry is labelled with the
 * idm_transport_ops_t member it is meant to fill, so the order of the
 * entries must not be changed.  Five operations are left NULL (the
 * connection resource alloc/free hooks, the two enable_datamover hooks and
 * it_conn_terminate).  The same idm_so_conn_disconnect routine is installed
 * for both the target and the initiator disconnect slots.
 *
 * idm_so_init() points the transport's it_ops at this table.
 */
static
idm_transport_ops_t idm_so_transport_ops = {
        idm_so_tx,                      /* it_tx_pdu */
        idm_so_buf_tx_to_ini,           /* it_buf_tx_to_ini */
        idm_so_buf_rx_from_ini,         /* it_buf_rx_from_ini */
        idm_so_rx_datain,               /* it_rx_datain */
        idm_so_rx_rtt,                  /* it_rx_rtt */
        idm_so_rx_dataout,              /* it_rx_dataout */
        NULL,                           /* it_alloc_conn_rsrc */
        NULL,                           /* it_free_conn_rsrc */
        NULL,                           /* it_tgt_enable_datamover */
        NULL,                           /* it_ini_enable_datamover */
        NULL,                           /* it_conn_terminate */
        idm_so_free_task_rsrc,          /* it_free_task_rsrc */
        idm_so_negotiate_key_values,    /* it_negotiate_key_values */
        idm_so_notice_key_values,       /* it_notice_key_values */
        idm_so_conn_is_capable,         /* it_conn_is_capable */
        idm_so_buf_alloc,               /* it_buf_alloc */
        idm_so_buf_free,                /* it_buf_free */
        idm_so_buf_setup,               /* it_buf_setup */
        idm_so_buf_teardown,            /* it_buf_teardown */
        idm_so_tgt_svc_create,          /* it_tgt_svc_create */
        idm_so_tgt_svc_destroy,         /* it_tgt_svc_destroy */
        idm_so_tgt_svc_online,          /* it_tgt_svc_online */
        idm_so_tgt_svc_offline,         /* it_tgt_svc_offline */
        idm_so_tgt_conn_destroy,        /* it_tgt_conn_destroy */
        idm_so_tgt_conn_connect,        /* it_tgt_conn_connect */
        idm_so_conn_disconnect,         /* it_tgt_conn_disconnect */
        idm_so_ini_conn_create,         /* it_ini_conn_create */
        idm_so_ini_conn_destroy,        /* it_ini_conn_destroy */
        idm_so_ini_conn_connect,        /* it_ini_conn_connect */
        idm_so_conn_disconnect,         /* it_ini_conn_disconnect */
        idm_so_declare_key_values       /* it_declare_key_values */
};
 166 
/*
 * Initialized in idm_so_init() and destroyed in idm_so_fini().
 * NOTE(review): what this mutex protects is not visible in this part of
 * the file -- confirm against its users.
 */
kmutex_t        idm_so_timed_socket_mutex;

/*
 * Values applied as SO_SNDBUF / SO_RCVBUF by idm_set_postconnect_options().
 * Not static, so they are visible outside this file.
 */
int32_t idm_so_sndbuf = IDM_SNDBUF_SIZE;
int32_t idm_so_rcvbuf = IDM_RCVBUF_SIZE;
 171 
 172 /*
 173  * idm_so_init()
 174  * Sockets transport initialization
 175  */
 176 void
 177 idm_so_init(idm_transport_t *it)
 178 {
 179         /* Cache for IDM Data and R2T Transmit PDU's */
 180         idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
 181             sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
 182             &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
 183 
 184         /* Cache for IDM Receive PDU's */
 185         idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
 186             sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
 187             &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
 188 
 189         /* 128k buffer cache */
 190         idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
 191             IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
 192 
 193         /* Set the sockets transport ops */
 194         it->it_ops = &idm_so_transport_ops;
 195 
 196         mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
 197 
 198 }
 199 
 200 /*
 201  * idm_so_fini()
 202  * Sockets transport teardown
 203  */
 204 void
 205 idm_so_fini(void)
 206 {
 207         kmem_cache_destroy(idm.idm_so_128k_buf_cache);
 208         kmem_cache_destroy(idm.idm_sotx_pdu_cache);
 209         kmem_cache_destroy(idm.idm_sorx_pdu_cache);
 210         mutex_destroy(&idm_so_timed_socket_mutex);
 211 }
 212 
 213 ksocket_t
 214 idm_socreate(int domain, int type, int protocol)
 215 {
 216         ksocket_t ks;
 217 
 218         if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
 219             CRED())) {
 220                 return (ks);
 221         } else {
 222                 return (NULL);
 223         }
 224 }
 225 
/*
 * idm_soshutdown will disconnect the socket and prevent subsequent PDU
 * reception and transmission.  The sonode still exists but its state
 * gets modified to indicate it is no longer connected.  Calls to
 * idm_sorecv/idm_iov_sorecv will return, so idm_soshutdown can be used
 * to regain control of a thread stuck in idm_sorecv.
 *
 * Both directions are shut down (SHUT_RDWR); the return value of
 * ksocket_shutdown() is deliberately ignored.
 */
void
idm_soshutdown(ksocket_t so)
{
        (void) ksocket_shutdown(so, SHUT_RDWR, CRED());
}
 238 
/*
 * idm_sodestroy releases all resources associated with a socket previously
 * created with idm_socreate.  The socket must be shutdown using
 * idm_soshutdown before the socket is destroyed with idm_sodestroy,
 * otherwise undefined behavior will result.
 *
 * The return value of ksocket_close() is deliberately ignored.
 */
void
idm_sodestroy(ksocket_t ks)
{
        (void) ksocket_close(ks, CRED());
}
 250 
 251 /*
 252  * Function to compare two addresses in sockaddr_storage format
 253  */
 254 
 255 int
 256 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
 257     const struct sockaddr_storage *cmp_ss2,
 258     boolean_t v4_mapped_as_v4,
 259     boolean_t compare_ports)
 260 {
 261         struct sockaddr_storage                 mapped_v4_ss1, mapped_v4_ss2;
 262         const struct sockaddr_storage           *ss1, *ss2;
 263         struct in_addr                          *in1, *in2;
 264         struct in6_addr                         *in61, *in62;
 265         int i;
 266 
 267         /*
 268          * Normalize V4-mapped IPv6 addresses into V4 format if
 269          * v4_mapped_as_v4 is B_TRUE.
 270          */
 271         ss1 = cmp_ss1;
 272         ss2 = cmp_ss2;
 273         if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
 274                 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
 275                 if (IN6_IS_ADDR_V4MAPPED(in61)) {
 276                         bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
 277                         mapped_v4_ss1.ss_family = AF_INET;
 278                         ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
 279                             ((struct sockaddr_in *)ss1)->sin_port;
 280                         IN6_V4MAPPED_TO_INADDR(in61,
 281                             &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
 282                         ss1 = &mapped_v4_ss1;
 283                 }
 284         }
 285         ss2 = cmp_ss2;
 286         if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
 287                 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
 288                 if (IN6_IS_ADDR_V4MAPPED(in62)) {
 289                         bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
 290                         mapped_v4_ss2.ss_family = AF_INET;
 291                         ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
 292                             ((struct sockaddr_in *)ss2)->sin_port;
 293                         IN6_V4MAPPED_TO_INADDR(in62,
 294                             &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
 295                         ss2 = &mapped_v4_ss2;
 296                 }
 297         }
 298 
 299         /*
 300          * Compare ports, then address family, then ip address
 301          */
 302         if (compare_ports &&
 303             (((struct sockaddr_in *)ss1)->sin_port !=
 304             ((struct sockaddr_in *)ss2)->sin_port)) {
 305                 if (((struct sockaddr_in *)ss1)->sin_port >
 306                     ((struct sockaddr_in *)ss2)->sin_port)
 307                         return (1);
 308                 else
 309                         return (-1);
 310         }
 311 
 312         /*
 313          * ports are the same
 314          */
 315         if (ss1->ss_family != ss2->ss_family) {
 316                 if (ss1->ss_family == AF_INET)
 317                         return (1);
 318                 else
 319                         return (-1);
 320         }
 321 
 322         /*
 323          * address families are the same
 324          */
 325         if (ss1->ss_family == AF_INET) {
 326                 in1 = &((struct sockaddr_in *)ss1)->sin_addr;
 327                 in2 = &((struct sockaddr_in *)ss2)->sin_addr;
 328 
 329                 if (in1->s_addr > in2->s_addr)
 330                         return (1);
 331                 else if (in1->s_addr < in2->s_addr)
 332                         return (-1);
 333                 else
 334                         return (0);
 335         } else if (ss1->ss_family == AF_INET6) {
 336                 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
 337                 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
 338 
 339                 for (i = 0; i < 4; i++) {
 340                         if (in61->s6_addr32[i] > in62->s6_addr32[i])
 341                                 return (1);
 342                         else if (in61->s6_addr32[i] < in62->s6_addr32[i])
 343                                 return (-1);
 344                 }
 345                 return (0);
 346         }
 347 
 348         return (1);
 349 }
 350 
 351 /*
 352  * IP address filter functions to flag addresses that should not
 353  * go out to initiators through discovery.
 354  */
 355 static boolean_t
 356 idm_v4_addr_okay(struct in_addr *in_addr)
 357 {
 358         in_addr_t addr = ntohl(in_addr->s_addr);
 359 
 360         if ((INADDR_NONE == addr) ||
 361             (IN_MULTICAST(addr)) ||
 362             ((addr >> IN_CLASSA_NSHIFT) == 0) ||
 363             ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
 364                 return (B_FALSE);
 365         }
 366         return (B_TRUE);
 367 }
 368 
 369 static boolean_t
 370 idm_v6_addr_okay(struct in6_addr *addr6)
 371 {
 372 
 373         if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
 374             (IN6_IS_ADDR_LOOPBACK(addr6)) ||
 375             (IN6_IS_ADDR_MULTICAST(addr6)) ||
 376             (IN6_IS_ADDR_V4MAPPED(addr6)) ||
 377             (IN6_IS_ADDR_V4COMPAT(addr6)) ||
 378             (IN6_IS_ADDR_LINKLOCAL(addr6))) {
 379                 return (B_FALSE);
 380         }
 381         return (B_TRUE);
 382 }
 383 
/*
 * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
 * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
 *
 * ipaddr_p - out: set to a kmem-allocated idm_addr_list_t holding the
 *            addresses that passed the filters (al_out_cnt entries are
 *            valid), or to NULL when nothing is returned.
 *
 * Returns the size in bytes of the allocation stored in *ipaddr_p, or 0
 * when *ipaddr_p is NULL (socket creation failed, an ioctl failed, no
 * interfaces were reported, or every address was filtered out).
 *
 * NOTE(review): presumably the caller releases the list with kmem_free()
 * using the returned size -- confirm against callers.  The allocation is
 * sized for every reported interface, not just the al_out_cnt kept.
 */
int
idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
{
        ksocket_t               so4, so6;
        struct lifnum           lifn;
        struct lifconf          lifc;
        struct lifreq           *lp;
        int                     rval;
        int                     numifs;
        int                     bufsize;
        void                    *buf;
        int                     i, j, n, rc;
        struct sockaddr_storage ss;
        struct sockaddr_in      *sin;
        struct sockaddr_in6     *sin6;
        idm_addr_t              *ip;
        idm_addr_list_t         *ipaddr = NULL;
        int                     size_ipaddr;

        *ipaddr_p = NULL;
        size_ipaddr = 0;
        buf = NULL;

        /* create an ipv4 and ipv6 UDP socket */
        if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
                return (0);
        if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
                idm_sodestroy(so6);
                return (0);
        }


retry_count:
        /* snapshot the current number of interfaces */
        lifn.lifn_family = PF_UNSPEC;
        lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
        lifn.lifn_count = 0;
        /* use so6 for ioctls with unspecified families by default */
        if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
            != 0) {
                goto cleanup;
        }

        numifs = lifn.lifn_count;
        if (numifs <= 0) {
                goto cleanup;
        }

        /* allocate extra room in case more interfaces appear */
        numifs += 10;

        /* get the interface names and ip addresses */
        bufsize = numifs * sizeof (struct lifreq);
        buf = kmem_alloc(bufsize, KM_SLEEP);

        lifc.lifc_family = AF_UNSPEC;
        lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
        lifc.lifc_len = bufsize;
        lifc.lifc_buf = buf;
        rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
        if (rc != 0) {
                /* buf is released at the cleanup label */
                goto cleanup;
        }
        /* if our extra room is used up, try again */
        if (bufsize <= lifc.lifc_len) {
                kmem_free(buf, bufsize);
                /* clear buf so cleanup cannot free it a second time */
                buf = NULL;
                goto retry_count;
        }
        /* calc actual number of ifconfs */
        n = lifc.lifc_len / sizeof (struct lifreq);

        /* get ip address */
        if (n > 0) {
                /*
                 * The list is sized as the list header plus (n - 1)
                 * further idm_addr_t entries, i.e. room for n addresses.
                 */
                size_ipaddr = sizeof (idm_addr_list_t) +
                    (n - 1) * sizeof (idm_addr_t);
                ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
        } else {
                goto cleanup;
        }

        /*
         * Examine the array of interfaces and filter uninteresting ones.
         * i walks the reported entries, j counts the entries kept.
         */
        for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {

                /*
                 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
                 */
                ss = lp->lifr_addr;
                /*
                 * fetch the flags using the socket of the correct family
                 */
                switch (ss.ss_family) {
                case AF_INET:
                        rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
                            &rval, CRED());
                        break;
                case AF_INET6:
                        rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
                            &rval, CRED());
                        break;
                default:
                        /* neither IPv4 nor IPv6: ignore this entry */
                        continue;
                }
                if (rc == 0) {
                        /*
                         * If we got the flags, skip uninteresting
                         * interfaces based on flags
                         */
                        if ((lp->lifr_flags & IFF_UP) != IFF_UP)
                                continue;
                        if (lp->lifr_flags &
                            (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
                                continue;
                }
                /*
                 * If the flags ioctl failed, the entry is not filtered on
                 * flags and only the address checks below apply.
                 */

                /* save ip address */
                ip = &ipaddr->al_addrs[j];
                switch (ss.ss_family) {
                case AF_INET:
                        sin = (struct sockaddr_in *)&ss;
                        if (!idm_v4_addr_okay(&sin->sin_addr))
                                continue;
                        ip->a_addr.i_addr.in4 = sin->sin_addr;
                        ip->a_addr.i_insize = sizeof (struct in_addr);
                        break;
                case AF_INET6:
                        sin6 = (struct sockaddr_in6 *)&ss;
                        if (!idm_v6_addr_okay(&sin6->sin6_addr))
                                continue;
                        ip->a_addr.i_addr.in6 = sin6->sin6_addr;
                        ip->a_addr.i_insize = sizeof (struct in6_addr);
                        break;
                default:
                        continue;
                }
                /* slot j is now filled; move on to the next output slot */
                j++;
        }

        if (j == 0) {
                /* no valid ifaddr */
                kmem_free(ipaddr, size_ipaddr);
                size_ipaddr = 0;
                ipaddr = NULL;
        } else {
                ipaddr->al_out_cnt = j;
        }


cleanup:
        /* common exit: close both sockets and release the ioctl buffer */
        idm_sodestroy(so6);
        idm_sodestroy(so4);

        if (buf != NULL)
                kmem_free(buf, bufsize);

        *ipaddr_p = ipaddr;
        return (size_ipaddr);
}
 548 
 549 int
 550 idm_sorecv(ksocket_t so, void *msg, size_t len)
 551 {
 552         iovec_t iov;
 553 
 554         ASSERT(so != NULL);
 555         ASSERT(len != 0);
 556 
 557         /*
 558          * Fill in iovec and receive data
 559          */
 560         iov.iov_base = msg;
 561         iov.iov_len = len;
 562 
 563         return (idm_iov_sorecv(so, &iov, 1, len));
 564 }
 565 
 566 /*
 567  * idm_sosendto - Sends a buffered data on a non-connected socket.
 568  *
 569  * This function puts the data provided on the wire by calling sosendmsg.
 570  * It will return only when all the data has been sent or if an error
 571  * occurs.
 572  *
 573  * Returns 0 for success, the socket errno value if sosendmsg fails, and
 574  * -1 if sosendmsg returns success but uio_resid != 0
 575  */
 576 int
 577 idm_sosendto(ksocket_t so, void *buff, size_t len,
 578     struct sockaddr *name, socklen_t namelen)
 579 {
 580         struct msghdr           msg;
 581         struct iovec            iov[1];
 582         int                     error;
 583         size_t                  sent = 0;
 584 
 585         iov[0].iov_base = buff;
 586         iov[0].iov_len  = len;
 587 
 588         /* Initialization of the message header. */
 589         bzero(&msg, sizeof (msg));
 590         msg.msg_iov     = iov;
 591         msg.msg_iovlen  = 1;
 592         msg.msg_name    = name;
 593         msg.msg_namelen = namelen;
 594 
 595         if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
 596                 /* Data sent */
 597                 if (sent == len) {
 598                         /* All data sent.  Success. */
 599                         return (0);
 600                 } else {
 601                         /* Not all data was sent.  Failure */
 602                         return (-1);
 603                 }
 604         }
 605 
 606         /* Send failed */
 607         return (error);
 608 }
 609 
 610 /*
 611  * idm_iov_sosend - Sends an iovec on a connection.
 612  *
 613  * This function puts the data provided on the wire by calling sosendmsg.
 614  * It will return only when all the data has been sent or if an error
 615  * occurs.
 616  *
 617  * Returns 0 for success, the socket errno value if sosendmsg fails, and
 618  * -1 if sosendmsg returns success but uio_resid != 0
 619  */
 620 int
 621 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
 622 {
 623         struct msghdr           msg;
 624         int                     error;
 625         size_t                  sent = 0;
 626 
 627         ASSERT(iop != NULL);
 628 
 629         /* Initialization of the message header. */
 630         bzero(&msg, sizeof (msg));
 631         msg.msg_iov     = iop;
 632         msg.msg_iovlen  = iovlen;
 633 
 634         if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
 635             == 0) {
 636                 /* Data sent */
 637                 if (sent == total_len) {
 638                         /* All data sent.  Success. */
 639                         return (0);
 640                 } else {
 641                         /* Not all data was sent.  Failure */
 642                         return (-1);
 643                 }
 644         }
 645 
 646         /* Send failed */
 647         return (error);
 648 }
 649 
 650 /*
 651  * idm_iov_sorecv - Receives an iovec from a connection
 652  *
 653  * This function gets the data asked for from the socket.  It will return
 654  * only when all the requested data has been retrieved or if an error
 655  * occurs.
 656  *
 657  * Returns 0 for success, the socket errno value if sorecvmsg fails, and
 658  * -1 if sorecvmsg returns success but uio_resid != 0
 659  */
 660 int
 661 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
 662 {
 663         struct msghdr           msg;
 664         int                     error;
 665         size_t                  recv;
 666         int                     flags;
 667 
 668         ASSERT(iop != NULL);
 669 
 670         /* Initialization of the message header. */
 671         bzero(&msg, sizeof (msg));
 672         msg.msg_iov     = iop;
 673         msg.msg_iovlen  = iovlen;
 674         flags           = MSG_WAITALL;
 675 
 676         if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
 677             == 0) {
 678                 /* Received data */
 679                 if (recv == total_len) {
 680                         /* All requested data received.  Success */
 681                         return (0);
 682                 } else {
 683                         /*
 684                          * Not all data was received.  The connection has
 685                          * probably failed.
 686                          */
 687                         return (-1);
 688                 }
 689         }
 690 
 691         /* Receive failed */
 692         return (error);
 693 }
 694 
 695 static void
 696 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
 697 {
 698         int     conn_abort = 10000;
 699         int     conn_notify = 2000;
 700         int     abort = 30000;
 701 
 702         /* Pre-connect socket options */
 703         (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 704             TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
 705             CRED());
 706         if (boot_conn == B_FALSE) {
 707                 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 708                     TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
 709                     CRED());
 710                 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 711                     TCP_ABORT_THRESHOLD,
 712                     (char *)&abort, sizeof (int), CRED());
 713         }
 714 }
 715 
 716 static void
 717 idm_set_postconnect_options(ksocket_t ks)
 718 {
 719         const int       on = 1;
 720 
 721         /* Set connect options */
 722         (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
 723             (char *)&idm_so_rcvbuf, sizeof (idm_so_rcvbuf), CRED());
 724         (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
 725             (char *)&idm_so_sndbuf, sizeof (idm_so_sndbuf), CRED());
 726         (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
 727             (char *)&on, sizeof (on), CRED());
 728 }
 729 
 730 static uint32_t
 731 n2h24(const uchar_t *ptr)
 732 {
 733         return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
 734 }
 735 
 736 static boolean_t
 737 idm_dataseglenokay(idm_conn_t *ic, idm_pdu_t *pdu)
 738 {
 739         iscsi_hdr_t     *bhs;
 740 
 741         if (ic->ic_conn_type == CONN_TYPE_TGT &&
 742             pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
 743                 IDM_CONN_LOG(CE_WARN,
 744                     "idm_dataseglenokay: exceeded the max data segment length");
 745                 return (B_FALSE);
 746         }
 747 
 748         bhs = pdu->isp_hdr;
 749         /*
 750          * Filter out any RFC3720 data-size violations.
 751          */
 752         switch (IDM_PDU_OPCODE(pdu)) {
 753         case ISCSI_OP_SCSI_TASK_MGT_MSG:
 754         case ISCSI_OP_SCSI_TASK_MGT_RSP:
 755         case ISCSI_OP_RTT_RSP:
 756         case ISCSI_OP_LOGOUT_CMD:
 757                 /*
 758                  * Data-segment not allowed and additional headers not allowed.
 759                  * (both must be zero according to the RFC3720.)
 760                  */
 761                 if (bhs->hlength != 0 || pdu->isp_datalen != 0)
 762                         return (B_FALSE);
 763                 break;
 764         case ISCSI_OP_NOOP_OUT:
 765         case ISCSI_OP_LOGIN_CMD:
 766         case ISCSI_OP_TEXT_CMD:
 767         case ISCSI_OP_SNACK_CMD:
 768         case ISCSI_OP_NOOP_IN:
 769         case ISCSI_OP_SCSI_RSP:
 770         case ISCSI_OP_LOGIN_RSP:
 771         case ISCSI_OP_TEXT_RSP:
 772         case ISCSI_OP_SCSI_DATA_RSP:
 773         case ISCSI_OP_LOGOUT_RSP:
 774         case ISCSI_OP_ASYNC_EVENT:
 775         case ISCSI_OP_REJECT_MSG:
 776                 /*
 777                  * Additional headers not allowed.
 778                  * (must be zero according to RFC3720.)
 779                  */
 780                 if (bhs->hlength)
 781                         return (B_FALSE);
 782                 break;
 783         case ISCSI_OP_SCSI_CMD:
 784                 /*
 785                  * See RFC3720, section 10.3
 786                  *
 787                  * For pure read cmds, data-segment-length must be zero.
 788                  * For non-final transfers, data-size must be even number of
 789                  * 4-byte words.
 790                  * For any transfer, an expected byte count must be provided.
 791                  * For bidirectional transfers, an additional-header must be
 792                  * provided (for the read byte-count.)
 793                  */
 794                 if (pdu->isp_datalen) {
 795                         if ((bhs->flags & (ISCSI_FLAG_CMD_READ |
 796                             ISCSI_FLAG_CMD_WRITE)) == ISCSI_FLAG_CMD_READ)
 797                                 return (B_FALSE);
 798                         if ((bhs->flags & ISCSI_FLAG_FINAL) == 0 &&
 799                             ((pdu->isp_datalen & 0x3) != 0))
 800                                 return (B_FALSE);
 801                 }
 802                 if (bhs->flags & (ISCSI_FLAG_CMD_READ |
 803                     ISCSI_FLAG_CMD_WRITE)) {
 804                         iscsi_scsi_cmd_hdr_t *cmdhdr =
 805                             (iscsi_scsi_cmd_hdr_t *)bhs;
 806                         /*
 807                          * we're transfering some data, we must have a
 808                          * byte count
 809                          */
 810                         if (cmdhdr->data_length == 0)
 811                                 return (B_FALSE);
 812                 }
 813                 break;
 814         case ISCSI_OP_SCSI_DATA:
 815                 /*
 816                  * See RFC3720, section 10.7
 817                  *
 818                  * Additional headers aren't allowed, and the data-size must
 819                  * be an even number of 4-byte words (unless the final bit
 820                  * is set.)
 821                  */
 822                 if (bhs->hlength)
 823                         return (B_FALSE);
 824                 if ((bhs->flags & ISCSI_FLAG_FINAL) == 0 &&
 825                     ((pdu->isp_datalen & 0x3) != 0))
 826                         return (B_FALSE);
 827                 break;
 828         default:
 829                 break;
 830         }
 831         return (B_TRUE);
 832 }
 833 
 834 static idm_status_t
 835 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
 836 {
 837         iscsi_hdr_t     *bhs;
 838         uint32_t        hdr_digest_crc;
 839         uint32_t        crc_calculated;
 840         void            *new_hdr;
 841         int             ahslen = 0;
 842         int             total_len = 0;
 843         int             iovlen = 0;
 844         struct iovec    iov[2];
 845         idm_so_conn_t   *so_conn;
 846         int             rc;
 847 
 848         so_conn = ic->ic_transport_private;
 849 
 850         /*
 851          * Read BHS
 852          */
 853         bhs = pdu->isp_hdr;
 854         rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
 855         if (rc != IDM_STATUS_SUCCESS) {
 856                 return (IDM_STATUS_FAIL);
 857         }
 858 
 859         /*
 860          * Check actual AHS length against the amount available in the buffer
 861          */
 862         pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
 863             (bhs->hlength * sizeof (uint32_t));
 864         pdu->isp_datalen = n2h24(bhs->dlength);
 865 
 866         if (!idm_dataseglenokay(ic, pdu)) {
 867                 IDM_CONN_LOG(CE_WARN,
 868                     "idm_sorecvhdr: invalid data segment length");
 869                 return (IDM_STATUS_FAIL);
 870         }
 871         if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
 872                 /* Allocate a new header segment and change the callback */
 873                 new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
 874                 bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
 875                 pdu->isp_hdr = new_hdr;
 876                 pdu->isp_flags |= IDM_PDU_ADDL_HDR;
 877 
 878                 /*
 879                  * This callback will restore the expected values after
 880                  * the RX PDU has been processed.
 881                  */
 882                 pdu->isp_callback = idm_sorx_addl_pdu_cb;
 883         }
 884 
 885         /*
 886          * Setup receipt of additional header and header digest (if enabled).
 887          */
 888         if (bhs->hlength > 0) {
 889                 iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
 890                 ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
 891                 iov[iovlen].iov_len = ahslen;
 892                 total_len += iov[iovlen].iov_len;
 893                 iovlen++;
 894         }
 895 
 896         if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
 897                 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
 898                 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
 899                 total_len += iov[iovlen].iov_len;
 900                 iovlen++;
 901         }
 902 
 903         if ((iovlen != 0) &&
 904             (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
 905             total_len) != 0)) {
 906                 return (IDM_STATUS_FAIL);
 907         }
 908 
 909         /*
 910          * Validate header digest if enabled
 911          */
 912         if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
 913                 crc_calculated = idm_crc32c(pdu->isp_hdr,
 914                     sizeof (iscsi_hdr_t) + ahslen);
 915                 if (crc_calculated != hdr_digest_crc) {
 916                         /* Invalid Header Digest */
 917                         return (IDM_STATUS_HEADER_DIGEST);
 918                 }
 919         }
 920 
 921         return (0);
 922 }
 923 
 924 /*
 925  * idm_so_ini_conn_create()
 926  * Allocate the sockets transport connection resources.
 927  */
 928 static idm_status_t
 929 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
 930 {
 931         ksocket_t       so;
 932         idm_so_conn_t   *so_conn;
 933         idm_status_t    idmrc;
 934 
 935         so = idm_socreate(cr->cr_domain, cr->cr_type,
 936             cr->cr_protocol);
 937         if (so == NULL) {
 938                 return (IDM_STATUS_FAIL);
 939         }
 940 
 941         /* Bind the socket if configured to do so */
 942         if (cr->cr_bound) {
 943                 if (ksocket_bind(so, &cr->cr_bound_addr.sin,
 944                     SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
 945                         idm_sodestroy(so);
 946                         return (IDM_STATUS_FAIL);
 947                 }
 948         }
 949 
 950         idmrc = idm_so_conn_create_common(ic, so);
 951         if (idmrc != IDM_STATUS_SUCCESS) {
 952                 idm_soshutdown(so);
 953                 idm_sodestroy(so);
 954                 return (IDM_STATUS_FAIL);
 955         }
 956 
 957         so_conn = ic->ic_transport_private;
 958         /* Set up socket options */
 959         idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
 960 
 961         return (IDM_STATUS_SUCCESS);
 962 }
 963 
/*
 * idm_so_ini_conn_destroy()
 * Tear down the sockets transport connection resources of an initiator
 * connection.  All of the work is delegated to
 * idm_so_conn_destroy_common().
 */
static void
idm_so_ini_conn_destroy(idm_conn_t *ic)
{
        idm_so_conn_destroy_common(ic);
}
 973 
/*
 * idm_so_ini_conn_connect()
 * Establish the connection referred to by the handle previously allocated via
 * idm_so_ini_conn_create().
 *
 * When ic_conn_params.nonblock_socket is set, the socket is switched to
 * non-blocking mode and ksocket_connect() is retried until it succeeds,
 * fails with ETIMEDOUT/ECONNREFUSED/ECONNRESET, or lbolt passes
 * ic_conn_params.conn_login_max.  Otherwise a single blocking
 * ksocket_connect() is issued.
 *
 * On failure the socket is shut down and IDM_STATUS_FAIL is returned.  On
 * success the common connect processing runs, the post-connect socket
 * options are applied and IDM_STATUS_SUCCESS is returned.
 */
static idm_status_t
idm_so_ini_conn_connect(idm_conn_t *ic)
{
        idm_so_conn_t   *so_conn;
        struct sonode   *node = NULL;
        int             rc;
        clock_t         lbolt, conn_login_max, conn_login_interval;
        boolean_t       nonblock;

        so_conn = ic->ic_transport_private;
        nonblock = ic->ic_conn_params.nonblock_socket;
        /*
         * NOTE(review): conn_login_max is compared directly against lbolt
         * below, so it is presumably an absolute tick value -- confirm
         * against the code that fills in ic_conn_params.
         */
        conn_login_max = ic->ic_conn_params.conn_login_max;
        /* End of the first retry interval, as an absolute lbolt value */
        conn_login_interval = ddi_get_lbolt() +
            SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);

        if (nonblock == B_TRUE) {
                node = ((struct sonode *)(so_conn->ic_so));
                /* Switch the socket to non-blocking mode */
                idm_so_socket_set_nonblock(node);
                do {
                        rc = ksocket_connect(so_conn->ic_so,
                            &ic->ic_ini_dst_addr.sin,
                            (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
                            CRED());
                        if (rc == 0 || rc == EISCONN) {
                                /* connected now, or already connected */
                                rc = IDM_STATUS_SUCCESS;
                                break;
                        }
                        if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
                            (rc == ECONNRESET)) {
                                /* connection timed out, refused or reset */
                                break;
                        }
                        lbolt = ddi_get_lbolt();
                        if (lbolt > conn_login_max) {
                                /*
                                 * Connection retry timeout,
                                 * failed connect to target.
                                 */
                                break;
                        }
                        /*
                         * Still inside the current retry interval: poll
                         * again shortly while the connect is in progress,
                         * otherwise sleep out the rest of the interval
                         * before retrying.
                         */
                        if (lbolt < conn_login_interval) {
                                if ((rc == EINPROGRESS) || (rc == EALREADY)) {
                                        /* TCP connect still in progress */
                                        delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
                                        continue;
                                } else {
                                        delay(conn_login_interval - lbolt);
                                }
                        }
                        /* Start a new retry interval */
                        conn_login_interval = ddi_get_lbolt() +
                            SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
                } while (rc != 0);
                /* Connected: put the socket back into blocking mode */
                if (rc == IDM_STATUS_SUCCESS) {
                        idm_so_socket_set_block(node);
                }
        } else {
                rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
                    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
        }

        /* Any non-zero rc left over from either path is a failed connect */
        if (rc != 0) {
                idm_soshutdown(so_conn->ic_so);
                return (IDM_STATUS_FAIL);
        }

        idm_so_conn_connect_common(ic);

        idm_set_postconnect_options(so_conn->ic_so);

        return (IDM_STATUS_SUCCESS);
}
1053 
1054 idm_status_t
1055 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
1056 {
1057         idm_status_t    idmrc;
1058 
1059         idm_set_postconnect_options(new_so);
1060         idmrc = idm_so_conn_create_common(ic, new_so);
1061 
1062         return (idmrc);
1063 }
1064 
/*
 * idm_so_tgt_conn_destroy()
 * Tear down the sockets transport connection resources of a target
 * connection.  All of the work is delegated to
 * idm_so_conn_destroy_common().
 */
static void
idm_so_tgt_conn_destroy(idm_conn_t *ic)
{
        idm_so_conn_destroy_common(ic);
}
1070 
/*
 * idm_so_tgt_conn_connect()
 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
 * is invoked from the SM as a result of an inbound connection request.
 *
 * Only the common connect processing is needed here; the function always
 * returns IDM_STATUS_SUCCESS.
 */
static idm_status_t
idm_so_tgt_conn_connect(idm_conn_t *ic)
{
        idm_so_conn_connect_common(ic);

        return (IDM_STATUS_SUCCESS);
}
1083 
1084 static idm_status_t
1085 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
1086 {
1087         idm_so_conn_t   *so_conn;
1088 
1089         so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
1090         so_conn->ic_so = new_so;
1091 
1092         ic->ic_transport_private = so_conn;
1093         ic->ic_transport_hdrlen = 0;
1094 
1095         /* Set the scoreboarding flag on this connection */
1096         ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
1097         ic->ic_conn_params.max_recv_dataseglen =
1098             ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1099         ic->ic_conn_params.max_xmit_dataseglen =
1100             ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1101 
1102         /*
1103          * Initialize tx thread mutex and list
1104          */
1105         mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1106         cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1107         list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1108             offsetof(idm_pdu_t, idm_tx_link));
1109 
1110         return (IDM_STATUS_SUCCESS);
1111 }
1112 
1113 static void
1114 idm_so_conn_destroy_common(idm_conn_t *ic)
1115 {
1116         idm_so_conn_t   *so_conn = ic->ic_transport_private;
1117 
1118         ic->ic_transport_private = NULL;
1119         idm_sodestroy(so_conn->ic_so);
1120         list_destroy(&so_conn->ic_tx_list);
1121         mutex_destroy(&so_conn->ic_tx_mutex);
1122         cv_destroy(&so_conn->ic_tx_cv);
1123 
1124         kmem_free(so_conn, sizeof (idm_so_conn_t));
1125 }
1126 
1127 static void
1128 idm_so_conn_connect_common(idm_conn_t *ic)
1129 {
1130         idm_so_conn_t   *so_conn;
1131         struct sockaddr_in6     t_addr;
1132         socklen_t       t_addrlen = 0;
1133 
1134         so_conn = ic->ic_transport_private;
1135         bzero(&t_addr, sizeof (struct sockaddr_in6));
1136         t_addrlen = sizeof (struct sockaddr_in6);
1137 
1138         /* Set the local and remote addresses in the idm conn handle */
1139         (void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
1140             &t_addrlen, CRED());
1141         bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
1142         (void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
1143             &t_addrlen, CRED());
1144         bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
1145 
1146         mutex_enter(&ic->ic_mutex);
1147         so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
1148             &p0, TS_RUN, minclsyspri);
1149         so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
1150             &p0, TS_RUN, minclsyspri);
1151 
1152         while (so_conn->ic_rx_thread_did == 0 ||
1153             so_conn->ic_tx_thread_did == 0)
1154                 cv_wait(&ic->ic_cv, &ic->ic_mutex);
1155         mutex_exit(&ic->ic_mutex);
1156 }
1157 
/*
 * idm_so_conn_disconnect()
 * Shutdown the socket connection and stop the TX and RX threads.
 *
 * Both "running" flags are cleared under ic_mutex, the TX thread is
 * signalled through ic_tx_cv, the socket is shut down, and finally both
 * threads are joined.
 */
static void
idm_so_conn_disconnect(idm_conn_t *ic)
{
        idm_so_conn_t   *so_conn;

        so_conn = ic->ic_transport_private;

        mutex_enter(&ic->ic_mutex);
        so_conn->ic_rx_thread_running = B_FALSE;
        so_conn->ic_tx_thread_running = B_FALSE;
        /* We need to wakeup the TX thread */
        /* ic_tx_mutex is taken while ic_mutex is still held */
        mutex_enter(&so_conn->ic_tx_mutex);
        cv_signal(&so_conn->ic_tx_cv);
        mutex_exit(&so_conn->ic_tx_mutex);
        mutex_exit(&ic->ic_mutex);

        /* This should wakeup the RX thread if it is sleeping */
        idm_soshutdown(so_conn->ic_so);

        /* Wait for both threads to exit before returning */
        thread_join(so_conn->ic_tx_thread_did);
        thread_join(so_conn->ic_rx_thread_did);
}
1184 
1185 /*
1186  * idm_so_tgt_svc_create()
1187  * Establish a service on an IP address and port.  idm_svc_req_t contains
1188  * the service parameters.
1189  */
1190 /*ARGSUSED*/
1191 static idm_status_t
1192 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1193 {
1194         idm_so_svc_t            *so_svc;
1195 
1196         so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1197 
1198         /* Set the new sockets service in svc handle */
1199         is->is_so_svc = (void *)so_svc;
1200 
1201         return (IDM_STATUS_SUCCESS);
1202 }
1203 
/*
 * idm_so_tgt_svc_destroy()
 * Teardown sockets resources allocated in idm_so_tgt_svc_create().
 *
 * Only the idm_so_svc_t itself is freed here; is->is_so_svc is left
 * pointing at the freed memory and must not be used afterwards.
 */
static void
idm_so_tgt_svc_destroy(idm_svc_t *is)
{
        /* the socket will have been torn down; free the service */
        kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
}
1214 
1215 /*
1216  * idm_so_tgt_svc_online()
1217  * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1218  */
1219 
1220 static idm_status_t
1221 idm_so_tgt_svc_online(idm_svc_t *is)
1222 {
1223         idm_so_svc_t            *so_svc;
1224         idm_svc_req_t           *sr = &is->is_svc_req;
1225         struct sockaddr_in6     sin6_ip;
1226         const uint32_t          on = 1;
1227         const uint32_t          off = 0;
1228 
1229         mutex_enter(&is->is_mutex);
1230         so_svc = (idm_so_svc_t *)is->is_so_svc;
1231 
1232         /*
1233          * Try creating an IPv6 socket first
1234          */
1235         if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1236                 mutex_exit(&is->is_mutex);
1237                 return (IDM_STATUS_FAIL);
1238         } else {
1239                 bzero(&sin6_ip, sizeof (sin6_ip));
1240                 sin6_ip.sin6_family = AF_INET6;
1241                 sin6_ip.sin6_port = htons(sr->sr_port);
1242                 sin6_ip.sin6_addr = in6addr_any;
1243 
1244                 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1245                     SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
1246                 /*
1247                  * Turn off SO_MAC_EXEMPT so future sobinds succeed
1248                  */
1249                 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1250                     SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
1251 
1252                 if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1253                     sizeof (sin6_ip), CRED()) != 0) {
1254                         mutex_exit(&is->is_mutex);
1255                         idm_sodestroy(so_svc->is_so);
1256                         return (IDM_STATUS_FAIL);
1257                 }
1258         }
1259 
1260         idm_set_postconnect_options(so_svc->is_so);
1261 
1262         if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
1263                 mutex_exit(&is->is_mutex);
1264                 idm_soshutdown(so_svc->is_so);
1265                 idm_sodestroy(so_svc->is_so);
1266                 return (IDM_STATUS_FAIL);
1267         }
1268 
1269         /* Launch a watch thread */
1270         so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1271             is, 0, &p0, TS_RUN, minclsyspri);
1272 
1273         if (so_svc->is_thread == NULL) {
1274                 /* Failure to launch; teardown the socket */
1275                 mutex_exit(&is->is_mutex);
1276                 idm_soshutdown(so_svc->is_so);
1277                 idm_sodestroy(so_svc->is_so);
1278                 return (IDM_STATUS_FAIL);
1279         }
1280         ksocket_hold(so_svc->is_so);
1281         /* Wait for the port watcher thread to start */
1282         while (!so_svc->is_thread_running)
1283                 cv_wait(&is->is_cv, &is->is_mutex);
1284         mutex_exit(&is->is_mutex);
1285 
1286         return (IDM_STATUS_SUCCESS);
1287 }
1288 
/*
 * idm_so_tgt_svc_offline
 *
 * Stop listening on the IP address and port identified by idm_svc_t.
 *
 * The watcher's run flag is cleared under is_mutex, the listening socket
 * is destroyed, and the port watcher thread is then joined.
 */
static void
idm_so_tgt_svc_offline(idm_svc_t *is)
{
        idm_so_svc_t            *so_svc;
        mutex_enter(&is->is_mutex);
        so_svc = (idm_so_svc_t *)is->is_so_svc;
        /* The port watcher loops only while this flag is set */
        so_svc->is_thread_running = B_FALSE;
        mutex_exit(&is->is_mutex);

        /*
         * Teardown socket
         * NOTE(review): presumably this is also what makes a
         * ksocket_accept() blocked in the watcher return -- confirm.
         */
        idm_sodestroy(so_svc->is_so);

        /*
         * Now we expect the port watcher thread to terminate
         */
        thread_join(so_svc->is_thread_did);
}
1313 
1314 /*
1315  * Watch thread for target service connection establishment.
1316  */
1317 void
1318 idm_so_svc_port_watcher(void *arg)
1319 {
1320         idm_svc_t               *svc = arg;
1321         ksocket_t               new_so;
1322         idm_conn_t              *ic;
1323         idm_status_t            idmrc;
1324         idm_so_svc_t            *so_svc;
1325         int                     rc;
1326         const uint32_t          off = 0;
1327         struct sockaddr_in6     t_addr;
1328         socklen_t               t_addrlen;
1329 
1330         bzero(&t_addr, sizeof (struct sockaddr_in6));
1331         t_addrlen = sizeof (struct sockaddr_in6);
1332         mutex_enter(&svc->is_mutex);
1333 
1334         so_svc = svc->is_so_svc;
1335         so_svc->is_thread_running = B_TRUE;
1336         so_svc->is_thread_did = so_svc->is_thread->t_did;
1337 
1338         cv_signal(&svc->is_cv);
1339 
1340         IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
1341             svc->is_svc_req.sr_port);
1342 
1343         while (so_svc->is_thread_running) {
1344                 mutex_exit(&svc->is_mutex);
1345 
1346                 if ((rc = ksocket_accept(so_svc->is_so,
1347                     (struct sockaddr *)&t_addr, &t_addrlen,
1348                     &new_so, CRED())) != 0) {
1349                         mutex_enter(&svc->is_mutex);
1350                         if (rc != ECONNABORTED && rc != EINTR) {
1351                                 IDM_SVC_LOG(CE_NOTE, "idm_so_svc_port_watcher:"
1352                                     " ksocket_accept failed %d", rc);
1353                         }
1354                         /*
1355                          * Unclean shutdown of this thread is not handled
1356                          * wait for !is_thread_running.
1357                          */
1358                         continue;
1359                 }
1360                 /*
1361                  * Turn off SO_MAC_EXEMPT so future sobinds succeed
1362                  */
1363                 (void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
1364                     (char *)&off, sizeof (off), CRED());
1365 
1366                 idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
1367                     &ic);
1368                 if (idmrc != IDM_STATUS_SUCCESS) {
1369                         /* Drop connection */
1370                         idm_soshutdown(new_so);
1371                         idm_sodestroy(new_so);
1372                         mutex_enter(&svc->is_mutex);
1373                         continue;
1374                 }
1375 
1376                 idmrc = idm_so_tgt_conn_create(ic, new_so);
1377                 if (idmrc != IDM_STATUS_SUCCESS) {
1378                         idm_svc_conn_destroy(ic);
1379                         idm_soshutdown(new_so);
1380                         idm_sodestroy(new_so);
1381                         mutex_enter(&svc->is_mutex);
1382                         continue;
1383                 }
1384 
1385                 /*
1386                  * Kick the state machine.  At CS_S3_XPT_UP the state machine
1387                  * will notify the client (target) about the new connection.
1388                  */
1389                 idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
1390 
1391                 mutex_enter(&svc->is_mutex);
1392         }
1393         ksocket_rele(so_svc->is_so);
1394         so_svc->is_thread_running = B_FALSE;
1395         mutex_exit(&svc->is_mutex);
1396 
1397         IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
1398             svc->is_svc_req.sr_port);
1399 
1400         thread_exit();
1401 }
1402 
/*
 * idm_so_free_task_rsrc() stops any ongoing processing of the task and
 * frees resources associated with the task.
 *
 * It's not clear that this should return idm_status_t.  What do we do
 * if it fails?
 *
 * As written, IDM_STATUS_SUCCESS is the only value ever returned.
 */
static idm_status_t
idm_so_free_task_rsrc(idm_task_t *idt)
{
        idm_buf_t       *idb, *next_idb;

        /*
         * There is nothing to cleanup on initiator connections
         */
        if (IDM_CONN_ISINI(idt->idt_ic))
                return (IDM_STATUS_SUCCESS);

        /*
         * If this is a target connection, call idm_buf_rx_from_ini_done for
         * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
         *
         * In addition, remove any buffers associated with this task from
         * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
         * items don't actually get removed from that list (and completion
         * routines called) until idm_task_cleanup.
         */
        mutex_enter(&idt->idt_mutex);

        /*
         * Abort every outbufv buffer that is still in the transport.
         * NOTE(review): next_idb is captured before the completion routine
         * drops idt_mutex; presumably the list cannot change while the
         * mutex is released -- confirm.
         */
        for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
                next_idb = list_next(&idt->idt_outbufv, idb);
                if (idb->idb_in_transport) {
                        /*
                         * idm_buf_rx_from_ini_done releases idt->idt_mutex
                         */
                        DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
                            uintptr_t, idb->idb_buf,
                            uint32_t, idb->idb_bufoffset,
                            uint64_t, 0, uint32_t, 0, uint32_t, 0,
                            uint32_t, idb->idb_xfer_len,
                            int, XFER_BUF_RX_FROM_INI);
                        idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
                        /* Re-take the mutex before touching the list again */
                        mutex_enter(&idt->idt_mutex);
                }
        }

        /*
         * Abort every inbufv buffer that is in the transport but is not
         * marked as belonging to the TX thread.
         */
        for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
                next_idb = list_next(&idt->idt_inbufv, idb);
                /*
                 * We want to remove these items from the tx_list as well,
                 * but knowing it's in the idt_inbufv list is not a guarantee
                 * that it's in the tx_list.  If it's on the tx list then
                 * let idm_sotx_thread() clean it up.
                 */
                if (idb->idb_in_transport && !idb->idb_tx_thread) {
                        /*
                         * idm_buf_tx_to_ini_done releases idt->idt_mutex
                         */
                        DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
                            uintptr_t, idb->idb_buf,
                            uint32_t, idb->idb_bufoffset,
                            uint64_t, 0, uint32_t, 0, uint32_t, 0,
                            uint32_t, idb->idb_xfer_len,
                            int, XFER_BUF_TX_TO_INI);
                        idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
                        /* Re-take the mutex before touching the list again */
                        mutex_enter(&idt->idt_mutex);
                }
        }

        mutex_exit(&idt->idt_mutex);

        return (IDM_STATUS_SUCCESS);
}
1476 
/*
 * idm_so_negotiate_key_values() validates the key values for this connection
 *
 * The sockets transport negotiates nothing itself: none of the arguments
 * are examined and KV_HANDLED is always returned.
 */
/* ARGSUSED */
static kv_status_t
idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
    nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
{
        /* All parameters are negotiated at the iscsit level */
        return (KV_HANDLED);
}
1488 
1489 /*
1490  * idm_so_notice_key_values() activates the negotiated key values for
1491  * this connection.
1492  */
1493 static void
1494 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1495 {
1496         char                    *nvp_name;
1497         nvpair_t                *nvp;
1498         nvpair_t                *next_nvp;
1499         int                     nvrc;
1500         idm_status_t            idm_status;
1501         const idm_kv_xlate_t    *ikvx;
1502         uint64_t                num_val;
1503 
1504         for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1505             nvp != NULL; nvp = next_nvp) {
1506                 next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1507                 nvp_name = nvpair_name(nvp);
1508 
1509                 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1510                 switch (ikvx->ik_key_id) {
1511                 case KI_HEADER_DIGEST:
1512                 case KI_DATA_DIGEST:
1513                         idm_status = idm_so_handle_digest(it, nvp, ikvx);
1514                         ASSERT(idm_status == 0);
1515 
1516                         /* Remove processed item from negotiated_nvl list */
1517                         nvrc = nvlist_remove_all(
1518                             negotiated_nvl, ikvx->ik_key_name);
1519                         ASSERT(nvrc == 0);
1520                         break;
1521                 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1522                         /*
1523                          * Just pass the value down to idm layer.
1524                          * No need to remove it from negotiated_nvl list here.
1525                          */
1526                         nvrc = nvpair_value_uint64(nvp, &num_val);
1527                         ASSERT(nvrc == 0);
1528                         it->ic_conn_params.max_xmit_dataseglen =
1529                             (uint32_t)num_val;
1530                         break;
1531                 default:
1532                         break;
1533                 }
1534         }
1535 }
1536 
1537 /*
1538  * idm_so_declare_key_values() declares the key values for this connection
1539  */
1540 /* ARGSUSED */
1541 static kv_status_t
1542 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1543     nvlist_t *outgoing_nvl)
1544 {
1545         char                    *nvp_name;
1546         nvpair_t                *nvp;
1547         nvpair_t                *next_nvp;
1548         kv_status_t             kvrc;
1549         int                     nvrc = 0;
1550         const idm_kv_xlate_t    *ikvx;
1551         uint64_t                num_val;
1552 
1553         for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1554             nvp != NULL && nvrc == 0; nvp = next_nvp) {
1555                 next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1556                 nvp_name = nvpair_name(nvp);
1557 
1558                 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1559                 switch (ikvx->ik_key_id) {
1560                 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1561                         if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1562                                 break;
1563                         }
1564                         if (outgoing_nvl &&
1565                             (nvrc = nvlist_add_uint64(outgoing_nvl,
1566                             nvp_name, num_val)) != 0) {
1567                                 break;
1568                         }
1569                         it->ic_conn_params.max_recv_dataseglen =
1570                             (uint32_t)num_val;
1571                         break;
1572                 default:
1573                         break;
1574                 }
1575         }
1576         kvrc = idm_nvstat_to_kvstat(nvrc);
1577         return (kvrc);
1578 }
1579 
1580 static idm_status_t
1581 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1582     const idm_kv_xlate_t *ikvx)
1583 {
1584         int                     nvrc;
1585         char                    *digest_choice_string;
1586 
1587         nvrc = nvpair_value_string(digest_choice,
1588             &digest_choice_string);
1589         ASSERT(nvrc == 0);
1590         if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1591                 switch (ikvx->ik_key_id) {
1592                 case KI_HEADER_DIGEST:
1593                         it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1594                         break;
1595                 case KI_DATA_DIGEST:
1596                         it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1597                         break;
1598                 default:
1599                         ASSERT(0);
1600                         break;
1601                 }
1602         } else if (strcasecmp(digest_choice_string, "none") == 0) {
1603                 switch (ikvx->ik_key_id) {
1604                 case KI_HEADER_DIGEST:
1605                         it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1606                         break;
1607                 case KI_DATA_DIGEST:
1608                         it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1609                         break;
1610                 default:
1611                         ASSERT(0);
1612                         break;
1613                 }
1614         } else {
1615                 ASSERT(0);
1616         }
1617 
1618         return (IDM_STATUS_SUCCESS);
1619 }
1620 
1621 
1622 /*
1623  * idm_so_conn_is_capable() verifies that the passed connection is provided
1624  * for by the sockets interface.
1625  */
1626 /* ARGSUSED */
1627 static boolean_t
1628 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1629 {
1630         return (B_TRUE);
1631 }
1632 
1633 /*
1634  * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1635  * idm_sorecv_scsidata() function invoked earlier actually reads the data
1636  * off the socket into the appropriate buffers.
1637  */
1638 static void
1639 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1640 {
1641         iscsi_data_hdr_t        *bhs;
1642         idm_task_t              *idt;
1643         idm_buf_t               *idb;
1644         uint32_t                datasn;
1645         size_t                  offset;
1646         iscsi_hdr_t             *ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1647         iscsi_data_rsp_hdr_t    *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1648 
1649         ASSERT(ic != NULL);
1650         ASSERT(pdu != NULL);
1651         ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP);
1652 
1653         bhs     = (iscsi_data_hdr_t *)pdu->isp_hdr;
1654         datasn  = ntohl(bhs->datasn);
1655         offset  = ntohl(bhs->offset);
1656 
1657         /*
1658          * Look up the task corresponding to the initiator task tag
1659          * to get the buffers affiliated with the task.
1660          */
1661         idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1662         if (idt == NULL) {
1663                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1664                 idm_pdu_rx_protocol_error(ic, pdu);
1665                 return;
1666         }
1667 
1668         idb = pdu->isp_sorx_buf;
1669         if (idb == NULL) {
1670                 IDM_CONN_LOG(CE_WARN,
1671                     "idm_so_rx_datain: failed to find buffer");
1672                 idm_task_rele(idt);
1673                 idm_pdu_rx_protocol_error(ic, pdu);
1674                 return;
1675         }
1676 
1677         /*
1678          * DataSN values should be sequential and should not have any gaps or
1679          * repetitions. Check the DataSN with the one stored in the task.
1680          */
1681         if (datasn == idt->idt_exp_datasn) {
1682                 idt->idt_exp_datasn++; /* keep track of DataSN received */
1683         } else {
1684                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1685                 idm_task_rele(idt);
1686                 idm_pdu_rx_protocol_error(ic, pdu);
1687                 return;
1688         }
1689 
1690         /*
1691          * PDUs in a sequence should be in continuously increasing
1692          * address offset
1693          */
1694         if (offset != idb->idb_exp_offset) {
1695                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1696                 idm_task_rele(idt);
1697                 idm_pdu_rx_protocol_error(ic, pdu);
1698                 return;
1699         }
1700         /* Expected next relative buffer offset */
1701         idb->idb_exp_offset += n2h24(bhs->dlength);
1702         idt->idt_rx_bytes += n2h24(bhs->dlength);
1703 
1704         idm_task_rele(idt);
1705 
1706         /*
1707          * For now call scsi_rsp which will process the data rsp
1708          * Revisit, need to provide an explicit client entry point for
1709          * phase collapse completions.
1710          */
1711         if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) &&
1712             (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1713                 (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1714         }
1715 
1716         idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1717 }
1718 
1719 /*
1720  * The idm_so_rx_dataout() function is used by the iSCSI target to read
1721  * data from the Data-Out PDU sent by the iSCSI initiator.
1722  *
1723  * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1724  * task to get the buffers associated with the PDU. A PDU might span buffers.
1725  * The data is then read into the respective buffer.
1726  */
1727 static void
1728 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1729 {
1730 
1731         iscsi_data_hdr_t        *bhs;
1732         idm_task_t              *idt;
1733         idm_buf_t               *idb;
1734         size_t                  offset;
1735 
1736         ASSERT(ic != NULL);
1737         ASSERT(pdu != NULL);
1738         ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA);
1739 
1740         bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1741         offset = ntohl(bhs->offset);
1742 
1743         /*
1744          * Look up the task corresponding to the initiator task tag
1745          * to get the buffers affiliated with the task.
1746          */
1747         idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1748         if (idt == NULL) {
1749                 IDM_CONN_LOG(CE_WARN,
1750                     "idm_so_rx_dataout: failed to find task");
1751                 idm_pdu_rx_protocol_error(ic, pdu);
1752                 return;
1753         }
1754 
1755         idb = pdu->isp_sorx_buf;
1756         if (idb == NULL) {
1757                 IDM_CONN_LOG(CE_WARN,
1758                     "idm_so_rx_dataout: failed to find buffer");
1759                 idm_task_rele(idt);
1760                 idm_pdu_rx_protocol_error(ic, pdu);
1761                 return;
1762         }
1763 
1764         /* Keep track of data transferred - check data offsets */
1765         if (offset != idb->idb_exp_offset) {
1766                 IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1767                     "%ld, %d", offset, idb->idb_exp_offset);
1768                 idm_task_rele(idt);
1769                 idm_pdu_rx_protocol_error(ic, pdu);
1770                 return;
1771         }
1772         /* Expected next relative offset */
1773         idb->idb_exp_offset += ntoh24(bhs->dlength);
1774         idt->idt_rx_bytes += n2h24(bhs->dlength);
1775 
1776         /*
1777          * Call the buffer callback when the transfer is complete
1778          *
1779          * The connection state machine should only abort tasks after
1780          * shutting down the connection so we are assured that there
1781          * won't be a simultaneous attempt to abort this task at the
1782          * same time as we are processing this PDU (due to a connection
1783          * state change).
1784          */
1785         if (bhs->flags & ISCSI_FLAG_FINAL) {
1786                 /*
1787                  * We have gotten the last data-message for the current
1788                  * transfer.  idb_xfer_len represents the data that the
1789                  * command intended to transfer, it does not represent the
1790                  * actual number of bytes transferred. If we have not
1791                  * transferred the expected number of bytes something is
1792                  * wrong.
1793                  *
1794                  * We have two options, when there is a mismatch, we can
1795                  * regard the transfer as invalid -- or we can modify our
1796                  * notion of "xfer_len." In order to be as stringent as
1797                  * possible, here we regard this transfer as in error; and
1798                  * bail out.
1799                  */
1800                 if (idb->idb_buflen == idb->idb_xfer_len &&
1801                     idb->idb_buflen !=
1802                     (idb->idb_exp_offset - idb->idb_bufoffset)) {
1803                         printf("idm_so_rx_dataout: incomplete transfer, "
1804                             "protocol err");
1805                         IDM_CONN_LOG(CE_NOTE,
1806                             "idm_so_rx_dataout: incomplete transfer: %ld, %d",
1807                             offset, (int)(idb->idb_exp_offset - offset));
1808                         idm_task_rele(idt);
1809                         idm_pdu_rx_protocol_error(ic, pdu);
1810                         return;
1811                 }
1812                 /*
1813                  * We only want to call idm_buf_rx_from_ini_done once
1814                  * per transfer.  It's possible that this task has
1815                  * already been aborted in which case
1816                  * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1817                  * for each buffer with idb_in_transport==B_TRUE.  To
1818                  * close this window and ensure that this doesn't happen,
1819                  * we'll clear idb->idb_in_transport now while holding
1820                  * the task mutex.   This is only really an issue for
1821                  * SCSI task abort -- if tasks were being aborted because
1822                  * of a connection state change the state machine would
1823                  * have already stopped the receive thread.
1824                  */
1825                 mutex_enter(&idt->idt_mutex);
1826 
1827                 /*
1828                  * Release the task hold here (obtained in idm_task_find)
1829                  * because the task may complete synchronously during
1830                  * idm_buf_rx_from_ini_done.  Since we still have an active
1831                  * buffer we know there is at least one additional hold on idt.
1832                  */
1833                 idm_task_rele(idt);
1834 
1835                 /*
1836                  * idm_buf_rx_from_ini_done releases idt->idt_mutex
1837                  */
1838                 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1839                     uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
1840                     uint64_t, 0, uint32_t, 0, uint32_t, 0,
1841                     uint32_t, idb->idb_xfer_len,
1842                     int, XFER_BUF_RX_FROM_INI);
1843                 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1844                 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1845                 return;
1846         }
1847 
1848         idm_task_rele(idt);
1849         idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1850 }
1851 
1852 /*
1853  * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1854  * the R2T PDU sent by the iSCSI target indicating that it is ready to
1855  * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1856  * and looks up the task in the task tree using the itt to get the output
1857  * buffers associated the task. The R2T PDU contains the offset of the
1858  * requested data and the data length. This function then constructs a
1859  * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1860  * PDU is associated with the R2T by the Target Transfer Tag  (ttt).
1861  */
1862 
1863 static void
1864 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1865 {
1866         idm_task_t              *idt;
1867         idm_buf_t               *idb;
1868         iscsi_rtt_hdr_t         *rtt_hdr;
1869         uint32_t                data_offset;
1870         uint32_t                data_length;
1871 
1872         ASSERT(ic != NULL);
1873         ASSERT(pdu != NULL);
1874 
1875         rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1876         data_offset = ntohl(rtt_hdr->data_offset);
1877         data_length = ntohl(rtt_hdr->data_length);
1878         idt     = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1879 
1880         if (idt == NULL) {
1881                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1882                 idm_pdu_rx_protocol_error(ic, pdu);
1883                 return;
1884         }
1885 
1886         /* Find the buffer bound to the task by the iSCSI initiator */
1887         mutex_enter(&idt->idt_mutex);
1888         idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1889         if (idb == NULL) {
1890                 mutex_exit(&idt->idt_mutex);
1891                 idm_task_rele(idt);
1892                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1893                 idm_pdu_rx_protocol_error(ic, pdu);
1894                 return;
1895         }
1896 
1897         /* return buffer contains this data */
1898         if (data_offset + data_length > idb->idb_buflen) {
1899                 /* Overflow */
1900                 mutex_exit(&idt->idt_mutex);
1901                 idm_task_rele(idt);
1902                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1903                     "buffer");
1904                 idm_pdu_rx_protocol_error(ic, pdu);
1905                 return;
1906         }
1907 
1908         idt->idt_r2t_ttt = rtt_hdr->ttt;
1909         idt->idt_exp_datasn = 0;
1910 
1911         idm_so_send_rtt_data(ic, idt, idb, data_offset,
1912             ntohl(rtt_hdr->data_length));
1913         /*
1914          * the idt_mutex is released in idm_so_send_rtt_data
1915          */
1916 
1917         idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1918         idm_task_rele(idt);
1919 
1920 }
1921 
1922 idm_status_t
1923 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1924 {
1925         uint8_t         pad[ISCSI_PAD_WORD_LEN];
1926         int             pad_len;
1927         uint32_t        data_digest_crc;
1928         uint32_t        crc_calculated;
1929         int             total_len;
1930         idm_so_conn_t   *so_conn;
1931 
1932         so_conn = ic->ic_transport_private;
1933 
1934         pad_len = ((ISCSI_PAD_WORD_LEN -
1935             (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1936             (ISCSI_PAD_WORD_LEN - 1));
1937 
1938         ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1939 
1940         total_len = pdu->isp_datalen;
1941 
1942         if (pad_len) {
1943                 pdu->isp_iov[pdu->isp_iovlen].iov_base    = (char *)&pad;
1944                 pdu->isp_iov[pdu->isp_iovlen].iov_len     = pad_len;
1945                 total_len               += pad_len;
1946                 pdu->isp_iovlen++;
1947         }
1948 
1949         /* setup data digest */
1950         if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1951                 pdu->isp_iov[pdu->isp_iovlen].iov_base =
1952                     (char *)&data_digest_crc;
1953                 pdu->isp_iov[pdu->isp_iovlen].iov_len =
1954                     sizeof (data_digest_crc);
1955                 total_len               += sizeof (data_digest_crc);
1956                 pdu->isp_iovlen++;
1957         }
1958 
1959         pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1960 
1961         if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1962             pdu->isp_iovlen, total_len) != 0) {
1963                 return (IDM_STATUS_IO);
1964         }
1965 
1966         if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1967                 crc_calculated = idm_crc32c(pdu->isp_data,
1968                     pdu->isp_datalen);
1969                 if (pad_len) {
1970                         crc_calculated = idm_crc32c_continued((char *)&pad,
1971                             pad_len, crc_calculated);
1972                 }
1973                 if (crc_calculated != data_digest_crc) {
1974                         IDM_CONN_LOG(CE_WARN,
1975                             "idm_sorecvdata: "
1976                             "CRC error: actual 0x%x, calc 0x%x",
1977                             data_digest_crc, crc_calculated);
1978 
1979                         /* Invalid Data Digest */
1980                         return (IDM_STATUS_DATA_DIGEST);
1981                 }
1982         }
1983 
1984         return (IDM_STATUS_SUCCESS);
1985 }
1986 
1987 /*
1988  * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1989  * Data-type PDU header must be read into the idm_pdu_t structure prior to
1990  * calling this function.
1991  */
1992 idm_status_t
1993 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1994 {
1995         iscsi_data_hdr_t        *bhs;
1996         idm_task_t              *task;
1997         uint32_t                offset;
1998         uint8_t                 opcode;
1999         uint32_t                dlength;
2000         list_t                  *buflst;
2001         uint32_t                xfer_bytes;
2002         idm_status_t            status;
2003 
2004         ASSERT(ic != NULL);
2005         ASSERT(pdu != NULL);
2006 
2007         bhs     = (iscsi_data_hdr_t *)pdu->isp_hdr;
2008 
2009         offset  = ntohl(bhs->offset);
2010         opcode  = IDM_PDU_OPCODE(pdu);
2011         dlength = n2h24(bhs->dlength);
2012 
2013         ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
2014             (opcode == ISCSI_OP_SCSI_DATA));
2015 
2016         /*
2017          * Successful lookup implicitly gets a "hold" on the task.  This
2018          * hold must be released before leaving this function.  At one
2019          * point we were caching this task context and retaining the hold
2020          * but it turned out to be very difficult to release the hold properly.
2021          * The task can be aborted and the connection shutdown between this
2022          * call and the subsequent expected call to idm_so_rx_datain/
2023          * idm_so_rx_dataout (in which case those functions are not called).
2024          * Releasing the hold in the PDU callback doesn't work well either
2025          * because the whole task may be completed by then at which point
2026          * it is too late to release the hold -- for better or worse this
2027          * code doesn't wait on the refcnts during normal operation.
2028          * idm_task_find() is very fast and it is not a huge burden if we
2029          * have to do it twice.
2030          */
2031         task = idm_task_find(ic, bhs->itt, bhs->ttt);
2032         if (task == NULL) {
2033                 IDM_CONN_LOG(CE_WARN,
2034                     "idm_sorecv_scsidata: could not find task");
2035                 return (IDM_STATUS_FAIL);
2036         }
2037 
2038         mutex_enter(&task->idt_mutex);
2039         buflst  = (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
2040             &task->idt_inbufv : &task->idt_outbufv;
2041         pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
2042         mutex_exit(&task->idt_mutex);
2043 
2044         if (pdu->isp_sorx_buf == NULL) {
2045                 idm_task_rele(task);
2046                 IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
2047                     "buffer for offset %x opcode=%x",
2048                     offset, opcode);
2049                 return (IDM_STATUS_FAIL);
2050         }
2051 
2052         xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
2053         ASSERT(xfer_bytes != 0);
2054         if (xfer_bytes != dlength) {
2055                 idm_task_rele(task);
2056                 /*
2057                  * Buffer overflow, connection error.  The PDU data is still
2058                  * sitting in the socket so we can't use the connection
2059                  * again until that data is drained.
2060                  */
2061                 return (IDM_STATUS_FAIL);
2062         }
2063 
2064         status = idm_sorecvdata(ic, pdu);
2065 
2066         idm_task_rele(task);
2067 
2068         return (status);
2069 }
2070 
2071 static uint32_t
2072 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
2073 {
2074         uint32_t        buf_ro = ro - idb->idb_bufoffset;
2075         uint32_t        xfer_len = min(dlength, idb->idb_buflen - buf_ro);
2076 
2077         ASSERT(ro >= idb->idb_bufoffset);
2078 
2079         pdu->isp_iov[pdu->isp_iovlen].iov_base    =
2080             (caddr_t)idb->idb_buf + buf_ro;
2081         pdu->isp_iov[pdu->isp_iovlen].iov_len     = xfer_len;
2082         pdu->isp_iovlen++;
2083 
2084         return (xfer_len);
2085 }
2086 
2087 int
2088 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
2089 {
2090         pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
2091         ASSERT(pdu->isp_data != NULL);
2092 
2093         pdu->isp_databuflen = pdu->isp_datalen;
2094         pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
2095         pdu->isp_iov[0].iov_len = pdu->isp_datalen;
2096         pdu->isp_iovlen = 1;
2097         /*
2098          * Since we are associating a new data buffer with this received
2099          * PDU we need to set a specific callback to free the data
2100          * after the PDU is processed.
2101          */
2102         pdu->isp_flags |= IDM_PDU_ADDL_DATA;
2103         pdu->isp_callback = idm_sorx_addl_pdu_cb;
2104 
2105         return (idm_sorecvdata(ic, pdu));
2106 }
2107 
2108 void
2109 idm_sorx_thread(void *arg)
2110 {
2111         boolean_t       conn_failure = B_FALSE;
2112         idm_conn_t      *ic = (idm_conn_t *)arg;
2113         idm_so_conn_t   *so_conn;
2114         idm_pdu_t       *pdu;
2115         idm_status_t    rc;
2116 
2117         idm_conn_hold(ic);
2118 
2119         mutex_enter(&ic->ic_mutex);
2120 
2121         so_conn = ic->ic_transport_private;
2122         so_conn->ic_rx_thread_running = B_TRUE;
2123         so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
2124         cv_signal(&ic->ic_cv);
2125 
2126         while (so_conn->ic_rx_thread_running) {
2127                 mutex_exit(&ic->ic_mutex);
2128 
2129                 /*
2130                  * Get PDU with default header size (large enough for
2131                  * BHS plus any anticipated AHS).  PDU from
2132                  * the cache will have all values set correctly
2133                  * for sockets RX including callback.
2134                  */
2135                 pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
2136                 pdu->isp_ic = ic;
2137                 pdu->isp_flags = 0;
2138                 pdu->isp_transport_hdrlen = 0;
2139 
2140                 if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
2141                         /*
2142                          * Call idm_pdu_complete so that we call the callback
2143                          * and ensure any memory allocated in idm_sorecvhdr
2144                          * gets freed up.
2145                          */
2146                         idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2147 
2148                         /*
2149                          * If ic_rx_thread_running is still set then
2150                          * this is some kind of connection problem
2151                          * on the socket.  In this case we want to
2152                          * generate an event.  Otherwise some other
2153                          * thread closed the socket due to another
2154                          * issue in which case we don't need to
2155                          * generate an event.
2156                          */
2157                         mutex_enter(&ic->ic_mutex);
2158                         if (so_conn->ic_rx_thread_running) {
2159                                 conn_failure = B_TRUE;
2160                                 so_conn->ic_rx_thread_running = B_FALSE;
2161                         }
2162 
2163                         continue;
2164                 }
2165 
2166                 /*
2167                  * Header has been read and validated.  Now we need
2168                  * to read the PDU data payload (if present).  SCSI data
2169                  * need to be transferred from the socket directly into
2170                  * the associated transfer buffer for the SCSI task.
2171                  */
2172                 if (pdu->isp_datalen != 0) {
2173                         if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
2174                             (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
2175                                 rc = idm_sorecv_scsidata(ic, pdu);
2176                                 /*
2177                                  * All SCSI errors are fatal to the
2178                                  * connection right now since we have no
2179                                  * place to put the data.  What we need
2180                                  * is some kind of sink to dispose of unwanted
2181                                  * SCSI data.  For example an invalid task tag
2182                                  * should not kill the connection (although
2183                                  * we may want to drop the connection).
2184                                  */
2185                         } else {
2186                                 /*
2187                                  * Not data PDUs so allocate a buffer for the
2188                                  * data segment and read the remaining data.
2189                                  */
2190                                 rc = idm_sorecv_nonscsidata(ic, pdu);
2191                         }
2192                         if (rc != 0) {
2193                                 /*
2194                                  * Call idm_pdu_complete so that we call the
2195                                  * callback and ensure any memory allocated
2196                                  * in idm_sorecvhdr gets freed up.
2197                                  */
2198                                 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2199 
2200                                 /*
2201                                  * If ic_rx_thread_running is still set then
2202                                  * this is some kind of connection problem
2203                                  * on the socket.  In this case we want to
2204                                  * generate an event.  Otherwise some other
2205                                  * thread closed the socket due to another
2206                                  * issue in which case we don't need to
2207                                  * generate an event.
2208                                  */
2209                                 mutex_enter(&ic->ic_mutex);
2210                                 if (so_conn->ic_rx_thread_running) {
2211                                         conn_failure = B_TRUE;
2212                                         so_conn->ic_rx_thread_running = B_FALSE;
2213                                 }
2214                                 continue;
2215                         }
2216                 }
2217 
2218                 /*
2219                  * Process RX PDU
2220                  */
2221                 idm_pdu_rx(ic, pdu);
2222 
2223                 mutex_enter(&ic->ic_mutex);
2224         }
2225 
2226         mutex_exit(&ic->ic_mutex);
2227 
2228         /*
2229          * If we dropped out of the RX processing loop because of
2230          * a socket problem or other connection failure (including
2231          * digest errors) then we need to generate a state machine
2232          * event to shut the connection down.
2233          * If the state machine is already in, for example, INIT_ERROR, this
2234          * event will get dropped, and the TX thread will never be notified
2235          * to shut down.  To be safe, we'll just notify it here.
2236          */
2237         if (conn_failure) {
2238                 if (so_conn->ic_tx_thread_running) {
2239                         so_conn->ic_tx_thread_running = B_FALSE;
2240                         mutex_enter(&so_conn->ic_tx_mutex);
2241                         cv_signal(&so_conn->ic_tx_cv);
2242                         mutex_exit(&so_conn->ic_tx_mutex);
2243                 }
2244 
2245                 idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
2246         }
2247 
2248         idm_conn_rele(ic);
2249 
2250         thread_exit();
2251 }
2252 
2253 /*
2254  * idm_so_tx
2255  *
2256  * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2257  * point.  By definition, it is supposed to be fast.  So, simply queue
2258  * the entry and return.  The real work is done by idm_i_so_tx() via
2259  * idm_sotx_thread().
2260  */
2261 
2262 static void
2263 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2264 {
2265         idm_so_conn_t *so_conn = ic->ic_transport_private;
2266 
2267         ASSERT(pdu->isp_ic == ic);
2268         mutex_enter(&so_conn->ic_tx_mutex);
2269 
2270         if (!so_conn->ic_tx_thread_running) {
2271                 mutex_exit(&so_conn->ic_tx_mutex);
2272                 idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2273                 return;
2274         }
2275 
2276         list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2277         cv_signal(&so_conn->ic_tx_cv);
2278         mutex_exit(&so_conn->ic_tx_mutex);
2279 }
2280 
2281 static idm_status_t
2282 idm_i_so_tx(idm_pdu_t *pdu)
2283 {
2284         idm_conn_t      *ic = pdu->isp_ic;
2285         idm_status_t    status = IDM_STATUS_SUCCESS;
2286         uint8_t         pad[ISCSI_PAD_WORD_LEN];
2287         int             pad_len;
2288         uint32_t        hdr_digest_crc;
2289         uint32_t        data_digest_crc = 0;
2290         int             total_len = 0;
2291         int             iovlen = 0;
2292         struct iovec    iov[6];
2293         idm_so_conn_t   *so_conn;
2294 
2295         so_conn = ic->ic_transport_private;
2296 
2297         /* Setup BHS */
2298         iov[iovlen].iov_base    = (caddr_t)pdu->isp_hdr;
2299         iov[iovlen].iov_len     = pdu->isp_hdrlen;
2300         total_len               += iov[iovlen].iov_len;
2301         iovlen++;
2302 
2303         /* Setup header digest */
2304         if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2305             (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2306                 hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2307 
2308                 iov[iovlen].iov_base    = (caddr_t)&hdr_digest_crc;
2309                 iov[iovlen].iov_len     = sizeof (hdr_digest_crc);
2310                 total_len               += iov[iovlen].iov_len;
2311                 iovlen++;
2312         }
2313 
2314         /* Setup the data */
2315         if (pdu->isp_datalen) {
2316                 idm_task_t              *idt;
2317                 idm_buf_t               *idb;
2318                 iscsi_data_hdr_t        *ihp;
2319                 ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2320                 /* Write of immediate data */
2321                 if (ic->ic_ffp &&
2322                     (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_CMD ||
2323                     IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA)) {
2324                         idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2325                         if (idt) {
2326                                 mutex_enter(&idt->idt_mutex);
2327                                 idb = idm_buf_find(&idt->idt_outbufv, 0);
2328                                 mutex_exit(&idt->idt_mutex);
2329                                 /*
2330                                  * If the initiator call to idm_buf_alloc
2331                                  * failed then we can get to this point
2332                                  * without a bound buffer.  The associated
2333                                  * connection failure will clean things up
2334                                  * later.  It would be nice to come up with
2335                                  * a cleaner way to handle this.  In
2336                                  * particular it seems absurd to look up
2337                                  * the task and the buffer just to update
2338                                  * this counter.
2339                                  */
2340                                 if (idb)
2341                                         idb->idb_xfer_len += pdu->isp_datalen;
2342                                 idm_task_rele(idt);
2343                         }
2344                 }
2345 
2346                 iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2347                 iov[iovlen].iov_len  = pdu->isp_datalen;
2348                 total_len += iov[iovlen].iov_len;
2349                 iovlen++;
2350         }
2351 
2352         /* Setup the data pad if necessary */
2353         pad_len = ((ISCSI_PAD_WORD_LEN -
2354             (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2355             (ISCSI_PAD_WORD_LEN - 1));
2356 
2357         if (pad_len) {
2358                 bzero(pad, sizeof (pad));
2359                 iov[iovlen].iov_base = (void *)&pad;
2360                 iov[iovlen].iov_len  = pad_len;
2361                 total_len               += iov[iovlen].iov_len;
2362                 iovlen++;
2363         }
2364 
2365         /*
2366          * Setup the data digest if enabled.  Data-digest is not sent
2367          * for login-phase PDUs.
2368          */
2369         if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2370             ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2371             (pdu->isp_datalen || pad_len)) {
2372                 /*
2373                  * RFC3720/10.2.3: A zero-length Data Segment also
2374                  * implies a zero-length data digest.
2375                  */
2376                 if (pdu->isp_datalen) {
2377                         data_digest_crc = idm_crc32c(pdu->isp_data,
2378                             pdu->isp_datalen);
2379                 }
2380                 if (pad_len) {
2381                         data_digest_crc = idm_crc32c_continued(&pad,
2382                             pad_len, data_digest_crc);
2383                 }
2384 
2385                 iov[iovlen].iov_base    = (caddr_t)&data_digest_crc;
2386                 iov[iovlen].iov_len     = sizeof (data_digest_crc);
2387                 total_len               += iov[iovlen].iov_len;
2388                 iovlen++;
2389         }
2390 
2391         /* Transmit the PDU */
2392         if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2393             total_len) != 0) {
2394                 /* Set error status */
2395                 IDM_CONN_LOG(CE_WARN,
2396                     "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2397                     "data: %p", (void *) so_conn->ic_so, (void *) ic,
2398                     (void *) pdu->isp_data);
2399                 status = IDM_STATUS_IO;
2400         }
2401 
2402         /*
2403          * Success does not mean that the PDU actually reached the
2404          * remote node since it could get dropped along the way.
2405          */
2406         idm_pdu_complete(pdu, status);
2407 
2408         return (status);
2409 }
2410 
2411 /*
2412  * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2413  * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2414  * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2415  * A target can invoke this function multiple times for a single read command
2416  * (identified by the same ITT) to split the input into several sequences.
2417  *
2418  * DataSN starts with 0 for the first data PDU of an input command and advances
2419  * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2420  * which is set to 1 for the last data PDU of a sequence.
2421  * If the initiator supports phase collapse, the status bit must be set along
2422  * with the F bit to indicate that the status is shipped together with the last
2423  * Data-In PDU.
2424  *
2425  * The data PDUs within a sequence will be sent in order with the buffer offset
2426  * in increasing order. i.e. initiator and target must have negotiated the
2427  * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2428  *
2429  * Caller holds idt->idt_mutex
2430  */
2431 static idm_status_t
2432 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2433 {
2434         idm_so_conn_t   *so_conn = idb->idb_ic->ic_transport_private;
2435         idm_pdu_t       tmppdu;
2436 
2437         ASSERT(mutex_owned(&idt->idt_mutex));
2438 
2439         /*
2440          * Put the idm_buf_t on the tx queue.  It will be transmitted by
2441          * idm_sotx_thread.
2442          */
2443         mutex_enter(&so_conn->ic_tx_mutex);
2444 
2445         DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2446             uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2447             uint64_t, 0, uint32_t, 0, uint32_t, 0,
2448             uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2449 
2450         if (!so_conn->ic_tx_thread_running) {
2451                 mutex_exit(&so_conn->ic_tx_mutex);
2452                 /*
2453                  * Don't release idt->idt_mutex since we're supposed to hold
2454                  * in when calling idm_buf_tx_to_ini_done
2455                  */
2456                 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2457                     uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2458                     uint64_t, 0, uint32_t, 0, uint32_t, 0,
2459                     uint32_t, idb->idb_xfer_len,
2460                     int, XFER_BUF_TX_TO_INI);
2461                 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2462                 return (IDM_STATUS_FAIL);
2463         }
2464 
2465         /*
2466          * Build a template for the data PDU headers we will use so that
2467          * the SN values will stay consistent with other PDU's we are
2468          * transmitting like R2T and SCSI status.
2469          */
2470         bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2471         tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2472         (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2473             ISCSI_OP_SCSI_DATA_RSP);
2474         idb->idb_tx_thread = B_TRUE;
2475         list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2476         cv_signal(&so_conn->ic_tx_cv);
2477         mutex_exit(&so_conn->ic_tx_mutex);
2478         mutex_exit(&idt->idt_mutex);
2479 
2480         /*
2481          * Returning success here indicates the transfer was successfully
2482          * dispatched -- it does not mean that the transfer completed
2483          * successfully.
2484          */
2485         return (IDM_STATUS_SUCCESS);
2486 }
2487 
2488 /*
2489  * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2490  * data blocks it is ready to receive from the initiator in response to a WRITE
2491  * SCSI command. The target iSCSI layer passes the information about the desired
2492  * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2493  * offset and datalen are passed via the 'idb' argument.
2494  *
2495  * Scope for Prototype build:
2496  * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2497  * negotiated the "InitialR2T" to "Yes".
2498  *
2499  * Caller holds idt->idt_mutex
2500  */
2501 static idm_status_t
2502 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2503 {
2504         idm_pdu_t               *pdu;
2505         iscsi_rtt_hdr_t         *rtt;
2506 
2507         ASSERT(mutex_owned(&idt->idt_mutex));
2508 
2509         DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2510             uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2511             uint64_t, 0, uint32_t, 0, uint32_t, 0,
2512             uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2513 
2514         pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2515         pdu->isp_ic = idt->idt_ic;
2516         pdu->isp_flags = IDM_PDU_SET_STATSN;
2517         bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2518 
2519         /* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
2520         (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2521 
2522         /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2523         rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2524 
2525         rtt->opcode          = ISCSI_OP_RTT_RSP;
2526         rtt->flags           = ISCSI_FLAG_FINAL;
2527         rtt->data_offset     = htonl(idb->idb_bufoffset);
2528         rtt->data_length     = htonl(idb->idb_xfer_len);
2529         rtt->rttsn           = htonl(idt->idt_exp_rttsn++);
2530 
2531         /* Keep track of buffer offsets */
2532         idb->idb_exp_offset  = idb->idb_bufoffset;
2533         mutex_exit(&idt->idt_mutex);
2534 
2535         /*
2536          * Transmit the PDU.
2537          */
2538         idm_pdu_tx(pdu);
2539 
2540         return (IDM_STATUS_SUCCESS);
2541 }
2542 
2543 static idm_status_t
2544 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2545 {
2546         if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2547                 idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2548                     KM_NOSLEEP);
2549                 idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2550         } else {
2551                 idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2552                 idb->idb_buf_private = NULL;
2553         }
2554 
2555         if (idb->idb_buf == NULL) {
2556                 IDM_CONN_LOG(CE_NOTE,
2557                     "idm_so_buf_alloc: failed buffer allocation");
2558                 return (IDM_STATUS_FAIL);
2559         }
2560 
2561         return (IDM_STATUS_SUCCESS);
2562 }
2563 
2564 /* ARGSUSED */
2565 static idm_status_t
2566 idm_so_buf_setup(idm_buf_t *idb)
2567 {
2568         /* Ensure bufalloc'd flag is unset */
2569         idb->idb_bufalloc = B_FALSE;
2570 
2571         return (IDM_STATUS_SUCCESS);
2572 }
2573 
2574 /* ARGSUSED */
2575 static void
2576 idm_so_buf_teardown(idm_buf_t *idb)
2577 {
2578         /* nothing to do here */
2579 }
2580 
2581 static void
2582 idm_so_buf_free(idm_buf_t *idb)
2583 {
2584         if (idb->idb_buf_private == NULL) {
2585                 kmem_free(idb->idb_buf, idb->idb_buflen);
2586         } else {
2587                 kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2588         }
2589 }
2590 
2591 static void
2592 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2593     uint32_t offset, uint32_t length)
2594 {
2595         idm_so_conn_t   *so_conn = ic->ic_transport_private;
2596         idm_pdu_t       tmppdu;
2597         idm_buf_t       *rtt_buf;
2598 
2599         ASSERT(mutex_owned(&idt->idt_mutex));
2600 
2601         /*
2602          * Allocate a buffer to represent the RTT transfer.  We could further
2603          * optimize this by allocating the buffers internally from an rtt
2604          * specific buffer cache since this is socket-specific code but for
2605          * now we will keep it simple.
2606          */
2607         rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2608         if (rtt_buf == NULL) {
2609                 /*
2610                  * If we're in FFP then the failure was likely a resource
2611                  * allocation issue and we should close the connection by
2612                  * sending a CE_TRANSPORT_FAIL event.
2613                  *
2614                  * If we're not in FFP then idm_buf_alloc will always
2615                  * fail and the state is transitioning to "complete" anyway
2616                  * so we won't bother to send an event.
2617                  */
2618                 mutex_enter(&ic->ic_state_mutex);
2619                 if (ic->ic_ffp)
2620                         idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2621                             NULL, CT_NONE);
2622                 mutex_exit(&ic->ic_state_mutex);
2623                 mutex_exit(&idt->idt_mutex);
2624                 return;
2625         }
2626 
2627         rtt_buf->idb_buf_cb = NULL;
2628         rtt_buf->idb_cb_arg = NULL;
2629         rtt_buf->idb_bufoffset = offset;
2630         rtt_buf->idb_xfer_len = length;
2631         rtt_buf->idb_ic = idt->idt_ic;
2632         rtt_buf->idb_task_binding = idt;
2633 
2634         /*
2635          * The new buffer (if any) represents an additional
2636          * reference on the task
2637          */
2638         idm_task_hold(idt);
2639         mutex_exit(&idt->idt_mutex);
2640 
2641         /*
2642          * Put the idm_buf_t on the tx queue.  It will be transmitted by
2643          * idm_sotx_thread.
2644          */
2645         mutex_enter(&so_conn->ic_tx_mutex);
2646 
2647         if (!so_conn->ic_tx_thread_running) {
2648                 idm_buf_free(rtt_buf);
2649                 mutex_exit(&so_conn->ic_tx_mutex);
2650                 idm_task_rele(idt);
2651                 return;
2652         }
2653 
2654         /*
2655          * Build a template for the data PDU headers we will use so that
2656          * the SN values will stay consistent with other PDU's we are
2657          * transmitting like R2T and SCSI status.
2658          */
2659         bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2660         tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2661         (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2662             ISCSI_OP_SCSI_DATA);
2663         rtt_buf->idb_tx_thread = B_TRUE;
2664         rtt_buf->idb_in_transport = B_TRUE;
2665         list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2666         cv_signal(&so_conn->ic_tx_cv);
2667         mutex_exit(&so_conn->ic_tx_mutex);
2668 }
2669 
2670 static void
2671 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
2672 {
2673         /*
2674          * Don't worry about status -- we assume any error handling
2675          * is performed by the caller (idm_sotx_thread).
2676          */
2677         idb->idb_in_transport = B_FALSE;
2678         idm_task_rele(idt);
2679         idm_buf_free(idb);
2680 }
2681 
2682 static idm_status_t
2683 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
2684     uint32_t buf_region_offset, uint32_t buf_region_length)
2685 {
2686         idm_conn_t              *ic;
2687         uint32_t                max_dataseglen;
2688         size_t                  remainder, chunk;
2689         uint32_t                data_offset = buf_region_offset;
2690         iscsi_data_hdr_t        *bhs;
2691         idm_pdu_t               *pdu;
2692         idm_status_t            tx_status;
2693 
2694         ASSERT(mutex_owned(&idt->idt_mutex));
2695 
2696         ic = idt->idt_ic;
2697 
2698         max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
2699         remainder = buf_region_length;
2700 
2701         while (remainder) {
2702                 if (idt->idt_state != TASK_ACTIVE) {
2703                         ASSERT((idt->idt_state != TASK_IDLE) &&
2704                             (idt->idt_state != TASK_COMPLETE));
2705                         return (IDM_STATUS_ABORTED);
2706                 }
2707 
2708                 /* check to see if we need to chunk the data */
2709                 if (remainder > max_dataseglen) {
2710                         chunk = max_dataseglen;
2711                 } else {
2712                         chunk = remainder;
2713                 }
2714 
2715                 /* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2716                 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2717                 pdu->isp_ic = ic;
2718                 pdu->isp_flags = 0;  /* initialize isp_flags */
2719 
2720                 /*
2721                  * We've already built a build a header template
2722                  * to use during the transfer.  Use this template so that
2723                  * the SN values stay consistent with any unrelated PDU's
2724                  * being transmitted.
2725                  */
2726                 bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2727                     sizeof (iscsi_hdr_t));
2728 
2729                 /*
2730                  * Set DataSN, data offset, and flags in BHS
2731                  * For the prototype build, A = 0, S = 0, U = 0
2732                  */
2733                 bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2734 
2735                 bhs->datasn          = htonl(idt->idt_exp_datasn++);
2736 
2737                 hton24(bhs->dlength, chunk);
2738                 bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2739 
2740                 /* setup data */
2741                 pdu->isp_data        =  (uint8_t *)idb->idb_buf + data_offset;
2742                 pdu->isp_datalen = (uint_t)chunk;
2743 
2744                 if (chunk == remainder) {
2745                         bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2746                         /* Piggyback the status with the last data PDU */
2747                         if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
2748                                 pdu->isp_flags |= IDM_PDU_SET_STATSN |
2749                                     IDM_PDU_ADVANCE_STATSN;
2750                                 (*idt->idt_ic->ic_conn_ops.icb_update_statsn)
2751                                     (idt, pdu);
2752                                 idt->idt_flags |=
2753                                     IDM_TASK_PHASECOLLAPSE_SUCCESS;
2754 
2755                         }
2756                 }
2757 
2758                 remainder       -= chunk;
2759                 data_offset     += chunk;
2760 
2761                 /* Instrument the data-send DTrace probe. */
2762                 if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
2763                         DTRACE_ISCSI_2(data__send,
2764                             idm_conn_t *, idt->idt_ic,
2765                             iscsi_data_rsp_hdr_t *,
2766                             (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
2767                 }
2768 
2769                 /*
2770                  * Now that we're done working with idt_exp_datasn,
2771                  * idt->idt_state and idb->idb_bufoffset we can release
2772                  * the task lock -- don't want to hold it across the
2773                  * call to idm_i_so_tx since we could block.
2774                  */
2775                 mutex_exit(&idt->idt_mutex);
2776 
2777                 /*
2778                  * Transmit the PDU.  Call the internal routine directly
2779                  * as there is already implicit ordering.
2780                  */
2781                 if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
2782                         mutex_enter(&idt->idt_mutex);
2783                         return (tx_status);
2784                 }
2785 
2786                 mutex_enter(&idt->idt_mutex);
2787                 idt->idt_tx_bytes += chunk;
2788         }
2789 
2790         return (IDM_STATUS_SUCCESS);
2791 }
2792 
2793 /*
2794  * TX PDU cache
2795  */
2796 /* ARGSUSED */
2797 int
2798 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2799 {
2800         idm_pdu_t       *pdu = hdl;
2801 
2802         bzero(pdu, sizeof (idm_pdu_t));
2803         pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2804         pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2805         pdu->isp_callback = idm_sotx_cache_pdu_cb;
2806         pdu->isp_magic = IDM_PDU_MAGIC;
2807         bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2808 
2809         return (0);
2810 }
2811 
2812 /* ARGSUSED */
2813 void
2814 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2815 {
2816         /* reset values between use */
2817         pdu->isp_datalen = 0;
2818 
2819         kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
2820 }
2821 
2822 /*
2823  * RX PDU cache
2824  */
2825 /* ARGSUSED */
2826 int
2827 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2828 {
2829         idm_pdu_t       *pdu = hdl;
2830 
2831         bzero(pdu, sizeof (idm_pdu_t));
2832         pdu->isp_magic = IDM_PDU_MAGIC;
2833         pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2834         pdu->isp_callback = idm_sorx_cache_pdu_cb;
2835 
2836         return (0);
2837 }
2838 
2839 /* ARGSUSED */
2840 static void
2841 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2842 {
2843         pdu->isp_iovlen = 0;
2844         pdu->isp_sorx_buf = 0;
2845         kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
2846 }
2847 
2848 static void
2849 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2850 {
2851         /*
2852          * We had to modify our cached RX PDU with a longer header buffer
2853          * and/or a longer data buffer.  Release the new buffers and fix
2854          * the fields back to what we would expect for a cached RX PDU.
2855          */
2856         if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2857                 kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2858         }
2859         if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2860                 kmem_free(pdu->isp_data, pdu->isp_datalen);
2861         }
2862         pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2863         pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2864         pdu->isp_data = NULL;
2865         pdu->isp_datalen = 0;
2866         pdu->isp_sorx_buf = 0;
2867         pdu->isp_callback = idm_sorx_cache_pdu_cb;
2868         idm_sorx_cache_pdu_cb(pdu, status);
2869 }
2870 
2871 /*
2872  * This thread is only active when I/O is queued for transmit
2873  * because the socket is busy.
2874  */
2875 void
2876 idm_sotx_thread(void *arg)
2877 {
2878         idm_conn_t      *ic = arg;
2879         idm_tx_obj_t    *object, *next;
2880         idm_so_conn_t   *so_conn;
2881         idm_status_t    status = IDM_STATUS_SUCCESS;
2882 
2883         idm_conn_hold(ic);
2884 
2885         mutex_enter(&ic->ic_mutex);
2886         so_conn = ic->ic_transport_private;
2887         so_conn->ic_tx_thread_running = B_TRUE;
2888         so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
2889         cv_signal(&ic->ic_cv);
2890         mutex_exit(&ic->ic_mutex);
2891 
2892         mutex_enter(&so_conn->ic_tx_mutex);
2893 
2894         while (so_conn->ic_tx_thread_running) {
2895                 while (list_is_empty(&so_conn->ic_tx_list)) {
2896                         DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
2897                         cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
2898                         DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
2899 
2900                         if (!so_conn->ic_tx_thread_running) {
2901                                 goto tx_bail;
2902                         }
2903                 }
2904 
2905                 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2906                 list_remove(&so_conn->ic_tx_list, object);
2907                 mutex_exit(&so_conn->ic_tx_mutex);
2908 
2909                 switch (object->idm_tx_obj_magic) {
2910                 case IDM_PDU_MAGIC: {
2911                         idm_pdu_t *pdu = (idm_pdu_t *)object;
2912                         DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
2913                             idm_pdu_t *, (idm_pdu_t *)object);
2914 
2915                         if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
2916                                 /* No IDM task */
2917                                 (ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
2918                         }
2919                         status = idm_i_so_tx((idm_pdu_t *)object);
2920                         break;
2921                 }
2922                 case IDM_BUF_MAGIC: {
2923                         idm_buf_t *idb = (idm_buf_t *)object;
2924                         idm_task_t *idt = idb->idb_task_binding;
2925 
2926                         DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
2927                             idm_buf_t *, idb);
2928 
2929                         mutex_enter(&idt->idt_mutex);
2930                         status = idm_so_send_buf_region(idt,
2931                             idb, 0, idb->idb_xfer_len);
2932 
2933                         /*
2934                          * TX thread owns the buffer so we expect it to
2935                          * be "in transport"
2936                          */
2937                         ASSERT(idb->idb_in_transport);
2938                         if (IDM_CONN_ISTGT(ic)) {
2939                                 /*
2940                                  * idm_buf_tx_to_ini_done releases
2941                                  * idt->idt_mutex
2942                                  */
2943                                 DTRACE_ISCSI_8(xfer__done,
2944                                     idm_conn_t *, idt->idt_ic,
2945                                     uintptr_t, idb->idb_buf,
2946                                     uint32_t, idb->idb_bufoffset,
2947                                     uint64_t, 0, uint32_t, 0, uint32_t, 0,
2948                                     uint32_t, idb->idb_xfer_len,
2949                                     int, XFER_BUF_TX_TO_INI);
2950                                 idm_buf_tx_to_ini_done(idt, idb, status);
2951                         } else {
2952                                 idm_so_send_rtt_data_done(idt, idb);
2953                                 mutex_exit(&idt->idt_mutex);
2954                         }
2955                         break;
2956                 }
2957 
2958                 default:
2959                         IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
2960                             "(0x%08x)", object->idm_tx_obj_magic);
2961                         status = IDM_STATUS_FAIL;
2962                 }
2963 
2964                 mutex_enter(&so_conn->ic_tx_mutex);
2965 
2966                 if (status != IDM_STATUS_SUCCESS) {
2967                         so_conn->ic_tx_thread_running = B_FALSE;
2968                         idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
2969                 }
2970         }
2971 
2972         /*
2973          * Before we leave, we need to abort every item remaining in the
2974          * TX list.
2975          */
2976 
2977 tx_bail:
2978         object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2979 
2980         while (object != NULL) {
2981                 next = list_next(&so_conn->ic_tx_list, object);
2982 
2983                 list_remove(&so_conn->ic_tx_list, object);
2984                 switch (object->idm_tx_obj_magic) {
2985                 case IDM_PDU_MAGIC:
2986                         idm_pdu_complete((idm_pdu_t *)object,
2987                             IDM_STATUS_ABORTED);
2988                         break;
2989 
2990                 case IDM_BUF_MAGIC: {
2991                         idm_buf_t *idb = (idm_buf_t *)object;
2992                         idm_task_t *idt = idb->idb_task_binding;
2993                         mutex_exit(&so_conn->ic_tx_mutex);
2994                         mutex_enter(&idt->idt_mutex);
2995                         /*
2996                          * TX thread owns the buffer so we expect it to
2997                          * be "in transport"
2998                          */
2999                         ASSERT(idb->idb_in_transport);
3000                         if (IDM_CONN_ISTGT(ic)) {
3001                                 /*
3002                                  * idm_buf_tx_to_ini_done releases
3003                                  * idt->idt_mutex
3004                                  */
3005                                 DTRACE_ISCSI_8(xfer__done,
3006                                     idm_conn_t *, idt->idt_ic,
3007                                     uintptr_t, idb->idb_buf,
3008                                     uint32_t, idb->idb_bufoffset,
3009                                     uint64_t, 0, uint32_t, 0, uint32_t, 0,
3010                                     uint32_t, idb->idb_xfer_len,
3011                                     int, XFER_BUF_TX_TO_INI);
3012                                 idm_buf_tx_to_ini_done(idt, idb,
3013                                     IDM_STATUS_ABORTED);
3014                         } else {
3015                                 idm_so_send_rtt_data_done(idt, idb);
3016                                 mutex_exit(&idt->idt_mutex);
3017                         }
3018                         mutex_enter(&so_conn->ic_tx_mutex);
3019                         break;
3020                 }
3021                 default:
3022                         IDM_CONN_LOG(CE_WARN,
3023                             "idm_sotx_thread: Unexpected magic "
3024                             "(0x%08x)", object->idm_tx_obj_magic);
3025                 }
3026 
3027                 object = next;
3028         }
3029 
3030         mutex_exit(&so_conn->ic_tx_mutex);
3031         idm_conn_rele(ic);
3032         thread_exit();
3033         /*NOTREACHED*/
3034 }
3035 
3036 static void
3037 idm_so_socket_set_nonblock(struct sonode *node)
3038 {
3039         (void) VOP_SETFL(node->so_vnode, node->so_flag,
3040             (node->so_state | FNONBLOCK), CRED(), NULL);
3041 }
3042 
3043 static void
3044 idm_so_socket_set_block(struct sonode *node)
3045 {
3046         (void) VOP_SETFL(node->so_vnode, node->so_flag,
3047             (node->so_state & (~FNONBLOCK)), CRED(), NULL);
3048 }
3049 
3050 
3051 /*
3052  * Called by kernel sockets when the connection has been accepted or
3053  * rejected. In early volo, a "disconnect" callback was sent instead of
3054  * "connectfailed", so we check for both.
3055  */
3056 /* ARGSUSED */
3057 void
3058 idm_so_timed_socket_connect_cb(ksocket_t ks,
3059     ksocket_callback_event_t ev, void *arg, uintptr_t info)
3060 {
3061         idm_so_timed_socket_t   *itp = arg;
3062         ASSERT(itp != NULL);
3063         ASSERT(ev == KSOCKET_EV_CONNECTED ||
3064             ev == KSOCKET_EV_CONNECTFAILED ||
3065             ev == KSOCKET_EV_DISCONNECTED);
3066 
3067         mutex_enter(&idm_so_timed_socket_mutex);
3068         itp->it_callback_called = B_TRUE;
3069         if (ev == KSOCKET_EV_CONNECTED) {
3070                 itp->it_socket_error_code = 0;
3071         } else {
3072                 /* Make sure the error code is non-zero on error */
3073                 if (info == 0)
3074                         info = ECONNRESET;
3075                 itp->it_socket_error_code = (int)info;
3076         }
3077         cv_signal(&itp->it_cv);
3078         mutex_exit(&idm_so_timed_socket_mutex);
3079 }
3080 
3081 int
3082 idm_so_timed_socket_connect(ksocket_t ks,
3083     struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
3084 {
3085         clock_t                 conn_login_max;
3086         int                     rc, nonblocking, rval;
3087         idm_so_timed_socket_t   it;
3088         ksocket_callbacks_t     ks_cb;
3089 
3090         conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
3091 
3092         /*
3093          * Set to non-block socket mode, with callback on connect
3094          * Early volo used "disconnected" instead of "connectfailed",
3095          * so set callback to look for both.
3096          */
3097         bzero(&it, sizeof (it));
3098         ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
3099             KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
3100         ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
3101         ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
3102         ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
3103         cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
3104         rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
3105         if (rc != 0)
3106                 return (rc);
3107 
3108         /* Set to non-blocking mode */
3109         nonblocking = 1;
3110         rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3111             CRED());
3112         if (rc != 0)
3113                 goto cleanup;
3114 
3115         bzero(&it, sizeof (it));
3116         for (;;) {
3117                 /*
3118                  * Warning -- in a loopback scenario, the call to
3119                  * the connect_cb can occur inside the call to
3120                  * ksocket_connect. Do not hold the mutex around the
3121                  * call to ksocket_connect.
3122                  */
3123                 rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
3124                 if (rc == 0 || rc == EISCONN) {
3125                         /* socket success or already success */
3126                         rc = 0;
3127                         break;
3128                 }
3129                 if ((rc != EINPROGRESS) && (rc != EALREADY)) {
3130                         break;
3131                 }
3132 
3133                 /* TCP connect still in progress. See if out of time. */
3134                 if (ddi_get_lbolt() > conn_login_max) {
3135                         /*
3136                          * Connection retry timeout,
3137                          * failed connect to target.
3138                          */
3139                         rc = ETIMEDOUT;
3140                         break;
3141                 }
3142 
3143                 /*
3144                  * TCP connect still in progress.  Sleep until callback.
3145                  * Do NOT go to sleep if the callback already occurred!
3146                  */
3147                 mutex_enter(&idm_so_timed_socket_mutex);
3148                 if (!it.it_callback_called) {
3149                         (void) cv_timedwait(&it.it_cv,
3150                             &idm_so_timed_socket_mutex, conn_login_max);
3151                 }
3152                 if (it.it_callback_called) {
3153                         rc = it.it_socket_error_code;
3154                         mutex_exit(&idm_so_timed_socket_mutex);
3155                         break;
3156                 }
3157                 /* If timer expires, go call ksocket_connect one last time. */
3158                 mutex_exit(&idm_so_timed_socket_mutex);
3159         }
3160 
3161         /* resume blocking mode */
3162         nonblocking = 0;
3163         (void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3164             CRED());
3165 cleanup:
3166         (void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
3167         cv_destroy(&it.it_cv);
3168         if (rc != 0) {
3169                 idm_soshutdown(ks);
3170         }
3171         return (rc);
3172 }
3173 
3174 
3175 void
3176 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
3177 {
3178         int                     dp_addr_size;
3179         struct sockaddr_in      *sin;
3180         struct sockaddr_in6     *sin6;
3181 
3182         /* Build sockaddr_storage for this portal (idm_addr_t) */
3183         bzero(sa, sizeof (*sa));
3184         dp_addr_size = dportal->a_addr.i_insize;
3185         if (dp_addr_size == sizeof (struct in_addr)) {
3186                 /* IPv4 */
3187                 sa->ss_family = AF_INET;
3188                 sin = (struct sockaddr_in *)sa;
3189                 sin->sin_port = htons(dportal->a_port);
3190                 bcopy(&dportal->a_addr.i_addr.in4,
3191                     &sin->sin_addr, sizeof (struct in_addr));
3192         } else if (dp_addr_size == sizeof (struct in6_addr)) {
3193                 /* IPv6 */
3194                 sa->ss_family = AF_INET6;
3195                 sin6 = (struct sockaddr_in6 *)sa;
3196                 sin6->sin6_port = htons(dportal->a_port);
3197                 bcopy(&dportal->a_addr.i_addr.in6,
3198                     &sin6->sin6_addr, sizeof (struct in6_addr));
3199         } else {
3200                 ASSERT(0);
3201         }
3202 }
3203 
3204 
/*
 * Return a human-readable form of a sockaddr_storage, in the form
 * [ip-address].port (note that the separator before the port is a '.').
 * This is used in calls to logging functions.
 * If several calls to idm_sa_ntop are made within the same invocation
 * of a logging function, then each one needs its own buf.
 *
 * 'buf' (of 'size' bytes) receives the text and is also the return value.
 * The placeholder "[0].-1" is written instead when the address family is
 * neither AF_INET nor AF_INET6, when the address cannot be converted to
 * text, or when 'buf' is too small to hold the address plus the longest
 * possible port.
 */
const char *
idm_sa_ntop(const struct sockaddr_storage *sa,
    char *buf, size_t size)
{
	static const char bogus_ip[] = "[0].-1";
	char tmp[INET6_ADDRSTRLEN];
	const void *addr;
	unsigned int port;

	/* Pick out the address and port for the families we understand. */
	switch (sa->ss_family) {
	case AF_INET6: {
		const struct sockaddr_in6 *in6 =
		    (const struct sockaddr_in6 *)sa;

		addr = &in6->sin6_addr;
		port = ntohs(in6->sin6_port);
		break;
	}
	case AF_INET: {
		const struct sockaddr_in *in = (const struct sockaddr_in *)sa;

		addr = &in->sin_addr;
		port = ntohs(in->sin_port);
		break;
	}
	default:
		goto err;
	}

	/* Never run strlen() over tmp unless the conversion succeeded. */
	if (inet_ntop(sa->ss_family, addr, tmp, sizeof (tmp)) == NULL)
		goto err;

	/*
	 * Require room for the brackets, the separator, a five digit port
	 * and the terminating NUL (all counted by the sizeof) so that the
	 * result is never truncated.
	 */
	if (strlen(tmp) + sizeof ("[].65535") > size)
		goto err;

	(void) snprintf(buf, size, "[%s].%u", tmp, port);
	return (buf);

err:
	(void) snprintf(buf, size, "%s", bogus_ip);
	return (buf);
}