1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright (c) 2013 by Delphix. All rights reserved.
  27  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  28  * Copyright (c) 2017, Joyent, Inc.  All rights reserved.
  29  */
  30 
  31 #include <sys/conf.h>
  32 #include <sys/stat.h>
  33 #include <sys/file.h>
  34 #include <sys/ddi.h>
  35 #include <sys/sunddi.h>
  36 #include <sys/modctl.h>
  37 #include <sys/priv.h>
  38 #include <sys/cpuvar.h>
  39 #include <sys/socket.h>
  40 #include <sys/strsubr.h>
  41 #include <sys/sysmacros.h>
  42 #include <sys/sdt.h>
  43 #include <netinet/tcp.h>
  44 #include <inet/tcp.h>
  45 #include <sys/socketvar.h>
  46 #include <sys/pathname.h>
  47 #include <sys/fs/snode.h>
  48 #include <sys/fs/dv_node.h>
  49 #include <sys/vnode.h>
  50 #include <netinet/in.h>
  51 #include <net/if.h>
  52 #include <sys/sockio.h>
  53 #include <sys/ksocket.h>
  54 #include <sys/filio.h>            /* FIONBIO */
  55 #include <sys/iscsi_protocol.h>
  56 #include <sys/idm/idm.h>
  57 #include <sys/idm/idm_so.h>
  58 #include <sys/idm/idm_text.h>
  59 
  60 #define IN_PROGRESS_DELAY       1
  61 
  62 /*
  63  * in6addr_any is currently all zeroes, but use the macro in case this
  64  * ever changes.
  65  */
  66 static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
  67 
  68 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
  69 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
  70 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
  71 
  72 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
  73 static void idm_so_conn_destroy_common(idm_conn_t *ic);
  74 static void idm_so_conn_connect_common(idm_conn_t *ic);
  75 
  76 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
  77     boolean_t boot_conn);
  78 static void idm_set_postconnect_options(ksocket_t so);
  79 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
  80 
  81 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
  82 static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
  83     idm_buf_t *idb, uint32_t offset, uint32_t length);
  84 static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
  85 static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
  86     idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
  87 
  88 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
  89     uint32_t ro, uint32_t dlength);
  90 
  91 static idm_status_t idm_so_handle_digest(idm_conn_t *it,
  92     nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
  93 
  94 static void idm_so_socket_set_nonblock(struct sonode *node);
  95 static void idm_so_socket_set_block(struct sonode *node);
  96 
  97 /*
  98  * Transport ops prototypes
  99  */
 100 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
 101 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
 102 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
 103 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
 104 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
 105 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
 106 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
 107 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
 108     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
 109 static void idm_so_notice_key_values(idm_conn_t *it,
 110     nvlist_t *negotiated_nvl);
 111 static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
 112     nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
 113 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
 114     idm_transport_caps_t *caps);
 115 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
 116 static void idm_so_buf_free(idm_buf_t *idb);
 117 static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
 118 static void idm_so_buf_teardown(idm_buf_t *idb);
 119 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
 120 static void idm_so_tgt_svc_destroy(idm_svc_t *is);
 121 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
 122 static void idm_so_tgt_svc_offline(idm_svc_t *is);
 123 static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
 124 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
 125 static void idm_so_conn_disconnect(idm_conn_t *ic);
 126 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
 127 static void idm_so_ini_conn_destroy(idm_conn_t *ic);
 128 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
 129 
 130 /*
 131  * IDM Native Sockets transport operations
 132  */
 133 static
 134 idm_transport_ops_t idm_so_transport_ops = {
 135         idm_so_tx,                      /* it_tx_pdu */
 136         idm_so_buf_tx_to_ini,           /* it_buf_tx_to_ini */
 137         idm_so_buf_rx_from_ini,         /* it_buf_rx_from_ini */
 138         idm_so_rx_datain,               /* it_rx_datain */
 139         idm_so_rx_rtt,                  /* it_rx_rtt */
 140         idm_so_rx_dataout,              /* it_rx_dataout */
 141         NULL,                           /* it_alloc_conn_rsrc */
 142         NULL,                           /* it_free_conn_rsrc */
 143         NULL,                           /* it_tgt_enable_datamover */
 144         NULL,                           /* it_ini_enable_datamover */
 145         NULL,                           /* it_conn_terminate */
 146         idm_so_free_task_rsrc,          /* it_free_task_rsrc */
 147         idm_so_negotiate_key_values,    /* it_negotiate_key_values */
 148         idm_so_notice_key_values,       /* it_notice_key_values */
 149         idm_so_conn_is_capable,         /* it_conn_is_capable */
 150         idm_so_buf_alloc,               /* it_buf_alloc */
 151         idm_so_buf_free,                /* it_buf_free */
 152         idm_so_buf_setup,               /* it_buf_setup */
 153         idm_so_buf_teardown,            /* it_buf_teardown */
 154         idm_so_tgt_svc_create,          /* it_tgt_svc_create */
 155         idm_so_tgt_svc_destroy,         /* it_tgt_svc_destroy */
 156         idm_so_tgt_svc_online,          /* it_tgt_svc_online */
 157         idm_so_tgt_svc_offline,         /* it_tgt_svc_offline */
 158         idm_so_tgt_conn_destroy,        /* it_tgt_conn_destroy */
 159         idm_so_tgt_conn_connect,        /* it_tgt_conn_connect */
 160         idm_so_conn_disconnect,         /* it_tgt_conn_disconnect */
 161         idm_so_ini_conn_create,         /* it_ini_conn_create */
 162         idm_so_ini_conn_destroy,        /* it_ini_conn_destroy */
 163         idm_so_ini_conn_connect,        /* it_ini_conn_connect */
 164         idm_so_conn_disconnect,         /* it_ini_conn_disconnect */
 165         idm_so_declare_key_values       /* it_declare_key_values */
 166 };
 167 
 168 kmutex_t        idm_so_timed_socket_mutex;
 169 
 170 int32_t idm_so_sndbuf = IDM_SNDBUF_SIZE;
 171 int32_t idm_so_rcvbuf = IDM_RCVBUF_SIZE;
 172 
 173 /*
 174  * idm_so_init()
 175  * Sockets transport initialization
 176  */
 177 void
 178 idm_so_init(idm_transport_t *it)
 179 {
 180         /* Cache for IDM Data and R2T Transmit PDU's */
 181         idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
 182             sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
 183             &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
 184 
 185         /* Cache for IDM Receive PDU's */
 186         idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
 187             sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
 188             &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
 189 
 190         /* 128k buffer cache */
 191         idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
 192             IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
 193 
 194         /* Set the sockets transport ops */
 195         it->it_ops = &idm_so_transport_ops;
 196 
 197         mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
 198 
 199 }
 200 
 201 /*
 202  * idm_so_fini()
 203  * Sockets transport teardown
 204  */
 205 void
 206 idm_so_fini(void)
 207 {
 208         kmem_cache_destroy(idm.idm_so_128k_buf_cache);
 209         kmem_cache_destroy(idm.idm_sotx_pdu_cache);
 210         kmem_cache_destroy(idm.idm_sorx_pdu_cache);
 211         mutex_destroy(&idm_so_timed_socket_mutex);
 212 }
 213 
 214 ksocket_t
 215 idm_socreate(int domain, int type, int protocol)
 216 {
 217         ksocket_t ks;
 218 
 219         if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
 220             CRED())) {
 221                 return (ks);
 222         } else {
 223                 return (NULL);
 224         }
 225 }
 226 
 227 /*
 228  * idm_soshutdown will disconnect the socket and prevent subsequent PDU
 229  * reception and transmission.  The sonode still exists but its state
 230  * gets modified to indicate it is no longer connected.  Calls to
 231  * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used
 232  * regain control of a thread stuck in idm_sorecv.
 233  */
 234 void
 235 idm_soshutdown(ksocket_t so)
 236 {
 237         (void) ksocket_shutdown(so, SHUT_RDWR, CRED());
 238 }
 239 
 240 /*
 241  * idm_sodestroy releases all resources associated with a socket previously
 242  * created with idm_socreate.  The socket must be shutdown using
 243  * idm_soshutdown before the socket is destroyed with idm_sodestroy,
 244  * otherwise undefined behavior will result.
 245  */
 246 void
 247 idm_sodestroy(ksocket_t ks)
 248 {
 249         (void) ksocket_close(ks, CRED());
 250 }
 251 
 252 /*
 253  * Function to compare two addresses in sockaddr_storage format
 254  */
 255 
 256 int
 257 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
 258     const struct sockaddr_storage *cmp_ss2,
 259     boolean_t v4_mapped_as_v4,
 260     boolean_t compare_ports)
 261 {
 262         struct sockaddr_storage                 mapped_v4_ss1, mapped_v4_ss2;
 263         const struct sockaddr_storage           *ss1, *ss2;
 264         struct in_addr                          *in1, *in2;
 265         struct in6_addr                         *in61, *in62;
 266         int i;
 267 
 268         /*
 269          * Normalize V4-mapped IPv6 addresses into V4 format if
 270          * v4_mapped_as_v4 is B_TRUE.
 271          */
 272         ss1 = cmp_ss1;
 273         ss2 = cmp_ss2;
 274         if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
 275                 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
 276                 if (IN6_IS_ADDR_V4MAPPED(in61)) {
 277                         bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
 278                         mapped_v4_ss1.ss_family = AF_INET;
 279                         ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
 280                             ((struct sockaddr_in *)ss1)->sin_port;
 281                         IN6_V4MAPPED_TO_INADDR(in61,
 282                             &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
 283                         ss1 = &mapped_v4_ss1;
 284                 }
 285         }
 286         ss2 = cmp_ss2;
 287         if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
 288                 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
 289                 if (IN6_IS_ADDR_V4MAPPED(in62)) {
 290                         bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
 291                         mapped_v4_ss2.ss_family = AF_INET;
 292                         ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
 293                             ((struct sockaddr_in *)ss2)->sin_port;
 294                         IN6_V4MAPPED_TO_INADDR(in62,
 295                             &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
 296                         ss2 = &mapped_v4_ss2;
 297                 }
 298         }
 299 
 300         /*
 301          * Compare ports, then address family, then ip address
 302          */
 303         if (compare_ports &&
 304             (((struct sockaddr_in *)ss1)->sin_port !=
 305             ((struct sockaddr_in *)ss2)->sin_port)) {
 306                 if (((struct sockaddr_in *)ss1)->sin_port >
 307                     ((struct sockaddr_in *)ss2)->sin_port)
 308                         return (1);
 309                 else
 310                         return (-1);
 311         }
 312 
 313         /*
 314          * ports are the same
 315          */
 316         if (ss1->ss_family != ss2->ss_family) {
 317                 if (ss1->ss_family == AF_INET)
 318                         return (1);
 319                 else
 320                         return (-1);
 321         }
 322 
 323         /*
 324          * address families are the same
 325          */
 326         if (ss1->ss_family == AF_INET) {
 327                 in1 = &((struct sockaddr_in *)ss1)->sin_addr;
 328                 in2 = &((struct sockaddr_in *)ss2)->sin_addr;
 329 
 330                 if (in1->s_addr > in2->s_addr)
 331                         return (1);
 332                 else if (in1->s_addr < in2->s_addr)
 333                         return (-1);
 334                 else
 335                         return (0);
 336         } else if (ss1->ss_family == AF_INET6) {
 337                 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
 338                 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
 339 
 340                 for (i = 0; i < 4; i++) {
 341                         if (in61->s6_addr32[i] > in62->s6_addr32[i])
 342                                 return (1);
 343                         else if (in61->s6_addr32[i] < in62->s6_addr32[i])
 344                                 return (-1);
 345                 }
 346                 return (0);
 347         }
 348 
 349         return (1);
 350 }
 351 
 352 /*
 353  * IP address filter functions to flag addresses that should not
 354  * go out to initiators through discovery.
 355  */
 356 static boolean_t
 357 idm_v4_addr_okay(struct in_addr *in_addr)
 358 {
 359         in_addr_t addr = ntohl(in_addr->s_addr);
 360 
 361         if ((INADDR_NONE == addr) ||
 362             (IN_MULTICAST(addr)) ||
 363             ((addr >> IN_CLASSA_NSHIFT) == 0) ||
 364             ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
 365                 return (B_FALSE);
 366         }
 367         return (B_TRUE);
 368 }
 369 
 370 static boolean_t
 371 idm_v6_addr_okay(struct in6_addr *addr6)
 372 {
 373 
 374         if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
 375             (IN6_IS_ADDR_LOOPBACK(addr6)) ||
 376             (IN6_IS_ADDR_MULTICAST(addr6)) ||
 377             (IN6_IS_ADDR_V4MAPPED(addr6)) ||
 378             (IN6_IS_ADDR_V4COMPAT(addr6)) ||
 379             (IN6_IS_ADDR_LINKLOCAL(addr6))) {
 380                 return (B_FALSE);
 381         }
 382         return (B_TRUE);
 383 }
 384 
 385 /*
 386  * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
 387  * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
 388  */
 389 int
 390 idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
 391 {
 392         ksocket_t               so4, so6;
 393         struct lifnum           lifn;
 394         struct lifconf          lifc;
 395         struct lifreq           *lp;
 396         int                     rval;
 397         int                     numifs;
 398         int                     bufsize;
 399         void                    *buf;
 400         int                     i, j, n, rc;
 401         struct sockaddr_storage ss;
 402         struct sockaddr_in      *sin;
 403         struct sockaddr_in6     *sin6;
 404         idm_addr_t              *ip;
 405         idm_addr_list_t         *ipaddr = NULL;
 406         int                     size_ipaddr;
 407 
 408         *ipaddr_p = NULL;
 409         size_ipaddr = 0;
 410         buf = NULL;
 411 
 412         /* create an ipv4 and ipv6 UDP socket */
 413         if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
 414                 return (0);
 415         if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
 416                 idm_sodestroy(so6);
 417                 return (0);
 418         }
 419 
 420 
 421 retry_count:
 422         /* snapshot the current number of interfaces */
 423         lifn.lifn_family = PF_UNSPEC;
 424         lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
 425         lifn.lifn_count = 0;
 426         /* use vp6 for ioctls with unspecified families by default */
 427         if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
 428             != 0) {
 429                 goto cleanup;
 430         }
 431 
 432         numifs = lifn.lifn_count;
 433         if (numifs <= 0) {
 434                 goto cleanup;
 435         }
 436 
 437         /* allocate extra room in case more interfaces appear */
 438         numifs += 10;
 439 
 440         /* get the interface names and ip addresses */
 441         bufsize = numifs * sizeof (struct lifreq);
 442         buf = kmem_alloc(bufsize, KM_SLEEP);
 443 
 444         lifc.lifc_family = AF_UNSPEC;
 445         lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
 446         lifc.lifc_len = bufsize;
 447         lifc.lifc_buf = buf;
 448         rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
 449         if (rc != 0) {
 450                 goto cleanup;
 451         }
 452         /* if our extra room is used up, try again */
 453         if (bufsize <= lifc.lifc_len) {
 454                 kmem_free(buf, bufsize);
 455                 buf = NULL;
 456                 goto retry_count;
 457         }
 458         /* calc actual number of ifconfs */
 459         n = lifc.lifc_len / sizeof (struct lifreq);
 460 
 461         /* get ip address */
 462         if (n > 0) {
 463                 size_ipaddr = sizeof (idm_addr_list_t) +
 464                     (n - 1) * sizeof (idm_addr_t);
 465                 ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
 466         } else {
 467                 goto cleanup;
 468         }
 469 
 470         /*
 471          * Examine the array of interfaces and filter uninteresting ones
 472          */
 473         for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
 474 
 475                 /*
 476                  * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
 477                  */
 478                 ss = lp->lifr_addr;
 479                 /*
 480                  * fetch the flags using the socket of the correct family
 481                  */
 482                 switch (ss.ss_family) {
 483                 case AF_INET:
 484                         rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
 485                             &rval, CRED());
 486                         break;
 487                 case AF_INET6:
 488                         rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
 489                             &rval, CRED());
 490                         break;
 491                 default:
 492                         continue;
 493                 }
 494                 if (rc == 0) {
 495                         /*
 496                          * If we got the flags, skip uninteresting
 497                          * interfaces based on flags
 498                          */
 499                         if ((lp->lifr_flags & IFF_UP) != IFF_UP)
 500                                 continue;
 501                         if (lp->lifr_flags &
 502                             (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
 503                                 continue;
 504                 }
 505 
 506                 /* save ip address */
 507                 ip = &ipaddr->al_addrs[j];
 508                 switch (ss.ss_family) {
 509                 case AF_INET:
 510                         sin = (struct sockaddr_in *)&ss;
 511                         if (!idm_v4_addr_okay(&sin->sin_addr))
 512                                 continue;
 513                         ip->a_addr.i_addr.in4 = sin->sin_addr;
 514                         ip->a_addr.i_insize = sizeof (struct in_addr);
 515                         break;
 516                 case AF_INET6:
 517                         sin6 = (struct sockaddr_in6 *)&ss;
 518                         if (!idm_v6_addr_okay(&sin6->sin6_addr))
 519                                 continue;
 520                         ip->a_addr.i_addr.in6 = sin6->sin6_addr;
 521                         ip->a_addr.i_insize = sizeof (struct in6_addr);
 522                         break;
 523                 default:
 524                         continue;
 525                 }
 526                 j++;
 527         }
 528 
 529         if (j == 0) {
 530                 /* no valid ifaddr */
 531                 kmem_free(ipaddr, size_ipaddr);
 532                 size_ipaddr = 0;
 533                 ipaddr = NULL;
 534         } else {
 535                 ipaddr->al_out_cnt = j;
 536         }
 537 
 538 
 539 cleanup:
 540         idm_sodestroy(so6);
 541         idm_sodestroy(so4);
 542 
 543         if (buf != NULL)
 544                 kmem_free(buf, bufsize);
 545 
 546         *ipaddr_p = ipaddr;
 547         return (size_ipaddr);
 548 }
 549 
 550 int
 551 idm_sorecv(ksocket_t so, void *msg, size_t len)
 552 {
 553         iovec_t iov;
 554 
 555         ASSERT(so != NULL);
 556         ASSERT(len != 0);
 557 
 558         /*
 559          * Fill in iovec and receive data
 560          */
 561         iov.iov_base = msg;
 562         iov.iov_len = len;
 563 
 564         return (idm_iov_sorecv(so, &iov, 1, len));
 565 }
 566 
 567 /*
 568  * idm_sosendto - Sends a buffered data on a non-connected socket.
 569  *
 570  * This function puts the data provided on the wire by calling sosendmsg.
 571  * It will return only when all the data has been sent or if an error
 572  * occurs.
 573  *
 574  * Returns 0 for success, the socket errno value if sosendmsg fails, and
 575  * -1 if sosendmsg returns success but uio_resid != 0
 576  */
 577 int
 578 idm_sosendto(ksocket_t so, void *buff, size_t len,
 579     struct sockaddr *name, socklen_t namelen)
 580 {
 581         struct msghdr           msg;
 582         struct iovec            iov[1];
 583         int                     error;
 584         size_t                  sent = 0;
 585 
 586         iov[0].iov_base = buff;
 587         iov[0].iov_len  = len;
 588 
 589         /* Initialization of the message header. */
 590         bzero(&msg, sizeof (msg));
 591         msg.msg_iov     = iov;
 592         msg.msg_iovlen  = 1;
 593         msg.msg_name    = name;
 594         msg.msg_namelen = namelen;
 595 
 596         if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
 597                 /* Data sent */
 598                 if (sent == len) {
 599                         /* All data sent.  Success. */
 600                         return (0);
 601                 } else {
 602                         /* Not all data was sent.  Failure */
 603                         return (-1);
 604                 }
 605         }
 606 
 607         /* Send failed */
 608         return (error);
 609 }
 610 
 611 /*
 612  * idm_iov_sosend - Sends an iovec on a connection.
 613  *
 614  * This function puts the data provided on the wire by calling sosendmsg.
 615  * It will return only when all the data has been sent or if an error
 616  * occurs.
 617  *
 618  * Returns 0 for success, the socket errno value if sosendmsg fails, and
 619  * -1 if sosendmsg returns success but uio_resid != 0
 620  */
 621 int
 622 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
 623 {
 624         struct msghdr           msg;
 625         int                     error;
 626         size_t                  sent = 0;
 627 
 628         ASSERT(iop != NULL);
 629 
 630         /* Initialization of the message header. */
 631         bzero(&msg, sizeof (msg));
 632         msg.msg_iov     = iop;
 633         msg.msg_iovlen  = iovlen;
 634 
 635         if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
 636             == 0) {
 637                 /* Data sent */
 638                 if (sent == total_len) {
 639                         /* All data sent.  Success. */
 640                         return (0);
 641                 } else {
 642                         /* Not all data was sent.  Failure */
 643                         return (-1);
 644                 }
 645         }
 646 
 647         /* Send failed */
 648         return (error);
 649 }
 650 
 651 /*
 652  * idm_iov_sorecv - Receives an iovec from a connection
 653  *
 654  * This function gets the data asked for from the socket.  It will return
 655  * only when all the requested data has been retrieved or if an error
 656  * occurs.
 657  *
 658  * Returns 0 for success, the socket errno value if sorecvmsg fails, and
 659  * -1 if sorecvmsg returns success but uio_resid != 0
 660  */
 661 int
 662 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
 663 {
 664         struct msghdr           msg;
 665         int                     error;
 666         size_t                  recv;
 667         int                     flags;
 668 
 669         ASSERT(iop != NULL);
 670 
 671         /* Initialization of the message header. */
 672         bzero(&msg, sizeof (msg));
 673         msg.msg_iov     = iop;
 674         msg.msg_iovlen  = iovlen;
 675         flags           = MSG_WAITALL;
 676 
 677         if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
 678             == 0) {
 679                 /* Received data */
 680                 if (recv == total_len) {
 681                         /* All requested data received.  Success */
 682                         return (0);
 683                 } else {
 684                         /*
 685                          * Not all data was received.  The connection has
 686                          * probably failed.
 687                          */
 688                         return (-1);
 689                 }
 690         }
 691 
 692         /* Receive failed */
 693         return (error);
 694 }
 695 
 696 static void
 697 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
 698 {
 699         int     conn_abort = 10000;
 700         int     conn_notify = 2000;
 701         int     abort = 30000;
 702 
 703         /* Pre-connect socket options */
 704         (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 705             TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
 706             CRED());
 707         if (boot_conn == B_FALSE) {
 708                 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 709                     TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
 710                     CRED());
 711                 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 712                     TCP_ABORT_THRESHOLD,
 713                     (char *)&abort, sizeof (int), CRED());
 714         }
 715 }
 716 
 717 static void
 718 idm_set_postconnect_options(ksocket_t ks)
 719 {
 720         const int       on = 1;
 721 
 722         /* Set connect options */
 723         (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
 724             (char *)&idm_so_rcvbuf, sizeof (idm_so_rcvbuf), CRED());
 725         (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
 726             (char *)&idm_so_sndbuf, sizeof (idm_so_sndbuf), CRED());
 727         (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
 728             (char *)&on, sizeof (on), CRED());
 729 }
 730 
 731 static uint32_t
 732 n2h24(const uchar_t *ptr)
 733 {
 734         return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
 735 }
 736 
 737 static boolean_t
 738 idm_dataseglenokay(idm_conn_t *ic, idm_pdu_t *pdu)
 739 {
 740         iscsi_hdr_t     *bhs;
 741 
 742         if (ic->ic_conn_type == CONN_TYPE_TGT &&
 743             pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
 744                 IDM_CONN_LOG(CE_WARN,
 745                     "idm_dataseglenokay: exceeded the max data segment length");
 746                 return (B_FALSE);
 747         }
 748 
 749         bhs = pdu->isp_hdr;
 750         /*
 751          * Filter out any RFC3720 data-size violations.
 752          */
 753         switch (IDM_PDU_OPCODE(pdu)) {
 754         case ISCSI_OP_SCSI_TASK_MGT_MSG:
 755         case ISCSI_OP_SCSI_TASK_MGT_RSP:
 756         case ISCSI_OP_RTT_RSP:
 757         case ISCSI_OP_LOGOUT_CMD:
 758                 /*
 759                  * Data-segment not allowed and additional headers not allowed.
 760                  * (both must be zero according to the RFC3720.)
 761                  */
 762                 if (bhs->hlength != 0 || pdu->isp_datalen != 0)
 763                         return (B_FALSE);
 764                 break;
 765         case ISCSI_OP_NOOP_OUT:
 766         case ISCSI_OP_LOGIN_CMD:
 767         case ISCSI_OP_TEXT_CMD:
 768         case ISCSI_OP_SNACK_CMD:
 769         case ISCSI_OP_NOOP_IN:
 770         case ISCSI_OP_SCSI_RSP:
 771         case ISCSI_OP_LOGIN_RSP:
 772         case ISCSI_OP_TEXT_RSP:
 773         case ISCSI_OP_SCSI_DATA_RSP:
 774         case ISCSI_OP_LOGOUT_RSP:
 775         case ISCSI_OP_ASYNC_EVENT:
 776         case ISCSI_OP_REJECT_MSG:
 777                 /*
 778                  * Additional headers not allowed.
 779                  * (must be zero according to RFC3720.)
 780                  */
 781                 if (bhs->hlength)
 782                         return (B_FALSE);
 783                 break;
 784         case ISCSI_OP_SCSI_CMD:
 785                 /*
 786                  * See RFC3720, section 10.3
 787                  *
 788                  * For pure read cmds, data-segment-length must be zero.
 789                  * For non-final transfers, data-size must be even number of
 790                  * 4-byte words.
 791                  * For any transfer, an expected byte count must be provided.
 792                  * For bidirectional transfers, an additional-header must be
 793                  * provided (for the read byte-count.)
 794                  */
 795                 if (pdu->isp_datalen) {
 796                         if ((bhs->flags & (ISCSI_FLAG_CMD_READ |
 797                             ISCSI_FLAG_CMD_WRITE)) == ISCSI_FLAG_CMD_READ)
 798                                 return (B_FALSE);
 799                         if ((bhs->flags & ISCSI_FLAG_FINAL) == 0 &&
 800                             ((pdu->isp_datalen & 0x3) != 0))
 801                                 return (B_FALSE);
 802                 }
 803                 if (bhs->flags & (ISCSI_FLAG_CMD_READ |
 804                     ISCSI_FLAG_CMD_WRITE)) {
 805                         iscsi_scsi_cmd_hdr_t *cmdhdr =
 806                             (iscsi_scsi_cmd_hdr_t *)bhs;
 807                         /*
 808                          * we're transfering some data, we must have a
 809                          * byte count
 810                          */
 811                         if (cmdhdr->data_length == 0)
 812                                 return (B_FALSE);
 813                 }
 814                 break;
 815         case ISCSI_OP_SCSI_DATA:
 816                 /*
 817                  * See RFC3720, section 10.7
 818                  *
 819                  * Additional headers aren't allowed, and the data-size must
 820                  * be an even number of 4-byte words (unless the final bit
 821                  * is set.)
 822                  */
 823                 if (bhs->hlength)
 824                         return (B_FALSE);
 825                 if ((bhs->flags & ISCSI_FLAG_FINAL) == 0 &&
 826                     ((pdu->isp_datalen & 0x3) != 0))
 827                         return (B_FALSE);
 828                 break;
 829         default:
 830                 break;
 831         }
 832         return (B_TRUE);
 833 }
 834 
 835 static idm_status_t
 836 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
 837 {
 838         iscsi_hdr_t     *bhs;
 839         uint32_t        hdr_digest_crc;
 840         uint32_t        crc_calculated;
 841         void            *new_hdr;
 842         int             ahslen = 0;
 843         int             total_len = 0;
 844         int             iovlen = 0;
 845         struct iovec    iov[2];
 846         idm_so_conn_t   *so_conn;
 847         int             rc;
 848 
 849         so_conn = ic->ic_transport_private;
 850 
 851         /*
 852          * Read BHS
 853          */
 854         bhs = pdu->isp_hdr;
 855         rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
 856         if (rc != IDM_STATUS_SUCCESS) {
 857                 return (IDM_STATUS_FAIL);
 858         }
 859 
 860         /*
 861          * Check actual AHS length against the amount available in the buffer
 862          */
 863         if ((IDM_PDU_OPCODE(pdu) != ISCSI_OP_SCSI_CMD) &&
 864             (bhs->hlength != 0)) {
 865                 /* ---- hlength is only only valid for SCSI Request ---- */
 866                 return (IDM_STATUS_FAIL);
 867         }
 868         pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
 869             (bhs->hlength * sizeof (uint32_t));
 870         pdu->isp_datalen = n2h24(bhs->dlength);
 871 
 872         if (!idm_dataseglenokay(ic, pdu)) {
 873                 IDM_CONN_LOG(CE_WARN,
 874                     "idm_sorecvhdr: invalid data segment length");
 875                 return (IDM_STATUS_FAIL);
 876         }
 877         if (bhs->hlength > IDM_SORX_WIRE_AHSLEN) {
 878                 /* Allocate a new header segment and change the callback */
 879                 new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
 880                 bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
 881                 pdu->isp_hdr = new_hdr;
 882                 pdu->isp_flags |= IDM_PDU_ADDL_HDR;
 883 
 884                 /*
 885                  * This callback will restore the expected values after
 886                  * the RX PDU has been processed.
 887                  */
 888                 pdu->isp_callback = idm_sorx_addl_pdu_cb;
 889         }
 890 
 891         /*
 892          * Setup receipt of additional header and header digest (if enabled).
 893          */
 894         if (bhs->hlength > 0) {
 895                 iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
 896                 ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
 897                 iov[iovlen].iov_len = ahslen;
 898                 total_len += iov[iovlen].iov_len;
 899                 iovlen++;
 900         }
 901 
 902         if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
 903                 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
 904                 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
 905                 total_len += iov[iovlen].iov_len;
 906                 iovlen++;
 907         }
 908 
 909         if ((iovlen != 0) &&
 910             (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
 911             total_len) != 0)) {
 912                 return (IDM_STATUS_FAIL);
 913         }
 914 
 915         /*
 916          * Validate header digest if enabled
 917          */
 918         if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
 919                 crc_calculated = idm_crc32c(pdu->isp_hdr,
 920                     sizeof (iscsi_hdr_t) + ahslen);
 921                 if (crc_calculated != hdr_digest_crc) {
 922                         /* Invalid Header Digest */
 923                         return (IDM_STATUS_HEADER_DIGEST);
 924                 }
 925         }
 926 
 927         return (0);
 928 }
 929 
 930 /*
 931  * idm_so_ini_conn_create()
 932  * Allocate the sockets transport connection resources.
 933  */
 934 static idm_status_t
 935 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
 936 {
 937         ksocket_t       so;
 938         idm_so_conn_t   *so_conn;
 939         idm_status_t    idmrc;
 940 
 941         so = idm_socreate(cr->cr_domain, cr->cr_type,
 942             cr->cr_protocol);
 943         if (so == NULL) {
 944                 return (IDM_STATUS_FAIL);
 945         }
 946 
 947         /* Bind the socket if configured to do so */
 948         if (cr->cr_bound) {
 949                 if (ksocket_bind(so, &cr->cr_bound_addr.sin,
 950                     SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
 951                         idm_sodestroy(so);
 952                         return (IDM_STATUS_FAIL);
 953                 }
 954         }
 955 
 956         idmrc = idm_so_conn_create_common(ic, so);
 957         if (idmrc != IDM_STATUS_SUCCESS) {
 958                 idm_soshutdown(so);
 959                 idm_sodestroy(so);
 960                 return (IDM_STATUS_FAIL);
 961         }
 962 
 963         so_conn = ic->ic_transport_private;
 964         /* Set up socket options */
 965         idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
 966 
 967         return (IDM_STATUS_SUCCESS);
 968 }
 969 
/*
 * idm_so_ini_conn_destroy()
 * Tear down the sockets transport connection resources of an initiator
 * connection.  All of the work is delegated to
 * idm_so_conn_destroy_common().
 */
static void
idm_so_ini_conn_destroy(idm_conn_t *ic)
{
        idm_so_conn_destroy_common(ic);
}
 979 
 980 /*
 981  * idm_so_ini_conn_connect()
 982  * Establish the connection referred to by the handle previously allocated via
 983  * idm_so_ini_conn_create().
 984  */
 985 static idm_status_t
 986 idm_so_ini_conn_connect(idm_conn_t *ic)
 987 {
 988         idm_so_conn_t   *so_conn;
 989         struct sonode   *node = NULL;
 990         int             rc;
 991         clock_t         lbolt, conn_login_max, conn_login_interval;
 992         boolean_t       nonblock;
 993 
 994         so_conn = ic->ic_transport_private;
 995         nonblock = ic->ic_conn_params.nonblock_socket;
 996         conn_login_max = ic->ic_conn_params.conn_login_max;
 997         conn_login_interval = ddi_get_lbolt() +
 998             SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
 999 
1000         if (nonblock == B_TRUE) {
1001                 node = ((struct sonode *)(so_conn->ic_so));
1002                 /* Set to none block socket mode */
1003                 idm_so_socket_set_nonblock(node);
1004                 do {
1005                         rc = ksocket_connect(so_conn->ic_so,
1006                             &ic->ic_ini_dst_addr.sin,
1007                             (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
1008                             CRED());
1009                         if (rc == 0 || rc == EISCONN) {
1010                                 /* socket success or already success */
1011                                 rc = IDM_STATUS_SUCCESS;
1012                                 break;
1013                         }
1014                         if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
1015                             (rc == ECONNRESET)) {
1016                                 /* socket connection timeout or refuse */
1017                                 break;
1018                         }
1019                         lbolt = ddi_get_lbolt();
1020                         if (lbolt > conn_login_max) {
1021                                 /*
1022                                  * Connection retry timeout,
1023                                  * failed connect to target.
1024                                  */
1025                                 break;
1026                         }
1027                         if (lbolt < conn_login_interval) {
1028                                 if ((rc == EINPROGRESS) || (rc == EALREADY)) {
1029                                         /* TCP connect still in progress */
1030                                         delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
1031                                         continue;
1032                                 } else {
1033                                         delay(conn_login_interval - lbolt);
1034                                 }
1035                         }
1036                         conn_login_interval = ddi_get_lbolt() +
1037                             SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
1038                 } while (rc != 0);
1039                 /* resume to nonblock mode */
1040                 if (rc == IDM_STATUS_SUCCESS) {
1041                         idm_so_socket_set_block(node);
1042                 }
1043         } else {
1044                 rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
1045                     (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
1046         }
1047 
1048         if (rc != 0) {
1049                 idm_soshutdown(so_conn->ic_so);
1050                 return (IDM_STATUS_FAIL);
1051         }
1052 
1053         idm_so_conn_connect_common(ic);
1054 
1055         idm_set_postconnect_options(so_conn->ic_so);
1056 
1057         return (IDM_STATUS_SUCCESS);
1058 }
1059 
1060 idm_status_t
1061 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
1062 {
1063         idm_status_t    idmrc;
1064 
1065         idm_set_postconnect_options(new_so);
1066         idmrc = idm_so_conn_create_common(ic, new_so);
1067 
1068         return (idmrc);
1069 }
1070 
/*
 * idm_so_tgt_conn_destroy()
 * Tear down the sockets transport connection resources of a target
 * connection.  All of the work is delegated to
 * idm_so_conn_destroy_common().
 */
static void
idm_so_tgt_conn_destroy(idm_conn_t *ic)
{
        idm_so_conn_destroy_common(ic);
}
1076 
/*
 * idm_so_tgt_conn_connect()
 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
 * is invoked from the SM as a result of an inbound connection request.
 *
 * The work is delegated to idm_so_conn_connect_common(); this function
 * always returns IDM_STATUS_SUCCESS.
 */
static idm_status_t
idm_so_tgt_conn_connect(idm_conn_t *ic)
{
        idm_so_conn_connect_common(ic);

        return (IDM_STATUS_SUCCESS);
}
1089 
1090 static idm_status_t
1091 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
1092 {
1093         idm_so_conn_t   *so_conn;
1094 
1095         so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
1096         so_conn->ic_so = new_so;
1097 
1098         ic->ic_transport_private = so_conn;
1099         ic->ic_transport_hdrlen = 0;
1100 
1101         /* Set the scoreboarding flag on this connection */
1102         ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
1103         ic->ic_conn_params.max_recv_dataseglen =
1104             ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1105         ic->ic_conn_params.max_xmit_dataseglen =
1106             ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1107 
1108         /*
1109          * Initialize tx thread mutex and list
1110          */
1111         mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1112         cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1113         list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1114             offsetof(idm_pdu_t, idm_tx_link));
1115 
1116         return (IDM_STATUS_SUCCESS);
1117 }
1118 
1119 static void
1120 idm_so_conn_destroy_common(idm_conn_t *ic)
1121 {
1122         idm_so_conn_t   *so_conn = ic->ic_transport_private;
1123 
1124         ic->ic_transport_private = NULL;
1125         idm_sodestroy(so_conn->ic_so);
1126         list_destroy(&so_conn->ic_tx_list);
1127         mutex_destroy(&so_conn->ic_tx_mutex);
1128         cv_destroy(&so_conn->ic_tx_cv);
1129 
1130         kmem_free(so_conn, sizeof (idm_so_conn_t));
1131 }
1132 
1133 static void
1134 idm_so_conn_connect_common(idm_conn_t *ic)
1135 {
1136         idm_so_conn_t   *so_conn;
1137         struct sockaddr_in6     t_addr;
1138         socklen_t       t_addrlen = 0;
1139 
1140         so_conn = ic->ic_transport_private;
1141         bzero(&t_addr, sizeof (struct sockaddr_in6));
1142         t_addrlen = sizeof (struct sockaddr_in6);
1143 
1144         /* Set the local and remote addresses in the idm conn handle */
1145         (void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
1146             &t_addrlen, CRED());
1147         bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
1148         (void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
1149             &t_addrlen, CRED());
1150         bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
1151 
1152         mutex_enter(&ic->ic_mutex);
1153         so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
1154             &p0, TS_RUN, minclsyspri);
1155         so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
1156             &p0, TS_RUN, minclsyspri);
1157 
1158         while (so_conn->ic_rx_thread_did == 0 ||
1159             so_conn->ic_tx_thread_did == 0)
1160                 cv_wait(&ic->ic_cv, &ic->ic_mutex);
1161         mutex_exit(&ic->ic_mutex);
1162 }
1163 
1164 /*
1165  * idm_so_conn_disconnect()
1166  * Shutdown the socket connection and stop the thread
1167  */
1168 static void
1169 idm_so_conn_disconnect(idm_conn_t *ic)
1170 {
1171         idm_so_conn_t   *so_conn;
1172 
1173         so_conn = ic->ic_transport_private;
1174 
1175         mutex_enter(&ic->ic_mutex);
1176         so_conn->ic_rx_thread_running = B_FALSE;
1177         so_conn->ic_tx_thread_running = B_FALSE;
1178         /* We need to wakeup the TX thread */
1179         mutex_enter(&so_conn->ic_tx_mutex);
1180         cv_signal(&so_conn->ic_tx_cv);
1181         mutex_exit(&so_conn->ic_tx_mutex);
1182         mutex_exit(&ic->ic_mutex);
1183 
1184         /* This should wakeup the RX thread if it is sleeping */
1185         idm_soshutdown(so_conn->ic_so);
1186 
1187         thread_join(so_conn->ic_tx_thread_did);
1188         thread_join(so_conn->ic_rx_thread_did);
1189 }
1190 
1191 /*
1192  * idm_so_tgt_svc_create()
1193  * Establish a service on an IP address and port.  idm_svc_req_t contains
1194  * the service parameters.
1195  */
1196 /*ARGSUSED*/
1197 static idm_status_t
1198 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1199 {
1200         idm_so_svc_t            *so_svc;
1201 
1202         so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1203 
1204         /* Set the new sockets service in svc handle */
1205         is->is_so_svc = (void *)so_svc;
1206 
1207         return (IDM_STATUS_SUCCESS);
1208 }
1209 
1210 /*
1211  * idm_so_tgt_svc_destroy()
1212  * Teardown sockets resources allocated in idm_so_tgt_svc_create()
1213  */
1214 static void
1215 idm_so_tgt_svc_destroy(idm_svc_t *is)
1216 {
1217         /* the socket will have been torn down; free the service */
1218         kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
1219 }
1220 
1221 /*
1222  * idm_so_tgt_svc_online()
1223  * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1224  */
1225 
1226 static idm_status_t
1227 idm_so_tgt_svc_online(idm_svc_t *is)
1228 {
1229         idm_so_svc_t            *so_svc;
1230         idm_svc_req_t           *sr = &is->is_svc_req;
1231         struct sockaddr_in6     sin6_ip;
1232         const uint32_t          on = 1;
1233         const uint32_t          off = 0;
1234 
1235         mutex_enter(&is->is_mutex);
1236         so_svc = (idm_so_svc_t *)is->is_so_svc;
1237 
1238         /*
1239          * Try creating an IPv6 socket first
1240          */
1241         if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1242                 mutex_exit(&is->is_mutex);
1243                 return (IDM_STATUS_FAIL);
1244         } else {
1245                 bzero(&sin6_ip, sizeof (sin6_ip));
1246                 sin6_ip.sin6_family = AF_INET6;
1247                 sin6_ip.sin6_port = htons(sr->sr_port);
1248                 sin6_ip.sin6_addr = in6addr_any;
1249 
1250                 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1251                     SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
1252                 /*
1253                  * Turn off SO_MAC_EXEMPT so future sobinds succeed
1254                  */
1255                 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1256                     SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
1257 
1258                 if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1259                     sizeof (sin6_ip), CRED()) != 0) {
1260                         mutex_exit(&is->is_mutex);
1261                         idm_sodestroy(so_svc->is_so);
1262                         return (IDM_STATUS_FAIL);
1263                 }
1264         }
1265 
1266         idm_set_postconnect_options(so_svc->is_so);
1267 
1268         if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
1269                 mutex_exit(&is->is_mutex);
1270                 idm_soshutdown(so_svc->is_so);
1271                 idm_sodestroy(so_svc->is_so);
1272                 return (IDM_STATUS_FAIL);
1273         }
1274 
1275         /* Launch a watch thread */
1276         so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1277             is, 0, &p0, TS_RUN, minclsyspri);
1278 
1279         if (so_svc->is_thread == NULL) {
1280                 /* Failure to launch; teardown the socket */
1281                 mutex_exit(&is->is_mutex);
1282                 idm_soshutdown(so_svc->is_so);
1283                 idm_sodestroy(so_svc->is_so);
1284                 return (IDM_STATUS_FAIL);
1285         }
1286         ksocket_hold(so_svc->is_so);
1287         /* Wait for the port watcher thread to start */
1288         while (!so_svc->is_thread_running)
1289                 cv_wait(&is->is_cv, &is->is_mutex);
1290         mutex_exit(&is->is_mutex);
1291 
1292         return (IDM_STATUS_SUCCESS);
1293 }
1294 
1295 /*
1296  * idm_so_tgt_svc_offline
1297  *
1298  * Stop listening on the IP address and port identified by idm_svc_t.
1299  */
1300 static void
1301 idm_so_tgt_svc_offline(idm_svc_t *is)
1302 {
1303         idm_so_svc_t            *so_svc;
1304         mutex_enter(&is->is_mutex);
1305         so_svc = (idm_so_svc_t *)is->is_so_svc;
1306         so_svc->is_thread_running = B_FALSE;
1307         mutex_exit(&is->is_mutex);
1308 
1309         /*
1310          * Teardown socket
1311          */
1312         idm_sodestroy(so_svc->is_so);
1313 
1314         /*
1315          * Now we expect the port watcher thread to terminate
1316          */
1317         thread_join(so_svc->is_thread_did);
1318 }
1319 
/*
 * Watch thread for target service connection establishment.
 *
 * arg is the idm_svc_t being served.  The thread records its own thread
 * id, marks itself running and signals is_cv (idm_so_tgt_svc_online()
 * waits on that), then loops accepting connections on the service socket
 * for as long as is_thread_running stays set.  Each accepted socket gets
 * an idm connection and the sockets transport state, after which the
 * connection state machine is sent CE_CONNECT_ACCEPT.
 *
 * is_mutex is held whenever is_thread_running is tested and is dropped
 * for the body of each iteration, so every "continue" path re-acquires
 * it first.  On exit the thread releases the socket hold taken in
 * idm_so_tgt_svc_online().
 */
void
idm_so_svc_port_watcher(void *arg)
{
        idm_svc_t               *svc = arg;
        ksocket_t               new_so;
        idm_conn_t              *ic;
        idm_status_t            idmrc;
        idm_so_svc_t            *so_svc;
        int                     rc;
        const uint32_t          off = 0;
        struct sockaddr_in6     t_addr;
        socklen_t               t_addrlen;

        bzero(&t_addr, sizeof (struct sockaddr_in6));
        t_addrlen = sizeof (struct sockaddr_in6);
        mutex_enter(&svc->is_mutex);

        /* Publish our state before waking the thread that launched us */
        so_svc = svc->is_so_svc;
        so_svc->is_thread_running = B_TRUE;
        so_svc->is_thread_did = so_svc->is_thread->t_did;

        cv_signal(&svc->is_cv);

        IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
            svc->is_svc_req.sr_port);

        while (so_svc->is_thread_running) {
                /* Do not hold is_mutex across the accept */
                mutex_exit(&svc->is_mutex);

                if ((rc = ksocket_accept(so_svc->is_so,
                    (struct sockaddr *)&t_addr, &t_addrlen,
                    &new_so, CRED())) != 0) {
                        mutex_enter(&svc->is_mutex);
                        /* ECONNABORTED and EINTR are not logged */
                        if (rc != ECONNABORTED && rc != EINTR) {
                                IDM_SVC_LOG(CE_NOTE, "idm_so_svc_port_watcher:"
                                    " ksocket_accept failed %d", rc);
                        }
                        /*
                         * Unclean shutdown of this thread is not handled
                         * wait for !is_thread_running.
                         */
                        continue;
                }
                /*
                 * Turn off SO_MAC_EXEMPT so future sobinds succeed
                 */
                (void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
                    (char *)&off, sizeof (off), CRED());

                idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
                    &ic);
                if (idmrc != IDM_STATUS_SUCCESS) {
                        /* Drop connection */
                        idm_soshutdown(new_so);
                        idm_sodestroy(new_so);
                        mutex_enter(&svc->is_mutex);
                        continue;
                }

                idmrc = idm_so_tgt_conn_create(ic, new_so);
                if (idmrc != IDM_STATUS_SUCCESS) {
                        /* Undo the connection, then drop the socket */
                        idm_svc_conn_destroy(ic);
                        idm_soshutdown(new_so);
                        idm_sodestroy(new_so);
                        mutex_enter(&svc->is_mutex);
                        continue;
                }

                /*
                 * Kick the state machine.  At CS_S3_XPT_UP the state machine
                 * will notify the client (target) about the new connection.
                 */
                idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);

                mutex_enter(&svc->is_mutex);
        }
        /* Drop the hold taken when this thread was launched */
        ksocket_rele(so_svc->is_so);
        so_svc->is_thread_running = B_FALSE;
        mutex_exit(&svc->is_mutex);

        IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
            svc->is_svc_req.sr_port);

        thread_exit();
}
1408 
/*
 * idm_so_free_task_rsrc() stops any ongoing processing of the task and
 * frees resources associated with the task.
 *
 * It's not clear that this should return idm_status_t.  What do we do
 * if it fails?
 *
 * As written this always returns IDM_STATUS_SUCCESS.
 */
static idm_status_t
idm_so_free_task_rsrc(idm_task_t *idt)
{
        idm_buf_t       *idb, *next_idb;

        /*
         * There is nothing to cleanup on initiator connections
         */
        if (IDM_CONN_ISINI(idt->idt_ic))
                return (IDM_STATUS_SUCCESS);

        /*
         * If this is a target connection, call idm_buf_rx_from_ini_done for
         * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
         *
         * In addition, remove any buffers associated with this task from
         * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
         * items don't actually get removed from that list (and completion
         * routines called) until idm_task_cleanup.
         */
        mutex_enter(&idt->idt_mutex);

        /*
         * In both walks the successor is fetched before the current buffer
         * is completed, and idt_mutex is re-acquired after each completion
         * call because the callee releases it.
         */
        for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
                next_idb = list_next(&idt->idt_outbufv, idb);
                if (idb->idb_in_transport) {
                        /*
                         * idm_buf_rx_from_ini_done releases idt->idt_mutex
                         */
                        DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
                            uintptr_t, idb->idb_buf,
                            uint32_t, idb->idb_bufoffset,
                            uint64_t, 0, uint32_t, 0, uint32_t, 0,
                            uint32_t, idb->idb_xfer_len,
                            int, XFER_BUF_RX_FROM_INI);
                        idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
                        mutex_enter(&idt->idt_mutex);
                }
        }

        for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
                next_idb = list_next(&idt->idt_inbufv, idb);
                /*
                 * We want to remove these items from the tx_list as well,
                 * but knowing it's in the idt_inbufv list is not a guarantee
                 * that it's in the tx_list.  If it's on the tx list then
                 * let idm_sotx_thread() clean it up.
                 */
                if (idb->idb_in_transport && !idb->idb_tx_thread) {
                        /*
                         * idm_buf_tx_to_ini_done releases idt->idt_mutex
                         */
                        DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
                            uintptr_t, idb->idb_buf,
                            uint32_t, idb->idb_bufoffset,
                            uint64_t, 0, uint32_t, 0, uint32_t, 0,
                            uint32_t, idb->idb_xfer_len,
                            int, XFER_BUF_TX_TO_INI);
                        idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
                        mutex_enter(&idt->idt_mutex);
                }
        }

        mutex_exit(&idt->idt_mutex);

        return (IDM_STATUS_SUCCESS);
}
1482 
/*
 * idm_so_negotiate_key_values() validates the key values for this connection
 *
 * The sockets transport negotiates nothing itself: none of the arguments
 * are examined and KV_HANDLED is always returned.
 */
/* ARGSUSED */
static kv_status_t
idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
    nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
{
        /* All parameters are negotiated at the iscsit level */
        return (KV_HANDLED);
}
1494 
1495 /*
1496  * idm_so_notice_key_values() activates the negotiated key values for
1497  * this connection.
1498  */
1499 static void
1500 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1501 {
1502         char                    *nvp_name;
1503         nvpair_t                *nvp;
1504         nvpair_t                *next_nvp;
1505         int                     nvrc;
1506         idm_status_t            idm_status;
1507         const idm_kv_xlate_t    *ikvx;
1508         uint64_t                num_val;
1509 
1510         for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1511             nvp != NULL; nvp = next_nvp) {
1512                 next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1513                 nvp_name = nvpair_name(nvp);
1514 
1515                 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1516                 switch (ikvx->ik_key_id) {
1517                 case KI_HEADER_DIGEST:
1518                 case KI_DATA_DIGEST:
1519                         idm_status = idm_so_handle_digest(it, nvp, ikvx);
1520                         ASSERT(idm_status == 0);
1521 
1522                         /* Remove processed item from negotiated_nvl list */
1523                         nvrc = nvlist_remove_all(
1524                             negotiated_nvl, ikvx->ik_key_name);
1525                         ASSERT(nvrc == 0);
1526                         break;
1527                 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1528                         /*
1529                          * Just pass the value down to idm layer.
1530                          * No need to remove it from negotiated_nvl list here.
1531                          */
1532                         nvrc = nvpair_value_uint64(nvp, &num_val);
1533                         ASSERT(nvrc == 0);
1534                         it->ic_conn_params.max_xmit_dataseglen =
1535                             (uint32_t)num_val;
1536                         break;
1537                 default:
1538                         break;
1539                 }
1540         }
1541 }
1542 
1543 /*
1544  * idm_so_declare_key_values() declares the key values for this connection
1545  */
1546 /* ARGSUSED */
1547 static kv_status_t
1548 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1549     nvlist_t *outgoing_nvl)
1550 {
1551         char                    *nvp_name;
1552         nvpair_t                *nvp;
1553         nvpair_t                *next_nvp;
1554         kv_status_t             kvrc;
1555         int                     nvrc = 0;
1556         const idm_kv_xlate_t    *ikvx;
1557         uint64_t                num_val;
1558 
1559         for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1560             nvp != NULL && nvrc == 0; nvp = next_nvp) {
1561                 next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1562                 nvp_name = nvpair_name(nvp);
1563 
1564                 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1565                 switch (ikvx->ik_key_id) {
1566                 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1567                         if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1568                                 break;
1569                         }
1570                         if (outgoing_nvl &&
1571                             (nvrc = nvlist_add_uint64(outgoing_nvl,
1572                             nvp_name, num_val)) != 0) {
1573                                 break;
1574                         }
1575                         it->ic_conn_params.max_recv_dataseglen =
1576                             (uint32_t)num_val;
1577                         break;
1578                 default:
1579                         break;
1580                 }
1581         }
1582         kvrc = idm_nvstat_to_kvstat(nvrc);
1583         return (kvrc);
1584 }
1585 
1586 static idm_status_t
1587 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1588     const idm_kv_xlate_t *ikvx)
1589 {
1590         int                     nvrc;
1591         char                    *digest_choice_string;
1592 
1593         nvrc = nvpair_value_string(digest_choice,
1594             &digest_choice_string);
1595         ASSERT(nvrc == 0);
1596         if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1597                 switch (ikvx->ik_key_id) {
1598                 case KI_HEADER_DIGEST:
1599                         it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1600                         break;
1601                 case KI_DATA_DIGEST:
1602                         it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1603                         break;
1604                 default:
1605                         ASSERT(0);
1606                         break;
1607                 }
1608         } else if (strcasecmp(digest_choice_string, "none") == 0) {
1609                 switch (ikvx->ik_key_id) {
1610                 case KI_HEADER_DIGEST:
1611                         it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1612                         break;
1613                 case KI_DATA_DIGEST:
1614                         it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1615                         break;
1616                 default:
1617                         ASSERT(0);
1618                         break;
1619                 }
1620         } else {
1621                 ASSERT(0);
1622         }
1623 
1624         return (IDM_STATUS_SUCCESS);
1625 }
1626 
1627 
1628 /*
1629  * idm_so_conn_is_capable() verifies that the passed connection is provided
1630  * for by the sockets interface.
1631  */
1632 /* ARGSUSED */
1633 static boolean_t
1634 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1635 {
1636         return (B_TRUE);
1637 }
1638 
1639 /*
1640  * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1641  * idm_sorecv_scsidata() function invoked earlier actually reads the data
1642  * off the socket into the appropriate buffers.
1643  */
1644 static void
1645 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1646 {
1647         iscsi_data_hdr_t        *bhs;
1648         idm_task_t              *idt;
1649         idm_buf_t               *idb;
1650         uint32_t                datasn;
1651         size_t                  offset;
1652         iscsi_hdr_t             *ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1653         iscsi_data_rsp_hdr_t    *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1654 
1655         ASSERT(ic != NULL);
1656         ASSERT(pdu != NULL);
1657         ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP);
1658 
1659         bhs     = (iscsi_data_hdr_t *)pdu->isp_hdr;
1660         datasn  = ntohl(bhs->datasn);
1661         offset  = ntohl(bhs->offset);
1662 
1663         /*
1664          * Look up the task corresponding to the initiator task tag
1665          * to get the buffers affiliated with the task.
1666          */
1667         idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1668         if (idt == NULL) {
1669                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1670                 idm_pdu_rx_protocol_error(ic, pdu);
1671                 return;
1672         }
1673 
1674         idb = pdu->isp_sorx_buf;
1675         if (idb == NULL) {
1676                 IDM_CONN_LOG(CE_WARN,
1677                     "idm_so_rx_datain: failed to find buffer");
1678                 idm_task_rele(idt);
1679                 idm_pdu_rx_protocol_error(ic, pdu);
1680                 return;
1681         }
1682 
1683         /*
1684          * DataSN values should be sequential and should not have any gaps or
1685          * repetitions. Check the DataSN with the one stored in the task.
1686          */
1687         if (datasn == idt->idt_exp_datasn) {
1688                 idt->idt_exp_datasn++; /* keep track of DataSN received */
1689         } else {
1690                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1691                 idm_task_rele(idt);
1692                 idm_pdu_rx_protocol_error(ic, pdu);
1693                 return;
1694         }
1695 
1696         /*
1697          * PDUs in a sequence should be in continuously increasing
1698          * address offset
1699          */
1700         if (offset != idb->idb_exp_offset) {
1701                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1702                 idm_task_rele(idt);
1703                 idm_pdu_rx_protocol_error(ic, pdu);
1704                 return;
1705         }
1706         /* Expected next relative buffer offset */
1707         idb->idb_exp_offset += n2h24(bhs->dlength);
1708         idt->idt_rx_bytes += n2h24(bhs->dlength);
1709 
1710         idm_task_rele(idt);
1711 
1712         /*
1713          * For now call scsi_rsp which will process the data rsp
1714          * Revisit, need to provide an explicit client entry point for
1715          * phase collapse completions.
1716          */
1717         if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) &&
1718             (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1719                 (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1720         }
1721 
1722         idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1723 }
1724 
1725 /*
1726  * The idm_so_rx_dataout() function is used by the iSCSI target to read
1727  * data from the Data-Out PDU sent by the iSCSI initiator.
1728  *
1729  * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1730  * task to get the buffers associated with the PDU. A PDU might span buffers.
1731  * The data is then read into the respective buffer.
1732  */
1733 static void
1734 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1735 {
1736 
1737         iscsi_data_hdr_t        *bhs;
1738         idm_task_t              *idt;
1739         idm_buf_t               *idb;
1740         size_t                  offset;
1741 
1742         ASSERT(ic != NULL);
1743         ASSERT(pdu != NULL);
1744         ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA);
1745 
1746         bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1747         offset = ntohl(bhs->offset);
1748 
1749         /*
1750          * Look up the task corresponding to the initiator task tag
1751          * to get the buffers affiliated with the task.
1752          */
1753         idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1754         if (idt == NULL) {
1755                 IDM_CONN_LOG(CE_WARN,
1756                     "idm_so_rx_dataout: failed to find task");
1757                 idm_pdu_rx_protocol_error(ic, pdu);
1758                 return;
1759         }
1760 
1761         idb = pdu->isp_sorx_buf;
1762         if (idb == NULL) {
1763                 IDM_CONN_LOG(CE_WARN,
1764                     "idm_so_rx_dataout: failed to find buffer");
1765                 idm_task_rele(idt);
1766                 idm_pdu_rx_protocol_error(ic, pdu);
1767                 return;
1768         }
1769 
1770         /* Keep track of data transferred - check data offsets */
1771         if (offset != idb->idb_exp_offset) {
1772                 IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1773                     "%ld, %d", offset, idb->idb_exp_offset);
1774                 idm_task_rele(idt);
1775                 idm_pdu_rx_protocol_error(ic, pdu);
1776                 return;
1777         }
1778         /* Expected next relative offset */
1779         idb->idb_exp_offset += ntoh24(bhs->dlength);
1780         idt->idt_rx_bytes += n2h24(bhs->dlength);
1781 
1782         /*
1783          * Call the buffer callback when the transfer is complete
1784          *
1785          * The connection state machine should only abort tasks after
1786          * shutting down the connection so we are assured that there
1787          * won't be a simultaneous attempt to abort this task at the
1788          * same time as we are processing this PDU (due to a connection
1789          * state change).
1790          */
1791         if (bhs->flags & ISCSI_FLAG_FINAL) {
1792                 /*
1793                  * We have gotten the last data-message for the current
1794                  * transfer.  idb_xfer_len represents the data that the
1795                  * command intended to transfer, it does not represent the
1796                  * actual number of bytes transferred. If we have not
1797                  * transferred the expected number of bytes something is
1798                  * wrong.
1799                  *
1800                  * We have two options, when there is a mismatch, we can
1801                  * regard the transfer as invalid -- or we can modify our
1802                  * notion of "xfer_len." In order to be as stringent as
1803                  * possible, here we regard this transfer as in error; and
1804                  * bail out.
1805                  */
1806                 if (idb->idb_buflen == idb->idb_xfer_len &&
1807                     idb->idb_buflen !=
1808                     (idb->idb_exp_offset - idb->idb_bufoffset)) {
1809                         printf("idm_so_rx_dataout: incomplete transfer, "
1810                             "protocol err");
1811                         IDM_CONN_LOG(CE_NOTE,
1812                             "idm_so_rx_dataout: incomplete transfer: %ld, %d",
1813                             offset, (int)(idb->idb_exp_offset - offset));
1814                         idm_task_rele(idt);
1815                         idm_pdu_rx_protocol_error(ic, pdu);
1816                         return;
1817                 }
1818                 /*
1819                  * We only want to call idm_buf_rx_from_ini_done once
1820                  * per transfer.  It's possible that this task has
1821                  * already been aborted in which case
1822                  * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1823                  * for each buffer with idb_in_transport==B_TRUE.  To
1824                  * close this window and ensure that this doesn't happen,
1825                  * we'll clear idb->idb_in_transport now while holding
1826                  * the task mutex.   This is only really an issue for
1827                  * SCSI task abort -- if tasks were being aborted because
1828                  * of a connection state change the state machine would
1829                  * have already stopped the receive thread.
1830                  */
1831                 mutex_enter(&idt->idt_mutex);
1832 
1833                 /*
1834                  * Release the task hold here (obtained in idm_task_find)
1835                  * because the task may complete synchronously during
1836                  * idm_buf_rx_from_ini_done.  Since we still have an active
1837                  * buffer we know there is at least one additional hold on idt.
1838                  */
1839                 idm_task_rele(idt);
1840 
1841                 /*
1842                  * idm_buf_rx_from_ini_done releases idt->idt_mutex
1843                  */
1844                 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1845                     uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
1846                     uint64_t, 0, uint32_t, 0, uint32_t, 0,
1847                     uint32_t, idb->idb_xfer_len,
1848                     int, XFER_BUF_RX_FROM_INI);
1849                 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1850                 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1851                 return;
1852         }
1853 
1854         idm_task_rele(idt);
1855         idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1856 }
1857 
1858 /*
1859  * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1860  * the R2T PDU sent by the iSCSI target indicating that it is ready to
1861  * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1862  * and looks up the task in the task tree using the itt to get the output
1863  * buffers associated the task. The R2T PDU contains the offset of the
1864  * requested data and the data length. This function then constructs a
1865  * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1866  * PDU is associated with the R2T by the Target Transfer Tag  (ttt).
1867  */
1868 
1869 static void
1870 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1871 {
1872         idm_task_t              *idt;
1873         idm_buf_t               *idb;
1874         iscsi_rtt_hdr_t         *rtt_hdr;
1875         uint32_t                data_offset;
1876         uint32_t                data_length;
1877 
1878         ASSERT(ic != NULL);
1879         ASSERT(pdu != NULL);
1880 
1881         rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1882         data_offset = ntohl(rtt_hdr->data_offset);
1883         data_length = ntohl(rtt_hdr->data_length);
1884         idt     = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1885 
1886         if (idt == NULL) {
1887                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1888                 idm_pdu_rx_protocol_error(ic, pdu);
1889                 return;
1890         }
1891 
1892         /* Find the buffer bound to the task by the iSCSI initiator */
1893         mutex_enter(&idt->idt_mutex);
1894         idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1895         if (idb == NULL) {
1896                 mutex_exit(&idt->idt_mutex);
1897                 idm_task_rele(idt);
1898                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1899                 idm_pdu_rx_protocol_error(ic, pdu);
1900                 return;
1901         }
1902 
1903         /* return buffer contains this data */
1904         if (data_offset + data_length > idb->idb_buflen) {
1905                 /* Overflow */
1906                 mutex_exit(&idt->idt_mutex);
1907                 idm_task_rele(idt);
1908                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1909                     "buffer");
1910                 idm_pdu_rx_protocol_error(ic, pdu);
1911                 return;
1912         }
1913 
1914         idt->idt_r2t_ttt = rtt_hdr->ttt;
1915         idt->idt_exp_datasn = 0;
1916 
1917         idm_so_send_rtt_data(ic, idt, idb, data_offset,
1918             ntohl(rtt_hdr->data_length));
1919         /*
1920          * the idt_mutex is released in idm_so_send_rtt_data
1921          */
1922 
1923         idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1924         idm_task_rele(idt);
1925 
1926 }
1927 
1928 idm_status_t
1929 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1930 {
1931         uint8_t         pad[ISCSI_PAD_WORD_LEN];
1932         int             pad_len;
1933         uint32_t        data_digest_crc;
1934         uint32_t        crc_calculated;
1935         int             total_len;
1936         idm_so_conn_t   *so_conn;
1937 
1938         so_conn = ic->ic_transport_private;
1939 
1940         pad_len = ((ISCSI_PAD_WORD_LEN -
1941             (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1942             (ISCSI_PAD_WORD_LEN - 1));
1943 
1944         ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1945 
1946         total_len = pdu->isp_datalen;
1947 
1948         if (pad_len) {
1949                 pdu->isp_iov[pdu->isp_iovlen].iov_base    = (char *)&pad;
1950                 pdu->isp_iov[pdu->isp_iovlen].iov_len     = pad_len;
1951                 total_len               += pad_len;
1952                 pdu->isp_iovlen++;
1953         }
1954 
1955         /* setup data digest */
1956         if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1957                 pdu->isp_iov[pdu->isp_iovlen].iov_base =
1958                     (char *)&data_digest_crc;
1959                 pdu->isp_iov[pdu->isp_iovlen].iov_len =
1960                     sizeof (data_digest_crc);
1961                 total_len               += sizeof (data_digest_crc);
1962                 pdu->isp_iovlen++;
1963         }
1964 
1965         pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1966 
1967         if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1968             pdu->isp_iovlen, total_len) != 0) {
1969                 return (IDM_STATUS_IO);
1970         }
1971 
1972         if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1973                 crc_calculated = idm_crc32c(pdu->isp_data,
1974                     pdu->isp_datalen);
1975                 if (pad_len) {
1976                         crc_calculated = idm_crc32c_continued((char *)&pad,
1977                             pad_len, crc_calculated);
1978                 }
1979                 if (crc_calculated != data_digest_crc) {
1980                         IDM_CONN_LOG(CE_WARN,
1981                             "idm_sorecvdata: "
1982                             "CRC error: actual 0x%x, calc 0x%x",
1983                             data_digest_crc, crc_calculated);
1984 
1985                         /* Invalid Data Digest */
1986                         return (IDM_STATUS_DATA_DIGEST);
1987                 }
1988         }
1989 
1990         return (IDM_STATUS_SUCCESS);
1991 }
1992 
1993 /*
1994  * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1995  * Data-type PDU header must be read into the idm_pdu_t structure prior to
1996  * calling this function.
1997  */
1998 idm_status_t
1999 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
2000 {
2001         iscsi_data_hdr_t        *bhs;
2002         idm_task_t              *task;
2003         uint32_t                offset;
2004         uint8_t                 opcode;
2005         uint32_t                dlength;
2006         list_t                  *buflst;
2007         uint32_t                xfer_bytes;
2008         idm_status_t            status;
2009 
2010         ASSERT(ic != NULL);
2011         ASSERT(pdu != NULL);
2012 
2013         bhs     = (iscsi_data_hdr_t *)pdu->isp_hdr;
2014 
2015         offset  = ntohl(bhs->offset);
2016         opcode  = IDM_PDU_OPCODE(pdu);
2017         dlength = n2h24(bhs->dlength);
2018 
2019         ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
2020             (opcode == ISCSI_OP_SCSI_DATA));
2021 
2022         /*
2023          * Successful lookup implicitly gets a "hold" on the task.  This
2024          * hold must be released before leaving this function.  At one
2025          * point we were caching this task context and retaining the hold
2026          * but it turned out to be very difficult to release the hold properly.
2027          * The task can be aborted and the connection shutdown between this
2028          * call and the subsequent expected call to idm_so_rx_datain/
2029          * idm_so_rx_dataout (in which case those functions are not called).
2030          * Releasing the hold in the PDU callback doesn't work well either
2031          * because the whole task may be completed by then at which point
2032          * it is too late to release the hold -- for better or worse this
2033          * code doesn't wait on the refcnts during normal operation.
2034          * idm_task_find() is very fast and it is not a huge burden if we
2035          * have to do it twice.
2036          */
2037         task = idm_task_find(ic, bhs->itt, bhs->ttt);
2038         if (task == NULL) {
2039                 IDM_CONN_LOG(CE_WARN,
2040                     "idm_sorecv_scsidata: could not find task");
2041                 return (IDM_STATUS_FAIL);
2042         }
2043 
2044         mutex_enter(&task->idt_mutex);
2045         buflst  = (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
2046             &task->idt_inbufv : &task->idt_outbufv;
2047         pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
2048         mutex_exit(&task->idt_mutex);
2049 
2050         if (pdu->isp_sorx_buf == NULL) {
2051                 idm_task_rele(task);
2052                 IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
2053                     "buffer for offset %x opcode=%x",
2054                     offset, opcode);
2055                 return (IDM_STATUS_FAIL);
2056         }
2057 
2058         xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
2059         ASSERT(xfer_bytes != 0);
2060         if (xfer_bytes != dlength) {
2061                 idm_task_rele(task);
2062                 /*
2063                  * Buffer overflow, connection error.  The PDU data is still
2064                  * sitting in the socket so we can't use the connection
2065                  * again until that data is drained.
2066                  */
2067                 return (IDM_STATUS_FAIL);
2068         }
2069 
2070         status = idm_sorecvdata(ic, pdu);
2071 
2072         idm_task_rele(task);
2073 
2074         return (status);
2075 }
2076 
2077 static uint32_t
2078 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
2079 {
2080         uint32_t        buf_ro = ro - idb->idb_bufoffset;
2081         uint32_t        xfer_len = min(dlength, idb->idb_buflen - buf_ro);
2082 
2083         ASSERT(ro >= idb->idb_bufoffset);
2084 
2085         pdu->isp_iov[pdu->isp_iovlen].iov_base    =
2086             (caddr_t)idb->idb_buf + buf_ro;
2087         pdu->isp_iov[pdu->isp_iovlen].iov_len     = xfer_len;
2088         pdu->isp_iovlen++;
2089 
2090         return (xfer_len);
2091 }
2092 
2093 int
2094 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
2095 {
2096         pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
2097         ASSERT(pdu->isp_data != NULL);
2098 
2099         pdu->isp_databuflen = pdu->isp_datalen;
2100         pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
2101         pdu->isp_iov[0].iov_len = pdu->isp_datalen;
2102         pdu->isp_iovlen = 1;
2103         /*
2104          * Since we are associating a new data buffer with this received
2105          * PDU we need to set a specific callback to free the data
2106          * after the PDU is processed.
2107          */
2108         pdu->isp_flags |= IDM_PDU_ADDL_DATA;
2109         pdu->isp_callback = idm_sorx_addl_pdu_cb;
2110 
2111         return (idm_sorecvdata(ic, pdu));
2112 }
2113 
2114 void
2115 idm_sorx_thread(void *arg)
2116 {
2117         boolean_t       conn_failure = B_FALSE;
2118         idm_conn_t      *ic = (idm_conn_t *)arg;
2119         idm_so_conn_t   *so_conn;
2120         idm_pdu_t       *pdu;
2121         idm_status_t    rc;
2122 
2123         idm_conn_hold(ic);
2124 
2125         mutex_enter(&ic->ic_mutex);
2126 
2127         so_conn = ic->ic_transport_private;
2128         so_conn->ic_rx_thread_running = B_TRUE;
2129         so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
2130         cv_signal(&ic->ic_cv);
2131 
2132         while (so_conn->ic_rx_thread_running) {
2133                 mutex_exit(&ic->ic_mutex);
2134 
2135                 /*
2136                  * Get PDU with default header size (large enough for
2137                  * BHS plus any anticipated AHS).  PDU from
2138                  * the cache will have all values set correctly
2139                  * for sockets RX including callback.
2140                  */
2141                 pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
2142                 pdu->isp_ic = ic;
2143                 pdu->isp_flags = 0;
2144                 pdu->isp_transport_hdrlen = 0;
2145 
2146                 if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
2147                         /*
2148                          * Call idm_pdu_complete so that we call the callback
2149                          * and ensure any memory allocated in idm_sorecvhdr
2150                          * gets freed up.
2151                          */
2152                         idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2153 
2154                         /*
2155                          * If ic_rx_thread_running is still set then
2156                          * this is some kind of connection problem
2157                          * on the socket.  In this case we want to
2158                          * generate an event.  Otherwise some other
2159                          * thread closed the socket due to another
2160                          * issue in which case we don't need to
2161                          * generate an event.
2162                          */
2163                         mutex_enter(&ic->ic_mutex);
2164                         if (so_conn->ic_rx_thread_running) {
2165                                 conn_failure = B_TRUE;
2166                                 so_conn->ic_rx_thread_running = B_FALSE;
2167                         }
2168 
2169                         continue;
2170                 }
2171 
2172                 /*
2173                  * Header has been read and validated.  Now we need
2174                  * to read the PDU data payload (if present).  SCSI data
2175                  * need to be transferred from the socket directly into
2176                  * the associated transfer buffer for the SCSI task.
2177                  */
2178                 if (pdu->isp_datalen != 0) {
2179                         if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
2180                             (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
2181                                 rc = idm_sorecv_scsidata(ic, pdu);
2182                                 /*
2183                                  * All SCSI errors are fatal to the
2184                                  * connection right now since we have no
2185                                  * place to put the data.  What we need
2186                                  * is some kind of sink to dispose of unwanted
2187                                  * SCSI data.  For example an invalid task tag
2188                                  * should not kill the connection (although
2189                                  * we may want to drop the connection).
2190                                  */
2191                         } else {
2192                                 /*
2193                                  * Not data PDUs so allocate a buffer for the
2194                                  * data segment and read the remaining data.
2195                                  */
2196                                 rc = idm_sorecv_nonscsidata(ic, pdu);
2197                         }
2198                         if (rc != 0) {
2199                                 /*
2200                                  * Call idm_pdu_complete so that we call the
2201                                  * callback and ensure any memory allocated
2202                                  * in idm_sorecvhdr gets freed up.
2203                                  */
2204                                 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2205 
2206                                 /*
2207                                  * If ic_rx_thread_running is still set then
2208                                  * this is some kind of connection problem
2209                                  * on the socket.  In this case we want to
2210                                  * generate an event.  Otherwise some other
2211                                  * thread closed the socket due to another
2212                                  * issue in which case we don't need to
2213                                  * generate an event.
2214                                  */
2215                                 mutex_enter(&ic->ic_mutex);
2216                                 if (so_conn->ic_rx_thread_running) {
2217                                         conn_failure = B_TRUE;
2218                                         so_conn->ic_rx_thread_running = B_FALSE;
2219                                 }
2220                                 continue;
2221                         }
2222                 }
2223 
2224                 /*
2225                  * Process RX PDU
2226                  */
2227                 idm_pdu_rx(ic, pdu);
2228 
2229                 mutex_enter(&ic->ic_mutex);
2230         }
2231 
2232         mutex_exit(&ic->ic_mutex);
2233 
2234         /*
2235          * If we dropped out of the RX processing loop because of
2236          * a socket problem or other connection failure (including
2237          * digest errors) then we need to generate a state machine
2238          * event to shut the connection down.
2239          * If the state machine is already in, for example, INIT_ERROR, this
2240          * event will get dropped, and the TX thread will never be notified
2241          * to shut down.  To be safe, we'll just notify it here.
2242          */
2243         if (conn_failure) {
2244                 if (so_conn->ic_tx_thread_running) {
2245                         so_conn->ic_tx_thread_running = B_FALSE;
2246                         mutex_enter(&so_conn->ic_tx_mutex);
2247                         cv_signal(&so_conn->ic_tx_cv);
2248                         mutex_exit(&so_conn->ic_tx_mutex);
2249                 }
2250 
2251                 idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
2252         }
2253 
2254         idm_conn_rele(ic);
2255 
2256         thread_exit();
2257 }
2258 
2259 /*
2260  * idm_so_tx
2261  *
2262  * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2263  * point.  By definition, it is supposed to be fast.  So, simply queue
2264  * the entry and return.  The real work is done by idm_i_so_tx() via
2265  * idm_sotx_thread().
2266  */
2267 
2268 static void
2269 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2270 {
2271         idm_so_conn_t *so_conn = ic->ic_transport_private;
2272 
2273         ASSERT(pdu->isp_ic == ic);
2274         mutex_enter(&so_conn->ic_tx_mutex);
2275 
2276         if (!so_conn->ic_tx_thread_running) {
2277                 mutex_exit(&so_conn->ic_tx_mutex);
2278                 idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2279                 return;
2280         }
2281 
2282         list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2283         cv_signal(&so_conn->ic_tx_cv);
2284         mutex_exit(&so_conn->ic_tx_mutex);
2285 }
2286 
2287 static idm_status_t
2288 idm_i_so_tx(idm_pdu_t *pdu)
2289 {
2290         idm_conn_t      *ic = pdu->isp_ic;
2291         idm_status_t    status = IDM_STATUS_SUCCESS;
2292         uint8_t         pad[ISCSI_PAD_WORD_LEN];
2293         int             pad_len;
2294         uint32_t        hdr_digest_crc;
2295         uint32_t        data_digest_crc = 0;
2296         int             total_len = 0;
2297         int             iovlen = 0;
2298         struct iovec    iov[6];
2299         idm_so_conn_t   *so_conn;
2300 
2301         so_conn = ic->ic_transport_private;
2302 
2303         /* Setup BHS */
2304         iov[iovlen].iov_base    = (caddr_t)pdu->isp_hdr;
2305         iov[iovlen].iov_len     = pdu->isp_hdrlen;
2306         total_len               += iov[iovlen].iov_len;
2307         iovlen++;
2308 
2309         /* Setup header digest */
2310         if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2311             (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2312                 hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2313 
2314                 iov[iovlen].iov_base    = (caddr_t)&hdr_digest_crc;
2315                 iov[iovlen].iov_len     = sizeof (hdr_digest_crc);
2316                 total_len               += iov[iovlen].iov_len;
2317                 iovlen++;
2318         }
2319 
2320         /* Setup the data */
2321         if (pdu->isp_datalen) {
2322                 idm_task_t              *idt;
2323                 idm_buf_t               *idb;
2324                 iscsi_data_hdr_t        *ihp;
2325                 ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2326                 /* Write of immediate data */
2327                 if (ic->ic_ffp &&
2328                     (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_CMD ||
2329                     IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA)) {
2330                         idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2331                         if (idt) {
2332                                 mutex_enter(&idt->idt_mutex);
2333                                 idb = idm_buf_find(&idt->idt_outbufv, 0);
2334                                 mutex_exit(&idt->idt_mutex);
2335                                 /*
2336                                  * If the initiator call to idm_buf_alloc
2337                                  * failed then we can get to this point
2338                                  * without a bound buffer.  The associated
2339                                  * connection failure will clean things up
2340                                  * later.  It would be nice to come up with
2341                                  * a cleaner way to handle this.  In
2342                                  * particular it seems absurd to look up
2343                                  * the task and the buffer just to update
2344                                  * this counter.
2345                                  */
2346                                 if (idb)
2347                                         idb->idb_xfer_len += pdu->isp_datalen;
2348                                 idm_task_rele(idt);
2349                         }
2350                 }
2351 
2352                 iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2353                 iov[iovlen].iov_len  = pdu->isp_datalen;
2354                 total_len += iov[iovlen].iov_len;
2355                 iovlen++;
2356         }
2357 
2358         /* Setup the data pad if necessary */
2359         pad_len = ((ISCSI_PAD_WORD_LEN -
2360             (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2361             (ISCSI_PAD_WORD_LEN - 1));
2362 
2363         if (pad_len) {
2364                 bzero(pad, sizeof (pad));
2365                 iov[iovlen].iov_base = (void *)&pad;
2366                 iov[iovlen].iov_len  = pad_len;
2367                 total_len               += iov[iovlen].iov_len;
2368                 iovlen++;
2369         }
2370 
2371         /*
2372          * Setup the data digest if enabled.  Data-digest is not sent
2373          * for login-phase PDUs.
2374          */
2375         if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2376             ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2377             (pdu->isp_datalen || pad_len)) {
2378                 /*
2379                  * RFC3720/10.2.3: A zero-length Data Segment also
2380                  * implies a zero-length data digest.
2381                  */
2382                 if (pdu->isp_datalen) {
2383                         data_digest_crc = idm_crc32c(pdu->isp_data,
2384                             pdu->isp_datalen);
2385                 }
2386                 if (pad_len) {
2387                         data_digest_crc = idm_crc32c_continued(&pad,
2388                             pad_len, data_digest_crc);
2389                 }
2390 
2391                 iov[iovlen].iov_base    = (caddr_t)&data_digest_crc;
2392                 iov[iovlen].iov_len     = sizeof (data_digest_crc);
2393                 total_len               += iov[iovlen].iov_len;
2394                 iovlen++;
2395         }
2396 
2397         /* Transmit the PDU */
2398         if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2399             total_len) != 0) {
2400                 /* Set error status */
2401                 IDM_CONN_LOG(CE_WARN,
2402                     "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2403                     "data: %p", (void *) so_conn->ic_so, (void *) ic,
2404                     (void *) pdu->isp_data);
2405                 status = IDM_STATUS_IO;
2406         }
2407 
2408         /*
2409          * Success does not mean that the PDU actually reached the
2410          * remote node since it could get dropped along the way.
2411          */
2412         idm_pdu_complete(pdu, status);
2413 
2414         return (status);
2415 }
2416 
2417 /*
2418  * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2419  * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2420  * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2421  * A target can invoke this function multiple times for a single read command
2422  * (identified by the same ITT) to split the input into several sequences.
2423  *
2424  * DataSN starts with 0 for the first data PDU of an input command and advances
2425  * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2426  * which is set to 1 for the last data PDU of a sequence.
2427  * If the initiator supports phase collapse, the status bit must be set along
2428  * with the F bit to indicate that the status is shipped together with the last
2429  * Data-In PDU.
2430  *
2431  * The data PDUs within a sequence will be sent in order with the buffer offset
2432  * in increasing order. i.e. initiator and target must have negotiated the
2433  * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2434  *
2435  * Caller holds idt->idt_mutex
2436  */
2437 static idm_status_t
2438 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2439 {
2440         idm_so_conn_t   *so_conn = idb->idb_ic->ic_transport_private;
2441         idm_pdu_t       tmppdu;
2442 
2443         ASSERT(mutex_owned(&idt->idt_mutex));
2444 
2445         /*
2446          * Put the idm_buf_t on the tx queue.  It will be transmitted by
2447          * idm_sotx_thread.
2448          */
2449         mutex_enter(&so_conn->ic_tx_mutex);
2450 
2451         DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2452             uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2453             uint64_t, 0, uint32_t, 0, uint32_t, 0,
2454             uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2455 
2456         if (!so_conn->ic_tx_thread_running) {
2457                 mutex_exit(&so_conn->ic_tx_mutex);
2458                 /*
2459                  * Don't release idt->idt_mutex since we're supposed to hold
2460                  * in when calling idm_buf_tx_to_ini_done
2461                  */
2462                 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2463                     uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2464                     uint64_t, 0, uint32_t, 0, uint32_t, 0,
2465                     uint32_t, idb->idb_xfer_len,
2466                     int, XFER_BUF_TX_TO_INI);
2467                 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2468                 return (IDM_STATUS_FAIL);
2469         }
2470 
2471         /*
2472          * Build a template for the data PDU headers we will use so that
2473          * the SN values will stay consistent with other PDU's we are
2474          * transmitting like R2T and SCSI status.
2475          */
2476         bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2477         tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2478         (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2479             ISCSI_OP_SCSI_DATA_RSP);
2480         idb->idb_tx_thread = B_TRUE;
2481         list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2482         cv_signal(&so_conn->ic_tx_cv);
2483         mutex_exit(&so_conn->ic_tx_mutex);
2484         mutex_exit(&idt->idt_mutex);
2485 
2486         /*
2487          * Returning success here indicates the transfer was successfully
2488          * dispatched -- it does not mean that the transfer completed
2489          * successfully.
2490          */
2491         return (IDM_STATUS_SUCCESS);
2492 }
2493 
2494 /*
2495  * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2496  * data blocks it is ready to receive from the initiator in response to a WRITE
2497  * SCSI command. The target iSCSI layer passes the information about the desired
2498  * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2499  * offset and datalen are passed via the 'idb' argument.
2500  *
2501  * Scope for Prototype build:
2502  * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2503  * negotiated the "InitialR2T" to "Yes".
2504  *
2505  * Caller holds idt->idt_mutex
2506  */
2507 static idm_status_t
2508 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2509 {
2510         idm_pdu_t               *pdu;
2511         iscsi_rtt_hdr_t         *rtt;
2512 
2513         ASSERT(mutex_owned(&idt->idt_mutex));
2514 
2515         DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2516             uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2517             uint64_t, 0, uint32_t, 0, uint32_t, 0,
2518             uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2519 
2520         pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2521         pdu->isp_ic = idt->idt_ic;
2522         pdu->isp_flags = IDM_PDU_SET_STATSN;
2523         bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2524 
2525         /* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
2526         (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2527 
2528         /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2529         rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2530 
2531         rtt->opcode          = ISCSI_OP_RTT_RSP;
2532         rtt->flags           = ISCSI_FLAG_FINAL;
2533         rtt->data_offset     = htonl(idb->idb_bufoffset);
2534         rtt->data_length     = htonl(idb->idb_xfer_len);
2535         rtt->rttsn           = htonl(idt->idt_exp_rttsn++);
2536 
2537         /* Keep track of buffer offsets */
2538         idb->idb_exp_offset  = idb->idb_bufoffset;
2539         mutex_exit(&idt->idt_mutex);
2540 
2541         /*
2542          * Transmit the PDU.
2543          */
2544         idm_pdu_tx(pdu);
2545 
2546         return (IDM_STATUS_SUCCESS);
2547 }
2548 
2549 static idm_status_t
2550 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2551 {
2552         if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2553                 idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2554                     KM_NOSLEEP);
2555                 idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2556         } else {
2557                 idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2558                 idb->idb_buf_private = NULL;
2559         }
2560 
2561         if (idb->idb_buf == NULL) {
2562                 IDM_CONN_LOG(CE_NOTE,
2563                     "idm_so_buf_alloc: failed buffer allocation");
2564                 return (IDM_STATUS_FAIL);
2565         }
2566 
2567         return (IDM_STATUS_SUCCESS);
2568 }
2569 
/*
 * idm_so_buf_setup()
 *
 * Buffer setup routine for the sockets transport.  The only work done is
 * clearing idb->idb_bufalloc; the routine cannot fail and always returns
 * IDM_STATUS_SUCCESS.
 */
/* ARGSUSED */
static idm_status_t
idm_so_buf_setup(idm_buf_t *idb)
{
        /* Ensure bufalloc'd flag is unset */
        idb->idb_bufalloc = B_FALSE;

        return (IDM_STATUS_SUCCESS);
}
2579 
/*
 * idm_so_buf_teardown()
 *
 * Buffer teardown routine for the sockets transport.  Intentionally
 * empty: 'idb' is not touched.
 */
/* ARGSUSED */
static void
idm_so_buf_teardown(idm_buf_t *idb)
{
        /* nothing to do here */
}
2586 
2587 static void
2588 idm_so_buf_free(idm_buf_t *idb)
2589 {
2590         if (idb->idb_buf_private == NULL) {
2591                 kmem_free(idb->idb_buf, idb->idb_buflen);
2592         } else {
2593                 kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2594         }
2595 }
2596 
2597 static void
2598 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2599     uint32_t offset, uint32_t length)
2600 {
2601         idm_so_conn_t   *so_conn = ic->ic_transport_private;
2602         idm_pdu_t       tmppdu;
2603         idm_buf_t       *rtt_buf;
2604 
2605         ASSERT(mutex_owned(&idt->idt_mutex));
2606 
2607         /*
2608          * Allocate a buffer to represent the RTT transfer.  We could further
2609          * optimize this by allocating the buffers internally from an rtt
2610          * specific buffer cache since this is socket-specific code but for
2611          * now we will keep it simple.
2612          */
2613         rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2614         if (rtt_buf == NULL) {
2615                 /*
2616                  * If we're in FFP then the failure was likely a resource
2617                  * allocation issue and we should close the connection by
2618                  * sending a CE_TRANSPORT_FAIL event.
2619                  *
2620                  * If we're not in FFP then idm_buf_alloc will always
2621                  * fail and the state is transitioning to "complete" anyway
2622                  * so we won't bother to send an event.
2623                  */
2624                 mutex_enter(&ic->ic_state_mutex);
2625                 if (ic->ic_ffp)
2626                         idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2627                             NULL, CT_NONE);
2628                 mutex_exit(&ic->ic_state_mutex);
2629                 mutex_exit(&idt->idt_mutex);
2630                 return;
2631         }
2632 
2633         rtt_buf->idb_buf_cb = NULL;
2634         rtt_buf->idb_cb_arg = NULL;
2635         rtt_buf->idb_bufoffset = offset;
2636         rtt_buf->idb_xfer_len = length;
2637         rtt_buf->idb_ic = idt->idt_ic;
2638         rtt_buf->idb_task_binding = idt;
2639 
2640         /*
2641          * The new buffer (if any) represents an additional
2642          * reference on the task
2643          */
2644         idm_task_hold(idt);
2645         mutex_exit(&idt->idt_mutex);
2646 
2647         /*
2648          * Put the idm_buf_t on the tx queue.  It will be transmitted by
2649          * idm_sotx_thread.
2650          */
2651         mutex_enter(&so_conn->ic_tx_mutex);
2652 
2653         if (!so_conn->ic_tx_thread_running) {
2654                 idm_buf_free(rtt_buf);
2655                 mutex_exit(&so_conn->ic_tx_mutex);
2656                 idm_task_rele(idt);
2657                 return;
2658         }
2659 
2660         /*
2661          * Build a template for the data PDU headers we will use so that
2662          * the SN values will stay consistent with other PDU's we are
2663          * transmitting like R2T and SCSI status.
2664          */
2665         bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2666         tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2667         (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2668             ISCSI_OP_SCSI_DATA);
2669         rtt_buf->idb_tx_thread = B_TRUE;
2670         rtt_buf->idb_in_transport = B_TRUE;
2671         list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2672         cv_signal(&so_conn->ic_tx_cv);
2673         mutex_exit(&so_conn->ic_tx_mutex);
2674 }
2675 
/*
 * idm_so_send_rtt_data_done()
 *
 * Finish with a buffer that idm_so_send_rtt_data() queued: mark it as no
 * longer in transport, drop the task reference and free the buffer.  No
 * status is taken or reported here.
 */
static void
idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
{
        /*
         * Don't worry about status -- we assume any error handling
         * is performed by the caller (idm_sotx_thread).
         */
        idb->idb_in_transport = B_FALSE;
        idm_task_rele(idt);
        idm_buf_free(idb);
}
2687 
2688 static idm_status_t
2689 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
2690     uint32_t buf_region_offset, uint32_t buf_region_length)
2691 {
2692         idm_conn_t              *ic;
2693         uint32_t                max_dataseglen;
2694         size_t                  remainder, chunk;
2695         uint32_t                data_offset = buf_region_offset;
2696         iscsi_data_hdr_t        *bhs;
2697         idm_pdu_t               *pdu;
2698         idm_status_t            tx_status;
2699 
2700         ASSERT(mutex_owned(&idt->idt_mutex));
2701 
2702         ic = idt->idt_ic;
2703 
2704         max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
2705         remainder = buf_region_length;
2706 
2707         while (remainder) {
2708                 if (idt->idt_state != TASK_ACTIVE) {
2709                         ASSERT((idt->idt_state != TASK_IDLE) &&
2710                             (idt->idt_state != TASK_COMPLETE));
2711                         return (IDM_STATUS_ABORTED);
2712                 }
2713 
2714                 /* check to see if we need to chunk the data */
2715                 if (remainder > max_dataseglen) {
2716                         chunk = max_dataseglen;
2717                 } else {
2718                         chunk = remainder;
2719                 }
2720 
2721                 /* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2722                 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2723                 pdu->isp_ic = ic;
2724                 pdu->isp_flags = 0;  /* initialize isp_flags */
2725 
2726                 /*
2727                  * We've already built a build a header template
2728                  * to use during the transfer.  Use this template so that
2729                  * the SN values stay consistent with any unrelated PDU's
2730                  * being transmitted.
2731                  */
2732                 bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2733                     sizeof (iscsi_hdr_t));
2734 
2735                 /*
2736                  * Set DataSN, data offset, and flags in BHS
2737                  * For the prototype build, A = 0, S = 0, U = 0
2738                  */
2739                 bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2740 
2741                 bhs->datasn          = htonl(idt->idt_exp_datasn++);
2742 
2743                 hton24(bhs->dlength, chunk);
2744                 bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2745 
2746                 /* setup data */
2747                 pdu->isp_data        =  (uint8_t *)idb->idb_buf + data_offset;
2748                 pdu->isp_datalen = (uint_t)chunk;
2749 
2750                 if (chunk == remainder) {
2751                         bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2752                         /* Piggyback the status with the last data PDU */
2753                         if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
2754                                 pdu->isp_flags |= IDM_PDU_SET_STATSN |
2755                                     IDM_PDU_ADVANCE_STATSN;
2756                                 (*idt->idt_ic->ic_conn_ops.icb_update_statsn)
2757                                     (idt, pdu);
2758                                 idt->idt_flags |=
2759                                     IDM_TASK_PHASECOLLAPSE_SUCCESS;
2760 
2761                         }
2762                 }
2763 
2764                 remainder       -= chunk;
2765                 data_offset     += chunk;
2766 
2767                 /* Instrument the data-send DTrace probe. */
2768                 if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
2769                         DTRACE_ISCSI_2(data__send,
2770                             idm_conn_t *, idt->idt_ic,
2771                             iscsi_data_rsp_hdr_t *,
2772                             (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
2773                 }
2774 
2775                 /*
2776                  * Now that we're done working with idt_exp_datasn,
2777                  * idt->idt_state and idb->idb_bufoffset we can release
2778                  * the task lock -- don't want to hold it across the
2779                  * call to idm_i_so_tx since we could block.
2780                  */
2781                 mutex_exit(&idt->idt_mutex);
2782 
2783                 /*
2784                  * Transmit the PDU.  Call the internal routine directly
2785                  * as there is already implicit ordering.
2786                  */
2787                 if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
2788                         mutex_enter(&idt->idt_mutex);
2789                         return (tx_status);
2790                 }
2791 
2792                 mutex_enter(&idt->idt_mutex);
2793                 idt->idt_tx_bytes += chunk;
2794         }
2795 
2796         return (IDM_STATUS_SUCCESS);
2797 }
2798 
2799 /*
2800  * TX PDU cache
2801  */
2802 /* ARGSUSED */
2803 int
2804 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2805 {
2806         idm_pdu_t       *pdu = hdl;
2807 
2808         bzero(pdu, sizeof (idm_pdu_t));
2809         pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2810         pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2811         pdu->isp_callback = idm_sotx_cache_pdu_cb;
2812         pdu->isp_magic = IDM_PDU_MAGIC;
2813         bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2814 
2815         return (0);
2816 }
2817 
/*
 * idm_sotx_cache_pdu_cb()
 *
 * Completion callback installed on TX cache PDUs by
 * idm_sotx_pdu_constructor().  Clears isp_datalen and returns the PDU to
 * idm.idm_sotx_pdu_cache.  'status' is not examined.
 */
/* ARGSUSED */
void
idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
{
        /* reset values between use */
        pdu->isp_datalen = 0;

        kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
}
2827 
2828 /*
2829  * RX PDU cache
2830  */
2831 /* ARGSUSED */
2832 int
2833 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2834 {
2835         idm_pdu_t       *pdu = hdl;
2836 
2837         bzero(pdu, sizeof (idm_pdu_t));
2838         pdu->isp_magic = IDM_PDU_MAGIC;
2839         pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2840         pdu->isp_callback = idm_sorx_cache_pdu_cb;
2841 
2842         return (0);
2843 }
2844 
/*
 * idm_sorx_cache_pdu_cb()
 *
 * Completion callback for RX cache PDUs.  Resets isp_iovlen and
 * isp_sorx_buf, then returns the PDU to idm.idm_sorx_pdu_cache.
 * 'status' is not examined.
 */
/* ARGSUSED */
static void
idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
{
        pdu->isp_iovlen = 0;
        pdu->isp_sorx_buf = 0;
        kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
}
2853 
2854 static void
2855 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2856 {
2857         /*
2858          * We had to modify our cached RX PDU with a longer header buffer
2859          * and/or a longer data buffer.  Release the new buffers and fix
2860          * the fields back to what we would expect for a cached RX PDU.
2861          */
2862         if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2863                 kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2864         }
2865         if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2866                 kmem_free(pdu->isp_data, pdu->isp_datalen);
2867         }
2868         pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2869         pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2870         pdu->isp_data = NULL;
2871         pdu->isp_datalen = 0;
2872         pdu->isp_sorx_buf = 0;
2873         pdu->isp_callback = idm_sorx_cache_pdu_cb;
2874         idm_sorx_cache_pdu_cb(pdu, status);
2875 }
2876 
/*
 * idm_sotx_thread()
 *
 * Transmit thread for a sockets connection; 'arg' is the idm_conn_t.
 * This thread is only active when I/O is queued for transmit
 * because the socket is busy.
 *
 * The thread holds a connection reference for its whole life.  It sleeps
 * on ic_tx_cv while ic_tx_list is empty, and otherwise removes the head
 * of the list and transmits it with ic_tx_mutex dropped.  List entries
 * are told apart by their idm_tx_obj_magic: IDM_PDU_MAGIC entries are
 * PDUs sent with idm_i_so_tx(), IDM_BUF_MAGIC entries are buffers sent
 * with idm_so_send_buf_region().
 *
 * Any status other than IDM_STATUS_SUCCESS clears ic_tx_thread_running
 * and posts CE_TRANSPORT_FAIL.  Once ic_tx_thread_running is false (set
 * here or by someone else), everything still on the list is aborted and
 * the thread exits; this function does not return.
 */
void
idm_sotx_thread(void *arg)
{
        idm_conn_t      *ic = arg;
        idm_tx_obj_t    *object, *next;
        idm_so_conn_t   *so_conn;
        idm_status_t    status = IDM_STATUS_SUCCESS;

        idm_conn_hold(ic);

        /*
         * Announce that the thread is up.  Presumably whoever created
         * the thread is waiting on ic_cv for this -- confirm at the
         * creation site.
         */
        mutex_enter(&ic->ic_mutex);
        so_conn = ic->ic_transport_private;
        so_conn->ic_tx_thread_running = B_TRUE;
        so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
        cv_signal(&ic->ic_cv);
        mutex_exit(&ic->ic_mutex);

        mutex_enter(&so_conn->ic_tx_mutex);

        /* ic_tx_mutex is held at the top of every iteration */
        while (so_conn->ic_tx_thread_running) {
                while (list_is_empty(&so_conn->ic_tx_list)) {
                        DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
                        cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
                        DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);

                        /* Woken up to shut down rather than to transmit */
                        if (!so_conn->ic_tx_thread_running) {
                                goto tx_bail;
                        }
                }

                /* Dequeue the head, then transmit without the list lock */
                object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
                list_remove(&so_conn->ic_tx_list, object);
                mutex_exit(&so_conn->ic_tx_mutex);

                switch (object->idm_tx_obj_magic) {
                case IDM_PDU_MAGIC: {
                        idm_pdu_t *pdu = (idm_pdu_t *)object;
                        DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
                            idm_pdu_t *, (idm_pdu_t *)object);

                        if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
                                /* No IDM task */
                                (ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
                        }
                        status = idm_i_so_tx((idm_pdu_t *)object);
                        break;
                }
                case IDM_BUF_MAGIC: {
                        idm_buf_t *idb = (idm_buf_t *)object;
                        idm_task_t *idt = idb->idb_task_binding;

                        DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
                            idm_buf_t *, idb);

                        /*
                         * idm_so_send_buf_region is entered with the
                         * task lock held and returns with it held.
                         */
                        mutex_enter(&idt->idt_mutex);
                        status = idm_so_send_buf_region(idt,
                            idb, 0, idb->idb_xfer_len);

                        /*
                         * TX thread owns the buffer so we expect it to
                         * be "in transport"
                         */
                        ASSERT(idb->idb_in_transport);
                        if (IDM_CONN_ISTGT(ic)) {
                                /*
                                 * idm_buf_tx_to_ini_done releases
                                 * idt->idt_mutex
                                 */
                                DTRACE_ISCSI_8(xfer__done,
                                    idm_conn_t *, idt->idt_ic,
                                    uintptr_t, idb->idb_buf,
                                    uint32_t, idb->idb_bufoffset,
                                    uint64_t, 0, uint32_t, 0, uint32_t, 0,
                                    uint32_t, idb->idb_xfer_len,
                                    int, XFER_BUF_TX_TO_INI);
                                idm_buf_tx_to_ini_done(idt, idb, status);
                        } else {
                                /*
                                 * NOTE(review): this drops a task
                                 * reference before idt_mutex is released
                                 * below; presumably another hold keeps
                                 * idt valid -- confirm.
                                 */
                                idm_so_send_rtt_data_done(idt, idb);
                                mutex_exit(&idt->idt_mutex);
                        }
                        break;
                }

                default:
                        /*
                         * The object has already been taken off the list
                         * and is not completed or freed here.
                         */
                        IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
                            "(0x%08x)", object->idm_tx_obj_magic);
                        status = IDM_STATUS_FAIL;
                }

                mutex_enter(&so_conn->ic_tx_mutex);

                /* A failed transmit stops the thread and fails the conn */
                if (status != IDM_STATUS_SUCCESS) {
                        so_conn->ic_tx_thread_running = B_FALSE;
                        idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
                }
        }

        /*
         * Before we leave, we need to abort every item remaining in the
         * TX list.
         */

tx_bail:
        /* ic_tx_mutex is held on both ways of getting here */
        object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);

        while (object != NULL) {
                /*
                 * NOTE(review): 'next' is captured before ic_tx_mutex is
                 * dropped in the IDM_BUF_MAGIC case below.  This relies
                 * on nothing else changing the list in the meantime,
                 * presumably because producers check
                 * ic_tx_thread_running before queueing -- confirm.
                 */
                next = list_next(&so_conn->ic_tx_list, object);

                list_remove(&so_conn->ic_tx_list, object);
                switch (object->idm_tx_obj_magic) {
                case IDM_PDU_MAGIC:
                        idm_pdu_complete((idm_pdu_t *)object,
                            IDM_STATUS_ABORTED);
                        break;

                case IDM_BUF_MAGIC: {
                        idm_buf_t *idb = (idm_buf_t *)object;
                        idm_task_t *idt = idb->idb_task_binding;
                        /* Swap the list lock for the task lock */
                        mutex_exit(&so_conn->ic_tx_mutex);
                        mutex_enter(&idt->idt_mutex);
                        /*
                         * TX thread owns the buffer so we expect it to
                         * be "in transport"
                         */
                        ASSERT(idb->idb_in_transport);
                        if (IDM_CONN_ISTGT(ic)) {
                                /*
                                 * idm_buf_tx_to_ini_done releases
                                 * idt->idt_mutex
                                 */
                                DTRACE_ISCSI_8(xfer__done,
                                    idm_conn_t *, idt->idt_ic,
                                    uintptr_t, idb->idb_buf,
                                    uint32_t, idb->idb_bufoffset,
                                    uint64_t, 0, uint32_t, 0, uint32_t, 0,
                                    uint32_t, idb->idb_xfer_len,
                                    int, XFER_BUF_TX_TO_INI);
                                idm_buf_tx_to_ini_done(idt, idb,
                                    IDM_STATUS_ABORTED);
                        } else {
                                idm_so_send_rtt_data_done(idt, idb);
                                mutex_exit(&idt->idt_mutex);
                        }
                        mutex_enter(&so_conn->ic_tx_mutex);
                        break;
                }
                default:
                        IDM_CONN_LOG(CE_WARN,
                            "idm_sotx_thread: Unexpected magic "
                            "(0x%08x)", object->idm_tx_obj_magic);
                }

                object = next;
        }

        /* Drop the list lock and the connection hold taken at entry */
        mutex_exit(&so_conn->ic_tx_mutex);
        idm_conn_rele(ic);
        thread_exit();
        /*NOTREACHED*/
}
3041 
/*
 * idm_so_socket_set_nonblock()
 *
 * Issue VOP_SETFL on the socket's vnode with FNONBLOCK OR-ed into the
 * value passed as the third argument.  The result of VOP_SETFL is
 * deliberately ignored.
 *
 * NOTE(review): the two flag arguments (presumably old flags, then new
 * flags) are taken from node->so_flag and node->so_state respectively --
 * confirm these are the intended fields.
 */
static void
idm_so_socket_set_nonblock(struct sonode *node)
{
        (void) VOP_SETFL(node->so_vnode, node->so_flag,
            (node->so_state | FNONBLOCK), CRED(), NULL);
}
3048 
/*
 * idm_so_socket_set_block()
 *
 * Issue VOP_SETFL on the socket's vnode with FNONBLOCK masked out of the
 * value passed as the third argument.  The result of VOP_SETFL is
 * deliberately ignored.
 *
 * NOTE(review): the two flag arguments (presumably old flags, then new
 * flags) are taken from node->so_flag and node->so_state respectively --
 * confirm these are the intended fields.
 */
static void
idm_so_socket_set_block(struct sonode *node)
{
        (void) VOP_SETFL(node->so_vnode, node->so_flag,
            (node->so_state & (~FNONBLOCK)), CRED(), NULL);
}
3055 
3056 
3057 /*
3058  * Called by kernel sockets when the connection has been accepted or
3059  * rejected. In early volo, a "disconnect" callback was sent instead of
3060  * "connectfailed", so we check for both.
3061  */
3062 /* ARGSUSED */
3063 void
3064 idm_so_timed_socket_connect_cb(ksocket_t ks,
3065     ksocket_callback_event_t ev, void *arg, uintptr_t info)
3066 {
3067         idm_so_timed_socket_t   *itp = arg;
3068         ASSERT(itp != NULL);
3069         ASSERT(ev == KSOCKET_EV_CONNECTED ||
3070             ev == KSOCKET_EV_CONNECTFAILED ||
3071             ev == KSOCKET_EV_DISCONNECTED);
3072 
3073         mutex_enter(&idm_so_timed_socket_mutex);
3074         itp->it_callback_called = B_TRUE;
3075         if (ev == KSOCKET_EV_CONNECTED) {
3076                 itp->it_socket_error_code = 0;
3077         } else {
3078                 /* Make sure the error code is non-zero on error */
3079                 if (info == 0)
3080                         info = ECONNRESET;
3081                 itp->it_socket_error_code = (int)info;
3082         }
3083         cv_signal(&itp->it_cv);
3084         mutex_exit(&idm_so_timed_socket_mutex);
3085 }
3086 
3087 int
3088 idm_so_timed_socket_connect(ksocket_t ks,
3089     struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
3090 {
3091         clock_t                 conn_login_max;
3092         int                     rc, nonblocking, rval;
3093         idm_so_timed_socket_t   it;
3094         ksocket_callbacks_t     ks_cb;
3095 
3096         conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
3097 
3098         /*
3099          * Set to non-block socket mode, with callback on connect
3100          * Early volo used "disconnected" instead of "connectfailed",
3101          * so set callback to look for both.
3102          */
3103         bzero(&it, sizeof (it));
3104         ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
3105             KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
3106         ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
3107         ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
3108         ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
3109         cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
3110         rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
3111         if (rc != 0)
3112                 return (rc);
3113 
3114         /* Set to non-blocking mode */
3115         nonblocking = 1;
3116         rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3117             CRED());
3118         if (rc != 0)
3119                 goto cleanup;
3120 
3121         bzero(&it, sizeof (it));
3122         for (;;) {
3123                 /*
3124                  * Warning -- in a loopback scenario, the call to
3125                  * the connect_cb can occur inside the call to
3126                  * ksocket_connect. Do not hold the mutex around the
3127                  * call to ksocket_connect.
3128                  */
3129                 rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
3130                 if (rc == 0 || rc == EISCONN) {
3131                         /* socket success or already success */
3132                         rc = 0;
3133                         break;
3134                 }
3135                 if ((rc != EINPROGRESS) && (rc != EALREADY)) {
3136                         break;
3137                 }
3138 
3139                 /* TCP connect still in progress. See if out of time. */
3140                 if (ddi_get_lbolt() > conn_login_max) {
3141                         /*
3142                          * Connection retry timeout,
3143                          * failed connect to target.
3144                          */
3145                         rc = ETIMEDOUT;
3146                         break;
3147                 }
3148 
3149                 /*
3150                  * TCP connect still in progress.  Sleep until callback.
3151                  * Do NOT go to sleep if the callback already occurred!
3152                  */
3153                 mutex_enter(&idm_so_timed_socket_mutex);
3154                 if (!it.it_callback_called) {
3155                         (void) cv_timedwait(&it.it_cv,
3156                             &idm_so_timed_socket_mutex, conn_login_max);
3157                 }
3158                 if (it.it_callback_called) {
3159                         rc = it.it_socket_error_code;
3160                         mutex_exit(&idm_so_timed_socket_mutex);
3161                         break;
3162                 }
3163                 /* If timer expires, go call ksocket_connect one last time. */
3164                 mutex_exit(&idm_so_timed_socket_mutex);
3165         }
3166 
3167         /* resume blocking mode */
3168         nonblocking = 0;
3169         (void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3170             CRED());
3171 cleanup:
3172         (void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
3173         cv_destroy(&it.it_cv);
3174         if (rc != 0) {
3175                 idm_soshutdown(ks);
3176         }
3177         return (rc);
3178 }
3179 
3180 
3181 void
3182 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
3183 {
3184         int                     dp_addr_size;
3185         struct sockaddr_in      *sin;
3186         struct sockaddr_in6     *sin6;
3187 
3188         /* Build sockaddr_storage for this portal (idm_addr_t) */
3189         bzero(sa, sizeof (*sa));
3190         dp_addr_size = dportal->a_addr.i_insize;
3191         if (dp_addr_size == sizeof (struct in_addr)) {
3192                 /* IPv4 */
3193                 sa->ss_family = AF_INET;
3194                 sin = (struct sockaddr_in *)sa;
3195                 sin->sin_port = htons(dportal->a_port);
3196                 bcopy(&dportal->a_addr.i_addr.in4,
3197                     &sin->sin_addr, sizeof (struct in_addr));
3198         } else if (dp_addr_size == sizeof (struct in6_addr)) {
3199                 /* IPv6 */
3200                 sa->ss_family = AF_INET6;
3201                 sin6 = (struct sockaddr_in6 *)sa;
3202                 sin6->sin6_port = htons(dportal->a_port);
3203                 bcopy(&dportal->a_addr.i_addr.in6,
3204                     &sin6->sin6_addr, sizeof (struct in6_addr));
3205         } else {
3206                 ASSERT(0);
3207         }
3208 }
3209 
3210 
/*
 * Return a human-readable form of a sockaddr_storage, in the form
 * [ip-address].port (note: the port separator is a dot, not a colon).
 * The text is written into the caller-supplied `buf' of `size' bytes and
 * `buf' is returned.
 *
 * If the address family is neither AF_INET nor AF_INET6, if the address
 * cannot be converted to text, or if `buf' is too small to hold the address
 * together with the widest possible port, the placeholder "[0].-1" is
 * written instead (truncated to `size' if necessary).
 *
 * This is used in calls to logging functions.  If several calls to
 * idm_sa_ntop are made within the same invocation of a logging function,
 * then each one needs its own buf.
 */
const char *
idm_sa_ntop(const struct sockaddr_storage *sa,
    char *buf, size_t size)
{
        static const char bogus_ip[] = "[0].-1";
        char tmp[INET6_ADDRSTRLEN];
        const void *addr;
        unsigned int port;

        /* Pick out the address and port for the families we understand. */
        switch (sa->ss_family) {
        case AF_INET6: {
                const struct sockaddr_in6 *in6 =
                    (const struct sockaddr_in6 *)sa;

                addr = &in6->sin6_addr;
                port = ntohs(in6->sin6_port);
                break;
        }
        case AF_INET: {
                const struct sockaddr_in *in = (const struct sockaddr_in *)sa;

                addr = &in->sin_addr;
                port = ntohs(in->sin_port);
                break;
        }
        default:
                goto err;
        }

        /* Never run strlen() over tmp unless the conversion filled it in. */
        if (inet_ntop(sa->ss_family, addr, tmp, sizeof (tmp)) == NULL)
                goto err;

        /*
         * sizeof ("[].65535") covers the brackets, the dot, a five digit
         * port and the terminating NUL.
         */
        if (strlen(tmp) + sizeof ("[].65535") > size)
                goto err;

        (void) snprintf(buf, size, "[%s].%u", tmp, port);
        return (buf);

err:
        (void) snprintf(buf, size, "%s", bogus_ip);
        return (buf);
}