1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/conf.h>
  27 #include <sys/stat.h>
  28 #include <sys/file.h>
  29 #include <sys/ddi.h>
  30 #include <sys/sunddi.h>
  31 #include <sys/modctl.h>
  32 #include <sys/priv.h>
  33 #include <sys/cpuvar.h>
  34 #include <sys/socket.h>
  35 #include <sys/strsubr.h>
  36 #include <sys/sysmacros.h>
  37 #include <sys/sdt.h>
  38 #include <netinet/tcp.h>
  39 #include <inet/tcp.h>
  40 #include <sys/socketvar.h>
  41 #include <sys/pathname.h>
  42 #include <sys/fs/snode.h>
  43 #include <sys/fs/dv_node.h>
  44 #include <sys/vnode.h>
  45 #include <netinet/in.h>
  46 #include <net/if.h>
  47 #include <sys/sockio.h>
  48 #include <sys/ksocket.h>
  49 #include <sys/filio.h>            /* FIONBIO */
  50 #include <sys/iscsi_protocol.h>
  51 #include <sys/idm/idm.h>
  52 #include <sys/idm/idm_so.h>
  53 #include <sys/idm/idm_text.h>
  54 
  55 #define IN_PROGRESS_DELAY       1
  56 
  57 /*
  58  * in6addr_any is currently all zeroes, but use the macro in case this
  59  * ever changes.
  60  */
  61 static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
  62 
  63 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
  64 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
  65 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
  66 
  67 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
  68 static void idm_so_conn_destroy_common(idm_conn_t *ic);
  69 static void idm_so_conn_connect_common(idm_conn_t *ic);
  70 
  71 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
  72     boolean_t boot_conn);
  73 static void idm_set_ini_postconnect_options(idm_so_conn_t *sc);
  74 static void idm_set_tgt_connect_options(ksocket_t so);
  75 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
  76 
  77 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
  78 static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
  79     idm_buf_t *idb, uint32_t offset, uint32_t length);
  80 static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
  81 static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
  82     idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
  83 
  84 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
  85     uint32_t ro, uint32_t dlength);
  86 
  87 static idm_status_t idm_so_handle_digest(idm_conn_t *it,
  88     nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
  89 
  90 static void idm_so_socket_set_nonblock(struct sonode *node);
  91 static void idm_so_socket_set_block(struct sonode *node);
  92 
  93 /*
  94  * Transport ops prototypes
  95  */
  96 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
  97 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
  98 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
  99 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
 100 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
 101 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
 102 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
 103 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
 104     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
 105 static void idm_so_notice_key_values(idm_conn_t *it,
 106     nvlist_t *negotiated_nvl);
 107 static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
 108     nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
 109 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
 110     idm_transport_caps_t *caps);
 111 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
 112 static void idm_so_buf_free(idm_buf_t *idb);
 113 static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
 114 static void idm_so_buf_teardown(idm_buf_t *idb);
 115 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
 116 static void idm_so_tgt_svc_destroy(idm_svc_t *is);
 117 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
 118 static void idm_so_tgt_svc_offline(idm_svc_t *is);
 119 static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
 120 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
 121 static void idm_so_conn_disconnect(idm_conn_t *ic);
 122 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
 123 static void idm_so_ini_conn_destroy(idm_conn_t *ic);
 124 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
 125 
 126 /*
 127  * IDM Native Sockets transport operations
 128  */
 129 static
 130 idm_transport_ops_t idm_so_transport_ops = {
 131         idm_so_tx,                      /* it_tx_pdu */
 132         idm_so_buf_tx_to_ini,           /* it_buf_tx_to_ini */
 133         idm_so_buf_rx_from_ini,         /* it_buf_rx_from_ini */
 134         idm_so_rx_datain,               /* it_rx_datain */
 135         idm_so_rx_rtt,                  /* it_rx_rtt */
 136         idm_so_rx_dataout,              /* it_rx_dataout */
 137         NULL,                           /* it_alloc_conn_rsrc */
 138         NULL,                           /* it_free_conn_rsrc */
 139         NULL,                           /* it_tgt_enable_datamover */
 140         NULL,                           /* it_ini_enable_datamover */
 141         NULL,                           /* it_conn_terminate */
 142         idm_so_free_task_rsrc,          /* it_free_task_rsrc */
 143         idm_so_negotiate_key_values,    /* it_negotiate_key_values */
 144         idm_so_notice_key_values,       /* it_notice_key_values */
 145         idm_so_conn_is_capable,         /* it_conn_is_capable */
 146         idm_so_buf_alloc,               /* it_buf_alloc */
 147         idm_so_buf_free,                /* it_buf_free */
 148         idm_so_buf_setup,               /* it_buf_setup */
 149         idm_so_buf_teardown,            /* it_buf_teardown */
 150         idm_so_tgt_svc_create,          /* it_tgt_svc_create */
 151         idm_so_tgt_svc_destroy,         /* it_tgt_svc_destroy */
 152         idm_so_tgt_svc_online,          /* it_tgt_svc_online */
 153         idm_so_tgt_svc_offline,         /* it_tgt_svc_offline */
 154         idm_so_tgt_conn_destroy,        /* it_tgt_conn_destroy */
 155         idm_so_tgt_conn_connect,        /* it_tgt_conn_connect */
 156         idm_so_conn_disconnect,         /* it_tgt_conn_disconnect */
 157         idm_so_ini_conn_create,         /* it_ini_conn_create */
 158         idm_so_ini_conn_destroy,        /* it_ini_conn_destroy */
 159         idm_so_ini_conn_connect,        /* it_ini_conn_connect */
 160         idm_so_conn_disconnect,         /* it_ini_conn_disconnect */
 161         idm_so_declare_key_values       /* it_declare_key_values */
 162 };
 163 
 164 kmutex_t        idm_so_timed_socket_mutex;
 165 /*
 166  * idm_so_init()
 167  * Sockets transport initialization
 168  */
 169 void
 170 idm_so_init(idm_transport_t *it)
 171 {
 172         /* Cache for IDM Data and R2T Transmit PDU's */
 173         idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
 174             sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
 175             &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
 176 
 177         /* Cache for IDM Receive PDU's */
 178         idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
 179             sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
 180             &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
 181 
 182         /* 128k buffer cache */
 183         idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
 184             IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
 185 
 186         /* Set the sockets transport ops */
 187         it->it_ops = &idm_so_transport_ops;
 188 
 189         mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
 190 
 191 }
 192 
 193 /*
 194  * idm_so_fini()
 195  * Sockets transport teardown
 196  */
 197 void
 198 idm_so_fini(void)
 199 {
 200         kmem_cache_destroy(idm.idm_so_128k_buf_cache);
 201         kmem_cache_destroy(idm.idm_sotx_pdu_cache);
 202         kmem_cache_destroy(idm.idm_sorx_pdu_cache);
 203         mutex_destroy(&idm_so_timed_socket_mutex);
 204 }
 205 
 206 ksocket_t
 207 idm_socreate(int domain, int type, int protocol)
 208 {
 209         ksocket_t ks;
 210 
 211         if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
 212             CRED())) {
 213                 return (ks);
 214         } else {
 215                 return (NULL);
 216         }
 217 }
 218 
 219 /*
 220  * idm_soshutdown will disconnect the socket and prevent subsequent PDU
 221  * reception and transmission.  The sonode still exists but its state
 222  * gets modified to indicate it is no longer connected.  Calls to
 223  * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used
 224  * regain control of a thread stuck in idm_sorecv.
 225  */
 226 void
 227 idm_soshutdown(ksocket_t so)
 228 {
 229         (void) ksocket_shutdown(so, SHUT_RDWR, CRED());
 230 }
 231 
 232 /*
 233  * idm_sodestroy releases all resources associated with a socket previously
 234  * created with idm_socreate.  The socket must be shutdown using
 235  * idm_soshutdown before the socket is destroyed with idm_sodestroy,
 236  * otherwise undefined behavior will result.
 237  */
 238 void
 239 idm_sodestroy(ksocket_t ks)
 240 {
 241         (void) ksocket_close(ks, CRED());
 242 }
 243 
 244 /*
 245  * Function to compare two addresses in sockaddr_storage format
 246  */
 247 
 248 int
 249 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
 250     const struct sockaddr_storage *cmp_ss2,
 251     boolean_t v4_mapped_as_v4,
 252     boolean_t compare_ports)
 253 {
 254         struct sockaddr_storage                 mapped_v4_ss1, mapped_v4_ss2;
 255         const struct sockaddr_storage           *ss1, *ss2;
 256         struct in_addr                          *in1, *in2;
 257         struct in6_addr                         *in61, *in62;
 258         int i;
 259 
 260         /*
 261          * Normalize V4-mapped IPv6 addresses into V4 format if
 262          * v4_mapped_as_v4 is B_TRUE.
 263          */
 264         ss1 = cmp_ss1;
 265         ss2 = cmp_ss2;
 266         if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
 267                 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
 268                 if (IN6_IS_ADDR_V4MAPPED(in61)) {
 269                         bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
 270                         mapped_v4_ss1.ss_family = AF_INET;
 271                         ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
 272                             ((struct sockaddr_in *)ss1)->sin_port;
 273                         IN6_V4MAPPED_TO_INADDR(in61,
 274                             &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
 275                         ss1 = &mapped_v4_ss1;
 276                 }
 277         }
 278         ss2 = cmp_ss2;
 279         if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
 280                 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
 281                 if (IN6_IS_ADDR_V4MAPPED(in62)) {
 282                         bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
 283                         mapped_v4_ss2.ss_family = AF_INET;
 284                         ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
 285                             ((struct sockaddr_in *)ss2)->sin_port;
 286                         IN6_V4MAPPED_TO_INADDR(in62,
 287                             &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
 288                         ss2 = &mapped_v4_ss2;
 289                 }
 290         }
 291 
 292         /*
 293          * Compare ports, then address family, then ip address
 294          */
 295         if (compare_ports &&
 296             (((struct sockaddr_in *)ss1)->sin_port !=
 297             ((struct sockaddr_in *)ss2)->sin_port)) {
 298                 if (((struct sockaddr_in *)ss1)->sin_port >
 299                     ((struct sockaddr_in *)ss2)->sin_port)
 300                         return (1);
 301                 else
 302                         return (-1);
 303         }
 304 
 305         /*
 306          * ports are the same
 307          */
 308         if (ss1->ss_family != ss2->ss_family) {
 309                 if (ss1->ss_family == AF_INET)
 310                         return (1);
 311                 else
 312                         return (-1);
 313         }
 314 
 315         /*
 316          * address families are the same
 317          */
 318         if (ss1->ss_family == AF_INET) {
 319                 in1 = &((struct sockaddr_in *)ss1)->sin_addr;
 320                 in2 = &((struct sockaddr_in *)ss2)->sin_addr;
 321 
 322                 if (in1->s_addr > in2->s_addr)
 323                         return (1);
 324                 else if (in1->s_addr < in2->s_addr)
 325                         return (-1);
 326                 else
 327                         return (0);
 328         } else if (ss1->ss_family == AF_INET6) {
 329                 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
 330                 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
 331 
 332                 for (i = 0; i < 4; i++) {
 333                         if (in61->s6_addr32[i] > in62->s6_addr32[i])
 334                                 return (1);
 335                         else if (in61->s6_addr32[i] < in62->s6_addr32[i])
 336                                 return (-1);
 337                 }
 338                 return (0);
 339         }
 340 
 341         return (1);
 342 }
 343 
 344 /*
 345  * IP address filter functions to flag addresses that should not
 346  * go out to initiators through discovery.
 347  */
 348 static boolean_t
 349 idm_v4_addr_okay(struct in_addr *in_addr)
 350 {
 351         in_addr_t addr = ntohl(in_addr->s_addr);
 352 
 353         if ((INADDR_NONE == addr) ||
 354             (IN_MULTICAST(addr)) ||
 355             ((addr >> IN_CLASSA_NSHIFT) == 0) ||
 356             ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
 357                 return (B_FALSE);
 358         }
 359         return (B_TRUE);
 360 }
 361 
 362 static boolean_t
 363 idm_v6_addr_okay(struct in6_addr *addr6)
 364 {
 365 
 366         if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
 367             (IN6_IS_ADDR_LOOPBACK(addr6)) ||
 368             (IN6_IS_ADDR_MULTICAST(addr6)) ||
 369             (IN6_IS_ADDR_V4MAPPED(addr6)) ||
 370             (IN6_IS_ADDR_V4COMPAT(addr6)) ||
 371             (IN6_IS_ADDR_LINKLOCAL(addr6))) {
 372                 return (B_FALSE);
 373         }
 374         return (B_TRUE);
 375 }
 376 
 377 /*
 378  * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
 379  * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
 380  */
 381 int
 382 idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
 383 {
 384         ksocket_t               so4, so6;
 385         struct lifnum           lifn;
 386         struct lifconf          lifc;
 387         struct lifreq           *lp;
 388         int                     rval;
 389         int                     numifs;
 390         int                     bufsize;
 391         void                    *buf;
 392         int                     i, j, n, rc;
 393         struct sockaddr_storage ss;
 394         struct sockaddr_in      *sin;
 395         struct sockaddr_in6     *sin6;
 396         idm_addr_t              *ip;
 397         idm_addr_list_t         *ipaddr = NULL;
 398         int                     size_ipaddr;
 399 
 400         *ipaddr_p = NULL;
 401         size_ipaddr = 0;
 402         buf = NULL;
 403 
 404         /* create an ipv4 and ipv6 UDP socket */
 405         if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
 406                 return (0);
 407         if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
 408                 idm_sodestroy(so6);
 409                 return (0);
 410         }
 411 
 412 
 413 retry_count:
 414         /* snapshot the current number of interfaces */
 415         lifn.lifn_family = PF_UNSPEC;
 416         lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
 417         lifn.lifn_count = 0;
 418         /* use vp6 for ioctls with unspecified families by default */
 419         if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
 420             != 0) {
 421                 goto cleanup;
 422         }
 423 
 424         numifs = lifn.lifn_count;
 425         if (numifs <= 0) {
 426                 goto cleanup;
 427         }
 428 
 429         /* allocate extra room in case more interfaces appear */
 430         numifs += 10;
 431 
 432         /* get the interface names and ip addresses */
 433         bufsize = numifs * sizeof (struct lifreq);
 434         buf = kmem_alloc(bufsize, KM_SLEEP);
 435 
 436         lifc.lifc_family = AF_UNSPEC;
 437         lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
 438         lifc.lifc_len = bufsize;
 439         lifc.lifc_buf = buf;
 440         rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
 441         if (rc != 0) {
 442                 goto cleanup;
 443         }
 444         /* if our extra room is used up, try again */
 445         if (bufsize <= lifc.lifc_len) {
 446                 kmem_free(buf, bufsize);
 447                 buf = NULL;
 448                 goto retry_count;
 449         }
 450         /* calc actual number of ifconfs */
 451         n = lifc.lifc_len / sizeof (struct lifreq);
 452 
 453         /* get ip address */
 454         if (n > 0) {
 455                 size_ipaddr = sizeof (idm_addr_list_t) +
 456                     (n - 1) * sizeof (idm_addr_t);
 457                 ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
 458         } else {
 459                 goto cleanup;
 460         }
 461 
 462         /*
 463          * Examine the array of interfaces and filter uninteresting ones
 464          */
 465         for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
 466 
 467                 /*
 468                  * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
 469                  */
 470                 ss = lp->lifr_addr;
 471                 /*
 472                  * fetch the flags using the socket of the correct family
 473                  */
 474                 switch (ss.ss_family) {
 475                 case AF_INET:
 476                         rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
 477                             &rval, CRED());
 478                         break;
 479                 case AF_INET6:
 480                         rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
 481                             &rval, CRED());
 482                         break;
 483                 default:
 484                         continue;
 485                 }
 486                 if (rc == 0) {
 487                         /*
 488                          * If we got the flags, skip uninteresting
 489                          * interfaces based on flags
 490                          */
 491                         if ((lp->lifr_flags & IFF_UP) != IFF_UP)
 492                                 continue;
 493                         if (lp->lifr_flags &
 494                             (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
 495                                 continue;
 496                 }
 497 
 498                 /* save ip address */
 499                 ip = &ipaddr->al_addrs[j];
 500                 switch (ss.ss_family) {
 501                 case AF_INET:
 502                         sin = (struct sockaddr_in *)&ss;
 503                         if (!idm_v4_addr_okay(&sin->sin_addr))
 504                                 continue;
 505                         ip->a_addr.i_addr.in4 = sin->sin_addr;
 506                         ip->a_addr.i_insize = sizeof (struct in_addr);
 507                         break;
 508                 case AF_INET6:
 509                         sin6 = (struct sockaddr_in6 *)&ss;
 510                         if (!idm_v6_addr_okay(&sin6->sin6_addr))
 511                                 continue;
 512                         ip->a_addr.i_addr.in6 = sin6->sin6_addr;
 513                         ip->a_addr.i_insize = sizeof (struct in6_addr);
 514                         break;
 515                 default:
 516                         continue;
 517                 }
 518                 j++;
 519         }
 520 
 521         if (j == 0) {
 522                 /* no valid ifaddr */
 523                 kmem_free(ipaddr, size_ipaddr);
 524                 size_ipaddr = 0;
 525                 ipaddr = NULL;
 526         } else {
 527                 ipaddr->al_out_cnt = j;
 528         }
 529 
 530 
 531 cleanup:
 532         idm_sodestroy(so6);
 533         idm_sodestroy(so4);
 534 
 535         if (buf != NULL)
 536                 kmem_free(buf, bufsize);
 537 
 538         *ipaddr_p = ipaddr;
 539         return (size_ipaddr);
 540 }
 541 
 542 int
 543 idm_sorecv(ksocket_t so, void *msg, size_t len)
 544 {
 545         iovec_t iov;
 546 
 547         ASSERT(so != NULL);
 548         ASSERT(len != 0);
 549 
 550         /*
 551          * Fill in iovec and receive data
 552          */
 553         iov.iov_base = msg;
 554         iov.iov_len = len;
 555 
 556         return (idm_iov_sorecv(so, &iov, 1, len));
 557 }
 558 
 559 /*
 560  * idm_sosendto - Sends a buffered data on a non-connected socket.
 561  *
 562  * This function puts the data provided on the wire by calling sosendmsg.
 563  * It will return only when all the data has been sent or if an error
 564  * occurs.
 565  *
 566  * Returns 0 for success, the socket errno value if sosendmsg fails, and
 567  * -1 if sosendmsg returns success but uio_resid != 0
 568  */
 569 int
 570 idm_sosendto(ksocket_t so, void *buff, size_t len,
 571     struct sockaddr *name, socklen_t namelen)
 572 {
 573         struct msghdr           msg;
 574         struct iovec            iov[1];
 575         int                     error;
 576         size_t                  sent = 0;
 577 
 578         iov[0].iov_base = buff;
 579         iov[0].iov_len  = len;
 580 
 581         /* Initialization of the message header. */
 582         bzero(&msg, sizeof (msg));
 583         msg.msg_iov     = iov;
 584         msg.msg_iovlen  = 1;
 585         msg.msg_name    = name;
 586         msg.msg_namelen = namelen;
 587 
 588         if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
 589                 /* Data sent */
 590                 if (sent == len) {
 591                         /* All data sent.  Success. */
 592                         return (0);
 593                 } else {
 594                         /* Not all data was sent.  Failure */
 595                         return (-1);
 596                 }
 597         }
 598 
 599         /* Send failed */
 600         return (error);
 601 }
 602 
 603 /*
 604  * idm_iov_sosend - Sends an iovec on a connection.
 605  *
 606  * This function puts the data provided on the wire by calling sosendmsg.
 607  * It will return only when all the data has been sent or if an error
 608  * occurs.
 609  *
 610  * Returns 0 for success, the socket errno value if sosendmsg fails, and
 611  * -1 if sosendmsg returns success but uio_resid != 0
 612  */
 613 int
 614 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
 615 {
 616         struct msghdr           msg;
 617         int                     error;
 618         size_t                  sent = 0;
 619 
 620         ASSERT(iop != NULL);
 621 
 622         /* Initialization of the message header. */
 623         bzero(&msg, sizeof (msg));
 624         msg.msg_iov     = iop;
 625         msg.msg_iovlen  = iovlen;
 626 
 627         if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
 628             == 0) {
 629                 /* Data sent */
 630                 if (sent == total_len) {
 631                         /* All data sent.  Success. */
 632                         return (0);
 633                 } else {
 634                         /* Not all data was sent.  Failure */
 635                         return (-1);
 636                 }
 637         }
 638 
 639         /* Send failed */
 640         return (error);
 641 }
 642 
 643 /*
 644  * idm_iov_sorecv - Receives an iovec from a connection
 645  *
 646  * This function gets the data asked for from the socket.  It will return
 647  * only when all the requested data has been retrieved or if an error
 648  * occurs.
 649  *
 650  * Returns 0 for success, the socket errno value if sorecvmsg fails, and
 651  * -1 if sorecvmsg returns success but uio_resid != 0
 652  */
 653 int
 654 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
 655 {
 656         struct msghdr           msg;
 657         int                     error;
 658         size_t                  recv;
 659         int                     flags;
 660 
 661         ASSERT(iop != NULL);
 662 
 663         /* Initialization of the message header. */
 664         bzero(&msg, sizeof (msg));
 665         msg.msg_iov     = iop;
 666         msg.msg_iovlen  = iovlen;
 667         flags           = MSG_WAITALL;
 668 
 669         if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
 670             == 0) {
 671                 /* Received data */
 672                 if (recv == total_len) {
 673                         /* All requested data received.  Success */
 674                         return (0);
 675                 } else {
 676                         /*
 677                          * Not all data was received.  The connection has
 678                          * probably failed.
 679                          */
 680                         return (-1);
 681                 }
 682         }
 683 
 684         /* Receive failed */
 685         return (error);
 686 }
 687 
 688 static void
 689 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
 690 {
 691         int     conn_abort = 10000;
 692         int     conn_notify = 2000;
 693         int     abort = 30000;
 694 
 695         /* Pre-connect socket options */
 696         (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 697             TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
 698             CRED());
 699         if (boot_conn == B_FALSE) {
 700                 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 701                     TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
 702                     CRED());
 703                 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
 704                     TCP_ABORT_THRESHOLD,
 705                     (char *)&abort, sizeof (int), CRED());
 706         }
 707 }
 708 
 709 static void
 710 idm_set_ini_postconnect_options(idm_so_conn_t *sc)
 711 {
 712         int32_t         rcvbuf = IDM_RCVBUF_SIZE;
 713         int32_t         sndbuf = IDM_SNDBUF_SIZE;
 714         const int       on = 1;
 715 
 716         /* Set postconnect options */
 717         (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_NODELAY,
 718             (char *)&on, sizeof (int), CRED());
 719         (void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_RCVBUF,
 720             (char *)&rcvbuf, sizeof (int), CRED());
 721         (void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_SNDBUF,
 722             (char *)&sndbuf, sizeof (int), CRED());
 723 }
 724 
 725 static void
 726 idm_set_tgt_connect_options(ksocket_t ks)
 727 {
 728         int32_t         rcvbuf = IDM_RCVBUF_SIZE;
 729         int32_t         sndbuf = IDM_SNDBUF_SIZE;
 730         const int       on = 1;
 731 
 732         /* Set connect options */
 733         (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
 734             (char *)&rcvbuf, sizeof (int), CRED());
 735         (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
 736             (char *)&sndbuf, sizeof (int), CRED());
 737         (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
 738             (char *)&on, sizeof (on), CRED());
 739 }
 740 
 741 static uint32_t
 742 n2h24(const uchar_t *ptr)
 743 {
 744         return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
 745 }
 746 
 747 
 748 static idm_status_t
 749 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
 750 {
 751         iscsi_hdr_t     *bhs;
 752         uint32_t        hdr_digest_crc;
 753         uint32_t        crc_calculated;
 754         void            *new_hdr;
 755         int             ahslen = 0;
 756         int             total_len = 0;
 757         int             iovlen = 0;
 758         struct iovec    iov[2];
 759         idm_so_conn_t   *so_conn;
 760         int             rc;
 761 
 762         so_conn = ic->ic_transport_private;
 763 
 764         /*
 765          * Read BHS
 766          */
 767         bhs = pdu->isp_hdr;
 768         rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
 769         if (rc != IDM_STATUS_SUCCESS) {
 770                 return (IDM_STATUS_FAIL);
 771         }
 772 
 773         /*
 774          * Check actual AHS length against the amount available in the buffer
 775          */
 776         pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
 777             (bhs->hlength * sizeof (uint32_t));
 778         pdu->isp_datalen = n2h24(bhs->dlength);
 779         if (ic->ic_conn_type == CONN_TYPE_TGT &&
 780             pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
 781                 IDM_CONN_LOG(CE_WARN,
 782                     "idm_sorecvhdr: exceeded the max data segment length");
 783                 return (IDM_STATUS_FAIL);
 784         }
 785         if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
 786                 /* Allocate a new header segment and change the callback */
 787                 new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
 788                 bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
 789                 pdu->isp_hdr = new_hdr;
 790                 pdu->isp_flags |= IDM_PDU_ADDL_HDR;
 791 
 792                 /*
 793                  * This callback will restore the expected values after
 794                  * the RX PDU has been processed.
 795                  */
 796                 pdu->isp_callback = idm_sorx_addl_pdu_cb;
 797         }
 798 
 799         /*
 800          * Setup receipt of additional header and header digest (if enabled).
 801          */
 802         if (bhs->hlength > 0) {
 803                 iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
 804                 ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
 805                 iov[iovlen].iov_len = ahslen;
 806                 total_len += iov[iovlen].iov_len;
 807                 iovlen++;
 808         }
 809 
 810         if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
 811                 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
 812                 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
 813                 total_len += iov[iovlen].iov_len;
 814                 iovlen++;
 815         }
 816 
 817         if ((iovlen != 0) &&
 818             (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
 819             total_len) != 0)) {
 820                 return (IDM_STATUS_FAIL);
 821         }
 822 
 823         /*
 824          * Validate header digest if enabled
 825          */
 826         if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
 827                 crc_calculated = idm_crc32c(pdu->isp_hdr,
 828                     sizeof (iscsi_hdr_t) + ahslen);
 829                 if (crc_calculated != hdr_digest_crc) {
 830                         /* Invalid Header Digest */
 831                         return (IDM_STATUS_HEADER_DIGEST);
 832                 }
 833         }
 834 
 835         return (0);
 836 }
 837 
 838 /*
 839  * idm_so_ini_conn_create()
 840  * Allocate the sockets transport connection resources.
 841  */
 842 static idm_status_t
 843 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
 844 {
 845         ksocket_t       so;
 846         idm_so_conn_t   *so_conn;
 847         idm_status_t    idmrc;
 848 
 849         so = idm_socreate(cr->cr_domain, cr->cr_type,
 850             cr->cr_protocol);
 851         if (so == NULL) {
 852                 return (IDM_STATUS_FAIL);
 853         }
 854 
 855         /* Bind the socket if configured to do so */
 856         if (cr->cr_bound) {
 857                 if (ksocket_bind(so, &cr->cr_bound_addr.sin,
 858                     SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
 859                         idm_sodestroy(so);
 860                         return (IDM_STATUS_FAIL);
 861                 }
 862         }
 863 
 864         idmrc = idm_so_conn_create_common(ic, so);
 865         if (idmrc != IDM_STATUS_SUCCESS) {
 866                 idm_soshutdown(so);
 867                 idm_sodestroy(so);
 868                 return (IDM_STATUS_FAIL);
 869         }
 870 
 871         so_conn = ic->ic_transport_private;
 872         /* Set up socket options */
 873         idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
 874 
 875         return (IDM_STATUS_SUCCESS);
 876 }
 877 
 878 /*
 879  * idm_so_ini_conn_destroy()
 880  * Tear down the sockets transport connection resources.
 881  */
 882 static void
 883 idm_so_ini_conn_destroy(idm_conn_t *ic)
 884 {
 885         idm_so_conn_destroy_common(ic);
 886 }
 887 
 888 /*
 889  * idm_so_ini_conn_connect()
 890  * Establish the connection referred to by the handle previously allocated via
 891  * idm_so_ini_conn_create().
 892  */
 893 static idm_status_t
 894 idm_so_ini_conn_connect(idm_conn_t *ic)
 895 {
 896         idm_so_conn_t   *so_conn;
 897         struct sonode   *node = NULL;
 898         int             rc;
 899         clock_t         lbolt, conn_login_max, conn_login_interval;
 900         boolean_t       nonblock;
 901 
 902         so_conn = ic->ic_transport_private;
 903         nonblock = ic->ic_conn_params.nonblock_socket;
 904         conn_login_max = ic->ic_conn_params.conn_login_max;
 905         conn_login_interval = ddi_get_lbolt() +
 906             SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
 907 
 908         if (nonblock == B_TRUE) {
 909                 node = ((struct sonode *)(so_conn->ic_so));
 910                 /* Set to none block socket mode */
 911                 idm_so_socket_set_nonblock(node);
 912                 do {
 913                         rc = ksocket_connect(so_conn->ic_so,
 914                             &ic->ic_ini_dst_addr.sin,
 915                             (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
 916                             CRED());
 917                         if (rc == 0 || rc == EISCONN) {
 918                                 /* socket success or already success */
 919                                 rc = IDM_STATUS_SUCCESS;
 920                                 break;
 921                         }
 922                         if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
 923                             (rc == ECONNRESET)) {
 924                                 /* socket connection timeout or refuse */
 925                                 break;
 926                         }
 927                         lbolt = ddi_get_lbolt();
 928                         if (lbolt > conn_login_max) {
 929                                 /*
 930                                  * Connection retry timeout,
 931                                  * failed connect to target.
 932                                  */
 933                                 break;
 934                         }
 935                         if (lbolt < conn_login_interval) {
 936                                 if ((rc == EINPROGRESS) || (rc == EALREADY)) {
 937                                         /* TCP connect still in progress */
 938                                         delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
 939                                         continue;
 940                                 } else {
 941                                         delay(conn_login_interval - lbolt);
 942                                 }
 943                         }
 944                         conn_login_interval = ddi_get_lbolt() +
 945                             SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
 946                 } while (rc != 0);
 947                 /* resume to nonblock mode */
 948                 if (rc == IDM_STATUS_SUCCESS) {
 949                         idm_so_socket_set_block(node);
 950                 }
 951         } else {
 952                 rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
 953                     (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
 954         }
 955 
 956         if (rc != 0) {
 957                 idm_soshutdown(so_conn->ic_so);
 958                 return (IDM_STATUS_FAIL);
 959         }
 960 
 961         idm_so_conn_connect_common(ic);
 962 
 963         idm_set_ini_postconnect_options(so_conn);
 964 
 965         return (IDM_STATUS_SUCCESS);
 966 }
 967 
 968 idm_status_t
 969 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
 970 {
 971         idm_status_t    idmrc;
 972 
 973         idmrc = idm_so_conn_create_common(ic, new_so);
 974 
 975         return (idmrc);
 976 }
 977 
 978 static void
 979 idm_so_tgt_conn_destroy(idm_conn_t *ic)
 980 {
 981         idm_so_conn_destroy_common(ic);
 982 }
 983 
 984 /*
 985  * idm_so_tgt_conn_connect()
 986  * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
 987  * is invoked from the SM as a result of an inbound connection request.
 988  */
 989 static idm_status_t
 990 idm_so_tgt_conn_connect(idm_conn_t *ic)
 991 {
 992         idm_so_conn_connect_common(ic);
 993 
 994         return (IDM_STATUS_SUCCESS);
 995 }
 996 
 997 static idm_status_t
 998 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
 999 {
1000         idm_so_conn_t   *so_conn;
1001 
1002         so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
1003         so_conn->ic_so = new_so;
1004 
1005         ic->ic_transport_private = so_conn;
1006         ic->ic_transport_hdrlen = 0;
1007 
1008         /* Set the scoreboarding flag on this connection */
1009         ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
1010         ic->ic_conn_params.max_recv_dataseglen =
1011             ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1012         ic->ic_conn_params.max_xmit_dataseglen =
1013             ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1014 
1015         /*
1016          * Initialize tx thread mutex and list
1017          */
1018         mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1019         cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1020         list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1021             offsetof(idm_pdu_t, idm_tx_link));
1022 
1023         return (IDM_STATUS_SUCCESS);
1024 }
1025 
1026 static void
1027 idm_so_conn_destroy_common(idm_conn_t *ic)
1028 {
1029         idm_so_conn_t   *so_conn = ic->ic_transport_private;
1030 
1031         ic->ic_transport_private = NULL;
1032         idm_sodestroy(so_conn->ic_so);
1033         list_destroy(&so_conn->ic_tx_list);
1034         mutex_destroy(&so_conn->ic_tx_mutex);
1035         cv_destroy(&so_conn->ic_tx_cv);
1036 
1037         kmem_free(so_conn, sizeof (idm_so_conn_t));
1038 }
1039 
1040 static void
1041 idm_so_conn_connect_common(idm_conn_t *ic)
1042 {
1043         idm_so_conn_t   *so_conn;
1044         struct sockaddr_in6     t_addr;
1045         socklen_t       t_addrlen = 0;
1046 
1047         so_conn = ic->ic_transport_private;
1048         bzero(&t_addr, sizeof (struct sockaddr_in6));
1049         t_addrlen = sizeof (struct sockaddr_in6);
1050 
1051         /* Set the local and remote addresses in the idm conn handle */
1052         (void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
1053             &t_addrlen, CRED());
1054         bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
1055         (void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
1056             &t_addrlen, CRED());
1057         bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
1058 
1059         mutex_enter(&ic->ic_mutex);
1060         so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
1061             &p0, TS_RUN, minclsyspri);
1062         so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
1063             &p0, TS_RUN, minclsyspri);
1064 
1065         while (so_conn->ic_rx_thread_did == 0 ||
1066             so_conn->ic_tx_thread_did == 0)
1067                 cv_wait(&ic->ic_cv, &ic->ic_mutex);
1068         mutex_exit(&ic->ic_mutex);
1069 }
1070 
1071 /*
1072  * idm_so_conn_disconnect()
1073  * Shutdown the socket connection and stop the thread
1074  */
1075 static void
1076 idm_so_conn_disconnect(idm_conn_t *ic)
1077 {
1078         idm_so_conn_t   *so_conn;
1079 
1080         so_conn = ic->ic_transport_private;
1081 
1082         mutex_enter(&ic->ic_mutex);
1083         so_conn->ic_rx_thread_running = B_FALSE;
1084         so_conn->ic_tx_thread_running = B_FALSE;
1085         /* We need to wakeup the TX thread */
1086         mutex_enter(&so_conn->ic_tx_mutex);
1087         cv_signal(&so_conn->ic_tx_cv);
1088         mutex_exit(&so_conn->ic_tx_mutex);
1089         mutex_exit(&ic->ic_mutex);
1090 
1091         /* This should wakeup the RX thread if it is sleeping */
1092         idm_soshutdown(so_conn->ic_so);
1093 
1094         thread_join(so_conn->ic_tx_thread_did);
1095         thread_join(so_conn->ic_rx_thread_did);
1096 }
1097 
1098 /*
1099  * idm_so_tgt_svc_create()
1100  * Establish a service on an IP address and port.  idm_svc_req_t contains
1101  * the service parameters.
1102  */
1103 /*ARGSUSED*/
1104 static idm_status_t
1105 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1106 {
1107         idm_so_svc_t            *so_svc;
1108 
1109         so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1110 
1111         /* Set the new sockets service in svc handle */
1112         is->is_so_svc = (void *)so_svc;
1113 
1114         return (IDM_STATUS_SUCCESS);
1115 }
1116 
1117 /*
1118  * idm_so_tgt_svc_destroy()
1119  * Teardown sockets resources allocated in idm_so_tgt_svc_create()
1120  */
1121 static void
1122 idm_so_tgt_svc_destroy(idm_svc_t *is)
1123 {
1124         /* the socket will have been torn down; free the service */
1125         kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
1126 }
1127 
1128 /*
1129  * idm_so_tgt_svc_online()
1130  * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1131  */
1132 
1133 static idm_status_t
1134 idm_so_tgt_svc_online(idm_svc_t *is)
1135 {
1136         idm_so_svc_t            *so_svc;
1137         idm_svc_req_t           *sr = &is->is_svc_req;
1138         struct sockaddr_in6     sin6_ip;
1139         const uint32_t          on = 1;
1140         const uint32_t          off = 0;
1141 
1142         mutex_enter(&is->is_mutex);
1143         so_svc = (idm_so_svc_t *)is->is_so_svc;
1144 
1145         /*
1146          * Try creating an IPv6 socket first
1147          */
1148         if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1149                 mutex_exit(&is->is_mutex);
1150                 return (IDM_STATUS_FAIL);
1151         } else {
1152                 bzero(&sin6_ip, sizeof (sin6_ip));
1153                 sin6_ip.sin6_family = AF_INET6;
1154                 sin6_ip.sin6_port = htons(sr->sr_port);
1155                 sin6_ip.sin6_addr = in6addr_any;
1156 
1157                 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1158                     SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
1159                 /*
1160                  * Turn off SO_MAC_EXEMPT so future sobinds succeed
1161                  */
1162                 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1163                     SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
1164 
1165                 if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1166                     sizeof (sin6_ip), CRED()) != 0) {
1167                         mutex_exit(&is->is_mutex);
1168                         idm_sodestroy(so_svc->is_so);
1169                         return (IDM_STATUS_FAIL);
1170                 }
1171         }
1172 
1173         idm_set_tgt_connect_options(so_svc->is_so);
1174 
1175         if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
1176                 mutex_exit(&is->is_mutex);
1177                 idm_soshutdown(so_svc->is_so);
1178                 idm_sodestroy(so_svc->is_so);
1179                 return (IDM_STATUS_FAIL);
1180         }
1181 
1182         /* Launch a watch thread */
1183         so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1184             is, 0, &p0, TS_RUN, minclsyspri);
1185 
1186         if (so_svc->is_thread == NULL) {
1187                 /* Failure to launch; teardown the socket */
1188                 mutex_exit(&is->is_mutex);
1189                 idm_soshutdown(so_svc->is_so);
1190                 idm_sodestroy(so_svc->is_so);
1191                 return (IDM_STATUS_FAIL);
1192         }
1193         ksocket_hold(so_svc->is_so);
1194         /* Wait for the port watcher thread to start */
1195         while (!so_svc->is_thread_running)
1196                 cv_wait(&is->is_cv, &is->is_mutex);
1197         mutex_exit(&is->is_mutex);
1198 
1199         return (IDM_STATUS_SUCCESS);
1200 }
1201 
1202 /*
1203  * idm_so_tgt_svc_offline
1204  *
1205  * Stop listening on the IP address and port identified by idm_svc_t.
1206  */
1207 static void
1208 idm_so_tgt_svc_offline(idm_svc_t *is)
1209 {
1210         idm_so_svc_t            *so_svc;
1211         mutex_enter(&is->is_mutex);
1212         so_svc = (idm_so_svc_t *)is->is_so_svc;
1213         so_svc->is_thread_running = B_FALSE;
1214         mutex_exit(&is->is_mutex);
1215 
1216         /*
1217          * Teardown socket
1218          */
1219         idm_sodestroy(so_svc->is_so);
1220 
1221         /*
1222          * Now we expect the port watcher thread to terminate
1223          */
1224         thread_join(so_svc->is_thread_did);
1225 }
1226 
1227 /*
1228  * Watch thread for target service connection establishment.
1229  */
1230 void
1231 idm_so_svc_port_watcher(void *arg)
1232 {
1233         idm_svc_t               *svc = arg;
1234         ksocket_t               new_so;
1235         idm_conn_t              *ic;
1236         idm_status_t            idmrc;
1237         idm_so_svc_t            *so_svc;
1238         int                     rc;
1239         const uint32_t          off = 0;
1240         struct sockaddr_in6     t_addr;
1241         socklen_t               t_addrlen;
1242 
1243         bzero(&t_addr, sizeof (struct sockaddr_in6));
1244         t_addrlen = sizeof (struct sockaddr_in6);
1245         mutex_enter(&svc->is_mutex);
1246 
1247         so_svc = svc->is_so_svc;
1248         so_svc->is_thread_running = B_TRUE;
1249         so_svc->is_thread_did = so_svc->is_thread->t_did;
1250 
1251         cv_signal(&svc->is_cv);
1252 
1253         IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
1254             svc->is_svc_req.sr_port);
1255 
1256         while (so_svc->is_thread_running) {
1257                 mutex_exit(&svc->is_mutex);
1258 
1259                 if ((rc = ksocket_accept(so_svc->is_so,
1260                     (struct sockaddr *)&t_addr, &t_addrlen,
1261                     &new_so, CRED())) != 0) {
1262                         mutex_enter(&svc->is_mutex);
1263                         if (rc != ECONNABORTED && rc != EINTR) {
1264                                 IDM_SVC_LOG(CE_NOTE, "idm_so_svc_port_watcher:"
1265                                     " ksocket_accept failed %d", rc);
1266                         }
1267                         /*
1268                          * Unclean shutdown of this thread is not handled
1269                          * wait for !is_thread_running.
1270                          */
1271                         continue;
1272                 }
1273                 /*
1274                  * Turn off SO_MAC_EXEMPT so future sobinds succeed
1275                  */
1276                 (void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
1277                     (char *)&off, sizeof (off), CRED());
1278 
1279                 idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
1280                     &ic);
1281                 if (idmrc != IDM_STATUS_SUCCESS) {
1282                         /* Drop connection */
1283                         idm_soshutdown(new_so);
1284                         idm_sodestroy(new_so);
1285                         mutex_enter(&svc->is_mutex);
1286                         continue;
1287                 }
1288 
1289                 idmrc = idm_so_tgt_conn_create(ic, new_so);
1290                 if (idmrc != IDM_STATUS_SUCCESS) {
1291                         idm_svc_conn_destroy(ic);
1292                         idm_soshutdown(new_so);
1293                         idm_sodestroy(new_so);
1294                         mutex_enter(&svc->is_mutex);
1295                         continue;
1296                 }
1297 
1298                 /*
1299                  * Kick the state machine.  At CS_S3_XPT_UP the state machine
1300                  * will notify the client (target) about the new connection.
1301                  */
1302                 idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
1303 
1304                 mutex_enter(&svc->is_mutex);
1305         }
1306         ksocket_rele(so_svc->is_so);
1307         so_svc->is_thread_running = B_FALSE;
1308         mutex_exit(&svc->is_mutex);
1309 
1310         IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
1311             svc->is_svc_req.sr_port);
1312 
1313         thread_exit();
1314 }
1315 
1316 /*
1317  * idm_so_free_task_rsrc() stops any ongoing processing of the task and
1318  * frees resources associated with the task.
1319  *
1320  * It's not clear that this should return idm_status_t.  What do we do
1321  * if it fails?
1322  */
1323 static idm_status_t
1324 idm_so_free_task_rsrc(idm_task_t *idt)
1325 {
1326         idm_buf_t       *idb, *next_idb;
1327 
1328         /*
1329          * There is nothing to cleanup on initiator connections
1330          */
1331         if (IDM_CONN_ISINI(idt->idt_ic))
1332                 return (IDM_STATUS_SUCCESS);
1333 
1334         /*
1335          * If this is a target connection, call idm_buf_rx_from_ini_done for
1336          * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
1337          *
1338          * In addition, remove any buffers associated with this task from
1339          * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
1340          * items don't actually get removed from that list (and completion
1341          * routines called) until idm_task_cleanup.
1342          */
1343         mutex_enter(&idt->idt_mutex);
1344 
1345         for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
1346                 next_idb = list_next(&idt->idt_outbufv, idb);
1347                 if (idb->idb_in_transport) {
1348                         /*
1349                          * idm_buf_rx_from_ini_done releases idt->idt_mutex
1350                          */
1351                         DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1352                             uintptr_t, idb->idb_buf,
1353                             uint32_t, idb->idb_bufoffset,
1354                             uint64_t, 0, uint32_t, 0, uint32_t, 0,
1355                             uint32_t, idb->idb_xfer_len,
1356                             int, XFER_BUF_RX_FROM_INI);
1357                         idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1358                         mutex_enter(&idt->idt_mutex);
1359                 }
1360         }
1361 
1362         for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
1363                 next_idb = list_next(&idt->idt_inbufv, idb);
1364                 /*
1365                  * We want to remove these items from the tx_list as well,
1366                  * but knowing it's in the idt_inbufv list is not a guarantee
1367                  * that it's in the tx_list.  If it's on the tx list then
1368                  * let idm_sotx_thread() clean it up.
1369                  */
1370                 if (idb->idb_in_transport && !idb->idb_tx_thread) {
1371                         /*
1372                          * idm_buf_tx_to_ini_done releases idt->idt_mutex
1373                          */
1374                         DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1375                             uintptr_t, idb->idb_buf,
1376                             uint32_t, idb->idb_bufoffset,
1377                             uint64_t, 0, uint32_t, 0, uint32_t, 0,
1378                             uint32_t, idb->idb_xfer_len,
1379                             int, XFER_BUF_TX_TO_INI);
1380                         idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
1381                         mutex_enter(&idt->idt_mutex);
1382                 }
1383         }
1384 
1385         mutex_exit(&idt->idt_mutex);
1386 
1387         return (IDM_STATUS_SUCCESS);
1388 }
1389 
1390 /*
1391  * idm_so_negotiate_key_values() validates the key values for this connection
1392  */
1393 /* ARGSUSED */
1394 static kv_status_t
1395 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
1396     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
1397 {
1398         /* All parameters are negotiated at the iscsit level */
1399         return (KV_HANDLED);
1400 }
1401 
1402 /*
1403  * idm_so_notice_key_values() activates the negotiated key values for
1404  * this connection.
1405  */
1406 static void
1407 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1408 {
1409         char                    *nvp_name;
1410         nvpair_t                *nvp;
1411         nvpair_t                *next_nvp;
1412         int                     nvrc;
1413         idm_status_t            idm_status;
1414         const idm_kv_xlate_t    *ikvx;
1415         uint64_t                num_val;
1416 
1417         for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1418             nvp != NULL; nvp = next_nvp) {
1419                 next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1420                 nvp_name = nvpair_name(nvp);
1421 
1422                 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1423                 switch (ikvx->ik_key_id) {
1424                 case KI_HEADER_DIGEST:
1425                 case KI_DATA_DIGEST:
1426                         idm_status = idm_so_handle_digest(it, nvp, ikvx);
1427                         ASSERT(idm_status == 0);
1428 
1429                         /* Remove processed item from negotiated_nvl list */
1430                         nvrc = nvlist_remove_all(
1431                             negotiated_nvl, ikvx->ik_key_name);
1432                         ASSERT(nvrc == 0);
1433                         break;
1434                 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1435                         /*
1436                          * Just pass the value down to idm layer.
1437                          * No need to remove it from negotiated_nvl list here.
1438                          */
1439                         nvrc = nvpair_value_uint64(nvp, &num_val);
1440                         ASSERT(nvrc == 0);
1441                         it->ic_conn_params.max_xmit_dataseglen =
1442                             (uint32_t)num_val;
1443                         break;
1444                 default:
1445                         break;
1446                 }
1447         }
1448 }
1449 
1450 /*
1451  * idm_so_declare_key_values() declares the key values for this connection
1452  */
1453 /* ARGSUSED */
1454 static kv_status_t
1455 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1456     nvlist_t *outgoing_nvl)
1457 {
1458         char                    *nvp_name;
1459         nvpair_t                *nvp;
1460         nvpair_t                *next_nvp;
1461         kv_status_t             kvrc;
1462         int                     nvrc = 0;
1463         const idm_kv_xlate_t    *ikvx;
1464         uint64_t                num_val;
1465 
1466         for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1467             nvp != NULL && nvrc == 0; nvp = next_nvp) {
1468                 next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1469                 nvp_name = nvpair_name(nvp);
1470 
1471                 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1472                 switch (ikvx->ik_key_id) {
1473                 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1474                         if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1475                                 break;
1476                         }
1477                         if (outgoing_nvl &&
1478                             (nvrc = nvlist_add_uint64(outgoing_nvl,
1479                             nvp_name, num_val)) != 0) {
1480                                 break;
1481                         }
1482                         it->ic_conn_params.max_recv_dataseglen =
1483                             (uint32_t)num_val;
1484                         break;
1485                 default:
1486                         break;
1487                 }
1488         }
1489         kvrc = idm_nvstat_to_kvstat(nvrc);
1490         return (kvrc);
1491 }
1492 
1493 static idm_status_t
1494 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1495     const idm_kv_xlate_t *ikvx)
1496 {
1497         int                     nvrc;
1498         char                    *digest_choice_string;
1499 
1500         nvrc = nvpair_value_string(digest_choice,
1501             &digest_choice_string);
1502         ASSERT(nvrc == 0);
1503         if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1504                 switch (ikvx->ik_key_id) {
1505                 case KI_HEADER_DIGEST:
1506                         it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1507                         break;
1508                 case KI_DATA_DIGEST:
1509                         it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1510                         break;
1511                 default:
1512                         ASSERT(0);
1513                         break;
1514                 }
1515         } else if (strcasecmp(digest_choice_string, "none") == 0) {
1516                 switch (ikvx->ik_key_id) {
1517                 case KI_HEADER_DIGEST:
1518                         it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1519                         break;
1520                 case KI_DATA_DIGEST:
1521                         it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1522                         break;
1523                 default:
1524                         ASSERT(0);
1525                         break;
1526                 }
1527         } else {
1528                 ASSERT(0);
1529         }
1530 
1531         return (IDM_STATUS_SUCCESS);
1532 }
1533 
1534 
1535 /*
1536  * idm_so_conn_is_capable() verifies that the passed connection is provided
1537  * for by the sockets interface.
1538  */
1539 /* ARGSUSED */
1540 static boolean_t
1541 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1542 {
1543         return (B_TRUE);
1544 }
1545 
1546 /*
1547  * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1548  * idm_sorecv_scsidata() function invoked earlier actually reads the data
1549  * off the socket into the appropriate buffers.
1550  */
1551 static void
1552 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1553 {
1554         iscsi_data_hdr_t        *bhs;
1555         idm_task_t              *idt;
1556         idm_buf_t               *idb;
1557         uint32_t                datasn;
1558         size_t                  offset;
1559         iscsi_hdr_t             *ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1560         iscsi_data_rsp_hdr_t    *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1561 
1562         ASSERT(ic != NULL);
1563         ASSERT(pdu != NULL);
1564 
1565         bhs     = (iscsi_data_hdr_t *)pdu->isp_hdr;
1566         datasn  = ntohl(bhs->datasn);
1567         offset  = ntohl(bhs->offset);
1568 
1569         ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP);
1570 
1571         /*
1572          * Look up the task corresponding to the initiator task tag
1573          * to get the buffers affiliated with the task.
1574          */
1575         idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1576         if (idt == NULL) {
1577                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1578                 idm_pdu_rx_protocol_error(ic, pdu);
1579                 return;
1580         }
1581 
1582         idb = pdu->isp_sorx_buf;
1583         if (idb == NULL) {
1584                 IDM_CONN_LOG(CE_WARN,
1585                     "idm_so_rx_datain: failed to find buffer");
1586                 idm_task_rele(idt);
1587                 idm_pdu_rx_protocol_error(ic, pdu);
1588                 return;
1589         }
1590 
1591         /*
1592          * DataSN values should be sequential and should not have any gaps or
1593          * repetitions. Check the DataSN with the one stored in the task.
1594          */
1595         if (datasn == idt->idt_exp_datasn) {
1596                 idt->idt_exp_datasn++; /* keep track of DataSN received */
1597         } else {
1598                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1599                 idm_task_rele(idt);
1600                 idm_pdu_rx_protocol_error(ic, pdu);
1601                 return;
1602         }
1603 
1604         /*
1605          * PDUs in a sequence should be in continuously increasing
1606          * address offset
1607          */
1608         if (offset != idb->idb_exp_offset) {
1609                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1610                 idm_task_rele(idt);
1611                 idm_pdu_rx_protocol_error(ic, pdu);
1612                 return;
1613         }
1614         /* Expected next relative buffer offset */
1615         idb->idb_exp_offset += n2h24(bhs->dlength);
1616         idt->idt_rx_bytes += n2h24(bhs->dlength);
1617 
1618         idm_task_rele(idt);
1619 
1620         /*
1621          * For now call scsi_rsp which will process the data rsp
1622          * Revisit, need to provide an explicit client entry point for
1623          * phase collapse completions.
1624          */
1625         if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) &&
1626             (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1627                 (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1628         }
1629 
1630         idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1631 }
1632 
1633 /*
1634  * The idm_so_rx_dataout() function is used by the iSCSI target to read
1635  * data from the Data-Out PDU sent by the iSCSI initiator.
1636  *
1637  * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1638  * task to get the buffers associated with the PDU. A PDU might span buffers.
1639  * The data is then read into the respective buffer.
1640  */
1641 static void
1642 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1643 {
1644 
1645         iscsi_data_hdr_t        *bhs;
1646         idm_task_t              *idt;
1647         idm_buf_t               *idb;
1648         size_t                  offset;
1649 
1650         ASSERT(ic != NULL);
1651         ASSERT(pdu != NULL);
1652 
1653         bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1654         offset = ntohl(bhs->offset);
1655         ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA);
1656 
1657         /*
1658          * Look up the task corresponding to the initiator task tag
1659          * to get the buffers affiliated with the task.
1660          */
1661         idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1662         if (idt == NULL) {
1663                 IDM_CONN_LOG(CE_WARN,
1664                     "idm_so_rx_dataout: failed to find task");
1665                 idm_pdu_rx_protocol_error(ic, pdu);
1666                 return;
1667         }
1668 
1669         idb = pdu->isp_sorx_buf;
1670         if (idb == NULL) {
1671                 IDM_CONN_LOG(CE_WARN,
1672                     "idm_so_rx_dataout: failed to find buffer");
1673                 idm_task_rele(idt);
1674                 idm_pdu_rx_protocol_error(ic, pdu);
1675                 return;
1676         }
1677 
1678         /* Keep track of data transferred - check data offsets */
1679         if (offset != idb->idb_exp_offset) {
1680                 IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1681                     "%ld, %d", offset, idb->idb_exp_offset);
1682                 idm_task_rele(idt);
1683                 idm_pdu_rx_protocol_error(ic, pdu);
1684                 return;
1685         }
1686         /* Expected next relative offset */
1687         idb->idb_exp_offset += ntoh24(bhs->dlength);
1688         idt->idt_rx_bytes += n2h24(bhs->dlength);
1689 
1690         /*
1691          * Call the buffer callback when the transfer is complete
1692          *
1693          * The connection state machine should only abort tasks after
1694          * shutting down the connection so we are assured that there
1695          * won't be a simultaneous attempt to abort this task at the
1696          * same time as we are processing this PDU (due to a connection
1697          * state change).
1698          */
1699         if (bhs->flags & ISCSI_FLAG_FINAL) {
1700                 /*
1701                  * We only want to call idm_buf_rx_from_ini_done once
1702                  * per transfer.  It's possible that this task has
1703                  * already been aborted in which case
1704                  * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1705                  * for each buffer with idb_in_transport==B_TRUE.  To
1706                  * close this window and ensure that this doesn't happen,
1707                  * we'll clear idb->idb_in_transport now while holding
1708                  * the task mutex.   This is only really an issue for
1709                  * SCSI task abort -- if tasks were being aborted because
1710                  * of a connection state change the state machine would
1711                  * have already stopped the receive thread.
1712                  */
1713                 mutex_enter(&idt->idt_mutex);
1714 
1715                 /*
1716                  * Release the task hold here (obtained in idm_task_find)
1717                  * because the task may complete synchronously during
1718                  * idm_buf_rx_from_ini_done.  Since we still have an active
1719                  * buffer we know there is at least one additional hold on idt.
1720                  */
1721                 idm_task_rele(idt);
1722 
1723                 /*
1724                  * idm_buf_rx_from_ini_done releases idt->idt_mutex
1725                  */
1726                 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1727                     uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
1728                     uint64_t, 0, uint32_t, 0, uint32_t, 0,
1729                     uint32_t, idb->idb_xfer_len,
1730                     int, XFER_BUF_RX_FROM_INI);
1731                 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1732                 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1733                 return;
1734         }
1735 
1736         idm_task_rele(idt);
1737         idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1738 }
1739 
1740 /*
1741  * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1742  * the R2T PDU sent by the iSCSI target indicating that it is ready to
1743  * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1744  * and looks up the task in the task tree using the itt to get the output
1745  * buffers associated the task. The R2T PDU contains the offset of the
1746  * requested data and the data length. This function then constructs a
1747  * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1748  * PDU is associated with the R2T by the Target Transfer Tag  (ttt).
1749  */
1750 
1751 static void
1752 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1753 {
1754         idm_task_t              *idt;
1755         idm_buf_t               *idb;
1756         iscsi_rtt_hdr_t         *rtt_hdr;
1757         uint32_t                data_offset;
1758         uint32_t                data_length;
1759 
1760         ASSERT(ic != NULL);
1761         ASSERT(pdu != NULL);
1762 
1763         rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1764         data_offset = ntohl(rtt_hdr->data_offset);
1765         data_length = ntohl(rtt_hdr->data_length);
1766         idt     = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1767 
1768         if (idt == NULL) {
1769                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1770                 idm_pdu_rx_protocol_error(ic, pdu);
1771                 return;
1772         }
1773 
1774         /* Find the buffer bound to the task by the iSCSI initiator */
1775         mutex_enter(&idt->idt_mutex);
1776         idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1777         if (idb == NULL) {
1778                 mutex_exit(&idt->idt_mutex);
1779                 idm_task_rele(idt);
1780                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1781                 idm_pdu_rx_protocol_error(ic, pdu);
1782                 return;
1783         }
1784 
1785         /* return buffer contains this data */
1786         if (data_offset + data_length > idb->idb_buflen) {
1787                 /* Overflow */
1788                 mutex_exit(&idt->idt_mutex);
1789                 idm_task_rele(idt);
1790                 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1791                     "buffer");
1792                 idm_pdu_rx_protocol_error(ic, pdu);
1793                 return;
1794         }
1795 
1796         idt->idt_r2t_ttt = rtt_hdr->ttt;
1797         idt->idt_exp_datasn = 0;
1798 
1799         idm_so_send_rtt_data(ic, idt, idb, data_offset,
1800             ntohl(rtt_hdr->data_length));
1801         /*
1802          * the idt_mutex is released in idm_so_send_rtt_data
1803          */
1804 
1805         idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1806         idm_task_rele(idt);
1807 
1808 }
1809 
1810 idm_status_t
1811 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1812 {
1813         uint8_t         pad[ISCSI_PAD_WORD_LEN];
1814         int             pad_len;
1815         uint32_t        data_digest_crc;
1816         uint32_t        crc_calculated;
1817         int             total_len;
1818         idm_so_conn_t   *so_conn;
1819 
1820         so_conn = ic->ic_transport_private;
1821 
1822         pad_len = ((ISCSI_PAD_WORD_LEN -
1823             (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1824             (ISCSI_PAD_WORD_LEN - 1));
1825 
1826         ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1827 
1828         total_len = pdu->isp_datalen;
1829 
1830         if (pad_len) {
1831                 pdu->isp_iov[pdu->isp_iovlen].iov_base    = (char *)&pad;
1832                 pdu->isp_iov[pdu->isp_iovlen].iov_len     = pad_len;
1833                 total_len               += pad_len;
1834                 pdu->isp_iovlen++;
1835         }
1836 
1837         /* setup data digest */
1838         if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1839                 pdu->isp_iov[pdu->isp_iovlen].iov_base =
1840                     (char *)&data_digest_crc;
1841                 pdu->isp_iov[pdu->isp_iovlen].iov_len =
1842                     sizeof (data_digest_crc);
1843                 total_len               += sizeof (data_digest_crc);
1844                 pdu->isp_iovlen++;
1845         }
1846 
1847         pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1848 
1849         if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1850             pdu->isp_iovlen, total_len) != 0) {
1851                 return (IDM_STATUS_IO);
1852         }
1853 
1854         if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1855                 crc_calculated = idm_crc32c(pdu->isp_data,
1856                     pdu->isp_datalen);
1857                 if (pad_len) {
1858                         crc_calculated = idm_crc32c_continued((char *)&pad,
1859                             pad_len, crc_calculated);
1860                 }
1861                 if (crc_calculated != data_digest_crc) {
1862                         IDM_CONN_LOG(CE_WARN,
1863                             "idm_sorecvdata: "
1864                             "CRC error: actual 0x%x, calc 0x%x",
1865                             data_digest_crc, crc_calculated);
1866 
1867                         /* Invalid Data Digest */
1868                         return (IDM_STATUS_DATA_DIGEST);
1869                 }
1870         }
1871 
1872         return (IDM_STATUS_SUCCESS);
1873 }
1874 
1875 /*
1876  * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1877  * Data-type PDU header must be read into the idm_pdu_t structure prior to
1878  * calling this function.
1879  */
1880 idm_status_t
1881 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1882 {
1883         iscsi_data_hdr_t        *bhs;
1884         idm_task_t              *task;
1885         uint32_t                offset;
1886         uint8_t                 opcode;
1887         uint32_t                dlength;
1888         list_t                  *buflst;
1889         uint32_t                xfer_bytes;
1890         idm_status_t            status;
1891 
1892         ASSERT(ic != NULL);
1893         ASSERT(pdu != NULL);
1894 
1895         bhs     = (iscsi_data_hdr_t *)pdu->isp_hdr;
1896 
1897         offset  = ntohl(bhs->offset);
1898         opcode  = bhs->opcode;
1899         dlength = n2h24(bhs->dlength);
1900 
1901         ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
1902             (opcode == ISCSI_OP_SCSI_DATA));
1903 
1904         /*
1905          * Successful lookup implicitly gets a "hold" on the task.  This
1906          * hold must be released before leaving this function.  At one
1907          * point we were caching this task context and retaining the hold
1908          * but it turned out to be very difficult to release the hold properly.
1909          * The task can be aborted and the connection shutdown between this
1910          * call and the subsequent expected call to idm_so_rx_datain/
1911          * idm_so_rx_dataout (in which case those functions are not called).
1912          * Releasing the hold in the PDU callback doesn't work well either
1913          * because the whole task may be completed by then at which point
1914          * it is too late to release the hold -- for better or worse this
1915          * code doesn't wait on the refcnts during normal operation.
1916          * idm_task_find() is very fast and it is not a huge burden if we
1917          * have to do it twice.
1918          */
1919         task = idm_task_find(ic, bhs->itt, bhs->ttt);
1920         if (task == NULL) {
1921                 IDM_CONN_LOG(CE_WARN,
1922                     "idm_sorecv_scsidata: could not find task");
1923                 return (IDM_STATUS_FAIL);
1924         }
1925 
1926         mutex_enter(&task->idt_mutex);
1927         buflst  = (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
1928             &task->idt_inbufv : &task->idt_outbufv;
1929         pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
1930         mutex_exit(&task->idt_mutex);
1931 
1932         if (pdu->isp_sorx_buf == NULL) {
1933                 idm_task_rele(task);
1934                 IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
1935                     "buffer for offset %x opcode=%x",
1936                     offset, opcode);
1937                 return (IDM_STATUS_FAIL);
1938         }
1939 
1940         xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
1941         ASSERT(xfer_bytes != 0);
1942         if (xfer_bytes != dlength) {
1943                 idm_task_rele(task);
1944                 /*
1945                  * Buffer overflow, connection error.  The PDU data is still
1946                  * sitting in the socket so we can't use the connection
1947                  * again until that data is drained.
1948                  */
1949                 return (IDM_STATUS_FAIL);
1950         }
1951 
1952         status = idm_sorecvdata(ic, pdu);
1953 
1954         idm_task_rele(task);
1955 
1956         return (status);
1957 }
1958 
1959 static uint32_t
1960 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
1961 {
1962         uint32_t        buf_ro = ro - idb->idb_bufoffset;
1963         uint32_t        xfer_len = min(dlength, idb->idb_buflen - buf_ro);
1964 
1965         ASSERT(ro >= idb->idb_bufoffset);
1966 
1967         pdu->isp_iov[pdu->isp_iovlen].iov_base    =
1968             (caddr_t)idb->idb_buf + buf_ro;
1969         pdu->isp_iov[pdu->isp_iovlen].iov_len     = xfer_len;
1970         pdu->isp_iovlen++;
1971 
1972         return (xfer_len);
1973 }
1974 
1975 int
1976 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1977 {
1978         pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
1979         ASSERT(pdu->isp_data != NULL);
1980 
1981         pdu->isp_databuflen = pdu->isp_datalen;
1982         pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
1983         pdu->isp_iov[0].iov_len = pdu->isp_datalen;
1984         pdu->isp_iovlen = 1;
1985         /*
1986          * Since we are associating a new data buffer with this received
1987          * PDU we need to set a specific callback to free the data
1988          * after the PDU is processed.
1989          */
1990         pdu->isp_flags |= IDM_PDU_ADDL_DATA;
1991         pdu->isp_callback = idm_sorx_addl_pdu_cb;
1992 
1993         return (idm_sorecvdata(ic, pdu));
1994 }
1995 
1996 void
1997 idm_sorx_thread(void *arg)
1998 {
1999         boolean_t       conn_failure = B_FALSE;
2000         idm_conn_t      *ic = (idm_conn_t *)arg;
2001         idm_so_conn_t   *so_conn;
2002         idm_pdu_t       *pdu;
2003         idm_status_t    rc;
2004 
2005         idm_conn_hold(ic);
2006 
2007         mutex_enter(&ic->ic_mutex);
2008 
2009         so_conn = ic->ic_transport_private;
2010         so_conn->ic_rx_thread_running = B_TRUE;
2011         so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
2012         cv_signal(&ic->ic_cv);
2013 
2014         while (so_conn->ic_rx_thread_running) {
2015                 mutex_exit(&ic->ic_mutex);
2016 
2017                 /*
2018                  * Get PDU with default header size (large enough for
2019                  * BHS plus any anticipated AHS).  PDU from
2020                  * the cache will have all values set correctly
2021                  * for sockets RX including callback.
2022                  */
2023                 pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
2024                 pdu->isp_ic = ic;
2025                 pdu->isp_flags = 0;
2026                 pdu->isp_transport_hdrlen = 0;
2027 
2028                 if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
2029                         /*
2030                          * Call idm_pdu_complete so that we call the callback
2031                          * and ensure any memory allocated in idm_sorecvhdr
2032                          * gets freed up.
2033                          */
2034                         idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2035 
2036                         /*
2037                          * If ic_rx_thread_running is still set then
2038                          * this is some kind of connection problem
2039                          * on the socket.  In this case we want to
2040                          * generate an event.  Otherwise some other
2041                          * thread closed the socket due to another
2042                          * issue in which case we don't need to
2043                          * generate an event.
2044                          */
2045                         mutex_enter(&ic->ic_mutex);
2046                         if (so_conn->ic_rx_thread_running) {
2047                                 conn_failure = B_TRUE;
2048                                 so_conn->ic_rx_thread_running = B_FALSE;
2049                         }
2050 
2051                         continue;
2052                 }
2053 
2054                 /*
2055                  * Header has been read and validated.  Now we need
2056                  * to read the PDU data payload (if present).  SCSI data
2057                  * need to be transferred from the socket directly into
2058                  * the associated transfer buffer for the SCSI task.
2059                  */
2060                 if (pdu->isp_datalen != 0) {
2061                         if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
2062                             (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
2063                                 rc = idm_sorecv_scsidata(ic, pdu);
2064                                 /*
2065                                  * All SCSI errors are fatal to the
2066                                  * connection right now since we have no
2067                                  * place to put the data.  What we need
2068                                  * is some kind of sink to dispose of unwanted
2069                                  * SCSI data.  For example an invalid task tag
2070                                  * should not kill the connection (although
2071                                  * we may want to drop the connection).
2072                                  */
2073                         } else {
2074                                 /*
2075                                  * Not data PDUs so allocate a buffer for the
2076                                  * data segment and read the remaining data.
2077                                  */
2078                                 rc = idm_sorecv_nonscsidata(ic, pdu);
2079                         }
2080                         if (rc != 0) {
2081                                 /*
2082                                  * Call idm_pdu_complete so that we call the
2083                                  * callback and ensure any memory allocated
2084                                  * in idm_sorecvhdr gets freed up.
2085                                  */
2086                                 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2087 
2088                                 /*
2089                                  * If ic_rx_thread_running is still set then
2090                                  * this is some kind of connection problem
2091                                  * on the socket.  In this case we want to
2092                                  * generate an event.  Otherwise some other
2093                                  * thread closed the socket due to another
2094                                  * issue in which case we don't need to
2095                                  * generate an event.
2096                                  */
2097                                 mutex_enter(&ic->ic_mutex);
2098                                 if (so_conn->ic_rx_thread_running) {
2099                                         conn_failure = B_TRUE;
2100                                         so_conn->ic_rx_thread_running = B_FALSE;
2101                                 }
2102                                 continue;
2103                         }
2104                 }
2105 
2106                 /*
2107                  * Process RX PDU
2108                  */
2109                 idm_pdu_rx(ic, pdu);
2110 
2111                 mutex_enter(&ic->ic_mutex);
2112         }
2113 
2114         mutex_exit(&ic->ic_mutex);
2115 
2116         /*
2117          * If we dropped out of the RX processing loop because of
2118          * a socket problem or other connection failure (including
2119          * digest errors) then we need to generate a state machine
2120          * event to shut the connection down.
2121          * If the state machine is already in, for example, INIT_ERROR, this
2122          * event will get dropped, and the TX thread will never be notified
2123          * to shut down.  To be safe, we'll just notify it here.
2124          */
2125         if (conn_failure) {
2126                 if (so_conn->ic_tx_thread_running) {
2127                         so_conn->ic_tx_thread_running = B_FALSE;
2128                         mutex_enter(&so_conn->ic_tx_mutex);
2129                         cv_signal(&so_conn->ic_tx_cv);
2130                         mutex_exit(&so_conn->ic_tx_mutex);
2131                 }
2132 
2133                 idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
2134         }
2135 
2136         idm_conn_rele(ic);
2137 
2138         thread_exit();
2139 }
2140 
2141 /*
2142  * idm_so_tx
2143  *
2144  * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2145  * point.  By definition, it is supposed to be fast.  So, simply queue
2146  * the entry and return.  The real work is done by idm_i_so_tx() via
2147  * idm_sotx_thread().
2148  */
2149 
2150 static void
2151 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2152 {
2153         idm_so_conn_t *so_conn = ic->ic_transport_private;
2154 
2155         ASSERT(pdu->isp_ic == ic);
2156         mutex_enter(&so_conn->ic_tx_mutex);
2157 
2158         if (!so_conn->ic_tx_thread_running) {
2159                 mutex_exit(&so_conn->ic_tx_mutex);
2160                 idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2161                 return;
2162         }
2163 
2164         list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2165         cv_signal(&so_conn->ic_tx_cv);
2166         mutex_exit(&so_conn->ic_tx_mutex);
2167 }
2168 
2169 static idm_status_t
2170 idm_i_so_tx(idm_pdu_t *pdu)
2171 {
2172         idm_conn_t      *ic = pdu->isp_ic;
2173         idm_status_t    status = IDM_STATUS_SUCCESS;
2174         uint8_t         pad[ISCSI_PAD_WORD_LEN];
2175         int             pad_len;
2176         uint32_t        hdr_digest_crc;
2177         uint32_t        data_digest_crc = 0;
2178         int             total_len = 0;
2179         int             iovlen = 0;
2180         struct iovec    iov[6];
2181         idm_so_conn_t   *so_conn;
2182 
2183         so_conn = ic->ic_transport_private;
2184 
2185         /* Setup BHS */
2186         iov[iovlen].iov_base    = (caddr_t)pdu->isp_hdr;
2187         iov[iovlen].iov_len     = pdu->isp_hdrlen;
2188         total_len               += iov[iovlen].iov_len;
2189         iovlen++;
2190 
2191         /* Setup header digest */
2192         if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2193             (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2194                 hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2195 
2196                 iov[iovlen].iov_base    = (caddr_t)&hdr_digest_crc;
2197                 iov[iovlen].iov_len     = sizeof (hdr_digest_crc);
2198                 total_len               += iov[iovlen].iov_len;
2199                 iovlen++;
2200         }
2201 
2202         /* Setup the data */
2203         if (pdu->isp_datalen) {
2204                 idm_task_t              *idt;
2205                 idm_buf_t               *idb;
2206                 iscsi_data_hdr_t        *ihp;
2207                 ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2208                 /* Write of immediate data */
2209                 if (ic->ic_ffp &&
2210                     (ihp->opcode == ISCSI_OP_SCSI_CMD ||
2211                     ihp->opcode == ISCSI_OP_SCSI_DATA)) {
2212                         idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2213                         if (idt) {
2214                                 mutex_enter(&idt->idt_mutex);
2215                                 idb = idm_buf_find(&idt->idt_outbufv, 0);
2216                                 mutex_exit(&idt->idt_mutex);
2217                                 /*
2218                                  * If the initiator call to idm_buf_alloc
2219                                  * failed then we can get to this point
2220                                  * without a bound buffer.  The associated
2221                                  * connection failure will clean things up
2222                                  * later.  It would be nice to come up with
2223                                  * a cleaner way to handle this.  In
2224                                  * particular it seems absurd to look up
2225                                  * the task and the buffer just to update
2226                                  * this counter.
2227                                  */
2228                                 if (idb)
2229                                         idb->idb_xfer_len += pdu->isp_datalen;
2230                                 idm_task_rele(idt);
2231                         }
2232                 }
2233 
2234                 iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2235                 iov[iovlen].iov_len  = pdu->isp_datalen;
2236                 total_len += iov[iovlen].iov_len;
2237                 iovlen++;
2238         }
2239 
2240         /* Setup the data pad if necessary */
2241         pad_len = ((ISCSI_PAD_WORD_LEN -
2242             (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2243             (ISCSI_PAD_WORD_LEN - 1));
2244 
2245         if (pad_len) {
2246                 bzero(pad, sizeof (pad));
2247                 iov[iovlen].iov_base = (void *)&pad;
2248                 iov[iovlen].iov_len  = pad_len;
2249                 total_len               += iov[iovlen].iov_len;
2250                 iovlen++;
2251         }
2252 
2253         /*
2254          * Setup the data digest if enabled.  Data-digest is not sent
2255          * for login-phase PDUs.
2256          */
2257         if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2258             ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2259             (pdu->isp_datalen || pad_len)) {
2260                 /*
2261                  * RFC3720/10.2.3: A zero-length Data Segment also
2262                  * implies a zero-length data digest.
2263                  */
2264                 if (pdu->isp_datalen) {
2265                         data_digest_crc = idm_crc32c(pdu->isp_data,
2266                             pdu->isp_datalen);
2267                 }
2268                 if (pad_len) {
2269                         data_digest_crc = idm_crc32c_continued(&pad,
2270                             pad_len, data_digest_crc);
2271                 }
2272 
2273                 iov[iovlen].iov_base    = (caddr_t)&data_digest_crc;
2274                 iov[iovlen].iov_len     = sizeof (data_digest_crc);
2275                 total_len               += iov[iovlen].iov_len;
2276                 iovlen++;
2277         }
2278 
2279         /* Transmit the PDU */
2280         if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2281             total_len) != 0) {
2282                 /* Set error status */
2283                 IDM_CONN_LOG(CE_WARN,
2284                     "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2285                     "data: %p", (void *) so_conn->ic_so, (void *) ic,
2286                     (void *) pdu->isp_data);
2287                 status = IDM_STATUS_IO;
2288         }
2289 
2290         /*
2291          * Success does not mean that the PDU actually reached the
2292          * remote node since it could get dropped along the way.
2293          */
2294         idm_pdu_complete(pdu, status);
2295 
2296         return (status);
2297 }
2298 
2299 /*
2300  * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2301  * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2302  * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2303  * A target can invoke this function multiple times for a single read command
2304  * (identified by the same ITT) to split the input into several sequences.
2305  *
2306  * DataSN starts with 0 for the first data PDU of an input command and advances
2307  * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2308  * which is set to 1 for the last data PDU of a sequence.
2309  * If the initiator supports phase collapse, the status bit must be set along
2310  * with the F bit to indicate that the status is shipped together with the last
2311  * Data-In PDU.
2312  *
2313  * The data PDUs within a sequence will be sent in order with the buffer offset
2314  * in increasing order. i.e. initiator and target must have negotiated the
2315  * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2316  *
2317  * Caller holds idt->idt_mutex
2318  */
2319 static idm_status_t
2320 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2321 {
2322         idm_so_conn_t   *so_conn = idb->idb_ic->ic_transport_private;
2323         idm_pdu_t       tmppdu;
2324 
2325         ASSERT(mutex_owned(&idt->idt_mutex));
2326 
2327         /*
2328          * Put the idm_buf_t on the tx queue.  It will be transmitted by
2329          * idm_sotx_thread.
2330          */
2331         mutex_enter(&so_conn->ic_tx_mutex);
2332 
2333         DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2334             uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2335             uint64_t, 0, uint32_t, 0, uint32_t, 0,
2336             uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2337 
2338         if (!so_conn->ic_tx_thread_running) {
2339                 mutex_exit(&so_conn->ic_tx_mutex);
2340                 /*
2341                  * Don't release idt->idt_mutex since we're supposed to hold
2342                  * in when calling idm_buf_tx_to_ini_done
2343                  */
2344                 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2345                     uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2346                     uint64_t, 0, uint32_t, 0, uint32_t, 0,
2347                     uint32_t, idb->idb_xfer_len,
2348                     int, XFER_BUF_TX_TO_INI);
2349                 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2350                 return (IDM_STATUS_FAIL);
2351         }
2352 
2353         /*
2354          * Build a template for the data PDU headers we will use so that
2355          * the SN values will stay consistent with other PDU's we are
2356          * transmitting like R2T and SCSI status.
2357          */
2358         bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2359         tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2360         (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2361             ISCSI_OP_SCSI_DATA_RSP);
2362         idb->idb_tx_thread = B_TRUE;
2363         list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2364         cv_signal(&so_conn->ic_tx_cv);
2365         mutex_exit(&so_conn->ic_tx_mutex);
2366         mutex_exit(&idt->idt_mutex);
2367 
2368         /*
2369          * Returning success here indicates the transfer was successfully
2370          * dispatched -- it does not mean that the transfer completed
2371          * successfully.
2372          */
2373         return (IDM_STATUS_SUCCESS);
2374 }
2375 
2376 /*
2377  * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2378  * data blocks it is ready to receive from the initiator in response to a WRITE
2379  * SCSI command. The target iSCSI layer passes the information about the desired
2380  * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2381  * offset and datalen are passed via the 'idb' argument.
2382  *
2383  * Scope for Prototype build:
2384  * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2385  * negotiated the "InitialR2T" to "Yes".
2386  *
2387  * Caller holds idt->idt_mutex
2388  */
2389 static idm_status_t
2390 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2391 {
2392         idm_pdu_t               *pdu;
2393         iscsi_rtt_hdr_t         *rtt;
2394 
2395         ASSERT(mutex_owned(&idt->idt_mutex));
2396 
2397         DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2398             uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2399             uint64_t, 0, uint32_t, 0, uint32_t, 0,
2400             uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2401 
2402         pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2403         pdu->isp_ic = idt->idt_ic;
2404         pdu->isp_flags = IDM_PDU_SET_STATSN;
2405         bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2406 
2407         /* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
2408         (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2409 
2410         /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2411         rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2412 
2413         rtt->opcode          = ISCSI_OP_RTT_RSP;
2414         rtt->flags           = ISCSI_FLAG_FINAL;
2415         rtt->data_offset     = htonl(idb->idb_bufoffset);
2416         rtt->data_length     = htonl(idb->idb_xfer_len);
2417         rtt->rttsn           = htonl(idt->idt_exp_rttsn++);
2418 
2419         /* Keep track of buffer offsets */
2420         idb->idb_exp_offset  = idb->idb_bufoffset;
2421         mutex_exit(&idt->idt_mutex);
2422 
2423         /*
2424          * Transmit the PDU.
2425          */
2426         idm_pdu_tx(pdu);
2427 
2428         return (IDM_STATUS_SUCCESS);
2429 }
2430 
2431 static idm_status_t
2432 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2433 {
2434         if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2435                 idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2436                     KM_NOSLEEP);
2437                 idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2438         } else {
2439                 idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2440                 idb->idb_buf_private = NULL;
2441         }
2442 
2443         if (idb->idb_buf == NULL) {
2444                 IDM_CONN_LOG(CE_NOTE,
2445                     "idm_so_buf_alloc: failed buffer allocation");
2446                 return (IDM_STATUS_FAIL);
2447         }
2448 
2449         return (IDM_STATUS_SUCCESS);
2450 }
2451 
/*
 * Buffer setup hook for the sockets transport.  The only work done here
 * is clearing idb_bufalloc; the call always succeeds.
 */
/* ARGSUSED */
static idm_status_t
idm_so_buf_setup(idm_buf_t *idb)
{
        /* Ensure bufalloc'd flag is unset */
        idb->idb_bufalloc = B_FALSE;

        return (IDM_STATUS_SUCCESS);
}
2461 
/*
 * Buffer teardown hook for the sockets transport.  Intentionally empty:
 * idm_so_buf_setup() acquires nothing that would need to be undone.
 */
/* ARGSUSED */
static void
idm_so_buf_teardown(idm_buf_t *idb)
{
        /* nothing to do here */
}
2468 
2469 static void
2470 idm_so_buf_free(idm_buf_t *idb)
2471 {
2472         if (idb->idb_buf_private == NULL) {
2473                 kmem_free(idb->idb_buf, idb->idb_buflen);
2474         } else {
2475                 kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2476         }
2477 }
2478 
2479 static void
2480 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2481     uint32_t offset, uint32_t length)
2482 {
2483         idm_so_conn_t   *so_conn = ic->ic_transport_private;
2484         idm_pdu_t       tmppdu;
2485         idm_buf_t       *rtt_buf;
2486 
2487         ASSERT(mutex_owned(&idt->idt_mutex));
2488 
2489         /*
2490          * Allocate a buffer to represent the RTT transfer.  We could further
2491          * optimize this by allocating the buffers internally from an rtt
2492          * specific buffer cache since this is socket-specific code but for
2493          * now we will keep it simple.
2494          */
2495         rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2496         if (rtt_buf == NULL) {
2497                 /*
2498                  * If we're in FFP then the failure was likely a resource
2499                  * allocation issue and we should close the connection by
2500                  * sending a CE_TRANSPORT_FAIL event.
2501                  *
2502                  * If we're not in FFP then idm_buf_alloc will always
2503                  * fail and the state is transitioning to "complete" anyway
2504                  * so we won't bother to send an event.
2505                  */
2506                 mutex_enter(&ic->ic_state_mutex);
2507                 if (ic->ic_ffp)
2508                         idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2509                             NULL, CT_NONE);
2510                 mutex_exit(&ic->ic_state_mutex);
2511                 mutex_exit(&idt->idt_mutex);
2512                 return;
2513         }
2514 
2515         rtt_buf->idb_buf_cb = NULL;
2516         rtt_buf->idb_cb_arg = NULL;
2517         rtt_buf->idb_bufoffset = offset;
2518         rtt_buf->idb_xfer_len = length;
2519         rtt_buf->idb_ic = idt->idt_ic;
2520         rtt_buf->idb_task_binding = idt;
2521 
2522         /*
2523          * The new buffer (if any) represents an additional
2524          * reference on the task
2525          */
2526         idm_task_hold(idt);
2527         mutex_exit(&idt->idt_mutex);
2528 
2529         /*
2530          * Put the idm_buf_t on the tx queue.  It will be transmitted by
2531          * idm_sotx_thread.
2532          */
2533         mutex_enter(&so_conn->ic_tx_mutex);
2534 
2535         if (!so_conn->ic_tx_thread_running) {
2536                 idm_buf_free(rtt_buf);
2537                 mutex_exit(&so_conn->ic_tx_mutex);
2538                 idm_task_rele(idt);
2539                 return;
2540         }
2541 
2542         /*
2543          * Build a template for the data PDU headers we will use so that
2544          * the SN values will stay consistent with other PDU's we are
2545          * transmitting like R2T and SCSI status.
2546          */
2547         bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2548         tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2549         (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2550             ISCSI_OP_SCSI_DATA);
2551         rtt_buf->idb_tx_thread = B_TRUE;
2552         rtt_buf->idb_in_transport = B_TRUE;
2553         list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2554         cv_signal(&so_conn->ic_tx_cv);
2555         mutex_exit(&so_conn->ic_tx_mutex);
2556 }
2557 
/*
 * Completion handling for a buffer queued by idm_so_send_rtt_data():
 * marks the buffer as no longer in transport, drops the task reference
 * taken when the buffer was queued, and frees the buffer.
 */
static void
idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
{
        /*
         * Don't worry about status -- we assume any error handling
         * is performed by the caller (idm_sotx_thread).
         */
        idb->idb_in_transport = B_FALSE;
        idm_task_rele(idt);
        idm_buf_free(idb);
}
2569 
/*
 * Transmit 'buf_region_length' bytes of 'idb', starting at
 * 'buf_region_offset' within the buffer, as a sequence of data PDUs.
 * Each PDU carries at most ic_conn_params.max_xmit_dataseglen bytes and
 * its header is copied from idb->idb_data_hdr_tmpl.
 *
 * Locking: the caller holds idt->idt_mutex.  The mutex is dropped around
 * each call to idm_i_so_tx() and re-acquired afterwards, so it is held
 * again on every return path.
 *
 * Returns IDM_STATUS_SUCCESS once the whole region has been sent,
 * IDM_STATUS_ABORTED if the task is found not to be TASK_ACTIVE before a
 * PDU is built, or the failing status from idm_i_so_tx().
 */
static idm_status_t
idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
    uint32_t buf_region_offset, uint32_t buf_region_length)
{
        idm_conn_t              *ic;
        uint32_t                max_dataseglen;
        size_t                  remainder, chunk;
        uint32_t                data_offset = buf_region_offset;
        iscsi_data_hdr_t        *bhs;
        idm_pdu_t               *pdu;
        idm_status_t            tx_status;

        ASSERT(mutex_owned(&idt->idt_mutex));

        ic = idt->idt_ic;

        max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
        remainder = buf_region_length;

        while (remainder) {
                /*
                 * The task lock was dropped during the previous transmit,
                 * so the task state is re-checked on every iteration.
                 */
                if (idt->idt_state != TASK_ACTIVE) {
                        ASSERT((idt->idt_state != TASK_IDLE) &&
                            (idt->idt_state != TASK_COMPLETE));
                        return (IDM_STATUS_ABORTED);
                }

                /* check to see if we need to chunk the data */
                if (remainder > max_dataseglen) {
                        chunk = max_dataseglen;
                } else {
                        chunk = remainder;
                }

                /* Data PDU headers will always be sizeof (iscsi_hdr_t) */
                pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
                pdu->isp_ic = ic;
                pdu->isp_flags = 0;  /* initialize isp_flags */

                /*
                 * We've already built a header template
                 * to use during the transfer.  Use this template so that
                 * the SN values stay consistent with any unrelated PDU's
                 * being transmitted.
                 */
                bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
                    sizeof (iscsi_hdr_t));

                /*
                 * Set DataSN, data offset, and flags in BHS
                 * For the prototype build, A = 0, S = 0, U = 0
                 */
                bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);

                bhs->datasn          = htonl(idt->idt_exp_datasn++);

                hton24(bhs->dlength, chunk);
                /* Wire offset = buffer's own offset + position in buffer */
                bhs->offset = htonl(idb->idb_bufoffset + data_offset);

                /* setup data */
                pdu->isp_data        =  (uint8_t *)idb->idb_buf + data_offset;
                pdu->isp_datalen = (uint_t)chunk;

                /* chunk == remainder means this is the last PDU */
                if (chunk == remainder) {
                        bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
                        /* Piggyback the status with the last data PDU */
                        if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
                                pdu->isp_flags |= IDM_PDU_SET_STATSN |
                                    IDM_PDU_ADVANCE_STATSN;
                                (*idt->idt_ic->ic_conn_ops.icb_update_statsn)
                                    (idt, pdu);
                                idt->idt_flags |=
                                    IDM_TASK_PHASECOLLAPSE_SUCCESS;

                        }
                }

                remainder       -= chunk;
                data_offset     += chunk;

                /* Instrument the data-send DTrace probe. */
                if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
                        DTRACE_ISCSI_2(data__send,
                            idm_conn_t *, idt->idt_ic,
                            iscsi_data_rsp_hdr_t *,
                            (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
                }

                /*
                 * Now that we're done working with idt_exp_datasn,
                 * idt->idt_state and idb->idb_bufoffset we can release
                 * the task lock -- don't want to hold it across the
                 * call to idm_i_so_tx since we could block.
                 */
                mutex_exit(&idt->idt_mutex);

                /*
                 * Transmit the PDU.  Call the internal routine directly
                 * as there is already implicit ordering.
                 */
                if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
                        /* Re-take the task lock: callers expect it held */
                        mutex_enter(&idt->idt_mutex);
                        return (tx_status);
                }

                mutex_enter(&idt->idt_mutex);
                /* Only bytes from a successful transmit are counted */
                idt->idt_tx_bytes += chunk;
        }

        return (IDM_STATUS_SUCCESS);
}
2680 
2681 /*
2682  * TX PDU cache
2683  */
2684 /* ARGSUSED */
2685 int
2686 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2687 {
2688         idm_pdu_t       *pdu = hdl;
2689 
2690         bzero(pdu, sizeof (idm_pdu_t));
2691         pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2692         pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2693         pdu->isp_callback = idm_sotx_cache_pdu_cb;
2694         pdu->isp_magic = IDM_PDU_MAGIC;
2695         bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2696 
2697         return (0);
2698 }
2699 
/*
 * Completion callback installed on TX cache PDUs by
 * idm_sotx_pdu_constructor(): clears the data length and returns the PDU
 * to idm.idm_sotx_pdu_cache.  'status' is ignored.
 */
/* ARGSUSED */
void
idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
{
        /* reset values between use */
        pdu->isp_datalen = 0;

        kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
}
2709 
2710 /*
2711  * RX PDU cache
2712  */
2713 /* ARGSUSED */
2714 int
2715 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2716 {
2717         idm_pdu_t       *pdu = hdl;
2718 
2719         bzero(pdu, sizeof (idm_pdu_t));
2720         pdu->isp_magic = IDM_PDU_MAGIC;
2721         pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2722         pdu->isp_callback = idm_sorx_cache_pdu_cb;
2723 
2724         return (0);
2725 }
2726 
/*
 * Completion callback for RX cache PDUs: clears the per-use iovec count
 * and isp_sorx_buf, then returns the PDU to idm.idm_sorx_pdu_cache.
 * 'status' is ignored.
 */
/* ARGSUSED */
static void
idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
{
        /* reset values between use */
        pdu->isp_iovlen = 0;
        pdu->isp_sorx_buf = 0;
        kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
}
2735 
2736 static void
2737 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2738 {
2739         /*
2740          * We had to modify our cached RX PDU with a longer header buffer
2741          * and/or a longer data buffer.  Release the new buffers and fix
2742          * the fields back to what we would expect for a cached RX PDU.
2743          */
2744         if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2745                 kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2746         }
2747         if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2748                 kmem_free(pdu->isp_data, pdu->isp_datalen);
2749         }
2750         pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2751         pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2752         pdu->isp_data = NULL;
2753         pdu->isp_datalen = 0;
2754         pdu->isp_sorx_buf = 0;
2755         pdu->isp_callback = idm_sorx_cache_pdu_cb;
2756         idm_sorx_cache_pdu_cb(pdu, status);
2757 }
2758 
/*
 * This thread is only active when I/O is queued for transmit
 * because the socket is busy.
 *
 * 'arg' is the idm_conn_t being served.  The thread holds a reference on
 * the connection for its whole lifetime.  It repeatedly removes the head
 * of so_conn->ic_tx_list and transmits it: PDUs (IDM_PDU_MAGIC) go out
 * via idm_i_so_tx(), buffers (IDM_BUF_MAGIC) via idm_so_send_buf_region().
 * ic_tx_mutex protects the list and ic_tx_thread_running, and is dropped
 * while an object is being transmitted.
 *
 * The loop ends when ic_tx_thread_running is cleared, either by someone
 * else (noticed after a wakeup) or by this thread after a failed
 * transmit, in which case CE_TRANSPORT_FAIL is also raised.  Everything
 * still on the list is then aborted before the thread exits.
 */
void
idm_sotx_thread(void *arg)
{
        idm_conn_t      *ic = arg;
        idm_tx_obj_t    *object, *next;
        idm_so_conn_t   *so_conn;
        idm_status_t    status = IDM_STATUS_SUCCESS;

        idm_conn_hold(ic);

        /*
         * Publish that the thread is up.  The cv_signal presumably wakes
         * whoever started this thread and is waiting on ic_cv -- the
         * waiter is not visible in this part of the file.
         */
        mutex_enter(&ic->ic_mutex);
        so_conn = ic->ic_transport_private;
        so_conn->ic_tx_thread_running = B_TRUE;
        so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
        cv_signal(&ic->ic_cv);
        mutex_exit(&ic->ic_mutex);

        mutex_enter(&so_conn->ic_tx_mutex);

        while (so_conn->ic_tx_thread_running) {
                /* Sleep until something is queued or we are told to stop */
                while (list_is_empty(&so_conn->ic_tx_list)) {
                        DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
                        cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
                        DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);

                        if (!so_conn->ic_tx_thread_running) {
                                goto tx_bail;
                        }
                }

                /*
                 * Take ownership of the head object, then drop the tx
                 * lock so that others can keep queueing while we
                 * transmit.
                 */
                object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
                list_remove(&so_conn->ic_tx_list, object);
                mutex_exit(&so_conn->ic_tx_mutex);

                /* The magic number tells PDUs and buffers apart */
                switch (object->idm_tx_obj_magic) {
                case IDM_PDU_MAGIC: {
                        idm_pdu_t *pdu = (idm_pdu_t *)object;
                        DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
                            idm_pdu_t *, (idm_pdu_t *)object);

                        if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
                                /* No IDM task */
                                (ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
                        }
                        status = idm_i_so_tx((idm_pdu_t *)object);
                        break;
                }
                case IDM_BUF_MAGIC: {
                        idm_buf_t *idb = (idm_buf_t *)object;
                        idm_task_t *idt = idb->idb_task_binding;

                        DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
                            idm_buf_t *, idb);

                        /*
                         * idm_so_send_buf_region expects the task lock
                         * held and returns with it held.
                         */
                        mutex_enter(&idt->idt_mutex);
                        status = idm_so_send_buf_region(idt,
                            idb, 0, idb->idb_xfer_len);

                        /*
                         * TX thread owns the buffer so we expect it to
                         * be "in transport"
                         */
                        ASSERT(idb->idb_in_transport);
                        if (IDM_CONN_ISTGT(ic)) {
                                /*
                                 * idm_buf_tx_to_ini_done releases
                                 * idt->idt_mutex
                                 */
                                DTRACE_ISCSI_8(xfer__done,
                                    idm_conn_t *, idt->idt_ic,
                                    uintptr_t, idb->idb_buf,
                                    uint32_t, idb->idb_bufoffset,
                                    uint64_t, 0, uint32_t, 0, uint32_t, 0,
                                    uint32_t, idb->idb_xfer_len,
                                    int, XFER_BUF_TX_TO_INI);
                                idm_buf_tx_to_ini_done(idt, idb, status);
                        } else {
                                /*
                                 * Not a target connection: the buffer was
                                 * queued by idm_so_send_rtt_data, so drop
                                 * its task hold, free it and release the
                                 * task lock ourselves.
                                 */
                                idm_so_send_rtt_data_done(idt, idb);
                                mutex_exit(&idt->idt_mutex);
                        }
                        break;
                }

                default:
                        IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
                            "(0x%08x)", object->idm_tx_obj_magic);
                        status = IDM_STATUS_FAIL;
                }

                mutex_enter(&so_conn->ic_tx_mutex);

                /*
                 * Any failure stops the thread: clearing the flag ends
                 * the outer loop and the connection is told the
                 * transport failed.
                 */
                if (status != IDM_STATUS_SUCCESS) {
                        so_conn->ic_tx_thread_running = B_FALSE;
                        idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
                }
        }

        /*
         * Before we leave, we need to abort every item remaining in the
         * TX list.
         */

tx_bail:
        /* ic_tx_mutex is held here on both ways of reaching this label */
        object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);

        while (object != NULL) {
                /*
                 * NOTE(review): 'next' is sampled before ic_tx_mutex is
                 * dropped in the IDM_BUF_MAGIC case below.  This relies
                 * on nobody else modifying ic_tx_list once
                 * ic_tx_thread_running is false -- confirm against the
                 * other enqueue paths.
                 */
                next = list_next(&so_conn->ic_tx_list, object);

                list_remove(&so_conn->ic_tx_list, object);
                switch (object->idm_tx_obj_magic) {
                case IDM_PDU_MAGIC:
                        idm_pdu_complete((idm_pdu_t *)object,
                            IDM_STATUS_ABORTED);
                        break;

                case IDM_BUF_MAGIC: {
                        idm_buf_t *idb = (idm_buf_t *)object;
                        idm_task_t *idt = idb->idb_task_binding;
                        /* Release the tx lock before taking the task lock */
                        mutex_exit(&so_conn->ic_tx_mutex);
                        mutex_enter(&idt->idt_mutex);
                        /*
                         * TX thread owns the buffer so we expect it to
                         * be "in transport"
                         */
                        ASSERT(idb->idb_in_transport);
                        if (IDM_CONN_ISTGT(ic)) {
                                /*
                                 * idm_buf_tx_to_ini_done releases
                                 * idt->idt_mutex
                                 */
                                DTRACE_ISCSI_8(xfer__done,
                                    idm_conn_t *, idt->idt_ic,
                                    uintptr_t, idb->idb_buf,
                                    uint32_t, idb->idb_bufoffset,
                                    uint64_t, 0, uint32_t, 0, uint32_t, 0,
                                    uint32_t, idb->idb_xfer_len,
                                    int, XFER_BUF_TX_TO_INI);
                                idm_buf_tx_to_ini_done(idt, idb,
                                    IDM_STATUS_ABORTED);
                        } else {
                                idm_so_send_rtt_data_done(idt, idb);
                                mutex_exit(&idt->idt_mutex);
                        }
                        mutex_enter(&so_conn->ic_tx_mutex);
                        break;
                }
                default:
                        IDM_CONN_LOG(CE_WARN,
                            "idm_sotx_thread: Unexpected magic "
                            "(0x%08x)", object->idm_tx_obj_magic);
                }

                object = next;
        }

        mutex_exit(&so_conn->ic_tx_mutex);
        /* Drop the hold taken at thread start */
        idm_conn_rele(ic);
        thread_exit();
        /*NOTREACHED*/
}
2923 
2924 static void
2925 idm_so_socket_set_nonblock(struct sonode *node)
2926 {
2927         (void) VOP_SETFL(node->so_vnode, node->so_flag,
2928             (node->so_state | FNONBLOCK), CRED(), NULL);
2929 }
2930 
2931 static void
2932 idm_so_socket_set_block(struct sonode *node)
2933 {
2934         (void) VOP_SETFL(node->so_vnode, node->so_flag,
2935             (node->so_state & (~FNONBLOCK)), CRED(), NULL);
2936 }
2937 
2938 
2939 /*
2940  * Called by kernel sockets when the connection has been accepted or
2941  * rejected. In early volo, a "disconnect" callback was sent instead of
2942  * "connectfailed", so we check for both.
2943  */
2944 /* ARGSUSED */
2945 void
2946 idm_so_timed_socket_connect_cb(ksocket_t ks,
2947     ksocket_callback_event_t ev, void *arg, uintptr_t info)
2948 {
2949         idm_so_timed_socket_t   *itp = arg;
2950         ASSERT(itp != NULL);
2951         ASSERT(ev == KSOCKET_EV_CONNECTED ||
2952             ev == KSOCKET_EV_CONNECTFAILED ||
2953             ev == KSOCKET_EV_DISCONNECTED);
2954 
2955         mutex_enter(&idm_so_timed_socket_mutex);
2956         itp->it_callback_called = B_TRUE;
2957         if (ev == KSOCKET_EV_CONNECTED) {
2958                 itp->it_socket_error_code = 0;
2959         } else {
2960                 /* Make sure the error code is non-zero on error */
2961                 if (info == 0)
2962                         info = ECONNRESET;
2963                 itp->it_socket_error_code = (int)info;
2964         }
2965         cv_signal(&itp->it_cv);
2966         mutex_exit(&idm_so_timed_socket_mutex);
2967 }
2968 
2969 int
2970 idm_so_timed_socket_connect(ksocket_t ks,
2971     struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
2972 {
2973         clock_t                 conn_login_max;
2974         int                     rc, nonblocking, rval;
2975         idm_so_timed_socket_t   it;
2976         ksocket_callbacks_t     ks_cb;
2977 
2978         conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
2979 
2980         /*
2981          * Set to non-block socket mode, with callback on connect
2982          * Early volo used "disconnected" instead of "connectfailed",
2983          * so set callback to look for both.
2984          */
2985         bzero(&it, sizeof (it));
2986         ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
2987             KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
2988         ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
2989         ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
2990         ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
2991         cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
2992         rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
2993         if (rc != 0)
2994                 return (rc);
2995 
2996         /* Set to non-blocking mode */
2997         nonblocking = 1;
2998         rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
2999             CRED());
3000         if (rc != 0)
3001                 goto cleanup;
3002 
3003         bzero(&it, sizeof (it));
3004         for (;;) {
3005                 /*
3006                  * Warning -- in a loopback scenario, the call to
3007                  * the connect_cb can occur inside the call to
3008                  * ksocket_connect. Do not hold the mutex around the
3009                  * call to ksocket_connect.
3010                  */
3011                 rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
3012                 if (rc == 0 || rc == EISCONN) {
3013                         /* socket success or already success */
3014                         rc = 0;
3015                         break;
3016                 }
3017                 if ((rc != EINPROGRESS) && (rc != EALREADY)) {
3018                         break;
3019                 }
3020 
3021                 /* TCP connect still in progress. See if out of time. */
3022                 if (ddi_get_lbolt() > conn_login_max) {
3023                         /*
3024                          * Connection retry timeout,
3025                          * failed connect to target.
3026                          */
3027                         rc = ETIMEDOUT;
3028                         break;
3029                 }
3030 
3031                 /*
3032                  * TCP connect still in progress.  Sleep until callback.
3033                  * Do NOT go to sleep if the callback already occurred!
3034                  */
3035                 mutex_enter(&idm_so_timed_socket_mutex);
3036                 if (!it.it_callback_called) {
3037                         (void) cv_timedwait(&it.it_cv,
3038                             &idm_so_timed_socket_mutex, conn_login_max);
3039                 }
3040                 if (it.it_callback_called) {
3041                         rc = it.it_socket_error_code;
3042                         mutex_exit(&idm_so_timed_socket_mutex);
3043                         break;
3044                 }
3045                 /* If timer expires, go call ksocket_connect one last time. */
3046                 mutex_exit(&idm_so_timed_socket_mutex);
3047         }
3048 
3049         /* resume blocking mode */
3050         nonblocking = 0;
3051         (void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3052             CRED());
3053 cleanup:
3054         (void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
3055         cv_destroy(&it.it_cv);
3056         if (rc != 0) {
3057                 idm_soshutdown(ks);
3058         }
3059         return (rc);
3060 }
3061 
3062 
3063 void
3064 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
3065 {
3066         int                     dp_addr_size;
3067         struct sockaddr_in      *sin;
3068         struct sockaddr_in6     *sin6;
3069 
3070         /* Build sockaddr_storage for this portal (idm_addr_t) */
3071         bzero(sa, sizeof (*sa));
3072         dp_addr_size = dportal->a_addr.i_insize;
3073         if (dp_addr_size == sizeof (struct in_addr)) {
3074                 /* IPv4 */
3075                 sa->ss_family = AF_INET;
3076                 sin = (struct sockaddr_in *)sa;
3077                 sin->sin_port = htons(dportal->a_port);
3078                 bcopy(&dportal->a_addr.i_addr.in4,
3079                     &sin->sin_addr, sizeof (struct in_addr));
3080         } else if (dp_addr_size == sizeof (struct in6_addr)) {
3081                 /* IPv6 */
3082                 sa->ss_family = AF_INET6;
3083                 sin6 = (struct sockaddr_in6 *)sa;
3084                 sin6->sin6_port = htons(dportal->a_port);
3085                 bcopy(&dportal->a_addr.i_addr.in6,
3086                     &sin6->sin6_addr, sizeof (struct in6_addr));
3087         } else {
3088                 ASSERT(0);
3089         }
3090 }
3091 
3092 
/*
 * Return a human-readable form of a sockaddr_storage, in the form
 * [ip-address].port.  This is used in calls to logging functions.
 * If several calls to idm_sa_ntop are made within the same invocation
 * of a logging function, then each one needs its own buf.
 */
const char *
idm_sa_ntop(const struct sockaddr_storage *sa,
    char *buf, size_t size)
{
        static const char bogus_ip[] = "[0].-1";
        char            addr_str[INET6_ADDRSTRLEN];
        const void      *addr = NULL;
        in_port_t       port = 0;       /* network byte order */
        int             af = 0;

        /* Pick out the address and port for the families we understand */
        if (sa->ss_family == AF_INET6) {
                const struct sockaddr_in6 *v6 =
                    (const struct sockaddr_in6 *)sa;

                af = v6->sin6_family;
                addr = &v6->sin6_addr;
                port = v6->sin6_port;
        } else if (sa->ss_family == AF_INET) {
                const struct sockaddr_in *v4 =
                    (const struct sockaddr_in *)sa;

                af = v4->sin_family;
                addr = &v4->sin_addr;
                port = v4->sin_port;
        }

        /*
         * Format only if the family was recognised, the address converts,
         * and the result -- brackets, '.', up to five port digits and the
         * terminating NUL -- fits in the caller's buffer.
         */
        if (addr != NULL &&
            inet_ntop(af, addr, addr_str, sizeof (addr_str)) != NULL &&
            strlen(addr_str) + sizeof ("[].65535") <= size) {
                (void) snprintf(buf, size, "[%s].%u", addr_str,
                    ntohs(port));
                return (buf);
        }

        /* Anything else is reported with a recognisably bogus value */
        (void) snprintf(buf, size, "%s", bogus_ip);
        return (buf);
}