1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/conf.h>
27 #include <sys/stat.h>
28 #include <sys/file.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/modctl.h>
32 #include <sys/priv.h>
33 #include <sys/cpuvar.h>
34 #include <sys/socket.h>
35 #include <sys/strsubr.h>
36 #include <sys/sysmacros.h>
37 #include <sys/sdt.h>
38 #include <netinet/tcp.h>
39 #include <inet/tcp.h>
40 #include <sys/socketvar.h>
41 #include <sys/pathname.h>
42 #include <sys/fs/snode.h>
43 #include <sys/fs/dv_node.h>
44 #include <sys/vnode.h>
45 #include <netinet/in.h>
46 #include <net/if.h>
47 #include <sys/sockio.h>
48 #include <sys/ksocket.h>
49 #include <sys/filio.h> /* FIONBIO */
50 #include <sys/iscsi_protocol.h>
51 #include <sys/idm/idm.h>
52 #include <sys/idm/idm_so.h>
53 #include <sys/idm/idm_text.h>
54
55 #define IN_PROGRESS_DELAY 1
56
57 /*
58 * in6addr_any is currently all zeroes, but use the macro in case this
59 * ever changes.
60 */
61 static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
62
63 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
64 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
65 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
66
67 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
68 static void idm_so_conn_destroy_common(idm_conn_t *ic);
69 static void idm_so_conn_connect_common(idm_conn_t *ic);
70
71 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
72 boolean_t boot_conn);
73 static void idm_set_ini_postconnect_options(idm_so_conn_t *sc);
74 static void idm_set_tgt_connect_options(ksocket_t so);
75 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
76
77 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
78 static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
79 idm_buf_t *idb, uint32_t offset, uint32_t length);
80 static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
81 static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
82 idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
83
84 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
85 uint32_t ro, uint32_t dlength);
86
87 static idm_status_t idm_so_handle_digest(idm_conn_t *it,
88 nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
89
90 static void idm_so_socket_set_nonblock(struct sonode *node);
91 static void idm_so_socket_set_block(struct sonode *node);
92
93 /*
94 * Transport ops prototypes
95 */
96 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
97 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
98 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
99 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
100 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
101 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
102 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
103 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
104 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
105 static void idm_so_notice_key_values(idm_conn_t *it,
106 nvlist_t *negotiated_nvl);
107 static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
108 nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
109 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
110 idm_transport_caps_t *caps);
111 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
112 static void idm_so_buf_free(idm_buf_t *idb);
113 static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
114 static void idm_so_buf_teardown(idm_buf_t *idb);
115 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
116 static void idm_so_tgt_svc_destroy(idm_svc_t *is);
117 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
118 static void idm_so_tgt_svc_offline(idm_svc_t *is);
119 static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
120 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
121 static void idm_so_conn_disconnect(idm_conn_t *ic);
122 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
123 static void idm_so_ini_conn_destroy(idm_conn_t *ic);
124 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
125
126 /*
127 * IDM Native Sockets transport operations
128 */
129 static
130 idm_transport_ops_t idm_so_transport_ops = {
131 idm_so_tx, /* it_tx_pdu */
132 idm_so_buf_tx_to_ini, /* it_buf_tx_to_ini */
133 idm_so_buf_rx_from_ini, /* it_buf_rx_from_ini */
134 idm_so_rx_datain, /* it_rx_datain */
135 idm_so_rx_rtt, /* it_rx_rtt */
136 idm_so_rx_dataout, /* it_rx_dataout */
137 NULL, /* it_alloc_conn_rsrc */
138 NULL, /* it_free_conn_rsrc */
139 NULL, /* it_tgt_enable_datamover */
140 NULL, /* it_ini_enable_datamover */
141 NULL, /* it_conn_terminate */
142 idm_so_free_task_rsrc, /* it_free_task_rsrc */
143 idm_so_negotiate_key_values, /* it_negotiate_key_values */
144 idm_so_notice_key_values, /* it_notice_key_values */
145 idm_so_conn_is_capable, /* it_conn_is_capable */
146 idm_so_buf_alloc, /* it_buf_alloc */
147 idm_so_buf_free, /* it_buf_free */
148 idm_so_buf_setup, /* it_buf_setup */
149 idm_so_buf_teardown, /* it_buf_teardown */
150 idm_so_tgt_svc_create, /* it_tgt_svc_create */
151 idm_so_tgt_svc_destroy, /* it_tgt_svc_destroy */
152 idm_so_tgt_svc_online, /* it_tgt_svc_online */
153 idm_so_tgt_svc_offline, /* it_tgt_svc_offline */
154 idm_so_tgt_conn_destroy, /* it_tgt_conn_destroy */
155 idm_so_tgt_conn_connect, /* it_tgt_conn_connect */
156 idm_so_conn_disconnect, /* it_tgt_conn_disconnect */
157 idm_so_ini_conn_create, /* it_ini_conn_create */
158 idm_so_ini_conn_destroy, /* it_ini_conn_destroy */
159 idm_so_ini_conn_connect, /* it_ini_conn_connect */
160 idm_so_conn_disconnect, /* it_ini_conn_disconnect */
161 idm_so_declare_key_values /* it_declare_key_values */
162 };
163
164 kmutex_t idm_so_timed_socket_mutex;
165 /*
166 * idm_so_init()
167 * Sockets transport initialization
168 */
169 void
170 idm_so_init(idm_transport_t *it)
171 {
	/* Cache for IDM Data and R2T Transmit PDUs */
173 idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
174 sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
175 &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
176
	/* Cache for IDM Receive PDUs */
178 idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
179 sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
180 &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
181
182 /* 128k buffer cache */
183 idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
184 IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
185
186 /* Set the sockets transport ops */
187 it->it_ops = &idm_so_transport_ops;
188
189 mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
190
191 }
192
193 /*
194 * idm_so_fini()
195 * Sockets transport teardown
196 */
197 void
198 idm_so_fini(void)
199 {
200 kmem_cache_destroy(idm.idm_so_128k_buf_cache);
201 kmem_cache_destroy(idm.idm_sotx_pdu_cache);
202 kmem_cache_destroy(idm.idm_sorx_pdu_cache);
203 mutex_destroy(&idm_so_timed_socket_mutex);
204 }
205
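/*
 * idm_socreate()
 * Create a kernel socket of the given domain, type, and protocol.
 * Returns the new ksocket_t, or NULL if the socket could not be created.
 */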
206 ksocket_t
207 idm_socreate(int domain, int type, int protocol)
208 {
209 ksocket_t ks;
210
211 if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
212 CRED())) {
213 return (ks);
214 } else {
215 return (NULL);
216 }
217 }
218
219 /*
220 * idm_soshutdown will disconnect the socket and prevent subsequent PDU
221 * reception and transmission. The sonode still exists but its state
222 * gets modified to indicate it is no longer connected. Calls to
 * idm_sorecv/idm_iov_sorecv will return, so idm_soshutdown can be used to
 * regain control of a thread blocked in idm_sorecv.
225 */
226 void
227 idm_soshutdown(ksocket_t so)
228 {
229 (void) ksocket_shutdown(so, SHUT_RDWR, CRED());
230 }
231
232 /*
233 * idm_sodestroy releases all resources associated with a socket previously
 * created with idm_socreate. The socket must be shut down using
235 * idm_soshutdown before the socket is destroyed with idm_sodestroy,
236 * otherwise undefined behavior will result.
237 */
238 void
239 idm_sodestroy(ksocket_t ks)
240 {
241 (void) ksocket_close(ks, CRED());
242 }
243
/*
 * Compare two addresses in sockaddr_storage format. Returns -1, 0, or 1
 * (comparator semantics). V4-mapped IPv6 addresses are normalized to IPv4
 * when v4_mapped_as_v4 is B_TRUE, and ports are compared only when
 * compare_ports is B_TRUE.
 */
247
248 int
249 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
250 const struct sockaddr_storage *cmp_ss2,
251 boolean_t v4_mapped_as_v4,
252 boolean_t compare_ports)
253 {
254 struct sockaddr_storage mapped_v4_ss1, mapped_v4_ss2;
255 const struct sockaddr_storage *ss1, *ss2;
256 struct in_addr *in1, *in2;
257 struct in6_addr *in61, *in62;
258 int i;
259
260 /*
261 * Normalize V4-mapped IPv6 addresses into V4 format if
262 * v4_mapped_as_v4 is B_TRUE.
263 */
264 ss1 = cmp_ss1;
265 ss2 = cmp_ss2;
266 if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
267 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
268 if (IN6_IS_ADDR_V4MAPPED(in61)) {
269 bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
270 mapped_v4_ss1.ss_family = AF_INET;
271 ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
272 ((struct sockaddr_in *)ss1)->sin_port;
273 IN6_V4MAPPED_TO_INADDR(in61,
274 &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
275 ss1 = &mapped_v4_ss1;
276 }
277 }
278 ss2 = cmp_ss2;
279 if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
280 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
281 if (IN6_IS_ADDR_V4MAPPED(in62)) {
282 bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
283 mapped_v4_ss2.ss_family = AF_INET;
284 ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
285 ((struct sockaddr_in *)ss2)->sin_port;
286 IN6_V4MAPPED_TO_INADDR(in62,
287 &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
288 ss2 = &mapped_v4_ss2;
289 }
290 }
291
292 /*
293 * Compare ports, then address family, then ip address
294 */
295 if (compare_ports &&
296 (((struct sockaddr_in *)ss1)->sin_port !=
297 ((struct sockaddr_in *)ss2)->sin_port)) {
298 if (((struct sockaddr_in *)ss1)->sin_port >
299 ((struct sockaddr_in *)ss2)->sin_port)
300 return (1);
301 else
302 return (-1);
303 }
304
305 /*
306 * ports are the same
307 */
308 if (ss1->ss_family != ss2->ss_family) {
309 if (ss1->ss_family == AF_INET)
310 return (1);
311 else
312 return (-1);
313 }
314
315 /*
316 * address families are the same
317 */
318 if (ss1->ss_family == AF_INET) {
319 in1 = &((struct sockaddr_in *)ss1)->sin_addr;
320 in2 = &((struct sockaddr_in *)ss2)->sin_addr;
321
322 if (in1->s_addr > in2->s_addr)
323 return (1);
324 else if (in1->s_addr < in2->s_addr)
325 return (-1);
326 else
327 return (0);
328 } else if (ss1->ss_family == AF_INET6) {
329 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
330 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
331
332 for (i = 0; i < 4; i++) {
333 if (in61->s6_addr32[i] > in62->s6_addr32[i])
334 return (1);
335 else if (in61->s6_addr32[i] < in62->s6_addr32[i])
336 return (-1);
337 }
338 return (0);
339 }
340
341 return (1);
342 }
343
344 /*
345 * IP address filter functions to flag addresses that should not
346 * go out to initiators through discovery.
347 */
348 static boolean_t
349 idm_v4_addr_okay(struct in_addr *in_addr)
350 {
351 in_addr_t addr = ntohl(in_addr->s_addr);
352
353 if ((INADDR_NONE == addr) ||
354 (IN_MULTICAST(addr)) ||
355 ((addr >> IN_CLASSA_NSHIFT) == 0) ||
356 ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
357 return (B_FALSE);
358 }
359 return (B_TRUE);
360 }
361
362 static boolean_t
363 idm_v6_addr_okay(struct in6_addr *addr6)
364 {
365
366 if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
367 (IN6_IS_ADDR_LOOPBACK(addr6)) ||
368 (IN6_IS_ADDR_MULTICAST(addr6)) ||
369 (IN6_IS_ADDR_V4MAPPED(addr6)) ||
370 (IN6_IS_ADDR_V4COMPAT(addr6)) ||
371 (IN6_IS_ADDR_LINKLOCAL(addr6))) {
372 return (B_FALSE);
373 }
374 return (B_TRUE);
375 }
376
/*
 * idm_get_ipaddr retrieves the list of IP addresses configured on the host
 * by issuing a sequence of kernel socket ioctls (SIOCGLIFNUM, SIOCGLIFCONF,
 * SIOCGLIFFLAGS). The list is returned through *ipaddr_p and the return
 * value is the size of the allocation, or 0 if no usable addresses were
 * found.
 */
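/*
 * Illustrative caller sketch (not taken from this file): the list is
 * allocated with kmem_zalloc, so a caller frees it using the returned size:
 *
 *	idm_addr_list_t	*ipaddr;
 *	int		size;
 *
 *	size = idm_get_ipaddr(&ipaddr);
 *	...
 *	if (ipaddr != NULL)
 *		kmem_free(ipaddr, size);
 */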
381 int
382 idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
383 {
384 ksocket_t so4, so6;
385 struct lifnum lifn;
386 struct lifconf lifc;
387 struct lifreq *lp;
388 int rval;
389 int numifs;
390 int bufsize;
391 void *buf;
392 int i, j, n, rc;
393 struct sockaddr_storage ss;
394 struct sockaddr_in *sin;
395 struct sockaddr_in6 *sin6;
396 idm_addr_t *ip;
397 idm_addr_list_t *ipaddr = NULL;
398 int size_ipaddr;
399
400 *ipaddr_p = NULL;
401 size_ipaddr = 0;
402 buf = NULL;
403
404 /* create an ipv4 and ipv6 UDP socket */
405 if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
406 return (0);
407 if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
408 idm_sodestroy(so6);
409 return (0);
410 }
411
412
413 retry_count:
414 /* snapshot the current number of interfaces */
415 lifn.lifn_family = PF_UNSPEC;
416 lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
417 lifn.lifn_count = 0;
	/* use so6 for ioctls with unspecified address families by default */
419 if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
420 != 0) {
421 goto cleanup;
422 }
423
424 numifs = lifn.lifn_count;
425 if (numifs <= 0) {
426 goto cleanup;
427 }
428
429 /* allocate extra room in case more interfaces appear */
430 numifs += 10;
431
432 /* get the interface names and ip addresses */
433 bufsize = numifs * sizeof (struct lifreq);
434 buf = kmem_alloc(bufsize, KM_SLEEP);
435
436 lifc.lifc_family = AF_UNSPEC;
437 lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
438 lifc.lifc_len = bufsize;
439 lifc.lifc_buf = buf;
440 rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
441 if (rc != 0) {
442 goto cleanup;
443 }
444 /* if our extra room is used up, try again */
445 if (bufsize <= lifc.lifc_len) {
446 kmem_free(buf, bufsize);
447 buf = NULL;
448 goto retry_count;
449 }
	/* calculate the actual number of interface entries returned */
451 n = lifc.lifc_len / sizeof (struct lifreq);
452
	/* allocate the output address list */
454 if (n > 0) {
455 size_ipaddr = sizeof (idm_addr_list_t) +
456 (n - 1) * sizeof (idm_addr_t);
457 ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
458 } else {
459 goto cleanup;
460 }
461
462 /*
463 * Examine the array of interfaces and filter uninteresting ones
464 */
465 for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
466
467 /*
468 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
469 */
470 ss = lp->lifr_addr;
471 /*
472 * fetch the flags using the socket of the correct family
473 */
474 switch (ss.ss_family) {
475 case AF_INET:
476 rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
477 &rval, CRED());
478 break;
479 case AF_INET6:
480 rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
481 &rval, CRED());
482 break;
483 default:
484 continue;
485 }
486 if (rc == 0) {
487 /*
488 * If we got the flags, skip uninteresting
489 * interfaces based on flags
490 */
491 if ((lp->lifr_flags & IFF_UP) != IFF_UP)
492 continue;
493 if (lp->lifr_flags &
494 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
495 continue;
496 }
497
498 /* save ip address */
499 ip = &ipaddr->al_addrs[j];
500 switch (ss.ss_family) {
501 case AF_INET:
502 sin = (struct sockaddr_in *)&ss;
503 if (!idm_v4_addr_okay(&sin->sin_addr))
504 continue;
505 ip->a_addr.i_addr.in4 = sin->sin_addr;
506 ip->a_addr.i_insize = sizeof (struct in_addr);
507 break;
508 case AF_INET6:
509 sin6 = (struct sockaddr_in6 *)&ss;
510 if (!idm_v6_addr_okay(&sin6->sin6_addr))
511 continue;
512 ip->a_addr.i_addr.in6 = sin6->sin6_addr;
513 ip->a_addr.i_insize = sizeof (struct in6_addr);
514 break;
515 default:
516 continue;
517 }
518 j++;
519 }
520
521 if (j == 0) {
522 /* no valid ifaddr */
523 kmem_free(ipaddr, size_ipaddr);
524 size_ipaddr = 0;
525 ipaddr = NULL;
526 } else {
527 ipaddr->al_out_cnt = j;
528 }
529
530
531 cleanup:
532 idm_sodestroy(so6);
533 idm_sodestroy(so4);
534
535 if (buf != NULL)
536 kmem_free(buf, bufsize);
537
538 *ipaddr_p = ipaddr;
539 return (size_ipaddr);
540 }
541
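/*
 * idm_sorecv()
 * Receive exactly len bytes from the socket into msg by wrapping the
 * buffer in a single-entry iovec and calling idm_iov_sorecv.
 */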
542 int
543 idm_sorecv(ksocket_t so, void *msg, size_t len)
544 {
545 iovec_t iov;
546
547 ASSERT(so != NULL);
548 ASSERT(len != 0);
549
550 /*
551 * Fill in iovec and receive data
552 */
553 iov.iov_base = msg;
554 iov.iov_len = len;
555
556 return (idm_iov_sorecv(so, &iov, 1, len));
557 }
558
/*
 * idm_sosendto - Sends a buffer of data on an unconnected socket.
 *
 * This function puts the data provided on the wire by calling
 * ksocket_sendmsg. It returns only when all the data has been sent or an
 * error occurs.
 *
 * Returns 0 on success, the socket errno value if ksocket_sendmsg fails,
 * and -1 if ksocket_sendmsg succeeds but sends fewer than len bytes.
 */
569 int
570 idm_sosendto(ksocket_t so, void *buff, size_t len,
571 struct sockaddr *name, socklen_t namelen)
572 {
573 struct msghdr msg;
574 struct iovec iov[1];
575 int error;
576 size_t sent = 0;
577
578 iov[0].iov_base = buff;
579 iov[0].iov_len = len;
580
581 /* Initialization of the message header. */
582 bzero(&msg, sizeof (msg));
583 msg.msg_iov = iov;
584 msg.msg_iovlen = 1;
585 msg.msg_name = name;
586 msg.msg_namelen = namelen;
587
588 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
589 /* Data sent */
590 if (sent == len) {
591 /* All data sent. Success. */
592 return (0);
593 } else {
594 /* Not all data was sent. Failure */
595 return (-1);
596 }
597 }
598
599 /* Send failed */
600 return (error);
601 }
602
/*
 * idm_iov_sosend - Sends an iovec on a connected socket.
 *
 * This function puts the data provided on the wire by calling
 * ksocket_sendmsg. It returns only when all the data has been sent or an
 * error occurs.
 *
 * Returns 0 on success, the socket errno value if ksocket_sendmsg fails,
 * and -1 if ksocket_sendmsg succeeds but sends fewer than total_len bytes.
 */
613 int
614 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
615 {
616 struct msghdr msg;
617 int error;
618 size_t sent = 0;
619
620 ASSERT(iop != NULL);
621
622 /* Initialization of the message header. */
623 bzero(&msg, sizeof (msg));
624 msg.msg_iov = iop;
625 msg.msg_iovlen = iovlen;
626
627 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
628 == 0) {
629 /* Data sent */
630 if (sent == total_len) {
631 /* All data sent. Success. */
632 return (0);
633 } else {
634 /* Not all data was sent. Failure */
635 return (-1);
636 }
637 }
638
639 /* Send failed */
640 return (error);
641 }
642
/*
 * idm_iov_sorecv - Receives an iovec from a connected socket.
 *
 * This function reads the requested data from the socket. It returns only
 * when all of the requested data has been received or an error occurs.
 *
 * Returns 0 on success, the socket errno value if ksocket_recvmsg fails,
 * and -1 if ksocket_recvmsg succeeds but returns fewer than total_len bytes.
 */
653 int
654 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
655 {
656 struct msghdr msg;
657 int error;
658 size_t recv;
659 int flags;
660
661 ASSERT(iop != NULL);
662
663 /* Initialization of the message header. */
664 bzero(&msg, sizeof (msg));
665 msg.msg_iov = iop;
666 msg.msg_iovlen = iovlen;
667 flags = MSG_WAITALL;
668
669 if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
670 == 0) {
671 /* Received data */
672 if (recv == total_len) {
673 /* All requested data received. Success */
674 return (0);
675 } else {
676 /*
677 * Not all data was received. The connection has
678 * probably failed.
679 */
680 return (-1);
681 }
682 }
683
684 /* Receive failed */
685 return (error);
686 }
687
688 static void
689 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
690 {
691 int conn_abort = 10000;
692 int conn_notify = 2000;
693 int abort = 30000;
694
695 /* Pre-connect socket options */
696 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
697 TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
698 CRED());
699 if (boot_conn == B_FALSE) {
700 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
701 TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
702 CRED());
703 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
704 TCP_ABORT_THRESHOLD,
705 (char *)&abort, sizeof (int), CRED());
706 }
707 }
708
709 static void
710 idm_set_ini_postconnect_options(idm_so_conn_t *sc)
711 {
712 int32_t rcvbuf = IDM_RCVBUF_SIZE;
713 int32_t sndbuf = IDM_SNDBUF_SIZE;
714 const int on = 1;
715
716 /* Set postconnect options */
717 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_NODELAY,
718 (char *)&on, sizeof (int), CRED());
719 (void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_RCVBUF,
720 (char *)&rcvbuf, sizeof (int), CRED());
721 (void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_SNDBUF,
722 (char *)&sndbuf, sizeof (int), CRED());
723 }
724
725 static void
726 idm_set_tgt_connect_options(ksocket_t ks)
727 {
728 int32_t rcvbuf = IDM_RCVBUF_SIZE;
729 int32_t sndbuf = IDM_SNDBUF_SIZE;
730 const int on = 1;
731
732 /* Set connect options */
733 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
734 (char *)&rcvbuf, sizeof (int), CRED());
735 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
736 (char *)&sndbuf, sizeof (int), CRED());
737 (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
738 (char *)&on, sizeof (on), CRED());
739 }
740
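/*
 * n2h24()
 * Convert a 24-bit network-order field (such as the iSCSI dlength in the
 * BHS) to a host-order 32-bit value.
 */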
741 static uint32_t
742 n2h24(const uchar_t *ptr)
743 {
744 return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
745 }
746
747
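/*
 * idm_sorecvhdr()
 * Receive and validate an iSCSI PDU header. Reads the BHS, checks the data
 * segment length against the negotiated maximum (target side only), reads
 * any additional header segments (allocating a larger header buffer when
 * needed), and verifies the header digest when enabled.
 */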
748 static idm_status_t
749 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
750 {
751 iscsi_hdr_t *bhs;
752 uint32_t hdr_digest_crc;
753 uint32_t crc_calculated;
754 void *new_hdr;
755 int ahslen = 0;
756 int total_len = 0;
757 int iovlen = 0;
758 struct iovec iov[2];
759 idm_so_conn_t *so_conn;
760 int rc;
761
762 so_conn = ic->ic_transport_private;
763
764 /*
765 * Read BHS
766 */
767 bhs = pdu->isp_hdr;
768 rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
769 if (rc != IDM_STATUS_SUCCESS) {
770 return (IDM_STATUS_FAIL);
771 }
772
773 /*
774 * Check actual AHS length against the amount available in the buffer
775 */
776 pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
777 (bhs->hlength * sizeof (uint32_t));
778 pdu->isp_datalen = n2h24(bhs->dlength);
779 if (ic->ic_conn_type == CONN_TYPE_TGT &&
780 pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
781 IDM_CONN_LOG(CE_WARN,
782 "idm_sorecvhdr: exceeded the max data segment length");
783 return (IDM_STATUS_FAIL);
784 }
785 if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
786 /* Allocate a new header segment and change the callback */
787 new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
788 bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
789 pdu->isp_hdr = new_hdr;
790 pdu->isp_flags |= IDM_PDU_ADDL_HDR;
791
792 /*
793 * This callback will restore the expected values after
794 * the RX PDU has been processed.
795 */
796 pdu->isp_callback = idm_sorx_addl_pdu_cb;
797 }
798
799 /*
800 * Setup receipt of additional header and header digest (if enabled).
801 */
802 if (bhs->hlength > 0) {
803 iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
804 ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
805 iov[iovlen].iov_len = ahslen;
806 total_len += iov[iovlen].iov_len;
807 iovlen++;
808 }
809
810 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
811 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
812 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
813 total_len += iov[iovlen].iov_len;
814 iovlen++;
815 }
816
817 if ((iovlen != 0) &&
818 (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
819 total_len) != 0)) {
820 return (IDM_STATUS_FAIL);
821 }
822
823 /*
824 * Validate header digest if enabled
825 */
826 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
827 crc_calculated = idm_crc32c(pdu->isp_hdr,
828 sizeof (iscsi_hdr_t) + ahslen);
829 if (crc_calculated != hdr_digest_crc) {
830 /* Invalid Header Digest */
831 return (IDM_STATUS_HEADER_DIGEST);
832 }
833 }
834
	return (IDM_STATUS_SUCCESS);
836 }
837
838 /*
839 * idm_so_ini_conn_create()
840 * Allocate the sockets transport connection resources.
841 */
842 static idm_status_t
843 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
844 {
845 ksocket_t so;
846 idm_so_conn_t *so_conn;
847 idm_status_t idmrc;
848
849 so = idm_socreate(cr->cr_domain, cr->cr_type,
850 cr->cr_protocol);
851 if (so == NULL) {
852 return (IDM_STATUS_FAIL);
853 }
854
855 /* Bind the socket if configured to do so */
856 if (cr->cr_bound) {
857 if (ksocket_bind(so, &cr->cr_bound_addr.sin,
858 SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
859 idm_sodestroy(so);
860 return (IDM_STATUS_FAIL);
861 }
862 }
863
864 idmrc = idm_so_conn_create_common(ic, so);
865 if (idmrc != IDM_STATUS_SUCCESS) {
866 idm_soshutdown(so);
867 idm_sodestroy(so);
868 return (IDM_STATUS_FAIL);
869 }
870
871 so_conn = ic->ic_transport_private;
872 /* Set up socket options */
873 idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
874
875 return (IDM_STATUS_SUCCESS);
876 }
877
878 /*
879 * idm_so_ini_conn_destroy()
880 * Tear down the sockets transport connection resources.
881 */
882 static void
883 idm_so_ini_conn_destroy(idm_conn_t *ic)
884 {
885 idm_so_conn_destroy_common(ic);
886 }
887
888 /*
889 * idm_so_ini_conn_connect()
890 * Establish the connection referred to by the handle previously allocated via
891 * idm_so_ini_conn_create().
892 */
893 static idm_status_t
894 idm_so_ini_conn_connect(idm_conn_t *ic)
895 {
896 idm_so_conn_t *so_conn;
897 struct sonode *node = NULL;
898 int rc;
899 clock_t lbolt, conn_login_max, conn_login_interval;
900 boolean_t nonblock;
901
902 so_conn = ic->ic_transport_private;
903 nonblock = ic->ic_conn_params.nonblock_socket;
904 conn_login_max = ic->ic_conn_params.conn_login_max;
905 conn_login_interval = ddi_get_lbolt() +
906 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
907
908 if (nonblock == B_TRUE) {
909 node = ((struct sonode *)(so_conn->ic_so));
		/* Switch the socket to non-blocking mode */
911 idm_so_socket_set_nonblock(node);
912 do {
913 rc = ksocket_connect(so_conn->ic_so,
914 &ic->ic_ini_dst_addr.sin,
915 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
916 CRED());
917 if (rc == 0 || rc == EISCONN) {
				/* connect succeeded or already connected */
919 rc = IDM_STATUS_SUCCESS;
920 break;
921 }
922 if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
923 (rc == ECONNRESET)) {
				/* connection timed out, refused, or reset */
925 break;
926 }
927 lbolt = ddi_get_lbolt();
928 if (lbolt > conn_login_max) {
929 /*
930 * Connection retry timeout,
931 * failed connect to target.
932 */
933 break;
934 }
935 if (lbolt < conn_login_interval) {
936 if ((rc == EINPROGRESS) || (rc == EALREADY)) {
937 /* TCP connect still in progress */
938 delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
939 continue;
940 } else {
941 delay(conn_login_interval - lbolt);
942 }
943 }
944 conn_login_interval = ddi_get_lbolt() +
945 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
946 } while (rc != 0);
		/* connection established; restore blocking mode */
948 if (rc == IDM_STATUS_SUCCESS) {
949 idm_so_socket_set_block(node);
950 }
951 } else {
952 rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
953 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
954 }
955
956 if (rc != 0) {
957 idm_soshutdown(so_conn->ic_so);
958 return (IDM_STATUS_FAIL);
959 }
960
961 idm_so_conn_connect_common(ic);
962
963 idm_set_ini_postconnect_options(so_conn);
964
965 return (IDM_STATUS_SUCCESS);
966 }
967
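/*
 * idm_so_tgt_conn_create()
 * Wrap an accepted socket in the sockets transport connection resources.
 */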
968 idm_status_t
969 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
970 {
971 idm_status_t idmrc;
972
973 idmrc = idm_so_conn_create_common(ic, new_so);
974
975 return (idmrc);
976 }
977
978 static void
979 idm_so_tgt_conn_destroy(idm_conn_t *ic)
980 {
981 idm_so_conn_destroy_common(ic);
982 }
983
984 /*
985 * idm_so_tgt_conn_connect()
986 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
987 * is invoked from the SM as a result of an inbound connection request.
988 */
989 static idm_status_t
990 idm_so_tgt_conn_connect(idm_conn_t *ic)
991 {
992 idm_so_conn_connect_common(ic);
993
994 return (IDM_STATUS_SUCCESS);
995 }
996
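/*
 * idm_so_conn_create_common()
 * Allocate and initialize the per-connection sockets transport state shared
 * by the initiator and target paths: the scoreboarding flag, default data
 * segment limits, and the TX PDU list with its mutex and condition variable.
 */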
997 static idm_status_t
998 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
999 {
1000 idm_so_conn_t *so_conn;
1001
1002 so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
1003 so_conn->ic_so = new_so;
1004
1005 ic->ic_transport_private = so_conn;
1006 ic->ic_transport_hdrlen = 0;
1007
1008 /* Set the scoreboarding flag on this connection */
1009 ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
1010 ic->ic_conn_params.max_recv_dataseglen =
1011 ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1012 ic->ic_conn_params.max_xmit_dataseglen =
1013 ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1014
1015 /*
1016 * Initialize tx thread mutex and list
1017 */
1018 mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1019 cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1020 list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1021 offsetof(idm_pdu_t, idm_tx_link));
1022
1023 return (IDM_STATUS_SUCCESS);
1024 }
1025
1026 static void
1027 idm_so_conn_destroy_common(idm_conn_t *ic)
1028 {
1029 idm_so_conn_t *so_conn = ic->ic_transport_private;
1030
1031 ic->ic_transport_private = NULL;
1032 idm_sodestroy(so_conn->ic_so);
1033 list_destroy(&so_conn->ic_tx_list);
1034 mutex_destroy(&so_conn->ic_tx_mutex);
1035 cv_destroy(&so_conn->ic_tx_cv);
1036
1037 kmem_free(so_conn, sizeof (idm_so_conn_t));
1038 }
1039
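/*
 * idm_so_conn_connect_common()
 * Record the local and remote socket addresses on the connection handle and
 * start the per-connection TX and RX threads, waiting until both threads
 * have registered their thread IDs.
 */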
1040 static void
1041 idm_so_conn_connect_common(idm_conn_t *ic)
1042 {
1043 idm_so_conn_t *so_conn;
1044 struct sockaddr_in6 t_addr;
1045 socklen_t t_addrlen = 0;
1046
1047 so_conn = ic->ic_transport_private;
1048 bzero(&t_addr, sizeof (struct sockaddr_in6));
1049 t_addrlen = sizeof (struct sockaddr_in6);
1050
1051 /* Set the local and remote addresses in the idm conn handle */
1052 (void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
1053 &t_addrlen, CRED());
1054 bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
1055 (void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
1056 &t_addrlen, CRED());
1057 bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
1058
1059 mutex_enter(&ic->ic_mutex);
1060 so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
1061 &p0, TS_RUN, minclsyspri);
1062 so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
1063 &p0, TS_RUN, minclsyspri);
1064
1065 while (so_conn->ic_rx_thread_did == 0 ||
1066 so_conn->ic_tx_thread_did == 0)
1067 cv_wait(&ic->ic_cv, &ic->ic_mutex);
1068 mutex_exit(&ic->ic_mutex);
1069 }
1070
1071 /*
1072 * idm_so_conn_disconnect()
1073 * Shutdown the socket connection and stop the thread
1074 */
1075 static void
1076 idm_so_conn_disconnect(idm_conn_t *ic)
1077 {
1078 idm_so_conn_t *so_conn;
1079
1080 so_conn = ic->ic_transport_private;
1081
1082 mutex_enter(&ic->ic_mutex);
1083 so_conn->ic_rx_thread_running = B_FALSE;
1084 so_conn->ic_tx_thread_running = B_FALSE;
1085 /* We need to wakeup the TX thread */
1086 mutex_enter(&so_conn->ic_tx_mutex);
1087 cv_signal(&so_conn->ic_tx_cv);
1088 mutex_exit(&so_conn->ic_tx_mutex);
1089 mutex_exit(&ic->ic_mutex);
1090
1091 /* This should wakeup the RX thread if it is sleeping */
1092 idm_soshutdown(so_conn->ic_so);
1093
1094 thread_join(so_conn->ic_tx_thread_did);
1095 thread_join(so_conn->ic_rx_thread_did);
1096 }
1097
1098 /*
1099 * idm_so_tgt_svc_create()
1100 * Establish a service on an IP address and port. idm_svc_req_t contains
1101 * the service parameters.
1102 */
1103 /*ARGSUSED*/
1104 static idm_status_t
1105 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1106 {
1107 idm_so_svc_t *so_svc;
1108
1109 so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1110
1111 /* Set the new sockets service in svc handle */
1112 is->is_so_svc = (void *)so_svc;
1113
1114 return (IDM_STATUS_SUCCESS);
1115 }
1116
1117 /*
1118 * idm_so_tgt_svc_destroy()
1119 * Teardown sockets resources allocated in idm_so_tgt_svc_create()
1120 */
1121 static void
1122 idm_so_tgt_svc_destroy(idm_svc_t *is)
1123 {
1124 /* the socket will have been torn down; free the service */
1125 kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
1126 }
1127
1128 /*
1129 * idm_so_tgt_svc_online()
1130 * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1131 */
1132
1133 static idm_status_t
1134 idm_so_tgt_svc_online(idm_svc_t *is)
1135 {
1136 idm_so_svc_t *so_svc;
1137 idm_svc_req_t *sr = &is->is_svc_req;
1138 struct sockaddr_in6 sin6_ip;
1139 const uint32_t on = 1;
1140 const uint32_t off = 0;
1141
1142 mutex_enter(&is->is_mutex);
1143 so_svc = (idm_so_svc_t *)is->is_so_svc;
1144
1145 /*
1146 * Try creating an IPv6 socket first
1147 */
1148 if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1149 mutex_exit(&is->is_mutex);
1150 return (IDM_STATUS_FAIL);
1151 } else {
1152 bzero(&sin6_ip, sizeof (sin6_ip));
1153 sin6_ip.sin6_family = AF_INET6;
1154 sin6_ip.sin6_port = htons(sr->sr_port);
1155 sin6_ip.sin6_addr = in6addr_any;
1156
1157 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1158 SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
1159 /*
1160 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1161 */
1162 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1163 SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
1164
1165 if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1166 sizeof (sin6_ip), CRED()) != 0) {
1167 mutex_exit(&is->is_mutex);
1168 idm_sodestroy(so_svc->is_so);
1169 return (IDM_STATUS_FAIL);
1170 }
1171 }
1172
1173 idm_set_tgt_connect_options(so_svc->is_so);
1174
1175 if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
1176 mutex_exit(&is->is_mutex);
1177 idm_soshutdown(so_svc->is_so);
1178 idm_sodestroy(so_svc->is_so);
1179 return (IDM_STATUS_FAIL);
1180 }
1181
1182 /* Launch a watch thread */
1183 so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1184 is, 0, &p0, TS_RUN, minclsyspri);
1185
1186 if (so_svc->is_thread == NULL) {
1187 /* Failure to launch; teardown the socket */
1188 mutex_exit(&is->is_mutex);
1189 idm_soshutdown(so_svc->is_so);
1190 idm_sodestroy(so_svc->is_so);
1191 return (IDM_STATUS_FAIL);
1192 }
1193 ksocket_hold(so_svc->is_so);
1194 /* Wait for the port watcher thread to start */
1195 while (!so_svc->is_thread_running)
1196 cv_wait(&is->is_cv, &is->is_mutex);
1197 mutex_exit(&is->is_mutex);
1198
1199 return (IDM_STATUS_SUCCESS);
1200 }
1201
1202 /*
1203 * idm_so_tgt_svc_offline
1204 *
1205 * Stop listening on the IP address and port identified by idm_svc_t.
1206 */
1207 static void
1208 idm_so_tgt_svc_offline(idm_svc_t *is)
1209 {
1210 idm_so_svc_t *so_svc;
1211 mutex_enter(&is->is_mutex);
1212 so_svc = (idm_so_svc_t *)is->is_so_svc;
1213 so_svc->is_thread_running = B_FALSE;
1214 mutex_exit(&is->is_mutex);
1215
1216 /*
1217 * Teardown socket
1218 */
1219 idm_sodestroy(so_svc->is_so);
1220
1221 /*
1222 * Now we expect the port watcher thread to terminate
1223 */
1224 thread_join(so_svc->is_thread_did);
1225 }
1226
1227 /*
1228 * Watch thread for target service connection establishment.
1229 */
1230 void
1231 idm_so_svc_port_watcher(void *arg)
1232 {
1233 idm_svc_t *svc = arg;
1234 ksocket_t new_so;
1235 idm_conn_t *ic;
1236 idm_status_t idmrc;
1237 idm_so_svc_t *so_svc;
1238 int rc;
1239 const uint32_t off = 0;
1240 struct sockaddr_in6 t_addr;
1241 socklen_t t_addrlen;
1242
1243 bzero(&t_addr, sizeof (struct sockaddr_in6));
1244 t_addrlen = sizeof (struct sockaddr_in6);
1245 mutex_enter(&svc->is_mutex);
1246
1247 so_svc = svc->is_so_svc;
1248 so_svc->is_thread_running = B_TRUE;
1249 so_svc->is_thread_did = so_svc->is_thread->t_did;
1250
1251 cv_signal(&svc->is_cv);
1252
1253 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
1254 svc->is_svc_req.sr_port);
1255
1256 while (so_svc->is_thread_running) {
1257 mutex_exit(&svc->is_mutex);
1258
1259 if ((rc = ksocket_accept(so_svc->is_so,
1260 (struct sockaddr *)&t_addr, &t_addrlen,
1261 &new_so, CRED())) != 0) {
1262 mutex_enter(&svc->is_mutex);
1263 if (rc != ECONNABORTED && rc != EINTR) {
1264 IDM_SVC_LOG(CE_NOTE, "idm_so_svc_port_watcher:"
1265 " ksocket_accept failed %d", rc);
1266 }
1267 /*
			 * An unclean shutdown of this thread is not handled
			 * here; simply wait for !is_thread_running.
1270 */
1271 continue;
1272 }
1273 /*
1274 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1275 */
1276 (void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
1277 (char *)&off, sizeof (off), CRED());
1278
1279 idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
1280 &ic);
1281 if (idmrc != IDM_STATUS_SUCCESS) {
1282 /* Drop connection */
1283 idm_soshutdown(new_so);
1284 idm_sodestroy(new_so);
1285 mutex_enter(&svc->is_mutex);
1286 continue;
1287 }
1288
1289 idmrc = idm_so_tgt_conn_create(ic, new_so);
1290 if (idmrc != IDM_STATUS_SUCCESS) {
1291 idm_svc_conn_destroy(ic);
1292 idm_soshutdown(new_so);
1293 idm_sodestroy(new_so);
1294 mutex_enter(&svc->is_mutex);
1295 continue;
1296 }
1297
1298 /*
1299 * Kick the state machine. At CS_S3_XPT_UP the state machine
1300 * will notify the client (target) about the new connection.
1301 */
1302 idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
1303
1304 mutex_enter(&svc->is_mutex);
1305 }
1306 ksocket_rele(so_svc->is_so);
1307 so_svc->is_thread_running = B_FALSE;
1308 mutex_exit(&svc->is_mutex);
1309
1310 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
1311 svc->is_svc_req.sr_port);
1312
1313 thread_exit();
1314 }
1315
1316 /*
1317 * idm_so_free_task_rsrc() stops any ongoing processing of the task and
1318 * frees resources associated with the task.
1319 *
1320 * It's not clear that this should return idm_status_t. What do we do
1321 * if it fails?
1322 */
1323 static idm_status_t
1324 idm_so_free_task_rsrc(idm_task_t *idt)
1325 {
1326 idm_buf_t *idb, *next_idb;
1327
1328 /*
1329 * There is nothing to cleanup on initiator connections
1330 */
1331 if (IDM_CONN_ISINI(idt->idt_ic))
1332 return (IDM_STATUS_SUCCESS);
1333
1334 /*
1335 * If this is a target connection, call idm_buf_rx_from_ini_done for
1336 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
1337 *
1338 * In addition, remove any buffers associated with this task from
1339 * the ic_tx_list. We'll do this by walking the idt_inbufv list, but
1340 * items don't actually get removed from that list (and completion
1341 * routines called) until idm_task_cleanup.
1342 */
1343 mutex_enter(&idt->idt_mutex);
1344
1345 for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
1346 next_idb = list_next(&idt->idt_outbufv, idb);
1347 if (idb->idb_in_transport) {
1348 /*
1349 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1350 */
1351 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1352 uintptr_t, idb->idb_buf,
1353 uint32_t, idb->idb_bufoffset,
1354 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1355 uint32_t, idb->idb_xfer_len,
1356 int, XFER_BUF_RX_FROM_INI);
1357 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1358 mutex_enter(&idt->idt_mutex);
1359 }
1360 }
1361
1362 for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
1363 next_idb = list_next(&idt->idt_inbufv, idb);
1364 /*
1365 * We want to remove these items from the tx_list as well,
1366 * but knowing it's in the idt_inbufv list is not a guarantee
1367 * that it's in the tx_list. If it's on the tx list then
1368 * let idm_sotx_thread() clean it up.
1369 */
1370 if (idb->idb_in_transport && !idb->idb_tx_thread) {
1371 /*
1372 * idm_buf_tx_to_ini_done releases idt->idt_mutex
1373 */
1374 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1375 uintptr_t, idb->idb_buf,
1376 uint32_t, idb->idb_bufoffset,
1377 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1378 uint32_t, idb->idb_xfer_len,
1379 int, XFER_BUF_TX_TO_INI);
1380 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
1381 mutex_enter(&idt->idt_mutex);
1382 }
1383 }
1384
1385 mutex_exit(&idt->idt_mutex);
1386
1387 return (IDM_STATUS_SUCCESS);
1388 }
1389
1390 /*
1391 * idm_so_negotiate_key_values() validates the key values for this connection
1392 */
1393 /* ARGSUSED */
1394 static kv_status_t
1395 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
1396 nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
1397 {
1398 /* All parameters are negotiated at the iscsit level */
1399 return (KV_HANDLED);
1400 }
1401
1402 /*
1403 * idm_so_notice_key_values() activates the negotiated key values for
1404 * this connection.
1405 */
1406 static void
1407 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1408 {
1409 char *nvp_name;
1410 nvpair_t *nvp;
1411 nvpair_t *next_nvp;
1412 int nvrc;
1413 idm_status_t idm_status;
1414 const idm_kv_xlate_t *ikvx;
1415 uint64_t num_val;
1416
1417 for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1418 nvp != NULL; nvp = next_nvp) {
1419 next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1420 nvp_name = nvpair_name(nvp);
1421
1422 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1423 switch (ikvx->ik_key_id) {
1424 case KI_HEADER_DIGEST:
1425 case KI_DATA_DIGEST:
1426 idm_status = idm_so_handle_digest(it, nvp, ikvx);
1427 ASSERT(idm_status == 0);
1428
1429 /* Remove processed item from negotiated_nvl list */
1430 nvrc = nvlist_remove_all(
1431 negotiated_nvl, ikvx->ik_key_name);
1432 ASSERT(nvrc == 0);
1433 break;
1434 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1435 /*
1436 * Just pass the value down to idm layer.
1437 * No need to remove it from negotiated_nvl list here.
1438 */
1439 nvrc = nvpair_value_uint64(nvp, &num_val);
1440 ASSERT(nvrc == 0);
1441 it->ic_conn_params.max_xmit_dataseglen =
1442 (uint32_t)num_val;
1443 break;
1444 default:
1445 break;
1446 }
1447 }
1448 }
1449
1450 /*
1451 * idm_so_declare_key_values() declares the key values for this connection
1452 */
1453 /* ARGSUSED */
1454 static kv_status_t
1455 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1456 nvlist_t *outgoing_nvl)
1457 {
1458 char *nvp_name;
1459 nvpair_t *nvp;
1460 nvpair_t *next_nvp;
1461 kv_status_t kvrc;
1462 int nvrc = 0;
1463 const idm_kv_xlate_t *ikvx;
1464 uint64_t num_val;
1465
1466 for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1467 nvp != NULL && nvrc == 0; nvp = next_nvp) {
1468 next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1469 nvp_name = nvpair_name(nvp);
1470
1471 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1472 switch (ikvx->ik_key_id) {
1473 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1474 if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1475 break;
1476 }
1477 if (outgoing_nvl &&
1478 (nvrc = nvlist_add_uint64(outgoing_nvl,
1479 nvp_name, num_val)) != 0) {
1480 break;
1481 }
1482 it->ic_conn_params.max_recv_dataseglen =
1483 (uint32_t)num_val;
1484 break;
1485 default:
1486 break;
1487 }
1488 }
1489 kvrc = idm_nvstat_to_kvstat(nvrc);
1490 return (kvrc);
1491 }
1492
1493 static idm_status_t
1494 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1495 const idm_kv_xlate_t *ikvx)
1496 {
1497 int nvrc;
1498 char *digest_choice_string;
1499
1500 nvrc = nvpair_value_string(digest_choice,
1501 &digest_choice_string);
1502 ASSERT(nvrc == 0);
1503 if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1504 switch (ikvx->ik_key_id) {
1505 case KI_HEADER_DIGEST:
1506 it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1507 break;
1508 case KI_DATA_DIGEST:
1509 it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1510 break;
1511 default:
1512 ASSERT(0);
1513 break;
1514 }
1515 } else if (strcasecmp(digest_choice_string, "none") == 0) {
1516 switch (ikvx->ik_key_id) {
1517 case KI_HEADER_DIGEST:
1518 it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1519 break;
1520 case KI_DATA_DIGEST:
1521 it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1522 break;
1523 default:
1524 ASSERT(0);
1525 break;
1526 }
1527 } else {
1528 ASSERT(0);
1529 }
1530
1531 return (IDM_STATUS_SUCCESS);
1532 }
1533
1534
1535 /*
 * idm_so_conn_is_capable() verifies that the requested connection can be
 * handled by the sockets transport (it always can).
1538 */
1539 /* ARGSUSED */
1540 static boolean_t
1541 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1542 {
1543 return (B_TRUE);
1544 }
1545
1546 /*
1547 * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1548 * idm_sorecv_scsidata() function invoked earlier actually reads the data
1549 * off the socket into the appropriate buffers.
1550 */
1551 static void
1552 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1553 {
1554 iscsi_data_hdr_t *bhs;
1555 idm_task_t *idt;
1556 idm_buf_t *idb;
1557 uint32_t datasn;
1558 size_t offset;
1559 iscsi_hdr_t *ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1560 iscsi_data_rsp_hdr_t *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1561
1562 ASSERT(ic != NULL);
1563 ASSERT(pdu != NULL);
1564
1565 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1566 datasn = ntohl(bhs->datasn);
1567 offset = ntohl(bhs->offset);
1568
1569 ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP);
1570
1571 /*
1572 * Look up the task corresponding to the initiator task tag
1573 * to get the buffers affiliated with the task.
1574 */
1575 idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1576 if (idt == NULL) {
1577 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1578 idm_pdu_rx_protocol_error(ic, pdu);
1579 return;
1580 }
1581
1582 idb = pdu->isp_sorx_buf;
1583 if (idb == NULL) {
1584 IDM_CONN_LOG(CE_WARN,
1585 "idm_so_rx_datain: failed to find buffer");
1586 idm_task_rele(idt);
1587 idm_pdu_rx_protocol_error(ic, pdu);
1588 return;
1589 }
1590
1591 /*
1592 * DataSN values should be sequential and should not have any gaps or
1593 * repetitions. Check the DataSN with the one stored in the task.
1594 */
1595 if (datasn == idt->idt_exp_datasn) {
1596 idt->idt_exp_datasn++; /* keep track of DataSN received */
1597 } else {
1598 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1599 idm_task_rele(idt);
1600 idm_pdu_rx_protocol_error(ic, pdu);
1601 return;
1602 }
1603
1604 /*
	 * PDUs within a sequence should arrive at continuously increasing
	 * buffer offsets
1607 */
1608 if (offset != idb->idb_exp_offset) {
1609 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1610 idm_task_rele(idt);
1611 idm_pdu_rx_protocol_error(ic, pdu);
1612 return;
1613 }
1614 /* Expected next relative buffer offset */
1615 idb->idb_exp_offset += n2h24(bhs->dlength);
1616 idt->idt_rx_bytes += n2h24(bhs->dlength);
1617
1618 idm_task_rele(idt);
1619
1620 /*
1621 * For now call scsi_rsp which will process the data rsp
1622 * Revisit, need to provide an explicit client entry point for
1623 * phase collapse completions.
1624 */
1625 if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) &&
1626 (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1627 (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1628 }
1629
1630 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1631 }
1632
1633 /*
1634 * The idm_so_rx_dataout() function is used by the iSCSI target to read
1635 * data from the Data-Out PDU sent by the iSCSI initiator.
1636 *
1637 * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1638 * task to get the buffers associated with the PDU. A PDU might span buffers.
1639 * The data is then read into the respective buffer.
1640 */
1641 static void
1642 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1643 {
1644
1645 iscsi_data_hdr_t *bhs;
1646 idm_task_t *idt;
1647 idm_buf_t *idb;
1648 size_t offset;
1649
1650 ASSERT(ic != NULL);
1651 ASSERT(pdu != NULL);
1652
1653 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1654 offset = ntohl(bhs->offset);
1655 ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA);
1656
1657 /*
1658 * Look up the task corresponding to the initiator task tag
1659 * to get the buffers affiliated with the task.
1660 */
1661 idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1662 if (idt == NULL) {
1663 IDM_CONN_LOG(CE_WARN,
1664 "idm_so_rx_dataout: failed to find task");
1665 idm_pdu_rx_protocol_error(ic, pdu);
1666 return;
1667 }
1668
1669 idb = pdu->isp_sorx_buf;
1670 if (idb == NULL) {
1671 IDM_CONN_LOG(CE_WARN,
1672 "idm_so_rx_dataout: failed to find buffer");
1673 idm_task_rele(idt);
1674 idm_pdu_rx_protocol_error(ic, pdu);
1675 return;
1676 }
1677
1678 /* Keep track of data transferred - check data offsets */
1679 if (offset != idb->idb_exp_offset) {
1680 IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1681 "%ld, %d", offset, idb->idb_exp_offset);
1682 idm_task_rele(idt);
1683 idm_pdu_rx_protocol_error(ic, pdu);
1684 return;
1685 }
1686 /* Expected next relative offset */
1687 idb->idb_exp_offset += ntoh24(bhs->dlength);
1688 idt->idt_rx_bytes += n2h24(bhs->dlength);
1689
1690 /*
1691 * Call the buffer callback when the transfer is complete
1692 *
1693 * The connection state machine should only abort tasks after
1694 * shutting down the connection so we are assured that there
1695 * won't be a simultaneous attempt to abort this task at the
1696 * same time as we are processing this PDU (due to a connection
1697 * state change).
1698 */
1699 if (bhs->flags & ISCSI_FLAG_FINAL) {
1700 /*
1701 * We only want to call idm_buf_rx_from_ini_done once
1702 * per transfer. It's possible that this task has
1703 * already been aborted in which case
1704 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1705 * for each buffer with idb_in_transport==B_TRUE. To
1706 * close this window and ensure that this doesn't happen,
1707 * we'll clear idb->idb_in_transport now while holding
1708 * the task mutex. This is only really an issue for
1709 * SCSI task abort -- if tasks were being aborted because
1710 * of a connection state change the state machine would
1711 * have already stopped the receive thread.
1712 */
1713 mutex_enter(&idt->idt_mutex);
1714
1715 /*
1716 * Release the task hold here (obtained in idm_task_find)
1717 * because the task may complete synchronously during
1718 * idm_buf_rx_from_ini_done. Since we still have an active
1719 * buffer we know there is at least one additional hold on idt.
1720 */
1721 idm_task_rele(idt);
1722
1723 /*
1724 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1725 */
1726 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1727 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
1728 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1729 uint32_t, idb->idb_xfer_len,
1730 int, XFER_BUF_RX_FROM_INI);
1731 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1732 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1733 return;
1734 }
1735
1736 idm_task_rele(idt);
1737 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1738 }
1739
1740 /*
1741 * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1742 * the R2T PDU sent by the iSCSI target indicating that it is ready to
1743 * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
 * and looks up the task in the task tree using the itt to get the output
 * buffers associated with the task. The R2T PDU contains the offset of the
1746 * requested data and the data length. This function then constructs a
1747 * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1748 * PDU is associated with the R2T by the Target Transfer Tag (ttt).
1749 */
1750
1751 static void
1752 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1753 {
1754 idm_task_t *idt;
1755 idm_buf_t *idb;
1756 iscsi_rtt_hdr_t *rtt_hdr;
1757 uint32_t data_offset;
1758 uint32_t data_length;
1759
1760 ASSERT(ic != NULL);
1761 ASSERT(pdu != NULL);
1762
1763 rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1764 data_offset = ntohl(rtt_hdr->data_offset);
1765 data_length = ntohl(rtt_hdr->data_length);
1766 idt = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1767
1768 if (idt == NULL) {
1769 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1770 idm_pdu_rx_protocol_error(ic, pdu);
1771 return;
1772 }
1773
1774 /* Find the buffer bound to the task by the iSCSI initiator */
1775 mutex_enter(&idt->idt_mutex);
1776 idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1777 if (idb == NULL) {
1778 mutex_exit(&idt->idt_mutex);
1779 idm_task_rele(idt);
1780 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1781 idm_pdu_rx_protocol_error(ic, pdu);
1782 return;
1783 }
1784
	/* make sure the requested data range fits within the buffer */
1786 if (data_offset + data_length > idb->idb_buflen) {
1787 /* Overflow */
1788 mutex_exit(&idt->idt_mutex);
1789 idm_task_rele(idt);
1790 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1791 "buffer");
1792 idm_pdu_rx_protocol_error(ic, pdu);
1793 return;
1794 }
1795
1796 idt->idt_r2t_ttt = rtt_hdr->ttt;
1797 idt->idt_exp_datasn = 0;
1798
1799 idm_so_send_rtt_data(ic, idt, idb, data_offset,
1800 ntohl(rtt_hdr->data_length));
1801 /*
1802 * the idt_mutex is released in idm_so_send_rtt_data
1803 */
1804
1805 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1806 idm_task_rele(idt);
1807
1808 }
1809
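/*
 * idm_sorecvdata()
 * Receive the data segment of a PDU using the iovec already set up in the
 * PDU, appending entries for pad bytes and the data digest when negotiated,
 * then verify the data digest.
 */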
1810 idm_status_t
1811 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1812 {
1813 uint8_t pad[ISCSI_PAD_WORD_LEN];
1814 int pad_len;
1815 uint32_t data_digest_crc;
1816 uint32_t crc_calculated;
1817 int total_len;
1818 idm_so_conn_t *so_conn;
1819
1820 so_conn = ic->ic_transport_private;
1821
1822 pad_len = ((ISCSI_PAD_WORD_LEN -
1823 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1824 (ISCSI_PAD_WORD_LEN - 1));
1825
1826 ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1827
1828 total_len = pdu->isp_datalen;
1829
1830 if (pad_len) {
1831 pdu->isp_iov[pdu->isp_iovlen].iov_base = (char *)&pad;
1832 pdu->isp_iov[pdu->isp_iovlen].iov_len = pad_len;
1833 total_len += pad_len;
1834 pdu->isp_iovlen++;
1835 }
1836
1837 /* setup data digest */
1838 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1839 pdu->isp_iov[pdu->isp_iovlen].iov_base =
1840 (char *)&data_digest_crc;
1841 pdu->isp_iov[pdu->isp_iovlen].iov_len =
1842 sizeof (data_digest_crc);
1843 total_len += sizeof (data_digest_crc);
1844 pdu->isp_iovlen++;
1845 }
1846
1847 pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1848
1849 if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1850 pdu->isp_iovlen, total_len) != 0) {
1851 return (IDM_STATUS_IO);
1852 }
1853
1854 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1855 crc_calculated = idm_crc32c(pdu->isp_data,
1856 pdu->isp_datalen);
1857 if (pad_len) {
1858 crc_calculated = idm_crc32c_continued((char *)&pad,
1859 pad_len, crc_calculated);
1860 }
1861 if (crc_calculated != data_digest_crc) {
1862 IDM_CONN_LOG(CE_WARN,
1863 "idm_sorecvdata: "
1864 "CRC error: actual 0x%x, calc 0x%x",
1865 data_digest_crc, crc_calculated);
1866
1867 /* Invalid Data Digest */
1868 return (IDM_STATUS_DATA_DIGEST);
1869 }
1870 }
1871
1872 return (IDM_STATUS_SUCCESS);
1873 }
1874
1875 /*
1876 * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1877 * Data-type PDU header must be read into the idm_pdu_t structure prior to
1878 * calling this function.
1879 */
1880 idm_status_t
1881 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1882 {
1883 iscsi_data_hdr_t *bhs;
1884 idm_task_t *task;
1885 uint32_t offset;
1886 uint8_t opcode;
1887 uint32_t dlength;
1888 list_t *buflst;
1889 uint32_t xfer_bytes;
1890 idm_status_t status;
1891
1892 ASSERT(ic != NULL);
1893 ASSERT(pdu != NULL);
1894
1895 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1896
1897 offset = ntohl(bhs->offset);
1898 opcode = bhs->opcode;
1899 dlength = n2h24(bhs->dlength);
1900
1901 ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
1902 (opcode == ISCSI_OP_SCSI_DATA));
1903
	/*
	 * A successful lookup implicitly gets a "hold" on the task. This
	 * hold must be released before leaving this function. At one point
	 * we were caching this task context and retaining the hold, but it
	 * turned out to be very difficult to release the hold properly.
	 * The task can be aborted and the connection shut down between this
	 * call and the subsequent expected call to idm_so_rx_datain/
	 * idm_so_rx_dataout (in which case those functions are not called).
	 * Releasing the hold in the PDU callback doesn't work well either
	 * because the whole task may be completed by then, at which point
	 * it is too late to release the hold -- for better or worse this
	 * code doesn't wait on the refcnts during normal operation.
	 * idm_task_find() is very fast and it is not a huge burden if we
	 * have to do it twice.
	 */
1919 task = idm_task_find(ic, bhs->itt, bhs->ttt);
1920 if (task == NULL) {
1921 IDM_CONN_LOG(CE_WARN,
1922 "idm_sorecv_scsidata: could not find task");
1923 return (IDM_STATUS_FAIL);
1924 }
1925
1926 mutex_enter(&task->idt_mutex);
1927 buflst = (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
1928 &task->idt_inbufv : &task->idt_outbufv;
1929 pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
1930 mutex_exit(&task->idt_mutex);
1931
1932 if (pdu->isp_sorx_buf == NULL) {
1933 idm_task_rele(task);
1934 IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
1935 "buffer for offset %x opcode=%x",
1936 offset, opcode);
1937 return (IDM_STATUS_FAIL);
1938 }
1939
1940 xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
1941 ASSERT(xfer_bytes != 0);
1942 if (xfer_bytes != dlength) {
1943 idm_task_rele(task);
1944 /*
1945 * Buffer overflow, connection error. The PDU data is still
1946 * sitting in the socket so we can't use the connection
1947 * again until that data is drained.
1948 */
1949 return (IDM_STATUS_FAIL);
1950 }
1951
1952 status = idm_sorecvdata(ic, pdu);
1953
1954 idm_task_rele(task);
1955
1956 return (status);
1957 }
1958
1959 static uint32_t
1960 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
1961 {
1962 uint32_t buf_ro = ro - idb->idb_bufoffset;
1963 uint32_t xfer_len = min(dlength, idb->idb_buflen - buf_ro);
1964
1965 ASSERT(ro >= idb->idb_bufoffset);
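	/*
	 * Worked example (illustrative values): a Data PDU with Buffer
	 * Offset ro = 16384 landing in a buffer bound at
	 * idb_bufoffset = 8192 with idb_buflen = 65536 gives buf_ro = 8192;
	 * with dlength = 8192 the transfer length is
	 * min(8192, 65536 - 8192) = 8192, so the iovec entry points 8192
	 * bytes into idb_buf.
	 */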
1966
1967 pdu->isp_iov[pdu->isp_iovlen].iov_base =
1968 (caddr_t)idb->idb_buf + buf_ro;
1969 pdu->isp_iov[pdu->isp_iovlen].iov_len = xfer_len;
1970 pdu->isp_iovlen++;
1971
1972 return (xfer_len);
1973 }
1974
1975 int
1976 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1977 {
1978 pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
1979 ASSERT(pdu->isp_data != NULL);
1980
1981 pdu->isp_databuflen = pdu->isp_datalen;
1982 pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
1983 pdu->isp_iov[0].iov_len = pdu->isp_datalen;
1984 pdu->isp_iovlen = 1;
1985 /*
1986 * Since we are associating a new data buffer with this received
1987 * PDU we need to set a specific callback to free the data
1988 * after the PDU is processed.
1989 */
1990 pdu->isp_flags |= IDM_PDU_ADDL_DATA;
1991 pdu->isp_callback = idm_sorx_addl_pdu_cb;
1992
1993 return (idm_sorecvdata(ic, pdu));
1994 }
1995
1996 void
1997 idm_sorx_thread(void *arg)
1998 {
1999 boolean_t conn_failure = B_FALSE;
2000 idm_conn_t *ic = (idm_conn_t *)arg;
2001 idm_so_conn_t *so_conn;
2002 idm_pdu_t *pdu;
2003 idm_status_t rc;
2004
2005 idm_conn_hold(ic);
2006
2007 mutex_enter(&ic->ic_mutex);
2008
2009 so_conn = ic->ic_transport_private;
2010 so_conn->ic_rx_thread_running = B_TRUE;
2011 so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
2012 cv_signal(&ic->ic_cv);
2013
2014 while (so_conn->ic_rx_thread_running) {
2015 mutex_exit(&ic->ic_mutex);
2016
		/*
		 * Get a PDU with the default header size (large enough for
		 * the BHS plus any anticipated AHS). A PDU from the cache
		 * will have all values set correctly for socket RX,
		 * including the callback.
		 */
2023 pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
2024 pdu->isp_ic = ic;
2025 pdu->isp_flags = 0;
2026 pdu->isp_transport_hdrlen = 0;
2027
2028 if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
2029 /*
2030 * Call idm_pdu_complete so that we call the callback
2031 * and ensure any memory allocated in idm_sorecvhdr
2032 * gets freed up.
2033 */
2034 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2035
2036 /*
2037 * If ic_rx_thread_running is still set then
2038 * this is some kind of connection problem
2039 * on the socket. In this case we want to
2040 * generate an event. Otherwise some other
2041 * thread closed the socket due to another
2042 * issue in which case we don't need to
2043 * generate an event.
2044 */
2045 mutex_enter(&ic->ic_mutex);
2046 if (so_conn->ic_rx_thread_running) {
2047 conn_failure = B_TRUE;
2048 so_conn->ic_rx_thread_running = B_FALSE;
2049 }
2050
2051 continue;
2052 }
2053
		/*
		 * The header has been read and validated. Now we need
		 * to read the PDU data payload (if present). SCSI data
		 * needs to be transferred from the socket directly into
		 * the associated transfer buffer for the SCSI task.
		 */
2060 if (pdu->isp_datalen != 0) {
2061 if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
2062 (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
2063 rc = idm_sorecv_scsidata(ic, pdu);
				/*
				 * All SCSI errors are fatal to the
				 * connection right now since we have no
				 * place to put the data. What we need is
				 * some kind of sink to dispose of unwanted
				 * SCSI data. For example, an invalid task
				 * tag should not have to kill the connection
				 * (although we may still choose to drop it).
				 */
2073 } else {
				/*
				 * Not a data PDU, so allocate a buffer for
				 * the data segment and read the remaining
				 * data.
				 */
2078 rc = idm_sorecv_nonscsidata(ic, pdu);
2079 }
2080 if (rc != 0) {
2081 /*
2082 * Call idm_pdu_complete so that we call the
2083 * callback and ensure any memory allocated
2084 * in idm_sorecvhdr gets freed up.
2085 */
2086 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2087
2088 /*
2089 * If ic_rx_thread_running is still set then
2090 * this is some kind of connection problem
2091 * on the socket. In this case we want to
2092 * generate an event. Otherwise some other
2093 * thread closed the socket due to another
2094 * issue in which case we don't need to
2095 * generate an event.
2096 */
2097 mutex_enter(&ic->ic_mutex);
2098 if (so_conn->ic_rx_thread_running) {
2099 conn_failure = B_TRUE;
2100 so_conn->ic_rx_thread_running = B_FALSE;
2101 }
2102 continue;
2103 }
2104 }
2105
2106 /*
2107 * Process RX PDU
2108 */
2109 idm_pdu_rx(ic, pdu);
2110
2111 mutex_enter(&ic->ic_mutex);
2112 }
2113
2114 mutex_exit(&ic->ic_mutex);
2115
2116 /*
2117 * If we dropped out of the RX processing loop because of
2118 * a socket problem or other connection failure (including
2119 * digest errors) then we need to generate a state machine
2120 * event to shut the connection down.
2121 * If the state machine is already in, for example, INIT_ERROR, this
2122 * event will get dropped, and the TX thread will never be notified
2123 * to shut down. To be safe, we'll just notify it here.
2124 */
2125 if (conn_failure) {
2126 if (so_conn->ic_tx_thread_running) {
2127 so_conn->ic_tx_thread_running = B_FALSE;
2128 mutex_enter(&so_conn->ic_tx_mutex);
2129 cv_signal(&so_conn->ic_tx_cv);
2130 mutex_exit(&so_conn->ic_tx_mutex);
2131 }
2132
2133 idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
2134 }
2135
2136 idm_conn_rele(ic);
2137
2138 thread_exit();
2139 }
2140
2141 /*
2142 * idm_so_tx
2143 *
2144 * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2145 * point. By definition, it is supposed to be fast. So, simply queue
2146 * the entry and return. The real work is done by idm_i_so_tx() via
2147 * idm_sotx_thread().
2148 */
2149
2150 static void
2151 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2152 {
2153 idm_so_conn_t *so_conn = ic->ic_transport_private;
2154
2155 ASSERT(pdu->isp_ic == ic);
2156 mutex_enter(&so_conn->ic_tx_mutex);
2157
2158 if (!so_conn->ic_tx_thread_running) {
2159 mutex_exit(&so_conn->ic_tx_mutex);
2160 idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2161 return;
2162 }
2163
2164 list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2165 cv_signal(&so_conn->ic_tx_cv);
2166 mutex_exit(&so_conn->ic_tx_mutex);
2167 }
2168
2169 static idm_status_t
2170 idm_i_so_tx(idm_pdu_t *pdu)
2171 {
2172 idm_conn_t *ic = pdu->isp_ic;
2173 idm_status_t status = IDM_STATUS_SUCCESS;
2174 uint8_t pad[ISCSI_PAD_WORD_LEN];
2175 int pad_len;
2176 uint32_t hdr_digest_crc;
2177 uint32_t data_digest_crc = 0;
2178 int total_len = 0;
2179 int iovlen = 0;
2180 struct iovec iov[6];
2181 idm_so_conn_t *so_conn;
2182
2183 so_conn = ic->ic_transport_private;
2184
2185 /* Setup BHS */
2186 iov[iovlen].iov_base = (caddr_t)pdu->isp_hdr;
2187 iov[iovlen].iov_len = pdu->isp_hdrlen;
2188 total_len += iov[iovlen].iov_len;
2189 iovlen++;
2190
2191 /* Setup header digest */
2192 if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2193 (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2194 hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2195
2196 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
2197 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
2198 total_len += iov[iovlen].iov_len;
2199 iovlen++;
2200 }
2201
2202 /* Setup the data */
2203 if (pdu->isp_datalen) {
2204 idm_task_t *idt;
2205 idm_buf_t *idb;
2206 iscsi_data_hdr_t *ihp;
2207 ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2208 /* Write of immediate data */
2209 if (ic->ic_ffp &&
2210 (ihp->opcode == ISCSI_OP_SCSI_CMD ||
2211 ihp->opcode == ISCSI_OP_SCSI_DATA)) {
2212 idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2213 if (idt) {
2214 mutex_enter(&idt->idt_mutex);
2215 idb = idm_buf_find(&idt->idt_outbufv, 0);
2216 mutex_exit(&idt->idt_mutex);
2217 /*
2218 * If the initiator call to idm_buf_alloc
2219 * failed then we can get to this point
2220 * without a bound buffer. The associated
2221 * connection failure will clean things up
2222 * later. It would be nice to come up with
2223 * a cleaner way to handle this. In
2224 * particular it seems absurd to look up
2225 * the task and the buffer just to update
2226 * this counter.
2227 */
2228 if (idb)
2229 idb->idb_xfer_len += pdu->isp_datalen;
2230 idm_task_rele(idt);
2231 }
2232 }
2233
2234 iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2235 iov[iovlen].iov_len = pdu->isp_datalen;
2236 total_len += iov[iovlen].iov_len;
2237 iovlen++;
2238 }
2239
2240 /* Setup the data pad if necessary */
2241 pad_len = ((ISCSI_PAD_WORD_LEN -
2242 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2243 (ISCSI_PAD_WORD_LEN - 1));
2244
2245 if (pad_len) {
2246 bzero(pad, sizeof (pad));
2247 iov[iovlen].iov_base = (void *)&pad;
2248 iov[iovlen].iov_len = pad_len;
2249 total_len += iov[iovlen].iov_len;
2250 iovlen++;
2251 }
2252
2253 /*
2254 * Setup the data digest if enabled. Data-digest is not sent
2255 * for login-phase PDUs.
2256 */
2257 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2258 ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2259 (pdu->isp_datalen || pad_len)) {
2260 /*
2261 * RFC3720/10.2.3: A zero-length Data Segment also
2262 * implies a zero-length data digest.
2263 */
2264 if (pdu->isp_datalen) {
2265 data_digest_crc = idm_crc32c(pdu->isp_data,
2266 pdu->isp_datalen);
2267 }
2268 if (pad_len) {
2269 data_digest_crc = idm_crc32c_continued(&pad,
2270 pad_len, data_digest_crc);
2271 }
2272
2273 iov[iovlen].iov_base = (caddr_t)&data_digest_crc;
2274 iov[iovlen].iov_len = sizeof (data_digest_crc);
2275 total_len += iov[iovlen].iov_len;
2276 iovlen++;
2277 }
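	/*
	 * The iovec array now describes the on-wire PDU layout:
	 * BHS | [header digest] | [data] | [pad] | [data digest],
	 * at most five entries, which fits the iov[6] declared above.
	 */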
2278
2279 /* Transmit the PDU */
2280 if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2281 total_len) != 0) {
2282 /* Set error status */
2283 IDM_CONN_LOG(CE_WARN,
2284 "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2285 "data: %p", (void *) so_conn->ic_so, (void *) ic,
2286 (void *) pdu->isp_data);
2287 status = IDM_STATUS_IO;
2288 }
2289
2290 /*
2291 * Success does not mean that the PDU actually reached the
2292 * remote node since it could get dropped along the way.
2293 */
2294 idm_pdu_complete(pdu, status);
2295
2296 return (status);
2297 }
2298
2299 /*
2300 * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2301 * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2302 * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2303 * A target can invoke this function multiple times for a single read command
2304 * (identified by the same ITT) to split the input into several sequences.
2305 *
2306 * DataSN starts with 0 for the first data PDU of an input command and advances
2307 * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2308 * which is set to 1 for the last data PDU of a sequence.
2309 * If the initiator supports phase collapse, the status bit must be set along
2310 * with the F bit to indicate that the status is shipped together with the last
2311 * Data-In PDU.
2312 *
 * The data PDUs within a sequence are sent in order of increasing buffer
 * offset, i.e., the initiator and target must have negotiated
 * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2316 *
2317 * Caller holds idt->idt_mutex
2318 */
2319 static idm_status_t
2320 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2321 {
2322 idm_so_conn_t *so_conn = idb->idb_ic->ic_transport_private;
2323 idm_pdu_t tmppdu;
2324
2325 ASSERT(mutex_owned(&idt->idt_mutex));
2326
2327 /*
2328 * Put the idm_buf_t on the tx queue. It will be transmitted by
2329 * idm_sotx_thread.
2330 */
2331 mutex_enter(&so_conn->ic_tx_mutex);
2332
2333 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2334 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2335 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2336 uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2337
2338 if (!so_conn->ic_tx_thread_running) {
2339 mutex_exit(&so_conn->ic_tx_mutex);
		/*
		 * Don't release idt->idt_mutex since we're supposed to hold
		 * it when calling idm_buf_tx_to_ini_done.
		 */
2344 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2345 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2346 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2347 uint32_t, idb->idb_xfer_len,
2348 int, XFER_BUF_TX_TO_INI);
2349 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2350 return (IDM_STATUS_FAIL);
2351 }
2352
	/*
	 * Build a template for the data PDU headers we will use so that
	 * the SN values will stay consistent with other PDUs we are
	 * transmitting, like R2T and SCSI status.
	 */
2358 bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2359 tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2360 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2361 ISCSI_OP_SCSI_DATA_RSP);
2362 idb->idb_tx_thread = B_TRUE;
2363 list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2364 cv_signal(&so_conn->ic_tx_cv);
2365 mutex_exit(&so_conn->ic_tx_mutex);
2366 mutex_exit(&idt->idt_mutex);
2367
2368 /*
2369 * Returning success here indicates the transfer was successfully
2370 * dispatched -- it does not mean that the transfer completed
2371 * successfully.
2372 */
2373 return (IDM_STATUS_SUCCESS);
2374 }
2375
2376 /*
2377 * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2378 * data blocks it is ready to receive from the initiator in response to a WRITE
2379 * SCSI command. The target iSCSI layer passes the information about the desired
2380 * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2381 * offset and datalen are passed via the 'idb' argument.
2382 *
2383 * Scope for Prototype build:
 * R2Ts are required for any Data-Out PDU, i.e., the initiator and target must
 * have negotiated "InitialR2T" to "Yes".
2386 *
2387 * Caller holds idt->idt_mutex
2388 */
2389 static idm_status_t
2390 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2391 {
2392 idm_pdu_t *pdu;
2393 iscsi_rtt_hdr_t *rtt;
2394
2395 ASSERT(mutex_owned(&idt->idt_mutex));
2396
2397 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2398 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2399 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2400 uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2401
2402 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2403 pdu->isp_ic = idt->idt_ic;
2404 pdu->isp_flags = IDM_PDU_SET_STATSN;
2405 bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2406
2407 /* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
2408 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2409
2410 /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2411 rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2412
2413 rtt->opcode = ISCSI_OP_RTT_RSP;
2414 rtt->flags = ISCSI_FLAG_FINAL;
2415 rtt->data_offset = htonl(idb->idb_bufoffset);
2416 rtt->data_length = htonl(idb->idb_xfer_len);
2417 rtt->rttsn = htonl(idt->idt_exp_rttsn++);
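	/*
	 * Illustrative example (values are hypothetical): for a 64KB write
	 * buffer bound at offset 0 (idb_bufoffset == 0,
	 * idb_xfer_len == 65536), the R2T built here carries data_offset 0,
	 * data_length 65536 and the task's current (pre-increment)
	 * idt_exp_rttsn as its RTTSN.
	 */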
2418
2419 /* Keep track of buffer offsets */
2420 idb->idb_exp_offset = idb->idb_bufoffset;
2421 mutex_exit(&idt->idt_mutex);
2422
2423 /*
2424 * Transmit the PDU.
2425 */
2426 idm_pdu_tx(pdu);
2427
2428 return (IDM_STATUS_SUCCESS);
2429 }
2430
2431 static idm_status_t
2432 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2433 {
2434 if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2435 idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2436 KM_NOSLEEP);
2437 idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2438 } else {
2439 idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2440 idb->idb_buf_private = NULL;
2441 }
2442
2443 if (idb->idb_buf == NULL) {
2444 IDM_CONN_LOG(CE_NOTE,
2445 "idm_so_buf_alloc: failed buffer allocation");
2446 return (IDM_STATUS_FAIL);
2447 }
2448
2449 return (IDM_STATUS_SUCCESS);
2450 }
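/*
 * Note on the cache selection above: only requests whose size falls in
 * (IDM_SO_BUF_CACHE_LB, IDM_SO_BUF_CACHE_UB] are satisfied from
 * idm_so_128k_buf_cache; anything else falls back to kmem_alloc() with
 * KM_NOSLEEP. For example (purely illustrative bounds), with a lower bound
 * of 32KB and an upper bound of 128KB, a 64KB request would come from the
 * cache while a 256KB request would use kmem_alloc().
 */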
2451
2452 /* ARGSUSED */
2453 static idm_status_t
2454 idm_so_buf_setup(idm_buf_t *idb)
2455 {
2456 /* Ensure bufalloc'd flag is unset */
2457 idb->idb_bufalloc = B_FALSE;
2458
2459 return (IDM_STATUS_SUCCESS);
2460 }
2461
2462 /* ARGSUSED */
2463 static void
2464 idm_so_buf_teardown(idm_buf_t *idb)
2465 {
2466 /* nothing to do here */
2467 }
2468
2469 static void
2470 idm_so_buf_free(idm_buf_t *idb)
2471 {
2472 if (idb->idb_buf_private == NULL) {
2473 kmem_free(idb->idb_buf, idb->idb_buflen);
2474 } else {
2475 kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2476 }
2477 }
2478
2479 static void
2480 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2481 uint32_t offset, uint32_t length)
2482 {
2483 idm_so_conn_t *so_conn = ic->ic_transport_private;
2484 idm_pdu_t tmppdu;
2485 idm_buf_t *rtt_buf;
2486
2487 ASSERT(mutex_owned(&idt->idt_mutex));
2488
	/*
	 * Allocate a buffer to represent the RTT transfer. We could further
	 * optimize this by allocating the buffers internally from an
	 * RTT-specific buffer cache, since this is socket-specific code,
	 * but for now we will keep it simple.
	 */
2495 rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2496 if (rtt_buf == NULL) {
2497 /*
2498 * If we're in FFP then the failure was likely a resource
2499 * allocation issue and we should close the connection by
2500 * sending a CE_TRANSPORT_FAIL event.
2501 *
2502 * If we're not in FFP then idm_buf_alloc will always
2503 * fail and the state is transitioning to "complete" anyway
2504 * so we won't bother to send an event.
2505 */
2506 mutex_enter(&ic->ic_state_mutex);
2507 if (ic->ic_ffp)
2508 idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2509 NULL, CT_NONE);
2510 mutex_exit(&ic->ic_state_mutex);
2511 mutex_exit(&idt->idt_mutex);
2512 return;
2513 }
2514
2515 rtt_buf->idb_buf_cb = NULL;
2516 rtt_buf->idb_cb_arg = NULL;
2517 rtt_buf->idb_bufoffset = offset;
2518 rtt_buf->idb_xfer_len = length;
2519 rtt_buf->idb_ic = idt->idt_ic;
2520 rtt_buf->idb_task_binding = idt;
2521
2522 /*
2523 * The new buffer (if any) represents an additional
2524 * reference on the task
2525 */
2526 idm_task_hold(idt);
2527 mutex_exit(&idt->idt_mutex);
2528
2529 /*
2530 * Put the idm_buf_t on the tx queue. It will be transmitted by
2531 * idm_sotx_thread.
2532 */
2533 mutex_enter(&so_conn->ic_tx_mutex);
2534
2535 if (!so_conn->ic_tx_thread_running) {
2536 idm_buf_free(rtt_buf);
2537 mutex_exit(&so_conn->ic_tx_mutex);
2538 idm_task_rele(idt);
2539 return;
2540 }
2541
	/*
	 * Build a template for the data PDU headers we will use so that
	 * the SN values will stay consistent with other PDUs we are
	 * transmitting, like R2T and SCSI status.
	 */
2547 bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2548 tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2549 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2550 ISCSI_OP_SCSI_DATA);
2551 rtt_buf->idb_tx_thread = B_TRUE;
2552 rtt_buf->idb_in_transport = B_TRUE;
2553 list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2554 cv_signal(&so_conn->ic_tx_cv);
2555 mutex_exit(&so_conn->ic_tx_mutex);
2556 }
2557
2558 static void
2559 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
2560 {
2561 /*
2562 * Don't worry about status -- we assume any error handling
2563 * is performed by the caller (idm_sotx_thread).
2564 */
2565 idb->idb_in_transport = B_FALSE;
2566 idm_task_rele(idt);
2567 idm_buf_free(idb);
2568 }
2569
2570 static idm_status_t
2571 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
2572 uint32_t buf_region_offset, uint32_t buf_region_length)
2573 {
2574 idm_conn_t *ic;
2575 uint32_t max_dataseglen;
2576 size_t remainder, chunk;
2577 uint32_t data_offset = buf_region_offset;
2578 iscsi_data_hdr_t *bhs;
2579 idm_pdu_t *pdu;
2580 idm_status_t tx_status;
2581
2582 ASSERT(mutex_owned(&idt->idt_mutex));
2583
2584 ic = idt->idt_ic;
2585
2586 max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
2587 remainder = buf_region_length;
2588
2589 while (remainder) {
2590 if (idt->idt_state != TASK_ACTIVE) {
2591 ASSERT((idt->idt_state != TASK_IDLE) &&
2592 (idt->idt_state != TASK_COMPLETE));
2593 return (IDM_STATUS_ABORTED);
2594 }
2595
2596 /* check to see if we need to chunk the data */
2597 if (remainder > max_dataseglen) {
2598 chunk = max_dataseglen;
2599 } else {
2600 chunk = remainder;
2601 }
2602
2603 /* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2604 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2605 pdu->isp_ic = ic;
2606 pdu->isp_flags = 0; /* initialize isp_flags */
2607
		/*
		 * We've already built a header template to use during the
		 * transfer. Use this template so that the SN values stay
		 * consistent with any unrelated PDUs being transmitted.
		 */
2614 bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2615 sizeof (iscsi_hdr_t));
2616
2617 /*
2618 * Set DataSN, data offset, and flags in BHS
2619 * For the prototype build, A = 0, S = 0, U = 0
2620 */
2621 bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2622
2623 bhs->datasn = htonl(idt->idt_exp_datasn++);
2624
2625 hton24(bhs->dlength, chunk);
2626 bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2627
2628 /* setup data */
2629 pdu->isp_data = (uint8_t *)idb->idb_buf + data_offset;
2630 pdu->isp_datalen = (uint_t)chunk;
2631
2632 if (chunk == remainder) {
2633 bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2634 /* Piggyback the status with the last data PDU */
2635 if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
2636 pdu->isp_flags |= IDM_PDU_SET_STATSN |
2637 IDM_PDU_ADVANCE_STATSN;
2638 (*idt->idt_ic->ic_conn_ops.icb_update_statsn)
2639 (idt, pdu);
2640 idt->idt_flags |=
2641 IDM_TASK_PHASECOLLAPSE_SUCCESS;
2642
2643 }
2644 }
2645
2646 remainder -= chunk;
2647 data_offset += chunk;
2648
2649 /* Instrument the data-send DTrace probe. */
2650 if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
2651 DTRACE_ISCSI_2(data__send,
2652 idm_conn_t *, idt->idt_ic,
2653 iscsi_data_rsp_hdr_t *,
2654 (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
2655 }
2656
2657 /*
2658 * Now that we're done working with idt_exp_datasn,
2659 * idt->idt_state and idb->idb_bufoffset we can release
2660 * the task lock -- don't want to hold it across the
2661 * call to idm_i_so_tx since we could block.
2662 */
2663 mutex_exit(&idt->idt_mutex);
2664
2665 /*
2666 * Transmit the PDU. Call the internal routine directly
2667 * as there is already implicit ordering.
2668 */
2669 if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
2670 mutex_enter(&idt->idt_mutex);
2671 return (tx_status);
2672 }
2673
2674 mutex_enter(&idt->idt_mutex);
2675 idt->idt_tx_bytes += chunk;
2676 }
2677
2678 return (IDM_STATUS_SUCCESS);
2679 }
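/*
 * Chunking example (illustrative values): with max_xmit_dataseglen of 8192
 * and a 20480-byte buffer region starting at buf_region_offset 0, the loop
 * above emits three Data PDUs of 8192, 8192 and 4096 bytes. Their DataSN
 * values are idt_exp_datasn, +1 and +2, their Buffer Offsets are
 * idb_bufoffset plus 0, 8192 and 16384, and only the last PDU carries the
 * F bit (and, when phase collapse is requested, the piggybacked status).
 */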
2680
2681 /*
2682 * TX PDU cache
2683 */
2684 /* ARGSUSED */
2685 int
2686 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2687 {
2688 idm_pdu_t *pdu = hdl;
2689
2690 bzero(pdu, sizeof (idm_pdu_t));
2691 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2692 pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2693 pdu->isp_callback = idm_sotx_cache_pdu_cb;
2694 pdu->isp_magic = IDM_PDU_MAGIC;
2695 bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2696
2697 return (0);
2698 }
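/*
 * The "pdu + 1" pointer arithmetic above relies on each cache object being
 * laid out as an idm_pdu_t immediately followed by room for an iscsi_hdr_t
 * (presumably how idm_sotx_pdu_cache is sized where it is created); isp_hdr
 * simply points just past the PDU structure. The RX constructor below uses
 * the same layout.
 */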
2699
2700 /* ARGSUSED */
2701 void
2702 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2703 {
2704 /* reset values between use */
2705 pdu->isp_datalen = 0;
2706
2707 kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
2708 }
2709
2710 /*
2711 * RX PDU cache
2712 */
2713 /* ARGSUSED */
2714 int
2715 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2716 {
2717 idm_pdu_t *pdu = hdl;
2718
2719 bzero(pdu, sizeof (idm_pdu_t));
2720 pdu->isp_magic = IDM_PDU_MAGIC;
2721 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2722 pdu->isp_callback = idm_sorx_cache_pdu_cb;
2723
2724 return (0);
2725 }
2726
2727 /* ARGSUSED */
2728 static void
2729 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2730 {
2731 pdu->isp_iovlen = 0;
2732 pdu->isp_sorx_buf = 0;
2733 kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
2734 }
2735
2736 static void
2737 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2738 {
2739 /*
2740 * We had to modify our cached RX PDU with a longer header buffer
2741 * and/or a longer data buffer. Release the new buffers and fix
2742 * the fields back to what we would expect for a cached RX PDU.
2743 */
2744 if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2745 kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2746 }
2747 if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2748 kmem_free(pdu->isp_data, pdu->isp_datalen);
2749 }
2750 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2751 pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2752 pdu->isp_data = NULL;
2753 pdu->isp_datalen = 0;
2754 pdu->isp_sorx_buf = 0;
2755 pdu->isp_callback = idm_sorx_cache_pdu_cb;
2756 idm_sorx_cache_pdu_cb(pdu, status);
2757 }
2758
2759 /*
2760 * This thread is only active when I/O is queued for transmit
2761 * because the socket is busy.
2762 */
2763 void
2764 idm_sotx_thread(void *arg)
2765 {
2766 idm_conn_t *ic = arg;
2767 idm_tx_obj_t *object, *next;
2768 idm_so_conn_t *so_conn;
2769 idm_status_t status = IDM_STATUS_SUCCESS;
2770
2771 idm_conn_hold(ic);
2772
2773 mutex_enter(&ic->ic_mutex);
2774 so_conn = ic->ic_transport_private;
2775 so_conn->ic_tx_thread_running = B_TRUE;
2776 so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
2777 cv_signal(&ic->ic_cv);
2778 mutex_exit(&ic->ic_mutex);
2779
2780 mutex_enter(&so_conn->ic_tx_mutex);
2781
2782 while (so_conn->ic_tx_thread_running) {
2783 while (list_is_empty(&so_conn->ic_tx_list)) {
2784 DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
2785 cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
2786 DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
2787
2788 if (!so_conn->ic_tx_thread_running) {
2789 goto tx_bail;
2790 }
2791 }
2792
2793 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2794 list_remove(&so_conn->ic_tx_list, object);
2795 mutex_exit(&so_conn->ic_tx_mutex);
2796
2797 switch (object->idm_tx_obj_magic) {
2798 case IDM_PDU_MAGIC: {
2799 idm_pdu_t *pdu = (idm_pdu_t *)object;
2800 DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
2801 idm_pdu_t *, (idm_pdu_t *)object);
2802
2803 if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
2804 /* No IDM task */
2805 (ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
2806 }
2807 status = idm_i_so_tx((idm_pdu_t *)object);
2808 break;
2809 }
2810 case IDM_BUF_MAGIC: {
2811 idm_buf_t *idb = (idm_buf_t *)object;
2812 idm_task_t *idt = idb->idb_task_binding;
2813
2814 DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
2815 idm_buf_t *, idb);
2816
2817 mutex_enter(&idt->idt_mutex);
2818 status = idm_so_send_buf_region(idt,
2819 idb, 0, idb->idb_xfer_len);
2820
2821 /*
2822 * TX thread owns the buffer so we expect it to
2823 * be "in transport"
2824 */
2825 ASSERT(idb->idb_in_transport);
2826 if (IDM_CONN_ISTGT(ic)) {
2827 /*
2828 * idm_buf_tx_to_ini_done releases
2829 * idt->idt_mutex
2830 */
2831 DTRACE_ISCSI_8(xfer__done,
2832 idm_conn_t *, idt->idt_ic,
2833 uintptr_t, idb->idb_buf,
2834 uint32_t, idb->idb_bufoffset,
2835 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2836 uint32_t, idb->idb_xfer_len,
2837 int, XFER_BUF_TX_TO_INI);
2838 idm_buf_tx_to_ini_done(idt, idb, status);
2839 } else {
2840 idm_so_send_rtt_data_done(idt, idb);
2841 mutex_exit(&idt->idt_mutex);
2842 }
2843 break;
2844 }
2845
2846 default:
2847 IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
2848 "(0x%08x)", object->idm_tx_obj_magic);
2849 status = IDM_STATUS_FAIL;
2850 }
2851
2852 mutex_enter(&so_conn->ic_tx_mutex);
2853
2854 if (status != IDM_STATUS_SUCCESS) {
2855 so_conn->ic_tx_thread_running = B_FALSE;
2856 idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
2857 }
2858 }
2859
2860 /*
2861 * Before we leave, we need to abort every item remaining in the
2862 * TX list.
2863 */
2864
2865 tx_bail:
2866 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2867
2868 while (object != NULL) {
2869 next = list_next(&so_conn->ic_tx_list, object);
2870
2871 list_remove(&so_conn->ic_tx_list, object);
2872 switch (object->idm_tx_obj_magic) {
2873 case IDM_PDU_MAGIC:
2874 idm_pdu_complete((idm_pdu_t *)object,
2875 IDM_STATUS_ABORTED);
2876 break;
2877
2878 case IDM_BUF_MAGIC: {
2879 idm_buf_t *idb = (idm_buf_t *)object;
2880 idm_task_t *idt = idb->idb_task_binding;
2881 mutex_exit(&so_conn->ic_tx_mutex);
2882 mutex_enter(&idt->idt_mutex);
2883 /*
2884 * TX thread owns the buffer so we expect it to
2885 * be "in transport"
2886 */
2887 ASSERT(idb->idb_in_transport);
2888 if (IDM_CONN_ISTGT(ic)) {
2889 /*
2890 * idm_buf_tx_to_ini_done releases
2891 * idt->idt_mutex
2892 */
2893 DTRACE_ISCSI_8(xfer__done,
2894 idm_conn_t *, idt->idt_ic,
2895 uintptr_t, idb->idb_buf,
2896 uint32_t, idb->idb_bufoffset,
2897 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2898 uint32_t, idb->idb_xfer_len,
2899 int, XFER_BUF_TX_TO_INI);
2900 idm_buf_tx_to_ini_done(idt, idb,
2901 IDM_STATUS_ABORTED);
2902 } else {
2903 idm_so_send_rtt_data_done(idt, idb);
2904 mutex_exit(&idt->idt_mutex);
2905 }
2906 mutex_enter(&so_conn->ic_tx_mutex);
2907 break;
2908 }
2909 default:
2910 IDM_CONN_LOG(CE_WARN,
2911 "idm_sotx_thread: Unexpected magic "
2912 "(0x%08x)", object->idm_tx_obj_magic);
2913 }
2914
2915 object = next;
2916 }
2917
2918 mutex_exit(&so_conn->ic_tx_mutex);
2919 idm_conn_rele(ic);
2920 thread_exit();
2921 /*NOTREACHED*/
2922 }
2923
2924 static void
2925 idm_so_socket_set_nonblock(struct sonode *node)
2926 {
2927 (void) VOP_SETFL(node->so_vnode, node->so_flag,
2928 (node->so_state | FNONBLOCK), CRED(), NULL);
2929 }
2930
2931 static void
2932 idm_so_socket_set_block(struct sonode *node)
2933 {
2934 (void) VOP_SETFL(node->so_vnode, node->so_flag,
2935 (node->so_state & (~FNONBLOCK)), CRED(), NULL);
2936 }
2937
2938
2939 /*
2940 * Called by kernel sockets when the connection has been accepted or
2941 * rejected. In early volo, a "disconnect" callback was sent instead of
2942 * "connectfailed", so we check for both.
2943 */
2944 /* ARGSUSED */
2945 void
2946 idm_so_timed_socket_connect_cb(ksocket_t ks,
2947 ksocket_callback_event_t ev, void *arg, uintptr_t info)
2948 {
2949 idm_so_timed_socket_t *itp = arg;
2950 ASSERT(itp != NULL);
2951 ASSERT(ev == KSOCKET_EV_CONNECTED ||
2952 ev == KSOCKET_EV_CONNECTFAILED ||
2953 ev == KSOCKET_EV_DISCONNECTED);
2954
2955 mutex_enter(&idm_so_timed_socket_mutex);
2956 itp->it_callback_called = B_TRUE;
2957 if (ev == KSOCKET_EV_CONNECTED) {
2958 itp->it_socket_error_code = 0;
2959 } else {
2960 /* Make sure the error code is non-zero on error */
2961 if (info == 0)
2962 info = ECONNRESET;
2963 itp->it_socket_error_code = (int)info;
2964 }
2965 cv_signal(&itp->it_cv);
2966 mutex_exit(&idm_so_timed_socket_mutex);
2967 }
2968
2969 int
2970 idm_so_timed_socket_connect(ksocket_t ks,
2971 struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
2972 {
2973 clock_t conn_login_max;
2974 int rc, nonblocking, rval;
2975 idm_so_timed_socket_t it;
2976 ksocket_callbacks_t ks_cb;
2977
2978 conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
2979
	/*
	 * Set the socket to non-blocking mode, with a callback on connect.
	 * Early volo used "disconnected" instead of "connectfailed",
	 * so set the callback to look for both.
	 */
2985 bzero(&it, sizeof (it));
2986 ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
2987 KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
2988 ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
2989 ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
2990 ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
2991 cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
2992 rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
2993 if (rc != 0)
2994 return (rc);
2995
2996 /* Set to non-blocking mode */
2997 nonblocking = 1;
2998 rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
2999 CRED());
3000 if (rc != 0)
3001 goto cleanup;
3002
3003 bzero(&it, sizeof (it));
3004 for (;;) {
3005 /*
3006 * Warning -- in a loopback scenario, the call to
3007 * the connect_cb can occur inside the call to
3008 * ksocket_connect. Do not hold the mutex around the
3009 * call to ksocket_connect.
3010 */
3011 rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
		if (rc == 0 || rc == EISCONN) {
			/* connected successfully or already connected */
3014 rc = 0;
3015 break;
3016 }
3017 if ((rc != EINPROGRESS) && (rc != EALREADY)) {
3018 break;
3019 }
3020
3021 /* TCP connect still in progress. See if out of time. */
3022 if (ddi_get_lbolt() > conn_login_max) {
3023 /*
3024 * Connection retry timeout,
3025 * failed connect to target.
3026 */
3027 rc = ETIMEDOUT;
3028 break;
3029 }
3030
3031 /*
3032 * TCP connect still in progress. Sleep until callback.
3033 * Do NOT go to sleep if the callback already occurred!
3034 */
3035 mutex_enter(&idm_so_timed_socket_mutex);
3036 if (!it.it_callback_called) {
3037 (void) cv_timedwait(&it.it_cv,
3038 &idm_so_timed_socket_mutex, conn_login_max);
3039 }
3040 if (it.it_callback_called) {
3041 rc = it.it_socket_error_code;
3042 mutex_exit(&idm_so_timed_socket_mutex);
3043 break;
3044 }
		/* If the timer expired, call ksocket_connect one last time. */
3046 mutex_exit(&idm_so_timed_socket_mutex);
3047 }
3048
3049 /* resume blocking mode */
3050 nonblocking = 0;
3051 (void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3052 CRED());
3053 cleanup:
3054 (void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
3055 cv_destroy(&it.it_cv);
3056 if (rc != 0) {
3057 idm_soshutdown(ks);
3058 }
3059 return (rc);
3060 }
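/*
 * Illustrative call (hypothetical caller, not part of this file), assuming
 * "ks" came from ksocket_socket() and "ss" was filled in by idm_addr_to_sa()
 * below, with a 10-second login window (login_max_usec is in microseconds):
 *
 *	rc = idm_so_timed_socket_connect(ks, &ss, sizeof (ss), 10 * 1000000);
 *	if (rc != 0)
 *		(treat as a failed login attempt and retry or give up)
 */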
3061
3062
3063 void
3064 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
3065 {
3066 int dp_addr_size;
3067 struct sockaddr_in *sin;
3068 struct sockaddr_in6 *sin6;
3069
3070 /* Build sockaddr_storage for this portal (idm_addr_t) */
3071 bzero(sa, sizeof (*sa));
3072 dp_addr_size = dportal->a_addr.i_insize;
3073 if (dp_addr_size == sizeof (struct in_addr)) {
3074 /* IPv4 */
3075 sa->ss_family = AF_INET;
3076 sin = (struct sockaddr_in *)sa;
3077 sin->sin_port = htons(dportal->a_port);
3078 bcopy(&dportal->a_addr.i_addr.in4,
3079 &sin->sin_addr, sizeof (struct in_addr));
3080 } else if (dp_addr_size == sizeof (struct in6_addr)) {
3081 /* IPv6 */
3082 sa->ss_family = AF_INET6;
3083 sin6 = (struct sockaddr_in6 *)sa;
3084 sin6->sin6_port = htons(dportal->a_port);
3085 bcopy(&dportal->a_addr.i_addr.in6,
3086 &sin6->sin6_addr, sizeof (struct in6_addr));
3087 } else {
3088 ASSERT(0);
3089 }
3090 }
3091
3092
/*
 * Return a human-readable form of a sockaddr_storage, in the form
 * [ip-address].port. This is used in calls to logging functions.
 * If several calls to idm_sa_ntop are made within the same invocation
 * of a logging function, then each one needs its own buf.
 */
3099 const char *
3100 idm_sa_ntop(const struct sockaddr_storage *sa,
3101 char *buf, size_t size)
3102 {
3103 static const char bogus_ip[] = "[0].-1";
3104 char tmp[INET6_ADDRSTRLEN];
3105
3106 switch (sa->ss_family) {
3107 case AF_INET6:
3108 {
3109 const struct sockaddr_in6 *in6 =
3110 (const struct sockaddr_in6 *) sa;
3111
3112 if (inet_ntop(in6->sin6_family,
3113 &in6->sin6_addr, tmp, sizeof (tmp)) == NULL) {
3114 goto err;
3115 }
3116 if (strlen(tmp) + sizeof ("[].65535") > size) {
3117 goto err;
3118 }
3119 /* struct sockaddr_storage gets port info from v4 loc */
3120 (void) snprintf(buf, size, "[%s].%u", tmp,
3121 ntohs(in6->sin6_port));
3122 return (buf);
3123 }
3124 case AF_INET:
3125 {
3126 const struct sockaddr_in *in =
3127 (const struct sockaddr_in *) sa;
3128
3129 if (inet_ntop(in->sin_family, &in->sin_addr,
3130 tmp, sizeof (tmp)) == NULL) {
3131 goto err;
3132 }
3133 if (strlen(tmp) + sizeof ("[].65535") > size) {
3134 goto err;
3135 }
3136 (void) snprintf(buf, size, "[%s].%u", tmp,
3137 ntohs(in->sin_port));
3138 return (buf);
3139 }
3140 default:
3141 break;
3142 }
3143 err:
3144 (void) snprintf(buf, size, "%s", bogus_ip);
3145 return (buf);
3146 }
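/*
 * Usage sketch for idm_sa_ntop() (hypothetical caller, illustrative values):
 * an IPv4 portal 192.168.10.5 on port 3260 is rendered as
 * "[192.168.10.5].3260". The caller supplies the buffer:
 *
 *	char	buf[INET6_ADDRSTRLEN + sizeof ("[].65535")];
 *
 *	IDM_CONN_LOG(CE_NOTE, "connected to %s",
 *	    idm_sa_ntop(&ss, buf, sizeof (buf)));
 */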