1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright (c) 2013 by Delphix. All rights reserved.
27 * Copyright (c) 2017, Joyent, Inc. All rights reserved.
28 */
29
30 #include <sys/conf.h>
31 #include <sys/stat.h>
32 #include <sys/file.h>
33 #include <sys/ddi.h>
34 #include <sys/sunddi.h>
35 #include <sys/modctl.h>
36 #include <sys/priv.h>
37 #include <sys/cpuvar.h>
38 #include <sys/socket.h>
39 #include <sys/strsubr.h>
40 #include <sys/sysmacros.h>
41 #include <sys/sdt.h>
42 #include <netinet/tcp.h>
43 #include <inet/tcp.h>
44 #include <sys/socketvar.h>
45 #include <sys/pathname.h>
46 #include <sys/fs/snode.h>
47 #include <sys/fs/dv_node.h>
48 #include <sys/vnode.h>
49 #include <netinet/in.h>
50 #include <net/if.h>
51 #include <sys/sockio.h>
52 #include <sys/ksocket.h>
53 #include <sys/filio.h> /* FIONBIO */
54 #include <sys/iscsi_protocol.h>
55 #include <sys/idm/idm.h>
56 #include <sys/idm/idm_so.h>
57 #include <sys/idm/idm_text.h>
58
59 #define IN_PROGRESS_DELAY 1
60
/*
 * File-local copy of the IPv6 "any" address.  The value is all zeroes
 * today, but it is initialized from IN6ADDR_ANY_INIT rather than a
 * literal so it stays correct if that definition ever changes.
 */
static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
66
/*
 * PDU callbacks.  idm_sorx_addl_pdu_cb() is installed as the PDU's
 * isp_callback by idm_sorecvhdr() when it allocates a larger header.
 */
static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);

/*
 * Connection helpers shared by the initiator and target entry points
 */
static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
static void idm_so_conn_destroy_common(idm_conn_t *ic);
static void idm_so_conn_connect_common(idm_conn_t *ic);

static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
    boolean_t boot_conn);
static void idm_set_postconnect_options(ksocket_t so);
static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);

static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
    idm_buf_t *idb, uint32_t offset, uint32_t length);
static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
    idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);

static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
    uint32_t ro, uint32_t dlength);

static idm_status_t idm_so_handle_digest(idm_conn_t *it,
    nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);

/* Used by idm_so_ini_conn_connect() around its non-blocking connect loop */
static void idm_so_socket_set_nonblock(struct sonode *node);
static void idm_so_socket_set_block(struct sonode *node);

/*
 * Transport ops prototypes.  These are the functions placed in the
 * idm_so_transport_ops table below.
 */
static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
    nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
static void idm_so_notice_key_values(idm_conn_t *it,
    nvlist_t *negotiated_nvl);
static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
    nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
    idm_transport_caps_t *caps);
static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
static void idm_so_buf_free(idm_buf_t *idb);
static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
static void idm_so_buf_teardown(idm_buf_t *idb);
static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
static void idm_so_tgt_svc_destroy(idm_svc_t *is);
static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
static void idm_so_tgt_svc_offline(idm_svc_t *is);
static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
static void idm_so_conn_disconnect(idm_conn_t *ic);
static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
static void idm_so_ini_conn_destroy(idm_conn_t *ic);
static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
128
/*
 * IDM Native Sockets transport operations
 *
 * The initializer is positional, so the entries must stay in the same
 * order as the members of idm_transport_ops_t; the trailing comment on
 * each entry gives the member it is meant to fill.  NULL entries are
 * operations for which this transport supplies no function.  The same
 * idm_so_conn_disconnect() serves both the target and initiator
 * disconnect slots.  idm_so_init() installs this table as it_ops.
 */
static
idm_transport_ops_t idm_so_transport_ops = {
	idm_so_tx,			/* it_tx_pdu */
	idm_so_buf_tx_to_ini,		/* it_buf_tx_to_ini */
	idm_so_buf_rx_from_ini,		/* it_buf_rx_from_ini */
	idm_so_rx_datain,		/* it_rx_datain */
	idm_so_rx_rtt,			/* it_rx_rtt */
	idm_so_rx_dataout,		/* it_rx_dataout */
	NULL,				/* it_alloc_conn_rsrc */
	NULL,				/* it_free_conn_rsrc */
	NULL,				/* it_tgt_enable_datamover */
	NULL,				/* it_ini_enable_datamover */
	NULL,				/* it_conn_terminate */
	idm_so_free_task_rsrc,		/* it_free_task_rsrc */
	idm_so_negotiate_key_values,	/* it_negotiate_key_values */
	idm_so_notice_key_values,	/* it_notice_key_values */
	idm_so_conn_is_capable,		/* it_conn_is_capable */
	idm_so_buf_alloc,		/* it_buf_alloc */
	idm_so_buf_free,		/* it_buf_free */
	idm_so_buf_setup,		/* it_buf_setup */
	idm_so_buf_teardown,		/* it_buf_teardown */
	idm_so_tgt_svc_create,		/* it_tgt_svc_create */
	idm_so_tgt_svc_destroy,		/* it_tgt_svc_destroy */
	idm_so_tgt_svc_online,		/* it_tgt_svc_online */
	idm_so_tgt_svc_offline,		/* it_tgt_svc_offline */
	idm_so_tgt_conn_destroy,	/* it_tgt_conn_destroy */
	idm_so_tgt_conn_connect,	/* it_tgt_conn_connect */
	idm_so_conn_disconnect,		/* it_tgt_conn_disconnect */
	idm_so_ini_conn_create,		/* it_ini_conn_create */
	idm_so_ini_conn_destroy,	/* it_ini_conn_destroy */
	idm_so_ini_conn_connect,	/* it_ini_conn_connect */
	idm_so_conn_disconnect,		/* it_ini_conn_disconnect */
	idm_so_declare_key_values	/* it_declare_key_values */
};
166
/* Initialized by idm_so_init() and destroyed by idm_so_fini() */
kmutex_t	idm_so_timed_socket_mutex;

/*
 * Socket buffer sizes that idm_set_postconnect_options() applies with
 * SO_SNDBUF and SO_RCVBUF.  They are ordinary (non-static, non-const)
 * variables, so the values in effect are whatever they hold when a
 * connection's options are set.
 */
int32_t		idm_so_sndbuf = IDM_SNDBUF_SIZE;
int32_t		idm_so_rcvbuf = IDM_RCVBUF_SIZE;
171
172 /*
173 * idm_so_init()
174 * Sockets transport initialization
175 */
176 void
177 idm_so_init(idm_transport_t *it)
178 {
179 /* Cache for IDM Data and R2T Transmit PDU's */
180 idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
181 sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
182 &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
183
184 /* Cache for IDM Receive PDU's */
185 idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
186 sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
187 &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
188
189 /* 128k buffer cache */
190 idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
191 IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
192
193 /* Set the sockets transport ops */
194 it->it_ops = &idm_so_transport_ops;
195
196 mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
197
198 }
199
200 /*
201 * idm_so_fini()
202 * Sockets transport teardown
203 */
204 void
205 idm_so_fini(void)
206 {
207 kmem_cache_destroy(idm.idm_so_128k_buf_cache);
208 kmem_cache_destroy(idm.idm_sotx_pdu_cache);
209 kmem_cache_destroy(idm.idm_sorx_pdu_cache);
210 mutex_destroy(&idm_so_timed_socket_mutex);
211 }
212
213 ksocket_t
214 idm_socreate(int domain, int type, int protocol)
215 {
216 ksocket_t ks;
217
218 if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
219 CRED())) {
220 return (ks);
221 } else {
222 return (NULL);
223 }
224 }
225
226 /*
227 * idm_soshutdown will disconnect the socket and prevent subsequent PDU
228 * reception and transmission. The sonode still exists but its state
229 * gets modified to indicate it is no longer connected. Calls to
230 * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used
231 * regain control of a thread stuck in idm_sorecv.
232 */
233 void
234 idm_soshutdown(ksocket_t so)
235 {
236 (void) ksocket_shutdown(so, SHUT_RDWR, CRED());
237 }
238
239 /*
240 * idm_sodestroy releases all resources associated with a socket previously
241 * created with idm_socreate. The socket must be shutdown using
242 * idm_soshutdown before the socket is destroyed with idm_sodestroy,
243 * otherwise undefined behavior will result.
244 */
245 void
246 idm_sodestroy(ksocket_t ks)
247 {
248 (void) ksocket_close(ks, CRED());
249 }
250
251 /*
252 * Function to compare two addresses in sockaddr_storage format
253 */
254
255 int
256 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
257 const struct sockaddr_storage *cmp_ss2,
258 boolean_t v4_mapped_as_v4,
259 boolean_t compare_ports)
260 {
261 struct sockaddr_storage mapped_v4_ss1, mapped_v4_ss2;
262 const struct sockaddr_storage *ss1, *ss2;
263 struct in_addr *in1, *in2;
264 struct in6_addr *in61, *in62;
265 int i;
266
267 /*
268 * Normalize V4-mapped IPv6 addresses into V4 format if
269 * v4_mapped_as_v4 is B_TRUE.
270 */
271 ss1 = cmp_ss1;
272 ss2 = cmp_ss2;
273 if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
274 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
275 if (IN6_IS_ADDR_V4MAPPED(in61)) {
276 bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
277 mapped_v4_ss1.ss_family = AF_INET;
278 ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
279 ((struct sockaddr_in *)ss1)->sin_port;
280 IN6_V4MAPPED_TO_INADDR(in61,
281 &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
282 ss1 = &mapped_v4_ss1;
283 }
284 }
285 ss2 = cmp_ss2;
286 if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
287 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
288 if (IN6_IS_ADDR_V4MAPPED(in62)) {
289 bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
290 mapped_v4_ss2.ss_family = AF_INET;
291 ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
292 ((struct sockaddr_in *)ss2)->sin_port;
293 IN6_V4MAPPED_TO_INADDR(in62,
294 &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
295 ss2 = &mapped_v4_ss2;
296 }
297 }
298
299 /*
300 * Compare ports, then address family, then ip address
301 */
302 if (compare_ports &&
303 (((struct sockaddr_in *)ss1)->sin_port !=
304 ((struct sockaddr_in *)ss2)->sin_port)) {
305 if (((struct sockaddr_in *)ss1)->sin_port >
306 ((struct sockaddr_in *)ss2)->sin_port)
307 return (1);
308 else
309 return (-1);
310 }
311
312 /*
313 * ports are the same
314 */
315 if (ss1->ss_family != ss2->ss_family) {
316 if (ss1->ss_family == AF_INET)
317 return (1);
318 else
319 return (-1);
320 }
321
322 /*
323 * address families are the same
324 */
325 if (ss1->ss_family == AF_INET) {
326 in1 = &((struct sockaddr_in *)ss1)->sin_addr;
327 in2 = &((struct sockaddr_in *)ss2)->sin_addr;
328
329 if (in1->s_addr > in2->s_addr)
330 return (1);
331 else if (in1->s_addr < in2->s_addr)
332 return (-1);
333 else
334 return (0);
335 } else if (ss1->ss_family == AF_INET6) {
336 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
337 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
338
339 for (i = 0; i < 4; i++) {
340 if (in61->s6_addr32[i] > in62->s6_addr32[i])
341 return (1);
342 else if (in61->s6_addr32[i] < in62->s6_addr32[i])
343 return (-1);
344 }
345 return (0);
346 }
347
348 return (1);
349 }
350
/*
 * IP address filter functions to flag addresses that should not
 * go out to initiators through discovery.
 */

/*
 * Returns B_FALSE for IPv4 addresses that must not be advertised:
 * INADDR_NONE, multicast addresses, and addresses whose class A
 * network number is 0 or IN_LOOPBACKNET.  Everything else is B_TRUE.
 * The address is expected in network byte order.
 */
static boolean_t
idm_v4_addr_okay(struct in_addr *in_addr)
{
	in_addr_t	host_addr = ntohl(in_addr->s_addr);
	in_addr_t	classa_net = host_addr >> IN_CLASSA_NSHIFT;

	if (host_addr == INADDR_NONE)
		return (B_FALSE);

	if (IN_MULTICAST(host_addr))
		return (B_FALSE);

	if (classa_net == 0 || classa_net == IN_LOOPBACKNET)
		return (B_FALSE);

	return (B_TRUE);
}
368
/*
 * Returns B_FALSE for IPv6 addresses that must not be advertised:
 * unspecified, loopback, multicast, V4-mapped, V4-compatible and
 * link-local addresses.  Everything else is B_TRUE.
 */
static boolean_t
idm_v6_addr_okay(struct in6_addr *addr6)
{
	if (IN6_IS_ADDR_UNSPECIFIED(addr6))
		return (B_FALSE);
	if (IN6_IS_ADDR_LOOPBACK(addr6))
		return (B_FALSE);
	if (IN6_IS_ADDR_MULTICAST(addr6))
		return (B_FALSE);
	if (IN6_IS_ADDR_V4MAPPED(addr6))
		return (B_FALSE);
	if (IN6_IS_ADDR_V4COMPAT(addr6))
		return (B_FALSE);
	if (IN6_IS_ADDR_LINKLOCAL(addr6))
		return (B_FALSE);

	return (B_TRUE);
}
383
384 /*
385 * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
386 * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
387 */
388 int
389 idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
390 {
391 ksocket_t so4, so6;
392 struct lifnum lifn;
393 struct lifconf lifc;
394 struct lifreq *lp;
395 int rval;
396 int numifs;
397 int bufsize;
398 void *buf;
399 int i, j, n, rc;
400 struct sockaddr_storage ss;
401 struct sockaddr_in *sin;
402 struct sockaddr_in6 *sin6;
403 idm_addr_t *ip;
404 idm_addr_list_t *ipaddr = NULL;
405 int size_ipaddr;
406
407 *ipaddr_p = NULL;
408 size_ipaddr = 0;
409 buf = NULL;
410
411 /* create an ipv4 and ipv6 UDP socket */
412 if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
413 return (0);
414 if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
415 idm_sodestroy(so6);
416 return (0);
417 }
418
419
420 retry_count:
421 /* snapshot the current number of interfaces */
422 lifn.lifn_family = PF_UNSPEC;
423 lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
424 lifn.lifn_count = 0;
425 /* use vp6 for ioctls with unspecified families by default */
426 if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
427 != 0) {
428 goto cleanup;
429 }
430
431 numifs = lifn.lifn_count;
432 if (numifs <= 0) {
433 goto cleanup;
434 }
435
436 /* allocate extra room in case more interfaces appear */
437 numifs += 10;
438
439 /* get the interface names and ip addresses */
440 bufsize = numifs * sizeof (struct lifreq);
441 buf = kmem_alloc(bufsize, KM_SLEEP);
442
443 lifc.lifc_family = AF_UNSPEC;
444 lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
445 lifc.lifc_len = bufsize;
446 lifc.lifc_buf = buf;
447 rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
448 if (rc != 0) {
449 goto cleanup;
450 }
451 /* if our extra room is used up, try again */
452 if (bufsize <= lifc.lifc_len) {
453 kmem_free(buf, bufsize);
454 buf = NULL;
455 goto retry_count;
456 }
457 /* calc actual number of ifconfs */
458 n = lifc.lifc_len / sizeof (struct lifreq);
459
460 /* get ip address */
461 if (n > 0) {
462 size_ipaddr = sizeof (idm_addr_list_t) +
463 (n - 1) * sizeof (idm_addr_t);
464 ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
465 } else {
466 goto cleanup;
467 }
468
469 /*
470 * Examine the array of interfaces and filter uninteresting ones
471 */
472 for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
473
474 /*
475 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
476 */
477 ss = lp->lifr_addr;
478 /*
479 * fetch the flags using the socket of the correct family
480 */
481 switch (ss.ss_family) {
482 case AF_INET:
483 rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
484 &rval, CRED());
485 break;
486 case AF_INET6:
487 rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
488 &rval, CRED());
489 break;
490 default:
491 continue;
492 }
493 if (rc == 0) {
494 /*
495 * If we got the flags, skip uninteresting
496 * interfaces based on flags
497 */
498 if ((lp->lifr_flags & IFF_UP) != IFF_UP)
499 continue;
500 if (lp->lifr_flags &
501 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
502 continue;
503 }
504
505 /* save ip address */
506 ip = &ipaddr->al_addrs[j];
507 switch (ss.ss_family) {
508 case AF_INET:
509 sin = (struct sockaddr_in *)&ss;
510 if (!idm_v4_addr_okay(&sin->sin_addr))
511 continue;
512 ip->a_addr.i_addr.in4 = sin->sin_addr;
513 ip->a_addr.i_insize = sizeof (struct in_addr);
514 break;
515 case AF_INET6:
516 sin6 = (struct sockaddr_in6 *)&ss;
517 if (!idm_v6_addr_okay(&sin6->sin6_addr))
518 continue;
519 ip->a_addr.i_addr.in6 = sin6->sin6_addr;
520 ip->a_addr.i_insize = sizeof (struct in6_addr);
521 break;
522 default:
523 continue;
524 }
525 j++;
526 }
527
528 if (j == 0) {
529 /* no valid ifaddr */
530 kmem_free(ipaddr, size_ipaddr);
531 size_ipaddr = 0;
532 ipaddr = NULL;
533 } else {
534 ipaddr->al_out_cnt = j;
535 }
536
537
538 cleanup:
539 idm_sodestroy(so6);
540 idm_sodestroy(so4);
541
542 if (buf != NULL)
543 kmem_free(buf, bufsize);
544
545 *ipaddr_p = ipaddr;
546 return (size_ipaddr);
547 }
548
549 int
550 idm_sorecv(ksocket_t so, void *msg, size_t len)
551 {
552 iovec_t iov;
553
554 ASSERT(so != NULL);
555 ASSERT(len != 0);
556
557 /*
558 * Fill in iovec and receive data
559 */
560 iov.iov_base = msg;
561 iov.iov_len = len;
562
563 return (idm_iov_sorecv(so, &iov, 1, len));
564 }
565
566 /*
567 * idm_sosendto - Sends a buffered data on a non-connected socket.
568 *
569 * This function puts the data provided on the wire by calling sosendmsg.
570 * It will return only when all the data has been sent or if an error
571 * occurs.
572 *
573 * Returns 0 for success, the socket errno value if sosendmsg fails, and
574 * -1 if sosendmsg returns success but uio_resid != 0
575 */
576 int
577 idm_sosendto(ksocket_t so, void *buff, size_t len,
578 struct sockaddr *name, socklen_t namelen)
579 {
580 struct msghdr msg;
581 struct iovec iov[1];
582 int error;
583 size_t sent = 0;
584
585 iov[0].iov_base = buff;
586 iov[0].iov_len = len;
587
588 /* Initialization of the message header. */
589 bzero(&msg, sizeof (msg));
590 msg.msg_iov = iov;
591 msg.msg_iovlen = 1;
592 msg.msg_name = name;
593 msg.msg_namelen = namelen;
594
595 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
596 /* Data sent */
597 if (sent == len) {
598 /* All data sent. Success. */
599 return (0);
600 } else {
601 /* Not all data was sent. Failure */
602 return (-1);
603 }
604 }
605
606 /* Send failed */
607 return (error);
608 }
609
610 /*
611 * idm_iov_sosend - Sends an iovec on a connection.
612 *
613 * This function puts the data provided on the wire by calling sosendmsg.
614 * It will return only when all the data has been sent or if an error
615 * occurs.
616 *
617 * Returns 0 for success, the socket errno value if sosendmsg fails, and
618 * -1 if sosendmsg returns success but uio_resid != 0
619 */
620 int
621 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
622 {
623 struct msghdr msg;
624 int error;
625 size_t sent = 0;
626
627 ASSERT(iop != NULL);
628
629 /* Initialization of the message header. */
630 bzero(&msg, sizeof (msg));
631 msg.msg_iov = iop;
632 msg.msg_iovlen = iovlen;
633
634 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
635 == 0) {
636 /* Data sent */
637 if (sent == total_len) {
638 /* All data sent. Success. */
639 return (0);
640 } else {
641 /* Not all data was sent. Failure */
642 return (-1);
643 }
644 }
645
646 /* Send failed */
647 return (error);
648 }
649
650 /*
651 * idm_iov_sorecv - Receives an iovec from a connection
652 *
653 * This function gets the data asked for from the socket. It will return
654 * only when all the requested data has been retrieved or if an error
655 * occurs.
656 *
657 * Returns 0 for success, the socket errno value if sorecvmsg fails, and
658 * -1 if sorecvmsg returns success but uio_resid != 0
659 */
660 int
661 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
662 {
663 struct msghdr msg;
664 int error;
665 size_t recv;
666 int flags;
667
668 ASSERT(iop != NULL);
669
670 /* Initialization of the message header. */
671 bzero(&msg, sizeof (msg));
672 msg.msg_iov = iop;
673 msg.msg_iovlen = iovlen;
674 flags = MSG_WAITALL;
675
676 if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
677 == 0) {
678 /* Received data */
679 if (recv == total_len) {
680 /* All requested data received. Success */
681 return (0);
682 } else {
683 /*
684 * Not all data was received. The connection has
685 * probably failed.
686 */
687 return (-1);
688 }
689 }
690
691 /* Receive failed */
692 return (error);
693 }
694
695 static void
696 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
697 {
698 int conn_abort = 10000;
699 int conn_notify = 2000;
700 int abort = 30000;
701
702 /* Pre-connect socket options */
703 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
704 TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
705 CRED());
706 if (boot_conn == B_FALSE) {
707 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
708 TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
709 CRED());
710 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
711 TCP_ABORT_THRESHOLD,
712 (char *)&abort, sizeof (int), CRED());
713 }
714 }
715
716 static void
717 idm_set_postconnect_options(ksocket_t ks)
718 {
719 const int on = 1;
720
721 /* Set connect options */
722 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
723 (char *)&idm_so_rcvbuf, sizeof (idm_so_rcvbuf), CRED());
724 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
725 (char *)&idm_so_sndbuf, sizeof (idm_so_sndbuf), CRED());
726 (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
727 (char *)&on, sizeof (on), CRED());
728 }
729
/*
 * Assemble the three bytes at 'ptr' into a 24-bit value, most
 * significant byte first.
 */
static uint32_t
n2h24(const uchar_t *ptr)
{
	uint32_t	hi = ptr[0];
	uint32_t	mid = ptr[1];
	uint32_t	lo = ptr[2];

	return ((hi << 16) | (mid << 8) | lo);
}
735
736 static boolean_t
737 idm_dataseglenokay(idm_conn_t *ic, idm_pdu_t *pdu)
738 {
739 iscsi_hdr_t *bhs;
740
741 if (ic->ic_conn_type == CONN_TYPE_TGT &&
742 pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
743 IDM_CONN_LOG(CE_WARN,
744 "idm_dataseglenokay: exceeded the max data segment length");
745 return (B_FALSE);
746 }
747
748 bhs = pdu->isp_hdr;
749 /*
750 * Filter out any RFC3720 data-size violations.
751 */
752 switch (IDM_PDU_OPCODE(pdu)) {
753 case ISCSI_OP_SCSI_TASK_MGT_MSG:
754 case ISCSI_OP_SCSI_TASK_MGT_RSP:
755 case ISCSI_OP_RTT_RSP:
756 case ISCSI_OP_LOGOUT_CMD:
757 /*
758 * Data-segment not allowed and additional headers not allowed.
759 * (both must be zero according to the RFC3720.)
760 */
761 if (bhs->hlength != 0 || pdu->isp_datalen != 0)
762 return (B_FALSE);
763 break;
764 case ISCSI_OP_NOOP_OUT:
765 case ISCSI_OP_LOGIN_CMD:
766 case ISCSI_OP_TEXT_CMD:
767 case ISCSI_OP_SNACK_CMD:
768 case ISCSI_OP_NOOP_IN:
769 case ISCSI_OP_SCSI_RSP:
770 case ISCSI_OP_LOGIN_RSP:
771 case ISCSI_OP_TEXT_RSP:
772 case ISCSI_OP_SCSI_DATA_RSP:
773 case ISCSI_OP_LOGOUT_RSP:
774 case ISCSI_OP_ASYNC_EVENT:
775 case ISCSI_OP_REJECT_MSG:
776 /*
777 * Additional headers not allowed.
778 * (must be zero according to RFC3720.)
779 */
780 if (bhs->hlength)
781 return (B_FALSE);
782 break;
783 case ISCSI_OP_SCSI_CMD:
784 /*
785 * See RFC3720, section 10.3
786 *
787 * For pure read cmds, data-segment-length must be zero.
788 * For non-final transfers, data-size must be even number of
789 * 4-byte words.
790 * For any transfer, an expected byte count must be provided.
791 * For bidirectional transfers, an additional-header must be
792 * provided (for the read byte-count.)
793 */
794 if (pdu->isp_datalen) {
795 if ((bhs->flags & (ISCSI_FLAG_CMD_READ |
796 ISCSI_FLAG_CMD_WRITE)) == ISCSI_FLAG_CMD_READ)
797 return (B_FALSE);
798 if ((bhs->flags & ISCSI_FLAG_FINAL) == 0 &&
799 ((pdu->isp_datalen & 0x3) != 0))
800 return (B_FALSE);
801 }
802 if (bhs->flags & (ISCSI_FLAG_CMD_READ |
803 ISCSI_FLAG_CMD_WRITE)) {
804 iscsi_scsi_cmd_hdr_t *cmdhdr =
805 (iscsi_scsi_cmd_hdr_t *)bhs;
806 /*
807 * we're transfering some data, we must have a
808 * byte count
809 */
810 if (cmdhdr->data_length == 0)
811 return (B_FALSE);
812 }
813 break;
814 case ISCSI_OP_SCSI_DATA:
815 /*
816 * See RFC3720, section 10.7
817 *
818 * Additional headers aren't allowed, and the data-size must
819 * be an even number of 4-byte words (unless the final bit
820 * is set.)
821 */
822 if (bhs->hlength)
823 return (B_FALSE);
824 if ((bhs->flags & ISCSI_FLAG_FINAL) == 0 &&
825 ((pdu->isp_datalen & 0x3) != 0))
826 return (B_FALSE);
827 break;
828 default:
829 break;
830 }
831 return (B_TRUE);
832 }
833
834 static idm_status_t
835 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
836 {
837 iscsi_hdr_t *bhs;
838 uint32_t hdr_digest_crc;
839 uint32_t crc_calculated;
840 void *new_hdr;
841 int ahslen = 0;
842 int total_len = 0;
843 int iovlen = 0;
844 struct iovec iov[2];
845 idm_so_conn_t *so_conn;
846 int rc;
847
848 so_conn = ic->ic_transport_private;
849
850 /*
851 * Read BHS
852 */
853 bhs = pdu->isp_hdr;
854 rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
855 if (rc != IDM_STATUS_SUCCESS) {
856 return (IDM_STATUS_FAIL);
857 }
858
859 /*
860 * Check actual AHS length against the amount available in the buffer
861 */
862 pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
863 (bhs->hlength * sizeof (uint32_t));
864 pdu->isp_datalen = n2h24(bhs->dlength);
865
866 if (!idm_dataseglenokay(ic, pdu)) {
867 IDM_CONN_LOG(CE_WARN,
868 "idm_sorecvhdr: invalid data segment length");
869 return (IDM_STATUS_FAIL);
870 }
871 if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
872 /* Allocate a new header segment and change the callback */
873 new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
874 bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
875 pdu->isp_hdr = new_hdr;
876 pdu->isp_flags |= IDM_PDU_ADDL_HDR;
877
878 /*
879 * This callback will restore the expected values after
880 * the RX PDU has been processed.
881 */
882 pdu->isp_callback = idm_sorx_addl_pdu_cb;
883 }
884
885 /*
886 * Setup receipt of additional header and header digest (if enabled).
887 */
888 if (bhs->hlength > 0) {
889 iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
890 ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
891 iov[iovlen].iov_len = ahslen;
892 total_len += iov[iovlen].iov_len;
893 iovlen++;
894 }
895
896 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
897 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
898 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
899 total_len += iov[iovlen].iov_len;
900 iovlen++;
901 }
902
903 if ((iovlen != 0) &&
904 (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
905 total_len) != 0)) {
906 return (IDM_STATUS_FAIL);
907 }
908
909 /*
910 * Validate header digest if enabled
911 */
912 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
913 crc_calculated = idm_crc32c(pdu->isp_hdr,
914 sizeof (iscsi_hdr_t) + ahslen);
915 if (crc_calculated != hdr_digest_crc) {
916 /* Invalid Header Digest */
917 return (IDM_STATUS_HEADER_DIGEST);
918 }
919 }
920
921 return (0);
922 }
923
924 /*
925 * idm_so_ini_conn_create()
926 * Allocate the sockets transport connection resources.
927 */
928 static idm_status_t
929 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
930 {
931 ksocket_t so;
932 idm_so_conn_t *so_conn;
933 idm_status_t idmrc;
934
935 so = idm_socreate(cr->cr_domain, cr->cr_type,
936 cr->cr_protocol);
937 if (so == NULL) {
938 return (IDM_STATUS_FAIL);
939 }
940
941 /* Bind the socket if configured to do so */
942 if (cr->cr_bound) {
943 if (ksocket_bind(so, &cr->cr_bound_addr.sin,
944 SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
945 idm_sodestroy(so);
946 return (IDM_STATUS_FAIL);
947 }
948 }
949
950 idmrc = idm_so_conn_create_common(ic, so);
951 if (idmrc != IDM_STATUS_SUCCESS) {
952 idm_soshutdown(so);
953 idm_sodestroy(so);
954 return (IDM_STATUS_FAIL);
955 }
956
957 so_conn = ic->ic_transport_private;
958 /* Set up socket options */
959 idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
960
961 return (IDM_STATUS_SUCCESS);
962 }
963
964 /*
965 * idm_so_ini_conn_destroy()
966 * Tear down the sockets transport connection resources.
967 */
968 static void
969 idm_so_ini_conn_destroy(idm_conn_t *ic)
970 {
971 idm_so_conn_destroy_common(ic);
972 }
973
974 /*
975 * idm_so_ini_conn_connect()
976 * Establish the connection referred to by the handle previously allocated via
977 * idm_so_ini_conn_create().
978 */
979 static idm_status_t
980 idm_so_ini_conn_connect(idm_conn_t *ic)
981 {
982 idm_so_conn_t *so_conn;
983 struct sonode *node = NULL;
984 int rc;
985 clock_t lbolt, conn_login_max, conn_login_interval;
986 boolean_t nonblock;
987
988 so_conn = ic->ic_transport_private;
989 nonblock = ic->ic_conn_params.nonblock_socket;
990 conn_login_max = ic->ic_conn_params.conn_login_max;
991 conn_login_interval = ddi_get_lbolt() +
992 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
993
994 if (nonblock == B_TRUE) {
995 node = ((struct sonode *)(so_conn->ic_so));
996 /* Set to none block socket mode */
997 idm_so_socket_set_nonblock(node);
998 do {
999 rc = ksocket_connect(so_conn->ic_so,
1000 &ic->ic_ini_dst_addr.sin,
1001 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
1002 CRED());
1003 if (rc == 0 || rc == EISCONN) {
1004 /* socket success or already success */
1005 rc = IDM_STATUS_SUCCESS;
1006 break;
1007 }
1008 if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
1009 (rc == ECONNRESET)) {
1010 /* socket connection timeout or refuse */
1011 break;
1012 }
1013 lbolt = ddi_get_lbolt();
1014 if (lbolt > conn_login_max) {
1015 /*
1016 * Connection retry timeout,
1017 * failed connect to target.
1018 */
1019 break;
1020 }
1021 if (lbolt < conn_login_interval) {
1022 if ((rc == EINPROGRESS) || (rc == EALREADY)) {
1023 /* TCP connect still in progress */
1024 delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
1025 continue;
1026 } else {
1027 delay(conn_login_interval - lbolt);
1028 }
1029 }
1030 conn_login_interval = ddi_get_lbolt() +
1031 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
1032 } while (rc != 0);
1033 /* resume to nonblock mode */
1034 if (rc == IDM_STATUS_SUCCESS) {
1035 idm_so_socket_set_block(node);
1036 }
1037 } else {
1038 rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
1039 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
1040 }
1041
1042 if (rc != 0) {
1043 idm_soshutdown(so_conn->ic_so);
1044 return (IDM_STATUS_FAIL);
1045 }
1046
1047 idm_so_conn_connect_common(ic);
1048
1049 idm_set_postconnect_options(so_conn->ic_so);
1050
1051 return (IDM_STATUS_SUCCESS);
1052 }
1053
1054 idm_status_t
1055 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
1056 {
1057 idm_status_t idmrc;
1058
1059 idm_set_postconnect_options(new_so);
1060 idmrc = idm_so_conn_create_common(ic, new_so);
1061
1062 return (idmrc);
1063 }
1064
1065 static void
1066 idm_so_tgt_conn_destroy(idm_conn_t *ic)
1067 {
1068 idm_so_conn_destroy_common(ic);
1069 }
1070
1071 /*
1072 * idm_so_tgt_conn_connect()
1073 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
1074 * is invoked from the SM as a result of an inbound connection request.
1075 */
1076 static idm_status_t
1077 idm_so_tgt_conn_connect(idm_conn_t *ic)
1078 {
1079 idm_so_conn_connect_common(ic);
1080
1081 return (IDM_STATUS_SUCCESS);
1082 }
1083
1084 static idm_status_t
1085 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
1086 {
1087 idm_so_conn_t *so_conn;
1088
1089 so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
1090 so_conn->ic_so = new_so;
1091
1092 ic->ic_transport_private = so_conn;
1093 ic->ic_transport_hdrlen = 0;
1094
1095 /* Set the scoreboarding flag on this connection */
1096 ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
1097 ic->ic_conn_params.max_recv_dataseglen =
1098 ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1099 ic->ic_conn_params.max_xmit_dataseglen =
1100 ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1101
1102 /*
1103 * Initialize tx thread mutex and list
1104 */
1105 mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1106 cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1107 list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1108 offsetof(idm_pdu_t, idm_tx_link));
1109
1110 return (IDM_STATUS_SUCCESS);
1111 }
1112
1113 static void
1114 idm_so_conn_destroy_common(idm_conn_t *ic)
1115 {
1116 idm_so_conn_t *so_conn = ic->ic_transport_private;
1117
1118 ic->ic_transport_private = NULL;
1119 idm_sodestroy(so_conn->ic_so);
1120 list_destroy(&so_conn->ic_tx_list);
1121 mutex_destroy(&so_conn->ic_tx_mutex);
1122 cv_destroy(&so_conn->ic_tx_cv);
1123
1124 kmem_free(so_conn, sizeof (idm_so_conn_t));
1125 }
1126
1127 static void
1128 idm_so_conn_connect_common(idm_conn_t *ic)
1129 {
1130 idm_so_conn_t *so_conn;
1131 struct sockaddr_in6 t_addr;
1132 socklen_t t_addrlen = 0;
1133
1134 so_conn = ic->ic_transport_private;
1135 bzero(&t_addr, sizeof (struct sockaddr_in6));
1136 t_addrlen = sizeof (struct sockaddr_in6);
1137
1138 /* Set the local and remote addresses in the idm conn handle */
1139 (void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
1140 &t_addrlen, CRED());
1141 bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
1142 (void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
1143 &t_addrlen, CRED());
1144 bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
1145
1146 mutex_enter(&ic->ic_mutex);
1147 so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
1148 &p0, TS_RUN, minclsyspri);
1149 so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
1150 &p0, TS_RUN, minclsyspri);
1151
1152 while (so_conn->ic_rx_thread_did == 0 ||
1153 so_conn->ic_tx_thread_did == 0)
1154 cv_wait(&ic->ic_cv, &ic->ic_mutex);
1155 mutex_exit(&ic->ic_mutex);
1156 }
1157
1158 /*
1159 * idm_so_conn_disconnect()
1160 * Shutdown the socket connection and stop the thread
1161 */
1162 static void
1163 idm_so_conn_disconnect(idm_conn_t *ic)
1164 {
1165 idm_so_conn_t *so_conn;
1166
1167 so_conn = ic->ic_transport_private;
1168
1169 mutex_enter(&ic->ic_mutex);
1170 so_conn->ic_rx_thread_running = B_FALSE;
1171 so_conn->ic_tx_thread_running = B_FALSE;
1172 /* We need to wakeup the TX thread */
1173 mutex_enter(&so_conn->ic_tx_mutex);
1174 cv_signal(&so_conn->ic_tx_cv);
1175 mutex_exit(&so_conn->ic_tx_mutex);
1176 mutex_exit(&ic->ic_mutex);
1177
1178 /* This should wakeup the RX thread if it is sleeping */
1179 idm_soshutdown(so_conn->ic_so);
1180
1181 thread_join(so_conn->ic_tx_thread_did);
1182 thread_join(so_conn->ic_rx_thread_did);
1183 }
1184
1185 /*
1186 * idm_so_tgt_svc_create()
1187 * Establish a service on an IP address and port. idm_svc_req_t contains
1188 * the service parameters.
1189 */
1190 /*ARGSUSED*/
1191 static idm_status_t
1192 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1193 {
1194 idm_so_svc_t *so_svc;
1195
1196 so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1197
1198 /* Set the new sockets service in svc handle */
1199 is->is_so_svc = (void *)so_svc;
1200
1201 return (IDM_STATUS_SUCCESS);
1202 }
1203
1204 /*
1205 * idm_so_tgt_svc_destroy()
1206 * Teardown sockets resources allocated in idm_so_tgt_svc_create()
1207 */
1208 static void
1209 idm_so_tgt_svc_destroy(idm_svc_t *is)
1210 {
1211 /* the socket will have been torn down; free the service */
1212 kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
1213 }
1214
1215 /*
1216 * idm_so_tgt_svc_online()
1217 * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1218 */
1219
1220 static idm_status_t
1221 idm_so_tgt_svc_online(idm_svc_t *is)
1222 {
1223 idm_so_svc_t *so_svc;
1224 idm_svc_req_t *sr = &is->is_svc_req;
1225 struct sockaddr_in6 sin6_ip;
1226 const uint32_t on = 1;
1227 const uint32_t off = 0;
1228
1229 mutex_enter(&is->is_mutex);
1230 so_svc = (idm_so_svc_t *)is->is_so_svc;
1231
1232 /*
1233 * Try creating an IPv6 socket first
1234 */
1235 if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1236 mutex_exit(&is->is_mutex);
1237 return (IDM_STATUS_FAIL);
1238 } else {
1239 bzero(&sin6_ip, sizeof (sin6_ip));
1240 sin6_ip.sin6_family = AF_INET6;
1241 sin6_ip.sin6_port = htons(sr->sr_port);
1242 sin6_ip.sin6_addr = in6addr_any;
1243
1244 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1245 SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
1246 /*
1247 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1248 */
1249 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1250 SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
1251
1252 if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1253 sizeof (sin6_ip), CRED()) != 0) {
1254 mutex_exit(&is->is_mutex);
1255 idm_sodestroy(so_svc->is_so);
1256 return (IDM_STATUS_FAIL);
1257 }
1258 }
1259
1260 idm_set_postconnect_options(so_svc->is_so);
1261
1262 if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
1263 mutex_exit(&is->is_mutex);
1264 idm_soshutdown(so_svc->is_so);
1265 idm_sodestroy(so_svc->is_so);
1266 return (IDM_STATUS_FAIL);
1267 }
1268
1269 /* Launch a watch thread */
1270 so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1271 is, 0, &p0, TS_RUN, minclsyspri);
1272
1273 if (so_svc->is_thread == NULL) {
1274 /* Failure to launch; teardown the socket */
1275 mutex_exit(&is->is_mutex);
1276 idm_soshutdown(so_svc->is_so);
1277 idm_sodestroy(so_svc->is_so);
1278 return (IDM_STATUS_FAIL);
1279 }
1280 ksocket_hold(so_svc->is_so);
1281 /* Wait for the port watcher thread to start */
1282 while (!so_svc->is_thread_running)
1283 cv_wait(&is->is_cv, &is->is_mutex);
1284 mutex_exit(&is->is_mutex);
1285
1286 return (IDM_STATUS_SUCCESS);
1287 }
1288
1289 /*
1290 * idm_so_tgt_svc_offline
1291 *
1292 * Stop listening on the IP address and port identified by idm_svc_t.
1293 */
1294 static void
1295 idm_so_tgt_svc_offline(idm_svc_t *is)
1296 {
1297 idm_so_svc_t *so_svc;
1298 mutex_enter(&is->is_mutex);
1299 so_svc = (idm_so_svc_t *)is->is_so_svc;
1300 so_svc->is_thread_running = B_FALSE;
1301 mutex_exit(&is->is_mutex);
1302
1303 /*
1304 * Teardown socket
1305 */
1306 idm_sodestroy(so_svc->is_so);
1307
1308 /*
1309 * Now we expect the port watcher thread to terminate
1310 */
1311 thread_join(so_svc->is_thread_did);
1312 }
1313
1314 /*
1315 * Watch thread for target service connection establishment.
1316 */
1317 void
1318 idm_so_svc_port_watcher(void *arg)
1319 {
1320 idm_svc_t *svc = arg;
1321 ksocket_t new_so;
1322 idm_conn_t *ic;
1323 idm_status_t idmrc;
1324 idm_so_svc_t *so_svc;
1325 int rc;
1326 const uint32_t off = 0;
1327 struct sockaddr_in6 t_addr;
1328 socklen_t t_addrlen;
1329
1330 bzero(&t_addr, sizeof (struct sockaddr_in6));
1331 t_addrlen = sizeof (struct sockaddr_in6);
1332 mutex_enter(&svc->is_mutex);
1333
1334 so_svc = svc->is_so_svc;
1335 so_svc->is_thread_running = B_TRUE;
1336 so_svc->is_thread_did = so_svc->is_thread->t_did;
1337
1338 cv_signal(&svc->is_cv);
1339
1340 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
1341 svc->is_svc_req.sr_port);
1342
1343 while (so_svc->is_thread_running) {
1344 mutex_exit(&svc->is_mutex);
1345
1346 if ((rc = ksocket_accept(so_svc->is_so,
1347 (struct sockaddr *)&t_addr, &t_addrlen,
1348 &new_so, CRED())) != 0) {
1349 mutex_enter(&svc->is_mutex);
1350 if (rc != ECONNABORTED && rc != EINTR) {
1351 IDM_SVC_LOG(CE_NOTE, "idm_so_svc_port_watcher:"
1352 " ksocket_accept failed %d", rc);
1353 }
1354 /*
1355 * Unclean shutdown of this thread is not handled
1356 * wait for !is_thread_running.
1357 */
1358 continue;
1359 }
1360 /*
1361 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1362 */
1363 (void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
1364 (char *)&off, sizeof (off), CRED());
1365
1366 idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
1367 &ic);
1368 if (idmrc != IDM_STATUS_SUCCESS) {
1369 /* Drop connection */
1370 idm_soshutdown(new_so);
1371 idm_sodestroy(new_so);
1372 mutex_enter(&svc->is_mutex);
1373 continue;
1374 }
1375
1376 idmrc = idm_so_tgt_conn_create(ic, new_so);
1377 if (idmrc != IDM_STATUS_SUCCESS) {
1378 idm_svc_conn_destroy(ic);
1379 idm_soshutdown(new_so);
1380 idm_sodestroy(new_so);
1381 mutex_enter(&svc->is_mutex);
1382 continue;
1383 }
1384
1385 /*
1386 * Kick the state machine. At CS_S3_XPT_UP the state machine
1387 * will notify the client (target) about the new connection.
1388 */
1389 idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
1390
1391 mutex_enter(&svc->is_mutex);
1392 }
1393 ksocket_rele(so_svc->is_so);
1394 so_svc->is_thread_running = B_FALSE;
1395 mutex_exit(&svc->is_mutex);
1396
1397 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
1398 svc->is_svc_req.sr_port);
1399
1400 thread_exit();
1401 }
1402
/*
 * idm_so_free_task_rsrc() stops any ongoing processing of the task and
 * frees resources associated with the task.
 *
 * For initiator connections there is nothing to do.  For target connections
 * every buffer of the task that is still marked as being in the transport is
 * completed with IDM_STATUS_ABORTED, except for inbound-list buffers that
 * are flagged idb_tx_thread; those are left alone here.
 *
 * Always returns IDM_STATUS_SUCCESS.
 *
 * It's not clear that this should return idm_status_t.  What do we do
 * if it fails?
 */
static idm_status_t
idm_so_free_task_rsrc(idm_task_t *idt)
{
	idm_buf_t	*idb, *next_idb;

	/*
	 * There is nothing to cleanup on initiator connections
	 */
	if (IDM_CONN_ISINI(idt->idt_ic))
		return (IDM_STATUS_SUCCESS);

	/*
	 * If this is a target connection, call idm_buf_rx_from_ini_done for
	 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
	 *
	 * In addition, remove any buffers associated with this task from
	 * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
	 * items don't actually get removed from that list (and completion
	 * routines called) until idm_task_cleanup.
	 */
	mutex_enter(&idt->idt_mutex);

	/*
	 * The successor is fetched before the completion call because that
	 * call drops idt_mutex; the mutex is re-taken before the walk
	 * continues.
	 */
	for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
		next_idb = list_next(&idt->idt_outbufv, idb);
		if (idb->idb_in_transport) {
			/*
			 * idm_buf_rx_from_ini_done releases idt->idt_mutex
			 */
			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
			    uintptr_t, idb->idb_buf,
			    uint32_t, idb->idb_bufoffset,
			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
			    uint32_t, idb->idb_xfer_len,
			    int, XFER_BUF_RX_FROM_INI);
			idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
			mutex_enter(&idt->idt_mutex);
		}
	}

	/* Same walk for the inbound list, again saving the successor first */
	for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
		next_idb = list_next(&idt->idt_inbufv, idb);
		/*
		 * We want to remove these items from the tx_list as well,
		 * but knowing it's in the idt_inbufv list is not a guarantee
		 * that it's in the tx_list.  If it's on the tx list then
		 * let idm_sotx_thread() clean it up.
		 */
		if (idb->idb_in_transport && !idb->idb_tx_thread) {
			/*
			 * idm_buf_tx_to_ini_done releases idt->idt_mutex
			 */
			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
			    uintptr_t, idb->idb_buf,
			    uint32_t, idb->idb_bufoffset,
			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
			    uint32_t, idb->idb_xfer_len,
			    int, XFER_BUF_TX_TO_INI);
			idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
			mutex_enter(&idt->idt_mutex);
		}
	}

	mutex_exit(&idt->idt_mutex);

	return (IDM_STATUS_SUCCESS);
}
1476
/*
 * idm_so_negotiate_key_values() validates the key values for this connection
 *
 * The sockets transport performs no negotiation of its own: none of the
 * arguments is examined and KV_HANDLED is returned unconditionally.
 */
/* ARGSUSED */
static kv_status_t
idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
    nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
{
	/* All parameters are negotiated at the iscsit level */
	return (KV_HANDLED);
}
1488
1489 /*
1490 * idm_so_notice_key_values() activates the negotiated key values for
1491 * this connection.
1492 */
1493 static void
1494 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1495 {
1496 char *nvp_name;
1497 nvpair_t *nvp;
1498 nvpair_t *next_nvp;
1499 int nvrc;
1500 idm_status_t idm_status;
1501 const idm_kv_xlate_t *ikvx;
1502 uint64_t num_val;
1503
1504 for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1505 nvp != NULL; nvp = next_nvp) {
1506 next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1507 nvp_name = nvpair_name(nvp);
1508
1509 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1510 switch (ikvx->ik_key_id) {
1511 case KI_HEADER_DIGEST:
1512 case KI_DATA_DIGEST:
1513 idm_status = idm_so_handle_digest(it, nvp, ikvx);
1514 ASSERT(idm_status == 0);
1515
1516 /* Remove processed item from negotiated_nvl list */
1517 nvrc = nvlist_remove_all(
1518 negotiated_nvl, ikvx->ik_key_name);
1519 ASSERT(nvrc == 0);
1520 break;
1521 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1522 /*
1523 * Just pass the value down to idm layer.
1524 * No need to remove it from negotiated_nvl list here.
1525 */
1526 nvrc = nvpair_value_uint64(nvp, &num_val);
1527 ASSERT(nvrc == 0);
1528 it->ic_conn_params.max_xmit_dataseglen =
1529 (uint32_t)num_val;
1530 break;
1531 default:
1532 break;
1533 }
1534 }
1535 }
1536
1537 /*
1538 * idm_so_declare_key_values() declares the key values for this connection
1539 */
1540 /* ARGSUSED */
1541 static kv_status_t
1542 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1543 nvlist_t *outgoing_nvl)
1544 {
1545 char *nvp_name;
1546 nvpair_t *nvp;
1547 nvpair_t *next_nvp;
1548 kv_status_t kvrc;
1549 int nvrc = 0;
1550 const idm_kv_xlate_t *ikvx;
1551 uint64_t num_val;
1552
1553 for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1554 nvp != NULL && nvrc == 0; nvp = next_nvp) {
1555 next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1556 nvp_name = nvpair_name(nvp);
1557
1558 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1559 switch (ikvx->ik_key_id) {
1560 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1561 if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1562 break;
1563 }
1564 if (outgoing_nvl &&
1565 (nvrc = nvlist_add_uint64(outgoing_nvl,
1566 nvp_name, num_val)) != 0) {
1567 break;
1568 }
1569 it->ic_conn_params.max_recv_dataseglen =
1570 (uint32_t)num_val;
1571 break;
1572 default:
1573 break;
1574 }
1575 }
1576 kvrc = idm_nvstat_to_kvstat(nvrc);
1577 return (kvrc);
1578 }
1579
1580 static idm_status_t
1581 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1582 const idm_kv_xlate_t *ikvx)
1583 {
1584 int nvrc;
1585 char *digest_choice_string;
1586
1587 nvrc = nvpair_value_string(digest_choice,
1588 &digest_choice_string);
1589 ASSERT(nvrc == 0);
1590 if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1591 switch (ikvx->ik_key_id) {
1592 case KI_HEADER_DIGEST:
1593 it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1594 break;
1595 case KI_DATA_DIGEST:
1596 it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1597 break;
1598 default:
1599 ASSERT(0);
1600 break;
1601 }
1602 } else if (strcasecmp(digest_choice_string, "none") == 0) {
1603 switch (ikvx->ik_key_id) {
1604 case KI_HEADER_DIGEST:
1605 it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1606 break;
1607 case KI_DATA_DIGEST:
1608 it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1609 break;
1610 default:
1611 ASSERT(0);
1612 break;
1613 }
1614 } else {
1615 ASSERT(0);
1616 }
1617
1618 return (IDM_STATUS_SUCCESS);
1619 }
1620
1621
/*
 * idm_so_conn_is_capable() verifies that the passed connection is provided
 * for by the sockets interface.
 *
 * Neither the connection request nor the capability structure is examined;
 * the function returns B_TRUE for every request.
 */
/* ARGSUSED */
static boolean_t
idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
{
	return (B_TRUE);
}
1632
1633 /*
1634 * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1635 * idm_sorecv_scsidata() function invoked earlier actually reads the data
1636 * off the socket into the appropriate buffers.
1637 */
1638 static void
1639 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1640 {
1641 iscsi_data_hdr_t *bhs;
1642 idm_task_t *idt;
1643 idm_buf_t *idb;
1644 uint32_t datasn;
1645 size_t offset;
1646 iscsi_hdr_t *ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1647 iscsi_data_rsp_hdr_t *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1648
1649 ASSERT(ic != NULL);
1650 ASSERT(pdu != NULL);
1651 ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP);
1652
1653 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1654 datasn = ntohl(bhs->datasn);
1655 offset = ntohl(bhs->offset);
1656
1657 /*
1658 * Look up the task corresponding to the initiator task tag
1659 * to get the buffers affiliated with the task.
1660 */
1661 idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1662 if (idt == NULL) {
1663 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1664 idm_pdu_rx_protocol_error(ic, pdu);
1665 return;
1666 }
1667
1668 idb = pdu->isp_sorx_buf;
1669 if (idb == NULL) {
1670 IDM_CONN_LOG(CE_WARN,
1671 "idm_so_rx_datain: failed to find buffer");
1672 idm_task_rele(idt);
1673 idm_pdu_rx_protocol_error(ic, pdu);
1674 return;
1675 }
1676
1677 /*
1678 * DataSN values should be sequential and should not have any gaps or
1679 * repetitions. Check the DataSN with the one stored in the task.
1680 */
1681 if (datasn == idt->idt_exp_datasn) {
1682 idt->idt_exp_datasn++; /* keep track of DataSN received */
1683 } else {
1684 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1685 idm_task_rele(idt);
1686 idm_pdu_rx_protocol_error(ic, pdu);
1687 return;
1688 }
1689
1690 /*
1691 * PDUs in a sequence should be in continuously increasing
1692 * address offset
1693 */
1694 if (offset != idb->idb_exp_offset) {
1695 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1696 idm_task_rele(idt);
1697 idm_pdu_rx_protocol_error(ic, pdu);
1698 return;
1699 }
1700 /* Expected next relative buffer offset */
1701 idb->idb_exp_offset += n2h24(bhs->dlength);
1702 idt->idt_rx_bytes += n2h24(bhs->dlength);
1703
1704 idm_task_rele(idt);
1705
1706 /*
1707 * For now call scsi_rsp which will process the data rsp
1708 * Revisit, need to provide an explicit client entry point for
1709 * phase collapse completions.
1710 */
1711 if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) &&
1712 (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1713 (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1714 }
1715
1716 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1717 }
1718
1719 /*
1720 * The idm_so_rx_dataout() function is used by the iSCSI target to read
1721 * data from the Data-Out PDU sent by the iSCSI initiator.
1722 *
1723 * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1724 * task to get the buffers associated with the PDU. A PDU might span buffers.
1725 * The data is then read into the respective buffer.
1726 */
1727 static void
1728 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1729 {
1730
1731 iscsi_data_hdr_t *bhs;
1732 idm_task_t *idt;
1733 idm_buf_t *idb;
1734 size_t offset;
1735
1736 ASSERT(ic != NULL);
1737 ASSERT(pdu != NULL);
1738 ASSERT(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA);
1739
1740 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1741 offset = ntohl(bhs->offset);
1742
1743 /*
1744 * Look up the task corresponding to the initiator task tag
1745 * to get the buffers affiliated with the task.
1746 */
1747 idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1748 if (idt == NULL) {
1749 IDM_CONN_LOG(CE_WARN,
1750 "idm_so_rx_dataout: failed to find task");
1751 idm_pdu_rx_protocol_error(ic, pdu);
1752 return;
1753 }
1754
1755 idb = pdu->isp_sorx_buf;
1756 if (idb == NULL) {
1757 IDM_CONN_LOG(CE_WARN,
1758 "idm_so_rx_dataout: failed to find buffer");
1759 idm_task_rele(idt);
1760 idm_pdu_rx_protocol_error(ic, pdu);
1761 return;
1762 }
1763
1764 /* Keep track of data transferred - check data offsets */
1765 if (offset != idb->idb_exp_offset) {
1766 IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1767 "%ld, %d", offset, idb->idb_exp_offset);
1768 idm_task_rele(idt);
1769 idm_pdu_rx_protocol_error(ic, pdu);
1770 return;
1771 }
1772 /* Expected next relative offset */
1773 idb->idb_exp_offset += ntoh24(bhs->dlength);
1774 idt->idt_rx_bytes += n2h24(bhs->dlength);
1775
1776 /*
1777 * Call the buffer callback when the transfer is complete
1778 *
1779 * The connection state machine should only abort tasks after
1780 * shutting down the connection so we are assured that there
1781 * won't be a simultaneous attempt to abort this task at the
1782 * same time as we are processing this PDU (due to a connection
1783 * state change).
1784 */
1785 if (bhs->flags & ISCSI_FLAG_FINAL) {
1786 /*
1787 * We have gotten the last data-message for the current
1788 * transfer. idb_xfer_len represents the data that the
1789 * command intended to transfer, it does not represent the
1790 * actual number of bytes transferred. If we have not
1791 * transferred the expected number of bytes something is
1792 * wrong.
1793 *
1794 * We have two options, when there is a mismatch, we can
1795 * regard the transfer as invalid -- or we can modify our
1796 * notion of "xfer_len." In order to be as stringent as
1797 * possible, here we regard this transfer as in error; and
1798 * bail out.
1799 */
1800 if (idb->idb_buflen == idb->idb_xfer_len &&
1801 idb->idb_buflen !=
1802 (idb->idb_exp_offset - idb->idb_bufoffset)) {
1803 printf("idm_so_rx_dataout: incomplete transfer, "
1804 "protocol err");
1805 IDM_CONN_LOG(CE_NOTE,
1806 "idm_so_rx_dataout: incomplete transfer: %ld, %d",
1807 offset, (int)(idb->idb_exp_offset - offset));
1808 idm_task_rele(idt);
1809 idm_pdu_rx_protocol_error(ic, pdu);
1810 return;
1811 }
1812 /*
1813 * We only want to call idm_buf_rx_from_ini_done once
1814 * per transfer. It's possible that this task has
1815 * already been aborted in which case
1816 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1817 * for each buffer with idb_in_transport==B_TRUE. To
1818 * close this window and ensure that this doesn't happen,
1819 * we'll clear idb->idb_in_transport now while holding
1820 * the task mutex. This is only really an issue for
1821 * SCSI task abort -- if tasks were being aborted because
1822 * of a connection state change the state machine would
1823 * have already stopped the receive thread.
1824 */
1825 mutex_enter(&idt->idt_mutex);
1826
1827 /*
1828 * Release the task hold here (obtained in idm_task_find)
1829 * because the task may complete synchronously during
1830 * idm_buf_rx_from_ini_done. Since we still have an active
1831 * buffer we know there is at least one additional hold on idt.
1832 */
1833 idm_task_rele(idt);
1834
1835 /*
1836 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1837 */
1838 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1839 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
1840 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1841 uint32_t, idb->idb_xfer_len,
1842 int, XFER_BUF_RX_FROM_INI);
1843 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1844 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1845 return;
1846 }
1847
1848 idm_task_rele(idt);
1849 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1850 }
1851
1852 /*
1853 * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1854 * the R2T PDU sent by the iSCSI target indicating that it is ready to
1855 * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1856 * and looks up the task in the task tree using the itt to get the output
1857 * buffers associated the task. The R2T PDU contains the offset of the
1858 * requested data and the data length. This function then constructs a
1859 * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1860 * PDU is associated with the R2T by the Target Transfer Tag (ttt).
1861 */
1862
1863 static void
1864 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1865 {
1866 idm_task_t *idt;
1867 idm_buf_t *idb;
1868 iscsi_rtt_hdr_t *rtt_hdr;
1869 uint32_t data_offset;
1870 uint32_t data_length;
1871
1872 ASSERT(ic != NULL);
1873 ASSERT(pdu != NULL);
1874
1875 rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1876 data_offset = ntohl(rtt_hdr->data_offset);
1877 data_length = ntohl(rtt_hdr->data_length);
1878 idt = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1879
1880 if (idt == NULL) {
1881 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1882 idm_pdu_rx_protocol_error(ic, pdu);
1883 return;
1884 }
1885
1886 /* Find the buffer bound to the task by the iSCSI initiator */
1887 mutex_enter(&idt->idt_mutex);
1888 idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1889 if (idb == NULL) {
1890 mutex_exit(&idt->idt_mutex);
1891 idm_task_rele(idt);
1892 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1893 idm_pdu_rx_protocol_error(ic, pdu);
1894 return;
1895 }
1896
1897 /* return buffer contains this data */
1898 if (data_offset + data_length > idb->idb_buflen) {
1899 /* Overflow */
1900 mutex_exit(&idt->idt_mutex);
1901 idm_task_rele(idt);
1902 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1903 "buffer");
1904 idm_pdu_rx_protocol_error(ic, pdu);
1905 return;
1906 }
1907
1908 idt->idt_r2t_ttt = rtt_hdr->ttt;
1909 idt->idt_exp_datasn = 0;
1910
1911 idm_so_send_rtt_data(ic, idt, idb, data_offset,
1912 ntohl(rtt_hdr->data_length));
1913 /*
1914 * the idt_mutex is released in idm_so_send_rtt_data
1915 */
1916
1917 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1918 idm_task_rele(idt);
1919
1920 }
1921
1922 idm_status_t
1923 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1924 {
1925 uint8_t pad[ISCSI_PAD_WORD_LEN];
1926 int pad_len;
1927 uint32_t data_digest_crc;
1928 uint32_t crc_calculated;
1929 int total_len;
1930 idm_so_conn_t *so_conn;
1931
1932 so_conn = ic->ic_transport_private;
1933
1934 pad_len = ((ISCSI_PAD_WORD_LEN -
1935 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1936 (ISCSI_PAD_WORD_LEN - 1));
1937
1938 ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1939
1940 total_len = pdu->isp_datalen;
1941
1942 if (pad_len) {
1943 pdu->isp_iov[pdu->isp_iovlen].iov_base = (char *)&pad;
1944 pdu->isp_iov[pdu->isp_iovlen].iov_len = pad_len;
1945 total_len += pad_len;
1946 pdu->isp_iovlen++;
1947 }
1948
1949 /* setup data digest */
1950 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1951 pdu->isp_iov[pdu->isp_iovlen].iov_base =
1952 (char *)&data_digest_crc;
1953 pdu->isp_iov[pdu->isp_iovlen].iov_len =
1954 sizeof (data_digest_crc);
1955 total_len += sizeof (data_digest_crc);
1956 pdu->isp_iovlen++;
1957 }
1958
1959 pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1960
1961 if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1962 pdu->isp_iovlen, total_len) != 0) {
1963 return (IDM_STATUS_IO);
1964 }
1965
1966 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1967 crc_calculated = idm_crc32c(pdu->isp_data,
1968 pdu->isp_datalen);
1969 if (pad_len) {
1970 crc_calculated = idm_crc32c_continued((char *)&pad,
1971 pad_len, crc_calculated);
1972 }
1973 if (crc_calculated != data_digest_crc) {
1974 IDM_CONN_LOG(CE_WARN,
1975 "idm_sorecvdata: "
1976 "CRC error: actual 0x%x, calc 0x%x",
1977 data_digest_crc, crc_calculated);
1978
1979 /* Invalid Data Digest */
1980 return (IDM_STATUS_DATA_DIGEST);
1981 }
1982 }
1983
1984 return (IDM_STATUS_SUCCESS);
1985 }
1986
1987 /*
1988 * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1989 * Data-type PDU header must be read into the idm_pdu_t structure prior to
1990 * calling this function.
1991 */
1992 idm_status_t
1993 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1994 {
1995 iscsi_data_hdr_t *bhs;
1996 idm_task_t *task;
1997 uint32_t offset;
1998 uint8_t opcode;
1999 uint32_t dlength;
2000 list_t *buflst;
2001 uint32_t xfer_bytes;
2002 idm_status_t status;
2003
2004 ASSERT(ic != NULL);
2005 ASSERT(pdu != NULL);
2006
2007 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
2008
2009 offset = ntohl(bhs->offset);
2010 opcode = IDM_PDU_OPCODE(pdu);
2011 dlength = n2h24(bhs->dlength);
2012
2013 ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
2014 (opcode == ISCSI_OP_SCSI_DATA));
2015
2016 /*
2017 * Successful lookup implicitly gets a "hold" on the task. This
2018 * hold must be released before leaving this function. At one
2019 * point we were caching this task context and retaining the hold
2020 * but it turned out to be very difficult to release the hold properly.
2021 * The task can be aborted and the connection shutdown between this
2022 * call and the subsequent expected call to idm_so_rx_datain/
2023 * idm_so_rx_dataout (in which case those functions are not called).
2024 * Releasing the hold in the PDU callback doesn't work well either
2025 * because the whole task may be completed by then at which point
2026 * it is too late to release the hold -- for better or worse this
2027 * code doesn't wait on the refcnts during normal operation.
2028 * idm_task_find() is very fast and it is not a huge burden if we
2029 * have to do it twice.
2030 */
2031 task = idm_task_find(ic, bhs->itt, bhs->ttt);
2032 if (task == NULL) {
2033 IDM_CONN_LOG(CE_WARN,
2034 "idm_sorecv_scsidata: could not find task");
2035 return (IDM_STATUS_FAIL);
2036 }
2037
2038 mutex_enter(&task->idt_mutex);
2039 buflst = (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
2040 &task->idt_inbufv : &task->idt_outbufv;
2041 pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
2042 mutex_exit(&task->idt_mutex);
2043
2044 if (pdu->isp_sorx_buf == NULL) {
2045 idm_task_rele(task);
2046 IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
2047 "buffer for offset %x opcode=%x",
2048 offset, opcode);
2049 return (IDM_STATUS_FAIL);
2050 }
2051
2052 xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
2053 ASSERT(xfer_bytes != 0);
2054 if (xfer_bytes != dlength) {
2055 idm_task_rele(task);
2056 /*
2057 * Buffer overflow, connection error. The PDU data is still
2058 * sitting in the socket so we can't use the connection
2059 * again until that data is drained.
2060 */
2061 return (IDM_STATUS_FAIL);
2062 }
2063
2064 status = idm_sorecvdata(ic, pdu);
2065
2066 idm_task_rele(task);
2067
2068 return (status);
2069 }
2070
2071 static uint32_t
2072 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
2073 {
2074 uint32_t buf_ro = ro - idb->idb_bufoffset;
2075 uint32_t xfer_len = min(dlength, idb->idb_buflen - buf_ro);
2076
2077 ASSERT(ro >= idb->idb_bufoffset);
2078
2079 pdu->isp_iov[pdu->isp_iovlen].iov_base =
2080 (caddr_t)idb->idb_buf + buf_ro;
2081 pdu->isp_iov[pdu->isp_iovlen].iov_len = xfer_len;
2082 pdu->isp_iovlen++;
2083
2084 return (xfer_len);
2085 }
2086
2087 int
2088 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
2089 {
2090 pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
2091 ASSERT(pdu->isp_data != NULL);
2092
2093 pdu->isp_databuflen = pdu->isp_datalen;
2094 pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
2095 pdu->isp_iov[0].iov_len = pdu->isp_datalen;
2096 pdu->isp_iovlen = 1;
2097 /*
2098 * Since we are associating a new data buffer with this received
2099 * PDU we need to set a specific callback to free the data
2100 * after the PDU is processed.
2101 */
2102 pdu->isp_flags |= IDM_PDU_ADDL_DATA;
2103 pdu->isp_callback = idm_sorx_addl_pdu_cb;
2104
2105 return (idm_sorecvdata(ic, pdu));
2106 }
2107
2108 void
2109 idm_sorx_thread(void *arg)
2110 {
2111 boolean_t conn_failure = B_FALSE;
2112 idm_conn_t *ic = (idm_conn_t *)arg;
2113 idm_so_conn_t *so_conn;
2114 idm_pdu_t *pdu;
2115 idm_status_t rc;
2116
2117 idm_conn_hold(ic);
2118
2119 mutex_enter(&ic->ic_mutex);
2120
2121 so_conn = ic->ic_transport_private;
2122 so_conn->ic_rx_thread_running = B_TRUE;
2123 so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
2124 cv_signal(&ic->ic_cv);
2125
2126 while (so_conn->ic_rx_thread_running) {
2127 mutex_exit(&ic->ic_mutex);
2128
2129 /*
2130 * Get PDU with default header size (large enough for
2131 * BHS plus any anticipated AHS). PDU from
2132 * the cache will have all values set correctly
2133 * for sockets RX including callback.
2134 */
2135 pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
2136 pdu->isp_ic = ic;
2137 pdu->isp_flags = 0;
2138 pdu->isp_transport_hdrlen = 0;
2139
2140 if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
2141 /*
2142 * Call idm_pdu_complete so that we call the callback
2143 * and ensure any memory allocated in idm_sorecvhdr
2144 * gets freed up.
2145 */
2146 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2147
2148 /*
2149 * If ic_rx_thread_running is still set then
2150 * this is some kind of connection problem
2151 * on the socket. In this case we want to
2152 * generate an event. Otherwise some other
2153 * thread closed the socket due to another
2154 * issue in which case we don't need to
2155 * generate an event.
2156 */
2157 mutex_enter(&ic->ic_mutex);
2158 if (so_conn->ic_rx_thread_running) {
2159 conn_failure = B_TRUE;
2160 so_conn->ic_rx_thread_running = B_FALSE;
2161 }
2162
2163 continue;
2164 }
2165
2166 /*
2167 * Header has been read and validated. Now we need
2168 * to read the PDU data payload (if present). SCSI data
2169 * need to be transferred from the socket directly into
2170 * the associated transfer buffer for the SCSI task.
2171 */
2172 if (pdu->isp_datalen != 0) {
2173 if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
2174 (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
2175 rc = idm_sorecv_scsidata(ic, pdu);
2176 /*
2177 * All SCSI errors are fatal to the
2178 * connection right now since we have no
2179 * place to put the data. What we need
2180 * is some kind of sink to dispose of unwanted
2181 * SCSI data. For example an invalid task tag
2182 * should not kill the connection (although
2183 * we may want to drop the connection).
2184 */
2185 } else {
2186 /*
2187 * Not data PDUs so allocate a buffer for the
2188 * data segment and read the remaining data.
2189 */
2190 rc = idm_sorecv_nonscsidata(ic, pdu);
2191 }
2192 if (rc != 0) {
2193 /*
2194 * Call idm_pdu_complete so that we call the
2195 * callback and ensure any memory allocated
2196 * in idm_sorecvhdr gets freed up.
2197 */
2198 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2199
2200 /*
2201 * If ic_rx_thread_running is still set then
2202 * this is some kind of connection problem
2203 * on the socket. In this case we want to
2204 * generate an event. Otherwise some other
2205 * thread closed the socket due to another
2206 * issue in which case we don't need to
2207 * generate an event.
2208 */
2209 mutex_enter(&ic->ic_mutex);
2210 if (so_conn->ic_rx_thread_running) {
2211 conn_failure = B_TRUE;
2212 so_conn->ic_rx_thread_running = B_FALSE;
2213 }
2214 continue;
2215 }
2216 }
2217
2218 /*
2219 * Process RX PDU
2220 */
2221 idm_pdu_rx(ic, pdu);
2222
2223 mutex_enter(&ic->ic_mutex);
2224 }
2225
2226 mutex_exit(&ic->ic_mutex);
2227
2228 /*
2229 * If we dropped out of the RX processing loop because of
2230 * a socket problem or other connection failure (including
2231 * digest errors) then we need to generate a state machine
2232 * event to shut the connection down.
2233 * If the state machine is already in, for example, INIT_ERROR, this
2234 * event will get dropped, and the TX thread will never be notified
2235 * to shut down. To be safe, we'll just notify it here.
2236 */
2237 if (conn_failure) {
2238 if (so_conn->ic_tx_thread_running) {
2239 so_conn->ic_tx_thread_running = B_FALSE;
2240 mutex_enter(&so_conn->ic_tx_mutex);
2241 cv_signal(&so_conn->ic_tx_cv);
2242 mutex_exit(&so_conn->ic_tx_mutex);
2243 }
2244
2245 idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
2246 }
2247
2248 idm_conn_rele(ic);
2249
2250 thread_exit();
2251 }
2252
2253 /*
2254 * idm_so_tx
2255 *
2256 * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2257 * point. By definition, it is supposed to be fast. So, simply queue
2258 * the entry and return. The real work is done by idm_i_so_tx() via
2259 * idm_sotx_thread().
2260 */
2261
2262 static void
2263 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2264 {
2265 idm_so_conn_t *so_conn = ic->ic_transport_private;
2266
2267 ASSERT(pdu->isp_ic == ic);
2268 mutex_enter(&so_conn->ic_tx_mutex);
2269
2270 if (!so_conn->ic_tx_thread_running) {
2271 mutex_exit(&so_conn->ic_tx_mutex);
2272 idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2273 return;
2274 }
2275
2276 list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2277 cv_signal(&so_conn->ic_tx_cv);
2278 mutex_exit(&so_conn->ic_tx_mutex);
2279 }
2280
2281 static idm_status_t
2282 idm_i_so_tx(idm_pdu_t *pdu)
2283 {
2284 idm_conn_t *ic = pdu->isp_ic;
2285 idm_status_t status = IDM_STATUS_SUCCESS;
2286 uint8_t pad[ISCSI_PAD_WORD_LEN];
2287 int pad_len;
2288 uint32_t hdr_digest_crc;
2289 uint32_t data_digest_crc = 0;
2290 int total_len = 0;
2291 int iovlen = 0;
2292 struct iovec iov[6];
2293 idm_so_conn_t *so_conn;
2294
2295 so_conn = ic->ic_transport_private;
2296
2297 /* Setup BHS */
2298 iov[iovlen].iov_base = (caddr_t)pdu->isp_hdr;
2299 iov[iovlen].iov_len = pdu->isp_hdrlen;
2300 total_len += iov[iovlen].iov_len;
2301 iovlen++;
2302
2303 /* Setup header digest */
2304 if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2305 (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2306 hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2307
2308 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
2309 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
2310 total_len += iov[iovlen].iov_len;
2311 iovlen++;
2312 }
2313
2314 /* Setup the data */
2315 if (pdu->isp_datalen) {
2316 idm_task_t *idt;
2317 idm_buf_t *idb;
2318 iscsi_data_hdr_t *ihp;
2319 ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2320 /* Write of immediate data */
2321 if (ic->ic_ffp &&
2322 (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_CMD ||
2323 IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA)) {
2324 idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2325 if (idt) {
2326 mutex_enter(&idt->idt_mutex);
2327 idb = idm_buf_find(&idt->idt_outbufv, 0);
2328 mutex_exit(&idt->idt_mutex);
2329 /*
2330 * If the initiator call to idm_buf_alloc
2331 * failed then we can get to this point
2332 * without a bound buffer. The associated
2333 * connection failure will clean things up
2334 * later. It would be nice to come up with
2335 * a cleaner way to handle this. In
2336 * particular it seems absurd to look up
2337 * the task and the buffer just to update
2338 * this counter.
2339 */
2340 if (idb)
2341 idb->idb_xfer_len += pdu->isp_datalen;
2342 idm_task_rele(idt);
2343 }
2344 }
2345
2346 iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2347 iov[iovlen].iov_len = pdu->isp_datalen;
2348 total_len += iov[iovlen].iov_len;
2349 iovlen++;
2350 }
2351
2352 /* Setup the data pad if necessary */
2353 pad_len = ((ISCSI_PAD_WORD_LEN -
2354 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2355 (ISCSI_PAD_WORD_LEN - 1));
2356
2357 if (pad_len) {
2358 bzero(pad, sizeof (pad));
2359 iov[iovlen].iov_base = (void *)&pad;
2360 iov[iovlen].iov_len = pad_len;
2361 total_len += iov[iovlen].iov_len;
2362 iovlen++;
2363 }
2364
2365 /*
2366 * Setup the data digest if enabled. Data-digest is not sent
2367 * for login-phase PDUs.
2368 */
2369 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2370 ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2371 (pdu->isp_datalen || pad_len)) {
2372 /*
2373 * RFC3720/10.2.3: A zero-length Data Segment also
2374 * implies a zero-length data digest.
2375 */
2376 if (pdu->isp_datalen) {
2377 data_digest_crc = idm_crc32c(pdu->isp_data,
2378 pdu->isp_datalen);
2379 }
2380 if (pad_len) {
2381 data_digest_crc = idm_crc32c_continued(&pad,
2382 pad_len, data_digest_crc);
2383 }
2384
2385 iov[iovlen].iov_base = (caddr_t)&data_digest_crc;
2386 iov[iovlen].iov_len = sizeof (data_digest_crc);
2387 total_len += iov[iovlen].iov_len;
2388 iovlen++;
2389 }
2390
2391 /* Transmit the PDU */
2392 if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2393 total_len) != 0) {
2394 /* Set error status */
2395 IDM_CONN_LOG(CE_WARN,
2396 "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2397 "data: %p", (void *) so_conn->ic_so, (void *) ic,
2398 (void *) pdu->isp_data);
2399 status = IDM_STATUS_IO;
2400 }
2401
2402 /*
2403 * Success does not mean that the PDU actually reached the
2404 * remote node since it could get dropped along the way.
2405 */
2406 idm_pdu_complete(pdu, status);
2407
2408 return (status);
2409 }
2410
2411 /*
2412 * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2413 * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2414 * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2415 * A target can invoke this function multiple times for a single read command
2416 * (identified by the same ITT) to split the input into several sequences.
2417 *
2418 * DataSN starts with 0 for the first data PDU of an input command and advances
2419 * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2420 * which is set to 1 for the last data PDU of a sequence.
2421 * If the initiator supports phase collapse, the status bit must be set along
2422 * with the F bit to indicate that the status is shipped together with the last
2423 * Data-In PDU.
2424 *
2425 * The data PDUs within a sequence will be sent in order with the buffer offset
2426 * in increasing order. i.e. initiator and target must have negotiated the
2427 * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2428 *
2429 * Caller holds idt->idt_mutex
2430 */
2431 static idm_status_t
2432 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2433 {
2434 idm_so_conn_t *so_conn = idb->idb_ic->ic_transport_private;
2435 idm_pdu_t tmppdu;
2436
2437 ASSERT(mutex_owned(&idt->idt_mutex));
2438
2439 /*
2440 * Put the idm_buf_t on the tx queue. It will be transmitted by
2441 * idm_sotx_thread.
2442 */
2443 mutex_enter(&so_conn->ic_tx_mutex);
2444
2445 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2446 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2447 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2448 uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2449
2450 if (!so_conn->ic_tx_thread_running) {
2451 mutex_exit(&so_conn->ic_tx_mutex);
2452 /*
2453 * Don't release idt->idt_mutex since we're supposed to hold
2454 * in when calling idm_buf_tx_to_ini_done
2455 */
2456 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2457 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2458 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2459 uint32_t, idb->idb_xfer_len,
2460 int, XFER_BUF_TX_TO_INI);
2461 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2462 return (IDM_STATUS_FAIL);
2463 }
2464
2465 /*
2466 * Build a template for the data PDU headers we will use so that
2467 * the SN values will stay consistent with other PDU's we are
2468 * transmitting like R2T and SCSI status.
2469 */
2470 bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2471 tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2472 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2473 ISCSI_OP_SCSI_DATA_RSP);
2474 idb->idb_tx_thread = B_TRUE;
2475 list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2476 cv_signal(&so_conn->ic_tx_cv);
2477 mutex_exit(&so_conn->ic_tx_mutex);
2478 mutex_exit(&idt->idt_mutex);
2479
2480 /*
2481 * Returning success here indicates the transfer was successfully
2482 * dispatched -- it does not mean that the transfer completed
2483 * successfully.
2484 */
2485 return (IDM_STATUS_SUCCESS);
2486 }
2487
2488 /*
2489 * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2490 * data blocks it is ready to receive from the initiator in response to a WRITE
2491 * SCSI command. The target iSCSI layer passes the information about the desired
2492 * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2493 * offset and datalen are passed via the 'idb' argument.
2494 *
2495 * Scope for Prototype build:
2496 * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2497 * negotiated the "InitialR2T" to "Yes".
2498 *
2499 * Caller holds idt->idt_mutex
2500 */
2501 static idm_status_t
2502 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2503 {
2504 idm_pdu_t *pdu;
2505 iscsi_rtt_hdr_t *rtt;
2506
2507 ASSERT(mutex_owned(&idt->idt_mutex));
2508
2509 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2510 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2511 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2512 uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2513
2514 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2515 pdu->isp_ic = idt->idt_ic;
2516 pdu->isp_flags = IDM_PDU_SET_STATSN;
2517 bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2518
2519 /* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
2520 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2521
2522 /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2523 rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2524
2525 rtt->opcode = ISCSI_OP_RTT_RSP;
2526 rtt->flags = ISCSI_FLAG_FINAL;
2527 rtt->data_offset = htonl(idb->idb_bufoffset);
2528 rtt->data_length = htonl(idb->idb_xfer_len);
2529 rtt->rttsn = htonl(idt->idt_exp_rttsn++);
2530
2531 /* Keep track of buffer offsets */
2532 idb->idb_exp_offset = idb->idb_bufoffset;
2533 mutex_exit(&idt->idt_mutex);
2534
2535 /*
2536 * Transmit the PDU.
2537 */
2538 idm_pdu_tx(pdu);
2539
2540 return (IDM_STATUS_SUCCESS);
2541 }
2542
2543 static idm_status_t
2544 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2545 {
2546 if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2547 idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2548 KM_NOSLEEP);
2549 idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2550 } else {
2551 idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2552 idb->idb_buf_private = NULL;
2553 }
2554
2555 if (idb->idb_buf == NULL) {
2556 IDM_CONN_LOG(CE_NOTE,
2557 "idm_so_buf_alloc: failed buffer allocation");
2558 return (IDM_STATUS_FAIL);
2559 }
2560
2561 return (IDM_STATUS_SUCCESS);
2562 }
2563
2564 /* ARGSUSED */
2565 static idm_status_t
2566 idm_so_buf_setup(idm_buf_t *idb)
2567 {
2568 /* Ensure bufalloc'd flag is unset */
2569 idb->idb_bufalloc = B_FALSE;
2570
2571 return (IDM_STATUS_SUCCESS);
2572 }
2573
2574 /* ARGSUSED */
2575 static void
2576 idm_so_buf_teardown(idm_buf_t *idb)
2577 {
2578 /* nothing to do here */
2579 }
2580
2581 static void
2582 idm_so_buf_free(idm_buf_t *idb)
2583 {
2584 if (idb->idb_buf_private == NULL) {
2585 kmem_free(idb->idb_buf, idb->idb_buflen);
2586 } else {
2587 kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2588 }
2589 }
2590
2591 static void
2592 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2593 uint32_t offset, uint32_t length)
2594 {
2595 idm_so_conn_t *so_conn = ic->ic_transport_private;
2596 idm_pdu_t tmppdu;
2597 idm_buf_t *rtt_buf;
2598
2599 ASSERT(mutex_owned(&idt->idt_mutex));
2600
2601 /*
2602 * Allocate a buffer to represent the RTT transfer. We could further
2603 * optimize this by allocating the buffers internally from an rtt
2604 * specific buffer cache since this is socket-specific code but for
2605 * now we will keep it simple.
2606 */
2607 rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2608 if (rtt_buf == NULL) {
2609 /*
2610 * If we're in FFP then the failure was likely a resource
2611 * allocation issue and we should close the connection by
2612 * sending a CE_TRANSPORT_FAIL event.
2613 *
2614 * If we're not in FFP then idm_buf_alloc will always
2615 * fail and the state is transitioning to "complete" anyway
2616 * so we won't bother to send an event.
2617 */
2618 mutex_enter(&ic->ic_state_mutex);
2619 if (ic->ic_ffp)
2620 idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2621 NULL, CT_NONE);
2622 mutex_exit(&ic->ic_state_mutex);
2623 mutex_exit(&idt->idt_mutex);
2624 return;
2625 }
2626
2627 rtt_buf->idb_buf_cb = NULL;
2628 rtt_buf->idb_cb_arg = NULL;
2629 rtt_buf->idb_bufoffset = offset;
2630 rtt_buf->idb_xfer_len = length;
2631 rtt_buf->idb_ic = idt->idt_ic;
2632 rtt_buf->idb_task_binding = idt;
2633
2634 /*
2635 * The new buffer (if any) represents an additional
2636 * reference on the task
2637 */
2638 idm_task_hold(idt);
2639 mutex_exit(&idt->idt_mutex);
2640
2641 /*
2642 * Put the idm_buf_t on the tx queue. It will be transmitted by
2643 * idm_sotx_thread.
2644 */
2645 mutex_enter(&so_conn->ic_tx_mutex);
2646
2647 if (!so_conn->ic_tx_thread_running) {
2648 idm_buf_free(rtt_buf);
2649 mutex_exit(&so_conn->ic_tx_mutex);
2650 idm_task_rele(idt);
2651 return;
2652 }
2653
2654 /*
2655 * Build a template for the data PDU headers we will use so that
2656 * the SN values will stay consistent with other PDU's we are
2657 * transmitting like R2T and SCSI status.
2658 */
2659 bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2660 tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2661 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2662 ISCSI_OP_SCSI_DATA);
2663 rtt_buf->idb_tx_thread = B_TRUE;
2664 rtt_buf->idb_in_transport = B_TRUE;
2665 list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2666 cv_signal(&so_conn->ic_tx_cv);
2667 mutex_exit(&so_conn->ic_tx_mutex);
2668 }
2669
2670 static void
2671 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
2672 {
2673 /*
2674 * Don't worry about status -- we assume any error handling
2675 * is performed by the caller (idm_sotx_thread).
2676 */
2677 idb->idb_in_transport = B_FALSE;
2678 idm_task_rele(idt);
2679 idm_buf_free(idb);
2680 }
2681
2682 static idm_status_t
2683 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
2684 uint32_t buf_region_offset, uint32_t buf_region_length)
2685 {
2686 idm_conn_t *ic;
2687 uint32_t max_dataseglen;
2688 size_t remainder, chunk;
2689 uint32_t data_offset = buf_region_offset;
2690 iscsi_data_hdr_t *bhs;
2691 idm_pdu_t *pdu;
2692 idm_status_t tx_status;
2693
2694 ASSERT(mutex_owned(&idt->idt_mutex));
2695
2696 ic = idt->idt_ic;
2697
2698 max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
2699 remainder = buf_region_length;
2700
2701 while (remainder) {
2702 if (idt->idt_state != TASK_ACTIVE) {
2703 ASSERT((idt->idt_state != TASK_IDLE) &&
2704 (idt->idt_state != TASK_COMPLETE));
2705 return (IDM_STATUS_ABORTED);
2706 }
2707
2708 /* check to see if we need to chunk the data */
2709 if (remainder > max_dataseglen) {
2710 chunk = max_dataseglen;
2711 } else {
2712 chunk = remainder;
2713 }
2714
2715 /* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2716 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2717 pdu->isp_ic = ic;
2718 pdu->isp_flags = 0; /* initialize isp_flags */
2719
2720 /*
2721 * We've already built a build a header template
2722 * to use during the transfer. Use this template so that
2723 * the SN values stay consistent with any unrelated PDU's
2724 * being transmitted.
2725 */
2726 bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2727 sizeof (iscsi_hdr_t));
2728
2729 /*
2730 * Set DataSN, data offset, and flags in BHS
2731 * For the prototype build, A = 0, S = 0, U = 0
2732 */
2733 bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2734
2735 bhs->datasn = htonl(idt->idt_exp_datasn++);
2736
2737 hton24(bhs->dlength, chunk);
2738 bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2739
2740 /* setup data */
2741 pdu->isp_data = (uint8_t *)idb->idb_buf + data_offset;
2742 pdu->isp_datalen = (uint_t)chunk;
2743
2744 if (chunk == remainder) {
2745 bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2746 /* Piggyback the status with the last data PDU */
2747 if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
2748 pdu->isp_flags |= IDM_PDU_SET_STATSN |
2749 IDM_PDU_ADVANCE_STATSN;
2750 (*idt->idt_ic->ic_conn_ops.icb_update_statsn)
2751 (idt, pdu);
2752 idt->idt_flags |=
2753 IDM_TASK_PHASECOLLAPSE_SUCCESS;
2754
2755 }
2756 }
2757
2758 remainder -= chunk;
2759 data_offset += chunk;
2760
2761 /* Instrument the data-send DTrace probe. */
2762 if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
2763 DTRACE_ISCSI_2(data__send,
2764 idm_conn_t *, idt->idt_ic,
2765 iscsi_data_rsp_hdr_t *,
2766 (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
2767 }
2768
2769 /*
2770 * Now that we're done working with idt_exp_datasn,
2771 * idt->idt_state and idb->idb_bufoffset we can release
2772 * the task lock -- don't want to hold it across the
2773 * call to idm_i_so_tx since we could block.
2774 */
2775 mutex_exit(&idt->idt_mutex);
2776
2777 /*
2778 * Transmit the PDU. Call the internal routine directly
2779 * as there is already implicit ordering.
2780 */
2781 if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
2782 mutex_enter(&idt->idt_mutex);
2783 return (tx_status);
2784 }
2785
2786 mutex_enter(&idt->idt_mutex);
2787 idt->idt_tx_bytes += chunk;
2788 }
2789
2790 return (IDM_STATUS_SUCCESS);
2791 }
2792
2793 /*
2794 * TX PDU cache
2795 */
2796 /* ARGSUSED */
2797 int
2798 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2799 {
2800 idm_pdu_t *pdu = hdl;
2801
2802 bzero(pdu, sizeof (idm_pdu_t));
2803 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2804 pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2805 pdu->isp_callback = idm_sotx_cache_pdu_cb;
2806 pdu->isp_magic = IDM_PDU_MAGIC;
2807 bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2808
2809 return (0);
2810 }
2811
2812 /* ARGSUSED */
2813 void
2814 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2815 {
2816 /* reset values between use */
2817 pdu->isp_datalen = 0;
2818
2819 kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
2820 }
2821
2822 /*
2823 * RX PDU cache
2824 */
2825 /* ARGSUSED */
2826 int
2827 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2828 {
2829 idm_pdu_t *pdu = hdl;
2830
2831 bzero(pdu, sizeof (idm_pdu_t));
2832 pdu->isp_magic = IDM_PDU_MAGIC;
2833 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2834 pdu->isp_callback = idm_sorx_cache_pdu_cb;
2835
2836 return (0);
2837 }
2838
2839 /* ARGSUSED */
2840 static void
2841 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2842 {
2843 pdu->isp_iovlen = 0;
2844 pdu->isp_sorx_buf = 0;
2845 kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
2846 }
2847
2848 static void
2849 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2850 {
2851 /*
2852 * We had to modify our cached RX PDU with a longer header buffer
2853 * and/or a longer data buffer. Release the new buffers and fix
2854 * the fields back to what we would expect for a cached RX PDU.
2855 */
2856 if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2857 kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2858 }
2859 if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2860 kmem_free(pdu->isp_data, pdu->isp_datalen);
2861 }
2862 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2863 pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2864 pdu->isp_data = NULL;
2865 pdu->isp_datalen = 0;
2866 pdu->isp_sorx_buf = 0;
2867 pdu->isp_callback = idm_sorx_cache_pdu_cb;
2868 idm_sorx_cache_pdu_cb(pdu, status);
2869 }
2870
2871 /*
2872 * This thread is only active when I/O is queued for transmit
2873 * because the socket is busy.
2874 */
2875 void
2876 idm_sotx_thread(void *arg)
2877 {
2878 idm_conn_t *ic = arg;
2879 idm_tx_obj_t *object, *next;
2880 idm_so_conn_t *so_conn;
2881 idm_status_t status = IDM_STATUS_SUCCESS;
2882
2883 idm_conn_hold(ic);
2884
2885 mutex_enter(&ic->ic_mutex);
2886 so_conn = ic->ic_transport_private;
2887 so_conn->ic_tx_thread_running = B_TRUE;
2888 so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
2889 cv_signal(&ic->ic_cv);
2890 mutex_exit(&ic->ic_mutex);
2891
2892 mutex_enter(&so_conn->ic_tx_mutex);
2893
2894 while (so_conn->ic_tx_thread_running) {
2895 while (list_is_empty(&so_conn->ic_tx_list)) {
2896 DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
2897 cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
2898 DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
2899
2900 if (!so_conn->ic_tx_thread_running) {
2901 goto tx_bail;
2902 }
2903 }
2904
2905 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2906 list_remove(&so_conn->ic_tx_list, object);
2907 mutex_exit(&so_conn->ic_tx_mutex);
2908
2909 switch (object->idm_tx_obj_magic) {
2910 case IDM_PDU_MAGIC: {
2911 idm_pdu_t *pdu = (idm_pdu_t *)object;
2912 DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
2913 idm_pdu_t *, (idm_pdu_t *)object);
2914
2915 if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
2916 /* No IDM task */
2917 (ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
2918 }
2919 status = idm_i_so_tx((idm_pdu_t *)object);
2920 break;
2921 }
2922 case IDM_BUF_MAGIC: {
2923 idm_buf_t *idb = (idm_buf_t *)object;
2924 idm_task_t *idt = idb->idb_task_binding;
2925
2926 DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
2927 idm_buf_t *, idb);
2928
2929 mutex_enter(&idt->idt_mutex);
2930 status = idm_so_send_buf_region(idt,
2931 idb, 0, idb->idb_xfer_len);
2932
2933 /*
2934 * TX thread owns the buffer so we expect it to
2935 * be "in transport"
2936 */
2937 ASSERT(idb->idb_in_transport);
2938 if (IDM_CONN_ISTGT(ic)) {
2939 /*
2940 * idm_buf_tx_to_ini_done releases
2941 * idt->idt_mutex
2942 */
2943 DTRACE_ISCSI_8(xfer__done,
2944 idm_conn_t *, idt->idt_ic,
2945 uintptr_t, idb->idb_buf,
2946 uint32_t, idb->idb_bufoffset,
2947 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2948 uint32_t, idb->idb_xfer_len,
2949 int, XFER_BUF_TX_TO_INI);
2950 idm_buf_tx_to_ini_done(idt, idb, status);
2951 } else {
2952 idm_so_send_rtt_data_done(idt, idb);
2953 mutex_exit(&idt->idt_mutex);
2954 }
2955 break;
2956 }
2957
2958 default:
2959 IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
2960 "(0x%08x)", object->idm_tx_obj_magic);
2961 status = IDM_STATUS_FAIL;
2962 }
2963
2964 mutex_enter(&so_conn->ic_tx_mutex);
2965
2966 if (status != IDM_STATUS_SUCCESS) {
2967 so_conn->ic_tx_thread_running = B_FALSE;
2968 idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
2969 }
2970 }
2971
2972 /*
2973 * Before we leave, we need to abort every item remaining in the
2974 * TX list.
2975 */
2976
2977 tx_bail:
2978 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2979
2980 while (object != NULL) {
2981 next = list_next(&so_conn->ic_tx_list, object);
2982
2983 list_remove(&so_conn->ic_tx_list, object);
2984 switch (object->idm_tx_obj_magic) {
2985 case IDM_PDU_MAGIC:
2986 idm_pdu_complete((idm_pdu_t *)object,
2987 IDM_STATUS_ABORTED);
2988 break;
2989
2990 case IDM_BUF_MAGIC: {
2991 idm_buf_t *idb = (idm_buf_t *)object;
2992 idm_task_t *idt = idb->idb_task_binding;
2993 mutex_exit(&so_conn->ic_tx_mutex);
2994 mutex_enter(&idt->idt_mutex);
2995 /*
2996 * TX thread owns the buffer so we expect it to
2997 * be "in transport"
2998 */
2999 ASSERT(idb->idb_in_transport);
3000 if (IDM_CONN_ISTGT(ic)) {
3001 /*
3002 * idm_buf_tx_to_ini_done releases
3003 * idt->idt_mutex
3004 */
3005 DTRACE_ISCSI_8(xfer__done,
3006 idm_conn_t *, idt->idt_ic,
3007 uintptr_t, idb->idb_buf,
3008 uint32_t, idb->idb_bufoffset,
3009 uint64_t, 0, uint32_t, 0, uint32_t, 0,
3010 uint32_t, idb->idb_xfer_len,
3011 int, XFER_BUF_TX_TO_INI);
3012 idm_buf_tx_to_ini_done(idt, idb,
3013 IDM_STATUS_ABORTED);
3014 } else {
3015 idm_so_send_rtt_data_done(idt, idb);
3016 mutex_exit(&idt->idt_mutex);
3017 }
3018 mutex_enter(&so_conn->ic_tx_mutex);
3019 break;
3020 }
3021 default:
3022 IDM_CONN_LOG(CE_WARN,
3023 "idm_sotx_thread: Unexpected magic "
3024 "(0x%08x)", object->idm_tx_obj_magic);
3025 }
3026
3027 object = next;
3028 }
3029
3030 mutex_exit(&so_conn->ic_tx_mutex);
3031 idm_conn_rele(ic);
3032 thread_exit();
3033 /*NOTREACHED*/
3034 }
3035
3036 static void
3037 idm_so_socket_set_nonblock(struct sonode *node)
3038 {
3039 (void) VOP_SETFL(node->so_vnode, node->so_flag,
3040 (node->so_state | FNONBLOCK), CRED(), NULL);
3041 }
3042
3043 static void
3044 idm_so_socket_set_block(struct sonode *node)
3045 {
3046 (void) VOP_SETFL(node->so_vnode, node->so_flag,
3047 (node->so_state & (~FNONBLOCK)), CRED(), NULL);
3048 }
3049
3050
3051 /*
3052 * Called by kernel sockets when the connection has been accepted or
3053 * rejected. In early volo, a "disconnect" callback was sent instead of
3054 * "connectfailed", so we check for both.
3055 */
3056 /* ARGSUSED */
3057 void
3058 idm_so_timed_socket_connect_cb(ksocket_t ks,
3059 ksocket_callback_event_t ev, void *arg, uintptr_t info)
3060 {
3061 idm_so_timed_socket_t *itp = arg;
3062 ASSERT(itp != NULL);
3063 ASSERT(ev == KSOCKET_EV_CONNECTED ||
3064 ev == KSOCKET_EV_CONNECTFAILED ||
3065 ev == KSOCKET_EV_DISCONNECTED);
3066
3067 mutex_enter(&idm_so_timed_socket_mutex);
3068 itp->it_callback_called = B_TRUE;
3069 if (ev == KSOCKET_EV_CONNECTED) {
3070 itp->it_socket_error_code = 0;
3071 } else {
3072 /* Make sure the error code is non-zero on error */
3073 if (info == 0)
3074 info = ECONNRESET;
3075 itp->it_socket_error_code = (int)info;
3076 }
3077 cv_signal(&itp->it_cv);
3078 mutex_exit(&idm_so_timed_socket_mutex);
3079 }
3080
3081 int
3082 idm_so_timed_socket_connect(ksocket_t ks,
3083 struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
3084 {
3085 clock_t conn_login_max;
3086 int rc, nonblocking, rval;
3087 idm_so_timed_socket_t it;
3088 ksocket_callbacks_t ks_cb;
3089
3090 conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
3091
3092 /*
3093 * Set to non-block socket mode, with callback on connect
3094 * Early volo used "disconnected" instead of "connectfailed",
3095 * so set callback to look for both.
3096 */
3097 bzero(&it, sizeof (it));
3098 ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
3099 KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
3100 ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
3101 ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
3102 ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
3103 cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
3104 rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
3105 if (rc != 0)
3106 return (rc);
3107
3108 /* Set to non-blocking mode */
3109 nonblocking = 1;
3110 rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3111 CRED());
3112 if (rc != 0)
3113 goto cleanup;
3114
3115 bzero(&it, sizeof (it));
3116 for (;;) {
3117 /*
3118 * Warning -- in a loopback scenario, the call to
3119 * the connect_cb can occur inside the call to
3120 * ksocket_connect. Do not hold the mutex around the
3121 * call to ksocket_connect.
3122 */
3123 rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
3124 if (rc == 0 || rc == EISCONN) {
3125 /* socket success or already success */
3126 rc = 0;
3127 break;
3128 }
3129 if ((rc != EINPROGRESS) && (rc != EALREADY)) {
3130 break;
3131 }
3132
3133 /* TCP connect still in progress. See if out of time. */
3134 if (ddi_get_lbolt() > conn_login_max) {
3135 /*
3136 * Connection retry timeout,
3137 * failed connect to target.
3138 */
3139 rc = ETIMEDOUT;
3140 break;
3141 }
3142
3143 /*
3144 * TCP connect still in progress. Sleep until callback.
3145 * Do NOT go to sleep if the callback already occurred!
3146 */
3147 mutex_enter(&idm_so_timed_socket_mutex);
3148 if (!it.it_callback_called) {
3149 (void) cv_timedwait(&it.it_cv,
3150 &idm_so_timed_socket_mutex, conn_login_max);
3151 }
3152 if (it.it_callback_called) {
3153 rc = it.it_socket_error_code;
3154 mutex_exit(&idm_so_timed_socket_mutex);
3155 break;
3156 }
3157 /* If timer expires, go call ksocket_connect one last time. */
3158 mutex_exit(&idm_so_timed_socket_mutex);
3159 }
3160
3161 /* resume blocking mode */
3162 nonblocking = 0;
3163 (void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3164 CRED());
3165 cleanup:
3166 (void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
3167 cv_destroy(&it.it_cv);
3168 if (rc != 0) {
3169 idm_soshutdown(ks);
3170 }
3171 return (rc);
3172 }
3173
3174
3175 void
3176 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
3177 {
3178 int dp_addr_size;
3179 struct sockaddr_in *sin;
3180 struct sockaddr_in6 *sin6;
3181
3182 /* Build sockaddr_storage for this portal (idm_addr_t) */
3183 bzero(sa, sizeof (*sa));
3184 dp_addr_size = dportal->a_addr.i_insize;
3185 if (dp_addr_size == sizeof (struct in_addr)) {
3186 /* IPv4 */
3187 sa->ss_family = AF_INET;
3188 sin = (struct sockaddr_in *)sa;
3189 sin->sin_port = htons(dportal->a_port);
3190 bcopy(&dportal->a_addr.i_addr.in4,
3191 &sin->sin_addr, sizeof (struct in_addr));
3192 } else if (dp_addr_size == sizeof (struct in6_addr)) {
3193 /* IPv6 */
3194 sa->ss_family = AF_INET6;
3195 sin6 = (struct sockaddr_in6 *)sa;
3196 sin6->sin6_port = htons(dportal->a_port);
3197 bcopy(&dportal->a_addr.i_addr.in6,
3198 &sin6->sin6_addr, sizeof (struct in6_addr));
3199 } else {
3200 ASSERT(0);
3201 }
3202 }
3203
3204
/*
 * Return a human-readable form of a sockaddr_storage, in the form
 * [ip-address].port (note: the separator is a dot, not a colon).  This
 * is used in calls to logging functions.  If several calls to
 * idm_sa_ntop are made within the same invocation of a logging
 * function, then each one needs its own buf.
 *
 * The string is written into 'buf' (capacity 'size' bytes) and 'buf' is
 * returned.  If the address family is neither AF_INET nor AF_INET6, the
 * address cannot be converted, or 'buf' is too small for the worst-case
 * port width, the placeholder "[0].-1" is written instead.
 */
const char *
idm_sa_ntop(const struct sockaddr_storage *sa,
    char *buf, size_t size)
{
	static const char bogus_ip[] = "[0].-1";
	char tmp[INET6_ADDRSTRLEN];
	const void *addr;
	unsigned int port;

	/* Pick out the address and port for the family in use */
	switch (sa->ss_family) {
	case AF_INET6: {
		const struct sockaddr_in6 *in6 =
		    (const struct sockaddr_in6 *)sa;

		addr = &in6->sin6_addr;
		port = ntohs(in6->sin6_port);
		break;
	}
	case AF_INET: {
		const struct sockaddr_in *in = (const struct sockaddr_in *)sa;

		addr = &in->sin_addr;
		port = ntohs(in->sin_port);
		break;
	}
	default:
		goto err;
	}

	/* Do not look at tmp unless the conversion actually succeeded */
	if (inet_ntop(sa->ss_family, addr, tmp, sizeof (tmp)) == NULL)
		goto err;

	/* sizeof includes the NUL, so this is the worst-case total size */
	if (strlen(tmp) + sizeof ("[].65535") > size)
		goto err;

	(void) snprintf(buf, size, "[%s].%u", tmp, port);
	return (buf);

err:
	(void) snprintf(buf, size, "%s", bogus_ip);
	return (buf);
}