1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2019 Nexenta Systems, Inc. All rights reserved.
24 */
25
26 #include <sys/cpuvar.h>
27 #include <sys/conf.h>
28 #include <sys/file.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/modctl.h>
32
33 #include <sys/socket.h>
34 #include <sys/strsubr.h>
35 #include <sys/sysmacros.h>
36
37 #include <sys/socketvar.h>
38 #include <netinet/in.h>
39
40 #include <sys/idm/idm.h>
41 #include <sys/idm/idm_so.h>
42
43 #define IDM_NAME_VERSION "iSCSI Data Mover"
44
45 extern struct mod_ops mod_miscops;
47
48 static struct modlmisc modlmisc = {
49 &mod_miscops, /* Type of module */
50 IDM_NAME_VERSION
51 };
52
53 static struct modlinkage modlinkage = {
54 MODREV_1, (void *)&modlmisc, NULL
55 };
56
57 extern void idm_wd_thread(void *arg);
58
59 static int _idm_init(void);
60 static int _idm_fini(void);
61 static void idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf);
62 static void idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf);
63 static void idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf);
64 static void idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf);
65 static stmf_status_t idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
66 idm_abort_type_t abort_type);
67 static void idm_task_aborted(idm_task_t *idt, idm_status_t status);
68 static idm_pdu_t *idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen,
69 int sleepflag);
70
71 boolean_t idm_conn_logging = 0;
72 boolean_t idm_svc_logging = 0;
73 #ifdef DEBUG
74 boolean_t idm_pattern_checking = 1;
75 #else
76 boolean_t idm_pattern_checking = 0;
77 #endif
78
79 /*
80 * Potential tuneable for the maximum number of tasks. Default to
81 * IDM_TASKIDS_MAX
82 */
83
84 uint32_t idm_max_taskids = IDM_TASKIDS_MAX;
85
86 /*
87 * Global list of transport handles
88 * These are listed in preferential order, so we can simply take the
89 * first "it_conn_is_capable" hit. Note also that the order maps to
90 * the order of the idm_transport_type_t list.
91 */
92 idm_transport_t idm_transport_list[] = {
93
94 /* iSER on InfiniBand transport handle */
95 {IDM_TRANSPORT_TYPE_ISER, /* type */
96 "/devices/ib/iser@0:iser", /* device path */
97 NULL, /* LDI handle */
98 NULL, /* transport ops */
99 NULL}, /* transport caps */
100
101 /* IDM native sockets transport handle */
102 {IDM_TRANSPORT_TYPE_SOCKETS, /* type */
103 NULL, /* device path */
104 NULL, /* LDI handle */
105 NULL, /* transport ops */
106 NULL} /* transport caps */
107
108 };
109
110 int
111 _init(void)
112 {
113 int rc;
114
115 if ((rc = _idm_init()) != 0) {
116 return (rc);
117 }
118
119 return (mod_install(&modlinkage));
120 }
121
122 int
123 _fini(void)
124 {
125 int rc;
126
127 if ((rc = _idm_fini()) != 0) {
128 return (rc);
129 }
130
131 if ((rc = mod_remove(&modlinkage)) != 0) {
132 return (rc);
133 }
134
135 return (rc);
136 }
137
138 int
139 _info(struct modinfo *modinfop)
140 {
141 return (mod_info(&modlinkage, modinfop));
142 }
143
144 /*
145 * idm_transport_register()
146 *
147 * Provides a mechanism for an IDM transport driver to register its
148 * transport ops and caps with the IDM kernel module. Invoked during
149 * a transport driver's attach routine.
150 */
151 idm_status_t
152 idm_transport_register(idm_transport_attr_t *attr)
153 {
154 ASSERT(attr->it_ops != NULL);
155 ASSERT(attr->it_caps != NULL);
156
157 switch (attr->type) {
158 /* All known non-native transports here; for now, iSER */
159 case IDM_TRANSPORT_TYPE_ISER:
160 idm_transport_list[attr->type].it_ops = attr->it_ops;
161 idm_transport_list[attr->type].it_caps = attr->it_caps;
162 return (IDM_STATUS_SUCCESS);
163
164 default:
165 cmn_err(CE_NOTE, "idm: unknown transport type (0x%x) in "
166 "idm_transport_register", attr->type);
167 return (IDM_STATUS_SUCCESS);
168 }
169 }
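
/*
 * Illustrative sketch (not part of the driver) of how a transport driver
 * might register from its attach(9E) entry point. The iser_transport_ops
 * and iser_transport_caps symbols are hypothetical; only the field names
 * come from the usage above:
 *
 *	idm_transport_attr_t attr;
 *
 *	attr.type = IDM_TRANSPORT_TYPE_ISER;
 *	attr.it_ops = &iser_transport_ops;	(hypothetical ops vector)
 *	attr.it_caps = &iser_transport_caps;	(hypothetical caps)
 *	if (idm_transport_register(&attr) != IDM_STATUS_SUCCESS)
 *		return (DDI_FAILURE);
 */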
170
171 /*
172 * idm_ini_conn_create
173 *
174 * This function is invoked by the iSCSI layer to create a connection context.
175 * This does not actually establish the socket connection.
176 *
177 * cr - Connection request parameters
178 * new_con - Output parameter that contains the new connection if successful
179 *
180 */
181 idm_status_t
182 idm_ini_conn_create(idm_conn_req_t *cr, idm_conn_t **new_con)
183 {
184 idm_transport_t *it;
185 idm_conn_t *ic;
186 int rc;
187
188 it = idm_transport_lookup(cr);
189
190 retry:
191 ic = idm_conn_create_common(CONN_TYPE_INI, it->it_type,
192 &cr->icr_conn_ops);
193
194 bcopy(&cr->cr_ini_dst_addr, &ic->ic_ini_dst_addr,
195 sizeof (cr->cr_ini_dst_addr));
196
197 /* create the transport-specific connection components */
198 rc = it->it_ops->it_ini_conn_create(cr, ic);
199 if (rc != IDM_STATUS_SUCCESS) {
200 /* cleanup the failed connection */
201 idm_conn_destroy_common(ic);
202
203 /*
204 * It is possible for an IB client to connect to
205 * an ethernet-only client via an IB-eth gateway.
206 * Therefore, if we are attempting to use iSER and
207 * fail, retry with sockets before ultimately
208 * failing the connection.
209 */
210 if (it->it_type == IDM_TRANSPORT_TYPE_ISER) {
211 it = &idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS];
212 goto retry;
213 }
214
215 return (IDM_STATUS_FAIL);
216 }
217
218 *new_con = ic;
219
220 mutex_enter(&idm.idm_global_mutex);
221 list_insert_tail(&idm.idm_ini_conn_list, ic);
222 mutex_exit(&idm.idm_global_mutex);
223
224 return (IDM_STATUS_SUCCESS);
225 }
226
227 /*
228 * idm_ini_conn_destroy
229 *
230 * Releases any resources associated with the connection. This is the
231 * complement to idm_ini_conn_create.
232 * ic - idm_conn_t structure representing the relevant connection
233 *
234 */
235 void
236 idm_ini_conn_destroy_task(void *ic_void)
237 {
238 idm_conn_t *ic = ic_void;
239
240 ic->ic_transport_ops->it_ini_conn_destroy(ic);
241 idm_conn_destroy_common(ic);
242 }
243
244 void
245 idm_ini_conn_destroy(idm_conn_t *ic)
246 {
247 /*
248 * It's reasonable for the initiator to call idm_ini_conn_destroy
249 * from within the context of the CN_CONNECT_DESTROY notification.
250 * That's a problem since we want to destroy the taskq for the
251 * state machine associated with the connection. Remove the
252 * connection from the list right away then handle the remaining
253 * work via the idm_global_taskq.
254 */
255 mutex_enter(&idm.idm_global_mutex);
256 list_remove(&idm.idm_ini_conn_list, ic);
257 mutex_exit(&idm.idm_global_mutex);
258
259 if (taskq_dispatch(idm.idm_global_taskq,
260 &idm_ini_conn_destroy_task, ic, TQ_SLEEP) == NULL) {
261 cmn_err(CE_WARN,
262 "idm_ini_conn_destroy: Couldn't dispatch task");
263 }
264 }
265
266 /*
267 * idm_ini_conn_connect
268 *
269 * Establish connection to the remote system identified in idm_conn_t.
270 * The connection parameters including the remote IP address were established
271 * in the call to idm_ini_conn_create. The IDM state machine will
272 * perform client notifications as necessary to prompt the initiator through
273 * the login process. IDM also keeps a timer running so that if the login
274 * process doesn't complete in a timely manner it will fail.
275 *
276 * ic - idm_conn_t structure representing the relevant connection
277 *
278 * Returns success if the connection was established, otherwise some kind
279 * of meaningful error code.
280 *
281 * Upon return the login has either failed or is in progress (leading to FFP)
282 */
283 idm_status_t
284 idm_ini_conn_connect(idm_conn_t *ic)
285 {
286 idm_status_t rc;
287
288 rc = idm_conn_sm_init(ic);
289 if (rc != IDM_STATUS_SUCCESS) {
290 return (ic->ic_conn_sm_status);
291 }
292
293 /* Hold connection until we return */
294 idm_conn_hold(ic);
295
296 /* Kick state machine */
297 idm_conn_event(ic, CE_CONNECT_REQ, NULL);
298
299 /* Wait for login flag */
300 mutex_enter(&ic->ic_state_mutex);
301 while (!(ic->ic_state_flags & CF_LOGIN_READY) &&
302 !(ic->ic_state_flags & CF_ERROR)) {
303 cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
304 }
305
306 /*
307 * The CN_READY_TO_LOGIN and/or the CN_CONNECT_FAIL call to
308 * idm_notify_client has already been generated by the idm conn
309 * state machine. If connection fails any time after this
310 * check, we will detect it in iscsi_login.
311 */
312 if (ic->ic_state_flags & CF_ERROR) {
313 rc = ic->ic_conn_sm_status;
314 }
315 mutex_exit(&ic->ic_state_mutex);
316 idm_conn_rele(ic);
317
318 return (rc);
319 }
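
/*
 * Illustrative initiator-side sketch (hedged; error handling is
 * abbreviated, and the idm_conn_req_t is assumed to have icr_conn_ops
 * and cr_ini_dst_addr filled in as referenced above):
 *
 *	idm_conn_req_t	cr;
 *	idm_conn_t	*ic;
 *
 *	if (idm_ini_conn_create(&cr, &ic) != IDM_STATUS_SUCCESS)
 *		return (IDM_STATUS_FAIL);
 *	if (idm_ini_conn_connect(ic) != IDM_STATUS_SUCCESS) {
 *		idm_ini_conn_destroy(ic);
 *		return (IDM_STATUS_FAIL);
 *	}
 *	(proceed with iSCSI login; on teardown:)
 *	idm_ini_conn_disconnect_sync(ic);
 *	idm_ini_conn_destroy(ic);
 */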
320
321 /*
322 * idm_ini_conn_disconnect
323 *
324 * Forces a connection (previously established using idm_ini_conn_connect)
325 * to perform a controlled shutdown, cleaning up any outstanding requests.
326 *
327 * ic - idm_conn_t structure representing the relevant connection
328 *
329 * This is asynchronous and will return before the connection is properly
330 * shutdown
331 */
332 /* ARGSUSED */
333 void
334 idm_ini_conn_disconnect(idm_conn_t *ic)
335 {
336 idm_conn_event(ic, CE_TRANSPORT_FAIL, NULL);
337 }
338
339 /*
340 * idm_ini_conn_disconnect_sync
341 *
342 * Forces a connection (previously established using idm_ini_conn_connect)
343 * to perform a controlled shutdown. Blocks until the connection is
344 * disconnected.
345 *
346 * ic - idm_conn_t structure representing the relevant connection
347 */
348 /* ARGSUSED */
349 void
350 idm_ini_conn_disconnect_sync(idm_conn_t *ic)
351 {
352 mutex_enter(&ic->ic_state_mutex);
353 if ((ic->ic_state != CS_S9_INIT_ERROR) &&
354 (ic->ic_state != CS_S11_COMPLETE)) {
355 idm_conn_event_locked(ic, CE_TRANSPORT_FAIL, NULL, CT_NONE);
356 while ((ic->ic_state != CS_S9_INIT_ERROR) &&
357 (ic->ic_state != CS_S11_COMPLETE))
358 cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
359 }
360 mutex_exit(&ic->ic_state_mutex);
361 }
362
363 /*
364 * idm_tgt_svc_create
365 *
366 * The target calls this function to obtain a service context for each available
367 * transport, starting a service of each type bound to the IP address and port
368 * passed in. The idm_svc_req_t contains the service parameters.
369 */
370 idm_status_t
371 idm_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t **new_svc)
372 {
373 idm_transport_type_t type;
374 idm_transport_t *it;
375 idm_svc_t *is;
376 int rc;
377
378 *new_svc = NULL;
379 is = kmem_zalloc(sizeof (idm_svc_t), KM_SLEEP);
380
381 /* Initialize transport-agnostic components of the service handle */
382 is->is_svc_req = *sr;
383 mutex_init(&is->is_mutex, NULL, MUTEX_DEFAULT, NULL);
384 cv_init(&is->is_cv, NULL, CV_DEFAULT, NULL);
385 mutex_init(&is->is_count_mutex, NULL, MUTEX_DEFAULT, NULL);
386 cv_init(&is->is_count_cv, NULL, CV_DEFAULT, NULL);
387 idm_refcnt_init(&is->is_refcnt, is);
388
389 /*
390 * Make sure all available transports are setup. We call this now
391 * instead of at initialization time in case IB has become available
392 * since we started (hotplug, etc).
393 */
394 idm_transport_setup(sr->sr_li, B_FALSE);
395
396 /*
397 * Loop through the transports, configuring the transport-specific
398 * components of each one.
399 */
400 for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
401
402 it = &idm_transport_list[type];
403 /*
404 * If it_ops is NULL then the transport is unconfigured
405 * and we shouldn't try to start the service.
406 */
407 if (it->it_ops == NULL) {
408 continue;
409 }
410
411 rc = it->it_ops->it_tgt_svc_create(sr, is);
412 if (rc != IDM_STATUS_SUCCESS) {
413 /* Teardown any configured services */
414 while (type--) {
415 it = &idm_transport_list[type];
416 if (it->it_ops == NULL) {
417 continue;
418 }
419 it->it_ops->it_tgt_svc_destroy(is);
420 }
421 /* Free the svc context and return */
422 kmem_free(is, sizeof (idm_svc_t));
423 return (rc);
424 }
425 }
426
427 *new_svc = is;
428
429 mutex_enter(&idm.idm_global_mutex);
430 list_insert_tail(&idm.idm_tgt_svc_list, is);
431 mutex_exit(&idm.idm_global_mutex);
432
433 return (IDM_STATUS_SUCCESS);
434 }
435
436 /*
437 * idm_tgt_svc_destroy
438 *
439 * is - idm_svc_t returned by the call to idm_tgt_svc_create
440 *
441 * Cleanup any resources associated with the idm_svc_t.
442 */
443 void
444 idm_tgt_svc_destroy(idm_svc_t *is)
445 {
446 idm_transport_type_t type;
447 idm_transport_t *it;
448
449 mutex_enter(&idm.idm_global_mutex);
450 /* remove this service from the global list */
451 list_remove(&idm.idm_tgt_svc_list, is);
452 /* wakeup any waiters for service change */
453 cv_broadcast(&idm.idm_tgt_svc_cv);
454 mutex_exit(&idm.idm_global_mutex);
455
456 /* teardown each transport-specific service */
457 for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
458 it = &idm_transport_list[type];
459 if (it->it_ops == NULL) {
460 continue;
461 }
462
463 it->it_ops->it_tgt_svc_destroy(is);
464 }
465
466 /* tear down the svc resources */
467 idm_refcnt_destroy(&is->is_refcnt);
468 cv_destroy(&is->is_count_cv);
469 mutex_destroy(&is->is_count_mutex);
470 cv_destroy(&is->is_cv);
471 mutex_destroy(&is->is_mutex);
472
473 /* free the svc handle */
474 kmem_free(is, sizeof (idm_svc_t));
475 }
476
477 void
478 idm_tgt_svc_hold(idm_svc_t *is)
479 {
480 idm_refcnt_hold(&is->is_refcnt);
481 }
482
483 void
484 idm_tgt_svc_rele_and_destroy(idm_svc_t *is)
485 {
486 idm_refcnt_rele_and_destroy(&is->is_refcnt,
487 (idm_refcnt_cb_t *)&idm_tgt_svc_destroy);
488 }
489
490 /*
491 * idm_tgt_svc_online
492 *
493 * is - idm_svc_t returned by the call to idm_tgt_svc_create
494 *
495 * Online each transport service, as we want this target to be accessible
496 * via any configured transport.
497 *
498 * When the initiator establishes a new connection to the target, IDM will
499 * call the "new connect" callback defined in the idm_svc_req_t structure
500 * and it will pass an idm_conn_t structure representing that new connection.
501 */
502 idm_status_t
503 idm_tgt_svc_online(idm_svc_t *is)
504 {
505
506 idm_transport_type_t type, last_type;
507 idm_transport_t *it;
508 int rc = IDM_STATUS_SUCCESS;
509
510 mutex_enter(&is->is_mutex);
511 if (is->is_online == 0) {
512 /* Walk through each of the transports and online them */
513 for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
514 it = &idm_transport_list[type];
515 if (it->it_ops == NULL) {
516 /* transport is not registered */
517 continue;
518 }
519
520 mutex_exit(&is->is_mutex);
521 rc = it->it_ops->it_tgt_svc_online(is);
522 mutex_enter(&is->is_mutex);
523 if (rc != IDM_STATUS_SUCCESS) {
524 last_type = type;
525 break;
526 }
527 }
528 if (rc != IDM_STATUS_SUCCESS) {
529 /*
530 * The last transport failed to online.
531 * Offline any transport onlined above and
532 * do not online the target.
533 */
534 for (type = 0; type < last_type; type++) {
535 it = &idm_transport_list[type];
536 if (it->it_ops == NULL) {
537 /* transport is not registered */
538 continue;
539 }
540
541 mutex_exit(&is->is_mutex);
542 it->it_ops->it_tgt_svc_offline(is);
543 mutex_enter(&is->is_mutex);
544 }
545 } else {
546 /* Target service now online */
547 is->is_online = 1;
548 }
549 } else {
550 /* Target service already online, just bump the count */
551 is->is_online++;
552 }
553 mutex_exit(&is->is_mutex);
554
555 return (rc);
556 }
557
558 /*
559 * idm_tgt_svc_offline
560 *
561 * is - idm_svc_t returned by the call to idm_tgt_svc_create
562 *
563 * Shutdown any online target services.
564 */
565 void
566 idm_tgt_svc_offline(idm_svc_t *is)
567 {
568 idm_transport_type_t type;
569 idm_transport_t *it;
570
571 mutex_enter(&is->is_mutex);
572 is->is_online--;
573 if (is->is_online == 0) {
574 /* Walk through each of the transports and offline them */
575 for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
576 it = &idm_transport_list[type];
577 if (it->it_ops == NULL) {
578 /* transport is not registered */
579 continue;
580 }
581
582 mutex_exit(&is->is_mutex);
583 it->it_ops->it_tgt_svc_offline(is);
584 mutex_enter(&is->is_mutex);
585 }
586 }
587 mutex_exit(&is->is_mutex);
588 }
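
/*
 * Illustrative target-side service lifecycle sketch (hedged; the
 * idm_svc_req_t setup is partial -- sr_port and sr_li are the only
 * fields referenced in this file -- and the direct call to
 * idm_tgt_svc_destroy assumes no outstanding holds; holders found via
 * idm_tgt_svc_lookup release with idm_tgt_svc_rele_and_destroy):
 *
 *	idm_svc_req_t	sr;
 *	idm_svc_t	*svc;
 *
 *	sr.sr_port = 3260;
 *	(fill in sr_li and the connection ops for new connections)
 *
 *	if (idm_tgt_svc_create(&sr, &svc) != IDM_STATUS_SUCCESS)
 *		return (IDM_STATUS_FAIL);
 *	if (idm_tgt_svc_online(svc) != IDM_STATUS_SUCCESS) {
 *		idm_tgt_svc_destroy(svc);
 *		return (IDM_STATUS_FAIL);
 *	}
 *	(service now accepts connections; on teardown:)
 *	idm_tgt_svc_offline(svc);
 *	idm_tgt_svc_destroy(svc);
 */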
589
590 /*
591 * idm_tgt_svc_lookup
592 *
593 * Lookup a service instance listening on the specified port
594 */
595
596 idm_svc_t *
597 idm_tgt_svc_lookup(uint16_t port)
598 {
599 idm_svc_t *result;
600
601 retry:
602 mutex_enter(&idm.idm_global_mutex);
603 for (result = list_head(&idm.idm_tgt_svc_list);
604 result != NULL;
605 result = list_next(&idm.idm_tgt_svc_list, result)) {
606 if (result->is_svc_req.sr_port == port) {
607 if (result->is_online == 0) {
608 /*
609 * A service exists on this port, but it
610 * is going away; wait for it to clean up.
611 */
612 cv_wait(&idm.idm_tgt_svc_cv,
613 &idm.idm_global_mutex);
614 mutex_exit(&idm.idm_global_mutex);
615 goto retry;
616 }
617 idm_tgt_svc_hold(result);
618 mutex_exit(&idm.idm_global_mutex);
619 return (result);
620 }
621 }
622 mutex_exit(&idm.idm_global_mutex);
623
624 return (NULL);
625 }
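
/*
 * Illustrative lookup sketch (hedged): a successful lookup returns the
 * service with a hold taken; dropping the last hold through
 * idm_tgt_svc_rele_and_destroy() triggers idm_tgt_svc_destroy():
 *
 *	idm_svc_t *svc;
 *
 *	if ((svc = idm_tgt_svc_lookup(3260)) != NULL) {
 *		(use svc)
 *		idm_tgt_svc_rele_and_destroy(svc);
 *	}
 */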
626
627 /*
628 * idm_negotiate_key_values()
629 * Give IDM level a chance to negotiate any login parameters it should own.
630 * -- leave unhandled parameters alone on request_nvl
631 * -- move all handled parameters to response_nvl with an appropriate response
632 * -- also add an entry to negotiated_nvl for any accepted parameters
633 */
634 kv_status_t
635 idm_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl,
636 nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
637 {
638 ASSERT(ic->ic_transport_ops != NULL);
639 return (ic->ic_transport_ops->it_negotiate_key_values(ic,
640 request_nvl, response_nvl, negotiated_nvl));
641 }
642
643 /*
644 * idm_notice_key_values()
645 * Activate at the IDM level any parameters that have been negotiated.
646 * Passes the set of key value pairs to the transport for activation.
647 * This will be invoked as the connection is entering full-feature mode.
648 */
649 void
650 idm_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl)
651 {
652 ASSERT(ic->ic_transport_ops != NULL);
653 ic->ic_transport_ops->it_notice_key_values(ic, negotiated_nvl);
654 }
655
656 /*
657 * idm_declare_key_values()
658 * Activate an operational set of declarative parameters from the config_nvl,
659 * and return the selected values in the outgoing_nvl.
660 */
661 kv_status_t
662 idm_declare_key_values(idm_conn_t *ic, nvlist_t *config_nvl,
663 nvlist_t *outgoing_nvl)
664 {
665 ASSERT(ic->ic_transport_ops != NULL);
666 return (ic->ic_transport_ops->it_declare_key_values(ic, config_nvl,
667 outgoing_nvl));
668 }
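
/*
 * Illustrative login-phase sketch (hedged; nvlist construction is
 * abbreviated and handling of the returned kv_status_t is left to the
 * client):
 *
 *	nvlist_t	*req_nvl, *rsp_nvl, *neg_nvl;
 *	kv_status_t	kvrc;
 *
 *	(req_nvl holds the keys received from the peer; rsp_nvl and
 *	neg_nvl are empty NV_UNIQUE_NAME lists allocated by the caller)
 *
 *	kvrc = idm_negotiate_key_values(ic, req_nvl, rsp_nvl, neg_nvl);
 *	(the client then negotiates whatever keys remain on req_nvl)
 *
 *	(at the transition to full-feature mode:)
 *	idm_notice_key_values(ic, neg_nvl);
 */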
669
670 /*
671 * idm_buf_tx_to_ini
672 *
673 * This is IDM's implementation of the 'Put_Data' operational primitive.
674 *
675 * This function is invoked by a target iSCSI layer to request its local
676 * Datamover layer to transmit the Data-In PDU to the peer iSCSI layer
677 * on the remote iSCSI node. The I/O buffer represented by 'idb' is
678 * transferred to the initiator associated with task 'idt'. The connection
679 * info, contents of the Data-In PDU header, the DataDescriptorIn, BHS,
680 * and the callback (idb->idb_buf_cb) at transfer completion are
681 * provided as input.
682 *
683 * This data transfer takes place transparently to the remote iSCSI layer,
684 * i.e. without its participation.
685 *
686 * Using sockets, IDM implements the data transfer by segmenting the data
687 * buffer into appropriately sized iSCSI PDUs and transmitting them to the
688 * initiator. iSER performs the transfer using RDMA write.
689 *
690 */
691 idm_status_t
692 idm_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb,
693 uint32_t offset, uint32_t xfer_len,
694 idm_buf_cb_t idb_buf_cb, void *cb_arg)
695 {
696 idm_status_t rc;
697
698 idb->idb_bufoffset = offset;
699 idb->idb_xfer_len = xfer_len;
700 idb->idb_buf_cb = idb_buf_cb;
701 idb->idb_cb_arg = cb_arg;
702 gethrestime(&idb->idb_xfer_start);
703
704 /*
705 * Buffer should not contain the pattern. If the pattern is
706 * present then we've been asked to transmit initialized data
707 */
708 IDM_BUFPAT_CHECK(idb, xfer_len, BP_CHECK_ASSERT);
709
710 mutex_enter(&idt->idt_mutex);
711 switch (idt->idt_state) {
712 case TASK_ACTIVE:
713 idt->idt_tx_to_ini_start++;
714 idm_task_hold(idt);
715 idm_buf_bind_in_locked(idt, idb);
716 idb->idb_in_transport = B_TRUE;
717 rc = (*idt->idt_ic->ic_transport_ops->it_buf_tx_to_ini)
718 (idt, idb);
719 return (rc);
720
721 case TASK_SUSPENDING:
722 case TASK_SUSPENDED:
723 /*
724 * Bind buffer but don't start a transfer since the task
725 * is suspended
726 */
727 idm_buf_bind_in_locked(idt, idb);
728 mutex_exit(&idt->idt_mutex);
729 return (IDM_STATUS_SUCCESS);
730
731 case TASK_ABORTING:
732 case TASK_ABORTED:
733 /*
734 * Once the task is aborted, any buffers added to the
735 * idt_inbufv will never get cleaned up, so just return
736 * SUCCESS. The buffer should get cleaned up by the
737 * client or framework once task_aborted has completed.
738 */
739 mutex_exit(&idt->idt_mutex);
740 return (IDM_STATUS_SUCCESS);
741
742 default:
743 ASSERT(0);
744 break;
745 }
746 mutex_exit(&idt->idt_mutex);
747
748 return (IDM_STATUS_FAIL);
749 }
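
/*
 * Illustrative Put_Data sketch (hedged; "idt" is an active task and
 * read_done is a hypothetical client idm_buf_cb_t that completes the
 * SCSI read and frees the buffer):
 *
 *	static void
 *	read_done(idm_buf_t *idb, idm_status_t status)
 *	{
 *		(complete the SCSI read, then idm_buf_free(idb))
 *	}
 *
 *	idb = idm_buf_alloc(idt->idt_ic, NULL, len);
 *	(fill idb->idb_buf with the read data)
 *	if (idm_buf_tx_to_ini(idt, idb, 0, len, read_done, arg) !=
 *	    IDM_STATUS_SUCCESS)
 *		(handle the failure)
 */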
750
751 /*
752 * idm_buf_rx_from_ini
753 *
754 * This is IDM's implementation of the 'Get_Data' operational primitive.
755 *
756 * This function is invoked by a target iSCSI layer to request its local
757 * Datamover layer to retrieve certain data identified by the R2T PDU from the
758 * peer iSCSI layer on the remote node. The retrieved Data-Out PDU will be
759 * mapped to the respective buffer by the task tags (ITT & TTT).
760 * The connection information, contents of an R2T PDU, DataDescriptor, BHS, and
761 * the callback (idb->idb_buf_cb) notification for data transfer completion
762 * are provided as input.
763 *
764 * When an iSCSI node sends an R2T PDU to its local Datamover layer, the
765 * local and remote Datamover layers transparently bring about the data
766 * transfer requested by the R2T PDU, without the participation of the
767 * iSCSI layers.
768 *
769 * Using sockets, IDM transmits an R2T PDU for each buffer and the rx_data_out()
770 * assembles the Data-Out PDUs into the buffer. iSER uses RDMA read.
771 *
772 */
773 idm_status_t
774 idm_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb,
775 uint32_t offset, uint32_t xfer_len,
776 idm_buf_cb_t idb_buf_cb, void *cb_arg)
777 {
778 idm_status_t rc;
779
780 idb->idb_bufoffset = offset;
781 idb->idb_xfer_len = xfer_len;
782 idb->idb_buf_cb = idb_buf_cb;
783 idb->idb_cb_arg = cb_arg;
784 gethrestime(&idb->idb_xfer_start);
785
786 /*
787 * "In" buf list is for "Data In" PDU's, "Out" buf list is for
788 * "Data Out" PDU's
789 */
790 mutex_enter(&idt->idt_mutex);
791 switch (idt->idt_state) {
792 case TASK_ACTIVE:
793 idt->idt_rx_from_ini_start++;
794 idm_task_hold(idt);
795 idm_buf_bind_out_locked(idt, idb);
796 idb->idb_in_transport = B_TRUE;
797 rc = (*idt->idt_ic->ic_transport_ops->it_buf_rx_from_ini)
798 (idt, idb);
799 return (rc);
800 case TASK_SUSPENDING:
801 case TASK_SUSPENDED:
802 case TASK_ABORTING:
803 case TASK_ABORTED:
804 /*
805 * Bind buffer but don't start a transfer since the task
806 * is suspended or aborting
807 */
808 idm_buf_bind_out_locked(idt, idb);
809 mutex_exit(&idt->idt_mutex);
810 return (IDM_STATUS_SUCCESS);
811 default:
812 ASSERT(0);
813 break;
814 }
815 mutex_exit(&idt->idt_mutex);
816
817 return (IDM_STATUS_FAIL);
818 }
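
/*
 * Illustrative Get_Data sketch, mirroring the Put_Data example above
 * (hedged; write_avail is a hypothetical client callback invoked once
 * the Data-Out sequence for this buffer has been assembled):
 *
 *	idb = idm_buf_alloc(idt->idt_ic, NULL, len);
 *	rc = idm_buf_rx_from_ini(idt, idb, 0, len, write_avail, arg);
 */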
819
820 /*
821 * idm_buf_tx_to_ini_done
822 *
823 * The transport calls this after it has completed a transfer requested by
824 * a call to transport_buf_tx_to_ini
825 *
826 * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
827 * idt may be freed after the call to idb->idb_buf_cb.
828 */
829 void
830 idm_buf_tx_to_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
831 {
832 ASSERT(mutex_owned(&idt->idt_mutex));
833 idb->idb_in_transport = B_FALSE;
834 idb->idb_tx_thread = B_FALSE;
835 idt->idt_tx_to_ini_done++;
836 gethrestime(&idb->idb_xfer_done);
837
838 /*
839 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
840 * TASK_ABORTING --> TASK_ABORTED transition if the refcount goes
841 * to 0.
842 */
843 idm_task_rele(idt);
844 idb->idb_status = status;
845
846 switch (idt->idt_state) {
847 case TASK_ACTIVE:
848 idt->idt_ic->ic_timestamp = ddi_get_lbolt();
849 idm_buf_unbind_in_locked(idt, idb);
850 mutex_exit(&idt->idt_mutex);
851 (*idb->idb_buf_cb)(idb, status);
852 return;
853 case TASK_SUSPENDING:
854 case TASK_SUSPENDED:
855 case TASK_ABORTING:
856 case TASK_ABORTED:
857 /*
858 * To keep things simple we will ignore the case where the
859 * transfer was successful and leave all buffers bound to the
860 * task. This allows us to also ignore the case where we've
861 * been asked to abort a task but the last transfer of the
862 * task has completed. IDM has no idea whether this was, in
863 * fact, the last transfer of the task so it would be difficult
864 * to handle this case. Everything should get sorted out again
865 * after task reassignment is complete.
866 *
867 * In the case of TASK_ABORTING we could conceivably call the
868 * buffer callback here but the timing of when the client's
869 * client_task_aborted callback is invoked vs. when the client's
870 * buffer callback gets invoked gets sticky. We don't want
871 * the client to hear from us again after the call to
872 * client_task_aborted() but we don't want to give it a bunch
873 * of failed buffer transfers until we've called
874 * client_task_aborted(). Instead we'll just leave all the
875 * buffers bound and allow the client to cleanup.
876 */
877 break;
878 default:
879 ASSERT(0);
880 }
881 mutex_exit(&idt->idt_mutex);
882 }
883
884 /*
885 * idm_buf_rx_from_ini_done
886 *
887 * The transport calls this after it has completed a transfer requested by
888 * a call to transport_buf_rx_from_ini
889 *
890 * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
891 * idt may be freed after the call to idb->idb_buf_cb.
892 */
893 void
894 idm_buf_rx_from_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
895 {
896 ASSERT(mutex_owned(&idt->idt_mutex));
897 idb->idb_in_transport = B_FALSE;
898 idt->idt_rx_from_ini_done++;
899 gethrestime(&idb->idb_xfer_done);
900
901 /*
902 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
903 * TASK_ABORTING --> TASK_ABORTED transition if the refcount goes
904 * to 0.
905 */
906 idm_task_rele(idt);
907 idb->idb_status = status;
908
909 if (status == IDM_STATUS_SUCCESS) {
910 /*
911 * Buffer should not contain the pattern. If it does then
912 * we did not get the data from the remote host.
913 */
914 IDM_BUFPAT_CHECK(idb, idb->idb_xfer_len, BP_CHECK_ASSERT);
915 }
916
917 switch (idt->idt_state) {
918 case TASK_ACTIVE:
919 idt->idt_ic->ic_timestamp = ddi_get_lbolt();
920 idm_buf_unbind_out_locked(idt, idb);
921 mutex_exit(&idt->idt_mutex);
922 (*idb->idb_buf_cb)(idb, status);
923 return;
924 case TASK_SUSPENDING:
925 case TASK_SUSPENDED:
926 case TASK_ABORTING:
927 case TASK_ABORTED:
928 /*
929 * To keep things simple we will ignore the case where the
930 * transfer was successful and leave all buffers bound to the
931 * task. This allows us to also ignore the case where we've
932 * been asked to abort a task but the last transfer of the
933 * task has completed. IDM has no idea whether this was, in
934 * fact, the last transfer of the task so it would be difficult
935 * to handle this case. Everything should get sorted out again
936 * after task reassignment is complete.
937 *
938 * In the case of TASK_ABORTING we could conceivably call the
939 * buffer callback here but the timing of when the client's
940 * client_task_aborted callback is invoked vs. when the client's
941 * buffer callback gets invoked gets sticky. We don't want
942 * the client to hear from us again after the call to
943 * client_task_aborted() but we don't want to give it a bunch
944 * of failed buffer transfers until we've called
945 * client_task_aborted(). Instead we'll just leave all the
946 * buffers bound and allow the client to cleanup.
947 */
948 break;
949 default:
950 ASSERT(0);
951 }
952 mutex_exit(&idt->idt_mutex);
953 }
954
955 /*
956 * idm_buf_alloc
957 *
958 * Allocates a buffer handle and registers it for use with the transport
959 * layer. If no buffer is passed in bufptr, the buffer will be allocated
960 * as well as the handle.
961 *
962 * ic - connection on which the buffer will be transferred
963 * bufptr - allocate memory for buffer if NULL, else assign to buffer
964 * buflen - length of buffer
965 *
966 * Returns idm_buf_t handle if successful, otherwise NULL
967 */
968 idm_buf_t *
969 idm_buf_alloc(idm_conn_t *ic, void *bufptr, uint64_t buflen)
970 {
971 idm_buf_t *buf = NULL;
972 int rc;
973
974 ASSERT(ic != NULL);
975 ASSERT(idm.idm_buf_cache != NULL);
976 ASSERT(buflen > 0);
977
978 /* Don't allocate new buffers if we are not in FFP */
979 mutex_enter(&ic->ic_state_mutex);
980 if (!ic->ic_ffp) {
981 mutex_exit(&ic->ic_state_mutex);
982 return (NULL);
983 }
984
985
986 idm_conn_hold(ic);
987 mutex_exit(&ic->ic_state_mutex);
988
989 buf = kmem_cache_alloc(idm.idm_buf_cache, KM_NOSLEEP);
990 if (buf == NULL) {
991 idm_conn_rele(ic);
992 return (NULL);
993 }
994
995 buf->idb_ic = ic;
996 buf->idb_buflen = buflen;
997 buf->idb_exp_offset = 0;
998 buf->idb_bufoffset = 0;
999 buf->idb_xfer_len = 0;
1000 buf->idb_magic = IDM_BUF_MAGIC;
1001 buf->idb_in_transport = B_FALSE;
1002 buf->idb_bufbcopy = B_FALSE;
1003
1004 /*
1005 * If bufptr is NULL, we have an implicit request to allocate
1006 * memory for this IDM buffer handle and register it for use
1007 * with the transport. To simplify this, and to give more freedom
1008 * to the transport layer for its own buffer management, both of
1009 * these actions will take place in the transport layer.
1010 * If bufptr is set, then the caller has allocated memory (or more
1011 * likely it's been passed from an upper layer), and we need only
1012 * register the buffer for use with the transport layer.
1013 */
1014 if (bufptr == NULL) {
1015 /*
1016 * Allocate a buffer from the transport layer (which
1017 * will also register the buffer for use).
1018 */
1019 rc = ic->ic_transport_ops->it_buf_alloc(buf, buflen);
1020 if (rc != 0) {
1021 idm_conn_rele(ic);
1022 kmem_cache_free(idm.idm_buf_cache, buf);
1023 return (NULL);
1024 }
1025 /* Set the bufalloc'd flag */
1026 buf->idb_bufalloc = B_TRUE;
1027 } else {
1028 /*
1029 * For large transfers, set the passed bufptr into
1030 * the buf handle, and register the handle with the
1031 * transport layer. As memory registration with the
1032 * transport layer is a time/cpu intensive operation,
1033 * for small transfers (up to a pre-defined bcopy
1034 * threshold), use pre-registered memory buffers
1035 * and bcopy data at the appropriate time.
1036 */
1037 buf->idb_buf = bufptr;
1038
1039 rc = ic->ic_transport_ops->it_buf_setup(buf);
1040 if (rc != 0) {
1041 idm_conn_rele(ic);
1042 kmem_cache_free(idm.idm_buf_cache, buf);
1043 return (NULL);
1044 }
1045 /*
1046 * The transport layer is now expected to set the idb_bufalloc
1047 * correctly to indicate if resources have been allocated.
1048 */
1049 }
1050
1051 IDM_BUFPAT_SET(buf);
1052
1053 return (buf);
1054 }
1055
1056 /*
1057 * idm_buf_free
1058 *
1059 * Release a buffer handle along with the associated buffer that was allocated
1060 * or assigned with idm_buf_alloc
1061 */
1062 void
1063 idm_buf_free(idm_buf_t *buf)
1064 {
1065 idm_conn_t *ic = buf->idb_ic;
1066
1067
1068 buf->idb_task_binding = NULL;
1069
1070 if (buf->idb_bufalloc) {
1071 ic->ic_transport_ops->it_buf_free(buf);
1072 } else {
1073 ic->ic_transport_ops->it_buf_teardown(buf);
1074 }
1075 kmem_cache_free(idm.idm_buf_cache, buf);
1076 idm_conn_rele(ic);
1077 }
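
/*
 * Illustrative sketch of the two allocation modes (hedged; "len" and
 * "caller_buf" are hypothetical):
 *
 *	(transport allocates and registers the memory)
 *	idb = idm_buf_alloc(ic, NULL, len);
 *
 *	(caller supplies the memory; IDM only registers it, or arranges
 *	a bcopy through a pre-registered buffer for small transfers)
 *	idb = idm_buf_alloc(ic, caller_buf, len);
 *
 *	...
 *	idm_buf_free(idb);
 */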
1078
1079 /*
1080 * idm_buf_bind_in
1081 *
1082 * This function associates a buffer with a task. This is for use only by the
1083 * iSCSI initiator, which has just one buffer per transfer direction.
1084 *
1085 */
1086 void
1087 idm_buf_bind_in(idm_task_t *idt, idm_buf_t *buf)
1088 {
1089 mutex_enter(&idt->idt_mutex);
1090 idm_buf_bind_in_locked(idt, buf);
1091 mutex_exit(&idt->idt_mutex);
1092 }
1093
1094 static void
1095 idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf)
1096 {
1097 buf->idb_task_binding = idt;
1098 buf->idb_ic = idt->idt_ic;
1099 idm_listbuf_insert(&idt->idt_inbufv, buf);
1100 }
1101
1102 void
1103 idm_buf_bind_out(idm_task_t *idt, idm_buf_t *buf)
1104 {
1105 /*
1106 * For small transfers, the iSER transport delegates the IDM
1107 * layer to bcopy the SCSI Write data for faster IOPS.
1108 */
1109 if (buf->idb_bufbcopy == B_TRUE) {
1111 bcopy(buf->idb_bufptr, buf->idb_buf, buf->idb_buflen);
1112 }
1113 mutex_enter(&idt->idt_mutex);
1114 idm_buf_bind_out_locked(idt, buf);
1115 mutex_exit(&idt->idt_mutex);
1116 }
1117
1118 static void
1119 idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf)
1120 {
1121 buf->idb_task_binding = idt;
1122 buf->idb_ic = idt->idt_ic;
1123 idm_listbuf_insert(&idt->idt_outbufv, buf);
1124 }
1125
1126 void
1127 idm_buf_unbind_in(idm_task_t *idt, idm_buf_t *buf)
1128 {
1129 /*
1130 * For small transfers, the iSER transport delegates the IDM
1131 * layer to bcopy the SCSI Read data into the read buffer
1132 * for faster IOPS.
1133 */
1134 if (buf->idb_bufbcopy == B_TRUE) {
1135 bcopy(buf->idb_buf, buf->idb_bufptr, buf->idb_buflen);
1136 }
1137 mutex_enter(&idt->idt_mutex);
1138 idm_buf_unbind_in_locked(idt, buf);
1139 mutex_exit(&idt->idt_mutex);
1140 }
1141
1142 static void
1143 idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf)
1144 {
1145 list_remove(&idt->idt_inbufv, buf);
1146 }
1147
1148 void
1149 idm_buf_unbind_out(idm_task_t *idt, idm_buf_t *buf)
1150 {
1151 mutex_enter(&idt->idt_mutex);
1152 idm_buf_unbind_out_locked(idt, buf);
1153 mutex_exit(&idt->idt_mutex);
1154 }
1155
1156 static void
1157 idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf)
1158 {
1159 list_remove(&idt->idt_outbufv, buf);
1160 }
1161
1162 /*
1163 * idm_buf_find() will look up the idm_buf_t based on the relative offset in the
1164 * iSCSI PDU
1165 */
1166 idm_buf_t *
1167 idm_buf_find(void *lbuf, size_t data_offset)
1168 {
1169 idm_buf_t *idb;
1170 list_t *lst = (list_t *)lbuf;
1171
1172 /* iterate through the list to find the buffer */
1173 for (idb = list_head(lst); idb != NULL; idb = list_next(lst, idb)) {
1174
1175 ASSERT((idb->idb_ic->ic_conn_type == CONN_TYPE_TGT) ||
1176 (idb->idb_bufoffset == 0));
1177
1178 if ((data_offset >= idb->idb_bufoffset) &&
1179 (data_offset < (idb->idb_bufoffset + idb->idb_buflen))) {
1180
1181 return (idb);
1182 }
1183 }
1184
1185 return (NULL);
1186 }
1187
1188 void
1189 idm_bufpat_set(idm_buf_t *idb)
1190 {
1191 idm_bufpat_t *bufpat;
1192 int len, i;
1193
1194 len = idb->idb_buflen;
1195 len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
1196
1197 bufpat = idb->idb_buf;
1198 for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
1199 bufpat->bufpat_idb = idb;
1200 bufpat->bufpat_bufmagic = IDM_BUF_MAGIC;
1201 bufpat->bufpat_offset = i;
1202 bufpat++;
1203 }
1204 }
1205
1206 boolean_t
1207 idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type)
1208 {
1209 idm_bufpat_t *bufpat;
1210 int len, i;
1211
1212 len = (type == BP_CHECK_QUICK) ? sizeof (idm_bufpat_t) : check_len;
1213 len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
1214 ASSERT(len <= idb->idb_buflen);
1215 bufpat = idb->idb_buf;
1216
1217 /*
1218 * Don't check the pattern in buffers that came from outside IDM
1219 * (these will be buffers from the initiator that we opted not
1220 * to double-buffer)
1221 */
1222 if (!idb->idb_bufalloc)
1223 return (B_FALSE);
1224
1225 /*
1226 * Return true if we find the pattern anywhere in the buffer
1227 */
1228 for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
1229 if (BUFPAT_MATCH(bufpat, idb)) {
1230 IDM_CONN_LOG(CE_WARN, "idm_bufpat_check found: "
1231 "idb %p bufpat %p "
1232 "bufpat_idb=%p bufmagic=%08x offset=%08x",
1233 (void *)idb, (void *)bufpat, bufpat->bufpat_idb,
1234 bufpat->bufpat_bufmagic, bufpat->bufpat_offset);
1235 DTRACE_PROBE2(bufpat__pattern__found,
1236 idm_buf_t *, idb, idm_bufpat_t *, bufpat);
1237 if (type == BP_CHECK_ASSERT) {
1238 ASSERT(0);
1239 }
1240 return (B_TRUE);
1241 }
1242 bufpat++;
1243 }
1244
1245 return (B_FALSE);
1246 }
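
/*
 * Illustrative pattern-checking sketch (hedged), mirroring how the
 * IDM_BUFPAT_* macros are used elsewhere in this file: the pattern is
 * stamped at allocation, and after a successful receive every stamped
 * chunk must have been overwritten:
 *
 *	IDM_BUFPAT_SET(idb);
 *	(receive xfer_len bytes into idb->idb_buf)
 *	IDM_BUFPAT_CHECK(idb, xfer_len, BP_CHECK_ASSERT);
 */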
1247
1248 /*
1249 * idm_task_alloc
1250 *
1251 * This function will allocate an idm_task_t structure. A task tag is also
1252 * generated and saved in idt_tt. The task is not active.
1253 */
1254 idm_task_t *
1255 idm_task_alloc(idm_conn_t *ic)
1256 {
1257 idm_task_t *idt;
1258
1259 ASSERT(ic != NULL);
1260
1261 /* Don't allocate new tasks if we are not in FFP */
1262 if (!ic->ic_ffp) {
1263 return (NULL);
1264 }
1265 idt = kmem_cache_alloc(idm.idm_task_cache, KM_NOSLEEP);
1266 if (idt == NULL) {
1267 return (NULL);
1268 }
1269
1270 ASSERT(list_is_empty(&idt->idt_inbufv));
1271 ASSERT(list_is_empty(&idt->idt_outbufv));
1272
1273 mutex_enter(&ic->ic_state_mutex);
1274 if (!ic->ic_ffp) {
1275 mutex_exit(&ic->ic_state_mutex);
1276 kmem_cache_free(idm.idm_task_cache, idt);
1277 return (NULL);
1278 }
1279 idm_conn_hold(ic);
1280 mutex_exit(&ic->ic_state_mutex);
1281
1282 idt->idt_state = TASK_IDLE;
1283 idt->idt_ic = ic;
1284 idt->idt_private = NULL;
1285 idt->idt_exp_datasn = 0;
1286 idt->idt_exp_rttsn = 0;
1287 idt->idt_flags = 0;
1288 return (idt);
1289 }
1290
1291 /*
1292 * idm_task_start
1293 *
1294 * Mark the task active and initialize some stats. The caller
1295 * sets up the idm_task_t structure with a prior call to idm_task_alloc().
1296 * The task service does not function as a task/work engine; it is the
1297 * responsibility of the initiator to start the data transfer and free the
1298 * resources.
1299 */
1300 void
1301 idm_task_start(idm_task_t *idt, uintptr_t handle)
1302 {
1303 ASSERT(idt != NULL);
1304
1305 /* mark the task as ACTIVE */
1306 idt->idt_state = TASK_ACTIVE;
1307 idt->idt_client_handle = handle;
1308 idt->idt_tx_to_ini_start = idt->idt_tx_to_ini_done =
1309 idt->idt_rx_from_ini_start = idt->idt_rx_from_ini_done =
1310 idt->idt_tx_bytes = idt->idt_rx_bytes = 0;
1311 }
1312
1313 /*
1314 * idm_task_done
1315 *
1316 * This function sets the state to indicate that the task is no longer active.
1317 */
1318 void
1319 idm_task_done(idm_task_t *idt)
1320 {
1321 ASSERT(idt != NULL);
1322
1323 mutex_enter(&idt->idt_mutex);
1324 idt->idt_state = TASK_IDLE;
1325 mutex_exit(&idt->idt_mutex);
1326
1327 /*
1328 * Although unlikely it is possible for a reference to come in after
1329 * the client has decided the task is over but before we've marked
1330 * the task idle. One specific unavoidable scenario is the case where
1331 * a received PDU with a matching ITT/TTT results in a successful
1332 * lookup of this task. We are at the mercy of the remote node in
1333 * that case so we need to handle it. Now that the task state
1334 * has changed no more references will occur so a simple call to
1335 * idm_refcnt_wait_ref should deal with the situation.
1336 */
1337 idm_refcnt_wait_ref(&idt->idt_refcnt);
1338 idm_refcnt_reset(&idt->idt_refcnt);
1339 }
1340
1341 /*
1342 * idm_task_free
1343 *
1344 * This function will free the Task Tag and the memory allocated for the task.
1345 * idm_task_done should be called prior to this call.
1346 */
1347 void
1348 idm_task_free(idm_task_t *idt)
1349 {
1350 idm_conn_t *ic;
1351
1352 ASSERT(idt != NULL);
1353 ASSERT(idt->idt_refcnt.ir_refcnt == 0);
1354 ASSERT(idt->idt_state == TASK_IDLE);
1355
1356 ic = idt->idt_ic;
1357
1358 /*
1359 * It's possible for items to still be in the idt_inbufv list if
1360 * they were added after idm_free_task_rsrc was called. We rely on
1361 * STMF to free all buffers associated with the task however STMF
1362 * doesn't know that we have this reference to the buffers.
1363 * Use list_create so that we don't end up with stale references
1364 * to these buffers.
1365 */
1366 list_create(&idt->idt_inbufv, sizeof (idm_buf_t),
1367 offsetof(idm_buf_t, idb_buflink));
1368 list_create(&idt->idt_outbufv, sizeof (idm_buf_t),
1369 offsetof(idm_buf_t, idb_buflink));
1370
1371 kmem_cache_free(idm.idm_task_cache, idt);
1372
1373 idm_conn_rele(ic);
1374 }
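
/*
 * Illustrative task lifecycle sketch (hedged; my_ctx and my_handle are
 * hypothetical client state):
 *
 *	idt = idm_task_alloc(ic);
 *	if (idt == NULL)
 *		return (IDM_STATUS_FAIL);
 *	idt->idt_private = my_ctx;
 *	idm_task_start(idt, (uintptr_t)my_handle);
 *	(bind buffers and run transfers)
 *	idm_task_done(idt);
 *	idm_task_free(idt);
 */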
1375
1376 /*
1377 * idm_task_find_common
1378 * common code for idm_task_find() and idm_task_find_and_complete()
1379 */
1380 /*ARGSUSED*/
1381 static idm_task_t *
1382 idm_task_find_common(idm_conn_t *ic, uint32_t itt, uint32_t ttt,
1383 boolean_t complete)
1384 {
1385 uint32_t tt, client_handle;
1386 idm_task_t *idt;
1387
1388 /*
1389 * Must match both itt and ttt. The table is indexed by itt
1390 * for initiator connections and ttt for target connections.
1391 */
1392 if (IDM_CONN_ISTGT(ic)) {
1393 tt = ttt;
1394 client_handle = itt;
1395 } else {
1396 tt = itt;
1397 client_handle = ttt;
1398 }
1399
1400 rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1401 if (tt >= idm.idm_taskid_max) {
1402 rw_exit(&idm.idm_taskid_table_lock);
1403 return (NULL);
1404 }
1405
1406 idt = idm.idm_taskid_table[tt];
1407
1408 if (idt != NULL) {
1409 mutex_enter(&idt->idt_mutex);
1410 if ((idt->idt_state != TASK_ACTIVE) ||
1411 (idt->idt_ic != ic) ||
1412 (IDM_CONN_ISTGT(ic) &&
1413 (idt->idt_client_handle != client_handle))) {
1414 /*
1415 * Task doesn't match or task is aborting and
1416 * we don't want any more references.
1417 */
1418 if ((idt->idt_ic != ic) &&
1419 (idt->idt_state == TASK_ACTIVE) &&
1420 (IDM_CONN_ISINI(ic) || idt->idt_client_handle ==
1421 client_handle)) {
1422 IDM_CONN_LOG(CE_WARN,
1423 "idm_task_find: wrong connection %p != %p",
1424 (void *)ic, (void *)idt->idt_ic);
1425 }
1426 mutex_exit(&idt->idt_mutex);
1427 rw_exit(&idm.idm_taskid_table_lock);
1428 return (NULL);
1429 }
1430 idm_task_hold(idt);
1431 /*
1432 * Set the task state to TASK_COMPLETE so it can no longer
1433 * be found or aborted.
1434 */
1435 if (B_TRUE == complete)
1436 idt->idt_state = TASK_COMPLETE;
1437 mutex_exit(&idt->idt_mutex);
1438 }
1439 rw_exit(&idm.idm_taskid_table_lock);
1440
1441 return (idt);
1442 }
1443
1444 /*
1445 * This function looks up a task by task tag.
1446 */
1447 idm_task_t *
1448 idm_task_find(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
1449 {
1450 return (idm_task_find_common(ic, itt, ttt, B_FALSE));
1451 }
1452
1453 /*
1454 * This function looks up a task by task tag. If found, the task state
1455 * is atomically set to TASK_COMPLETE so it can no longer be found or aborted.
1456 */
1457 idm_task_t *
1458 idm_task_find_and_complete(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
1459 {
1460 return (idm_task_find_common(ic, itt, ttt, B_TRUE));
1461 }
1462
1463 /*
1464 * idm_task_find_by_handle
1465 *
1466 * This function looks up a task by the client-private idt_client_handle.
1467 *
1468 * This function should NEVER be called in the performance path. It is
1469 * intended strictly for error recovery/task management.
1470 */
1471 /*ARGSUSED*/
1472 void *
1473 idm_task_find_by_handle(idm_conn_t *ic, uintptr_t handle)
1474 {
1475 idm_task_t *idt = NULL;
1476 int idx = 0;
1477
1478 rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1479
1480 for (idx = 0; idx < idm.idm_taskid_max; idx++) {
1481 idt = idm.idm_taskid_table[idx];
1482
1483 if (idt == NULL)
1484 continue;
1485
1486 mutex_enter(&idt->idt_mutex);
1487
1488 if (idt->idt_state != TASK_ACTIVE) {
1489 /*
1490 * Task is either in suspend, abort, or already
1491 * complete.
1492 */
1493 mutex_exit(&idt->idt_mutex);
1494 continue;
1495 }
1496
1497 if (idt->idt_client_handle == handle) {
1498 idm_task_hold(idt);
1499 mutex_exit(&idt->idt_mutex);
1500 break;
1501 }
1502
1503 mutex_exit(&idt->idt_mutex);
1504 }
1505
1506 rw_exit(&idm.idm_taskid_table_lock);
1507
1508 if ((idt == NULL) || (idx == idm.idm_taskid_max))
1509 return (NULL);
1510
1511 return (idt->idt_private);
1512 }
1513
1514 void
1515 idm_task_hold(idm_task_t *idt)
1516 {
1517 idm_refcnt_hold(&idt->idt_refcnt);
1518 }
1519
1520 void
1521 idm_task_rele(idm_task_t *idt)
1522 {
1523 idm_refcnt_rele(&idt->idt_refcnt);
1524 }
1525
1526 stmf_status_t
1527 idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
1528 {
1529 idm_task_t *task;
1530 int idx;
1531 stmf_status_t s = STMF_SUCCESS;
1532
1533 /*
1534 * Passing NULL as the task indicates that all tasks
1535 * for this connection should be aborted.
1536 */
1537 if (idt == NULL) {
1538 /*
1539 * Only the connection state machine should ask for
1540 * all tasks to abort and this should never happen in FFP.
1541 */
1542 ASSERT(!ic->ic_ffp);
1543 rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1544 for (idx = 0; idx < idm.idm_taskid_max; idx++) {
1545 task = idm.idm_taskid_table[idx];
1546 if (task == NULL)
1547 continue;
1548 mutex_enter(&task->idt_mutex);
1549 if ((task->idt_state != TASK_IDLE) &&
1550 (task->idt_state != TASK_COMPLETE) &&
1551 (task->idt_ic == ic)) {
1552 rw_exit(&idm.idm_taskid_table_lock);
1553 s = idm_task_abort_one(ic, task, abort_type);
1554 rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1555 } else
1556 mutex_exit(&task->idt_mutex);
1557 }
1558 rw_exit(&idm.idm_taskid_table_lock);
1559 } else {
1560 mutex_enter(&idt->idt_mutex);
1561 s = idm_task_abort_one(ic, idt, abort_type);
1562 }
1563 return (s);
1564 }
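
/*
 * Illustrative sketches (hedged): the connection state machine aborts
 * everything outstanding on a failing connection with
 *
 *	(void) idm_task_abort(ic, NULL, AT_INTERNAL_ABORT);
 *
 * while a task management function aborting a single task (idt_mutex
 * not held by the caller) would use
 *
 *	s = idm_task_abort(ic, idt, AT_TASK_MGMT_ABORT);
 */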
1565
1566 static void
1567 idm_task_abort_unref_cb(void *ref)
1568 {
1569 idm_task_t *idt = ref;
1570
1571 mutex_enter(&idt->idt_mutex);
1572 switch (idt->idt_state) {
1573 case TASK_SUSPENDING:
1574 idt->idt_state = TASK_SUSPENDED;
1575 mutex_exit(&idt->idt_mutex);
1576 idm_task_aborted(idt, IDM_STATUS_SUSPENDED);
1577 return;
1578 case TASK_ABORTING:
1579 idt->idt_state = TASK_ABORTED;
1580 mutex_exit(&idt->idt_mutex);
1581 idm_task_aborted(idt, IDM_STATUS_ABORTED);
1582 return;
1583 default:
1584 mutex_exit(&idt->idt_mutex);
1585 ASSERT(0);
1586 break;
1587 }
1588 }
1589
1590 /*
1591 * Abort the idm task.
1592 * Caller must hold the task mutex, which will be released before return
1593 */
1594 static stmf_status_t
1595 idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
1596 {
1597 stmf_status_t s = STMF_SUCCESS;
1598
1599 /* Caller must hold the task mutex */
1600 ASSERT(mutex_owned(&idt->idt_mutex));
1601 switch (idt->idt_state) {
1602 case TASK_ACTIVE:
1603 switch (abort_type) {
1604 case AT_INTERNAL_SUSPEND:
1605 /* Call transport to release any resources */
1606 idt->idt_state = TASK_SUSPENDING;
1607 mutex_exit(&idt->idt_mutex);
1608 ic->ic_transport_ops->it_free_task_rsrc(idt);
1609
1610 /*
1611 * Wait for outstanding references. When all
1612 * references are released the callback will call
1613 * idm_task_aborted().
1614 */
1615 idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1616 &idm_task_abort_unref_cb);
1617 return (s);
1618 case AT_INTERNAL_ABORT:
1619 case AT_TASK_MGMT_ABORT:
1620 idt->idt_state = TASK_ABORTING;
1621 mutex_exit(&idt->idt_mutex);
1622 ic->ic_transport_ops->it_free_task_rsrc(idt);
1623
1624 /*
1625 * Wait for outstanding references. When all
1626 * references are released the callback will call
1627 * idm_task_aborted().
1628 */
1629 idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1630 &idm_task_abort_unref_cb);
1631 return (s);
1632 default:
1633 ASSERT(0);
1634 }
1635 break;
1636 case TASK_SUSPENDING:
1637 /* Already called transport_free_task_rsrc(); */
1638 switch (abort_type) {
1639 case AT_INTERNAL_SUSPEND:
1640 /* Already doing it */
1641 break;
1642 case AT_INTERNAL_ABORT:
1643 case AT_TASK_MGMT_ABORT:
1644 idt->idt_state = TASK_ABORTING;
1645 break;
1646 default:
1647 ASSERT(0);
1648 }
1649 break;
1650 case TASK_SUSPENDED:
1651 /* Already called transport_free_task_rsrc(); */
1652 switch (abort_type) {
1653 case AT_INTERNAL_SUSPEND:
1654 /* Already doing it */
1655 break;
1656 case AT_INTERNAL_ABORT:
1657 case AT_TASK_MGMT_ABORT:
1658 idt->idt_state = TASK_ABORTING;
1659 mutex_exit(&idt->idt_mutex);
1660
1661 /*
1662 * We could probably call idm_task_aborted directly
1663 * here but we may be holding the conn lock. It's
1664 * easier to just switch contexts. Even though
1665 * we shouldn't really have any references we'll
1666 * set the state to TASK_ABORTING instead of
1667 * TASK_ABORTED so we can use the same code path.
1668 */
1669 idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1670 &idm_task_abort_unref_cb);
1671 return (s);
1672 default:
1673 ASSERT(0);
1674 }
1675 break;
1676 case TASK_ABORTING:
1677 case TASK_ABORTED:
1678 switch (abort_type) {
1679 case AT_INTERNAL_SUSPEND:
1680 /* We're already past this point... */
1681 case AT_INTERNAL_ABORT:
1682 case AT_TASK_MGMT_ABORT:
1683 /* Already doing it */
1684 break;
1685 default:
1686 ASSERT(0);
1687 }
1688 break;
1689 case TASK_COMPLETE:
1690 idm_refcnt_wait_ref(&idt->idt_refcnt);
1691 s = STMF_ABORT_SUCCESS;
1692 break;
1693 default:
1694 ASSERT(0);
1695 }
1696 mutex_exit(&idt->idt_mutex);
1697
1698 return (s);
1699 }
1700
1701 static void
1702 idm_task_aborted(idm_task_t *idt, idm_status_t status)
1703 {
1704 (*idt->idt_ic->ic_conn_ops.icb_task_aborted)(idt, status);
1705 }
1706
1707 /*
1708 * idm_pdu_tx
1709 *
1710 * This is IDM's implementation of the 'Send_Control' operational primitive.
1711 * This function is invoked by an initiator iSCSI layer requesting the transfer
1712 * of an iSCSI command PDU or a target iSCSI layer requesting the transfer of an
1713 * iSCSI response PDU. The PDU will be transmitted as-is by the local Datamover
1714 * layer to the peer iSCSI layer in the remote iSCSI node. The connection info
1715 * and iSCSI PDU-specific qualifiers namely BHS, AHS, DataDescriptor and Size
1716 * are provided as input.
1717 *
1718 */
1719 void
1720 idm_pdu_tx(idm_pdu_t *pdu)
1721 {
1722 idm_conn_t *ic = pdu->isp_ic;
1723 iscsi_async_evt_hdr_t *async_evt;
1724
1725 /*
1726 * If we are in full-featured mode then route SCSI-related
1727 * commands to the appropriate function vector without checking
1728 * the connection state. We will only be in full-feature mode
1729 * when we are in an acceptable state for SCSI PDU's.
1730 *
1731 * We also need to ensure that there are no PDU events outstanding
1732 * on the state machine. Any non-SCSI PDU's received in full-feature
1733 * mode will result in PDU events and until these have been handled
1734 * we need to route all PDU's through the state machine as PDU
1735 * events to maintain ordering.
1736 *
1737 * Note that IDM cannot enter FFP mode until it processes in
1738 * its state machine the last xmit of the login process.
1739 * Hence, checking the IDM_PDU_LOGIN_TX flag here would be
1740 * superfluous.
1741 */
1742 mutex_enter(&ic->ic_state_mutex);
1743 if (ic->ic_ffp && (ic->ic_pdu_events == 0)) {
1744 mutex_exit(&ic->ic_state_mutex);
1745 switch (IDM_PDU_OPCODE(pdu)) {
1746 case ISCSI_OP_SCSI_RSP:
1747 /* Target only */
1748 DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
1749 iscsi_scsi_rsp_hdr_t *,
1750 (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
1751 idm_pdu_tx_forward(ic, pdu);
1752 return;
1753 case ISCSI_OP_SCSI_TASK_MGT_RSP:
1754 /* Target only */
1755 DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
1756 iscsi_scsi_task_mgt_rsp_hdr_t *,
1757 (iscsi_scsi_task_mgt_rsp_hdr_t *)pdu->isp_hdr);
1758 idm_pdu_tx_forward(ic, pdu);
1759 return;
1760 case ISCSI_OP_SCSI_DATA_RSP:
1761 /* Target only */
1762 DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
1763 iscsi_data_rsp_hdr_t *,
1764 (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
1765 idm_pdu_tx_forward(ic, pdu);
1766 return;
1767 case ISCSI_OP_RTT_RSP:
1768 /* Target only */
1769 DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
1770 iscsi_rtt_hdr_t *,
1771 (iscsi_rtt_hdr_t *)pdu->isp_hdr);
1772 idm_pdu_tx_forward(ic, pdu);
1773 return;
1774 case ISCSI_OP_NOOP_IN:
1775 /* Target only */
1776 DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
1777 iscsi_nop_in_hdr_t *,
1778 (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
1779 idm_pdu_tx_forward(ic, pdu);
1780 return;
1781 case ISCSI_OP_TEXT_RSP:
1782 /* Target only */
1783 DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
1784 iscsi_text_rsp_hdr_t *,
1785 (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1786 idm_pdu_tx_forward(ic, pdu);
1787 return;
1788 case ISCSI_OP_TEXT_CMD:
1789 case ISCSI_OP_NOOP_OUT:
1790 case ISCSI_OP_SCSI_CMD:
1791 case ISCSI_OP_SCSI_DATA:
1792 case ISCSI_OP_SCSI_TASK_MGT_MSG:
1793 /* Initiator only */
1794 idm_pdu_tx_forward(ic, pdu);
1795 return;
1796 default:
1797 break;
1798 }
1799
1800 mutex_enter(&ic->ic_state_mutex);
1801 }
1802
1803 /*
1804 * Any PDU's processed outside of full-feature mode and non-SCSI
1805 * PDU's in full-feature mode are handled by generating an
1806 * event to the connection state machine. The state machine
1807 * will validate the PDU against the current state and either
1808 * transmit the PDU if the opcode is allowed or handle an
1809 * error if the PDU is not allowed.
1810 *
1811 * This code-path will also generate any events that are implied
1812 * by the PDU opcode. For example, a "login response" with success
1813 * status generates a CE_LOGIN_SUCCESS_SND event.
1814 */
1815 switch (IDM_PDU_OPCODE(pdu)) {
1816 case ISCSI_OP_LOGIN_CMD:
1817 idm_conn_tx_pdu_event(ic, CE_LOGIN_SND, (uintptr_t)pdu);
1818 break;
1819 case ISCSI_OP_LOGIN_RSP:
1820 DTRACE_ISCSI_2(login__response, idm_conn_t *, ic,
1821 iscsi_login_rsp_hdr_t *,
1822 (iscsi_login_rsp_hdr_t *)pdu->isp_hdr);
1823 idm_parse_login_rsp(ic, pdu, /* Is RX */ B_FALSE);
1824 break;
1825 case ISCSI_OP_LOGOUT_CMD:
1826 idm_parse_logout_req(ic, pdu, /* Is RX */ B_FALSE);
1827 break;
1828 case ISCSI_OP_LOGOUT_RSP:
1829 DTRACE_ISCSI_2(logout__response, idm_conn_t *, ic,
1830 iscsi_logout_rsp_hdr_t *,
1831 (iscsi_logout_rsp_hdr_t *)pdu->isp_hdr);
1832 idm_parse_logout_rsp(ic, pdu, /* Is RX */ B_FALSE);
1833 break;
1834 case ISCSI_OP_ASYNC_EVENT:
1835 DTRACE_ISCSI_2(async__send, idm_conn_t *, ic,
1836 iscsi_async_evt_hdr_t *,
1837 (iscsi_async_evt_hdr_t *)pdu->isp_hdr);
1838 async_evt = (iscsi_async_evt_hdr_t *)pdu->isp_hdr;
1839 switch (async_evt->async_event) {
1840 case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT:
1841 idm_conn_tx_pdu_event(ic, CE_ASYNC_LOGOUT_SND,
1842 (uintptr_t)pdu);
1843 break;
1844 case ISCSI_ASYNC_EVENT_DROPPING_CONNECTION:
1845 idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_CONN_SND,
1846 (uintptr_t)pdu);
1847 break;
1848 case ISCSI_ASYNC_EVENT_DROPPING_ALL_CONNECTIONS:
1849 idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_ALL_CONN_SND,
1850 (uintptr_t)pdu);
1851 break;
1852 case ISCSI_ASYNC_EVENT_SCSI_EVENT:
1853 case ISCSI_ASYNC_EVENT_PARAM_NEGOTIATION:
1854 default:
1855 idm_conn_tx_pdu_event(ic, CE_MISC_TX,
1856 (uintptr_t)pdu);
1857 break;
1858 }
1859 break;
1860 case ISCSI_OP_SCSI_RSP:
1861 /* Target only */
1862 DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
1863 iscsi_scsi_rsp_hdr_t *,
1864 (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
1865 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1866 break;
1867 case ISCSI_OP_SCSI_TASK_MGT_RSP:
1868 /* Target only */
1869 DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
1870 iscsi_scsi_task_mgt_rsp_hdr_t *,
1871 (iscsi_scsi_task_mgt_rsp_hdr_t *)pdu->isp_hdr);
1872 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1873 break;
1874 case ISCSI_OP_SCSI_DATA_RSP:
1875 /* Target only */
1876 DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
1877 iscsi_data_rsp_hdr_t *,
1878 (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
1879 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1880 break;
1881 case ISCSI_OP_RTT_RSP:
1882 /* Target only */
1883 DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
1884 iscsi_rtt_hdr_t *,
1885 (iscsi_rtt_hdr_t *)pdu->isp_hdr);
1886 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1887 break;
1888 case ISCSI_OP_NOOP_IN:
1889 /* Target only */
1890 DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
1891 iscsi_nop_in_hdr_t *,
1892 (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
1893 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1894 break;
1895 case ISCSI_OP_TEXT_RSP:
1896 /* Target only */
1897 DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
1898 iscsi_text_rsp_hdr_t *,
1899 (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1900 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1901 break;
1902 /* Initiator only */
1903 case ISCSI_OP_SCSI_CMD:
1904 case ISCSI_OP_SCSI_TASK_MGT_MSG:
1905 case ISCSI_OP_SCSI_DATA:
1906 case ISCSI_OP_NOOP_OUT:
1907 case ISCSI_OP_TEXT_CMD:
1908 case ISCSI_OP_SNACK_CMD:
1909 case ISCSI_OP_REJECT_MSG:
1910 default:
		/*
		 * The connection state machine will validate these PDUs
		 * against the current state. A PDU not allowed in the
		 * current state will cause a protocol error.
		 */
		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
		break;
	}
	mutex_exit(&ic->ic_state_mutex);
}

/*
 * Common allocation of a PDU along with memory for header and data.
 */
static idm_pdu_t *
idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen, int sleepflag)
{
	idm_pdu_t	*result;

	/*
	 * IDM clients should cache these structures for performance
	 * critical paths. We can't cache effectively in IDM because we
	 * don't know the correct header and data size.
	 *
	 * Valid header length is assumed to be hdrlen and valid data
	 * length is assumed to be datalen. isp_hdrlen and isp_datalen
	 * can be adjusted after the PDU is returned if necessary.
	 */
	result = kmem_zalloc(sizeof (idm_pdu_t) + hdrlen + datalen, sleepflag);
	if (result != NULL) {
		/* For idm_pdu_free sanity check */
		result->isp_flags |= IDM_PDU_ALLOC;
		/* pointer arithmetic */
		result->isp_hdr = (iscsi_hdr_t *)(result + 1);
		result->isp_hdrlen = hdrlen;
		result->isp_hdrbuflen = hdrlen;
		result->isp_transport_hdrlen = 0;
		if (datalen != 0)
			result->isp_data = (uint8_t *)result->isp_hdr + hdrlen;
		result->isp_datalen = datalen;
		result->isp_databuflen = datalen;
		result->isp_magic = IDM_PDU_MAGIC;
	}

	return (result);
}
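
/*
 * Layout of the single allocation made by idm_pdu_alloc_common():
 *
 *	+-----------+-----------------+-----------------+
 *	| idm_pdu_t | header (hdrlen) | data (datalen)  |
 *	+-----------+-----------------+-----------------+
 *	^ result    ^ isp_hdr         ^ isp_data
 *
 * isp_hdrbuflen and isp_databuflen record the allocated sizes so that
 * idm_pdu_free() can release the whole region even if the caller later
 * shrinks isp_hdrlen or isp_datalen.
 */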

/*
 * Typical idm_pdu_alloc invocation; will block for resources.
 */
idm_pdu_t *
idm_pdu_alloc(uint_t hdrlen, uint_t datalen)
{
	return (idm_pdu_alloc_common(hdrlen, datalen, KM_SLEEP));
}

/*
 * Non-blocking idm_pdu_alloc implementation; returns NULL if resources
 * are not available. Needed for transport-layer allocations, which may
 * be invoked in interrupt context.
 */
idm_pdu_t *
idm_pdu_alloc_nosleep(uint_t hdrlen, uint_t datalen)
{
	return (idm_pdu_alloc_common(hdrlen, datalen, KM_NOSLEEP));
}
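
/*
 * Usage sketch (illustrative only, not a caller in this file): code on
 * a blocking path can use idm_pdu_alloc(), while code that may run in
 * interrupt context must use idm_pdu_alloc_nosleep() and be prepared
 * for a NULL return:
 *
 *	idm_pdu_t *pdu = idm_pdu_alloc_nosleep(sizeof (iscsi_hdr_t), 0);
 *	if (pdu == NULL)
 *		return (IDM_STATUS_FAIL);
 */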

/*
 * Free a PDU previously allocated with idm_pdu_alloc() including any
 * header and data space allocated as part of the original request.
 * Additional memory regions referenced by subsequent modification of
 * the isp_hdr and/or isp_data fields will not be freed.
 */
void
idm_pdu_free(idm_pdu_t *pdu)
{
	/* Make sure the structure was allocated using idm_pdu_alloc() */
	ASSERT(pdu->isp_flags & IDM_PDU_ALLOC);
	kmem_free(pdu,
	    sizeof (idm_pdu_t) + pdu->isp_hdrbuflen + pdu->isp_databuflen);
}

/*
 * Initialize the connection, private and callback fields in a PDU.
 */
void
idm_pdu_init(idm_pdu_t *pdu, idm_conn_t *ic, void *private, idm_pdu_cb_t *cb)
{
	/*
	 * idm_pdu_complete() will call idm_pdu_free() if the callback is
	 * NULL. This will only work if the PDU was originally allocated
	 * with idm_pdu_alloc().
	 */
	ASSERT((pdu->isp_flags & IDM_PDU_ALLOC) || (cb != NULL));
	pdu->isp_magic = IDM_PDU_MAGIC;
	pdu->isp_ic = ic;
	pdu->isp_private = private;
	pdu->isp_callback = cb;
}
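
/*
 * Completion sketch (illustrative): a PDU with a callback is handed
 * back to its owner through that callback; a PDU with a NULL callback
 * must come from idm_pdu_alloc() so idm_pdu_complete() can free it:
 *
 *	pdu = idm_pdu_alloc(sizeof (iscsi_hdr_t), 0);
 *	idm_pdu_init(pdu, ic, NULL, NULL);
 *	(transmit, then on completion:)
 *	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);	(frees pdu)
 */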

/*
 * Initialize the header and header length fields. This function should
 * not be used to adjust the header length in a buffer allocated via
 * idm_pdu_alloc since it overwrites the existing header pointer.
 */
void
idm_pdu_init_hdr(idm_pdu_t *pdu, uint8_t *hdr, uint_t hdrlen)
{
	pdu->isp_hdr = (iscsi_hdr_t *)((void *)hdr);
	pdu->isp_hdrlen = hdrlen;
}

/*
 * Initialize the data and data length fields. This function should
 * not be used to adjust the data length of a buffer allocated via
 * idm_pdu_alloc since it overwrites the existing data pointer.
 */
void
idm_pdu_init_data(idm_pdu_t *pdu, uint8_t *data, uint_t datalen)
{
	pdu->isp_data = data;
	pdu->isp_datalen = datalen;
}
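
/*
 * Usage sketch (illustrative; mypdu, myhdr, mybuf and my_tx_done_cb are
 * hypothetical names): a client that owns its PDU, header and data
 * storage pairs these initializers with a non-NULL completion callback,
 * since idm_pdu_free() must not be applied to such a PDU:
 *
 *	idm_pdu_init(&mypdu, ic, mystate, &my_tx_done_cb);
 *	idm_pdu_init_hdr(&mypdu, (uint8_t *)&myhdr, sizeof (myhdr));
 *	idm_pdu_init_data(&mypdu, mybuf, mybuflen);
 */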

void
idm_pdu_complete(idm_pdu_t *pdu, idm_status_t status)
{
	if (pdu->isp_callback) {
		pdu->isp_status = status;
		(*pdu->isp_callback)(pdu, status);
	} else {
		idm_pdu_free(pdu);
	}
}

/*
 * State machine auditing
 */

void
idm_sm_audit_init(sm_audit_buf_t *audit_buf)
{
	bzero(audit_buf, sizeof (sm_audit_buf_t));
	audit_buf->sab_max_index = SM_AUDIT_BUF_MAX_REC - 1;
}

static sm_audit_record_t *
idm_sm_audit_common(sm_audit_buf_t *audit_buf, sm_audit_record_type_t r_type,
    sm_audit_sm_type_t sm_type, int current_state)
{
	sm_audit_record_t	*sar;

	sar = audit_buf->sab_records;
	sar += audit_buf->sab_index;
	audit_buf->sab_index++;
	audit_buf->sab_index &= audit_buf->sab_max_index;
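	/*
	 * The mask-based wrap above turns sab_records into a ring
	 * buffer; it is correct only because sab_max_index is
	 * SM_AUDIT_BUF_MAX_REC - 1 and SM_AUDIT_BUF_MAX_REC is assumed
	 * to be a power of two, which makes the mask all ones.
	 */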

	sar->sar_type = r_type;
	gethrestime(&sar->sar_timestamp);
	sar->sar_sm_type = sm_type;
	sar->sar_state = current_state;

	return (sar);
}

void
idm_sm_audit_event(sm_audit_buf_t *audit_buf,
    sm_audit_sm_type_t sm_type, int current_state,
    int event, uintptr_t event_info)
{
	sm_audit_record_t	*sar;

	sar = idm_sm_audit_common(audit_buf, SAR_STATE_EVENT,
	    sm_type, current_state);
	sar->sar_event = event;
	sar->sar_event_info = event_info;
}

void
idm_sm_audit_state_change(sm_audit_buf_t *audit_buf,
    sm_audit_sm_type_t sm_type, int current_state, int new_state)
{
	sm_audit_record_t	*sar;

	sar = idm_sm_audit_common(audit_buf, SAR_STATE_CHANGE,
	    sm_type, current_state);
	sar->sar_new_state = new_state;
}
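
/*
 * Usage sketch (illustrative, patterned after the connection state
 * machine; the field and enum names here are assumptions): record the
 * event being processed and then the resulting transition, so the ring
 * buffer preserves ordered event/state pairs for debugging:
 *
 *	idm_sm_audit_event(&ic->ic_state_audit, SAS_IDM_CONN,
 *	    (int)ic->ic_state, (int)event, (uintptr_t)pdu);
 *	idm_sm_audit_state_change(&ic->ic_state_audit, SAS_IDM_CONN,
 *	    (int)ic->ic_state, (int)new_state);
 */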


/*
 * Object reference tracking
 */

void
idm_refcnt_init(idm_refcnt_t *refcnt, void *referenced_obj)
{
	bzero(refcnt, sizeof (*refcnt));
	idm_refcnt_reset(refcnt);
	refcnt->ir_referenced_obj = referenced_obj;
	bzero(&refcnt->ir_audit_buf, sizeof (refcnt_audit_buf_t));
	refcnt->ir_audit_buf.anb_max_index = REFCNT_AUDIT_BUF_MAX_REC - 1;
	mutex_init(&refcnt->ir_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&refcnt->ir_cv, NULL, CV_DEFAULT, NULL);
}

void
idm_refcnt_destroy(idm_refcnt_t *refcnt)
{
	/*
	 * Grab the mutex to ensure that no other lingering threads are
	 * still holding it before we destroy it (e.g. idm_refcnt_rele
	 * just after the refcnt goes to zero when
	 * ir_waiting == REF_WAIT_ASYNC).
	 */
	mutex_enter(&refcnt->ir_mutex);
	ASSERT(refcnt->ir_refcnt == 0);
	cv_destroy(&refcnt->ir_cv);
	mutex_destroy(&refcnt->ir_mutex);
}

void
idm_refcnt_reset(idm_refcnt_t *refcnt)
{
	refcnt->ir_waiting = REF_NOWAIT;
	refcnt->ir_refcnt = 0;
}

void
idm_refcnt_hold(idm_refcnt_t *refcnt)
{
	/*
	 * Nothing should take a hold on an object after a call to
	 * idm_refcnt_wait_ref or idm_refcnt_async_wait_ref.
	 */
	ASSERT(refcnt->ir_waiting == REF_NOWAIT);

	mutex_enter(&refcnt->ir_mutex);
	refcnt->ir_refcnt++;
	REFCNT_AUDIT(refcnt);
	mutex_exit(&refcnt->ir_mutex);
}

static void
idm_refcnt_unref_task(void *refcnt_void)
{
	idm_refcnt_t *refcnt = refcnt_void;

	mutex_enter(&refcnt->ir_mutex);
	REFCNT_AUDIT(refcnt);
	mutex_exit(&refcnt->ir_mutex);
	(*refcnt->ir_cb)(refcnt->ir_referenced_obj);
}

void
idm_refcnt_rele(idm_refcnt_t *refcnt)
{
	mutex_enter(&refcnt->ir_mutex);
	ASSERT(refcnt->ir_refcnt > 0);
	refcnt->ir_refcnt--;
	REFCNT_AUDIT(refcnt);
	if (refcnt->ir_waiting == REF_NOWAIT) {
		/* No one is waiting on this object */
		mutex_exit(&refcnt->ir_mutex);
		return;
	}

	/*
	 * Someone is waiting for this object to go idle so check if
	 * refcnt is 0. Waiting on an object then later grabbing another
	 * reference is not allowed so we don't need to handle that case.
	 */
	if (refcnt->ir_refcnt == 0) {
		if (refcnt->ir_waiting == REF_WAIT_ASYNC) {
			if (taskq_dispatch(idm.idm_global_taskq,
			    &idm_refcnt_unref_task, refcnt,
			    TQ_SLEEP) == NULL) {
				cmn_err(CE_WARN,
				    "idm_refcnt_rele: Couldn't dispatch task");
			}
		} else if (refcnt->ir_waiting == REF_WAIT_SYNC) {
			cv_signal(&refcnt->ir_cv);
		}
	}
	mutex_exit(&refcnt->ir_mutex);
}

void
idm_refcnt_rele_and_destroy(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
{
	mutex_enter(&refcnt->ir_mutex);
	ASSERT(refcnt->ir_refcnt > 0);
	refcnt->ir_refcnt--;
	REFCNT_AUDIT(refcnt);

	/*
	 * Release the reference and, if it was the last one, arrange for
	 * cb_func to destroy the object. Waiting on an object and then
	 * later grabbing another reference is not allowed, so we don't
	 * need to handle that case.
	 */
	if (refcnt->ir_refcnt == 0) {
		refcnt->ir_cb = cb_func;
		refcnt->ir_waiting = REF_WAIT_ASYNC;
		if (taskq_dispatch(idm.idm_global_taskq,
		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
			cmn_err(CE_WARN,
			    "idm_refcnt_rele_and_destroy: "
			    "Couldn't dispatch task");
		}
	}
	mutex_exit(&refcnt->ir_mutex);
}

void
idm_refcnt_wait_ref(idm_refcnt_t *refcnt)
{
	mutex_enter(&refcnt->ir_mutex);
	refcnt->ir_waiting = REF_WAIT_SYNC;
	REFCNT_AUDIT(refcnt);
	while (refcnt->ir_refcnt != 0)
		cv_wait(&refcnt->ir_cv, &refcnt->ir_mutex);
	mutex_exit(&refcnt->ir_mutex);
}

void
idm_refcnt_async_wait_ref(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
{
	mutex_enter(&refcnt->ir_mutex);
	refcnt->ir_waiting = REF_WAIT_ASYNC;
	refcnt->ir_cb = cb_func;
	REFCNT_AUDIT(refcnt);
	/*
	 * It's possible that there are no references left. To make
	 * things easier on the caller, use a taskq to call the callback
	 * instead of calling it synchronously.
	 */
	if (refcnt->ir_refcnt == 0) {
		if (taskq_dispatch(idm.idm_global_taskq,
		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
			cmn_err(CE_WARN,
			    "idm_refcnt_async_wait_ref: "
			    "Couldn't dispatch task");
		}
	}
	mutex_exit(&refcnt->ir_mutex);
}
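
/*
 * Usage sketch (illustrative; obj and its refcnt field are hypothetical
 * names): take a hold while the object is in use, release it when done,
 * and at teardown drain the count before freeing:
 *
 *	idm_refcnt_hold(&obj->refcnt);
 *	(use obj)
 *	idm_refcnt_rele(&obj->refcnt);
 *
 *	idm_refcnt_wait_ref(&obj->refcnt);	(blocks until idle)
 *	idm_refcnt_destroy(&obj->refcnt);
 *
 * A caller that must not block would instead use
 * idm_refcnt_async_wait_ref() with a callback that performs the final
 * teardown.
 */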

void
idm_refcnt_destroy_unref_obj(idm_refcnt_t *refcnt,
    idm_refcnt_cb_t *cb_func)
{
	mutex_enter(&refcnt->ir_mutex);
	if (refcnt->ir_refcnt == 0) {
		mutex_exit(&refcnt->ir_mutex);
		(*cb_func)(refcnt->ir_referenced_obj);
		return;
	}
	mutex_exit(&refcnt->ir_mutex);
}

/*
 * Used to determine the status of the refcnt.
 *
 * If refcnt is 0, the return value is 0.
 * If refcnt is negative, the return value is -1.
 * If refcnt is > 0 and there are no waiters, the return value is 1.
 * If refcnt is > 0 and there are waiters, the return value is 2.
 */
int
idm_refcnt_is_held(idm_refcnt_t *refcnt)
{
	if (refcnt->ir_refcnt < 0)
		return (-1);

	if (refcnt->ir_refcnt == 0)
		return (0);

	if (refcnt->ir_waiting == REF_NOWAIT && refcnt->ir_refcnt > 0)
		return (1);

	return (2);
}

void
idm_conn_hold(idm_conn_t *ic)
{
	idm_refcnt_hold(&ic->ic_refcnt);
}

void
idm_conn_rele(idm_conn_t *ic)
{
	idm_refcnt_rele(&ic->ic_refcnt);
}

void
idm_conn_set_target_name(idm_conn_t *ic, char *target_name)
{
	(void) strlcpy(ic->ic_target_name, target_name, ISCSI_MAX_NAME_LEN + 1);
}

void
idm_conn_set_initiator_name(idm_conn_t *ic, char *initiator_name)
{
	(void) strlcpy(ic->ic_initiator_name, initiator_name,
	    ISCSI_MAX_NAME_LEN + 1);
}

void
idm_conn_set_isid(idm_conn_t *ic, uint8_t isid[ISCSI_ISID_LEN])
{
	(void) snprintf(ic->ic_isid, ISCSI_MAX_ISID_LEN + 1,
	    "%02x%02x%02x%02x%02x%02x",
	    isid[0], isid[1], isid[2], isid[3], isid[4], isid[5]);
}
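
/*
 * For example, idm_conn_set_isid() formats an isid of
 * {0x80, 0x00, 0x00, 0x00, 0x12, 0x34} as the string "800000001234".
 */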

static int
_idm_init(void)
{
	/* Initialize the rwlock for the taskid table */
	rw_init(&idm.idm_taskid_table_lock, NULL, RW_DRIVER, NULL);

	/* Initialize the global mutex and taskq */
	mutex_init(&idm.idm_global_mutex, NULL, MUTEX_DEFAULT, NULL);

	cv_init(&idm.idm_tgt_svc_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&idm.idm_wd_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * The maximum allocation needs to be high here since there can be
	 * many concurrent tasks using the global taskq.
	 */
	idm.idm_global_taskq = taskq_create("idm_global_taskq", 1, minclsyspri,
	    128, 16384, TASKQ_PREPOPULATE);
	if (idm.idm_global_taskq == NULL) {
		cv_destroy(&idm.idm_wd_cv);
		cv_destroy(&idm.idm_tgt_svc_cv);
		mutex_destroy(&idm.idm_global_mutex);
		rw_destroy(&idm.idm_taskid_table_lock);
		return (ENOMEM);
	}

	/* Start watchdog thread */
	idm.idm_wd_thread = thread_create(NULL, 0,
	    idm_wd_thread, NULL, 0, &p0, TS_RUN, minclsyspri);
	if (idm.idm_wd_thread == NULL) {
		/* Couldn't create the watchdog thread */
		taskq_destroy(idm.idm_global_taskq);
		cv_destroy(&idm.idm_wd_cv);
		cv_destroy(&idm.idm_tgt_svc_cv);
		mutex_destroy(&idm.idm_global_mutex);
		rw_destroy(&idm.idm_taskid_table_lock);
		return (ENOMEM);
	}

	/* Pause until the watchdog thread is running */
	mutex_enter(&idm.idm_global_mutex);
	while (!idm.idm_wd_thread_running)
		cv_wait(&idm.idm_wd_cv, &idm.idm_global_mutex);
	mutex_exit(&idm.idm_global_mutex);

	/*
	 * Allocate the task ID table and set "next" to 0.
	 */
	idm.idm_taskid_max = idm_max_taskids;
	idm.idm_taskid_table = (idm_task_t **)
	    kmem_zalloc(idm.idm_taskid_max * sizeof (idm_task_t *), KM_SLEEP);
	idm.idm_taskid_next = 0;

	/* Create the global buffer and task kmem caches */
	idm.idm_buf_cache = kmem_cache_create("idm_buf_cache",
	    sizeof (idm_buf_t), 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);

	/*
	 * Note, we're explicitly allocating an additional iSER header-
	 * sized chunk for each of these elements. See idm_task_constructor().
	 */
	idm.idm_task_cache = kmem_cache_create("idm_task_cache",
	    sizeof (idm_task_t) + IDM_TRANSPORT_HEADER_LENGTH, 8,
	    &idm_task_constructor, &idm_task_destructor,
	    NULL, NULL, NULL, KM_SLEEP);

	/* Create the service and connection context lists */
	list_create(&idm.idm_tgt_svc_list, sizeof (idm_svc_t),
	    offsetof(idm_svc_t, is_list_node));
	list_create(&idm.idm_tgt_conn_list, sizeof (idm_conn_t),
	    offsetof(idm_conn_t, ic_list_node));
	list_create(&idm.idm_ini_conn_list, sizeof (idm_conn_t),
	    offsetof(idm_conn_t, ic_list_node));

	/* Initialize the native sockets transport */
	idm_so_init(&idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS]);

	/* Create connection ID pool */
	(void) idm_idpool_create(&idm.idm_conn_id_pool);

	return (DDI_SUCCESS);
}

static int
_idm_fini(void)
{
	if (!list_is_empty(&idm.idm_ini_conn_list) ||
	    !list_is_empty(&idm.idm_tgt_conn_list) ||
	    !list_is_empty(&idm.idm_tgt_svc_list)) {
		return (EBUSY);
	}

	mutex_enter(&idm.idm_global_mutex);
	idm.idm_wd_thread_running = B_FALSE;
	cv_signal(&idm.idm_wd_cv);
	mutex_exit(&idm.idm_global_mutex);

	thread_join(idm.idm_wd_thread_did);

	idm_idpool_destroy(&idm.idm_conn_id_pool);

	/* Close any LDI handles we have open on transport drivers */
	mutex_enter(&idm.idm_global_mutex);
	idm_transport_teardown();
	mutex_exit(&idm.idm_global_mutex);

	/* Tear down the native sockets transport */
	idm_so_fini();

	list_destroy(&idm.idm_ini_conn_list);
	list_destroy(&idm.idm_tgt_conn_list);
	list_destroy(&idm.idm_tgt_svc_list);
	kmem_cache_destroy(idm.idm_task_cache);
	kmem_cache_destroy(idm.idm_buf_cache);
	kmem_free(idm.idm_taskid_table,
	    idm.idm_taskid_max * sizeof (idm_task_t *));
	mutex_destroy(&idm.idm_global_mutex);
	cv_destroy(&idm.idm_wd_cv);
	cv_destroy(&idm.idm_tgt_svc_cv);
	rw_destroy(&idm.idm_taskid_table_lock);

	return (0);
}