1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2019 Nexenta Systems, Inc. All rights reserved.
  24  */
  25 
  26 #include <sys/cpuvar.h>
  27 #include <sys/conf.h>
  28 #include <sys/file.h>
  29 #include <sys/ddi.h>
  30 #include <sys/sunddi.h>
  31 #include <sys/modctl.h>
  32 
  33 #include <sys/socket.h>
  34 #include <sys/strsubr.h>
  35 #include <sys/sysmacros.h>
  36 
  37 #include <sys/socketvar.h>
  38 #include <netinet/in.h>
  39 
  40 #include <sys/idm/idm.h>
  41 #include <sys/idm/idm_so.h>
  42 
  43 #define IDM_NAME_VERSION        "iSCSI Data Mover"
  44 
  45 extern struct mod_ops mod_miscops;
  46 extern struct mod_ops mod_miscops;
  47 
  48 static struct modlmisc modlmisc = {
  49         &mod_miscops,       /* Type of module */
  50         IDM_NAME_VERSION
  51 };
  52 
  53 static struct modlinkage modlinkage = {
  54         MODREV_1, (void *)&modlmisc, NULL
  55 };
  56 
/* Watchdog thread entry point (defined elsewhere in IDM) */
extern void idm_wd_thread(void *arg);

/* Module-private init/teardown, invoked from _init()/_fini() */
static int _idm_init(void);
static int _idm_fini(void);
/*
 * Buffer (un)bind helpers; the "_locked" suffix follows the convention
 * that the caller holds idt->idt_mutex (see idm_buf_tx_to_ini et al.)
 */
static void idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf);
static stmf_status_t idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
    idm_abort_type_t abort_type);
static void idm_task_aborted(idm_task_t *idt, idm_status_t status);
static idm_pdu_t *idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen,
    int sleepflag);

/* Logging toggles for connection and service code (0 = disabled) */
boolean_t idm_conn_logging = 0;
boolean_t idm_svc_logging = 0;
/* Buffer pattern checking is costly; enabled by default only for DEBUG */
#ifdef DEBUG
boolean_t idm_pattern_checking = 1;
#else
boolean_t idm_pattern_checking = 0;
#endif

/*
 * Potential tuneable for the maximum number of tasks.  Default to
 * IDM_TASKIDS_MAX
 */

uint32_t	idm_max_taskids = IDM_TASKIDS_MAX;
  85 
  86 /*
  87  * Global list of transport handles
  88  *   These are listed in preferential order, so we can simply take the
  89  *   first "it_conn_is_capable" hit. Note also that the order maps to
  90  *   the order of the idm_transport_type_t list.
  91  */
  92 idm_transport_t idm_transport_list[] = {
  93 
  94         /* iSER on InfiniBand transport handle */
  95         {IDM_TRANSPORT_TYPE_ISER,       /* type */
  96         "/devices/ib/iser@0:iser",      /* device path */
  97         NULL,                           /* LDI handle */
  98         NULL,                           /* transport ops */
  99         NULL},                          /* transport caps */
 100 
 101         /* IDM native sockets transport handle */
 102         {IDM_TRANSPORT_TYPE_SOCKETS,    /* type */
 103         NULL,                           /* device path */
 104         NULL,                           /* LDI handle */
 105         NULL,                           /* transport ops */
 106         NULL}                           /* transport caps */
 107 
 108 };
 109 
 110 int
 111 _init(void)
 112 {
 113         int rc;
 114 
 115         if ((rc = _idm_init()) != 0) {
 116                 return (rc);
 117         }
 118 
 119         return (mod_install(&modlinkage));
 120 }
 121 
 122 int
 123 _fini(void)
 124 {
 125         int rc;
 126 
 127         if ((rc = _idm_fini()) != 0) {
 128                 return (rc);
 129         }
 130 
 131         if ((rc = mod_remove(&modlinkage)) != 0) {
 132                 return (rc);
 133         }
 134 
 135         return (rc);
 136 }
 137 
/*
 * _info
 *
 * Module information entry point; reports via the module linkage.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
 143 
 144 /*
 145  * idm_transport_register()
 146  *
 147  * Provides a mechanism for an IDM transport driver to register its
 148  * transport ops and caps with the IDM kernel module. Invoked during
 149  * a transport driver's attach routine.
 150  */
 151 idm_status_t
 152 idm_transport_register(idm_transport_attr_t *attr)
 153 {
 154         ASSERT(attr->it_ops != NULL);
 155         ASSERT(attr->it_caps != NULL);
 156 
 157         switch (attr->type) {
 158         /* All known non-native transports here; for now, iSER */
 159         case IDM_TRANSPORT_TYPE_ISER:
 160                 idm_transport_list[attr->type].it_ops        = attr->it_ops;
 161                 idm_transport_list[attr->type].it_caps       = attr->it_caps;
 162                 return (IDM_STATUS_SUCCESS);
 163 
 164         default:
 165                 cmn_err(CE_NOTE, "idm: unknown transport type (0x%x) in "
 166                     "idm_transport_register", attr->type);
 167                 return (IDM_STATUS_SUCCESS);
 168         }
 169 }
 170 
 171 /*
 172  * idm_ini_conn_create
 173  *
 174  * This function is invoked by the iSCSI layer to create a connection context.
 175  * This does not actually establish the socket connection.
 176  *
 177  * cr - Connection request parameters
 178  * new_con - Output parameter that contains the new request if successful
 179  *
 180  */
 181 idm_status_t
 182 idm_ini_conn_create(idm_conn_req_t *cr, idm_conn_t **new_con)
 183 {
 184         idm_transport_t         *it;
 185         idm_conn_t              *ic;
 186         int                     rc;
 187 
 188         it = idm_transport_lookup(cr);
 189 
 190 retry:
 191         ic = idm_conn_create_common(CONN_TYPE_INI, it->it_type,
 192             &cr->icr_conn_ops);
 193 
 194         bcopy(&cr->cr_ini_dst_addr, &ic->ic_ini_dst_addr,
 195             sizeof (cr->cr_ini_dst_addr));
 196 
 197         /* create the transport-specific connection components */
 198         rc = it->it_ops->it_ini_conn_create(cr, ic);
 199         if (rc != IDM_STATUS_SUCCESS) {
 200                 /* cleanup the failed connection */
 201                 idm_conn_destroy_common(ic);
 202 
 203                 /*
 204                  * It is possible for an IB client to connect to
 205                  * an ethernet-only client via an IB-eth gateway.
 206                  * Therefore, if we are attempting to use iSER and
 207                  * fail, retry with sockets before ultimately
 208                  * failing the connection.
 209                  */
 210                 if (it->it_type == IDM_TRANSPORT_TYPE_ISER) {
 211                         it = &idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS];
 212                         goto retry;
 213                 }
 214 
 215                 return (IDM_STATUS_FAIL);
 216         }
 217 
 218         *new_con = ic;
 219 
 220         mutex_enter(&idm.idm_global_mutex);
 221         list_insert_tail(&idm.idm_ini_conn_list, ic);
 222         mutex_exit(&idm.idm_global_mutex);
 223 
 224         return (IDM_STATUS_SUCCESS);
 225 }
 226 
 227 /*
 228  * idm_ini_conn_destroy
 229  *
 230  * Releases any resources associated with the connection.  This is the
 231  * complement to idm_ini_conn_create.
 232  * ic - idm_conn_t structure representing the relevant connection
 233  *
 234  */
 235 void
 236 idm_ini_conn_destroy_task(void *ic_void)
 237 {
 238         idm_conn_t *ic = ic_void;
 239 
 240         ic->ic_transport_ops->it_ini_conn_destroy(ic);
 241         idm_conn_destroy_common(ic);
 242 }
 243 
/*
 * idm_ini_conn_destroy
 *
 * Unlinks the connection from the global list and defers the actual
 * teardown to the global taskq (see comment below for why).
 */
void
idm_ini_conn_destroy(idm_conn_t *ic)
{
	/*
	 * It's reasonable for the initiator to call idm_ini_conn_destroy
	 * from within the context of the CN_CONNECT_DESTROY notification.
	 * That's a problem since we want to destroy the taskq for the
	 * state machine associated with the connection.  Remove the
	 * connection from the list right away then handle the remaining
	 * work via the idm_global_taskq.
	 */
	mutex_enter(&idm.idm_global_mutex);
	list_remove(&idm.idm_ini_conn_list, ic);
	mutex_exit(&idm.idm_global_mutex);

	/* With TQ_SLEEP the dispatch should not fail; warn if it does */
	if (taskq_dispatch(idm.idm_global_taskq,
	    &idm_ini_conn_destroy_task, ic, TQ_SLEEP) == NULL) {
		cmn_err(CE_WARN,
		    "idm_ini_conn_destroy: Couldn't dispatch task");
	}
}
 265 
 266 /*
 267  * idm_ini_conn_connect
 268  *
 269  * Establish connection to the remote system identified in idm_conn_t.
 270  * The connection parameters including the remote IP address were established
 271  * in the call to idm_ini_conn_create.  The IDM state machine will
 272  * perform client notifications as necessary to prompt the initiator through
 273  * the login process.  IDM also keeps a timer running so that if the login
 274  * process doesn't complete in a timely manner it will fail.
 275  *
 276  * ic - idm_conn_t structure representing the relevant connection
 277  *
 278  * Returns success if the connection was established, otherwise some kind
 279  * of meaningful error code.
 280  *
 * Upon return, the login has either failed or is logging in (full-feature
 * phase).
 282  */
idm_status_t
idm_ini_conn_connect(idm_conn_t *ic)
{
	idm_status_t	rc;

	rc = idm_conn_sm_init(ic);
	if (rc != IDM_STATUS_SUCCESS) {
		return (ic->ic_conn_sm_status);
	}

	/* Hold connection until we return */
	idm_conn_hold(ic);

	/* Kick state machine */
	idm_conn_event(ic, CE_CONNECT_REQ, NULL);

	/*
	 * Wait for the state machine to either become ready for login
	 * or report an error; ic_state_cv is signalled on state changes.
	 */
	mutex_enter(&ic->ic_state_mutex);
	while (!(ic->ic_state_flags & CF_LOGIN_READY) &&
	    !(ic->ic_state_flags & CF_ERROR)) {
		cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
	}

	/*
	 * The CN_READY_TO_LOGIN and/or the CN_CONNECT_FAIL call to
	 * idm_notify_client has already been generated by the idm conn
	 * state machine.  If connection fails any time after this
	 * check, we will detect it in iscsi_login.
	 */
	if (ic->ic_state_flags & CF_ERROR) {
		rc = ic->ic_conn_sm_status;
	}
	mutex_exit(&ic->ic_state_mutex);
	idm_conn_rele(ic);

	return (rc);
}
 320 
 321 /*
 322  * idm_ini_conn_disconnect
 323  *
 324  * Forces a connection (previously established using idm_ini_conn_connect)
 325  * to perform a controlled shutdown, cleaning up any outstanding requests.
 326  *
 327  * ic - idm_conn_t structure representing the relevant connection
 328  *
 329  * This is asynchronous and will return before the connection is properly
 330  * shutdown
 331  */
/* ARGSUSED */
void
idm_ini_conn_disconnect(idm_conn_t *ic)
{
	/* Fire-and-forget: the connection state machine drives teardown */
	idm_conn_event(ic, CE_TRANSPORT_FAIL, NULL);
}
 338 
 339 /*
 * idm_ini_conn_disconnect_sync
 341  *
 342  * Forces a connection (previously established using idm_ini_conn_connect)
 343  * to perform a controlled shutdown.  Blocks until the connection is
 344  * disconnected.
 345  *
 346  * ic - idm_conn_t structure representing the relevant connection
 347  */
/* ARGSUSED */
void
idm_ini_conn_disconnect_sync(idm_conn_t *ic)
{
	mutex_enter(&ic->ic_state_mutex);
	/* Nothing to do if the connection already reached a terminal state */
	if ((ic->ic_state != CS_S9_INIT_ERROR) &&
	    (ic->ic_state != CS_S11_COMPLETE)) {
		idm_conn_event_locked(ic, CE_TRANSPORT_FAIL, NULL, CT_NONE);
		/* Block until the state machine reaches a terminal state */
		while ((ic->ic_state != CS_S9_INIT_ERROR) &&
		    (ic->ic_state != CS_S11_COMPLETE))
			cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
	}
	mutex_exit(&ic->ic_state_mutex);
}
 362 
 363 /*
 364  * idm_tgt_svc_create
 365  *
 366  * The target calls this service to obtain a service context for each available
 367  * transport, starting a service of each type related to the IP address and port
 368  * passed. The idm_svc_req_t contains the service parameters.
 369  */
 370 idm_status_t
 371 idm_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t **new_svc)
 372 {
 373         idm_transport_type_t    type;
 374         idm_transport_t         *it;
 375         idm_svc_t               *is;
 376         int                     rc;
 377 
 378         *new_svc = NULL;
 379         is = kmem_zalloc(sizeof (idm_svc_t), KM_SLEEP);
 380 
 381         /* Initialize transport-agnostic components of the service handle */
 382         is->is_svc_req = *sr;
 383         mutex_init(&is->is_mutex, NULL, MUTEX_DEFAULT, NULL);
 384         cv_init(&is->is_cv, NULL, CV_DEFAULT, NULL);
 385         mutex_init(&is->is_count_mutex, NULL, MUTEX_DEFAULT, NULL);
 386         cv_init(&is->is_count_cv, NULL, CV_DEFAULT, NULL);
 387         idm_refcnt_init(&is->is_refcnt, is);
 388 
 389         /*
 390          * Make sure all available transports are setup.  We call this now
 391          * instead of at initialization time in case IB has become available
 392          * since we started (hotplug, etc).
 393          */
 394         idm_transport_setup(sr->sr_li, B_FALSE);
 395 
 396         /*
 397          * Loop through the transports, configuring the transport-specific
 398          * components of each one.
 399          */
 400         for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
 401 
 402                 it = &idm_transport_list[type];
 403                 /*
 404                  * If it_ops is NULL then the transport is unconfigured
 405                  * and we shouldn't try to start the service.
 406                  */
 407                 if (it->it_ops == NULL) {
 408                         continue;
 409                 }
 410 
 411                 rc = it->it_ops->it_tgt_svc_create(sr, is);
 412                 if (rc != IDM_STATUS_SUCCESS) {
 413                         /* Teardown any configured services */
 414                         while (type--) {
 415                                 it = &idm_transport_list[type];
 416                                 if (it->it_ops == NULL) {
 417                                         continue;
 418                                 }
 419                                 it->it_ops->it_tgt_svc_destroy(is);
 420                         }
 421                         /* Free the svc context and return */
 422                         kmem_free(is, sizeof (idm_svc_t));
 423                         return (rc);
 424                 }
 425         }
 426 
 427         *new_svc = is;
 428 
 429         mutex_enter(&idm.idm_global_mutex);
 430         list_insert_tail(&idm.idm_tgt_svc_list, is);
 431         mutex_exit(&idm.idm_global_mutex);
 432 
 433         return (IDM_STATUS_SUCCESS);
 434 }
 435 
 436 /*
 437  * idm_tgt_svc_destroy
 438  *
 439  * is - idm_svc_t returned by the call to idm_tgt_svc_create
 440  *
 441  * Cleanup any resources associated with the idm_svc_t.
 442  */
 443 void
 444 idm_tgt_svc_destroy(idm_svc_t *is)
 445 {
 446         idm_transport_type_t    type;
 447         idm_transport_t         *it;
 448 
 449         mutex_enter(&idm.idm_global_mutex);
 450         /* remove this service from the global list */
 451         list_remove(&idm.idm_tgt_svc_list, is);
 452         /* wakeup any waiters for service change */
 453         cv_broadcast(&idm.idm_tgt_svc_cv);
 454         mutex_exit(&idm.idm_global_mutex);
 455 
 456         /* teardown each transport-specific service */
 457         for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
 458                 it = &idm_transport_list[type];
 459                 if (it->it_ops == NULL) {
 460                         continue;
 461                 }
 462 
 463                 it->it_ops->it_tgt_svc_destroy(is);
 464         }
 465 
 466         /* tear down the svc resources */
 467         idm_refcnt_destroy(&is->is_refcnt);
 468         cv_destroy(&is->is_count_cv);
 469         mutex_destroy(&is->is_count_mutex);
 470         cv_destroy(&is->is_cv);
 471         mutex_destroy(&is->is_mutex);
 472 
 473         /* free the svc handle */
 474         kmem_free(is, sizeof (idm_svc_t));
 475 }
 476 
/*
 * idm_tgt_svc_hold
 *
 * Take a reference on the service; paired with
 * idm_tgt_svc_rele_and_destroy.
 */
void
idm_tgt_svc_hold(idm_svc_t *is)
{
	idm_refcnt_hold(&is->is_refcnt);
}
 482 
/*
 * idm_tgt_svc_rele_and_destroy
 *
 * Drop a reference on the service; the refcnt framework invokes
 * idm_tgt_svc_destroy when the count reaches zero.
 */
void
idm_tgt_svc_rele_and_destroy(idm_svc_t *is)
{
	idm_refcnt_rele_and_destroy(&is->is_refcnt,
	    (idm_refcnt_cb_t *)&idm_tgt_svc_destroy);
}
 489 
 490 /*
 491  * idm_tgt_svc_online
 492  *
 493  * is - idm_svc_t returned by the call to idm_tgt_svc_create
 494  *
 495  * Online each transport service, as we want this target to be accessible
 496  * via any configured transport.
 497  *
 498  * When the initiator establishes a new connection to the target, IDM will
 499  * call the "new connect" callback defined in the idm_svc_req_t structure
 500  * and it will pass an idm_conn_t structure representing that new connection.
 501  */
 502 idm_status_t
 503 idm_tgt_svc_online(idm_svc_t *is)
 504 {
 505 
 506         idm_transport_type_t    type, last_type;
 507         idm_transport_t         *it;
 508         int                     rc = IDM_STATUS_SUCCESS;
 509 
 510         mutex_enter(&is->is_mutex);
 511         if (is->is_online == 0) {
 512                 /* Walk through each of the transports and online them */
 513                 for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
 514                         it = &idm_transport_list[type];
 515                         if (it->it_ops == NULL) {
 516                                 /* transport is not registered */
 517                                 continue;
 518                         }
 519 
 520                         mutex_exit(&is->is_mutex);
 521                         rc = it->it_ops->it_tgt_svc_online(is);
 522                         mutex_enter(&is->is_mutex);
 523                         if (rc != IDM_STATUS_SUCCESS) {
 524                                 last_type = type;
 525                                 break;
 526                         }
 527                 }
 528                 if (rc != IDM_STATUS_SUCCESS) {
 529                         /*
 530                          * The last transport failed to online.
 531                          * Offline any transport onlined above and
 532                          * do not online the target.
 533                          */
 534                         for (type = 0; type < last_type; type++) {
 535                                 it = &idm_transport_list[type];
 536                                 if (it->it_ops == NULL) {
 537                                         /* transport is not registered */
 538                                         continue;
 539                                 }
 540 
 541                                 mutex_exit(&is->is_mutex);
 542                                 it->it_ops->it_tgt_svc_offline(is);
 543                                 mutex_enter(&is->is_mutex);
 544                         }
 545                 } else {
 546                         /* Target service now online */
 547                         is->is_online = 1;
 548                 }
 549         } else {
 550                 /* Target service already online, just bump the count */
 551                 is->is_online++;
 552         }
 553         mutex_exit(&is->is_mutex);
 554 
 555         return (rc);
 556 }
 557 
 558 /*
 559  * idm_tgt_svc_offline
 560  *
 561  * is - idm_svc_t returned by the call to idm_tgt_svc_create
 562  *
 563  * Shutdown any online target services.
 564  */
 565 void
 566 idm_tgt_svc_offline(idm_svc_t *is)
 567 {
 568         idm_transport_type_t    type;
 569         idm_transport_t         *it;
 570 
 571         mutex_enter(&is->is_mutex);
 572         is->is_online--;
 573         if (is->is_online == 0) {
 574                 /* Walk through each of the transports and offline them */
 575                 for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
 576                         it = &idm_transport_list[type];
 577                         if (it->it_ops == NULL) {
 578                                 /* transport is not registered */
 579                                 continue;
 580                         }
 581 
 582                         mutex_exit(&is->is_mutex);
 583                         it->it_ops->it_tgt_svc_offline(is);
 584                         mutex_enter(&is->is_mutex);
 585                 }
 586         }
 587         mutex_exit(&is->is_mutex);
 588 }
 589 
 590 /*
 591  * idm_tgt_svc_lookup
 592  *
 593  * Lookup a service instance listening on the specified port
 594  */
 595 
 596 idm_svc_t *
 597 idm_tgt_svc_lookup(uint16_t port)
 598 {
 599         idm_svc_t *result;
 600 
 601 retry:
 602         mutex_enter(&idm.idm_global_mutex);
 603         for (result = list_head(&idm.idm_tgt_svc_list);
 604             result != NULL;
 605             result = list_next(&idm.idm_tgt_svc_list, result)) {
 606                 if (result->is_svc_req.sr_port == port) {
 607                         if (result->is_online == 0) {
 608                                 /*
 609                                  * A service exists on this port, but it
 610                                  * is going away, wait for it to cleanup.
 611                                  */
 612                                 cv_wait(&idm.idm_tgt_svc_cv,
 613                                     &idm.idm_global_mutex);
 614                                 mutex_exit(&idm.idm_global_mutex);
 615                                 goto retry;
 616                         }
 617                         idm_tgt_svc_hold(result);
 618                         mutex_exit(&idm.idm_global_mutex);
 619                         return (result);
 620                 }
 621         }
 622         mutex_exit(&idm.idm_global_mutex);
 623 
 624         return (NULL);
 625 }
 626 
 627 /*
 628  * idm_negotiate_key_values()
 629  * Give IDM level a chance to negotiate any login parameters it should own.
 630  *  -- leave unhandled parameters alone on request_nvl
 631  *  -- move all handled parameters to response_nvl with an appropriate response
 632  *  -- also add an entry to negotiated_nvl for any accepted parameters
 633  */
 634 kv_status_t
 635 idm_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl,
 636     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
 637 {
 638         ASSERT(ic->ic_transport_ops != NULL);
 639         return (ic->ic_transport_ops->it_negotiate_key_values(ic,
 640             request_nvl, response_nvl, negotiated_nvl));
 641 }
 642 
 643 /*
 644  * idm_notice_key_values()
 645  * Activate at the IDM level any parameters that have been negotiated.
 646  * Passes the set of key value pairs to the transport for activation.
 647  * This will be invoked as the connection is entering full-feature mode.
 648  */
 649 void
 650 idm_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl)
 651 {
 652         ASSERT(ic->ic_transport_ops != NULL);
 653         ic->ic_transport_ops->it_notice_key_values(ic, negotiated_nvl);
 654 }
 655 
 656 /*
 657  * idm_declare_key_values()
 658  * Activate an operational set of declarative parameters from the config_nvl,
 659  * and return the selected values in the outgoing_nvl.
 660  */
 661 kv_status_t
 662 idm_declare_key_values(idm_conn_t *ic, nvlist_t *config_nvl,
 663     nvlist_t *outgoing_nvl)
 664 {
 665         ASSERT(ic->ic_transport_ops != NULL);
 666         return (ic->ic_transport_ops->it_declare_key_values(ic, config_nvl,
 667             outgoing_nvl));
 668 }
 669 
 670 /*
 671  * idm_buf_tx_to_ini
 672  *
 673  * This is IDM's implementation of the 'Put_Data' operational primitive.
 674  *
 675  * This function is invoked by a target iSCSI layer to request its local
 676  * Datamover layer to transmit the Data-In PDU to the peer iSCSI layer
 677  * on the remote iSCSI node. The I/O buffer represented by 'idb' is
 678  * transferred to the initiator associated with task 'idt'. The connection
 679  * info, contents of the Data-In PDU header, the DataDescriptorIn, BHS,
 680  * and the callback (idb->idb_buf_cb) at transfer completion are
 681  * provided as input.
 682  *
 683  * This data transfer takes place transparently to the remote iSCSI layer,
 684  * i.e. without its participation.
 685  *
 686  * Using sockets, IDM implements the data transfer by segmenting the data
 687  * buffer into appropriately sized iSCSI PDUs and transmitting them to the
 688  * initiator. iSER performs the transfer using RDMA write.
 689  *
 690  */
idm_status_t
idm_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb,
    uint32_t offset, uint32_t xfer_len,
    idm_buf_cb_t idb_buf_cb, void *cb_arg)
{
	idm_status_t rc;

	/* Record the transfer parameters and start time on the buffer */
	idb->idb_bufoffset = offset;
	idb->idb_xfer_len = xfer_len;
	idb->idb_buf_cb = idb_buf_cb;
	idb->idb_cb_arg = cb_arg;
	gethrestime(&idb->idb_xfer_start);

	/*
	 * Buffer should not contain the pattern.  If the pattern is
	 * present then we've been asked to transmit initialized data
	 */
	IDM_BUFPAT_CHECK(idb, xfer_len, BP_CHECK_ASSERT);

	mutex_enter(&idt->idt_mutex);
	switch (idt->idt_state) {
	case TASK_ACTIVE:
		idt->idt_tx_to_ini_start++;
		idm_task_hold(idt);
		idm_buf_bind_in_locked(idt, idb);
		idb->idb_in_transport = B_TRUE;
		/*
		 * NOTE(review): idt_mutex is deliberately not released
		 * here; the transport's it_buf_tx_to_ini entry point is
		 * apparently responsible for dropping it (every other
		 * case in this switch exits the mutex explicitly) --
		 * confirm against the transport implementations.
		 */
		rc = (*idt->idt_ic->ic_transport_ops->it_buf_tx_to_ini)
		    (idt, idb);
		return (rc);

	case TASK_SUSPENDING:
	case TASK_SUSPENDED:
		/*
		 * Bind buffer but don't start a transfer since the task
		 * is suspended
		 */
		idm_buf_bind_in_locked(idt, idb);
		mutex_exit(&idt->idt_mutex);
		return (IDM_STATUS_SUCCESS);

	case TASK_ABORTING:
	case TASK_ABORTED:
		/*
		 * Once the task is aborted, any buffers added to the
		 * idt_inbufv will never get cleaned up, so just return
		 * SUCCESS.  The buffer should get cleaned up by the
		 * client or framework once task_aborted has completed.
		 */
		mutex_exit(&idt->idt_mutex);
		return (IDM_STATUS_SUCCESS);

	default:
		ASSERT(0);
		break;
	}
	mutex_exit(&idt->idt_mutex);

	return (IDM_STATUS_FAIL);
}
 750 
 751 /*
 752  * idm_buf_rx_from_ini
 753  *
 754  * This is IDM's implementation of the 'Get_Data' operational primitive.
 755  *
 756  * This function is invoked by a target iSCSI layer to request its local
 757  * Datamover layer to retrieve certain data identified by the R2T PDU from the
 758  * peer iSCSI layer on the remote node. The retrieved Data-Out PDU will be
 759  * mapped to the respective buffer by the task tags (ITT & TTT).
 760  * The connection information, contents of an R2T PDU, DataDescriptor, BHS, and
 761  * the callback (idb->idb_buf_cb) notification for data transfer completion are
 762  * are provided as input.
 763  *
 * When an iSCSI node sends an R2T PDU to its local Datamover layer, the
 * local and remote Datamover layers transparently bring about the data
 * transfer requested by the R2T PDU, without the participation
 767  * of the iSCSI layers.
 768  *
 769  * Using sockets, IDM transmits an R2T PDU for each buffer and the rx_data_out()
 770  * assembles the Data-Out PDUs into the buffer. iSER uses RDMA read.
 771  *
 772  */
idm_status_t
idm_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb,
    uint32_t offset, uint32_t xfer_len,
    idm_buf_cb_t idb_buf_cb, void *cb_arg)
{
	idm_status_t rc;

	/* Record the transfer parameters and start time on the buffer */
	idb->idb_bufoffset = offset;
	idb->idb_xfer_len = xfer_len;
	idb->idb_buf_cb = idb_buf_cb;
	idb->idb_cb_arg = cb_arg;
	gethrestime(&idb->idb_xfer_start);

	/*
	 * "In" buf list is for "Data In" PDU's, "Out" buf list is for
	 * "Data Out" PDU's
	 */
	mutex_enter(&idt->idt_mutex);
	switch (idt->idt_state) {
	case TASK_ACTIVE:
		idt->idt_rx_from_ini_start++;
		idm_task_hold(idt);
		idm_buf_bind_out_locked(idt, idb);
		idb->idb_in_transport = B_TRUE;
		/*
		 * NOTE(review): idt_mutex is not released here; as with
		 * idm_buf_tx_to_ini, the transport entry point appears
		 * to be responsible for dropping it -- confirm.
		 */
		rc = (*idt->idt_ic->ic_transport_ops->it_buf_rx_from_ini)
		    (idt, idb);
		return (rc);
	case TASK_SUSPENDING:
	case TASK_SUSPENDED:
	case TASK_ABORTING:
	case TASK_ABORTED:
		/*
		 * Bind buffer but don't start a transfer since the task
		 * is suspended or aborting
		 */
		idm_buf_bind_out_locked(idt, idb);
		mutex_exit(&idt->idt_mutex);
		return (IDM_STATUS_SUCCESS);
	default:
		ASSERT(0);
		break;
	}
	mutex_exit(&idt->idt_mutex);

	return (IDM_STATUS_FAIL);
}
 819 
 820 /*
 821  * idm_buf_tx_to_ini_done
 822  *
 823  * The transport calls this after it has completed a transfer requested by
 824  * a call to transport_buf_tx_to_ini
 825  *
 826  * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
 827  * idt may be freed after the call to idb->idb_buf_cb.
 828  */
void
idm_buf_tx_to_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
{
	ASSERT(mutex_owned(&idt->idt_mutex));
	idb->idb_in_transport = B_FALSE;
	idb->idb_tx_thread = B_FALSE;
	idt->idt_tx_to_ini_done++;
	gethrestime(&idb->idb_xfer_done);

	/*
	 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
	 * TASK_ABORTING --> TASK_ABORTED transition if the refcount goes
	 * to 0.
	 */
	idm_task_rele(idt);
	idb->idb_status = status;

	switch (idt->idt_state) {
	case TASK_ACTIVE:
		/* Refresh the connection's activity timestamp */
		idt->idt_ic->ic_timestamp = ddi_get_lbolt();
		idm_buf_unbind_in_locked(idt, idb);
		/* Drop idt_mutex before the callback; idt may be freed */
		mutex_exit(&idt->idt_mutex);
		(*idb->idb_buf_cb)(idb, status);
		return;
	case TASK_SUSPENDING:
	case TASK_SUSPENDED:
	case TASK_ABORTING:
	case TASK_ABORTED:
		/*
		 * To keep things simple we will ignore the case where the
		 * transfer was successful and leave all buffers bound to the
		 * task.  This allows us to also ignore the case where we've
		 * been asked to abort a task but the last transfer of the
		 * task has completed.  IDM has no idea whether this was, in
		 * fact, the last transfer of the task so it would be difficult
		 * to handle this case.  Everything should get sorted out again
		 * after task reassignment is complete.
		 *
		 * In the case of TASK_ABORTING we could conceivably call the
		 * buffer callback here but the timing of when the client's
		 * client_task_aborted callback is invoked vs. when the client's
		 * buffer callback gets invoked gets sticky.  We don't want
		 * the client to hear from us again after the call to
		 * client_task_aborted() but we don't want to give it a bunch
		 * of failed buffer transfers until we've called
		 * client_task_aborted().  Instead we'll just leave all the
		 * buffers bound and allow the client to cleanup.
		 */
		break;
	default:
		ASSERT(0);
	}
	mutex_exit(&idt->idt_mutex);
}
 883 
 884 /*
 885  * idm_buf_rx_from_ini_done
 886  *
 887  * The transport calls this after it has completed a transfer requested by
 888  * a call totransport_buf_tx_to_ini
 889  *
 890  * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
 891  * idt may be freed after the call to idb->idb_buf_cb.
 892  */
 893 void
 894 idm_buf_rx_from_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
 895 {
 896         ASSERT(mutex_owned(&idt->idt_mutex));
 897         idb->idb_in_transport = B_FALSE;
 898         idt->idt_rx_from_ini_done++;
 899         gethrestime(&idb->idb_xfer_done);
 900 
 901         /*
 902          * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
 903          * TASK_ABORTING --> TASK_ABORTED transistion if the refcount goes
 904          * to 0.
 905          */
 906         idm_task_rele(idt);
 907         idb->idb_status = status;
 908 
 909         if (status == IDM_STATUS_SUCCESS) {
 910                 /*
 911                  * Buffer should not contain the pattern.  If it does then
 912                  * we did not get the data from the remote host.
 913                  */
 914                 IDM_BUFPAT_CHECK(idb, idb->idb_xfer_len, BP_CHECK_ASSERT);
 915         }
 916 
 917         switch (idt->idt_state) {
 918         case TASK_ACTIVE:
 919                 idt->idt_ic->ic_timestamp = ddi_get_lbolt();
 920                 idm_buf_unbind_out_locked(idt, idb);
 921                 mutex_exit(&idt->idt_mutex);
 922                 (*idb->idb_buf_cb)(idb, status);
 923                 return;
 924         case TASK_SUSPENDING:
 925         case TASK_SUSPENDED:
 926         case TASK_ABORTING:
 927         case TASK_ABORTED:
 928                 /*
 929                  * To keep things simple we will ignore the case where the
 930                  * transfer was successful and leave all buffers bound to the
 931                  * task.  This allows us to also ignore the case where we've
 932                  * been asked to abort a task but the last transfer of the
 933                  * task has completed.  IDM has no idea whether this was, in
 934                  * fact, the last transfer of the task so it would be difficult
 935                  * to handle this case.  Everything should get sorted out again
 936                  * after task reassignment is complete.
 937                  *
 938                  * In the case of TASK_ABORTING we could conceivably call the
 939                  * buffer callback here but the timing of when the client's
 940                  * client_task_aborted callback is invoked vs. when the client's
 941                  * buffer callback gets invoked gets sticky.  We don't want
 942                  * the client to here from us again after the call to
 943                  * client_task_aborted() but we don't want to give it a bunch
 944                  * of failed buffer transfers until we've called
 945                  * client_task_aborted().  Instead we'll just leave all the
 946                  * buffers bound and allow the client to cleanup.
 947                  */
 948                 break;
 949         default:
 950                 ASSERT(0);
 951         }
 952         mutex_exit(&idt->idt_mutex);
 953 }
 954 
 955 /*
 956  * idm_buf_alloc
 957  *
 958  * Allocates a buffer handle and registers it for use with the transport
 959  * layer. If a buffer is not passed on bufptr, the buffer will be allocated
 960  * as well as the handle.
 961  *
 962  * ic           - connection on which the buffer will be transferred
 963  * bufptr       - allocate memory for buffer if NULL, else assign to buffer
 964  * buflen       - length of buffer
 965  *
 966  * Returns idm_buf_t handle if successful, otherwise NULL
 967  */
 968 idm_buf_t *
 969 idm_buf_alloc(idm_conn_t *ic, void *bufptr, uint64_t buflen)
 970 {
 971         idm_buf_t       *buf = NULL;
 972         int             rc;
 973 
 974         ASSERT(ic != NULL);
 975         ASSERT(idm.idm_buf_cache != NULL);
 976         ASSERT(buflen > 0);
 977 
 978         /* Don't allocate new buffers if we are not in FFP */
 979         mutex_enter(&ic->ic_state_mutex);
 980         if (!ic->ic_ffp) {
 981                 mutex_exit(&ic->ic_state_mutex);
 982                 return (NULL);
 983         }
 984 
 985 
 986         idm_conn_hold(ic);
 987         mutex_exit(&ic->ic_state_mutex);
 988 
 989         buf = kmem_cache_alloc(idm.idm_buf_cache, KM_NOSLEEP);
 990         if (buf == NULL) {
 991                 idm_conn_rele(ic);
 992                 return (NULL);
 993         }
 994 
 995         buf->idb_ic          = ic;
 996         buf->idb_buflen              = buflen;
 997         buf->idb_exp_offset  = 0;
 998         buf->idb_bufoffset   = 0;
 999         buf->idb_xfer_len    = 0;
1000         buf->idb_magic               = IDM_BUF_MAGIC;
1001         buf->idb_in_transport        = B_FALSE;
1002         buf->idb_bufbcopy    = B_FALSE;
1003 
1004         /*
1005          * If bufptr is NULL, we have an implicit request to allocate
1006          * memory for this IDM buffer handle and register it for use
1007          * with the transport. To simplify this, and to give more freedom
1008          * to the transport layer for it's own buffer management, both of
1009          * these actions will take place in the transport layer.
1010          * If bufptr is set, then the caller has allocated memory (or more
1011          * likely it's been passed from an upper layer), and we need only
1012          * register the buffer for use with the transport layer.
1013          */
1014         if (bufptr == NULL) {
1015                 /*
1016                  * Allocate a buffer from the transport layer (which
1017                  * will also register the buffer for use).
1018                  */
1019                 rc = ic->ic_transport_ops->it_buf_alloc(buf, buflen);
1020                 if (rc != 0) {
1021                         idm_conn_rele(ic);
1022                         kmem_cache_free(idm.idm_buf_cache, buf);
1023                         return (NULL);
1024                 }
1025                 /* Set the bufalloc'd flag */
1026                 buf->idb_bufalloc = B_TRUE;
1027         } else {
1028                 /*
1029                  * For large transfers, Set the passed bufptr into
1030                  * the buf handle, and register the handle with the
1031                  * transport layer. As memory registration with the
1032                  * transport layer is a time/cpu intensive operation,
1033                  * for small transfers (up to a pre-defined bcopy
1034                  * threshold), use pre-registered memory buffers
1035                  * and bcopy data at the appropriate time.
1036                  */
1037                 buf->idb_buf = bufptr;
1038 
1039                 rc = ic->ic_transport_ops->it_buf_setup(buf);
1040                 if (rc != 0) {
1041                         idm_conn_rele(ic);
1042                         kmem_cache_free(idm.idm_buf_cache, buf);
1043                         return (NULL);
1044                 }
1045                 /*
1046                  * The transport layer is now expected to set the idb_bufalloc
1047                  * correctly to indicate if resources have been allocated.
1048                  */
1049         }
1050 
1051         IDM_BUFPAT_SET(buf);
1052 
1053         return (buf);
1054 }
1055 
1056 /*
1057  * idm_buf_free
1058  *
1059  * Release a buffer handle along with the associated buffer that was allocated
1060  * or assigned with idm_buf_alloc
1061  */
1062 void
1063 idm_buf_free(idm_buf_t *buf)
1064 {
1065         idm_conn_t *ic = buf->idb_ic;
1066 
1067 
1068         buf->idb_task_binding        = NULL;
1069 
1070         if (buf->idb_bufalloc) {
1071                 ic->ic_transport_ops->it_buf_free(buf);
1072         } else {
1073                 ic->ic_transport_ops->it_buf_teardown(buf);
1074         }
1075         kmem_cache_free(idm.idm_buf_cache, buf);
1076         idm_conn_rele(ic);
1077 }
1078 
1079 /*
1080  * idm_buf_bind_in
1081  *
1082  * This function associates a buffer with a task. This is only for use by the
1083  * iSCSI initiator that will have only one buffer per transfer direction
1084  *
1085  */
1086 void
1087 idm_buf_bind_in(idm_task_t *idt, idm_buf_t *buf)
1088 {
1089         mutex_enter(&idt->idt_mutex);
1090         idm_buf_bind_in_locked(idt, buf);
1091         mutex_exit(&idt->idt_mutex);
1092 }
1093 
1094 static void
1095 idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf)
1096 {
1097         buf->idb_task_binding = idt;
1098         buf->idb_ic = idt->idt_ic;
1099         idm_listbuf_insert(&idt->idt_inbufv, buf);
1100 }
1101 
1102 void
1103 idm_buf_bind_out(idm_task_t *idt, idm_buf_t *buf)
1104 {
1105         /*
1106          * For small transfers, the iSER transport delegates the IDM
1107          * layer to bcopy the SCSI Write data for faster IOPS.
1108          */
1109         if (buf->idb_bufbcopy == B_TRUE) {
1110 
1111                 bcopy(buf->idb_bufptr, buf->idb_buf, buf->idb_buflen);
1112         }
1113         mutex_enter(&idt->idt_mutex);
1114         idm_buf_bind_out_locked(idt, buf);
1115         mutex_exit(&idt->idt_mutex);
1116 }
1117 
1118 static void
1119 idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf)
1120 {
1121         buf->idb_task_binding = idt;
1122         buf->idb_ic = idt->idt_ic;
1123         idm_listbuf_insert(&idt->idt_outbufv, buf);
1124 }
1125 
1126 void
1127 idm_buf_unbind_in(idm_task_t *idt, idm_buf_t *buf)
1128 {
1129         /*
1130          * For small transfers, the iSER transport delegates the IDM
1131          * layer to bcopy the SCSI Read data into the read buufer
1132          * for faster IOPS.
1133          */
1134         if (buf->idb_bufbcopy == B_TRUE) {
1135                 bcopy(buf->idb_buf, buf->idb_bufptr, buf->idb_buflen);
1136         }
1137         mutex_enter(&idt->idt_mutex);
1138         idm_buf_unbind_in_locked(idt, buf);
1139         mutex_exit(&idt->idt_mutex);
1140 }
1141 
1142 static void
1143 idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf)
1144 {
1145         list_remove(&idt->idt_inbufv, buf);
1146 }
1147 
1148 void
1149 idm_buf_unbind_out(idm_task_t *idt, idm_buf_t *buf)
1150 {
1151         mutex_enter(&idt->idt_mutex);
1152         idm_buf_unbind_out_locked(idt, buf);
1153         mutex_exit(&idt->idt_mutex);
1154 }
1155 
1156 static void
1157 idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf)
1158 {
1159         list_remove(&idt->idt_outbufv, buf);
1160 }
1161 
1162 /*
1163  * idm_buf_find() will lookup the idm_buf_t based on the relative offset in the
1164  * iSCSI PDU
1165  */
1166 idm_buf_t *
1167 idm_buf_find(void *lbuf, size_t data_offset)
1168 {
1169         idm_buf_t       *idb;
1170         list_t          *lst = (list_t *)lbuf;
1171 
1172         /* iterate through the list to find the buffer */
1173         for (idb = list_head(lst); idb != NULL; idb = list_next(lst, idb)) {
1174 
1175                 ASSERT((idb->idb_ic->ic_conn_type == CONN_TYPE_TGT) ||
1176                     (idb->idb_bufoffset == 0));
1177 
1178                 if ((data_offset >= idb->idb_bufoffset) &&
1179                     (data_offset < (idb->idb_bufoffset + idb->idb_buflen))) {
1180 
1181                         return (idb);
1182                 }
1183         }
1184 
1185         return (NULL);
1186 }
1187 
1188 void
1189 idm_bufpat_set(idm_buf_t *idb)
1190 {
1191         idm_bufpat_t    *bufpat;
1192         int             len, i;
1193 
1194         len = idb->idb_buflen;
1195         len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
1196 
1197         bufpat = idb->idb_buf;
1198         for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
1199                 bufpat->bufpat_idb = idb;
1200                 bufpat->bufpat_bufmagic = IDM_BUF_MAGIC;
1201                 bufpat->bufpat_offset = i;
1202                 bufpat++;
1203         }
1204 }
1205 
1206 boolean_t
1207 idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type)
1208 {
1209         idm_bufpat_t    *bufpat;
1210         int             len, i;
1211 
1212         len = (type == BP_CHECK_QUICK) ? sizeof (idm_bufpat_t) : check_len;
1213         len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
1214         ASSERT(len <= idb->idb_buflen);
1215         bufpat = idb->idb_buf;
1216 
1217         /*
1218          * Don't check the pattern in buffers that came from outside IDM
1219          * (these will be buffers from the initiator that we opted not
1220          * to double-buffer)
1221          */
1222         if (!idb->idb_bufalloc)
1223                 return (B_FALSE);
1224 
1225         /*
1226          * Return true if we find the pattern anywhere in the buffer
1227          */
1228         for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
1229                 if (BUFPAT_MATCH(bufpat, idb)) {
1230                         IDM_CONN_LOG(CE_WARN, "idm_bufpat_check found: "
1231                             "idb %p bufpat %p "
1232                             "bufpat_idb=%p bufmagic=%08x offset=%08x",
1233                             (void *)idb, (void *)bufpat, bufpat->bufpat_idb,
1234                             bufpat->bufpat_bufmagic, bufpat->bufpat_offset);
1235                         DTRACE_PROBE2(bufpat__pattern__found,
1236                             idm_buf_t *, idb, idm_bufpat_t *, bufpat);
1237                         if (type == BP_CHECK_ASSERT) {
1238                                 ASSERT(0);
1239                         }
1240                         return (B_TRUE);
1241                 }
1242                 bufpat++;
1243         }
1244 
1245         return (B_FALSE);
1246 }
1247 
1248 /*
1249  * idm_task_alloc
1250  *
1251  * This function will allocate a idm_task_t structure. A task tag is also
1252  * generated and saved in idt_tt. The task is not active.
1253  */
1254 idm_task_t *
1255 idm_task_alloc(idm_conn_t *ic)
1256 {
1257         idm_task_t      *idt;
1258 
1259         ASSERT(ic != NULL);
1260 
1261         /* Don't allocate new tasks if we are not in FFP */
1262         if (!ic->ic_ffp) {
1263                 return (NULL);
1264         }
1265         idt = kmem_cache_alloc(idm.idm_task_cache, KM_NOSLEEP);
1266         if (idt == NULL) {
1267                 return (NULL);
1268         }
1269 
1270         ASSERT(list_is_empty(&idt->idt_inbufv));
1271         ASSERT(list_is_empty(&idt->idt_outbufv));
1272 
1273         mutex_enter(&ic->ic_state_mutex);
1274         if (!ic->ic_ffp) {
1275                 mutex_exit(&ic->ic_state_mutex);
1276                 kmem_cache_free(idm.idm_task_cache, idt);
1277                 return (NULL);
1278         }
1279         idm_conn_hold(ic);
1280         mutex_exit(&ic->ic_state_mutex);
1281 
1282         idt->idt_state               = TASK_IDLE;
1283         idt->idt_ic          = ic;
1284         idt->idt_private     = NULL;
1285         idt->idt_exp_datasn  = 0;
1286         idt->idt_exp_rttsn   = 0;
1287         idt->idt_flags               = 0;
1288         return (idt);
1289 }
1290 
1291 /*
1292  * idm_task_start
1293  *
1294  * Mark the task active and initialize some stats. The caller
1295  * sets up the idm_task_t structure with a prior call to idm_task_alloc().
1296  * The task service does not function as a task/work engine, it is the
1297  * responsibility of the initiator to start the data transfer and free the
1298  * resources.
1299  */
1300 void
1301 idm_task_start(idm_task_t *idt, uintptr_t handle)
1302 {
1303         ASSERT(idt != NULL);
1304 
1305         /* mark the task as ACTIVE */
1306         idt->idt_state = TASK_ACTIVE;
1307         idt->idt_client_handle = handle;
1308         idt->idt_tx_to_ini_start = idt->idt_tx_to_ini_done =
1309             idt->idt_rx_from_ini_start = idt->idt_rx_from_ini_done =
1310             idt->idt_tx_bytes = idt->idt_rx_bytes = 0;
1311 }
1312 
1313 /*
1314  * idm_task_done
1315  *
1316  * This function sets the state to indicate that the task is no longer active.
1317  */
1318 void
1319 idm_task_done(idm_task_t *idt)
1320 {
1321         ASSERT(idt != NULL);
1322 
1323         mutex_enter(&idt->idt_mutex);
1324         idt->idt_state = TASK_IDLE;
1325         mutex_exit(&idt->idt_mutex);
1326 
1327         /*
1328          * Although unlikely it is possible for a reference to come in after
1329          * the client has decided the task is over but before we've marked
1330          * the task idle.  One specific unavoidable scenario is the case where
1331          * received PDU with the matching ITT/TTT results in a successful
1332          * lookup of this task.  We are at the mercy of the remote node in
1333          * that case so we need to handle it.  Now that the task state
1334          * has changed no more references will occur so a simple call to
1335          * idm_refcnt_wait_ref should deal with the situation.
1336          */
1337         idm_refcnt_wait_ref(&idt->idt_refcnt);
1338         idm_refcnt_reset(&idt->idt_refcnt);
1339 }
1340 
1341 /*
1342  * idm_task_free
1343  *
1344  * This function will free the Task Tag and the memory allocated for the task
1345  * idm_task_done should be called prior to this call
1346  */
1347 void
1348 idm_task_free(idm_task_t *idt)
1349 {
1350         idm_conn_t *ic;
1351 
1352         ASSERT(idt != NULL);
1353         ASSERT(idt->idt_refcnt.ir_refcnt == 0);
1354         ASSERT(idt->idt_state == TASK_IDLE);
1355 
1356         ic = idt->idt_ic;
1357 
1358         /*
1359          * It's possible for items to still be in the idt_inbufv list if
1360          * they were added after idm_free_task_rsrc was called.  We rely on
1361          * STMF to free all buffers associated with the task however STMF
1362          * doesn't know that we have this reference to the buffers.
1363          * Use list_create so that we don't end up with stale references
1364          * to these buffers.
1365          */
1366         list_create(&idt->idt_inbufv, sizeof (idm_buf_t),
1367             offsetof(idm_buf_t, idb_buflink));
1368         list_create(&idt->idt_outbufv, sizeof (idm_buf_t),
1369             offsetof(idm_buf_t, idb_buflink));
1370 
1371         kmem_cache_free(idm.idm_task_cache, idt);
1372 
1373         idm_conn_rele(ic);
1374 }
1375 
1376 /*
1377  * idm_task_find_common
1378  *      common code for idm_task_find() and idm_task_find_and_complete()
1379  */
1380 /*ARGSUSED*/
1381 static idm_task_t *
1382 idm_task_find_common(idm_conn_t *ic, uint32_t itt, uint32_t ttt,
1383     boolean_t complete)
1384 {
1385         uint32_t        tt, client_handle;
1386         idm_task_t      *idt;
1387 
1388         /*
1389          * Must match both itt and ttt.  The table is indexed by itt
1390          * for initiator connections and ttt for target connections.
1391          */
1392         if (IDM_CONN_ISTGT(ic)) {
1393                 tt = ttt;
1394                 client_handle = itt;
1395         } else {
1396                 tt = itt;
1397                 client_handle = ttt;
1398         }
1399 
1400         rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1401         if (tt >= idm.idm_taskid_max) {
1402                 rw_exit(&idm.idm_taskid_table_lock);
1403                 return (NULL);
1404         }
1405 
1406         idt = idm.idm_taskid_table[tt];
1407 
1408         if (idt != NULL) {
1409                 mutex_enter(&idt->idt_mutex);
1410                 if ((idt->idt_state != TASK_ACTIVE) ||
1411                     (idt->idt_ic != ic) ||
1412                     (IDM_CONN_ISTGT(ic) &&
1413                     (idt->idt_client_handle != client_handle))) {
1414                         /*
1415                          * Task doesn't match or task is aborting and
1416                          * we don't want any more references.
1417                          */
1418                         if ((idt->idt_ic != ic) &&
1419                             (idt->idt_state == TASK_ACTIVE) &&
1420                             (IDM_CONN_ISINI(ic) || idt->idt_client_handle ==
1421                             client_handle)) {
1422                                 IDM_CONN_LOG(CE_WARN,
1423                                 "idm_task_find: wrong connection %p != %p",
1424                                     (void *)ic, (void *)idt->idt_ic);
1425                         }
1426                         mutex_exit(&idt->idt_mutex);
1427                         rw_exit(&idm.idm_taskid_table_lock);
1428                         return (NULL);
1429                 }
1430                 idm_task_hold(idt);
1431                 /*
1432                  * Set the task state to TASK_COMPLETE so it can no longer
1433                  * be found or aborted.
1434                  */
1435                 if (B_TRUE == complete)
1436                         idt->idt_state = TASK_COMPLETE;
1437                 mutex_exit(&idt->idt_mutex);
1438         }
1439         rw_exit(&idm.idm_taskid_table_lock);
1440 
1441         return (idt);
1442 }
1443 
1444 /*
1445  * This function looks up a task by task tag.
1446  */
1447 idm_task_t *
1448 idm_task_find(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
1449 {
1450         return (idm_task_find_common(ic, itt, ttt, B_FALSE));
1451 }
1452 
1453 /*
1454  * This function looks up a task by task tag. If found, the task state
1455  * is atomically set to TASK_COMPLETE so it can longer be found or aborted.
1456  */
1457 idm_task_t *
1458 idm_task_find_and_complete(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
1459 {
1460         return (idm_task_find_common(ic, itt, ttt, B_TRUE));
1461 }
1462 
1463 /*
1464  * idm_task_find_by_handle
1465  *
1466  * This function looks up a task by the client-private idt_client_handle.
1467  *
1468  * This function should NEVER be called in the performance path.  It is
1469  * intended strictly for error recovery/task management.
1470  */
1471 /*ARGSUSED*/
1472 void *
1473 idm_task_find_by_handle(idm_conn_t *ic, uintptr_t handle)
1474 {
1475         idm_task_t      *idt = NULL;
1476         int             idx = 0;
1477 
1478         rw_enter(&idm.idm_taskid_table_lock, RW_READER);
1479 
1480         for (idx = 0; idx < idm.idm_taskid_max; idx++) {
1481                 idt = idm.idm_taskid_table[idx];
1482 
1483                 if (idt == NULL)
1484                         continue;
1485 
1486                 mutex_enter(&idt->idt_mutex);
1487 
1488                 if (idt->idt_state != TASK_ACTIVE) {
1489                         /*
1490                          * Task is either in suspend, abort, or already
1491                          * complete.
1492                          */
1493                         mutex_exit(&idt->idt_mutex);
1494                         continue;
1495                 }
1496 
1497                 if (idt->idt_client_handle == handle) {
1498                         idm_task_hold(idt);
1499                         mutex_exit(&idt->idt_mutex);
1500                         break;
1501                 }
1502 
1503                 mutex_exit(&idt->idt_mutex);
1504         }
1505 
1506         rw_exit(&idm.idm_taskid_table_lock);
1507 
1508         if ((idt == NULL) || (idx == idm.idm_taskid_max))
1509                 return (NULL);
1510 
1511         return (idt->idt_private);
1512 }
1513 
1514 void
1515 idm_task_hold(idm_task_t *idt)
1516 {
1517         idm_refcnt_hold(&idt->idt_refcnt);
1518 }
1519 
1520 void
1521 idm_task_rele(idm_task_t *idt)
1522 {
1523         idm_refcnt_rele(&idt->idt_refcnt);
1524 }
1525 
/*
 * idm_task_abort
 *
 * Abort (or suspend, per abort_type) the given task, or — when idt is
 * NULL — every non-idle, non-complete task on connection ic.  Returns
 * the status of the last idm_task_abort_one call.
 */
stmf_status_t
idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
{
	idm_task_t	*task;
	int		idx;
	stmf_status_t	s = STMF_SUCCESS;

	/*
	 * Passing NULL as the task indicates that all tasks
	 * for this connection should be aborted.
	 */
	if (idt == NULL) {
		/*
		 * Only the connection state machine should ask for
		 * all tasks to abort and this should never happen in FFP.
		 */
		ASSERT(!ic->ic_ffp);
		rw_enter(&idm.idm_taskid_table_lock, RW_READER);
		for (idx = 0; idx < idm.idm_taskid_max; idx++) {
			task = idm.idm_taskid_table[idx];
			if (task == NULL)
				continue;
			mutex_enter(&task->idt_mutex);
			if ((task->idt_state != TASK_IDLE) &&
			    (task->idt_state != TASK_COMPLETE) &&
			    (task->idt_ic == ic)) {
				/*
				 * Drop the table lock across the abort
				 * (idm_task_abort_one releases idt_mutex),
				 * then reacquire it to continue the scan.
				 */
				rw_exit(&idm.idm_taskid_table_lock);
				s = idm_task_abort_one(ic, task, abort_type);
				rw_enter(&idm.idm_taskid_table_lock, RW_READER);
			} else
				mutex_exit(&task->idt_mutex);
		}
		rw_exit(&idm.idm_taskid_table_lock);
	} else {
		/* idm_task_abort_one releases idt_mutex before returning */
		mutex_enter(&idt->idt_mutex);
		s = idm_task_abort_one(ic, idt, abort_type);
	}
	return (s);
}
1565 
1566 static void
1567 idm_task_abort_unref_cb(void *ref)
1568 {
1569         idm_task_t *idt = ref;
1570 
1571         mutex_enter(&idt->idt_mutex);
1572         switch (idt->idt_state) {
1573         case TASK_SUSPENDING:
1574                 idt->idt_state = TASK_SUSPENDED;
1575                 mutex_exit(&idt->idt_mutex);
1576                 idm_task_aborted(idt, IDM_STATUS_SUSPENDED);
1577                 return;
1578         case TASK_ABORTING:
1579                 idt->idt_state = TASK_ABORTED;
1580                 mutex_exit(&idt->idt_mutex);
1581                 idm_task_aborted(idt, IDM_STATUS_ABORTED);
1582                 return;
1583         default:
1584                 mutex_exit(&idt->idt_mutex);
1585                 ASSERT(0);
1586                 break;
1587         }
1588 }
1589 
1590 /*
1591  * Abort the idm task.
1592  *    Caller must hold the task mutex, which will be released before return
1593  */
1594 static stmf_status_t
1595 idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
1596 {
1597         stmf_status_t   s = STMF_SUCCESS;
1598 
1599         /* Caller must hold connection mutex */
1600         ASSERT(mutex_owned(&idt->idt_mutex));
1601         switch (idt->idt_state) {
1602         case TASK_ACTIVE:
1603                 switch (abort_type) {
1604                 case AT_INTERNAL_SUSPEND:
1605                         /* Call transport to release any resources */
1606                         idt->idt_state = TASK_SUSPENDING;
1607                         mutex_exit(&idt->idt_mutex);
1608                         ic->ic_transport_ops->it_free_task_rsrc(idt);
1609 
1610                         /*
1611                          * Wait for outstanding references.  When all
1612                          * references are released the callback will call
1613                          * idm_task_aborted().
1614                          */
1615                         idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1616                             &idm_task_abort_unref_cb);
1617                         return (s);
1618                 case AT_INTERNAL_ABORT:
1619                 case AT_TASK_MGMT_ABORT:
1620                         idt->idt_state = TASK_ABORTING;
1621                         mutex_exit(&idt->idt_mutex);
1622                         ic->ic_transport_ops->it_free_task_rsrc(idt);
1623 
1624                         /*
1625                          * Wait for outstanding references.  When all
1626                          * references are released the callback will call
1627                          * idm_task_aborted().
1628                          */
1629                         idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1630                             &idm_task_abort_unref_cb);
1631                         return (s);
1632                 default:
1633                         ASSERT(0);
1634                 }
1635                 break;
1636         case TASK_SUSPENDING:
1637                 /* Already called transport_free_task_rsrc(); */
1638                 switch (abort_type) {
1639                 case AT_INTERNAL_SUSPEND:
1640                         /* Already doing it */
1641                         break;
1642                 case AT_INTERNAL_ABORT:
1643                 case AT_TASK_MGMT_ABORT:
1644                         idt->idt_state = TASK_ABORTING;
1645                         break;
1646                 default:
1647                         ASSERT(0);
1648                 }
1649                 break;
1650         case TASK_SUSPENDED:
1651                 /* Already called transport_free_task_rsrc(); */
1652                 switch (abort_type) {
1653                 case AT_INTERNAL_SUSPEND:
1654                         /* Already doing it */
1655                         break;
1656                 case AT_INTERNAL_ABORT:
1657                 case AT_TASK_MGMT_ABORT:
1658                         idt->idt_state = TASK_ABORTING;
1659                         mutex_exit(&idt->idt_mutex);
1660 
1661                         /*
1662                          * We could probably call idm_task_aborted directly
1663                          * here but we may be holding the conn lock. It's
1664                          * easier to just switch contexts.  Even though
1665                          * we shouldn't really have any references we'll
1666                          * set the state to TASK_ABORTING instead of
1667                          * TASK_ABORTED so we can use the same code path.
1668                          */
1669                         idm_refcnt_async_wait_ref(&idt->idt_refcnt,
1670                             &idm_task_abort_unref_cb);
1671                         return (s);
1672                 default:
1673                         ASSERT(0);
1674                 }
1675                 break;
1676         case TASK_ABORTING:
1677         case TASK_ABORTED:
1678                 switch (abort_type) {
1679                 case AT_INTERNAL_SUSPEND:
1680                         /* We're already past this point... */
1681                 case AT_INTERNAL_ABORT:
1682                 case AT_TASK_MGMT_ABORT:
1683                         /* Already doing it */
1684                         break;
1685                 default:
1686                         ASSERT(0);
1687                 }
1688                 break;
1689         case TASK_COMPLETE:
1690                 idm_refcnt_wait_ref(&idt->idt_refcnt);
1691                 s = STMF_ABORT_SUCCESS;
1692                 break;
1693         default:
1694                 ASSERT(0);
1695         }
1696         mutex_exit(&idt->idt_mutex);
1697 
1698         return (s);
1699 }
1700 
1701 static void
1702 idm_task_aborted(idm_task_t *idt, idm_status_t status)
1703 {
1704         (*idt->idt_ic->ic_conn_ops.icb_task_aborted)(idt, status);
1705 }
1706 
1707 /*
1708  * idm_pdu_tx
1709  *
1710  * This is IDM's implementation of the 'Send_Control' operational primitive.
1711  * This function is invoked by an initiator iSCSI layer requesting the transfer
1712  * of a iSCSI command PDU or a target iSCSI layer requesting the transfer of a
1713  * iSCSI response PDU. The PDU will be transmitted as-is by the local Datamover
1714  * layer to the peer iSCSI layer in the remote iSCSI node. The connection info
1715  * and iSCSI PDU-specific qualifiers namely BHS, AHS, DataDescriptor and Size
1716  * are provided as input.
1717  *
1718  */
1719 void
1720 idm_pdu_tx(idm_pdu_t *pdu)
1721 {
1722         idm_conn_t              *ic = pdu->isp_ic;
1723         iscsi_async_evt_hdr_t   *async_evt;
1724 
1725         /*
1726          * If we are in full-featured mode then route SCSI-related
1727          * commands to the appropriate function vector without checking
1728          * the connection state.  We will only be in full-feature mode
1729          * when we are in an acceptable state for SCSI PDU's.
1730          *
1731          * We also need to ensure that there are no PDU events outstanding
1732          * on the state machine.  Any non-SCSI PDU's received in full-feature
1733          * mode will result in PDU events and until these have been handled
1734          * we need to route all PDU's through the state machine as PDU
1735          * events to maintain ordering.
1736          *
1737          * Note that IDM cannot enter FFP mode until it processes in
1738          * its state machine the last xmit of the login process.
1739          * Hence, checking the IDM_PDU_LOGIN_TX flag here would be
1740          * superfluous.
1741          */
1742         mutex_enter(&ic->ic_state_mutex);
1743         if (ic->ic_ffp && (ic->ic_pdu_events == 0)) {
1744                 mutex_exit(&ic->ic_state_mutex);
1745                 switch (IDM_PDU_OPCODE(pdu)) {
1746                 case ISCSI_OP_SCSI_RSP:
1747                         /* Target only */
1748                         DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
1749                             iscsi_scsi_rsp_hdr_t *,
1750                             (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
1751                         idm_pdu_tx_forward(ic, pdu);
1752                         return;
1753                 case ISCSI_OP_SCSI_TASK_MGT_RSP:
1754                         /* Target only */
1755                         DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
1756                             iscsi_text_rsp_hdr_t *,
1757                             (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1758                         idm_pdu_tx_forward(ic, pdu);
1759                         return;
1760                 case ISCSI_OP_SCSI_DATA_RSP:
1761                         /* Target only */
1762                         DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
1763                             iscsi_data_rsp_hdr_t *,
1764                             (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
1765                         idm_pdu_tx_forward(ic, pdu);
1766                         return;
1767                 case ISCSI_OP_RTT_RSP:
1768                         /* Target only */
1769                         DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
1770                             iscsi_rtt_hdr_t *,
1771                             (iscsi_rtt_hdr_t *)pdu->isp_hdr);
1772                         idm_pdu_tx_forward(ic, pdu);
1773                         return;
1774                 case ISCSI_OP_NOOP_IN:
1775                         /* Target only */
1776                         DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
1777                             iscsi_nop_in_hdr_t *,
1778                             (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
1779                         idm_pdu_tx_forward(ic, pdu);
1780                         return;
1781                 case ISCSI_OP_TEXT_RSP:
1782                         /* Target only */
1783                         DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
1784                             iscsi_text_rsp_hdr_t *,
1785                             (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1786                         idm_pdu_tx_forward(ic, pdu);
1787                         return;
1788                 case ISCSI_OP_TEXT_CMD:
1789                 case ISCSI_OP_NOOP_OUT:
1790                 case ISCSI_OP_SCSI_CMD:
1791                 case ISCSI_OP_SCSI_DATA:
1792                 case ISCSI_OP_SCSI_TASK_MGT_MSG:
1793                         /* Initiator only */
1794                         idm_pdu_tx_forward(ic, pdu);
1795                         return;
1796                 default:
1797                         break;
1798                 }
1799 
1800                 mutex_enter(&ic->ic_state_mutex);
1801         }
1802 
1803         /*
1804          * Any PDU's processed outside of full-feature mode and non-SCSI
1805          * PDU's in full-feature mode are handled by generating an
1806          * event to the connection state machine.  The state machine
1807          * will validate the PDU against the current state and either
1808          * transmit the PDU if the opcode is allowed or handle an
1809          * error if the PDU is not allowed.
1810          *
1811          * This code-path will also generate any events that are implied
1812          * by the PDU opcode.  For example a "login response" with success
1813          * status generates a CE_LOGOUT_SUCCESS_SND event.
1814          */
1815         switch (IDM_PDU_OPCODE(pdu)) {
1816         case ISCSI_OP_LOGIN_CMD:
1817                 idm_conn_tx_pdu_event(ic, CE_LOGIN_SND, (uintptr_t)pdu);
1818                 break;
1819         case ISCSI_OP_LOGIN_RSP:
1820                 DTRACE_ISCSI_2(login__response, idm_conn_t *, ic,
1821                     iscsi_login_rsp_hdr_t *,
1822                     (iscsi_login_rsp_hdr_t *)pdu->isp_hdr);
1823                 idm_parse_login_rsp(ic, pdu, /* Is RX */ B_FALSE);
1824                 break;
1825         case ISCSI_OP_LOGOUT_CMD:
1826                 idm_parse_logout_req(ic, pdu, /* Is RX */ B_FALSE);
1827                 break;
1828         case ISCSI_OP_LOGOUT_RSP:
1829                 DTRACE_ISCSI_2(logout__response, idm_conn_t *, ic,
1830                     iscsi_logout_rsp_hdr_t *,
1831                     (iscsi_logout_rsp_hdr_t *)pdu->isp_hdr);
1832                 idm_parse_logout_rsp(ic, pdu, /* Is RX */ B_FALSE);
1833                 break;
1834         case ISCSI_OP_ASYNC_EVENT:
1835                 DTRACE_ISCSI_2(async__send, idm_conn_t *, ic,
1836                     iscsi_async_evt_hdr_t *,
1837                     (iscsi_async_evt_hdr_t *)pdu->isp_hdr);
1838                 async_evt = (iscsi_async_evt_hdr_t *)pdu->isp_hdr;
1839                 switch (async_evt->async_event) {
1840                 case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT:
1841                         idm_conn_tx_pdu_event(ic, CE_ASYNC_LOGOUT_SND,
1842                             (uintptr_t)pdu);
1843                         break;
1844                 case ISCSI_ASYNC_EVENT_DROPPING_CONNECTION:
1845                         idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_CONN_SND,
1846                             (uintptr_t)pdu);
1847                         break;
1848                 case ISCSI_ASYNC_EVENT_DROPPING_ALL_CONNECTIONS:
1849                         idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_ALL_CONN_SND,
1850                             (uintptr_t)pdu);
1851                         break;
1852                 case ISCSI_ASYNC_EVENT_SCSI_EVENT:
1853                 case ISCSI_ASYNC_EVENT_PARAM_NEGOTIATION:
1854                 default:
1855                         idm_conn_tx_pdu_event(ic, CE_MISC_TX,
1856                             (uintptr_t)pdu);
1857                         break;
1858                 }
1859                 break;
1860         case ISCSI_OP_SCSI_RSP:
1861                 /* Target only */
1862                 DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
1863                     iscsi_scsi_rsp_hdr_t *,
1864                     (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
1865                 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1866                 break;
1867         case ISCSI_OP_SCSI_TASK_MGT_RSP:
1868                 /* Target only */
1869                 DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
1870                     iscsi_scsi_task_mgt_rsp_hdr_t *,
1871                     (iscsi_scsi_task_mgt_rsp_hdr_t *)pdu->isp_hdr);
1872                 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1873                 break;
1874         case ISCSI_OP_SCSI_DATA_RSP:
1875                 /* Target only */
1876                 DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
1877                     iscsi_data_rsp_hdr_t *,
1878                     (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
1879                 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1880                 break;
1881         case ISCSI_OP_RTT_RSP:
1882                 /* Target only */
1883                 DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
1884                     iscsi_rtt_hdr_t *,
1885                     (iscsi_rtt_hdr_t *)pdu->isp_hdr);
1886                 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1887                 break;
1888         case ISCSI_OP_NOOP_IN:
1889                 /* Target only */
1890                 DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
1891                     iscsi_nop_in_hdr_t *,
1892                     (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
1893                 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1894                 break;
1895         case ISCSI_OP_TEXT_RSP:
1896                 /* Target only */
1897                 DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
1898                     iscsi_text_rsp_hdr_t *,
1899                     (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
1900                 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1901                 break;
1902                 /* Initiator only */
1903         case ISCSI_OP_SCSI_CMD:
1904         case ISCSI_OP_SCSI_TASK_MGT_MSG:
1905         case ISCSI_OP_SCSI_DATA:
1906         case ISCSI_OP_NOOP_OUT:
1907         case ISCSI_OP_TEXT_CMD:
1908         case ISCSI_OP_SNACK_CMD:
1909         case ISCSI_OP_REJECT_MSG:
1910         default:
1911                 /*
1912                  * Connection state machine will validate these PDU's against
1913                  * the current state.  A PDU not allowed in the current
1914                  * state will cause a protocol error.
1915                  */
1916                 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1917                 break;
1918         }
1919         mutex_exit(&ic->ic_state_mutex);
1920 }
1921 
1922 /*
1923  * Common allocation of a PDU along with memory for header and data.
1924  */
1925 static idm_pdu_t *
1926 idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen, int sleepflag)
1927 {
1928         idm_pdu_t *result;
1929 
1930         /*
1931          * IDM clients should cache these structures for performance
1932          * critical paths.  We can't cache effectively in IDM because we
1933          * don't know the correct header and data size.
1934          *
1935          * Valid header length is assumed to be hdrlen and valid data
1936          * length is assumed to be datalen.  isp_hdrlen and isp_datalen
1937          * can be adjusted after the PDU is returned if necessary.
1938          */
1939         result = kmem_zalloc(sizeof (idm_pdu_t) + hdrlen + datalen, sleepflag);
1940         if (result != NULL) {
1941                 /* For idm_pdu_free sanity check */
1942                 result->isp_flags |= IDM_PDU_ALLOC;
1943                 /* pointer arithmetic */
1944                 result->isp_hdr = (iscsi_hdr_t *)(result + 1);
1945                 result->isp_hdrlen = hdrlen;
1946                 result->isp_hdrbuflen = hdrlen;
1947                 result->isp_transport_hdrlen = 0;
1948                 if (datalen != 0)
1949                         result->isp_data = (uint8_t *)result->isp_hdr + hdrlen;
1950                 result->isp_datalen = datalen;
1951                 result->isp_databuflen = datalen;
1952                 result->isp_magic = IDM_PDU_MAGIC;
1953         }
1954 
1955         return (result);
1956 }
1957 
1958 /*
1959  * Typical idm_pdu_alloc invocation, will block for resources.
1960  */
1961 idm_pdu_t *
1962 idm_pdu_alloc(uint_t hdrlen, uint_t datalen)
1963 {
1964         return (idm_pdu_alloc_common(hdrlen, datalen, KM_SLEEP));
1965 }
1966 
1967 /*
1968  * Non-blocking idm_pdu_alloc implementation, returns NULL if resources
1969  * are not available.  Needed for transport-layer allocations which may
1970  * be invoking in interrupt context.
1971  */
1972 idm_pdu_t *
1973 idm_pdu_alloc_nosleep(uint_t hdrlen, uint_t datalen)
1974 {
1975         return (idm_pdu_alloc_common(hdrlen, datalen, KM_NOSLEEP));
1976 }
1977 
1978 /*
1979  * Free a PDU previously allocated with idm_pdu_alloc() including any
1980  * header and data space allocated as part of the original request.
1981  * Additional memory regions referenced by subsequent modification of
1982  * the isp_hdr and/or isp_data fields will not be freed.
1983  */
1984 void
1985 idm_pdu_free(idm_pdu_t *pdu)
1986 {
1987         /* Make sure the structure was allocated using idm_pdu_alloc() */
1988         ASSERT(pdu->isp_flags & IDM_PDU_ALLOC);
1989         kmem_free(pdu,
1990             sizeof (idm_pdu_t) + pdu->isp_hdrbuflen + pdu->isp_databuflen);
1991 }
1992 
1993 /*
1994  * Initialize the connection, private and callback fields in a PDU.
1995  */
1996 void
1997 idm_pdu_init(idm_pdu_t *pdu, idm_conn_t *ic, void *private, idm_pdu_cb_t *cb)
1998 {
1999         /*
2000          * idm_pdu_complete() will call idm_pdu_free if the callback is
2001          * NULL.  This will only work if the PDU was originally allocated
2002          * with idm_pdu_alloc().
2003          */
2004         ASSERT((pdu->isp_flags & IDM_PDU_ALLOC) ||
2005             (cb != NULL));
2006         pdu->isp_magic = IDM_PDU_MAGIC;
2007         pdu->isp_ic = ic;
2008         pdu->isp_private = private;
2009         pdu->isp_callback = cb;
2010 }
2011 
2012 /*
2013  * Initialize the header and header length field.  This function should
2014  * not be used to adjust the header length in a buffer allocated via
2015  * pdu_pdu_alloc since it overwrites the existing header pointer.
2016  */
2017 void
2018 idm_pdu_init_hdr(idm_pdu_t *pdu, uint8_t *hdr, uint_t hdrlen)
2019 {
2020         pdu->isp_hdr = (iscsi_hdr_t *)((void *)hdr);
2021         pdu->isp_hdrlen = hdrlen;
2022 }
2023 
2024 /*
2025  * Initialize the data and data length fields.  This function should
2026  * not be used to adjust the data length of a buffer allocated via
2027  * idm_pdu_alloc since it overwrites the existing data pointer.
2028  */
2029 void
2030 idm_pdu_init_data(idm_pdu_t *pdu, uint8_t *data, uint_t datalen)
2031 {
2032         pdu->isp_data = data;
2033         pdu->isp_datalen = datalen;
2034 }
2035 
2036 void
2037 idm_pdu_complete(idm_pdu_t *pdu, idm_status_t status)
2038 {
2039         if (pdu->isp_callback) {
2040                 pdu->isp_status = status;
2041                 (*pdu->isp_callback)(pdu, status);
2042         } else {
2043                 idm_pdu_free(pdu);
2044         }
2045 }
2046 
2047 /*
2048  * State machine auditing
2049  */
2050 
2051 void
2052 idm_sm_audit_init(sm_audit_buf_t *audit_buf)
2053 {
2054         bzero(audit_buf, sizeof (sm_audit_buf_t));
2055         audit_buf->sab_max_index = SM_AUDIT_BUF_MAX_REC - 1;
2056 }
2057 
2058 static
2059 sm_audit_record_t *
2060 idm_sm_audit_common(sm_audit_buf_t *audit_buf, sm_audit_record_type_t r_type,
2061     sm_audit_sm_type_t sm_type,
2062     int current_state)
2063 {
2064         sm_audit_record_t *sar;
2065 
2066         sar = audit_buf->sab_records;
2067         sar += audit_buf->sab_index;
2068         audit_buf->sab_index++;
2069         audit_buf->sab_index &= audit_buf->sab_max_index;
2070 
2071         sar->sar_type = r_type;
2072         gethrestime(&sar->sar_timestamp);
2073         sar->sar_sm_type = sm_type;
2074         sar->sar_state = current_state;
2075 
2076         return (sar);
2077 }
2078 
2079 void
2080 idm_sm_audit_event(sm_audit_buf_t *audit_buf,
2081     sm_audit_sm_type_t sm_type, int current_state,
2082     int event, uintptr_t event_info)
2083 {
2084         sm_audit_record_t *sar;
2085 
2086         sar = idm_sm_audit_common(audit_buf, SAR_STATE_EVENT,
2087             sm_type, current_state);
2088         sar->sar_event = event;
2089         sar->sar_event_info = event_info;
2090 }
2091 
2092 void
2093 idm_sm_audit_state_change(sm_audit_buf_t *audit_buf,
2094     sm_audit_sm_type_t sm_type, int current_state, int new_state)
2095 {
2096         sm_audit_record_t *sar;
2097 
2098         sar = idm_sm_audit_common(audit_buf, SAR_STATE_CHANGE,
2099             sm_type, current_state);
2100         sar->sar_new_state = new_state;
2101 }
2102 
2103 
2104 /*
2105  * Object reference tracking
2106  */
2107 
2108 void
2109 idm_refcnt_init(idm_refcnt_t *refcnt, void *referenced_obj)
2110 {
2111         bzero(refcnt, sizeof (*refcnt));
2112         idm_refcnt_reset(refcnt);
2113         refcnt->ir_referenced_obj = referenced_obj;
2114         bzero(&refcnt->ir_audit_buf, sizeof (refcnt_audit_buf_t));
2115         refcnt->ir_audit_buf.anb_max_index = REFCNT_AUDIT_BUF_MAX_REC - 1;
2116         mutex_init(&refcnt->ir_mutex, NULL, MUTEX_DEFAULT, NULL);
2117         cv_init(&refcnt->ir_cv, NULL, CV_DEFAULT, NULL);
2118 }
2119 
2120 void
2121 idm_refcnt_destroy(idm_refcnt_t *refcnt)
2122 {
2123         /*
2124          * Grab the mutex to there are no other lingering threads holding
2125          * the mutex before we destroy it (e.g. idm_refcnt_rele just after
2126          * the refcnt goes to zero if ir_waiting == REF_WAIT_ASYNC)
2127          */
2128         mutex_enter(&refcnt->ir_mutex);
2129         ASSERT(refcnt->ir_refcnt == 0);
2130         cv_destroy(&refcnt->ir_cv);
2131         mutex_destroy(&refcnt->ir_mutex);
2132 }
2133 
2134 void
2135 idm_refcnt_reset(idm_refcnt_t *refcnt)
2136 {
2137         refcnt->ir_waiting = REF_NOWAIT;
2138         refcnt->ir_refcnt = 0;
2139 }
2140 
2141 void
2142 idm_refcnt_hold(idm_refcnt_t *refcnt)
2143 {
2144         /*
2145          * Nothing should take a hold on an object after a call to
2146          * idm_refcnt_wait_ref or idm_refcnd_async_wait_ref
2147          */
2148         ASSERT(refcnt->ir_waiting == REF_NOWAIT);
2149 
2150         mutex_enter(&refcnt->ir_mutex);
2151         refcnt->ir_refcnt++;
2152         REFCNT_AUDIT(refcnt);
2153         mutex_exit(&refcnt->ir_mutex);
2154 }
2155 
2156 static void
2157 idm_refcnt_unref_task(void *refcnt_void)
2158 {
2159         idm_refcnt_t *refcnt = refcnt_void;
2160 
2161         mutex_enter(&refcnt->ir_mutex);
2162         REFCNT_AUDIT(refcnt);
2163         mutex_exit(&refcnt->ir_mutex);
2164         (*refcnt->ir_cb)(refcnt->ir_referenced_obj);
2165 }
2166 
2167 void
2168 idm_refcnt_rele(idm_refcnt_t *refcnt)
2169 {
2170         mutex_enter(&refcnt->ir_mutex);
2171         ASSERT(refcnt->ir_refcnt > 0);
2172         refcnt->ir_refcnt--;
2173         REFCNT_AUDIT(refcnt);
2174         if (refcnt->ir_waiting == REF_NOWAIT) {
2175                 /* No one is waiting on this object */
2176                 mutex_exit(&refcnt->ir_mutex);
2177                 return;
2178         }
2179 
2180         /*
2181          * Someone is waiting for this object to go idle so check if
2182          * refcnt is 0.  Waiting on an object then later grabbing another
2183          * reference is not allowed so we don't need to handle that case.
2184          */
2185         if (refcnt->ir_refcnt == 0) {
2186                 if (refcnt->ir_waiting == REF_WAIT_ASYNC) {
2187                         if (taskq_dispatch(idm.idm_global_taskq,
2188                             &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
2189                                 cmn_err(CE_WARN,
2190                                     "idm_refcnt_rele: Couldn't dispatch task");
2191                         }
2192                 } else if (refcnt->ir_waiting == REF_WAIT_SYNC) {
2193                         cv_signal(&refcnt->ir_cv);
2194                 }
2195         }
2196         mutex_exit(&refcnt->ir_mutex);
2197 }
2198 
2199 void
2200 idm_refcnt_rele_and_destroy(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
2201 {
2202         mutex_enter(&refcnt->ir_mutex);
2203         ASSERT(refcnt->ir_refcnt > 0);
2204         refcnt->ir_refcnt--;
2205         REFCNT_AUDIT(refcnt);
2206 
2207         /*
2208          * Someone is waiting for this object to go idle so check if
2209          * refcnt is 0.  Waiting on an object then later grabbing another
2210          * reference is not allowed so we don't need to handle that case.
2211          */
2212         if (refcnt->ir_refcnt == 0) {
2213                 refcnt->ir_cb = cb_func;
2214                 refcnt->ir_waiting = REF_WAIT_ASYNC;
2215                 if (taskq_dispatch(idm.idm_global_taskq,
2216                     &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
2217                         cmn_err(CE_WARN,
2218                             "idm_refcnt_rele: Couldn't dispatch task");
2219                 }
2220         }
2221         mutex_exit(&refcnt->ir_mutex);
2222 }
2223 
2224 void
2225 idm_refcnt_wait_ref(idm_refcnt_t *refcnt)
2226 {
2227         mutex_enter(&refcnt->ir_mutex);
2228         refcnt->ir_waiting = REF_WAIT_SYNC;
2229         REFCNT_AUDIT(refcnt);
2230         while (refcnt->ir_refcnt != 0)
2231                 cv_wait(&refcnt->ir_cv, &refcnt->ir_mutex);
2232         mutex_exit(&refcnt->ir_mutex);
2233 }
2234 
2235 void
2236 idm_refcnt_async_wait_ref(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
2237 {
2238         mutex_enter(&refcnt->ir_mutex);
2239         refcnt->ir_waiting = REF_WAIT_ASYNC;
2240         refcnt->ir_cb = cb_func;
2241         REFCNT_AUDIT(refcnt);
2242         /*
2243          * It's possible we don't have any references.  To make things easier
2244          * on the caller use a taskq to call the callback instead of
2245          * calling it synchronously
2246          */
2247         if (refcnt->ir_refcnt == 0) {
2248                 if (taskq_dispatch(idm.idm_global_taskq,
2249                     &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
2250                         cmn_err(CE_WARN,
2251                             "idm_refcnt_async_wait_ref: "
2252                             "Couldn't dispatch task");
2253                 }
2254         }
2255         mutex_exit(&refcnt->ir_mutex);
2256 }
2257 
2258 void
2259 idm_refcnt_destroy_unref_obj(idm_refcnt_t *refcnt,
2260     idm_refcnt_cb_t *cb_func)
2261 {
2262         mutex_enter(&refcnt->ir_mutex);
2263         if (refcnt->ir_refcnt == 0) {
2264                 mutex_exit(&refcnt->ir_mutex);
2265                 (*cb_func)(refcnt->ir_referenced_obj);
2266                 return;
2267         }
2268         mutex_exit(&refcnt->ir_mutex);
2269 }
2270 
2271 /*
2272  * used to determine the status of the refcnt.
2273  *
2274  * if refcnt is 0 return is 0
2275  * if refcnt is negative return is -1
2276  * if refcnt > 0 and no waiters return is 1
2277  * if refcnt > 0 and waiters return is 2
2278  */
2279 int
2280 idm_refcnt_is_held(idm_refcnt_t *refcnt)
2281 {
2282         if (refcnt->ir_refcnt < 0)
2283                 return (-1);
2284 
2285         if (refcnt->ir_refcnt == 0)
2286                 return (0);
2287 
2288         if (refcnt->ir_waiting == REF_NOWAIT && refcnt->ir_refcnt > 0)
2289                 return (1);
2290 
2291         return (2);
2292 }
2293 
/* Take a hold on a connection (wrapper over its embedded refcnt). */
void
idm_conn_hold(idm_conn_t *ic)
{
        idm_refcnt_hold(&ic->ic_refcnt);
}
2299 
/* Release a hold on a connection (wrapper over its embedded refcnt). */
void
idm_conn_rele(idm_conn_t *ic)
{
        idm_refcnt_rele(&ic->ic_refcnt);
}
2305 
2306 void
2307 idm_conn_set_target_name(idm_conn_t *ic, char *target_name)
2308 {
2309         (void) strlcpy(ic->ic_target_name, target_name, ISCSI_MAX_NAME_LEN + 1);
2310 }
2311 
2312 void
2313 idm_conn_set_initiator_name(idm_conn_t *ic, char *initiator_name)
2314 {
2315         (void) strlcpy(ic->ic_initiator_name, initiator_name,
2316             ISCSI_MAX_NAME_LEN + 1);
2317 }
2318 
2319 void
2320 idm_conn_set_isid(idm_conn_t *ic, uint8_t isid[ISCSI_ISID_LEN])
2321 {
2322         (void) snprintf(ic->ic_isid, ISCSI_MAX_ISID_LEN + 1,
2323             "%02x%02x%02x%02x%02x%02x",
2324             isid[0], isid[1], isid[2], isid[3], isid[4], isid[5]);
2325 }
2326 
/*
 * _idm_init
 *
 * One-time framework initialization performed at module load.  Sets up
 * the global locks/CVs, the global taskq, the watchdog thread, the task
 * ID table, the buffer/task kmem caches, the service and connection
 * lists, the native sockets transport, and the connection ID pool.
 *
 * On failure every resource acquired so far is released in reverse
 * order before returning ENOMEM.  Later allocations all use KM_SLEEP
 * and so cannot fail.
 */
static int
_idm_init(void)
{
        /* Initialize the rwlock for the taskid table */
        rw_init(&idm.idm_taskid_table_lock, NULL, RW_DRIVER, NULL);

        /* Initialize the global mutex and taskq */
        mutex_init(&idm.idm_global_mutex, NULL, MUTEX_DEFAULT, NULL);

        cv_init(&idm.idm_tgt_svc_cv, NULL, CV_DEFAULT, NULL);
        cv_init(&idm.idm_wd_cv, NULL, CV_DEFAULT, NULL);

        /*
         * The maximum allocation needs to be high here since there can be
         * many concurrent tasks using the global taskq.
         */
        idm.idm_global_taskq = taskq_create("idm_global_taskq", 1, minclsyspri,
            128, 16384, TASKQ_PREPOPULATE);
        if (idm.idm_global_taskq == NULL) {
                /* Unwind everything initialized above, in reverse order */
                cv_destroy(&idm.idm_wd_cv);
                cv_destroy(&idm.idm_tgt_svc_cv);
                mutex_destroy(&idm.idm_global_mutex);
                rw_destroy(&idm.idm_taskid_table_lock);
                return (ENOMEM);
        }

        /* Start watchdog thread */
        idm.idm_wd_thread = thread_create(NULL, 0,
            idm_wd_thread, NULL, 0, &p0, TS_RUN, minclsyspri);
        if (idm.idm_wd_thread == NULL) {
                /*
                 * Couldn't create the watchdog thread.
                 * NOTE(review): appears to be defensive only —
                 * thread_create(9F) on illumos does not return NULL on
                 * failure; confirm before relying on this path.
                 */
                taskq_destroy(idm.idm_global_taskq);
                cv_destroy(&idm.idm_wd_cv);
                cv_destroy(&idm.idm_tgt_svc_cv);
                mutex_destroy(&idm.idm_global_mutex);
                rw_destroy(&idm.idm_taskid_table_lock);
                return (ENOMEM);
        }

        /* Pause until the watchdog thread is running */
        mutex_enter(&idm.idm_global_mutex);
        while (!idm.idm_wd_thread_running)
                cv_wait(&idm.idm_wd_cv, &idm.idm_global_mutex);
        mutex_exit(&idm.idm_global_mutex);

        /*
         * Allocate the task ID table and set "next" to 0.
         */

        idm.idm_taskid_max = idm_max_taskids;
        idm.idm_taskid_table = (idm_task_t **)
            kmem_zalloc(idm.idm_taskid_max * sizeof (idm_task_t *), KM_SLEEP);
        idm.idm_taskid_next = 0;

        /* Create the global buffer and task kmem caches */
        idm.idm_buf_cache = kmem_cache_create("idm_buf_cache",
            sizeof (idm_buf_t), 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);

        /*
         * Note, we're explicitly allocating an additional iSER header-
         * sized chunk for each of these elements. See idm_task_constructor().
         */
        idm.idm_task_cache = kmem_cache_create("idm_task_cache",
            sizeof (idm_task_t) + IDM_TRANSPORT_HEADER_LENGTH, 8,
            &idm_task_constructor, &idm_task_destructor,
            NULL, NULL, NULL, KM_SLEEP);

        /* Create the service and connection context lists */
        list_create(&idm.idm_tgt_svc_list, sizeof (idm_svc_t),
            offsetof(idm_svc_t, is_list_node));
        list_create(&idm.idm_tgt_conn_list, sizeof (idm_conn_t),
            offsetof(idm_conn_t, ic_list_node));
        list_create(&idm.idm_ini_conn_list, sizeof (idm_conn_t),
            offsetof(idm_conn_t, ic_list_node));

        /* Initialize the native sockets transport */
        idm_so_init(&idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS]);

        /* Create connection ID pool */
        (void) idm_idpool_create(&idm.idm_conn_id_pool);

        return (DDI_SUCCESS);
}
2410 
/*
 * _idm_fini
 *
 * Framework teardown at module unload.  Refuses to unload (EBUSY) while
 * any connection or service contexts remain; otherwise stops the
 * watchdog thread and releases all global resources in the reverse of
 * the order _idm_init() acquired them.
 */
static int
_idm_fini(void)
{
        /*
         * NOTE(review): these emptiness checks are done without holding
         * idm_global_mutex — presumably safe because modunload
         * serializes against new activity; confirm.
         */
        if (!list_is_empty(&idm.idm_ini_conn_list) ||
            !list_is_empty(&idm.idm_tgt_conn_list) ||
            !list_is_empty(&idm.idm_tgt_svc_list)) {
                return (EBUSY);
        }

        /* Tell the watchdog thread to exit, then wait for it */
        mutex_enter(&idm.idm_global_mutex);
        idm.idm_wd_thread_running = B_FALSE;
        cv_signal(&idm.idm_wd_cv);
        mutex_exit(&idm.idm_global_mutex);

        thread_join(idm.idm_wd_thread_did);

        idm_idpool_destroy(&idm.idm_conn_id_pool);

        /* Close any LDI handles we have open on transport drivers */
        mutex_enter(&idm.idm_global_mutex);
        idm_transport_teardown();
        mutex_exit(&idm.idm_global_mutex);

        /* Teardown the native sockets transport */
        idm_so_fini();

        /* Release remaining resources in reverse order of _idm_init() */
        list_destroy(&idm.idm_ini_conn_list);
        list_destroy(&idm.idm_tgt_conn_list);
        list_destroy(&idm.idm_tgt_svc_list);
        kmem_cache_destroy(idm.idm_task_cache);
        kmem_cache_destroy(idm.idm_buf_cache);
        kmem_free(idm.idm_taskid_table,
            idm.idm_taskid_max * sizeof (idm_task_t *));
        mutex_destroy(&idm.idm_global_mutex);
        cv_destroy(&idm.idm_wd_cv);
        cv_destroy(&idm.idm_tgt_svc_cv);
        rw_destroy(&idm.idm_taskid_table_lock);

        return (0);
}