/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
 * Copyright (c) 2013 by Delphix. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 */

#include <sys/conf.h>
#include <sys/file.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/scsi/scsi.h>
#include <sys/scsi/impl/scsi_reset_notify.h>
#include <sys/scsi/generic/mode.h>
#include <sys/disp.h>
#include <sys/byteorder.h>
#include <sys/atomic.h>
#include <sys/sdt.h>
#include <sys/dkio.h>
#include <sys/dkioc_free_util.h>

#include <sys/stmf.h>
#include <sys/lpif.h>
#include <sys/portif.h>
#include <sys/stmf_ioctl.h>
#include <sys/stmf_sbd_ioctl.h>

#include "stmf_sbd.h"
#include "sbd_impl.h"

#define SCSI2_CONFLICT_FREE_CMDS(cdb)   ( \
        /* ----------------------- */                                      \
        /* Refer Both              */                                      \
        /* SPC-2 (rev 20) Table 10 */                                      \
        /* SPC-3 (rev 23) Table 31 */                                      \
        /* ----------------------- */                                      \
        ((cdb[0]) == SCMD_INQUIRY)                                      || \
        ((cdb[0]) == SCMD_LOG_SENSE_G1)                                 || \
        ((cdb[0]) == SCMD_RELEASE)                                      || \
        ((cdb[0]) == SCMD_RELEASE_G1)                                   || \
        ((cdb[0]) == SCMD_REPORT_LUNS)                                  || \
        ((cdb[0]) == SCMD_REQUEST_SENSE)                                || \
        /* PREVENT ALLOW MEDIUM REMOVAL with prevent == 0 */               \
        ((((cdb[0]) == SCMD_DOORLOCK) && (((cdb[4]) & 0x3) == 0)))  || \
        /* SERVICE ACTION IN with READ MEDIA SERIAL NUMBER (0x01) */       \
        (((cdb[0]) == SCMD_SVC_ACTION_IN_G5) && (                          \
            ((cdb[1]) & 0x1F) == 0x01))                                     || \
        /* MAINTENANCE IN with service actions REPORT ALIASES (0x0B) */    \
        /* REPORT DEVICE IDENTIFIER (0x05)  REPORT PRIORITY (0x0E) */      \
        /* REPORT TARGET PORT GROUPS (0x0A) REPORT TIMESTAMP (0x0F) */     \
        (((cdb[0]) == SCMD_MAINTENANCE_IN) && (                            \
            (((cdb[1]) & 0x1F) == 0x0B) ||                                 \
            (((cdb[1]) & 0x1F) == 0x05) ||                                 \
            (((cdb[1]) & 0x1F) == 0x0E) ||                                 \
            (((cdb[1]) & 0x1F) == 0x0A) ||                                 \
            (((cdb[1]) & 0x1F) == 0x0F)))                           || \
        /* ----------------------- */                                      \
        /* SBC-3 (rev 17) Table 3  */                                      \
        /* ----------------------- */                                      \
        /* READ CAPACITY(10) */                                            \
        ((cdb[0]) == SCMD_READ_CAPACITY)                                || \
        /* READ CAPACITY(16) */                                            \
        (((cdb[0]) == SCMD_SVC_ACTION_IN_G4) && (                          \
            ((cdb[1]) & 0x1F) == 0x10))                                     || \
        /* START STOP UNIT with START bit 0 and POWER CONDITION 0  */      \
        (((cdb[0]) == SCMD_START_STOP) && (                                \
            (((cdb[4]) & 0xF0) == 0) && (((cdb[4]) & 0x01) == 0))))
/* End of SCSI2_CONFLICT_FREE_CMDS */
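
/*
 * Commands that pass this screen are those the SPC tables cited above
 * permit even while another initiator holds a SCSI-2 reservation;
 * anything else is a candidate for a RESERVATION CONFLICT response.
 */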

uint8_t HardwareAcceleratedInit = 1;
uint8_t sbd_unmap_enable = 1;           /* allow unmap by default */

/*
 * An /etc/system tunable which specifies the maximum number of LBAs supported
 * in a single UNMAP operation. The default of 0x2000 (8192) blocks is 4MB
 * with 512-byte blocks.
 */
int stmf_sbd_unmap_max_nblks = 0x002000;

/*
 * An /etc/system tunable which indicates if READ ops can run on the standby
 * path or return an error.
 */
int stmf_standby_fail_reads = 0;
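
/*
 * Example (assuming this file builds into the stmf_sbd kernel module):
 * tunables such as the above can be set at boot time from /etc/system,
 * e.g.
 *
 *      set stmf_sbd:stmf_sbd_unmap_max_nblks = 0x4000
 *      set stmf_sbd:stmf_standby_fail_reads = 1
 */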

stmf_status_t sbd_lu_reset_state(stmf_lu_t *lu);
static void sbd_handle_sync_cache(struct scsi_task *task,
    struct stmf_data_buf *initial_dbuf);
void sbd_handle_read_xfer_completion(struct scsi_task *task,
    sbd_cmd_t *scmd, struct stmf_data_buf *dbuf);
void sbd_handle_short_write_xfer_completion(scsi_task_t *task,
    stmf_data_buf_t *dbuf);
void sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf,
    uint32_t buflen);
void sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf);
void sbd_handle_identifying_info(scsi_task_t *task, stmf_data_buf_t *dbuf);

static void sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf,
    uint32_t buflen);
static void sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf);

extern void sbd_pgr_initialize_it(scsi_task_t *, sbd_it_data_t *);
extern int sbd_pgr_reservation_conflict(scsi_task_t *, struct sbd_lu *sl);
extern void sbd_pgr_reset(sbd_lu_t *);
extern void sbd_pgr_remove_it_handle(sbd_lu_t *, sbd_it_data_t *);
extern void sbd_handle_pgr_in_cmd(scsi_task_t *, stmf_data_buf_t *);
extern void sbd_handle_pgr_out_cmd(scsi_task_t *, stmf_data_buf_t *);
extern void sbd_handle_pgr_out_data(scsi_task_t *, stmf_data_buf_t *);
void sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
    int first_xfer);
static void sbd_handle_write_same(scsi_task_t *task,
    struct stmf_data_buf *initial_dbuf);
static void sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
    struct stmf_data_buf *dbuf, uint8_t dbuf_reusable);
static void sbd_handle_write_same_xfer_completion(struct scsi_task *task,
    sbd_cmd_t *scmd, struct stmf_data_buf *dbuf, uint8_t dbuf_reusable);
/*
 * IMPORTANT NOTE:
 * =================
 * The whole world here is based on the assumption that everything within
 * a scsi task executes in a single threaded manner, even the aborts.
 * Don't ever change that. There won't be any performance gain, but there
 * will be tons of race conditions.
 */

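/*
 * Transfer the next chunk of read data using the given dbuf, then keep
 * allocating buffers and issuing further transfers while data remains
 * and the port can accept more buffers. On a backend read error the
 * transfer is completed immediately with SBD_SCSI_CMD_XFER_FAIL set.
 */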
void
sbd_do_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
    struct stmf_data_buf *dbuf)
{
        sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
        uint64_t laddr;
        uint32_t len, buflen, iolen;
        int ndx;
        int bufs_to_take;

        /* Let's try not to hog all the buffers the port has. */
        bufs_to_take = ((task->task_max_nbufs > 2) &&
            (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
            task->task_max_nbufs;

        len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ?
            dbuf->db_buf_size : ATOMIC32_GET(scmd->len);
        laddr = scmd->addr + scmd->current_ro;

        for (buflen = 0, ndx = 0; (buflen < len) &&
            (ndx < dbuf->db_sglist_length); ndx++) {
                iolen = min(len - buflen, dbuf->db_sglist[ndx].seg_length);
                if (iolen == 0)
                        break;
                if (sbd_data_read(sl, task, laddr, (uint64_t)iolen,
                    dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
                        scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
                        /* Do not need to do xfer anymore, just complete it */
                        dbuf->db_data_size = 0;
                        dbuf->db_xfer_status = STMF_SUCCESS;
                        sbd_handle_read_xfer_completion(task, scmd, dbuf);
                        return;
                }
                buflen += iolen;
                laddr += (uint64_t)iolen;
        }
        dbuf->db_relative_offset = scmd->current_ro;
        dbuf->db_data_size = buflen;
        dbuf->db_flags = DB_DIRECTION_TO_RPORT;
        (void) stmf_xfer_data(task, dbuf, 0);
        atomic_add_32(&scmd->len, -buflen);
        scmd->current_ro += buflen;
        if (ATOMIC32_GET(scmd->len) &&
            (ATOMIC8_GET(scmd->nbufs) < bufs_to_take)) {
                uint32_t maxsize, minsize, old_minsize;

                maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128 * 1024 :
                    ATOMIC32_GET(scmd->len);
                minsize = maxsize >> 2;
                do {
                        /*
                         * A bad port implementation can keep on failing
                         * the request but keep on sending us a false
                         * minsize.
                         */
                        old_minsize = minsize;
                        dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
                } while ((dbuf == NULL) && (old_minsize > minsize) &&
                    (minsize >= 512));
                if (dbuf == NULL) {
                        return;
                }
                atomic_inc_8(&scmd->nbufs);
                sbd_do_read_xfer(task, scmd, dbuf);
        }
}

/*
 * sbd_zcopy: Bail-out switch for reduced copy path.
 *
 * 0 - read & write off
 * 1 - read & write on
 * 2 - only read on
 * 4 - only write on
 */
int sbd_zcopy = 1;      /* enable zcopy read & write path */
uint32_t sbd_max_xfer_len = 0;          /* Valid if non-zero */
uint32_t sbd_1st_xfer_len = 0;          /* Valid if non-zero */
uint32_t sbd_copy_threshold = 0;                /* Valid if non-zero */

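/*
 * Issue read transfers directly out of zvol (DMU) buffers, looping
 * until the remaining length is exhausted or the task's buffer limit
 * is reached. Caller must hold sl_access_state_lock as reader.
 */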
static void
sbd_do_sgl_read_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
{
        sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
        sbd_zvol_io_t *zvio;
        int ret, final_xfer;
        uint64_t offset;
        uint32_t xfer_len, max_len, first_len;
        stmf_status_t xstat;
        stmf_data_buf_t *dbuf;
        uint_t nblks;
        uint64_t blksize = sl->sl_blksize;
        size_t db_private_sz;
        hrtime_t xfer_start;
        uintptr_t pad;

        ASSERT(rw_read_held(&sl->sl_access_state_lock));
        ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);

        /*
         * Calculate the limits on xfer_len to the minimum of :
         *    - task limit
         *    - lun limit
         *    - sbd global limit if set
         *    - first xfer limit if set
         *
         * First, protect against silly over-ride value
         */
        if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
                cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
                    sbd_max_xfer_len);
                sbd_max_xfer_len = 0;
        }
        if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
                cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
                    sbd_1st_xfer_len);
                sbd_1st_xfer_len = 0;
        }

        max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
        if (sbd_max_xfer_len)
                max_len = MIN(max_len, sbd_max_xfer_len);
        /*
         * Special case the first xfer if hints are set.
         */
        if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
                /* global over-ride has precedence */
                if (sbd_1st_xfer_len)
                        first_len = sbd_1st_xfer_len;
                else
                        first_len = task->task_1st_xfer_len;
        } else {
                first_len = 0;
        }

        while (ATOMIC32_GET(scmd->len) &&
            ATOMIC8_GET(scmd->nbufs) < task->task_max_nbufs) {

                xfer_len = MIN(max_len, ATOMIC32_GET(scmd->len));
                if (first_len) {
                        xfer_len = MIN(xfer_len, first_len);
                        first_len = 0;
                }
                if (ATOMIC32_GET(scmd->len) == xfer_len) {
                        final_xfer = 1;
                } else {
                        /*
                         * Attempt to end xfer on a block boundary.
                         * The only way this does not happen is if the
                         * xfer_len is small enough to stay contained
                         * within the same block.
                         */
                        uint64_t xfer_offset, xfer_aligned_end;

                        final_xfer = 0;
                        xfer_offset = scmd->addr + scmd->current_ro;
                        xfer_aligned_end =
                            P2ALIGN(xfer_offset+xfer_len, blksize);
                        if (xfer_aligned_end > xfer_offset)
                                xfer_len = xfer_aligned_end - xfer_offset;
                }
                /*
                 * Allocate object to track the read and reserve
                 * enough space for scatter/gather list.
                 */
                offset = scmd->addr + scmd->current_ro;
                nblks = sbd_zvol_numsegs(sl, offset, xfer_len);

                db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
                    (nblks * sizeof (stmf_sglist_ent_t));
                dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
                    AF_DONTZERO);
                /*
                 * Setup the dbuf
                 *
                 * XXX Framework does not handle variable length sglists
                 * properly, so setup db_lu_private and db_port_private
                 * fields here. db_stmf_private is properly set for
                 * calls to stmf_free.
                 */
                if (dbuf->db_port_private == NULL) {
                        /*
                         * XXX Framework assigns space to PP after db_sglist[0]
                         */
                        cmn_err(CE_PANIC, "db_port_private == NULL");
                }
                pad = (uintptr_t)&dbuf->db_sglist[nblks];
                dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
                dbuf->db_port_private = NULL;
                dbuf->db_buf_size = xfer_len;
                dbuf->db_data_size = xfer_len;
                dbuf->db_relative_offset = scmd->current_ro;
                dbuf->db_sglist_length = (uint16_t)nblks;
                dbuf->db_xfer_status = 0;
                dbuf->db_handle = 0;

                dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
                    DB_DIRECTION_TO_RPORT | DB_LU_DATA_BUF);
                if (final_xfer)
                        dbuf->db_flags |= DB_SEND_STATUS_GOOD;

                zvio = dbuf->db_lu_private;
                /* Need absolute offset for zvol access */
                zvio->zvio_offset = offset;
                zvio->zvio_flags = ZVIO_SYNC;

                /*
                 * Accounting for start of read.
                 * Note there is no buffer address for the probe yet.
                 */
                xfer_start = gethrtime();
                DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
                    uint8_t *, NULL, uint64_t, xfer_len,
                    uint64_t, offset, scsi_task_t *, task);

                ret = sbd_zvol_alloc_read_bufs(sl, dbuf);

                stmf_lu_xfer_done(task, B_TRUE /* read */,
                    (gethrtime() - xfer_start));
                DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
                    uint8_t *, NULL, uint64_t, xfer_len,
                    uint64_t, offset, int, ret, scsi_task_t *, task);

                if (ret != 0) {
                        /*
                         * Read failure from the backend.
                         */
                        stmf_free(dbuf);
                        if (ATOMIC8_GET(scmd->nbufs) == 0) {
                                /* nothing queued, just finish */
                                scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
                                sbd_ats_remove_by_task(task);
                                stmf_scsilib_send_status(task, STATUS_CHECK,
                                    STMF_SAA_READ_ERROR);
                                rw_exit(&sl->sl_access_state_lock);
                        } else {
                                /* process failure when other dbufs finish */
                                scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
                        }
                        return;
                }

                /*
                 * Allow PP to do setup
                 */
                xstat = stmf_setup_dbuf(task, dbuf, 0);
                if (xstat != STMF_SUCCESS) {
                        /*
                         * This could happen if the driver cannot get the
                         * DDI resources it needs for this request.
                         * If other dbufs are queued, try again when the next
                         * one completes, otherwise give up.
                         */
                        sbd_zvol_rele_read_bufs(sl, dbuf);
                        stmf_free(dbuf);
                        if (ATOMIC8_GET(scmd->nbufs) > 0) {
                                /* completion of previous dbuf will retry */
                                return;
                        }
                        /*
                         * Done with this command.
                         */
                        scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
                        sbd_ats_remove_by_task(task);
                        if (first_xfer)
                                stmf_scsilib_send_status(task, STATUS_QFULL, 0);
                        else
                                stmf_scsilib_send_status(task, STATUS_CHECK,
                                    STMF_SAA_READ_ERROR);
                        rw_exit(&sl->sl_access_state_lock);
                        return;
                }
                /*
                 * dbuf is now queued on task
                 */
                atomic_inc_8(&scmd->nbufs);

                /* XXX leave this in for FW? */
                DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
                    struct stmf_data_buf *, dbuf, uint64_t, offset,
                    uint32_t, xfer_len);
                /*
                 * Do not pass STMF_IOF_LU_DONE so that the zvol
                 * state can be released in the completion callback.
                 */
                xstat = stmf_xfer_data(task, dbuf, 0);
                switch (xstat) {
                case STMF_SUCCESS:
                        break;
                case STMF_BUSY:
                        /*
                         * The dbuf is queued on the task, but unknown
                         * to the PP, thus no completion will occur.
                         */
                        sbd_zvol_rele_read_bufs(sl, dbuf);
                        stmf_teardown_dbuf(task, dbuf);
                        stmf_free(dbuf);
                        atomic_dec_8(&scmd->nbufs);
                        if (ATOMIC8_GET(scmd->nbufs) > 0) {
                                /* completion of previous dbuf will retry */
                                return;
                        }
                        /*
                         * Done with this command.
                         */
                        rw_exit(&sl->sl_access_state_lock);
                        scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
                        sbd_ats_remove_by_task(task);
                        if (first_xfer)
                                stmf_scsilib_send_status(task, STATUS_QFULL, 0);
                        else
                                stmf_scsilib_send_status(task, STATUS_CHECK,
                                    STMF_SAA_READ_ERROR);
                        return;
                case STMF_ABORTED:
                        /*
                         * Completion from task_done will cleanup
                         */
                        scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
                        sbd_ats_remove_by_task(task);
                        return;
                }
                /*
                 * Update the xfer progress.
                 */
                ASSERT(scmd->len >= xfer_len);
                atomic_add_32(&scmd->len, -xfer_len);
                scmd->current_ro += xfer_len;
        }
}

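/*
 * Completion handler for the normal (non-sgl) read path: account for
 * the finished dbuf, send final status once all buffers complete, or
 * reuse (or reallocate) the dbuf and continue the transfer.
 */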
void
sbd_handle_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
    struct stmf_data_buf *dbuf)
{
        if (dbuf->db_xfer_status != STMF_SUCCESS) {
                stmf_abort(STMF_QUEUE_TASK_ABORT, task,
                    dbuf->db_xfer_status, NULL);
                return;
        }
        task->task_nbytes_transferred += dbuf->db_data_size;
        if (ATOMIC32_GET(scmd->len) == 0 ||
            scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
                stmf_free_dbuf(task, dbuf);
                atomic_dec_8(&scmd->nbufs);
                if (ATOMIC8_GET(scmd->nbufs))
                        return; /* wait for all buffers to complete */
                scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
                sbd_ats_remove_by_task(task);
                if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL)
                        stmf_scsilib_send_status(task, STATUS_CHECK,
                            STMF_SAA_READ_ERROR);
                else
                        stmf_scsilib_send_status(task, STATUS_GOOD, 0);
                return;
        }
        if (dbuf->db_flags & DB_DONT_REUSE) {
                /* allocate new dbuf */
                uint32_t maxsize, minsize, old_minsize;
                stmf_free_dbuf(task, dbuf);

                maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ?
                    128 * 1024 : ATOMIC32_GET(scmd->len);
                minsize = maxsize >> 2;
                do {
                        old_minsize = minsize;
                        dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
                } while ((dbuf == NULL) && (old_minsize > minsize) &&
                    (minsize >= 512));
                if (dbuf == NULL) {
                        atomic_dec_8(&scmd->nbufs);
                        if (ATOMIC8_GET(scmd->nbufs) == 0) {
                                stmf_abort(STMF_QUEUE_TASK_ABORT, task,
                                    STMF_ALLOC_FAILURE, NULL);
                        }
                        return;
                }
        }
        sbd_do_read_xfer(task, scmd, dbuf);
}

/*
 * This routine must release the DMU resources and free the dbuf
 * in all cases.  If this is the final dbuf of the task, then drop
 * the reader lock on the LU state. If there are no errors and more
 * work to do, then queue more xfer operations.
 */
void
sbd_handle_sgl_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
    struct stmf_data_buf *dbuf)
{
        sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
        stmf_status_t xfer_status;
        uint32_t data_size;
        int scmd_err;

        ASSERT(dbuf->db_lu_private);
        ASSERT(scmd->cmd_type == SBD_CMD_SCSI_READ);

        atomic_dec_8(&scmd->nbufs);      /* account for this dbuf */
        /*
         * Release the DMU resources.
         */
        sbd_zvol_rele_read_bufs(sl, dbuf);
        /*
         * Release the dbuf after retrieving needed fields.
         */
        xfer_status = dbuf->db_xfer_status;
        data_size = dbuf->db_data_size;
        stmf_teardown_dbuf(task, dbuf);
        stmf_free(dbuf);
        /*
         * Release the state lock if this is the last completion.
         * If this is the last dbuf on task and all data has been
         * transferred or an error encountered, then no more dbufs
         * will be queued.
         */
        scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
            (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
            (xfer_status != STMF_SUCCESS));
        if ((ATOMIC8_GET(scmd->nbufs) == 0) &&
            (ATOMIC32_GET(scmd->len) == 0 || scmd_err)) {
                /* all DMU state has been released */
                rw_exit(&sl->sl_access_state_lock);
        }

        /*
         * If there have been no errors, either complete the task
         * or issue more data xfer operations.
         */
        if (!scmd_err) {
                /*
                 * This chunk completed successfully
                 */
                task->task_nbytes_transferred += data_size;
                if (ATOMIC8_GET(scmd->nbufs) == 0 &&
                    ATOMIC32_GET(scmd->len) == 0) {
                        /*
                         * This command completed successfully
                         *
                         * Status was sent along with data, so no status
                         * completion will occur. Tell stmf we are done.
                         */
                        scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
                        sbd_ats_remove_by_task(task);
                        stmf_task_lu_done(task);
                        return;
                }
                /*
                 * Start more xfers
                 */
                sbd_do_sgl_read_xfer(task, scmd, 0);
                return;
        }
        /*
         * Sort out the failure
         */
        if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
                /*
                 * If a previous error occurred, leave the command active
                 * and wait for the last completion to send the status check.
                 */
                if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
                        if (ATOMIC8_GET(scmd->nbufs) == 0) {
                                scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
                                sbd_ats_remove_by_task(task);
                                stmf_scsilib_send_status(task, STATUS_CHECK,
                                    STMF_SAA_READ_ERROR);
                        }
                        return;
                }
                /*
                 * Must have been a failure on current dbuf
                 */
                ASSERT(xfer_status != STMF_SUCCESS);

                /*
                 * Actually this is a bug. stmf abort should have reset the
                 * active flag, but since it's been there for some time,
                 * I won't change it.
                 */
                scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
                sbd_ats_remove_by_task(task);
                stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
        }
}

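/*
 * Write-side counterpart of the sgl read completion above: release the
 * DMU write buffers (committing the data unless an error occurred),
 * free the dbuf, drop the LU state lock on the last completion, and
 * either finish the task or queue more xfer operations.
 */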
void
sbd_handle_sgl_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
    struct stmf_data_buf *dbuf)
{
        sbd_zvol_io_t *zvio = dbuf->db_lu_private;
        sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
        int ret;
        int scmd_err, scmd_xfer_done;
        stmf_status_t xfer_status = dbuf->db_xfer_status;
        uint32_t data_size = dbuf->db_data_size;
        hrtime_t xfer_start;

        ASSERT(zvio);

        /*
         * Allow PP to free up resources before releasing the write bufs
         * as writing to the backend could take some time.
         */
        stmf_teardown_dbuf(task, dbuf);

        atomic_dec_8(&scmd->nbufs);      /* account for this dbuf */
        /*
         * All data was queued and this is the last completion,
         * but there could still be an error.
         */
        scmd_xfer_done = (ATOMIC32_GET(scmd->len) == 0 &&
            (ATOMIC8_GET(scmd->nbufs) == 0));
        scmd_err = (((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
            (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) ||
            (xfer_status != STMF_SUCCESS));

        xfer_start = gethrtime();
        DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
            uint8_t *, NULL, uint64_t, data_size,
            uint64_t, zvio->zvio_offset, scsi_task_t *, task);

        if (scmd_err) {
                /* just return the write buffers */
                sbd_zvol_rele_write_bufs_abort(sl, dbuf);
                ret = 0;
        } else {
                if (scmd_xfer_done)
                        zvio->zvio_flags = ZVIO_COMMIT;
                else
                        zvio->zvio_flags = 0;
                /* write the data */
                ret = sbd_zvol_rele_write_bufs(sl, dbuf);
        }

        stmf_lu_xfer_done(task, B_FALSE /* write */,
            (gethrtime() - xfer_start));
        DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
            uint8_t *, NULL, uint64_t, data_size,
            uint64_t, zvio->zvio_offset, int, ret, scsi_task_t *, task);

        if (ret != 0) {
                /* update the error flag */
                scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
                scmd_err = 1;
        }

        /* Release the dbuf */
        stmf_free(dbuf);

        /*
         * Release the state lock if this is the last completion.
         * If this is the last dbuf on task and all data has been
         * transferred or an error encountered, then no more dbufs
         * will be queued.
         */
        if ((ATOMIC8_GET(scmd->nbufs) == 0) &&
            (ATOMIC32_GET(scmd->len) == 0 || scmd_err)) {
                /* all DMU state has been released */
                rw_exit(&sl->sl_access_state_lock);
        }
        /*
         * If there have been no errors, either complete the task
         * or issue more data xfer operations.
         */
        if (!scmd_err) {
                /* This chunk completed successfully */
                task->task_nbytes_transferred += data_size;
                if (scmd_xfer_done) {
                        /* This command completed successfully */
                        scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
                        sbd_ats_remove_by_task(task);
                        if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
                            (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
                                stmf_scsilib_send_status(task, STATUS_CHECK,
                                    STMF_SAA_WRITE_ERROR);
                        } else {
                                stmf_scsilib_send_status(task, STATUS_GOOD, 0);
                        }
                        return;
                }
                /*
                 * Start more xfers
                 */
                sbd_do_sgl_write_xfer(task, scmd, 0);
                return;
        }
        /*
         * Sort out the failure
         */
        if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
                if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
                        if (ATOMIC8_GET(scmd->nbufs) == 0) {
                                scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
                                sbd_ats_remove_by_task(task);
                                stmf_scsilib_send_status(task, STATUS_CHECK,
                                    STMF_SAA_WRITE_ERROR);
                        }
                        /*
                         * Leave the command active until last dbuf completes.
                         */
                        return;
                }
                scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
                sbd_ats_remove_by_task(task);
                ASSERT(xfer_status != STMF_SUCCESS);
                stmf_abort(STMF_QUEUE_TASK_ABORT, task, xfer_status, NULL);
        }
}

/*
 * Handle a copy operation using the zvol interface.
 *
 * Similar to the sbd_data_read/write path, except it goes directly through
 * the zvol interfaces. It can pass a port-provider sglist along in the
 * form of a uio, which would be lost through the vn_rdwr path.
 *
 * Returns:
 *      STMF_SUCCESS - request handled
 *      STMF_FAILURE - request not handled, caller must deal with error
 */
static stmf_status_t
sbd_copy_rdwr(scsi_task_t *task, uint64_t laddr, stmf_data_buf_t *dbuf,
    int cmd, int commit)
{
        sbd_lu_t                *sl = task->task_lu->lu_provider_private;
        struct uio              uio;
        struct iovec            *iov, *tiov, iov1[8];
        uint32_t                len, resid;
        int                     ret, i, iovcnt, flags;
        hrtime_t                xfer_start;
        boolean_t               is_read;

        ASSERT(cmd == SBD_CMD_SCSI_READ || cmd == SBD_CMD_SCSI_WRITE);

        is_read = (cmd == SBD_CMD_SCSI_READ) ? B_TRUE : B_FALSE;
        iovcnt = dbuf->db_sglist_length;
        /* use the stack for small iovecs */
        if (iovcnt > 8) {
                iov = kmem_alloc(iovcnt * sizeof (*iov), KM_SLEEP);
        } else {
                iov = &iov1[0];
        }

        /* Convert dbuf sglist to iovec format */
        len = dbuf->db_data_size;
        resid = len;
        tiov = iov;
        for (i = 0; i < iovcnt; i++) {
                tiov->iov_base = (caddr_t)dbuf->db_sglist[i].seg_addr;
                tiov->iov_len = MIN(resid, dbuf->db_sglist[i].seg_length);
                resid -= tiov->iov_len;
                tiov++;
        }
        if (resid != 0) {
                cmn_err(CE_WARN, "inconsistent sglist rem %d", resid);
                if (iov != &iov1[0])
                        kmem_free(iov, iovcnt * sizeof (*iov));
                return (STMF_FAILURE);
        }
        /* Setup the uio struct */
        uio.uio_iov = iov;
        uio.uio_iovcnt = iovcnt;
        uio.uio_loffset = laddr;
        uio.uio_segflg = (short)UIO_SYSSPACE;
        uio.uio_resid = (uint64_t)len;
        uio.uio_llimit = RLIM64_INFINITY;

        xfer_start = gethrtime();
        if (is_read == B_TRUE) {
                uio.uio_fmode = FREAD;
                uio.uio_extflg = UIO_COPY_CACHED;
                DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
                    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
                    scsi_task_t *, task);

                /* Fetch the data */
                ret = sbd_zvol_copy_read(sl, &uio);

                DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
                    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
                    scsi_task_t *, task);
        } else {
                uio.uio_fmode = FWRITE;
                uio.uio_extflg = UIO_COPY_DEFAULT;
                DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
                    uint8_t *, NULL, uint64_t, len, uint64_t, laddr,
                    scsi_task_t *, task);

                flags = (commit) ? ZVIO_COMMIT : 0;
                /* Write the data */
                ret = sbd_zvol_copy_write(sl, &uio, flags);

                DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
                    uint8_t *, NULL, uint64_t, len, uint64_t, laddr, int, ret,
                    scsi_task_t *, task);
        }
        /* finalize accounting */
        stmf_lu_xfer_done(task, is_read, (gethrtime() - xfer_start));

        if (iov != &iov1[0])
                kmem_free(iov, iovcnt * sizeof (*iov));
        if (ret != 0) {
                /* Backend I/O error */
                return (STMF_FAILURE);
        }
        return (STMF_SUCCESS);
}

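/*
 * Entry point for READ(6/10/12/16): decode the CDB, validate the LBA
 * range, then service the request via the reduced-copy (zvol) path,
 * the single-buffer fast path, or the general multi-buffer path.
 */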
void
sbd_handle_read(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
{
        uint64_t lba, laddr;
        uint64_t blkcount;
        uint32_t len;
        uint8_t op = task->task_cdb[0];
        sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
        sbd_cmd_t *scmd;
        stmf_data_buf_t *dbuf;
        int fast_path;
        boolean_t fua_bit = B_FALSE;

        /*
         * Check to see if the command is READ(10), READ(12), or READ(16).
         * If it is then check for bit 3 being set to indicate if Forced
         * Unit Access is being requested. If so, we'll bypass the use of
         * DMA buffers to simplify support of this feature.
         */
        if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) ||
            (op == SCMD_READ_G5)) &&
            (task->task_cdb[1] & BIT_3)) {
                fua_bit = B_TRUE;
        }
        if (op == SCMD_READ) {
                lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
                len = (uint32_t)task->task_cdb[4];

                if (len == 0) {
                        len = 256;
                }
        } else if (op == SCMD_READ_G1) {
                lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
                len = READ_SCSI16(&task->task_cdb[7], uint32_t);
        } else if (op == SCMD_READ_G5) {
                lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
                len = READ_SCSI32(&task->task_cdb[6], uint32_t);
        } else if (op == SCMD_READ_G4) {
                lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
                len = READ_SCSI32(&task->task_cdb[10], uint32_t);
        } else {
                stmf_scsilib_send_status(task, STATUS_CHECK,
                    STMF_SAA_INVALID_OPCODE);
                return;
        }

        laddr = lba << sl->sl_data_blocksize_shift;
        blkcount = len;
        len <<= sl->sl_data_blocksize_shift;

        if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
                stmf_scsilib_send_status(task, STATUS_CHECK,
                    STMF_SAA_LBA_OUT_OF_RANGE);
                return;
        }

        task->task_cmd_xfer_length = len;
        if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
                task->task_expected_xfer_length = len;
        }

        if (len != task->task_expected_xfer_length) {
                fast_path = 0;
                len = (len > task->task_expected_xfer_length) ?
                    task->task_expected_xfer_length : len;
        } else {
                fast_path = 1;
        }

        if (len == 0) {
                stmf_scsilib_send_status(task, STATUS_GOOD, 0);
                return;
        }

        if (sbd_ats_handling_before_io(task, sl, lba, blkcount) !=
            SBD_SUCCESS) {
                if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) {
                        stmf_scsilib_send_status(task, STATUS_BUSY, 0);
                }
                return;
        }
        /*
         * Determine if this read can directly use DMU buffers.
         */
        if (sbd_zcopy & (2|1) &&            /* Debug switch */
            initial_dbuf == NULL &&             /* No PP buffer passed in */
            sl->sl_flags & SL_CALL_ZVOL &&       /* zvol backing store */
            (task->task_additional_flags &
            TASK_AF_ACCEPT_LU_DBUF) &&          /* PP allows it */
            !fua_bit) {
                /*
                 * Reduced copy path
                 */
                uint32_t copy_threshold, minsize;
                int ret;

                /*
                 * The sl_access_state_lock will be held shared
                 * for the entire request and released when all
                 * dbufs have completed.
                 */
                rw_enter(&sl->sl_access_state_lock, RW_READER);
                if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
                        rw_exit(&sl->sl_access_state_lock);
                        sbd_ats_remove_by_task(task);
                        stmf_scsilib_send_status(task, STATUS_CHECK,
                            STMF_SAA_READ_ERROR);
                        return;
                }

                /*
                 * Check if setup is more expensive than copying the data.
                 *
                 * Use the global over-ride sbd_zcopy_threshold if set.
                 */
                copy_threshold = (sbd_copy_threshold > 0) ?
                    sbd_copy_threshold : task->task_copy_threshold;
                minsize = len;
                if (len < copy_threshold &&
                    (dbuf = stmf_alloc_dbuf(task, len, &minsize, 0)) != 0) {

                        ret = sbd_copy_rdwr(task, laddr, dbuf,
                            SBD_CMD_SCSI_READ, 0);
                        /* done with the backend */
                        rw_exit(&sl->sl_access_state_lock);
                        sbd_ats_remove_by_task(task);
                        if (ret != 0) {
                                /* backend error */
                                stmf_scsilib_send_status(task, STATUS_CHECK,
                                    STMF_SAA_READ_ERROR);
                        } else {
                                /* send along good data */
                                dbuf->db_relative_offset = 0;
                                dbuf->db_data_size = len;
                                dbuf->db_flags = DB_SEND_STATUS_GOOD |
                                    DB_DIRECTION_TO_RPORT;
                                /* XXX keep for FW? */
                                DTRACE_PROBE4(sbd__xfer,
                                    struct scsi_task *, task,
                                    struct stmf_data_buf *, dbuf,
                                    uint64_t, laddr, uint32_t, len);
                                (void) stmf_xfer_data(task, dbuf,
                                    STMF_IOF_LU_DONE);
                        }
                        return;
                }

                /* committed to reduced copy */
                if (task->task_lu_private) {
                        scmd = (sbd_cmd_t *)task->task_lu_private;
                } else {
                        scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
                            KM_SLEEP);
                        task->task_lu_private = scmd;
                }
                /*
                 * Setup scmd to track read progress.
                 */
                scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED;
                scmd->cmd_type = SBD_CMD_SCSI_READ;
                scmd->nbufs = 0;
                scmd->addr = laddr;
                scmd->len = len;
                scmd->current_ro = 0;
                /*
                 * Kick-off the read.
                 */
                sbd_do_sgl_read_xfer(task, scmd, 1);
                return;
        }

        if (initial_dbuf == NULL) {
                uint32_t maxsize, minsize, old_minsize;

                maxsize = (len > (128*1024)) ? 128*1024 : len;
                minsize = maxsize >> 2;
                do {
                        old_minsize = minsize;
                        initial_dbuf = stmf_alloc_dbuf(task, maxsize,
                            &minsize, 0);
                } while ((initial_dbuf == NULL) && (old_minsize > minsize) &&
                    (minsize >= 512));
                if (initial_dbuf == NULL) {
                        sbd_ats_remove_by_task(task);
                        stmf_scsilib_send_status(task, STATUS_QFULL, 0);
                        return;
                }
        }
        dbuf = initial_dbuf;

        if ((dbuf->db_buf_size >= len) && fast_path &&
            (dbuf->db_sglist_length == 1)) {
                if (sbd_data_read(sl, task, laddr, (uint64_t)len,
                    dbuf->db_sglist[0].seg_addr) == STMF_SUCCESS) {
                        dbuf->db_relative_offset = 0;
                        dbuf->db_data_size = len;
                        dbuf->db_flags = DB_SEND_STATUS_GOOD |
                            DB_DIRECTION_TO_RPORT;
                        /* XXX keep for FW? */
                        DTRACE_PROBE4(sbd__xfer, struct scsi_task *, task,
                            struct stmf_data_buf *, dbuf,
                            uint64_t, laddr, uint32_t, len);
                        (void) stmf_xfer_data(task, dbuf, STMF_IOF_LU_DONE);
                } else {
                        stmf_scsilib_send_status(task, STATUS_CHECK,
                            STMF_SAA_READ_ERROR);
                }
                sbd_ats_remove_by_task(task);
                return;
        }

        if (task->task_lu_private) {
                scmd = (sbd_cmd_t *)task->task_lu_private;
        } else {
                scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
                task->task_lu_private = scmd;
        }
        scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED;
        scmd->cmd_type = SBD_CMD_SCSI_READ;
        scmd->nbufs = 1;
        scmd->addr = laddr;
        scmd->len = len;
        scmd->current_ro = 0;

        sbd_do_read_xfer(task, scmd, dbuf);
}

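/*
 * Request more write data from the remote port, allocating a fresh
 * dbuf when the current one cannot be reused, while staying within
 * the port's buffer limits.
 */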
void
sbd_do_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
    struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
{
        uint32_t len;
        int bufs_to_take;

        if (ATOMIC32_GET(scmd->len) == 0) {
                goto DO_WRITE_XFER_DONE;
        }

        /* Let's try not to hog all the buffers the port has. */
        bufs_to_take = ((task->task_max_nbufs > 2) &&
            (task->task_cmd_xfer_length < (32 * 1024))) ? 2 :
            task->task_max_nbufs;

        if ((dbuf != NULL) &&
            ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
                /* free current dbuf and allocate a new one */
                stmf_free_dbuf(task, dbuf);
                dbuf = NULL;
        }
        if (ATOMIC8_GET(scmd->nbufs) >= bufs_to_take) {
                goto DO_WRITE_XFER_DONE;
        }
        if (dbuf == NULL) {
                uint32_t maxsize, minsize, old_minsize;

                maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 :
                    ATOMIC32_GET(scmd->len);
                minsize = maxsize >> 2;
                do {
                        old_minsize = minsize;
                        dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
                } while ((dbuf == NULL) && (old_minsize > minsize) &&
                    (minsize >= 512));
                if (dbuf == NULL) {
                        if (ATOMIC8_GET(scmd->nbufs) == 0) {
                                stmf_abort(STMF_QUEUE_TASK_ABORT, task,
                                    STMF_ALLOC_FAILURE, NULL);
                        }
                        return;
                }
        }

        len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size :
            ATOMIC32_GET(scmd->len);

        dbuf->db_relative_offset = scmd->current_ro;
        dbuf->db_data_size = len;
        dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
        (void) stmf_xfer_data(task, dbuf, 0);
        /* outstanding port xfers and bufs used */
        atomic_inc_8(&scmd->nbufs);
        atomic_add_32(&scmd->len, -len);
        scmd->current_ro += len;

        if ((ATOMIC32_GET(scmd->len) != 0) &&
            (ATOMIC8_GET(scmd->nbufs) < bufs_to_take)) {
                sbd_do_write_xfer(task, scmd, NULL, 0);
        }
        return;

DO_WRITE_XFER_DONE:
        if (dbuf != NULL) {
                stmf_free_dbuf(task, dbuf);
        }
}

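/*
 * Write-side counterpart of sbd_do_sgl_read_xfer: allocate zvol (DMU)
 * buffers and queue transfers from the remote port directly into them.
 * Caller must hold sl_access_state_lock as reader.
 */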
1153 void
1154 sbd_do_sgl_write_xfer(struct scsi_task *task, sbd_cmd_t *scmd, int first_xfer)
1155 {
1156         sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1157         sbd_zvol_io_t *zvio;
1158         int ret;
1159         uint32_t xfer_len, max_len, first_len;
1160         stmf_status_t xstat;
1161         stmf_data_buf_t *dbuf;
1162         uint_t nblks;
1163         uint64_t blksize = sl->sl_blksize;
1164         uint64_t offset;
1165         size_t db_private_sz;
1166         uintptr_t pad;
1167 
1168         ASSERT(rw_read_held(&sl->sl_access_state_lock));
1169         ASSERT((sl->sl_flags & SL_MEDIA_LOADED) != 0);
1170 
1171         /*
1172          * Calculate the limits on xfer_len to the minimum of :
1173          *    - task limit
1174          *    - lun limit
1175          *    - sbd global limit if set
1176          *    - first xfer limit if set
1177          *
1178          * First, protect against silly over-ride value
1179          */
1180         if (sbd_max_xfer_len && ((sbd_max_xfer_len % DEV_BSIZE) != 0)) {
1181                 cmn_err(CE_WARN, "sbd_max_xfer_len invalid %d, resetting\n",
1182                     sbd_max_xfer_len);
1183                 sbd_max_xfer_len = 0;
1184         }
1185         if (sbd_1st_xfer_len && ((sbd_1st_xfer_len % DEV_BSIZE) != 0)) {
1186                 cmn_err(CE_WARN, "sbd_1st_xfer_len invalid %d, resetting\n",
1187                     sbd_1st_xfer_len);
1188                 sbd_1st_xfer_len = 0;
1189         }
1190 
1191         max_len = MIN(task->task_max_xfer_len, sl->sl_max_xfer_len);
1192         if (sbd_max_xfer_len)
1193                 max_len = MIN(max_len, sbd_max_xfer_len);
1194         /*
1195          * Special case the first xfer if hints are set.
1196          */
1197         if (first_xfer && (sbd_1st_xfer_len || task->task_1st_xfer_len)) {
1198                 /* global over-ride has precedence */
1199                 if (sbd_1st_xfer_len)
1200                         first_len = sbd_1st_xfer_len;
1201                 else
1202                         first_len = task->task_1st_xfer_len;
1203         } else {
1204                 first_len = 0;
1205         }
1206 
1207 
1208         while (ATOMIC32_GET(scmd->len) &&
1209             ATOMIC8_GET(scmd->nbufs) < task->task_max_nbufs) {
1210                 xfer_len = MIN(max_len, ATOMIC32_GET(scmd->len));
1211                 if (first_len) {
1212                         xfer_len = MIN(xfer_len, first_len);
1213                         first_len = 0;
1214                 }
1215                 if (xfer_len < ATOMIC32_GET(scmd->len)) {
1216                         /*
1217                          * Attempt to end xfer on a block boundary.
1218                          * The only way this does not happen is if the
1219                          * xfer_len is small enough to stay contained
1220                          * within the same block.
1221                          */
1222                         uint64_t xfer_offset, xfer_aligned_end;
1223 
1224                         xfer_offset = scmd->addr + scmd->current_ro;
1225                         xfer_aligned_end =
1226                             P2ALIGN(xfer_offset+xfer_len, blksize);
1227                         if (xfer_aligned_end > xfer_offset)
1228                                 xfer_len = xfer_aligned_end - xfer_offset;
1229                 }
1230                 /*
1231                  * Allocate object to track the write and reserve
1232                  * enough space for scatter/gather list.
1233                  */
1234                 offset = scmd->addr + scmd->current_ro;
1235                 nblks = sbd_zvol_numsegs(sl, offset, xfer_len);
1236                 db_private_sz = sizeof (*zvio) + sizeof (uintptr_t) /* PAD */ +
1237                     (nblks * sizeof (stmf_sglist_ent_t));
1238                 dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, db_private_sz,
1239                     AF_DONTZERO);
1240 
1241                 /*
1242                  * Setup the dbuf
1243                  *
1244                  * XXX Framework does not handle variable length sglists
1245                  * properly, so setup db_lu_private and db_port_private
1246                  * fields here. db_stmf_private is properly set for
1247                  * calls to stmf_free.
1248                  */
1249                 if (dbuf->db_port_private == NULL) {
1250                         /*
1251                          * XXX Framework assigns space to PP after db_sglist[0]
1252                          */
1253                         cmn_err(CE_PANIC, "db_port_private == NULL");
1254                 }
1255                 pad = (uintptr_t)&dbuf->db_sglist[nblks];
1256                 dbuf->db_lu_private = (void *)P2ROUNDUP(pad, sizeof (pad));
1257                 dbuf->db_port_private = NULL;
1258                 dbuf->db_buf_size = xfer_len;
1259                 dbuf->db_data_size = xfer_len;
1260                 dbuf->db_relative_offset = scmd->current_ro;
1261                 dbuf->db_sglist_length = (uint16_t)nblks;
1262                 dbuf->db_xfer_status = 0;
1263                 dbuf->db_handle = 0;
1264                 dbuf->db_flags = (DB_DONT_CACHE | DB_DONT_REUSE |
1265                     DB_DIRECTION_FROM_RPORT | DB_LU_DATA_BUF);
1266 
1267                 zvio = dbuf->db_lu_private;
1268                 zvio->zvio_offset = offset;
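                /*
                 * Sketch of the private area reserved by db_private_sz:
                 * the variable-length db_sglist[] (nblks entries) comes
                 * first, then up to sizeof (uintptr_t) bytes of padding
                 * from the P2ROUNDUP above, then the zvio I/O-tracking
                 * structure for this write against the zvol.
                 */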
1269 
1270                 /* get the buffers */
1271                 ret = sbd_zvol_alloc_write_bufs(sl, dbuf);
1272                 if (ret != 0) {
1273                         /*
1274                          * Could not allocate buffers from the backend;
1275                          * treat it like an IO error.
1276                          */
1277                         stmf_free(dbuf);
1278                         scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1279                         if (ATOMIC8_GET(scmd->nbufs) == 0) {
1280                                 /*
1281                                  * Nothing queued, so no completions coming
1282                                  */
1283                                 sbd_ats_remove_by_task(task);
1284                                 stmf_scsilib_send_status(task, STATUS_CHECK,
1285                                     STMF_SAA_WRITE_ERROR);
1286                                 rw_exit(&sl->sl_access_state_lock);
1287                         }
1288                         /*
1289                  * Completions of previous buffers will clean up.
1290                          */
1291                         return;
1292                 }
1293 
1294                 /*
1295                  * Allow PP to do setup
1296                  */
1297                 xstat = stmf_setup_dbuf(task, dbuf, 0);
1298                 if (xstat != STMF_SUCCESS) {
1299                         /*
1300                          * This could happen if the driver cannot get the
1301                          * DDI resources it needs for this request.
1302                          * If other dbufs are queued, try again when the next
1303                          * one completes, otherwise give up.
1304                          */
1305                         sbd_zvol_rele_write_bufs_abort(sl, dbuf);
1306                         stmf_free(dbuf);
1307                         if (ATOMIC8_GET(scmd->nbufs) > 0) {
1308                                 /* completion of previous dbuf will retry */
1309                                 return;
1310                         }
1311                         /*
1312                          * Done with this command.
1313                          */
1314                         scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1315                         sbd_ats_remove_by_task(task);
1316                         if (first_xfer)
1317                                 stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1318                         else
1319                                 stmf_scsilib_send_status(task, STATUS_CHECK,
1320                                     STMF_SAA_WRITE_ERROR);
1321                         rw_exit(&sl->sl_access_state_lock);
1322                         return;
1323                 }
1324 
1325                 /*
1326                  * dbuf is now queued on task
1327                  */
1328                 atomic_inc_8(&scmd->nbufs);
1329 
1330                 xstat = stmf_xfer_data(task, dbuf, 0);
1331                 switch (xstat) {
1332                 case STMF_SUCCESS:
1333                         break;
1334                 case STMF_BUSY:
1335                         /*
1336                          * The dbuf is queued on the task, but unknown
1337                          * to the PP, thus no completion will occur.
1338                          */
1339                         sbd_zvol_rele_write_bufs_abort(sl, dbuf);
1340                         stmf_teardown_dbuf(task, dbuf);
1341                         stmf_free(dbuf);
1342                         atomic_dec_8(&scmd->nbufs);
1343                         if (ATOMIC8_GET(scmd->nbufs) > 0) {
1344                                 /* completion of previous dbuf will retry */
1345                                 return;
1346                         }
1347                         /*
1348                          * Done with this command.
1349                          */
1350                         scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1351                         sbd_ats_remove_by_task(task);
1352                         if (first_xfer)
1353                                 stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1354                         else
1355                                 stmf_scsilib_send_status(task, STATUS_CHECK,
1356                                     STMF_SAA_WRITE_ERROR);
1357                         rw_exit(&sl->sl_access_state_lock);
1358                         return;
1359                 case STMF_ABORTED:
1360                         /*
1361                  * The completion code will clean up.
1362                          */
1363                         scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1364                         return;
1365                 }
1366                 /*
1367                  * Update the xfer progress.
1368                  */
1369                 atomic_add_32(&scmd->len, -xfer_len);
1370                 scmd->current_ro += xfer_len;
1371         }
1372 }
1373 
1374 void
1375 sbd_handle_write_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
1376     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
1377 {
1378         sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1379         uint64_t laddr;
1380         uint32_t buflen, iolen;
1381         int ndx;
1382         uint8_t op = task->task_cdb[0];
1383         boolean_t fua_bit = B_FALSE;
1384 
1385         if (ATOMIC8_GET(scmd->nbufs) > 0) {
1386                 /*
1387                  * Decrement the count to indicate the port xfer
1388                  * into the dbuf has completed even though the buf is
1389                  * still in use here in the LU provider.
1390                  */
1391                 atomic_dec_8(&scmd->nbufs);
1392         }
1393 
1394         if (dbuf->db_xfer_status != STMF_SUCCESS) {
1395                 sbd_ats_remove_by_task(task);
1396                 stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1397                     dbuf->db_xfer_status, NULL);
1398                 return;
1399         }
1400 
1401         if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1402                 goto WRITE_XFER_DONE;
1403         }
1404 
1405         if (ATOMIC32_GET(scmd->len) != 0) {
1406                 /*
1407                  * Initiate the next port xfer to occur in parallel
1408                  * with writing this buf.
1409                  */
1410                 sbd_do_write_xfer(task, scmd, NULL, 0);
1411         }
1412 
1413         /*
1414          * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
1415          * If it is then check for bit 3 being set to indicate if Forced
1416          * Unit Access is being requested. If so, we'll bypass the direct
1417          * call and handle it in sbd_data_write().
1418          */
1419         if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
1420             (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
1421                 fua_bit = B_TRUE;
1422         }
1423         laddr = scmd->addr + dbuf->db_relative_offset;
1424 
1425         /*
1426          * If this is going to a zvol, use the direct call to
1427          * sbd_zvol_copy_{read,write}. The direct call interface is
1428          * restricted to PPs that accept sglists, but that is not required.
1429          */
1430         if (sl->sl_flags & SL_CALL_ZVOL &&
1431             (task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF) &&
1432             (sbd_zcopy & (4|1)) && !fua_bit) {
1433                 int commit;
1434 
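                /*
                 * commit is set only when this dbuf is the command's last
                 * outstanding buffer: nothing left to request and no other
                 * port xfers in flight.
                 */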
1435                 commit = (ATOMIC32_GET(scmd->len) == 0 &&
1436                     ATOMIC8_GET(scmd->nbufs) == 0);
1437                 rw_enter(&sl->sl_access_state_lock, RW_READER);
1438                 if ((sl->sl_flags & SL_MEDIA_LOADED) == 0 ||
1439                     sbd_copy_rdwr(task, laddr, dbuf, SBD_CMD_SCSI_WRITE,
1440                     commit) != STMF_SUCCESS)
1441                         scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1442                 rw_exit(&sl->sl_access_state_lock);
1443                 buflen = dbuf->db_data_size;
1444         } else {
1445                 for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
1446                     (ndx < dbuf->db_sglist_length); ndx++) {
1447                         iolen = min(dbuf->db_data_size - buflen,
1448                             dbuf->db_sglist[ndx].seg_length);
1449                         if (iolen == 0)
1450                                 break;
1451                         if (sbd_data_write(sl, task, laddr, (uint64_t)iolen,
1452                             dbuf->db_sglist[ndx].seg_addr) != STMF_SUCCESS) {
1453                                 scmd->flags |= SBD_SCSI_CMD_XFER_FAIL;
1454                                 break;
1455                         }
1456                         buflen += iolen;
1457                         laddr += (uint64_t)iolen;
1458                 }
1459         }
1460         task->task_nbytes_transferred += buflen;
1461 WRITE_XFER_DONE:
1462         if (ATOMIC32_GET(scmd->len) == 0 ||
1463             scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1464                 stmf_free_dbuf(task, dbuf);
1465                 if (ATOMIC8_GET(scmd->nbufs))
1466                         return; /* wait for all buffers to complete */
1467                 scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1468                 sbd_ats_remove_by_task(task);
1469                 if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
1470                         stmf_scsilib_send_status(task, STATUS_CHECK,
1471                             STMF_SAA_WRITE_ERROR);
1472                 } else {
1473                         /*
1474                          * If SYNC_WRITE flag is on then we need to flush
1475                          * cache before sending status.
1476                          * Note: this may be a no-op because of how
1477                          * SL_WRITEBACK_CACHE_DISABLE and
1478                          * SL_FLUSH_ON_DISABLED_WRITECACHE are set, but not
1479                          * worth code complexity of checking those in this code
1480                          * path, SBD_SCSI_CMD_SYNC_WRITE is rarely set.
1481                          */
1482                         if ((scmd->flags & SBD_SCSI_CMD_SYNC_WRITE) &&
1483                             (sbd_flush_data_cache(sl, 0) != SBD_SUCCESS)) {
1484                                 stmf_scsilib_send_status(task, STATUS_CHECK,
1485                                     STMF_SAA_WRITE_ERROR);
1486                         } else {
1487                                 stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1488                         }
1489                 }
1490                 return;
1491         }
1492         sbd_do_write_xfer(task, scmd, dbuf, dbuf_reusable);
1493 }
1494 
1495 /*
1496  * Return true if copy avoidance is beneficial.
1497  */
1498 static int
1499 sbd_zcopy_write_useful(scsi_task_t *task, uint64_t laddr, uint32_t len,
1500     uint64_t blksize)
1501 {
1502         /*
1503          * If there is a global copy threshold override, use it.
1504          * Otherwise use the PP value, with the caveat that at least
1505          * half the data must avoid being copied for zcopy to be useful.
1506          */
1507         if (sbd_copy_threshold > 0) {
1508                 return (len >= sbd_copy_threshold);
1509         } else {
1510                 uint64_t no_copy_span;
1511 
1512                 /* sub-blocksize writes always copy */
1513                 if (len < task->task_copy_threshold || len < blksize)
1514                         return (0);
1515                 /*
1516                  * Calculate amount of data that will avoid the copy path.
1517                  * The calculation is only valid if len >= blksize.
1518                  */
1519                 no_copy_span = P2ALIGN(laddr+len, blksize) -
1520                     P2ROUNDUP(laddr, blksize);
1521                 return (no_copy_span >= len/2);
1522         }
1523 }
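
/*
 * Worked example for the above (hypothetical values): laddr = 1536,
 * len = 8192, blksize = 4096, no global override, and the PP threshold
 * already met. no_copy_span = P2ALIGN(9728, 4096) - P2ROUNDUP(1536, 4096)
 * = 8192 - 4096 = 4096, which is exactly len/2, so copy avoidance is
 * considered useful.
 */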
1524 
1525 void
1526 sbd_handle_write(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
1527 {
1528         uint64_t lba, laddr;
1529         uint32_t len;
1530         uint8_t op = task->task_cdb[0], do_immediate_data = 0;
1531         sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1532         sbd_cmd_t *scmd;
1533         stmf_data_buf_t *dbuf;
1534         uint64_t blkcount;
1535         uint8_t sync_wr_flag = 0;
1536         boolean_t fua_bit = B_FALSE;
1537 
1538         if (sl->sl_flags & SL_WRITE_PROTECTED) {
1539                 stmf_scsilib_send_status(task, STATUS_CHECK,
1540                     STMF_SAA_WRITE_PROTECTED);
1541                 return;
1542         }
1543         /*
1544          * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
1545          * If it is then check for bit 3 being set to indicate if Forced
1546          * Unit Access is being requested. If so, we'll bypass the fast path
1547          * code to simplify support of this feature.
1548          */
1549         if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
1550             (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
1551                 fua_bit = B_TRUE;
1552         }
1553         if (op == SCMD_WRITE) {
1554                 lba = READ_SCSI21(&task->task_cdb[1], uint64_t);
1555                 len = (uint32_t)task->task_cdb[4];
1556 
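                /*
                 * Per SBC, a WRITE(6) TRANSFER LENGTH of zero means
                 * 256 blocks, not zero blocks.
                 */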
1557                 if (len == 0) {
1558                         len = 256;
1559                 }
1560         } else if (op == SCMD_WRITE_G1) {
1561                 lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1562                 len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1563         } else if (op == SCMD_WRITE_G5) {
1564                 lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1565                 len = READ_SCSI32(&task->task_cdb[6], uint32_t);
1566         } else if (op == SCMD_WRITE_G4) {
1567                 lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
1568                 len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1569         } else if (op == SCMD_WRITE_VERIFY) {
1570                 lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1571                 len = READ_SCSI16(&task->task_cdb[7], uint32_t);
1572                 sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1573         } else if (op == SCMD_WRITE_VERIFY_G5) {
1574                 lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
1575                 len = READ_SCSI32(&task->task_cdb[6], uint32_t);
1576                 sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1577         } else if (op == SCMD_WRITE_VERIFY_G4) {
1578                 lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
1579                 len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1580                 sync_wr_flag = SBD_SCSI_CMD_SYNC_WRITE;
1581         } else {
1582                 stmf_scsilib_send_status(task, STATUS_CHECK,
1583                     STMF_SAA_INVALID_OPCODE);
1584                 return;
1585         }
1586 
1587         laddr = lba << sl->sl_data_blocksize_shift;
1588         blkcount = len;
1589         len <<= sl->sl_data_blocksize_shift;
1590 
1591         if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
1592                 stmf_scsilib_send_status(task, STATUS_CHECK,
1593                     STMF_SAA_LBA_OUT_OF_RANGE);
1594                 return;
1595         }
1596 
1597         task->task_cmd_xfer_length = len;
1598         if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1599                 task->task_expected_xfer_length = len;
1600         }
1601 
1602         len = (len > task->task_expected_xfer_length) ?
1603             task->task_expected_xfer_length : len;
1604 
1605         if (len == 0) {
1606                 stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1607                 return;
1608         }
1609 
1610         if (sbd_ats_handling_before_io(task, sl, lba, blkcount) !=
1611             SBD_SUCCESS) {
1612                 if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS) {
1613                         stmf_scsilib_send_status(task, STATUS_BUSY, 0);
1614                 }
1615                 return;
1616         }
1617 
1618         if (sbd_zcopy & (4|1) &&            /* Debug switch */
1619             initial_dbuf == NULL &&             /* No PP buf passed in */
1620             sl->sl_flags & SL_CALL_ZVOL &&       /* zvol backing store */
1621             (task->task_additional_flags &
1622             TASK_AF_ACCEPT_LU_DBUF) &&          /* PP allows it */
1623             sbd_zcopy_write_useful(task, laddr, len, sl->sl_blksize) &&
1624             !fua_bit) {
1625 
1626                 /*
1627                  * XXX Note that disallowing initial_dbuf will eliminate
1628                  * iSCSI from participating. For small writes, that is
1629                  * probably ok. For large writes, it may be best to just
1630                  * copy the data from the initial dbuf and use zcopy for
1631                  * the rest.
1632                  */
1633                 rw_enter(&sl->sl_access_state_lock, RW_READER);
1634                 if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
1635                         rw_exit(&sl->sl_access_state_lock);
1636                         sbd_ats_remove_by_task(task);
1637                         stmf_scsilib_send_status(task, STATUS_CHECK,
1638                             STMF_SAA_READ_ERROR);
1639                         return;
1640                 }
1641                 /*
1642                  * Setup scmd to track the write progress.
1643                  */
1644                 if (task->task_lu_private) {
1645                         scmd = (sbd_cmd_t *)task->task_lu_private;
1646                 } else {
1647                         scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t),
1648                             KM_SLEEP);
1649                         task->task_lu_private = scmd;
1650                 }
1651                 scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED |
1652                     sync_wr_flag;
1653                 scmd->cmd_type = SBD_CMD_SCSI_WRITE;
1654                 scmd->nbufs = 0;
1655                 scmd->addr = laddr;
1656                 scmd->len = len;
1657                 scmd->current_ro = 0;
1658                 sbd_do_sgl_write_xfer(task, scmd, 1);
1659                 return;
1660         }
1661 
1662         if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
1663                 if (initial_dbuf->db_data_size > len) {
1664                         if (initial_dbuf->db_data_size >
1665                             task->task_expected_xfer_length) {
1666                                 /* protocol error */
1667                                 stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1668                                     STMF_INVALID_ARG, NULL);
1669                                 return;
1670                         }
1671                         initial_dbuf->db_data_size = len;
1672                 }
1673                 do_immediate_data = 1;
1674         }
1675         dbuf = initial_dbuf;
1676 
1677         if (task->task_lu_private) {
1678                 scmd = (sbd_cmd_t *)task->task_lu_private;
1679         } else {
1680                 scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1681                 task->task_lu_private = scmd;
1682         }
1683         scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_ATS_RELATED |
1684             sync_wr_flag;
1685         scmd->cmd_type = SBD_CMD_SCSI_WRITE;
1686         scmd->nbufs = 0;
1687         scmd->addr = laddr;
1688         scmd->len = len;
1689         scmd->current_ro = 0;
1690 
1691         if (do_immediate_data) {
1692                 /*
1693                  * Account for data passed in this write command
1694                  */
1695                 (void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
1696                 atomic_add_32(&scmd->len, -dbuf->db_data_size);
1697                 scmd->current_ro += dbuf->db_data_size;
1698                 dbuf->db_xfer_status = STMF_SUCCESS;
1699                 sbd_handle_write_xfer_completion(task, scmd, dbuf, 0);
1700         } else {
1701                 sbd_do_write_xfer(task, scmd, dbuf, 0);
1702         }
1703 }
1704 
1705 /*
1706  * Utility routine to handle small, non-performance-critical data
1707  * transfers to the initiators. dbuf is an initial data buf (if any);
1708  * 'p' points to the source buffer; cdb_xfer_size is the transfer size
1709  * per the CDB; cmd_xfer_size is the actual amount of data to transfer
1710  * (the size of the data at 'p'). A usage sketch follows the function.
1711  */
1712 void
1713 sbd_handle_short_read_transfers(scsi_task_t *task, stmf_data_buf_t *dbuf,
1714     uint8_t *p, uint32_t cdb_xfer_size, uint32_t cmd_xfer_size)
1715 {
1716         uint32_t bufsize, ndx;
1717         sbd_cmd_t *scmd;
1718 
1719         cmd_xfer_size = min(cmd_xfer_size, cdb_xfer_size);
1720 
1721         task->task_cmd_xfer_length = cmd_xfer_size;
1722         if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1723                 task->task_expected_xfer_length = cmd_xfer_size;
1724         } else {
1725                 cmd_xfer_size = min(cmd_xfer_size,
1726                     task->task_expected_xfer_length);
1727         }
1728 
1729         if (cmd_xfer_size == 0) {
1730                 stmf_scsilib_send_status(task, STATUS_CHECK,
1731                     STMF_SAA_INVALID_FIELD_IN_CDB);
1732                 return;
1733         }
1734         if (dbuf == NULL) {
1735                 uint32_t minsize = cmd_xfer_size;
1736 
1737                 dbuf = stmf_alloc_dbuf(task, cmd_xfer_size, &minsize, 0);
1738         }
1739         if (dbuf == NULL) {
1740                 stmf_scsilib_send_status(task, STATUS_QFULL, 0);
1741                 return;
1742         }
1743 
1744         for (bufsize = 0, ndx = 0; bufsize < cmd_xfer_size; ndx++) {
1745                 uint8_t *d;
1746                 uint32_t s;
1747 
1748                 d = dbuf->db_sglist[ndx].seg_addr;
1749                 s = min((cmd_xfer_size - bufsize),
1750                     dbuf->db_sglist[ndx].seg_length);
1751                 bcopy(p+bufsize, d, s);
1752                 bufsize += s;
1753         }
1754         dbuf->db_relative_offset = 0;
1755         dbuf->db_data_size = cmd_xfer_size;
1756         dbuf->db_flags = DB_DIRECTION_TO_RPORT;
1757 
1758         if (task->task_lu_private == NULL) {
1759                 task->task_lu_private =
1760                     kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
1761         }
1762         scmd = (sbd_cmd_t *)task->task_lu_private;
1763 
1764         scmd->cmd_type = SBD_CMD_SMALL_READ;
1765         scmd->flags = SBD_SCSI_CMD_ACTIVE;
1766         (void) stmf_xfer_data(task, dbuf, 0);
1767 }
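
/*
 * Typical call pattern for sbd_handle_short_read_transfers() (taken from
 * the READ CAPACITY handler below): the caller builds the payload in a
 * local buffer and hands it off,
 *
 *	sbd_handle_short_read_transfers(task, initial_dbuf, p, 8, 8);
 *
 * after which the completion is routed to
 * sbd_handle_short_read_xfer_completion().
 */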
1768 
1769 void
1770 sbd_handle_short_read_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
1771     struct stmf_data_buf *dbuf)
1772 {
1773         if (dbuf->db_xfer_status != STMF_SUCCESS) {
1774                 stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1775                     dbuf->db_xfer_status, NULL);
1776                 return;
1777         }
1778         task->task_nbytes_transferred = dbuf->db_data_size;
1779         scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1780         stmf_scsilib_send_status(task, STATUS_GOOD, 0);
1781 }
1782 
1783 void
1784 sbd_handle_short_write_transfers(scsi_task_t *task,
1785     stmf_data_buf_t *dbuf, uint32_t cdb_xfer_size)
1786 {
1787         sbd_cmd_t *scmd;
1788 
1789         task->task_cmd_xfer_length = cdb_xfer_size;
1790         if (task->task_additional_flags & TASK_AF_NO_EXPECTED_XFER_LENGTH) {
1791                 task->task_expected_xfer_length = cdb_xfer_size;
1792         } else {
1793                 cdb_xfer_size = min(cdb_xfer_size,
1794                     task->task_expected_xfer_length);
1795         }
1796 
1797         if (cdb_xfer_size == 0) {
1798                 stmf_scsilib_send_status(task, STATUS_CHECK,
1799                     STMF_SAA_INVALID_FIELD_IN_CDB);
1800                 return;
1801         }
1802         if (task->task_lu_private == NULL) {
1803                 task->task_lu_private = kmem_zalloc(sizeof (sbd_cmd_t),
1804                     KM_SLEEP);
1805         } else {
1806                 bzero(task->task_lu_private, sizeof (sbd_cmd_t));
1807         }
1808         scmd = (sbd_cmd_t *)task->task_lu_private;
1809         scmd->cmd_type = SBD_CMD_SMALL_WRITE;
1810         scmd->flags = SBD_SCSI_CMD_ACTIVE;
1811         scmd->len = cdb_xfer_size;
1812         if (dbuf == NULL) {
1813                 uint32_t minsize = cdb_xfer_size;
1814 
1815                 dbuf = stmf_alloc_dbuf(task, cdb_xfer_size, &minsize, 0);
1816                 if (dbuf == NULL) {
1817                         stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1818                             STMF_ALLOC_FAILURE, NULL);
1819                         return;
1820                 }
1821                 dbuf->db_data_size = cdb_xfer_size;
1822                 dbuf->db_relative_offset = 0;
1823                 dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
1824                 (void) stmf_xfer_data(task, dbuf, 0);
1825         } else {
1826                 if (dbuf->db_data_size < cdb_xfer_size) {
1827                         stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1828                             STMF_ABORTED, NULL);
1829                         return;
1830                 }
1831                 dbuf->db_data_size = cdb_xfer_size;
1832                 sbd_handle_short_write_xfer_completion(task, dbuf);
1833         }
1834 }
1835 
1836 void
1837 sbd_handle_short_write_xfer_completion(scsi_task_t *task,
1838     stmf_data_buf_t *dbuf)
1839 {
1840         sbd_cmd_t *scmd;
1841         stmf_status_t st_ret;
1842         sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1843 
1844         /*
1845          * For now let's assume we will get only one sglist element
1846          * for short writes. If that ever changes, we should allocate
1847          * a local buffer and copy all the sg elements to one linear space.
1848          */
1849         if ((dbuf->db_xfer_status != STMF_SUCCESS) ||
1850             (dbuf->db_sglist_length > 1)) {
1851                 stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1852                     dbuf->db_xfer_status, NULL);
1853                 return;
1854         }
1855 
1856         task->task_nbytes_transferred = dbuf->db_data_size;
1857         scmd = (sbd_cmd_t *)task->task_lu_private;
1858         scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
1859 
1860         /* Let's find out who to call */
1861         switch (task->task_cdb[0]) {
1862         case SCMD_MODE_SELECT:
1863         case SCMD_MODE_SELECT_G1:
1864                 if (sl->sl_access_state == SBD_LU_STANDBY) {
1865                         st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1866                         if (st_ret != STMF_SUCCESS) {
1867                                 stmf_scsilib_send_status(task, STATUS_CHECK,
1868                                     STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1869                         }
1870                 } else {
1871                         sbd_handle_mode_select_xfer(task,
1872                             dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1873                 }
1874                 break;
1875         case SCMD_UNMAP:
1876                 sbd_handle_unmap_xfer(task,
1877                     dbuf->db_sglist[0].seg_addr, dbuf->db_data_size);
1878                 break;
1879         case SCMD_EXTENDED_COPY:
1880                 sbd_handle_xcopy_xfer(task, dbuf->db_sglist[0].seg_addr);
1881                 break;
1882         case SCMD_PERSISTENT_RESERVE_OUT:
1883                 if (sl->sl_access_state == SBD_LU_STANDBY) {
1884                         st_ret = stmf_proxy_scsi_cmd(task, dbuf);
1885                         if (st_ret != STMF_SUCCESS) {
1886                                 stmf_scsilib_send_status(task, STATUS_CHECK,
1887                                     STMF_SAA_LU_NO_ACCESS_UNAVAIL);
1888                         }
1889                 } else {
1890                         sbd_handle_pgr_out_data(task, dbuf);
1891                 }
1892                 break;
1893         default:
1894                 /* This should never happen */
1895                 stmf_abort(STMF_QUEUE_TASK_ABORT, task,
1896                     STMF_ABORTED, NULL);
1897         }
1898 }
1899 
1900 void
1901 sbd_handle_read_capacity(struct scsi_task *task,
1902     struct stmf_data_buf *initial_dbuf)
1903 {
1904         sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1905         uint32_t cdb_len;
1906         uint8_t p[32];
1907         uint64_t s;
1908         uint16_t blksize;
1909 
1910         s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
1911         s--;
1912         blksize = ((uint16_t)1) << sl->sl_data_blocksize_shift;
1913 
1914         switch (task->task_cdb[0]) {
1915         case SCMD_READ_CAPACITY:
1916                 if (s & 0xffffffff00000000ull) {
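                        /*
                         * The last LBA does not fit in 32 bits; report
                         * 0xFFFFFFFF so the initiator falls back to
                         * READ CAPACITY(16).
                         */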
1917                         p[0] = p[1] = p[2] = p[3] = 0xFF;
1918                 } else {
1919                         p[0] = (s >> 24) & 0xff;
1920                         p[1] = (s >> 16) & 0xff;
1921                         p[2] = (s >> 8) & 0xff;
1922                         p[3] = s & 0xff;
1923                 }
1924                 p[4] = 0; p[5] = 0;
1925                 p[6] = (blksize >> 8) & 0xff;
1926                 p[7] = blksize & 0xff;
1927                 sbd_handle_short_read_transfers(task, initial_dbuf, p, 8, 8);
1928                 break;
1929 
1930         case SCMD_SVC_ACTION_IN_G4:
1931                 cdb_len = READ_SCSI32(&task->task_cdb[10], uint32_t);
1932                 bzero(p, 32);
1933                 p[0] = (s >> 56) & 0xff;
1934                 p[1] = (s >> 48) & 0xff;
1935                 p[2] = (s >> 40) & 0xff;
1936                 p[3] = (s >> 32) & 0xff;
1937                 p[4] = (s >> 24) & 0xff;
1938                 p[5] = (s >> 16) & 0xff;
1939                 p[6] = (s >> 8) & 0xff;
1940                 p[7] = s & 0xff;
1941                 p[10] = (blksize >> 8) & 0xff;
1942                 p[11] = blksize & 0xff;
1943                 if (sl->sl_flags & SL_UNMAP_ENABLED) {
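                        /*
                         * Byte 14, bit 7 of the READ CAPACITY(16) data is
                         * LBPME: logical block provisioning (UNMAP) is
                         * enabled on this LU.
                         */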
1944                         p[14] = 0x80;
1945                 }
1946                 sbd_handle_short_read_transfers(task, initial_dbuf, p,
1947                     cdb_len, 32);
1948                 break;
1949         }
1950 }
1951 
1952 void
1953 sbd_calc_geometry(uint64_t s, uint16_t blksize, uint8_t *nsectors,
1954     uint8_t *nheads, uint32_t *ncyl)
1955 {
1956         if (s < (4ull * 1024ull * 1024ull * 1024ull)) {
1957                 *nsectors = 32;
1958                 *nheads = 8;
1959         } else {
1960                 *nsectors = 254;
1961                 *nheads = 254;
1962         }
1963         *ncyl = s / ((uint64_t)blksize * (uint64_t)(*nsectors) *
1964             (uint64_t)(*nheads));
1965 }
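
/*
 * Example (hypothetical LU): s = 100 GiB with 512-byte blocks exceeds the
 * 4 GiB cutoff, so nsectors = 254 and nheads = 254, giving
 * ncyl = 107374182400 / (512 * 254 * 254) = 3250.
 */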
1966 
1967 void
1968 sbd_handle_mode_sense(struct scsi_task *task,
1969     struct stmf_data_buf *initial_dbuf, uint8_t *buf)
1970 {
1971         sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
1972         uint32_t cmd_size, n;
1973         uint8_t *cdb;
1974         uint32_t ncyl;
1975         uint8_t nsectors, nheads;
1976         uint8_t page, ctrl, header_size;
1977         uint16_t nbytes;
1978         uint8_t *p;
1979         uint64_t s = sl->sl_lu_size;
1980         uint32_t dev_spec_param_offset;
1981 
1982         p = buf;        /* buf is assumed to be zeroed out and large enough */
1983         n = 0;
1984         cdb = &task->task_cdb[0];
1985         page = cdb[2] & 0x3F;
1986         ctrl = (cdb[2] >> 6) & 3;
1987 
1988         if (cdb[0] == SCMD_MODE_SENSE) {
1989                 cmd_size = cdb[4];
1990                 header_size = 4;
1991                 dev_spec_param_offset = 2;
1992         } else {
1993                 cmd_size = READ_SCSI16(&cdb[7], uint32_t);
1994                 header_size = 8;
1995                 dev_spec_param_offset = 3;
1996         }
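        /*
         * For reference: the MODE SENSE(6) header is 4 bytes with a 1-byte
         * mode data length field, while the MODE SENSE(10) header is 8
         * bytes with a 2-byte length field; that difference is why the
         * device-specific parameter lands at offset 2 vs. offset 3.
         */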
1997 
1998         /* Now validate the command */
1999         if ((cdb[2] != 0) && (page != MODEPAGE_ALLPAGES) &&
2000             (page != MODEPAGE_CACHING) && (page != MODEPAGE_CTRL_MODE) &&
2001             (page != MODEPAGE_FORMAT) && (page != MODEPAGE_GEOMETRY)) {
2002                 stmf_scsilib_send_status(task, STATUS_CHECK,
2003                     STMF_SAA_INVALID_FIELD_IN_CDB);
2004                 return;
2005         }
2006 
2007         /* We will update the length in the mode header at the end */
2008 
2009         /* Block dev device specific param in mode param header has wp bit */
2010         if (sl->sl_flags & SL_WRITE_PROTECTED) {
2011                 p[n + dev_spec_param_offset] = BIT_7;
2012         }
2013         n += header_size;
2014         /* We are not going to return any block descriptor */
2015 
2016         nbytes = ((uint16_t)1) << sl->sl_data_blocksize_shift;
2017         sbd_calc_geometry(s, nbytes, &nsectors, &nheads, &ncyl);
2018 
2019         if ((page == MODEPAGE_FORMAT) || (page == MODEPAGE_ALLPAGES)) {
2020                 p[n] = 0x03;
2021                 p[n+1] = 0x16;
2022                 if (ctrl != 1) {
2023                         p[n + 11] = nsectors;
2024                         p[n + 12] = nbytes >> 8;
2025                         p[n + 13] = nbytes & 0xff;
2026                         p[n + 20] = 0x80;
2027                 }
2028                 n += 24;
2029         }
2030         if ((page == MODEPAGE_GEOMETRY) || (page == MODEPAGE_ALLPAGES)) {
2031                 p[n] = 0x04;
2032                 p[n + 1] = 0x16;
2033                 if (ctrl != 1) {
2034                         p[n + 2] = ncyl >> 16;
2035                         p[n + 3] = ncyl >> 8;
2036                         p[n + 4] = ncyl & 0xff;
2037                         p[n + 5] = nheads;
2038                         p[n + 20] = 0x15;
2039                         p[n + 21] = 0x18;
2040                 }
2041                 n += 24;
2042         }
2043         if ((page == MODEPAGE_CACHING) || (page == MODEPAGE_ALLPAGES)) {
2044                 struct mode_caching *mode_caching_page;
2045 
2046                 mode_caching_page = (struct mode_caching *)&p[n];
2047 
2048                 mode_caching_page->mode_page.code = MODEPAGE_CACHING;
2049                 mode_caching_page->mode_page.ps = 1; /* A saveable page */
2050                 mode_caching_page->mode_page.length = 0x12;
2051 
2052                 switch (ctrl) {
2053                 case (0):
2054                         /* Current */
2055                         if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) == 0) {
2056                                 mode_caching_page->wce = 1;
2057                         }
2058                         break;
2059 
2060                 case (1):
2061                         /* Changeable */
2062                         if ((sl->sl_flags &
2063                             SL_WRITEBACK_CACHE_SET_UNSUPPORTED) == 0) {
2064                                 mode_caching_page->wce = 1;
2065                         }
2066                         break;
2067 
2068                 default:
2069                         if ((sl->sl_flags &
2070                             SL_SAVED_WRITE_CACHE_DISABLE) == 0) {
2071                                 mode_caching_page->wce = 1;
2072                         }
2073                         break;
2074                 }
2075                 n += (sizeof (struct mode_page) +
2076                     mode_caching_page->mode_page.length);
2077         }
2078         if ((page == MODEPAGE_CTRL_MODE) || (page == MODEPAGE_ALLPAGES)) {
2079                 struct mode_control_scsi3 *mode_control_page;
2080 
2081                 mode_control_page = (struct mode_control_scsi3 *)&p[n];
2082 
2083                 mode_control_page->mode_page.code = MODEPAGE_CTRL_MODE;
2084                 mode_control_page->mode_page.length =
2085                     PAGELENGTH_MODE_CONTROL_SCSI3;
2086                 if (ctrl != 1) {
2087                         /* If not looking for changeable values, report this. */
2088                         mode_control_page->que_mod = CTRL_QMOD_UNRESTRICT;
2089                 }
2090                 n += (sizeof (struct mode_page) +
2091                     mode_control_page->mode_page.length);
2092         }
2093 
2094         if (cdb[0] == SCMD_MODE_SENSE) {
2095                 if (n > 255) {
2096                         stmf_scsilib_send_status(task, STATUS_CHECK,
2097                             STMF_SAA_INVALID_FIELD_IN_CDB);
2098                         return;
2099                 }
2100                 /*
2101                  * The mode data length field does not count the header
2102                  * bytes already included in n, so report the byte count
2103                  * minus the header size.
2104                  */
2105                 buf[0] = (n - header_size) & 0xff;
2106         } else {
2107                 /* Byte count minus header length field size. */
2108                 buf[1] = (n - header_size) & 0xff;
2109                 buf[0] = ((n - header_size) >> 8) & 0xff;
2110         }
2111 
2112         sbd_handle_short_read_transfers(task, initial_dbuf, buf,
2113             cmd_size, n);
2114 }
2115 
2116 void
2117 sbd_handle_mode_select(scsi_task_t *task, stmf_data_buf_t *dbuf)
2118 {
2119         uint32_t cmd_xfer_len;
2120 
2121         if (task->task_cdb[0] == SCMD_MODE_SELECT) {
2122                 cmd_xfer_len = (uint32_t)task->task_cdb[4];
2123         } else {
2124                 cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
2125         }
2126 
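        /*
         * CDB byte 1 must have the Page Format (PF) bit set and every
         * other bit clear, except Save Pages (SP, bit 0), which is acted
         * on later in sbd_handle_mode_select_xfer().
         */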
2127         if ((task->task_cdb[1] & 0xFE) != 0x10) {
2128                 stmf_scsilib_send_status(task, STATUS_CHECK,
2129                     STMF_SAA_INVALID_FIELD_IN_CDB);
2130                 return;
2131         }
2132 
2133         if (cmd_xfer_len == 0) {
2134                 /* zero byte mode selects are allowed */
2135                 stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2136                 return;
2137         }
2138 
2139         sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2140 }
2141 
2142 void
2143 sbd_handle_mode_select_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2144 {
2145         sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2146         sbd_it_data_t *it;
2147         int hdr_len, bd_len;
2148         sbd_status_t sret;
2149         int i;
2150 
2151         if (task->task_cdb[0] == SCMD_MODE_SELECT) {
2152                 hdr_len = 4;
2153         } else {
2154                 hdr_len = 8;
2155         }
2156 
2157         if (buflen < hdr_len)
2158                 goto mode_sel_param_len_err;
2159 
2160         bd_len = hdr_len == 4 ? buf[3] : READ_SCSI16(&buf[6], int);
2161 
2162         if (buflen < (hdr_len + bd_len + 2))
2163                 goto mode_sel_param_len_err;
2164 
2165         buf += hdr_len + bd_len;
2166         buflen -= hdr_len + bd_len;
2167 
2168         if ((buf[0] != 8) || (buflen != ((uint32_t)buf[1] + 2))) {
2169                 goto mode_sel_param_len_err;
2170         }
2171 
2172         if (buf[2] & 0xFB) {
2173                 goto mode_sel_param_field_err;
2174         }
2175 
2176         for (i = 3; i < (buf[1] + 2); i++) {
2177                 if (buf[i]) {
2178                         goto mode_sel_param_field_err;
2179                 }
2180         }
2181 
2182         sret = SBD_SUCCESS;
2183 
2184         /* All good. Let's handle the write cache change, if any. */
2185         if (buf[2] & BIT_2) {
2186                 sret = sbd_wcd_set(0, sl);
2187         } else {
2188                 sret = sbd_wcd_set(1, sl);
2189         }
2190 
2191         if (sret != SBD_SUCCESS) {
2192                 stmf_scsilib_send_status(task, STATUS_CHECK,
2193                     STMF_SAA_WRITE_ERROR);
2194                 return;
2195         }
2196 
2197         /* set on the device passed, now set the flags */
2198         mutex_enter(&sl->sl_lock);
2199         if (buf[2] & BIT_2) {
2200                 sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
2201         } else {
2202                 sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
2203         }
2204 
2205         for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
2206                 if (it == task->task_lu_itl_handle)
2207                         continue;
2208                 it->sbd_it_ua_conditions |= SBD_UA_MODE_PARAMETERS_CHANGED;
2209         }
2210 
2211         if (task->task_cdb[1] & 1) {
2212                 if (buf[2] & BIT_2) {
2213                         sl->sl_flags &= ~SL_SAVED_WRITE_CACHE_DISABLE;
2214                 } else {
2215                         sl->sl_flags |= SL_SAVED_WRITE_CACHE_DISABLE;
2216                 }
2217                 mutex_exit(&sl->sl_lock);
2218                 sret = sbd_write_lu_info(sl);
2219         } else {
2220                 mutex_exit(&sl->sl_lock);
2221         }
2222         if (sret == SBD_SUCCESS) {
2223                 stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2224         } else {
2225                 stmf_scsilib_send_status(task, STATUS_CHECK,
2226                     STMF_SAA_WRITE_ERROR);
2227         }
2228         return;
2229 
2230 mode_sel_param_len_err:
2231         stmf_scsilib_send_status(task, STATUS_CHECK,
2232             STMF_SAA_PARAM_LIST_LENGTH_ERROR);
2233         return;
2234 mode_sel_param_field_err:
2235         stmf_scsilib_send_status(task, STATUS_CHECK,
2236             STMF_SAA_INVALID_FIELD_IN_PARAM_LIST);
2237 }
2238 
2239 /*
2240  * Command support added from SPC-4 r24
2241  * Supports info type 0, 2, 127
2242  */
2243 void
2244 sbd_handle_identifying_info(struct scsi_task *task,
2245     stmf_data_buf_t *initial_dbuf)
2246 {
2247         sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2248         uint8_t *cdb;
2249         uint32_t cmd_size;
2250         uint32_t param_len;
2251         uint32_t xfer_size;
2252         uint8_t info_type;
2253         uint8_t *buf, *p;
2254 
2255         cdb = &task->task_cdb[0];
2256         cmd_size = READ_SCSI32(&cdb[6], uint32_t);
2257         info_type = cdb[10] >> 1;
2258 
2259         /* Validate the command */
2260         if (cmd_size < 4) {
2261                 stmf_scsilib_send_status(task, STATUS_CHECK,
2262                     STMF_SAA_INVALID_FIELD_IN_CDB);
2263                 return;
2264         }
2265 
2266         p = buf = kmem_zalloc(260, KM_SLEEP);
2267 
2268         switch (info_type) {
2269                 case 0:
2270                         /*
2271                          * No value is supplied but this info type
2272                          * is mandatory.
2273                          */
2274                         xfer_size = 4;
2275                         break;
2276                 case 2:
2277                         mutex_enter(&sl->sl_lock);
2278                         param_len = strlcpy((char *)(p+4), sl->sl_alias, 256);
2279                         mutex_exit(&sl->sl_lock);
2280                         /* text info must be null terminated */
2281                         if (++param_len > 256)
2282                                 param_len = 256;
2283                         SCSI_WRITE16(p+2, param_len);
2284                         xfer_size = param_len + 4;
2285                         break;
2286                 case 127:
2287                         /* descriptor types 0 and 2 are supported */
2288                         SCSI_WRITE16(p+2, 8); /* set param length */
2289                         p += 8;
2290                         *p = 4; /* type 2 in the high 7 bits (2 << 1) */
2291                         p += 2;
2292                         SCSI_WRITE16(p, 256); /* 256 max length */
2293                         xfer_size = 12;
2294                         break;
2295                 default:
2296                         stmf_scsilib_send_status(task, STATUS_CHECK,
2297                             STMF_SAA_INVALID_FIELD_IN_CDB);
2298                         kmem_free(buf, 260);
2299                         return;
2300         }
2301         sbd_handle_short_read_transfers(task, initial_dbuf, buf,
2302             cmd_size, xfer_size);
2303         kmem_free(buf, 260);
2304 }
2305 
2306 /*
2307  * This function parses through a string, passed to it as a pointer to
2308  * a string, by advancing the pointer to the first non-space character,
2309  * and returns the length of that first run of non-space characters.
2310  * Multiple management URLs are stored as a space-delimited string in
2311  * the sl_mgmt_url field of sbd_lu_t; this function retrieves one URL
2312  * at a time.
2313  *
2314  * i/p : pointer to pointer to a URL string
2315  * o/p : advances the pointer past leading whitespace; returns URL length
2316  */
2317 uint16_t
2318 sbd_parse_mgmt_url(char **url_addr)
2319 {
2320         uint16_t url_length = 0;
2321         char *url;
2322         url = *url_addr;
2323 
2324         while (*url != '\0') {
2325                 if (*url == ' ' || *url == '\t' || *url == '\n') {
2326                         (*url_addr)++;
2327                         url = *url_addr;
2328                 } else {
2329                         break;
2330                 }
2331         }
2332 
2333         while (*url != '\0') {
2334                 if (*url == ' ' || *url == '\t' ||
2335                     *url == '\n' || *url == '\0') {
2336                         break;
2337                 }
2338                 url++;
2339                 url_length++;
2340         }
2341         return (url_length);
2342 }
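
/*
 * Usage sketch (hypothetical string): with url pointing at
 * "http://a http://b", sbd_parse_mgmt_url(&url) leaves url at the first
 * 'h' and returns 8; the caller advances url by the returned length and
 * calls again to retrieve "http://b".
 */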
2343 
2344 /* Try to make this the size of a kmem allocation cache. */
2345 static uint_t sbd_write_same_optimal_chunk = 128 * 1024;
2346 
2347 static sbd_status_t
2348 sbd_write_same_data(struct scsi_task *task, sbd_cmd_t *scmd)
2349 {
2350         sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2351         uint64_t addr, len, sz_done;
2352         uint32_t big_buf_size, xfer_size, off;
2353         uint8_t *big_buf;
2354         sbd_status_t ret;
2355 
2356         if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
2357                 addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
2358                 len = READ_SCSI16(&task->task_cdb[7], uint64_t);
2359         } else {
2360                 addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
2361                 len = READ_SCSI32(&task->task_cdb[10], uint64_t);
2362         }
2363         addr <<= sl->sl_data_blocksize_shift;
2364         len <<= sl->sl_data_blocksize_shift;
2365 
2366         /*
2367          * Reminders:
2368          *    "len" is total size of what we wish to "write same".
2369          *
2370          *    xfer_size will be scmd->trans_data_len, which is the length
2371          *    of the pattern we wish to replicate over "len".  We replicate
2372          *    "xfer_size" of pattern over "len".
2373          *
2374          *    big_buf_size is set to an ideal actual-write size for an output
2375          *    operation.  It may be the same as "len".  If it's not, it should
2376          *    be an exact multiple of "xfer_size" so we don't get pattern
2377          *    breakage until the very end of "len".
2378          */
2379         big_buf_size = len > sbd_write_same_optimal_chunk ?
2380             sbd_write_same_optimal_chunk : (uint32_t)len;
2381         xfer_size = scmd->trans_data_len;
2382 
2383         /*
2384          * All transfers should be an integral multiple of the sector size.
2385          */
2386         ASSERT((big_buf_size % xfer_size) == 0);
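        /*
         * Example (hypothetical request): len = 1 MiB of WRITE SAME with a
         * single 512-byte pattern gives xfer_size = 512 and big_buf_size =
         * 128 KiB; the pattern is replicated 256 times into big_buf, and
         * the media is then written in eight 128 KiB I/Os rather than 2048
         * single-block I/Os.
         */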
2387 
2388         /*
2389          * Don't sleep for the allocation, and don't make the system
2390          * reclaim memory.  Accept more, smaller I/Os when memory is low.
2391          */
2392         big_buf = kmem_alloc(big_buf_size, KM_NOSLEEP_LAZY);
2393 
2394         if (big_buf == NULL) {
2395                 /*
2396                  * Just send it in terms of the transmitted data.  This
2397                  * will be very slow.
2398                  */
2399                 DTRACE_PROBE1(write__same__low__memory, uint64_t, big_buf_size);
2400                 big_buf = scmd->trans_data;
2401                 big_buf_size = scmd->trans_data_len;
2402         } else {
2403                 /*
2404                  * We already ASSERT()ed big_buf_size is an integral multiple
2405                  * of xfer_size.
2406                  */
2407                 for (off = 0; off < big_buf_size; off += xfer_size)
2408                         bcopy(scmd->trans_data, big_buf + off, xfer_size);
2409         }
2410 
2411         /* Do the actual I/O.  Recycle xfer_size now to be write size. */
2412         DTRACE_PROBE1(write__same__io__begin, uint64_t, len);
2413         for (sz_done = 0; sz_done < len; sz_done += (uint64_t)xfer_size) {
2414                 xfer_size = ((big_buf_size + sz_done) <= len) ? big_buf_size :
2415                     len - sz_done;
2416                 ret = sbd_data_write(sl, task, addr + sz_done,
2417                     (uint64_t)xfer_size, big_buf);
2418                 if (ret != SBD_SUCCESS)
2419                         break;
2420         }
2421         DTRACE_PROBE2(write__same__io__end, uint64_t, len, uint64_t, sz_done);
2422 
2423         if (big_buf != scmd->trans_data)
2424                 kmem_free(big_buf, big_buf_size);
2425 
2426         return (ret);
2427 }
2428 
2429 static void
2430 sbd_write_same_release_resources(struct scsi_task *task)
2431 {
2432         sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
2433 
2434         if (scmd->nbufs == 0xFF)
2435                 cmn_err(CE_WARN, "%s invalid buffer count %x",
2436                     __func__, scmd->nbufs);
2437         if ((scmd->trans_data_len != 0) && (scmd->trans_data != NULL))
2438                 kmem_free(scmd->trans_data, scmd->trans_data_len);
2439         scmd->trans_data = NULL;
2440         scmd->trans_data_len = 0;
2441         scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
2442 }
2443 
2444 static void
2445 sbd_handle_write_same_xfer_completion(struct scsi_task *task, sbd_cmd_t *scmd,
2446     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
2447 {
2448         uint64_t laddr;
2449         uint32_t buflen, iolen;
2450         int ndx, ret;
2451 
2452         if (ATOMIC8_GET(scmd->nbufs) > 0) {
2453                 atomic_dec_8(&scmd->nbufs);
2454         }
2455 
2456         if (dbuf->db_xfer_status != STMF_SUCCESS) {
2457                 sbd_write_same_release_resources(task);
2458                 sbd_ats_remove_by_task(task);
2459                 stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2460                     dbuf->db_xfer_status, NULL);
2461                 return;
2462         }
2463 
2464         if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2465                 goto write_same_xfer_done;
2466         }
2467 
2468         /* If this is an unnecessary callback, just return. */
2469         if (((scmd->flags & SBD_SCSI_CMD_TRANS_DATA) == 0) ||
2470             ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0) ||
2471             (scmd->trans_data == NULL)) {
2472                 sbd_ats_remove_by_task(task);
2473                 scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
2474                 return;
2475         }
2476 
2477         if (ATOMIC32_GET(scmd->len) != 0) {
2478                 /*
2479                  * Initiate the next port xfer to occur in parallel
2480                  * with writing this buf.
2481                  */
2482                 sbd_do_write_same_xfer(task, scmd, NULL, 0);
2483         }
2484 
2485         laddr = dbuf->db_relative_offset;
2486 
2487         for (buflen = 0, ndx = 0; (buflen < dbuf->db_data_size) &&
2488             (ndx < dbuf->db_sglist_length); ndx++) {
2489                 iolen = min(dbuf->db_data_size - buflen,
2490                     dbuf->db_sglist[ndx].seg_length);
2491                 if (iolen == 0)
2492                         break;
2493                 bcopy(dbuf->db_sglist[ndx].seg_addr, &scmd->trans_data[laddr],
2494                     iolen);
2495                 buflen += iolen;
2496                 laddr += (uint64_t)iolen;
2497         }
2498         task->task_nbytes_transferred += buflen;
2499 
2500 write_same_xfer_done:
2501         if (ATOMIC32_GET(scmd->len) == 0 ||
2502             scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2503                 stmf_free_dbuf(task, dbuf);
2504                 if (ATOMIC8_GET(scmd->nbufs) > 0)
2505                         return;
2506                 scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
2507                 if (scmd->flags & SBD_SCSI_CMD_XFER_FAIL) {
2508                         sbd_ats_remove_by_task(task);
2509                         sbd_write_same_release_resources(task);
2510                         stmf_scsilib_send_status(task, STATUS_CHECK,
2511                             STMF_SAA_WRITE_ERROR);
2512                 } else {
2513                         ret = sbd_write_same_data(task, scmd);
2514                         sbd_ats_remove_by_task(task);
2515                         sbd_write_same_release_resources(task);
2516                         if (ret != SBD_SUCCESS) {
2517                                 stmf_scsilib_send_status(task, STATUS_CHECK,
2518                                     STMF_SAA_WRITE_ERROR);
2519                         } else {
2520                                 stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2521                         }
2522                 }
2523                 return;
2524         }
2525         sbd_do_write_same_xfer(task, scmd, dbuf, dbuf_reusable);
2526 }
2527 
2528 static void
2529 sbd_do_write_same_xfer(struct scsi_task *task, sbd_cmd_t *scmd,
2530     struct stmf_data_buf *dbuf, uint8_t dbuf_reusable)
2531 {
2532         uint32_t len;
2533 
2534         if (ATOMIC32_GET(scmd->len) == 0) {
2535                 if (dbuf != NULL)
2536                         stmf_free_dbuf(task, dbuf);
2537                 return;
2538         }
2539 
2540         if ((dbuf != NULL) &&
2541             ((dbuf->db_flags & DB_DONT_REUSE) || (dbuf_reusable == 0))) {
2542                 /* free current dbuf and allocate a new one */
2543                 stmf_free_dbuf(task, dbuf);
2544                 dbuf = NULL;
2545         }
2546         if (dbuf == NULL) {
2547                 uint32_t maxsize, minsize, old_minsize;
2548 
2549                 maxsize = (ATOMIC32_GET(scmd->len) > (128*1024)) ? 128*1024 :
2550                     ATOMIC32_GET(scmd->len);
2551                 minsize = maxsize >> 2;
2552                 do {
2553                         old_minsize = minsize;
2554                         dbuf = stmf_alloc_dbuf(task, maxsize, &minsize, 0);
2555                 } while ((dbuf == NULL) && (old_minsize > minsize) &&
2556                     (minsize >= 512));
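                /*
                 * stmf_alloc_dbuf() may lower minsize when it fails; keep
                 * retrying while it keeps shrinking, and give up once it
                 * stops making progress or would drop below one 512-byte
                 * sector.
                 */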
2557                 if (dbuf == NULL) {
2558                         sbd_ats_remove_by_task(task);
2559                         sbd_write_same_release_resources(task);
2560                         if (ATOMIC8_GET(scmd->nbufs) == 0) {
2561                                 stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2562                                     STMF_ALLOC_FAILURE, NULL);
2563                         }
2564                         return;
2565                 }
2566         }
2567 
2568         len = ATOMIC32_GET(scmd->len) > dbuf->db_buf_size ? dbuf->db_buf_size :
2569             ATOMIC32_GET(scmd->len);
2570 
2571         dbuf->db_relative_offset = scmd->current_ro;
2572         dbuf->db_data_size = len;
2573         dbuf->db_flags = DB_DIRECTION_FROM_RPORT;
2574         (void) stmf_xfer_data(task, dbuf, 0);
2575         /* account for the outstanding port transfer and buffer in use */
2576         atomic_inc_8(&scmd->nbufs);
2577         atomic_add_32(&scmd->len, -len);
2578         scmd->current_ro += len;
2579 }
2580 
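     /*
      * WRITE SAME (10/16).  Rejected unless write same support is enabled
      * (HardwareAcceleratedInit).  With the UNMAP bit set the range is
      * deallocated via sbd_unmap(); otherwise a single block of data is
      * transferred from the initiator and then replicated across the
      * range by sbd_write_same_data().
      */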
2581 static void
2582 sbd_handle_write_same(scsi_task_t *task, struct stmf_data_buf *initial_dbuf)
2583 {
2584         sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2585         uint64_t addr, len;
2586         sbd_cmd_t *scmd;
2587         stmf_data_buf_t *dbuf;
2588         uint8_t unmap;
2589         uint8_t do_immediate_data = 0;
2590 
2591         if (HardwareAcceleratedInit == 0) {
2592                 stmf_scsilib_send_status(task, STATUS_CHECK,
2593                     STMF_SAA_INVALID_OPCODE);
2594                 return;
2595         }
2596 
2597         task->task_cmd_xfer_length = 0;
2598         if (task->task_additional_flags &
2599             TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2600                 task->task_expected_xfer_length = 0;
2601         }
2602         if (sl->sl_flags & SL_WRITE_PROTECTED) {
2603                 stmf_scsilib_send_status(task, STATUS_CHECK,
2604                     STMF_SAA_WRITE_PROTECTED);
2605                 return;
2606         }
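              /* only the UNMAP bit (0x08) may be set in CDB byte 1 */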
2607         if (task->task_cdb[1] & 0xF7) {
2608                 stmf_scsilib_send_status(task, STATUS_CHECK,
2609                     STMF_SAA_INVALID_FIELD_IN_CDB);
2610                 return;
2611         }
2612         unmap = task->task_cdb[1] & 0x08;
2613 
2614         if (unmap && ((sl->sl_flags & SL_UNMAP_ENABLED) == 0)) {
2615                 stmf_scsilib_send_status(task, STATUS_CHECK,
2616                     STMF_SAA_INVALID_FIELD_IN_CDB);
2617                 return;
2618         }
2619 
2620         if (task->task_cdb[0] == SCMD_WRITE_SAME_G1) {
2621                 addr = READ_SCSI32(&task->task_cdb[2], uint64_t);
2622                 len = READ_SCSI16(&task->task_cdb[7], uint64_t);
2623         } else {
2624                 addr = READ_SCSI64(&task->task_cdb[2], uint64_t);
2625                 len = READ_SCSI32(&task->task_cdb[10], uint64_t);
2626         }
2627 
2628         if (len == 0) {
2629                 stmf_scsilib_send_status(task, STATUS_CHECK,
2630                     STMF_SAA_INVALID_FIELD_IN_CDB);
2631                 return;
2632         }
2633 
2634         if (sbd_ats_handling_before_io(task, sl, addr, len) !=
2635             SBD_SUCCESS) {
2636                 if (stmf_task_poll_lu(task, 10) != STMF_SUCCESS)
2637                         stmf_scsilib_send_status(task, STATUS_BUSY, 0);
2638                 return;
2639         }
2640 
2641         addr <<= sl->sl_data_blocksize_shift;
2642         len <<= sl->sl_data_blocksize_shift;
2643 
2644         /* Check if the command is for the unmap function */
2645         if (unmap) {
2646                 dkioc_free_list_t *dfl = kmem_zalloc(DFL_SZ(1), KM_SLEEP);
2647 
2648                 dfl->dfl_num_exts = 1;
2649                 dfl->dfl_exts[0].dfle_start = addr;
2650                 dfl->dfl_exts[0].dfle_length = len;
2651                 if (sbd_unmap(sl, dfl) != 0) {
2652                         stmf_scsilib_send_status(task, STATUS_CHECK,
2653                             STMF_SAA_LBA_OUT_OF_RANGE);
2654                 } else {
2655                         stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2656                 }
2657                 dfl_free(dfl);
2658                 return;
2659         }
2660 
2661         /* Write same function */
2662 
2663         task->task_cmd_xfer_length = 1 << sl->sl_data_blocksize_shift;
2664         if (task->task_additional_flags &
2665             TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2666                 task->task_expected_xfer_length = task->task_cmd_xfer_length;
2667         }
2668         if ((addr + len) > sl->sl_lu_size) {
2669                 sbd_ats_remove_by_task(task);
2670                 stmf_scsilib_send_status(task, STATUS_CHECK,
2671                     STMF_SAA_LBA_OUT_OF_RANGE);
2672                 return;
2673         }
2674 
2675         /* For the rest of this I/O the transfer length is 1 block */
2676         len = ((uint64_t)1) << sl->sl_data_blocksize_shift;
2677 
2678         /* Some basic checks */
2679         if ((len == 0) || (len != task->task_expected_xfer_length)) {
2680                 sbd_ats_remove_by_task(task);
2681                 stmf_scsilib_send_status(task, STATUS_CHECK,
2682                     STMF_SAA_INVALID_FIELD_IN_CDB);
2683                 return;
2684         }
2685 
2687         if ((initial_dbuf != NULL) && (task->task_flags & TF_INITIAL_BURST)) {
2688                 if (initial_dbuf->db_data_size > len) {
2689                         if (initial_dbuf->db_data_size >
2690                             task->task_expected_xfer_length) {
2691                                 /* protocol error */
2692                                 sbd_ats_remove_by_task(task);
2693                                 stmf_abort(STMF_QUEUE_TASK_ABORT, task,
2694                                     STMF_INVALID_ARG, NULL);
2695                                 return;
2696                         }
2697                         initial_dbuf->db_data_size = (uint32_t)len;
2698                 }
2699                 do_immediate_data = 1;
2700         }
2701         dbuf = initial_dbuf;
2702 
2703         if (task->task_lu_private) {
2704                 scmd = (sbd_cmd_t *)task->task_lu_private;
2705         } else {
2706                 scmd = (sbd_cmd_t *)kmem_alloc(sizeof (sbd_cmd_t), KM_SLEEP);
2707                 task->task_lu_private = scmd;
2708         }
2709         scmd->flags = SBD_SCSI_CMD_ACTIVE | SBD_SCSI_CMD_TRANS_DATA |
2710             SBD_SCSI_CMD_ATS_RELATED;
2711         scmd->cmd_type = SBD_CMD_SCSI_WRITE;
2712         scmd->nbufs = 0;
2713         scmd->len = (uint32_t)len;
2714         scmd->trans_data_len = (uint32_t)len;
2715         scmd->trans_data = kmem_alloc((size_t)len, KM_SLEEP);
2716         scmd->current_ro = 0;
2717 
2718         if (do_immediate_data) {
2719                 /*
2720                  * Account for data passed in this write command
2721                  */
2722                 (void) stmf_xfer_data(task, dbuf, STMF_IOF_STATS_ONLY);
2723                 atomic_add_32(&scmd->len, -dbuf->db_data_size);
2724                 scmd->current_ro += dbuf->db_data_size;
2725                 dbuf->db_xfer_status = STMF_SUCCESS;
2726                 sbd_handle_write_same_xfer_completion(task, scmd, dbuf, 0);
2727         } else {
2728                 sbd_do_write_same_xfer(task, scmd, dbuf, 0);
2729         }
2730 }
2731 
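     /*
      * UNMAP.  The CDB supplies only the parameter list length (bytes
      * 7-8); the extent descriptors arrive in a short data-out transfer
      * that is completed by sbd_handle_unmap_xfer().
      */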
2732 static void
2733 sbd_handle_unmap(scsi_task_t *task, stmf_data_buf_t *dbuf)
2734 {
2735         sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2736         uint32_t cmd_xfer_len;
2737 
2738         if (sbd_unmap_enable == 0) {
2739                 stmf_scsilib_send_status(task, STATUS_CHECK,
2740                     STMF_SAA_INVALID_OPCODE);
2741                 return;
2742         }
2743 
2744         if (sl->sl_flags & SL_WRITE_PROTECTED) {
2745                 stmf_scsilib_send_status(task, STATUS_CHECK,
2746                     STMF_SAA_WRITE_PROTECTED);
2747                 return;
2748         }
2749         cmd_xfer_len = READ_SCSI16(&task->task_cdb[7], uint32_t);
2750 
2751         if (task->task_cdb[1] & 1) {
2752                 stmf_scsilib_send_status(task, STATUS_CHECK,
2753                     STMF_SAA_INVALID_FIELD_IN_CDB);
2754                 return;
2755         }
2756 
2757         if (cmd_xfer_len == 0) {
2758                 task->task_cmd_xfer_length = 0;
2759                 if (task->task_additional_flags &
2760                     TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2761                         task->task_expected_xfer_length = 0;
2762                 }
2763                 stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2764                 return;
2765         }
2766 
2767         sbd_handle_short_write_transfers(task, dbuf, cmd_xfer_len);
2768 }
2769 
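     /*
      * Parse the UNMAP parameter list: an 8-byte header (UNMAP data
      * length, block descriptor data length) followed by 16-byte
      * descriptors, each carrying an 8-byte starting LBA and a 4-byte
      * block count.
      */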
2770 static void
2771 sbd_handle_unmap_xfer(scsi_task_t *task, uint8_t *buf, uint32_t buflen)
2772 {
2773         sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2774         uint32_t ulen, dlen, num_desc;
2775         uint64_t addr, len;
2776         uint8_t *p;
2777         dkioc_free_list_t *dfl;
2778         int ret;
2779         int i;
2780 
2781         if (buflen < 24) {
2782                 stmf_scsilib_send_status(task, STATUS_CHECK,
2783                     STMF_SAA_INVALID_FIELD_IN_CDB);
2784                 return;
2785         }
2786         ulen = READ_SCSI16(buf, uint32_t);
2787         dlen = READ_SCSI16(buf + 2, uint32_t);
2788         num_desc = dlen >> 4;
2789         if (((ulen + 2) != buflen) || ((dlen + 8) != buflen) || (dlen & 0xf) ||
2790             (num_desc == 0)) {
2791                 stmf_scsilib_send_status(task, STATUS_CHECK,
2792                     STMF_SAA_INVALID_FIELD_IN_CDB);
2793                 return;
2794         }
2795 
2796         dfl = kmem_zalloc(DFL_SZ(num_desc), KM_SLEEP);
2797         dfl->dfl_num_exts = num_desc;
2798         /*
2799          * This should use ATS locking, but that was disabled by the
2800          * changes to ZFS to take advantage of TRIM in SSDs.
2801          *
2802          * Since the entire list is passed to ZFS in one call, ATS
2803          * locking is not done.  This may be detectable, and if it is,
2804          * then the entire list needs to be locked before the unmap
2805          * and unlocked after the unmap completes.
2806          */
2807         for (p = buf + 8, i = 0; num_desc; num_desc--, p += 16, i++) {
2808                 addr = READ_SCSI64(p, uint64_t);
2809                 len = READ_SCSI32(p+8, uint64_t);
2810                 addr <<= sl->sl_data_blocksize_shift;
2811                 len <<= sl->sl_data_blocksize_shift;
2812 
2813                 /* Prepare a list of extents to unmap */
2814                 dfl->dfl_exts[i].dfle_start = addr;
2815                 dfl->dfl_exts[i].dfle_length = len;
2816 
2817                 /* an ATS overlap would be released here if locking were done */
2818         }
2819         ASSERT(i == dfl->dfl_num_exts);
2820 
2821         /* Finally execute the unmap operations in a single step */
2822         ret = sbd_unmap(sl, dfl);
2823         dfl_free(dfl);
2824         if (ret != 0) {
2825                 stmf_scsilib_send_status(task, STATUS_CHECK,
2826                     STMF_SAA_LBA_OUT_OF_RANGE);
2827                 return;
2828         }
2829 
2830         stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2831 }
2832 
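     /*
      * INQUIRY.  Standard inquiry data is built in-line; EVPD requests
      * are served for pages 00h (supported pages), 80h (unit serial
      * number), 83h (device identification), 85h (management network
      * addresses), 86h (extended inquiry data), b0h (block limits) and
      * b2h (logical block provisioning).
      */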
2833 void
2834 sbd_handle_inquiry(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
2835 {
2836         sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
2837         uint8_t *cdbp = (uint8_t *)&task->task_cdb[0];
2838         uint8_t *p;
2839         uint8_t byte0;
2840         uint8_t page_length;
2841         uint16_t bsize = 512;
2842         uint16_t cmd_size;
2843         uint32_t xfer_size = 4;
2844         uint32_t mgmt_url_size = 0;
2845         uint8_t exp;
2846         uint64_t s;
2847         char *mgmt_url = NULL;
2848 
2849 
2850         byte0 = DTYPE_DIRECT;
2851         /*
2852          * Basic protocol checks.
2853          */
2854 
2855         if ((((cdbp[1] & 1) == 0) && cdbp[2]) || cdbp[5]) {
2856                 stmf_scsilib_send_status(task, STATUS_CHECK,
2857                     STMF_SAA_INVALID_FIELD_IN_CDB);
2858                 return;
2859         }
2860 
2861         /*
2862          * Zero byte allocation length is not an error.  Just
2863          * return success.
2864          */
2865 
2866         cmd_size = (((uint16_t)cdbp[3]) << 8) | cdbp[4];
2867 
2868         if (cmd_size == 0) {
2869                 task->task_cmd_xfer_length = 0;
2870                 if (task->task_additional_flags &
2871                     TASK_AF_NO_EXPECTED_XFER_LENGTH) {
2872                         task->task_expected_xfer_length = 0;
2873                 }
2874                 stmf_scsilib_send_status(task, STATUS_GOOD, 0);
2875                 return;
2876         }
2877 
2878         /*
2879          * Standard inquiry
2880          */
2881 
2882         if ((cdbp[1] & 1) == 0) {
2883                 int     i;
2884                 struct scsi_inquiry *inq;
2885 
2886                 p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2887                 inq = (struct scsi_inquiry *)p;
2888 
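                      /* 5-byte header plus 69 bytes of additional data */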
2889                 page_length = 69;
2890                 xfer_size = page_length + 5;
2891 
2892                 inq->inq_dtype = DTYPE_DIRECT;
2893                 inq->inq_ansi = 5;   /* SPC-3 */
2894                 inq->inq_hisup = 1;
2895                 inq->inq_rdf = 2;    /* Response data format for SPC-3 */
2896                 inq->inq_len = page_length;
2897 
2898                 inq->inq_tpgs = TPGS_FAILOVER_IMPLICIT;
2899                 inq->inq_cmdque = 1;
2900                 inq->inq_3pc = 1;
2901 
2902                 if (sl->sl_flags & SL_VID_VALID) {
2903                         bcopy(sl->sl_vendor_id, inq->inq_vid, 8);
2904                 } else {
2905                         bcopy(sbd_vendor_id, inq->inq_vid, 8);
2906                 }
2907 
2908                 if (sl->sl_flags & SL_PID_VALID) {
2909                         bcopy(sl->sl_product_id, inq->inq_pid, 16);
2910                 } else {
2911                         bcopy(sbd_product_id, inq->inq_pid, 16);
2912                 }
2913 
2914                 if (sl->sl_flags & SL_REV_VALID) {
2915                         bcopy(sl->sl_revision, inq->inq_revision, 4);
2916                 } else {
2917                         bcopy(sbd_revision, inq->inq_revision, 4);
2918                 }
2919 
2920                 /* Adding Version Descriptors */
2921                 i = 0;
2922                 /* SAM-3 no version */
2923                 inq->inq_vd[i].inq_vd_msb = 0x00;
2924                 inq->inq_vd[i].inq_vd_lsb = 0x60;
2925                 i++;
2926 
2927                 /* transport */
2928                 switch (task->task_lport->lport_id->protocol_id) {
2929                 case PROTOCOL_FIBRE_CHANNEL:
2930                         inq->inq_vd[i].inq_vd_msb = 0x09;
2931                         inq->inq_vd[i].inq_vd_lsb = 0x00;
2932                         i++;
2933                         break;
2934 
2935                 case PROTOCOL_PARALLEL_SCSI:
2936                 case PROTOCOL_SSA:
2937                 case PROTOCOL_IEEE_1394:
2938                         /* Currently no claims of conformance */
2939                         break;
2940 
2941                 case PROTOCOL_SRP:
2942                         inq->inq_vd[i].inq_vd_msb = 0x09;
2943                         inq->inq_vd[i].inq_vd_lsb = 0x40;
2944                         i++;
2945                         break;
2946 
2947                 case PROTOCOL_iSCSI:
2948                         inq->inq_vd[i].inq_vd_msb = 0x09;
2949                         inq->inq_vd[i].inq_vd_lsb = 0x60;
2950                         i++;
2951                         break;
2952 
2953                 case PROTOCOL_SAS:
2954                 case PROTOCOL_ADT:
2955                 case PROTOCOL_ATAPI:
2956                 default:
2957                         /* Currently no claims of conformance */
2958                         break;
2959                 }
2960 
2961                 /* SPC-3 no version */
2962                 inq->inq_vd[i].inq_vd_msb = 0x03;
2963                 inq->inq_vd[i].inq_vd_lsb = 0x00;
2964                 i++;
2965 
2966                 /* SBC-2 no version */
2967                 inq->inq_vd[i].inq_vd_msb = 0x03;
2968                 inq->inq_vd[i].inq_vd_lsb = 0x20;
2969 
2970                 sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
2971                     min(cmd_size, xfer_size));
2972                 kmem_free(p, bsize);
2973 
2974                 return;
2975         }
2976 
2977         rw_enter(&sbd_global_prop_lock, RW_READER);
2978         if (sl->sl_mgmt_url) {
2979                 mgmt_url_size = strlen(sl->sl_mgmt_url);
2980                 mgmt_url = sl->sl_mgmt_url;
2981         } else if (sbd_mgmt_url) {
2982                 mgmt_url_size = strlen(sbd_mgmt_url);
2983                 mgmt_url = sbd_mgmt_url;
2984         }
2985 
2986         /*
2987          * EVPD handling
2988          */
2989 
2990         /* Default 512 bytes may not be enough, increase bsize if necessary */
2991         if (cdbp[2] == 0x83 || cdbp[2] == 0x85) {
2992                 if (bsize < cmd_size)
2993                         bsize = cmd_size;
2994         }
2995         p = (uint8_t *)kmem_zalloc(bsize, KM_SLEEP);
2996 
2997         switch (cdbp[2]) {
2998         case 0x00:
2999                 page_length = 5 + (mgmt_url_size ? 1 : 0);
3000 
3001                 if (sl->sl_flags & SL_UNMAP_ENABLED)
3002                         page_length += 1;
3003 
3004                 p[0] = byte0;
3005                 p[3] = page_length;
3006                 /* Supported VPD pages in ascending order */
3007                 /* CSTYLED */
3008                 {
3009                         uint8_t i = 5;
3010 
3011                         p[i++] = 0x80;
3012                         p[i++] = 0x83;
3013                         if (mgmt_url_size != 0)
3014                                 p[i++] = 0x85;
3015                         p[i++] = 0x86;
3016                         p[i++] = 0xb0;
3017                         if (sl->sl_flags & SL_UNMAP_ENABLED) {
3018                                 p[i++] = 0xb2;
3019                         }
3020                 }
3021                 xfer_size = page_length + 4;
3022                 break;
3023 
3024         case 0x80:
3025                 if (sl->sl_serial_no_size) {
3026                         page_length = sl->sl_serial_no_size;
3027                         bcopy(sl->sl_serial_no, p + 4, sl->sl_serial_no_size);
3028                 } else {
3029                         /* if no serial number is specified, use 4 spaces */
3030                         page_length = 4;
3031                         bcopy("    ", p + 4, 4);
3032                 }
3033                 p[0] = byte0;
3034                 p[1] = 0x80;
3035                 p[3] = page_length;
3036                 xfer_size = page_length + 4;
3037                 break;
3038 
3039         case 0x83:
3040                 xfer_size = stmf_scsilib_prepare_vpd_page83(task, p,
3041                     bsize, byte0, STMF_VPD_LU_ID|STMF_VPD_TARGET_ID|
3042                     STMF_VPD_TP_GROUP|STMF_VPD_RELATIVE_TP_ID);
3043                 break;
3044 
3045         case 0x85:
3046                 if (mgmt_url_size == 0) {
3047                         stmf_scsilib_send_status(task, STATUS_CHECK,
3048                             STMF_SAA_INVALID_FIELD_IN_CDB);
3049                         goto err_done;
3050                 } /* CSTYLED */
3051                 {
3052                         uint16_t idx, newidx, sz, url_size;
3053                         char *url;
3054 
3055                         p[0] = byte0;
3056                         p[1] = 0x85;
3057 
3058                         idx = 4;
3059                         url = mgmt_url;
3060                         url_size = sbd_parse_mgmt_url(&url);
3061                         /* Creating Network Service Descriptors */
3062                         while (url_size != 0) {
3063                                 /* Null terminated and 4 Byte aligned */
3064                                 sz = url_size + 1;
3065                                 sz += (sz % 4) ? 4 - (sz % 4) : 0;
3066                                 newidx = idx + sz + 4;
3067 
3068                                 if (newidx < bsize) {
3069                                         /*
3070                                          * SPC-3r23 : Table 320  (Sec 7.6.5)
3071                                          * (Network service descriptor format)
3072                                          *
3073                                          * Note: Hard coding service type as
3074                                          * "Storage Configuration Service".
3075                                          */
3076                                         p[idx] = 1;
3077                                         SCSI_WRITE16(p + idx + 2, sz);
3078                                         bcopy(url, p + idx + 4, url_size);
3079                                         xfer_size = newidx + 4;
3080                                 }
3081                                 idx = newidx;
3082 
3083                                 /* skip to next mgmt url if any */
3084                                 url += url_size;
3085                                 url_size = sbd_parse_mgmt_url(&url);
3086                         }
3087 
3088                         /* Total descriptor length */
3089                         SCSI_WRITE16(p + 2, idx - 4);
3090                         break;
3091                 }
3092 
3093         case 0x86:
3094                 page_length = 0x3c;
3095 
3096                 p[0] = byte0;
3097                 p[1] = 0x86;            /* Page 86 response */
3098                 p[3] = page_length;
3099 
3100                 /*
3101                  * Bits 0, 1, and 2 will need to be updated
3102                  * to reflect the queue tag handling if/when
3103                  * that is implemented.  For now, we're going to claim
3104                  * support only for the Simple task attribute.
3105                  */
3106                 p[5] = 1;
3107                 xfer_size = page_length + 4;
3108                 break;
3109 
3110         case 0xb0:
3111                 page_length = 0x3c;
3112                 p[0] = byte0;
3113                 p[1] = 0xb0;
3114                 p[3] = page_length;
3115                 p[4] = 1;
3116                 p[5] = sbd_ats_max_nblks();
3117                 if (sl->sl_flags & SL_UNMAP_ENABLED && sbd_unmap_enable) {
3118                         p[20] = (stmf_sbd_unmap_max_nblks >> 24) & 0xff;
3119                         p[21] = (stmf_sbd_unmap_max_nblks >> 16) & 0xff;
3120                         p[22] = (stmf_sbd_unmap_max_nblks >> 8) & 0xff;
3121                         p[23] = stmf_sbd_unmap_max_nblks & 0xff;
3122 
3123                         p[24] = 0;
3124                         p[25] = 0;
3125                         p[26] = 0;
3126                         p[27] = 0xFF;
3127                 }
3128                 xfer_size = page_length + 4;
3129                 break;
3130 
3131         case 0xb2:
3132                 if ((sl->sl_flags & SL_UNMAP_ENABLED) == 0) {
3133                         stmf_scsilib_send_status(task, STATUS_CHECK,
3134                             STMF_SAA_INVALID_FIELD_IN_CDB);
3135                         goto err_done;
3136                 }
3137                 page_length = 4;
3138                 p[0] = byte0;
3139                 p[1] = 0xb2;
3140                 p[3] = page_length;
3141 
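                      /*
                       * Threshold exponent (byte 4): scale until the
                       * LU's block count fits in 31 bits.
                       */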
3142                 exp = (uint8_t)sl->sl_data_blocksize_shift;
3143                 s = sl->sl_lu_size >> sl->sl_data_blocksize_shift;
3144                 while (s & ((uint64_t)0xFFFFFFFF80000000ull)) {
3145                         s >>= 1;
3146                         exp++;
3147                 }
3148                 p[4] = exp;
3149                 p[5] = 0xc0;    /* LBPU and LBPWS: UNMAP and WRITE SAME */
3150                 xfer_size = page_length + 4;
3151                 break;
3152 
3153         default:
3154                 stmf_scsilib_send_status(task, STATUS_CHECK,
3155                     STMF_SAA_INVALID_FIELD_IN_CDB);
3156                 goto err_done;
3157         }
3158 
3159         sbd_handle_short_read_transfers(task, initial_dbuf, p, cmd_size,
3160             min(cmd_size, xfer_size));
3161 err_done:
3162         kmem_free(p, bsize);
3163         rw_exit(&sbd_global_prop_lock);
3164 }
3165 
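     /*
      * Per-task allocation hook: allocate the sbd private command state
      * (sbd_cmd_t) with KM_NOSLEEP, since this is called from the I/O
      * path; fail the allocation rather than sleep.
      */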
3166 stmf_status_t
3167 sbd_task_alloc(struct scsi_task *task)
3168 {
3169         if ((task->task_lu_private =
3170             kmem_zalloc(sizeof (sbd_cmd_t), KM_NOSLEEP)) != NULL) {
3171                 sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3172                 scmd->flags = 0;
3173                 return (STMF_SUCCESS);
3174         }
3175         return (STMF_ALLOC_FAILURE);
3176 }
3177 
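     /*
      * Remove an I_T nexus: drop its PGR state, unlink it from the LU's
      * nexus list and free it.
      */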
3178 void
3179 sbd_remove_it_handle(sbd_lu_t *sl, sbd_it_data_t *it)
3180 {
3181         sbd_it_data_t **ppit;
3182 
3183         sbd_pgr_remove_it_handle(sl, it);
3184         mutex_enter(&sl->sl_lock);
3185         for (ppit = &sl->sl_it_list; *ppit != NULL;
3186             ppit = &((*ppit)->sbd_it_next)) {
3187                 if ((*ppit) == it) {
3188                         *ppit = it->sbd_it_next;
3189                         break;
3190                 }
3191         }
3192         mutex_exit(&sl->sl_lock);
3193 
3194         DTRACE_PROBE2(itl__nexus__end, stmf_lu_t *, sl->sl_lu,
3195             sbd_it_data_t *, it);
3196 
3197         kmem_free(it, sizeof (*it));
3198 }
3199 
3200 void
3201 sbd_check_and_clear_scsi2_reservation(sbd_lu_t *sl, sbd_it_data_t *it)
3202 {
3203         mutex_enter(&sl->sl_lock);
3204         if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) == 0) {
3205                 /* If we don't have any reservations, just get out. */
3206                 mutex_exit(&sl->sl_lock);
3207                 return;
3208         }
3209 
3210         if (it == NULL) {
3211                 /* Find the I_T nexus which is holding the reservation. */
3212                 for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
3213                         if (it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) {
3214                                 ASSERT(it->sbd_it_session_id ==
3215                                     sl->sl_rs_owner_session_id);
3216                                 break;
3217                         }
3218                 }
3219                 ASSERT(it != NULL);
3220         } else {
3221                 /*
3222                  * We were passed an I_T nexus. If this nexus does not hold
3223                  * the reservation, do nothing. This is why this function is
3224                  * called "check_and_clear".
3225                  */
3226                 if ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0) {
3227                         mutex_exit(&sl->sl_lock);
3228                         return;
3229                 }
3230         }
3231         it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
3232         sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
3233         mutex_exit(&sl->sl_lock);
3234 }
3235 
3236 /*
3237  * Given a LU and a task, check if the task is causing a reservation
3238  * conflict. Returns 1 in case of conflict, 0 otherwise.
3239  * Note that the LU might not be the same LU as in the task, but the
3240  * caller makes sure that the LU can be accessed.
3241  */
3242 int
3243 sbd_check_reservation_conflict(struct sbd_lu *sl, struct scsi_task *task)
3244 {
3245         sbd_it_data_t *it;
3246 
3247         it = task->task_lu_itl_handle;
3248         ASSERT(it);
3249         if (sl->sl_access_state == SBD_LU_ACTIVE) {
3250                 if (SBD_PGR_RSVD(sl->sl_pgr)) {
3251                         if (sbd_pgr_reservation_conflict(task, sl)) {
3252                                 return (1);
3253                         }
3254                 } else if ((sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) &&
3255                     ((it->sbd_it_flags & SBD_IT_HAS_SCSI2_RESERVATION) == 0)) {
3256                         if (!(SCSI2_CONFLICT_FREE_CMDS(task->task_cdb))) {
3257                                 return (1);
3258                         }
3259                 }
3260         }
3261 
3262         return (0);
3263 }
3264 
3265 /*
3266  * Keep in mind that sbd_new_task can be called multiple times for the same
3267  * task because of us calling stmf_task_poll_lu resulting in a call to
3268  * sbd_task_poll().
3269  */
3270 void
3271 sbd_new_task(struct scsi_task *task, struct stmf_data_buf *initial_dbuf)
3272 {
3273         sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3274         sbd_it_data_t *it;
3275         uint8_t cdb0, cdb1;
3276         stmf_status_t st_ret;
3277 
3278         if ((it = task->task_lu_itl_handle) == NULL) {
3279                 mutex_enter(&sl->sl_lock);
3280                 for (it = sl->sl_it_list; it != NULL; it = it->sbd_it_next) {
3281                         if (it->sbd_it_session_id ==
3282                             task->task_session->ss_session_id) {
3283                                 mutex_exit(&sl->sl_lock);
3284                                 stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3285                                 return;
3286                         }
3287                 }
3288                 it = (sbd_it_data_t *)kmem_zalloc(sizeof (*it), KM_NOSLEEP);
3289                 if (it == NULL) {
3290                         mutex_exit(&sl->sl_lock);
3291                         stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3292                         return;
3293                 }
3294                 it->sbd_it_session_id = task->task_session->ss_session_id;
3295                 bcopy(task->task_lun_no, it->sbd_it_lun, 8);
3296                 it->sbd_it_next = sl->sl_it_list;
3297                 sl->sl_it_list = it;
3298                 mutex_exit(&sl->sl_lock);
3299 
3300                 DTRACE_PROBE1(itl__nexus__start, scsi_task *, task);
3301 
3302                 sbd_pgr_initialize_it(task, it);
3303                 if (stmf_register_itl_handle(task->task_lu, task->task_lun_no,
3304                     task->task_session, it->sbd_it_session_id, it)
3305                     != STMF_SUCCESS) {
3306                         sbd_remove_it_handle(sl, it);
3307                         stmf_scsilib_send_status(task, STATUS_BUSY, 0);
3308                         return;
3309                 }
3310                 task->task_lu_itl_handle = it;
3311                 if (sl->sl_access_state != SBD_LU_STANDBY) {
3312                         it->sbd_it_ua_conditions = SBD_UA_POR;
3313                 }
3314         } else if (it->sbd_it_flags & SBD_IT_PGR_CHECK_FLAG) {
3315                 mutex_enter(&sl->sl_lock);
3316                 it->sbd_it_flags &= ~SBD_IT_PGR_CHECK_FLAG;
3317                 mutex_exit(&sl->sl_lock);
3318                 sbd_pgr_initialize_it(task, it);
3319         }
3320 
3321         if (task->task_mgmt_function) {
3322                 stmf_scsilib_handle_task_mgmt(task);
3323                 return;
3324         }
3325 
3326         /*
3327          * if we're transitioning between access
3328          * states, return NOT READY
3329          */
3330         if (sl->sl_access_state == SBD_LU_TRANSITION_TO_STANDBY ||
3331             sl->sl_access_state == SBD_LU_TRANSITION_TO_ACTIVE) {
3332                 stmf_scsilib_send_status(task, STATUS_CHECK,
3333                     STMF_SAA_LU_NO_ACCESS_TRANSITION);
3334                 return;
3335         }
3336 
3337         cdb0 = task->task_cdb[0];
3338         cdb1 = task->task_cdb[1];
3339         /*
3340          * Special case for different versions of Windows.
3341          * 1) Windows 2012 and VMware will fail to discover LUs if a READ
3342          *    operation sent down the standby path returns an error. By
3343          *    default standby_fail_reads will be set to 0.
3344          * 2) Windows 2008 R2 has a severe performance problem if READ ops
3345          *    aren't rejected on the standby path. 2008 sends commands
3346          *    down the standby path, which then must be proxied over to the
3347          *    active node and back.
3348          */
3349         if ((sl->sl_access_state == SBD_LU_STANDBY) &&
3350             stmf_standby_fail_reads &&
3351             (cdb0 == SCMD_READ || cdb0 == SCMD_READ_G1 ||
3352             cdb0 == SCMD_READ_G4 || cdb0 == SCMD_READ_G5)) {
3353                 stmf_scsilib_send_status(task, STATUS_CHECK,
3354                     STMF_SAA_LU_NO_ACCESS_STANDBY);
3355                 return;
3356         }
3357 
3358         /*
3359          * Don't go further if cmd is unsupported in standby mode
3360          */
3361         if (sl->sl_access_state == SBD_LU_STANDBY) {
3362                 if (cdb0 != SCMD_INQUIRY &&
3363                     cdb0 != SCMD_MODE_SENSE &&
3364                     cdb0 != SCMD_MODE_SENSE_G1 &&
3365                     cdb0 != SCMD_MODE_SELECT &&
3366                     cdb0 != SCMD_MODE_SELECT_G1 &&
3367                     cdb0 != SCMD_RESERVE &&
3368                     cdb0 != SCMD_RELEASE &&
3369                     cdb0 != SCMD_PERSISTENT_RESERVE_OUT &&
3370                     cdb0 != SCMD_PERSISTENT_RESERVE_IN &&
3371                     cdb0 != SCMD_REQUEST_SENSE &&
3372                     cdb0 != SCMD_READ_CAPACITY &&
3373                     cdb0 != SCMD_TEST_UNIT_READY &&
3374                     cdb0 != SCMD_START_STOP &&
3375                     cdb0 != SCMD_READ &&
3376                     cdb0 != SCMD_READ_G1 &&
3377                     cdb0 != SCMD_READ_G4 &&
3378                     cdb0 != SCMD_READ_G5 &&
3379                     !(cdb0 == SCMD_SVC_ACTION_IN_G4 &&
3380                     cdb1 == SSVC_ACTION_READ_CAPACITY_G4) &&
3381                     !(cdb0 == SCMD_MAINTENANCE_IN &&
3382                     (cdb1 & 0x1F) == 0x05) &&
3383                     !(cdb0 == SCMD_MAINTENANCE_IN &&
3384                     (cdb1 & 0x1F) == 0x0A)) {
3385                         stmf_scsilib_send_status(task, STATUS_CHECK,
3386                             STMF_SAA_LU_NO_ACCESS_STANDBY);
3387                         return;
3388                 }
3389         }
3390 
3391         /*
3392          * Check ua conditions in the order specified by SAM-3 r14 5.3.2.
3393          * During MPIO/ALUA failover, cmds come in through local ports and
3394          * the proxy port provider (i.e. pppt); we want to report unit
3395          * attention only on local cmds, since initiators (Windows MPIO/DSM)
3396          * would continue sending I/O to the target that reported it.
3397          */
3398         if ((it->sbd_it_ua_conditions) &&
3399             !(task->task_additional_flags & TASK_AF_PPPT_TASK) &&
3400             (task->task_cdb[0] != SCMD_INQUIRY)) {
3401                 uint32_t saa = 0;
3402 
3403                 mutex_enter(&sl->sl_lock);
3404                 if (it->sbd_it_ua_conditions & SBD_UA_POR) {
3405                         it->sbd_it_ua_conditions &= ~SBD_UA_POR;
3406                         saa = STMF_SAA_POR;
3407                 } else if (it->sbd_it_ua_conditions &
3408                     SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
3409                         it->sbd_it_ua_conditions &=
3410                             ~SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
3411                         saa = STMF_SAA_ASYMMETRIC_ACCESS_CHANGED;
3412                 }
3413                 mutex_exit(&sl->sl_lock);
3414                 if (saa) {
3415                         stmf_scsilib_send_status(task, STATUS_CHECK, saa);
3416                         return;
3417                 }
3418         }
3419 
3420         /* Reservation conflict checks */
3421         if (sbd_check_reservation_conflict(sl, task)) {
3422                 stmf_scsilib_send_status(task,
3423                     STATUS_RESERVATION_CONFLICT, 0);
3424                 return;
3425         }
3426 
3427         /* Rest of the UA condition checks */
3428         if ((it->sbd_it_ua_conditions) && (task->task_cdb[0] != SCMD_INQUIRY)) {
3429                 uint32_t saa = 0;
3430 
3431                 mutex_enter(&sl->sl_lock);
3432                 if (it->sbd_it_ua_conditions & SBD_UA_CAPACITY_CHANGED) {
3433                         it->sbd_it_ua_conditions &= ~SBD_UA_CAPACITY_CHANGED;
3434                         if ((task->task_cdb[0] == SCMD_READ_CAPACITY) ||
3435                             ((task->task_cdb[0] == SCMD_SVC_ACTION_IN_G4) &&
3436                             (task->task_cdb[1] ==
3437                             SSVC_ACTION_READ_CAPACITY_G4))) {
3438                                 saa = 0;
3439                         } else {
3440                                 saa = STMF_SAA_CAPACITY_DATA_HAS_CHANGED;
3441                         }
3442                 } else if (it->sbd_it_ua_conditions &
3443                     SBD_UA_MODE_PARAMETERS_CHANGED) {
3444                         it->sbd_it_ua_conditions &=
3445                             ~SBD_UA_MODE_PARAMETERS_CHANGED;
3446                         saa = STMF_SAA_MODE_PARAMETERS_CHANGED;
3447                 } else if (it->sbd_it_ua_conditions &
3448                     SBD_UA_ASYMMETRIC_ACCESS_CHANGED) {
3449                         saa = 0;
3450                 } else if (it->sbd_it_ua_conditions & SBD_UA_POR) {
3451                         saa = 0;
3452                 } else if (it->sbd_it_ua_conditions &
3453                     SBD_UA_ACCESS_STATE_TRANSITION) {
3454                         it->sbd_it_ua_conditions &=
3455                             ~SBD_UA_ACCESS_STATE_TRANSITION;
3456                         saa = STMF_SAA_LU_NO_ACCESS_TRANSITION;
3457                 } else {
3458                         it->sbd_it_ua_conditions = 0;
3459                         saa = 0;
3460                 }
3461                 mutex_exit(&sl->sl_lock);
3462                 if (saa) {
3463                         stmf_scsilib_send_status(task, STATUS_CHECK, saa);
3464                         return;
3465                 }
3466         }
3467 
3468         if (sl->sl_access_state == SBD_LU_STANDBY) {
3469                 /*
3470                  * is this a short write?
3471                  * if so, we'll need to wait until we have the buffer
3472                  * before proxying the command
3473                  */
3474                 switch (cdb0) {
3475                         case SCMD_MODE_SELECT:
3476                         case SCMD_MODE_SELECT_G1:
3477                         case SCMD_PERSISTENT_RESERVE_OUT:
3478                                 break;
3479                         default:
3480                                 st_ret = stmf_proxy_scsi_cmd(task,
3481                                     initial_dbuf);
3482                                 if (st_ret != STMF_SUCCESS) {
3483                                         stmf_scsilib_send_status(task,
3484                                             STATUS_CHECK,
3485                                             STMF_SAA_LU_NO_ACCESS_UNAVAIL);
3486                                 }
3487                                 return;
3488                 }
3489         }
3490 
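              /*
               * Mask off the group code so that all READ/WRITE variants
               * (6/10/12/16 byte CDBs) compare equal to SCMD_READ or
               * SCMD_WRITE.
               */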
3491         cdb0 = task->task_cdb[0] & 0x1F;
3492 
3493         if ((cdb0 == SCMD_READ) || (cdb0 == SCMD_WRITE)) {
3494                 if (task->task_additional_flags & TASK_AF_PORT_LOAD_HIGH) {
3495                         stmf_scsilib_send_status(task, STATUS_QFULL, 0);
3496                         return;
3497                 }
3498                 if (cdb0 == SCMD_READ) {
3499                         sbd_handle_read(task, initial_dbuf);
3500                         return;
3501                 }
3502                 sbd_handle_write(task, initial_dbuf);
3503                 return;
3504         }
3505 
3506         cdb0 = task->task_cdb[0];
3507         cdb1 = task->task_cdb[1];
3508 
3509         if (cdb0 == SCMD_INQUIRY) {             /* Inquiry */
3510                 sbd_handle_inquiry(task, initial_dbuf);
3511                 return;
3512         }
3513 
3514         if (cdb0  == SCMD_PERSISTENT_RESERVE_OUT) {
3515                 sbd_handle_pgr_out_cmd(task, initial_dbuf);
3516                 return;
3517         }
3518 
3519         if (cdb0  == SCMD_PERSISTENT_RESERVE_IN) {
3520                 sbd_handle_pgr_in_cmd(task, initial_dbuf);
3521                 return;
3522         }
3523 
3524         if (cdb0 == SCMD_RELEASE) {
3525                 if (cdb1) {
3526                         stmf_scsilib_send_status(task, STATUS_CHECK,
3527                             STMF_SAA_INVALID_FIELD_IN_CDB);
3528                         return;
3529                 }
3530 
3531                 mutex_enter(&sl->sl_lock);
3532                 if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
3533                         /* If not owner don't release it, just return good */
3534                         if (it->sbd_it_session_id !=
3535                             sl->sl_rs_owner_session_id) {
3536                                 mutex_exit(&sl->sl_lock);
3537                                 stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3538                                 return;
3539                         }
3540                 }
3541                 sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
3542                 it->sbd_it_flags &= ~SBD_IT_HAS_SCSI2_RESERVATION;
3543                 mutex_exit(&sl->sl_lock);
3544                 stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3545                 return;
3546         }
3547 
3548         if (cdb0 == SCMD_RESERVE) {
3549                 if (cdb1) {
3550                         stmf_scsilib_send_status(task, STATUS_CHECK,
3551                             STMF_SAA_INVALID_FIELD_IN_CDB);
3552                         return;
3553                 }
3554 
3555                 mutex_enter(&sl->sl_lock);
3556                 if (sl->sl_flags & SL_LU_HAS_SCSI2_RESERVATION) {
3557                         /* If not owner, return conflict status */
3558                         if (it->sbd_it_session_id !=
3559                             sl->sl_rs_owner_session_id) {
3560                                 mutex_exit(&sl->sl_lock);
3561                                 stmf_scsilib_send_status(task,
3562                                     STATUS_RESERVATION_CONFLICT, 0);
3563                                 return;
3564                         }
3565                 }
3566                 sl->sl_flags |= SL_LU_HAS_SCSI2_RESERVATION;
3567                 it->sbd_it_flags |= SBD_IT_HAS_SCSI2_RESERVATION;
3568                 sl->sl_rs_owner_session_id = it->sbd_it_session_id;
3569                 mutex_exit(&sl->sl_lock);
3570                 stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3571                 return;
3572         }
3573 
3574         if (cdb0 == SCMD_REQUEST_SENSE) {
3575                 /*
3576                  * LU provider needs to store unretrieved sense data
3577                  * (e.g. after power-on/reset).  For now, we'll just
3578                  * return good status with no sense.
3579                  */
3580 
3581                 if ((cdb1 & ~1) || task->task_cdb[2] || task->task_cdb[3] ||
3582                     task->task_cdb[5]) {
3583                         stmf_scsilib_send_status(task, STATUS_CHECK,
3584                             STMF_SAA_INVALID_FIELD_IN_CDB);
3585                 } else {
3586                         stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3587                 }
3588 
3589                 return;
3590         }
3591 
3592         /* Report Target Port Groups */
3593         if ((cdb0 == SCMD_MAINTENANCE_IN) &&
3594             ((cdb1 & 0x1F) == 0x0A)) {
3595                 stmf_scsilib_handle_report_tpgs(task, initial_dbuf);
3596                 return;
3597         }
3598 
3599         /* Report Identifying Information */
3600         if ((cdb0 == SCMD_MAINTENANCE_IN) &&
3601             ((cdb1 & 0x1F) == 0x05)) {
3602                 sbd_handle_identifying_info(task, initial_dbuf);
3603                 return;
3604         }
3605 
3606         if (cdb0 == SCMD_START_STOP) {                  /* Start stop */
3607                 task->task_cmd_xfer_length = 0;
3608                 if (task->task_cdb[4] & 0xFC) {
3609                         stmf_scsilib_send_status(task, STATUS_CHECK,
3610                             STMF_SAA_INVALID_FIELD_IN_CDB);
3611                         return;
3612                 }
3613                 if (task->task_cdb[4] & 2) {
3614                         stmf_scsilib_send_status(task, STATUS_CHECK,
3615                             STMF_SAA_INVALID_FIELD_IN_CDB);
3616                 } else {
3617                         stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3618                 }
3619                 return;
3621         }
3622 
3623         if ((cdb0 == SCMD_MODE_SENSE) || (cdb0 == SCMD_MODE_SENSE_G1)) {
3624                 uint8_t *p;
3625                 p = kmem_zalloc(512, KM_SLEEP);
3626                 sbd_handle_mode_sense(task, initial_dbuf, p);
3627                 kmem_free(p, 512);
3628                 return;
3629         }
3630 
3631         if ((cdb0 == SCMD_MODE_SELECT) || (cdb0 == SCMD_MODE_SELECT_G1)) {
3632                 sbd_handle_mode_select(task, initial_dbuf);
3633                 return;
3634         }
3635 
3636         if ((cdb0 == SCMD_UNMAP) && (sl->sl_flags & SL_UNMAP_ENABLED)) {
3637                 sbd_handle_unmap(task, initial_dbuf);
3638                 return;
3639         }
3640 
3641         if ((cdb0 == SCMD_WRITE_SAME_G4) || (cdb0 == SCMD_WRITE_SAME_G1)) {
3642                 sbd_handle_write_same(task, initial_dbuf);
3643                 return;
3644         }
3645 
3646         if (cdb0 == SCMD_COMPARE_AND_WRITE) {
3647                 sbd_handle_ats(task, initial_dbuf);
3648                 return;
3649         }
3650 
3651         if (cdb0 == SCMD_EXTENDED_COPY) {
3652                 sbd_handle_xcopy(task, initial_dbuf);
3653                 return;
3654         }
3655 
3656         if (cdb0 == SCMD_RECV_COPY_RESULTS) {
3657                 sbd_handle_recv_copy_results(task, initial_dbuf);
3658                 return;
3659         }
3660 
3661         if (cdb0 == SCMD_TEST_UNIT_READY) {     /* Test unit ready */
3662                 task->task_cmd_xfer_length = 0;
3663                 stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3664                 return;
3665         }
3666 
3667         if (cdb0 == SCMD_READ_CAPACITY) {               /* Read Capacity */
3668                 sbd_handle_read_capacity(task, initial_dbuf);
3669                 return;
3670         }
3671 
3672         if (cdb0 == SCMD_SVC_ACTION_IN_G4) { /* Read Capacity or read long */
3673                 if (cdb1 == SSVC_ACTION_READ_CAPACITY_G4) {
3674                         sbd_handle_read_capacity(task, initial_dbuf);
3675                         return;
3676                 /*
3677                  * } else if (cdb1 == SSVC_ACTION_READ_LONG_G4) {
3678                  *      sbd_handle_read(task, initial_dbuf);
3679                  *      return;
3680                  */
3681                 }
3682         }
3683 
3684         /*
3685          * if (cdb0 == SCMD_SVC_ACTION_OUT_G4) {
3686          *      if (cdb1 == SSVC_ACTION_WRITE_LONG_G4) {
3687          *               sbd_handle_write(task, initial_dbuf);
3688          *              return;
3689          *      }
3690          * }
3691          */
3692 
3693         if (cdb0 == SCMD_VERIFY) {
3694                 /*
3695                  * Something more should probably be done here.
3696                  */
3697                 task->task_cmd_xfer_length = 0;
3698                 stmf_scsilib_send_status(task, STATUS_GOOD, 0);
3699                 return;
3700         }
3701 
3702         if (cdb0 == SCMD_SYNCHRONIZE_CACHE ||
3703             cdb0 == SCMD_SYNCHRONIZE_CACHE_G4) {
3704                 sbd_handle_sync_cache(task, initial_dbuf);
3705                 return;
3706         }
3707 
3708         /*
3709          * Write and Verify use the same path as write, but don't clutter the
3710          * performance path above with checking for write_verify opcodes.  We
3711          * rely on ZFS's integrity checks for the "Verify" part of Write &
3712          * Verify.  (Even if we did a read to "verify" we'd merely be reading
3713          * cache, not actual media.)
3714          * Therefore we
3715          *   a) only support this if sbd_is_zvol, and
3716          *   b) run the IO through the normal write path with a forced
3717          *      sbd_flush_data_cache at the end.
3718          */
3719 
3720         if ((sl->sl_flags & SL_ZFS_META) && (
3721             cdb0 == SCMD_WRITE_VERIFY ||
3722             cdb0 == SCMD_WRITE_VERIFY_G4 ||
3723             cdb0 == SCMD_WRITE_VERIFY_G5)) {
3724                 sbd_handle_write(task, initial_dbuf);
3725                 return;
3726         }
3727         stmf_scsilib_send_status(task, STATUS_CHECK, STMF_SAA_INVALID_OPCODE);
3728 }
3729 
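     /*
      * Data transfer completion.  LU-owned buffers (DB_LU_DATA_BUF) are
      * handled first, since they complete even if the task is no longer
      * active; everything else is dispatched on the command type saved
      * in the task's private data.
      */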
3730 void
3731 sbd_dbuf_xfer_done(struct scsi_task *task, struct stmf_data_buf *dbuf)
3732 {
3733         sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3734 
3735         if (dbuf->db_flags & DB_LU_DATA_BUF) {
3736                 /*
3737                  * Buffers passed in from the LU always complete
3738                  * even if the task is no longer active.
3739                  */
3740                 ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3741                 ASSERT(scmd);
3742                 switch (scmd->cmd_type) {
3743                 case (SBD_CMD_SCSI_READ):
3744                         sbd_handle_sgl_read_xfer_completion(task, scmd, dbuf);
3745                         break;
3746                 case (SBD_CMD_SCSI_WRITE):
3747                         sbd_handle_sgl_write_xfer_completion(task, scmd, dbuf);
3748                         break;
3749                 default:
3750                         cmn_err(CE_PANIC, "Unknown cmd type, task = %p",
3751                             (void *)task);
3752                         break;
3753                 }
3754                 return;
3755         }
3756 
3757         if ((scmd == NULL) || ((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0))
3758                 return;
3759 
3760         switch (scmd->cmd_type) {
3761         case (SBD_CMD_SCSI_READ):
3762                 sbd_handle_read_xfer_completion(task, scmd, dbuf);
3763                 break;
3764 
3765         case (SBD_CMD_SCSI_WRITE):
3766                 switch (task->task_cdb[0]) {
3767                 case SCMD_WRITE_SAME_G1:
3768                 case SCMD_WRITE_SAME_G4:
3769                         sbd_handle_write_same_xfer_completion(task, scmd, dbuf,
3770                             1);
3771                         break;
3772                 case SCMD_COMPARE_AND_WRITE:
3773                         sbd_handle_ats_xfer_completion(task, scmd, dbuf, 1);
3774                         break;
3775                 default:
3776                         sbd_handle_write_xfer_completion(task, scmd, dbuf, 1);
3777                         break;
3778                 }
3779                 break;
3780 
3781         case (SBD_CMD_SMALL_READ):
3782                 sbd_handle_short_read_xfer_completion(task, scmd, dbuf);
3783                 break;
3784 
3785         case (SBD_CMD_SMALL_WRITE):
3786                 sbd_handle_short_write_xfer_completion(task, dbuf);
3787                 break;
3788 
3789         default:
3790                 cmn_err(CE_PANIC, "Unknown cmd type, task = %p", (void *)task);
3791                 break;
3792         }
3793 }
3794 
3795 /* ARGSUSED */
3796 void
3797 sbd_send_status_done(struct scsi_task *task)
3798 {
3799         cmn_err(CE_PANIC,
3800             "sbd_send_status_done: this should not have been called");
3801 }
3802 
3803 void
3804 sbd_task_free(struct scsi_task *task)
3805 {
3806         if (task->task_lu_private) {
3807                 sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3808                 if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3809                         cmn_err(CE_PANIC, "cmd is active, task = %p",
3810                             (void *)task);
3811                 }
3812                 kmem_free(scmd, sizeof (sbd_cmd_t));
3813         }
3814 }
3815 
3816 /*
3817  * Aborts are synchronous w.r.t. I/O AND
3818  * All the I/O which SBD does is synchronous AND
3819  * Everything within a task is single threaded.
3820  *   IT MEANS
3821  * If this function is called, we are doing nothing with this task
3822  * inside of the sbd module.
3823  */
3824 /* ARGSUSED */
3825 stmf_status_t
3826 sbd_abort(struct stmf_lu *lu, int abort_cmd, void *arg, uint32_t flags)
3827 {
3828         sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3829         scsi_task_t *task;
3830 
3831         if (abort_cmd == STMF_LU_RESET_STATE) {
3832                 return (sbd_lu_reset_state(lu));
3833         }
3834 
3835         if (abort_cmd == STMF_LU_ITL_HANDLE_REMOVED) {
3836                 sbd_check_and_clear_scsi2_reservation(sl, (sbd_it_data_t *)arg);
3837                 sbd_remove_it_handle(sl, (sbd_it_data_t *)arg);
3838                 return (STMF_SUCCESS);
3839         }
3840 
3841         ASSERT(abort_cmd == STMF_LU_ABORT_TASK);
3842         task = (scsi_task_t *)arg;
3843         sbd_ats_remove_by_task(task);
3844         if (task->task_lu_private) {
3845                 sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3846 
3847                 if (scmd->flags & SBD_SCSI_CMD_ACTIVE) {
3848                         if (scmd->flags & SBD_SCSI_CMD_TRANS_DATA) {
3849                                 kmem_free(scmd->trans_data,
3850                                     scmd->trans_data_len);
3851                                 scmd->flags &= ~SBD_SCSI_CMD_TRANS_DATA;
3852                         }
3853                         scmd->flags &= ~SBD_SCSI_CMD_ACTIVE;
3854                         return (STMF_ABORT_SUCCESS);
3855                 }
3856         }
3857 
3858         return (STMF_NOT_FOUND);
3859 }
3860 
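     /*
      * Poll entry point armed via stmf_task_poll_lu(); re-enters
      * sbd_new_task() with the task's initial buffer.
      */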
3861 void
3862 sbd_task_poll(struct scsi_task *task)
3863 {
3864         stmf_data_buf_t *initial_dbuf;
3865 
3866         initial_dbuf = stmf_handle_to_buf(task, 0);
3867         sbd_new_task(task, initial_dbuf);
3868 }
3869 
3870 /*
3871  * This function is called during task clean-up if the
3872  * DB_LU_DATA_BUF flag is set on the dbuf. This should only be called
3873  * for abort processing after sbd_abort has been called for the task.
3874  */
3875 void
3876 sbd_dbuf_free(struct scsi_task *task, struct stmf_data_buf *dbuf)
3877 {
3878         sbd_cmd_t *scmd = (sbd_cmd_t *)task->task_lu_private;
3879         sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
3880 
3881         ASSERT(dbuf->db_lu_private);
3882         ASSERT(scmd && ATOMIC8_GET(scmd->nbufs) > 0);
3883         ASSERT((scmd->flags & SBD_SCSI_CMD_ACTIVE) == 0);
3884         ASSERT(dbuf->db_flags & DB_LU_DATA_BUF);
3885         ASSERT(task->task_additional_flags & TASK_AF_ACCEPT_LU_DBUF);
3886         ASSERT((curthread->t_flag & T_INTR_THREAD) == 0);
3887 
3888         if (scmd->cmd_type == SBD_CMD_SCSI_READ) {
3889                 sbd_zvol_rele_read_bufs(sl, dbuf);
3890         } else if (scmd->cmd_type == SBD_CMD_SCSI_WRITE) {
3891                 sbd_zvol_rele_write_bufs_abort(sl, dbuf);
3892         } else {
3893                 cmn_err(CE_PANIC, "Unknown cmd type %d, task = %p",
3894                     scmd->cmd_type, (void *)task);
3895         }
3896         if (atomic_dec_8_nv(&scmd->nbufs) == 0)
3897                 rw_exit(&sl->sl_access_state_lock);
3898         stmf_teardown_dbuf(task, dbuf);
3899         stmf_free(dbuf);
3900 }
3901 
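     /*
      * STMF control entry point: drives the LU online/offline state
      * machine and records acknowledgements of completed transitions.
      */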
3902 /* ARGSUSED */
3903 void
3904 sbd_ctl(struct stmf_lu *lu, int cmd, void *arg)
3905 {
3906         sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3907         stmf_change_status_t st;
3908 
3909         ASSERT((cmd == STMF_CMD_LU_ONLINE) ||
3910             (cmd == STMF_CMD_LU_OFFLINE) ||
3911             (cmd == STMF_ACK_LU_ONLINE_COMPLETE) ||
3912             (cmd == STMF_ACK_LU_OFFLINE_COMPLETE));
3913 
3914         st.st_completion_status = STMF_SUCCESS;
3915         st.st_additional_info = NULL;
3916 
3917         switch (cmd) {
3918         case STMF_CMD_LU_ONLINE:
3919                 if (sl->sl_state == STMF_STATE_ONLINE)
3920                         st.st_completion_status = STMF_ALREADY;
3921                 else if (sl->sl_state != STMF_STATE_OFFLINE)
3922                         st.st_completion_status = STMF_FAILURE;
3923                 if (st.st_completion_status == STMF_SUCCESS) {
3924                         sl->sl_state = STMF_STATE_ONLINE;
3925                         sl->sl_state_not_acked = 1;
3926                 }
3927                 (void) stmf_ctl(STMF_CMD_LU_ONLINE_COMPLETE, lu, &st);
3928                 break;
3929 
3930         case STMF_CMD_LU_OFFLINE:
3931                 if (sl->sl_state == STMF_STATE_OFFLINE)
3932                         st.st_completion_status = STMF_ALREADY;
3933                 else if (sl->sl_state != STMF_STATE_ONLINE)
3934                         st.st_completion_status = STMF_FAILURE;
3935                 if (st.st_completion_status == STMF_SUCCESS) {
3936                         sl->sl_flags &= ~(SL_MEDIUM_REMOVAL_PREVENTED |
3937                             SL_LU_HAS_SCSI2_RESERVATION);
3938                         sl->sl_state = STMF_STATE_OFFLINE;
3939                         sl->sl_state_not_acked = 1;
3940                         sbd_pgr_reset(sl);
3941                 }
3942                 (void) stmf_ctl(STMF_CMD_LU_OFFLINE_COMPLETE, lu, &st);
3943                 break;
3944 
3945         case STMF_ACK_LU_ONLINE_COMPLETE:
3946                 /* Fallthrough */
3947         case STMF_ACK_LU_OFFLINE_COMPLETE:
3948                 sl->sl_state_not_acked = 0;
3949                 break;
3950 
3951         }
3952 }
3953 
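     /*
      * LU info entry point; sbd does not provide any information this way.
      */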
3954 /* ARGSUSED */
3955 stmf_status_t
3956 sbd_info(uint32_t cmd, stmf_lu_t *lu, void *arg, uint8_t *buf,
3957     uint32_t *bufsizep)
3958 {
3959         return (STMF_NOT_SUPPORTED);
3960 }
3961 
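     /*
      * Reset the LU to its default state: reapply the saved write-cache
      * setting to the backing store, clear persistent (PGR) and SCSI-2
      * reservation state, and deregister all ITL handles.
      */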
3962 stmf_status_t
3963 sbd_lu_reset_state(stmf_lu_t *lu)
3964 {
3965         sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3966 
3967         mutex_enter(&sl->sl_lock);
3968         if (sl->sl_flags & SL_SAVED_WRITE_CACHE_DISABLE) {
3969                 sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
3970                 mutex_exit(&sl->sl_lock);
3971                 if (sl->sl_access_state == SBD_LU_ACTIVE) {
3972                         (void) sbd_wcd_set(1, sl);
3973                 }
3974         } else {
3975                 sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
3976                 mutex_exit(&sl->sl_lock);
3977                 if (sl->sl_access_state == SBD_LU_ACTIVE) {
3978                         (void) sbd_wcd_set(0, sl);
3979                 }
3980         }
3981         sbd_pgr_reset(sl);
3982         sbd_check_and_clear_scsi2_reservation(sl, NULL);
3983         if (stmf_deregister_all_lu_itl_handles(lu) != STMF_SUCCESS) {
3984                 return (STMF_FAILURE);
3985         }
3986         return (STMF_SUCCESS);
3987 }
3988 
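     /*
      * Flush cached data for the LU down to the backing store.  This is
      * VOP_FSYNC() for regular-file and block-device backends (skipped if
      * the caller has already synced), followed by DKIOCFLUSHWRITECACHE
      * for character and block devices.  If the backing store does not
      * support the flush ioctl, SL_NO_DATA_DKIOFLUSH is set so that it is
      * not attempted again.
      */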
3989 sbd_status_t
3990 sbd_flush_data_cache(sbd_lu_t *sl, int fsync_done)
3991 {
3992         sbd_status_t ret = SBD_SUCCESS;
3993 
3994         rw_enter(&sl->sl_access_state_lock, RW_READER);
3995         if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
3996                 ret = SBD_FILEIO_FAILURE;
3997                 goto flush_fail;
3998         }
3999         if (fsync_done)
4000                 goto over_fsync;
4001         if ((sl->sl_data_vtype == VREG) || (sl->sl_data_vtype == VBLK)) {
4002                 if (VOP_FSYNC(sl->sl_data_vp, FSYNC, kcred, NULL)) {
4003                         ret = SBD_FAILURE;
4004                         goto flush_fail;
4005                 }
4006         }
4007 over_fsync:
4008         if (((sl->sl_data_vtype == VCHR) || (sl->sl_data_vtype == VBLK)) &&
4009             ((sl->sl_flags & SL_NO_DATA_DKIOFLUSH) == 0)) {
4010                 int r = 0;
                     int ioctl_err;
4011 
                     /*
                      * VOP_IOCTL() returns an errno, not an sbd_status_t,
                      * so capture it separately instead of punning it
                      * through 'ret'.
                      */
4012                 ioctl_err = VOP_IOCTL(sl->sl_data_vp, DKIOCFLUSHWRITECACHE,
4013                     0, FKIOCTL, kcred, &r, NULL);
4014                 if ((ioctl_err == ENOTTY) || (ioctl_err == ENOTSUP)) {
                             /*
                              * The backing store does not support the cache
                              * flush ioctl; remember that so it is not
                              * attempted again.  The flush is treated as
                              * successful (ret remains SBD_SUCCESS).
                              */
4015                         mutex_enter(&sl->sl_lock);
4016                         sl->sl_flags |= SL_NO_DATA_DKIOFLUSH;
4017                         mutex_exit(&sl->sl_lock);
4018                 } else {
4019                         ret = (ioctl_err != 0) ? SBD_FAILURE : SBD_SUCCESS;
4020                 }
4021         }
4022 flush_fail:
4023         rw_exit(&sl->sl_access_state_lock);
4024 
4025         return (ret);
4026 }
4027 
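     /*
      * Handle SYNCHRONIZE CACHE(10) and SYNCHRONIZE CACHE(16).  The IMMED
      * bit is rejected, reserved fields and the LBA range are validated,
      * and then the entire cache for the LU is flushed; the LBA and block
      * count are not used to narrow the flush.
      */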
4028 /* ARGSUSED */
4029 static void
4030 sbd_handle_sync_cache(struct scsi_task *task,
4031     struct stmf_data_buf *initial_dbuf)
4032 {
4033         sbd_lu_t *sl = (sbd_lu_t *)task->task_lu->lu_provider_private;
4034         uint64_t        lba, laddr;
4035         sbd_status_t    sret;
4036         uint32_t        len;
4037         int             is_g4 = 0;
4038         int             immed;
4039 
4040         task->task_cmd_xfer_length = 0;
4041         /*
4042          * Determine if this is a 10- or 16-byte CDB.
4043          */
4044 
4045         if (task->task_cdb[0] == SCMD_SYNCHRONIZE_CACHE_G4)
4046                 is_g4 = 1;
4047 
4048         /*
4049          * Determine other requested parameters.
4050          *
4051          * We don't have a non-volatile cache, so we don't care about SYNC_NV.
4052          * We do not support the IMMED bit, so reject any command that sets it.
4053          */
4054 
4055         immed = (task->task_cdb[1] & 0x02);
4056 
4057         if (immed) {
4058                 stmf_scsilib_send_status(task, STATUS_CHECK,
4059                     STMF_SAA_INVALID_FIELD_IN_CDB);
4060                 return;
4061         }
4062 
4063         /*
4064          * Check to be sure we're not being asked to sync an LBA
4065          * that is out of range.  While checking, verify reserved fields.
4066          */
4067 
4068         if (is_g4) {
4069                 if ((task->task_cdb[1] & 0xf9) || task->task_cdb[14] ||
4070                     task->task_cdb[15]) {
4071                         stmf_scsilib_send_status(task, STATUS_CHECK,
4072                             STMF_SAA_INVALID_FIELD_IN_CDB);
4073                         return;
4074                 }
4075 
4076                 lba = READ_SCSI64(&task->task_cdb[2], uint64_t);
4077                 len = READ_SCSI32(&task->task_cdb[10], uint32_t);
4078         } else {
4079                 if ((task->task_cdb[1] & 0xf9) || task->task_cdb[6] ||
4080                     task->task_cdb[9]) {
4081                         stmf_scsilib_send_status(task, STATUS_CHECK,
4082                             STMF_SAA_INVALID_FIELD_IN_CDB);
4083                         return;
4084                 }
4085 
4086                 lba = READ_SCSI32(&task->task_cdb[2], uint64_t);
4087                 len = READ_SCSI16(&task->task_cdb[7], uint32_t);
4088         }
4089 
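             /* Convert the LBA and block count to a byte offset and length. */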
4090         laddr = lba << sl->sl_data_blocksize_shift;
4091         len <<= sl->sl_data_blocksize_shift;
4092 
4093         if ((laddr + (uint64_t)len) > sl->sl_lu_size) {
4094                 stmf_scsilib_send_status(task, STATUS_CHECK,
4095                     STMF_SAA_LBA_OUT_OF_RANGE);
4096                 return;
4097         }
4098 
4099         sret = sbd_flush_data_cache(sl, 0);
4100         if (sret != SBD_SUCCESS) {
4101                 stmf_scsilib_send_status(task, STATUS_CHECK,
4102                     STMF_SAA_WRITE_ERROR);
4103                 return;
4104         }
4105 
4106         stmf_scsilib_send_status(task, STATUS_GOOD, 0);
4107 }