Print this page
NEX-1889 upstream
        
@@ -22,10 +22,11 @@
 /*
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  * Copyright 2014 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright (c) 2014, Tegile Systems Inc. All rights reserved.
  */
 
 /*
  * Copyright (c) 2000 to 2010, LSI Corporation.
  * All rights reserved.
@@ -243,11 +244,11 @@
 static void mptsas_cmd_timeout(mptsas_t *mpt, mptsas_target_t *ptgt);
 
 static void mptsas_start_passthru(mptsas_t *mpt, mptsas_cmd_t *cmd);
 static int mptsas_do_passthru(mptsas_t *mpt, uint8_t *request, uint8_t *reply,
     uint8_t *data, uint32_t request_size, uint32_t reply_size,
-    uint32_t data_size, uint32_t direction, uint8_t *dataout,
+    uint32_t data_size, uint8_t direction, uint8_t *dataout,
     uint32_t dataout_size, short timeout, int mode);
 static int mptsas_free_devhdl(mptsas_t *mpt, uint16_t devhdl);
 
 static uint8_t mptsas_get_fw_diag_buffer_number(mptsas_t *mpt,
     uint32_t unique_id);
@@ -489,11 +490,11 @@
         1,              /* minxfer - gran. of DMA engine        */
         0x00ffffffull,  /* maxxfer - gran. of DMA engine        */
         0xffffffffull,  /* max segment size (DMA boundary)      */
         MPTSAS_MAX_DMA_SEGS, /* scatter/gather list length      */
         512,            /* granularity - device transfer size   */
-        DDI_DMA_RELAXED_ORDERING        /* flags, enable relaxed ordering */
+        0               /* flags, set to 0 */
 };
 
 ddi_device_acc_attr_t mptsas_dev_attr = {
         DDI_DEVICE_ATTR_V1,
         DDI_STRUCTURE_LE_ACC,
@@ -567,11 +568,11 @@
 
 /*
  * Local static data
  */
 #if defined(MPTSAS_DEBUG)
-uint32_t mptsas_debug_flags = 0;
+uint32_t mptsas_debug_flags = 0x0;
 #endif  /* defined(MPTSAS_DEBUG) */
 uint32_t mptsas_debug_resets = 0;
 
 static kmutex_t         mptsas_global_mutex;
 static void             *mptsas_state;          /* soft state ptr */
@@ -585,11 +586,25 @@
 static clock_t mptsas_scsi_watchdog_tick;
 static clock_t mptsas_tick;
 static timeout_id_t mptsas_reset_watch;
 static timeout_id_t mptsas_timeout_id;
 static int mptsas_timeouts_enabled = 0;
+
 /*
+ * The only software restriction on switching msg buffers to 64 bit seems to
+ * be the Auto Request Sense interface. The high 32 bits for all such
+ * requests appear to be required to sit in the same 4G segment.
+ * See initialization of SenseBufferAddressHigh in mptsas_init.c, and
+ * the use of SenseBufferLowAddress in requests. Note that there is
+ * currently a dependency on scsi_alloc_consistent_buf() adhering to
+ * this requirement.
+ * There is also a question about improved performance over PCI/PCIX
+ * if transfers are within the first 4GB.
+ */
+static int mptsas_use_64bit_msgaddr = 0;
+
+/*
  * warlock directives
  */
 _NOTE(SCHEME_PROTECTS_DATA("unique per pkt", scsi_pkt \
         mptsas_cmd NcrTableIndirect buf scsi_cdb scsi_status))
 _NOTE(SCHEME_PROTECTS_DATA("unique per pkt", smp_pkt))
@@ -1163,11 +1178,15 @@
         mpt->m_dip = dip;
         mpt->m_instance = instance;
 
         /* Make a per-instance copy of the structures */
         mpt->m_io_dma_attr = mptsas_dma_attrs64;
+        if (mptsas_use_64bit_msgaddr) {
+                mpt->m_msg_dma_attr = mptsas_dma_attrs64;
+        } else {
         mpt->m_msg_dma_attr = mptsas_dma_attrs;
+        }
         mpt->m_reg_acc_attr = mptsas_dev_attr;
         mpt->m_dev_acc_attr = mptsas_dev_attr;
 
         /*
          * Initialize FMA
@@ -1259,10 +1278,19 @@
                 }
                 mutex_exit(&mpt->m_doneq_mutex);
                 doneq_thread_create++;
         }
 
+        /*
+         * Disable hardware interrupt since we're not ready to
+         * handle it yet.
+         */
+        MPTSAS_DISABLE_INTR(mpt);
+        if (mptsas_register_intrs(mpt) == FALSE)
+                goto fail;
+        intr_added++;
+
         /* Initialize mutex used in interrupt handler */
         mutex_init(&mpt->m_mutex, NULL, MUTEX_DRIVER,
             DDI_INTR_PRI(mpt->m_intr_pri));
         mutex_init(&mpt->m_passthru_mutex, NULL, MUTEX_DRIVER, NULL);
         mutex_init(&mpt->m_tx_waitq_mutex, NULL, MUTEX_DRIVER,
@@ -1278,19 +1306,10 @@
         cv_init(&mpt->m_fw_cv, NULL, CV_DRIVER, NULL);
         cv_init(&mpt->m_config_cv, NULL, CV_DRIVER, NULL);
         cv_init(&mpt->m_fw_diag_cv, NULL, CV_DRIVER, NULL);
         mutex_init_done++;
 
-        /*
-         * Disable hardware interrupt since we're not ready to
-         * handle it yet.
-         */
-        MPTSAS_DISABLE_INTR(mpt);
-        if (mptsas_register_intrs(mpt) == FALSE)
-                goto fail;
-        intr_added++;
-
         mutex_enter(&mpt->m_mutex);
         /*
          * Initialize power management component
          */
         if (mpt->m_options & MPTSAS_OPT_PM) {
@@ -2220,11 +2239,11 @@
         /*
          * create kmem cache for packets
          */
         (void) sprintf(buf, "mptsas%d_cache", instance);
         mpt->m_kmem_cache = kmem_cache_create(buf,
-            sizeof (struct mptsas_cmd) + scsi_pkt_size(), 8,
+            sizeof (struct mptsas_cmd) + scsi_pkt_size(), 16,
             mptsas_kmem_cache_constructor, mptsas_kmem_cache_destructor,
             NULL, (void *)mpt, NULL, 0);
 
         if (mpt->m_kmem_cache == NULL) {
                 mptsas_log(mpt, CE_WARN, "creating kmem cache failed");
@@ -2235,11 +2254,11 @@
          * create kmem cache for extra SGL frames if SGL cannot
          * be accomodated into main request frame.
          */
         (void) sprintf(buf, "mptsas%d_cache_frames", instance);
         mpt->m_cache_frames = kmem_cache_create(buf,
-            sizeof (mptsas_cache_frames_t), 8,
+            sizeof (mptsas_cache_frames_t), 16,
             mptsas_cache_frames_constructor, mptsas_cache_frames_destructor,
             NULL, (void *)mpt, NULL, 0);
 
         if (mpt->m_cache_frames == NULL) {
                 mptsas_log(mpt, CE_WARN, "creating cache for frames failed");
@@ -3969,11 +3988,11 @@
         /*
          * Store the SGL memory address.  This chip uses this
          * address to dma to and from the driver.  The second
          * address is the address mpt uses to fill in the SGL.
          */
-        p->m_phys_addr = cookie.dmac_address;
+        p->m_phys_addr = cookie.dmac_laddress;
 
         return (DDI_SUCCESS);
 }
 
 static void
@@ -4176,58 +4195,27 @@
         }
         (*pkt->pkt_comp)(pkt);
 }
 
 static void
-mptsas_sge_setup(mptsas_t *mpt, mptsas_cmd_t *cmd, uint32_t *control,
-        pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl)
+mptsas_sge_mainframe(mptsas_cmd_t *cmd, pMpi2SCSIIORequest_t frame,
+                ddi_acc_handle_t acc_hdl, uint_t cookiec,
+                uint32_t end_flags)
 {
-        uint_t                  cookiec;
+        pMpi2SGESimple64_t      sge;
         mptti_t                 *dmap;
         uint32_t                flags;
-        pMpi2SGESimple64_t      sge;
-        pMpi2SGEChain64_t       sgechain;
-        ASSERT(cmd->cmd_flags & CFLAG_DMAVALID);
 
-        /*
-         * Save the number of entries in the DMA
-         * Scatter/Gather list
-         */
-        cookiec = cmd->cmd_cookiec;
-
-        NDBG1(("mptsas_sge_setup: cookiec=%d", cookiec));
-
-        /*
-         * Set read/write bit in control.
-         */
-        if (cmd->cmd_flags & CFLAG_DMASEND) {
-                *control |= MPI2_SCSIIO_CONTROL_WRITE;
-        } else {
-                *control |= MPI2_SCSIIO_CONTROL_READ;
-        }
-
-        ddi_put32(acc_hdl, &frame->DataLength, cmd->cmd_dmacount);
-
-        /*
-         * We have 2 cases here.  First where we can fit all the
-         * SG elements into the main frame, and the case
-         * where we can't.
-         * If we have more cookies than we can attach to a frame
-         * we will need to use a chain element to point
-         * a location of memory where the rest of the S/G
-         * elements reside.
-         */
-        if (cookiec <= MPTSAS_MAX_FRAME_SGES64(mpt)) {
                 dmap = cmd->cmd_sg;
+
                 sge = (pMpi2SGESimple64_t)(&frame->SGL);
                 while (cookiec--) {
-                        ddi_put32(acc_hdl,
-                            &sge->Address.Low, dmap->addr.address64.Low);
-                        ddi_put32(acc_hdl,
-                            &sge->Address.High, dmap->addr.address64.High);
-                        ddi_put32(acc_hdl, &sge->FlagsLength,
-                            dmap->count);
+                ddi_put32(acc_hdl, &sge->Address.Low,
+                    dmap->addr.address64.Low);
+                ddi_put32(acc_hdl, &sge->Address.High,
+                    dmap->addr.address64.High);
+                ddi_put32(acc_hdl, &sge->FlagsLength, dmap->count);
                         flags = ddi_get32(acc_hdl, &sge->FlagsLength);
                         flags |= ((uint32_t)
                             (MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
                             MPI2_SGE_FLAGS_SYSTEM_ADDRESS |
                             MPI2_SGE_FLAGS_64_BIT_ADDRESSING) <<
@@ -4236,15 +4224,11 @@
                         /*
                          * If this is the last cookie, we set the flags
                          * to indicate so
                          */
                         if (cookiec == 0) {
-                                flags |=
-                                    ((uint32_t)(MPI2_SGE_FLAGS_LAST_ELEMENT
-                                    | MPI2_SGE_FLAGS_END_OF_BUFFER
-                                    | MPI2_SGE_FLAGS_END_OF_LIST) <<
-                                    MPI2_SGE_FLAGS_SHIFT);
+                        flags |= end_flags;
                         }
                         if (cmd->cmd_flags & CFLAG_DMASEND) {
                                 flags |= (MPI2_SGE_FLAGS_HOST_TO_IOC <<
                                     MPI2_SGE_FLAGS_SHIFT);
                         } else {
@@ -4253,11 +4237,30 @@
                         }
                         ddi_put32(acc_hdl, &sge->FlagsLength, flags);
                         dmap++;
                         sge++;
                 }
-        } else {
+}
+
+static void
+mptsas_sge_chain(mptsas_t *mpt, mptsas_cmd_t *cmd,
+    pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl)
+{
+        pMpi2SGESimple64_t      sge;
+        pMpi2SGEChain64_t       sgechain;
+        uint64_t                nframe_phys_addr;
+        uint_t                  cookiec;
+        mptti_t                 *dmap;
+        uint32_t                flags;
+        int                     i, j, k, l, frames, sgemax;
+        int                     temp, maxframe_sges;
+        uint8_t                 chainflags;
+        uint16_t                chainlength;
+        mptsas_cache_frames_t   *p;
+
+        cookiec = cmd->cmd_cookiec;
+
                 /*
                  * Hereby we start to deal with multiple frames.
                  * The process is as follows:
                  * 1. Determine how many frames are needed for SGL element
                  *    storage; Note that all frames are stored in contiguous
@@ -4284,32 +4287,26 @@
                  *    hold SGL elements with the last 1 or 2 double-words
                  *    (4 or 8 bytes) un-used. On these controllers, we should
                  *    recognize that there's not enough room for another SGL
                  *    element and move the sge pointer to the next frame.
                  */
-                int             i, j, k, l, frames, sgemax;
-                int             temp;
-                uint8_t         chainflags;
-                uint16_t        chainlength;
-                mptsas_cache_frames_t *p;
 
                 /*
                  * Sgemax is the number of SGE's that will fit
                  * each extra frame and frames is total
                  * number of frames we'll need.  1 sge entry per
                  * frame is reseverd for the chain element thus the -1 below.
                  */
-                sgemax = ((mpt->m_req_frame_size / sizeof (MPI2_SGE_SIMPLE64))
-                    - 1);
-                temp = (cookiec - (MPTSAS_MAX_FRAME_SGES64(mpt) - 1)) / sgemax;
+        sgemax = ((mpt->m_req_frame_size / sizeof (MPI2_SGE_SIMPLE64)) - 1);
+        maxframe_sges = MPTSAS_MAX_FRAME_SGES64(mpt);
+        temp = (cookiec - (maxframe_sges - 1)) / sgemax;
 
                 /*
                  * A little check to see if we need to round up the number
                  * of frames we need
                  */
-                if ((cookiec - (MPTSAS_MAX_FRAME_SGES64(mpt) - 1)) - (temp *
-                    sgemax) > 1) {
+        if ((cookiec - (maxframe_sges - 1)) - (temp * sgemax) > 1) {
                         frames = (temp + 1);
                 } else {
                         frames = temp;
                 }
                 dmap = cmd->cmd_sg;
@@ -4316,46 +4313,19 @@
                 sge = (pMpi2SGESimple64_t)(&frame->SGL);
 
                 /*
                  * First fill in the main frame
                  */
-                for (j = 1; j < MPTSAS_MAX_FRAME_SGES64(mpt); j++) {
-                        ddi_put32(acc_hdl, &sge->Address.Low,
-                            dmap->addr.address64.Low);
-                        ddi_put32(acc_hdl, &sge->Address.High,
-                            dmap->addr.address64.High);
-                        ddi_put32(acc_hdl, &sge->FlagsLength, dmap->count);
-                        flags = ddi_get32(acc_hdl, &sge->FlagsLength);
-                        flags |= ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
-                            MPI2_SGE_FLAGS_SYSTEM_ADDRESS |
-                            MPI2_SGE_FLAGS_64_BIT_ADDRESSING) <<
-                            MPI2_SGE_FLAGS_SHIFT);
+        j = maxframe_sges - 1;
+        mptsas_sge_mainframe(cmd, frame, acc_hdl, j,
+            ((uint32_t)(MPI2_SGE_FLAGS_LAST_ELEMENT) <<
+            MPI2_SGE_FLAGS_SHIFT));
+        dmap += j;
+        sge += j;
+        j++;
 
                         /*
-                         * If this is the last SGE of this frame
-                         * we set the end of list flag
-                         */
-                        if (j == (MPTSAS_MAX_FRAME_SGES64(mpt) - 1)) {
-                                flags |= ((uint32_t)
-                                    (MPI2_SGE_FLAGS_LAST_ELEMENT) <<
-                                    MPI2_SGE_FLAGS_SHIFT);
-                        }
-                        if (cmd->cmd_flags & CFLAG_DMASEND) {
-                                flags |=
-                                    (MPI2_SGE_FLAGS_HOST_TO_IOC <<
-                                    MPI2_SGE_FLAGS_SHIFT);
-                        } else {
-                                flags |=
-                                    (MPI2_SGE_FLAGS_IOC_TO_HOST <<
-                                    MPI2_SGE_FLAGS_SHIFT);
-                        }
-                        ddi_put32(acc_hdl, &sge->FlagsLength, flags);
-                        dmap++;
-                        sge++;
-                }
-
-                /*
                  * Fill in the chain element in the main frame.
                  * About calculation on ChainOffset:
                  * 1. Struct msg_scsi_io_request has 4 double-words (16 bytes)
                  *    in the end reserved for SGL element storage
                  *    (MPI2_SGE_IO_UNION); we should count it in our
@@ -4394,13 +4364,12 @@
 
                 p = cmd->cmd_extra_frames;
 
                 ddi_put16(acc_hdl, &sgechain->Length, chainlength);
                 ddi_put32(acc_hdl, &sgechain->Address.Low,
-                    p->m_phys_addr);
-                /* SGL is allocated in the first 4G mem range */
-                ddi_put32(acc_hdl, &sgechain->Address.High, 0);
+            (p->m_phys_addr&0xffffffffull));
+        ddi_put32(acc_hdl, &sgechain->Address.High, p->m_phys_addr>>32);
 
                 /*
                  * If there are more than 2 frames left we have to
                  * fill in the next chain offset to the location of
                  * the chain element in the next frame.
@@ -4454,16 +4423,18 @@
                                          * k is the frame counter and (k + 1)
                                          * is the number of the next frame.
                                          * Note that frames are in contiguous
                                          * memory space.
                                          */
+                                nframe_phys_addr = p->m_phys_addr +
+                                    (mpt->m_req_frame_size * k);
                                         ddi_put32(p->m_acc_hdl,
                                             &sgechain->Address.Low,
-                                            (p->m_phys_addr +
-                                            (mpt->m_req_frame_size * k)));
+                                    nframe_phys_addr&0xffffffffull);
                                         ddi_put32(p->m_acc_hdl,
-                                            &sgechain->Address.High, 0);
+                                    &sgechain->Address.High,
+                                    nframe_phys_addr>>32);
 
                                         /*
                                          * If there are more than 2 frames left
                                          * we have to next chain offset to
                                          * the location of the chain element
@@ -4562,13 +4533,379 @@
 
                 /*
                  * Sync DMA with the chain buffers that were just created
                  */
                 (void) ddi_dma_sync(p->m_dma_hdl, 0, 0, DDI_DMA_SYNC_FORDEV);
+}
+
+static void
+mptsas_ieee_sge_mainframe(mptsas_cmd_t *cmd, pMpi2SCSIIORequest_t frame,
+    ddi_acc_handle_t acc_hdl, uint_t cookiec,
+    uint8_t end_flag)
+{
+        pMpi2IeeeSgeSimple64_t  ieeesge;
+        mptti_t                 *dmap;
+        uint8_t                 flags;
+
+        dmap = cmd->cmd_sg;
+
+        NDBG1(("mptsas_ieee_sge_mainframe: cookiec=%d, %s", cookiec,
+            cmd->cmd_flags & CFLAG_DMASEND?"Out":"In"));
+
+        ieeesge = (pMpi2IeeeSgeSimple64_t)(&frame->SGL);
+        while (cookiec--) {
+                ddi_put32(acc_hdl, &ieeesge->Address.Low,
+                    dmap->addr.address64.Low);
+                ddi_put32(acc_hdl, &ieeesge->Address.High,
+                    dmap->addr.address64.High);
+                ddi_put32(acc_hdl, &ieeesge->Length, dmap->count);
+                NDBG1(("mptsas_ieee_sge_mainframe: len=%d", dmap->count));
+                flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT |
+                    MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR);
+
+                /*
+                 * On the final cookie, OR in the caller-supplied
+                 * end_flag (the chain path passes 0 here).
+                 */
+                if (cookiec == 0) {
+                        flags |= end_flag;
         }
+
+                /*
+                 * XXX: unlike mptsas_sge_mainframe(), no direction bit is
+                 * set in Flags from CFLAG_DMASEND; confirm this is right.
+                 */
+                ddi_put8(acc_hdl, &ieeesge->Flags, flags);
+                dmap++;
+                ieeesge++;
+        }
 }
 
+static void
+mptsas_ieee_sge_chain(mptsas_t *mpt, mptsas_cmd_t *cmd,
+    pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl)
+{
+        pMpi2IeeeSgeSimple64_t  ieeesge;
+        pMpi25IeeeSgeChain64_t  ieeesgechain;
+        uint64_t                nframe_phys_addr;
+        uint_t                  cookiec;
+        mptti_t                 *dmap;
+        uint8_t                 flags;
+        int                     i, j, k, l, frames, sgemax;
+        int                     temp, maxframe_sges;
+        uint8_t                 chainflags;
+        uint32_t                chainlength;
+        mptsas_cache_frames_t   *p;
+
+        cookiec = cmd->cmd_cookiec;
+
+        NDBG1(("mptsas_ieee_sge_chain: cookiec=%d", cookiec));
+
+        /*
+         * Hereby we start to deal with multiple frames.
+         * The process is as follows:
+         * 1. Determine how many frames are needed for SGL element
+         *    storage; Note that all frames are stored in contiguous
+         *    memory space and in 64-bit DMA mode each element is
+         *    4 double-words (16 bytes) long.
+         * 2. Fill up the main frame. We need to do this separately
+         *    since it contains the SCSI IO request header and needs
+         *    dedicated processing. Note that the last 4 double-words
+         *    of the SCSI IO header is for SGL element storage
+         *    (MPI2_SGE_IO_UNION).
+         * 3. Fill the chain element in the main frame, so the DMA
+         *    engine can use the following frames.
+         * 4. Enter a loop to fill the remaining frames. Note that the
+         *    last frame contains no chain element.  The remaining
+         *    frames go into the mpt SGL buffer allocated on the fly,
+         *    not immediately following the main message frame, as in
+         *    Gen1.
+         * Some restrictions:
+         * 1. For 64-bit DMA, the simple element and chain element
+         *    are both of 4 double-words (16 bytes) in size. The
+         *    chain elements written below carry the full 64-bit
+         *    frame address (low and high 32 bits of m_phys_addr).
+         * 2. On some controllers (like the 1064/1068), a frame can
+         *    hold SGL elements with the last 1 or 2 double-words
+         *    (4 or 8 bytes) un-used. On these controllers, we should
+         *    recognize that there's not enough room for another SGL
+         *    element and move the sge pointer to the next frame.
+         */
+
+        /*
+         * Sgemax is the number of SGE's that will fit
+         * each extra frame and frames is total
+         * number of frames we'll need.  1 sge entry per
+         * frame is reserved for the chain element thus the -1 below.
+         */
+        sgemax = ((mpt->m_req_frame_size / sizeof (MPI2_IEEE_SGE_SIMPLE64))
+            - 1);
+        maxframe_sges = MPTSAS_MAX_FRAME_SGES64(mpt);
+        temp = (cookiec - (maxframe_sges - 1)) / sgemax;
+
+        /*
+         * A little check to see if we need to round up the number
+         * of frames we need
+         */
+        if ((cookiec - (maxframe_sges - 1)) - (temp * sgemax) > 1) {
+                frames = (temp + 1);
+        } else {
+                frames = temp;
+        }
+        NDBG1(("mptsas_ieee_sge_chain: temp=%d, frames=%d", temp, frames));
+        dmap = cmd->cmd_sg;
+        ieeesge = (pMpi2IeeeSgeSimple64_t)(&frame->SGL);
+
+        /*
+         * First fill in the main frame
+         */
+        j = maxframe_sges - 1;
+        mptsas_ieee_sge_mainframe(cmd, frame, acc_hdl, j, 0);
+        dmap += j;
+        ieeesge += j;
+        j++;
+
+        /*
+         * Fill in the chain element in the main frame.
+         * About calculation on ChainOffset:
+         * 1. Struct msg_scsi_io_request has 4 double-words (16 bytes)
+         *    in the end reserved for SGL element storage
+         *    (MPI2_SGE_IO_UNION); we should count it in our
+         *    calculation.  See its definition in the header file.
+         * 2. Constant j is the counter of the current SGL element
+         *    that will be processed, and (j - 1) is the number of
+         *    SGL elements that have been processed (stored in the
+         *    main frame).
+         * 3. ChainOffset value should be in units of quad-words (16
+         *    bytes) so the last value should be divided by 16.
+         */
+        ddi_put8(acc_hdl, &frame->ChainOffset,
+            (sizeof (MPI2_SCSI_IO_REQUEST) -
+            sizeof (MPI2_SGE_IO_UNION) +
+            (j - 1) * sizeof (MPI2_IEEE_SGE_SIMPLE64)) >> 4);
+        ieeesgechain = (pMpi25IeeeSgeChain64_t)ieeesge;
+        chainflags = (MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT |
+            MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR);
+        ddi_put8(acc_hdl, &ieeesgechain->Flags, chainflags);
+
+        /*
+         * The size of the next frame is the accurate size of space
+         * (in bytes) used to store the SGL elements. j is the counter
+         * of SGL elements. (j - 1) is the number of SGL elements that
+         * have been processed (stored in frames).
+         */
+        if (frames >= 2) {
+                chainlength = mpt->m_req_frame_size /
+                    sizeof (MPI2_IEEE_SGE_SIMPLE64) *
+                    sizeof (MPI2_IEEE_SGE_SIMPLE64);
+        } else {
+                chainlength = ((cookiec - (j - 1)) *
+                    sizeof (MPI2_IEEE_SGE_SIMPLE64));
+        }
+
+        p = cmd->cmd_extra_frames;
+
+        ddi_put32(acc_hdl, &ieeesgechain->Length, chainlength);
+        ddi_put32(acc_hdl, &ieeesgechain->Address.Low,
+            p->m_phys_addr&0xffffffffull);
+        ddi_put32(acc_hdl, &ieeesgechain->Address.High, p->m_phys_addr>>32);
+
+        /*
+         * If at least 2 extra frames are needed, the first extra
+         * frame ends in a chain element of its own, so fill in the
+         * next chain offset with its location in that frame.
+         * sgemax is the number of simple elements in an extra
+         * frame. NextChainOffset is written here in units of
+         * 16 bytes (byte offset >> 4).
+         */
+        if (frames >= 2) {
+                ddi_put8(acc_hdl, &ieeesgechain->NextChainOffset,
+                    (sgemax * sizeof (MPI2_IEEE_SGE_SIMPLE64)) >> 4);
+        } else {
+                ddi_put8(acc_hdl, &ieeesgechain->NextChainOffset, 0);
+        }
+
+        /*
+         * Jump to next frame;
+         * Starting here, chain buffers go into the per command SGL.
+         * This buffer is allocated when chain buffers are needed.
+         */
+        ieeesge = (pMpi2IeeeSgeSimple64_t)p->m_frames_addr;
+        i = cookiec;
+
+        /*
+         * Start filling in frames with SGE's.  If we
+         * reach the end of frame and still have SGE's
+         * to fill we need to add a chain element and
+         * use another frame.  j will be our counter
+         * for what cookie we are at and i will be
+         * the total cookiec. k is the current frame
+         */
+        for (k = 1; k <= frames; k++) {
+                for (l = 1; (l <= (sgemax + 1)) && (j <= i); j++, l++) {
+
+                        /*
+                         * If we have reached the end of frame
+                         * and we have more SGE's to fill in
+                         * we have to fill the final entry
+                         * with a chain element and then
+                         * continue to the next frame
+                         */
+                        if ((l == (sgemax + 1)) && (k != frames)) {
+                                ieeesgechain = (pMpi25IeeeSgeChain64_t)ieeesge;
+                                j--;
+                                chainflags =
+                                    MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT |
+                                    MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR;
+                                ddi_put8(p->m_acc_hdl,
+                                    &ieeesgechain->Flags, chainflags);
+                                /*
+                                 * k is the frame counter and (k + 1)
+                                 * is the number of the next frame.
+                                 * Note that frames are in contiguous
+                                 * memory space.
+                                 */
+                                nframe_phys_addr = p->m_phys_addr +
+                                    (mpt->m_req_frame_size * k);
+                                ddi_put32(p->m_acc_hdl,
+                                    &ieeesgechain->Address.Low,
+                                    nframe_phys_addr&0xffffffffull);
+                                ddi_put32(p->m_acc_hdl,
+                                    &ieeesgechain->Address.High,
+                                    nframe_phys_addr>>32);
+
+                                /*
+                                 * If at least 2 frames remain after
+                                 * this one, set the next chain offset
+                                 * to the location of the chain element
+                                 * in the next frame and fill in the
+                                 * length of the next chain
+                                 */
+                                if ((frames - k) >= 2) {
+                                        ddi_put8(p->m_acc_hdl,
+                                            &ieeesgechain->NextChainOffset,
+                                            (sgemax *
+                                            sizeof (MPI2_IEEE_SGE_SIMPLE64))
+                                            >> 4);
+                                        ddi_put32(p->m_acc_hdl,
+                                            &ieeesgechain->Length,
+                                            mpt->m_req_frame_size /
+                                            sizeof (MPI2_IEEE_SGE_SIMPLE64) *
+                                            sizeof (MPI2_IEEE_SGE_SIMPLE64));
+                                } else {
+                                        /*
+                                         * The next frame is the last. Set
+                                         * the NextChainOffset to 0 and
+                                         * Length is the total size of
+                                         * all remaining simple elements
+                                         */
+                                        ddi_put8(p->m_acc_hdl,
+                                            &ieeesgechain->NextChainOffset,
+                                            0);
+                                        ddi_put32(p->m_acc_hdl,
+                                            &ieeesgechain->Length,
+                                            (cookiec - j) *
+                                            sizeof (MPI2_IEEE_SGE_SIMPLE64));
+                                }
+
+                                /* Jump to the next frame */
+                                ieeesge = (pMpi2IeeeSgeSimple64_t)
+                                    ((char *)p->m_frames_addr +
+                                    (int)mpt->m_req_frame_size * k);
+
+                                continue;
+                        }
+
+                        ddi_put32(p->m_acc_hdl,
+                            &ieeesge->Address.Low,
+                            dmap->addr.address64.Low);
+                        ddi_put32(p->m_acc_hdl,
+                            &ieeesge->Address.High,
+                            dmap->addr.address64.High);
+                        ddi_put32(p->m_acc_hdl,
+                            &ieeesge->Length, dmap->count);
+                        flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT |
+                            MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR);
+
+                        /*
+                         * If we are at the end of the frame and
+                         * there is another frame to fill in
+                         * do we need to do anything?
+                         * if ((l == sgemax) && (k != frames)) {
+                         * }
+                         */
+
+                        /*
+                         * If this is the final cookie set end of list.
+                         */
+                        if (j == i) {
+                                flags |= MPI25_IEEE_SGE_FLAGS_END_OF_LIST;
+                        }
+
+                        ddi_put8(p->m_acc_hdl, &ieeesge->Flags, flags);
+                        dmap++;
+                        ieeesge++;
+                }
+        }
+
+        /*
+         * Sync DMA with the chain buffers that were just created
+         */
+        (void) ddi_dma_sync(p->m_dma_hdl, 0, 0, DDI_DMA_SYNC_FORDEV);
+}
+
+static void
+mptsas_sge_setup(mptsas_t *mpt, mptsas_cmd_t *cmd, uint32_t *control,
+    pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl)
+{
+        ASSERT(cmd->cmd_flags & CFLAG_DMAVALID);
+
+        NDBG1(("mptsas_sge_setup: cookiec=%d", cmd->cmd_cookiec));
+
+        /*
+         * Flag the transfer direction in *control: write for DMASEND.
+         */
+        if (cmd->cmd_flags & CFLAG_DMASEND) {
+                *control |= MPI2_SCSIIO_CONTROL_WRITE;
+        } else {
+                *control |= MPI2_SCSIIO_CONTROL_READ;
+        }
+
+        ddi_put32(acc_hdl, &frame->DataLength, cmd->cmd_dmacount);
+
+        /*
+         * There are 4 cases here: either all SG elements fit in the
+         * main frame or they do not, and in each case the element
+         * format differs when mpt->m_MPI25 is set (IEEE SGEs) versus
+         * when it is not (MPI2 SGEs).
+         * If there are more cookies than MPTSAS_MAX_FRAME_SGES64(mpt)
+         * a chain element is needed to point at the memory
+         * where the rest of the S/G elements reside; the *_chain
+         * routines handle that.
+         */
+        if (cmd->cmd_cookiec <= MPTSAS_MAX_FRAME_SGES64(mpt)) {
+                if (mpt->m_MPI25) {
+                        mptsas_ieee_sge_mainframe(cmd, frame, acc_hdl,
+                            cmd->cmd_cookiec,
+                            MPI25_IEEE_SGE_FLAGS_END_OF_LIST);
+                } else {
+                        mptsas_sge_mainframe(cmd, frame, acc_hdl,
+                            cmd->cmd_cookiec,
+                            ((uint32_t)(MPI2_SGE_FLAGS_LAST_ELEMENT
+                            | MPI2_SGE_FLAGS_END_OF_BUFFER
+                            | MPI2_SGE_FLAGS_END_OF_LIST) <<
+                            MPI2_SGE_FLAGS_SHIFT));
+                }
+        } else {
+                if (mpt->m_MPI25) {
+                        mptsas_ieee_sge_chain(mpt, cmd, frame, acc_hdl);
+                } else {
+                        mptsas_sge_chain(mpt, cmd, frame, acc_hdl);
+                }
+        }
+}
+
 /*
  * Interrupt handling
  * Utility routine.  Poll for status of a command sent to HBA
  * without interrupts (a FLAG_NOINTR command).
  */
@@ -5489,11 +5826,12 @@
          * after they've all been processed.
          */
         reply_type = ddi_get8(mpt->m_acc_post_queue_hdl,
             &reply_desc_union->Default.ReplyFlags);
         reply_type &= MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK;
-        if (reply_type == MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS) {
+        if (reply_type == MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS ||
+            reply_type == MPI25_RPY_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO_SUCCESS) {
                 mptsas_handle_scsi_io_success(mpt, reply_desc_union);
         } else if (reply_type == MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY) {
                 mptsas_handle_address_reply(mpt, reply_desc_union);
         } else {
                 mptsas_log(mpt, CE_WARN, "?Bad reply type %x", reply_type);
@@ -9303,11 +9641,11 @@
         va_end(ap);
 
 #ifdef PROM_PRINTF
         prom_printf("%s:\t%s\n", mptsas_label, mptsas_log_buf);
 #else
-        scsi_log(dev, mptsas_label, SCSI_DEBUG, "%s\n", mptsas_log_buf);
+        scsi_log(dev, mptsas_label, CE_CONT, "!%s\n", mptsas_log_buf);
 #endif
         mutex_exit(&mptsas_log_mutex);
 }
 #endif
 
@@ -9665,100 +10003,147 @@
         NDBG25(("?pkt_scbp=0x%x cmd_flags=0x%x\n", cmd->cmd_pkt->pkt_scbp ?
             *(cmd->cmd_pkt->pkt_scbp) : 0, cmd->cmd_flags));
 }
 
 static void
-mptsas_start_passthru(mptsas_t *mpt, mptsas_cmd_t *cmd)
+mptsas_passthru_sge(ddi_acc_handle_t acc_hdl, mptsas_pt_request_t *pt,
+    pMpi2SGESimple64_t sgep)
 {
-        caddr_t                 memp;
-        pMPI2RequestHeader_t    request_hdrp;
-        struct scsi_pkt         *pkt = cmd->cmd_pkt;
-        mptsas_pt_request_t     *pt = pkt->pkt_ha_private;
-        uint32_t                request_size, data_size, dataout_size;
-        uint32_t                direction;
+        uint32_t                sge_flags;
+        uint32_t                data_size, dataout_size;
         ddi_dma_cookie_t        data_cookie;
         ddi_dma_cookie_t        dataout_cookie;
-        uint32_t                request_desc_low, request_desc_high = 0;
-        uint32_t                i, sense_bufp;
-        uint8_t                 desc_type;
-        uint8_t                 *request, function;
-        ddi_dma_handle_t        dma_hdl = mpt->m_dma_req_frame_hdl;
-        ddi_acc_handle_t        acc_hdl = mpt->m_acc_req_frame_hdl;
 
-        desc_type = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE;
-
-        request = pt->request;
-        direction = pt->direction;
-        request_size = pt->request_size;
         data_size = pt->data_size;
         dataout_size = pt->dataout_size;
         data_cookie = pt->data_cookie;
         dataout_cookie = pt->dataout_cookie;
 
-        /*
-         * Store the passthrough message in memory location
-         * corresponding to our slot number
-         */
-        memp = mpt->m_req_frame + (mpt->m_req_frame_size * cmd->cmd_slot);
-        request_hdrp = (pMPI2RequestHeader_t)memp;
-        bzero(memp, mpt->m_req_frame_size);
-
-        for (i = 0; i < request_size; i++) {
-                bcopy(request + i, memp + i, 1);
-        }
-
-        if (data_size || dataout_size) {
-                pMpi2SGESimple64_t      sgep;
-                uint32_t                sge_flags;
-
-                sgep = (pMpi2SGESimple64_t)((uint8_t *)request_hdrp +
-                    request_size);
                 if (dataout_size) {
-
                         sge_flags = dataout_size |
                             ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
                             MPI2_SGE_FLAGS_END_OF_BUFFER |
                             MPI2_SGE_FLAGS_HOST_TO_IOC |
                             MPI2_SGE_FLAGS_64_BIT_ADDRESSING) <<
                             MPI2_SGE_FLAGS_SHIFT);
                         ddi_put32(acc_hdl, &sgep->FlagsLength, sge_flags);
                         ddi_put32(acc_hdl, &sgep->Address.Low,
-                            (uint32_t)(dataout_cookie.dmac_laddress &
-                            0xffffffffull));
+                    (uint32_t)(dataout_cookie.dmac_laddress & 0xffffffffull));
                         ddi_put32(acc_hdl, &sgep->Address.High,
-                            (uint32_t)(dataout_cookie.dmac_laddress
-                            >> 32));
+                    (uint32_t)(dataout_cookie.dmac_laddress >> 32));
                         sgep++;
                 }
                 sge_flags = data_size;
                 sge_flags |= ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
                     MPI2_SGE_FLAGS_LAST_ELEMENT |
                     MPI2_SGE_FLAGS_END_OF_BUFFER |
                     MPI2_SGE_FLAGS_END_OF_LIST |
                     MPI2_SGE_FLAGS_64_BIT_ADDRESSING) <<
                     MPI2_SGE_FLAGS_SHIFT);
-                if (direction == MPTSAS_PASS_THRU_DIRECTION_WRITE) {
+        if (pt->direction == MPTSAS_PASS_THRU_DIRECTION_WRITE) {
                         sge_flags |= ((uint32_t)(MPI2_SGE_FLAGS_HOST_TO_IOC) <<
                             MPI2_SGE_FLAGS_SHIFT);
                 } else {
                         sge_flags |= ((uint32_t)(MPI2_SGE_FLAGS_IOC_TO_HOST) <<
                             MPI2_SGE_FLAGS_SHIFT);
                 }
-                ddi_put32(acc_hdl, &sgep->FlagsLength,
-                    sge_flags);
+        ddi_put32(acc_hdl, &sgep->FlagsLength, sge_flags);
                 ddi_put32(acc_hdl, &sgep->Address.Low,
-                    (uint32_t)(data_cookie.dmac_laddress &
-                    0xffffffffull));
+            (uint32_t)(data_cookie.dmac_laddress & 0xffffffffull));
                 ddi_put32(acc_hdl, &sgep->Address.High,
                     (uint32_t)(data_cookie.dmac_laddress >> 32));
+}
+
+/*
+ * Write the IEEE-format simple SGEs for a pass-through request, starting
+ * at ieeesgep: an optional element for the data-out buffer followed by
+ * the element for the data buffer, which ends the list.
+ */
+static void
+mptsas_passthru_ieee_sge(ddi_acc_handle_t acc_hdl, mptsas_pt_request_t *pt,
+    pMpi2IeeeSgeSimple64_t ieeesgep)
+{
+        ddi_dma_cookie_t        *in_ckp = &pt->data_cookie;
+        ddi_dma_cookie_t        *out_ckp = &pt->dataout_cookie;
+        uint8_t                 flags;
+
+        /*
+         * Both elements are simple elements that refer to system memory;
+         * only the final one additionally carries the end-of-list flag.
+         */
+        flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT |
+            MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR);
+
+        /*
+         * When there is data-out, it gets its own element ahead of the
+         * element for the data buffer.
+         */
+        if (pt->dataout_size) {
+                ddi_put32(acc_hdl, &ieeesgep->Length, pt->dataout_size);
+                ddi_put32(acc_hdl, &ieeesgep->Address.Low,
+                    (uint32_t)(out_ckp->dmac_laddress & 0xffffffffull));
+                ddi_put32(acc_hdl, &ieeesgep->Address.High,
+                    (uint32_t)(out_ckp->dmac_laddress >> 32));
+                ddi_put8(acc_hdl, &ieeesgep->Flags, flags);
+                ieeesgep++;
         }
+
+        /*
+         * The data element is written unconditionally, whatever the
+         * value of data_size.
+         */
+        flags |= MPI25_IEEE_SGE_FLAGS_END_OF_LIST;
+        ddi_put32(acc_hdl, &ieeesgep->Length, pt->data_size);
+        ddi_put32(acc_hdl, &ieeesgep->Address.Low,
+            (uint32_t)(in_ckp->dmac_laddress & 0xffffffffull));
+        ddi_put32(acc_hdl, &ieeesgep->Address.High,
+            (uint32_t)(in_ckp->dmac_laddress >> 32));
+        ddi_put8(acc_hdl, &ieeesgep->Flags, flags);
+}
 
+static void
+mptsas_start_passthru(mptsas_t *mpt, mptsas_cmd_t *cmd)
+{
+        caddr_t                 memp;
+        pMPI2RequestHeader_t    request_hdrp;
+        struct scsi_pkt         *pkt = cmd->cmd_pkt;
+        mptsas_pt_request_t     *pt = pkt->pkt_ha_private;
+        uint32_t                request_size;
+        uint32_t                request_desc_low, request_desc_high = 0;
+        uint64_t                sense_bufp;
+        uint8_t                 desc_type;
+        uint8_t                 *request, function;
+        ddi_dma_handle_t        dma_hdl = mpt->m_dma_req_frame_hdl;
+        ddi_acc_handle_t        acc_hdl = mpt->m_acc_req_frame_hdl;
+
+        desc_type = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE;
+
+        request = pt->request;
+        request_size = pt->request_size;
+
+        /*
+         * Store the passthrough message in memory location
+         * corresponding to our slot number
+         */
+        memp = mpt->m_req_frame + (mpt->m_req_frame_size * cmd->cmd_slot);
+        request_hdrp = (pMPI2RequestHeader_t)memp;
+        bzero(memp, mpt->m_req_frame_size);
+
+        bcopy(request, memp, request_size);
+
+        NDBG15(("mptsas_start_passthru: Func 0x%x, MsgFlags 0x%x, "
+            "size=%d, in %d, out %d", request_hdrp->Function,
+            request_hdrp->MsgFlags, request_size,
+            pt->data_size, pt->dataout_size));
+
+        /*
+         * Add an SGE, even if the length is zero.
+         */
+        if (mpt->m_MPI25 && pt->simple == 0) {
+                mptsas_passthru_ieee_sge(acc_hdl, pt,
+                    (pMpi2IeeeSgeSimple64_t)
+                    ((uint8_t *)request_hdrp + pt->sgl_offset));
+        } else {
+                mptsas_passthru_sge(acc_hdl, pt,
+                    (pMpi2SGESimple64_t)
+                    ((uint8_t *)request_hdrp + pt->sgl_offset));
+        }
+
         function = request_hdrp->Function;
         if ((function == MPI2_FUNCTION_SCSI_IO_REQUEST) ||
             (function == MPI2_FUNCTION_RAID_SCSI_IO_PASSTHROUGH)) {
                 pMpi2SCSIIORequest_t    scsi_io_req;
 
+                NDBG15(("mptsas_start_passthru: Is SCSI IO Req"));
                 scsi_io_req = (pMpi2SCSIIORequest_t)request_hdrp;
                 /*
                  * Put SGE for data and data_out buffer at the end of
                  * scsi_io_request message header.(64 bytes in total)
                  * Following above SGEs, the residual space will be
@@ -9766,12 +10151,12 @@
                  */
                 ddi_put8(acc_hdl,
                     &scsi_io_req->SenseBufferLength,
                     (uint8_t)(request_size - 64));
 
-                sense_bufp = mpt->m_req_frame_dma_addr +
-                    (mpt->m_req_frame_size * cmd->cmd_slot);
+                sense_bufp = (uint32_t)(mpt->m_req_frame_dma_addr +
+                    (mpt->m_req_frame_size * cmd->cmd_slot) & 0xffffffffull);
                 sense_bufp += 64;
                 ddi_put32(acc_hdl,
                     &scsi_io_req->SenseBufferLowAddress, sense_bufp);
 
                 /*
@@ -9805,16 +10190,377 @@
             (mptsas_check_acc_handle(acc_hdl) != DDI_SUCCESS)) {
                 ddi_fm_service_impact(mpt->m_dip, DDI_SERVICE_UNAFFECTED);
         }
 }
 
+/*
+ * Function type shared by the per-function "pre" routines, followed by
+ * their forward declarations.  Each routine takes an mptsas_t and an
+ * mptsas_pt_request_t and returns nothing.  The routines are referenced
+ * from mptsas_func_list[], except for the two *_25_* variants, which are
+ * called from mpi_pre_fw_download() and mpi_pre_fw_upload().
+ */
+typedef void (mptsas_pre_f)(mptsas_t *, mptsas_pt_request_t *);
+static mptsas_pre_f     mpi_pre_ioc_facts;
+static mptsas_pre_f     mpi_pre_port_facts;
+static mptsas_pre_f     mpi_pre_fw_download;
+static mptsas_pre_f     mpi_pre_fw_25_download;
+static mptsas_pre_f     mpi_pre_fw_upload;
+static mptsas_pre_f     mpi_pre_fw_25_upload;
+static mptsas_pre_f     mpi_pre_sata_passthrough;
+static mptsas_pre_f     mpi_pre_smp_passthrough;
+static mptsas_pre_f     mpi_pre_config;
+static mptsas_pre_f     mpi_pre_sas_io_unit_control;
+static mptsas_pre_f     mpi_pre_scsi_io_req;
 
+/*
+ * Set up the SGL offset in pt for a FW_DOWNLOAD request on a SAS2 HBA.
+ * SAS3 HBAs are handed off to mpi_pre_fw_25_download().
+ */
+static void
+mpi_pre_fw_download(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+        pMpi2FWDownloadTCSGE_t tce;
+
+        if (mpt->m_MPI25) {
+                /* SAS3 has its own preparation routine. */
+                mpi_pre_fw_25_download(mpt, pt);
+                return;
+        }
+
+        /*
+         * The user's request is expected to carry a transaction context
+         * element (TCE) at the spot where the SGL would start.  The SGL
+         * is placed directly behind the TCE.  That seems to work, though
+         * it is not really known why; other drivers tend to build an
+         * extra SGL and refer to the TCE through it.
+         */
+        tce = (pMpi2FWDownloadTCSGE_t)
+            &((pMpi2FWDownloadRequest)pt->request)->SGL;
+        if (!(tce->ContextSize == 0 && tce->DetailsLength == 12 &&
+            tce->Flags == MPI2_SGE_FLAGS_TRANSACTION_ELEMENT)) {
+                /* Only a warning; the request is still prepared. */
+                mptsas_log(mpt, CE_WARN, "FW Download tce invalid!");
+        }
+
+        pt->sgl_offset = offsetof(MPI2_FW_DOWNLOAD_REQUEST, SGL) +
+            sizeof (*tce);
+
+        /* Size mismatches are reported through debug output only. */
+        if (pt->sgl_offset != pt->request_size) {
+                NDBG15(("mpi_pre_fw_download(): Incorrect req size, "
+                    "0x%x, should be 0x%x, dataoutsz 0x%x",
+                    (int)pt->request_size, (int)pt->sgl_offset,
+                    (int)pt->dataout_size));
+        }
+        if (pt->data_size < sizeof (MPI2_FW_DOWNLOAD_REPLY)) {
+                NDBG15(("mpi_pre_fw_download(): Incorrect rep size, "
+                    "0x%x, should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_FW_DOWNLOAD_REPLY)));
+        }
+}
+
+/*
+ * Prepare the pt for a SAS3 FW_DOWNLOAD request.
+ *
+ * The ImageOffset and ImageSize values are copied from the transaction
+ * context element supplied by the user into the corresponding fields of
+ * the MPI2.5 request, and pt->sgl_offset is set to the offset of the SGL
+ * in the MPI2.5 request.  Problems with the TCE or with the request and
+ * reply sizes are only reported (warning / debug output); the request is
+ * prepared regardless.
+ */
+static void
+mpi_pre_fw_25_download(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+        pMpi2FWDownloadTCSGE_t tcsge;
+        pMpi2FWDownloadRequest req2;
+        pMpi25FWDownloadRequest req25;
+
+        /*
+         * User requests should come in with the Transaction
+         * context element where the SGL will go. The new firmware
+         * doesn't use TCE and has space in the main request for
+         * this information. So move to the right place.
+         *
+         * req2 and req25 are two views of the same buffer, pt->request.
+         */
+        req2 = (pMpi2FWDownloadRequest)pt->request;
+        req25 = (pMpi25FWDownloadRequest)pt->request;
+        tcsge = (pMpi2FWDownloadTCSGE_t)&req2->SGL;
+        if (tcsge->ContextSize != 0 || tcsge->DetailsLength != 12 ||
+            tcsge->Flags != MPI2_SGE_FLAGS_TRANSACTION_ELEMENT) {
+                mptsas_log(mpt, CE_WARN, "FW Download tce invalid!");
+        }
+        req25->ImageOffset = tcsge->ImageOffset;
+        req25->ImageSize = tcsge->ImageSize;
+
+        pt->sgl_offset = offsetof(MPI25_FW_DOWNLOAD_REQUEST, SGL);
+        if (pt->request_size != pt->sgl_offset) {
+                NDBG15(("mpi_pre_fw_25_download(): Incorrect req size, "
+                    "0x%x, should be 0x%x, dataoutsz 0x%x",
+                    pt->request_size, pt->sgl_offset,
+                    pt->dataout_size));
+        }
+        if (pt->data_size < sizeof (MPI2_FW_DOWNLOAD_REPLY)) {
+                /*
+                 * Report the size of the download reply, which is what
+                 * the check above compares against.
+                 */
+                NDBG15(("mpi_pre_fw_25_download(): Incorrect rep size, "
+                    "0x%x, should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_FW_DOWNLOAD_REPLY)));
+        }
+}
+
+/*
+ * Set up the SGL offset in pt for a FW_UPLOAD request on a SAS2 HBA.
+ * SAS3 HBAs are handed off to mpi_pre_fw_25_upload().
+ */
+static void
+mpi_pre_fw_upload(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+        pMpi2FWUploadTCSGE_t tce;
+
+        if (mpt->m_MPI25) {
+                /* SAS3 has its own preparation routine. */
+                mpi_pre_fw_25_upload(mpt, pt);
+                return;
+        }
+
+        /*
+         * The user's request is expected to carry a transaction context
+         * element (TCE) at the spot where the SGL would start.  The SGL
+         * is placed directly behind the TCE.  That seems to work, though
+         * it is not really known why; other drivers tend to build an
+         * extra SGL and refer to the TCE through it.
+         */
+        tce = (pMpi2FWUploadTCSGE_t)
+            &((pMpi2FWUploadRequest_t)pt->request)->SGL;
+        if (!(tce->ContextSize == 0 && tce->DetailsLength == 12 &&
+            tce->Flags == MPI2_SGE_FLAGS_TRANSACTION_ELEMENT)) {
+                /* Only a warning; the request is still prepared. */
+                mptsas_log(mpt, CE_WARN, "FW Upload tce invalid!");
+        }
+
+        pt->sgl_offset = offsetof(MPI2_FW_UPLOAD_REQUEST, SGL) +
+            sizeof (*tce);
+
+        /* Size mismatches are reported through debug output only. */
+        if (pt->sgl_offset != pt->request_size) {
+                NDBG15(("mpi_pre_fw_upload(): Incorrect req size, "
+                    "0x%x, should be 0x%x, dataoutsz 0x%x",
+                    pt->request_size, pt->sgl_offset,
+                    pt->dataout_size));
+        }
+        if (pt->data_size < sizeof (MPI2_FW_UPLOAD_REPLY)) {
+                NDBG15(("mpi_pre_fw_upload(): Incorrect rep size, "
+                    "0x%x, should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_FW_UPLOAD_REPLY)));
+        }
+}
+
+/*
+ * Prepare the pt for a SAS3 FW_UPLOAD request.
+ */
+static void
+mpi_pre_fw_25_upload(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+        pMpi2FWUploadTCSGE_t tce;
+        pMpi25FWUploadRequest_t up25;
+
+        /*
+         * The user's request is expected to carry a transaction context
+         * element (TCE) at the spot where the MPI2 SGL would start.  The
+         * newer firmware does not use a TCE; the main request has room
+         * for the same information, so it is moved there.  Both pointers
+         * below refer to the same buffer, pt->request.
+         */
+        up25 = (pMpi25FWUploadRequest_t)pt->request;
+        tce = (pMpi2FWUploadTCSGE_t)
+            &((pMpi2FWUploadRequest_t)pt->request)->SGL;
+        if (!(tce->ContextSize == 0 && tce->DetailsLength == 12 &&
+            tce->Flags == MPI2_SGE_FLAGS_TRANSACTION_ELEMENT)) {
+                /* Only a warning; the request is still prepared. */
+                mptsas_log(mpt, CE_WARN, "FW Upload tce invalid!");
+        }
+        up25->ImageOffset = tce->ImageOffset;
+        up25->ImageSize = tce->ImageSize;
+
+        pt->sgl_offset = offsetof(MPI25_FW_UPLOAD_REQUEST, SGL);
+
+        /* Size mismatches are reported through debug output only. */
+        if (pt->sgl_offset != pt->request_size) {
+                NDBG15(("mpi_pre_fw_25_upload(): Incorrect req size, "
+                    "0x%x, should be 0x%x, dataoutsz 0x%x",
+                    pt->request_size, pt->sgl_offset,
+                    pt->dataout_size));
+        }
+        if (pt->data_size < sizeof (MPI2_FW_UPLOAD_REPLY)) {
+                NDBG15(("mpi_pre_fw_25_upload(): Incorrect rep size, "
+                    "0x%x, should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_FW_UPLOAD_REPLY)));
+        }
+}
+
+/*
+ * Set up pt for an IOC_FACTS request: the SGL offset is the request size
+ * as given.  Unexpected request or reply sizes are reported through
+ * debug output only.
+ */
+static void
+mpi_pre_ioc_facts(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+#ifndef __lock_lint
+        _NOTE(ARGUNUSED(mpt))
+#endif
+        pt->sgl_offset = (uint16_t)pt->request_size;
+
+        if (pt->request_size != sizeof (MPI2_IOC_FACTS_REQUEST)) {
+                NDBG15(("mpi_pre_ioc_facts(): Incorrect req size, "
+                    "0x%x, should be 0x%x, dataoutsz 0x%x",
+                    pt->request_size,
+                    (int)sizeof (MPI2_IOC_FACTS_REQUEST),
+                    pt->dataout_size));
+        }
+        if (pt->data_size != sizeof (MPI2_IOC_FACTS_REPLY)) {
+                NDBG15(("mpi_pre_ioc_facts(): Incorrect rep size, "
+                    "0x%x, should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_IOC_FACTS_REPLY)));
+        }
+}
+
+/*
+ * Set up pt for a PORT_FACTS request: the SGL offset is the request size
+ * as given.  Unexpected request or reply sizes are reported through
+ * debug output only.
+ */
+static void
+mpi_pre_port_facts(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+#ifndef __lock_lint
+        _NOTE(ARGUNUSED(mpt))
+#endif
+        pt->sgl_offset = (uint16_t)pt->request_size;
+
+        if (pt->request_size != sizeof (MPI2_PORT_FACTS_REQUEST)) {
+                NDBG15(("mpi_pre_port_facts(): Incorrect req size, "
+                    "0x%x, should be 0x%x, dataoutsz 0x%x",
+                    pt->request_size,
+                    (int)sizeof (MPI2_PORT_FACTS_REQUEST),
+                    pt->dataout_size));
+        }
+        if (pt->data_size != sizeof (MPI2_PORT_FACTS_REPLY)) {
+                NDBG15(("mpi_pre_port_facts(): Incorrect rep size, "
+                    "0x%x, should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_PORT_FACTS_REPLY)));
+        }
+}
+
+/*
+ * Set up pt for a SATA_PASSTHROUGH request: the SGL goes where the SGL
+ * member of the request structure sits.  Unexpected request or reply
+ * sizes are reported through debug output only.
+ */
+static void
+mpi_pre_sata_passthrough(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+#ifndef __lock_lint
+        _NOTE(ARGUNUSED(mpt))
+#endif
+        pt->sgl_offset = offsetof(MPI2_SATA_PASSTHROUGH_REQUEST, SGL);
+
+        if (pt->sgl_offset != pt->request_size) {
+                NDBG15(("mpi_pre_sata_passthrough(): Incorrect req size, "
+                    "0x%x, should be 0x%x, dataoutsz 0x%x",
+                    pt->request_size, pt->sgl_offset,
+                    pt->dataout_size));
+        }
+        if (sizeof (MPI2_SATA_PASSTHROUGH_REPLY) != pt->data_size) {
+                NDBG15(("mpi_pre_sata_passthrough(): Incorrect rep size, "
+                    "0x%x, should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_SATA_PASSTHROUGH_REPLY)));
+        }
+}
+
+/*
+ * Set up pt for an SMP_PASSTHROUGH request: the SGL goes where the SGL
+ * member of the request structure sits.  Unexpected request or reply
+ * sizes are reported through debug output only.
+ */
+static void
+mpi_pre_smp_passthrough(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+#ifndef __lock_lint
+        _NOTE(ARGUNUSED(mpt))
+#endif
+        pt->sgl_offset = offsetof(MPI2_SMP_PASSTHROUGH_REQUEST, SGL);
+
+        if (pt->sgl_offset != pt->request_size) {
+                NDBG15(("mpi_pre_smp_passthrough(): Incorrect req size, "
+                    "0x%x, should be 0x%x, dataoutsz 0x%x",
+                    pt->request_size, pt->sgl_offset,
+                    pt->dataout_size));
+        }
+        if (sizeof (MPI2_SMP_PASSTHROUGH_REPLY) != pt->data_size) {
+                NDBG15(("mpi_pre_smp_passthrough(): Incorrect rep size, "
+                    "0x%x, should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_SMP_PASSTHROUGH_REPLY)));
+        }
+}
+
+/*
+ * Set up pt for a CONFIG request: the SGL goes where the PageBufferSGE
+ * member of the request structure sits, and pt->simple is set.
+ * Unexpected request or reply sizes are reported through debug output
+ * only.
+ */
+static void
+mpi_pre_config(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+#ifndef __lock_lint
+        _NOTE(ARGUNUSED(mpt))
+#endif
+        /*
+         * With simple set, mptsas_start_passthru() uses
+         * mptsas_passthru_sge() rather than the IEEE variant, even when
+         * m_MPI25 is set.
+         */
+        pt->simple = 1;
+        pt->sgl_offset = offsetof(MPI2_CONFIG_REQUEST, PageBufferSGE);
+
+        if (pt->sgl_offset != pt->request_size) {
+                NDBG15(("mpi_pre_config(): Incorrect req size, 0x%x, "
+                    "should be 0x%x, dataoutsz 0x%x", pt->request_size,
+                    pt->sgl_offset, pt->dataout_size));
+        }
+        if (sizeof (MPI2_CONFIG_REPLY) != pt->data_size) {
+                NDBG15(("mpi_pre_config(): Incorrect rep size, 0x%x, "
+                    "should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_CONFIG_REPLY)));
+        }
+}
+
+/*
+ * Prepare pt for a SCSI_IO_REQ request.
+ *
+ * pt->sgl_offset is set to the offset of the SGL member of the SCSI IO
+ * request structure.  A request size that differs from that offset, or a
+ * reply size that differs from the size of the SCSI IO reply, is only
+ * reported through debug output; the request is prepared regardless.
+ */
+static void
+mpi_pre_scsi_io_req(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+#ifndef __lock_lint
+        _NOTE(ARGUNUSED(mpt))
+#endif
+        pt->sgl_offset = offsetof(MPI2_SCSI_IO_REQUEST, SGL);
+        /*
+         * The debug messages name this function so that they can be told
+         * apart from the ones emitted by mpi_pre_config().
+         */
+        if (pt->request_size != pt->sgl_offset) {
+                NDBG15(("mpi_pre_scsi_io_req(): Incorrect req size, 0x%x, "
+                    "should be 0x%x, dataoutsz 0x%x", pt->request_size,
+                    pt->sgl_offset,
+                    pt->dataout_size));
+        }
+        if (pt->data_size != sizeof (MPI2_SCSI_IO_REPLY)) {
+                NDBG15(("mpi_pre_scsi_io_req(): Incorrect rep size, 0x%x, "
+                    "should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_SCSI_IO_REPLY)));
+        }
+}
+
+/*
+ * Prepare the pt for a SAS_IO_UNIT_CONTROL request.  The SGL offset is
+ * set to the request size as supplied (narrowed to 16 bits); unlike the
+ * other mpi_pre_*() routines, no size checks are made here and mpt is
+ * not used.
+ */
+static void
+mpi_pre_sas_io_unit_control(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+#ifndef __lock_lint
+        _NOTE(ARGUNUSED(mpt))
+#endif
+        pt->sgl_offset = (uint16_t)pt->request_size;
+}
+
+/*
+ * A set of functions to prepare a pass-through request (pt) for the
+ * various supported requests, keyed by MPI function code.
+ * mptsas_prep_sgl_offset() walks this table from the start and stops at
+ * the first entry whose f_pre is NULL, so the terminating entry must
+ * remain last.
+ */
+static struct mptsas_func {
+        U8              Function;       /* matched against hdr->Function */
+        char            *Name;          /* printed in debug output */
+        mptsas_pre_f    *f_pre;         /* prepares pt; NULL ends list */
+} mptsas_func_list[] = {
+        { MPI2_FUNCTION_IOC_FACTS, "IOC_FACTS",         mpi_pre_ioc_facts },
+        { MPI2_FUNCTION_PORT_FACTS, "PORT_FACTS",       mpi_pre_port_facts },
+        { MPI2_FUNCTION_FW_DOWNLOAD, "FW_DOWNLOAD",     mpi_pre_fw_download },
+        { MPI2_FUNCTION_FW_UPLOAD, "FW_UPLOAD",         mpi_pre_fw_upload },
+        { MPI2_FUNCTION_SATA_PASSTHROUGH, "SATA_PASSTHROUGH",
+            mpi_pre_sata_passthrough },
+        { MPI2_FUNCTION_SMP_PASSTHROUGH, "SMP_PASSTHROUGH",
+            mpi_pre_smp_passthrough},
+        { MPI2_FUNCTION_SCSI_IO_REQUEST, "SCSI_IO_REQUEST",
+            mpi_pre_scsi_io_req},
+        { MPI2_FUNCTION_CONFIG, "CONFIG",               mpi_pre_config},
+        { MPI2_FUNCTION_SAS_IO_UNIT_CONTROL, "SAS_IO_UNIT_CONTROL",
+            mpi_pre_sas_io_unit_control },
+        { 0xFF, NULL,                           NULL } /* list end */
+};
+
+/*
+ * Determine pt->sgl_offset for a pass-through request by dispatching on
+ * the Function field of the request header.  A function found in
+ * mptsas_func_list[] is handled by its f_pre routine; for any other
+ * function the SGL offset is the request size.
+ */
+static void
+mptsas_prep_sgl_offset(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+        pMPI2RequestHeader_t    req_hdr = (pMPI2RequestHeader_t)pt->request;
+        struct mptsas_func      *ent;
+
+        for (ent = mptsas_func_list; ent->f_pre != NULL; ent++) {
+                if (ent->Function != req_hdr->Function) {
+                        continue;
+                }
+
+                /* First match wins. */
+                ent->f_pre(mpt, pt);
+                NDBG15(("mptsas_prep_sgl_offset: Function %s,"
+                    " sgl_offset 0x%x", ent->Name,
+                    pt->sgl_offset));
+                return;
+        }
+
+        /* Nothing in the table matched. */
+        NDBG15(("mptsas_prep_sgl_offset: Unknown Function 0x%02x,"
+            " returning req_size 0x%x for sgl_offset",
+            req_hdr->Function, pt->request_size));
+        pt->sgl_offset = (uint16_t)pt->request_size;
+}
+
+
 static int
 mptsas_do_passthru(mptsas_t *mpt, uint8_t *request, uint8_t *reply,
     uint8_t *data, uint32_t request_size, uint32_t reply_size,
-    uint32_t data_size, uint32_t direction, uint8_t *dataout,
+    uint32_t data_size, uint8_t direction, uint8_t *dataout,
     uint32_t dataout_size, short timeout, int mode)
 {
         mptsas_pt_request_t             pt;
         mptsas_dma_alloc_state_t        data_dma_state;
         mptsas_dma_alloc_state_t        dataout_dma_state;
@@ -9887,10 +10633,12 @@
                                 }
                         }
                         mutex_enter(&mpt->m_mutex);
                 }
         }
+        else
+                bzero(&data_dma_state, sizeof (data_dma_state));
 
         if (dataout_size != 0) {
                 dataout_dma_state.size = dataout_size;
                 if (mptsas_dma_alloc(mpt, &dataout_dma_state) != DDI_SUCCESS) {
                         status = ENOMEM;
@@ -9910,10 +10658,12 @@
                                 goto out;
                         }
                 }
                 mutex_enter(&mpt->m_mutex);
         }
+        else
+                bzero(&dataout_dma_state, sizeof (dataout_dma_state));
 
         if ((rvalue = (mptsas_request_from_pool(mpt, &cmd, &pkt))) == -1) {
                 status = EAGAIN;
                 mptsas_log(mpt, CE_NOTE, "event ack command pool is full");
                 goto out;
@@ -9926,15 +10676,17 @@
 
         cmd->ioc_cmd_slot = (uint32_t)(rvalue);
 
         pt.request = (uint8_t *)request_msg;
         pt.direction = direction;
+        pt.simple = 0;
         pt.request_size = request_size;
         pt.data_size = data_size;
         pt.dataout_size = dataout_size;
         pt.data_cookie = data_dma_state.cookie;
         pt.dataout_cookie = dataout_dma_state.cookie;
+        mptsas_prep_sgl_offset(mpt, &pt);
 
         /*
          * Form a blank cmd/pkt to store the acknowledgement message
          */
         pkt->pkt_cdbp           = (opaque_t)&cmd->cmd_cdb[0];
@@ -10143,11 +10895,11 @@
                 return (mptsas_do_passthru(mpt,
                     (uint8_t *)((uintptr_t)data->PtrRequest),
                     (uint8_t *)((uintptr_t)data->PtrReply),
                     (uint8_t *)((uintptr_t)data->PtrData),
                     data->RequestSize, data->ReplySize,
-                    data->DataSize, data->DataDirection,
+                    data->DataSize, (uint8_t)data->DataDirection,
                     (uint8_t *)((uintptr_t)data->PtrDataOut),
                     data->DataOutSize, data->Timeout, mode));
         } else {
                 return (EINVAL);
         }
@@ -11228,11 +11980,13 @@
 mptsas_read_adapter_data(mptsas_t *mpt, mptsas_adapter_data_t *adapter_data)
 {
         char    *driver_verstr = MPTSAS_MOD_STRING;
 
         mptsas_lookup_pci_data(mpt, adapter_data);
-        adapter_data->AdapterType = MPTIOCTL_ADAPTER_TYPE_SAS2;
+        adapter_data->AdapterType = mpt->m_MPI25 ?
+            MPTIOCTL_ADAPTER_TYPE_SAS3 :
+            MPTIOCTL_ADAPTER_TYPE_SAS2;
         adapter_data->PCIDeviceHwId = (uint32_t)mpt->m_devid;
         adapter_data->PCIDeviceHwRev = (uint32_t)mpt->m_revid;
         adapter_data->SubSystemId = (uint32_t)mpt->m_ssid;
         adapter_data->SubsystemVendorId = (uint32_t)mpt->m_svid;
         (void) strcpy((char *)&adapter_data->DriverVersion[0], driver_verstr);
@@ -14980,11 +15734,11 @@
 static int mptsas_smp_start(struct smp_pkt *smp_pkt)
 {
         uint64_t                        wwn;
         Mpi2SmpPassthroughRequest_t     req;
         Mpi2SmpPassthroughReply_t       rep;
-        uint32_t                        direction = 0;
+        uint8_t                         direction = 0;
         mptsas_t                        *mpt;
         int                             ret;
         uint64_t                        tmp64;
 
         mpt = (mptsas_t *)smp_pkt->smp_pkt_address->