Print this page
MFV: illumos-gate@2aba3acda67326648fd60aaf2bfb4e18ee8c04ed
9816 Multi-TRB xhci transfers should use event data
9817 xhci needs to always set slot context
8550 increase xhci bulk transfer sgl count
9818 xhci_transfer_get_tdsize can return values that are too large
Reviewed by: Alex Wilson <alex.wilson@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Joshua M. Clulow <josh@sysmgr.org>
Author: Robert Mustacchi <rm@joyent.com>
        
@@ -8,11 +8,11 @@
  * source.  A copy of the CDDL is also available via the Internet at
  * http://www.illumos.org/license/CDDL.
  */
 
 /*
- * Copyright 2016 Joyent, Inc.
+ * Copyright (c) 2018, Joyent, Inc.
  */
 
 /*
  * xHCI Endpoint Initialization and Management
  *
@@ -873,13 +873,15 @@
 
         if (xhci_ring_trb_space(rp, xt->xt_ntrbs) == B_FALSE)
                 return (USB_NO_RESOURCES);
 
         for (i = xt->xt_ntrbs - 1; i > 0; i--) {
-                xhci_ring_trb_fill(rp, i, &xt->xt_trbs[i], B_TRUE);
+                xhci_ring_trb_fill(rp, i, &xt->xt_trbs[i], &xt->xt_trbs_pa[i],
+                    B_TRUE);
         }
-        xhci_ring_trb_fill(rp, 0U, &xt->xt_trbs[0], B_FALSE);
+        xhci_ring_trb_fill(rp, 0U, &xt->xt_trbs[0], &xt->xt_trbs_pa[0],
+            B_FALSE);
 
         XHCI_DMA_SYNC(rp->xr_dma, DDI_DMA_SYNC_FORDEV);
         xhci_ring_trb_produce(rp, xt->xt_ntrbs);
         list_insert_tail(&xep->xep_transfers, xt);
 
@@ -907,12 +909,14 @@
         return (xhci_endpoint_ring(xhcip, xd, xep));
 }
 
 static xhci_transfer_t *
 xhci_endpoint_determine_transfer(xhci_t *xhcip, xhci_endpoint_t *xep,
-    xhci_trb_t *trb, int *offp)
+    xhci_trb_t *trb, uint_t *offp)
 {
+        uint_t i;
+        uint64_t addr;
         xhci_transfer_t *xt;
 
         ASSERT(xhcip != NULL);
         ASSERT(offp != NULL);
         ASSERT(xep != NULL);
@@ -920,15 +924,45 @@
         ASSERT(MUTEX_HELD(&xhcip->xhci_lock));
 
         if ((xt = list_head(&xep->xep_transfers)) == NULL)
                 return (NULL);
 
-        *offp = xhci_ring_trb_valid_range(&xep->xep_ring, LE_64(trb->trb_addr),
-            xt->xt_ntrbs);
-        if (*offp == -1)
+        addr = LE_64(trb->trb_addr);
+
+        /*
+         * Check if this is the simple case of an event data TRB. If it is,
+         * then all we need to do is check whether its data matches the
+         * address of the transfer.
+         */
+        if (XHCI_TRB_GET_ED(LE_32(trb->trb_flags)) != 0) {
+                if (LE_64(trb->trb_addr) != (uintptr_t)xt)
                 return (NULL);
+
+                *offp = xt->xt_ntrbs - 1;
         return (xt);
+        }
+
+        /*
+         * This represents an error that has occurred. We need to check two
+         * different things. The first is that the TRB PA maps to one of the
+         * TRBs in the transfer. Secondly, we need to make sure that it makes
+         * sense in the context of the ring and our notion of where the tail is.
+         */
+        for (i = 0; i < xt->xt_ntrbs; i++) {
+                if (xt->xt_trbs_pa[i] == addr)
+                        break;
+        }
+
+        if (i == xt->xt_ntrbs)
+                return (NULL);
+
+        if (xhci_ring_trb_valid_range(&xep->xep_ring, LE_64(trb->trb_addr),
+            xt->xt_ntrbs) == -1)
+                return (NULL);
+
+        *offp = i;
+        return (xt);
 }
 
 static void
 xhci_endpoint_reschedule_periodic(xhci_t *xhcip, xhci_device_t *xd,
     xhci_endpoint_t *xep, xhci_transfer_t *xt)
@@ -993,11 +1027,11 @@
  * nothing to do. We'll update everything and call back the framework once we
  * get the status stage.
  */
 static boolean_t
 xhci_endpoint_control_callback(xhci_t *xhcip, xhci_device_t *xd,
-    xhci_endpoint_t *xep, xhci_transfer_t *xt, int off, xhci_trb_t *trb)
+    xhci_endpoint_t *xep, xhci_transfer_t *xt, uint_t off, xhci_trb_t *trb)
 {
         int code;
         usb_ctrl_req_t *ucrp;
         xhci_transfer_t *rem;
 
@@ -1007,14 +1041,13 @@
         ucrp = (usb_ctrl_req_t *)xt->xt_usba_req;
 
         /*
          * Now that we know what this TRB is for, was it for a data/normal stage
          * or is it the status stage. We cheat by looking at the last entry. If
-         * it's a data stage, then we must have gotten a short write. In that
-         * case, we should go through and check to make sure it's allowed. If
-         * not, we need to fail the transfer, try to stop the ring, and make
-         * callbacks. We'll clean up the xhci transfer at this time.
+         * it's a data stage, then we must have gotten a short write. We record
+         * this fact and whether we should consider the transfer fatal for the
+         * subsequent status stage.
          */
         if (off != xt->xt_ntrbs - 1) {
                 uint_t remain;
                 usb_ctrl_req_t *ucrp = (usb_ctrl_req_t *)xt->xt_usba_req;
 
@@ -1148,11 +1181,11 @@
  * Handle things which consist solely of normal tranfers, in other words, bulk
  * and interrupt transfers.
  */
 static boolean_t
 xhci_endpoint_norm_callback(xhci_t *xhcip, xhci_device_t *xd,
-    xhci_endpoint_t *xep, xhci_transfer_t *xt, int off, xhci_trb_t *trb)
+    xhci_endpoint_t *xep, xhci_transfer_t *xt, uint_t off, xhci_trb_t *trb)
 {
         int code;
         usb_cr_t cr;
         xhci_transfer_t *rem;
         int attrs;
@@ -1165,14 +1198,20 @@
             xep->xep_type == USB_EP_ATTR_INTR);
 
         code = XHCI_TRB_GET_CODE(LE_32(trb->trb_status));
 
         if (code == XHCI_CODE_SHORT_XFER) {
-                int residue;
+                uint_t residue;
                 residue = XHCI_TRB_REMAIN(LE_32(trb->trb_status));
+
+                if (xep->xep_type == USB_EP_ATTR_BULK) {
+                        VERIFY3U(XHCI_TRB_GET_ED(LE_32(trb->trb_flags)), !=, 0);
+                        xt->xt_short = residue;
+                } else {
                 xt->xt_short = xt->xt_buffer.xdb_len - residue;
         }
+        }
 
         /*
          * If we have an interrupt from something that's not the last entry,
          * that must mean we had a short transfer, so there's nothing more for
          * us to do at the moment. We won't call back until everything's
@@ -1236,11 +1275,15 @@
                 mp->b_wptr += len;
         }
         cr = USB_CR_OK;
 
 out:
-        VERIFY(xhci_ring_trb_consumed(&xep->xep_ring, LE_64(trb->trb_addr)));
+        /*
+         * Don't use the address from the TRB here. When we're dealing with
+         * event data that will be entirely wrong.
+         */
+        VERIFY(xhci_ring_trb_consumed(&xep->xep_ring, xt->xt_trbs_pa[off]));
         rem = list_remove_head(&xep->xep_transfers);
         VERIFY3P(rem, ==, xt);
         mutex_exit(&xhcip->xhci_lock);
 
         usba_hcdi_cb(xep->xep_pipe, urp, cr);
@@ -1253,11 +1296,11 @@
         return (B_TRUE);
 }
 
 static boolean_t
 xhci_endpoint_isoch_callback(xhci_t *xhcip, xhci_device_t *xd,
-    xhci_endpoint_t *xep, xhci_transfer_t *xt, int off, xhci_trb_t *trb)
+    xhci_endpoint_t *xep, xhci_transfer_t *xt, uint_t off, xhci_trb_t *trb)
 {
         int code;
         usb_cr_t cr;
         xhci_transfer_t *rem;
         usb_isoc_pkt_descr_t *desc;
@@ -1343,20 +1386,55 @@
 
 boolean_t
 xhci_endpoint_transfer_callback(xhci_t *xhcip, xhci_trb_t *trb)
 {
         boolean_t ret;
-        int slot, endpoint, code, off;
+        int slot, endpoint, code;
+        uint_t off;
         xhci_device_t *xd;
         xhci_endpoint_t *xep;
         xhci_transfer_t *xt;
         boolean_t transfer_done;
 
         endpoint = XHCI_TRB_GET_EP(LE_32(trb->trb_flags));
         slot = XHCI_TRB_GET_SLOT(LE_32(trb->trb_flags));
         code = XHCI_TRB_GET_CODE(LE_32(trb->trb_status));
 
+        switch (code) {
+        case XHCI_CODE_RING_UNDERRUN:
+        case XHCI_CODE_RING_OVERRUN:
+                /*
+                 * If we have an ISOC overrun or underrun then there will be no
+                 * valid data pointer in the TRB associated with it. Just drive
+                 * on.
+                 */
+                return (B_TRUE);
+        case XHCI_CODE_UNDEFINED:
+                xhci_error(xhcip, "received transfer trb with undefined fatal "
+                    "error: resetting device");
+                xhci_fm_runtime_reset(xhcip);
+                return (B_FALSE);
+        case XHCI_CODE_XFER_STOPPED:
+        case XHCI_CODE_XFER_STOPINV:
+        case XHCI_CODE_XFER_STOPSHORT:
+                /*
+                 * This causes us to transition the endpoint to a stopped state.
+                 * Each of these indicate a different possible state that we
+                 * have to deal with. Effectively we're going to drop it and
+                 * leave it up to the consumers to figure out what to do. For
+                 * the moment, that's generally okay because stops are only used
+                 * in cases where we're cleaning up outstanding reqs, etc.
+                 *
+                 * We do this before we check for the corresponding transfer as
+                 * this will generally be generated by a command issued that's
+                 * stopping the ring.
+                 */
+                return (B_TRUE);
+        default:
+                break;
+        }
+
         mutex_enter(&xhcip->xhci_lock);
         xd = xhci_device_lookup_by_slot(xhcip, slot);
         if (xd == NULL) {
                 xhci_error(xhcip, "received transfer trb with code %d for "
                     "unknown slot %d and endpoint %d: resetting device", code,
@@ -1379,40 +1457,40 @@
                 xhci_fm_runtime_reset(xhcip);
                 return (B_FALSE);
         }
 
         /*
-         * This TRB should be part of a transfer. If it's not, then we ignore
-         * it. We also check whether or not it's for the first transfer. Because
-         * the rings are serviced in order, it should be.
+         * The TRB that we received may be an event data TRB for a bulk
+         * endpoint, a normal or short completion for any other endpoint or an
+         * error. In all cases, we need to figure out what transfer this
+         * corresponds to. If this is an error, then we need to make sure that
+         * the generating ring has been cleaned up.
+         *
+         * TRBs should be delivered in order, based on the ring. If for some
+         * reason we find something that doesn't add up here, then we need to
+         * assume that something has gone horribly wrong in the system and issue
+         * a runtime reset. We issue the runtime reset rather than just trying
+         * to stop and flush the ring, because it's unclear if we could stop
+         * the ring in time.
          */
         if ((xt = xhci_endpoint_determine_transfer(xhcip, xep, trb, &off)) ==
             NULL) {
+                xhci_error(xhcip, "received transfer trb with code %d, slot "
+                    "%d, and endpoint %d, but does not match current transfer "
+                    "for endpoint: resetting device", code, slot, endpoint);
                 mutex_exit(&xhcip->xhci_lock);
-                return (B_TRUE);
+                xhci_fm_runtime_reset(xhcip);
+                return (B_FALSE);
         }
 
         transfer_done = B_FALSE;
 
         switch (code) {
         case XHCI_CODE_SUCCESS:
         case XHCI_CODE_SHORT_XFER:
                 /* Handled by endpoint logic */
                 break;
-        case XHCI_CODE_XFER_STOPPED:
-        case XHCI_CODE_XFER_STOPINV:
-        case XHCI_CODE_XFER_STOPSHORT:
-                /*
-                 * This causes us to transition the endpoint to a stopped state.
-                 * Each of these indicate a different possible state that we
-                 * have to deal with. Effectively we're going to drop it and
-                 * leave it up to the consumers to figure out what to do. For
-                 * the moment, that's generally okay because stops are only used
-                 * in cases where we're cleaning up outstanding reqs, etc.
-                 */
-                mutex_exit(&xhcip->xhci_lock);
-                return (B_TRUE);
         case XHCI_CODE_STALL:
                 /*
                  * This causes us to transition to the halted state;
                  * however, downstream clients are able to handle this just
                  * fine.
@@ -1430,19 +1508,31 @@
         case XHCI_CODE_SPLITERR:
                 transfer_done = B_TRUE;
                 xt->xt_cr = USB_CR_DEV_NOT_RESP;
                 xep->xep_state |= XHCI_ENDPOINT_HALTED;
                 break;
+        case XHCI_CODE_BW_OVERRUN:
+                transfer_done = B_TRUE;
+                xt->xt_cr = USB_CR_DATA_OVERRUN;
+                break;
+        case XHCI_CODE_DATA_BUF:
+                transfer_done = B_TRUE;
+                if (xt->xt_data_tohost)
+                        xt->xt_cr = USB_CR_DATA_OVERRUN;
+                else
+                        xt->xt_cr = USB_CR_DATA_UNDERRUN;
+                break;
         default:
                 /*
                  * Treat these as general unspecified errors that don't cause a
                  * stop of the ring. Even if it does, a subsequent timeout
                  * should occur which causes us to end up dropping a pipe reset
                  * or at least issuing a reset of the device as part of
                  * quiescing.
                  */
                 transfer_done = B_TRUE;
+                xt->xt_cr = USB_CR_HC_HARDWARE_ERR;
                 break;
         }
 
         if (transfer_done == B_TRUE) {
                 xhci_transfer_t *alt;