Print this page
MFV: illumos-gate@2aba3acda67326648fd60aaf2bfb4e18ee8c04ed
9816 Multi-TRB xhci transfers should use event data
9817 xhci needs to always set slot context
8550 increase xhci bulk transfer sgl count
9818 xhci_transfer_get_tdsize can return values that are too large
Reviewed by: Alex Wilson <alex.wilson@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Joshua M. Clulow <josh@sysmgr.org>
Author: Robert Mustacchi <rm@joyent.com>

@@ -8,11 +8,11 @@
  * source.  A copy of the CDDL is also available via the Internet at
  * http://www.illumos.org/license/CDDL.
  */
 
 /*
- * Copyright 2016 Joyent, Inc.
+ * Copyright (c) 2018, Joyent, Inc.
  */
 
 #ifndef _SYS_USB_XHCI_XHCI_H
 #define _SYS_USB_XHCI_XHCI_H
 

@@ -54,22 +54,40 @@
  * driver. However, for bulk transfers, which are the largest by far, we want to
  * be able to leverage SGLs to give us more DMA flexibility.
  *
  * We can transfer up to 64K in one transfer request block (TRB) which
  * corresponds to a single SGL entry. Each ring we create is a single page in
- * size and will support at most 256 TRBs. We've selected to use up to 8 SGLs
- * for these transfer cases. This allows us to put up to 512 KiB in a given
- * transfer request and in the worst case, we can have about 30 of them
- * outstanding. Experimentally, this has proven to be sufficient for most of the
- * drivers that we support today.
+ * size and will support at most 256 TRBs. To try and give the operating system
+ * flexibility when allocating DMA transfers, we've opted to allow up to 63
+ * SGLs. Because there isn't a good way to support DMA windows with the xHCI
+ * controller design, if this number is too small then DMA allocations and
+ * binding might fail. If the DMA binding fails, the transfer will fail.
+ *
+ * The reason that we use 63 SGLs and not the expected 64 is that we always need
+ * to allocate an additional TRB for the event data. This leaves us with a
+ * nicely divisible number of entries.
+ *
+ * The final piece of this is the maximum sized transfer that the driver
+ * advertises to the broader framework. This is currently sized at 512 KiB. For
+ * reference the ehci driver sized this value at 640 KiB. It's important to
+ * understand that this isn't reflected in the DMA attribute limitation, because
+ * it's not an attribute of the hardware. Experimentally, this has proven to be
+ * sufficient for most of the drivers that we support today. When considering
+ * increasing this number, please note the impact that it might have on the
+ * number of DMA SGL entries required to satisfy the allocation.
+ *
+ * The value of 512 KiB was originally derived from the number of SGLs we
+ * supported multiplied by the maximum transfer size of a single TRB. The
+ * original value of XHCI_TRANSFER_DMA_SGL was 8 and each TRB was assumed to
+ * use its maximum transfer size of 64 KiB, which yields the 512 KiB figure
+ * (8 * 64 KiB).
  */
-#define XHCI_TRB_MAX_TRANSFER   65536
+#define XHCI_TRB_MAX_TRANSFER   65536   /* 64 KiB */
 #define XHCI_DMA_ALIGN          64
 #define XHCI_DEF_DMA_SGL        1
-#define XHCI_TRANSFER_DMA_SGL   8
-#define XHCI_MAX_TRANSFER       (XHCI_TRB_MAX_TRANSFER * XHCI_TRANSFER_DMA_SGL)
-#define XHCI_DMA_STRUCT_SIZE    4096
+#define XHCI_TRANSFER_DMA_SGL   63
+#define XHCI_MAX_TRANSFER       524288  /* 512 KiB */
 
 /*
  * Properties and values for rerouting ehci ports to xhci.
  */
 #define XHCI_PROP_REROUTE_DISABLE       0

@@ -96,10 +114,17 @@
                                             (dma).xdb_dma_handle, 0, 0, \
                                             (flag)))
 #endif
 
 /*
+ * TRBs need to indicate the number of remaining USB packets in the overall
+ * transfer. This is a 5-bit value, which means that the maximum value we can
+ * store in that TRB field is 31.
+ */
+#define XHCI_MAX_TDSIZE         31
+
+/*
  * This defines a time in 2-ms ticks that is required to wait for the controller
  * to be ready to go. Section 5.4.8 of the XHCI specification in the description
  * of the PORTSC register indicates that the upper bound is 20 ms. Therefore the
  * number of ticks is 10.
  */

@@ -308,10 +333,11 @@
         uint_t                  xt_short;
         uint_t                  xt_timeout;
         usb_cr_t                xt_cr;
         boolean_t               xt_data_tohost;
         xhci_trb_t              *xt_trbs;
+        uint64_t                *xt_trbs_pa;
         usb_isoc_pkt_descr_t    *xt_isoc;
         usb_opaque_t            xt_usba_req;
 } xhci_transfer_t;
 
 /*

@@ -646,11 +672,11 @@
 
 /*
  * DMA Transfer Ring functions
  */
 extern xhci_transfer_t *xhci_transfer_alloc(xhci_t *, xhci_endpoint_t *, size_t,
-    int, int);
+    uint_t, int);
 extern void xhci_transfer_free(xhci_t *, xhci_transfer_t *);
 extern void xhci_transfer_copy(xhci_transfer_t *, void *, size_t, boolean_t);
 extern int xhci_transfer_sync(xhci_t *, xhci_transfer_t *, uint_t);
 extern void xhci_transfer_trb_fill_data(xhci_endpoint_t *, xhci_transfer_t *,
     int, boolean_t);

@@ -712,11 +738,12 @@
  */
 extern boolean_t xhci_ring_trb_tail_valid(xhci_ring_t *, uint64_t);
 extern int xhci_ring_trb_valid_range(xhci_ring_t *, uint64_t, uint_t);
 
 extern boolean_t xhci_ring_trb_space(xhci_ring_t *, uint_t);
-extern void xhci_ring_trb_fill(xhci_ring_t *, uint_t, xhci_trb_t *, boolean_t);
+extern void xhci_ring_trb_fill(xhci_ring_t *, uint_t, xhci_trb_t *, uint64_t *,
+    boolean_t);
 extern void xhci_ring_trb_produce(xhci_ring_t *, uint_t);
 extern boolean_t xhci_ring_trb_consumed(xhci_ring_t *, uint64_t);
 extern void xhci_ring_trb_put(xhci_ring_t *, xhci_trb_t *);
 extern void xhci_ring_skip(xhci_ring_t *);
 extern void xhci_ring_skip_transfer(xhci_ring_t *, xhci_transfer_t *);