/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright (c) 2018, Joyent, Inc.
 */

/*
 * -----------------------------
 * xHCI Ring Management Routines
 * -----------------------------
 *
 * There are three major types of rings for xHCI:
 *
 * 1) Command Rings
 * 2) Event Rings
 * 3) Transfer Rings
 *
 * Command and transfer rings function in similar ways, while the event rings
 * are different. The difference comes down to who is the consumer and who is
 * the producer. In the case of command and transfer rings, the driver is the
 * producer. For the event ring, the driver is the consumer.
 *
 * Each ring in xHCI has a synthetic head and tail register. Each entry in a
 * ring has a bit that's often referred to as the 'Cycle bit'. The cycle bit is
 * toggled as a means of saying that a given entry needs to be consumed.
 *
 * When a ring is created, all of the data in it is initialized to zero, and
 * the producer and consumer agree that when the cycle bit is toggled, the
 * ownership of the entry is transferred from the producer to the consumer.
 * For example, the command ring defaults to saying that a cycle bit of one
 * indicates the command is owned by the hardware. So as the driver (the
 * producer) fills in entries, the driver toggles the cycle bit from 0->1 as
 * part of writing out the TRB.  When the command ring's doorbell is rung, the
 * hardware (the consumer) begins processing commands. It will process them
 * until one of two things happens:
 *
 * 1) The hardware encounters an entry with the old cycle bit (0 in this case)
 *
 * 2) The hardware hits the last entry in the ring, which is a special kind of
 * entry called a LINK TRB.
 *
 * A LINK TRB has two purposes:
 *
 * 1) Indicate where processing should be redirected. This can potentially be
 * to another memory segment; however, this driver always programs LINK TRBs
 * to point back to the start of the ring.
 *
 * 2) Indicate whether or not the cycle bit should be changed. We always
 * indicate that the cycle bit should be toggled when a LINK TRB is processed.
 *
 * In this same example, whereas the driver (the producer) had been setting
 * the cycle bit to 1 to indicate that an entry is to be processed, after
 * crossing the LINK TRB it sets the cycle bit to 0. Similarly, the hardware
 * (the consumer) now looks for a 0 to determine whether or not it should
 * process the entry.
 *
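 * To make that hand-off concrete, here is a minimal sketch of a producer
 * writing one entry (illustrative pseudo-C only, with simplified host-order
 * names; it is not the code this driver actually runs):
 *
 *	trb = &ring[head];
 *	trb->trb_addr = LE_64(pa);
 *	trb->trb_status = LE_32(len);
 *	trb->trb_flags = LE_32(type | (cycle ? CYCLE : 0));
 *	if (++head == nents - 1) {	<- index of the LINK TRB
 *		cycle ^= 1;		<- the next pass hands off on the
 *		head = 0;		   other bit value
 *	}
 *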
 * Currently, when the driver allocates rings, it always allocates a single
 * page for the ring. The entire page is dedicated to ring use; its size is
 * determined based on the device's PAGESIZE register. The last entry in a
 * given page is always configured as a LINK TRB. As each entry in a ring is
 * 16 bytes, this gives us 255 usable descriptors on x86 and 511 on SPARC, as
 * PAGESIZE is 4k and 8k respectively.
 *
 * The driver is always the producer for all rings except for the event ring,
 * where it is the consumer.
 *
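 * To spell out that arithmetic: on x86, 4096 bytes / 16 bytes per TRB yields
 * 256 entries, one of which is consumed by the LINK TRB, leaving 255 usable;
 * on SPARC, 8192 / 16 yields 512 entries, leaving 511.
 *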
 * ----------------------
 * Head and Tail Pointers
 * ----------------------
 *
 * While the cycle bits for the ring are explained above, we still need to
 * keep track of what we consider the head and tail pointers, what the xHCI
 * specification calls the enqueue (head) and dequeue (tail) pointers. In all
 * of the cases here, the actual tracking of the head pointer is essentially
 * done by the cycle bit; however, we maintain an actual offset in the
 * xhci_ring_t structure. The tail is usually less synthetic; however, who
 * maintains it depends on the type of ring.
 *
 * We handle the command and transfer rings the same way. The head pointer
 * indicates where we should insert the next TRB to transfer. The tail pointer
 * indicates the last thing that hardware has told us it has processed. If the
 * head and tail point to the same index, then we know the ring is empty.
 *
 * We increment the head pointer whenever we insert an entry. Note that we do
 * not tell hardware about this in any way; it's tracked only by the cycle
 * bit. Then, we keep track of what hardware has processed in our tail
 * pointer, incrementing it only when we have an interrupt that indicates that
 * it's been processed.
 *
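 * In sketch form (illustrative pseudo-C only; index nents - 1 holds the LINK
 * TRB and is never a usable slot, so both walks skip it):
 *
 *	empty = (head == tail);
 *	produce: if (++head == nents - 1) head = 0;
 *	consume: if (++tail == nents - 1) tail = 0;
 *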
 * One oddity here is that we only get notified of this via the event ring. So
 * when the event ring encounters this information, it needs to go back and
 * increment our command and transfer ring tails after processing events.
 *
 * For the event ring, we handle things differently. We still initialize
 * everything to zero; however, we start processing things and looking at
 * cycle bits only when we get an interrupt from hardware. With the event
 * ring, we do *not* maintain a head pointer (it's still in the structure, but
 * unused).  We always start processing at the tail pointer and use the cycle
 * bit to indicate what we should process. Once we're done incrementing
 * things, we notify the hardware of how far we got by updating the tail for
 * the event ring via a memory-mapped register.
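 *
 * In sketch form (again illustrative pseudo-C only; note that the event ring
 * has no LINK TRB, so it wraps at the full entry count, just as
 * xhci_ring_event_advance() below does):
 *
 *	while ((ring[tail].trb_flags & CYCLE) == cycle) {
 *		process(&ring[tail]);
 *		if (++tail == nents) {
 *			cycle ^= 1;
 *			tail = 0;
 *		}
 *	}
 *	write tail out to the event ring dequeue pointer (ERDP) register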
 */

#include <sys/usb/hcd/xhci/xhci.h>

void
xhci_ring_free(xhci_ring_t *xrp)
{
	if (xrp->xr_trb != NULL) {
		xhci_dma_free(&xrp->xr_dma);
		xrp->xr_trb = NULL;
	}
	xrp->xr_ntrb = 0;
	xrp->xr_head = 0;
	xrp->xr_tail = 0;
	xrp->xr_cycle = 0;
}

/*
 * Initialize a ring that hasn't been used and set up its link pointer back to
 * the start of the ring.
 */
int
xhci_ring_reset(xhci_t *xhcip, xhci_ring_t *xrp)
{
	xhci_trb_t *ltrb;

	ASSERT(xrp->xr_trb != NULL);

	bzero(xrp->xr_trb, sizeof (xhci_trb_t) * xrp->xr_ntrb);
	xrp->xr_head = 0;
	xrp->xr_tail = 0;
	xrp->xr_cycle = 1;

	/*
	 * Set up the link TRB back to ourselves.
	 */
	ltrb = &xrp->xr_trb[xrp->xr_ntrb - 1];
	ltrb->trb_addr = LE_64(xhci_dma_pa(&xrp->xr_dma));
	ltrb->trb_flags = LE_32(XHCI_TRB_TYPE_LINK | XHCI_TRB_LINKSEG);

	XHCI_DMA_SYNC(xrp->xr_dma, DDI_DMA_SYNC_FORDEV);
	if (xhci_check_dma_handle(xhcip, &xrp->xr_dma) != DDI_FM_OK) {
		ddi_fm_service_impact(xhcip->xhci_dip, DDI_SERVICE_LOST);
		return (EIO);
	}

	return (0);
}

int
xhci_ring_alloc(xhci_t *xhcip, xhci_ring_t *xrp)
{
	ddi_dma_attr_t attr;
	ddi_device_acc_attr_t acc;

	/*
	 * We use a transfer attribute for the rings as they require 64-byte
	 * boundaries.
	 */
	xhci_dma_acc_attr(xhcip, &acc);
	xhci_dma_transfer_attr(xhcip, &attr, XHCI_DEF_DMA_SGL);
	bzero(xrp, sizeof (xhci_ring_t));
	if (xhci_dma_alloc(xhcip, &xrp->xr_dma, &attr, &acc, B_FALSE,
	    xhcip->xhci_caps.xcap_pagesize, B_FALSE) == B_FALSE)
		return (ENOMEM);
	xrp->xr_trb = (xhci_trb_t *)xrp->xr_dma.xdb_va;
	xrp->xr_ntrb = xhcip->xhci_caps.xcap_pagesize / sizeof (xhci_trb_t);
	return (0);
}

/*
 * Note, the caller should have already synced our DMA memory. This should not
 * be used for the command ring, as its cycle is maintained by the cycling of
 * the head. This function is only used for managing the event ring.
 */
xhci_trb_t *
xhci_ring_event_advance(xhci_ring_t *xrp)
{
	xhci_trb_t *trb = &xrp->xr_trb[xrp->xr_tail];

	VERIFY(xrp->xr_tail < xrp->xr_ntrb);
	if (xrp->xr_cycle != (LE_32(trb->trb_flags) & XHCI_TRB_CYCLE))
		return (NULL);

	/*
	 * The event ring does not use a link TRB. It instead always uses the
	 * event ring segment table to determine when to wrap. That means that
	 * the last entry is in fact going to contain data, so we shouldn't
	 * wrap and toggle the cycle until after we've processed it; in other
	 * words, until the tail equals the total number of entries.
	 */
	xrp->xr_tail++;
	if (xrp->xr_tail == xrp->xr_ntrb) {
		xrp->xr_cycle ^= 1;
		xrp->xr_tail = 0;
	}

	return (trb);
}

/*
 * When processing the command ring, we're going to get a single event for
 * each entry in it. As we've submitted things in order, we need to make sure
 * that this address matches the DMA address that we'd expect of the current
 * tail.
 */
boolean_t
xhci_ring_trb_tail_valid(xhci_ring_t *xrp, uint64_t dma)
{
	uint64_t tail;

	tail = xhci_dma_pa(&xrp->xr_dma) + xrp->xr_tail * sizeof (xhci_trb_t);
	return (dma == tail);
}

/*
 * A variant on the above that checks for a given message within a range of
 * entries and returns the offset to it from the tail.
 */
int
xhci_ring_trb_valid_range(xhci_ring_t *xrp, uint64_t dma, uint_t range)
{
	uint_t i;
	uint_t tail = xrp->xr_tail;
	uint64_t taddr;

	VERIFY(range < xrp->xr_ntrb);
	for (i = 0; i < range; i++) {
		taddr = xhci_dma_pa(&xrp->xr_dma) + tail * sizeof (xhci_trb_t);
		if (taddr == dma)
			return (i);

		tail++;
		if (tail == xrp->xr_ntrb - 1)
			tail = 0;
	}

	return (-1);
}

/*
 * Determine whether or not we have enough space for the given request in the
 * ring. Note, we have to be a bit careful here to ensure that we properly
 * handle cases where we cross the link TRB and that we don't count it.
 *
 * To determine if we have enough space for a given number of TRBs, we need to
 * logically advance the head pointer and make sure that we don't cross the
 * tail pointer. In other words, if after advancement head == tail, we're in
 * trouble and don't have enough space.
 */
boolean_t
xhci_ring_trb_space(xhci_ring_t *xrp, uint_t ntrb)
{
	uint_t i;
	uint_t head = xrp->xr_head;

	VERIFY(ntrb > 0);
	/* We use < to ignore the link TRB */
	VERIFY(ntrb < xrp->xr_ntrb);

	for (i = 0; i < ntrb; i++) {
		head++;
		if (head == xrp->xr_ntrb - 1) {
			head = 0;
		}

		if (head == xrp->xr_tail)
			return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * Fill in a TRB in the ring at offset trboff. If put_cycle is B_TRUE, then we
 * fill in the appropriate cycle bit to tell the system to advance; otherwise
 * we leave the existing cycle bit untouched so the system doesn't
 * accidentally advance until we have everything filled in.
 */
void
xhci_ring_trb_fill(xhci_ring_t *xrp, uint_t trboff, xhci_trb_t *host_trb,
    uint64_t *trb_pap, boolean_t put_cycle)
{
	uint_t i;
	uint32_t flags;
	uint_t ent = xrp->xr_head;
	uint8_t cycle = xrp->xr_cycle;
	xhci_trb_t *trb;

	for (i = 0; i < trboff; i++) {
		ent++;
		if (ent == xrp->xr_ntrb - 1) {
			ent = 0;
			cycle ^= 1;
		}
	}

	/*
	 * If we're being asked not to update the cycle to the value that
	 * would make this entry valid to be produced, we need to xor this
	 * once again to get to the inappropriate value.
	 */
	if (put_cycle == B_FALSE)
		cycle ^= 1;

	trb = &xrp->xr_trb[ent];

	trb->trb_addr = host_trb->trb_addr;
	trb->trb_status = host_trb->trb_status;
	flags = host_trb->trb_flags;
	if (cycle == 0) {
		flags &= ~LE_32(XHCI_TRB_CYCLE);
	} else {
		flags |= LE_32(XHCI_TRB_CYCLE);
	}

	trb->trb_flags = flags;

	if (trb_pap != NULL) {
		uint64_t pa;

		/*
		 * This logic only works if we have a single cookie address.
		 * However, this is pretty tightly assumed for rings through
		 * the xhci driver at this time.
		 */
		ASSERT3U(xrp->xr_dma.xdb_ncookies, ==, 1);
		pa = xrp->xr_dma.xdb_cookies[0].dmac_laddress;
		pa += ((uintptr_t)trb - (uintptr_t)&xrp->xr_trb[0]);
		*trb_pap = pa;
	}
}

/*
 * Update our metadata for the ring and then set the cycle bit on the first
 * TRB, which hands the whole batch over to the hardware. The caller is
 * expected to have filled the first TRB with the inverted (old) cycle bit,
 * so the hardware could not have started processing it before this point.
 */
void
xhci_ring_trb_produce(xhci_ring_t *xrp, uint_t ntrb)
{
	uint_t i, ohead;
	xhci_trb_t *trb;

	VERIFY(ntrb > 0);

	ohead = xrp->xr_head;

	/*
	 * As part of updating the head, we need to make sure we correctly
	 * update the cycle bit of the link TRB. So we always do this first
	 * before we update the old head, to try and get a consistent view of
	 * the cycle bit.
	 */
	for (i = 0; i < ntrb; i++) {
		xrp->xr_head++;
		/*
		 * If we're updating the link TRB, we also need to make sure
		 * that the Chain bit is set if we're in the middle of a TD
		 * comprised of multiple TRBs. Thankfully the algorithm here
		 * is simple: set it to the value of the previous TRB.
		 */
		if (xrp->xr_head == xrp->xr_ntrb - 1) {
			trb = &xrp->xr_trb[xrp->xr_ntrb - 1];
			if (xrp->xr_trb[xrp->xr_ntrb - 2].trb_flags &
			    LE_32(XHCI_TRB_CHAIN)) {
				trb->trb_flags |= LE_32(XHCI_TRB_CHAIN);
			} else {
				trb->trb_flags &= ~LE_32(XHCI_TRB_CHAIN);
			}
			trb->trb_flags ^= LE_32(XHCI_TRB_CYCLE);
			xrp->xr_cycle ^= 1;
			xrp->xr_head = 0;
		}
	}

	trb = &xrp->xr_trb[ohead];
	trb->trb_flags ^= LE_32(XHCI_TRB_CYCLE);
}

/*
 * This is a convenience wrapper for the single TRB case to make callers less
 * likely to mess up some of the required semantics.
 */
void
xhci_ring_trb_put(xhci_ring_t *xrp, xhci_trb_t *trb)
{
	xhci_ring_trb_fill(xrp, 0U, trb, NULL, B_FALSE);
	xhci_ring_trb_produce(xrp, 1U);
}

/*
 * Update the tail pointer for a ring based on the DMA address of a consumed
 * entry. Note, this entry indicates what we just processed, therefore we
 * should bump the tail entry to the next one.
 */
boolean_t
xhci_ring_trb_consumed(xhci_ring_t *xrp, uint64_t dma)
{
	uint64_t pa = xhci_dma_pa(&xrp->xr_dma);
	uint64_t high = pa + xrp->xr_ntrb * sizeof (xhci_trb_t);

	if (dma < pa || dma >= high || dma % sizeof (xhci_trb_t) != 0)
		return (B_FALSE);

	dma -= pa;
	dma /= sizeof (xhci_trb_t);

	VERIFY(dma < xrp->xr_ntrb);

	xrp->xr_tail = dma + 1;
	if (xrp->xr_tail == xrp->xr_ntrb - 1)
		xrp->xr_tail = 0;

	return (B_TRUE);
}

/*
 * The ring represented here has been reset and we're being asked to basically
 * skip all outstanding entries. Note, this shouldn't be used for the event
 * ring. Because the cycle bit is toggled whenever the head moves past the
 * link TRB, the cycle bit is already correct. So in this case, it's really
 * just a matter of setting the current tail equal to the head, at which point
 * we consider things empty.
 */
void
xhci_ring_skip(xhci_ring_t *xrp)
{
	xrp->xr_tail = xrp->xr_head;
}

/*
 * A variant on the normal skip. This basically just tells us to make sure
 * that everything this transfer represents has been skipped. Callers need to
 * make sure that this is actually the first transfer in the ring. Like above,
 * we don't need to touch the cycle bit.
 */
void
xhci_ring_skip_transfer(xhci_ring_t *xrp, xhci_transfer_t *xt)
{
	uint_t i;

	for (i = 0; i < xt->xt_ntrbs; i++) {
		xrp->xr_tail++;
		if (xrp->xr_tail == xrp->xr_ntrb - 1)
			xrp->xr_tail = 0;
	}
}