Print this page
MFV: illumos-gate@2aba3acda67326648fd60aaf2bfb4e18ee8c04ed
9816 Multi-TRB xhci transfers should use event data
9817 xhci needs to always set slot context
8550 increase xhci bulk transfer sgl count
9818 xhci_transfer_get_tdsize can return values that are too large
Reviewed by: Alex Wilson <alex.wilson@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Joshua M. Clulow <josh@sysmgr.org>
Author: Robert Mustacchi <rm@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/io/usb/hcd/xhci/xhci_dma.c
          +++ new/usr/src/uts/common/io/usb/hcd/xhci/xhci_dma.c
↓ open down ↓ 2 lines elided ↑ open up ↑
   3    3   * Common Development and Distribution License ("CDDL"), version 1.0.
   4    4   * You may only use this file in accordance with the terms of version
   5    5   * 1.0 of the CDDL.
   6    6   *
   7    7   * A full copy of the text of the CDDL should have accompanied this
   8    8   * source.  A copy of the CDDL is also available via the Internet at
   9    9   * http://www.illumos.org/license/CDDL.
  10   10   */
  11   11  
  12   12  /*
  13      - * Copyright 2016 Joyent, Inc.
       13 + * Copyright (c) 2018, Joyent, Inc.
  14   14   */
  15   15  
  16   16  /*
  17   17   * xHCI DMA Management Routines
  18   18   *
  19   19   * Please see the big theory statement in xhci.c for more information.
  20   20   */
  21   21  
  22   22  #include <sys/usb/hcd/xhci/xhci.h>
  23   23  
↓ open down ↓ 237 lines elided ↑ open up ↑
 261  261  
 262  262  void
 263  263  xhci_transfer_free(xhci_t *xhcip, xhci_transfer_t *xt)
 264  264  {
 265  265          if (xt == NULL)
 266  266                  return;
 267  267  
 268  268          VERIFY(xhcip != NULL);
 269  269          xhci_dma_free(&xt->xt_buffer);
 270  270          if (xt->xt_isoc != NULL) {
 271      -                ASSERT(xt->xt_ntrbs > 0);
      271 +                ASSERT3U(xt->xt_ntrbs, >, 0);
 272  272                  kmem_free(xt->xt_isoc, sizeof (usb_isoc_pkt_descr_t) *
 273  273                      xt->xt_ntrbs);
 274  274                  xt->xt_isoc = NULL;
 275  275          }
 276  276          if (xt->xt_trbs != NULL) {
 277      -                ASSERT(xt->xt_ntrbs > 0);
      277 +                ASSERT3U(xt->xt_ntrbs, >, 0);
 278  278                  kmem_free(xt->xt_trbs, sizeof (xhci_trb_t) * xt->xt_ntrbs);
 279  279                  xt->xt_trbs = NULL;
 280  280          }
      281 +        if (xt->xt_trbs_pa != NULL) {
      282 +                ASSERT3U(xt->xt_ntrbs, >, 0);
      283 +                kmem_free(xt->xt_trbs_pa, sizeof (uint64_t) * xt->xt_ntrbs);
      284 +                xt->xt_trbs_pa = NULL;
      285 +        }
 281  286          kmem_free(xt, sizeof (xhci_transfer_t));
 282  287  }
 283  288  
 284  289  xhci_transfer_t *
 285      -xhci_transfer_alloc(xhci_t *xhcip, xhci_endpoint_t *xep, size_t size, int trbs,
 286      -    int usb_flags)
      290 +xhci_transfer_alloc(xhci_t *xhcip, xhci_endpoint_t *xep, size_t size,
      291 +    uint_t trbs, int usb_flags)
 287  292  {
 288  293          int kmflags;
 289  294          boolean_t dmawait;
 290  295          xhci_transfer_t *xt;
 291  296          ddi_device_acc_attr_t acc;
 292  297          ddi_dma_attr_t attr;
 293  298  
 294  299          if (usb_flags & USB_FLAGS_SLEEP) {
 295  300                  kmflags = KM_SLEEP;
 296  301                  dmawait = B_TRUE;
↓ open down ↓ 15 lines elided ↑ open up ↑
 312  317                   * However, for control transfers, we currently opt to keep
 313  318                   * things a bit simpler and use our default of one SGL.  There's
 314  319                   * no good technical reason for this, rather it just keeps
 315  320                   * things a bit easier.
 316  321                   *
 317  322                   * To simplify things, we don't use additional SGL entries for
 318  323                   * ISOC transfers. While this isn't the best, it isn't too far
 319  324                   * off from what ehci and co. have done before. If this becomes
 320  325                   * a technical issue, it's certainly possible to increase the
 321  326                   * SGL entry count.
      327 +                 *
      328 +                 * When we use the larger SGL count, we change our strategy for
      329 +                 * being notified. In such a case we will opt to use an event
      330 +                 * data packet. This helps deal with cases where some
      331 +                 * controllers don't properly generate events for the last entry
      332 +                 * in a TD with IOC when IOSP is set.
 322  333                   */
 323      -                if (xep->xep_type == USB_EP_ATTR_BULK)
      334 +                if (xep->xep_type == USB_EP_ATTR_BULK) {
 324  335                          sgl = XHCI_TRANSFER_DMA_SGL;
      336 +                        trbs++;
      337 +                }
 325  338  
 326  339                  xhci_dma_acc_attr(xhcip, &acc);
 327  340                  xhci_dma_transfer_attr(xhcip, &attr, sgl);
 328  341                  if (xhci_dma_alloc(xhcip, &xt->xt_buffer, &attr, &acc, B_FALSE,
 329  342                      size, dmawait) == B_FALSE) {
 330  343                          kmem_free(xt, sizeof (xhci_transfer_t));
 331  344                          return (NULL);
 332  345                  }
 333  346  
 334  347                  /*
↓ open down ↓ 4 lines elided ↑ open up ↑
 339  352                          trbs += xt->xt_buffer.xdb_ncookies;
 340  353          }
 341  354  
 342  355          xt->xt_trbs = kmem_zalloc(sizeof (xhci_trb_t) * trbs, kmflags);
 343  356          if (xt->xt_trbs == NULL) {
 344  357                  xhci_dma_free(&xt->xt_buffer);
 345  358                  kmem_free(xt, sizeof (xhci_transfer_t));
 346  359                  return (NULL);
 347  360          }
 348  361  
      362 +        xt->xt_trbs_pa = kmem_zalloc(sizeof (uint64_t) * trbs, kmflags);
      363 +        if (xt->xt_trbs_pa == NULL) {
      364 +                kmem_free(xt->xt_trbs, sizeof (xhci_trb_t) * trbs);
      365 +                xhci_dma_free(&xt->xt_buffer);
      366 +                kmem_free(xt, sizeof (xhci_transfer_t));
      367 +                return (NULL);
      368 +        }
      369 +
 349  370          /*
 350  371           * For ISOCH transfers, we need to also allocate the results data.
 351  372           */
 352  373          if (xep->xep_type == USB_EP_ATTR_ISOCH) {
 353  374                  xt->xt_isoc = kmem_zalloc(sizeof (usb_isoc_pkt_descr_t) * trbs,
 354  375                      kmflags);
 355  376                  if (xt->xt_isoc == NULL) {
      377 +                        kmem_free(xt->xt_trbs_pa, sizeof (uint64_t) * trbs);
 356  378                          kmem_free(xt->xt_trbs, sizeof (xhci_trb_t) * trbs);
 357  379                          xhci_dma_free(&xt->xt_buffer);
 358  380                          kmem_free(xt, sizeof (xhci_transfer_t));
 359  381                          return (NULL);
 360  382                  }
 361  383          }
 362  384  
 363  385          xt->xt_ntrbs = trbs;
 364  386          xt->xt_cr = USB_CR_OK;
 365  387  
↓ open down ↓ 29 lines elided ↑ open up ↑
 395  417   * 4.11.2.4. While it might be tempting to just try and calculate the number of
 396  418   * packets based on simple rounding of the remaining number of bytes, that
 397  419   * misses a critical problem -- DMA boundaries may cause us to need additional
 398  420   * packets that are missed initially. Consider a transfer made up of four
 399  421   * different DMA buffers sized in bytes: 4096, 4096, 256, 256, with a 512 byte
 400  422   * packet size.
 401  423   *
 402  424   * Remain       4608    512     256     0
 403  425   * Bytes        4096    4096    256     256
 404  426   * Naive TD     9       1       1       0
 405      - * Act TD       10      2       1       0
      427 + * Act TD       10      2       1       0
 406  428   *
 407  429   * This means that the only safe way forward here is to work backwards and see
 408  430   * how many we need to work up to this point.
 409  431   */
 410      -static int
      432 +static uint_t
 411  433  xhci_transfer_get_tdsize(xhci_transfer_t *xt, uint_t off, uint_t mps)
 412  434  {
 413  435          int i;
 414  436          uint_t npkt = 0;
 415  437  
 416  438          /*
 417  439           * There are always zero packets for the last TRB.
 418  440           */
 419  441          ASSERT(xt->xt_buffer.xdb_ncookies > 0);
 420  442          for (i = xt->xt_buffer.xdb_ncookies - 1; i > off; i--) {
 421      -                size_t len;
 422      -
 423      -                /*
 424      -                 * The maximum value we can return is 31 packets. So, in that
 425      -                 * case we short-circuit and return.
 426      -                 */
 427      -                if (npkt >= 31)
 428      -                        return (31);
 429      -
 430      -                len = roundup(xt->xt_buffer.xdb_cookies[i].dmac_size, mps);
      443 +                size_t len = roundup(xt->xt_buffer.xdb_cookies[i].dmac_size,
      444 +                    mps);
 431  445                  npkt += len / mps;
 432  446          }
 433  447  
      448 +        /*
      449 +         * Make sure to clamp this value otherwise we risk truncation.
      450 +         */
      451 +        if (npkt >= XHCI_MAX_TDSIZE)
      452 +                return (XHCI_MAX_TDSIZE);
      453 +
 434  454          return (npkt);
 435  455  }
 436  456  
 437  457  void
 438  458  xhci_transfer_trb_fill_data(xhci_endpoint_t *xep, xhci_transfer_t *xt, int off,
 439  459      boolean_t in)
 440  460  {
 441  461          uint_t mps, tdsize, flags;
 442  462          int i;
 443  463  
 444  464          VERIFY(xt->xt_buffer.xdb_ncookies > 0);
 445  465          VERIFY(xep->xep_pipe != NULL);
 446  466          VERIFY(off + xt->xt_buffer.xdb_ncookies <= xt->xt_ntrbs);
 447  467          mps = xep->xep_pipe->p_ep.wMaxPacketSize;
 448  468  
      469 +        if (in == B_TRUE) {
      470 +                xt->xt_data_tohost = B_TRUE;
      471 +        }
      472 +
      473 +        /*
      474 +         * We assume that if we have a non-bulk endpoint, then we should only
      475 +         * have a single cookie. This falls out from the default SGL length that
      476 +         * we use for these other device types.
      477 +         */
      478 +        if (xep->xep_type != USB_EP_ATTR_BULK) {
      479 +                VERIFY3U(xt->xt_buffer.xdb_ncookies, ==, 1);
      480 +        }
      481 +
 449  482          for (i = 0; i < xt->xt_buffer.xdb_ncookies; i++) {
 450  483                  uint64_t pa, dmasz;
 451  484  
 452  485                  pa = xt->xt_buffer.xdb_cookies[i].dmac_laddress;
 453  486                  dmasz = xt->xt_buffer.xdb_cookies[i].dmac_size;
 454  487  
 455  488                  tdsize = xhci_transfer_get_tdsize(xt, i, mps);
 456  489  
 457  490                  flags = XHCI_TRB_TYPE_NORMAL;
 458  491                  if (i == 0 && xep->xep_type == USB_EP_ATTR_CONTROL) {
 459  492                          flags = XHCI_TRB_TYPE_DATA;
 460  493                          if (in == B_TRUE)
 461  494                                  flags |= XHCI_TRB_DIR_IN;
 462  495                  }
 463  496  
 464  497                  /*
 465      -                 * When reading data in (from the device), we may get shorter
 466      -                 * transfers than the buffer allowed for. To make sure we get
 467      -                 * notified about that and handle that, we need to set the ISP
 468      -                 * flag.
      498 +                 * If we have more than one cookie, then we need to set chaining
      499 +                 * on every TRB and the last TRB will turn into an event data
      500 +                 * TRB. If we only have a single TRB, then we just set interrupt
      501 +                 * on completion (IOC). There's no need to specifically set
      502 +                 * interrupt on short packet (IOSP) in that case, as we'll
      503 +                 * always get the event notification. We still need the chain
      504 +                 * bit set on the last packet, so we can chain into the event
      505 +                 * data. Even if all the data on a bulk endpoint (the only
      506 +                 * endpoint type that uses chaining today) fits in one cookie,
      507 +                 * we'll still schedule an event data TRB.
 469  508                   */
 470      -                if (in == B_TRUE) {
 471      -                        flags |= XHCI_TRB_ISP;
 472      -                        xt->xt_data_tohost = B_TRUE;
 473      -                }
 474      -
 475      -                /*
 476      -                 * When we have more than one cookie, we are technically
 477      -                 * chaining together things according to the controllers view,
 478      -                 * hence why we need to set the chain flag.
 479      -                 */
 480      -                if (xt->xt_buffer.xdb_ncookies > 1 &&
 481      -                    i != (xt->xt_buffer.xdb_ncookies - 1)) {
      509 +                if (xep->xep_type == USB_EP_ATTR_BULK ||
      510 +                    xt->xt_buffer.xdb_ncookies > 1) {
 482  511                          flags |= XHCI_TRB_CHAIN;
 483  512                  }
 484  513  
 485  514                  /*
 486      -                 * If we have a non-control transfer, then we need to make sure
 487      -                 * that we set ourselves up to be interrupted, which we set for
 488      -                 * the last entry.
      515 +                 * What we set for the last TRB depends on the type of the
      516 +                 * endpoint. If it's a bulk endpoint, then we have to set
      517 +                 * evaluate next trb (ENT) so we successfully process the event
      518 +                 * data TRB we'll set up. Otherwise, we need to make sure that
      519 +                 * we set interrupt on completion, so we get the event. However,
      520 +                 * we don't set the event on control endpoints, as the status
      521 +                 * stage TD will be the one where we get the event. But, we do
      522 +                 * still need an interrupt on short packet, because technically
      523 +                 * the status stage is in its own TD.
 489  524                   */
 490      -                if (i + 1 == xt->xt_buffer.xdb_ncookies &&
 491      -                    xep->xep_type != USB_EP_ATTR_CONTROL) {
 492      -                        flags |= XHCI_TRB_IOC;
      525 +                if (i + 1 == xt->xt_buffer.xdb_ncookies) {
      526 +                        switch (xep->xep_type) {
      527 +                        case USB_EP_ATTR_BULK:
      528 +                                flags |= XHCI_TRB_ENT;
      529 +                                break;
      530 +                        case USB_EP_ATTR_CONTROL:
      531 +                                flags |= XHCI_TRB_ISP;
      532 +                                break;
      533 +                        default:
      534 +                                flags |= XHCI_TRB_IOC;
      535 +                                break;
      536 +                        }
 493  537                  }
 494  538  
 495  539                  xt->xt_trbs[off + i].trb_addr = LE_64(pa);
 496  540                  xt->xt_trbs[off + i].trb_status = LE_32(XHCI_TRB_LEN(dmasz) |
 497  541                      XHCI_TRB_TDREM(tdsize) | XHCI_TRB_INTR(0));
 498  542                  xt->xt_trbs[off + i].trb_flags = LE_32(flags);
 499  543          }
      544 +
      545 +        /*
      546 +         * The last TRB in any bulk transfer is the Event Data TRB.
      547 +         */
      548 +        if (xep->xep_type == USB_EP_ATTR_BULK) {
      549 +                VERIFY(off + xt->xt_buffer.xdb_ncookies + 1 <= xt->xt_ntrbs);
      550 +                xt->xt_trbs[off + i].trb_addr = LE_64((uintptr_t)xt);
      551 +                xt->xt_trbs[off + i].trb_status = LE_32(XHCI_TRB_INTR(0));
      552 +                xt->xt_trbs[off + i].trb_flags = LE_32(XHCI_TRB_TYPE_EVENT |
      553 +                    XHCI_TRB_IOC);
      554 +        }
 500  555  }
 501  556  
 502  557  /*
 503  558   * These are utility functions for isochronous transfers to help calculate the
 504  559   * transfer burst count (TBC) and transfer last burst packet count (TLBPC)
 505  560   * entries for an isochronous entry. See xHCI 1.1 / 4.11.2.3 for how to calculate
 506  561   * them.
 507  562   */
 508  563  void
 509  564  xhci_transfer_calculate_isoc(xhci_device_t *xd, xhci_endpoint_t *xep,
↓ open down ↓ 26 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX