/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2016 Joyent, Inc.
 */

/*
 * xHCI DMA Management Routines
 *
 * Please see the big theory statement in xhci.c for more information.
 */

#include <sys/usb/hcd/xhci/xhci.h>

int
xhci_check_dma_handle(xhci_t *xhcip, xhci_dma_buffer_t *xdb)
{
        ddi_fm_error_t de;

        if (!DDI_FM_DMA_ERR_CAP(xhcip->xhci_fm_caps))
                return (0);

        ddi_fm_dma_err_get(xdb->xdb_dma_handle, &de, DDI_FME_VERSION);
        return (de.fme_status);
}

void
xhci_dma_acc_attr(xhci_t *xhcip, ddi_device_acc_attr_t *accp)
{
        accp->devacc_attr_version = DDI_DEVICE_ATTR_V0;
        accp->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
        accp->devacc_attr_dataorder = DDI_STRICTORDER_ACC;

        if (DDI_FM_DMA_ERR_CAP(xhcip->xhci_fm_caps)) {
                accp->devacc_attr_access = DDI_FLAGERR_ACC;
        } else {
                accp->devacc_attr_access = DDI_DEFAULT_ACC;
        }
}

/*
 * These are the DMA attributes that we assign when performing a transfer. The
 * SGL length is variable and supplied by the caller; it depends on the type of
 * transfer being done.
 */
void
xhci_dma_transfer_attr(xhci_t *xhcip, ddi_dma_attr_t *attrp, uint_t sgl)
{
        VERIFY3U(sgl, >, 0);
        VERIFY3U(sgl, <=, XHCI_TRANSFER_DMA_SGL);
        attrp->dma_attr_version = DMA_ATTR_V0;

        /*
         * The range of addresses that we can use is based on what the
         * hardware supports.
         */
        attrp->dma_attr_addr_lo = 0x0;
        if (xhcip->xhci_caps.xcap_flags & XCAP_AC64) {
                attrp->dma_attr_addr_hi = UINT64_MAX;
        } else {
                attrp->dma_attr_addr_hi = UINT32_MAX;
        }

        /*
         * The count max indicates the total amount that will fit into one
         * cookie, which is one TRB in our world. In other words 64k.
         */
        attrp->dma_attr_count_max = XHCI_TRB_MAX_TRANSFER;

        /*
         * The alignment and segment are related. The alignment describes the
         * alignment of the PA. The segment describes a boundary that the DMA
         * allocation cannot cross. In other words, a given chunk of memory
         * cannot cross a 64k boundary. However, the physical address only
         * needs to be aligned to dma_attr_align bytes.
         */
        attrp->dma_attr_align = XHCI_DMA_ALIGN;
        attrp->dma_attr_seg = XHCI_TRB_MAX_TRANSFER - 1;

        attrp->dma_attr_burstsizes = 0xfff;

        /*
         * This is the maximum we can send. Technically this is limited by the
         * descriptors and not by the hardware, which is why we use a large
         * value for the max that'll be larger than any memory allocation we
         * ever throw at it.
         */
        attrp->dma_attr_minxfer = 0x1;
        attrp->dma_attr_maxxfer = UINT32_MAX;

        /*
         * This is determined by the caller.
         */
        attrp->dma_attr_sgllen = sgl;

        /*
         * The granularity describes the addressing granularity, i.e. the
         * units in which requests for chunks of memory can be made. For PCI
         * this should always be one.
         */
        attrp->dma_attr_granular = 1;

        if (DDI_FM_DMA_ERR_CAP(xhcip->xhci_fm_caps)) {
                attrp->dma_attr_flags = DDI_DMA_FLAGERR;
        } else {
                attrp->dma_attr_flags = 0;
        }
}

/*
 * This routine creates DMA attributes for normal allocations of data
 * structures and the like. By default we use the same values as the transfer
 * attributes, with explicit comments below about how they differ.
 */
void
xhci_dma_dma_attr(xhci_t *xhcip, ddi_dma_attr_t *attrp)
{
        /*
         * Note, we always use a single SGL for these DMA allocations as these
         * are used for small data structures.
         */
        xhci_dma_transfer_attr(xhcip, attrp, XHCI_DEF_DMA_SGL);

        /*
         * The maximum size of any of these structures is 4k as opposed to the
         * 64K max described above. Similarly the boundary requirement is
         * reduced to 4k.
         */
        attrp->dma_attr_count_max = xhcip->xhci_caps.xcap_pagesize;
        attrp->dma_attr_maxxfer = xhcip->xhci_caps.xcap_pagesize;
        attrp->dma_attr_seg = xhcip->xhci_caps.xcap_pagesize - 1;
}

/*
 * Fill in attributes for a scratchpad entry. Scratchpad entries are closest to
 * the normal DMA attributes, except that they have a stricter alignment
 * requirement: they must be page aligned.
 *
 * In addition, because we never access this memory ourselves, we can just mark
 * it all as relaxed ordering.
 */
void
xhci_dma_scratchpad_attr(xhci_t *xhcip, ddi_dma_attr_t *attrp)
{
        xhci_dma_dma_attr(xhcip, attrp);
        attrp->dma_attr_align = xhcip->xhci_caps.xcap_pagesize;
        attrp->dma_attr_flags |= DDI_DMA_RELAXED_ORDERING;
}

/*
 * This should be used for the simple case of a single SGL entry, which is the
 * vast majority of the non-transfer allocations.
 */
uint64_t
xhci_dma_pa(xhci_dma_buffer_t *xdb)
{
        ASSERT(xdb->xdb_ncookies == 1);
        return (xdb->xdb_cookies[0].dmac_laddress);
}

void
xhci_dma_free(xhci_dma_buffer_t *xdb)
{
        if (xdb->xdb_ncookies != 0) {
                VERIFY(xdb->xdb_dma_handle != NULL);
                (void) ddi_dma_unbind_handle(xdb->xdb_dma_handle);
                xdb->xdb_ncookies = 0;
                bzero(xdb->xdb_cookies, sizeof (ddi_dma_cookie_t) *
                    XHCI_TRANSFER_DMA_SGL);
                xdb->xdb_len = 0;
        }

        if (xdb->xdb_acc_handle != NULL) {
                ddi_dma_mem_free(&xdb->xdb_acc_handle);
                xdb->xdb_acc_handle = NULL;
                xdb->xdb_va = NULL;
        }

        if (xdb->xdb_dma_handle != NULL) {
                ddi_dma_free_handle(&xdb->xdb_dma_handle);
                xdb->xdb_dma_handle = NULL;
        }

        ASSERT(xdb->xdb_va == NULL);
        ASSERT(xdb->xdb_ncookies == 0);
        ASSERT(xdb->xdb_cookies[0].dmac_laddress == 0);
        ASSERT(xdb->xdb_len == 0);
}

boolean_t
xhci_dma_alloc(xhci_t *xhcip, xhci_dma_buffer_t *xdb,
    ddi_dma_attr_t *attrp, ddi_device_acc_attr_t *accp, boolean_t zero,
    size_t size, boolean_t wait)
{
        int ret, i;
        uint_t flags = DDI_DMA_CONSISTENT;
        size_t len;
        ddi_dma_cookie_t cookie;
        uint_t ncookies;
        int (*memcb)(caddr_t);

        if (wait == B_TRUE) {
                memcb = DDI_DMA_SLEEP;
        } else {
                memcb = DDI_DMA_DONTWAIT;
        }

        ret = ddi_dma_alloc_handle(xhcip->xhci_dip, attrp, memcb, NULL,
            &xdb->xdb_dma_handle);
        if (ret != 0) {
                xhci_log(xhcip, "!failed to allocate DMA handle: %d", ret);
                xdb->xdb_dma_handle = NULL;
                return (B_FALSE);
        }

        ret = ddi_dma_mem_alloc(xdb->xdb_dma_handle, size, accp, flags, memcb,
            NULL, &xdb->xdb_va, &len, &xdb->xdb_acc_handle);
        if (ret != DDI_SUCCESS) {
                xhci_log(xhcip, "!failed to allocate DMA memory: %d", ret);
                xdb->xdb_va = NULL;
                xdb->xdb_acc_handle = NULL;
                xhci_dma_free(xdb);
                return (B_FALSE);
        }

        if (zero == B_TRUE)
                bzero(xdb->xdb_va, len);

        ret = ddi_dma_addr_bind_handle(xdb->xdb_dma_handle, NULL,
            xdb->xdb_va, len, DDI_DMA_RDWR | flags, memcb, NULL, &cookie,
            &ncookies);
        if (ret != 0) {
                xhci_log(xhcip, "!failed to bind DMA memory: %d", ret);
                xhci_dma_free(xdb);
                return (B_FALSE);
        }

        /*
         * Note we explicitly store the logical length of this allocation. The
         * physical length is available via the cookies.
         */
        xdb->xdb_len = size;
        xdb->xdb_ncookies = ncookies;
        xdb->xdb_cookies[0] = cookie;
        for (i = 1; i < ncookies; i++) {
                ddi_dma_nextcookie(xdb->xdb_dma_handle, &xdb->xdb_cookies[i]);
        }

        return (B_TRUE);
}
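
/*
 * As an illustrative sketch (not a real caller in this file), a consumer that
 * needs a single zeroed, device-visible structure would typically combine the
 * helpers above as follows, assuming an xhci_t *xhcip and a size_t len are in
 * scope and the caller is allowed to block:
 *
 *      ddi_device_acc_attr_t acc;
 *      ddi_dma_attr_t attr;
 *      xhci_dma_buffer_t xdb;
 *
 *      xhci_dma_acc_attr(xhcip, &acc);
 *      xhci_dma_dma_attr(xhcip, &attr);
 *      if (xhci_dma_alloc(xhcip, &xdb, &attr, &acc, B_TRUE, len,
 *          B_TRUE) == B_FALSE)
 *              return (B_FALSE);
 *      ... program xhci_dma_pa(&xdb) into the controller ...
 *      xhci_dma_free(&xdb);
 */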

void
xhci_transfer_free(xhci_t *xhcip, xhci_transfer_t *xt)
{
        if (xt == NULL)
                return;

        VERIFY(xhcip != NULL);
        xhci_dma_free(&xt->xt_buffer);
        if (xt->xt_isoc != NULL) {
                ASSERT(xt->xt_ntrbs > 0);
                kmem_free(xt->xt_isoc, sizeof (usb_isoc_pkt_descr_t) *
                    xt->xt_ntrbs);
                xt->xt_isoc = NULL;
        }
        if (xt->xt_trbs != NULL) {
                ASSERT(xt->xt_ntrbs > 0);
                kmem_free(xt->xt_trbs, sizeof (xhci_trb_t) * xt->xt_ntrbs);
                xt->xt_trbs = NULL;
        }
        kmem_free(xt, sizeof (xhci_transfer_t));
}

xhci_transfer_t *
xhci_transfer_alloc(xhci_t *xhcip, xhci_endpoint_t *xep, size_t size, int trbs,
    int usb_flags)
{
        int kmflags;
        boolean_t dmawait;
        xhci_transfer_t *xt;
        ddi_device_acc_attr_t acc;
        ddi_dma_attr_t attr;

        if (usb_flags & USB_FLAGS_SLEEP) {
                kmflags = KM_SLEEP;
                dmawait = B_TRUE;
        } else {
                kmflags = KM_NOSLEEP;
                dmawait = B_FALSE;
        }

        xt = kmem_zalloc(sizeof (xhci_transfer_t), kmflags);
        if (xt == NULL)
                return (NULL);

        if (size != 0) {
                int sgl = XHCI_DEF_DMA_SGL;

                /*
                 * For BULK transfers, we always increase the number of SGL
                 * entries that we support to make things easier for the
                 * kernel. However, for control transfers, we currently opt to
                 * use our default of one SGL. There's no good technical
                 * reason for this; it just keeps things a bit simpler.
                 *
                 * Similarly, we don't use additional SGL entries for ISOC
                 * transfers. While this isn't ideal, it isn't too far off from
                 * what ehci and co. have done before. If this becomes a
                 * technical issue, it's certainly possible to increase the SGL
                 * entry count.
                 */
                if (xep->xep_type == USB_EP_ATTR_BULK)
                        sgl = XHCI_TRANSFER_DMA_SGL;

                xhci_dma_acc_attr(xhcip, &acc);
                xhci_dma_transfer_attr(xhcip, &attr, sgl);
                if (xhci_dma_alloc(xhcip, &xt->xt_buffer, &attr, &acc, B_FALSE,
                    size, dmawait) == B_FALSE) {
                        kmem_free(xt, sizeof (xhci_transfer_t));
                        return (NULL);
                }

                /*
                 * ISOC transfers are a bit special and don't need additional
                 * TRBs for data.
                 */
                if (xep->xep_type != USB_EP_ATTR_ISOCH)
                        trbs += xt->xt_buffer.xdb_ncookies;
        }

        xt->xt_trbs = kmem_zalloc(sizeof (xhci_trb_t) * trbs, kmflags);
        if (xt->xt_trbs == NULL) {
                xhci_dma_free(&xt->xt_buffer);
                kmem_free(xt, sizeof (xhci_transfer_t));
                return (NULL);
        }

        /*
         * For ISOCH transfers, we need to also allocate the results data.
         */
        if (xep->xep_type == USB_EP_ATTR_ISOCH) {
                xt->xt_isoc = kmem_zalloc(sizeof (usb_isoc_pkt_descr_t) * trbs,
                    kmflags);
                if (xt->xt_isoc == NULL) {
                        kmem_free(xt->xt_trbs, sizeof (xhci_trb_t) * trbs);
                        xhci_dma_free(&xt->xt_buffer);
                        kmem_free(xt, sizeof (xhci_transfer_t));
                        return (NULL);
                }
        }

        xt->xt_ntrbs = trbs;
        xt->xt_cr = USB_CR_OK;

        return (xt);
}

/*
 * Abstract the notion of copying out to handle the case of multiple DMA
 * cookies. If tobuf is true, we are copying to the kernel provided buffer,
 * otherwise we're copying into the DMA memory.
 */
void
xhci_transfer_copy(xhci_transfer_t *xt, void *buf, size_t len,
    boolean_t tobuf)
{
        void *dmabuf = xt->xt_buffer.xdb_va;
        if (tobuf == B_TRUE)
                bcopy(dmabuf, buf, len);
        else
                bcopy(buf, dmabuf, len);
}

int
xhci_transfer_sync(xhci_t *xhcip, xhci_transfer_t *xt, uint_t type)
{
        XHCI_DMA_SYNC(xt->xt_buffer, type);
        return (xhci_check_dma_handle(xhcip, &xt->xt_buffer));
}

/*
 * We're required to inform the xHCI controller of the number of data packets
 * that remain in a transfer. The algorithm to use is described in xHCI 1.1 /
 * 4.11.2.4. While it might be tempting to just calculate the number of packets
 * by simple rounding of the remaining number of bytes, that misses a critical
 * problem -- DMA boundaries may cause us to need additional packets that such
 * an estimate misses. Consider a transfer made up of four different DMA
 * buffers sized in bytes: 4096, 4096, 256, 256, with a 512 byte packet size.
 *
 * Remain       4608    512     256     0
 * Bytes        4096    4096    256     256
 * Naive TD     9       1       1       0
 * Act TD       10      2       1       0
 *
 * This means that the only safe way forward is to work backwards from the end
 * of the transfer and count how many packets are needed for everything after a
 * given TRB.
 */
static int
xhci_transfer_get_tdsize(xhci_transfer_t *xt, uint_t off, uint_t mps)
{
        int i;
        uint_t npkt = 0;

        /*
         * There are always zero packets for the last TRB.
         */
        ASSERT(xt->xt_buffer.xdb_ncookies > 0);
        for (i = xt->xt_buffer.xdb_ncookies - 1; i > off; i--) {
                size_t len;

                /*
                 * The maximum value we can return is 31 packets. So, in that
                 * case we short-circuit and return.
                 */
                if (npkt >= 31)
                        return (31);

                len = roundup(xt->xt_buffer.xdb_cookies[i].dmac_size, mps);
                npkt += len / mps;
        }

        return (npkt);
}
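
/*
 * As a worked example (using the illustrative buffer sizes from the comment
 * above, not real driver data): with cookies of 4096, 4096, 256 and 256 bytes
 * and a 512 byte max packet size, the call for the first TRB (off == 0) walks
 * the later cookies and sums roundup(256, 512) / 512 + roundup(256, 512) / 512
 * + roundup(4096, 512) / 512 = 1 + 1 + 8 = 10 packets, matching the "Act TD"
 * row above rather than the naive estimate of 9.
 */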

void
xhci_transfer_trb_fill_data(xhci_endpoint_t *xep, xhci_transfer_t *xt, int off,
    boolean_t in)
{
        uint_t mps, tdsize, flags;
        int i;

        VERIFY(xt->xt_buffer.xdb_ncookies > 0);
        VERIFY(xep->xep_pipe != NULL);
        VERIFY(off + xt->xt_buffer.xdb_ncookies <= xt->xt_ntrbs);
        mps = xep->xep_pipe->p_ep.wMaxPacketSize;

        for (i = 0; i < xt->xt_buffer.xdb_ncookies; i++) {
                uint64_t pa, dmasz;

                pa = xt->xt_buffer.xdb_cookies[i].dmac_laddress;
                dmasz = xt->xt_buffer.xdb_cookies[i].dmac_size;

                tdsize = xhci_transfer_get_tdsize(xt, i, mps);

                flags = XHCI_TRB_TYPE_NORMAL;
                if (i == 0 && xep->xep_type == USB_EP_ATTR_CONTROL) {
                        flags = XHCI_TRB_TYPE_DATA;
                        if (in == B_TRUE)
                                flags |= XHCI_TRB_DIR_IN;
                }

                /*
                 * When reading data in (from the device), we may get shorter
                 * transfers than the buffer allowed for. To make sure we are
                 * notified of that and can handle it, we need to set the ISP
                 * flag.
                 */
                if (in == B_TRUE) {
                        flags |= XHCI_TRB_ISP;
                        xt->xt_data_tohost = B_TRUE;
                }

                /*
                 * When we have more than one cookie, we are chaining TRBs
                 * together from the controller's point of view, which is why
                 * we need to set the chain flag on all but the last one.
                 */
                if (xt->xt_buffer.xdb_ncookies > 1 &&
                    i != (xt->xt_buffer.xdb_ncookies - 1)) {
                        flags |= XHCI_TRB_CHAIN;
                }

                /*
                 * If we have a non-control transfer, then we need to make sure
                 * that we are interrupted on completion, which we arrange by
                 * setting IOC on the last entry.
                 */
                if (i + 1 == xt->xt_buffer.xdb_ncookies &&
                    xep->xep_type != USB_EP_ATTR_CONTROL) {
                        flags |= XHCI_TRB_IOC;
                }

                xt->xt_trbs[off + i].trb_addr = LE_64(pa);
                xt->xt_trbs[off + i].trb_status = LE_32(XHCI_TRB_LEN(dmasz) |
                    XHCI_TRB_TDREM(tdsize) | XHCI_TRB_INTR(0));
                xt->xt_trbs[off + i].trb_flags = LE_32(flags);
        }
}
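
/*
 * To illustrate the flag logic above with a hypothetical example (not taken
 * from any real hardware state): a bulk IN transfer whose buffer was bound to
 * three DMA cookies fills three TRBs as
 *
 *      TRB 0: NORMAL | ISP | CHAIN
 *      TRB 1: NORMAL | ISP | CHAIN
 *      TRB 2: NORMAL | ISP | IOC
 *
 * while a control IN data stage with a single cookie gets just DATA | DIR_IN |
 * ISP; interrupt-on-completion is left to the rest of the control TD that the
 * caller constructs.
 */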

/*
 * This is a utility function for isochronous transfers to help calculate the
 * transfer burst count (TBC) and transfer last burst packet count (TLBPC)
 * entries for an isochronous TRB. See xHCI 1.1 / 4.11.2.3 for how to calculate
 * them.
 */
void
xhci_transfer_calculate_isoc(xhci_device_t *xd, xhci_endpoint_t *xep,
    uint_t trb_len, uint_t *tbc, uint_t *tlbpc)
{
        uint_t mps, tdpc, burst;

        /*
         * Even if we're asked to send no data, that actually requires the
         * equivalent of sending one byte of data.
         */
        if (trb_len == 0)
                trb_len = 1;

        mps = XHCI_EPCTX_GET_MPS(xd->xd_endout[xep->xep_num]->xec_info2);
        burst = XHCI_EPCTX_GET_MAXB(xd->xd_endout[xep->xep_num]->xec_info2);

        /*
         * This is supposed to correspond to the Transfer Descriptor Packet
         * Count from xHCI 1.1 / 4.14.1.
         */
        tdpc = howmany(trb_len, mps);
        *tbc = howmany(tdpc, burst + 1) - 1;

        if ((tdpc % (burst + 1)) == 0)
                *tlbpc = burst;
        else
                *tlbpc = (tdpc % (burst + 1)) - 1;
}
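
/*
 * As a worked example of the calculation above (the numbers are illustrative,
 * not taken from a real endpoint context): with a max packet size of 1024
 * bytes and a max burst of 2 (i.e. bursts of 3 packets), a 3072 byte TRB gives
 * tdpc = 3, so TBC = howmany(3, 3) - 1 = 0 and, since 3 divides evenly into
 * bursts, TLBPC = burst = 2. A 4096 byte TRB gives tdpc = 4, so TBC =
 * howmany(4, 3) - 1 = 1 and TLBPC = (4 % 3) - 1 = 0, meaning the final burst
 * carries a single packet.
 */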