/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright 2018 Nexenta Systems, Inc.
 * Copyright (c) 2011 Bayard G. Bell.  All rights reserved.
 * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
 * Copyright 2017 Joyent, Inc.
 */

/*
 * x86 root nexus driver
 */

#include <sys/sysmacros.h>
#include <sys/conf.h>
#include <sys/autoconf.h>
#include <sys/debug.h>
#include <sys/psw.h>
#include <sys/ddidmareq.h>
#include <sys/promif.h>
#include <sys/devops.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_dev.h>
#include <sys/vmem.h>
#include <sys/mman.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/page.h>
#include <sys/avintr.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/ddi_impldefs.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/mach_intr.h>
#include <sys/psm.h>
#include <sys/ontrap.h>
#include <sys/atomic.h>
#include <sys/sdt.h>
#include <sys/rootnex.h>
#include <vm/hat_i86.h>
#include <sys/ddifm.h>
#include <sys/ddi_isa.h>
#include <sys/apic.h>

#ifdef __xpv
#include <sys/bootinfo.h>
#include <sys/hypervisor.h>
#include <sys/bootconf.h>
#include <vm/kboot_mmu.h>
#endif

#if defined(__amd64) && !defined(__xpv)
#include <sys/immu.h>
#endif


/*
 * enable/disable extra checking of function parameters. Useful for debugging
 * drivers.
 */
#ifdef  DEBUG
int rootnex_alloc_check_parms = 1;
int rootnex_bind_check_parms = 1;
int rootnex_bind_check_inuse = 1;
int rootnex_unbind_verify_buffer = 0;
int rootnex_sync_check_parms = 1;
#else
int rootnex_alloc_check_parms = 0;
int rootnex_bind_check_parms = 0;
int rootnex_bind_check_inuse = 0;
int rootnex_unbind_verify_buffer = 0;
int rootnex_sync_check_parms = 0;
#endif

boolean_t rootnex_dmar_not_setup;

/* Master Abort and Target Abort panic flag */
int rootnex_fm_ma_ta_panic_flag = 0;

/* Semi-temporary patchables to phase in bug fixes, test drivers, etc. */
int rootnex_bind_fail = 1;
int rootnex_bind_warn = 1;
uint8_t *rootnex_warn_list;
/* bitmasks for rootnex_warn_list. Up to 8 different warnings with uint8_t */
#define ROOTNEX_BIND_WARNING    (0x1 << 0)
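
/*
 * Illustrative sketch only (not the bind path itself): a per-driver warning
 * is rate limited by indexing rootnex_warn_list with the driver's major
 * number and testing/setting the bit defined above, roughly:
 *
 *	if (rootnex_bind_warn &&
 *	    !(rootnex_warn_list[ddi_driver_major(rdip)] &
 *	    ROOTNEX_BIND_WARNING)) {
 *		rootnex_warn_list[ddi_driver_major(rdip)] |=
 *		    ROOTNEX_BIND_WARNING;
 *		cmn_err(CE_WARN, "!%s: driver DMA bind problem",
 *		    ddi_driver_name(rdip));
 *	}
 */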

/*
 * Revert to the old broken behavior of always syncing the entire copy buffer.
 * This is useful if we have a buggy driver that doesn't correctly pass the
 * offset and size into ddi_dma_sync().
 */
int rootnex_sync_ignore_params = 0;

/*
 * For the 64-bit kernel, pre-alloc enough cookies for a 256K buffer plus 1
 * page for alignment. For the 32-bit kernel, pre-alloc enough cookies for a
 * 64K buffer plus 1 page for alignment (we have less kernel space in a 32-bit
 * kernel). Allocate enough windows to handle a 256K buffer with a DMA engine
 * whose sgllen is at least 65, and enough copybuf buffer state pages to
 * handle 2 pages (< 8K). We will still need to allocate the copy buffer
 * during bind if one is needed. These can only be modified in /etc/system
 * before rootnex attach.
 */
#if defined(__amd64)
int rootnex_prealloc_cookies = 65;
int rootnex_prealloc_windows = 4;
int rootnex_prealloc_copybuf = 2;
#else
int rootnex_prealloc_cookies = 33;
int rootnex_prealloc_windows = 4;
int rootnex_prealloc_copybuf = 2;
#endif
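
/*
 * For example, to override the defaults above one would add lines like the
 * following to /etc/system and reboot (the values here are illustrative
 * only):
 *
 *	set rootnex:rootnex_prealloc_cookies = 129
 *	set rootnex:rootnex_prealloc_windows = 8
 */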

/* driver global state */
static rootnex_state_t *rootnex_state;

#ifdef DEBUG
/* shortcut to rootnex counters */
static uint64_t *rootnex_cnt;
#endif

/*
 * XXX - does x86 even need these or are they left over from the SPARC days?
 */
/* statically defined integer/boolean properties for the root node */
static rootnex_intprop_t rootnex_intprp[] = {
        { "PAGESIZE",                   PAGESIZE },
        { "MMU_PAGESIZE",               MMU_PAGESIZE },
        { "MMU_PAGEOFFSET",             MMU_PAGEOFFSET },
        { DDI_RELATIVE_ADDRESSING,      1 },
};
#define NROOT_INTPROPS  (sizeof (rootnex_intprp) / sizeof (rootnex_intprop_t))
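
/*
 * A child driver would typically pick up one of these root-node properties
 * with ddi_prop_get_int(9F); a minimal, illustrative sketch (the variable
 * name and default value are assumptions):
 *
 *	int pagesize = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
 *	    "PAGESIZE", 0);
 */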

/*
 * If we're dom0, we're using a real device so we need to load
 * the cookies with MFNs instead of PFNs.
 */
#ifdef __xpv
typedef maddr_t rootnex_addr_t;
#define ROOTNEX_PADDR_TO_RBASE(pa)      \
        (DOMAIN_IS_INITDOMAIN(xen_info) ? pa_to_ma(pa) : (pa))
#else
typedef paddr_t rootnex_addr_t;
#define ROOTNEX_PADDR_TO_RBASE(pa)      (pa)
#endif

static struct cb_ops rootnex_cb_ops = {
        nodev,          /* open */
        nodev,          /* close */
        nodev,          /* strategy */
        nodev,          /* print */
        nodev,          /* dump */
        nodev,          /* read */
        nodev,          /* write */
        nodev,          /* ioctl */
        nodev,          /* devmap */
        nodev,          /* mmap */
        nodev,          /* segmap */
        nochpoll,       /* chpoll */
        ddi_prop_op,    /* cb_prop_op */
        NULL,           /* struct streamtab */
        D_NEW | D_MP | D_HOTPLUG, /* compatibility flags */
        CB_REV,         /* Rev */
        nodev,          /* cb_aread */
        nodev           /* cb_awrite */
};

static int rootnex_map(dev_info_t *dip, dev_info_t *rdip, ddi_map_req_t *mp,
    off_t offset, off_t len, caddr_t *vaddrp);
static int rootnex_map_fault(dev_info_t *dip, dev_info_t *rdip,
    struct hat *hat, struct seg *seg, caddr_t addr,
    struct devpage *dp, pfn_t pfn, uint_t prot, uint_t lock);
static int rootnex_dma_allochdl(dev_info_t *dip, dev_info_t *rdip,
    ddi_dma_attr_t *attr, int (*waitfp)(caddr_t), caddr_t arg,
    ddi_dma_handle_t *handlep);
static int rootnex_dma_freehdl(dev_info_t *dip, dev_info_t *rdip,
    ddi_dma_handle_t handle);
static int rootnex_dma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
    ddi_dma_handle_t handle, struct ddi_dma_req *dmareq,
    ddi_dma_cookie_t *cookiep, uint_t *ccountp);
static int rootnex_dma_unbindhdl(dev_info_t *dip, dev_info_t *rdip,
    ddi_dma_handle_t handle);
static int rootnex_dma_sync(dev_info_t *dip, dev_info_t *rdip,
    ddi_dma_handle_t handle, off_t off, size_t len, uint_t cache_flags);
static int rootnex_dma_win(dev_info_t *dip, dev_info_t *rdip,
    ddi_dma_handle_t handle, uint_t win, off_t *offp, size_t *lenp,
    ddi_dma_cookie_t *cookiep, uint_t *ccountp);
static int rootnex_dma_mctl(dev_info_t *dip, dev_info_t *rdip,
    ddi_dma_handle_t handle, enum ddi_dma_ctlops request,
    off_t *offp, size_t *lenp, caddr_t *objp, uint_t cache_flags);
static int rootnex_ctlops(dev_info_t *dip, dev_info_t *rdip,
    ddi_ctl_enum_t ctlop, void *arg, void *result);
static int rootnex_fm_init(dev_info_t *dip, dev_info_t *tdip, int tcap,
    ddi_iblock_cookie_t *ibc);
static int rootnex_intr_ops(dev_info_t *pdip, dev_info_t *rdip,
    ddi_intr_op_t intr_op, ddi_intr_handle_impl_t *hdlp, void *result);
static int rootnex_alloc_intr_fixed(dev_info_t *, ddi_intr_handle_impl_t *,
    void *);
static int rootnex_free_intr_fixed(dev_info_t *, ddi_intr_handle_impl_t *);

static int rootnex_coredma_allochdl(dev_info_t *dip, dev_info_t *rdip,
    ddi_dma_attr_t *attr, int (*waitfp)(caddr_t), caddr_t arg,
    ddi_dma_handle_t *handlep);
static int rootnex_coredma_freehdl(dev_info_t *dip, dev_info_t *rdip,
    ddi_dma_handle_t handle);
static int rootnex_coredma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
    ddi_dma_handle_t handle, struct ddi_dma_req *dmareq,
    ddi_dma_cookie_t *cookiep, uint_t *ccountp);
static int rootnex_coredma_unbindhdl(dev_info_t *dip, dev_info_t *rdip,
    ddi_dma_handle_t handle);
#if defined(__amd64) && !defined(__xpv)
static void rootnex_coredma_reset_cookies(dev_info_t *dip,
    ddi_dma_handle_t handle);
static int rootnex_coredma_get_cookies(dev_info_t *dip, ddi_dma_handle_t handle,
    ddi_dma_cookie_t **cookiepp, uint_t *ccountp);
static int rootnex_coredma_set_cookies(dev_info_t *dip, ddi_dma_handle_t handle,
    ddi_dma_cookie_t *cookiep, uint_t ccount);
static int rootnex_coredma_clear_cookies(dev_info_t *dip,
    ddi_dma_handle_t handle);
static int rootnex_coredma_get_sleep_flags(ddi_dma_handle_t handle);
#endif
static int rootnex_coredma_sync(dev_info_t *dip, dev_info_t *rdip,
    ddi_dma_handle_t handle, off_t off, size_t len, uint_t cache_flags);
static int rootnex_coredma_win(dev_info_t *dip, dev_info_t *rdip,
    ddi_dma_handle_t handle, uint_t win, off_t *offp, size_t *lenp,
    ddi_dma_cookie_t *cookiep, uint_t *ccountp);

#if defined(__amd64) && !defined(__xpv)
static int rootnex_coredma_hdl_setprivate(dev_info_t *dip, dev_info_t *rdip,
    ddi_dma_handle_t handle, void *v);
static void *rootnex_coredma_hdl_getprivate(dev_info_t *dip, dev_info_t *rdip,
    ddi_dma_handle_t handle);
#endif


static struct bus_ops rootnex_bus_ops = {
        BUSO_REV,
        rootnex_map,
        NULL,
        NULL,
        NULL,
        rootnex_map_fault,
        0,
        rootnex_dma_allochdl,
        rootnex_dma_freehdl,
        rootnex_dma_bindhdl,
        rootnex_dma_unbindhdl,
        rootnex_dma_sync,
        rootnex_dma_win,
        rootnex_dma_mctl,
        rootnex_ctlops,
        ddi_bus_prop_op,
        i_ddi_rootnex_get_eventcookie,
        i_ddi_rootnex_add_eventcall,
        i_ddi_rootnex_remove_eventcall,
        i_ddi_rootnex_post_event,
        0,                      /* bus_intr_ctl */
        0,                      /* bus_config */
        0,                      /* bus_unconfig */
        rootnex_fm_init,        /* bus_fm_init */
        NULL,                   /* bus_fm_fini */
        NULL,                   /* bus_fm_access_enter */
        NULL,                   /* bus_fm_access_exit */
        NULL,                   /* bus_power */
        rootnex_intr_ops        /* bus_intr_op */
};

static int rootnex_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int rootnex_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int rootnex_quiesce(dev_info_t *dip);

static struct dev_ops rootnex_ops = {
        DEVO_REV,
        0,
        ddi_no_info,
        nulldev,
        nulldev,
        rootnex_attach,
        rootnex_detach,
        nulldev,
        &rootnex_cb_ops,
        &rootnex_bus_ops,
        NULL,
        rootnex_quiesce,                /* quiesce */
};

static struct modldrv rootnex_modldrv = {
        &mod_driverops,
        "i86pc root nexus",
        &rootnex_ops
};

static struct modlinkage rootnex_modlinkage = {
        MODREV_1,
        (void *)&rootnex_modldrv,
        NULL
};

#if defined(__amd64) && !defined(__xpv)
static iommulib_nexops_t iommulib_nexops = {
        IOMMU_NEXOPS_VERSION,
        "Rootnex IOMMU ops Vers 1.1",
        NULL,
        rootnex_coredma_allochdl,
        rootnex_coredma_freehdl,
        rootnex_coredma_bindhdl,
        rootnex_coredma_unbindhdl,
        rootnex_coredma_reset_cookies,
        rootnex_coredma_get_cookies,
        rootnex_coredma_set_cookies,
        rootnex_coredma_clear_cookies,
        rootnex_coredma_get_sleep_flags,
        rootnex_coredma_sync,
        rootnex_coredma_win,
        rootnex_coredma_hdl_setprivate,
        rootnex_coredma_hdl_getprivate
};
#endif

/*
 *  extern hacks
 */
extern struct seg_ops segdev_ops;
extern int ignore_hardware_nodes;       /* force flag from ddi_impl.c */
#ifdef  DDI_MAP_DEBUG
extern int ddi_map_debug_flag;
#define ddi_map_debug   if (ddi_map_debug_flag) prom_printf
#endif
extern void i86_pp_map(page_t *pp, caddr_t kaddr);
extern void i86_va_map(caddr_t vaddr, struct as *asp, caddr_t kaddr);
extern int (*psm_intr_ops)(dev_info_t *, ddi_intr_handle_impl_t *,
    psm_intr_op_t, int *);
extern int impl_ddi_sunbus_initchild(dev_info_t *dip);
extern void impl_ddi_sunbus_removechild(dev_info_t *dip);

/*
 * Use the device arena for device control register mappings.
 * Various kernel memory walkers (debugger, dtrace) need to know
 * to avoid this address range to prevent undesired device activity.
 */
extern void *device_arena_alloc(size_t size, int vm_flag);
extern void device_arena_free(void *vaddr, size_t size);


/*
 *  Internal functions
 */
static int rootnex_dma_init();
static void rootnex_add_props(dev_info_t *);
static int rootnex_ctl_reportdev(dev_info_t *dip);
static struct intrspec *rootnex_get_ispec(dev_info_t *rdip, int inum);
static int rootnex_map_regspec(ddi_map_req_t *mp, caddr_t *vaddrp);
static int rootnex_unmap_regspec(ddi_map_req_t *mp, caddr_t *vaddrp);
static int rootnex_map_handle(ddi_map_req_t *mp);
static void rootnex_clean_dmahdl(ddi_dma_impl_t *hp);
static int rootnex_valid_alloc_parms(ddi_dma_attr_t *attr, uint_t maxsegsize);
static int rootnex_valid_bind_parms(ddi_dma_req_t *dmareq,
    ddi_dma_attr_t *attr);
static void rootnex_get_sgl(ddi_dma_obj_t *dmar_object, ddi_dma_cookie_t *sgl,
    rootnex_sglinfo_t *sglinfo);
static void rootnex_dvma_get_sgl(ddi_dma_obj_t *dmar_object,
    ddi_dma_cookie_t *sgl, rootnex_sglinfo_t *sglinfo);
static int rootnex_bind_slowpath(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
    rootnex_dma_t *dma, ddi_dma_attr_t *attr, ddi_dma_obj_t *dmao, int kmflag);
static int rootnex_setup_copybuf(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
    rootnex_dma_t *dma, ddi_dma_attr_t *attr);
static void rootnex_teardown_copybuf(rootnex_dma_t *dma);
static int rootnex_setup_windows(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
    ddi_dma_attr_t *attr, ddi_dma_obj_t *dmao, int kmflag);
static void rootnex_teardown_windows(rootnex_dma_t *dma);
static void rootnex_init_win(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
    rootnex_window_t *window, ddi_dma_cookie_t *cookie, off_t cur_offset);
static void rootnex_setup_cookie(ddi_dma_obj_t *dmar_object,
    rootnex_dma_t *dma, ddi_dma_cookie_t *cookie, off_t cur_offset,
    size_t *copybuf_used, page_t **cur_pp);
static int rootnex_sgllen_window_boundary(ddi_dma_impl_t *hp,
    rootnex_dma_t *dma, rootnex_window_t **windowp, ddi_dma_cookie_t *cookie,
    ddi_dma_attr_t *attr, off_t cur_offset);
static int rootnex_copybuf_window_boundary(ddi_dma_impl_t *hp,
    rootnex_dma_t *dma, rootnex_window_t **windowp,
    ddi_dma_cookie_t *cookie, off_t cur_offset, size_t *copybuf_used);
static int rootnex_maxxfer_window_boundary(ddi_dma_impl_t *hp,
    rootnex_dma_t *dma, rootnex_window_t **windowp, ddi_dma_cookie_t *cookie);
static int rootnex_valid_sync_parms(ddi_dma_impl_t *hp, rootnex_window_t *win,
    off_t offset, size_t size, uint_t cache_flags);
static int rootnex_verify_buffer(rootnex_dma_t *dma);
static int rootnex_dma_check(dev_info_t *dip, const void *handle,
    const void *comp_addr, const void *not_used);
static boolean_t rootnex_need_bounce_seg(ddi_dma_obj_t *dmar_object,
    rootnex_sglinfo_t *sglinfo);
static struct as *rootnex_get_as(ddi_dma_obj_t *dmar_object);

/*
 * _init()
 *
 */
int
_init(void)
{

        rootnex_state = NULL;
        return (mod_install(&rootnex_modlinkage));
}


/*
 * _info()
 *
 */
int
_info(struct modinfo *modinfop)
{
        return (mod_info(&rootnex_modlinkage, modinfop));
}


/*
 * _fini()
 *
 */
int
_fini(void)
{
        return (EBUSY);
}


/*
 * rootnex_attach()
 *
 */
static int
rootnex_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
        int fmcap;
        int e;

        switch (cmd) {
        case DDI_ATTACH:
                break;
        case DDI_RESUME:
#if defined(__amd64) && !defined(__xpv)
                return (immu_unquiesce());
#else
                return (DDI_SUCCESS);
#endif
        default:
                return (DDI_FAILURE);
        }

        /*
         * We should only have one instance of rootnex. Save it away since we
         * don't have an easy way to get it back later.
         */
        ASSERT(rootnex_state == NULL);
        rootnex_state = kmem_zalloc(sizeof (rootnex_state_t), KM_SLEEP);

        rootnex_state->r_dip = dip;
        rootnex_state->r_err_ibc = (ddi_iblock_cookie_t)ipltospl(15);
        rootnex_state->r_reserved_msg_printed = B_FALSE;
#ifdef DEBUG
        rootnex_cnt = &rootnex_state->r_counters[0];
#endif

        /*
         * Set minimum fm capability level for i86pc platforms and then
         * initialize error handling. Since we're the rootnex, we don't
         * care what's returned in the fmcap field.
         */
        ddi_system_fmcap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE |
            DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE;
        fmcap = ddi_system_fmcap;
        ddi_fm_init(dip, &fmcap, &rootnex_state->r_err_ibc);

        /* initialize DMA related state */
        e = rootnex_dma_init();
        if (e != DDI_SUCCESS) {
                kmem_free(rootnex_state, sizeof (rootnex_state_t));
                return (DDI_FAILURE);
        }

        /* Add static root node properties */
        rootnex_add_props(dip);

        /* since we can't call ddi_report_dev() */
        cmn_err(CE_CONT, "?root nexus = %s\n", ddi_get_name(dip));

        /* Initialize rootnex event handle */
        i_ddi_rootnex_init_events(dip);

#if defined(__amd64) && !defined(__xpv)
        e = iommulib_nexus_register(dip, &iommulib_nexops,
            &rootnex_state->r_iommulib_handle);

        ASSERT(e == DDI_SUCCESS);
#endif

        return (DDI_SUCCESS);
}


/*
 * rootnex_detach()
 *
 */
/*ARGSUSED*/
static int
rootnex_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
        switch (cmd) {
        case DDI_SUSPEND:
#if defined(__amd64) && !defined(__xpv)
                return (immu_quiesce());
#else
                return (DDI_SUCCESS);
#endif
        default:
                return (DDI_FAILURE);
        }
        /*NOTREACHED*/

}


/*
 * rootnex_dma_init()
 *
 */
/*ARGSUSED*/
static int
rootnex_dma_init()
{
        size_t bufsize;


        /*
         * Size of the cookie/window/copybuf state needed at DMA bind time
         * that we pre-allocate in dma_alloc_handle.
         */
        rootnex_state->r_prealloc_cookies = rootnex_prealloc_cookies;
        rootnex_state->r_prealloc_size =
            (rootnex_state->r_prealloc_cookies * sizeof (ddi_dma_cookie_t)) +
            (rootnex_prealloc_windows * sizeof (rootnex_window_t)) +
            (rootnex_prealloc_copybuf * sizeof (rootnex_pgmap_t));

        /*
         * Set up the DDI DMA handle kmem cache; align each handle on 64 bytes
         * and allocate 16 extra bytes for struct pointer alignment
         * (p->dmai_private & dma->dp_prealloc_buffer).
         */
        bufsize = sizeof (ddi_dma_impl_t) + sizeof (rootnex_dma_t) +
            rootnex_state->r_prealloc_size + 0x10;
        rootnex_state->r_dmahdl_cache = kmem_cache_create("rootnex_dmahdl",
            bufsize, 64, NULL, NULL, NULL, NULL, NULL, 0);
        if (rootnex_state->r_dmahdl_cache == NULL) {
                return (DDI_FAILURE);
        }

        /*
         * Allocate an array to track which major numbers we have printed
         * warnings for.
         */
        rootnex_warn_list = kmem_zalloc(devcnt * sizeof (*rootnex_warn_list),
            KM_SLEEP);

        return (DDI_SUCCESS);
}


/*
 * rootnex_add_props()
 *
 */
static void
rootnex_add_props(dev_info_t *dip)
{
        rootnex_intprop_t *rpp;
        int i;

        /* Add static integer/boolean properties to the root node */
        rpp = rootnex_intprp;
        for (i = 0; i < NROOT_INTPROPS; i++) {
                (void) e_ddi_prop_update_int(DDI_DEV_T_NONE, dip,
                    rpp[i].prop_name, rpp[i].prop_value);
        }
}



/*
 * *************************
 *  ctlops related routines
 * *************************
 */

/*
 * rootnex_ctlops()
 *
 */
/*ARGSUSED*/
static int
rootnex_ctlops(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
    void *arg, void *result)
{
        int n, *ptr;
        struct ddi_parent_private_data *pdp;

        switch (ctlop) {
        case DDI_CTLOPS_DMAPMAPC:
                /*
                 * Return 'partial' to indicate that dma mapping
                 * has to be done in the main MMU.
                 */
                return (DDI_DMA_PARTIAL);

        case DDI_CTLOPS_BTOP:
                /*
                 * Convert byte count input to physical page units.
                 * (byte counts that are not a page-size multiple
                 * are rounded down)
                 */
                *(ulong_t *)result = btop(*(ulong_t *)arg);
                return (DDI_SUCCESS);

        case DDI_CTLOPS_PTOB:
                /*
                 * Convert size in physical pages to bytes
                 */
                *(ulong_t *)result = ptob(*(ulong_t *)arg);
                return (DDI_SUCCESS);

        case DDI_CTLOPS_BTOPR:
                /*
                 * Convert byte count input to physical page units
                 * (byte counts that are not a page-size multiple
                 * are rounded up)
                 */
                *(ulong_t *)result = btopr(*(ulong_t *)arg);
                return (DDI_SUCCESS);

        case DDI_CTLOPS_INITCHILD:
                return (impl_ddi_sunbus_initchild(arg));

        case DDI_CTLOPS_UNINITCHILD:
                impl_ddi_sunbus_removechild(arg);
                return (DDI_SUCCESS);

        case DDI_CTLOPS_REPORTDEV:
                return (rootnex_ctl_reportdev(rdip));

        case DDI_CTLOPS_IOMIN:
                /*
                 * Nothing to do here but reflect back..
                 */
                return (DDI_SUCCESS);

        case DDI_CTLOPS_REGSIZE:
        case DDI_CTLOPS_NREGS:
                break;

        case DDI_CTLOPS_SIDDEV:
                if (ndi_dev_is_prom_node(rdip))
                        return (DDI_SUCCESS);
                if (ndi_dev_is_persistent_node(rdip))
                        return (DDI_SUCCESS);
                return (DDI_FAILURE);

        case DDI_CTLOPS_POWER:
                return ((*pm_platform_power)((power_req_t *)arg));

        case DDI_CTLOPS_RESERVED0: /* Was DDI_CTLOPS_NINTRS, obsolete */
        case DDI_CTLOPS_RESERVED1: /* Was DDI_CTLOPS_POKE_INIT, obsolete */
        case DDI_CTLOPS_RESERVED2: /* Was DDI_CTLOPS_POKE_FLUSH, obsolete */
        case DDI_CTLOPS_RESERVED3: /* Was DDI_CTLOPS_POKE_FINI, obsolete */
        case DDI_CTLOPS_RESERVED4: /* Was DDI_CTLOPS_INTR_HILEVEL, obsolete */
        case DDI_CTLOPS_RESERVED5: /* Was DDI_CTLOPS_XLATE_INTRS, obsolete */
                if (!rootnex_state->r_reserved_msg_printed) {
                        rootnex_state->r_reserved_msg_printed = B_TRUE;
                        cmn_err(CE_WARN, "Failing ddi_ctlops call(s) for "
                            "1 or more reserved/obsolete operations.");
                }
                return (DDI_FAILURE);

        default:
                return (DDI_FAILURE);
        }
        /*
         * The rest are for "hardware" properties
         */
        if ((pdp = ddi_get_parent_data(rdip)) == NULL)
                return (DDI_FAILURE);

        if (ctlop == DDI_CTLOPS_NREGS) {
                ptr = (int *)result;
                *ptr = pdp->par_nreg;
        } else {
                off_t *size = (off_t *)result;

                ptr = (int *)arg;
                n = *ptr;
                if (n >= pdp->par_nreg) {
                        return (DDI_FAILURE);
                }
                *size = (off_t)pdp->par_reg[n].regspec_size;
        }
        return (DDI_SUCCESS);
}
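
/*
 * For reference, the DDI_CTLOPS_NREGS and DDI_CTLOPS_REGSIZE cases above are
 * what ultimately service a child's ddi_dev_nregs(9F) and ddi_dev_regsize(9F)
 * calls; a hedged sketch of the caller's side (error handling elided):
 *
 *	int nregs;
 *	off_t size;
 *
 *	if (ddi_dev_nregs(dip, &nregs) == DDI_SUCCESS && nregs > 0)
 *		(void) ddi_dev_regsize(dip, 0, &size);
 */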

/*ARGSUSED*/
static int
rootnex_ctl_reportdev(dev_info_t *dev)
{
        return (DDI_SUCCESS);
}


/*
 * ******************
 *  map related code
 * ******************
 */

/*
 * rootnex_map()
 *
 */
static int
rootnex_map(dev_info_t *dip, dev_info_t *rdip, ddi_map_req_t *mp, off_t offset,
    off_t len, caddr_t *vaddrp)
{
        struct regspec *orp = NULL;
        struct regspec64 rp = { 0 };
        ddi_map_req_t mr = *mp;         /* Get private copy of request */

        mp = &mr;

        switch (mp->map_op)  {
        case DDI_MO_MAP_LOCKED:
        case DDI_MO_UNMAP:
        case DDI_MO_MAP_HANDLE:
                break;
        default:
#ifdef  DDI_MAP_DEBUG
                cmn_err(CE_WARN, "rootnex_map: unimplemented map op %d.",
                    mp->map_op);
#endif  /* DDI_MAP_DEBUG */
                return (DDI_ME_UNIMPLEMENTED);
        }

        if (mp->map_flags & DDI_MF_USER_MAPPING)  {
#ifdef  DDI_MAP_DEBUG
                cmn_err(CE_WARN, "rootnex_map: unimplemented map type: user.");
#endif  /* DDI_MAP_DEBUG */
                return (DDI_ME_UNIMPLEMENTED);
        }

        /*
         * First, we need to get the original regspec out before we convert it
         * to the extended format. If we have a register number, then we need
         * to convert that to a regspec.
         */
        if (mp->map_type == DDI_MT_RNUMBER)  {

                int rnumber = mp->map_obj.rnumber;
#ifdef  DDI_MAP_DEBUG
                static char *out_of_range =
                    "rootnex_map: Out of range rnumber <%d>, device <%s>";
#endif  /* DDI_MAP_DEBUG */

                orp = i_ddi_rnumber_to_regspec(rdip, rnumber);
                if (orp == NULL) {
#ifdef  DDI_MAP_DEBUG
                        cmn_err(CE_WARN, out_of_range, rnumber,
                            ddi_get_name(rdip));
#endif  /* DDI_MAP_DEBUG */
                        return (DDI_ME_RNUMBER_RANGE);
                }
        } else if (!(mp->map_flags & DDI_MF_EXT_REGSPEC)) {
                orp = mp->map_obj.rp;
        }

        /*
         * Ensure that we are always using a 64-bit extended regspec regardless
         * of what was passed into us. If the child driver is using a 64-bit
         * regspec, then we need to make sure that we copy this to the local
         * regspec64, rp.
         */
        if (orp != NULL) {
                rp.regspec_bustype = orp->regspec_bustype;
                rp.regspec_addr = orp->regspec_addr;
                rp.regspec_size = orp->regspec_size;
        } else {
                struct regspec64 *rp64;
                rp64 = (struct regspec64 *)mp->map_obj.rp;
                rp = *rp64;
        }

        mp->map_type = DDI_MT_REGSPEC;
        mp->map_flags |= DDI_MF_EXT_REGSPEC;
        mp->map_obj.rp = (struct regspec *)&rp;

        /*
         * Adjust offset and length corresponding to the passed-in values...
         * XXX: A non-zero length means override the one in the regspec
         * XXX: (regardless of what's in the parent's range?)
         */

#ifdef  DDI_MAP_DEBUG
        cmn_err(CE_CONT, "rootnex: <%s,%s> <0x%x, 0x%x, 0x%d> offset %d len %d "
            "handle 0x%x\n", ddi_get_name(dip), ddi_get_name(rdip),
            rp.regspec_bustype, rp.regspec_addr, rp.regspec_size, offset,
            len, mp->map_handlep);
#endif  /* DDI_MAP_DEBUG */

        /*
         * I/O or memory mapping:
         *
         *      <bustype=0, addr=x, len=x>: memory
         *      <bustype=1, addr=x, len=x>: i/o
         *      <bustype>1, addr=0, len=x>: x86-compatibility i/o
         */
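
        /*
         * For example, a UART at I/O port 0x3f8 with an 8-byte register block
         * would arrive here as <bustype=1, addr=0x3f8, len=8>, while a
         * memory-mapped BAR would use bustype 0 (the addresses here are
         * purely illustrative).
         */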

        if (rp.regspec_bustype > 1 && rp.regspec_addr != 0) {
                cmn_err(CE_WARN, "<%s,%s> invalid register spec"
                    " <0x%" PRIx64 ", 0x%" PRIx64 ", 0x%" PRIx64 ">",
                    ddi_get_name(dip), ddi_get_name(rdip), rp.regspec_bustype,
                    rp.regspec_addr, rp.regspec_size);
                return (DDI_ME_INVAL);
        }

        if (rp.regspec_bustype > 1 && rp.regspec_addr == 0) {
                /*
                 * compatibility i/o mapping
                 */
                rp.regspec_bustype += offset;
        } else {
                /*
                 * Normal memory or i/o mapping
                 */
                rp.regspec_addr += offset;
        }

        if (len != 0)
                rp.regspec_size = len;

#ifdef  DDI_MAP_DEBUG
        cmn_err(CE_CONT, "             <%s,%s> <0x%" PRIx64 ", 0x%" PRIx64
            ", 0x%" PRId64 "> offset %d len %d handle 0x%x\n",
            ddi_get_name(dip), ddi_get_name(rdip), rp.regspec_bustype,
            rp.regspec_addr, rp.regspec_size, offset, len, mp->map_handlep);
#endif  /* DDI_MAP_DEBUG */


        /*
         * The x86 root nexus does not have any notion of valid ranges of
         * addresses. Its children have valid ranges, but because there are
         * none for the nexus, we don't need to call i_ddi_apply_range().
         * Verify that is the case.
         */
        ASSERT0(sparc_pd_getnrng(dip));

        switch (mp->map_op)  {
        case DDI_MO_MAP_LOCKED:

                /*
                 * Set up the locked down kernel mapping to the regspec...
                 */

                return (rootnex_map_regspec(mp, vaddrp));

        case DDI_MO_UNMAP:

                /*
                 * Release mapping...
                 */

                return (rootnex_unmap_regspec(mp, vaddrp));

        case DDI_MO_MAP_HANDLE:

                return (rootnex_map_handle(mp));

        default:
                return (DDI_ME_UNIMPLEMENTED);
        }
}


/*
 * rootnex_map_fault()
 *
 *      fault in mappings for requestors
 */
/*ARGSUSED*/
static int
rootnex_map_fault(dev_info_t *dip, dev_info_t *rdip, struct hat *hat,
    struct seg *seg, caddr_t addr, struct devpage *dp, pfn_t pfn, uint_t prot,
    uint_t lock)
{

#ifdef  DDI_MAP_DEBUG
        ddi_map_debug("rootnex_map_fault: address <%x> pfn <%x>", addr, pfn);
        ddi_map_debug(" Seg <%s>\n",
            seg->s_ops == &segdev_ops ? "segdev" :
            seg == &kvseg ? "segkmem" : "NONE!");
#endif  /* DDI_MAP_DEBUG */

        /*
         * This is all terribly broken, but it is a start
         *
         * XXX  Note that this test means that segdev_ops
         *      must be exported from seg_dev.c.
         * XXX  What about devices with their own segment drivers?
         */
        if (seg->s_ops == &segdev_ops) {
                struct segdev_data *sdp = (struct segdev_data *)seg->s_data;

                if (hat == NULL) {
                        /*
                         * This is one plausible interpretation of
                         * a null hat, i.e. use the first hat on the
                         * address space hat list, which by convention is
                         * the hat of the system MMU.  An alternative
                         * would be to panic ... this might well be better ...
                         */
                        ASSERT(AS_READ_HELD(seg->s_as));
                        hat = seg->s_as->a_hat;
                        cmn_err(CE_NOTE, "rootnex_map_fault: nil hat");
                }
                hat_devload(hat, addr, MMU_PAGESIZE, pfn, prot | sdp->hat_attr,
                    (lock ? HAT_LOAD_LOCK : HAT_LOAD));
        } else if (seg == &kvseg && dp == NULL) {
                hat_devload(kas.a_hat, addr, MMU_PAGESIZE, pfn, prot,
                    HAT_LOAD_LOCK);
        } else
                return (DDI_FAILURE);
        return (DDI_SUCCESS);
}


static int
rootnex_map_regspec(ddi_map_req_t *mp, caddr_t *vaddrp)
{
        rootnex_addr_t rbase;
        void *cvaddr;
        uint64_t npages, pgoffset;
        struct regspec64 *rp;
        ddi_acc_hdl_t *hp;
        ddi_acc_impl_t *ap;
        uint_t  hat_acc_flags;
        paddr_t pbase;

        ASSERT(mp->map_flags & DDI_MF_EXT_REGSPEC);
        rp = (struct regspec64 *)mp->map_obj.rp;
        hp = mp->map_handlep;

#ifdef  DDI_MAP_DEBUG
        ddi_map_debug(
            "rootnex_map_regspec: <0x%x 0x%x 0x%x> handle 0x%x\n",
            rp->regspec_bustype, rp->regspec_addr,
            rp->regspec_size, mp->map_handlep);
#endif  /* DDI_MAP_DEBUG */

        /*
         * I/O or memory mapping
         *
         *      <bustype=0, addr=x, len=x>: memory
         *      <bustype=1, addr=x, len=x>: i/o
         *      <bustype>1, addr=0, len=x>: x86-compatibility i/o
         */

        if (rp->regspec_bustype > 1 && rp->regspec_addr != 0) {
                cmn_err(CE_WARN, "rootnex: invalid register spec"
                    " <0x%" PRIx64 ", 0x%" PRIx64", 0x%" PRIx64">",
                    rp->regspec_bustype, rp->regspec_addr, rp->regspec_size);
                return (DDI_FAILURE);
        }

        if (rp->regspec_bustype != 0) {
                /*
                 * I/O space - needs a handle.
                 */
                if (hp == NULL) {
                        return (DDI_FAILURE);
                }
                ap = (ddi_acc_impl_t *)hp->ah_platform_private;
                ap->ahi_acc_attr |= DDI_ACCATTR_IO_SPACE;
                impl_acc_hdl_init(hp);

                if (mp->map_flags & DDI_MF_DEVICE_MAPPING) {
#ifdef  DDI_MAP_DEBUG
                        ddi_map_debug("rootnex_map_regspec: mmap() "
                            "to I/O space is not supported.\n");
#endif  /* DDI_MAP_DEBUG */
                        return (DDI_ME_INVAL);
                } else {
                        /*
                         * 1275-compliant vs. compatibility i/o mapping
                         */
                        *vaddrp =
                            (rp->regspec_bustype > 1 && rp->regspec_addr == 0) ?
                            ((caddr_t)(uintptr_t)rp->regspec_bustype) :
                            ((caddr_t)(uintptr_t)rp->regspec_addr);
#ifdef __xpv
                        if (DOMAIN_IS_INITDOMAIN(xen_info)) {
                                hp->ah_pfn = xen_assign_pfn(
                                    mmu_btop((ulong_t)rp->regspec_addr &
                                    MMU_PAGEMASK));
                        } else {
                                hp->ah_pfn = mmu_btop(
                                    (ulong_t)rp->regspec_addr & MMU_PAGEMASK);
                        }
#else
                        hp->ah_pfn = mmu_btop((ulong_t)rp->regspec_addr &
                            MMU_PAGEMASK);
#endif
                        hp->ah_pnum = mmu_btopr(rp->regspec_size +
                            ((ulong_t)rp->regspec_addr & MMU_PAGEOFFSET));
                }

#ifdef  DDI_MAP_DEBUG
                ddi_map_debug(
            "rootnex_map_regspec: \"Mapping\" %d bytes I/O space at 0x%x\n",
                    rp->regspec_size, *vaddrp);
#endif  /* DDI_MAP_DEBUG */
                return (DDI_SUCCESS);
        }

        /*
         * Memory space
         */

        if (hp != NULL) {
                /*
                 * hat layer ignores
                 * hp->ah_acc.devacc_attr_endian_flags.
                 */
                switch (hp->ah_acc.devacc_attr_dataorder) {
                case DDI_STRICTORDER_ACC:
                        hat_acc_flags = HAT_STRICTORDER;
                        break;
                case DDI_UNORDERED_OK_ACC:
                        hat_acc_flags = HAT_UNORDERED_OK;
                        break;
                case DDI_MERGING_OK_ACC:
                        hat_acc_flags = HAT_MERGING_OK;
                        break;
                case DDI_LOADCACHING_OK_ACC:
                        hat_acc_flags = HAT_LOADCACHING_OK;
                        break;
                case DDI_STORECACHING_OK_ACC:
                        hat_acc_flags = HAT_STORECACHING_OK;
                        break;
                }
                ap = (ddi_acc_impl_t *)hp->ah_platform_private;
                ap->ahi_acc_attr |= DDI_ACCATTR_CPU_VADDR;
                impl_acc_hdl_init(hp);
                hp->ah_hat_flags = hat_acc_flags;
        } else {
                hat_acc_flags = HAT_STRICTORDER;
        }

        rbase = (rootnex_addr_t)(rp->regspec_addr & MMU_PAGEMASK);
#ifdef __xpv
        /*
         * If we're dom0, we're using a real device so we need to translate
         * the MA to a PA.
         */
        if (DOMAIN_IS_INITDOMAIN(xen_info)) {
                pbase = pfn_to_pa(xen_assign_pfn(mmu_btop(rbase)));
        } else {
                pbase = rbase;
        }
#else
        pbase = rbase;
#endif
        pgoffset = (ulong_t)rp->regspec_addr & MMU_PAGEOFFSET;

        if (rp->regspec_size == 0) {
#ifdef  DDI_MAP_DEBUG
                ddi_map_debug("rootnex_map_regspec: zero regspec_size\n");
#endif  /* DDI_MAP_DEBUG */
                return (DDI_ME_INVAL);
        }

        if (mp->map_flags & DDI_MF_DEVICE_MAPPING) {
                /* extra cast to make gcc happy */
                *vaddrp = (caddr_t)((uintptr_t)mmu_btop(pbase));
        } else {
                npages = mmu_btopr(rp->regspec_size + pgoffset);

#ifdef  DDI_MAP_DEBUG
                ddi_map_debug("rootnex_map_regspec: Mapping %d pages "
                    "physical %llx", npages, pbase);
#endif  /* DDI_MAP_DEBUG */

                cvaddr = device_arena_alloc(ptob(npages), VM_NOSLEEP);
                if (cvaddr == NULL)
                        return (DDI_ME_NORESOURCES);

                /*
                 * Now map in the pages we've allocated...
                 */
                hat_devload(kas.a_hat, cvaddr, mmu_ptob(npages),
                    mmu_btop(pbase), mp->map_prot | hat_acc_flags,
                    HAT_LOAD_LOCK);
                *vaddrp = (caddr_t)cvaddr + pgoffset;

                /* save away pfn and npages for FMA */
                hp = mp->map_handlep;
                if (hp) {
                        hp->ah_pfn = mmu_btop(pbase);
                        hp->ah_pnum = npages;
                }
        }

#ifdef  DDI_MAP_DEBUG
        ddi_map_debug("at virtual 0x%x\n", *vaddrp);
#endif  /* DDI_MAP_DEBUG */
        return (DDI_SUCCESS);
}


static int
rootnex_unmap_regspec(ddi_map_req_t *mp, caddr_t *vaddrp)
{
        caddr_t addr = (caddr_t)*vaddrp;
        uint64_t npages, pgoffset;
        struct regspec64 *rp;

        if (mp->map_flags & DDI_MF_DEVICE_MAPPING)
                return (0);

        ASSERT(mp->map_flags & DDI_MF_EXT_REGSPEC);
        rp = (struct regspec64 *)mp->map_obj.rp;

        if (rp->regspec_size == 0) {
#ifdef  DDI_MAP_DEBUG
                ddi_map_debug("rootnex_unmap_regspec: zero regspec_size\n");
#endif  /* DDI_MAP_DEBUG */
                return (DDI_ME_INVAL);
        }

        /*
         * I/O or memory mapping:
         *
         *      <bustype=0, addr=x, len=x>: memory
         *      <bustype=1, addr=x, len=x>: i/o
         *      <bustype>1, addr=0, len=x>: x86-compatibility i/o
         */
        if (rp->regspec_bustype != 0) {
                /*
                 * This is I/O space, which requires no particular
                 * processing on unmap since it isn't mapped in the
                 * first place.
                 */
                return (DDI_SUCCESS);
        }

        /*
         * Memory space
         */
        pgoffset = (uintptr_t)addr & MMU_PAGEOFFSET;
        npages = mmu_btopr(rp->regspec_size + pgoffset);
        hat_unload(kas.a_hat, addr - pgoffset, ptob(npages), HAT_UNLOAD_UNLOCK);
        device_arena_free(addr - pgoffset, ptob(npages));

        /*
         * Destroy the pointer - the mapping has logically gone
         */
        *vaddrp = NULL;

        return (DDI_SUCCESS);
}

static int
rootnex_map_handle(ddi_map_req_t *mp)
{
        rootnex_addr_t rbase;
        ddi_acc_hdl_t *hp;
        uint64_t pgoffset;
        struct regspec64 *rp;
        paddr_t pbase;

        rp = (struct regspec64 *)mp->map_obj.rp;

#ifdef  DDI_MAP_DEBUG
        ddi_map_debug(
            "rootnex_map_handle: <0x%x 0x%x 0x%x> handle 0x%x\n",
            rp->regspec_bustype, rp->regspec_addr,
            rp->regspec_size, mp->map_handlep);
#endif  /* DDI_MAP_DEBUG */

        /*
         * I/O or memory mapping:
         *
         *      <bustype=0, addr=x, len=x>: memory
         *      <bustype=1, addr=x, len=x>: i/o
         *      <bustype>1, addr=0, len=x>: x86-compatibility i/o
         */
        if (rp->regspec_bustype != 0) {
                /*
                 * This refers to I/O space, and we don't support "mapping"
                 * I/O space to a user.
                 */
                return (DDI_FAILURE);
        }

        /*
         * Set up the hat_flags for the mapping.
         */
        hp = mp->map_handlep;

        switch (hp->ah_acc.devacc_attr_endian_flags) {
        case DDI_NEVERSWAP_ACC:
                hp->ah_hat_flags = HAT_NEVERSWAP | HAT_STRICTORDER;
                break;
        case DDI_STRUCTURE_LE_ACC:
                hp->ah_hat_flags = HAT_STRUCTURE_LE;
                break;
        case DDI_STRUCTURE_BE_ACC:
                return (DDI_FAILURE);
        default:
                return (DDI_REGS_ACC_CONFLICT);
        }

        switch (hp->ah_acc.devacc_attr_dataorder) {
        case DDI_STRICTORDER_ACC:
                break;
        case DDI_UNORDERED_OK_ACC:
                hp->ah_hat_flags |= HAT_UNORDERED_OK;
                break;
        case DDI_MERGING_OK_ACC:
                hp->ah_hat_flags |= HAT_MERGING_OK;
                break;
        case DDI_LOADCACHING_OK_ACC:
                hp->ah_hat_flags |= HAT_LOADCACHING_OK;
                break;
        case DDI_STORECACHING_OK_ACC:
                hp->ah_hat_flags |= HAT_STORECACHING_OK;
                break;
        default:
                return (DDI_FAILURE);
        }

        rbase = (rootnex_addr_t)rp->regspec_addr &
            (~(rootnex_addr_t)MMU_PAGEOFFSET);
        pgoffset = (ulong_t)rp->regspec_addr & MMU_PAGEOFFSET;

        if (rp->regspec_size == 0)
                return (DDI_ME_INVAL);

#ifdef __xpv
        /*
         * If we're dom0, we're using a real device so we need to translate
         * the MA to a PA.
         */
        if (DOMAIN_IS_INITDOMAIN(xen_info)) {
                pbase = pfn_to_pa(xen_assign_pfn(mmu_btop(rbase))) |
                    (rbase & MMU_PAGEOFFSET);
        } else {
                pbase = rbase;
        }
#else
        pbase = rbase;
#endif

        hp->ah_pfn = mmu_btop(pbase);
        hp->ah_pnum = mmu_btopr(rp->regspec_size + pgoffset);

        return (DDI_SUCCESS);
}



/*
 * ************************
 *  interrupt related code
 * ************************
 */

/*
 * rootnex_intr_ops()
 *      bus_intr_op() function for interrupt support
 */
/* ARGSUSED */
static int
rootnex_intr_ops(dev_info_t *pdip, dev_info_t *rdip, ddi_intr_op_t intr_op,
    ddi_intr_handle_impl_t *hdlp, void *result)
{
        struct intrspec                 *ispec;

        DDI_INTR_NEXDBG((CE_CONT,
            "rootnex_intr_ops: pdip = %p, rdip = %p, intr_op = %x, hdlp = %p\n",
            (void *)pdip, (void *)rdip, intr_op, (void *)hdlp));

        /* Process the interrupt operation */
        switch (intr_op) {
        case DDI_INTROP_GETCAP:
                /* First check with pcplusmp */
                if (psm_intr_ops == NULL)
                        return (DDI_FAILURE);

                if ((*psm_intr_ops)(rdip, hdlp, PSM_INTR_OP_GET_CAP, result)) {
                        *(int *)result = 0;
                        return (DDI_FAILURE);
                }
                break;
        case DDI_INTROP_SETCAP:
                if (psm_intr_ops == NULL)
                        return (DDI_FAILURE);

                if ((*psm_intr_ops)(rdip, hdlp, PSM_INTR_OP_SET_CAP, result))
                        return (DDI_FAILURE);
                break;
        case DDI_INTROP_ALLOC:
                ASSERT(hdlp->ih_type == DDI_INTR_TYPE_FIXED);
                return (rootnex_alloc_intr_fixed(rdip, hdlp, result));
        case DDI_INTROP_FREE:
                ASSERT(hdlp->ih_type == DDI_INTR_TYPE_FIXED);
                return (rootnex_free_intr_fixed(rdip, hdlp));
        case DDI_INTROP_GETPRI:
                if ((ispec = rootnex_get_ispec(rdip, hdlp->ih_inum)) == NULL)
                        return (DDI_FAILURE);
                *(int *)result = ispec->intrspec_pri;
                break;
        case DDI_INTROP_SETPRI:
                /* Validate the interrupt priority passed to us */
                if (*(int *)result > LOCK_LEVEL)
                        return (DDI_FAILURE);

                /* Ensure that PSM is all initialized and ispec is ok */
                if ((psm_intr_ops == NULL) ||
                    ((ispec = rootnex_get_ispec(rdip, hdlp->ih_inum)) == NULL))
                        return (DDI_FAILURE);

                /* Change the priority */
                if ((*psm_intr_ops)(rdip, hdlp, PSM_INTR_OP_SET_PRI, result) ==
                    PSM_FAILURE)
                        return (DDI_FAILURE);

                /* update the ispec with the new priority */
                ispec->intrspec_pri =  *(int *)result;
                break;
        case DDI_INTROP_ADDISR:
                if ((ispec = rootnex_get_ispec(rdip, hdlp->ih_inum)) == NULL)
                        return (DDI_FAILURE);
                ispec->intrspec_func = hdlp->ih_cb_func;
                break;
        case DDI_INTROP_REMISR:
                if ((ispec = rootnex_get_ispec(rdip, hdlp->ih_inum)) == NULL)
                        return (DDI_FAILURE);
                ispec->intrspec_func = (uint_t (*)()) 0;
                break;
        case DDI_INTROP_ENABLE:
                if ((ispec = rootnex_get_ispec(rdip, hdlp->ih_inum)) == NULL)
                        return (DDI_FAILURE);

                /* Call psmi to translate irq with the dip */
                if (psm_intr_ops == NULL)
                        return (DDI_FAILURE);

                ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp = ispec;
                if ((*psm_intr_ops)(rdip, hdlp, PSM_INTR_OP_XLATE_VECTOR,
                    (int *)&hdlp->ih_vector) == PSM_FAILURE)
                        return (DDI_FAILURE);

                /* Add the interrupt handler */
                if (!add_avintr((void *)hdlp, ispec->intrspec_pri,
                    hdlp->ih_cb_func, DEVI(rdip)->devi_name, hdlp->ih_vector,
                    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, NULL, rdip))
                        return (DDI_FAILURE);
                break;
        case DDI_INTROP_DISABLE:
                if ((ispec = rootnex_get_ispec(rdip, hdlp->ih_inum)) == NULL)
                        return (DDI_FAILURE);

                /* Call psm_ops() to translate irq with the dip */
                if (psm_intr_ops == NULL)
                        return (DDI_FAILURE);

                ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp = ispec;
                (void) (*psm_intr_ops)(rdip, hdlp,
                    PSM_INTR_OP_XLATE_VECTOR, (int *)&hdlp->ih_vector);

                /* Remove the interrupt handler */
                rem_avintr((void *)hdlp, ispec->intrspec_pri,
                    hdlp->ih_cb_func, hdlp->ih_vector);
                break;
        case DDI_INTROP_SETMASK:
                if (psm_intr_ops == NULL)
                        return (DDI_FAILURE);

                if ((*psm_intr_ops)(rdip, hdlp, PSM_INTR_OP_SET_MASK, NULL))
                        return (DDI_FAILURE);
                break;
        case DDI_INTROP_CLRMASK:
                if (psm_intr_ops == NULL)
                        return (DDI_FAILURE);

                if ((*psm_intr_ops)(rdip, hdlp, PSM_INTR_OP_CLEAR_MASK, NULL))
                        return (DDI_FAILURE);
                break;
        case DDI_INTROP_GETPENDING:
                if (psm_intr_ops == NULL)
                        return (DDI_FAILURE);

                if ((*psm_intr_ops)(rdip, hdlp, PSM_INTR_OP_GET_PENDING,
                    result)) {
                        *(int *)result = 0;
                        return (DDI_FAILURE);
                }
                break;
        case DDI_INTROP_NAVAIL:
        case DDI_INTROP_NINTRS:
                *(int *)result = i_ddi_get_intx_nintrs(rdip);
                if (*(int *)result == 0) {
                        /*
                         * Special case for the 'pcic' driver only. This
                         * driver is a child of the 'isa' and 'rootnex'
                         * drivers.
                         *
                         * See detailed comments on this in the function
                         * rootnex_get_ispec().
                         *
                         * Children of 'pcic' send the 'NINTRS' request all
                         * the way up to the rootnex driver, but the
                         * 'pdp->par_nintr' field may not be initialized, so
                         * we fake it here and return 1 (a la what the PCMCIA
                         * nexus does).
                         */
                        if (strcmp(ddi_get_name(rdip), "pcic") == 0)
                                *(int *)result = 1;
                        else
                                return (DDI_FAILURE);
                }
                break;
        case DDI_INTROP_SUPPORTED_TYPES:
                *(int *)result = DDI_INTR_TYPE_FIXED;   /* Always ... */
                break;
        default:
                return (DDI_FAILURE);
        }

        return (DDI_SUCCESS);
}
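
/*
 * For context, a leaf driver's FIXED interrupt registration typically flows
 * through the ops above via the DDI interrupt framework; a hedged sketch of
 * that caller side (handler name, argument, and error handling are elided
 * assumptions):
 *
 *	ddi_intr_handle_t hdl;
 *	int actual;
 *
 *	(void) ddi_intr_alloc(dip, &hdl, DDI_INTR_TYPE_FIXED, 0, 1, &actual,
 *	    DDI_INTR_ALLOC_NORMAL);
 *	(void) ddi_intr_add_handler(hdl, my_isr, my_arg, NULL);
 *	(void) ddi_intr_enable(hdl);
 */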
1473 
1474 
1475 /*
1476  * rootnex_get_ispec()
1477  *      convert an interrupt number to an interrupt specification.
1478  *      The interrupt number determines which interrupt spec will be
1479  *      returned if more than one exists.
1480  *
1481  *      Look into the parent private data area of the 'rdip' to find out
1482  *      the interrupt specification.  First check to make sure there is
1483  *      one that matches "inum" and then return a pointer to it.
1484  *
1485  *      Return NULL if one could not be found.
1486  *
1487  *      NOTE: This is needed for rootnex_intr_ops()
1488  */
1489 static struct intrspec *
1490 rootnex_get_ispec(dev_info_t *rdip, int inum)
1491 {
1492         struct ddi_parent_private_data *pdp = ddi_get_parent_data(rdip);
1493 
1494         /*
1495          * Special case handling for drivers that provide their own
1496          * intrspec structures instead of relying on the DDI framework.
1497          *
1498          * A broken hardware driver in ON could potentially provide its
1499          * own intrspec structure, instead of relying on the DDI framework.
1500          * If these drivers are children of 'rootnex' then we need to
1501          * continue to provide backward compatibility to them here.
1502          *
1503          * The following check is a special case for the 'pcic' driver, which
1504          * was found to have broken hardware and thus provides its own intrspec.
1505          *
1506          * Verbatim comments from this driver are shown here:
1507          * "Don't use the ddi_add_intr since we don't have a
1508          * default intrspec in all cases."
1509          *
1510          * Since an 'ispec' may not always be created for it,
1511          * check for that and create one if needed.
1512          *
1513          * NOTE: Currently 'pcic' is the only driver found to do this.
1514          */
1515         if (!pdp->par_intr && strcmp(ddi_get_name(rdip), "pcic") == 0) {
1516                 pdp->par_nintr = 1;
1517                 pdp->par_intr = kmem_zalloc(sizeof (struct intrspec) *
1518                     pdp->par_nintr, KM_SLEEP);
1519         }
1520 
1521         /* Validate the interrupt number */
1522         if (inum >= pdp->par_nintr)
1523                 return (NULL);
1524 
1525         /* Get the interrupt structure pointer and return that */
1526         return ((struct intrspec *)&pdp->par_intr[inum]);
1527 }
1528 
1529 /*
1530  * Allocate interrupt vector for FIXED (legacy) type.
1531  */
1532 static int
1533 rootnex_alloc_intr_fixed(dev_info_t *rdip, ddi_intr_handle_impl_t *hdlp,
1534     void *result)
1535 {
1536         struct intrspec         *ispec;
1537         ddi_intr_handle_impl_t  info_hdl;
1538         int                     ret;
1539         int                     free_phdl = 0;
1540         apic_get_type_t         type_info;
1541 
1542         if (psm_intr_ops == NULL)
1543                 return (DDI_FAILURE);
1544 
1545         if ((ispec = rootnex_get_ispec(rdip, hdlp->ih_inum)) == NULL)
1546                 return (DDI_FAILURE);
1547 
1548         /*
1549          * If the PSM module is "APIX", pass it the request to
1550          * allocate the vector now.
1551          */
1552         bzero(&info_hdl, sizeof (ddi_intr_handle_impl_t));
1553         info_hdl.ih_private = &type_info;
1554         if ((*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_APIC_TYPE, NULL) ==
1555             PSM_SUCCESS && strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
1556                 if (hdlp->ih_private == NULL) { /* allocate phdl structure */
1557                         free_phdl = 1;
1558                         i_ddi_alloc_intr_phdl(hdlp);
1559                 }
1560                 ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp = ispec;
1561                 ret = (*psm_intr_ops)(rdip, hdlp,
1562                     PSM_INTR_OP_ALLOC_VECTORS, result);
1563                 if (free_phdl) { /* free up the phdl structure */
1564                         free_phdl = 0;
1565                         i_ddi_free_intr_phdl(hdlp);
1566                         hdlp->ih_private = NULL;
1567                 }
1568         } else {
1569                 /*
1570                  * No APIX module; fall back to the old scheme where the
1571                  * interrupt vector is allocated during ddi_enable_intr() call.
1572                  */
1573                 hdlp->ih_pri = ispec->intrspec_pri;
1574                 *(int *)result = hdlp->ih_scratch1;
1575                 ret = DDI_SUCCESS;
1576         }
1577 
1578         return (ret);
1579 }
1580 
1581 /*
1582  * Free up interrupt vector for FIXED (legacy) type.
1583  */
1584 static int
1585 rootnex_free_intr_fixed(dev_info_t *rdip, ddi_intr_handle_impl_t *hdlp)
1586 {
1587         struct intrspec                 *ispec;
1588         struct ddi_parent_private_data  *pdp;
1589         ddi_intr_handle_impl_t          info_hdl;
1590         int                             ret;
1591         apic_get_type_t                 type_info;
1592 
1593         if (psm_intr_ops == NULL)
1594                 return (DDI_FAILURE);
1595 
1596         /*
1597          * If the PSM module is "APIX", pass it the request to
1598          * free up the vector now.
1599          */
1600         bzero(&info_hdl, sizeof (ddi_intr_handle_impl_t));
1601         info_hdl.ih_private = &type_info;
1602         if ((*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_APIC_TYPE, NULL) ==
1603             PSM_SUCCESS && strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
1604                 if ((ispec = rootnex_get_ispec(rdip, hdlp->ih_inum)) == NULL)
1605                         return (DDI_FAILURE);
1606                 ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp = ispec;
1607                 ret = (*psm_intr_ops)(rdip, hdlp,
1608                     PSM_INTR_OP_FREE_VECTORS, NULL);
1609         } else {
1610                 /*
1611                  * No APIX module; fall back to the old scheme where
1612                  * the interrupt vector was already freed during
1613                  * ddi_disable_intr() call.
1614                  */
1615                 ret = DDI_SUCCESS;
1616         }
1617 
1618         pdp = ddi_get_parent_data(rdip);
1619 
1620         /*
1621          * Special case for the 'pcic' driver only.
1622          * If an intrspec was created for it, clean it up here.
1623          * See detailed comments on this in the function
1624          * rootnex_get_ispec().
1625          */
1626         if (pdp->par_intr && strcmp(ddi_get_name(rdip), "pcic") == 0) {
1627                 kmem_free(pdp->par_intr, sizeof (struct intrspec) *
1628                     pdp->par_nintr);
1629                 /*
1630                  * Set it to NULL so that the
1631                  * DDI framework doesn't free it again.
1632                  */
1633                 pdp->par_intr = NULL;
1634                 pdp->par_nintr = 0;
1635         }
1636 
1637         return (ret);
1638 }
1639 
1640 
1641 /*
1642  * ******************
1643  *  dma related code
1644  * ******************
1645  */
1646 
1647 /*ARGSUSED*/
1648 static int
1649 rootnex_coredma_allochdl(dev_info_t *dip, dev_info_t *rdip,
1650     ddi_dma_attr_t *attr, int (*waitfp)(caddr_t), caddr_t arg,
1651     ddi_dma_handle_t *handlep)
1652 {
1653         uint64_t maxsegmentsize_ll;
1654         uint_t maxsegmentsize;
1655         ddi_dma_impl_t *hp;
1656         rootnex_dma_t *dma;
1657         uint64_t count_max;
1658         uint64_t seg;
1659         int kmflag;
1660         int e;
1661 
1662 
1663         /* convert our sleep flags */
1664         if (waitfp == DDI_DMA_SLEEP) {
1665                 kmflag = KM_SLEEP;
1666         } else {
1667                 kmflag = KM_NOSLEEP;
1668         }
1669 
1670         /*
1671          * We try to do only one memory allocation here. We'll do a little
1672          * pointer manipulation later. If the bind ends up taking more than
1673          * our prealloc's space, we'll have to allocate more memory in the
1674          * bind operation. Not great, but much better than before and the
1675          * best we can do with the current bind interfaces.
1676          */
1677         hp = kmem_cache_alloc(rootnex_state->r_dmahdl_cache, kmflag);
1678         if (hp == NULL)
1679                 return (DDI_DMA_NORESOURCES);
1680 
1681         /* Do our pointer manipulation now, align the structures */
1682         hp->dmai_private = (void *)(((uintptr_t)hp +
1683             (uintptr_t)sizeof (ddi_dma_impl_t) + 0x7) & ~0x7);
1684         dma = (rootnex_dma_t *)hp->dmai_private;
1685         dma->dp_prealloc_buffer = (uchar_t *)(((uintptr_t)dma +
1686             sizeof (rootnex_dma_t) + 0x7) & ~0x7);
1687 
1688         /* setup the handle */
1689         rootnex_clean_dmahdl(hp);
1690         hp->dmai_error.err_fep = NULL;
1691         hp->dmai_error.err_cf = NULL;
1692         dma->dp_dip = rdip;
1693         dma->dp_sglinfo.si_flags = attr->dma_attr_flags;
1694         dma->dp_sglinfo.si_min_addr = attr->dma_attr_addr_lo;
1695 
1696         /*
1697          * The BOUNCE_ON_SEG workaround is not needed when an IOMMU
1698          * is being used. Set the upper limit to the seg value.
1699          * There will be enough DVMA space to always get addresses
1700          * that will match the constraints.
1701          */
1702         if (IOMMU_USED(rdip) &&
1703             (attr->dma_attr_flags & _DDI_DMA_BOUNCE_ON_SEG)) {
1704                 dma->dp_sglinfo.si_max_addr = attr->dma_attr_seg;
1705                 dma->dp_sglinfo.si_flags &= ~_DDI_DMA_BOUNCE_ON_SEG;
1706         } else
1707                 dma->dp_sglinfo.si_max_addr = attr->dma_attr_addr_hi;
1708 
1709         hp->dmai_minxfer = attr->dma_attr_minxfer;
1710         hp->dmai_burstsizes = attr->dma_attr_burstsizes;
1711         hp->dmai_rdip = rdip;
1712         hp->dmai_attr = *attr;
1713 
1714         if (attr->dma_attr_seg >= dma->dp_sglinfo.si_max_addr)
1715                 dma->dp_sglinfo.si_cancross = B_FALSE;
1716         else
1717                 dma->dp_sglinfo.si_cancross = B_TRUE;
1718 
1719         /* we don't need to worry about the SPL since we do a tryenter */
1720         mutex_init(&dma->dp_mutex, NULL, MUTEX_DRIVER, NULL);
1721 
1722         /*
1723          * Figure out our maximum segment size. If the segment size is greater
1724          * than 4G, we will limit it to (4G - 1) since the max size of a dma
1725          * object (ddi_dma_obj_t.dmao_size) is 32 bits. dma_attr_seg and
1726          * dma_attr_count_max are size-1 type values.
1727          *
1728          * Maximum segment size is the largest physically contiguous chunk of
1729          * memory that we can return from a bind (i.e. the maximum size of a
1730          * single cookie).
1731          */
1732 
1733         /* handle the rollover cases */
1734         seg = attr->dma_attr_seg + 1;
1735         if (seg < attr->dma_attr_seg) {
1736                 seg = attr->dma_attr_seg;
1737         }
1738         count_max = attr->dma_attr_count_max + 1;
1739         if (count_max < attr->dma_attr_count_max) {
1740                 count_max = attr->dma_attr_count_max;
1741         }
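
             /*
              * Example (hypothetical attribute values): a device advertising
              * dma_attr_seg of UINT64_MAX would wrap to 0 when 1 is added
              * above, so the rollover checks keep seg and count_max pinned at
              * the original (size - 1) values instead of collapsing the limit
              * to zero.
              */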
1742 
1743         /*
1744          * granularity may or may not be a power of two. If it isn't, we can't
1745          * use a simple mask.
1746          */
1747         if (!ISP2(attr->dma_attr_granular)) {
1748                 dma->dp_granularity_power_2 = B_FALSE;
1749         } else {
1750                 dma->dp_granularity_power_2 = B_TRUE;
1751         }
1752 
1753         /*
1754          * maxxfer should be a whole multiple of granularity. If we're going to
1755          * break up a window because we're greater than maxxfer, we might as
1756          * well make sure maxxfer is a whole multiple so we don't have to
1757          * worry about trimming the window later on for this case.
1758          */
1759         if (attr->dma_attr_granular > 1) {
1760                 if (dma->dp_granularity_power_2) {
1761                         dma->dp_maxxfer = attr->dma_attr_maxxfer -
1762                             (attr->dma_attr_maxxfer &
1763                             (attr->dma_attr_granular - 1));
1764                 } else {
1765                         dma->dp_maxxfer = attr->dma_attr_maxxfer -
1766                             (attr->dma_attr_maxxfer % attr->dma_attr_granular);
1767                 }
1768         } else {
1769                 dma->dp_maxxfer = attr->dma_attr_maxxfer;
1770         }
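
             /*
              * Worked example (hypothetical values): with dma_attr_maxxfer of
              * 0xFFFF and a power-of-two dma_attr_granular of 0x200, the mask
              * above gives dp_maxxfer = 0xFFFF - (0xFFFF & 0x1FF) = 0xFE00,
              * i.e. maxxfer rounded down to a whole multiple of the
              * granularity.
              */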
1771 
1772         maxsegmentsize_ll = MIN(seg, dma->dp_maxxfer);
1773         maxsegmentsize_ll = MIN(maxsegmentsize_ll, count_max);
1774         if (maxsegmentsize_ll == 0 || (maxsegmentsize_ll > 0xFFFFFFFF)) {
1775                 maxsegmentsize = 0xFFFFFFFF;
1776         } else {
1777                 maxsegmentsize = maxsegmentsize_ll;
1778         }
1779         dma->dp_sglinfo.si_max_cookie_size = maxsegmentsize;
1780         dma->dp_sglinfo.si_segmask = attr->dma_attr_seg;
1781 
1782         /* check the ddi_dma_attr arg to make sure it makes a little sense */
1783         if (rootnex_alloc_check_parms) {
1784                 e = rootnex_valid_alloc_parms(attr, maxsegmentsize);
1785                 if (e != DDI_SUCCESS) {
1786                         ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_ALLOC_FAIL]);
1787                         (void) rootnex_dma_freehdl(dip, rdip,
1788                             (ddi_dma_handle_t)hp);
1789                         return (e);
1790                 }
1791         }
1792 
1793         *handlep = (ddi_dma_handle_t)hp;
1794 
1795         ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_ACTIVE_HDLS]);
1796         ROOTNEX_DPROBE1(rootnex__alloc__handle, uint64_t,
1797             rootnex_cnt[ROOTNEX_CNT_ACTIVE_HDLS]);
1798 
1799         return (DDI_SUCCESS);
1800 }
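
     /*
      * For context (not part of the original code): the handle built above is
      * normally requested by a leaf driver via ddi_dma_alloc_handle(9F). A
      * minimal sketch, assuming a hypothetical "xx_dip" and a hypothetical
      * attribute template "xx_dma_attr":
      *
      *      ddi_dma_handle_t xx_dma_handle;
      *
      *      if (ddi_dma_alloc_handle(xx_dip, &xx_dma_attr, DDI_DMA_SLEEP,
      *          NULL, &xx_dma_handle) != DDI_SUCCESS)
      *              return (DDI_FAILURE);
      *
      * With DDI_DMA_SLEEP the kmem_cache_alloc() above uses KM_SLEEP and will
      * not fail with DDI_DMA_NORESOURCES; callers passing DDI_DMA_DONTWAIT
      * must be prepared for that return value.
      */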
1801 
1802 
1803 /*
1804  * rootnex_dma_allochdl()
1805  *    called from ddi_dma_alloc_handle().
1806  */
1807 static int
1808 rootnex_dma_allochdl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
1809     int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *handlep)
1810 {
1811         int retval = DDI_SUCCESS;
1812 #if defined(__amd64) && !defined(__xpv)
1813 
1814         if (IOMMU_UNITIALIZED(rdip)) {
1815                 retval = iommulib_nex_open(dip, rdip);
1816 
1817                 if (retval != DDI_SUCCESS && retval != DDI_ENOTSUP)
1818                         return (retval);
1819         }
1820 
1821         if (IOMMU_UNUSED(rdip)) {
1822                 retval = rootnex_coredma_allochdl(dip, rdip, attr, waitfp, arg,
1823                     handlep);
1824         } else {
1825                 retval = iommulib_nexdma_allochdl(dip, rdip, attr,
1826                     waitfp, arg, handlep);
1827         }
1828 #else
1829         retval = rootnex_coredma_allochdl(dip, rdip, attr, waitfp, arg,
1830             handlep);
1831 #endif
1832         switch (retval) {
1833         case DDI_DMA_NORESOURCES:
1834                 if (waitfp != DDI_DMA_DONTWAIT) {
1835                         ddi_set_callback(waitfp, arg,
1836                             &rootnex_state->r_dvma_call_list_id);
1837                 }
1838                 break;
1839         case DDI_SUCCESS:
1840                 ndi_fmc_insert(rdip, DMA_HANDLE, *handlep, NULL);
1841                 break;
1842         default:
1843                 break;
1844         }
1845         return (retval);
1846 }
1847 
1848 /*ARGSUSED*/
1849 static int
1850 rootnex_coredma_freehdl(dev_info_t *dip, dev_info_t *rdip,
1851     ddi_dma_handle_t handle)
1852 {
1853         ddi_dma_impl_t *hp;
1854         rootnex_dma_t *dma;
1855 
1856 
1857         hp = (ddi_dma_impl_t *)handle;
1858         dma = (rootnex_dma_t *)hp->dmai_private;
1859 
1860         /* unbind should have been called first */
1861         ASSERT(!dma->dp_inuse);
1862 
1863         mutex_destroy(&dma->dp_mutex);
1864         kmem_cache_free(rootnex_state->r_dmahdl_cache, hp);
1865 
1866         ROOTNEX_DPROF_DEC(&rootnex_cnt[ROOTNEX_CNT_ACTIVE_HDLS]);
1867         ROOTNEX_DPROBE1(rootnex__free__handle, uint64_t,
1868             rootnex_cnt[ROOTNEX_CNT_ACTIVE_HDLS]);
1869 
1870         return (DDI_SUCCESS);
1871 }
1872 
1873 /*
1874  * rootnex_dma_freehdl()
1875  *    called from ddi_dma_free_handle().
1876  */
1877 static int
1878 rootnex_dma_freehdl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle)
1879 {
1880         int ret;
1881 
1882         ndi_fmc_remove(rdip, DMA_HANDLE, handle);
1883 #if defined(__amd64) && !defined(__xpv)
1884         if (IOMMU_USED(rdip))
1885                 ret = iommulib_nexdma_freehdl(dip, rdip, handle);
1886         else
1887 #endif
1888         ret = rootnex_coredma_freehdl(dip, rdip, handle);
1889 
1890         if (rootnex_state->r_dvma_call_list_id)
1891                 ddi_run_callback(&rootnex_state->r_dvma_call_list_id);
1892 
1893         return (ret);
1894 }
1895 
1896 /*ARGSUSED*/
1897 static int
1898 rootnex_coredma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
1899     ddi_dma_handle_t handle, struct ddi_dma_req *dmareq,
1900     ddi_dma_cookie_t *cookiep, uint_t *ccountp)
1901 {
1902         rootnex_sglinfo_t *sinfo;
1903         ddi_dma_obj_t *dmao;
1904 #if defined(__amd64) && !defined(__xpv)
1905         struct dvmaseg *dvs;
1906         ddi_dma_cookie_t *cookie;
1907 #endif
1908         ddi_dma_attr_t *attr;
1909         ddi_dma_impl_t *hp;
1910         rootnex_dma_t *dma;
1911         int kmflag;
1912         int e;
1913         uint_t ncookies;
1914 
1915         hp = (ddi_dma_impl_t *)handle;
1916         dma = (rootnex_dma_t *)hp->dmai_private;
1917         dmao = &dma->dp_dma;
1918         sinfo = &dma->dp_sglinfo;
1919         attr = &hp->dmai_attr;
1920 
1921         /* convert the sleep flags */
1922         if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
1923                 dma->dp_sleep_flags = kmflag = KM_SLEEP;
1924         } else {
1925                 dma->dp_sleep_flags = kmflag = KM_NOSLEEP;
1926         }
1927 
1928         hp->dmai_rflags = dmareq->dmar_flags & DMP_DDIFLAGS;
1929 
1930         /*
1931          * This is useful for debugging a driver. Not as useful in a production
1932          * system. The only time this will fail is if you have a driver bug.
1933          */
1934         if (rootnex_bind_check_inuse) {
1935                 /*
1936                  * No one else should ever have this lock unless someone else
1937                  * is trying to use this handle. So contention on the lock
1938                  * is the same as inuse being set.
1939                  */
1940                 e = mutex_tryenter(&dma->dp_mutex);
1941                 if (e == 0) {
1942                         ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
1943                         return (DDI_DMA_INUSE);
1944                 }
1945                 if (dma->dp_inuse) {
1946                         mutex_exit(&dma->dp_mutex);
1947                         ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
1948                         return (DDI_DMA_INUSE);
1949                 }
1950                 dma->dp_inuse = B_TRUE;
1951                 mutex_exit(&dma->dp_mutex);
1952         }
1953 
1954         /* check the ddi_dma_attr arg to make sure it makes a little sense */
1955         if (rootnex_bind_check_parms) {
1956                 e = rootnex_valid_bind_parms(dmareq, attr);
1957                 if (e != DDI_SUCCESS) {
1958                         ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
1959                         rootnex_clean_dmahdl(hp);
1960                         return (e);
1961                 }
1962         }
1963 
1964         /* save away the original bind info */
1965         dma->dp_dma = dmareq->dmar_object;
1966 
1967 #if defined(__amd64) && !defined(__xpv)
1968         if (IOMMU_USED(rdip)) {
1969                 dmao = &dma->dp_dvma;
1970                 e = iommulib_nexdma_mapobject(dip, rdip, handle, dmareq, dmao);
1971                 switch (e) {
1972                 case DDI_SUCCESS:
1973                         if (sinfo->si_cancross ||
1974                             dmao->dmao_obj.dvma_obj.dv_nseg != 1 ||
1975                             dmao->dmao_size > sinfo->si_max_cookie_size) {
1976                                 dma->dp_dvma_used = B_TRUE;
1977                                 break;
1978                         }
1979                         sinfo->si_sgl_size = 1;
1980                         hp->dmai_rflags |= DMP_NOSYNC;
1981 
1982                         dma->dp_dvma_used = B_TRUE;
1983                         dma->dp_need_to_free_cookie = B_FALSE;
1984 
1985                         dvs = &dmao->dmao_obj.dvma_obj.dv_seg[0];
1986                         cookie = hp->dmai_cookie = dma->dp_cookies =
1987                             (ddi_dma_cookie_t *)dma->dp_prealloc_buffer;
1988                         cookie->dmac_laddress = dvs->dvs_start +
1989                             dmao->dmao_obj.dvma_obj.dv_off;
1990                         cookie->dmac_size = dvs->dvs_len;
1991                         cookie->dmac_type = 0;
1992 
1993                         ROOTNEX_DPROBE1(rootnex__bind__dvmafast, dev_info_t *,
1994                             rdip);
1995                         goto fast;
1996                 case DDI_ENOTSUP:
1997                         break;
1998                 default:
1999                         rootnex_clean_dmahdl(hp);
2000                         return (e);
2001                 }
2002         }
2003 #endif
2004 
2005         /*
2006          * Figure out a rough estimate of the maximum number of pages
2007          * this buffer could use (a high estimate, of course).
2008          */
2009         sinfo->si_max_pages = mmu_btopr(dma->dp_dma.dmao_size) + 1;
2010 
2011         if (dma->dp_dvma_used) {
2012                 /*
2013                  * The number of physical pages is the worst case.
2014                  *
2015                  * For DVMA, the worst case is the length divided
2016                  * by the maximum cookie length, plus 1. Add to that
2017                  * the number of segment boundaries potentially crossed, and
2018                  * the additional number of DVMA segments that was returned.
2019                  *
2020                  * In the normal case, for modern devices, si_cancross will
2021                  * be false, and dv_nseg will be 1, and the fast path will
2022                  * have been taken above.
2023                  */
2024                 ncookies = (dma->dp_dma.dmao_size / sinfo->si_max_cookie_size)
2025                     + 1;
2026                 if (sinfo->si_cancross)
2027                         ncookies +=
2028                             (dma->dp_dma.dmao_size / attr->dma_attr_seg) + 1;
2029                 ncookies += (dmao->dmao_obj.dvma_obj.dv_nseg - 1);
2030 
2031                 sinfo->si_max_pages = MIN(sinfo->si_max_pages, ncookies);
2032         }
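
             /*
              * Worked example (hypothetical sizes): a 40KB DVMA-mapped object
              * with si_max_cookie_size of 64KB, si_cancross false and dv_nseg
              * of 2 gives ncookies = (40960 / 65536) + 1 + (2 - 1) = 2, which
              * then caps si_max_pages below the raw page-count estimate.
              */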
2033 
2034         /*
2035          * We'll use the pre-allocated cookies for any bind that will *always*
2036          * fit (it's more important to be consistent; we don't want to create
2037          * additional degenerate cases).
2038          */
2039         if (sinfo->si_max_pages <= rootnex_state->r_prealloc_cookies) {
2040                 dma->dp_cookies = (ddi_dma_cookie_t *)dma->dp_prealloc_buffer;
2041                 dma->dp_need_to_free_cookie = B_FALSE;
2042                 ROOTNEX_DPROBE2(rootnex__bind__prealloc, dev_info_t *, rdip,
2043                     uint_t, sinfo->si_max_pages);
2044 
2045         /*
2046          * For anything larger than that, we'll go ahead and allocate the
2047          * maximum number of pages we expect to see. Hopefully, we won't be
2048          * seeing this path in the fast path for high performance devices very
2049          * frequently.
2050          *
2051          * A ddi bind interface that allowed the driver to provide storage to
2052          * the bind interface would speed this case up.
2053          */
2054         } else {
2055                 /*
2056                  * Save away how much memory we allocated. If we're doing a
2057                  * nosleep, the alloc could fail...
2058                  */
2059                 dma->dp_cookie_size = sinfo->si_max_pages *
2060                     sizeof (ddi_dma_cookie_t);
2061                 dma->dp_cookies = kmem_alloc(dma->dp_cookie_size, kmflag);
2062                 if (dma->dp_cookies == NULL) {
2063                         ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
2064                         rootnex_clean_dmahdl(hp);
2065                         return (DDI_DMA_NORESOURCES);
2066                 }
2067                 dma->dp_need_to_free_cookie = B_TRUE;
2068                 ROOTNEX_DPROBE2(rootnex__bind__alloc, dev_info_t *, rdip,
2069                     uint_t, sinfo->si_max_pages);
2070         }
2071         hp->dmai_cookie = dma->dp_cookies;
2072 
2073         /*
2074          * Get the real sgl. rootnex_get_sgl will fill in cookie array while
2075          * looking at the constraints in the dma structure. It will then put
2076          * some additional state about the sgl in the dma struct (i.e. is
2077          * the sgl clean, or do we need to do some munging; how many pages
2078          * need to be copied, etc.)
2079          */
2080         if (dma->dp_dvma_used)
2081                 rootnex_dvma_get_sgl(dmao, dma->dp_cookies, &dma->dp_sglinfo);
2082         else
2083                 rootnex_get_sgl(dmao, dma->dp_cookies, &dma->dp_sglinfo);
2084 
2085 out:
2086         ASSERT(sinfo->si_sgl_size <= sinfo->si_max_pages);
2087         /* if we don't need a copy buffer, we don't need to sync */
2088         if (sinfo->si_copybuf_req == 0) {
2089                 hp->dmai_rflags |= DMP_NOSYNC;
2090         }
2091 
2092         /*
2093          * if we don't need the copybuf and we don't need to do a partial, we
2094          * hit the fast path. All the high performance devices should be trying
2095          * to hit this path. To hit this path, a device should be able to reach
2096          * all of memory, shouldn't try to bind more than it can transfer, and
2097          * the buffer shouldn't require more cookies than the driver/device can
2098          * handle [sgllen].
2099          *
2100          * Note that negative values of dma_attr_sgllen are supposed
2101          * to mean unlimited, but we just cast them to mean a
2102          * "ridiculously large limit".  This saves some extra checks on
2103          * hot paths.
2104          */
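             /*
              * For example, a driver advertising dma_attr_sgllen of -1 is seen
              * as (unsigned)0xFFFFFFFF in the comparison below, which in
              * practice never limits the bind.
              */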
2105         if ((sinfo->si_copybuf_req == 0) &&
2106             (sinfo->si_sgl_size <= (unsigned)attr->dma_attr_sgllen) &&
2107             (dmao->dmao_size <= dma->dp_maxxfer)) {
2108 fast:
2109                 /*
2110                  * If the driver supports FMA, insert the handle in the FMA DMA
2111                  * handle cache.
2112                  */
2113                 if (attr->dma_attr_flags & DDI_DMA_FLAGERR)
2114                         hp->dmai_error.err_cf = rootnex_dma_check;
2115 
2116                 /*
2117                  * copy out the first cookie and ccountp, set the cookie
2118                  * pointer to the second cookie. The first cookie is passed
2119                  * back on the stack. Additional cookies are accessed via
2120                  * ddi_dma_nextcookie()
2121                  */
2122                 *cookiep = dma->dp_cookies[0];
2123                 *ccountp = sinfo->si_sgl_size;
2124                 hp->dmai_cookie++;
2125                 hp->dmai_rflags &= ~DDI_DMA_PARTIAL;
2126                 ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS]);
2127                 ROOTNEX_DPROBE4(rootnex__bind__fast, dev_info_t *, rdip,
2128                     uint64_t, rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS],
2129                     uint_t, dmao->dmao_size, uint_t, *ccountp);
2130 
2131 
2132                 return (DDI_DMA_MAPPED);
2133         }
2134 
2135         /*
2136          * go to the slow path, we may need to alloc more memory, create
2137          * multiple windows, and munge up a sgl to make the device happy.
2138          */
2139 
2140         /*
2141          * With the IOMMU mapobject method used, we should never hit
2142          * the slow path. If we do, something is seriously wrong.
2143          * Clean up and return an error.
2144          */
2145 
2146 #if defined(__amd64) && !defined(__xpv)
2147 
2148         if (dma->dp_dvma_used) {
2149                 (void) iommulib_nexdma_unmapobject(dip, rdip, handle,
2150                     &dma->dp_dvma);
2151                 e = DDI_DMA_NOMAPPING;
2152         } else {
2153 #endif
2154                 e = rootnex_bind_slowpath(hp, dmareq, dma, attr, &dma->dp_dma,
2155                     kmflag);
2156 #if defined(__amd64) && !defined(__xpv)
2157         }
2158 #endif
2159         if ((e != DDI_DMA_MAPPED) && (e != DDI_DMA_PARTIAL_MAP)) {
2160                 if (dma->dp_need_to_free_cookie) {
2161                         kmem_free(dma->dp_cookies, dma->dp_cookie_size);
2162                 }
2163                 ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
2164                 rootnex_clean_dmahdl(hp); /* must be after free cookie */
2165                 return (e);
2166         }
2167 
2168         /*
2169          * If the driver supports FMA, insert the handle in the FMA DMA handle
2170          * cache.
2171          */
2172         if (attr->dma_attr_flags & DDI_DMA_FLAGERR)
2173                 hp->dmai_error.err_cf = rootnex_dma_check;
2174 
2175         /* if the first window uses the copy buffer, sync it for the device */
2176         if ((dma->dp_window[dma->dp_current_win].wd_dosync) &&
2177             (hp->dmai_rflags & DDI_DMA_WRITE)) {
2178                 (void) rootnex_coredma_sync(dip, rdip, handle, 0, 0,
2179                     DDI_DMA_SYNC_FORDEV);
2180         }
2181 
2182         /*
2183          * copy out the first cookie and ccountp, set the cookie pointer to the
2184          * second cookie. Make sure the partial flag is set/cleared correctly.
2185          * If we have a partial map (i.e. multiple windows), the number of
2186          * cookies we return is the number of cookies in the first window.
2187          */
2188         if (e == DDI_DMA_MAPPED) {
2189                 hp->dmai_rflags &= ~DDI_DMA_PARTIAL;
2190                 *ccountp = sinfo->si_sgl_size;
2191                 hp->dmai_nwin = 1;
2192         } else {
2193                 hp->dmai_rflags |= DDI_DMA_PARTIAL;
2194                 *ccountp = dma->dp_window[dma->dp_current_win].wd_cookie_cnt;
2195                 ASSERT(hp->dmai_nwin <= dma->dp_max_win);
2196         }
2197         *cookiep = dma->dp_cookies[0];
2198         hp->dmai_cookie++;
2199 
2200         ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS]);
2201         ROOTNEX_DPROBE4(rootnex__bind__slow, dev_info_t *, rdip, uint64_t,
2202             rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS], uint_t,
2203             dmao->dmao_size, uint_t, *ccountp);
2204         return (e);
2205 }
2206 
2207 /*
2208  * rootnex_dma_bindhdl()
2209  *    called from ddi_dma_addr_bind_handle() and ddi_dma_buf_bind_handle().
2210  */
2211 static int
2212 rootnex_dma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
2213     ddi_dma_handle_t handle, struct ddi_dma_req *dmareq,
2214     ddi_dma_cookie_t *cookiep, uint_t *ccountp)
2215 {
2216         int ret;
2217 #if defined(__amd64) && !defined(__xpv)
2218         if (IOMMU_USED(rdip))
2219                 ret = iommulib_nexdma_bindhdl(dip, rdip, handle, dmareq,
2220                     cookiep, ccountp);
2221         else
2222 #endif
2223         ret = rootnex_coredma_bindhdl(dip, rdip, handle, dmareq,
2224             cookiep, ccountp);
2225 
2226         if (ret == DDI_DMA_NORESOURCES && dmareq->dmar_fp != DDI_DMA_DONTWAIT) {
2227                 ddi_set_callback(dmareq->dmar_fp, dmareq->dmar_arg,
2228                     &rootnex_state->r_dvma_call_list_id);
2229         }
2230 
2231         return (ret);
2232 }
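
     /*
      * For context (not part of the original code): a leaf driver reaches the
      * bind path above through ddi_dma_addr_bind_handle(9F) or
      * ddi_dma_buf_bind_handle(9F) and then walks the returned cookies. A
      * minimal sketch, assuming hypothetical "xx_" names and a kernel virtual
      * buffer:
      *
      *      ddi_dma_cookie_t cookie;
      *      uint_t ccount, i;
      *
      *      if (ddi_dma_addr_bind_handle(xx_dma_handle, NULL, xx_buf, xx_len,
      *          DDI_DMA_READ | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
      *          &cookie, &ccount) != DDI_DMA_MAPPED)
      *              return (DDI_FAILURE);
      *      for (i = 1; i < ccount; i++)
      *              ddi_dma_nextcookie(xx_dma_handle, &cookie);
      *
      * The first cookie comes back on the stack (as described above); each
      * additional cookie is fetched with ddi_dma_nextcookie(9F) and would be
      * programmed into the device by the driver.
      */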
2233 
2234 
2235 
2236 /*ARGSUSED*/
2237 static int
2238 rootnex_coredma_unbindhdl(dev_info_t *dip, dev_info_t *rdip,
2239     ddi_dma_handle_t handle)
2240 {
2241         ddi_dma_impl_t *hp;
2242         rootnex_dma_t *dma;
2243         int e;
2244 
2245         hp = (ddi_dma_impl_t *)handle;
2246         dma = (rootnex_dma_t *)hp->dmai_private;
2247 
2248         /* make sure the buffer wasn't free'd before calling unbind */
2249         if (rootnex_unbind_verify_buffer) {
2250                 e = rootnex_verify_buffer(dma);
2251                 if (e != DDI_SUCCESS) {
2252                         ASSERT(0);
2253                         return (DDI_FAILURE);
2254                 }
2255         }
2256 
2257         /* sync the current window before unbinding the buffer */
2258         if (dma->dp_window && dma->dp_window[dma->dp_current_win].wd_dosync &&
2259             (hp->dmai_rflags & DDI_DMA_READ)) {
2260                 (void) rootnex_coredma_sync(dip, rdip, handle, 0, 0,
2261                     DDI_DMA_SYNC_FORCPU);
2262         }
2263 
2264         /*
2265          * Clean up copy buffer and window state. If we didn't use the copy
2266          * buffer or windows, there won't be much to do :-)
2267          */
2268         rootnex_teardown_copybuf(dma);
2269         rootnex_teardown_windows(dma);
2270 
2271 #if defined(__amd64) && !defined(__xpv)
2272         if (IOMMU_USED(rdip) && dma->dp_dvma_used)
2273                 (void) iommulib_nexdma_unmapobject(dip, rdip, handle,
2274                     &dma->dp_dvma);
2275 #endif
2276 
2277         /*
2278          * If we had to allocate space for the worst case sgl (it didn't
2279          * fit into our pre-allocated buffer), free that up now.
2280          */
2281         if (dma->dp_need_to_free_cookie) {
2282                 kmem_free(dma->dp_cookies, dma->dp_cookie_size);
2283         }
2284 
2285         /*
2286          * clean up the handle so it's ready for the next bind (i.e. if the
2287          * handle is reused).
2288          */
2289         rootnex_clean_dmahdl(hp);
2290         hp->dmai_error.err_cf = NULL;
2291 
2292         ROOTNEX_DPROF_DEC(&rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS]);
2293         ROOTNEX_DPROBE1(rootnex__unbind, uint64_t,
2294             rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS]);
2295 
2296         return (DDI_SUCCESS);
2297 }
2298 
2299 /*
2300  * rootnex_dma_unbindhdl()
2301  *    called from ddi_dma_unbind_handle()
2302  */
2303 /*ARGSUSED*/
2304 static int
2305 rootnex_dma_unbindhdl(dev_info_t *dip, dev_info_t *rdip,
2306     ddi_dma_handle_t handle)
2307 {
2308         int ret;
2309 
2310 #if defined(__amd64) && !defined(__xpv)
2311         if (IOMMU_USED(rdip))
2312                 ret = iommulib_nexdma_unbindhdl(dip, rdip, handle);
2313         else
2314 #endif
2315         ret = rootnex_coredma_unbindhdl(dip, rdip, handle);
2316 
2317         if (rootnex_state->r_dvma_call_list_id)
2318                 ddi_run_callback(&rootnex_state->r_dvma_call_list_id);
2319 
2320         return (ret);
2321 }
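
     /*
      * For context (not part of the original code), the matching teardown in a
      * leaf driver is (hypothetical "xx_" handle, sketch only):
      *
      *      (void) ddi_dma_unbind_handle(xx_dma_handle);
      *      ddi_dma_free_handle(&xx_dma_handle);
      *
      * ddi_dma_unbind_handle(9F) must be called before ddi_dma_free_handle(9F);
      * the ASSERT on dp_inuse in rootnex_coredma_freehdl() above reflects that
      * ordering.
      */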
2322 
2323 #if defined(__amd64) && !defined(__xpv)
2324 
2325 static int
2326 rootnex_coredma_get_sleep_flags(ddi_dma_handle_t handle)
2327 {
2328         ddi_dma_impl_t *hp = (ddi_dma_impl_t *)handle;
2329         rootnex_dma_t *dma = (rootnex_dma_t *)hp->dmai_private;
2330 
2331         if (dma->dp_sleep_flags != KM_SLEEP &&
2332             dma->dp_sleep_flags != KM_NOSLEEP)
2333                 cmn_err(CE_PANIC, "kmem sleep flags not set in DMA handle");
2334         return (dma->dp_sleep_flags);
2335 }
2336 /*ARGSUSED*/
2337 static void
2338 rootnex_coredma_reset_cookies(dev_info_t *dip, ddi_dma_handle_t handle)
2339 {
2340         ddi_dma_impl_t *hp = (ddi_dma_impl_t *)handle;
2341         rootnex_dma_t *dma = (rootnex_dma_t *)hp->dmai_private;
2342         rootnex_window_t *window;
2343 
2344         if (dma->dp_window) {
2345                 window = &dma->dp_window[dma->dp_current_win];
2346                 hp->dmai_cookie = window->wd_first_cookie;
2347         } else {
2348                 hp->dmai_cookie = dma->dp_cookies;
2349         }
2350         hp->dmai_cookie++;
2351 }
2352 
2353 /*ARGSUSED*/
2354 static int
2355 rootnex_coredma_get_cookies(dev_info_t *dip, ddi_dma_handle_t handle,
2356     ddi_dma_cookie_t **cookiepp, uint_t *ccountp)
2357 {
2358         int i;
2359         int km_flags;
2360         ddi_dma_impl_t *hp = (ddi_dma_impl_t *)handle;
2361         rootnex_dma_t *dma = (rootnex_dma_t *)hp->dmai_private;
2362         rootnex_window_t *window;
2363         ddi_dma_cookie_t *cp;
2364         ddi_dma_cookie_t *cookie;
2365 
2366         ASSERT(*cookiepp == NULL);
2367         ASSERT(*ccountp == 0);
2368 
2369         if (dma->dp_window) {
2370                 window = &dma->dp_window[dma->dp_current_win];
2371                 cp = window->wd_first_cookie;
2372                 *ccountp = window->wd_cookie_cnt;
2373         } else {
2374                 cp = dma->dp_cookies;
2375                 *ccountp = dma->dp_sglinfo.si_sgl_size;
2376         }
2377 
2378         km_flags = rootnex_coredma_get_sleep_flags(handle);
2379         cookie = kmem_zalloc(sizeof (ddi_dma_cookie_t) * (*ccountp), km_flags);
2380         if (cookie == NULL) {
2381                 return (DDI_DMA_NORESOURCES);
2382         }
2383 
2384         for (i = 0; i < *ccountp; i++) {
2385                 cookie[i].dmac_notused = cp[i].dmac_notused;
2386                 cookie[i].dmac_type = cp[i].dmac_type;
2387                 cookie[i].dmac_address = cp[i].dmac_address;
2388                 cookie[i].dmac_size = cp[i].dmac_size;
2389         }
2390 
2391         *cookiepp = cookie;
2392 
2393         return (DDI_SUCCESS);
2394 }
2395 
2396 /*ARGSUSED*/
2397 static int
2398 rootnex_coredma_set_cookies(dev_info_t *dip, ddi_dma_handle_t handle,
2399     ddi_dma_cookie_t *cookiep, uint_t ccount)
2400 {
2401         ddi_dma_impl_t *hp = (ddi_dma_impl_t *)handle;
2402         rootnex_dma_t *dma = (rootnex_dma_t *)hp->dmai_private;
2403         rootnex_window_t *window;
2404         ddi_dma_cookie_t *cur_cookiep;
2405 
2406         ASSERT(cookiep);
2407         ASSERT(ccount != 0);
2408         ASSERT(dma->dp_need_to_switch_cookies == B_FALSE);
2409 
2410         if (dma->dp_window) {
2411                 window = &dma->dp_window[dma->dp_current_win];
2412                 dma->dp_saved_cookies = window->wd_first_cookie;
2413                 window->wd_first_cookie = cookiep;
2414                 ASSERT(ccount == window->wd_cookie_cnt);
2415                 cur_cookiep = (hp->dmai_cookie - dma->dp_saved_cookies)
2416                     + window->wd_first_cookie;
2417         } else {
2418                 dma->dp_saved_cookies = dma->dp_cookies;
2419                 dma->dp_cookies = cookiep;
2420                 ASSERT(ccount == dma->dp_sglinfo.si_sgl_size);
2421                 cur_cookiep = (hp->dmai_cookie - dma->dp_saved_cookies)
2422                     + dma->dp_cookies;
2423         }
2424 
2425         dma->dp_need_to_switch_cookies = B_TRUE;
2426         hp->dmai_cookie = cur_cookiep;
2427 
2428         return (DDI_SUCCESS);
2429 }
2430 
2431 /*ARGSUSED*/
2432 static int
2433 rootnex_coredma_clear_cookies(dev_info_t *dip, ddi_dma_handle_t handle)
2434 {
2435         ddi_dma_impl_t *hp = (ddi_dma_impl_t *)handle;
2436         rootnex_dma_t *dma = (rootnex_dma_t *)hp->dmai_private;
2437         rootnex_window_t *window;
2438         ddi_dma_cookie_t *cur_cookiep;
2439         ddi_dma_cookie_t *cookie_array;
2440         uint_t ccount;
2441 
2442         /* check if cookies have not been switched */
2443         if (dma->dp_need_to_switch_cookies == B_FALSE)
2444                 return (DDI_SUCCESS);
2445 
2446         ASSERT(dma->dp_saved_cookies);
2447 
2448         if (dma->dp_window) {
2449                 window = &dma->dp_window[dma->dp_current_win];
2450                 cookie_array = window->wd_first_cookie;
2451                 window->wd_first_cookie = dma->dp_saved_cookies;
2452                 dma->dp_saved_cookies = NULL;
2453                 ccount = window->wd_cookie_cnt;
2454                 cur_cookiep = (hp->dmai_cookie - cookie_array)
2455                     + window->wd_first_cookie;
2456         } else {
2457                 cookie_array = dma->dp_cookies;
2458                 dma->dp_cookies = dma->dp_saved_cookies;
2459                 dma->dp_saved_cookies = NULL;
2460                 ccount = dma->dp_sglinfo.si_sgl_size;
2461                 cur_cookiep = (hp->dmai_cookie - cookie_array)
2462                     + dma->dp_cookies;
2463         }
2464 
2465         kmem_free(cookie_array, sizeof (ddi_dma_cookie_t) * ccount);
2466 
2467         hp->dmai_cookie = cur_cookiep;
2468 
2469         dma->dp_need_to_switch_cookies = B_FALSE;
2470 
2471         return (DDI_SUCCESS);
2472 }
2473 
2474 #endif
2475 
2476 static struct as *
2477 rootnex_get_as(ddi_dma_obj_t *dmao)
2478 {
2479         struct as *asp;
2480 
2481         switch (dmao->dmao_type) {
2482         case DMA_OTYP_VADDR:
2483         case DMA_OTYP_BUFVADDR:
2484                 asp = dmao->dmao_obj.virt_obj.v_as;
2485                 if (asp == NULL)
2486                         asp = &kas;
2487                 break;
2488         default:
2489                 asp = NULL;
2490                 break;
2491         }
2492         return (asp);
2493 }
2494 
2495 /*
2496  * rootnex_verify_buffer()
2497  *   verify buffer wasn't free'd
2498  */
2499 static int
2500 rootnex_verify_buffer(rootnex_dma_t *dma)
2501 {
2502         page_t **pplist;
2503         caddr_t vaddr;
2504         uint_t pcnt;
2505         uint_t poff;
2506         page_t *pp;
2507         char b;
2508         int i;
2509 
2510         /* Figure out how many pages this buffer occupies */
2511         if (dma->dp_dma.dmao_type == DMA_OTYP_PAGES) {
2512                 poff = dma->dp_dma.dmao_obj.pp_obj.pp_offset & MMU_PAGEOFFSET;
2513         } else {
2514                 vaddr = dma->dp_dma.dmao_obj.virt_obj.v_addr;
2515                 poff = (uintptr_t)vaddr & MMU_PAGEOFFSET;
2516         }
2517         pcnt = mmu_btopr(dma->dp_dma.dmao_size + poff);
2518 
2519         switch (dma->dp_dma.dmao_type) {
2520         case DMA_OTYP_PAGES:
2521                 /*
2522                  * for a linked list of pp's walk through them to make sure
2523                  * they're locked and not free.
2524                  */
2525                 pp = dma->dp_dma.dmao_obj.pp_obj.pp_pp;
2526                 for (i = 0; i < pcnt; i++) {
2527                         if (PP_ISFREE(pp) || !PAGE_LOCKED(pp)) {
2528                                 return (DDI_FAILURE);
2529                         }
2530                         pp = pp->p_next;
2531                 }
2532                 break;
2533 
2534         case DMA_OTYP_VADDR:
2535         case DMA_OTYP_BUFVADDR:
2536                 pplist = dma->dp_dma.dmao_obj.virt_obj.v_priv;
2537                 /*
2538                  * for an array of pp's walk through them to make sure they're
2539                  * not free. It's possible that they may not be locked.
2540                  */
2541                 if (pplist) {
2542                         for (i = 0; i < pcnt; i++) {
2543                                 if (PP_ISFREE(pplist[i])) {
2544                                         return (DDI_FAILURE);
2545                                 }
2546                         }
2547 
2548                 /* For a virtual address, try to peek at each page */
2549                 } else {
2550                         if (rootnex_get_as(&dma->dp_dma) == &kas) {
2551                                 for (i = 0; i < pcnt; i++) {
2552                                         if (ddi_peek8(NULL, vaddr, &b) ==
2553                                             DDI_FAILURE)
2554                                                 return (DDI_FAILURE);
2555                                         vaddr += MMU_PAGESIZE;
2556                                 }
2557                         }
2558                 }
2559                 break;
2560 
2561         default:
2562                 cmn_err(CE_PANIC, "rootnex_verify_buffer: bad DMA object");
2563                 break;
2564         }
2565 
2566         return (DDI_SUCCESS);
2567 }
2568 
2569 
2570 /*
2571  * rootnex_clean_dmahdl()
2572  *    Clean the dma handle. This should be called on a handle alloc and an
2573  *    Clean the dma handle. This should be called on handle alloc and
2574  *    handle unbind. Set the handle state to the default settings.
2575 static void
2576 rootnex_clean_dmahdl(ddi_dma_impl_t *hp)
2577 {
2578         rootnex_dma_t *dma;
2579 
2580 
2581         dma = (rootnex_dma_t *)hp->dmai_private;
2582 
2583         hp->dmai_nwin = 0;
2584         dma->dp_current_cookie = 0;
2585         dma->dp_copybuf_size = 0;
2586         dma->dp_window = NULL;
2587         dma->dp_cbaddr = NULL;
2588         dma->dp_inuse = B_FALSE;
2589         dma->dp_dvma_used = B_FALSE;
2590         dma->dp_need_to_free_cookie = B_FALSE;
2591         dma->dp_need_to_switch_cookies = B_FALSE;
2592         dma->dp_saved_cookies = NULL;
2593         dma->dp_sleep_flags = KM_PANIC;
2594         dma->dp_need_to_free_window = B_FALSE;
2595         dma->dp_partial_required = B_FALSE;
2596         dma->dp_trim_required = B_FALSE;
2597         dma->dp_sglinfo.si_copybuf_req = 0;
2598 #if !defined(__amd64)
2599         dma->dp_cb_remaping = B_FALSE;
2600         dma->dp_kva = NULL;
2601 #endif
2602 
2603         /* FMA related initialization */
2604         hp->dmai_fault = 0;
2605         hp->dmai_fault_check = NULL;
2606         hp->dmai_fault_notify = NULL;
2607         hp->dmai_error.err_ena = 0;
2608         hp->dmai_error.err_status = DDI_FM_OK;
2609         hp->dmai_error.err_expected = DDI_FM_ERR_UNEXPECTED;
2610         hp->dmai_error.err_ontrap = NULL;
2611 }
2612 
2613 
2614 /*
2615  * rootnex_valid_alloc_parms()
2616  *    Called in ddi_dma_alloc_handle path to validate its parameters.
2617  */
2618 static int
2619 rootnex_valid_alloc_parms(ddi_dma_attr_t *attr, uint_t maxsegmentsize)
2620 {
2621         if ((attr->dma_attr_seg < MMU_PAGEOFFSET) ||
2622             (attr->dma_attr_count_max < MMU_PAGEOFFSET) ||
2623             (attr->dma_attr_granular > MMU_PAGESIZE) ||
2624             (attr->dma_attr_maxxfer < MMU_PAGESIZE)) {
2625                 return (DDI_DMA_BADATTR);
2626         }
2627 
2628         if (attr->dma_attr_addr_hi <= attr->dma_attr_addr_lo) {
2629                 return (DDI_DMA_BADATTR);
2630         }
2631 
2632         if ((attr->dma_attr_seg & MMU_PAGEOFFSET) != MMU_PAGEOFFSET ||
2633             MMU_PAGESIZE & (attr->dma_attr_granular - 1) ||
2634             attr->dma_attr_sgllen == 0) {
2635                 return (DDI_DMA_BADATTR);
2636         }
2637 
2638         /* We should be able to DMA into every byte offset in a page */
2639         if (maxsegmentsize < MMU_PAGESIZE) {
2640                 return (DDI_DMA_BADATTR);
2641         }
2642 
2643         /* if we're bouncing on seg, seg must be <= addr_hi */
2644         if ((attr->dma_attr_flags & _DDI_DMA_BOUNCE_ON_SEG) &&
2645             (attr->dma_attr_seg > attr->dma_attr_addr_hi)) {
2646                 return (DDI_DMA_BADATTR);
2647         }
2648         return (DDI_SUCCESS);
2649 }
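
     /*
      * Example (hypothetical attribute values) of a set that passes the checks
      * above: a 32-bit device with dma_attr_addr_lo of 0; dma_attr_addr_hi,
      * dma_attr_seg, dma_attr_count_max and dma_attr_maxxfer of 0xFFFFFFFF;
      * dma_attr_granular of 1; and dma_attr_sgllen of 1. Attributes that
      * cannot express a full page (e.g. dma_attr_maxxfer below MMU_PAGESIZE)
      * are rejected with DDI_DMA_BADATTR.
      */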
2650 
2651 /*
2652  * rootnex_valid_bind_parms()
2653  *    Called in ddi_dma_*_bind_handle path to validate its parameters.
2654  */
2655 /* ARGSUSED */
2656 static int
2657 rootnex_valid_bind_parms(ddi_dma_req_t *dmareq, ddi_dma_attr_t *attr)
2658 {
2659 #if !defined(__amd64)
2660         /*
2661          * we only support up to a 2G-1 transfer size on 32-bit kernels so
2662          * we can track the offset for the obsoleted interfaces.
2663          */
2664         if (dmareq->dmar_object.dmao_size > 0x7FFFFFFF) {
2665                 return (DDI_DMA_TOOBIG);
2666         }
2667 #endif
2668 
2669         return (DDI_SUCCESS);
2670 }
2671 
2672 
2673 /*
2674  * rootnex_need_bounce_seg()
2675  *    check to see if the buffer lives on both sides of the seg.
2676  */
2677 static boolean_t
2678 rootnex_need_bounce_seg(ddi_dma_obj_t *dmar_object, rootnex_sglinfo_t *sglinfo)
2679 {
2680         ddi_dma_atyp_t buftype;
2681         rootnex_addr_t raddr;
2682         boolean_t lower_addr;
2683         boolean_t upper_addr;
2684         uint64_t offset;
2685         page_t **pplist;
2686         uint64_t paddr;
2687         uint32_t psize;
2688         uint32_t size;
2689         caddr_t vaddr;
2690         uint_t pcnt;
2691         page_t *pp;
2692 
2693 
2694         /* shortcuts */
2695         pplist = dmar_object->dmao_obj.virt_obj.v_priv;
2696         vaddr = dmar_object->dmao_obj.virt_obj.v_addr;
2697         buftype = dmar_object->dmao_type;
2698         size = dmar_object->dmao_size;
2699 
2700         lower_addr = B_FALSE;
2701         upper_addr = B_FALSE;
2702         pcnt = 0;
2703 
2704         /*
2705          * Process the first page to handle the initial offset of the buffer.
2706          * We'll use the base address we get later when we loop through all
2707          * the pages.
2708          */
2709         if (buftype == DMA_OTYP_PAGES) {
2710                 pp = dmar_object->dmao_obj.pp_obj.pp_pp;
2711                 offset =  dmar_object->dmao_obj.pp_obj.pp_offset &
2712                     MMU_PAGEOFFSET;
2713                 paddr = pfn_to_pa(pp->p_pagenum) + offset;
2714                 psize = MIN(size, (MMU_PAGESIZE - offset));
2715                 pp = pp->p_next;
2716                 sglinfo->si_asp = NULL;
2717         } else if (pplist != NULL) {
2718                 offset = (uintptr_t)vaddr & MMU_PAGEOFFSET;
2719                 sglinfo->si_asp = dmar_object->dmao_obj.virt_obj.v_as;
2720                 if (sglinfo->si_asp == NULL) {
2721                         sglinfo->si_asp = &kas;
2722                 }
2723                 paddr = pfn_to_pa(pplist[pcnt]->p_pagenum);
2724                 paddr += offset;
2725                 psize = MIN(size, (MMU_PAGESIZE - offset));
2726                 pcnt++;
2727         } else {
2728                 offset = (uintptr_t)vaddr & MMU_PAGEOFFSET;
2729                 sglinfo->si_asp = dmar_object->dmao_obj.virt_obj.v_as;
2730                 if (sglinfo->si_asp == NULL) {
2731                         sglinfo->si_asp = &kas;
2732                 }
2733                 paddr = pfn_to_pa(hat_getpfnum(sglinfo->si_asp->a_hat, vaddr));
2734                 paddr += offset;
2735                 psize = MIN(size, (MMU_PAGESIZE - offset));
2736                 vaddr += psize;
2737         }
2738 
2739         raddr = ROOTNEX_PADDR_TO_RBASE(paddr);
2740 
2741         if ((raddr + psize) > sglinfo->si_segmask) {
2742                 upper_addr = B_TRUE;
2743         } else {
2744                 lower_addr = B_TRUE;
2745         }
2746         size -= psize;
2747 
2748         /*
2749          * Walk through the rest of the pages in the buffer. Track to see
2750          * if we have pages on both sides of the segment boundary.
2751          */
2752         while (size > 0) {
2753                 /* partial or full page */
2754                 psize = MIN(size, MMU_PAGESIZE);
2755 
2756                 if (buftype == DMA_OTYP_PAGES) {
2757                         /* get the paddr from the page_t */
2758                         ASSERT(!PP_ISFREE(pp) && PAGE_LOCKED(pp));
2759                         paddr = pfn_to_pa(pp->p_pagenum);
2760                         pp = pp->p_next;
2761                 } else if (pplist != NULL) {
2762                         /* index into the array of page_t's to get the paddr */
2763                         ASSERT(!PP_ISFREE(pplist[pcnt]));
2764                         paddr = pfn_to_pa(pplist[pcnt]->p_pagenum);
2765                         pcnt++;
2766                 } else {
2767                         /* call into the VM to get the paddr */
2768                         paddr =  pfn_to_pa(hat_getpfnum(sglinfo->si_asp->a_hat,
2769                             vaddr));
2770                         vaddr += psize;
2771                 }
2772 
2773                 raddr = ROOTNEX_PADDR_TO_RBASE(paddr);
2774 
2775                 if ((raddr + psize) > sglinfo->si_segmask) {
2776                         upper_addr = B_TRUE;
2777                 } else {
2778                         lower_addr = B_TRUE;
2779                 }
2780                 /*
2781                  * if the buffer lives both above and below the segment
2782                  * boundary, or the current page is the page immediately
2783                  * after the segment, we will use a copy/bounce buffer for
2784                  * all pages > seg.
2785                  */
2786                 if ((lower_addr && upper_addr) ||
2787                     (raddr == (sglinfo->si_segmask + 1))) {
2788                         return (B_TRUE);
2789                 }
2790 
2791                 size -= psize;
2792         }
2793 
2794         return (B_FALSE);
2795 }
2796 
2797 /*
2798  * rootnex_get_sgl()
2799  *    Called in bind fastpath to get the sgl. Most of this will be replaced
2800  *    with a call to the vm layer when vm2.0 comes around...
2801  */
2802 static void
2803 rootnex_get_sgl(ddi_dma_obj_t *dmar_object, ddi_dma_cookie_t *sgl,
2804     rootnex_sglinfo_t *sglinfo)
2805 {
2806         ddi_dma_atyp_t buftype;
2807         rootnex_addr_t raddr;
2808         uint64_t last_page;
2809         uint64_t offset;
2810         uint64_t addrhi;
2811         uint64_t addrlo;
2812         uint64_t maxseg;
2813         page_t **pplist;
2814         uint64_t paddr;
2815         uint32_t psize;
2816         uint32_t size;
2817         caddr_t vaddr;
2818         uint_t pcnt;
2819         page_t *pp;
2820         uint_t cnt;
2821 
2822 
2823         /* shortcuts */
2824         pplist = dmar_object->dmao_obj.virt_obj.v_priv;
2825         vaddr = dmar_object->dmao_obj.virt_obj.v_addr;
2826         maxseg = sglinfo->si_max_cookie_size;
2827         buftype = dmar_object->dmao_type;
2828         addrhi = sglinfo->si_max_addr;
2829         addrlo = sglinfo->si_min_addr;
2830         size = dmar_object->dmao_size;
2831 
2832         pcnt = 0;
2833         cnt = 0;
2834 
2835 
2836         /*
2837          * check to see if we need to use the copy buffer for pages over
2838          * the segment attr.
2839          */
2840         sglinfo->si_bounce_on_seg = B_FALSE;
2841         if (sglinfo->si_flags & _DDI_DMA_BOUNCE_ON_SEG) {
2842                 sglinfo->si_bounce_on_seg = rootnex_need_bounce_seg(
2843                     dmar_object, sglinfo);
2844         }
2845 
2846         /*
2847          * if we were passed down a linked list of pages, i.e. pointer to
2848          * page_t, use this to get our physical address and buf offset.
2849          */
2850         if (buftype == DMA_OTYP_PAGES) {
2851                 pp = dmar_object->dmao_obj.pp_obj.pp_pp;
2852                 ASSERT(!PP_ISFREE(pp) && PAGE_LOCKED(pp));
2853                 offset =  dmar_object->dmao_obj.pp_obj.pp_offset &
2854                     MMU_PAGEOFFSET;
2855                 paddr = pfn_to_pa(pp->p_pagenum) + offset;
2856                 psize = MIN(size, (MMU_PAGESIZE - offset));
2857                 pp = pp->p_next;
2858                 sglinfo->si_asp = NULL;
2859 
2860         /*
2861          * We weren't passed down a linked list of pages, but if we were passed
2862          * down an array of pages, use this to get our physical address and buf
2863          * offset.
2864          */
2865         } else if (pplist != NULL) {
2866                 ASSERT((buftype == DMA_OTYP_VADDR) ||
2867                     (buftype == DMA_OTYP_BUFVADDR));
2868 
2869                 offset = (uintptr_t)vaddr & MMU_PAGEOFFSET;
2870                 sglinfo->si_asp = dmar_object->dmao_obj.virt_obj.v_as;
2871                 if (sglinfo->si_asp == NULL) {
2872                         sglinfo->si_asp = &kas;
2873                 }
2874 
2875                 ASSERT(!PP_ISFREE(pplist[pcnt]));
2876                 paddr = pfn_to_pa(pplist[pcnt]->p_pagenum);
2877                 paddr += offset;
2878                 psize = MIN(size, (MMU_PAGESIZE - offset));
2879                 pcnt++;
2880 
2881         /*
2882          * All we have is a virtual address, we'll need to call into the VM
2883          * to get the physical address.
2884          */
2885         } else {
2886                 ASSERT((buftype == DMA_OTYP_VADDR) ||
2887                     (buftype == DMA_OTYP_BUFVADDR));
2888 
2889                 offset = (uintptr_t)vaddr & MMU_PAGEOFFSET;
2890                 sglinfo->si_asp = dmar_object->dmao_obj.virt_obj.v_as;
2891                 if (sglinfo->si_asp == NULL) {
2892                         sglinfo->si_asp = &kas;
2893                 }
2894 
2895                 paddr = pfn_to_pa(hat_getpfnum(sglinfo->si_asp->a_hat, vaddr));
2896                 paddr += offset;
2897                 psize = MIN(size, (MMU_PAGESIZE - offset));
2898                 vaddr += psize;
2899         }
2900 
2901         raddr = ROOTNEX_PADDR_TO_RBASE(paddr);
2902 
2903         /*
2904          * Set up the first cookie with the physical address of the page and
2905          * the size of the page (which takes into account the initial offset
2906          * into the page).
2907          */
2908         sgl[cnt].dmac_laddress = raddr;
2909         sgl[cnt].dmac_size = psize;
2910         sgl[cnt].dmac_type = 0;
2911 
2912         /*
2913          * Save away the buffer offset into the page. We'll need this later in
2914          * the copy buffer code to help figure out the page index within the
2915          * buffer and the offset into the current page.
2916          */
2917         sglinfo->si_buf_offset = offset;
2918 
2919         /*
2920          * Use the copy buffer if we are bouncing pages over the segment
2921          * boundary and this page is over the segment boundary,
2922          *   OR
2923          * if the DMA engine can't reach the physical address.
2924          */
2925         if (((sglinfo->si_bounce_on_seg) &&
2926             ((raddr + psize) > sglinfo->si_segmask)) ||
2927             ((raddr < addrlo) || ((raddr + psize) > addrhi))) {
2928                 /*
2929                  * Increase how much copy buffer we use. We always increase by
2930                  * pagesize so we don't have to worry about converting offsets.
2931                  * Set a flag in the cookie's dmac_type to indicate that it uses
2932                  * the copy buffer. If this isn't the last cookie, go to the
2933                  * next cookie (since we separate each page which uses the copy
2934                  * buffer, in case the copy buffer is not physically contiguous).
2935                  */
2936                 sglinfo->si_copybuf_req += MMU_PAGESIZE;
2937                 sgl[cnt].dmac_type = ROOTNEX_USES_COPYBUF;
2938                 if ((cnt + 1) < sglinfo->si_max_pages) {
2939                         cnt++;
2940                         sgl[cnt].dmac_laddress = 0;
2941                         sgl[cnt].dmac_size = 0;
2942                         sgl[cnt].dmac_type = 0;
2943                 }
2944         }
2945 
2946         /*
2947          * save this page's physical address so we can figure out if the next
2948          * page is physically contiguous. Keep decrementing size until we are
2949          * done with the buffer.
2950          */
2951         last_page = raddr & MMU_PAGEMASK;
2952         size -= psize;
2953 
2954         while (size > 0) {
2955                 /* Get the size for this page (i.e. partial or full page) */
2956                 psize = MIN(size, MMU_PAGESIZE);
2957 
2958                 if (buftype == DMA_OTYP_PAGES) {
2959                         /* get the paddr from the page_t */
2960                         ASSERT(!PP_ISFREE(pp) && PAGE_LOCKED(pp));
2961                         paddr = pfn_to_pa(pp->p_pagenum);
2962                         pp = pp->p_next;
2963                 } else if (pplist != NULL) {
2964                         /* index into the array of page_t's to get the paddr */
2965                         ASSERT(!PP_ISFREE(pplist[pcnt]));
2966                         paddr = pfn_to_pa(pplist[pcnt]->p_pagenum);
2967                         pcnt++;
2968                 } else {
2969                         /* call into the VM to get the paddr */
2970                         paddr =  pfn_to_pa(hat_getpfnum(sglinfo->si_asp->a_hat,
2971                             vaddr));
2972                         vaddr += psize;
2973                 }
2974 
2975                 raddr = ROOTNEX_PADDR_TO_RBASE(paddr);
2976 
2977                 /*
2978                  * Use the copy buffer if we are bouncing pages over the
2979                  * segment boundary and this page is over the segment
2980                  * boundary,
2981                  *   OR
2982                  * if the DMA engine can't reach the physical address.
2983                  */
2984                 if (((sglinfo->si_bounce_on_seg) &&
2985                     ((raddr + psize) > sglinfo->si_segmask)) ||
2986                     ((raddr < addrlo) || ((raddr + psize) > addrhi))) {
2987 
2988                         sglinfo->si_copybuf_req += MMU_PAGESIZE;
2989 
2990                         /*
2991                          * if there is something in the current cookie, go to
2992                          * the next one. We only want one page in a cookie which
2993                          * uses the copybuf since the copybuf doesn't have to
2994                          * be physically contiguous.
2995                          */
2996                         if (sgl[cnt].dmac_size != 0) {
2997                                 cnt++;
2998                         }
2999                         sgl[cnt].dmac_laddress = raddr;
3000                         sgl[cnt].dmac_size = psize;
3001 #if defined(__amd64)
3002                         sgl[cnt].dmac_type = ROOTNEX_USES_COPYBUF;
3003 #else
3004                         /*
3005                          * save the buf offset for 32-bit kernel. used in the
3006                          * obsoleted interfaces.
3007                          */
3008                         sgl[cnt].dmac_type = ROOTNEX_USES_COPYBUF |
3009                             (dmar_object->dmao_size - size);
3010 #endif
3011                         /* if this isn't the last cookie, go to the next one */
3012                         if ((cnt + 1) < sglinfo->si_max_pages) {
3013                                 cnt++;
3014                                 sgl[cnt].dmac_laddress = 0;
3015                                 sgl[cnt].dmac_size = 0;
3016                                 sgl[cnt].dmac_type = 0;
3017                         }
3018 
3019                 /*
3020                  * This page doesn't need the copy buffer, but it either isn't
3021                  * physically contiguous with the last page, would put us over
3022                  * a segment boundary, would exceed the max cookie size, or the
3023                  * current sgl doesn't have anything in it yet.
3024                  */
3025                 } else if (((last_page + MMU_PAGESIZE) != raddr) ||
3026                     !(raddr & sglinfo->si_segmask) ||
3027                     ((sgl[cnt].dmac_size + psize) > maxseg) ||
3028                     (sgl[cnt].dmac_size == 0)) {
3029                         /*
3030                          * if we're not already in a new cookie, go to the next
3031                          * cookie.
3032                          */
3033                         if (sgl[cnt].dmac_size != 0) {
3034                                 cnt++;
3035                         }
3036 
3037                         /* save the cookie information */
3038                         sgl[cnt].dmac_laddress = raddr;
3039                         sgl[cnt].dmac_size = psize;
3040 #if defined(__amd64)
3041                         sgl[cnt].dmac_type = 0;
3042 #else
3043                         /*
3044                          * save the buf offset for 32-bit kernel. used in the
3045                          * obsoleted interfaces.
3046                          */
3047                         sgl[cnt].dmac_type = dmar_object->dmao_size - size;
3048 #endif
3049 
3050                 /*
3051                  * this page didn't need the copy buffer, it is physically
3052                  * contiguous with the last page, and it's <= the max cookie
3053                  * size.
3054                  */
3055                 } else {
3056                         sgl[cnt].dmac_size += psize;
3057 
3058                         /*
3059                          * if this exactly ==  the maximum cookie size, and
3060                          * it isn't the last cookie, go to the next cookie.
3061                          */
3062                         if (((sgl[cnt].dmac_size + psize) == maxseg) &&
3063                             ((cnt + 1) < sglinfo->si_max_pages)) {
3064                                 cnt++;
3065                                 sgl[cnt].dmac_laddress = 0;
3066                                 sgl[cnt].dmac_size = 0;
3067                                 sgl[cnt].dmac_type = 0;
3068                         }
3069                 }
3070 
3071                 /*
3072                  * save this page's physical address so we can figure out if the
3073                  * next page is physically contiguous. Keep decrementing size
3074                  * until we are done with the buffer.
3075                  */
3076                 last_page = raddr;
3077                 size -= psize;
3078         }
3079 
3080         /* we're done, save away how many cookies the sgl has */
3081         if (sgl[cnt].dmac_size == 0) {
3082                 ASSERT(cnt < sglinfo->si_max_pages);
3083                 sglinfo->si_sgl_size = cnt;
3084         } else {
3085                 sglinfo->si_sgl_size = cnt + 1;
3086         }
3087 }
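/*
 * Illustrative sketch, not part of the driver: a worked example of how
 * rootnex_get_sgl() above carves a buffer into cookies. Assume (these
 * numbers are made up) MMU_PAGESIZE is 0x1000, the buffer starts 0x200
 * bytes into its first page, dmao_size is 0x2400, and all three pages are
 * physically contiguous, reachable by the DMA engine, don't cross the
 * segment mask, and fit under si_max_cookie_size:
 *
 *	1st page: psize = MIN(0x2400, 0x1000 - 0x200) = 0xe00
 *	2nd page: psize = MIN(0x1600, 0x1000) = 0x1000, merged into the
 *		  same cookie since it is physically contiguous
 *	3rd page: psize = MIN(0x600, 0x1000) = 0x600, also merged
 *
 * yielding a single 0x2400-byte cookie. A discontiguity or an unreachable
 * page would instead start a new cookie (and, for an unreachable page,
 * flag it with ROOTNEX_USES_COPYBUF).
 */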
3088 
3089 static void
3090 rootnex_dvma_get_sgl(ddi_dma_obj_t *dmar_object, ddi_dma_cookie_t *sgl,
3091     rootnex_sglinfo_t *sglinfo)
3092 {
3093         uint64_t offset;
3094         uint64_t maxseg;
3095         uint64_t dvaddr;
3096         struct dvmaseg *dvs;
3097         uint64_t paddr;
3098         uint32_t psize, ssize;
3099         uint32_t size;
3100         uint_t cnt;
3101         int physcontig;
3102 
3103         ASSERT(dmar_object->dmao_type == DMA_OTYP_DVADDR);
3104 
3105         /* shortcuts */
3106         maxseg = sglinfo->si_max_cookie_size;
3107         size = dmar_object->dmao_size;
3108 
3109         cnt = 0;
3110         sglinfo->si_bounce_on_seg = B_FALSE;
3111 
3112         dvs = dmar_object->dmao_obj.dvma_obj.dv_seg;
3113         offset = dmar_object->dmao_obj.dvma_obj.dv_off;
3114         ssize = dvs->dvs_len;
3115         paddr = dvs->dvs_start;
3116         paddr += offset;
3117         psize = MIN(ssize, (maxseg - offset));
3118         dvaddr = paddr + psize;
3119         ssize -= psize;
3120 
3121         sgl[cnt].dmac_laddress = paddr;
3122         sgl[cnt].dmac_size = psize;
3123         sgl[cnt].dmac_type = 0;
3124 
3125         size -= psize;
3126         while (size > 0) {
3127                 if (ssize == 0) {
3128                         dvs++;
3129                         ssize = dvs->dvs_len;
3130                         dvaddr = dvs->dvs_start;
3131                         physcontig = 0;
3132                 } else
3133                         physcontig = 1;
3134 
3135                 paddr = dvaddr;
3136                 psize = MIN(ssize, maxseg);
3137                 dvaddr += psize;
3138                 ssize -= psize;
3139 
3140                 if (!physcontig || !(paddr & sglinfo->si_segmask) ||
3141                     ((sgl[cnt].dmac_size + psize) > maxseg) ||
3142                     (sgl[cnt].dmac_size == 0)) {
3143                         /*
3144                          * if we're not already in a new cookie, go to the next
3145                          * cookie.
3146                          */
3147                         if (sgl[cnt].dmac_size != 0) {
3148                                 cnt++;
3149                         }
3150 
3151                         /* save the cookie information */
3152                         sgl[cnt].dmac_laddress = paddr;
3153                         sgl[cnt].dmac_size = psize;
3154                         sgl[cnt].dmac_type = 0;
3155                 } else {
3156                         sgl[cnt].dmac_size += psize;
3157 
3158                         /*
3159                          * if this exactly ==  the maximum cookie size, and
3160                          * it isn't the last cookie, go to the next cookie.
3161                          */
3162                         if (((sgl[cnt].dmac_size + psize) == maxseg) &&
3163                             ((cnt + 1) < sglinfo->si_max_pages)) {
3164                                 cnt++;
3165                                 sgl[cnt].dmac_laddress = 0;
3166                                 sgl[cnt].dmac_size = 0;
3167                                 sgl[cnt].dmac_type = 0;
3168                         }
3169                 }
3170                 size -= psize;
3171         }
3172 
3173         /* we're done, save away how many cookies the sgl has */
3174         if (sgl[cnt].dmac_size == 0) {
3175                 sglinfo->si_sgl_size = cnt;
3176         } else {
3177                 sglinfo->si_sgl_size = cnt + 1;
3178         }
3179 }
3180 
3181 /*
3182  * rootnex_bind_slowpath()
3183  *    Called in the bind path if the calling driver can't use the sgl without
3184  *    modifying it. We either need to use the copy buffer and/or we will end up
3185  *    with a partial bind.
3186  */
3187 static int
3188 rootnex_bind_slowpath(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
3189     rootnex_dma_t *dma, ddi_dma_attr_t *attr, ddi_dma_obj_t *dmao, int kmflag)
3190 {
3191         rootnex_sglinfo_t *sinfo;
3192         rootnex_window_t *window;
3193         ddi_dma_cookie_t *cookie;
3194         size_t copybuf_used;
3195         size_t dmac_size;
3196         boolean_t partial;
3197         off_t cur_offset;
3198         page_t *cur_pp;
3199         major_t mnum;
3200         int e;
3201         int i;
3202 
3203 
3204         sinfo = &dma->dp_sglinfo;
3205         copybuf_used = 0;
3206         partial = B_FALSE;
3207 
3208         /*
3209          * If we're using the copybuf, set the copybuf state in dma struct.
3210          * Needs to be first since it sets the copy buffer size.
3211          */
3212         if (sinfo->si_copybuf_req != 0) {
3213                 e = rootnex_setup_copybuf(hp, dmareq, dma, attr);
3214                 if (e != DDI_SUCCESS) {
3215                         return (e);
3216                 }
3217         } else {
3218                 dma->dp_copybuf_size = 0;
3219         }
3220 
3221         /*
3222          * Figure out if we need to do a partial mapping. If so, figure out
3223          * if we need to trim the buffers when we munge the sgl.
3224          */
3225         if ((dma->dp_copybuf_size < sinfo->si_copybuf_req) ||
3226             (dmao->dmao_size > dma->dp_maxxfer) ||
3227             ((unsigned)attr->dma_attr_sgllen < sinfo->si_sgl_size)) {
3228                 dma->dp_partial_required = B_TRUE;
3229                 if (attr->dma_attr_granular != 1) {
3230                         dma->dp_trim_required = B_TRUE;
3231                 }
3232         } else {
3233                 dma->dp_partial_required = B_FALSE;
3234                 dma->dp_trim_required = B_FALSE;
3235         }
3236 
3237         /* If we need to do a partial bind, make sure the driver supports it */
3238         if (dma->dp_partial_required &&
3239             !(dmareq->dmar_flags & DDI_DMA_PARTIAL)) {
3240 
3241                 mnum = ddi_driver_major(dma->dp_dip);
3242                 /*
3243                  * patchable which allows us to print one warning per major
3244                  * number.
3245                  */
3246                 if ((rootnex_bind_warn) &&
3247                     ((rootnex_warn_list[mnum] & ROOTNEX_BIND_WARNING) == 0)) {
3248                         rootnex_warn_list[mnum] |= ROOTNEX_BIND_WARNING;
3249                         cmn_err(CE_WARN, "!%s: coding error detected, the "
3250                             "driver is using ddi_dma_attr(9S) incorrectly. "
3251                             "There is a small risk of data corruption in "
3252                             "particular with large I/Os. The driver should be "
3253                             "replaced with a corrected version for proper "
3254                             "system operation. To disable this warning, add "
3255                             "'set rootnex:rootnex_bind_warn=0' to "
3256                             "/etc/system(4).", ddi_driver_name(dma->dp_dip));
3257                 }
3258                 return (DDI_DMA_TOOBIG);
3259         }
3260 
3261         /*
3262          * we might need multiple windows, setup state to handle them. In this
3263          * code path, we will have at least one window.
3264          */
3265         e = rootnex_setup_windows(hp, dma, attr, dmao, kmflag);
3266         if (e != DDI_SUCCESS) {
3267                 rootnex_teardown_copybuf(dma);
3268                 return (e);
3269         }
3270 
3271         window = &dma->dp_window[0];
3272         cookie = &dma->dp_cookies[0];
3273         cur_offset = 0;
3274         rootnex_init_win(hp, dma, window, cookie, cur_offset);
3275         if (dmao->dmao_type == DMA_OTYP_PAGES) {
3276                 cur_pp = dmareq->dmar_object.dmao_obj.pp_obj.pp_pp;
3277         }
3278 
3279         /* loop through all the cookies we got back from get_sgl() */
3280         for (i = 0; i < sinfo->si_sgl_size; i++) {
3281                 /*
3282                  * If we're using the copy buffer, check this cookie and setup
3283                  * its associated copy buffer state. If this cookie uses the
3284                  * copy buffer, make sure we sync this window during dma_sync.
3285                  */
3286                 if (dma->dp_copybuf_size > 0) {
3287                         rootnex_setup_cookie(dmao, dma, cookie,
3288                             cur_offset, &copybuf_used, &cur_pp);
3289                         if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
3290                                 window->wd_dosync = B_TRUE;
3291                         }
3292                 }
3293 
3294                 /*
3295                  * save away the cookie size, since it could be modified in
3296                  * the windowing code.
3297                  */
3298                 dmac_size = cookie->dmac_size;
3299 
3300                 /* if we went over max copybuf size */
3301                 if (dma->dp_copybuf_size &&
3302                     (copybuf_used > dma->dp_copybuf_size)) {
3303                         partial = B_TRUE;
3304                         e = rootnex_copybuf_window_boundary(hp, dma, &window,
3305                             cookie, cur_offset, &copybuf_used);
3306                         if (e != DDI_SUCCESS) {
3307                                 rootnex_teardown_copybuf(dma);
3308                                 rootnex_teardown_windows(dma);
3309                                 return (e);
3310                         }
3311 
3312                         /*
3313                          * if the cookie uses the copy buffer, make sure the
3314                          * new window we just moved to is set to sync.
3315                          */
3316                         if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
3317                                 window->wd_dosync = B_TRUE;
3318                         }
3319                         ROOTNEX_DPROBE1(rootnex__copybuf__window, dev_info_t *,
3320                             dma->dp_dip);
3321 
3322                 /* if the cookie cnt == max sgllen, move to the next window */
3323                 } else if (window->wd_cookie_cnt >=
3324                     (unsigned)attr->dma_attr_sgllen) {
3325                         partial = B_TRUE;
3326                         ASSERT(window->wd_cookie_cnt == attr->dma_attr_sgllen);
3327                         e = rootnex_sgllen_window_boundary(hp, dma, &window,
3328                             cookie, attr, cur_offset);
3329                         if (e != DDI_SUCCESS) {
3330                                 rootnex_teardown_copybuf(dma);
3331                                 rootnex_teardown_windows(dma);
3332                                 return (e);
3333                         }
3334 
3335                         /*
3336                          * if the cookie uses the copy buffer, make sure the
3337                          * new window we just moved to is set to sync.
3338                          */
3339                         if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
3340                                 window->wd_dosync = B_TRUE;
3341                         }
3342                         ROOTNEX_DPROBE1(rootnex__sgllen__window, dev_info_t *,
3343                             dma->dp_dip);
3344 
3345                 /* else if we will be over maxxfer */
3346                 } else if ((window->wd_size + dmac_size) >
3347                     dma->dp_maxxfer) {
3348                         partial = B_TRUE;
3349                         e = rootnex_maxxfer_window_boundary(hp, dma, &window,
3350                             cookie);
3351                         if (e != DDI_SUCCESS) {
3352                                 rootnex_teardown_copybuf(dma);
3353                                 rootnex_teardown_windows(dma);
3354                                 return (e);
3355                         }
3356 
3357                         /*
3358                          * if the cookie uses the copy buffer, make sure the
3359                          * new window we just moved to is set to sync.
3360                          */
3361                         if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
3362                                 window->wd_dosync = B_TRUE;
3363                         }
3364                         ROOTNEX_DPROBE1(rootnex__maxxfer__window, dev_info_t *,
3365                             dma->dp_dip);
3366 
3367                 /* else this cookie fits in the current window */
3368                 } else {
3369                         window->wd_cookie_cnt++;
3370                         window->wd_size += dmac_size;
3371                 }
3372 
3373                 /* track our offset into the buffer, go to the next cookie */
3374                 ASSERT(dmac_size <= dmao->dmao_size);
3375                 ASSERT(cookie->dmac_size <= dmac_size);
3376                 cur_offset += dmac_size;
3377                 cookie++;
3378         }
3379 
3380         /* if we ended up with a zero sized window in the end, clean it up */
3381         if (window->wd_size == 0) {
3382                 hp->dmai_nwin--;
3383                 window--;
3384         }
3385 
3386         ASSERT(window->wd_trim.tr_trim_last == B_FALSE);
3387 
3388         if (!partial) {
3389                 return (DDI_DMA_MAPPED);
3390         }
3391 
3392         ASSERT(dma->dp_partial_required);
3393         return (DDI_DMA_PARTIAL_MAP);
3394 }
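#if 0
/*
 * Illustrative sketch only, never compiled: the kind of leaf-driver bind
 * that ends up in rootnex_bind_slowpath(). The attribute values, function
 * and buffer below are assumptions for the example, not part of this
 * driver. The restrictive dma_attr_addr_hi forces the copy buffer for any
 * page the engine can't reach, and DDI_DMA_PARTIAL allows the bind to
 * return DDI_DMA_PARTIAL_MAP with multiple windows instead of failing
 * with DDI_DMA_TOOBIG.
 */
static ddi_dma_attr_t example_attr = {
	DMA_ATTR_V0,		/* dma_attr_version */
	0x0,			/* dma_attr_addr_lo */
	0x00ffffff,		/* dma_attr_addr_hi: only 24 bits reachable */
	0x00ffffff,		/* dma_attr_count_max */
	0x1,			/* dma_attr_align */
	0x1,			/* dma_attr_burstsizes */
	0x1,			/* dma_attr_minxfer */
	0x00ffffff,		/* dma_attr_maxxfer */
	0x00ffffff,		/* dma_attr_seg */
	16,			/* dma_attr_sgllen */
	0x1,			/* dma_attr_granular */
	0			/* dma_attr_flags */
};

static int
example_bind(dev_info_t *dip, caddr_t buf, size_t len)
{
	ddi_dma_handle_t handle;
	ddi_dma_cookie_t cookie;
	uint_t ccount;
	int e;

	if (ddi_dma_alloc_handle(dip, &example_attr, DDI_DMA_SLEEP, NULL,
	    &handle) != DDI_SUCCESS)
		return (DDI_FAILURE);

	e = ddi_dma_addr_bind_handle(handle, NULL, buf, len,
	    DDI_DMA_READ | DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL,
	    DDI_DMA_SLEEP, NULL, &cookie, &ccount);
	if (e != DDI_DMA_MAPPED && e != DDI_DMA_PARTIAL_MAP) {
		ddi_dma_free_handle(&handle);
		return (DDI_FAILURE);
	}

	/* ... walk the cookies/windows and program the device here ... */

	(void) ddi_dma_unbind_handle(handle);
	ddi_dma_free_handle(&handle);
	return (DDI_SUCCESS);
}
#endif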
3395 
3396 /*
3397  * rootnex_setup_copybuf()
3398  *    Called in bind slowpath. Figures out if we're going to use the copy
3399  *    buffer, and if we do, sets up the basic state to handle it.
3400  */
3401 static int
3402 rootnex_setup_copybuf(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
3403     rootnex_dma_t *dma, ddi_dma_attr_t *attr)
3404 {
3405         rootnex_sglinfo_t *sinfo;
3406         ddi_dma_attr_t lattr;
3407         size_t max_copybuf;
3408         int cansleep;
3409         int e;
3410 #if !defined(__amd64)
3411         int vmflag;
3412 #endif
3413 
3414         ASSERT(!dma->dp_dvma_used);
3415 
3416         sinfo = &dma->dp_sglinfo;
3417 
3418         /* read this first so it's consistent through the routine  */
3419         max_copybuf = i_ddi_copybuf_size() & MMU_PAGEMASK;
3420 
3421         /* We need to call into the rootnex on ddi_dma_sync() */
3422         hp->dmai_rflags &= ~DMP_NOSYNC;
3423 
3424         /* make sure the copybuf size <= the max size */
3425         dma->dp_copybuf_size = MIN(sinfo->si_copybuf_req, max_copybuf);
3426         ASSERT((dma->dp_copybuf_size & MMU_PAGEOFFSET) == 0);
3427 
3428 #if !defined(__amd64)
3429         /*
3430          * if we don't have kva space to copy to/from, allocate the KVA space
3431          * now. We only do this for the 32-bit kernel. We use seg kpm space for
3432          * the 64-bit kernel.
3433          */
3434         if ((dmareq->dmar_object.dmao_type == DMA_OTYP_PAGES) ||
3435             (dmareq->dmar_object.dmao_obj.virt_obj.v_as != NULL)) {
3436 
3437                 /* convert the sleep flags */
3438                 if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
3439                         vmflag = VM_SLEEP;
3440                 } else {
3441                         vmflag = VM_NOSLEEP;
3442                 }
3443 
3444                 /* allocate Kernel VA space that we can bcopy to/from */
3445                 dma->dp_kva = vmem_alloc(heap_arena, dma->dp_copybuf_size,
3446                     vmflag);
3447                 if (dma->dp_kva == NULL) {
3448                         return (DDI_DMA_NORESOURCES);
3449                 }
3450         }
3451 #endif
3452 
3453         /* convert the sleep flags */
3454         if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
3455                 cansleep = 1;
3456         } else {
3457                 cansleep = 0;
3458         }
3459 
3460         /*
3461          * Allocate the actual copy buffer. This needs to fit within the DMA
3462          * engine limits, so we can't use kmem_alloc... We don't need
3463          * contiguous memory (sgllen) since we will be forcing windows on
3464          * sgllen anyway.
3465          */
3466         lattr = *attr;
3467         lattr.dma_attr_align = MMU_PAGESIZE;
3468         lattr.dma_attr_sgllen = -1;     /* no limit */
3469         /*
3470          * if we're using the copy buffer because of seg, use that for our
3471          * upper address limit.
3472          */
3473         if (sinfo->si_bounce_on_seg) {
3474                 lattr.dma_attr_addr_hi = lattr.dma_attr_seg;
3475         }
3476         e = i_ddi_mem_alloc(dma->dp_dip, &lattr, dma->dp_copybuf_size, cansleep,
3477             0, NULL, &dma->dp_cbaddr, &dma->dp_cbsize, NULL);
3478         if (e != DDI_SUCCESS) {
3479 #if !defined(__amd64)
3480                 if (dma->dp_kva != NULL) {
3481                         vmem_free(heap_arena, dma->dp_kva,
3482                             dma->dp_copybuf_size);
3483                 }
3484 #endif
3485                 return (DDI_DMA_NORESOURCES);
3486         }
3487 
3488         ROOTNEX_DPROBE2(rootnex__alloc__copybuf, dev_info_t *, dma->dp_dip,
3489             size_t, dma->dp_copybuf_size);
3490 
3491         return (DDI_SUCCESS);
3492 }
3493 
3494 
3495 /*
3496  * rootnex_setup_windows()
3497  *    Called in bind slowpath to set up the window state. We always have
3498  *    windows in the slowpath, even if the window count is 1.
3499  */
3500 static int
3501 rootnex_setup_windows(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
3502     ddi_dma_attr_t *attr, ddi_dma_obj_t *dmao, int kmflag)
3503 {
3504         rootnex_window_t *windowp;
3505         rootnex_sglinfo_t *sinfo;
3506         size_t copy_state_size;
3507         size_t win_state_size;
3508         size_t state_available;
3509         size_t space_needed;
3510         uint_t copybuf_win;
3511         uint_t maxxfer_win;
3512         size_t space_used;
3513         uint_t sglwin;
3514 
3515 
3516         sinfo = &dma->dp_sglinfo;
3517 
3518         dma->dp_current_win = 0;
3519         hp->dmai_nwin = 0;
3520 
3521         /* If we don't need to do a partial, we only have one window */
3522         if (!dma->dp_partial_required) {
3523                 dma->dp_max_win = 1;
3524 
3525         /*
3526          * we need multiple windows, need to figure out the worst case number
3527          * of windows.
3528          */
3529         } else {
3530                 /*
3531                  * if we need windows because we need more copy buffer than
3532                  * we allow, the worst case number of windows we could need
3533                  * here would be (copybuf space required / copybuf space that
3534                  * we have) plus one for remainder, and plus 2 to handle the
3535                  * extra pages on the trim for the first and last pages of the
3536                  * buffer (a page is the minimum window size so under the right
3537                  * attr settings, you could have a window for each page).
3538                  * The last page will only be hit here if the size is not a
3539                  * multiple of the granularity (which theoretically shouldn't
3540                  * be the case but never has been enforced, so we could have
3541                  * broken things without it).
3542                  */
3543                 if (sinfo->si_copybuf_req > dma->dp_copybuf_size) {
3544                         ASSERT(dma->dp_copybuf_size > 0);
3545                         copybuf_win = (sinfo->si_copybuf_req /
3546                             dma->dp_copybuf_size) + 1 + 2;
3547                 } else {
3548                         copybuf_win = 0;
3549                 }
3550 
3551                 /*
3552                  * if we need windows because we have more cookies than the H/W
3553                  * can handle, the number of windows we would need here would
3554                  * be (cookie count / (cookie count the H/W supports minus
3555                  * 1 [for trim])) plus one for remainder.
3556                  */
3557                 if ((unsigned)attr->dma_attr_sgllen < sinfo->si_sgl_size) {
3558                         sglwin = (sinfo->si_sgl_size /
3559                             (attr->dma_attr_sgllen - 1)) + 1;
3560                 } else {
3561                         sglwin = 0;
3562                 }
3563 
3564                 /*
3565                  * if we need windows because we're binding more memory than the
3566                  * H/W can transfer at once, the number of windows we would need
3567                  * here would be (xfer count / max xfer H/W supports) plus one
3568                  * for remainder, and plus 2 to handle the extra pages on the
3569                  * trim (see above comment about trim)
3570                  */
3571                 if (dmao->dmao_size > dma->dp_maxxfer) {
3572                         maxxfer_win = (dmao->dmao_size /
3573                             dma->dp_maxxfer) + 1 + 2;
3574                 } else {
3575                         maxxfer_win = 0;
3576                 }
3577                 dma->dp_max_win =  copybuf_win + sglwin + maxxfer_win;
3578                 ASSERT(dma->dp_max_win > 0);
3579         }
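        /*
         * Illustrative numbers (assumed, not from a real bind) for the
         * arithmetic above: with si_copybuf_req = 0x5000 and
         * dp_copybuf_size = 0x2000, copybuf_win = (0x5000 / 0x2000) + 1 +
         * 2 = 5; with si_sgl_size = 10 and dma_attr_sgllen = 4,
         * sglwin = (10 / (4 - 1)) + 1 = 4; with dmao_size = 0x30000 and
         * dp_maxxfer = 0x10000, maxxfer_win = (0x30000 / 0x10000) + 1 +
         * 2 = 6; giving a worst case dp_max_win of 15.
         */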
3580         win_state_size = dma->dp_max_win * sizeof (rootnex_window_t);
3581 
3582         /*
3583          * Get space for window and potential copy buffer state. Before we
3584          * go and allocate memory, see if we can get away with using what's
3585          * left in the pre-allocated state or the dynamically allocated sgl.
3586          */
3587         space_used = (uintptr_t)(sinfo->si_sgl_size *
3588             sizeof (ddi_dma_cookie_t));
3589 
3590         /* if we dynamically allocated space for the cookies */
3591         if (dma->dp_need_to_free_cookie) {
3592                 /* if we have more space in the pre-allocated buffer, use it */
3593                 ASSERT(space_used <= dma->dp_cookie_size);
3594                 if ((dma->dp_cookie_size - space_used) <=
3595                     rootnex_state->r_prealloc_size) {
3596                         state_available = rootnex_state->r_prealloc_size;
3597                         windowp = (rootnex_window_t *)dma->dp_prealloc_buffer;
3598 
3599                 /*
3600                  * else, we have more free space in the dynamically allocated
3601                  * buffer, i.e. the buffer wasn't worst case fragmented so we
3602                  * didn't need a lot of cookies.
3603                  */
3604                 } else {
3605                         state_available = dma->dp_cookie_size - space_used;
3606                         windowp = (rootnex_window_t *)
3607                             &dma->dp_cookies[sinfo->si_sgl_size];
3608                 }
3609 
3610         /* we used the pre-allocated buffer */
3611         } else {
3612                 ASSERT(space_used <= rootnex_state->r_prealloc_size);
3613                 state_available = rootnex_state->r_prealloc_size - space_used;
3614                 windowp = (rootnex_window_t *)
3615                     &dma->dp_cookies[sinfo->si_sgl_size];
3616         }
3617 
3618         /*
3619          * figure out how much state we need to track the copy buffer. Add an
3620          * additional 8 bytes for pointer alignment later.
3621          */
3622         if (dma->dp_copybuf_size > 0) {
3623                 copy_state_size = sinfo->si_max_pages *
3624                     sizeof (rootnex_pgmap_t);
3625         } else {
3626                 copy_state_size = 0;
3627         }
3628         /* add an additional 8 bytes for pointer alignment */
3629         space_needed = win_state_size + copy_state_size + 0x8;
3630 
3631         /* if we have enough space already, use it */
3632         if (state_available >= space_needed) {
3633                 dma->dp_window = windowp;
3634                 dma->dp_need_to_free_window = B_FALSE;
3635 
3636         /* not enough space, need to allocate more. */
3637         } else {
3638                 dma->dp_window = kmem_alloc(space_needed, kmflag);
3639                 if (dma->dp_window == NULL) {
3640                         return (DDI_DMA_NORESOURCES);
3641                 }
3642                 dma->dp_need_to_free_window = B_TRUE;
3643                 dma->dp_window_size = space_needed;
3644                 ROOTNEX_DPROBE2(rootnex__bind__sp__alloc, dev_info_t *,
3645                     dma->dp_dip, size_t, space_needed);
3646         }
3647 
3648         /*
3649          * we allocate copy buffer state and window state at the same time.
3650          * setup our copy buffer state pointers. Make sure it's aligned.
3651          */
3652         if (dma->dp_copybuf_size > 0) {
3653                 dma->dp_pgmap = (rootnex_pgmap_t *)(((uintptr_t)
3654                     &dma->dp_window[dma->dp_max_win] + 0x7) & ~0x7);
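                /*
                 * (the "+ 0x7) & ~0x7" above simply rounds the address up
                 * to the next 8-byte boundary, e.g. 0x1001 -> 0x1008, so
                 * the rootnex_pgmap_t array is pointer aligned)
                 */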
3655 
3656 #if !defined(__amd64)
3657                 /*
3658                  * make sure all pm_mapped, pm_vaddr, and pm_pp are set to
3659                  * false/NULL. Should be quicker to bzero vs loop and set.
3660                  */
3661                 bzero(dma->dp_pgmap, copy_state_size);
3662 #endif
3663         } else {
3664                 dma->dp_pgmap = NULL;
3665         }
3666 
3667         return (DDI_SUCCESS);
3668 }
3669 
3670 
3671 /*
3672  * rootnex_teardown_copybuf()
3673  *    cleans up after rootnex_setup_copybuf()
3674  */
3675 static void
3676 rootnex_teardown_copybuf(rootnex_dma_t *dma)
3677 {
3678 #if !defined(__amd64)
3679         int i;
3680 
3681         /*
3682          * if we allocated kernel heap VMEM space, go through all the pages
3683          * and unmap any that were mapped into the kernel heap VMEM arena.
3684          * Then free the VMEM space.
3685          */
3686         if (dma->dp_kva != NULL) {
3687                 for (i = 0; i < dma->dp_sglinfo.si_max_pages; i++) {
3688                         if (dma->dp_pgmap[i].pm_mapped) {
3689                                 hat_unload(kas.a_hat, dma->dp_pgmap[i].pm_kaddr,
3690                                     MMU_PAGESIZE, HAT_UNLOAD);
3691                                 dma->dp_pgmap[i].pm_mapped = B_FALSE;
3692                         }
3693                 }
3694 
3695                 vmem_free(heap_arena, dma->dp_kva, dma->dp_copybuf_size);
3696         }
3697 
3698 #endif
3699 
3700         /* if we allocated a copy buffer, free it */
3701         if (dma->dp_cbaddr != NULL) {
3702                 i_ddi_mem_free(dma->dp_cbaddr, NULL);
3703         }
3704 }
3705 
3706 
3707 /*
3708  * rootnex_teardown_windows()
3709  *    cleans up after rootnex_setup_windows()
3710  */
3711 static void
3712 rootnex_teardown_windows(rootnex_dma_t *dma)
3713 {
3714         /*
3715          * if we had to allocate window state on the last bind (because we
3716          * didn't have enough pre-allocated space in the handle), free it.
3717          */
3718         if (dma->dp_need_to_free_window) {
3719                 kmem_free(dma->dp_window, dma->dp_window_size);
3720         }
3721 }
3722 
3723 
3724 /*
3725  * rootnex_init_win()
3726  *    Called in bind slow path during creation of a new window. Initializes
3727  *    window state to default values.
3728  */
3729 /*ARGSUSED*/
3730 static void
3731 rootnex_init_win(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
3732     rootnex_window_t *window, ddi_dma_cookie_t *cookie, off_t cur_offset)
3733 {
3734         hp->dmai_nwin++;
3735         window->wd_dosync = B_FALSE;
3736         window->wd_offset = cur_offset;
3737         window->wd_size = 0;
3738         window->wd_first_cookie = cookie;
3739         window->wd_cookie_cnt = 0;
3740         window->wd_trim.tr_trim_first = B_FALSE;
3741         window->wd_trim.tr_trim_last = B_FALSE;
3742         window->wd_trim.tr_first_copybuf_win = B_FALSE;
3743         window->wd_trim.tr_last_copybuf_win = B_FALSE;
3744 #if !defined(__amd64)
3745         window->wd_remap_copybuf = dma->dp_cb_remaping;
3746 #endif
3747 }
3748 
3749 
3750 /*
3751  * rootnex_setup_cookie()
3752  *    Called in the bind slow path when the sgl uses the copy buffer. If any of
3753  *    the sgl uses the copy buffer, we need to go through each cookie, figure
3754  *    out if it uses the copy buffer, and if it does, save away everything we'll
3755  *    need during sync.
3756  */
3757 static void
3758 rootnex_setup_cookie(ddi_dma_obj_t *dmar_object, rootnex_dma_t *dma,
3759     ddi_dma_cookie_t *cookie, off_t cur_offset, size_t *copybuf_used,
3760     page_t **cur_pp)
3761 {
3762         boolean_t copybuf_sz_power_2;
3763         rootnex_sglinfo_t *sinfo;
3764         paddr_t paddr;
3765         uint_t pidx;
3766         uint_t pcnt;
3767         off_t poff;
3768 #if defined(__amd64)
3769         pfn_t pfn;
3770 #else
3771         page_t **pplist;
3772 #endif
3773 
3774         ASSERT(dmar_object->dmao_type != DMA_OTYP_DVADDR);
3775 
3776         sinfo = &dma->dp_sglinfo;
3777 
3778         /*
3779          * Calculate the page index relative to the start of the buffer. The
3780          * index to the current page for our buffer is the offset into the
3781          * first page of the buffer plus our current offset into the buffer
3782          * itself, shifted of course...
3783          */
3784         pidx = (sinfo->si_buf_offset + cur_offset) >> MMU_PAGESHIFT;
3785         ASSERT(pidx < sinfo->si_max_pages);
3786 
3787         /* if this cookie uses the copy buffer */
3788         if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
3789                 /*
3790                  * NOTE: we know that since this cookie uses the copy buffer, it
3791                  * is <= MMU_PAGESIZE.
3792                  */
3793 
3794                 /*
3795                  * get the offset into the page. For the 64-bit kernel, get the
3796                  * pfn which we'll use with seg kpm.
3797                  */
3798                 poff = cookie->dmac_laddress & MMU_PAGEOFFSET;
3799 #if defined(__amd64)
3800                 /* mfn_to_pfn() is a NOP on i86pc */
3801                 pfn = mfn_to_pfn(cookie->dmac_laddress >> MMU_PAGESHIFT);
3802 #endif /* __amd64 */
3803 
3804                 /* figure out if the copybuf size is a power of 2 */
3805                 if (!ISP2(dma->dp_copybuf_size)) {
3806                         copybuf_sz_power_2 = B_FALSE;
3807                 } else {
3808                         copybuf_sz_power_2 = B_TRUE;
3809                 }
3810 
3811                 /* This page uses the copy buffer */
3812                 dma->dp_pgmap[pidx].pm_uses_copybuf = B_TRUE;
3813 
3814                 /*
3815                  * save the copy buffer KVA that we'll use with this page.
3816                  * if we still fit within the copybuf, it's a simple add.
3817                  * otherwise, we need to wrap over using & or % accordingly.
3818                  */
3819                 if ((*copybuf_used + MMU_PAGESIZE) <= dma->dp_copybuf_size) {
3820                         dma->dp_pgmap[pidx].pm_cbaddr = dma->dp_cbaddr +
3821                             *copybuf_used;
3822                 } else {
3823                         if (copybuf_sz_power_2) {
3824                                 dma->dp_pgmap[pidx].pm_cbaddr = (caddr_t)(
3825                                     (uintptr_t)dma->dp_cbaddr +
3826                                     (*copybuf_used &
3827                                     (dma->dp_copybuf_size - 1)));
3828                         } else {
3829                                 dma->dp_pgmap[pidx].pm_cbaddr = (caddr_t)(
3830                                     (uintptr_t)dma->dp_cbaddr +
3831                                     (*copybuf_used % dma->dp_copybuf_size));
3832                         }
3833                 }
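                /*
                 * Illustrative numbers (assumed): with dp_copybuf_size =
                 * 0x4000 (a power of two) and *copybuf_used = 0x5000, the
                 * masked form 0x5000 & (0x4000 - 1) and the modulo form
                 * 0x5000 % 0x4000 both wrap to an offset of 0x1000 into the
                 * copy buffer; the mask is just the cheaper operation.
                 */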
3834 
3835                 /*
3836                  * overwrite the cookie's physical address with the physical
3837                  * address of the copy buffer page that we will use.
3839                  */
3840                 paddr = pfn_to_pa(hat_getpfnum(kas.a_hat,
3841                     dma->dp_pgmap[pidx].pm_cbaddr)) + poff;
3842 
3843                 cookie->dmac_laddress = ROOTNEX_PADDR_TO_RBASE(paddr);
3844 
3845                 /* if we have a kernel VA, it's easy, just save that address */
3846                 if ((dmar_object->dmao_type != DMA_OTYP_PAGES) &&
3847                     (sinfo->si_asp == &kas)) {
3848                         /*
3849                          * save away the page aligned virtual address of the
3850                          * driver buffer. Offsets are handled in the sync code.
3851                          */
3852                         dma->dp_pgmap[pidx].pm_kaddr = (caddr_t)(((uintptr_t)
3853                             dmar_object->dmao_obj.virt_obj.v_addr + cur_offset)
3854                             & MMU_PAGEMASK);
3855 #if !defined(__amd64)
3856                         /*
3857                          * we didn't need to, and will never need to map this
3858                          * page.
3859                          */
3860                         dma->dp_pgmap[pidx].pm_mapped = B_FALSE;
3861 #endif
3862 
3863                 /* we don't have a kernel VA. We need one for the bcopy. */
3864                 } else {
3865 #if defined(__amd64)
3866                         /*
3867                          * for the 64-bit kernel, it's easy. We use seg kpm to
3868                          * get a Kernel VA for the corresponding pfn.
3869                          */
3870                         dma->dp_pgmap[pidx].pm_kaddr = hat_kpm_pfn2va(pfn);
3871 #else
3872                         /*
3873                          * for the 32-bit kernel, this is a pain. First we'll
3874                          * save away the page_t or user VA for this page. This
3875                          * is needed in rootnex_dma_win() when we switch to a
3876                          * new window which requires us to re-map the copy
3877                          * buffer.
3878                          */
3879                         pplist = dmar_object->dmao_obj.virt_obj.v_priv;
3880                         if (dmar_object->dmao_type == DMA_OTYP_PAGES) {
3881                                 dma->dp_pgmap[pidx].pm_pp = *cur_pp;
3882                                 dma->dp_pgmap[pidx].pm_vaddr = NULL;
3883                         } else if (pplist != NULL) {
3884                                 dma->dp_pgmap[pidx].pm_pp = pplist[pidx];
3885                                 dma->dp_pgmap[pidx].pm_vaddr = NULL;
3886                         } else {
3887                                 dma->dp_pgmap[pidx].pm_pp = NULL;
3888                                 dma->dp_pgmap[pidx].pm_vaddr = (caddr_t)
3889                                     (((uintptr_t)
3890                                     dmar_object->dmao_obj.virt_obj.v_addr +
3891                                     cur_offset) & MMU_PAGEMASK);
3892                         }
3893 
3894                         /*
3895                          * save away the page aligned virtual address which was
3896                          * allocated from the kernel heap arena (taking into
3897                          * account if we need more copy buffer than we allocated
3898                          * and use multiple windows to handle this, i.e. &, %).
3899                          * NOTE: there isn't any physical memory backing this
3900                          * virtual address space currently.
3901                          */
3902                         if ((*copybuf_used + MMU_PAGESIZE) <=
3903                             dma->dp_copybuf_size) {
3904                                 dma->dp_pgmap[pidx].pm_kaddr = (caddr_t)
3905                                     (((uintptr_t)dma->dp_kva + *copybuf_used) &
3906                                     MMU_PAGEMASK);
3907                         } else {
3908                                 if (copybuf_sz_power_2) {
3909                                         dma->dp_pgmap[pidx].pm_kaddr = (caddr_t)
3910                                             (((uintptr_t)dma->dp_kva +
3911                                             (*copybuf_used &
3912                                             (dma->dp_copybuf_size - 1))) &
3913                                             MMU_PAGEMASK);
3914                                 } else {
3915                                         dma->dp_pgmap[pidx].pm_kaddr = (caddr_t)
3916                                             (((uintptr_t)dma->dp_kva +
3917                                             (*copybuf_used %
3918                                             dma->dp_copybuf_size)) &
3919                                             MMU_PAGEMASK);
3920                                 }
3921                         }
3922 
3923                         /*
3924                          * if we haven't used up the available copy buffer yet,
3925                          * map the kva to the physical page.
3926                          */
3927                         if (!dma->dp_cb_remaping && ((*copybuf_used +
3928                             MMU_PAGESIZE) <= dma->dp_copybuf_size)) {
3929                                 dma->dp_pgmap[pidx].pm_mapped = B_TRUE;
3930                                 if (dma->dp_pgmap[pidx].pm_pp != NULL) {
3931                                         i86_pp_map(dma->dp_pgmap[pidx].pm_pp,
3932                                             dma->dp_pgmap[pidx].pm_kaddr);
3933                                 } else {
3934                                         i86_va_map(dma->dp_pgmap[pidx].pm_vaddr,
3935                                             sinfo->si_asp,
3936                                             dma->dp_pgmap[pidx].pm_kaddr);
3937                                 }
3938 
3939                         /*
3940                          * we've used up the available copy buffer, so this page
3941                          * will have to be mapped during rootnex_dma_win() when
3942                          * we switch to a new window, which requires us to re-map
3943                          * the copy buffer. (32-bit kernel only)
3944                          */
3945                         } else {
3946                                 dma->dp_pgmap[pidx].pm_mapped = B_FALSE;
3947                         }
3948 #endif
3949                         /* go to the next page_t */
3950                         if (dmar_object->dmao_type == DMA_OTYP_PAGES) {
3951                                 *cur_pp = (*cur_pp)->p_next;
3952                         }
3953                 }
3954 
3955                 /* add to the copy buffer count */
3956                 *copybuf_used += MMU_PAGESIZE;
3957 
3958         /*
3959          * This cookie doesn't use the copy buffer. Walk through the pages this
3960          * cookie occupies to reflect this.
3961          */
3962         } else {
3963                 /*
3964                  * figure out how many pages the cookie occupies. We need to
3965                  * use the original page offset of the buffer and the cookies
3966                  * offset in the buffer to do this.
3967                  */
3968                 poff = (sinfo->si_buf_offset + cur_offset) & MMU_PAGEOFFSET;
3969                 pcnt = mmu_btopr(cookie->dmac_size + poff);
3970 
3971                 while (pcnt > 0) {
3972 #if !defined(__amd64)
3973                         /*
3974                          * the 32-bit kernel doesn't have seg kpm, so we need
3975                          * to map in the driver buffer (if it didn't come down
3976                          * with a kernel VA) on the fly. Since this page doesn't
3977                          * use the copy buffer, it doesn't, nor will it ever,
3978                          * have to be mapped in.
3979                          */
3980                         dma->dp_pgmap[pidx].pm_mapped = B_FALSE;
3981 #endif
3982                         dma->dp_pgmap[pidx].pm_uses_copybuf = B_FALSE;
3983 
3984                         /*
3985                          * we need to update pidx and cur_pp or we'll lose
3986                          * track of where we are.
3987                          */
3988                         if (dmar_object->dmao_type == DMA_OTYP_PAGES) {
3989                                 *cur_pp = (*cur_pp)->p_next;
3990                         }
3991                         pidx++;
3992                         pcnt--;
3993                 }
3994         }
3995 }
3996 
3997 
3998 /*
3999  * rootnex_sgllen_window_boundary()
4000  *    Called in the bind slow path when the next cookie causes us to exceed (in
4001  *    this case == since we start at 0 and sgllen starts at 1) the maximum sgl
4002  *    length supported by the DMA H/W.
4003  */
4004 static int
4005 rootnex_sgllen_window_boundary(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
4006     rootnex_window_t **windowp, ddi_dma_cookie_t *cookie, ddi_dma_attr_t *attr,
4007     off_t cur_offset)
4008 {
4009         off_t new_offset;
4010         size_t trim_sz;
4011         off_t coffset;
4012 
4013 
4014         /*
4015          * if we know we'll never have to trim, it's pretty easy. Just move to
4016          * the next window and init it. We're done.
4017          */
4018         if (!dma->dp_trim_required) {
4019                 (*windowp)++;
4020                 rootnex_init_win(hp, dma, *windowp, cookie, cur_offset);
4021                 (*windowp)->wd_cookie_cnt++;
4022                 (*windowp)->wd_size = cookie->dmac_size;
4023                 return (DDI_SUCCESS);
4024         }
4025 
4026         /* figure out how much we need to trim from the window */
4027         ASSERT(attr->dma_attr_granular != 0);
4028         if (dma->dp_granularity_power_2) {
4029                 trim_sz = (*windowp)->wd_size & (attr->dma_attr_granular - 1);
4030         } else {
4031                 trim_sz = (*windowp)->wd_size % attr->dma_attr_granular;
4032         }
4033 
4034         /* The window's a whole multiple of granularity. We're done */
4035         if (trim_sz == 0) {
4036                 (*windowp)++;
4037                 rootnex_init_win(hp, dma, *windowp, cookie, cur_offset);
4038                 (*windowp)->wd_cookie_cnt++;
4039                 (*windowp)->wd_size = cookie->dmac_size;
4040                 return (DDI_SUCCESS);
4041         }
4042 
4043         /*
4044          * The window's not a whole multiple of granularity, since we know this
4045          * is due to the sgllen, we need to go back to the last cookie and trim
4046          * that one, add the left over part of the old cookie into the new
4047          * window, and then add in the new cookie into the new window.
4048          */
4049 
4050         /*
4051          * make sure the driver isn't making us do something bad... Trimming and
4052          * sgllen == 1 don't go together.
4053          */
4054         if (attr->dma_attr_sgllen == 1) {
4055                 return (DDI_DMA_NOMAPPING);
4056         }
4057 
4058         /*
4059          * first, setup the current window to account for the trim. Need to go
4060          * back to the last cookie for this.
4061          */
4062         cookie--;
4063         (*windowp)->wd_trim.tr_trim_last = B_TRUE;
4064         (*windowp)->wd_trim.tr_last_cookie = cookie;
4065         (*windowp)->wd_trim.tr_last_paddr = cookie->dmac_laddress;
4066         ASSERT(cookie->dmac_size > trim_sz);
4067         (*windowp)->wd_trim.tr_last_size = cookie->dmac_size - trim_sz;
4068         (*windowp)->wd_size -= trim_sz;
4069 
4070         /* save the buffer offsets for the next window */
4071         coffset = cookie->dmac_size - trim_sz;
4072         new_offset = (*windowp)->wd_offset + (*windowp)->wd_size;
4073 
4074         /*
4075          * set this now in case this is the first window. all other cases are
4076          * set in dma_win()
4077          */
4078         cookie->dmac_size = (*windowp)->wd_trim.tr_last_size;
4079 
4080         /*
4081          * initialize the next window using what's left over in the previous
4082          * cookie.
4083          */
4084         (*windowp)++;
4085         rootnex_init_win(hp, dma, *windowp, cookie, new_offset);
4086         (*windowp)->wd_cookie_cnt++;
4087         (*windowp)->wd_trim.tr_trim_first = B_TRUE;
4088         (*windowp)->wd_trim.tr_first_paddr = cookie->dmac_laddress + coffset;
4089         (*windowp)->wd_trim.tr_first_size = trim_sz;
4090         if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
4091                 (*windowp)->wd_dosync = B_TRUE;
4092         }
4093 
4094         /*
4095          * now go back to the current cookie and add it to the new window. Set
4096          * the new window size to what was left over from the previous
4097          * cookie plus what's in the current cookie.
4098          */
4099         cookie++;
4100         (*windowp)->wd_cookie_cnt++;
4101         (*windowp)->wd_size = trim_sz + cookie->dmac_size;
4102 
4103         /*
4104          * trim plus the next cookie could put us over maxxfer (a cookie can
4105          * be at most maxxfer in size). Handle that case.
4106          */
4107         if ((*windowp)->wd_size > dma->dp_maxxfer) {
4108                 /*
4109                  * maxxfer is already a whole multiple of granularity, and this
4110                  * trim will be <= the previous trim (since a cookie can't be
4111                  * larger than maxxfer). Make things simple here.
4112                  */
4113                 trim_sz = (*windowp)->wd_size - dma->dp_maxxfer;
4114                 (*windowp)->wd_trim.tr_trim_last = B_TRUE;
4115                 (*windowp)->wd_trim.tr_last_cookie = cookie;
4116                 (*windowp)->wd_trim.tr_last_paddr = cookie->dmac_laddress;
4117                 (*windowp)->wd_trim.tr_last_size = cookie->dmac_size - trim_sz;
4118                 (*windowp)->wd_size -= trim_sz;
4119                 ASSERT((*windowp)->wd_size == dma->dp_maxxfer);
4120 
4121                 /* save the buffer offsets for the next window */
4122                 coffset = cookie->dmac_size - trim_sz;
4123                 new_offset = (*windowp)->wd_offset + (*windowp)->wd_size;
4124 
4125                 /* setup the next window */
4126                 (*windowp)++;
4127                 rootnex_init_win(hp, dma, *windowp, cookie, new_offset);
4128                 (*windowp)->wd_cookie_cnt++;
4129                 (*windowp)->wd_trim.tr_trim_first = B_TRUE;
4130                 (*windowp)->wd_trim.tr_first_paddr = cookie->dmac_laddress +
4131                     coffset;
4132                 (*windowp)->wd_trim.tr_first_size = trim_sz;
4133         }
4134 
4135         return (DDI_SUCCESS);
4136 }
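/*
 * Illustrative trim arithmetic for the function above (assumed numbers, not
 * taken from a real bind): with dma_attr_granular = 0x200 and a window that
 * has grown to wd_size = 0x1500 when the sgllen limit is hit, trim_sz =
 * 0x1500 % 0x200 = 0x100. The last cookie of the old window is shortened by
 * 0x100 bytes, and that 0x100-byte remainder becomes the trimmed first part
 * of the next window, keeping every window a whole multiple of the
 * granularity.
 */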
4137 
4138 
4139 /*
4140  * rootnex_copybuf_window_boundary()
4141  *    Called in bind slowpath when we get to a window boundary because we used
4142  *    up all the copy buffer that we have.
4143  */
4144 static int
4145 rootnex_copybuf_window_boundary(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
4146     rootnex_window_t **windowp, ddi_dma_cookie_t *cookie, off_t cur_offset,
4147     size_t *copybuf_used)
4148 {
4149         rootnex_sglinfo_t *sinfo;
4150         off_t new_offset;
4151         size_t trim_sz;
4152         paddr_t paddr;
4153         off_t coffset;
4154         uint_t pidx;
4155         off_t poff;
4156 
4157 
4158         sinfo = &dma->dp_sglinfo;
4159 
4160         /*
4161          * the copy buffer should be a whole multiple of page size. We know that
4162          * this cookie is <= MMU_PAGESIZE.
4163          */
4164         ASSERT(cookie->dmac_size <= MMU_PAGESIZE);
4165 
4166         /*
4167          * from now on, all new windows in this bind need to be re-mapped during
4168          * ddi_dma_getwin() (32-bit kernel only), i.e. we ran out of copybuf
4169          * space...
4170          */
4171 #if !defined(__amd64)
4172         dma->dp_cb_remaping = B_TRUE;
4173 #endif
4174 
4175         /* reset copybuf used */
4176         *copybuf_used = 0;
4177 
4178         /*
4179          * if we don't have to trim (since granularity is set to 1), go to the
4180          * next window and add the current cookie to it. We know the current
4181          * cookie uses the copy buffer since we're in this code path.
4182          */
4183         if (!dma->dp_trim_required) {
4184                 (*windowp)++;
4185                 rootnex_init_win(hp, dma, *windowp, cookie, cur_offset);
4186 
4187                 /* Add this cookie to the new window */
4188                 (*windowp)->wd_cookie_cnt++;
4189                 (*windowp)->wd_size += cookie->dmac_size;
4190                 *copybuf_used += MMU_PAGESIZE;
4191                 return (DDI_SUCCESS);
4192         }
4193 
4194         /*
4195          * *** may need to trim, figure it out.
4196          */
4197 
4198         /* figure out how much we need to trim from the window */
4199         if (dma->dp_granularity_power_2) {
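                     /*
                      * granularity is a power of two here, so masking off the
                      * low bits is equivalent to the modulo in the else clause.
                      */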
4200                 trim_sz = (*windowp)->wd_size &
4201                     (hp->dmai_attr.dma_attr_granular - 1);
4202         } else {
4203                 trim_sz = (*windowp)->wd_size % hp->dmai_attr.dma_attr_granular;
4204         }
4205 
4206         /*
4207          * if the window's a whole multiple of granularity, go to the next
4208          * window, init it, then add in the current cookie. We know the current
4209          * cookie uses the copy buffer since we're in this code path.
4210          */
4211         if (trim_sz == 0) {
4212                 (*windowp)++;
4213                 rootnex_init_win(hp, dma, *windowp, cookie, cur_offset);
4214 
4215                 /* Add this cookie to the new window */
4216                 (*windowp)->wd_cookie_cnt++;
4217                 (*windowp)->wd_size += cookie->dmac_size;
4218                 *copybuf_used += MMU_PAGESIZE;
4219                 return (DDI_SUCCESS);
4220         }
4221 
4222         /*
4223          * *** We figured it out, we definitely need to trim
4224          */
4225 
4226         /*
4227          * make sure the driver isn't making us do something bad...
4228          * Trimming and sgllen == 1 don't go together.
4229          */
4230         if (hp->dmai_attr.dma_attr_sgllen == 1) {
4231                 return (DDI_DMA_NOMAPPING);
4232         }
4233 
4234         /*
4235          * first, setup the current window to account for the trim. Need to go
4236          * back to the last cookie for this. Some of the last cookie will be in
4237          * the current window, and some of the last cookie will be in the new
4238          * window. All of the current cookie will be in the new window.
4239          */
4240         cookie--;
4241         (*windowp)->wd_trim.tr_trim_last = B_TRUE;
4242         (*windowp)->wd_trim.tr_last_cookie = cookie;
4243         (*windowp)->wd_trim.tr_last_paddr = cookie->dmac_laddress;
4244         ASSERT(cookie->dmac_size > trim_sz);
4245         (*windowp)->wd_trim.tr_last_size = cookie->dmac_size - trim_sz;
4246         (*windowp)->wd_size -= trim_sz;
4247 
4248         /*
4249          * we're trimming the last cookie (not the current cookie). That last
4250          * cookie may or may not have been using the copy buffer (we know the
4251          * cookie passed in uses the copy buffer since we're in this code
4252          * path).
4253          *
4254          * If the last cookie doesn't use the copy buffer, there's nothing
4255          * special to do. However, if it does use the copy buffer, it will be
4256          * both the last page in the current window and the first page in the
4257          * next window. Since we are reusing the copy buffer (and KVA space on
4258          * the 32-bit kernel), this page will use the end of the copy buffer
4259          * in the current window, and the start of the copy buffer in the next
4260          * window. Track that info... The cookie physical address was already
4261          * set to the copy buffer physical address in setup_cookie.
4262          */
4263         if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
4264                 pidx = (sinfo->si_buf_offset + (*windowp)->wd_offset +
4265                     (*windowp)->wd_size) >> MMU_PAGESHIFT;
4266                 (*windowp)->wd_trim.tr_last_copybuf_win = B_TRUE;
4267                 (*windowp)->wd_trim.tr_last_pidx = pidx;
4268                 (*windowp)->wd_trim.tr_last_cbaddr =
4269                     dma->dp_pgmap[pidx].pm_cbaddr;
4270 #if !defined(__amd64)
4271                 (*windowp)->wd_trim.tr_last_kaddr =
4272                     dma->dp_pgmap[pidx].pm_kaddr;
4273 #endif
4274         }
4275 
4276         /* save the buffer offsets for the next window */
4277         coffset = cookie->dmac_size - trim_sz;
4278         new_offset = (*windowp)->wd_offset + (*windowp)->wd_size;
4279 
4280         /*
4281          * set this now in case this is the first window. all other cases are
4282          * set in rootnex_dma_win()
4283          */
4284         cookie->dmac_size = (*windowp)->wd_trim.tr_last_size;
4285 
4286         /*
4287          * initialize the next window using what's left over in the previous
4288          * cookie.
4289          */
4290         (*windowp)++;
4291         rootnex_init_win(hp, dma, *windowp, cookie, new_offset);
4292         (*windowp)->wd_cookie_cnt++;
4293         (*windowp)->wd_trim.tr_trim_first = B_TRUE;
4294         (*windowp)->wd_trim.tr_first_paddr = cookie->dmac_laddress + coffset;
4295         (*windowp)->wd_trim.tr_first_size = trim_sz;
4296 
4297         /*
4298          * again, we're tracking if the last cookie uses the copy buffer.
4299          * read the comment above for more info on why we need to track
4300          * additional state.
4301          *
4302          * For the first cookie in the new window, we need to reset the
4303          * physical address we DMA into to the start of the copy buffer plus
4304          * any initial page offset which may be present.
4305          */
4306         if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
4307                 (*windowp)->wd_dosync = B_TRUE;
4308                 (*windowp)->wd_trim.tr_first_copybuf_win = B_TRUE;
4309                 (*windowp)->wd_trim.tr_first_pidx = pidx;
4310                 (*windowp)->wd_trim.tr_first_cbaddr = dma->dp_cbaddr;
4311                 poff = (*windowp)->wd_trim.tr_first_paddr & MMU_PAGEOFFSET;
4312 
4313                 paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, dma->dp_cbaddr)) +
4314                     poff;
4315                 (*windowp)->wd_trim.tr_first_paddr =
4316                     ROOTNEX_PADDR_TO_RBASE(paddr);
4317 
4318 #if !defined(__amd64)
4319                 (*windowp)->wd_trim.tr_first_kaddr = dma->dp_kva;
4320 #endif
4321                 /* account for the cookie copybuf usage in the new window */
4322                 *copybuf_used += MMU_PAGESIZE;
4323 
4324                 /*
4325                  * every piece of code has to have a hack, and here is this
4326                  * one's :-)
4327                  *
4328                  * There is a complex interaction between setup_cookie and the
4329                  * copybuf window boundary. The complexity had to be in either
4330                  * the maxxfer window, or the copybuf window, and I chose the
4331                  * copybuf code.
4332                  *
4333                  * So in this code path, we have taken the last cookie,
4334                  * virtually broken it in half due to the trim, and it happens
4335                  * to use the copybuf, which further complicates life. At the
4336                  * same time, we have already set up the current cookie, which
4337                  * is now wrong. More background info: the current cookie uses
4338                  * the copybuf, so it is at most a page long. So we need to
4339                  * fix the current cookie's copy buffer address, physical
4340                  * address, and kva for the 32-bit kernel. We do this by
4341                  * bumping them by a page size (of course, we can't do this to
4342                  * the physical address directly since the copy buffer may not
4343                  * be physically contiguous).
4344                  */
4345                 cookie++;
4346                 dma->dp_pgmap[pidx + 1].pm_cbaddr += MMU_PAGESIZE;
4347                 poff = cookie->dmac_laddress & MMU_PAGEOFFSET;
4348 
4349                 paddr = pfn_to_pa(hat_getpfnum(kas.a_hat,
4350                     dma->dp_pgmap[pidx + 1].pm_cbaddr)) + poff;
4351                 cookie->dmac_laddress = ROOTNEX_PADDR_TO_RBASE(paddr);
4352 
4353 #if !defined(__amd64)
4354                 ASSERT(dma->dp_pgmap[pidx + 1].pm_mapped == B_FALSE);
4355                 dma->dp_pgmap[pidx + 1].pm_kaddr += MMU_PAGESIZE;
4356 #endif
4357         } else {
4358                 /* go back to the current cookie */
4359                 cookie++;
4360         }
4361 
4362         /*
4363          * add the current cookie to the new window. set the new window size to
4364          * what was left over from the previous cookie and what's in the
4365          * current cookie.
4366          */
4367         (*windowp)->wd_cookie_cnt++;
4368         (*windowp)->wd_size = trim_sz + cookie->dmac_size;
4369         ASSERT((*windowp)->wd_size < dma->dp_maxxfer);
4370 
4371         /*
4372          * we know that the cookie passed in always uses the copy buffer. We
4373          * wouldn't be here if it didn't.
4374          */
4375         *copybuf_used += MMU_PAGESIZE;
4376 
4377         return (DDI_SUCCESS);
4378 }
4379 
4380 
4381 /*
4382  * rootnex_maxxfer_window_boundary()
4383  *    Called in bind slowpath when we get to a window boundary because we will
4384  *    go over maxxfer.
4385  */
4386 static int
4387 rootnex_maxxfer_window_boundary(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
4388     rootnex_window_t **windowp, ddi_dma_cookie_t *cookie)
4389 {
4390         size_t dmac_size;
4391         off_t new_offset;
4392         size_t trim_sz;
4393         off_t coffset;
4394 
4395 
4396         /*
4397          * calculate how much we have to trim off of the current cookie to equal
4398          * maxxfer. We don't have to account for granularity here since our
4399          * maxxfer already takes that into account.
4400          */
4401         trim_sz = ((*windowp)->wd_size + cookie->dmac_size) - dma->dp_maxxfer;
4402         ASSERT(trim_sz <= cookie->dmac_size);
4403         ASSERT(trim_sz <= dma->dp_maxxfer);
4404 
4405         /* save cookie size since we need it later and we might change it */
4406         dmac_size = cookie->dmac_size;
4407 
4408         /*
4409          * if we're not trimming the entire cookie, setup the current window to
4410          * account for the trim.
4411          */
4412         if (trim_sz < cookie->dmac_size) {
4413                 (*windowp)->wd_cookie_cnt++;
4414                 (*windowp)->wd_trim.tr_trim_last = B_TRUE;
4415                 (*windowp)->wd_trim.tr_last_cookie = cookie;
4416                 (*windowp)->wd_trim.tr_last_paddr = cookie->dmac_laddress;
4417                 (*windowp)->wd_trim.tr_last_size = cookie->dmac_size - trim_sz;
4418                 (*windowp)->wd_size = dma->dp_maxxfer;
4419 
4420                 /*
4421                  * set the adjusted cookie size now in case this is the first
4422                  * window. All other windows are handled in ddi_dma_getwin().
4423                  */
4424                 cookie->dmac_size = (*windowp)->wd_trim.tr_last_size;
4425         }
4426 
4427         /*
4428          * coffset is the current offset within the cookie, new_offset is the
4429          * current offset within the entire buffer.
4430          */
4431         coffset = dmac_size - trim_sz;
4432         new_offset = (*windowp)->wd_offset + (*windowp)->wd_size;
4433 
4434         /* initialize the next window */
4435         (*windowp)++;
4436         rootnex_init_win(hp, dma, *windowp, cookie, new_offset);
4437         (*windowp)->wd_cookie_cnt++;
4438         (*windowp)->wd_size = trim_sz;
4439         if (trim_sz < dmac_size) {
4440                 (*windowp)->wd_trim.tr_trim_first = B_TRUE;
4441                 (*windowp)->wd_trim.tr_first_paddr = cookie->dmac_laddress +
4442                     coffset;
4443                 (*windowp)->wd_trim.tr_first_size = trim_sz;
4444         }
4445 
4446         return (DDI_SUCCESS);
4447 }
4448 
4449 
4450 /*ARGSUSED*/
4451 static int
4452 rootnex_coredma_sync(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
4453     off_t off, size_t len, uint_t cache_flags)
4454 {
4455         rootnex_sglinfo_t *sinfo;
4456         rootnex_pgmap_t *cbpage;
4457         rootnex_window_t *win;
4458         ddi_dma_impl_t *hp;
4459         rootnex_dma_t *dma;
4460         caddr_t fromaddr;
4461         caddr_t toaddr;
4462         uint_t psize;
4463         off_t offset;
4464         uint_t pidx;
4465         size_t size;
4466         off_t poff;
4467         int e;
4468 
4469 
4470         hp = (ddi_dma_impl_t *)handle;
4471         dma = (rootnex_dma_t *)hp->dmai_private;
4472         sinfo = &dma->dp_sglinfo;
4473 
4474         /*
4475          * if we don't have any windows, we don't need to sync. A copybuf
4476          * will cause us to have at least one window.
4477          */
4478         if (dma->dp_window == NULL) {
4479                 return (DDI_SUCCESS);
4480         }
4481 
4482         /* This window may not need to be sync'd */
4483         win = &dma->dp_window[dma->dp_current_win];
4484         if (!win->wd_dosync) {
4485                 return (DDI_SUCCESS);
4486         }
4487 
4488         /* handle off and len special cases */
4489         if ((off == 0) || (rootnex_sync_ignore_params)) {
4490                 offset = win->wd_offset;
4491         } else {
4492                 offset = off;
4493         }
4494         if ((len == 0) || (rootnex_sync_ignore_params)) {
4495                 size = win->wd_size;
4496         } else {
4497                 size = len;
4498         }
4499 
4500         /* check the sync args to make sure they make a little sense */
4501         if (rootnex_sync_check_parms) {
4502                 e = rootnex_valid_sync_parms(hp, win, offset, size,
4503                     cache_flags);
4504                 if (e != DDI_SUCCESS) {
4505                         ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_SYNC_FAIL]);
4506                         return (DDI_FAILURE);
4507                 }
4508         }
4509 
4510         /*
4511          * special case the first page to handle the offset into the page. The
4512          * offset to the current page for our buffer is the offset into the
4513          * first page of the buffer plus our current offset into the buffer
4514          * itself, masked of course.
4515          */
4516         poff = (sinfo->si_buf_offset + offset) & MMU_PAGEOFFSET;
4517         psize = MIN((MMU_PAGESIZE - poff), size);
4518 
4519         /* go through all the pages that we want to sync */
4520         while (size > 0) {
4521                 /*
4522                  * Calculate the page index relative to the start of the buffer.
4523                  * The index to the current page for our buffer is the offset
4524                  * into the first page of the buffer plus our current offset
4525                  * into the buffer itself, shifted of course...
4526                  */
4527                 pidx = (sinfo->si_buf_offset + offset) >> MMU_PAGESHIFT;
4528                 ASSERT(pidx < sinfo->si_max_pages);
4529 
4530                 /*
4531                  * if this page uses the copy buffer, we need to sync it,
4532                  * otherwise, go on to the next page.
4533                  */
4534                 cbpage = &dma->dp_pgmap[pidx];
4535                 ASSERT((cbpage->pm_uses_copybuf == B_TRUE) ||
4536                     (cbpage->pm_uses_copybuf == B_FALSE));
4537                 if (cbpage->pm_uses_copybuf) {
4538                         /* cbaddr and kaddr should be page aligned */
4539                         ASSERT(((uintptr_t)cbpage->pm_cbaddr &
4540                             MMU_PAGEOFFSET) == 0);
4541                         ASSERT(((uintptr_t)cbpage->pm_kaddr &
4542                             MMU_PAGEOFFSET) == 0);
4543 
4544                         /*
4545                          * if we're copying for the device, we are going to
4546                          * copy from the driver's buffer to the rootnex
4547                          * allocated copy buffer.
4548                          */
4549                         if (cache_flags == DDI_DMA_SYNC_FORDEV) {
4550                                 fromaddr = cbpage->pm_kaddr + poff;
4551                                 toaddr = cbpage->pm_cbaddr + poff;
4552                                 ROOTNEX_DPROBE2(rootnex__sync__dev,
4553                                     dev_info_t *, dma->dp_dip, size_t, psize);
4554 
4555                         /*
4556                          * if we're copying for the cpu/kernel, we are going to
4557                          * copy from the rootnex allocated copy buffer to the
4558                          * driver's buffer.
4559                          */
4560                         } else {
4561                                 fromaddr = cbpage->pm_cbaddr + poff;
4562                                 toaddr = cbpage->pm_kaddr + poff;
4563                                 ROOTNEX_DPROBE2(rootnex__sync__cpu,
4564                                     dev_info_t *, dma->dp_dip, size_t, psize);
4565                         }
4566 
4567                         bcopy(fromaddr, toaddr, psize);
4568                 }
4569 
4570                 /*
4571                  * decrement size until we're done, update our offset into the
4572                  * buffer, and get the next page size.
4573                  */
4574                 size -= psize;
4575                 offset += psize;
4576                 psize = MIN(MMU_PAGESIZE, size);
4577 
4578                 /* page offset is zero for the rest of this loop */
4579                 poff = 0;
4580         }
4581 
4582         return (DDI_SUCCESS);
4583 }
4584 
4585 /*
4586  * rootnex_dma_sync()
4587  *    called from ddi_dma_sync() if DMP_NOSYNC is not set in hp->dmai_rflags.
4588  *    We set DMP_NOSYNC if we're not using the copy buffer. If DMP_NOSYNC
4589  *    is set, ddi_dma_sync() returns immediately passing back success.
4590  */
4591 /*ARGSUSED*/
4592 static int
4593 rootnex_dma_sync(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
4594     off_t off, size_t len, uint_t cache_flags)
4595 {
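             /*
              * if an IOMMU is handling DMA for this device, hand the sync off
              * to iommulib; otherwise fall through to the core rootnex sync.
              */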
4596 #if defined(__amd64) && !defined(__xpv)
4597         if (IOMMU_USED(rdip)) {
4598                 return (iommulib_nexdma_sync(dip, rdip, handle, off, len,
4599                     cache_flags));
4600         }
4601 #endif
4602         return (rootnex_coredma_sync(dip, rdip, handle, off, len,
4603             cache_flags));
4604 }
4605 
4606 /*
4607  * rootnex_valid_sync_parms()
4608  *    checks the parameters passed to sync to verify they are correct.
4609  */
4610 static int
4611 rootnex_valid_sync_parms(ddi_dma_impl_t *hp, rootnex_window_t *win,
4612     off_t offset, size_t size, uint_t cache_flags)
4613 {
4614         off_t woffset;
4615 
4616 
4617         /*
4618          * first part of the test: make sure the offset passed in is
4619          * within the window.
4620          */
4621         if (offset < win->wd_offset) {
4622                 return (DDI_FAILURE);
4623         }
4624 
4625         /*
4626          * second and last part of the test: make sure the offset and length
4627          * passed in are within the window.
4628          */
4629         woffset = offset - win->wd_offset;
4630         if ((woffset + size) > win->wd_size) {
4631                 return (DDI_FAILURE);
4632         }
4633 
4634         /*
4635          * if we are sync'ing for the device, the DDI_DMA_WRITE flag should
4636          * be set too.
4637          */
4638         if ((cache_flags == DDI_DMA_SYNC_FORDEV) &&
4639             (hp->dmai_rflags & DDI_DMA_WRITE)) {
4640                 return (DDI_SUCCESS);
4641         }
4642 
4643         /*
4644          * at this point, either DDI_DMA_SYNC_FORCPU or DDI_DMA_SYNC_FORKERNEL
4645          * should be set. Also DDI_DMA_READ should be set in the flags.
4646          */
4647         if (((cache_flags == DDI_DMA_SYNC_FORCPU) ||
4648             (cache_flags == DDI_DMA_SYNC_FORKERNEL)) &&
4649             (hp->dmai_rflags & DDI_DMA_READ)) {
4650                 return (DDI_SUCCESS);
4651         }
4652 
4653         return (DDI_FAILURE);
4654 }
4655 
4656 
4657 /*ARGSUSED*/
4658 static int
4659 rootnex_coredma_win(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
4660     uint_t win, off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep,
4661     uint_t *ccountp)
4662 {
4663         rootnex_window_t *window;
4664         rootnex_trim_t *trim;
4665         ddi_dma_impl_t *hp;
4666         rootnex_dma_t *dma;
4667         ddi_dma_obj_t *dmao;
4668 #if !defined(__amd64)
4669         rootnex_sglinfo_t *sinfo;
4670         rootnex_pgmap_t *pmap;
4671         uint_t pidx;
4672         uint_t pcnt;
4673         off_t poff;
4674         int i;
4675 #endif
4676 
4677 
4678         hp = (ddi_dma_impl_t *)handle;
4679         dma = (rootnex_dma_t *)hp->dmai_private;
4680 #if !defined(__amd64)
4681         sinfo = &dma->dp_sglinfo;
4682 #endif
4683 
4684         /* If we try and get a window which doesn't exist, return failure */
4685         if (win >= hp->dmai_nwin) {
4686                 ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_GETWIN_FAIL]);
4687                 return (DDI_FAILURE);
4688         }
4689 
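             /*
              * use the DVMA-mapped view of the object if an IOMMU mapping was
              * used for this bind, otherwise the plain physical DMA object.
              */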
4690         dmao = dma->dp_dvma_used ? &dma->dp_dvma : &dma->dp_dma;
4691 
4692         /*
4693          * if we don't have any windows, and they're asking for the first
4694          * window, setup the cookie pointer to the first cookie in the bind.
4695          * setup our return values, then increment the cookie since we return
4696          * the first cookie on the stack.
4697          */
4698         if (dma->dp_window == NULL) {
4699                 if (win != 0) {
4700                         ROOTNEX_DPROF_INC(
4701                             &rootnex_cnt[ROOTNEX_CNT_GETWIN_FAIL]);
4702                         return (DDI_FAILURE);
4703                 }
4704                 hp->dmai_cookie = dma->dp_cookies;
4705                 *offp = 0;
4706                 *lenp = dmao->dmao_size;
4707                 *ccountp = dma->dp_sglinfo.si_sgl_size;
4708                 *cookiep = hp->dmai_cookie[0];
4709                 hp->dmai_cookie++;
4710                 return (DDI_SUCCESS);
4711         }
4712 
4713         /* sync the old window before moving on to the new one */
4714         window = &dma->dp_window[dma->dp_current_win];
4715         if ((window->wd_dosync) && (hp->dmai_rflags & DDI_DMA_READ)) {
4716                 (void) rootnex_coredma_sync(dip, rdip, handle, 0, 0,
4717                     DDI_DMA_SYNC_FORCPU);
4718         }
4719 
4720 #if !defined(__amd64)
4721         /*
4722          * before we move to the next window, if we need to re-map, unmap all
4723          * the pages in this window.
4724          */
4725         if (dma->dp_cb_remaping) {
4726                 /*
4727                  * If we switch to this window again, we'll need to map it in
4728                  * on the fly next time.
4729                  */
4730                 window->wd_remap_copybuf = B_TRUE;
4731 
4732                 /*
4733                  * calculate the page index into the buffer where this window
4734                  * starts, and the number of pages this window takes up.
4735                  */
4736                 pidx = (sinfo->si_buf_offset + window->wd_offset) >>
4737                     MMU_PAGESHIFT;
4738                 poff = (sinfo->si_buf_offset + window->wd_offset) &
4739                     MMU_PAGEOFFSET;
4740                 pcnt = mmu_btopr(window->wd_size + poff);
4741                 ASSERT((pidx + pcnt) <= sinfo->si_max_pages);
4742 
4743                 /* unmap pages which are currently mapped in this window */
4744                 for (i = 0; i < pcnt; i++) {
4745                         if (dma->dp_pgmap[pidx].pm_mapped) {
4746                                 hat_unload(kas.a_hat,
4747                                     dma->dp_pgmap[pidx].pm_kaddr, MMU_PAGESIZE,
4748                                     HAT_UNLOAD);
4749                                 dma->dp_pgmap[pidx].pm_mapped = B_FALSE;
4750                         }
4751                         pidx++;
4752                 }
4753         }
4754 #endif
4755 
4756         /*
4757          * Move to the new window.
4758          * NOTE: current_win must be set for sync to work right
4759          */
4760         dma->dp_current_win = win;
4761         window = &dma->dp_window[win];
4762 
4763         /* if needed, adjust the first and/or last cookies for trim */
4764         trim = &window->wd_trim;
4765         if (trim->tr_trim_first) {
4766                 window->wd_first_cookie->dmac_laddress = trim->tr_first_paddr;
4767                 window->wd_first_cookie->dmac_size = trim->tr_first_size;
4768 #if !defined(__amd64)
4769                 window->wd_first_cookie->dmac_type =
4770                     (window->wd_first_cookie->dmac_type &
4771                     ROOTNEX_USES_COPYBUF) + window->wd_offset;
4772 #endif
4773                 if (trim->tr_first_copybuf_win) {
4774                         dma->dp_pgmap[trim->tr_first_pidx].pm_cbaddr =
4775                             trim->tr_first_cbaddr;
4776 #if !defined(__amd64)
4777                         dma->dp_pgmap[trim->tr_first_pidx].pm_kaddr =
4778                             trim->tr_first_kaddr;
4779 #endif
4780                 }
4781         }
4782         if (trim->tr_trim_last) {
4783                 trim->tr_last_cookie->dmac_laddress = trim->tr_last_paddr;
4784                 trim->tr_last_cookie->dmac_size = trim->tr_last_size;
4785                 if (trim->tr_last_copybuf_win) {
4786                         dma->dp_pgmap[trim->tr_last_pidx].pm_cbaddr =
4787                             trim->tr_last_cbaddr;
4788 #if !defined(__amd64)
4789                         dma->dp_pgmap[trim->tr_last_pidx].pm_kaddr =
4790                             trim->tr_last_kaddr;
4791 #endif
4792                 }
4793         }
4794 
4795         /*
4796          * setup the cookie pointer to the first cookie in the window. setup
4797          * our return values, then increment the cookie since we return the
4798          * first cookie on the stack.
4799          */
4800         hp->dmai_cookie = window->wd_first_cookie;
4801         *offp = window->wd_offset;
4802         *lenp = window->wd_size;
4803         *ccountp = window->wd_cookie_cnt;
4804         *cookiep = hp->dmai_cookie[0];
4805         hp->dmai_cookie++;
4806 
4807 #if !defined(__amd64)
4808         /* re-map copybuf if required for this window */
4809         if (dma->dp_cb_remaping) {
4810                 /*
4811                  * calculate the page index into the buffer where this
4812                  * window starts.
4813                  */
4814                 pidx = (sinfo->si_buf_offset + window->wd_offset) >>
4815                     MMU_PAGESHIFT;
4816                 ASSERT(pidx < sinfo->si_max_pages);
4817 
4818                 /*
4819                  * the first page can get unmapped if it's shared with the
4820                  * previous window. Even if the rest of this window is already
4821                  * mapped in, we still need to check this one.
4822                  */
4823                 pmap = &dma->dp_pgmap[pidx];
4824                 if ((pmap->pm_uses_copybuf) && (pmap->pm_mapped == B_FALSE)) {
4825                         if (pmap->pm_pp != NULL) {
4826                                 pmap->pm_mapped = B_TRUE;
4827                                 i86_pp_map(pmap->pm_pp, pmap->pm_kaddr);
4828                         } else if (pmap->pm_vaddr != NULL) {
4829                                 pmap->pm_mapped = B_TRUE;
4830                                 i86_va_map(pmap->pm_vaddr, sinfo->si_asp,
4831                                     pmap->pm_kaddr);
4832                         }
4833                 }
4834                 pidx++;
4835 
4836                 /* map in the rest of the pages if required */
4837                 if (window->wd_remap_copybuf) {
4838                         window->wd_remap_copybuf = B_FALSE;
4839 
4840                         /* figure out how many pages this window takes up */
4841                         poff = (sinfo->si_buf_offset + window->wd_offset) &
4842                             MMU_PAGEOFFSET;
4843                         pcnt = mmu_btopr(window->wd_size + poff);
4844                         ASSERT(((pidx - 1) + pcnt) <= sinfo->si_max_pages);
4845 
4846                         /* map pages which require it */
4847                         for (i = 1; i < pcnt; i++) {
4848                                 pmap = &dma->dp_pgmap[pidx];
4849                                 if (pmap->pm_uses_copybuf) {
4850                                         ASSERT(pmap->pm_mapped == B_FALSE);
4851                                         if (pmap->pm_pp != NULL) {
4852                                                 pmap->pm_mapped = B_TRUE;
4853                                                 i86_pp_map(pmap->pm_pp,
4854                                                     pmap->pm_kaddr);
4855                                         } else if (pmap->pm_vaddr != NULL) {
4856                                                 pmap->pm_mapped = B_TRUE;
4857                                                 i86_va_map(pmap->pm_vaddr,
4858                                                     sinfo->si_asp,
4859                                                     pmap->pm_kaddr);
4860                                         }
4861                                 }
4862                                 pidx++;
4863                         }
4864                 }
4865         }
4866 #endif
4867 
4868         /* if the new window uses the copy buffer, sync it for the device */
4869         if ((window->wd_dosync) && (hp->dmai_rflags & DDI_DMA_WRITE)) {
4870                 (void) rootnex_coredma_sync(dip, rdip, handle, 0, 0,
4871                     DDI_DMA_SYNC_FORDEV);
4872         }
4873 
4874         return (DDI_SUCCESS);
4875 }
4876 
4877 /*
4878  * rootnex_dma_win()
4879  *    called from ddi_dma_getwin()
4880  */
4881 /*ARGSUSED*/
4882 static int
4883 rootnex_dma_win(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
4884     uint_t win, off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep,
4885     uint_t *ccountp)
4886 {
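             /*
              * as with sync, a device behind an IOMMU gets its window handling
              * from iommulib; everything else uses the core rootnex code.
              */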
4887 #if defined(__amd64) && !defined(__xpv)
4888         if (IOMMU_USED(rdip)) {
4889                 return (iommulib_nexdma_win(dip, rdip, handle, win, offp, lenp,
4890                     cookiep, ccountp));
4891         }
4892 #endif
4893 
4894         return (rootnex_coredma_win(dip, rdip, handle, win, offp, lenp,
4895             cookiep, ccountp));
4896 }
4897 
4898 #if defined(__amd64) && !defined(__xpv)
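     /*
      * Simple accessors for the IOMMU private pointer that can be hung off a
      * rootnex DMA handle (dp_iommu_private).
      */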
4899 /*ARGSUSED*/
4900 static int
4901 rootnex_coredma_hdl_setprivate(dev_info_t *dip, dev_info_t *rdip,
4902     ddi_dma_handle_t handle, void *v)
4903 {
4904         ddi_dma_impl_t *hp;
4905         rootnex_dma_t *dma;
4906 
4907         hp = (ddi_dma_impl_t *)handle;
4908         dma = (rootnex_dma_t *)hp->dmai_private;
4909         dma->dp_iommu_private = v;
4910 
4911         return (DDI_SUCCESS);
4912 }
4913 
4914 /*ARGSUSED*/
4915 static void *
4916 rootnex_coredma_hdl_getprivate(dev_info_t *dip, dev_info_t *rdip,
4917     ddi_dma_handle_t handle)
4918 {
4919         ddi_dma_impl_t *hp;
4920         rootnex_dma_t *dma;
4921 
4922         hp = (ddi_dma_impl_t *)handle;
4923         dma = (rootnex_dma_t *)hp->dmai_private;
4924 
4925         return (dma->dp_iommu_private);
4926 }
4927 #endif
4928 
4929 /*
4930  * ************************
4931  *  obsoleted dma routines
4932  * ************************
4933  */
4934 
4935 /*
4936  * rootnex_dma_mctl()
4937  *
4938  * We don't support this legacy interface any more on x86.
4939  */
4940 /* ARGSUSED */
4941 static int
4942 rootnex_dma_mctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
4943     enum ddi_dma_ctlops request, off_t *offp, size_t *lenp, caddr_t *objpp,
4944     uint_t cache_flags)
4945 {
4946         /*
4947          * The only thing dma_mctl is used for anymore is legacy SPARC
4948          * dvma and sbus-specific routines.
4949          */
4950         return (DDI_FAILURE);
4951 }
4952 
4953 /*
4954  * *********
4955  *  FMA Code
4956  * *********
4957  */
4958 
4959 /*
4960  * rootnex_fm_init()
4961  *    FMA init busop
4962  */
4963 /* ARGSUSED */
4964 static int
4965 rootnex_fm_init(dev_info_t *dip, dev_info_t *tdip, int tcap,
4966     ddi_iblock_cookie_t *ibc)
4967 {
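             /*
              * hand back the rootnex error interrupt block cookie and report
              * the system-wide FM capability level.
              */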
4968         *ibc = rootnex_state->r_err_ibc;
4969 
4970         return (ddi_system_fmcap);
4971 }
4972 
4973 /*
4974  * rootnex_dma_check()
4975  *    Function called after a DMA fault has occurred, to find out whether
4976  *    the fault address is associated with a driver that is able to handle
4977  *    and recover from faults.
4978  */
4979 /* ARGSUSED */
4980 static int
4981 rootnex_dma_check(dev_info_t *dip, const void *handle, const void *addr,
4982     const void *not_used)
4983 {
4984         rootnex_window_t *window;
4985         uint64_t start_addr;
4986         uint64_t fault_addr;
4987         ddi_dma_impl_t *hp;
4988         rootnex_dma_t *dma;
4989         uint64_t end_addr;
4990         size_t csize;
4991         int i;
4992         int j;
4993 
4994 
4995         /* The driver has to set DDI_DMA_FLAGERR to recover from dma faults */
4996         hp = (ddi_dma_impl_t *)handle;
4997         ASSERT(hp);
4998 
4999         dma = (rootnex_dma_t *)hp->dmai_private;
5000 
5001         /* Get the address that we need to search for */
5002         fault_addr = *(uint64_t *)addr;
5003 
5004         /*
5005          * if we don't have any windows, we can just walk through all the
5006          * cookies.
5007          */
5008         if (dma->dp_window == NULL) {
5009                 /* for each cookie */
5010                 for (i = 0; i < dma->dp_sglinfo.si_sgl_size; i++) {
5011                         /*
5012                          * if the faulted address is within the physical address
5013                          * range of the cookie, return DDI_FM_NONFATAL.
5014                          */
5015                         if ((fault_addr >= dma->dp_cookies[i].dmac_laddress) &&
5016                             (fault_addr <= (dma->dp_cookies[i].dmac_laddress +
5017                             dma->dp_cookies[i].dmac_size))) {
5018                                 return (DDI_FM_NONFATAL);
5019                         }
5020                 }
5021 
5022                 /* fault_addr not within this DMA handle */
5023                 return (DDI_FM_UNKNOWN);
5024         }
5025 
5026         /* we have multiple windows, walk through each one */
5027         for (i = 0; i < hp->dmai_nwin; i++) {
5028                 window = &dma->dp_window[i];
5029 
5030                 /* Go through all the cookies in the window */
5031                 for (j = 0; j < window->wd_cookie_cnt; j++) {
5032 
5033                         start_addr = window->wd_first_cookie[j].dmac_laddress;
5034                         csize = window->wd_first_cookie[j].dmac_size;
5035 
5036                         /*
5037                          * if we are trimming the first cookie in the window,
5038                          * and this is the first cookie, adjust the start
5039                          * address and size of the cookie to account for the
5040                          * trim.
5041                          */
5042                         if (window->wd_trim.tr_trim_first && (j == 0)) {
5043                                 start_addr = window->wd_trim.tr_first_paddr;
5044                                 csize = window->wd_trim.tr_first_size;
5045                         }
5046 
5047                         /*
5048                          * if we are trimming the last cookie in the window,
5049                          * and this is the last cookie, adjust the start
5050                          * address and size of the cookie to account for the
5051                          * trim.
5052                          */
5053                         if (window->wd_trim.tr_trim_last &&
5054                             (j == (window->wd_cookie_cnt - 1))) {
5055                                 start_addr = window->wd_trim.tr_last_paddr;
5056                                 csize = window->wd_trim.tr_last_size;
5057                         }
5058 
5059                         end_addr = start_addr + csize;
5060 
5061                         /*
5062                          * if the faulted address is within the physical
5063                          * address range of the cookie, return DDI_FM_NONFATAL.
5064                          */
5065                         if ((fault_addr >= start_addr) &&
5066                             (fault_addr <= end_addr)) {
5067                                 return (DDI_FM_NONFATAL);
5068                         }
5069                 }
5070         }
5071 
5072         /* fault_addr not within this DMA handle */
5073         return (DDI_FM_UNKNOWN);
5074 }
5075 
5076 /*ARGSUSED*/
5077 static int
5078 rootnex_quiesce(dev_info_t *dip)
5079 {
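             /*
              * on a bare-metal amd64 kernel we also quiesce the IOMMU;
              * otherwise the root nexus has nothing to quiesce.
              */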
5080 #if defined(__amd64) && !defined(__xpv)
5081         return (immu_quiesce());
5082 #else
5083         return (DDI_SUCCESS);
5084 #endif
5085 }
5086 
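     /*
      * The hypervisor (__xpv) kernel does not build the Intel IOMMU (immu)
      * code, so stub out its entry points here.
      */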
5087 #if defined(__xpv)
5088 void
5089 immu_init(void)
5090 {
5091         ;
5092 }
5093 
5094 void
5095 immu_startup(void)
5096 {
5097         ;
5098 }
5099 /*ARGSUSED*/
5100 void
5101 immu_physmem_update(uint64_t addr, uint64_t size)
5102 {
5103         ;
5104 }
5105 #endif