1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2012 Garrett D'Amore <garrett@damore.org>
  25  * Copyright 2014 Pluribus Networks, Inc.
  26  * Copyright 2016 Nexenta Systems, Inc.
  27  */
  28 
  29 /*
  30  * PC specific DDI implementation
  31  */
  32 #include <sys/types.h>
  33 #include <sys/autoconf.h>
  34 #include <sys/avintr.h>
  35 #include <sys/bootconf.h>
  36 #include <sys/conf.h>
  37 #include <sys/cpuvar.h>
  38 #include <sys/ddi_impldefs.h>
  39 #include <sys/ddi_subrdefs.h>
  40 #include <sys/ethernet.h>
  41 #include <sys/fp.h>
  42 #include <sys/instance.h>
  43 #include <sys/kmem.h>
  44 #include <sys/machsystm.h>
  45 #include <sys/modctl.h>
  46 #include <sys/promif.h>
  47 #include <sys/prom_plat.h>
  48 #include <sys/sunndi.h>
  49 #include <sys/ndi_impldefs.h>
  50 #include <sys/ddi_impldefs.h>
  51 #include <sys/sysmacros.h>
  52 #include <sys/systeminfo.h>
  53 #include <sys/utsname.h>
  54 #include <sys/atomic.h>
  55 #include <sys/spl.h>
  56 #include <sys/archsystm.h>
  57 #include <vm/seg_kmem.h>
  58 #include <sys/ontrap.h>
  59 #include <sys/fm/protocol.h>
  60 #include <sys/ramdisk.h>
  61 #include <sys/sunndi.h>
  62 #include <sys/vmem.h>
  63 #include <sys/pci_impl.h>
  64 #if defined(__xpv)
  65 #include <sys/hypervisor.h>
  66 #endif
  67 #include <sys/mach_intr.h>
  68 #include <vm/hat_i86.h>
  69 #include <sys/x86_archext.h>
  70 #include <sys/avl.h>
  71 #include <sys/font.h>
  72 
  73 /*
  74  * DDI Boot Configuration
  75  */
  76 
  77 /*
  78  * Platform drivers on this platform
  79  */
  80 char *platform_module_list[] = {
  81         "acpippm",
  82         "ppm",
  83         (char *)0
  84 };
  85 
/* pci bus resource maps */
struct pci_bus_resource *pci_bus_res;

/*
 * NOTE(review): presumably the upper bound, in bytes, on a DMA copy
 * buffer -- confirm against the DMA code that consumes it.
 */
size_t dma_max_copybuf_size = 0x101000;         /* 1M + 4K */

/*
 * NOTE(review): presumably the bounds of the boot ramdisk; neither variable
 * is referenced in this part of the file -- confirm where they are set.
 */
uint64_t ramdisk_start, ramdisk_end;

/*
 * When non-zero, configure() attaches "isa" with
 * i_ddi_attach_pseudo_node() instead of i_ddi_attach_hw_nodes().
 */
int pseudo_isa = 0;
  94 
  95 /*
  96  * Forward declarations
  97  */
  98 static int getlongprop_buf();
  99 static void get_boot_properties(void);
 100 static void impl_bus_initialprobe(void);
 101 static void impl_bus_reprobe(void);
 102 
 103 static int poke_mem(peekpoke_ctlops_t *in_args);
 104 static int peek_mem(peekpoke_ctlops_t *in_args);
 105 
 106 static int kmem_override_cache_attrs(caddr_t, size_t, uint_t);
 107 
 108 #if defined(__amd64) && !defined(__xpv)
 109 extern void immu_init(void);
 110 #endif
 111 
 112 /*
 113  * We use an AVL tree to store contiguous address allocations made with the
 114  * kalloca() routine, so that we can return the size to free with kfreea().
 115  * Note that in the future it would be vastly faster if we could eliminate
 116  * this lookup by insisting that all callers keep track of their own sizes,
 117  * just as for kmem_alloc().
 118  */
/*
 * One record of the kalloca()/kfreea() bookkeeping described above.
 */
struct ctgas {
        avl_node_t ctg_link;    /* AVL linkage (presumably into ctgtree) */
        void *ctg_addr;         /* address of the allocation */
        size_t ctg_size;        /* size of the allocation */
};

static avl_tree_t ctgtree;

/*
 * NOTE(review): ctgmutex presumably serializes access to ctgtree; the users
 * are outside this part of the file -- confirm.
 */
static kmutex_t         ctgmutex;
/* Acquire / release ctgmutex. */
#define CTGLOCK()       mutex_enter(&ctgmutex)
#define CTGUNLOCK()     mutex_exit(&ctgmutex)
 130 
/*
 * Minimum pfn value of page_t's put on the free list.  This is to simplify
 * support of ddi dma memory requests which specify small, non-zero addr_lo
 * values.
 *
 * The default value of 2, which corresponds to the only known non-zero addr_lo
 * value used, means a single page will be sacrificed (pfn typically starts
 * at 1).  ddiphysmin can be set to 0 to disable this behavior.  It must not
 * be set above 0x100; otherwise mp startup panics.
 */
pfn_t   ddiphysmin = 2;
 142 
 143 static void
 144 check_driver_disable(void)
 145 {
 146         int proplen = 128;
 147         char *prop_name;
 148         char *drv_name, *propval;
 149         major_t major;
 150 
 151         prop_name = kmem_alloc(proplen, KM_SLEEP);
 152         for (major = 0; major < devcnt; major++) {
 153                 drv_name = ddi_major_to_name(major);
 154                 if (drv_name == NULL)
 155                         continue;
 156                 (void) snprintf(prop_name, proplen, "disable-%s", drv_name);
 157                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
 158                     DDI_PROP_DONTPASS, prop_name, &propval) == DDI_SUCCESS) {
 159                         if (strcmp(propval, "true") == 0) {
 160                                 devnamesp[major].dn_flags |= DN_DRIVER_REMOVED;
 161                                 cmn_err(CE_NOTE, "driver %s disabled",
 162                                     drv_name);
 163                         }
 164                         ddi_prop_free(propval);
 165                 }
 166         }
 167         kmem_free(prop_name, proplen);
 168 }
 169 
 170 
 171 /*
 172  * Configure the hardware on the system.
 173  * Called before the rootfs is mounted
 174  */
 175 void
 176 configure(void)
 177 {
 178         extern void i_ddi_init_root();
 179 
 180 #if defined(__i386)
 181         extern int fpu_pentium_fdivbug;
 182 #endif  /* __i386 */
 183         extern int fpu_ignored;
 184 
 185         /*
 186          * Determine if an FPU is attached
 187          */
 188 
 189         fpu_probe();
 190 
 191 #if defined(__i386)
 192         if (fpu_pentium_fdivbug) {
 193                 printf("\
 194 FP hardware exhibits Pentium floating point divide problem\n");
 195         }
 196 #endif  /* __i386 */
 197 
 198         if (fpu_ignored) {
 199                 printf("FP hardware will not be used\n");
 200         } else if (!fpu_exists) {
 201                 printf("No FPU in configuration\n");
 202         }
 203 
 204         /*
 205          * Initialize devices on the machine.
 206          * Uses configuration tree built by the PROMs to determine what
 207          * is present, and builds a tree of prototype dev_info nodes
 208          * corresponding to the hardware which identified itself.
 209          */
 210 
 211         /*
 212          * Initialize root node.
 213          */
 214         i_ddi_init_root();
 215 
 216         /* reprogram devices not set up by firmware (BIOS) */
 217         impl_bus_reprobe();
 218 
 219 #if defined(__amd64) && !defined(__xpv)
 220         /*
 221          * Setup but don't startup the IOMMU
 222          * Startup happens later via a direct call
 223          * to IOMMU code by boot code.
 224          * At this point, all PCI bus renumbering
 225          * is done, so safe to init the IMMU
 226          * AKA Intel IOMMU.
 227          */
 228         immu_init();
 229 #endif
 230 
 231         /*
 232          * attach the isa nexus to get ACPI resource usage
 233          * isa is "kind of" a pseudo node
 234          */
 235 #if defined(__xpv)
 236         if (DOMAIN_IS_INITDOMAIN(xen_info)) {
 237                 if (pseudo_isa)
 238                         (void) i_ddi_attach_pseudo_node("isa");
 239                 else
 240                         (void) i_ddi_attach_hw_nodes("isa");
 241         }
 242 #else
 243         if (pseudo_isa)
 244                 (void) i_ddi_attach_pseudo_node("isa");
 245         else
 246                 (void) i_ddi_attach_hw_nodes("isa");
 247 #endif
 248 }
 249 
 250 /*
 251  * The "status" property indicates the operational status of a device.
 252  * If this property is present, the value is a string indicating the
 253  * status of the device as follows:
 254  *
 255  *      "okay"          operational.
 256  *      "disabled"      not operational, but might become operational.
 257  *      "fail"          not operational because a fault has been detected,
 258  *                      and it is unlikely that the device will become
 259  *                      operational without repair. no additional details
 260  *                      are available.
 261  *      "fail-xxx"      not operational because a fault has been detected,
 262  *                      and it is unlikely that the device will become
 263  *                      operational without repair. "xxx" is additional
 264  *                      human-readable information about the particular
 265  *                      fault condition that was detected.
 266  *
 267  * The absence of this property means that the operational status is
 268  * unknown or okay.
 269  *
 270  * This routine checks the status property of the specified device node
 271  * and returns 0 if the operational status indicates failure, and 1 otherwise.
 272  *
 * The property may exist on plug-in cards that existed before IEEE 1275-1994.
 274  * And, in that case, the property may not even be a string. So we carefully
 275  * check for the value "fail", in the beginning of the string, noting
 276  * the property length.
 277  */
 278 int
 279 status_okay(int id, char *buf, int buflen)
 280 {
 281         char status_buf[OBP_MAXPROPNAME];
 282         char *bufp = buf;
 283         int len = buflen;
 284         int proplen;
 285         static const char *status = "status";
 286         static const char *fail = "fail";
 287         int fail_len = (int)strlen(fail);
 288 
 289         /*
 290          * Get the proplen ... if it's smaller than "fail",
 291          * or doesn't exist ... then we don't care, since
 292          * the value can't begin with the char string "fail".
 293          *
 294          * NB: proplen, if it's a string, includes the NULL in the
 295          * the size of the property, and fail_len does not.
 296          */
 297         proplen = prom_getproplen((pnode_t)id, (caddr_t)status);
 298         if (proplen <= fail_len)     /* nonexistant or uninteresting len */
 299                 return (1);
 300 
 301         /*
 302          * if a buffer was provided, use it
 303          */
 304         if ((buf == (char *)NULL) || (buflen <= 0)) {
 305                 bufp = status_buf;
 306                 len = sizeof (status_buf);
 307         }
 308         *bufp = (char)0;
 309 
 310         /*
 311          * Get the property into the buffer, to the extent of the buffer,
 312          * and in case the buffer is smaller than the property size,
 313          * NULL terminate the buffer. (This handles the case where
 314          * a buffer was passed in and the caller wants to print the
 315          * value, but the buffer was too small).
 316          */
 317         (void) prom_bounded_getprop((pnode_t)id, (caddr_t)status,
 318             (caddr_t)bufp, len);
 319         *(bufp + len - 1) = (char)0;
 320 
 321         /*
 322          * If the value begins with the char string "fail",
 323          * then it means the node is failed. We don't care
 324          * about any other values. We assume the node is ok
 325          * although it might be 'disabled'.
 326          */
 327         if (strncmp(bufp, fail, fail_len) == 0)
 328                 return (0);
 329 
 330         return (1);
 331 }
 332 
 333 /*
 334  * Check the status of the device node passed as an argument.
 335  *
 336  *      if ((status is OKAY) || (status is DISABLED))
 337  *              return DDI_SUCCESS
 338  *      else
 339  *              print a warning and return DDI_FAILURE
 340  */
 341 /*ARGSUSED1*/
 342 int
 343 check_status(int id, char *name, dev_info_t *parent)
 344 {
 345         char status_buf[64];
 346         char devtype_buf[OBP_MAXPROPNAME];
 347         int retval = DDI_FAILURE;
 348 
 349         /*
 350          * is the status okay?
 351          */
 352         if (status_okay(id, status_buf, sizeof (status_buf)))
 353                 return (DDI_SUCCESS);
 354 
 355         /*
 356          * a status property indicating bad memory will be associated
 357          * with a node which has a "device_type" property with a value of
 358          * "memory-controller". in this situation, return DDI_SUCCESS
 359          */
 360         if (getlongprop_buf(id, OBP_DEVICETYPE, devtype_buf,
 361             sizeof (devtype_buf)) > 0) {
 362                 if (strcmp(devtype_buf, "memory-controller") == 0)
 363                         retval = DDI_SUCCESS;
 364         }
 365 
 366         /*
 367          * print the status property information
 368          */
 369         cmn_err(CE_WARN, "status '%s' for '%s'", status_buf, name);
 370         return (retval);
 371 }
 372 
/*
 * Calls softint() and unconditionally returns 1; neither argument is used.
 * NOTE(review): presumably installed as the level 1 soft interrupt
 * handler -- confirm at the registration site.
 */
/*ARGSUSED*/
uint_t
softlevel1(caddr_t arg1, caddr_t arg2)
{
        softint();
        return (1);
}
 380 
/*
 * Allow for implementation specific correction of PROM property values.
 *
 * In this implementation the hook is a no-op: none of the arguments are
 * examined and nothing is modified.
 */

/*ARGSUSED*/
void
impl_fix_props(dev_info_t *dip, dev_info_t *ch_dip, char *name, int len,
    caddr_t buffer)
{
        /*
         * There are no adjustments needed in this implementation.
         */
}
 394 
 395 static int
 396 getlongprop_buf(int id, char *name, char *buf, int maxlen)
 397 {
 398         int size;
 399 
 400         size = prom_getproplen((pnode_t)id, name);
 401         if (size <= 0 || (size > maxlen - 1))
 402                 return (-1);
 403 
 404         if (-1 == prom_getprop((pnode_t)id, name, buf))
 405                 return (-1);
 406 
 407         if (strcmp("name", name) == 0) {
 408                 if (buf[size - 1] != '\0') {
 409                         buf[size] = '\0';
 410                         size += 1;
 411                 }
 412         }
 413 
 414         return (size);
 415 }
 416 
 417 static int
 418 get_prop_int_array(dev_info_t *di, char *pname, int **pval, uint_t *plen)
 419 {
 420         int ret;
 421 
 422         if ((ret = ddi_prop_lookup_int_array(DDI_DEV_T_ANY, di,
 423             DDI_PROP_DONTPASS, pname, pval, plen))
 424             == DDI_PROP_SUCCESS) {
 425                 *plen = (*plen) * (sizeof (int));
 426         }
 427         return (ret);
 428 }
 429 
 430 
 431 /*
 432  * Node Configuration
 433  */
 434 
/*
 * Layout of one entry of an 'intr' property: a priority followed by a
 * vector.  make_ddi_ppd() copies these two fields into a struct intrspec.
 */
struct prop_ispec {
        uint_t  pri, vec;
};

/*
 * For the x86, we're prepared to claim that the interrupt string
 * is in the form of a list of <ipl,vec> specifications.
 */

/* Inclusive range of vector values accepted by impl_xlate_intrs(). */
#define VEC_MIN 1
#define VEC_MAX 255
 446 
 447 static int
 448 impl_xlate_intrs(dev_info_t *child, int *in,
 449     struct ddi_parent_private_data *pdptr)
 450 {
 451         size_t size;
 452         int n;
 453         struct intrspec *new;
 454         caddr_t got_prop;
 455         int *inpri;
 456         int got_len;
 457         extern int ignore_hardware_nodes;       /* force flag from ddi_impl.c */
 458 
 459         static char bad_intr_fmt[] =
 460             "bad interrupt spec from %s%d - ipl %d, irq %d\n";
 461 
 462         /*
 463          * determine if the driver is expecting the new style "interrupts"
 464          * property which just contains the IRQ, or the old style which
 465          * contains pairs of <IPL,IRQ>.  if it is the new style, we always
 466          * assign IPL 5 unless an "interrupt-priorities" property exists.
 467          * in that case, the "interrupt-priorities" property contains the
 468          * IPL values that match, one for one, the IRQ values in the
 469          * "interrupts" property.
 470          */
 471         inpri = NULL;
 472         if ((ddi_getprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
 473             "ignore-hardware-nodes", -1) != -1) || ignore_hardware_nodes) {
 474                 /* the old style "interrupts" property... */
 475 
 476                 /*
 477                  * The list consists of <ipl,vec> elements
 478                  */
 479                 if ((n = (*in++ >> 1)) < 1)
 480                         return (DDI_FAILURE);
 481 
 482                 pdptr->par_nintr = n;
 483                 size = n * sizeof (struct intrspec);
 484                 new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
 485 
 486                 while (n--) {
 487                         int level = *in++;
 488                         int vec = *in++;
 489 
 490                         if (level < 1 || level > MAXIPL ||
 491                             vec < VEC_MIN || vec > VEC_MAX) {
 492                                 cmn_err(CE_CONT, bad_intr_fmt,
 493                                     DEVI(child)->devi_name,
 494                                     DEVI(child)->devi_instance, level, vec);
 495                                 goto broken;
 496                         }
 497                         new->intrspec_pri = level;
 498                         if (vec != 2)
 499                                 new->intrspec_vec = vec;
 500                         else
 501                                 /*
 502                                  * irq 2 on the PC bus is tied to irq 9
 503                                  * on ISA, EISA and MicroChannel
 504                                  */
 505                                 new->intrspec_vec = 9;
 506                         new++;
 507                 }
 508 
 509                 return (DDI_SUCCESS);
 510         } else {
 511                 /* the new style "interrupts" property... */
 512 
 513                 /*
 514                  * The list consists of <vec> elements
 515                  */
 516                 if ((n = (*in++)) < 1)
 517                         return (DDI_FAILURE);
 518 
 519                 pdptr->par_nintr = n;
 520                 size = n * sizeof (struct intrspec);
 521                 new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
 522 
 523                 /* XXX check for "interrupt-priorities" property... */
 524                 if (ddi_getlongprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
 525                     "interrupt-priorities", (caddr_t)&got_prop, &got_len)
 526                     == DDI_PROP_SUCCESS) {
 527                         if (n != (got_len / sizeof (int))) {
 528                                 cmn_err(CE_CONT,
 529                                     "bad interrupt-priorities length"
 530                                     " from %s%d: expected %d, got %d\n",
 531                                     DEVI(child)->devi_name,
 532                                     DEVI(child)->devi_instance, n,
 533                                     (int)(got_len / sizeof (int)));
 534                                 goto broken;
 535                         }
 536                         inpri = (int *)got_prop;
 537                 }
 538 
 539                 while (n--) {
 540                         int level;
 541                         int vec = *in++;
 542 
 543                         if (inpri == NULL)
 544                                 level = 5;
 545                         else
 546                                 level = *inpri++;
 547 
 548                         if (level < 1 || level > MAXIPL ||
 549                             vec < VEC_MIN || vec > VEC_MAX) {
 550                                 cmn_err(CE_CONT, bad_intr_fmt,
 551                                     DEVI(child)->devi_name,
 552                                     DEVI(child)->devi_instance, level, vec);
 553                                 goto broken;
 554                         }
 555                         new->intrspec_pri = level;
 556                         if (vec != 2)
 557                                 new->intrspec_vec = vec;
 558                         else
 559                                 /*
 560                                  * irq 2 on the PC bus is tied to irq 9
 561                                  * on ISA, EISA and MicroChannel
 562                                  */
 563                                 new->intrspec_vec = 9;
 564                         new++;
 565                 }
 566 
 567                 if (inpri != NULL)
 568                         kmem_free(got_prop, got_len);
 569                 return (DDI_SUCCESS);
 570         }
 571 
 572 broken:
 573         kmem_free(pdptr->par_intr, size);
 574         pdptr->par_intr = NULL;
 575         pdptr->par_nintr = 0;
 576         if (inpri != NULL)
 577                 kmem_free(got_prop, got_len);
 578 
 579         return (DDI_FAILURE);
 580 }
 581 
 582 /*
 583  * Create a ddi_parent_private_data structure from the ddi properties of
 584  * the dev_info node.
 585  *
 586  * The "reg" and either an "intr" or "interrupts" properties are required
 587  * if the driver wishes to create mappings or field interrupts on behalf
 588  * of the device.
 589  *
 590  * The "reg" property is assumed to be a list of at least one triple
 591  *
 592  *      <bustype, address, size>*1
 593  *
 594  * The "intr" property is assumed to be a list of at least one duple
 595  *
 596  *      <SPARC ipl, vector#>*1
 597  *
 598  * The "interrupts" property is assumed to be a list of at least one
 599  * n-tuples that describes the interrupt capabilities of the bus the device
 600  * is connected to.  For SBus, this looks like
 601  *
 602  *      <SBus-level>*1
 603  *
 604  * (This property obsoletes the 'intr' property).
 605  *
 606  * The "ranges" property is optional.
 607  */
 608 void
 609 make_ddi_ppd(dev_info_t *child, struct ddi_parent_private_data **ppd)
 610 {
 611         struct ddi_parent_private_data *pdptr;
 612         int n;
 613         int *reg_prop, *rng_prop, *intr_prop, *irupts_prop;
 614         uint_t reg_len, rng_len, intr_len, irupts_len;
 615 
 616         *ppd = pdptr = kmem_zalloc(sizeof (*pdptr), KM_SLEEP);
 617 
 618         /*
 619          * Handle the 'reg' property.
 620          */
 621         if ((get_prop_int_array(child, "reg", &reg_prop, &reg_len) ==
 622             DDI_PROP_SUCCESS) && (reg_len != 0)) {
 623                 pdptr->par_nreg = reg_len / (int)sizeof (struct regspec);
 624                 pdptr->par_reg = (struct regspec *)reg_prop;
 625         }
 626 
 627         /*
 628          * See if I have a range (adding one where needed - this
 629          * means to add one for sbus node in sun4c, when romvec > 0,
 630          * if no range is already defined in the PROM node.
 631          * (Currently no sun4c PROMS define range properties,
 632          * but they should and may in the future.)  For the SBus
 633          * node, the range is defined by the SBus reg property.
 634          */
 635         if (get_prop_int_array(child, "ranges", &rng_prop, &rng_len)
 636             == DDI_PROP_SUCCESS) {
 637                 pdptr->par_nrng = rng_len / (int)(sizeof (struct rangespec));
 638                 pdptr->par_rng = (struct rangespec *)rng_prop;
 639         }
 640 
 641         /*
 642          * Handle the 'intr' and 'interrupts' properties
 643          */
 644 
 645         /*
 646          * For backwards compatibility
 647          * we first look for the 'intr' property for the device.
 648          */
 649         if (get_prop_int_array(child, "intr", &intr_prop, &intr_len)
 650             != DDI_PROP_SUCCESS) {
 651                 intr_len = 0;
 652         }
 653 
 654         /*
 655          * If we're to support bus adapters and future platforms cleanly,
 656          * we need to support the generalized 'interrupts' property.
 657          */
 658         if (get_prop_int_array(child, "interrupts", &irupts_prop,
 659             &irupts_len) != DDI_PROP_SUCCESS) {
 660                 irupts_len = 0;
 661         } else if (intr_len != 0) {
 662                 /*
 663                  * If both 'intr' and 'interrupts' are defined,
 664                  * then 'interrupts' wins and we toss the 'intr' away.
 665                  */
 666                 ddi_prop_free((void *)intr_prop);
 667                 intr_len = 0;
 668         }
 669 
 670         if (intr_len != 0) {
 671 
 672                 /*
 673                  * Translate the 'intr' property into an array
 674                  * an array of struct intrspec's.  There's not really
 675                  * very much to do here except copy what's out there.
 676                  */
 677 
 678                 struct intrspec *new;
 679                 struct prop_ispec *l;
 680 
 681                 n = pdptr->par_nintr = intr_len / sizeof (struct prop_ispec);
 682                 l = (struct prop_ispec *)intr_prop;
 683                 pdptr->par_intr =
 684                     new = kmem_zalloc(n * sizeof (struct intrspec), KM_SLEEP);
 685                 while (n--) {
 686                         new->intrspec_pri = l->pri;
 687                         new->intrspec_vec = l->vec;
 688                         new++;
 689                         l++;
 690                 }
 691                 ddi_prop_free((void *)intr_prop);
 692 
 693         } else if ((n = irupts_len) != 0) {
 694                 size_t size;
 695                 int *out;
 696 
 697                 /*
 698                  * Translate the 'interrupts' property into an array
 699                  * of intrspecs for the rest of the DDI framework to
 700                  * toy with.  Only our ancestors really know how to
 701                  * do this, so ask 'em.  We massage the 'interrupts'
 702                  * property so that it is pre-pended by a count of
 703                  * the number of integers in the argument.
 704                  */
 705                 size = sizeof (int) + n;
 706                 out = kmem_alloc(size, KM_SLEEP);
 707                 *out = n / sizeof (int);
 708                 bcopy(irupts_prop, out + 1, (size_t)n);
 709                 ddi_prop_free((void *)irupts_prop);
 710                 if (impl_xlate_intrs(child, out, pdptr) != DDI_SUCCESS) {
 711                         cmn_err(CE_CONT,
 712                             "Unable to translate 'interrupts' for %s%d\n",
 713                             DEVI(child)->devi_binding_name,
 714                             DEVI(child)->devi_instance);
 715                 }
 716                 kmem_free(out, size);
 717         }
 718 }
 719 
 720 /*
 721  * Name a child
 722  */
 723 static int
 724 impl_sunbus_name_child(dev_info_t *child, char *name, int namelen)
 725 {
 726         /*
 727          * Fill in parent-private data and this function returns to us
 728          * an indication if it used "registers" to fill in the data.
 729          */
 730         if (ddi_get_parent_data(child) == NULL) {
 731                 struct ddi_parent_private_data *pdptr;
 732                 make_ddi_ppd(child, &pdptr);
 733                 ddi_set_parent_data(child, pdptr);
 734         }
 735 
 736         name[0] = '\0';
 737         if (sparc_pd_getnreg(child) > 0) {
 738                 (void) snprintf(name, namelen, "%x,%x",
 739                     (uint_t)sparc_pd_getreg(child, 0)->regspec_bustype,
 740                     (uint_t)sparc_pd_getreg(child, 0)->regspec_addr);
 741         }
 742 
 743         return (DDI_SUCCESS);
 744 }
 745 
 746 /*
 747  * Called from the bus_ctl op of sunbus (sbus, obio, etc) nexus drivers
 748  * to implement the DDI_CTLOPS_INITCHILD operation.  That is, it names
 749  * the children of sun busses based on the reg spec.
 750  *
 751  * Handles the following properties (in make_ddi_ppd):
 752  *      Property                value
 753  *        Name                  type
 754  *      reg             register spec
 755  *      intr            old-form interrupt spec
 756  *      interrupts      new (bus-oriented) interrupt spec
 757  *      ranges          range spec
 758  */
 759 int
 760 impl_ddi_sunbus_initchild(dev_info_t *child)
 761 {
 762         char name[MAXNAMELEN];
 763         void impl_ddi_sunbus_removechild(dev_info_t *);
 764 
 765         /*
 766          * Name the child, also makes parent private data
 767          */
 768         (void) impl_sunbus_name_child(child, name, MAXNAMELEN);
 769         ddi_set_name_addr(child, name);
 770 
 771         /*
 772          * Attempt to merge a .conf node; if successful, remove the
 773          * .conf node.
 774          */
 775         if ((ndi_dev_is_persistent_node(child) == 0) &&
 776             (ndi_merge_node(child, impl_sunbus_name_child) == DDI_SUCCESS)) {
 777                 /*
 778                  * Return failure to remove node
 779                  */
 780                 impl_ddi_sunbus_removechild(child);
 781                 return (DDI_FAILURE);
 782         }
 783         return (DDI_SUCCESS);
 784 }
 785 
 786 void
 787 impl_free_ddi_ppd(dev_info_t *dip)
 788 {
 789         struct ddi_parent_private_data *pdptr;
 790         size_t n;
 791 
 792         if ((pdptr = ddi_get_parent_data(dip)) == NULL)
 793                 return;
 794 
 795         if ((n = (size_t)pdptr->par_nintr) != 0)
 796                 /*
 797                  * Note that kmem_free is used here (instead of
 798                  * ddi_prop_free) because the contents of the
 799                  * property were placed into a separate buffer and
 800                  * mucked with a bit before being stored in par_intr.
 801                  * The actual return value from the prop lookup
 802                  * was freed with ddi_prop_free previously.
 803                  */
 804                 kmem_free(pdptr->par_intr, n * sizeof (struct intrspec));
 805 
 806         if ((n = (size_t)pdptr->par_nrng) != 0)
 807                 ddi_prop_free((void *)pdptr->par_rng);
 808 
 809         if ((n = pdptr->par_nreg) != 0)
 810                 ddi_prop_free((void *)pdptr->par_reg);
 811 
 812         kmem_free(pdptr, sizeof (*pdptr));
 813         ddi_set_parent_data(dip, NULL);
 814 }
 815 
/*
 * Strip a child node: free its parent-private data, clear its unit
 * address, and remove its device properties.  impl_ddi_sunbus_initchild()
 * calls this after a .conf node has been merged.
 */
void
impl_ddi_sunbus_removechild(dev_info_t *dip)
{
        impl_free_ddi_ppd(dip);
        ddi_set_name_addr(dip, NULL);
        /*
         * Strip the node to properly convert it back to prototype form
         */
        impl_rem_dev_props(dip);
}
 826 
 827 /*
 828  * DDI Interrupt
 829  */
 830 
 831 /*
 832  * turn this on to force isa, eisa, and mca device to ignore the new
 833  * hardware nodes in the device tree (normally turned on only for
 834  * drivers that need it by setting the property "ignore-hardware-nodes"
 835  * in their driver.conf file).
 836  *
 837  * 7/31/96 -- Turned off globally.  Leaving variable in for the moment
 838  *              as safety valve.
 839  */
 840 int ignore_hardware_nodes = 0;
 841 
 842 /*
 843  * Local data
 844  */
 845 static struct impl_bus_promops *impl_busp;
 846 
 847 
 848 /*
 849  * New DDI interrupt framework
 850  */
 851 
 852 /*
 853  * i_ddi_intr_ops:
 854  *
 855  * This is the interrupt operator function wrapper for the bus function
 856  * bus_intr_op.
 857  */
 858 int
 859 i_ddi_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t op,
 860     ddi_intr_handle_impl_t *hdlp, void * result)
 861 {
 862         dev_info_t      *pdip = (dev_info_t *)DEVI(dip)->devi_parent;
 863         int             ret = DDI_FAILURE;
 864 
 865         /* request parent to process this interrupt op */
 866         if (NEXUS_HAS_INTR_OP(pdip))
 867                 ret = (*(DEVI(pdip)->devi_ops->devo_bus_ops->bus_intr_op))(
 868                     pdip, rdip, op, hdlp, result);
 869         else
 870                 cmn_err(CE_WARN, "Failed to process interrupt "
 871                     "for %s%d due to down-rev nexus driver %s%d",
 872                     ddi_get_name(rdip), ddi_get_instance(rdip),
 873                     ddi_get_name(pdip), ddi_get_instance(pdip));
 874         return (ret);
 875 }
 876 
 877 /*
 878  * i_ddi_add_softint - allocate and add a soft interrupt to the system
 879  */
 880 int
 881 i_ddi_add_softint(ddi_softint_hdl_impl_t *hdlp)
 882 {
 883         int ret;
 884 
 885         /* add soft interrupt handler */
 886         ret = add_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func,
 887             DEVI(hdlp->ih_dip)->devi_name, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
 888         return (ret ? DDI_SUCCESS : DDI_FAILURE);
 889 }
 890 
 891 
/*
 * i_ddi_remove_softint - remove a soft interrupt from the system
 *
 * Passes the handle, its priority and its callback to rem_avsoftintr().
 * The return value of rem_avsoftintr() is deliberately discarded.
 */
void
i_ddi_remove_softint(ddi_softint_hdl_impl_t *hdlp)
{
        (void) rem_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func);
}
 897 
 898 
/* hook used to post a soft interrupt, and the pending-state check */
extern void (*setsoftint)(int, struct av_softinfo *);
extern boolean_t av_check_softint_pending(struct av_softinfo *, boolean_t);

/*
 * i_ddi_trigger_softint - post the soft interrupt described by hdlp
 *
 * Returns DDI_EPENDING, without touching anything else, when
 * av_check_softint_pending() reports the handle as pending.  Otherwise
 * arg2 is recorded through update_avsoftintr_args(), the setsoftint hook
 * is invoked with the handle's priority and pending info, and
 * DDI_SUCCESS is returned.
 */
int
i_ddi_trigger_softint(ddi_softint_hdl_impl_t *hdlp, void *arg2)
{
        if (av_check_softint_pending(hdlp->ih_pending, B_FALSE))
                return (DDI_EPENDING);

        update_avsoftintr_args((void *)hdlp, hdlp->ih_pri, arg2);

        (*setsoftint)(hdlp->ih_pri, hdlp->ih_pending);
        return (DDI_SUCCESS);
}
 913 
 914 /*
 915  * i_ddi_set_softint_pri:
 916  *
 917  * The way this works is that it first tries to add a softint vector
 918  * at the new priority in hdlp. If that succeeds; then it removes the
 919  * existing softint vector at the old priority.
 920  */
 921 int
 922 i_ddi_set_softint_pri(ddi_softint_hdl_impl_t *hdlp, uint_t old_pri)
 923 {
 924         int ret;
 925 
 926         /*
 927          * If a softint is pending at the old priority then fail the request.
 928          */
 929         if (av_check_softint_pending(hdlp->ih_pending, B_TRUE))
 930                 return (DDI_FAILURE);
 931 
 932         ret = av_softint_movepri((void *)hdlp, old_pri);
 933         return (ret ? DDI_SUCCESS : DDI_FAILURE);
 934 }
 935 
 936 void
 937 i_ddi_alloc_intr_phdl(ddi_intr_handle_impl_t *hdlp)
 938 {
 939         hdlp->ih_private = (void *)kmem_zalloc(sizeof (ihdl_plat_t), KM_SLEEP);
 940 }
 941 
/*
 * Release the platform private data attached to an interrupt handle by
 * i_ddi_alloc_intr_phdl() and clear the pointer so that it cannot be
 * reused after the free.
 */
void
i_ddi_free_intr_phdl(ddi_intr_handle_impl_t *hdlp)
{
        kmem_free(hdlp->ih_private, sizeof (ihdl_plat_t));
        hdlp->ih_private = NULL;
}
 948 
 949 int
 950 i_ddi_get_intx_nintrs(dev_info_t *dip)
 951 {
 952         struct ddi_parent_private_data *pdp;
 953 
 954         if ((pdp = ddi_get_parent_data(dip)) == NULL)
 955                 return (0);
 956 
 957         return (pdp->par_nintr);
 958 }
 959 
 960 /*
 961  * DDI Memory/DMA
 962  */
 963 
/*
 * Support for allocating DMAable memory to implement
 * ddi_dma_mem_alloc(9F) interface.
 *
 * KA_ALIGN is the smallest kmem_io cache object size (1 << KA_ALIGN_SHIFT
 * bytes).  kmem_io_init() creates KA_NCACHE caches per memory range, of
 * sizes KA_ALIGN << 0 through KA_ALIGN << (KA_NCACHE - 1), the last of
 * which is 1 << PAGESHIFT bytes.
 */

#define KA_ALIGN_SHIFT  7
#define KA_ALIGN        (1 << KA_ALIGN_SHIFT)
#define KA_NCACHE       (PAGESHIFT + 1 - KA_ALIGN_SHIFT)
 972 
/*
 * Dummy DMA attribute template for kmem_io[].kmem_io_attr.  We only
 * care about addr_lo, addr_hi, and align.  addr_hi will be dynamically set:
 * ka_init() copies this template into each kmem_io[] entry it sets up and
 * then overwrites dma_attr_addr_hi with that range's io_limit.
 */

static ddi_dma_attr_t kmem_io_attr = {
        DMA_ATTR_V0,
        0x0000000000000000ULL,          /* dma_attr_addr_lo */
        0x0000000000000000ULL,          /* dma_attr_addr_hi */
        0x00ffffff,
        0x1000,                         /* dma_attr_align */
        1, 1, 0xffffffffULL, 0xffffffffULL, 0x1, 1, 0
};
 986 
/*
 * kmem io memory ranges and indices.
 *
 * The enumerators index both kmem_io[] and io_arena_params[], so their
 * order must match the order of the io_arena_params[] initializer
 * (largest limit first).  MAX_MEM_RANGES is the number of ranges.
 */
enum {
        IO_4P, IO_64G, IO_4G, IO_2G, IO_1G, IO_512M,
        IO_256M, IO_128M, IO_64M, IO_32M, IO_16M, MAX_MEM_RANGES
};
 992 
/*
 * Per-range allocation state, indexed by the IO_* enumerators:
 *   kmem_io_arena  vmem arena for the range (NULL until kmem_io_init()
 *                  has been run for it)
 *   kmem_io_cache  one kmem cache per power-of-two size, KA_ALIGN << c
 *   kmem_io_attr   DMA attributes handed to the page allocator; only
 *                  entries at or after kmem_io_idx are filled in by
 *                  ka_init()
 */
static struct {
        vmem_t          *kmem_io_arena;
        kmem_cache_t    *kmem_io_cache[KA_NCACHE];
        ddi_dma_attr_t  kmem_io_attr;
} kmem_io[MAX_MEM_RANGES];

static int kmem_io_idx;         /* index of first populated kmem_io[] */
1000 
1001 static page_t *
1002 page_create_io_wrapper(void *addr, size_t len, int vmflag, void *arg)
1003 {
1004         extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
1005             uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
1006 
1007         return (page_create_io(&kvp, (u_offset_t)(uintptr_t)addr, len,
1008             PG_EXCL | ((vmflag & VM_NOSLEEP) ? 0 : PG_WAIT), &kas, addr, arg));
1009 }
1010 
#ifdef __xpv
/*
 * Free routine installed on the kmem_io arenas by kmem_io_init() when
 * built for __xpv.  It is segkmem_xfree() with page_destroy_io() supplied
 * as the per-page destroy function.
 */
static void
segkmem_free_io(vmem_t *vmp, void * ptr, size_t size)
{
        extern void page_destroy_io(page_t *);
        segkmem_xfree(vmp, ptr, size, page_destroy_io);
}
#endif
1019 
1020 static void *
1021 segkmem_alloc_io_4P(vmem_t *vmp, size_t size, int vmflag)
1022 {
1023         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1024             page_create_io_wrapper, &kmem_io[IO_4P].kmem_io_attr));
1025 }
1026 
1027 static void *
1028 segkmem_alloc_io_64G(vmem_t *vmp, size_t size, int vmflag)
1029 {
1030         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1031             page_create_io_wrapper, &kmem_io[IO_64G].kmem_io_attr));
1032 }
1033 
1034 static void *
1035 segkmem_alloc_io_4G(vmem_t *vmp, size_t size, int vmflag)
1036 {
1037         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1038             page_create_io_wrapper, &kmem_io[IO_4G].kmem_io_attr));
1039 }
1040 
1041 static void *
1042 segkmem_alloc_io_2G(vmem_t *vmp, size_t size, int vmflag)
1043 {
1044         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1045             page_create_io_wrapper, &kmem_io[IO_2G].kmem_io_attr));
1046 }
1047 
1048 static void *
1049 segkmem_alloc_io_1G(vmem_t *vmp, size_t size, int vmflag)
1050 {
1051         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1052             page_create_io_wrapper, &kmem_io[IO_1G].kmem_io_attr));
1053 }
1054 
1055 static void *
1056 segkmem_alloc_io_512M(vmem_t *vmp, size_t size, int vmflag)
1057 {
1058         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1059             page_create_io_wrapper, &kmem_io[IO_512M].kmem_io_attr));
1060 }
1061 
1062 static void *
1063 segkmem_alloc_io_256M(vmem_t *vmp, size_t size, int vmflag)
1064 {
1065         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1066             page_create_io_wrapper, &kmem_io[IO_256M].kmem_io_attr));
1067 }
1068 
1069 static void *
1070 segkmem_alloc_io_128M(vmem_t *vmp, size_t size, int vmflag)
1071 {
1072         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1073             page_create_io_wrapper, &kmem_io[IO_128M].kmem_io_attr));
1074 }
1075 
1076 static void *
1077 segkmem_alloc_io_64M(vmem_t *vmp, size_t size, int vmflag)
1078 {
1079         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1080             page_create_io_wrapper, &kmem_io[IO_64M].kmem_io_attr));
1081 }
1082 
1083 static void *
1084 segkmem_alloc_io_32M(vmem_t *vmp, size_t size, int vmflag)
1085 {
1086         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1087             page_create_io_wrapper, &kmem_io[IO_32M].kmem_io_attr));
1088 }
1089 
1090 static void *
1091 segkmem_alloc_io_16M(vmem_t *vmp, size_t size, int vmflag)
1092 {
1093         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1094             page_create_io_wrapper, &kmem_io[IO_16M].kmem_io_attr));
1095 }
1096 
/*
 * Static description of each kmem_io memory range, indexed by the IO_*
 * enumerators (largest limit first):
 *   io_limit    highest physical address of the range; ka_init() may
 *               replace it with the machine's maximum physical address
 *               for the first range in use
 *   io_name     arena name, also the prefix of the range's cache names
 *   io_alloc    import function passed to vmem_create()
 *   io_initial  non-zero if ka_init() should set the range up at startup;
 *               other ranges are set up on first use by kmem_io_index()
 */
struct {
        uint64_t        io_limit;
        char            *io_name;
        void            *(*io_alloc)(vmem_t *, size_t, int);
        int             io_initial;     /* kmem_io_init during startup */
} io_arena_params[MAX_MEM_RANGES] = {
        {0x000fffffffffffffULL, "kmem_io_4P",   segkmem_alloc_io_4P,    1},
        {0x0000000fffffffffULL, "kmem_io_64G",  segkmem_alloc_io_64G,   0},
        {0x00000000ffffffffULL, "kmem_io_4G",   segkmem_alloc_io_4G,    1},
        {0x000000007fffffffULL, "kmem_io_2G",   segkmem_alloc_io_2G,    1},
        {0x000000003fffffffULL, "kmem_io_1G",   segkmem_alloc_io_1G,    0},
        {0x000000001fffffffULL, "kmem_io_512M", segkmem_alloc_io_512M,  0},
        {0x000000000fffffffULL, "kmem_io_256M", segkmem_alloc_io_256M,  0},
        {0x0000000007ffffffULL, "kmem_io_128M", segkmem_alloc_io_128M,  0},
        {0x0000000003ffffffULL, "kmem_io_64M",  segkmem_alloc_io_64M,   0},
        {0x0000000001ffffffULL, "kmem_io_32M",  segkmem_alloc_io_32M,   0},
        {0x0000000000ffffffULL, "kmem_io_16M",  segkmem_alloc_io_16M,   1}
};
1115 
1116 void
1117 kmem_io_init(int a)
1118 {
1119         int     c;
1120         char name[40];
1121 
1122         kmem_io[a].kmem_io_arena = vmem_create(io_arena_params[a].io_name,
1123             NULL, 0, PAGESIZE, io_arena_params[a].io_alloc,
1124 #ifdef __xpv
1125             segkmem_free_io,
1126 #else
1127             segkmem_free,
1128 #endif
1129             heap_arena, 0, VM_SLEEP);
1130 
1131         for (c = 0; c < KA_NCACHE; c++) {
1132                 size_t size = KA_ALIGN << c;
1133                 (void) sprintf(name, "%s_%lu",
1134                     io_arena_params[a].io_name, size);
1135                 kmem_io[a].kmem_io_cache[c] = kmem_cache_create(name,
1136                     size, size, NULL, NULL, NULL, NULL,
1137                     kmem_io[a].kmem_io_arena, 0);
1138         }
1139 }
1140 
1141 /*
1142  * Return the index of the highest memory range for addr.
1143  */
1144 static int
1145 kmem_io_index(uint64_t addr)
1146 {
1147         int n;
1148 
1149         for (n = kmem_io_idx; n < MAX_MEM_RANGES; n++) {
1150                 if (kmem_io[n].kmem_io_attr.dma_attr_addr_hi <= addr) {
1151                         if (kmem_io[n].kmem_io_arena == NULL)
1152                                 kmem_io_init(n);
1153                         return (n);
1154                 }
1155         }
1156         panic("kmem_io_index: invalid addr - must be at least 16m");
1157 
1158         /*NOTREACHED*/
1159 }
1160 
1161 /*
1162  * Return the index of the next kmem_io populated memory range
1163  * after curindex.
1164  */
1165 static int
1166 kmem_io_index_next(int curindex)
1167 {
1168         int n;
1169 
1170         for (n = curindex + 1; n < MAX_MEM_RANGES; n++) {
1171                 if (kmem_io[n].kmem_io_arena)
1172                         return (n);
1173         }
1174         return (-1);
1175 }
1176 
1177 /*
1178  * allow kmem to be mapped in with different PTE cache attribute settings.
1179  * Used by i_ddi_mem_alloc()
1180  */
1181 int
1182 kmem_override_cache_attrs(caddr_t kva, size_t size, uint_t order)
1183 {
1184         uint_t hat_flags;
1185         caddr_t kva_end;
1186         uint_t hat_attr;
1187         pfn_t pfn;
1188 
1189         if (hat_getattr(kas.a_hat, kva, &hat_attr) == -1) {
1190                 return (-1);
1191         }
1192 
1193         hat_attr &= ~HAT_ORDER_MASK;
1194         hat_attr |= order | HAT_NOSYNC;
1195         hat_flags = HAT_LOAD_LOCK;
1196 
1197         kva_end = (caddr_t)(((uintptr_t)kva + size + PAGEOFFSET) &
1198             (uintptr_t)PAGEMASK);
1199         kva = (caddr_t)((uintptr_t)kva & (uintptr_t)PAGEMASK);
1200 
1201         while (kva < kva_end) {
1202                 pfn = hat_getpfnum(kas.a_hat, kva);
1203                 hat_unload(kas.a_hat, kva, PAGESIZE, HAT_UNLOAD_UNLOCK);
1204                 hat_devload(kas.a_hat, kva, PAGESIZE, pfn, hat_attr, hat_flags);
1205                 kva += MMU_PAGESIZE;
1206         }
1207 
1208         return (0);
1209 }
1210 
1211 static int
1212 ctgcompare(const void *a1, const void *a2)
1213 {
1214         /* we just want to compare virtual addresses */
1215         a1 = ((struct ctgas *)a1)->ctg_addr;
1216         a2 = ((struct ctgas *)a2)->ctg_addr;
1217         return (a1 == a2 ? 0 : (a1 < a2 ? -1 : 1));
1218 }
1219 
1220 void
1221 ka_init(void)
1222 {
1223         int a;
1224         paddr_t maxphysaddr;
1225 #if !defined(__xpv)
1226         extern pfn_t physmax;
1227 
1228         maxphysaddr = mmu_ptob((paddr_t)physmax) + MMU_PAGEOFFSET;
1229 #else
1230         maxphysaddr = mmu_ptob((paddr_t)HYPERVISOR_memory_op(
1231             XENMEM_maximum_ram_page, NULL)) + MMU_PAGEOFFSET;
1232 #endif
1233 
1234         ASSERT(maxphysaddr <= io_arena_params[0].io_limit);
1235 
1236         for (a = 0; a < MAX_MEM_RANGES; a++) {
1237                 if (maxphysaddr >= io_arena_params[a + 1].io_limit) {
1238                         if (maxphysaddr > io_arena_params[a + 1].io_limit)
1239                                 io_arena_params[a].io_limit = maxphysaddr;
1240                         else
1241                                 a++;
1242                         break;
1243                 }
1244         }
1245         kmem_io_idx = a;
1246 
1247         for (; a < MAX_MEM_RANGES; a++) {
1248                 kmem_io[a].kmem_io_attr = kmem_io_attr;
1249                 kmem_io[a].kmem_io_attr.dma_attr_addr_hi =
1250                     io_arena_params[a].io_limit;
1251                 /*
1252                  * initialize kmem_io[] arena/cache corresponding to
1253                  * maxphysaddr and to the "common" io memory ranges that
1254                  * have io_initial set to a non-zero value.
1255                  */
1256                 if (io_arena_params[a].io_initial || a == kmem_io_idx)
1257                         kmem_io_init(a);
1258         }
1259 
1260         /* initialize ctgtree */
1261         avl_create(&ctgtree, ctgcompare, sizeof (struct ctgas),
1262             offsetof(struct ctgas, ctg_link));
1263 }
1264 
1265 /*
1266  * put contig address/size
1267  */
1268 static void *
1269 putctgas(void *addr, size_t size)
1270 {
1271         struct ctgas    *ctgp;
1272         if ((ctgp = kmem_zalloc(sizeof (*ctgp), KM_NOSLEEP)) != NULL) {
1273                 ctgp->ctg_addr = addr;
1274                 ctgp->ctg_size = size;
1275                 CTGLOCK();
1276                 avl_add(&ctgtree, ctgp);
1277                 CTGUNLOCK();
1278         }
1279         return (ctgp);
1280 }
1281 
1282 /*
1283  * get contig size by addr
1284  */
1285 static size_t
1286 getctgsz(void *addr)
1287 {
1288         struct ctgas    *ctgp;
1289         struct ctgas    find;
1290         size_t          sz = 0;
1291 
1292         find.ctg_addr = addr;
1293         CTGLOCK();
1294         if ((ctgp = avl_find(&ctgtree, &find, NULL)) != NULL) {
1295                 avl_remove(&ctgtree, ctgp);
1296         }
1297         CTGUNLOCK();
1298 
1299         if (ctgp != NULL) {
1300                 sz = ctgp->ctg_size;
1301                 kmem_free(ctgp, sizeof (*ctgp));
1302         }
1303 
1304         return (sz);
1305 }
1306 
1307 /*
1308  * contig_alloc:
1309  *
1310  *      allocates contiguous memory to satisfy the 'size' and dma attributes
1311  *      specified in 'attr'.
1312  *
1313  *      Not all of memory need to be physically contiguous if the
1314  *      scatter-gather list length is greater than 1.
1315  */
1316 
1317 /*ARGSUSED*/
1318 void *
1319 contig_alloc(size_t size, ddi_dma_attr_t *attr, uintptr_t align, int cansleep)
1320 {
1321         pgcnt_t         pgcnt = btopr(size);
1322         size_t          asize = pgcnt * PAGESIZE;
1323         page_t          *ppl;
1324         int             pflag;
1325         void            *addr;
1326 
1327         extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
1328             uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
1329 
1330         /* segkmem_xalloc */
1331 
1332         if (align <= PAGESIZE)
1333                 addr = vmem_alloc(heap_arena, asize,
1334                     (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1335         else
1336                 addr = vmem_xalloc(heap_arena, asize, align, 0, 0, NULL, NULL,
1337                     (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1338         if (addr) {
1339                 ASSERT(!((uintptr_t)addr & (align - 1)));
1340 
1341                 if (page_resv(pgcnt, (cansleep) ? KM_SLEEP : KM_NOSLEEP) == 0) {
1342                         vmem_free(heap_arena, addr, asize);
1343                         return (NULL);
1344                 }
1345                 pflag = PG_EXCL;
1346 
1347                 if (cansleep)
1348                         pflag |= PG_WAIT;
1349 
1350                 /* 4k req gets from freelists rather than pfn search */
1351                 if (pgcnt > 1 || align > PAGESIZE)
1352                         pflag |= PG_PHYSCONTIG;
1353 
1354                 ppl = page_create_io(&kvp, (u_offset_t)(uintptr_t)addr,
1355                     asize, pflag, &kas, (caddr_t)addr, attr);
1356 
1357                 if (!ppl) {
1358                         vmem_free(heap_arena, addr, asize);
1359                         page_unresv(pgcnt);
1360                         return (NULL);
1361                 }
1362 
1363                 while (ppl != NULL) {
1364                         page_t  *pp = ppl;
1365                         page_sub(&ppl, pp);
1366                         ASSERT(page_iolock_assert(pp));
1367                         page_io_unlock(pp);
1368                         page_downgrade(pp);
1369                         hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset,
1370                             pp, (PROT_ALL & ~PROT_USER) |
1371                             HAT_NOSYNC, HAT_LOAD_LOCK);
1372                 }
1373         }
1374         return (addr);
1375 }
1376 
1377 void
1378 contig_free(void *addr, size_t size)
1379 {
1380         pgcnt_t pgcnt = btopr(size);
1381         size_t  asize = pgcnt * PAGESIZE;
1382         caddr_t a, ea;
1383         page_t  *pp;
1384 
1385         hat_unload(kas.a_hat, addr, asize, HAT_UNLOAD_UNLOCK);
1386 
1387         for (a = addr, ea = a + asize; a < ea; a += PAGESIZE) {
1388                 pp = page_find(&kvp, (u_offset_t)(uintptr_t)a);
1389                 if (!pp)
1390                         panic("contig_free: contig pp not found");
1391 
1392                 if (!page_tryupgrade(pp)) {
1393                         page_unlock(pp);
1394                         pp = page_lookup(&kvp,
1395                             (u_offset_t)(uintptr_t)a, SE_EXCL);
1396                         if (pp == NULL)
1397                                 panic("contig_free: page freed");
1398                 }
1399                 page_destroy(pp, 0);
1400         }
1401 
1402         page_unresv(pgcnt);
1403         vmem_free(heap_arena, addr, asize);
1404 }
1405 
1406 /*
1407  * Allocate from the system, aligned on a specific boundary.
1408  * The alignment, if non-zero, must be a power of 2.
1409  */
1410 static void *
1411 kalloca(size_t size, size_t align, int cansleep, int physcontig,
1412     ddi_dma_attr_t *attr)
1413 {
1414         size_t *addr, *raddr, rsize;
1415         size_t hdrsize = 4 * sizeof (size_t);   /* must be power of 2 */
1416         int a, i, c;
1417         vmem_t *vmp;
1418         kmem_cache_t *cp = NULL;
1419 
1420         if (attr->dma_attr_addr_lo > mmu_ptob((uint64_t)ddiphysmin))
1421                 return (NULL);
1422 
1423         align = MAX(align, hdrsize);
1424         ASSERT((align & (align - 1)) == 0);
1425 
1426         /*
1427          * All of our allocators guarantee 16-byte alignment, so we don't
1428          * need to reserve additional space for the header.
1429          * To simplify picking the correct kmem_io_cache, we round up to
1430          * a multiple of KA_ALIGN.
1431          */
1432         rsize = P2ROUNDUP_TYPED(size + align, KA_ALIGN, size_t);
1433 
1434         if (physcontig && rsize > PAGESIZE) {
1435                 if (addr = contig_alloc(size, attr, align, cansleep)) {
1436                         if (!putctgas(addr, size))
1437                                 contig_free(addr, size);
1438                         else
1439                                 return (addr);
1440                 }
1441                 return (NULL);
1442         }
1443 
1444         a = kmem_io_index(attr->dma_attr_addr_hi);
1445 
1446         if (rsize > PAGESIZE) {
1447                 vmp = kmem_io[a].kmem_io_arena;
1448                 raddr = vmem_alloc(vmp, rsize,
1449                     (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1450         } else {
1451                 c = highbit((rsize >> KA_ALIGN_SHIFT) - 1);
1452                 cp = kmem_io[a].kmem_io_cache[c];
1453                 raddr = kmem_cache_alloc(cp, (cansleep) ? KM_SLEEP :
1454                     KM_NOSLEEP);
1455         }
1456 
1457         if (raddr == NULL) {
1458                 int     na;
1459 
1460                 ASSERT(cansleep == 0);
1461                 if (rsize > PAGESIZE)
1462                         return (NULL);
1463                 /*
1464                  * System does not have memory in the requested range.
1465                  * Try smaller kmem io ranges and larger cache sizes
1466                  * to see if there might be memory available in
1467                  * these other caches.
1468                  */
1469 
1470                 for (na = kmem_io_index_next(a); na >= 0;
1471                     na = kmem_io_index_next(na)) {
1472                         ASSERT(kmem_io[na].kmem_io_arena);
1473                         cp = kmem_io[na].kmem_io_cache[c];
1474                         raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1475                         if (raddr)
1476                                 goto kallocdone;
1477                 }
1478                 /* now try the larger kmem io cache sizes */
1479                 for (na = a; na >= 0; na = kmem_io_index_next(na)) {
1480                         for (i = c + 1; i < KA_NCACHE; i++) {
1481                                 cp = kmem_io[na].kmem_io_cache[i];
1482                                 raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1483                                 if (raddr)
1484                                         goto kallocdone;
1485                         }
1486                 }
1487                 return (NULL);
1488         }
1489 
1490 kallocdone:
1491         ASSERT(!P2BOUNDARY((uintptr_t)raddr, rsize, PAGESIZE) ||
1492             rsize > PAGESIZE);
1493 
1494         addr = (size_t *)P2ROUNDUP((uintptr_t)raddr + hdrsize, align);
1495         ASSERT((uintptr_t)addr + size - (uintptr_t)raddr <= rsize);
1496 
1497         addr[-4] = (size_t)cp;
1498         addr[-3] = (size_t)vmp;
1499         addr[-2] = (size_t)raddr;
1500         addr[-1] = rsize;
1501 
1502         return (addr);
1503 }
1504 
1505 static void
1506 kfreea(void *addr)
1507 {
1508         size_t          size;
1509 
1510         if (!((uintptr_t)addr & PAGEOFFSET) && (size = getctgsz(addr))) {
1511                 contig_free(addr, size);
1512         } else {
1513                 size_t  *saddr = addr;
1514                 if (saddr[-4] == 0)
1515                         vmem_free((vmem_t *)saddr[-3], (void *)saddr[-2],
1516                             saddr[-1]);
1517                 else
1518                         kmem_cache_free((kmem_cache_t *)saddr[-4],
1519                             (void *)saddr[-2]);
1520         }
1521 }
1522 
/*
 * Deliberately empty in this file: neither argument is read or written.
 */
/*ARGSUSED*/
void
i_ddi_devacc_to_hatacc(ddi_device_acc_attr_t *devaccp, uint_t *hataccp)
{
}
1528 
1529 /*
1530  * Check if the specified cache attribute is supported on the platform.
1531  * This function must be called before i_ddi_cacheattr_to_hatacc().
1532  */
1533 boolean_t
1534 i_ddi_check_cache_attr(uint_t flags)
1535 {
1536         /*
1537          * The cache attributes are mutually exclusive. Any combination of
1538          * the attributes leads to a failure.
1539          */
1540         uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1541         if ((cache_attr != 0) && !ISP2(cache_attr))
1542                 return (B_FALSE);
1543 
1544         /* All cache attributes are supported on X86/X64 */
1545         if (cache_attr & (IOMEM_DATA_UNCACHED | IOMEM_DATA_CACHED |
1546             IOMEM_DATA_UC_WR_COMBINE))
1547                 return (B_TRUE);
1548 
1549         /* undefined attributes */
1550         return (B_FALSE);
1551 }
1552 
1553 /* set HAT cache attributes from the cache attributes */
1554 void
1555 i_ddi_cacheattr_to_hatacc(uint_t flags, uint_t *hataccp)
1556 {
1557         uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1558         static char *fname = "i_ddi_cacheattr_to_hatacc";
1559 
1560         /*
1561          * If write-combining is not supported, then it falls back
1562          * to uncacheable.
1563          */
1564         if (cache_attr == IOMEM_DATA_UC_WR_COMBINE &&
1565             !is_x86_feature(x86_featureset, X86FSET_PAT))
1566                 cache_attr = IOMEM_DATA_UNCACHED;
1567 
1568         /*
1569          * set HAT attrs according to the cache attrs.
1570          */
1571         switch (cache_attr) {
1572         case IOMEM_DATA_UNCACHED:
1573                 *hataccp &= ~HAT_ORDER_MASK;
1574                 *hataccp |= (HAT_STRICTORDER | HAT_PLAT_NOCACHE);
1575                 break;
1576         case IOMEM_DATA_UC_WR_COMBINE:
1577                 *hataccp &= ~HAT_ORDER_MASK;
1578                 *hataccp |= (HAT_MERGING_OK | HAT_PLAT_NOCACHE);
1579                 break;
1580         case IOMEM_DATA_CACHED:
1581                 *hataccp &= ~HAT_ORDER_MASK;
1582                 *hataccp |= HAT_UNORDERED_OK;
1583                 break;
1584         /*
1585          * This case must not occur because the cache attribute is scrutinized
1586          * before this function is called.
1587          */
1588         default:
1589                 /*
1590                  * set cacheable to hat attrs.
1591                  */
1592                 *hataccp &= ~HAT_ORDER_MASK;
1593                 *hataccp |= HAT_UNORDERED_OK;
1594                 cmn_err(CE_WARN, "%s: cache_attr=0x%x is ignored.",
1595                     fname, cache_attr);
1596         }
1597 }
1598 
1599 /*
1600  * This should actually be called i_ddi_dma_mem_alloc. There should
1601  * also be an i_ddi_pio_mem_alloc. i_ddi_dma_mem_alloc should call
1602  * through the device tree with the DDI_CTLOPS_DMA_ALIGN ctl ops to
1603  * get alignment requirements for DMA memory. i_ddi_pio_mem_alloc
1604  * should use DDI_CTLOPS_PIO_ALIGN. Since we only have i_ddi_mem_alloc
1605  * so far which is used for both, DMA and PIO, we have to use the DMA
1606  * ctl ops to make everybody happy.
1607  */
1608 /*ARGSUSED*/
1609 int
1610 i_ddi_mem_alloc(dev_info_t *dip, ddi_dma_attr_t *attr,
1611     size_t length, int cansleep, int flags,
1612     ddi_device_acc_attr_t *accattrp, caddr_t *kaddrp,
1613     size_t *real_length, ddi_acc_hdl_t *ap)
1614 {
1615         caddr_t a;
1616         int iomin;
1617         ddi_acc_impl_t *iap;
1618         int physcontig = 0;
1619         pgcnt_t npages;
1620         pgcnt_t minctg;
1621         uint_t order;
1622         int e;
1623 
1624         /*
1625          * Check legality of arguments
1626          */
1627         if (length == 0 || kaddrp == NULL || attr == NULL) {
1628                 return (DDI_FAILURE);
1629         }
1630 
1631         if (attr->dma_attr_minxfer == 0 || attr->dma_attr_align == 0 ||
1632             !ISP2(attr->dma_attr_align) || !ISP2(attr->dma_attr_minxfer)) {
1633                 return (DDI_FAILURE);
1634         }
1635 
1636         /*
1637          * figure out most restrictive alignment requirement
1638          */
1639         iomin = attr->dma_attr_minxfer;
1640         iomin = maxbit(iomin, attr->dma_attr_align);
1641         if (iomin == 0)
1642                 return (DDI_FAILURE);
1643 
1644         ASSERT((iomin & (iomin - 1)) == 0);
1645 
1646         /*
1647          * if we allocate memory with IOMEM_DATA_UNCACHED or
1648          * IOMEM_DATA_UC_WR_COMBINE, make sure we allocate a page aligned
1649          * memory that ends on a page boundry.
1650          * Don't want to have to different cache mappings to the same
1651          * physical page.
1652          */
1653         if (OVERRIDE_CACHE_ATTR(flags)) {
1654                 iomin = (iomin + MMU_PAGEOFFSET) & MMU_PAGEMASK;
1655                 length = (length + MMU_PAGEOFFSET) & (size_t)MMU_PAGEMASK;
1656         }
1657 
1658         /*
1659          * Determine if we need to satisfy the request for physically
1660          * contiguous memory or alignments larger than pagesize.
1661          */
1662         npages = btopr(length + attr->dma_attr_align);
1663         minctg = howmany(npages, attr->dma_attr_sgllen);
1664 
1665         if (minctg > 1) {
1666                 uint64_t pfnseg = attr->dma_attr_seg >> PAGESHIFT;
1667                 /*
1668                  * verify that the minimum contig requirement for the
1669                  * actual length does not cross segment boundary.
1670                  */
1671                 length = P2ROUNDUP_TYPED(length, attr->dma_attr_minxfer,
1672                     size_t);
1673                 npages = btopr(length);
1674                 minctg = howmany(npages, attr->dma_attr_sgllen);
1675                 if (minctg > pfnseg + 1)
1676                         return (DDI_FAILURE);
1677                 physcontig = 1;
1678         } else {
1679                 length = P2ROUNDUP_TYPED(length, iomin, size_t);
1680         }
1681 
1682         /*
1683          * Allocate the requested amount from the system.
1684          */
1685         a = kalloca(length, iomin, cansleep, physcontig, attr);
1686 
1687         if ((*kaddrp = a) == NULL)
1688                 return (DDI_FAILURE);
1689 
1690         /*
1691          * if we to modify the cache attributes, go back and muck with the
1692          * mappings.
1693          */
1694         if (OVERRIDE_CACHE_ATTR(flags)) {
1695                 order = 0;
1696                 i_ddi_cacheattr_to_hatacc(flags, &order);
1697                 e = kmem_override_cache_attrs(a, length, order);
1698                 if (e != 0) {
1699                         kfreea(a);
1700                         return (DDI_FAILURE);
1701                 }
1702         }
1703 
1704         if (real_length) {
1705                 *real_length = length;
1706         }
1707         if (ap) {
1708                 /*
1709                  * initialize access handle
1710                  */
1711                 iap = (ddi_acc_impl_t *)ap->ah_platform_private;
1712                 iap->ahi_acc_attr |= DDI_ACCATTR_CPU_VADDR;
1713                 impl_acc_hdl_init(ap);
1714         }
1715 
1716         return (DDI_SUCCESS);
1717 }
1718 
1719 /* ARGSUSED */
1720 void
1721 i_ddi_mem_free(caddr_t kaddr, ddi_acc_hdl_t *ap)
1722 {
1723         if (ap != NULL) {
1724                 /*
1725                  * if we modified the cache attributes on alloc, go back and
1726                  * fix them since this memory could be returned to the
1727                  * general pool.
1728                  */
1729                 if (OVERRIDE_CACHE_ATTR(ap->ah_xfermodes)) {
1730                         uint_t order = 0;
1731                         int e;
1732                         i_ddi_cacheattr_to_hatacc(IOMEM_DATA_CACHED, &order);
1733                         e = kmem_override_cache_attrs(kaddr, ap->ah_len, order);
1734                         if (e != 0) {
1735                                 cmn_err(CE_WARN, "i_ddi_mem_free() failed to "
1736                                     "override cache attrs, memory leaked\n");
1737                                 return;
1738                         }
1739                 }
1740         }
1741         kfreea(kaddr);
1742 }
1743 
1744 /*
1745  * Access Barriers
1746  *
1747  */
/*
 * Always returns DDI_FAILURE in this implementation; the handle is not
 * examined.
 */
/*ARGSUSED*/
int
i_ddi_ontrap(ddi_acc_handle_t hp)
{
        return (DDI_FAILURE);
}
1754 
1755 /*ARGSUSED*/
1756 void
1757 i_ddi_notrap(ddi_acc_handle_t hp)
1758 {
1759 }
1760 
1761 
1762 /*
1763  * Misc Functions
1764  */
1765 
1766 /*
1767  * Implementation instance override functions
1768  *
1769  * No override on i86pc
1770  */
1771 /*ARGSUSED*/
1772 uint_t
1773 impl_assign_instance(dev_info_t *dip)
1774 {
1775         return ((uint_t)-1);
1776 }
1777 
1778 /*ARGSUSED*/
1779 int
1780 impl_keep_instance(dev_info_t *dip)
1781 {
1782 
1783 #if defined(__xpv)
1784         /*
1785          * Do not persist instance numbers assigned to devices in dom0
1786          */
1787         dev_info_t *pdip;
1788         if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1789                 if (((pdip = ddi_get_parent(dip)) != NULL) &&
1790                     (strcmp(ddi_get_name(pdip), "xpvd") == 0))
1791                         return (DDI_SUCCESS);
1792         }
1793 #endif
1794         return (DDI_FAILURE);
1795 }
1796 
1797 /*ARGSUSED*/
1798 int
1799 impl_free_instance(dev_info_t *dip)
1800 {
1801         return (DDI_FAILURE);
1802 }
1803 
1804 /*ARGSUSED*/
1805 int
1806 impl_check_cpu(dev_info_t *devi)
1807 {
1808         return (DDI_SUCCESS);
1809 }
1810 
/*
 * Referenced in common/cpr_driver.c: Power off machine.
 * Don't know how to power off i86pc, so this is an empty stub.
 *
 * The parameter list is spelled (void) so that the definition is a real
 * prototype rather than an old-style declaration with unspecified
 * parameters.
 */
void
arch_power_down(void)
{
}
1818 
1819 /*
1820  * Copy name to property_name, since name
1821  * is in the low address range below kernelbase.
1822  */
1823 static void
1824 copy_boot_str(const char *boot_str, char *kern_str, int len)
1825 {
1826         int i = 0;
1827 
1828         while (i < len - 1 && boot_str[i] != '\0') {
1829                 kern_str[i] = boot_str[i];
1830                 i++;
1831         }
1832 
1833         kern_str[i] = 0;        /* null terminate */
1834         if (boot_str[i] != '\0')
1835                 cmn_err(CE_WARN,
1836                     "boot property string is truncated to %s", kern_str);
1837 }
1838 
1839 static void
1840 get_boot_properties(void)
1841 {
1842         extern char hw_provider[];
1843         dev_info_t *devi;
1844         char *name;
1845         int length, flags;
1846         char property_name[50], property_val[50];
1847         void *bop_staging_area;
1848 
1849         bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP);
1850 
1851         /*
1852          * Import "root" properties from the boot.
1853          *
1854          * We do this by invoking BOP_NEXTPROP until the list
1855          * is completely copied in.
1856          */
1857 
1858         devi = ddi_root_node();
1859         for (name = BOP_NEXTPROP(bootops, "");          /* get first */
1860             name;                                       /* NULL => DONE */
1861             name = BOP_NEXTPROP(bootops, name)) {       /* get next */
1862 
1863                 /* copy string to memory above kernelbase */
1864                 copy_boot_str(name, property_name, 50);
1865 
1866                 /*
1867                  * Skip vga properties. They will be picked up later
1868                  * by get_vga_properties.
1869                  */
1870                 if (strcmp(property_name, "display-edif-block") == 0 ||
1871                     strcmp(property_name, "display-edif-id") == 0) {
1872                         continue;
1873                 }
1874 
1875                 length = BOP_GETPROPLEN(bootops, property_name);
1876                 if (length < 0)
1877                         continue;
1878                 if (length > MMU_PAGESIZE) {
1879                         cmn_err(CE_NOTE,
1880                             "boot property %s longer than 0x%x, ignored\n",
1881                             property_name, MMU_PAGESIZE);
1882                         continue;
1883                 }
1884                 BOP_GETPROP(bootops, property_name, bop_staging_area);
1885                 flags = do_bsys_getproptype(bootops, property_name);
1886 
1887                 /*
1888                  * special properties:
1889                  * si-machine, si-hw-provider
1890                  *      goes to kernel data structures.
1891                  * bios-boot-device and stdout
1892                  *      goes to hardware property list so it may show up
1893                  *      in the prtconf -vp output. This is needed by
1894                  *      Install/Upgrade. Once we fix install upgrade,
1895                  *      this can be taken out.
1896                  */
1897                 if (strcmp(name, "si-machine") == 0) {
1898                         (void) strncpy(utsname.machine, bop_staging_area,
1899                             SYS_NMLN);
1900                         utsname.machine[SYS_NMLN - 1] = '\0';
1901                         continue;
1902                 }
1903                 if (strcmp(name, "si-hw-provider") == 0) {
1904                         (void) strncpy(hw_provider, bop_staging_area, SYS_NMLN);
1905                         hw_provider[SYS_NMLN - 1] = '\0';
1906                         continue;
1907                 }
1908                 if (strcmp(name, "bios-boot-device") == 0) {
1909                         copy_boot_str(bop_staging_area, property_val, 50);
1910                         (void) ndi_prop_update_string(DDI_DEV_T_NONE, devi,
1911                             property_name, property_val);
1912                         continue;
1913                 }
1914                 if (strcmp(name, "stdout") == 0) {
1915                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi,
1916                             property_name, *((int *)bop_staging_area));
1917                         continue;
1918                 }
1919 
1920                 /* Boolean property */
1921                 if (length == 0) {
1922                         (void) e_ddi_prop_create(DDI_DEV_T_NONE, devi,
1923                             DDI_PROP_CANSLEEP, property_name, NULL, 0);
1924                         continue;
1925                 }
1926 
1927                 /* Now anything else based on type. */
1928                 switch (flags) {
1929                 case DDI_PROP_TYPE_INT:
1930                         if (length == sizeof (int)) {
1931                                 (void) e_ddi_prop_update_int(DDI_DEV_T_NONE,
1932                                     devi, property_name,
1933                                     *((int *)bop_staging_area));
1934                         } else {
1935                                 (void) e_ddi_prop_update_int_array(
1936                                     DDI_DEV_T_NONE, devi, property_name,
1937                                     bop_staging_area, length / sizeof (int));
1938                         }
1939                         break;
1940                 case DDI_PROP_TYPE_STRING:
1941                         (void) e_ddi_prop_update_string(DDI_DEV_T_NONE, devi,
1942                             property_name, bop_staging_area);
1943                         break;
1944                 case DDI_PROP_TYPE_BYTE:
1945                         (void) e_ddi_prop_update_byte_array(DDI_DEV_T_NONE,
1946                             devi, property_name, bop_staging_area, length);
1947                         break;
1948                 case DDI_PROP_TYPE_INT64:
1949                         if (length == sizeof (uint64_t)) {
1950                                 (void) e_ddi_prop_update_int64(DDI_DEV_T_NONE,
1951                                     devi, property_name,
1952                                     *((uint64_t *)bop_staging_area));
1953                         } else {
1954                                 (void) e_ddi_prop_update_int64_array(
1955                                     DDI_DEV_T_NONE, devi, property_name,
1956                                     bop_staging_area,
1957                                     length / sizeof (uint64_t));
1958                         }
1959                         break;
1960                 default:
1961                         /* Property type unknown, use old prop interface */
1962                         (void) e_ddi_prop_create(DDI_DEV_T_NONE, devi,
1963                             DDI_PROP_CANSLEEP, property_name, bop_staging_area,
1964                             length);
1965                 }
1966         }
1967 
1968         kmem_free(bop_staging_area, MMU_PAGESIZE);
1969 }
1970 
1971 static void
1972 get_vga_properties(void)
1973 {
1974         dev_info_t *devi;
1975         major_t major;
1976         char *name;
1977         int length;
1978         char property_val[50];
1979         void *bop_staging_area;
1980 
1981         /*
1982          * XXXX Hack Allert!
1983          * There really needs to be a better way for identifying various
1984          * console framebuffers and their related issues.  Till then,
1985          * check for this one as a replacement to vgatext.
1986          */
1987         major = ddi_name_to_major("ragexl");
1988         if (major == (major_t)-1) {
1989                 major = ddi_name_to_major("vgatext");
1990                 if (major == (major_t)-1)
1991                         return;
1992         }
1993         devi = devnamesp[major].dn_head;
1994         if (devi == NULL)
1995                 return;
1996 
1997         bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_SLEEP);
1998 
1999         /*
2000          * Import "vga" properties from the boot.
2001          */
2002         name = "display-edif-block";
2003         length = BOP_GETPROPLEN(bootops, name);
2004         if (length > 0 && length < MMU_PAGESIZE) {
2005                 BOP_GETPROP(bootops, name, bop_staging_area);
2006                 (void) ndi_prop_update_byte_array(DDI_DEV_T_NONE,
2007                     devi, name, bop_staging_area, length);
2008         }
2009 
2010         /*
2011          * kdmconfig is also looking for display-type and
2012          * video-adapter-type. We default to color and svga.
2013          *
2014          * Could it be "monochrome", "vga"?
2015          * Nah, you've got to come to the 21st century...
2016          * And you can set monitor type manually in kdmconfig
2017          * if you are really an old junky.
2018          */
2019         (void) ndi_prop_update_string(DDI_DEV_T_NONE,
2020             devi, "display-type", "color");
2021         (void) ndi_prop_update_string(DDI_DEV_T_NONE,
2022             devi, "video-adapter-type", "svga");
2023 
2024         name = "display-edif-id";
2025         length = BOP_GETPROPLEN(bootops, name);
2026         if (length > 0 && length < MMU_PAGESIZE) {
2027                 BOP_GETPROP(bootops, name, bop_staging_area);
2028                 copy_boot_str(bop_staging_area, property_val, length);
2029                 (void) ndi_prop_update_string(DDI_DEV_T_NONE,
2030                     devi, name, property_val);
2031         }
2032 
2033         kmem_free(bop_staging_area, MMU_PAGESIZE);
2034 }
2035 
2036 /*
2037  * Copy console font to kernel memory. The temporary font setup
2038  * to use font module was done in early console setup, using low
2039  * memory and data from font module. Now we need to allocate
2040  * kernel memory and copy data over, so the low memory can be freed.
2041  * We can have at most one entry in font list from early boot.
2042  */
2043 static void
2044 get_console_font(void)
2045 {
2046         struct fontlist *fp, *fl;
2047         bitmap_data_t *bd;
2048         struct font *fd, *tmp;
2049         int i;
2050 
2051         if (STAILQ_EMPTY(&fonts))
2052                 return;
2053 
2054         fl = STAILQ_FIRST(&fonts);
2055         STAILQ_REMOVE_HEAD(&fonts, font_next);
2056         fp = kmem_zalloc(sizeof (*fp), KM_SLEEP);
2057         bd = kmem_zalloc(sizeof (*bd), KM_SLEEP);
2058         fd = kmem_zalloc(sizeof (*fd), KM_SLEEP);
2059 
2060         fp->font_name = NULL;
2061         fp->font_flags = FONT_BOOT;
2062         fp->font_data = bd;
2063 
2064         bd->width = fl->font_data->width;
2065         bd->height = fl->font_data->height;
2066         bd->uncompressed_size = fl->font_data->uncompressed_size;
2067         bd->font = fd;
2068 
2069         tmp = fl->font_data->font;
2070         fd->vf_width = tmp->vf_width;
2071         fd->vf_height = tmp->vf_height;
2072         for (i = 0; i < VFNT_MAPS; i++) {
2073                 if (tmp->vf_map_count[i] == 0)
2074                         continue;
2075                 fd->vf_map_count[i] = tmp->vf_map_count[i];
2076                 fd->vf_map[i] = kmem_alloc(fd->vf_map_count[i] *
2077                     sizeof (*fd->vf_map[i]), KM_SLEEP);
2078                 bcopy(tmp->vf_map[i], fd->vf_map[i], fd->vf_map_count[i] *
2079                     sizeof (*fd->vf_map[i]));
2080         }
2081         fd->vf_bytes = kmem_alloc(bd->uncompressed_size, KM_SLEEP);
2082         bcopy(tmp->vf_bytes, fd->vf_bytes, bd->uncompressed_size);
2083         STAILQ_INSERT_HEAD(&fonts, fp, font_next);
2084 }
2085 
2086 /*
2087  * This is temporary, but absolutely necessary.  If we are being
2088  * booted with a device tree created by the DevConf project's bootconf
2089  * program, then we have device information nodes that reflect
2090  * reality.  At this point in time in the Solaris release schedule, the
2091  * kernel drivers aren't prepared for reality.  They still depend on their
2092  * own ad-hoc interpretations of the properties created when their .conf
2093  * files were interpreted. These drivers use an "ignore-hardware-nodes"
2094  * property to prevent them from using the nodes passed up from the bootconf
2095  * device tree.
2096  *
2097  * Trying to assemble root file system drivers as we are booting from
2098  * devconf will fail if the kernel driver is basing its name_addr's on the
2099  * psuedo-node device info while the bootpath passed up from bootconf is using
2100  * reality-based name_addrs.  We help the boot along in this case by
2101  * looking at the pre-bootconf bootpath and determining if we would have
2102  * successfully matched if that had been the bootpath we had chosen.
2103  *
2104  * Note that we only even perform this extra check if we've booted
2105  * using bootconf's 1275 compliant bootpath, this is the boot device, and
2106  * we're trying to match the name_addr specified in the 1275 bootpath.
2107  */
2108 
2109 #define MAXCOMPONENTLEN 32
2110 
2111 int
2112 x86_old_bootpath_name_addr_match(dev_info_t *cdip, char *caddr, char *naddr)
2113 {
2114         /*
2115          *  There are multiple criteria to be met before we can even
2116          *  consider allowing a name_addr match here.
2117          *
2118          *  1) We must have been booted such that the bootconf program
2119          *      created device tree nodes and properties.  This can be
2120          *      determined by examining the 'bootpath' property.  This
2121          *      property will be a non-null string iff bootconf was
2122          *      involved in the boot.
2123          *
2124          *  2) The module that we want to match must be the boot device.
2125          *
2126          *  3) The instance of the module we are thinking of letting be
2127          *      our match must be ignoring hardware nodes.
2128          *
2129          *  4) The name_addr we want to match must be the name_addr
2130          *      specified in the 1275 bootpath.
2131          */
2132         static char bootdev_module[MAXCOMPONENTLEN];
2133         static char bootdev_oldmod[MAXCOMPONENTLEN];
2134         static char bootdev_newaddr[MAXCOMPONENTLEN];
2135         static char bootdev_oldaddr[MAXCOMPONENTLEN];
2136         static int  quickexit;
2137 
2138         char *daddr;
2139         int dlen;
2140 
2141         char    *lkupname;
2142         int     rv = DDI_FAILURE;
2143 
2144         if ((ddi_getlongprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2145             "devconf-addr", (caddr_t)&daddr, &dlen) == DDI_PROP_SUCCESS) &&
2146             (ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2147             "ignore-hardware-nodes", -1) != -1)) {
2148                 if (strcmp(daddr, caddr) == 0) {
2149                         return (DDI_SUCCESS);
2150                 }
2151         }
2152 
2153         if (quickexit)
2154                 return (rv);
2155 
2156         if (bootdev_module[0] == '\0') {
2157                 char *addrp, *eoaddrp;
2158                 char *busp, *modp, *atp;
2159                 char *bp1275, *bp;
2160                 int  bp1275len, bplen;
2161 
2162                 bp1275 = bp = addrp = eoaddrp = busp = modp = atp = NULL;
2163 
2164                 if (ddi_getlongprop(DDI_DEV_T_ANY,
2165                     ddi_root_node(), 0, "bootpath",
2166                     (caddr_t)&bp1275, &bp1275len) != DDI_PROP_SUCCESS ||
2167                     bp1275len <= 1) {
2168                         /*
2169                          * We didn't boot from bootconf so we never need to
2170                          * do any special matches.
2171                          */
2172                         quickexit = 1;
2173                         if (bp1275)
2174                                 kmem_free(bp1275, bp1275len);
2175                         return (rv);
2176                 }
2177 
2178                 if (ddi_getlongprop(DDI_DEV_T_ANY,
2179                     ddi_root_node(), 0, "boot-path",
2180                     (caddr_t)&bp, &bplen) != DDI_PROP_SUCCESS || bplen <= 1) {
2181                         /*
2182                          * No fallback position for matching. This is
2183                          * certainly unexpected, but we'll handle it
2184                          * just in case.
2185                          */
2186                         quickexit = 1;
2187                         kmem_free(bp1275, bp1275len);
2188                         if (bp)
2189                                 kmem_free(bp, bplen);
2190                         return (rv);
2191                 }
2192 
2193                 /*
2194                  *  Determine boot device module and 1275 name_addr
2195                  *
2196                  *  bootpath assumed to be of the form /bus/module@name_addr
2197                  */
2198                 if (busp = strchr(bp1275, '/')) {
2199                         if (modp = strchr(busp + 1, '/')) {
2200                                 if (atp = strchr(modp + 1, '@')) {
2201                                         *atp = '\0';
2202                                         addrp = atp + 1;
2203                                         if (eoaddrp = strchr(addrp, '/'))
2204                                                 *eoaddrp = '\0';
2205                                 }
2206                         }
2207                 }
2208 
2209                 if (modp && addrp) {
2210                         (void) strncpy(bootdev_module, modp + 1,
2211                             MAXCOMPONENTLEN);
2212                         bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2213 
2214                         (void) strncpy(bootdev_newaddr, addrp, MAXCOMPONENTLEN);
2215                         bootdev_newaddr[MAXCOMPONENTLEN - 1] = '\0';
2216                 } else {
2217                         quickexit = 1;
2218                         kmem_free(bp1275, bp1275len);
2219                         kmem_free(bp, bplen);
2220                         return (rv);
2221                 }
2222 
2223                 /*
2224                  *  Determine fallback name_addr
2225                  *
2226                  *  10/3/96 - Also save fallback module name because it
2227                  *  might actually be different than the current module
2228                  *  name.  E.G., ISA pnp drivers have new names.
2229                  *
2230                  *  bootpath assumed to be of the form /bus/module@name_addr
2231                  */
2232                 addrp = NULL;
2233                 if (busp = strchr(bp, '/')) {
2234                         if (modp = strchr(busp + 1, '/')) {
2235                                 if (atp = strchr(modp + 1, '@')) {
2236                                         *atp = '\0';
2237                                         addrp = atp + 1;
2238                                         if (eoaddrp = strchr(addrp, '/'))
2239                                                 *eoaddrp = '\0';
2240                                 }
2241                         }
2242                 }
2243 
2244                 if (modp && addrp) {
2245                         (void) strncpy(bootdev_oldmod, modp + 1,
2246                             MAXCOMPONENTLEN);
2247                         bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2248 
2249                         (void) strncpy(bootdev_oldaddr, addrp, MAXCOMPONENTLEN);
2250                         bootdev_oldaddr[MAXCOMPONENTLEN - 1] = '\0';
2251                 }
2252 
2253                 /* Free up the bootpath storage now that we're done with it. */
2254                 kmem_free(bp1275, bp1275len);
2255                 kmem_free(bp, bplen);
2256 
2257                 if (bootdev_oldaddr[0] == '\0') {
2258                         quickexit = 1;
2259                         return (rv);
2260                 }
2261         }
2262 
2263         if (((lkupname = ddi_get_name(cdip)) != NULL) &&
2264             (strcmp(bootdev_module, lkupname) == 0 ||
2265             strcmp(bootdev_oldmod, lkupname) == 0) &&
2266             ((ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2267             "ignore-hardware-nodes", -1) != -1) ||
2268             ignore_hardware_nodes) &&
2269             strcmp(bootdev_newaddr, caddr) == 0 &&
2270             strcmp(bootdev_oldaddr, naddr) == 0) {
2271                 rv = DDI_SUCCESS;
2272         }
2273 
2274         return (rv);
2275 }
2276 
2277 /*
2278  * Perform a copy from a memory mapped device (whose devinfo pointer is devi)
2279  * separately mapped at devaddr in the kernel to a kernel buffer at kaddr.
2280  */
2281 /*ARGSUSED*/
2282 int
2283 e_ddi_copyfromdev(dev_info_t *devi,
2284     off_t off, const void *devaddr, void *kaddr, size_t len)
2285 {
2286         bcopy(devaddr, kaddr, len);
2287         return (0);
2288 }
2289 
2290 /*
2291  * Perform a copy to a memory mapped device (whose devinfo pointer is devi)
2292  * separately mapped at devaddr in the kernel from a kernel buffer at kaddr.
2293  */
2294 /*ARGSUSED*/
2295 int
2296 e_ddi_copytodev(dev_info_t *devi,
2297     off_t off, const void *kaddr, void *devaddr, size_t len)
2298 {
2299         bcopy(kaddr, devaddr, len);
2300         return (0);
2301 }
2302 
2303 
2304 static int
2305 poke_mem(peekpoke_ctlops_t *in_args)
2306 {
2307         int err = DDI_SUCCESS;
2308         on_trap_data_t otd;
2309 
2310         /* Set up protected environment. */
2311         if (!on_trap(&otd, OT_DATA_ACCESS)) {
2312                 switch (in_args->size) {
2313                 case sizeof (uint8_t):
2314                         *(uint8_t *)(in_args->dev_addr) =
2315                             *(uint8_t *)in_args->host_addr;
2316                         break;
2317 
2318                 case sizeof (uint16_t):
2319                         *(uint16_t *)(in_args->dev_addr) =
2320                             *(uint16_t *)in_args->host_addr;
2321                         break;
2322 
2323                 case sizeof (uint32_t):
2324                         *(uint32_t *)(in_args->dev_addr) =
2325                             *(uint32_t *)in_args->host_addr;
2326                         break;
2327 
2328                 case sizeof (uint64_t):
2329                         *(uint64_t *)(in_args->dev_addr) =
2330                             *(uint64_t *)in_args->host_addr;
2331                         break;
2332 
2333                 default:
2334                         err = DDI_FAILURE;
2335                         break;
2336                 }
2337         } else
2338                 err = DDI_FAILURE;
2339 
2340         /* Take down protected environment. */
2341         no_trap();
2342 
2343         return (err);
2344 }
2345 
2346 
2347 static int
2348 peek_mem(peekpoke_ctlops_t *in_args)
2349 {
2350         int err = DDI_SUCCESS;
2351         on_trap_data_t otd;
2352 
2353         if (!on_trap(&otd, OT_DATA_ACCESS)) {
2354                 switch (in_args->size) {
2355                 case sizeof (uint8_t):
2356                         *(uint8_t *)in_args->host_addr =
2357                             *(uint8_t *)in_args->dev_addr;
2358                         break;
2359 
2360                 case sizeof (uint16_t):
2361                         *(uint16_t *)in_args->host_addr =
2362                             *(uint16_t *)in_args->dev_addr;
2363                         break;
2364 
2365                 case sizeof (uint32_t):
2366                         *(uint32_t *)in_args->host_addr =
2367                             *(uint32_t *)in_args->dev_addr;
2368                         break;
2369 
2370                 case sizeof (uint64_t):
2371                         *(uint64_t *)in_args->host_addr =
2372                             *(uint64_t *)in_args->dev_addr;
2373                         break;
2374 
2375                 default:
2376                         err = DDI_FAILURE;
2377                         break;
2378                 }
2379         } else
2380                 err = DDI_FAILURE;
2381 
2382         no_trap();
2383         return (err);
2384 }
2385 
2386 
2387 /*
2388  * This is called only to process peek/poke when the DIP is NULL.
2389  * Assume that this is for memory, as nexi take care of device safe accesses.
2390  */
2391 int
2392 peekpoke_mem(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args)
2393 {
2394         return (cmd == DDI_CTLOPS_PEEK ? peek_mem(in_args) : poke_mem(in_args));
2395 }
2396 
2397 /*
2398  * we've just done a cautious put/get. Check if it was successful by
2399  * calling pci_ereport_post() on all puts and for any gets that return -1
2400  */
2401 static int
2402 pci_peekpoke_check_fma(dev_info_t *dip, void *arg, ddi_ctl_enum_t ctlop,
2403     void (*scan)(dev_info_t *, ddi_fm_error_t *))
2404 {
2405         int     rval = DDI_SUCCESS;
2406         peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2407         ddi_fm_error_t de;
2408         ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2409         ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2410         int check_err = 0;
2411         int repcount = in_args->repcount;
2412 
2413         if (ctlop == DDI_CTLOPS_POKE &&
2414             hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC)
2415                 return (DDI_SUCCESS);
2416 
2417         if (ctlop == DDI_CTLOPS_PEEK &&
2418             hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC) {
2419                 for (; repcount; repcount--) {
2420                         switch (in_args->size) {
2421                         case sizeof (uint8_t):
2422                                 if (*(uint8_t *)in_args->host_addr == 0xff)
2423                                         check_err = 1;
2424                                 break;
2425                         case sizeof (uint16_t):
2426                                 if (*(uint16_t *)in_args->host_addr == 0xffff)
2427                                         check_err = 1;
2428                                 break;
2429                         case sizeof (uint32_t):
2430                                 if (*(uint32_t *)in_args->host_addr ==
2431                                     0xffffffff)
2432                                         check_err = 1;
2433                                 break;
2434                         case sizeof (uint64_t):
2435                                 if (*(uint64_t *)in_args->host_addr ==
2436                                     0xffffffffffffffff)
2437                                         check_err = 1;
2438                                 break;
2439                         }
2440                 }
2441                 if (check_err == 0)
2442                         return (DDI_SUCCESS);
2443         }
2444         /*
2445          * for a cautious put or get or a non-cautious get that returned -1 call
2446          * io framework to see if there really was an error
2447          */
2448         bzero(&de, sizeof (ddi_fm_error_t));
2449         de.fme_version = DDI_FME_VERSION;
2450         de.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
2451         if (hdlp->ah_acc.devacc_attr_access == DDI_CAUTIOUS_ACC) {
2452                 de.fme_flag = DDI_FM_ERR_EXPECTED;
2453                 de.fme_acc_handle = in_args->handle;
2454         } else if (hdlp->ah_acc.devacc_attr_access == DDI_DEFAULT_ACC) {
2455                 /*
2456                  * We only get here with DDI_DEFAULT_ACC for config space gets.
2457                  * Non-hardened drivers may be probing the hardware and
2458                  * expecting -1 returned. So need to treat errors on
2459                  * DDI_DEFAULT_ACC as DDI_FM_ERR_EXPECTED.
2460                  */
2461                 de.fme_flag = DDI_FM_ERR_EXPECTED;
2462                 de.fme_acc_handle = in_args->handle;
2463         } else {
2464                 /*
2465                  * Hardened driver doing protected accesses shouldn't
2466                  * get errors unless there's a hardware problem. Treat
2467                  * as nonfatal if there's an error, but set UNEXPECTED
2468                  * so we raise ereports on any errors and potentially
2469                  * fault the device
2470                  */
2471                 de.fme_flag = DDI_FM_ERR_UNEXPECTED;
2472         }
2473         (void) scan(dip, &de);
2474         if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2475             de.fme_status != DDI_FM_OK) {
2476                 ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2477                 rval = DDI_FAILURE;
2478                 errp->err_ena = de.fme_ena;
2479                 errp->err_expected = de.fme_flag;
2480                 errp->err_status = DDI_FM_NONFATAL;
2481         }
2482         return (rval);
2483 }
2484 
2485 /*
2486  * pci_peekpoke_check_nofma() is for when an error occurs on a register access
2487  * during pci_ereport_post(). We can't call pci_ereport_post() again or we'd
2488  * recurse, so assume all puts are OK and gets have failed if they return -1
2489  */
2490 static int
2491 pci_peekpoke_check_nofma(void *arg, ddi_ctl_enum_t ctlop)
2492 {
2493         int rval = DDI_SUCCESS;
2494         peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2495         ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2496         ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2497         int repcount = in_args->repcount;
2498 
2499         if (ctlop == DDI_CTLOPS_POKE)
2500                 return (rval);
2501 
2502         for (; repcount; repcount--) {
2503                 switch (in_args->size) {
2504                 case sizeof (uint8_t):
2505                         if (*(uint8_t *)in_args->host_addr == 0xff)
2506                                 rval = DDI_FAILURE;
2507                         break;
2508                 case sizeof (uint16_t):
2509                         if (*(uint16_t *)in_args->host_addr == 0xffff)
2510                                 rval = DDI_FAILURE;
2511                         break;
2512                 case sizeof (uint32_t):
2513                         if (*(uint32_t *)in_args->host_addr == 0xffffffff)
2514                                 rval = DDI_FAILURE;
2515                         break;
2516                 case sizeof (uint64_t):
2517                         if (*(uint64_t *)in_args->host_addr ==
2518                             0xffffffffffffffff)
2519                                 rval = DDI_FAILURE;
2520                         break;
2521                 }
2522         }
2523         if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2524             rval == DDI_FAILURE) {
2525                 ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2526                 errp->err_ena = fm_ena_generate(0, FM_ENA_FMT1);
2527                 errp->err_expected = DDI_FM_ERR_UNEXPECTED;
2528                 errp->err_status = DDI_FM_NONFATAL;
2529         }
2530         return (rval);
2531 }
2532 
2533 int
2534 pci_peekpoke_check(dev_info_t *dip, dev_info_t *rdip,
2535     ddi_ctl_enum_t ctlop, void *arg, void *result,
2536     int (*handler)(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
2537     void *), kmutex_t *err_mutexp, kmutex_t *peek_poke_mutexp,
2538     void (*scan)(dev_info_t *, ddi_fm_error_t *))
2539 {
2540         int rval;
2541         peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2542         ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2543 
2544         /*
2545          * this function only supports cautious accesses, not peeks/pokes
2546          * which don't have a handle
2547          */
2548         if (hp == NULL)
2549                 return (DDI_FAILURE);
2550 
2551         if (hp->ahi_acc_attr & DDI_ACCATTR_CONFIG_SPACE) {
2552                 if (!mutex_tryenter(err_mutexp)) {
2553                         /*
2554                          * As this may be a recursive call from within
2555                          * pci_ereport_post() we can't wait for the mutexes.
2556                          * Fortunately we know someone is already calling
2557                          * pci_ereport_post() which will handle the error bits
2558                          * for us, and as this is a config space access we can
2559                          * just do the access and check return value for -1
2560                          * using pci_peekpoke_check_nofma().
2561                          */
2562                         rval = handler(dip, rdip, ctlop, arg, result);
2563                         if (rval == DDI_SUCCESS)
2564                                 rval = pci_peekpoke_check_nofma(arg, ctlop);
2565                         return (rval);
2566                 }
2567                 /*
2568                  * This can't be a recursive call. Drop the err_mutex and get
2569                  * both mutexes in the right order. If an error hasn't already
2570                  * been detected by the ontrap code, use pci_peekpoke_check_fma
2571                  * which will call pci_ereport_post() to check error status.
2572                  */
2573                 mutex_exit(err_mutexp);
2574         }
2575         mutex_enter(peek_poke_mutexp);
2576         rval = handler(dip, rdip, ctlop, arg, result);
2577         if (rval == DDI_SUCCESS) {
2578                 mutex_enter(err_mutexp);
2579                 rval = pci_peekpoke_check_fma(dip, arg, ctlop, scan);
2580                 mutex_exit(err_mutexp);
2581         }
2582         mutex_exit(peek_poke_mutexp);
2583         return (rval);
2584 }
2585 
2586 void
2587 impl_setup_ddi(void)
2588 {
2589 #if !defined(__xpv)
2590         extern void startup_bios_disk(void);
2591         extern int post_fastreboot;
2592 #endif
2593         dev_info_t *xdip, *isa_dip;
2594         rd_existing_t rd_mem_prop;
2595         int err;
2596 
2597         ndi_devi_alloc_sleep(ddi_root_node(), "ramdisk",
2598             (pnode_t)DEVI_SID_NODEID, &xdip);
2599 
2600         (void) BOP_GETPROP(bootops,
2601             "ramdisk_start", (void *)&ramdisk_start);
2602         (void) BOP_GETPROP(bootops,
2603             "ramdisk_end", (void *)&ramdisk_end);
2604 
2605 #ifdef __xpv
2606         ramdisk_start -= ONE_GIG;
2607         ramdisk_end -= ONE_GIG;
2608 #endif
2609         rd_mem_prop.phys = ramdisk_start;
2610         rd_mem_prop.size = ramdisk_end - ramdisk_start + 1;
2611 
2612         (void) ndi_prop_update_byte_array(DDI_DEV_T_NONE, xdip,
2613             RD_EXISTING_PROP_NAME, (uchar_t *)&rd_mem_prop,
2614             sizeof (rd_mem_prop));
2615         err = ndi_devi_bind_driver(xdip, 0);
2616         ASSERT(err == 0);
2617 
2618         /* isa node */
2619         if (pseudo_isa) {
2620                 ndi_devi_alloc_sleep(ddi_root_node(), "isa",
2621                     (pnode_t)DEVI_SID_NODEID, &isa_dip);
2622                 (void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2623                     "device_type", "isa");
2624                 (void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2625                     "bus-type", "isa");
2626                 (void) ndi_devi_bind_driver(isa_dip, 0);
2627         }
2628 
2629         /*
2630          * Read in the properties from the boot.
2631          */
2632         get_boot_properties();
2633 
2634         /* not framebuffer should be enumerated, if present */
2635         get_vga_properties();
2636 
2637         /* Copy console font if provided by boot. */
2638         get_console_font();
2639 
2640         /*
2641          * Check for administratively disabled drivers.
2642          */
2643         check_driver_disable();
2644 
2645 #if !defined(__xpv)
2646         if (!post_fastreboot && BOP_GETPROPLEN(bootops, "efi-systab") < 0)
2647                 startup_bios_disk();
2648 #endif
2649         /* do bus dependent probes. */
2650         impl_bus_initialprobe();
2651 }
2652 
2653 dev_t
2654 getrootdev(void)
2655 {
2656         /*
2657          * Usually rootfs.bo_name is initialized by the
2658          * the bootpath property from bootenv.rc, but
2659          * defaults to "/ramdisk:a" otherwise.
2660          */
2661         return (ddi_pathname_to_dev_t(rootfs.bo_name));
2662 }
2663 
/*
 * One registered bus probe callback.  Entries are chained through next into
 * a singly linked list.
 */
struct bus_probe {
        struct bus_probe *next;         /* following entry, NULL at the tail */
        void (*probe)(int);             /* the callback itself */
};

/* head of the list of registered bus probes; NULL while the list is empty */
static struct bus_probe *bus_probes;
2668 
2669 void
2670 impl_bus_add_probe(void (*func)(int))
2671 {
2672         struct bus_probe *probe;
2673         struct bus_probe *lastprobe = NULL;
2674 
2675         probe = kmem_alloc(sizeof (*probe), KM_SLEEP);
2676         probe->probe = func;
2677         probe->next = NULL;
2678 
2679         if (!bus_probes) {
2680                 bus_probes = probe;
2681                 return;
2682         }
2683 
2684         lastprobe = bus_probes;
2685         while (lastprobe->next)
2686                 lastprobe = lastprobe->next;
2687         lastprobe->next = probe;
2688 }
2689 
2690 /*ARGSUSED*/
2691 void
2692 impl_bus_delete_probe(void (*func)(int))
2693 {
2694         struct bus_probe *prev = NULL;
2695         struct bus_probe *probe = bus_probes;
2696 
2697         while (probe) {
2698                 if (probe->probe == func)
2699                         break;
2700                 prev = probe;
2701                 probe = probe->next;
2702         }
2703 
2704         if (probe == NULL)
2705                 return;
2706 
2707         if (prev)
2708                 prev->next = probe->next;
2709         else
2710                 bus_probes = probe->next;
2711 
2712         kmem_free(probe, sizeof (struct bus_probe));
2713 }
2714 
2715 /*
2716  * impl_bus_initialprobe
2717  *      Modload the prom simulator, then let it probe to verify existence
2718  *      and type of PCI support.
2719  */
2720 static void
2721 impl_bus_initialprobe(void)
2722 {
2723         struct bus_probe *probe;
2724 
2725         /* load modules to install bus probes */
2726 #if defined(__xpv)
2727         if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2728                 if (modload("misc", "pci_autoconfig") < 0) {
2729                         panic("failed to load misc/pci_autoconfig");
2730                 }
2731 
2732                 if (modload("drv", "isa") < 0)
2733                         panic("failed to load drv/isa");
2734         }
2735 
2736         (void) modload("misc", "xpv_autoconfig");
2737 #else
2738         if (modload("misc", "pci_autoconfig") < 0) {
2739                 panic("failed to load misc/pci_autoconfig");
2740         }
2741 
2742         (void) modload("misc", "acpidev");
2743 
2744         if (modload("drv", "isa") < 0)
2745                 panic("failed to load drv/isa");
2746 #endif
2747 
2748         probe = bus_probes;
2749         while (probe) {
2750                 /* run the probe functions */
2751                 (*probe->probe)(0);
2752                 probe = probe->next;
2753         }
2754 }
2755 
2756 /*
2757  * impl_bus_reprobe
2758  *      Reprogram devices not set up by firmware.
2759  */
2760 static void
2761 impl_bus_reprobe(void)
2762 {
2763         struct bus_probe *probe;
2764 
2765         probe = bus_probes;
2766         while (probe) {
2767                 /* run the probe function */
2768                 (*probe->probe)(1);
2769                 probe = probe->next;
2770         }
2771 }
2772 
2773 
2774 /*
2775  * The following functions ready a cautious request to go up to the nexus
2776  * driver.  It is up to the nexus driver to decide how to process the request.
2777  * It may choose to call i_ddi_do_caut_get/put in this file, or do it
2778  * differently.
2779  */
2780 
2781 static void
2782 i_ddi_caut_getput_ctlops(ddi_acc_impl_t *hp, uint64_t host_addr,
2783     uint64_t dev_addr, size_t size, size_t repcount, uint_t flags,
2784     ddi_ctl_enum_t cmd)
2785 {
2786         peekpoke_ctlops_t       cautacc_ctlops_arg;
2787 
2788         cautacc_ctlops_arg.size = size;
2789         cautacc_ctlops_arg.dev_addr = dev_addr;
2790         cautacc_ctlops_arg.host_addr = host_addr;
2791         cautacc_ctlops_arg.handle = (ddi_acc_handle_t)hp;
2792         cautacc_ctlops_arg.repcount = repcount;
2793         cautacc_ctlops_arg.flags = flags;
2794 
2795         (void) ddi_ctlops(hp->ahi_common.ah_dip, hp->ahi_common.ah_dip, cmd,
2796             &cautacc_ctlops_arg, NULL);
2797 }
2798 
2799 uint8_t
2800 i_ddi_caut_get8(ddi_acc_impl_t *hp, uint8_t *addr)
2801 {
2802         uint8_t value;
2803         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2804             sizeof (uint8_t), 1, 0, DDI_CTLOPS_PEEK);
2805 
2806         return (value);
2807 }
2808 
2809 uint16_t
2810 i_ddi_caut_get16(ddi_acc_impl_t *hp, uint16_t *addr)
2811 {
2812         uint16_t value;
2813         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2814             sizeof (uint16_t), 1, 0, DDI_CTLOPS_PEEK);
2815 
2816         return (value);
2817 }
2818 
2819 uint32_t
2820 i_ddi_caut_get32(ddi_acc_impl_t *hp, uint32_t *addr)
2821 {
2822         uint32_t value;
2823         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2824             sizeof (uint32_t), 1, 0, DDI_CTLOPS_PEEK);
2825 
2826         return (value);
2827 }
2828 
2829 uint64_t
2830 i_ddi_caut_get64(ddi_acc_impl_t *hp, uint64_t *addr)
2831 {
2832         uint64_t value;
2833         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2834             sizeof (uint64_t), 1, 0, DDI_CTLOPS_PEEK);
2835 
2836         return (value);
2837 }
2838 
2839 void
2840 i_ddi_caut_put8(ddi_acc_impl_t *hp, uint8_t *addr, uint8_t value)
2841 {
2842         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2843             sizeof (uint8_t), 1, 0, DDI_CTLOPS_POKE);
2844 }
2845 
2846 void
2847 i_ddi_caut_put16(ddi_acc_impl_t *hp, uint16_t *addr, uint16_t value)
2848 {
2849         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2850             sizeof (uint16_t), 1, 0, DDI_CTLOPS_POKE);
2851 }
2852 
2853 void
2854 i_ddi_caut_put32(ddi_acc_impl_t *hp, uint32_t *addr, uint32_t value)
2855 {
2856         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2857             sizeof (uint32_t), 1, 0, DDI_CTLOPS_POKE);
2858 }
2859 
2860 void
2861 i_ddi_caut_put64(ddi_acc_impl_t *hp, uint64_t *addr, uint64_t value)
2862 {
2863         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2864             sizeof (uint64_t), 1, 0, DDI_CTLOPS_POKE);
2865 }
2866 
2867 void
2868 i_ddi_caut_rep_get8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2869     size_t repcount, uint_t flags)
2870 {
2871         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2872             sizeof (uint8_t), repcount, flags, DDI_CTLOPS_PEEK);
2873 }
2874 
2875 void
2876 i_ddi_caut_rep_get16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2877     uint16_t *dev_addr, size_t repcount, uint_t flags)
2878 {
2879         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2880             sizeof (uint16_t), repcount, flags, DDI_CTLOPS_PEEK);
2881 }
2882 
2883 void
2884 i_ddi_caut_rep_get32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2885     uint32_t *dev_addr, size_t repcount, uint_t flags)
2886 {
2887         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2888             sizeof (uint32_t), repcount, flags, DDI_CTLOPS_PEEK);
2889 }
2890 
2891 void
2892 i_ddi_caut_rep_get64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2893     uint64_t *dev_addr, size_t repcount, uint_t flags)
2894 {
2895         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2896             sizeof (uint64_t), repcount, flags, DDI_CTLOPS_PEEK);
2897 }
2898 
2899 void
2900 i_ddi_caut_rep_put8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2901     size_t repcount, uint_t flags)
2902 {
2903         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2904             sizeof (uint8_t), repcount, flags, DDI_CTLOPS_POKE);
2905 }
2906 
2907 void
2908 i_ddi_caut_rep_put16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2909     uint16_t *dev_addr, size_t repcount, uint_t flags)
2910 {
2911         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2912             sizeof (uint16_t), repcount, flags, DDI_CTLOPS_POKE);
2913 }
2914 
2915 void
2916 i_ddi_caut_rep_put32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2917     uint32_t *dev_addr, size_t repcount, uint_t flags)
2918 {
2919         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2920             sizeof (uint32_t), repcount, flags, DDI_CTLOPS_POKE);
2921 }
2922 
2923 void
2924 i_ddi_caut_rep_put64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2925     uint64_t *dev_addr, size_t repcount, uint_t flags)
2926 {
2927         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2928             sizeof (uint64_t), repcount, flags, DDI_CTLOPS_POKE);
2929 }
2930 
2931 boolean_t
2932 i_ddi_copybuf_required(ddi_dma_attr_t *attrp)
2933 {
2934         uint64_t hi_pa;
2935 
2936         hi_pa = ((uint64_t)physmax + 1ull) << PAGESHIFT;
2937         if (attrp->dma_attr_addr_hi < hi_pa) {
2938                 return (B_TRUE);
2939         }
2940 
2941         return (B_FALSE);
2942 }
2943 
2944 size_t
2945 i_ddi_copybuf_size()
2946 {
2947         return (dma_max_copybuf_size);
2948 }
2949 
2950 /*
2951  * i_ddi_dma_max()
2952  *    returns the maximum DMA size which can be performed in a single DMA
2953  *    window taking into account the devices DMA contraints (attrp), the
2954  *    maximum copy buffer size (if applicable), and the worse case buffer
2955  *    fragmentation.
2956  */
2957 /*ARGSUSED*/
2958 uint32_t
2959 i_ddi_dma_max(dev_info_t *dip, ddi_dma_attr_t *attrp)
2960 {
2961         uint64_t maxxfer;
2962 
2963 
2964         /*
2965          * take the min of maxxfer and the the worse case fragementation
2966          * (e.g. every cookie <= 1 page)
2967          */
2968         maxxfer = MIN(attrp->dma_attr_maxxfer,
2969             ((uint64_t)(attrp->dma_attr_sgllen - 1) << PAGESHIFT));
2970 
2971         /*
2972          * If the DMA engine can't reach all off memory, we also need to take
2973          * the max size of the copybuf into consideration.
2974          */
2975         if (i_ddi_copybuf_required(attrp)) {
2976                 maxxfer = MIN(i_ddi_copybuf_size(), maxxfer);
2977         }
2978 
2979         /*
2980          * we only return a 32-bit value. Make sure it's not -1. Round to a
2981          * page so it won't be mistaken for an error value during debug.
2982          */
2983         if (maxxfer >= 0xFFFFFFFF) {
2984                 maxxfer = 0xFFFFF000;
2985         }
2986 
2987         /*
2988          * make sure the value we return is a whole multiple of the
2989          * granlarity.
2990          */
2991         if (attrp->dma_attr_granular > 1) {
2992                 maxxfer = maxxfer - (maxxfer % attrp->dma_attr_granular);
2993         }
2994 
2995         return ((uint32_t)maxxfer);
2996 }
2997 
/*
 * translate_devid
 *      No-op in this file: dip is accepted but not used, and nothing is
 *      returned.
 */
/*ARGSUSED*/
void
translate_devid(dev_info_t *dip)
{
}
3003 
3004 pfn_t
3005 i_ddi_paddr_to_pfn(paddr_t paddr)
3006 {
3007         pfn_t pfn;
3008 
3009 #ifdef __xpv
3010         if (DOMAIN_IS_INITDOMAIN(xen_info)) {
3011                 pfn = xen_assign_pfn(mmu_btop(paddr));
3012         } else {
3013                 pfn = mmu_btop(paddr);
3014         }
3015 #else
3016         pfn = mmu_btop(paddr);
3017 #endif
3018 
3019         return (pfn);
3020 }