1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2012 Garrett D'Amore <garrett@damore.org>
  25  * Copyright 2014 Pluribus Networks, Inc.
  26  * Copyright 2016 Nexenta Systems, Inc.
  27  */
  28 
  29 /*
  30  * PC specific DDI implementation
  31  */
  32 #include <sys/types.h>
  33 #include <sys/autoconf.h>
  34 #include <sys/avintr.h>
  35 #include <sys/bootconf.h>
  36 #include <sys/conf.h>
  37 #include <sys/cpuvar.h>
  38 #include <sys/ddi_impldefs.h>
  39 #include <sys/ddi_subrdefs.h>
  40 #include <sys/ethernet.h>
  41 #include <sys/fp.h>
  42 #include <sys/instance.h>
  43 #include <sys/kmem.h>
  44 #include <sys/machsystm.h>
  45 #include <sys/modctl.h>
  46 #include <sys/promif.h>
  47 #include <sys/prom_plat.h>
  48 #include <sys/sunndi.h>
  49 #include <sys/ndi_impldefs.h>
  50 #include <sys/ddi_impldefs.h>
  51 #include <sys/sysmacros.h>
  52 #include <sys/systeminfo.h>
  53 #include <sys/utsname.h>
  54 #include <sys/atomic.h>
  55 #include <sys/spl.h>
  56 #include <sys/archsystm.h>
  57 #include <vm/seg_kmem.h>
  58 #include <sys/ontrap.h>
  59 #include <sys/fm/protocol.h>
  60 #include <sys/ramdisk.h>
  61 #include <sys/sunndi.h>
  62 #include <sys/vmem.h>
  63 #include <sys/pci_impl.h>
  64 #if defined(__xpv)
  65 #include <sys/hypervisor.h>
  66 #endif
  67 #include <sys/mach_intr.h>
  68 #include <vm/hat_i86.h>
  69 #include <sys/x86_archext.h>
  70 #include <sys/avl.h>
  71 
  72 /*
  73  * DDI Boot Configuration
  74  */
  75 
  76 /*
  77  * Platform drivers on this platform
  78  */
  79 char *platform_module_list[] = {
  80         "acpippm",
  81         "ppm",
  82         (char *)0
  83 };
  84 
/* pci bus resource maps */
struct pci_bus_resource *pci_bus_res;

/*
 * Default is 0x101000 bytes (1M + 4K).
 * NOTE(review): presumably the upper bound on a DMA copy buffer; the
 * consumer is not in this part of the file -- confirm.
 */
size_t dma_max_copybuf_size = 0x101000;		/* 1M + 4K */

/*
 * NOTE(review): presumably the bounds of the boot ramdisk; neither value
 * is set or read in this part of the file -- confirm.
 */
uint64_t ramdisk_start, ramdisk_end;

/*
 * Selects how configure() attaches the "isa" nexus: non-zero attaches it
 * via i_ddi_attach_pseudo_node(), zero via i_ddi_attach_hw_nodes().
 */
int pseudo_isa = 0;
  93 
  94 /*
  95  * Forward declarations
  96  */
  97 static int getlongprop_buf();
  98 static void get_boot_properties(void);
  99 static void impl_bus_initialprobe(void);
 100 static void impl_bus_reprobe(void);
 101 
 102 static int poke_mem(peekpoke_ctlops_t *in_args);
 103 static int peek_mem(peekpoke_ctlops_t *in_args);
 104 
 105 static int kmem_override_cache_attrs(caddr_t, size_t, uint_t);
 106 
 107 #if defined(__amd64) && !defined(__xpv)
 108 extern void immu_init(void);
 109 #endif
 110 
 111 /*
 112  * We use an AVL tree to store contiguous address allocations made with the
 113  * kalloca() routine, so that we can return the size to free with kfreea().
 114  * Note that in the future it would be vastly faster if we could eliminate
 115  * this lookup by insisting that all callers keep track of their own sizes,
 116  * just as for kmem_alloc().
 117  */
/*
 * One record per tracked contiguous allocation.
 */
struct ctgas {
	avl_node_t ctg_link;	/* presumably the linkage into ctgtree */
	void *ctg_addr;		/* presumably the allocation's address */
	size_t ctg_size;	/* presumably the allocation's size */
};

/* Tree of struct ctgas records. */
static avl_tree_t ctgtree;

/*
 * NOTE(review): presumably serializes access to ctgtree; the users of
 * CTGLOCK()/CTGUNLOCK() are not in this part of the file -- confirm.
 */
static kmutex_t		ctgmutex;
#define	CTGLOCK()	mutex_enter(&ctgmutex)
#define	CTGUNLOCK()	mutex_exit(&ctgmutex)
 129 
 130 /*
 131  * Minimum pfn value of page_t's put on the free list.  This is to simplify
 132  * support of ddi dma memory requests which specify small, non-zero addr_lo
 133  * values.
 134  *
 135  * The default value of 2, which corresponds to the only known non-zero addr_lo
 136  * value used, means a single page will be sacrificed (pfn typically starts
 137  * at 1).  ddiphysmin can be set to 0 to disable. It cannot be set above 0x100
 138  * otherwise mp startup panics.
 139  */
 140 pfn_t   ddiphysmin = 2;
 141 
 142 static void
 143 check_driver_disable(void)
 144 {
 145         int proplen = 128;
 146         char *prop_name;
 147         char *drv_name, *propval;
 148         major_t major;
 149 
 150         prop_name = kmem_alloc(proplen, KM_SLEEP);
 151         for (major = 0; major < devcnt; major++) {
 152                 drv_name = ddi_major_to_name(major);
 153                 if (drv_name == NULL)
 154                         continue;
 155                 (void) snprintf(prop_name, proplen, "disable-%s", drv_name);
 156                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
 157                     DDI_PROP_DONTPASS, prop_name, &propval) == DDI_SUCCESS) {
 158                         if (strcmp(propval, "true") == 0) {
 159                                 devnamesp[major].dn_flags |= DN_DRIVER_REMOVED;
 160                                 cmn_err(CE_NOTE, "driver %s disabled",
 161                                     drv_name);
 162                         }
 163                         ddi_prop_free(propval);
 164                 }
 165         }
 166         kmem_free(prop_name, proplen);
 167 }
 168 
 169 
 170 /*
 171  * Configure the hardware on the system.
 172  * Called before the rootfs is mounted
 173  */
 174 void
 175 configure(void)
 176 {
 177         extern void i_ddi_init_root();
 178 
 179 #if defined(__i386)
 180         extern int fpu_pentium_fdivbug;
 181 #endif  /* __i386 */
 182         extern int fpu_ignored;
 183 
 184         /*
 185          * Determine if an FPU is attached
 186          */
 187 
 188         fpu_probe();
 189 
 190 #if defined(__i386)
 191         if (fpu_pentium_fdivbug) {
 192                 printf("\
 193 FP hardware exhibits Pentium floating point divide problem\n");
 194         }
 195 #endif  /* __i386 */
 196 
 197         if (fpu_ignored) {
 198                 printf("FP hardware will not be used\n");
 199         } else if (!fpu_exists) {
 200                 printf("No FPU in configuration\n");
 201         }
 202 
 203         /*
 204          * Initialize devices on the machine.
 205          * Uses configuration tree built by the PROMs to determine what
 206          * is present, and builds a tree of prototype dev_info nodes
 207          * corresponding to the hardware which identified itself.
 208          */
 209 
 210         /*
 211          * Initialize root node.
 212          */
 213         i_ddi_init_root();
 214 
 215         /* reprogram devices not set up by firmware (BIOS) */
 216         impl_bus_reprobe();
 217 
 218 #if defined(__amd64) && !defined(__xpv)
 219         /*
 220          * Setup but don't startup the IOMMU
 221          * Startup happens later via a direct call
 222          * to IOMMU code by boot code.
 223          * At this point, all PCI bus renumbering
 224          * is done, so safe to init the IMMU
 225          * AKA Intel IOMMU.
 226          */
 227         immu_init();
 228 #endif
 229 
 230         /*
 231          * attach the isa nexus to get ACPI resource usage
 232          * isa is "kind of" a pseudo node
 233          */
 234 #if defined(__xpv)
 235         if (DOMAIN_IS_INITDOMAIN(xen_info)) {
 236                 if (pseudo_isa)
 237                         (void) i_ddi_attach_pseudo_node("isa");
 238                 else
 239                         (void) i_ddi_attach_hw_nodes("isa");
 240         }
 241 #else
 242         if (pseudo_isa)
 243                 (void) i_ddi_attach_pseudo_node("isa");
 244         else
 245                 (void) i_ddi_attach_hw_nodes("isa");
 246 #endif
 247 }
 248 
 249 /*
 250  * The "status" property indicates the operational status of a device.
 251  * If this property is present, the value is a string indicating the
 252  * status of the device as follows:
 253  *
 254  *      "okay"          operational.
 255  *      "disabled"      not operational, but might become operational.
 256  *      "fail"          not operational because a fault has been detected,
 257  *                      and it is unlikely that the device will become
 258  *                      operational without repair. no additional details
 259  *                      are available.
 260  *      "fail-xxx"      not operational because a fault has been detected,
 261  *                      and it is unlikely that the device will become
 262  *                      operational without repair. "xxx" is additional
 263  *                      human-readable information about the particular
 264  *                      fault condition that was detected.
 265  *
 266  * The absence of this property means that the operational status is
 267  * unknown or okay.
 268  *
 269  * This routine checks the status property of the specified device node
 270  * and returns 0 if the operational status indicates failure, and 1 otherwise.
 271  *
 272  * The property may exist on plug-in cards the existed before IEEE 1275-1994.
 273  * And, in that case, the property may not even be a string. So we carefully
 274  * check for the value "fail", in the beginning of the string, noting
 275  * the property length.
 276  */
 277 int
 278 status_okay(int id, char *buf, int buflen)
 279 {
 280         char status_buf[OBP_MAXPROPNAME];
 281         char *bufp = buf;
 282         int len = buflen;
 283         int proplen;
 284         static const char *status = "status";
 285         static const char *fail = "fail";
 286         int fail_len = (int)strlen(fail);
 287 
 288         /*
 289          * Get the proplen ... if it's smaller than "fail",
 290          * or doesn't exist ... then we don't care, since
 291          * the value can't begin with the char string "fail".
 292          *
 293          * NB: proplen, if it's a string, includes the NULL in the
 294          * the size of the property, and fail_len does not.
 295          */
 296         proplen = prom_getproplen((pnode_t)id, (caddr_t)status);
 297         if (proplen <= fail_len)     /* nonexistant or uninteresting len */
 298                 return (1);
 299 
 300         /*
 301          * if a buffer was provided, use it
 302          */
 303         if ((buf == (char *)NULL) || (buflen <= 0)) {
 304                 bufp = status_buf;
 305                 len = sizeof (status_buf);
 306         }
 307         *bufp = (char)0;
 308 
 309         /*
 310          * Get the property into the buffer, to the extent of the buffer,
 311          * and in case the buffer is smaller than the property size,
 312          * NULL terminate the buffer. (This handles the case where
 313          * a buffer was passed in and the caller wants to print the
 314          * value, but the buffer was too small).
 315          */
 316         (void) prom_bounded_getprop((pnode_t)id, (caddr_t)status,
 317             (caddr_t)bufp, len);
 318         *(bufp + len - 1) = (char)0;
 319 
 320         /*
 321          * If the value begins with the char string "fail",
 322          * then it means the node is failed. We don't care
 323          * about any other values. We assume the node is ok
 324          * although it might be 'disabled'.
 325          */
 326         if (strncmp(bufp, fail, fail_len) == 0)
 327                 return (0);
 328 
 329         return (1);
 330 }
 331 
 332 /*
 333  * Check the status of the device node passed as an argument.
 334  *
 335  *      if ((status is OKAY) || (status is DISABLED))
 336  *              return DDI_SUCCESS
 337  *      else
 338  *              print a warning and return DDI_FAILURE
 339  */
 340 /*ARGSUSED1*/
 341 int
 342 check_status(int id, char *name, dev_info_t *parent)
 343 {
 344         char status_buf[64];
 345         char devtype_buf[OBP_MAXPROPNAME];
 346         int retval = DDI_FAILURE;
 347 
 348         /*
 349          * is the status okay?
 350          */
 351         if (status_okay(id, status_buf, sizeof (status_buf)))
 352                 return (DDI_SUCCESS);
 353 
 354         /*
 355          * a status property indicating bad memory will be associated
 356          * with a node which has a "device_type" property with a value of
 357          * "memory-controller". in this situation, return DDI_SUCCESS
 358          */
 359         if (getlongprop_buf(id, OBP_DEVICETYPE, devtype_buf,
 360             sizeof (devtype_buf)) > 0) {
 361                 if (strcmp(devtype_buf, "memory-controller") == 0)
 362                         retval = DDI_SUCCESS;
 363         }
 364 
 365         /*
 366          * print the status property information
 367          */
 368         cmn_err(CE_WARN, "status '%s' for '%s'", status_buf, name);
 369         return (retval);
 370 }
 371 
/*
 * Handler that ignores both of its arguments, runs softint(), and
 * always returns 1.
 * NOTE(review): the return value presumably means "interrupt claimed" --
 * confirm against the caller's convention.
 */
/*ARGSUSED*/
uint_t
softlevel1(caddr_t arg1, caddr_t arg2)
{
	softint();
	return (1);
}
 379 
 380 /*
 381  * Allow for implementation specific correction of PROM property values.
 382  */
 383 
/*
 * Hook for implementation-specific correction of a property value.
 * All arguments are ignored and nothing is modified here.
 */
/*ARGSUSED*/
void
impl_fix_props(dev_info_t *dip, dev_info_t *ch_dip, char *name, int len,
    caddr_t buffer)
{
	/*
	 * There are no adjustments needed in this implementation.
	 */
}
 393 
 394 static int
 395 getlongprop_buf(int id, char *name, char *buf, int maxlen)
 396 {
 397         int size;
 398 
 399         size = prom_getproplen((pnode_t)id, name);
 400         if (size <= 0 || (size > maxlen - 1))
 401                 return (-1);
 402 
 403         if (-1 == prom_getprop((pnode_t)id, name, buf))
 404                 return (-1);
 405 
 406         if (strcmp("name", name) == 0) {
 407                 if (buf[size - 1] != '\0') {
 408                         buf[size] = '\0';
 409                         size += 1;
 410                 }
 411         }
 412 
 413         return (size);
 414 }
 415 
 416 static int
 417 get_prop_int_array(dev_info_t *di, char *pname, int **pval, uint_t *plen)
 418 {
 419         int ret;
 420 
 421         if ((ret = ddi_prop_lookup_int_array(DDI_DEV_T_ANY, di,
 422             DDI_PROP_DONTPASS, pname, pval, plen))
 423             == DDI_PROP_SUCCESS) {
 424                 *plen = (*plen) * (sizeof (int));
 425         }
 426         return (ret);
 427 }
 428 
 429 
 430 /*
 431  * Node Configuration
 432  */
 433 
 434 struct prop_ispec {
 435         uint_t  pri, vec;
 436 };
 437 
 438 /*
 439  * For the x86, we're prepared to claim that the interrupt string
 440  * is in the form of a list of <ipl,vec> specifications.
 441  */
 442 
 443 #define VEC_MIN 1
 444 #define VEC_MAX 255
 445 
 446 static int
 447 impl_xlate_intrs(dev_info_t *child, int *in,
 448     struct ddi_parent_private_data *pdptr)
 449 {
 450         size_t size;
 451         int n;
 452         struct intrspec *new;
 453         caddr_t got_prop;
 454         int *inpri;
 455         int got_len;
 456         extern int ignore_hardware_nodes;       /* force flag from ddi_impl.c */
 457 
 458         static char bad_intr_fmt[] =
 459             "bad interrupt spec from %s%d - ipl %d, irq %d\n";
 460 
 461         /*
 462          * determine if the driver is expecting the new style "interrupts"
 463          * property which just contains the IRQ, or the old style which
 464          * contains pairs of <IPL,IRQ>.  if it is the new style, we always
 465          * assign IPL 5 unless an "interrupt-priorities" property exists.
 466          * in that case, the "interrupt-priorities" property contains the
 467          * IPL values that match, one for one, the IRQ values in the
 468          * "interrupts" property.
 469          */
 470         inpri = NULL;
 471         if ((ddi_getprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
 472             "ignore-hardware-nodes", -1) != -1) || ignore_hardware_nodes) {
 473                 /* the old style "interrupts" property... */
 474 
 475                 /*
 476                  * The list consists of <ipl,vec> elements
 477                  */
 478                 if ((n = (*in++ >> 1)) < 1)
 479                         return (DDI_FAILURE);
 480 
 481                 pdptr->par_nintr = n;
 482                 size = n * sizeof (struct intrspec);
 483                 new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
 484 
 485                 while (n--) {
 486                         int level = *in++;
 487                         int vec = *in++;
 488 
 489                         if (level < 1 || level > MAXIPL ||
 490                             vec < VEC_MIN || vec > VEC_MAX) {
 491                                 cmn_err(CE_CONT, bad_intr_fmt,
 492                                     DEVI(child)->devi_name,
 493                                     DEVI(child)->devi_instance, level, vec);
 494                                 goto broken;
 495                         }
 496                         new->intrspec_pri = level;
 497                         if (vec != 2)
 498                                 new->intrspec_vec = vec;
 499                         else
 500                                 /*
 501                                  * irq 2 on the PC bus is tied to irq 9
 502                                  * on ISA, EISA and MicroChannel
 503                                  */
 504                                 new->intrspec_vec = 9;
 505                         new++;
 506                 }
 507 
 508                 return (DDI_SUCCESS);
 509         } else {
 510                 /* the new style "interrupts" property... */
 511 
 512                 /*
 513                  * The list consists of <vec> elements
 514                  */
 515                 if ((n = (*in++)) < 1)
 516                         return (DDI_FAILURE);
 517 
 518                 pdptr->par_nintr = n;
 519                 size = n * sizeof (struct intrspec);
 520                 new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
 521 
 522                 /* XXX check for "interrupt-priorities" property... */
 523                 if (ddi_getlongprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
 524                     "interrupt-priorities", (caddr_t)&got_prop, &got_len)
 525                     == DDI_PROP_SUCCESS) {
 526                         if (n != (got_len / sizeof (int))) {
 527                                 cmn_err(CE_CONT,
 528                                     "bad interrupt-priorities length"
 529                                     " from %s%d: expected %d, got %d\n",
 530                                     DEVI(child)->devi_name,
 531                                     DEVI(child)->devi_instance, n,
 532                                     (int)(got_len / sizeof (int)));
 533                                 goto broken;
 534                         }
 535                         inpri = (int *)got_prop;
 536                 }
 537 
 538                 while (n--) {
 539                         int level;
 540                         int vec = *in++;
 541 
 542                         if (inpri == NULL)
 543                                 level = 5;
 544                         else
 545                                 level = *inpri++;
 546 
 547                         if (level < 1 || level > MAXIPL ||
 548                             vec < VEC_MIN || vec > VEC_MAX) {
 549                                 cmn_err(CE_CONT, bad_intr_fmt,
 550                                     DEVI(child)->devi_name,
 551                                     DEVI(child)->devi_instance, level, vec);
 552                                 goto broken;
 553                         }
 554                         new->intrspec_pri = level;
 555                         if (vec != 2)
 556                                 new->intrspec_vec = vec;
 557                         else
 558                                 /*
 559                                  * irq 2 on the PC bus is tied to irq 9
 560                                  * on ISA, EISA and MicroChannel
 561                                  */
 562                                 new->intrspec_vec = 9;
 563                         new++;
 564                 }
 565 
 566                 if (inpri != NULL)
 567                         kmem_free(got_prop, got_len);
 568                 return (DDI_SUCCESS);
 569         }
 570 
 571 broken:
 572         kmem_free(pdptr->par_intr, size);
 573         pdptr->par_intr = NULL;
 574         pdptr->par_nintr = 0;
 575         if (inpri != NULL)
 576                 kmem_free(got_prop, got_len);
 577 
 578         return (DDI_FAILURE);
 579 }
 580 
 581 /*
 582  * Create a ddi_parent_private_data structure from the ddi properties of
 583  * the dev_info node.
 584  *
 585  * The "reg" and either an "intr" or "interrupts" properties are required
 586  * if the driver wishes to create mappings or field interrupts on behalf
 587  * of the device.
 588  *
 589  * The "reg" property is assumed to be a list of at least one triple
 590  *
 591  *      <bustype, address, size>*1
 592  *
 593  * The "intr" property is assumed to be a list of at least one duple
 594  *
 595  *      <SPARC ipl, vector#>*1
 596  *
 597  * The "interrupts" property is assumed to be a list of at least one
 598  * n-tuples that describes the interrupt capabilities of the bus the device
 599  * is connected to.  For SBus, this looks like
 600  *
 601  *      <SBus-level>*1
 602  *
 603  * (This property obsoletes the 'intr' property).
 604  *
 605  * The "ranges" property is optional.
 606  */
 607 void
 608 make_ddi_ppd(dev_info_t *child, struct ddi_parent_private_data **ppd)
 609 {
 610         struct ddi_parent_private_data *pdptr;
 611         int n;
 612         int *reg_prop, *rng_prop, *intr_prop, *irupts_prop;
 613         uint_t reg_len, rng_len, intr_len, irupts_len;
 614 
 615         *ppd = pdptr = kmem_zalloc(sizeof (*pdptr), KM_SLEEP);
 616 
 617         /*
 618          * Handle the 'reg' property.
 619          */
 620         if ((get_prop_int_array(child, "reg", &reg_prop, &reg_len) ==
 621             DDI_PROP_SUCCESS) && (reg_len != 0)) {
 622                 pdptr->par_nreg = reg_len / (int)sizeof (struct regspec);
 623                 pdptr->par_reg = (struct regspec *)reg_prop;
 624         }
 625 
 626         /*
 627          * See if I have a range (adding one where needed - this
 628          * means to add one for sbus node in sun4c, when romvec > 0,
 629          * if no range is already defined in the PROM node.
 630          * (Currently no sun4c PROMS define range properties,
 631          * but they should and may in the future.)  For the SBus
 632          * node, the range is defined by the SBus reg property.
 633          */
 634         if (get_prop_int_array(child, "ranges", &rng_prop, &rng_len)
 635             == DDI_PROP_SUCCESS) {
 636                 pdptr->par_nrng = rng_len / (int)(sizeof (struct rangespec));
 637                 pdptr->par_rng = (struct rangespec *)rng_prop;
 638         }
 639 
 640         /*
 641          * Handle the 'intr' and 'interrupts' properties
 642          */
 643 
 644         /*
 645          * For backwards compatibility
 646          * we first look for the 'intr' property for the device.
 647          */
 648         if (get_prop_int_array(child, "intr", &intr_prop, &intr_len)
 649             != DDI_PROP_SUCCESS) {
 650                 intr_len = 0;
 651         }
 652 
 653         /*
 654          * If we're to support bus adapters and future platforms cleanly,
 655          * we need to support the generalized 'interrupts' property.
 656          */
 657         if (get_prop_int_array(child, "interrupts", &irupts_prop,
 658             &irupts_len) != DDI_PROP_SUCCESS) {
 659                 irupts_len = 0;
 660         } else if (intr_len != 0) {
 661                 /*
 662                  * If both 'intr' and 'interrupts' are defined,
 663                  * then 'interrupts' wins and we toss the 'intr' away.
 664                  */
 665                 ddi_prop_free((void *)intr_prop);
 666                 intr_len = 0;
 667         }
 668 
 669         if (intr_len != 0) {
 670 
 671                 /*
 672                  * Translate the 'intr' property into an array
 673                  * an array of struct intrspec's.  There's not really
 674                  * very much to do here except copy what's out there.
 675                  */
 676 
 677                 struct intrspec *new;
 678                 struct prop_ispec *l;
 679 
 680                 n = pdptr->par_nintr = intr_len / sizeof (struct prop_ispec);
 681                 l = (struct prop_ispec *)intr_prop;
 682                 pdptr->par_intr =
 683                     new = kmem_zalloc(n * sizeof (struct intrspec), KM_SLEEP);
 684                 while (n--) {
 685                         new->intrspec_pri = l->pri;
 686                         new->intrspec_vec = l->vec;
 687                         new++;
 688                         l++;
 689                 }
 690                 ddi_prop_free((void *)intr_prop);
 691 
 692         } else if ((n = irupts_len) != 0) {
 693                 size_t size;
 694                 int *out;
 695 
 696                 /*
 697                  * Translate the 'interrupts' property into an array
 698                  * of intrspecs for the rest of the DDI framework to
 699                  * toy with.  Only our ancestors really know how to
 700                  * do this, so ask 'em.  We massage the 'interrupts'
 701                  * property so that it is pre-pended by a count of
 702                  * the number of integers in the argument.
 703                  */
 704                 size = sizeof (int) + n;
 705                 out = kmem_alloc(size, KM_SLEEP);
 706                 *out = n / sizeof (int);
 707                 bcopy(irupts_prop, out + 1, (size_t)n);
 708                 ddi_prop_free((void *)irupts_prop);
 709                 if (impl_xlate_intrs(child, out, pdptr) != DDI_SUCCESS) {
 710                         cmn_err(CE_CONT,
 711                             "Unable to translate 'interrupts' for %s%d\n",
 712                             DEVI(child)->devi_binding_name,
 713                             DEVI(child)->devi_instance);
 714                 }
 715                 kmem_free(out, size);
 716         }
 717 }
 718 
 719 /*
 720  * Name a child
 721  */
 722 static int
 723 impl_sunbus_name_child(dev_info_t *child, char *name, int namelen)
 724 {
 725         /*
 726          * Fill in parent-private data and this function returns to us
 727          * an indication if it used "registers" to fill in the data.
 728          */
 729         if (ddi_get_parent_data(child) == NULL) {
 730                 struct ddi_parent_private_data *pdptr;
 731                 make_ddi_ppd(child, &pdptr);
 732                 ddi_set_parent_data(child, pdptr);
 733         }
 734 
 735         name[0] = '\0';
 736         if (sparc_pd_getnreg(child) > 0) {
 737                 (void) snprintf(name, namelen, "%x,%x",
 738                     (uint_t)sparc_pd_getreg(child, 0)->regspec_bustype,
 739                     (uint_t)sparc_pd_getreg(child, 0)->regspec_addr);
 740         }
 741 
 742         return (DDI_SUCCESS);
 743 }
 744 
 745 /*
 746  * Called from the bus_ctl op of sunbus (sbus, obio, etc) nexus drivers
 747  * to implement the DDI_CTLOPS_INITCHILD operation.  That is, it names
 748  * the children of sun busses based on the reg spec.
 749  *
 750  * Handles the following properties (in make_ddi_ppd):
 751  *      Property                value
 752  *        Name                  type
 753  *      reg             register spec
 754  *      intr            old-form interrupt spec
 755  *      interrupts      new (bus-oriented) interrupt spec
 756  *      ranges          range spec
 757  */
 758 int
 759 impl_ddi_sunbus_initchild(dev_info_t *child)
 760 {
 761         char name[MAXNAMELEN];
 762         void impl_ddi_sunbus_removechild(dev_info_t *);
 763 
 764         /*
 765          * Name the child, also makes parent private data
 766          */
 767         (void) impl_sunbus_name_child(child, name, MAXNAMELEN);
 768         ddi_set_name_addr(child, name);
 769 
 770         /*
 771          * Attempt to merge a .conf node; if successful, remove the
 772          * .conf node.
 773          */
 774         if ((ndi_dev_is_persistent_node(child) == 0) &&
 775             (ndi_merge_node(child, impl_sunbus_name_child) == DDI_SUCCESS)) {
 776                 /*
 777                  * Return failure to remove node
 778                  */
 779                 impl_ddi_sunbus_removechild(child);
 780                 return (DDI_FAILURE);
 781         }
 782         return (DDI_SUCCESS);
 783 }
 784 
 785 void
 786 impl_free_ddi_ppd(dev_info_t *dip)
 787 {
 788         struct ddi_parent_private_data *pdptr;
 789         size_t n;
 790 
 791         if ((pdptr = ddi_get_parent_data(dip)) == NULL)
 792                 return;
 793 
 794         if ((n = (size_t)pdptr->par_nintr) != 0)
 795                 /*
 796                  * Note that kmem_free is used here (instead of
 797                  * ddi_prop_free) because the contents of the
 798                  * property were placed into a separate buffer and
 799                  * mucked with a bit before being stored in par_intr.
 800                  * The actual return value from the prop lookup
 801                  * was freed with ddi_prop_free previously.
 802                  */
 803                 kmem_free(pdptr->par_intr, n * sizeof (struct intrspec));
 804 
 805         if ((n = (size_t)pdptr->par_nrng) != 0)
 806                 ddi_prop_free((void *)pdptr->par_rng);
 807 
 808         if ((n = pdptr->par_nreg) != 0)
 809                 ddi_prop_free((void *)pdptr->par_reg);
 810 
 811         kmem_free(pdptr, sizeof (*pdptr));
 812         ddi_set_parent_data(dip, NULL);
 813 }
 814 
/*
 * Undo impl_ddi_sunbus_initchild(): free the node's parent-private data,
 * clear its unit address, and strip its properties.
 */
void
impl_ddi_sunbus_removechild(dev_info_t *dip)
{
	impl_free_ddi_ppd(dip);
	ddi_set_name_addr(dip, NULL);
	/*
	 * Strip the node to properly convert it back to prototype form
	 */
	impl_rem_dev_props(dip);
}
 825 
 826 /*
 827  * DDI Interrupt
 828  */
 829 
 830 /*
 831  * turn this on to force isa, eisa, and mca device to ignore the new
 832  * hardware nodes in the device tree (normally turned on only for
 833  * drivers that need it by setting the property "ignore-hardware-nodes"
 834  * in their driver.conf file).
 835  *
 836  * 7/31/96 -- Turned off globally.  Leaving variable in for the moment
 837  *              as safety valve.
 838  */
 839 int ignore_hardware_nodes = 0;
 840 
 841 /*
 842  * Local data
 843  */
 844 static struct impl_bus_promops *impl_busp;
 845 
 846 
 847 /*
 848  * New DDI interrupt framework
 849  */
 850 
 851 /*
 852  * i_ddi_intr_ops:
 853  *
 854  * This is the interrupt operator function wrapper for the bus function
 855  * bus_intr_op.
 856  */
 857 int
 858 i_ddi_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t op,
 859     ddi_intr_handle_impl_t *hdlp, void * result)
 860 {
 861         dev_info_t      *pdip = (dev_info_t *)DEVI(dip)->devi_parent;
 862         int             ret = DDI_FAILURE;
 863 
 864         /* request parent to process this interrupt op */
 865         if (NEXUS_HAS_INTR_OP(pdip))
 866                 ret = (*(DEVI(pdip)->devi_ops->devo_bus_ops->bus_intr_op))(
 867                     pdip, rdip, op, hdlp, result);
 868         else
 869                 cmn_err(CE_WARN, "Failed to process interrupt "
 870                     "for %s%d due to down-rev nexus driver %s%d",
 871                     ddi_get_name(rdip), ddi_get_instance(rdip),
 872                     ddi_get_name(pdip), ddi_get_instance(pdip));
 873         return (ret);
 874 }
 875 
 876 /*
 877  * i_ddi_add_softint - allocate and add a soft interrupt to the system
 878  */
 879 int
 880 i_ddi_add_softint(ddi_softint_hdl_impl_t *hdlp)
 881 {
 882         int ret;
 883 
 884         /* add soft interrupt handler */
 885         ret = add_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func,
 886             DEVI(hdlp->ih_dip)->devi_name, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
 887         return (ret ? DDI_SUCCESS : DDI_FAILURE);
 888 }
 889 
 890 
/*
 * i_ddi_remove_softint:
 *
 * Remove the soft interrupt described by 'hdlp' via rem_avsoftintr().
 * The result of rem_avsoftintr() is deliberately discarded; this function
 * has no way to report failure to its caller.
 */
void
i_ddi_remove_softint(ddi_softint_hdl_impl_t *hdlp)
{
        (void) rem_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func);
}
 896 
 897 
 898 extern void (*setsoftint)(int, struct av_softinfo *);
 899 extern boolean_t av_check_softint_pending(struct av_softinfo *, boolean_t);
 900 
 901 int
 902 i_ddi_trigger_softint(ddi_softint_hdl_impl_t *hdlp, void *arg2)
 903 {
 904         if (av_check_softint_pending(hdlp->ih_pending, B_FALSE))
 905                 return (DDI_EPENDING);
 906 
 907         update_avsoftintr_args((void *)hdlp, hdlp->ih_pri, arg2);
 908 
 909         (*setsoftint)(hdlp->ih_pri, hdlp->ih_pending);
 910         return (DDI_SUCCESS);
 911 }
 912 
 913 /*
 914  * i_ddi_set_softint_pri:
 915  *
 916  * The way this works is that it first tries to add a softint vector
 917  * at the new priority in hdlp. If that succeeds; then it removes the
 918  * existing softint vector at the old priority.
 919  */
 920 int
 921 i_ddi_set_softint_pri(ddi_softint_hdl_impl_t *hdlp, uint_t old_pri)
 922 {
 923         int ret;
 924 
 925         /*
 926          * If a softint is pending at the old priority then fail the request.
 927          */
 928         if (av_check_softint_pending(hdlp->ih_pending, B_TRUE))
 929                 return (DDI_FAILURE);
 930 
 931         ret = av_softint_movepri((void *)hdlp, old_pri);
 932         return (ret ? DDI_SUCCESS : DDI_FAILURE);
 933 }
 934 
 935 void
 936 i_ddi_alloc_intr_phdl(ddi_intr_handle_impl_t *hdlp)
 937 {
 938         hdlp->ih_private = (void *)kmem_zalloc(sizeof (ihdl_plat_t), KM_SLEEP);
 939 }
 940 
/*
 * i_ddi_free_intr_phdl:
 *
 * Free the ihdl_plat_t referenced by hdlp->ih_private (the size matches
 * the allocation made by i_ddi_alloc_intr_phdl()) and clear the pointer so
 * the handle no longer refers to freed memory.
 */
void
i_ddi_free_intr_phdl(ddi_intr_handle_impl_t *hdlp)
{
        kmem_free(hdlp->ih_private, sizeof (ihdl_plat_t));
        hdlp->ih_private = NULL;
}
 947 
 948 int
 949 i_ddi_get_intx_nintrs(dev_info_t *dip)
 950 {
 951         struct ddi_parent_private_data *pdp;
 952 
 953         if ((pdp = ddi_get_parent_data(dip)) == NULL)
 954                 return (0);
 955 
 956         return (pdp->par_nintr);
 957 }
 958 
 959 /*
 960  * DDI Memory/DMA
 961  */
 962 
 963 /*
 964  * Support for allocating DMAable memory to implement
 965  * ddi_dma_mem_alloc(9F) interface.
 966  */
 967 
/*
 * Sizing of the per-range kmem_io caches:
 *
 *   KA_ALIGN_SHIFT  log2 of the smallest cache object size.
 *   KA_ALIGN        smallest cache object size in bytes (1 << 7 = 128);
 *                   kalloca() rounds request sizes up to a multiple of it.
 *   KA_NCACHE       number of caches per range; kmem_io_init() creates one
 *                   cache of object size KA_ALIGN << c for every c in
 *                   [0, KA_NCACHE), i.e. the largest is 1 << PAGESHIFT.
 */
#define KA_ALIGN_SHIFT  7
#define KA_ALIGN        (1 << KA_ALIGN_SHIFT)
#define KA_NCACHE       (PAGESHIFT + 1 - KA_ALIGN_SHIFT)
 971 
/*
 * Dummy DMA attribute template for kmem_io[].kmem_io_attr.  We only
 * care about addr_lo, addr_hi, and align.  addr_hi will be dynamically set.
 *
 * ka_init() copies this template into every kmem_io[] entry it populates
 * and then overwrites dma_attr_addr_hi with that range's io_limit.
 *
 * The initializer is positional.
 * NOTE(review): going by the member order documented in ddi_dma_attr(9S),
 * the unlabelled values are count_max (0x00ffffff) and, on the last line,
 * burstsizes, minxfer, maxxfer, seg, sgllen, granular and flags -- confirm
 * against the structure definition before changing any of them.
 */

static ddi_dma_attr_t kmem_io_attr = {
        DMA_ATTR_V0,
        0x0000000000000000ULL,          /* dma_attr_addr_lo */
        0x0000000000000000ULL,          /* dma_attr_addr_hi */
        0x00ffffff,
        0x1000,                         /* dma_attr_align */
        1, 1, 0xffffffffULL, 0xffffffffULL, 0x1, 1, 0
};
 985 
/*
 * kmem io memory ranges and indices.
 *
 * The enumerators index both kmem_io[] and io_arena_params[]; they are
 * ordered from the largest physical-address limit (IO_4P) down to the
 * smallest (IO_16M).  MAX_MEM_RANGES is the number of ranges.
 */
enum {
        IO_4P, IO_64G, IO_4G, IO_2G, IO_1G, IO_512M,
        IO_256M, IO_128M, IO_64M, IO_32M, IO_16M, MAX_MEM_RANGES
};

/*
 * Per-range allocator state, filled in by ka_init() and kmem_io_init():
 *   kmem_io_arena   vmem arena for the range; NULL until kmem_io_init()
 *                   has been run for it.
 *   kmem_io_cache   one kmem cache per object size (see KA_NCACHE).
 *   kmem_io_attr    DMA attributes for the range; its dma_attr_addr_hi
 *                   holds the range's upper physical-address limit.
 */
static struct {
        vmem_t          *kmem_io_arena;
        kmem_cache_t    *kmem_io_cache[KA_NCACHE];
        ddi_dma_attr_t  kmem_io_attr;
} kmem_io[MAX_MEM_RANGES];

static int kmem_io_idx;         /* index of first populated kmem_io[] */
 999 
1000 static page_t *
1001 page_create_io_wrapper(void *addr, size_t len, int vmflag, void *arg)
1002 {
1003         extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
1004             uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
1005 
1006         return (page_create_io(&kvp, (u_offset_t)(uintptr_t)addr, len,
1007             PG_EXCL | ((vmflag & VM_NOSLEEP) ? 0 : PG_WAIT), &kas, addr, arg));
1008 }
1009 
#ifdef __xpv
/*
 * segkmem_free_io:
 *
 * Free routine used for the kmem_io arenas when built for __xpv (see
 * kmem_io_init()).  It releases the range through segkmem_xfree(), with
 * page_destroy_io() as the per-page destroy callback.
 */
static void
segkmem_free_io(vmem_t *vmp, void * ptr, size_t size)
{
        extern void page_destroy_io(page_t *);
        segkmem_xfree(vmp, ptr, size, page_destroy_io);
}
#endif
1018 
1019 static void *
1020 segkmem_alloc_io_4P(vmem_t *vmp, size_t size, int vmflag)
1021 {
1022         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1023             page_create_io_wrapper, &kmem_io[IO_4P].kmem_io_attr));
1024 }
1025 
1026 static void *
1027 segkmem_alloc_io_64G(vmem_t *vmp, size_t size, int vmflag)
1028 {
1029         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1030             page_create_io_wrapper, &kmem_io[IO_64G].kmem_io_attr));
1031 }
1032 
1033 static void *
1034 segkmem_alloc_io_4G(vmem_t *vmp, size_t size, int vmflag)
1035 {
1036         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1037             page_create_io_wrapper, &kmem_io[IO_4G].kmem_io_attr));
1038 }
1039 
1040 static void *
1041 segkmem_alloc_io_2G(vmem_t *vmp, size_t size, int vmflag)
1042 {
1043         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1044             page_create_io_wrapper, &kmem_io[IO_2G].kmem_io_attr));
1045 }
1046 
1047 static void *
1048 segkmem_alloc_io_1G(vmem_t *vmp, size_t size, int vmflag)
1049 {
1050         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1051             page_create_io_wrapper, &kmem_io[IO_1G].kmem_io_attr));
1052 }
1053 
1054 static void *
1055 segkmem_alloc_io_512M(vmem_t *vmp, size_t size, int vmflag)
1056 {
1057         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1058             page_create_io_wrapper, &kmem_io[IO_512M].kmem_io_attr));
1059 }
1060 
1061 static void *
1062 segkmem_alloc_io_256M(vmem_t *vmp, size_t size, int vmflag)
1063 {
1064         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1065             page_create_io_wrapper, &kmem_io[IO_256M].kmem_io_attr));
1066 }
1067 
1068 static void *
1069 segkmem_alloc_io_128M(vmem_t *vmp, size_t size, int vmflag)
1070 {
1071         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1072             page_create_io_wrapper, &kmem_io[IO_128M].kmem_io_attr));
1073 }
1074 
1075 static void *
1076 segkmem_alloc_io_64M(vmem_t *vmp, size_t size, int vmflag)
1077 {
1078         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1079             page_create_io_wrapper, &kmem_io[IO_64M].kmem_io_attr));
1080 }
1081 
1082 static void *
1083 segkmem_alloc_io_32M(vmem_t *vmp, size_t size, int vmflag)
1084 {
1085         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1086             page_create_io_wrapper, &kmem_io[IO_32M].kmem_io_attr));
1087 }
1088 
1089 static void *
1090 segkmem_alloc_io_16M(vmem_t *vmp, size_t size, int vmflag)
1091 {
1092         return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1093             page_create_io_wrapper, &kmem_io[IO_16M].kmem_io_attr));
1094 }
1095 
/*
 * Static description of every kmem io memory range, indexed by the IO_*
 * enumerators (entries are ordered from the largest limit to the smallest):
 *   io_limit    highest physical address of the range; ka_init() may lower
 *               the limit of the topmost populated range to the machine's
 *               maximum physical address.
 *   io_name     name given to the range's vmem arena and used as the
 *               prefix of its cache names.
 *   io_alloc    allocation function handed to vmem_create() for the range.
 *   io_initial  non-zero if ka_init() should set the range up at startup;
 *               other ranges are set up on first use by kmem_io_index().
 */
struct {
        uint64_t        io_limit;
        char            *io_name;
        void            *(*io_alloc)(vmem_t *, size_t, int);
        int             io_initial;     /* kmem_io_init during startup */
} io_arena_params[MAX_MEM_RANGES] = {
        {0x000fffffffffffffULL, "kmem_io_4P",   segkmem_alloc_io_4P,    1},
        {0x0000000fffffffffULL, "kmem_io_64G",  segkmem_alloc_io_64G,   0},
        {0x00000000ffffffffULL, "kmem_io_4G",   segkmem_alloc_io_4G,    1},
        {0x000000007fffffffULL, "kmem_io_2G",   segkmem_alloc_io_2G,    1},
        {0x000000003fffffffULL, "kmem_io_1G",   segkmem_alloc_io_1G,    0},
        {0x000000001fffffffULL, "kmem_io_512M", segkmem_alloc_io_512M,  0},
        {0x000000000fffffffULL, "kmem_io_256M", segkmem_alloc_io_256M,  0},
        {0x0000000007ffffffULL, "kmem_io_128M", segkmem_alloc_io_128M,  0},
        {0x0000000003ffffffULL, "kmem_io_64M",  segkmem_alloc_io_64M,   0},
        {0x0000000001ffffffULL, "kmem_io_32M",  segkmem_alloc_io_32M,   0},
        {0x0000000000ffffffULL, "kmem_io_16M",  segkmem_alloc_io_16M,   1}
};
1114 
1115 void
1116 kmem_io_init(int a)
1117 {
1118         int     c;
1119         char name[40];
1120 
1121         kmem_io[a].kmem_io_arena = vmem_create(io_arena_params[a].io_name,
1122             NULL, 0, PAGESIZE, io_arena_params[a].io_alloc,
1123 #ifdef __xpv
1124             segkmem_free_io,
1125 #else
1126             segkmem_free,
1127 #endif
1128             heap_arena, 0, VM_SLEEP);
1129 
1130         for (c = 0; c < KA_NCACHE; c++) {
1131                 size_t size = KA_ALIGN << c;
1132                 (void) sprintf(name, "%s_%lu",
1133                     io_arena_params[a].io_name, size);
1134                 kmem_io[a].kmem_io_cache[c] = kmem_cache_create(name,
1135                     size, size, NULL, NULL, NULL, NULL,
1136                     kmem_io[a].kmem_io_arena, 0);
1137         }
1138 }
1139 
1140 /*
1141  * Return the index of the highest memory range for addr.
1142  */
1143 static int
1144 kmem_io_index(uint64_t addr)
1145 {
1146         int n;
1147 
1148         for (n = kmem_io_idx; n < MAX_MEM_RANGES; n++) {
1149                 if (kmem_io[n].kmem_io_attr.dma_attr_addr_hi <= addr) {
1150                         if (kmem_io[n].kmem_io_arena == NULL)
1151                                 kmem_io_init(n);
1152                         return (n);
1153                 }
1154         }
1155         panic("kmem_io_index: invalid addr - must be at least 16m");
1156 
1157         /*NOTREACHED*/
1158 }
1159 
1160 /*
1161  * Return the index of the next kmem_io populated memory range
1162  * after curindex.
1163  */
1164 static int
1165 kmem_io_index_next(int curindex)
1166 {
1167         int n;
1168 
1169         for (n = curindex + 1; n < MAX_MEM_RANGES; n++) {
1170                 if (kmem_io[n].kmem_io_arena)
1171                         return (n);
1172         }
1173         return (-1);
1174 }
1175 
1176 /*
1177  * allow kmem to be mapped in with different PTE cache attribute settings.
1178  * Used by i_ddi_mem_alloc()
1179  */
1180 int
1181 kmem_override_cache_attrs(caddr_t kva, size_t size, uint_t order)
1182 {
1183         uint_t hat_flags;
1184         caddr_t kva_end;
1185         uint_t hat_attr;
1186         pfn_t pfn;
1187 
1188         if (hat_getattr(kas.a_hat, kva, &hat_attr) == -1) {
1189                 return (-1);
1190         }
1191 
1192         hat_attr &= ~HAT_ORDER_MASK;
1193         hat_attr |= order | HAT_NOSYNC;
1194         hat_flags = HAT_LOAD_LOCK;
1195 
1196         kva_end = (caddr_t)(((uintptr_t)kva + size + PAGEOFFSET) &
1197             (uintptr_t)PAGEMASK);
1198         kva = (caddr_t)((uintptr_t)kva & (uintptr_t)PAGEMASK);
1199 
1200         while (kva < kva_end) {
1201                 pfn = hat_getpfnum(kas.a_hat, kva);
1202                 hat_unload(kas.a_hat, kva, PAGESIZE, HAT_UNLOAD_UNLOCK);
1203                 hat_devload(kas.a_hat, kva, PAGESIZE, pfn, hat_attr, hat_flags);
1204                 kva += MMU_PAGESIZE;
1205         }
1206 
1207         return (0);
1208 }
1209 
1210 static int
1211 ctgcompare(const void *a1, const void *a2)
1212 {
1213         /* we just want to compare virtual addresses */
1214         a1 = ((struct ctgas *)a1)->ctg_addr;
1215         a2 = ((struct ctgas *)a2)->ctg_addr;
1216         return (a1 == a2 ? 0 : (a1 < a2 ? -1 : 1));
1217 }
1218 
1219 void
1220 ka_init(void)
1221 {
1222         int a;
1223         paddr_t maxphysaddr;
1224 #if !defined(__xpv)
1225         extern pfn_t physmax;
1226 
1227         maxphysaddr = mmu_ptob((paddr_t)physmax) + MMU_PAGEOFFSET;
1228 #else
1229         maxphysaddr = mmu_ptob((paddr_t)HYPERVISOR_memory_op(
1230             XENMEM_maximum_ram_page, NULL)) + MMU_PAGEOFFSET;
1231 #endif
1232 
1233         ASSERT(maxphysaddr <= io_arena_params[0].io_limit);
1234 
1235         for (a = 0; a < MAX_MEM_RANGES; a++) {
1236                 if (maxphysaddr >= io_arena_params[a + 1].io_limit) {
1237                         if (maxphysaddr > io_arena_params[a + 1].io_limit)
1238                                 io_arena_params[a].io_limit = maxphysaddr;
1239                         else
1240                                 a++;
1241                         break;
1242                 }
1243         }
1244         kmem_io_idx = a;
1245 
1246         for (; a < MAX_MEM_RANGES; a++) {
1247                 kmem_io[a].kmem_io_attr = kmem_io_attr;
1248                 kmem_io[a].kmem_io_attr.dma_attr_addr_hi =
1249                     io_arena_params[a].io_limit;
1250                 /*
1251                  * initialize kmem_io[] arena/cache corresponding to
1252                  * maxphysaddr and to the "common" io memory ranges that
1253                  * have io_initial set to a non-zero value.
1254                  */
1255                 if (io_arena_params[a].io_initial || a == kmem_io_idx)
1256                         kmem_io_init(a);
1257         }
1258 
1259         /* initialize ctgtree */
1260         avl_create(&ctgtree, ctgcompare, sizeof (struct ctgas),
1261             offsetof(struct ctgas, ctg_link));
1262 }
1263 
1264 /*
1265  * put contig address/size
1266  */
1267 static void *
1268 putctgas(void *addr, size_t size)
1269 {
1270         struct ctgas    *ctgp;
1271         if ((ctgp = kmem_zalloc(sizeof (*ctgp), KM_NOSLEEP)) != NULL) {
1272                 ctgp->ctg_addr = addr;
1273                 ctgp->ctg_size = size;
1274                 CTGLOCK();
1275                 avl_add(&ctgtree, ctgp);
1276                 CTGUNLOCK();
1277         }
1278         return (ctgp);
1279 }
1280 
1281 /*
1282  * get contig size by addr
1283  */
1284 static size_t
1285 getctgsz(void *addr)
1286 {
1287         struct ctgas    *ctgp;
1288         struct ctgas    find;
1289         size_t          sz = 0;
1290 
1291         find.ctg_addr = addr;
1292         CTGLOCK();
1293         if ((ctgp = avl_find(&ctgtree, &find, NULL)) != NULL) {
1294                 avl_remove(&ctgtree, ctgp);
1295         }
1296         CTGUNLOCK();
1297 
1298         if (ctgp != NULL) {
1299                 sz = ctgp->ctg_size;
1300                 kmem_free(ctgp, sizeof (*ctgp));
1301         }
1302 
1303         return (sz);
1304 }
1305 
1306 /*
1307  * contig_alloc:
1308  *
1309  *      allocates contiguous memory to satisfy the 'size' and dma attributes
1310  *      specified in 'attr'.
1311  *
1312  *      Not all of memory need to be physically contiguous if the
1313  *      scatter-gather list length is greater than 1.
1314  */
1315 
1316 /*ARGSUSED*/
1317 void *
1318 contig_alloc(size_t size, ddi_dma_attr_t *attr, uintptr_t align, int cansleep)
1319 {
1320         pgcnt_t         pgcnt = btopr(size);
1321         size_t          asize = pgcnt * PAGESIZE;
1322         page_t          *ppl;
1323         int             pflag;
1324         void            *addr;
1325 
1326         extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
1327             uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
1328 
1329         /* segkmem_xalloc */
1330 
1331         if (align <= PAGESIZE)
1332                 addr = vmem_alloc(heap_arena, asize,
1333                     (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1334         else
1335                 addr = vmem_xalloc(heap_arena, asize, align, 0, 0, NULL, NULL,
1336                     (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1337         if (addr) {
1338                 ASSERT(!((uintptr_t)addr & (align - 1)));
1339 
1340                 if (page_resv(pgcnt, (cansleep) ? KM_SLEEP : KM_NOSLEEP) == 0) {
1341                         vmem_free(heap_arena, addr, asize);
1342                         return (NULL);
1343                 }
1344                 pflag = PG_EXCL;
1345 
1346                 if (cansleep)
1347                         pflag |= PG_WAIT;
1348 
1349                 /* 4k req gets from freelists rather than pfn search */
1350                 if (pgcnt > 1 || align > PAGESIZE)
1351                         pflag |= PG_PHYSCONTIG;
1352 
1353                 ppl = page_create_io(&kvp, (u_offset_t)(uintptr_t)addr,
1354                     asize, pflag, &kas, (caddr_t)addr, attr);
1355 
1356                 if (!ppl) {
1357                         vmem_free(heap_arena, addr, asize);
1358                         page_unresv(pgcnt);
1359                         return (NULL);
1360                 }
1361 
1362                 while (ppl != NULL) {
1363                         page_t  *pp = ppl;
1364                         page_sub(&ppl, pp);
1365                         ASSERT(page_iolock_assert(pp));
1366                         page_io_unlock(pp);
1367                         page_downgrade(pp);
1368                         hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset,
1369                             pp, (PROT_ALL & ~PROT_USER) |
1370                             HAT_NOSYNC, HAT_LOAD_LOCK);
1371                 }
1372         }
1373         return (addr);
1374 }
1375 
1376 void
1377 contig_free(void *addr, size_t size)
1378 {
1379         pgcnt_t pgcnt = btopr(size);
1380         size_t  asize = pgcnt * PAGESIZE;
1381         caddr_t a, ea;
1382         page_t  *pp;
1383 
1384         hat_unload(kas.a_hat, addr, asize, HAT_UNLOAD_UNLOCK);
1385 
1386         for (a = addr, ea = a + asize; a < ea; a += PAGESIZE) {
1387                 pp = page_find(&kvp, (u_offset_t)(uintptr_t)a);
1388                 if (!pp)
1389                         panic("contig_free: contig pp not found");
1390 
1391                 if (!page_tryupgrade(pp)) {
1392                         page_unlock(pp);
1393                         pp = page_lookup(&kvp,
1394                             (u_offset_t)(uintptr_t)a, SE_EXCL);
1395                         if (pp == NULL)
1396                                 panic("contig_free: page freed");
1397                 }
1398                 page_destroy(pp, 0);
1399         }
1400 
1401         page_unresv(pgcnt);
1402         vmem_free(heap_arena, addr, asize);
1403 }
1404 
1405 /*
1406  * Allocate from the system, aligned on a specific boundary.
1407  * The alignment, if non-zero, must be a power of 2.
1408  */
1409 static void *
1410 kalloca(size_t size, size_t align, int cansleep, int physcontig,
1411     ddi_dma_attr_t *attr)
1412 {
1413         size_t *addr, *raddr, rsize;
1414         size_t hdrsize = 4 * sizeof (size_t);   /* must be power of 2 */
1415         int a, i, c;
1416         vmem_t *vmp;
1417         kmem_cache_t *cp = NULL;
1418 
1419         if (attr->dma_attr_addr_lo > mmu_ptob((uint64_t)ddiphysmin))
1420                 return (NULL);
1421 
1422         align = MAX(align, hdrsize);
1423         ASSERT((align & (align - 1)) == 0);
1424 
1425         /*
1426          * All of our allocators guarantee 16-byte alignment, so we don't
1427          * need to reserve additional space for the header.
1428          * To simplify picking the correct kmem_io_cache, we round up to
1429          * a multiple of KA_ALIGN.
1430          */
1431         rsize = P2ROUNDUP_TYPED(size + align, KA_ALIGN, size_t);
1432 
1433         if (physcontig && rsize > PAGESIZE) {
1434                 if (addr = contig_alloc(size, attr, align, cansleep)) {
1435                         if (!putctgas(addr, size))
1436                                 contig_free(addr, size);
1437                         else
1438                                 return (addr);
1439                 }
1440                 return (NULL);
1441         }
1442 
1443         a = kmem_io_index(attr->dma_attr_addr_hi);
1444 
1445         if (rsize > PAGESIZE) {
1446                 vmp = kmem_io[a].kmem_io_arena;
1447                 raddr = vmem_alloc(vmp, rsize,
1448                     (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1449         } else {
1450                 c = highbit((rsize >> KA_ALIGN_SHIFT) - 1);
1451                 cp = kmem_io[a].kmem_io_cache[c];
1452                 raddr = kmem_cache_alloc(cp, (cansleep) ? KM_SLEEP :
1453                     KM_NOSLEEP);
1454         }
1455 
1456         if (raddr == NULL) {
1457                 int     na;
1458 
1459                 ASSERT(cansleep == 0);
1460                 if (rsize > PAGESIZE)
1461                         return (NULL);
1462                 /*
1463                  * System does not have memory in the requested range.
1464                  * Try smaller kmem io ranges and larger cache sizes
1465                  * to see if there might be memory available in
1466                  * these other caches.
1467                  */
1468 
1469                 for (na = kmem_io_index_next(a); na >= 0;
1470                     na = kmem_io_index_next(na)) {
1471                         ASSERT(kmem_io[na].kmem_io_arena);
1472                         cp = kmem_io[na].kmem_io_cache[c];
1473                         raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1474                         if (raddr)
1475                                 goto kallocdone;
1476                 }
1477                 /* now try the larger kmem io cache sizes */
1478                 for (na = a; na >= 0; na = kmem_io_index_next(na)) {
1479                         for (i = c + 1; i < KA_NCACHE; i++) {
1480                                 cp = kmem_io[na].kmem_io_cache[i];
1481                                 raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1482                                 if (raddr)
1483                                         goto kallocdone;
1484                         }
1485                 }
1486                 return (NULL);
1487         }
1488 
1489 kallocdone:
1490         ASSERT(!P2BOUNDARY((uintptr_t)raddr, rsize, PAGESIZE) ||
1491             rsize > PAGESIZE);
1492 
1493         addr = (size_t *)P2ROUNDUP((uintptr_t)raddr + hdrsize, align);
1494         ASSERT((uintptr_t)addr + size - (uintptr_t)raddr <= rsize);
1495 
1496         addr[-4] = (size_t)cp;
1497         addr[-3] = (size_t)vmp;
1498         addr[-2] = (size_t)raddr;
1499         addr[-1] = rsize;
1500 
1501         return (addr);
1502 }
1503 
1504 static void
1505 kfreea(void *addr)
1506 {
1507         size_t          size;
1508 
1509         if (!((uintptr_t)addr & PAGEOFFSET) && (size = getctgsz(addr))) {
1510                 contig_free(addr, size);
1511         } else {
1512                 size_t  *saddr = addr;
1513                 if (saddr[-4] == 0)
1514                         vmem_free((vmem_t *)saddr[-3], (void *)saddr[-2],
1515                             saddr[-1]);
1516                 else
1517                         kmem_cache_free((kmem_cache_t *)saddr[-4],
1518                             (void *)saddr[-2]);
1519         }
1520 }
1521 
/*
 * i_ddi_devacc_to_hatacc:
 *
 * Intentionally empty in this implementation: neither argument is read and
 * *hataccp is left untouched.
 */
/*ARGSUSED*/
void
i_ddi_devacc_to_hatacc(ddi_device_acc_attr_t *devaccp, uint_t *hataccp)
{
}
1527 
1528 /*
1529  * Check if the specified cache attribute is supported on the platform.
1530  * This function must be called before i_ddi_cacheattr_to_hatacc().
1531  */
1532 boolean_t
1533 i_ddi_check_cache_attr(uint_t flags)
1534 {
1535         /*
1536          * The cache attributes are mutually exclusive. Any combination of
1537          * the attributes leads to a failure.
1538          */
1539         uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1540         if ((cache_attr != 0) && !ISP2(cache_attr))
1541                 return (B_FALSE);
1542 
1543         /* All cache attributes are supported on X86/X64 */
1544         if (cache_attr & (IOMEM_DATA_UNCACHED | IOMEM_DATA_CACHED |
1545             IOMEM_DATA_UC_WR_COMBINE))
1546                 return (B_TRUE);
1547 
1548         /* undefined attributes */
1549         return (B_FALSE);
1550 }
1551 
1552 /* set HAT cache attributes from the cache attributes */
1553 void
1554 i_ddi_cacheattr_to_hatacc(uint_t flags, uint_t *hataccp)
1555 {
1556         uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1557         static char *fname = "i_ddi_cacheattr_to_hatacc";
1558 
1559         /*
1560          * If write-combining is not supported, then it falls back
1561          * to uncacheable.
1562          */
1563         if (cache_attr == IOMEM_DATA_UC_WR_COMBINE &&
1564             !is_x86_feature(x86_featureset, X86FSET_PAT))
1565                 cache_attr = IOMEM_DATA_UNCACHED;
1566 
1567         /*
1568          * set HAT attrs according to the cache attrs.
1569          */
1570         switch (cache_attr) {
1571         case IOMEM_DATA_UNCACHED:
1572                 *hataccp &= ~HAT_ORDER_MASK;
1573                 *hataccp |= (HAT_STRICTORDER | HAT_PLAT_NOCACHE);
1574                 break;
1575         case IOMEM_DATA_UC_WR_COMBINE:
1576                 *hataccp &= ~HAT_ORDER_MASK;
1577                 *hataccp |= (HAT_MERGING_OK | HAT_PLAT_NOCACHE);
1578                 break;
1579         case IOMEM_DATA_CACHED:
1580                 *hataccp &= ~HAT_ORDER_MASK;
1581                 *hataccp |= HAT_UNORDERED_OK;
1582                 break;
1583         /*
1584          * This case must not occur because the cache attribute is scrutinized
1585          * before this function is called.
1586          */
1587         default:
1588                 /*
1589                  * set cacheable to hat attrs.
1590                  */
1591                 *hataccp &= ~HAT_ORDER_MASK;
1592                 *hataccp |= HAT_UNORDERED_OK;
1593                 cmn_err(CE_WARN, "%s: cache_attr=0x%x is ignored.",
1594                     fname, cache_attr);
1595         }
1596 }
1597 
1598 /*
1599  * This should actually be called i_ddi_dma_mem_alloc. There should
1600  * also be an i_ddi_pio_mem_alloc. i_ddi_dma_mem_alloc should call
1601  * through the device tree with the DDI_CTLOPS_DMA_ALIGN ctl ops to
1602  * get alignment requirements for DMA memory. i_ddi_pio_mem_alloc
1603  * should use DDI_CTLOPS_PIO_ALIGN. Since we only have i_ddi_mem_alloc
1604  * so far which is used for both, DMA and PIO, we have to use the DMA
1605  * ctl ops to make everybody happy.
1606  */
1607 /*ARGSUSED*/
1608 int
1609 i_ddi_mem_alloc(dev_info_t *dip, ddi_dma_attr_t *attr,
1610     size_t length, int cansleep, int flags,
1611     ddi_device_acc_attr_t *accattrp, caddr_t *kaddrp,
1612     size_t *real_length, ddi_acc_hdl_t *ap)
1613 {
1614         caddr_t a;
1615         int iomin;
1616         ddi_acc_impl_t *iap;
1617         int physcontig = 0;
1618         pgcnt_t npages;
1619         pgcnt_t minctg;
1620         uint_t order;
1621         int e;
1622 
1623         /*
1624          * Check legality of arguments
1625          */
1626         if (length == 0 || kaddrp == NULL || attr == NULL) {
1627                 return (DDI_FAILURE);
1628         }
1629 
1630         if (attr->dma_attr_minxfer == 0 || attr->dma_attr_align == 0 ||
1631             !ISP2(attr->dma_attr_align) || !ISP2(attr->dma_attr_minxfer)) {
1632                 return (DDI_FAILURE);
1633         }
1634 
1635         /*
1636          * figure out most restrictive alignment requirement
1637          */
1638         iomin = attr->dma_attr_minxfer;
1639         iomin = maxbit(iomin, attr->dma_attr_align);
1640         if (iomin == 0)
1641                 return (DDI_FAILURE);
1642 
1643         ASSERT((iomin & (iomin - 1)) == 0);
1644 
1645         /*
1646          * if we allocate memory with IOMEM_DATA_UNCACHED or
1647          * IOMEM_DATA_UC_WR_COMBINE, make sure we allocate a page aligned
1648          * memory that ends on a page boundry.
1649          * Don't want to have to different cache mappings to the same
1650          * physical page.
1651          */
1652         if (OVERRIDE_CACHE_ATTR(flags)) {
1653                 iomin = (iomin + MMU_PAGEOFFSET) & MMU_PAGEMASK;
1654                 length = (length + MMU_PAGEOFFSET) & (size_t)MMU_PAGEMASK;
1655         }
1656 
1657         /*
1658          * Determine if we need to satisfy the request for physically
1659          * contiguous memory or alignments larger than pagesize.
1660          */
1661         npages = btopr(length + attr->dma_attr_align);
1662         minctg = howmany(npages, attr->dma_attr_sgllen);
1663 
1664         if (minctg > 1) {
1665                 uint64_t pfnseg = attr->dma_attr_seg >> PAGESHIFT;
1666                 /*
1667                  * verify that the minimum contig requirement for the
1668                  * actual length does not cross segment boundary.
1669                  */
1670                 length = P2ROUNDUP_TYPED(length, attr->dma_attr_minxfer,
1671                     size_t);
1672                 npages = btopr(length);
1673                 minctg = howmany(npages, attr->dma_attr_sgllen);
1674                 if (minctg > pfnseg + 1)
1675                         return (DDI_FAILURE);
1676                 physcontig = 1;
1677         } else {
1678                 length = P2ROUNDUP_TYPED(length, iomin, size_t);
1679         }
1680 
1681         /*
1682          * Allocate the requested amount from the system.
1683          */
1684         a = kalloca(length, iomin, cansleep, physcontig, attr);
1685 
1686         if ((*kaddrp = a) == NULL)
1687                 return (DDI_FAILURE);
1688 
1689         /*
1690          * if we to modify the cache attributes, go back and muck with the
1691          * mappings.
1692          */
1693         if (OVERRIDE_CACHE_ATTR(flags)) {
1694                 order = 0;
1695                 i_ddi_cacheattr_to_hatacc(flags, &order);
1696                 e = kmem_override_cache_attrs(a, length, order);
1697                 if (e != 0) {
1698                         kfreea(a);
1699                         return (DDI_FAILURE);
1700                 }
1701         }
1702 
1703         if (real_length) {
1704                 *real_length = length;
1705         }
1706         if (ap) {
1707                 /*
1708                  * initialize access handle
1709                  */
1710                 iap = (ddi_acc_impl_t *)ap->ah_platform_private;
1711                 iap->ahi_acc_attr |= DDI_ACCATTR_CPU_VADDR;
1712                 impl_acc_hdl_init(ap);
1713         }
1714 
1715         return (DDI_SUCCESS);
1716 }
1717 
1718 /* ARGSUSED */
1719 void
1720 i_ddi_mem_free(caddr_t kaddr, ddi_acc_hdl_t *ap)
1721 {
1722         if (ap != NULL) {
1723                 /*
1724                  * if we modified the cache attributes on alloc, go back and
1725                  * fix them since this memory could be returned to the
1726                  * general pool.
1727                  */
1728                 if (OVERRIDE_CACHE_ATTR(ap->ah_xfermodes)) {
1729                         uint_t order = 0;
1730                         int e;
1731                         i_ddi_cacheattr_to_hatacc(IOMEM_DATA_CACHED, &order);
1732                         e = kmem_override_cache_attrs(kaddr, ap->ah_len, order);
1733                         if (e != 0) {
1734                                 cmn_err(CE_WARN, "i_ddi_mem_free() failed to "
1735                                     "override cache attrs, memory leaked\n");
1736                                 return;
1737                         }
1738                 }
1739         }
1740         kfreea(kaddr);
1741 }
1742 
/*
 * Access Barriers
 *
 * i_ddi_ontrap() is a stub in this implementation: it ignores the handle
 * and always returns DDI_FAILURE.
 */
/*ARGSUSED*/
int
i_ddi_ontrap(ddi_acc_handle_t hp)
{
        return (DDI_FAILURE);
}
1753 
/*
 * i_ddi_notrap() is a stub in this implementation: it ignores the handle
 * and does nothing.
 */
/*ARGSUSED*/
void
i_ddi_notrap(ddi_acc_handle_t hp)
{
}
1759 
1760 
1761 /*
1762  * Misc Functions
1763  */
1764 
1765 /*
1766  * Implementation instance override functions
1767  *
1768  * No override on i86pc
1769  */
1770 /*ARGSUSED*/
1771 uint_t
1772 impl_assign_instance(dev_info_t *dip)
1773 {
1774         return ((uint_t)-1);
1775 }
1776 
1777 /*ARGSUSED*/
1778 int
1779 impl_keep_instance(dev_info_t *dip)
1780 {
1781 
1782 #if defined(__xpv)
1783         /*
1784          * Do not persist instance numbers assigned to devices in dom0
1785          */
1786         dev_info_t *pdip;
1787         if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1788                 if (((pdip = ddi_get_parent(dip)) != NULL) &&
1789                     (strcmp(ddi_get_name(pdip), "xpvd") == 0))
1790                         return (DDI_SUCCESS);
1791         }
1792 #endif
1793         return (DDI_FAILURE);
1794 }
1795 
1796 /*ARGSUSED*/
1797 int
1798 impl_free_instance(dev_info_t *dip)
1799 {
1800         return (DDI_FAILURE);
1801 }
1802 
1803 /*ARGSUSED*/
1804 int
1805 impl_check_cpu(dev_info_t *devi)
1806 {
1807         return (DDI_SUCCESS);
1808 }
1809 
/*
 * Referenced in common/cpr_driver.c: Power off machine.
 * Don't know how to power off i86pc, so this is an intentional no-op.
 *
 * Declared with (void) so the definition also serves as a prototype;
 * an empty parameter list would leave the arguments unchecked.
 */
void
arch_power_down(void)
{
}
1817 
1818 /*
1819  * Copy name to property_name, since name
1820  * is in the low address range below kernelbase.
1821  */
1822 static void
1823 copy_boot_str(const char *boot_str, char *kern_str, int len)
1824 {
1825         int i = 0;
1826 
1827         while (i < len - 1 && boot_str[i] != '\0') {
1828                 kern_str[i] = boot_str[i];
1829                 i++;
1830         }
1831 
1832         kern_str[i] = 0;        /* null terminate */
1833         if (boot_str[i] != '\0')
1834                 cmn_err(CE_WARN,
1835                     "boot property string is truncated to %s", kern_str);
1836 }
1837 
1838 static void
1839 get_boot_properties(void)
1840 {
1841         extern char hw_provider[];
1842         dev_info_t *devi;
1843         char *name;
1844         int length;
1845         char property_name[50], property_val[50];
1846         void *bop_staging_area;
1847 
1848         bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP);
1849 
1850         /*
1851          * Import "root" properties from the boot.
1852          *
1853          * We do this by invoking BOP_NEXTPROP until the list
1854          * is completely copied in.
1855          */
1856 
1857         devi = ddi_root_node();
1858         for (name = BOP_NEXTPROP(bootops, "");          /* get first */
1859             name;                                       /* NULL => DONE */
1860             name = BOP_NEXTPROP(bootops, name)) {       /* get next */
1861 
1862                 /* copy string to memory above kernelbase */
1863                 copy_boot_str(name, property_name, 50);
1864 
1865                 /*
1866                  * Skip vga properties. They will be picked up later
1867                  * by get_vga_properties.
1868                  */
1869                 if (strcmp(property_name, "display-edif-block") == 0 ||
1870                     strcmp(property_name, "display-edif-id") == 0) {
1871                         continue;
1872                 }
1873 
1874                 length = BOP_GETPROPLEN(bootops, property_name);
1875                 if (length == 0)
1876                         continue;
1877                 if (length > MMU_PAGESIZE) {
1878                         cmn_err(CE_NOTE,
1879                             "boot property %s longer than 0x%x, ignored\n",
1880                             property_name, MMU_PAGESIZE);
1881                         continue;
1882                 }
1883                 BOP_GETPROP(bootops, property_name, bop_staging_area);
1884 
1885                 /*
1886                  * special properties:
1887                  * si-machine, si-hw-provider
1888                  *      goes to kernel data structures.
1889                  * bios-boot-device and stdout
1890                  *      goes to hardware property list so it may show up
1891                  *      in the prtconf -vp output. This is needed by
1892                  *      Install/Upgrade. Once we fix install upgrade,
1893                  *      this can be taken out.
1894                  */
1895                 if (strcmp(name, "si-machine") == 0) {
1896                         (void) strncpy(utsname.machine, bop_staging_area,
1897                             SYS_NMLN);
1898                         utsname.machine[SYS_NMLN - 1] = (char)NULL;
1899                 } else if (strcmp(name, "si-hw-provider") == 0) {
1900                         (void) strncpy(hw_provider, bop_staging_area, SYS_NMLN);
1901                         hw_provider[SYS_NMLN - 1] = (char)NULL;
1902                 } else if (strcmp(name, "bios-boot-device") == 0) {
1903                         copy_boot_str(bop_staging_area, property_val, 50);
1904                         (void) ndi_prop_update_string(DDI_DEV_T_NONE, devi,
1905                             property_name, property_val);
1906                 } else if (strcmp(name, "acpi-root-tab") == 0) {
1907                         (void) ndi_prop_update_int64(DDI_DEV_T_NONE, devi,
1908                             property_name, *((int64_t *)bop_staging_area));
1909                 } else if (strcmp(name, "smbios-address") == 0) {
1910                         (void) ndi_prop_update_int64(DDI_DEV_T_NONE, devi,
1911                             property_name, *((int64_t *)bop_staging_area));
1912                 } else if (strcmp(name, "efi-systab") == 0) {
1913                         (void) ndi_prop_update_int64(DDI_DEV_T_NONE, devi,
1914                             property_name, *((int64_t *)bop_staging_area));
1915                 } else if (strcmp(name, "efi-systype") == 0) {
1916                         copy_boot_str(bop_staging_area, property_val, 50);
1917                         (void) ndi_prop_update_string(DDI_DEV_T_NONE, devi,
1918                             property_name, property_val);
1919                 } else if (strcmp(name, "stdout") == 0) {
1920                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi,
1921                             property_name, *((int *)bop_staging_area));
1922                 } else if (strcmp(name, "boot-args") == 0) {
1923                         copy_boot_str(bop_staging_area, property_val, 50);
1924                         (void) e_ddi_prop_update_string(DDI_DEV_T_NONE, devi,
1925                             property_name, property_val);
1926                 } else if (strcmp(name, "bootargs") == 0) {
1927                         copy_boot_str(bop_staging_area, property_val, 50);
1928                         (void) e_ddi_prop_update_string(DDI_DEV_T_NONE, devi,
1929                             property_name, property_val);
1930                 } else if (strcmp(name, "bootp-response") == 0) {
1931                         (void) e_ddi_prop_update_byte_array(DDI_DEV_T_NONE,
1932                             devi, property_name, bop_staging_area, length);
1933                 } else if (strcmp(name, "ramdisk_start") == 0) {
1934                         (void) e_ddi_prop_update_int64(DDI_DEV_T_NONE, devi,
1935                             property_name, *((int64_t *)bop_staging_area));
1936                 } else if (strcmp(name, "ramdisk_end") == 0) {
1937                         (void) e_ddi_prop_update_int64(DDI_DEV_T_NONE, devi,
1938                             property_name, *((int64_t *)bop_staging_area));
1939                 } else if (strncmp(name, "module-addr-", 12) == 0) {
1940                         (void) e_ddi_prop_update_int64(DDI_DEV_T_NONE, devi,
1941                             property_name, *((int64_t *)bop_staging_area));
1942                 } else if (strncmp(name, "module-size-", 12) == 0) {
1943                         (void) e_ddi_prop_update_int64(DDI_DEV_T_NONE, devi,
1944                             property_name, *((int64_t *)bop_staging_area));
1945                 } else {
1946                         /* Property type unknown, use old prop interface */
1947                         (void) e_ddi_prop_create(DDI_DEV_T_NONE, devi,
1948                             DDI_PROP_CANSLEEP, property_name, bop_staging_area,
1949                             length);
1950                 }
1951         }
1952 
1953         kmem_free(bop_staging_area, MMU_PAGESIZE);
1954 }
1955 
1956 static void
1957 get_vga_properties(void)
1958 {
1959         dev_info_t *devi;
1960         major_t major;
1961         char *name;
1962         int length;
1963         char property_val[50];
1964         void *bop_staging_area;
1965 
1966         /*
1967          * XXXX Hack Allert!
1968          * There really needs to be a better way for identifying various
1969          * console framebuffers and their related issues.  Till then,
1970          * check for this one as a replacement to vgatext.
1971          */
1972         major = ddi_name_to_major("ragexl");
1973         if (major == (major_t)-1) {
1974                 major = ddi_name_to_major("vgatext");
1975                 if (major == (major_t)-1)
1976                         return;
1977         }
1978         devi = devnamesp[major].dn_head;
1979         if (devi == NULL)
1980                 return;
1981 
1982         bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_SLEEP);
1983 
1984         /*
1985          * Import "vga" properties from the boot.
1986          */
1987         name = "display-edif-block";
1988         length = BOP_GETPROPLEN(bootops, name);
1989         if (length > 0 && length < MMU_PAGESIZE) {
1990                 BOP_GETPROP(bootops, name, bop_staging_area);
1991                 (void) ndi_prop_update_byte_array(DDI_DEV_T_NONE,
1992                     devi, name, bop_staging_area, length);
1993         }
1994 
1995         /*
1996          * kdmconfig is also looking for display-type and
1997          * video-adapter-type. We default to color and svga.
1998          *
1999          * Could it be "monochrome", "vga"?
2000          * Nah, you've got to come to the 21st century...
2001          * And you can set monitor type manually in kdmconfig
2002          * if you are really an old junky.
2003          */
2004         (void) ndi_prop_update_string(DDI_DEV_T_NONE,
2005             devi, "display-type", "color");
2006         (void) ndi_prop_update_string(DDI_DEV_T_NONE,
2007             devi, "video-adapter-type", "svga");
2008 
2009         name = "display-edif-id";
2010         length = BOP_GETPROPLEN(bootops, name);
2011         if (length > 0 && length < MMU_PAGESIZE) {
2012                 BOP_GETPROP(bootops, name, bop_staging_area);
2013                 copy_boot_str(bop_staging_area, property_val, length);
2014                 (void) ndi_prop_update_string(DDI_DEV_T_NONE,
2015                     devi, name, property_val);
2016         }
2017 
2018         kmem_free(bop_staging_area, MMU_PAGESIZE);
2019 }
2020 
2021 
2022 /*
2023  * This is temporary, but absolutely necessary.  If we are being
2024  * booted with a device tree created by the DevConf project's bootconf
2025  * program, then we have device information nodes that reflect
2026  * reality.  At this point in time in the Solaris release schedule, the
2027  * kernel drivers aren't prepared for reality.  They still depend on their
2028  * own ad-hoc interpretations of the properties created when their .conf
2029  * files were interpreted. These drivers use an "ignore-hardware-nodes"
2030  * property to prevent them from using the nodes passed up from the bootconf
2031  * device tree.
2032  *
2033  * Trying to assemble root file system drivers as we are booting from
2034  * devconf will fail if the kernel driver is basing its name_addr's on the
 * pseudo-node device info while the bootpath passed up from bootconf is using
2036  * reality-based name_addrs.  We help the boot along in this case by
2037  * looking at the pre-bootconf bootpath and determining if we would have
2038  * successfully matched if that had been the bootpath we had chosen.
2039  *
2040  * Note that we only even perform this extra check if we've booted
2041  * using bootconf's 1275 compliant bootpath, this is the boot device, and
2042  * we're trying to match the name_addr specified in the 1275 bootpath.
2043  */
2044 
2045 #define MAXCOMPONENTLEN 32
2046 
2047 int
2048 x86_old_bootpath_name_addr_match(dev_info_t *cdip, char *caddr, char *naddr)
2049 {
2050         /*
2051          *  There are multiple criteria to be met before we can even
2052          *  consider allowing a name_addr match here.
2053          *
2054          *  1) We must have been booted such that the bootconf program
2055          *      created device tree nodes and properties.  This can be
2056          *      determined by examining the 'bootpath' property.  This
2057          *      property will be a non-null string iff bootconf was
2058          *      involved in the boot.
2059          *
2060          *  2) The module that we want to match must be the boot device.
2061          *
2062          *  3) The instance of the module we are thinking of letting be
2063          *      our match must be ignoring hardware nodes.
2064          *
2065          *  4) The name_addr we want to match must be the name_addr
2066          *      specified in the 1275 bootpath.
2067          */
2068         static char bootdev_module[MAXCOMPONENTLEN];
2069         static char bootdev_oldmod[MAXCOMPONENTLEN];
2070         static char bootdev_newaddr[MAXCOMPONENTLEN];
2071         static char bootdev_oldaddr[MAXCOMPONENTLEN];
2072         static int  quickexit;
2073 
2074         char *daddr;
2075         int dlen;
2076 
2077         char    *lkupname;
2078         int     rv = DDI_FAILURE;
2079 
2080         if ((ddi_getlongprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2081             "devconf-addr", (caddr_t)&daddr, &dlen) == DDI_PROP_SUCCESS) &&
2082             (ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2083             "ignore-hardware-nodes", -1) != -1)) {
2084                 if (strcmp(daddr, caddr) == 0) {
2085                         return (DDI_SUCCESS);
2086                 }
2087         }
2088 
2089         if (quickexit)
2090                 return (rv);
2091 
2092         if (bootdev_module[0] == '\0') {
2093                 char *addrp, *eoaddrp;
2094                 char *busp, *modp, *atp;
2095                 char *bp1275, *bp;
2096                 int  bp1275len, bplen;
2097 
2098                 bp1275 = bp = addrp = eoaddrp = busp = modp = atp = NULL;
2099 
2100                 if (ddi_getlongprop(DDI_DEV_T_ANY,
2101                     ddi_root_node(), 0, "bootpath",
2102                     (caddr_t)&bp1275, &bp1275len) != DDI_PROP_SUCCESS ||
2103                     bp1275len <= 1) {
2104                         /*
2105                          * We didn't boot from bootconf so we never need to
2106                          * do any special matches.
2107                          */
2108                         quickexit = 1;
2109                         if (bp1275)
2110                                 kmem_free(bp1275, bp1275len);
2111                         return (rv);
2112                 }
2113 
2114                 if (ddi_getlongprop(DDI_DEV_T_ANY,
2115                     ddi_root_node(), 0, "boot-path",
2116                     (caddr_t)&bp, &bplen) != DDI_PROP_SUCCESS || bplen <= 1) {
2117                         /*
2118                          * No fallback position for matching. This is
2119                          * certainly unexpected, but we'll handle it
2120                          * just in case.
2121                          */
2122                         quickexit = 1;
2123                         kmem_free(bp1275, bp1275len);
2124                         if (bp)
2125                                 kmem_free(bp, bplen);
2126                         return (rv);
2127                 }
2128 
2129                 /*
2130                  *  Determine boot device module and 1275 name_addr
2131                  *
2132                  *  bootpath assumed to be of the form /bus/module@name_addr
2133                  */
2134                 if (busp = strchr(bp1275, '/')) {
2135                         if (modp = strchr(busp + 1, '/')) {
2136                                 if (atp = strchr(modp + 1, '@')) {
2137                                         *atp = '\0';
2138                                         addrp = atp + 1;
2139                                         if (eoaddrp = strchr(addrp, '/'))
2140                                                 *eoaddrp = '\0';
2141                                 }
2142                         }
2143                 }
2144 
2145                 if (modp && addrp) {
2146                         (void) strncpy(bootdev_module, modp + 1,
2147                             MAXCOMPONENTLEN);
2148                         bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2149 
2150                         (void) strncpy(bootdev_newaddr, addrp, MAXCOMPONENTLEN);
2151                         bootdev_newaddr[MAXCOMPONENTLEN - 1] = '\0';
2152                 } else {
2153                         quickexit = 1;
2154                         kmem_free(bp1275, bp1275len);
2155                         kmem_free(bp, bplen);
2156                         return (rv);
2157                 }
2158 
2159                 /*
2160                  *  Determine fallback name_addr
2161                  *
2162                  *  10/3/96 - Also save fallback module name because it
2163                  *  might actually be different than the current module
2164                  *  name.  E.G., ISA pnp drivers have new names.
2165                  *
2166                  *  bootpath assumed to be of the form /bus/module@name_addr
2167                  */
2168                 addrp = NULL;
2169                 if (busp = strchr(bp, '/')) {
2170                         if (modp = strchr(busp + 1, '/')) {
2171                                 if (atp = strchr(modp + 1, '@')) {
2172                                         *atp = '\0';
2173                                         addrp = atp + 1;
2174                                         if (eoaddrp = strchr(addrp, '/'))
2175                                                 *eoaddrp = '\0';
2176                                 }
2177                         }
2178                 }
2179 
2180                 if (modp && addrp) {
2181                         (void) strncpy(bootdev_oldmod, modp + 1,
2182                             MAXCOMPONENTLEN);
2183                         bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2184 
2185                         (void) strncpy(bootdev_oldaddr, addrp, MAXCOMPONENTLEN);
2186                         bootdev_oldaddr[MAXCOMPONENTLEN - 1] = '\0';
2187                 }
2188 
2189                 /* Free up the bootpath storage now that we're done with it. */
2190                 kmem_free(bp1275, bp1275len);
2191                 kmem_free(bp, bplen);
2192 
2193                 if (bootdev_oldaddr[0] == '\0') {
2194                         quickexit = 1;
2195                         return (rv);
2196                 }
2197         }
2198 
2199         if (((lkupname = ddi_get_name(cdip)) != NULL) &&
2200             (strcmp(bootdev_module, lkupname) == 0 ||
2201             strcmp(bootdev_oldmod, lkupname) == 0) &&
2202             ((ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2203             "ignore-hardware-nodes", -1) != -1) ||
2204             ignore_hardware_nodes) &&
2205             strcmp(bootdev_newaddr, caddr) == 0 &&
2206             strcmp(bootdev_oldaddr, naddr) == 0) {
2207                 rv = DDI_SUCCESS;
2208         }
2209 
2210         return (rv);
2211 }
2212 
2213 /*
2214  * Perform a copy from a memory mapped device (whose devinfo pointer is devi)
2215  * separately mapped at devaddr in the kernel to a kernel buffer at kaddr.
2216  */
2217 /*ARGSUSED*/
2218 int
2219 e_ddi_copyfromdev(dev_info_t *devi,
2220     off_t off, const void *devaddr, void *kaddr, size_t len)
2221 {
2222         bcopy(devaddr, kaddr, len);
2223         return (0);
2224 }
2225 
2226 /*
2227  * Perform a copy to a memory mapped device (whose devinfo pointer is devi)
2228  * separately mapped at devaddr in the kernel from a kernel buffer at kaddr.
2229  */
2230 /*ARGSUSED*/
2231 int
2232 e_ddi_copytodev(dev_info_t *devi,
2233     off_t off, const void *kaddr, void *devaddr, size_t len)
2234 {
2235         bcopy(kaddr, devaddr, len);
2236         return (0);
2237 }
2238 
2239 
2240 static int
2241 poke_mem(peekpoke_ctlops_t *in_args)
2242 {
2243         int err = DDI_SUCCESS;
2244         on_trap_data_t otd;
2245 
2246         /* Set up protected environment. */
2247         if (!on_trap(&otd, OT_DATA_ACCESS)) {
2248                 switch (in_args->size) {
2249                 case sizeof (uint8_t):
2250                         *(uint8_t *)(in_args->dev_addr) =
2251                             *(uint8_t *)in_args->host_addr;
2252                         break;
2253 
2254                 case sizeof (uint16_t):
2255                         *(uint16_t *)(in_args->dev_addr) =
2256                             *(uint16_t *)in_args->host_addr;
2257                         break;
2258 
2259                 case sizeof (uint32_t):
2260                         *(uint32_t *)(in_args->dev_addr) =
2261                             *(uint32_t *)in_args->host_addr;
2262                         break;
2263 
2264                 case sizeof (uint64_t):
2265                         *(uint64_t *)(in_args->dev_addr) =
2266                             *(uint64_t *)in_args->host_addr;
2267                         break;
2268 
2269                 default:
2270                         err = DDI_FAILURE;
2271                         break;
2272                 }
2273         } else
2274                 err = DDI_FAILURE;
2275 
2276         /* Take down protected environment. */
2277         no_trap();
2278 
2279         return (err);
2280 }
2281 
2282 
2283 static int
2284 peek_mem(peekpoke_ctlops_t *in_args)
2285 {
2286         int err = DDI_SUCCESS;
2287         on_trap_data_t otd;
2288 
2289         if (!on_trap(&otd, OT_DATA_ACCESS)) {
2290                 switch (in_args->size) {
2291                 case sizeof (uint8_t):
2292                         *(uint8_t *)in_args->host_addr =
2293                             *(uint8_t *)in_args->dev_addr;
2294                         break;
2295 
2296                 case sizeof (uint16_t):
2297                         *(uint16_t *)in_args->host_addr =
2298                             *(uint16_t *)in_args->dev_addr;
2299                         break;
2300 
2301                 case sizeof (uint32_t):
2302                         *(uint32_t *)in_args->host_addr =
2303                             *(uint32_t *)in_args->dev_addr;
2304                         break;
2305 
2306                 case sizeof (uint64_t):
2307                         *(uint64_t *)in_args->host_addr =
2308                             *(uint64_t *)in_args->dev_addr;
2309                         break;
2310 
2311                 default:
2312                         err = DDI_FAILURE;
2313                         break;
2314                 }
2315         } else
2316                 err = DDI_FAILURE;
2317 
2318         no_trap();
2319         return (err);
2320 }
2321 
2322 
2323 /*
2324  * This is called only to process peek/poke when the DIP is NULL.
2325  * Assume that this is for memory, as nexi take care of device safe accesses.
2326  */
2327 int
2328 peekpoke_mem(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args)
2329 {
2330         return (cmd == DDI_CTLOPS_PEEK ? peek_mem(in_args) : poke_mem(in_args));
2331 }
2332 
2333 /*
2334  * we've just done a cautious put/get. Check if it was successful by
2335  * calling pci_ereport_post() on all puts and for any gets that return -1
2336  */
2337 static int
2338 pci_peekpoke_check_fma(dev_info_t *dip, void *arg, ddi_ctl_enum_t ctlop,
2339     void (*scan)(dev_info_t *, ddi_fm_error_t *))
2340 {
2341         int     rval = DDI_SUCCESS;
2342         peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2343         ddi_fm_error_t de;
2344         ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2345         ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2346         int check_err = 0;
2347         int repcount = in_args->repcount;
2348 
2349         if (ctlop == DDI_CTLOPS_POKE &&
2350             hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC)
2351                 return (DDI_SUCCESS);
2352 
2353         if (ctlop == DDI_CTLOPS_PEEK &&
2354             hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC) {
2355                 for (; repcount; repcount--) {
2356                         switch (in_args->size) {
2357                         case sizeof (uint8_t):
2358                                 if (*(uint8_t *)in_args->host_addr == 0xff)
2359                                         check_err = 1;
2360                                 break;
2361                         case sizeof (uint16_t):
2362                                 if (*(uint16_t *)in_args->host_addr == 0xffff)
2363                                         check_err = 1;
2364                                 break;
2365                         case sizeof (uint32_t):
2366                                 if (*(uint32_t *)in_args->host_addr ==
2367                                     0xffffffff)
2368                                         check_err = 1;
2369                                 break;
2370                         case sizeof (uint64_t):
2371                                 if (*(uint64_t *)in_args->host_addr ==
2372                                     0xffffffffffffffff)
2373                                         check_err = 1;
2374                                 break;
2375                         }
2376                 }
2377                 if (check_err == 0)
2378                         return (DDI_SUCCESS);
2379         }
2380         /*
2381          * for a cautious put or get or a non-cautious get that returned -1 call
2382          * io framework to see if there really was an error
2383          */
2384         bzero(&de, sizeof (ddi_fm_error_t));
2385         de.fme_version = DDI_FME_VERSION;
2386         de.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
2387         if (hdlp->ah_acc.devacc_attr_access == DDI_CAUTIOUS_ACC) {
2388                 de.fme_flag = DDI_FM_ERR_EXPECTED;
2389                 de.fme_acc_handle = in_args->handle;
2390         } else if (hdlp->ah_acc.devacc_attr_access == DDI_DEFAULT_ACC) {
2391                 /*
2392                  * We only get here with DDI_DEFAULT_ACC for config space gets.
2393                  * Non-hardened drivers may be probing the hardware and
2394                  * expecting -1 returned. So need to treat errors on
2395                  * DDI_DEFAULT_ACC as DDI_FM_ERR_EXPECTED.
2396                  */
2397                 de.fme_flag = DDI_FM_ERR_EXPECTED;
2398                 de.fme_acc_handle = in_args->handle;
2399         } else {
2400                 /*
2401                  * Hardened driver doing protected accesses shouldn't
2402                  * get errors unless there's a hardware problem. Treat
2403                  * as nonfatal if there's an error, but set UNEXPECTED
2404                  * so we raise ereports on any errors and potentially
2405                  * fault the device
2406                  */
2407                 de.fme_flag = DDI_FM_ERR_UNEXPECTED;
2408         }
2409         (void) scan(dip, &de);
2410         if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2411             de.fme_status != DDI_FM_OK) {
2412                 ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2413                 rval = DDI_FAILURE;
2414                 errp->err_ena = de.fme_ena;
2415                 errp->err_expected = de.fme_flag;
2416                 errp->err_status = DDI_FM_NONFATAL;
2417         }
2418         return (rval);
2419 }
2420 
2421 /*
2422  * pci_peekpoke_check_nofma() is for when an error occurs on a register access
2423  * during pci_ereport_post(). We can't call pci_ereport_post() again or we'd
2424  * recurse, so assume all puts are OK and gets have failed if they return -1
2425  */
2426 static int
2427 pci_peekpoke_check_nofma(void *arg, ddi_ctl_enum_t ctlop)
2428 {
2429         int rval = DDI_SUCCESS;
2430         peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2431         ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2432         ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2433         int repcount = in_args->repcount;
2434 
2435         if (ctlop == DDI_CTLOPS_POKE)
2436                 return (rval);
2437 
2438         for (; repcount; repcount--) {
2439                 switch (in_args->size) {
2440                 case sizeof (uint8_t):
2441                         if (*(uint8_t *)in_args->host_addr == 0xff)
2442                                 rval = DDI_FAILURE;
2443                         break;
2444                 case sizeof (uint16_t):
2445                         if (*(uint16_t *)in_args->host_addr == 0xffff)
2446                                 rval = DDI_FAILURE;
2447                         break;
2448                 case sizeof (uint32_t):
2449                         if (*(uint32_t *)in_args->host_addr == 0xffffffff)
2450                                 rval = DDI_FAILURE;
2451                         break;
2452                 case sizeof (uint64_t):
2453                         if (*(uint64_t *)in_args->host_addr ==
2454                             0xffffffffffffffff)
2455                                 rval = DDI_FAILURE;
2456                         break;
2457                 }
2458         }
2459         if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2460             rval == DDI_FAILURE) {
2461                 ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2462                 errp->err_ena = fm_ena_generate(0, FM_ENA_FMT1);
2463                 errp->err_expected = DDI_FM_ERR_UNEXPECTED;
2464                 errp->err_status = DDI_FM_NONFATAL;
2465         }
2466         return (rval);
2467 }
2468 
2469 int
2470 pci_peekpoke_check(dev_info_t *dip, dev_info_t *rdip,
2471     ddi_ctl_enum_t ctlop, void *arg, void *result,
2472     int (*handler)(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
2473     void *), kmutex_t *err_mutexp, kmutex_t *peek_poke_mutexp,
2474     void (*scan)(dev_info_t *, ddi_fm_error_t *))
2475 {
2476         int rval;
2477         peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2478         ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2479 
2480         /*
2481          * this function only supports cautious accesses, not peeks/pokes
2482          * which don't have a handle
2483          */
2484         if (hp == NULL)
2485                 return (DDI_FAILURE);
2486 
2487         if (hp->ahi_acc_attr & DDI_ACCATTR_CONFIG_SPACE) {
2488                 if (!mutex_tryenter(err_mutexp)) {
2489                         /*
2490                          * As this may be a recursive call from within
2491                          * pci_ereport_post() we can't wait for the mutexes.
2492                          * Fortunately we know someone is already calling
2493                          * pci_ereport_post() which will handle the error bits
2494                          * for us, and as this is a config space access we can
2495                          * just do the access and check return value for -1
2496                          * using pci_peekpoke_check_nofma().
2497                          */
2498                         rval = handler(dip, rdip, ctlop, arg, result);
2499                         if (rval == DDI_SUCCESS)
2500                                 rval = pci_peekpoke_check_nofma(arg, ctlop);
2501                         return (rval);
2502                 }
2503                 /*
2504                  * This can't be a recursive call. Drop the err_mutex and get
2505                  * both mutexes in the right order. If an error hasn't already
2506                  * been detected by the ontrap code, use pci_peekpoke_check_fma
2507                  * which will call pci_ereport_post() to check error status.
2508                  */
2509                 mutex_exit(err_mutexp);
2510         }
2511         mutex_enter(peek_poke_mutexp);
2512         rval = handler(dip, rdip, ctlop, arg, result);
2513         if (rval == DDI_SUCCESS) {
2514                 mutex_enter(err_mutexp);
2515                 rval = pci_peekpoke_check_fma(dip, arg, ctlop, scan);
2516                 mutex_exit(err_mutexp);
2517         }
2518         mutex_exit(peek_poke_mutexp);
2519         return (rval);
2520 }
2521 
2522 void
2523 impl_setup_ddi(void)
2524 {
2525 #if !defined(__xpv)
2526         extern void startup_bios_disk(void);
2527         extern int post_fastreboot;
2528 #endif
2529         dev_info_t *xdip, *isa_dip;
2530         rd_existing_t rd_mem_prop;
2531         int err;
2532 
2533         ndi_devi_alloc_sleep(ddi_root_node(), "ramdisk",
2534             (pnode_t)DEVI_SID_NODEID, &xdip);
2535 
2536         (void) BOP_GETPROP(bootops,
2537             "ramdisk_start", (void *)&ramdisk_start);
2538         (void) BOP_GETPROP(bootops,
2539             "ramdisk_end", (void *)&ramdisk_end);
2540 
2541 #ifdef __xpv
2542         ramdisk_start -= ONE_GIG;
2543         ramdisk_end -= ONE_GIG;
2544 #endif
2545         rd_mem_prop.phys = ramdisk_start;
2546         rd_mem_prop.size = ramdisk_end - ramdisk_start + 1;
2547 
2548         (void) ndi_prop_update_byte_array(DDI_DEV_T_NONE, xdip,
2549             RD_EXISTING_PROP_NAME, (uchar_t *)&rd_mem_prop,
2550             sizeof (rd_mem_prop));
2551         err = ndi_devi_bind_driver(xdip, 0);
2552         ASSERT(err == 0);
2553 
2554         /* isa node */
2555         if (pseudo_isa) {
2556                 ndi_devi_alloc_sleep(ddi_root_node(), "isa",
2557                     (pnode_t)DEVI_SID_NODEID, &isa_dip);
2558                 (void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2559                     "device_type", "isa");
2560                 (void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2561                     "bus-type", "isa");
2562                 (void) ndi_devi_bind_driver(isa_dip, 0);
2563         }
2564 
2565         /*
2566          * Read in the properties from the boot.
2567          */
2568         get_boot_properties();
2569 
2570         /* not framebuffer should be enumerated, if present */
2571         get_vga_properties();
2572 
2573         /*
2574          * Check for administratively disabled drivers.
2575          */
2576         check_driver_disable();
2577 
2578 #if !defined(__xpv)
2579         if (!post_fastreboot && BOP_GETPROPLEN(bootops, "efi-systab") < 0)
2580                 startup_bios_disk();
2581 #endif
2582         /* do bus dependent probes. */
2583         impl_bus_initialprobe();
2584 }
2585 
2586 dev_t
2587 getrootdev(void)
2588 {
2589         /*
2590          * Usually rootfs.bo_name is initialized by the
2591          * the bootpath property from bootenv.rc, but
2592          * defaults to "/ramdisk:a" otherwise.
2593          */
2594         return (ddi_pathname_to_dev_t(rootfs.bo_name));
2595 }
2596 
/*
 * One entry in the singly linked list of registered bus probe functions.
 * Each probe function takes a single int argument.
 */
struct bus_probe {
        struct bus_probe *next;         /* next entry, NULL at the tail */
        void (*probe)(int);             /* the registered probe function */
};

/* Head of the probe list; NULL while no probe is registered. */
static struct bus_probe *bus_probes;
2601 
2602 void
2603 impl_bus_add_probe(void (*func)(int))
2604 {
2605         struct bus_probe *probe;
2606         struct bus_probe *lastprobe = NULL;
2607 
2608         probe = kmem_alloc(sizeof (*probe), KM_SLEEP);
2609         probe->probe = func;
2610         probe->next = NULL;
2611 
2612         if (!bus_probes) {
2613                 bus_probes = probe;
2614                 return;
2615         }
2616 
2617         lastprobe = bus_probes;
2618         while (lastprobe->next)
2619                 lastprobe = lastprobe->next;
2620         lastprobe->next = probe;
2621 }
2622 
2623 /*ARGSUSED*/
2624 void
2625 impl_bus_delete_probe(void (*func)(int))
2626 {
2627         struct bus_probe *prev = NULL;
2628         struct bus_probe *probe = bus_probes;
2629 
2630         while (probe) {
2631                 if (probe->probe == func)
2632                         break;
2633                 prev = probe;
2634                 probe = probe->next;
2635         }
2636 
2637         if (probe == NULL)
2638                 return;
2639 
2640         if (prev)
2641                 prev->next = probe->next;
2642         else
2643                 bus_probes = probe->next;
2644 
2645         kmem_free(probe, sizeof (struct bus_probe));
2646 }
2647 
2648 /*
2649  * impl_bus_initialprobe
2650  *      Modload the prom simulator, then let it probe to verify existence
2651  *      and type of PCI support.
2652  */
2653 static void
2654 impl_bus_initialprobe(void)
2655 {
2656         struct bus_probe *probe;
2657 
2658         /* load modules to install bus probes */
2659 #if defined(__xpv)
2660         if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2661                 if (modload("misc", "pci_autoconfig") < 0) {
2662                         panic("failed to load misc/pci_autoconfig");
2663                 }
2664 
2665                 if (modload("drv", "isa") < 0)
2666                         panic("failed to load drv/isa");
2667         }
2668 
2669         (void) modload("misc", "xpv_autoconfig");
2670 #else
2671         if (modload("misc", "pci_autoconfig") < 0) {
2672                 panic("failed to load misc/pci_autoconfig");
2673         }
2674 
2675         (void) modload("misc", "acpidev");
2676 
2677         if (modload("drv", "isa") < 0)
2678                 panic("failed to load drv/isa");
2679 #endif
2680 
2681         probe = bus_probes;
2682         while (probe) {
2683                 /* run the probe functions */
2684                 (*probe->probe)(0);
2685                 probe = probe->next;
2686         }
2687 }
2688 
2689 /*
2690  * impl_bus_reprobe
2691  *      Reprogram devices not set up by firmware.
2692  */
2693 static void
2694 impl_bus_reprobe(void)
2695 {
2696         struct bus_probe *probe;
2697 
2698         probe = bus_probes;
2699         while (probe) {
2700                 /* run the probe function */
2701                 (*probe->probe)(1);
2702                 probe = probe->next;
2703         }
2704 }
2705 
2706 
2707 /*
2708  * The following functions ready a cautious request to go up to the nexus
2709  * driver.  It is up to the nexus driver to decide how to process the request.
2710  * It may choose to call i_ddi_do_caut_get/put in this file, or do it
2711  * differently.
2712  */
2713 
2714 static void
2715 i_ddi_caut_getput_ctlops(ddi_acc_impl_t *hp, uint64_t host_addr,
2716     uint64_t dev_addr, size_t size, size_t repcount, uint_t flags,
2717     ddi_ctl_enum_t cmd)
2718 {
2719         peekpoke_ctlops_t       cautacc_ctlops_arg;
2720 
2721         cautacc_ctlops_arg.size = size;
2722         cautacc_ctlops_arg.dev_addr = dev_addr;
2723         cautacc_ctlops_arg.host_addr = host_addr;
2724         cautacc_ctlops_arg.handle = (ddi_acc_handle_t)hp;
2725         cautacc_ctlops_arg.repcount = repcount;
2726         cautacc_ctlops_arg.flags = flags;
2727 
2728         (void) ddi_ctlops(hp->ahi_common.ah_dip, hp->ahi_common.ah_dip, cmd,
2729             &cautacc_ctlops_arg, NULL);
2730 }
2731 
2732 uint8_t
2733 i_ddi_caut_get8(ddi_acc_impl_t *hp, uint8_t *addr)
2734 {
2735         uint8_t value;
2736         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2737             sizeof (uint8_t), 1, 0, DDI_CTLOPS_PEEK);
2738 
2739         return (value);
2740 }
2741 
2742 uint16_t
2743 i_ddi_caut_get16(ddi_acc_impl_t *hp, uint16_t *addr)
2744 {
2745         uint16_t value;
2746         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2747             sizeof (uint16_t), 1, 0, DDI_CTLOPS_PEEK);
2748 
2749         return (value);
2750 }
2751 
2752 uint32_t
2753 i_ddi_caut_get32(ddi_acc_impl_t *hp, uint32_t *addr)
2754 {
2755         uint32_t value;
2756         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2757             sizeof (uint32_t), 1, 0, DDI_CTLOPS_PEEK);
2758 
2759         return (value);
2760 }
2761 
2762 uint64_t
2763 i_ddi_caut_get64(ddi_acc_impl_t *hp, uint64_t *addr)
2764 {
2765         uint64_t value;
2766         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2767             sizeof (uint64_t), 1, 0, DDI_CTLOPS_PEEK);
2768 
2769         return (value);
2770 }
2771 
2772 void
2773 i_ddi_caut_put8(ddi_acc_impl_t *hp, uint8_t *addr, uint8_t value)
2774 {
2775         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2776             sizeof (uint8_t), 1, 0, DDI_CTLOPS_POKE);
2777 }
2778 
2779 void
2780 i_ddi_caut_put16(ddi_acc_impl_t *hp, uint16_t *addr, uint16_t value)
2781 {
2782         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2783             sizeof (uint16_t), 1, 0, DDI_CTLOPS_POKE);
2784 }
2785 
2786 void
2787 i_ddi_caut_put32(ddi_acc_impl_t *hp, uint32_t *addr, uint32_t value)
2788 {
2789         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2790             sizeof (uint32_t), 1, 0, DDI_CTLOPS_POKE);
2791 }
2792 
2793 void
2794 i_ddi_caut_put64(ddi_acc_impl_t *hp, uint64_t *addr, uint64_t value)
2795 {
2796         i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2797             sizeof (uint64_t), 1, 0, DDI_CTLOPS_POKE);
2798 }
2799 
2800 void
2801 i_ddi_caut_rep_get8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2802     size_t repcount, uint_t flags)
2803 {
2804         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2805             sizeof (uint8_t), repcount, flags, DDI_CTLOPS_PEEK);
2806 }
2807 
2808 void
2809 i_ddi_caut_rep_get16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2810     uint16_t *dev_addr, size_t repcount, uint_t flags)
2811 {
2812         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2813             sizeof (uint16_t), repcount, flags, DDI_CTLOPS_PEEK);
2814 }
2815 
2816 void
2817 i_ddi_caut_rep_get32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2818     uint32_t *dev_addr, size_t repcount, uint_t flags)
2819 {
2820         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2821             sizeof (uint32_t), repcount, flags, DDI_CTLOPS_PEEK);
2822 }
2823 
2824 void
2825 i_ddi_caut_rep_get64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2826     uint64_t *dev_addr, size_t repcount, uint_t flags)
2827 {
2828         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2829             sizeof (uint64_t), repcount, flags, DDI_CTLOPS_PEEK);
2830 }
2831 
2832 void
2833 i_ddi_caut_rep_put8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2834     size_t repcount, uint_t flags)
2835 {
2836         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2837             sizeof (uint8_t), repcount, flags, DDI_CTLOPS_POKE);
2838 }
2839 
2840 void
2841 i_ddi_caut_rep_put16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2842     uint16_t *dev_addr, size_t repcount, uint_t flags)
2843 {
2844         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2845             sizeof (uint16_t), repcount, flags, DDI_CTLOPS_POKE);
2846 }
2847 
2848 void
2849 i_ddi_caut_rep_put32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2850     uint32_t *dev_addr, size_t repcount, uint_t flags)
2851 {
2852         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2853             sizeof (uint32_t), repcount, flags, DDI_CTLOPS_POKE);
2854 }
2855 
2856 void
2857 i_ddi_caut_rep_put64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2858     uint64_t *dev_addr, size_t repcount, uint_t flags)
2859 {
2860         i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2861             sizeof (uint64_t), repcount, flags, DDI_CTLOPS_POKE);
2862 }
2863 
2864 boolean_t
2865 i_ddi_copybuf_required(ddi_dma_attr_t *attrp)
2866 {
2867         uint64_t hi_pa;
2868 
2869         hi_pa = ((uint64_t)physmax + 1ull) << PAGESHIFT;
2870         if (attrp->dma_attr_addr_hi < hi_pa) {
2871                 return (B_TRUE);
2872         }
2873 
2874         return (B_FALSE);
2875 }
2876 
2877 size_t
2878 i_ddi_copybuf_size()
2879 {
2880         return (dma_max_copybuf_size);
2881 }
2882 
2883 /*
2884  * i_ddi_dma_max()
2885  *    returns the maximum DMA size which can be performed in a single DMA
2886  *    window taking into account the devices DMA contraints (attrp), the
2887  *    maximum copy buffer size (if applicable), and the worse case buffer
2888  *    fragmentation.
2889  */
2890 /*ARGSUSED*/
2891 uint32_t
2892 i_ddi_dma_max(dev_info_t *dip, ddi_dma_attr_t *attrp)
2893 {
2894         uint64_t maxxfer;
2895 
2896 
2897         /*
2898          * take the min of maxxfer and the the worse case fragementation
2899          * (e.g. every cookie <= 1 page)
2900          */
2901         maxxfer = MIN(attrp->dma_attr_maxxfer,
2902             ((uint64_t)(attrp->dma_attr_sgllen - 1) << PAGESHIFT));
2903 
2904         /*
2905          * If the DMA engine can't reach all off memory, we also need to take
2906          * the max size of the copybuf into consideration.
2907          */
2908         if (i_ddi_copybuf_required(attrp)) {
2909                 maxxfer = MIN(i_ddi_copybuf_size(), maxxfer);
2910         }
2911 
2912         /*
2913          * we only return a 32-bit value. Make sure it's not -1. Round to a
2914          * page so it won't be mistaken for an error value during debug.
2915          */
2916         if (maxxfer >= 0xFFFFFFFF) {
2917                 maxxfer = 0xFFFFF000;
2918         }
2919 
2920         /*
2921          * make sure the value we return is a whole multiple of the
2922          * granlarity.
2923          */
2924         if (attrp->dma_attr_granular > 1) {
2925                 maxxfer = maxxfer - (maxxfer % attrp->dma_attr_granular);
2926         }
2927 
2928         return ((uint32_t)maxxfer);
2929 }
2930 
2931 /*ARGSUSED*/
2932 void
2933 translate_devid(dev_info_t *dip)
2934 {
2935 }
2936 
2937 pfn_t
2938 i_ddi_paddr_to_pfn(paddr_t paddr)
2939 {
2940         pfn_t pfn;
2941 
2942 #ifdef __xpv
2943         if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2944                 pfn = xen_assign_pfn(mmu_btop(paddr));
2945         } else {
2946                 pfn = mmu_btop(paddr);
2947         }
2948 #else
2949         pfn = mmu_btop(paddr);
2950 #endif
2951 
2952         return (pfn);
2953 }