1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  *
  26  * Copyright 2013 Joyent, Inc.  All rights reserved.
  27  */
  28 
  29 
  30 #include <sys/types.h>
  31 #include <sys/machparam.h>
  32 #include <sys/x86_archext.h>
  33 #include <sys/systm.h>
  34 #include <sys/mach_mmu.h>
  35 #include <sys/multiboot.h>
  36 #include <sys/multiboot2.h>
  37 #include <sys/multiboot2_impl.h>
  38 #include <sys/sysmacros.h>
  39 #include <sys/framebuffer.h>
  40 #include <sys/sha1.h>
  41 #include <util/string.h>
  42 #include <util/strtolctype.h>
  43 #include <sys/efi.h>
  44 
/*
 * Build-specific declarations: the paravirtualized (__xpv) build pulls in
 * the hypervisor interface and defines two globals of its own, while the
 * bare metal build refers to symbols that are defined outside this file.
 */
#if defined(__xpv)

#include <sys/hypervisor.h>
uintptr_t xen_virt_start;
/* indexed by machine frame number, see ma_to_pa() */
pfn_t *mfn_to_pfn_mapping;

#else /* !__xpv */

extern multiboot_header_t mb_header;
extern uint32_t mb2_load_addr;
extern int have_cpuid(void);

#endif /* !__xpv */
  58 
  59 #include <sys/inttypes.h>
  60 #include <sys/bootinfo.h>
  61 #include <sys/mach_mmu.h>
  62 #include <sys/boot_console.h>
  63 
  64 #include "dboot_asm.h"
  65 #include "dboot_printf.h"
  66 #include "dboot_xboot.h"
  67 #include "dboot_elfload.h"
  68 
/*
 * Twice the SHA-1 digest length; presumably the size of a digest written
 * as hexadecimal text (two characters per byte) - confirm against users.
 */
#define SHA1_ASCII_LENGTH       (SHA1_DIGEST_LENGTH * 2)
  70 
/*
 * This file contains code that runs to transition us from either a multiboot
 * compliant loader (32 bit non-paging) or a XPV domain loader to
 * regular kernel execution. Its task is to set up the kernel memory image
 * and page tables.
 *
 * The code executes as:
 *      - 32 bits under GRUB (for 32 or 64 bit Solaris)
 *      - a 32 bit program for the 32-bit PV hypervisor
 *      - a 64 bit program for the 64-bit PV hypervisor (at least for now)
 *
 * Under the PV hypervisor, we must create mappings for any memory beyond the
 * initial start of day allocation (such as the kernel itself).
 *
 * When on the metal, the mapping between maddr_t and paddr_t is 1:1.
 * Since we are running with paging disabled, all such memory is accessible.
 */
  88 
/*
 * Standard bits used in PTE (page level) and PTP (internal levels).
 *
 * ptp_bits is OR'ed into the entry that points at a newly allocated page
 * table (see make_ptable()); pte_bits is OR'ed into every leaf mapping
 * (see map_ma_at_va()).
 */
x86pte_t ptp_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_USER;
x86pte_t pte_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_MOD | PT_NOCONSIST;
  94 
/*
 * This is the target address (physical) where the kernel text and data
 * nucleus pages will be unpacked. On the hypervisor this is actually a
 * virtual address.
 */
paddr_t ktext_phys;
uint32_t ksize = 2 * FOUR_MEG;  /* kernel nucleus is 8Meg */

/*
 * Mappings at or above this virtual address are marked PT_GLOBAL by
 * map_ma_at_va() when pge_support is set.
 */
static uint64_t target_kernel_text;     /* value to use for KERNEL_TEXT */
 104 
/*
 * The stack is set up in assembler before entering startup_kernel()
 */
char stack_space[STACK_SIZE];

/*
 * Used to track physical memory allocation. check_higher() moves this up
 * to the page boundary above any address it is told is already in use.
 */
static paddr_t next_avail_addr = 0;
 114 
#if defined(__xpv)
/*
 * Additional information needed for hypervisor memory allocation.
 * Only memory up to scratch_end is mapped by page tables.
 * mfn_base is the start of the hypervisor virtual image. It's ONE_GIG, so
 * to derive a pfn from a pointer, you subtract mfn_base.
 */

static paddr_t scratch_end = 0; /* we can't write all of mem here */
static paddr_t mfn_base;                /* addr corresponding to mfn_list[0] */
start_info_t *xen_info;                 /* start info from the hypervisor */

#else   /* __xpv */

/*
 * If on the metal, then we have a multiboot loader.
 *
 * multiboot_version (1 or 2) selects which of mb_info / mb2_info the
 * dboot_loader_mmap_*() accessors consult. num_entries caches the memory
 * map entry count once num_entries_set is B_TRUE.
 */
uint32_t mb_magic;                      /* magic from boot loader */
uint32_t mb_addr;                       /* multiboot info package from loader */
int multiboot_version;
multiboot_info_t *mb_info;
multiboot2_info_header_t *mb2_info;
multiboot_tag_mmap_t *mb2_mmap_tagp;
int num_entries;                        /* mmap entry count */
boolean_t num_entries_set;              /* is mmap entry count set */
uintptr_t load_addr;
static boot_framebuffer_t framebuffer[2];
static boot_framebuffer_t *fb;

/* can not be automatic variables because of alignment */
static efi_guid_t smbios3 = SMBIOS3_TABLE_GUID;
static efi_guid_t smbios = SMBIOS_TABLE_GUID;
static efi_guid_t acpi2 = EFI_ACPI_TABLE_GUID;
static efi_guid_t acpi1 = ACPI_10_TABLE_GUID;
#endif  /* __xpv */
 150 
/*
 * This contains information passed to the kernel. The memlist builders in
 * this file publish their results through the bi pointer.
 */
struct xboot_info boot_info[2]; /* extra space to fix alignment for amd64 */
struct xboot_info *bi;
 156 
/*
 * Page table and memory stuff.
 */
static paddr_t max_mem;                 /* maximum memory address */

/*
 * Information about processor MMU. Each flag is non-zero when the
 * corresponding feature is to be used; e.g. pae_support selects 64 bit
 * (x86pte_t) rather than 32 bit (x86pte32_t) page table entries and
 * pge_support allows PT_GLOBAL mappings.
 */
int amd64_support = 0;
int largepage_support = 0;
int pae_support = 0;
int pge_support = 0;
int NX_support = 0;
int PAT_support = 0;
 171 
/*
 * Low 32 bits of the kernel entry address, passed back to the assembler.
 * When running a 64 bit kernel, the high 32 bits are 0xffffffff.
 */
uint32_t entry_addr_low;
 177 
/*
 * Memlists for the kernel. We shouldn't need a lot of these.
 *
 * Each array has a companion *_used count of valid entries: memlists[] is
 * the physically installed memory, pcimemlists[] the address ranges still
 * available for PCI and rsvdmemlists[] the BIOS reserved ranges.
 */
#define MAX_MEMLIST (50)
struct boot_memlist memlists[MAX_MEMLIST];
uint_t memlists_used = 0;
struct boot_memlist pcimemlists[MAX_MEMLIST];
uint_t pcimemlists_used = 0;
struct boot_memlist rsvdmemlists[MAX_MEMLIST];
uint_t rsvdmemlists_used = 0;

/*
 * This should match what's in the bootloader.  It's arbitrary, but GRUB
 * in particular has limitations on how much space it can use before it
 * stops working properly.  This should be enough.
 */
struct boot_modules modules[MAX_BOOT_MODULES];
uint_t modules_used = 0;
 196 
#ifdef __xpv
/*
 * Xen strips the size field out of the mb_memory_map_t, see struct e820entry
 * definition in Xen source.
 */
typedef struct {
        uint32_t        base_addr_low;
        uint32_t        base_addr_high;
        uint32_t        length_low;
        uint32_t        length_high;
        uint32_t        type;
} mmap_t;

/*
 * There is 512KB of scratch area after the boot stack page.
 * We'll use that for everything except the kernel nucleus pages which are too
 * big to fit there and are allocated last anyway.
 */
/* map_buffer[] backs the dboot_loader_mmap_*() accessors in this build */
#define MAXMAPS 100
static mmap_t map_buffer[MAXMAPS];
#else
/* on the metal a memory map entry is the multiboot structure itself */
typedef mb_memory_map_t mmap_t;
#endif
 220 
/*
 * Debugging switches: prom_debug enables the memlist and memory map dumps
 * in this file, map_debug enables tracing of page table construction.
 */
uint_t prom_debug = 0;
uint_t map_debug = 0;

static char noname[2] = "-";
 228 
 229 /*
 230  * Either hypervisor-specific or grub-specific code builds the initial
 231  * memlists. This code does the sort/merge/link for final use.
 232  */
 233 static void
 234 sort_physinstall(void)
 235 {
 236         int i;
 237 #if !defined(__xpv)
 238         int j;
 239         struct boot_memlist tmp;
 240 
 241         /*
 242          * Now sort the memlists, in case they weren't in order.
 243          * Yeah, this is a bubble sort; small, simple and easy to get right.
 244          */
 245         DBG_MSG("Sorting phys-installed list\n");
 246         for (j = memlists_used - 1; j > 0; --j) {
 247                 for (i = 0; i < j; ++i) {
 248                         if (memlists[i].addr < memlists[i + 1].addr)
 249                                 continue;
 250                         tmp = memlists[i];
 251                         memlists[i] = memlists[i + 1];
 252                         memlists[i + 1] = tmp;
 253                 }
 254         }
 255 
 256         /*
 257          * Merge any memlists that don't have holes between them.
 258          */
 259         for (i = 0; i <= memlists_used - 1; ++i) {
 260                 if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr)
 261                         continue;
 262 
 263                 if (prom_debug)
 264                         dboot_printf(
 265                             "merging mem segs %" PRIx64 "...%" PRIx64
 266                             " w/ %" PRIx64 "...%" PRIx64 "\n",
 267                             memlists[i].addr,
 268                             memlists[i].addr + memlists[i].size,
 269                             memlists[i + 1].addr,
 270                             memlists[i + 1].addr + memlists[i + 1].size);
 271 
 272                 memlists[i].size += memlists[i + 1].size;
 273                 for (j = i + 1; j < memlists_used - 1; ++j)
 274                         memlists[j] = memlists[j + 1];
 275                 --memlists_used;
 276                 DBG(memlists_used);
 277                 --i;    /* after merging we need to reexamine, so do this */
 278         }
 279 #endif  /* __xpv */
 280 
 281         if (prom_debug) {
 282                 dboot_printf("\nFinal memlists:\n");
 283                 for (i = 0; i < memlists_used; ++i) {
 284                         dboot_printf("\t%d: addr=%" PRIx64 " size=%"
 285                             PRIx64 "\n", i, memlists[i].addr, memlists[i].size);
 286                 }
 287         }
 288 
 289         /*
 290          * link together the memlists with native size pointers
 291          */
 292         memlists[0].next = 0;
 293         memlists[0].prev = 0;
 294         for (i = 1; i < memlists_used; ++i) {
 295                 memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1);
 296                 memlists[i].next = 0;
 297                 memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i);
 298         }
 299         bi->bi_phys_install = (native_ptr_t)(uintptr_t)memlists;
 300         DBG(bi->bi_phys_install);
 301 }
 302 
 303 /*
 304  * build bios reserved memlists
 305  */
 306 static void
 307 build_rsvdmemlists(void)
 308 {
 309         int i;
 310 
 311         rsvdmemlists[0].next = 0;
 312         rsvdmemlists[0].prev = 0;
 313         for (i = 1; i < rsvdmemlists_used; ++i) {
 314                 rsvdmemlists[i].prev =
 315                     (native_ptr_t)(uintptr_t)(rsvdmemlists + i - 1);
 316                 rsvdmemlists[i].next = 0;
 317                 rsvdmemlists[i - 1].next =
 318                     (native_ptr_t)(uintptr_t)(rsvdmemlists + i);
 319         }
 320         bi->bi_rsvdmem = (native_ptr_t)(uintptr_t)rsvdmemlists;
 321         DBG(bi->bi_rsvdmem);
 322 }
 323 
 324 #if defined(__xpv)
 325 
 326 /*
 327  * halt on the hypervisor after a delay to drain console output
 328  */
 329 void
 330 dboot_halt(void)
 331 {
 332         uint_t i = 10000;
 333 
 334         while (--i)
 335                 (void) HYPERVISOR_yield();
 336         (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff);
 337 }
 338 
 339 /*
 340  * From a machine address, find the corresponding pseudo-physical address.
 341  * Pseudo-physical address are contiguous and run from mfn_base in each VM.
 342  * Machine addresses are the real underlying hardware addresses.
 343  * These are needed for page table entries. Note that this routine is
 344  * poorly protected. A bad value of "ma" will cause a page fault.
 345  */
 346 paddr_t
 347 ma_to_pa(maddr_t ma)
 348 {
 349         ulong_t pgoff = ma & MMU_PAGEOFFSET;
 350         ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)];
 351         paddr_t pa;
 352 
 353         if (pfn >= xen_info->nr_pages)
 354                 return (-(paddr_t)1);
 355         pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff;
 356 #ifdef DEBUG
 357         if (ma != pa_to_ma(pa))
 358                 dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", "
 359                     "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa));
 360 #endif
 361         return (pa);
 362 }
 363 
 364 /*
 365  * From a pseudo-physical address, find the corresponding machine address.
 366  */
 367 maddr_t
 368 pa_to_ma(paddr_t pa)
 369 {
 370         pfn_t pfn;
 371         ulong_t mfn;
 372 
 373         pfn = mmu_btop(pa - mfn_base);
 374         if (pa < mfn_base || pfn >= xen_info->nr_pages)
 375                 dboot_panic("pa_to_ma(): illegal address 0x%lx", (ulong_t)pa);
 376         mfn = ((ulong_t *)xen_info->mfn_list)[pfn];
 377 #ifdef DEBUG
 378         if (mfn_to_pfn_mapping[mfn] != pfn)
 379                 dboot_printf("pa_to_ma(pfn=%lx) got %lx ma_to_pa() says %lx\n",
 380                     pfn, mfn, mfn_to_pfn_mapping[mfn]);
 381 #endif
 382         return (mfn_to_ma(mfn) | (pa & MMU_PAGEOFFSET));
 383 }
 384 
 385 #endif  /* __xpv */
 386 
 387 x86pte_t
 388 get_pteval(paddr_t table, uint_t index)
 389 {
 390         if (pae_support)
 391                 return (((x86pte_t *)(uintptr_t)table)[index]);
 392         return (((x86pte32_t *)(uintptr_t)table)[index]);
 393 }
 394 
 395 /*ARGSUSED*/
 396 void
 397 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
 398 {
 399 #ifdef __xpv
 400         mmu_update_t t;
 401         maddr_t mtable = pa_to_ma(table);
 402         int retcnt;
 403 
 404         t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
 405         t.val = pteval;
 406         if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
 407                 dboot_panic("HYPERVISOR_mmu_update() failed");
 408 #else /* __xpv */
 409         uintptr_t tab_addr = (uintptr_t)table;
 410 
 411         if (pae_support)
 412                 ((x86pte_t *)tab_addr)[index] = pteval;
 413         else
 414                 ((x86pte32_t *)tab_addr)[index] = (x86pte32_t)pteval;
 415         if (level == top_level && level == 2)
 416                 reload_cr3();
 417 #endif /* __xpv */
 418 }
 419 
 420 paddr_t
 421 make_ptable(x86pte_t *pteval, uint_t level)
 422 {
 423         paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
 424 
 425         if (level == top_level && level == 2)
 426                 *pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID;
 427         else
 428                 *pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits;
 429 
 430 #ifdef __xpv
 431         /* Remove write permission to the new page table. */
 432         if (HYPERVISOR_update_va_mapping(new_table,
 433             *pteval & ~(x86pte_t)PT_WRITABLE, UVMF_INVLPG | UVMF_LOCAL))
 434                 dboot_panic("HYP_update_va_mapping error");
 435 #endif
 436 
 437         if (map_debug)
 438                 dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%"
 439                     PRIx64 "\n", level, (ulong_t)new_table, *pteval);
 440         return (new_table);
 441 }
 442 
 443 x86pte_t *
 444 map_pte(paddr_t table, uint_t index)
 445 {
 446         return ((x86pte_t *)(uintptr_t)(table + index * pte_size));
 447 }
 448 
/*
 * dump out the contents of page tables...
 *
 * Walks the page tables depth first, starting from top_page_table at
 * top_level, and prints every non-zero entry with dboot_printf(). The walk
 * is iterative: descending into a lower level table saves the current
 * table and index in save_table[]/save_index[], and the "recursion" label
 * at the bottom of the loop body is used both to start a lower level
 * table and to resume the parent once a table has been finished.
 */
static void
dump_tables(void)
{
        uint_t save_index[4];   /* for recursion */
        char *save_table[4];    /* for recursion */
        uint_t  l;
        uint64_t va;
        uint64_t pgsize;
        int index;
        int i;
        x86pte_t pteval;
        char *table;
        static char *tablist = "\t\t\t";
        /* "tabs + l" yields fewer tabs the higher the level being printed */
        char *tabs = tablist + 3 - top_level;
        /*
         * NOTE(review): pa and pa1 are uint_t, so a wider value coming back
         * from ma_to_pa() would be truncated here - confirm that this is
         * acceptable for debug output.
         */
        uint_t pa, pa1;
        /*
         * Without the hypervisor maddr_t is made an alias for paddr_t. The
         * define is not undone inside this function.
         */
#if !defined(__xpv)
#define maddr_t paddr_t
#endif /* !__xpv */

        dboot_printf("Finished pagetables:\n");
        table = (char *)(uintptr_t)top_page_table;
        l = top_level;
        va = 0;
        for (index = 0; index < ptes_per_table; ++index) {
                /* amount of address space covered by one entry at level l */
                pgsize = 1ull << shift_amt[l];
                if (pae_support)
                        pteval = ((x86pte_t *)table)[index];
                else
                        pteval = ((x86pte32_t *)table)[index];
                if (pteval == 0)
                        goto next_entry;

                dboot_printf("%s %p[0x%x] = %" PRIx64 ", va=%" PRIx64,
                    tabs + l, (void *)table, index, (uint64_t)pteval, va);
                pa = ma_to_pa(pteval & MMU_PAGEMASK);
                dboot_printf(" physaddr=%x\n", pa);

                /*
                 * Don't try to walk hypervisor private pagetables
                 *
                 * Descend when the entry refers to a lower level table:
                 * always above level 1, and at level 1 unless the entry is
                 * a large page. index is set to -1 so that the loop's
                 * ++index starts the new table at entry 0; va is left
                 * untouched because the jump skips next_entry.
                 */
                if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) {
                        save_table[l] = table;
                        save_index[l] = index;
                        --l;
                        index = -1;
                        table = (char *)(uintptr_t)
                            ma_to_pa(pteval & MMU_PAGEMASK);
                        goto recursion;
                }

                /*
                 * shorten dump for consecutive mappings
                 *
                 * Count how many following entries map physically
                 * contiguous pages; i ends up as the length of the run.
                 */
                for (i = 1; index + i < ptes_per_table; ++i) {
                        if (pae_support)
                                pteval = ((x86pte_t *)table)[index + i];
                        else
                                pteval = ((x86pte32_t *)table)[index + i];
                        if (pteval == 0)
                                break;
                        pa1 = ma_to_pa(pteval & MMU_PAGEMASK);
                        if (pa1 != pa + i * pgsize)
                                break;
                }
                /*
                 * For a run longer than two entries print "..." and skip
                 * ahead so that only the last entry of the run is still
                 * printed.
                 */
                if (i > 2) {
                        dboot_printf("%s...\n", tabs + l);
                        va += pgsize * (i - 2);
                        index += i - 2;
                }
next_entry:
                va += pgsize;
                if (l == 3 && index == 256)     /* VA hole */
                        va = 0xffff800000000000ull;
recursion:
                ;
        }
        /*
         * The current table is finished. Unless it was the top level one,
         * restore the parent's table and index and jump back into the loop
         * body so that the loop's ++index moves on to the parent's next
         * entry.
         */
        if (l < top_level) {
                ++l;
                index = save_index[l];
                table = save_table[l];
                goto recursion;
        }
}
 535 
 536 /*
 537  * Add a mapping for the machine page at the given virtual address.
 538  */
 539 static void
 540 map_ma_at_va(maddr_t ma, native_ptr_t va, uint_t level)
 541 {
 542         x86pte_t *ptep;
 543         x86pte_t pteval;
 544 
 545         pteval = ma | pte_bits;
 546         if (level > 0)
 547                 pteval |= PT_PAGESIZE;
 548         if (va >= target_kernel_text && pge_support)
 549                 pteval |= PT_GLOBAL;
 550 
 551         if (map_debug && ma != va)
 552                 dboot_printf("mapping ma=0x%" PRIx64 " va=0x%" PRIx64
 553                     " pte=0x%" PRIx64 " l=%d\n",
 554                     (uint64_t)ma, (uint64_t)va, pteval, level);
 555 
 556 #if defined(__xpv)
 557         /*
 558          * see if we can avoid find_pte() on the hypervisor
 559          */
 560         if (HYPERVISOR_update_va_mapping(va, pteval,
 561             UVMF_INVLPG | UVMF_LOCAL) == 0)
 562                 return;
 563 #endif
 564 
 565         /*
 566          * Find the pte that will map this address. This creates any
 567          * missing intermediate level page tables
 568          */
 569         ptep = find_pte(va, NULL, level, 0);
 570 
 571         /*
 572          * When paravirtualized, we must use hypervisor calls to modify the
 573          * PTE, since paging is active. On real hardware we just write to
 574          * the pagetables which aren't in use yet.
 575          */
 576 #if defined(__xpv)
 577         ptep = ptep;    /* shut lint up */
 578         if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
 579                 dboot_panic("mmu_update failed-map_pa_at_va va=0x%" PRIx64
 580                     " l=%d ma=0x%" PRIx64 ", pte=0x%" PRIx64 "",
 581                     (uint64_t)va, level, (uint64_t)ma, pteval);
 582 #else
 583         if (va < 1024 * 1024)
 584                 pteval |= PT_NOCACHE;           /* for video RAM */
 585         if (pae_support)
 586                 *ptep = pteval;
 587         else
 588                 *((x86pte32_t *)ptep) = (x86pte32_t)pteval;
 589 #endif
 590 }
 591 
 592 /*
 593  * Add a mapping for the physical page at the given virtual address.
 594  */
 595 static void
 596 map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level)
 597 {
 598         map_ma_at_va(pa_to_ma(pa), va, level);
 599 }
 600 
 601 /*
 602  * This is called to remove start..end from the
 603  * possible range of PCI addresses.
 604  */
 605 const uint64_t pci_lo_limit = 0x00100000ul;
 606 const uint64_t pci_hi_limit = 0xfff00000ul;
 607 static void
 608 exclude_from_pci(uint64_t start, uint64_t end)
 609 {
 610         int i;
 611         int j;
 612         struct boot_memlist *ml;
 613 
 614         for (i = 0; i < pcimemlists_used; ++i) {
 615                 ml = &pcimemlists[i];
 616 
 617                 /* delete the entire range? */
 618                 if (start <= ml->addr && ml->addr + ml->size <= end) {
 619                         --pcimemlists_used;
 620                         for (j = i; j < pcimemlists_used; ++j)
 621                                 pcimemlists[j] = pcimemlists[j + 1];
 622                         --i;    /* to revisit the new one at this index */
 623                 }
 624 
 625                 /* split a range? */
 626                 else if (ml->addr < start && end < ml->addr + ml->size) {
 627 
 628                         ++pcimemlists_used;
 629                         if (pcimemlists_used > MAX_MEMLIST)
 630                                 dboot_panic("too many pcimemlists");
 631 
 632                         for (j = pcimemlists_used - 1; j > i; --j)
 633                                 pcimemlists[j] = pcimemlists[j - 1];
 634                         ml->size = start - ml->addr;
 635 
 636                         ++ml;
 637                         ml->size = (ml->addr + ml->size) - end;
 638                         ml->addr = end;
 639                         ++i;    /* skip on to next one */
 640                 }
 641 
 642                 /* cut memory off the start? */
 643                 else if (ml->addr < end && end < ml->addr + ml->size) {
 644                         ml->size -= end - ml->addr;
 645                         ml->addr = end;
 646                 }
 647 
 648                 /* cut memory off the end? */
 649                 else if (ml->addr <= start && start < ml->addr + ml->size) {
 650                         ml->size = start - ml->addr;
 651                 }
 652         }
 653 }
 654 
 655 /*
 656  * During memory allocation, find the highest address not used yet.
 657  */
 658 static void
 659 check_higher(paddr_t a)
 660 {
 661         if (a < next_avail_addr)
 662                 return;
 663         next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE);
 664         DBG(next_avail_addr);
 665 }
 666 
 667 static int
 668 dboot_loader_mmap_entries(void)
 669 {
 670 #if !defined(__xpv)
 671         if (num_entries_set == B_TRUE)
 672                 return (num_entries);
 673 
 674         switch (multiboot_version) {
 675         case 1:
 676                 DBG(mb_info->flags);
 677                 if (mb_info->flags & 0x40) {
 678                         mb_memory_map_t *mmap;
 679 
 680                         DBG(mb_info->mmap_addr);
 681                         DBG(mb_info->mmap_length);
 682                         check_higher(mb_info->mmap_addr + mb_info->mmap_length);
 683 
 684                         for (mmap = (mb_memory_map_t *)mb_info->mmap_addr;
 685                             (uint32_t)mmap < mb_info->mmap_addr +
 686                             mb_info->mmap_length;
 687                             mmap = (mb_memory_map_t *)((uint32_t)mmap +
 688                             mmap->size + sizeof (mmap->size)))
 689                                 ++num_entries;
 690 
 691                         num_entries_set = B_TRUE;
 692                 }
 693                 break;
 694         case 2:
 695                 num_entries_set = B_TRUE;
 696                 num_entries = dboot_multiboot2_mmap_nentries(mb2_info,
 697                     mb2_mmap_tagp);
 698                 break;
 699         default:
 700                 dboot_panic("Unknown multiboot version: %d\n",
 701                     multiboot_version);
 702                 break;
 703         }
 704         return (num_entries);
 705 #else
 706         return (MAXMAPS);
 707 #endif
 708 }
 709 
 710 static uint32_t
 711 dboot_loader_mmap_get_type(int index)
 712 {
 713 #if !defined(__xpv)
 714         mb_memory_map_t *mp, *mpend;
 715         int i;
 716 
 717         switch (multiboot_version) {
 718         case 1:
 719                 mp = (mb_memory_map_t *)mb_info->mmap_addr;
 720                 mpend = (mb_memory_map_t *)
 721                     (mb_info->mmap_addr + mb_info->mmap_length);
 722 
 723                 for (i = 0; mp < mpend && i != index; i++)
 724                         mp = (mb_memory_map_t *)((uint32_t)mp + mp->size +
 725                             sizeof (mp->size));
 726                 if (mp >= mpend) {
 727                         dboot_panic("dboot_loader_mmap_get_type(): index "
 728                             "out of bounds: %d\n", index);
 729                 }
 730                 return (mp->type);
 731 
 732         case 2:
 733                 return (dboot_multiboot2_mmap_get_type(mb2_info,
 734                     mb2_mmap_tagp, index));
 735 
 736         default:
 737                 dboot_panic("Unknown multiboot version: %d\n",
 738                     multiboot_version);
 739                 break;
 740         }
 741         return (0);
 742 #else
 743         return (map_buffer[index].type);
 744 #endif
 745 }
 746 
 747 static uint64_t
 748 dboot_loader_mmap_get_base(int index)
 749 {
 750 #if !defined(__xpv)
 751         mb_memory_map_t *mp, *mpend;
 752         int i;
 753 
 754         switch (multiboot_version) {
 755         case 1:
 756                 mp = (mb_memory_map_t *)mb_info->mmap_addr;
 757                 mpend = (mb_memory_map_t *)
 758                     (mb_info->mmap_addr + mb_info->mmap_length);
 759 
 760                 for (i = 0; mp < mpend && i != index; i++)
 761                         mp = (mb_memory_map_t *)((uint32_t)mp + mp->size +
 762                             sizeof (mp->size));
 763                 if (mp >= mpend) {
 764                         dboot_panic("dboot_loader_mmap_get_base(): index "
 765                             "out of bounds: %d\n", index);
 766                 }
 767                 return (((uint64_t)mp->base_addr_high << 32) +
 768                     (uint64_t)mp->base_addr_low);
 769 
 770         case 2:
 771                 return (dboot_multiboot2_mmap_get_base(mb2_info,
 772                     mb2_mmap_tagp, index));
 773 
 774         default:
 775                 dboot_panic("Unknown multiboot version: %d\n",
 776                     multiboot_version);
 777                 break;
 778         }
 779         return (0);
 780 #else
 781         return (((uint64_t)map_buffer[index].base_addr_high << 32) +
 782             (uint64_t)map_buffer[index].base_addr_low);
 783 #endif
 784 }
 785 
 786 static uint64_t
 787 dboot_loader_mmap_get_length(int index)
 788 {
 789 #if !defined(__xpv)
 790         mb_memory_map_t *mp, *mpend;
 791         int i;
 792 
 793         switch (multiboot_version) {
 794         case 1:
 795                 mp = (mb_memory_map_t *)mb_info->mmap_addr;
 796                 mpend = (mb_memory_map_t *)
 797                     (mb_info->mmap_addr + mb_info->mmap_length);
 798 
 799                 for (i = 0; mp < mpend && i != index; i++)
 800                         mp = (mb_memory_map_t *)((uint32_t)mp + mp->size +
 801                             sizeof (mp->size));
 802                 if (mp >= mpend) {
 803                         dboot_panic("dboot_loader_mmap_get_length(): index "
 804                             "out of bounds: %d\n", index);
 805                 }
 806                 return (((uint64_t)mp->length_high << 32) +
 807                     (uint64_t)mp->length_low);
 808 
 809         case 2:
 810                 return (dboot_multiboot2_mmap_get_length(mb2_info,
 811                     mb2_mmap_tagp, index));
 812 
 813         default:
 814                 dboot_panic("Unknown multiboot version: %d\n",
 815                     multiboot_version);
 816                 break;
 817         }
 818         return (0);
 819 #else
 820         return (((uint64_t)map_buffer[index].length_high << 32) +
 821             (uint64_t)map_buffer[index].length_low);
 822 #endif
 823 }
 824 
 825 static void
 826 build_pcimemlists(void)
 827 {
 828         uint64_t page_offset = MMU_PAGEOFFSET;  /* needs to be 64 bits */
 829         uint64_t start;
 830         uint64_t end;
 831         int i, num;
 832 
 833         /*
 834          * initialize
 835          */
 836         pcimemlists[0].addr = pci_lo_limit;
 837         pcimemlists[0].size = pci_hi_limit - pci_lo_limit;
 838         pcimemlists_used = 1;
 839 
 840         num = dboot_loader_mmap_entries();
 841         /*
 842          * Fill in PCI memlists.
 843          */
 844         for (i = 0; i < num; ++i) {
 845                 start = dboot_loader_mmap_get_base(i);
 846                 end = start + dboot_loader_mmap_get_length(i);
 847 
 848                 if (prom_debug)
 849                         dboot_printf("\ttype: %d %" PRIx64 "..%"
 850                             PRIx64 "\n", dboot_loader_mmap_get_type(i),
 851                             start, end);
 852 
 853                 /*
 854                  * page align start and end
 855                  */
 856                 start = (start + page_offset) & ~page_offset;
 857                 end &= ~page_offset;
 858                 if (end <= start)
 859                         continue;
 860 
 861                 exclude_from_pci(start, end);
 862         }
 863 
 864         /*
 865          * Finish off the pcimemlist
 866          */
 867         if (prom_debug) {
 868                 for (i = 0; i < pcimemlists_used; ++i) {
 869                         dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%"
 870                             PRIx64 "\n", pcimemlists[i].addr,
 871                             pcimemlists[i].addr + pcimemlists[i].size);
 872                 }
 873         }
 874         pcimemlists[0].next = 0;
 875         pcimemlists[0].prev = 0;
 876         for (i = 1; i < pcimemlists_used; ++i) {
 877                 pcimemlists[i].prev =
 878                     (native_ptr_t)(uintptr_t)(pcimemlists + i - 1);
 879                 pcimemlists[i].next = 0;
 880                 pcimemlists[i - 1].next =
 881                     (native_ptr_t)(uintptr_t)(pcimemlists + i);
 882         }
 883         bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
 884         DBG(bi->bi_pcimem);
 885 }
 886 
 887 #if defined(__xpv)
 888 /*
 889  * Initialize memory allocator stuff from hypervisor-supplied start info.
 890  */
 891 static void
 892 init_mem_alloc(void)
 893 {
 894         int     local;  /* variables needed to find start region */
 895         paddr_t scratch_start;
 896         xen_memory_map_t map;
 897 
 898         DBG_MSG("Entered init_mem_alloc()\n");
 899 
 900         /*
 901          * Free memory follows the stack. There's at least 512KB of scratch
 902          * space, rounded up to at least 2Mb alignment.  That should be enough
 903          * for the page tables we'll need to build.  The nucleus memory is
 904          * allocated last and will be outside the addressible range.  We'll
 905          * switch to new page tables before we unpack the kernel
 906          */
 907         scratch_start = RNDUP((paddr_t)(uintptr_t)&local, MMU_PAGESIZE);
 908         DBG(scratch_start);
 909         scratch_end = RNDUP((paddr_t)scratch_start + 512 * 1024, TWO_MEG);
 910         DBG(scratch_end);
 911 
 912         /*
 913          * For paranoia, leave some space between hypervisor data and ours.
 914          * Use 500 instead of 512.
 915          */
 916         next_avail_addr = scratch_end - 500 * 1024;
 917         DBG(next_avail_addr);
 918 
 919         /*
 920          * The domain builder gives us at most 1 module
 921          */
 922         DBG(xen_info->mod_len);
 923         if (xen_info->mod_len > 0) {
 924                 DBG(xen_info->mod_start);
 925                 modules[0].bm_addr =
 926                     (native_ptr_t)(uintptr_t)xen_info->mod_start;
 927                 modules[0].bm_size = xen_info->mod_len;
 928                 bi->bi_module_cnt = 1;
 929                 bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
 930         } else {
 931                 bi->bi_module_cnt = 0;
 932                 bi->bi_modules = (native_ptr_t)(uintptr_t)NULL;
 933         }
 934         DBG(bi->bi_module_cnt);
 935         DBG(bi->bi_modules);
 936 
 937         DBG(xen_info->mfn_list);
 938         DBG(xen_info->nr_pages);
 939         max_mem = (paddr_t)xen_info->nr_pages << MMU_PAGESHIFT;
 940         DBG(max_mem);
 941 
 942         /*
 943          * Using pseudo-physical addresses, so only 1 memlist element
 944          */
 945         memlists[0].addr = 0;
 946         DBG(memlists[0].addr);
 947         memlists[0].size = max_mem;
 948         DBG(memlists[0].size);
 949         memlists_used = 1;
 950         DBG(memlists_used);
 951 
 952         /*
 953          * finish building physinstall list
 954          */
 955         sort_physinstall();
 956 
 957         /*
 958          * build bios reserved memlists
 959          */
 960         build_rsvdmemlists();
 961 
 962         if (DOMAIN_IS_INITDOMAIN(xen_info)) {
 963                 /*
 964                  * build PCI Memory list
 965                  */
 966                 map.nr_entries = MAXMAPS;
 967                 /*LINTED: constant in conditional context*/
 968                 set_xen_guest_handle(map.buffer, map_buffer);
 969                 if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &map) != 0)
 970                         dboot_panic("getting XENMEM_machine_memory_map failed");
 971                 build_pcimemlists();
 972         }
 973 }
 974 
 975 #else   /* !__xpv */
 976 
 977 static void
 978 dboot_multiboot1_xboot_consinfo(void)
 979 {
 980         fb->framebuffer = 0;
 981 }
 982 
 983 static void
 984 dboot_multiboot2_xboot_consinfo(void)
 985 {
 986         multiboot_tag_framebuffer_t *fbtag;
 987         fbtag = dboot_multiboot2_find_tag(mb2_info,
 988             MULTIBOOT_TAG_TYPE_FRAMEBUFFER);
 989         fb->framebuffer = (uint64_t)(uintptr_t)fbtag;
 990         fb->boot_fb_virt = 0;
 991 }
 992 
 993 static int
 994 dboot_multiboot_modcount(void)
 995 {
 996         switch (multiboot_version) {
 997         case 1:
 998                 return (mb_info->mods_count);
 999 
1000         case 2:
1001                 return (dboot_multiboot2_modcount(mb2_info));
1002 
1003         default:
1004                 dboot_panic("Unknown multiboot version: %d\n",
1005                     multiboot_version);
1006                 break;
1007         }
1008         return (0);
1009 }
1010 
1011 static uint32_t
1012 dboot_multiboot_modstart(int index)
1013 {
1014         switch (multiboot_version) {
1015         case 1:
1016                 return (((mb_module_t *)mb_info->mods_addr)[index].mod_start);
1017 
1018         case 2:
1019                 return (dboot_multiboot2_modstart(mb2_info, index));
1020 
1021         default:
1022                 dboot_panic("Unknown multiboot version: %d\n",
1023                     multiboot_version);
1024                 break;
1025         }
1026         return (0);
1027 }
1028 
1029 static uint32_t
1030 dboot_multiboot_modend(int index)
1031 {
1032         switch (multiboot_version) {
1033         case 1:
1034                 return (((mb_module_t *)mb_info->mods_addr)[index].mod_end);
1035 
1036         case 2:
1037                 return (dboot_multiboot2_modend(mb2_info, index));
1038 
1039         default:
1040                 dboot_panic("Unknown multiboot version: %d\n",
1041                     multiboot_version);
1042                 break;
1043         }
1044         return (0);
1045 }
1046 
1047 static char *
1048 dboot_multiboot_modcmdline(int index)
1049 {
1050         switch (multiboot_version) {
1051         case 1:
1052                 return ((char *)((mb_module_t *)
1053                     mb_info->mods_addr)[index].mod_name);
1054 
1055         case 2:
1056                 return (dboot_multiboot2_modcmdline(mb2_info, index));
1057 
1058         default:
1059                 dboot_panic("Unknown multiboot version: %d\n",
1060                     multiboot_version);
1061                 break;
1062         }
1063         return (0);
1064 }
1065 
1066 /*
1067  * Find the modules used by console setup.
1068  * Since we need the console to print early boot messages, the console is set up
1069  * before anything else and therefore we need to pick up the needed modules.
1070  *
1071  * Note, we just will search for and if found, will pass the modules
1072  * to console setup, the proper module list processing will happen later.
1073  * Currenly used modules are boot environment and consoler font.
1074  */
1075 static void
1076 dboot_find_console_modules(void)
1077 {
1078         int i, modcount;
1079         uint32_t mod_start, mod_end;
1080         char *cmdline;
1081 
1082         modcount = dboot_multiboot_modcount();
1083         bi->bi_module_cnt = 0;
1084         for (i = 0; i < modcount; ++i) {
1085                 cmdline = dboot_multiboot_modcmdline(i);
1086                 if (cmdline == NULL)
1087                         continue;
1088 
1089                 if (strstr(cmdline, "type=console-font") != NULL)
1090                         modules[bi->bi_module_cnt].bm_type = BMT_FONT;
1091                 else if (strstr(cmdline, "type=environment") != NULL)
1092                         modules[bi->bi_module_cnt].bm_type = BMT_ENV;
1093                 else
1094                         continue;
1095 
1096                 mod_start = dboot_multiboot_modstart(i);
1097                 mod_end = dboot_multiboot_modend(i);
1098                 modules[bi->bi_module_cnt].bm_addr =
1099                     (native_ptr_t)(uintptr_t)mod_start;
1100                 modules[bi->bi_module_cnt].bm_size = mod_end - mod_start;
1101                 modules[bi->bi_module_cnt].bm_name =
1102                     (native_ptr_t)(uintptr_t)NULL;
1103                 modules[bi->bi_module_cnt].bm_hash =
1104                     (native_ptr_t)(uintptr_t)NULL;
1105                 bi->bi_module_cnt++;
1106         }
1107         if (bi->bi_module_cnt != 0)
1108                 bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
1109 }
1110 
1111 static boolean_t
1112 dboot_multiboot_basicmeminfo(uint32_t *lower, uint32_t *upper)
1113 {
1114         boolean_t rv = B_FALSE;
1115 
1116         switch (multiboot_version) {
1117         case 1:
1118                 if (mb_info->flags & 0x01) {
1119                         *lower = mb_info->mem_lower;
1120                         *upper = mb_info->mem_upper;
1121                         rv = B_TRUE;
1122                 }
1123                 break;
1124 
1125         case 2:
1126                 return (dboot_multiboot2_basicmeminfo(mb2_info, lower, upper));
1127 
1128         default:
1129                 dboot_panic("Unknown multiboot version: %d\n",
1130                     multiboot_version);
1131                 break;
1132         }
1133         return (rv);
1134 }
1135 
/*
 * Convert one ASCII hexadecimal digit ('0'-'9', 'a'-'f' or 'A'-'F') to its
 * numeric value (0-15).
 *
 * Any other character is a fatal error.  Both ends of each range are
 * checked, so characters that merely sort after '0' (for example ':', 'G'
 * or 'z') are rejected rather than converted to a meaningless value.
 */
static uint8_t
dboot_a2h(char v)
{
        if (v >= '0' && v <= '9')
                return (v - '0');
        if (v >= 'a' && v <= 'f')
                return (v - 'a' + 0xa);
        if (v >= 'A' && v <= 'F')
                return (v - 'A' + 0xa);

        dboot_panic("bad ASCII hex character %c\n", v);

        return (0);
}
1150 
1151 static void
1152 digest_a2h(const char *ascii, uint8_t *digest)
1153 {
1154         unsigned int i;
1155 
1156         for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
1157                 digest[i] = dboot_a2h(ascii[i * 2]) << 4;
1158                 digest[i] |= dboot_a2h(ascii[i * 2 + 1]);
1159         }
1160 }
1161 
1162 /*
1163  * Generate a SHA-1 hash of the first len bytes of image, and compare it with
1164  * the ASCII-format hash found in the 40-byte buffer at ascii.  If they
1165  * match, return 0, otherwise -1.  This works only for images smaller than
1166  * 4 GB, which should not be a problem.
1167  */
1168 static int
1169 check_image_hash(uint_t midx)
1170 {
1171         const char *ascii;
1172         const void *image;
1173         size_t len;
1174         SHA1_CTX ctx;
1175         uint8_t digest[SHA1_DIGEST_LENGTH];
1176         uint8_t baseline[SHA1_DIGEST_LENGTH];
1177         unsigned int i;
1178 
1179         ascii = (const char *)(uintptr_t)modules[midx].bm_hash;
1180         image = (const void *)(uintptr_t)modules[midx].bm_addr;
1181         len = (size_t)modules[midx].bm_size;
1182 
1183         digest_a2h(ascii, baseline);
1184 
1185         SHA1Init(&ctx);
1186         SHA1Update(&ctx, image, len);
1187         SHA1Final(digest, &ctx);
1188 
1189         for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
1190                 if (digest[i] != baseline[i])
1191                         return (-1);
1192         }
1193 
1194         return (0);
1195 }
1196 
1197 static const char *
1198 type_to_str(boot_module_type_t type)
1199 {
1200         switch (type) {
1201         case BMT_ROOTFS:
1202                 return ("rootfs");
1203         case BMT_FILE:
1204                 return ("file");
1205         case BMT_HASH:
1206                 return ("hash");
1207         case BMT_ENV:
1208                 return ("environment");
1209         case BMT_FONT:
1210                 return ("console-font");
1211         default:
1212                 return ("unknown");
1213         }
1214 }
1215 
1216 static void
1217 check_images(void)
1218 {
1219         uint_t i;
1220         char displayhash[SHA1_ASCII_LENGTH + 1];
1221 
1222         for (i = 0; i < modules_used; i++) {
1223                 if (prom_debug) {
1224                         dboot_printf("module #%d: name %s type %s "
1225                             "addr %lx size %lx\n",
1226                             i, (char *)(uintptr_t)modules[i].bm_name,
1227                             type_to_str(modules[i].bm_type),
1228                             (ulong_t)modules[i].bm_addr,
1229                             (ulong_t)modules[i].bm_size);
1230                 }
1231 
1232                 if (modules[i].bm_type == BMT_HASH ||
1233                     modules[i].bm_hash == (native_ptr_t)(uintptr_t)NULL) {
1234                         DBG_MSG("module has no hash; skipping check\n");
1235                         continue;
1236                 }
1237                 (void) memcpy(displayhash,
1238                     (void *)(uintptr_t)modules[i].bm_hash,
1239                     SHA1_ASCII_LENGTH);
1240                 displayhash[SHA1_ASCII_LENGTH] = '\0';
1241                 if (prom_debug) {
1242                         dboot_printf("checking expected hash [%s]: ",
1243                             displayhash);
1244                 }
1245 
1246                 if (check_image_hash(i) != 0)
1247                         dboot_panic("hash mismatch!\n");
1248                 else
1249                         DBG_MSG("OK\n");
1250         }
1251 }
1252 
1253 /*
1254  * Determine the module's starting address, size, name, and type, and fill the
1255  * boot_modules structure.  This structure is used by the bop code, except for
1256  * hashes which are checked prior to transferring control to the kernel.
1257  */
1258 static void
1259 process_module(int midx)
1260 {
1261         uint32_t mod_start = dboot_multiboot_modstart(midx);
1262         uint32_t mod_end = dboot_multiboot_modend(midx);
1263         char *cmdline = dboot_multiboot_modcmdline(midx);
1264         char *p, *q;
1265 
1266         check_higher(mod_end);
1267         if (prom_debug) {
1268                 dboot_printf("\tmodule #%d: '%s' at 0x%lx, end 0x%lx\n",
1269                     midx, cmdline, (ulong_t)mod_start, (ulong_t)mod_end);
1270         }
1271 
1272         if (mod_start > mod_end) {
1273                 dboot_panic("module #%d: module start address 0x%lx greater "
1274                     "than end address 0x%lx", midx,
1275                     (ulong_t)mod_start, (ulong_t)mod_end);
1276         }
1277 
1278         /*
1279          * A brief note on lengths and sizes: GRUB, for reasons unknown, passes
1280          * the address of the last valid byte in a module plus 1 as mod_end.
1281          * This is of course a bug; the multiboot specification simply states
1282          * that mod_start and mod_end "contain the start and end addresses of
1283          * the boot module itself" which is pretty obviously not what GRUB is
1284          * doing.  However, fixing it requires that not only this code be
1285          * changed but also that other code consuming this value and values
1286          * derived from it be fixed, and that the kernel and GRUB must either
1287          * both have the bug or neither.  While there are a lot of combinations
1288          * that will work, there are also some that won't, so for simplicity
1289          * we'll just cope with the bug.  That means we won't actually hash the
1290          * byte at mod_end, and we will expect that mod_end for the hash file
1291          * itself is one greater than some multiple of 41 (40 bytes of ASCII
1292          * hash plus a newline for each module).  We set bm_size to the true
1293          * correct number of bytes in each module, achieving exactly this.
1294          */
1295 
1296         modules[midx].bm_addr = (native_ptr_t)(uintptr_t)mod_start;
1297         modules[midx].bm_size = mod_end - mod_start;
1298         modules[midx].bm_name = (native_ptr_t)(uintptr_t)cmdline;
1299         modules[midx].bm_hash = (native_ptr_t)(uintptr_t)NULL;
1300         modules[midx].bm_type = BMT_FILE;
1301 
1302         if (cmdline == NULL) {
1303                 modules[midx].bm_name = (native_ptr_t)(uintptr_t)noname;
1304                 return;
1305         }
1306 
1307         p = cmdline;
1308         modules[midx].bm_name =
1309             (native_ptr_t)(uintptr_t)strsep(&p, " \t\f\n\r");
1310 
1311         while (p != NULL) {
1312                 q = strsep(&p, " \t\f\n\r");
1313                 if (strncmp(q, "name=", 5) == 0) {
1314                         if (q[5] != '\0' && !isspace(q[5])) {
1315                                 modules[midx].bm_name =
1316                                     (native_ptr_t)(uintptr_t)(q + 5);
1317                         }
1318                         continue;
1319                 }
1320 
1321                 if (strncmp(q, "type=", 5) == 0) {
1322                         if (q[5] == '\0' || isspace(q[5]))
1323                                 continue;
1324                         q += 5;
1325                         if (strcmp(q, "rootfs") == 0) {
1326                                 modules[midx].bm_type = BMT_ROOTFS;
1327                         } else if (strcmp(q, "hash") == 0) {
1328                                 modules[midx].bm_type = BMT_HASH;
1329                         } else if (strcmp(q, "environment") == 0) {
1330                                 modules[midx].bm_type = BMT_ENV;
1331                         } else if (strcmp(q, "console-font") == 0) {
1332                                 modules[midx].bm_type = BMT_FONT;
1333                         } else if (strcmp(q, "file") != 0) {
1334                                 dboot_printf("\tmodule #%d: unknown module "
1335                                     "type '%s'; defaulting to 'file'",
1336                                     midx, q);
1337                         }
1338                         continue;
1339                 }
1340 
1341                 if (strncmp(q, "hash=", 5) == 0) {
1342                         if (q[5] != '\0' && !isspace(q[5])) {
1343                                 modules[midx].bm_hash =
1344                                     (native_ptr_t)(uintptr_t)(q + 5);
1345                         }
1346                         continue;
1347                 }
1348 
1349                 dboot_printf("ignoring unknown option '%s'\n", q);
1350         }
1351 }
1352 
1353 /*
1354  * Backward compatibility: if there are exactly one or two modules, both
1355  * of type 'file' and neither with an embedded hash value, we have been
1356  * given the legacy style modules.  In this case we need to treat the first
1357  * module as a rootfs and the second as a hash referencing that module.
1358  * Otherwise, even if the configuration is invalid, we assume that the
1359  * operator knows what he's doing or at least isn't being bitten by this
1360  * interface change.
1361  */
1362 static void
1363 fixup_modules(void)
1364 {
1365         if (modules_used == 0 || modules_used > 2)
1366                 return;
1367 
1368         if (modules[0].bm_type != BMT_FILE ||
1369             modules_used > 1 && modules[1].bm_type != BMT_FILE) {
1370                 return;
1371         }
1372 
1373         if (modules[0].bm_hash != (native_ptr_t)(uintptr_t)NULL ||
1374             modules_used > 1 &&
1375             modules[1].bm_hash != (native_ptr_t)(uintptr_t)NULL) {
1376                 return;
1377         }
1378 
1379         modules[0].bm_type = BMT_ROOTFS;
1380         if (modules_used > 1) {
1381                 modules[1].bm_type = BMT_HASH;
1382                 modules[1].bm_name = modules[0].bm_name;
1383         }
1384 }
1385 
1386 /*
1387  * For modules that do not have assigned hashes but have a separate hash module,
1388  * find the assigned hash module and set the primary module's bm_hash to point
1389  * to the hash data from that module.  We will then ignore modules of type
1390  * BMT_HASH from this point forward.
1391  */
1392 static void
1393 assign_module_hashes(void)
1394 {
1395         uint_t i, j;
1396 
1397         for (i = 0; i < modules_used; i++) {
1398                 if (modules[i].bm_type == BMT_HASH ||
1399                     modules[i].bm_hash != (native_ptr_t)(uintptr_t)NULL) {
1400                         continue;
1401                 }
1402 
1403                 for (j = 0; j < modules_used; j++) {
1404                         if (modules[j].bm_type != BMT_HASH ||
1405                             strcmp((char *)(uintptr_t)modules[j].bm_name,
1406                             (char *)(uintptr_t)modules[i].bm_name) != 0) {
1407                                 continue;
1408                         }
1409 
1410                         if (modules[j].bm_size < SHA1_ASCII_LENGTH) {
1411                                 dboot_printf("Short hash module of length "
1412                                     "0x%lx bytes; ignoring\n",
1413                                     (ulong_t)modules[j].bm_size);
1414                         } else {
1415                                 modules[i].bm_hash = modules[j].bm_addr;
1416                         }
1417                         break;
1418                 }
1419         }
1420 }
1421 
1422 /*
1423  * Walk through the module information finding the last used address.
1424  * The first available address will become the top level page table.
1425  */
1426 static void
1427 dboot_process_modules(void)
1428 {
1429         int i, modcount;
1430         extern char _end[];
1431 
1432         DBG_MSG("\nFinding Modules\n");
1433         modcount = dboot_multiboot_modcount();
1434         if (modcount > MAX_BOOT_MODULES) {
1435                 dboot_panic("Too many modules (%d) -- the maximum is %d.",
1436                     modcount, MAX_BOOT_MODULES);
1437         }
1438         /*
1439          * search the modules to find the last used address
1440          * we'll build the module list while we're walking through here
1441          */
1442         check_higher((paddr_t)(uintptr_t)&_end);
1443         for (i = 0; i < modcount; ++i) {
1444                 process_module(i);
1445                 modules_used++;
1446         }
1447         bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
1448         DBG(bi->bi_modules);
1449         bi->bi_module_cnt = modcount;
1450         DBG(bi->bi_module_cnt);
1451 
1452         fixup_modules();
1453         assign_module_hashes();
1454         check_images();
1455 }
1456 
1457 /*
1458  * We then build the phys_install memlist from the multiboot information.
1459  */
1460 static void
1461 dboot_process_mmap(void)
1462 {
1463         uint64_t start;
1464         uint64_t end;
1465         uint64_t page_offset = MMU_PAGEOFFSET;  /* needs to be 64 bits */
1466         uint32_t lower, upper;
1467         int i, mmap_entries;
1468 
1469         /*
1470          * Walk through the memory map from multiboot and build our memlist
1471          * structures. Note these will have native format pointers.
1472          */
1473         DBG_MSG("\nFinding Memory Map\n");
1474         num_entries = 0;
1475         num_entries_set = B_FALSE;
1476         max_mem = 0;
1477         if ((mmap_entries = dboot_loader_mmap_entries()) > 0) {
1478                 for (i = 0; i < mmap_entries; i++) {
1479                         uint32_t type = dboot_loader_mmap_get_type(i);
1480                         start = dboot_loader_mmap_get_base(i);
1481                         end = start + dboot_loader_mmap_get_length(i);
1482 
1483                         if (prom_debug)
1484                                 dboot_printf("\ttype: %d %" PRIx64 "..%"
1485                                     PRIx64 "\n", type, start, end);
1486 
1487                         /*
1488                          * page align start and end
1489                          */
1490                         start = (start + page_offset) & ~page_offset;
1491                         end &= ~page_offset;
1492                         if (end <= start)
1493                                 continue;
1494 
1495                         /*
1496                          * only type 1 is usable RAM
1497                          */
1498                         switch (type) {
1499                         case 1:
1500                                 if (end > max_mem)
1501                                         max_mem = end;
1502                                 memlists[memlists_used].addr = start;
1503                                 memlists[memlists_used].size = end - start;
1504                                 ++memlists_used;
1505                                 if (memlists_used > MAX_MEMLIST)
1506                                         dboot_panic("too many memlists");
1507                                 break;
1508                         case 2:
1509                                 rsvdmemlists[rsvdmemlists_used].addr = start;
1510                                 rsvdmemlists[rsvdmemlists_used].size =
1511                                     end - start;
1512                                 ++rsvdmemlists_used;
1513                                 if (rsvdmemlists_used > MAX_MEMLIST)
1514                                         dboot_panic("too many rsvdmemlists");
1515                                 break;
1516                         default:
1517                                 continue;
1518                         }
1519                 }
1520                 build_pcimemlists();
1521         } else if (dboot_multiboot_basicmeminfo(&lower, &upper)) {
1522                 DBG(lower);
1523                 memlists[memlists_used].addr = 0;
1524                 memlists[memlists_used].size = lower * 1024;
1525                 ++memlists_used;
1526                 DBG(upper);
1527                 memlists[memlists_used].addr = 1024 * 1024;
1528                 memlists[memlists_used].size = upper * 1024;
1529                 ++memlists_used;
1530 
1531                 /*
1532                  * Old platform - assume I/O space at the end of memory.
1533                  */
1534                 pcimemlists[0].addr = (upper * 1024) + (1024 * 1024);
1535                 pcimemlists[0].size = pci_hi_limit - pcimemlists[0].addr;
1536                 pcimemlists[0].next = 0;
1537                 pcimemlists[0].prev = 0;
1538                 bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
1539                 DBG(bi->bi_pcimem);
1540         } else {
1541                 dboot_panic("No memory info from boot loader!!!");
1542         }
1543 
1544         /*
1545          * finish processing the physinstall list
1546          */
1547         sort_physinstall();
1548 
1549         /*
1550          * build bios reserved mem lists
1551          */
1552         build_rsvdmemlists();
1553 }
1554 
1555 /*
1556  * The highest address is used as the starting point for dboot's simple
1557  * memory allocator.
1558  *
1559  * Finding the highest address in case of Multiboot 1 protocol is
1560  * quite painful in the sense that some information provided by
1561  * the multiboot info structure points to BIOS data, and some to RAM.
1562  *
1563  * The module list was processed and checked already by dboot_process_modules(),
1564  * so we will check the command line string and the memory map.
1565  *
1566  * This list of to be checked items is based on our current knowledge of
1567  * allocations made by grub1 and will need to be reviewed if there
1568  * are updates about the information provided by Multiboot 1.
1569  *
1570  * In the case of the Multiboot 2, our life is much simpler, as the MB2
1571  * information tag list is one contiguous chunk of memory.
1572  */
1573 static paddr_t
1574 dboot_multiboot1_highest_addr(void)
1575 {
1576         paddr_t addr = (paddr_t)(uintptr_t)NULL;
1577         char *cmdl = (char *)mb_info->cmdline;
1578 
1579         if (mb_info->flags & MB_INFO_CMDLINE)
1580                 addr = ((paddr_t)((uintptr_t)cmdl + strlen(cmdl) + 1));
1581 
1582         if (mb_info->flags & MB_INFO_MEM_MAP)
1583                 addr = MAX(addr,
1584                     ((paddr_t)(mb_info->mmap_addr + mb_info->mmap_length)));
1585         return (addr);
1586 }
1587 
1588 static void
1589 dboot_multiboot_highest_addr(void)
1590 {
1591         paddr_t addr;
1592 
1593         switch (multiboot_version) {
1594         case 1:
1595                 addr = dboot_multiboot1_highest_addr();
1596                 if (addr != (paddr_t)(uintptr_t)NULL)
1597                         check_higher(addr);
1598                 break;
1599         case 2:
1600                 addr = dboot_multiboot2_highest_addr(mb2_info);
1601                 if (addr != (paddr_t)(uintptr_t)NULL)
1602                         check_higher(addr);
1603                 break;
1604         default:
1605                 dboot_panic("Unknown multiboot version: %d\n",
1606                     multiboot_version);
1607                 break;
1608         }
1609 }
1610 
/*
 * Multiboot variant of init_mem_alloc(): walk the boot loader provided
 * information and find the highest free address.  Modules are processed
 * first, then the memory map, and finally the remaining multiboot data is
 * checked for its highest address.
 */
static void
init_mem_alloc(void)
{
        DBG_MSG("Entered init_mem_alloc()\n");

        dboot_process_modules();
        dboot_process_mmap();
        dboot_multiboot_highest_addr();
}
1622 
1623 static int
1624 dboot_same_guids(efi_guid_t *g1, efi_guid_t *g2)
1625 {
1626         int i;
1627 
1628         if (g1->time_low != g2->time_low)
1629                 return (0);
1630         if (g1->time_mid != g2->time_mid)
1631                 return (0);
1632         if (g1->time_hi_and_version != g2->time_hi_and_version)
1633                 return (0);
1634         if (g1->clock_seq_hi_and_reserved != g2->clock_seq_hi_and_reserved)
1635                 return (0);
1636         if (g1->clock_seq_low != g2->clock_seq_low)
1637                 return (0);
1638 
1639         for (i = 0; i < 6; i++) {
1640                 if (g1->node_addr[i] != g2->node_addr[i])
1641                         return (0);
1642         }
1643         return (1);
1644 }
1645 
1646 static void
1647 process_efi32(EFI_SYSTEM_TABLE32 *efi)
1648 {
1649         uint32_t entries;
1650         EFI_CONFIGURATION_TABLE32 *config;
1651         int i;
1652 
1653         entries = efi->NumberOfTableEntries;
1654         config = (EFI_CONFIGURATION_TABLE32 *)(uintptr_t)
1655             efi->ConfigurationTable;
1656 
1657         for (i = 0; i < entries; i++) {
1658                 if (dboot_same_guids(&config[i].VendorGuid, &smbios3)) {
1659                         bi->bi_smbios = (native_ptr_t)(uintptr_t)
1660                             config[i].VendorTable;
1661                 }
1662                 if (bi->bi_smbios == NULL &&
1663                     dboot_same_guids(&config[i].VendorGuid, &smbios)) {
1664                         bi->bi_smbios = (native_ptr_t)(uintptr_t)
1665                             config[i].VendorTable;
1666                 }
1667                 if (dboot_same_guids(&config[i].VendorGuid, &acpi2)) {
1668                         bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1669                             config[i].VendorTable;
1670                 }
1671                 if (bi->bi_acpi_rsdp == NULL &&
1672                     dboot_same_guids(&config[i].VendorGuid, &acpi1)) {
1673                         bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1674                             config[i].VendorTable;
1675                 }
1676         }
1677 }
1678 
1679 static void
1680 process_efi64(EFI_SYSTEM_TABLE64 *efi)
1681 {
1682         uint64_t entries;
1683         EFI_CONFIGURATION_TABLE64 *config;
1684         int i;
1685 
1686         entries = efi->NumberOfTableEntries;
1687         config = (EFI_CONFIGURATION_TABLE64 *)(uintptr_t)
1688             efi->ConfigurationTable;
1689 
1690         for (i = 0; i < entries; i++) {
1691                 if (dboot_same_guids(&config[i].VendorGuid, &smbios3)) {
1692                         bi->bi_smbios = (native_ptr_t)(uintptr_t)
1693                             config[i].VendorTable;
1694                 }
1695                 if (bi->bi_smbios == NULL &&
1696                     dboot_same_guids(&config[i].VendorGuid, &smbios)) {
1697                         bi->bi_smbios = (native_ptr_t)(uintptr_t)
1698                             config[i].VendorTable;
1699                 }
1700                 /* Prefer acpi v2+ over v1. */
1701                 if (dboot_same_guids(&config[i].VendorGuid, &acpi2)) {
1702                         bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1703                             config[i].VendorTable;
1704                 }
1705                 if (bi->bi_acpi_rsdp == NULL &&
1706                     dboot_same_guids(&config[i].VendorGuid, &acpi1)) {
1707                         bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1708                             config[i].VendorTable;
1709                 }
1710         }
1711 }
1712 
1713 static void
1714 dboot_multiboot_get_fwtables(void)
1715 {
1716         multiboot_tag_new_acpi_t *nacpitagp;
1717         multiboot_tag_old_acpi_t *oacpitagp;
1718         multiboot_tag_efi64_t *efi64tagp = NULL;
1719         multiboot_tag_efi32_t *efi32tagp = NULL;
1720 
1721         /* no fw tables from multiboot 1 */
1722         if (multiboot_version != 2)
1723                 return;
1724 
1725         efi64tagp = (multiboot_tag_efi64_t *)
1726             dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_EFI64);
1727         if (efi64tagp != NULL) {
1728                 bi->bi_uefi_arch = XBI_UEFI_ARCH_64;
1729                 bi->bi_uefi_systab = (native_ptr_t)(uintptr_t)
1730                     efi64tagp->mb_pointer;
1731                 process_efi64((EFI_SYSTEM_TABLE64 *)(uintptr_t)
1732                     efi64tagp->mb_pointer);
1733         } else {
1734                 efi32tagp = (multiboot_tag_efi32_t *)
1735                     dboot_multiboot2_find_tag(mb2_info,
1736                     MULTIBOOT_TAG_TYPE_EFI32);
1737                 if (efi32tagp != NULL) {
1738                         bi->bi_uefi_arch = XBI_UEFI_ARCH_32;
1739                         bi->bi_uefi_systab = (native_ptr_t)(uintptr_t)
1740                             efi32tagp->mb_pointer;
1741                         process_efi32((EFI_SYSTEM_TABLE32 *)(uintptr_t)
1742                             efi32tagp->mb_pointer);
1743                 }
1744         }
1745 
1746         /*
1747          * The ACPI RSDP can be found by scanning the BIOS memory areas or
1748          * from the EFI system table. The boot loader may pass in the address
1749          * it found the ACPI tables at.
1750          */
1751         nacpitagp = (multiboot_tag_new_acpi_t *)
1752             dboot_multiboot2_find_tag(mb2_info,
1753             MULTIBOOT_TAG_TYPE_ACPI_NEW);
1754         oacpitagp = (multiboot_tag_old_acpi_t *)
1755             dboot_multiboot2_find_tag(mb2_info,
1756             MULTIBOOT_TAG_TYPE_ACPI_OLD);
1757 
1758         if (nacpitagp != NULL) {
1759                 bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1760                     &nacpitagp->mb_rsdp[0];
1761         } else if (oacpitagp != NULL) {
1762                 bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1763                     &oacpitagp->mb_rsdp[0];
1764         }
1765 }
1766 
/*
 * Print an EFI revision value followed by a newline.
 *
 * The major part is printed first. The minor part is split into its
 * tens and units: "<major>.<tens>" when the units digit is zero,
 * "<major>.<tens>.<units>" otherwise.
 */
static void
dboot_print_efi_version(uint32_t ver)
{
        int minor = EFI_REV_MINOR(ver);
        int tens = minor / 10;
        int units = minor % 10;

        dboot_printf("%d.", EFI_REV_MAJOR(ver));

        if (units == 0) {
                dboot_printf("%d\n", tens);
                return;
        }
        dboot_printf("%d.%d\n", tens, units);
}
1782 
1783 static void
1784 print_efi32(EFI_SYSTEM_TABLE32 *efi)
1785 {
1786         uint16_t *data;
1787         EFI_CONFIGURATION_TABLE32 *conf;
1788         int i;
1789 
1790         dboot_printf("EFI32 signature: %llx\n",
1791             (unsigned long long)efi->Hdr.Signature);
1792         dboot_printf("EFI system version: ");
1793         dboot_print_efi_version(efi->Hdr.Revision);
1794         dboot_printf("EFI system vendor: ");
1795         data = (uint16_t *)(uintptr_t)efi->FirmwareVendor;
1796         for (i = 0; data[i] != 0; i++)
1797                 dboot_printf("%c", (char)data[i]);
1798         dboot_printf("\nEFI firmware revision: ");
1799         dboot_print_efi_version(efi->FirmwareRevision);
1800         dboot_printf("EFI system table number of entries: %d\n",
1801             efi->NumberOfTableEntries);
1802         conf = (EFI_CONFIGURATION_TABLE32 *)(uintptr_t)
1803             efi->ConfigurationTable;
1804         for (i = 0; i < (int)efi->NumberOfTableEntries; i++) {
1805                 dboot_printf("%d: 0x%x 0x%x 0x%x 0x%x 0x%x", i,
1806                     conf[i].VendorGuid.time_low,
1807                     conf[i].VendorGuid.time_mid,
1808                     conf[i].VendorGuid.time_hi_and_version,
1809                     conf[i].VendorGuid.clock_seq_hi_and_reserved,
1810                     conf[i].VendorGuid.clock_seq_low);
1811                 dboot_printf(" 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
1812                     conf[i].VendorGuid.node_addr[0],
1813                     conf[i].VendorGuid.node_addr[1],
1814                     conf[i].VendorGuid.node_addr[2],
1815                     conf[i].VendorGuid.node_addr[3],
1816                     conf[i].VendorGuid.node_addr[4],
1817                     conf[i].VendorGuid.node_addr[5]);
1818         }
1819 }
1820 
1821 static void
1822 print_efi64(EFI_SYSTEM_TABLE64 *efi)
1823 {
1824         uint16_t *data;
1825         EFI_CONFIGURATION_TABLE64 *conf;
1826         int i;
1827 
1828         dboot_printf("EFI64 signature: %llx\n",
1829             (unsigned long long)efi->Hdr.Signature);
1830         dboot_printf("EFI system version: ");
1831         dboot_print_efi_version(efi->Hdr.Revision);
1832         dboot_printf("EFI system vendor: ");
1833         data = (uint16_t *)(uintptr_t)efi->FirmwareVendor;
1834         for (i = 0; data[i] != 0; i++)
1835                 dboot_printf("%c", (char)data[i]);
1836         dboot_printf("\nEFI firmware revision: ");
1837         dboot_print_efi_version(efi->FirmwareRevision);
1838         dboot_printf("EFI system table number of entries: %lld\n",
1839             efi->NumberOfTableEntries);
1840         conf = (EFI_CONFIGURATION_TABLE64 *)(uintptr_t)
1841             efi->ConfigurationTable;
1842         for (i = 0; i < (int)efi->NumberOfTableEntries; i++) {
1843                 dboot_printf("%d: 0x%x 0x%x 0x%x 0x%x 0x%x", i,
1844                     conf[i].VendorGuid.time_low,
1845                     conf[i].VendorGuid.time_mid,
1846                     conf[i].VendorGuid.time_hi_and_version,
1847                     conf[i].VendorGuid.clock_seq_hi_and_reserved,
1848                     conf[i].VendorGuid.clock_seq_low);
1849                 dboot_printf(" 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
1850                     conf[i].VendorGuid.node_addr[0],
1851                     conf[i].VendorGuid.node_addr[1],
1852                     conf[i].VendorGuid.node_addr[2],
1853                     conf[i].VendorGuid.node_addr[3],
1854                     conf[i].VendorGuid.node_addr[4],
1855                     conf[i].VendorGuid.node_addr[5]);
1856         }
1857 }
1858 #endif /* !__xpv */
1859 
1860 /*
1861  * Simple memory allocator, allocates aligned physical memory.
1862  * Note that startup_kernel() only allocates memory, never frees.
1863  * Memory usage just grows in an upward direction.
1864  */
1865 static void *
1866 do_mem_alloc(uint32_t size, uint32_t align)
1867 {
1868         uint_t i;
1869         uint64_t best;
1870         uint64_t start;
1871         uint64_t end;
1872 
1873         /*
1874          * make sure size is a multiple of pagesize
1875          */
1876         size = RNDUP(size, MMU_PAGESIZE);
1877         next_avail_addr = RNDUP(next_avail_addr, align);
1878 
1879         /*
1880          * XXPV fixme joe
1881          *
1882          * a really large bootarchive that causes you to run out of memory
1883          * may cause this to blow up
1884          */
1885         /* LINTED E_UNEXPECTED_UINT_PROMOTION */
1886         best = (uint64_t)-size;
1887         for (i = 0; i < memlists_used; ++i) {
1888                 start = memlists[i].addr;
1889 #if defined(__xpv)
1890                 start += mfn_base;
1891 #endif
1892                 end = start + memlists[i].size;
1893 
1894                 /*
1895                  * did we find the desired address?
1896                  */
1897                 if (start <= next_avail_addr && next_avail_addr + size <= end) {
1898                         best = next_avail_addr;
1899                         goto done;
1900                 }
1901 
1902                 /*
1903                  * if not is this address the best so far?
1904                  */
1905                 if (start > next_avail_addr && start < best &&
1906                     RNDUP(start, align) + size <= end)
1907                         best = RNDUP(start, align);
1908         }
1909 
1910         /*
1911          * We didn't find exactly the address we wanted, due to going off the
1912          * end of a memory region. Return the best found memory address.
1913          */
1914 done:
1915         next_avail_addr = best + size;
1916 #if defined(__xpv)
1917         if (next_avail_addr > scratch_end)
1918                 dboot_panic("Out of mem next_avail: 0x%lx, scratch_end: "
1919                     "0x%lx", (ulong_t)next_avail_addr,
1920                     (ulong_t)scratch_end);
1921 #endif
1922         (void) memset((void *)(uintptr_t)best, 0, size);
1923         return ((void *)(uintptr_t)best);
1924 }
1925 
1926 void *
1927 mem_alloc(uint32_t size)
1928 {
1929         return (do_mem_alloc(size, MMU_PAGESIZE));
1930 }
1931 
1932 
1933 /*
1934  * Build page tables to map all of memory used so far as well as the kernel.
1935  */
1936 static void
1937 build_page_tables(void)
1938 {
1939         uint32_t psize;
1940         uint32_t level;
1941         uint32_t off;
1942         uint64_t start;
1943 #if !defined(__xpv)
1944         uint32_t i;
1945         uint64_t end;
1946 #endif  /* __xpv */
1947 
1948         /*
1949          * If we're on metal, we need to create the top level pagetable.
1950          */
1951 #if defined(__xpv)
1952         top_page_table = (paddr_t)(uintptr_t)xen_info->pt_base;
1953 #else /* __xpv */
1954         top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
1955 #endif /* __xpv */
1956         DBG((uintptr_t)top_page_table);
1957 
1958         /*
1959          * Determine if we'll use large mappings for kernel, then map it.
1960          */
1961         if (largepage_support) {
1962                 psize = lpagesize;
1963                 level = 1;
1964         } else {
1965                 psize = MMU_PAGESIZE;
1966                 level = 0;
1967         }
1968 
1969         DBG_MSG("Mapping kernel\n");
1970         DBG(ktext_phys);
1971         DBG(target_kernel_text);
1972         DBG(ksize);
1973         DBG(psize);
1974         for (off = 0; off < ksize; off += psize)
1975                 map_pa_at_va(ktext_phys + off, target_kernel_text + off, level);
1976 
1977         /*
1978          * The kernel will need a 1 page window to work with page tables
1979          */
1980         bi->bi_pt_window = (native_ptr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
1981         DBG(bi->bi_pt_window);
1982         bi->bi_pte_to_pt_window =
1983             (native_ptr_t)(uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0);
1984         DBG(bi->bi_pte_to_pt_window);
1985 
1986 #if defined(__xpv)
1987         if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
1988                 /* If this is a domU we're done. */
1989                 DBG_MSG("\nPage tables constructed\n");
1990                 return;
1991         }
1992 #endif /* __xpv */
1993 
1994         /*
1995          * We need 1:1 mappings for the lower 1M of memory to access
1996          * BIOS tables used by a couple of drivers during boot.
1997          *
1998          * The following code works because our simple memory allocator
1999          * only grows usage in an upwards direction.
2000          *
2001          * Note that by this point in boot some mappings for low memory
2002          * may already exist because we've already accessed device in low
2003          * memory.  (Specifically the video frame buffer and keyboard
2004          * status ports.)  If we're booting on raw hardware then GRUB
2005          * created these mappings for us.  If we're booting under a
2006          * hypervisor then we went ahead and remapped these devices into
2007          * memory allocated within dboot itself.
2008          */
2009         if (map_debug)
2010                 dboot_printf("1:1 map pa=0..1Meg\n");
2011         for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE) {
2012 #if defined(__xpv)
2013                 map_ma_at_va(start, start, 0);
2014 #else /* __xpv */
2015                 map_pa_at_va(start, start, 0);
2016 #endif /* __xpv */
2017         }
2018 
2019 #if !defined(__xpv)
2020 
2021         for (i = 0; i < memlists_used; ++i) {
2022                 start = memlists[i].addr;
2023                 end = start + memlists[i].size;
2024 
2025                 if (map_debug)
2026                         dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
2027                             start, end);
2028                 while (start < end && start < next_avail_addr) {
2029                         map_pa_at_va(start, start, 0);
2030                         start += MMU_PAGESIZE;
2031                 }
2032                 if (start >= next_avail_addr)
2033                         break;
2034         }
2035 
2036         /*
2037          * Map framebuffer memory as PT_NOCACHE as this is memory from a
2038          * device and therefore must not be cached.
2039          */
2040         if (fb != NULL && fb->framebuffer != 0) {
2041                 multiboot_tag_framebuffer_t *fb_tagp;
2042                 fb_tagp = (multiboot_tag_framebuffer_t *)(uintptr_t)
2043                     fb->framebuffer;
2044 
2045                 start = fb_tagp->framebuffer_common.framebuffer_addr;
2046                 end = start + fb_tagp->framebuffer_common.framebuffer_height *
2047                     fb_tagp->framebuffer_common.framebuffer_pitch;
2048 
2049                 /* VGA text memory is already mapped. */
2050                 if (fb_tagp->framebuffer_common.framebuffer_type !=
2051                     MULTIBOOT_FRAMEBUFFER_TYPE_EGA_TEXT) {
2052                         uint64_t vaddr;
2053 
2054 #if defined(_BOOT_TARGET_amd64)
2055                         vaddr = start;
2056 #else
2057                         vaddr = (uintptr_t)mem_alloc(end - start);
2058 #endif
2059                         fb->boot_fb_virt = vaddr;
2060                         if (map_debug) {
2061                                 dboot_printf("FB map pa=%" PRIx64 "..%"
2062                                     PRIx64 "\n", start, end);
2063                         }
2064 
2065                         pte_bits |= PT_NOCACHE;
2066                         if (PAT_support != 0)
2067                                 pte_bits |= PT_PAT_4K;
2068 
2069                         while (start < end) {
2070                                 map_pa_at_va(start, vaddr, 0);
2071                                 start += MMU_PAGESIZE;
2072                                 vaddr += MMU_PAGESIZE;
2073                         }
2074                         pte_bits &= ~PT_NOCACHE;
2075                         if (PAT_support != 0)
2076                                 pte_bits &= ~PT_PAT_4K;
2077                 }
2078         }
2079 #endif /* !__xpv */
2080 
2081         DBG_MSG("\nPage tables constructed\n");
2082 }
2083 
/*
 * Message passed to dboot_panic() when the kernel command line contains
 * the string "multiboot".
 */
#define NO_MULTIBOOT    \
        "multiboot is no longer used to boot the Solaris Operating System.\n" \
        "The grub entry should be changed to:\n" \
        "kernel$ /platform/i86pc/kernel/$ISADIR/unix\n" \
        "module$ /platform/i86pc/$ISADIR/boot_archive\n" \
        "See http://illumos.org/msg/SUNOS-8000-AK for details.\n"
2090 
2091 static void
2092 dboot_init_xboot_consinfo(void)
2093 {
2094         uintptr_t addr;
2095         /*
2096          * boot info must be 16 byte aligned for 64 bit kernel ABI
2097          */
2098         addr = (uintptr_t)boot_info;
2099         addr = (addr + 0xf) & ~0xf;
2100         bi = (struct xboot_info *)addr;
2101 
2102 #if !defined(__xpv)
2103         /*
2104          * fb info must be 16 byte aligned for 64 bit kernel ABI
2105          */
2106         addr = (uintptr_t)framebuffer;
2107         addr = (addr + 0xf) & ~0xf;
2108         fb = (boot_framebuffer_t *)addr;
2109         bi->bi_framebuffer = (native_ptr_t)(uintptr_t)fb;
2110 
2111         switch (multiboot_version) {
2112         case 1:
2113                 dboot_multiboot1_xboot_consinfo();
2114                 break;
2115         case 2:
2116                 dboot_multiboot2_xboot_consinfo();
2117                 break;
2118         default:
2119                 dboot_panic("Unknown multiboot version: %d\n",
2120                     multiboot_version);
2121                 break;
2122         }
2123         /*
2124          * Lookup environment module for the console. Complete module list
2125          * will be built after console setup.
2126          */
2127         dboot_find_console_modules();
2128 #endif
2129 }
2130 
2131 /*
2132  * Set up basic data from the boot loader.
2133  * The load_addr is part of AOUT kludge setup in dboot_grub.s, to support
2134  * 32-bit dboot code setup used to set up and start 64-bit kernel.
2135  * AOUT kludge does allow 32-bit boot loader, such as grub1, to load and
2136  * start 64-bit illumos kernel.
2137  */
2138 static void
2139 dboot_loader_init(void)
2140 {
2141 #if !defined(__xpv)
2142         mb_info = NULL;
2143         mb2_info = NULL;
2144 
2145         switch (mb_magic) {
2146         case MB_BOOTLOADER_MAGIC:
2147                 multiboot_version = 1;
2148                 mb_info = (multiboot_info_t *)(uintptr_t)mb_addr;
2149 #if defined(_BOOT_TARGET_amd64)
2150                 load_addr = mb_header.load_addr;
2151 #endif
2152                 break;
2153 
2154         case MULTIBOOT2_BOOTLOADER_MAGIC:
2155                 multiboot_version = 2;
2156                 mb2_info = (multiboot2_info_header_t *)(uintptr_t)mb_addr;
2157                 mb2_mmap_tagp = dboot_multiboot2_get_mmap_tagp(mb2_info);
2158 #if defined(_BOOT_TARGET_amd64)
2159                 load_addr = mb2_load_addr;
2160 #endif
2161                 break;
2162 
2163         default:
2164                 dboot_panic("Unknown bootloader magic: 0x%x\n", mb_magic);
2165                 break;
2166         }
2167 #endif  /* !defined(__xpv) */
2168 }
2169 
2170 /* Extract the kernel command line from [multi]boot information. */
2171 static char *
2172 dboot_loader_cmdline(void)
2173 {
2174         char *line = NULL;
2175 
2176 #if defined(__xpv)
2177         line = (char *)xen_info->cmd_line;
2178 #else /* __xpv */
2179 
2180         switch (multiboot_version) {
2181         case 1:
2182                 if (mb_info->flags & MB_INFO_CMDLINE)
2183                         line = (char *)mb_info->cmdline;
2184                 break;
2185 
2186         case 2:
2187                 line = dboot_multiboot2_cmdline(mb2_info);
2188                 break;
2189 
2190         default:
2191                 dboot_panic("Unknown multiboot version: %d\n",
2192                     multiboot_version);
2193                 break;
2194         }
2195 
2196 #endif /* __xpv */
2197 
2198         /*
2199          * Make sure we have valid pointer so the string operations
2200          * will not crash us.
2201          */
2202         if (line == NULL)
2203                 line = "";
2204 
2205         return (line);
2206 }
2207 
2208 static char *
2209 dboot_loader_name(void)
2210 {
2211 #if defined(__xpv)
2212         return (NULL);
2213 #else /* __xpv */
2214         multiboot_tag_string_t *tag;
2215 
2216         switch (multiboot_version) {
2217         case 1:
2218                 return ((char *)mb_info->boot_loader_name);
2219 
2220         case 2:
2221                 tag = dboot_multiboot2_find_tag(mb2_info,
2222                     MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME);
2223                 return (tag->mb_string);
2224         default:
2225                 dboot_panic("Unknown multiboot version: %d\n",
2226                     multiboot_version);
2227                 break;
2228         }
2229 
2230         return (NULL);
2231 #endif /* __xpv */
2232 }
2233 
2234 /*
2235  * startup_kernel has a pretty simple job. It builds pagetables which reflect
2236  * 1:1 mappings for all memory in use. It then also adds mappings for
2237  * the kernel nucleus at virtual address of target_kernel_text using large page
2238  * mappings. The page table pages are also accessible at 1:1 mapped
2239  * virtual addresses.
2240  */
2241 /*ARGSUSED*/
2242 void
2243 startup_kernel(void)
2244 {
2245         char *cmdline;
2246         char *bootloader;
2247 #if defined(__xpv)
2248         physdev_set_iopl_t set_iopl;
2249 #endif /* __xpv */
2250 
2251         bcons_init(NULL);       /* Set very early console to ttya. */
2252         dboot_loader_init();
2253         /*
2254          * At this point we are executing in a 32 bit real mode.
2255          */
2256 
2257         bootloader = dboot_loader_name();
2258         cmdline = dboot_loader_cmdline();
2259 
2260 #if defined(__xpv)
2261         /*
2262          * For dom0, before we initialize the console subsystem we'll
         * need to enable io operations, so set I/O privilege level to 1.
2264          */
2265         if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2266                 set_iopl.iopl = 1;
2267                 (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
2268         }
2269 #endif /* __xpv */
2270 
2271         dboot_init_xboot_consinfo();
2272         bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline;
2273         bcons_init(bi);         /* Now we can set the real console. */
2274 
2275         prom_debug = (find_boot_prop("prom_debug") != NULL);
2276         map_debug = (find_boot_prop("map_debug") != NULL);
2277 
2278 #if !defined(__xpv)
2279         dboot_multiboot_get_fwtables();
2280 #endif
2281         DBG_MSG("\n\nillumos prekernel set: ");
2282         DBG_MSG(cmdline);
2283         DBG_MSG("\n");
2284 
2285         if (bootloader != NULL && prom_debug) {
2286                 dboot_printf("Kernel loaded by: %s\n", bootloader);
2287 #if !defined(__xpv)
2288                 dboot_printf("Using multiboot %d boot protocol.\n",
2289                     multiboot_version);
2290 #endif
2291         }
2292 
2293         if (strstr(cmdline, "multiboot") != NULL) {
2294                 dboot_panic(NO_MULTIBOOT);
2295         }
2296 
2297         DBG((uintptr_t)bi);
2298 #if !defined(__xpv)
2299         DBG((uintptr_t)mb_info);
2300         DBG((uintptr_t)mb2_info);
2301         if (mb2_info != NULL)
2302                 DBG(mb2_info->mbi_total_size);
2303         DBG(bi->bi_acpi_rsdp);
2304         DBG(bi->bi_smbios);
2305         DBG(bi->bi_uefi_arch);
2306         DBG(bi->bi_uefi_systab);
2307 
2308         if (bi->bi_uefi_systab && prom_debug) {
2309                 if (bi->bi_uefi_arch == XBI_UEFI_ARCH_64) {
2310                         print_efi64((EFI_SYSTEM_TABLE64 *)(uintptr_t)
2311                             bi->bi_uefi_systab);
2312                 } else {
2313                         print_efi32((EFI_SYSTEM_TABLE32 *)(uintptr_t)
2314                             bi->bi_uefi_systab);
2315                 }
2316         }
2317 #endif
2318 
2319         /*
2320          * Need correct target_kernel_text value
2321          */
2322 #if defined(_BOOT_TARGET_amd64)
2323         target_kernel_text = KERNEL_TEXT_amd64;
2324 #elif defined(__xpv)
2325         target_kernel_text = KERNEL_TEXT_i386_xpv;
2326 #else
2327         target_kernel_text = KERNEL_TEXT_i386;
2328 #endif
2329         DBG(target_kernel_text);
2330 
2331 #if defined(__xpv)
2332 
2333         /*
2334          * XXPV Derive this stuff from CPUID / what the hypervisor has enabled
2335          */
2336 
2337 #if defined(_BOOT_TARGET_amd64)
2338         /*
2339          * 64-bit hypervisor.
2340          */
2341         amd64_support = 1;
2342         pae_support = 1;
2343 
2344 #else   /* _BOOT_TARGET_amd64 */
2345 
2346         /*
2347          * See if we are running on a PAE Hypervisor
2348          */
2349         {
2350                 xen_capabilities_info_t caps;
2351 
2352                 if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) != 0)
2353                         dboot_panic("HYPERVISOR_xen_version(caps) failed");
2354                 caps[sizeof (caps) - 1] = 0;
2355                 if (prom_debug)
2356                         dboot_printf("xen capabilities %s\n", caps);
2357                 if (strstr(caps, "x86_32p") != NULL)
2358                         pae_support = 1;
2359         }
2360 
2361 #endif  /* _BOOT_TARGET_amd64 */
2362         {
2363                 xen_platform_parameters_t p;
2364 
2365                 if (HYPERVISOR_xen_version(XENVER_platform_parameters, &p) != 0)
2366                         dboot_panic("HYPERVISOR_xen_version(parms) failed");
2367                 DBG(p.virt_start);
2368                 mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start);
2369         }
2370 
2371         /*
2372          * The hypervisor loads stuff starting at 1Gig
2373          */
2374         mfn_base = ONE_GIG;
2375         DBG(mfn_base);
2376 
2377         /*
2378          * enable writable page table mode for the hypervisor
2379          */
2380         if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
2381             VMASST_TYPE_writable_pagetables) < 0)
2382                 dboot_panic("HYPERVISOR_vm_assist(writable_pagetables) failed");
2383 
2384         /*
2385          * check for NX support
2386          */
2387         if (pae_support) {
2388                 uint32_t eax = 0x80000000;
2389                 uint32_t edx = get_cpuid_edx(&eax);
2390 
2391                 if (eax >= 0x80000001) {
2392                         eax = 0x80000001;
2393                         edx = get_cpuid_edx(&eax);
2394                         if (edx & CPUID_AMD_EDX_NX)
2395                                 NX_support = 1;
2396                 }
2397         }
2398 
2399         /*
2400          * check for PAT support
2401          */
2402         {
2403                 uint32_t eax = 1;
2404                 uint32_t edx = get_cpuid_edx(&eax);
2405 
2406                 if (edx & CPUID_INTC_EDX_PAT)
2407                         PAT_support = 1;
2408         }
2409 #if !defined(_BOOT_TARGET_amd64)
2410 
2411         /*
2412          * The 32-bit hypervisor uses segmentation to protect itself from
2413          * guests. This means when a guest attempts to install a flat 4GB
2414          * code or data descriptor the 32-bit hypervisor will protect itself
2415          * by silently shrinking the segment such that if the guest attempts
2416          * any access where the hypervisor lives a #gp fault is generated.
2417          * The problem is that some applications expect a full 4GB flat
2418          * segment for their current thread pointer and will use negative
2419          * offset segment wrap around to access data. TLS support in linux
2420          * brand is one example of this.
2421          *
2422          * The 32-bit hypervisor can catch the #gp fault in these cases
2423          * and emulate the access without passing the #gp fault to the guest
2424          * but only if VMASST_TYPE_4gb_segments is explicitly turned on.
2425          * Seems like this should have been the default.
         * Either way, we want the hypervisor -- and not Solaris -- to deal
         * with emulating these accesses.
2428          */
2429         if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
2430             VMASST_TYPE_4gb_segments) < 0)
2431                 dboot_panic("HYPERVISOR_vm_assist(4gb_segments) failed");
2432 #endif  /* !_BOOT_TARGET_amd64 */
2433 
2434 #else   /* __xpv */
2435 
2436         /*
2437          * use cpuid to enable MMU features
2438          */
2439         if (have_cpuid()) {
2440                 uint32_t eax, edx;
2441 
2442                 eax = 1;
2443                 edx = get_cpuid_edx(&eax);
2444                 if (edx & CPUID_INTC_EDX_PSE)
2445                         largepage_support = 1;
2446                 if (edx & CPUID_INTC_EDX_PGE)
2447                         pge_support = 1;
2448                 if (edx & CPUID_INTC_EDX_PAE)
2449                         pae_support = 1;
2450                 if (edx & CPUID_INTC_EDX_PAT)
2451                         PAT_support = 1;
2452 
2453                 eax = 0x80000000;
2454                 edx = get_cpuid_edx(&eax);
2455                 if (eax >= 0x80000001) {
2456                         eax = 0x80000001;
2457                         edx = get_cpuid_edx(&eax);
2458                         if (edx & CPUID_AMD_EDX_LM)
2459                                 amd64_support = 1;
2460                         if (edx & CPUID_AMD_EDX_NX)
2461                                 NX_support = 1;
2462                 }
2463         } else {
2464                 dboot_printf("cpuid not supported\n");
2465         }
2466 #endif /* __xpv */
2467 
2468 
2469 #if defined(_BOOT_TARGET_amd64)
2470         if (amd64_support == 0)
2471                 dboot_panic("long mode not supported, rebooting");
2472         else if (pae_support == 0)
2473                 dboot_panic("long mode, but no PAE; rebooting");
2474 #else
2475         /*
2476          * Allow the command line to over-ride use of PAE for 32 bit.
2477          */
2478         if (strstr(cmdline, "disablePAE=true") != NULL) {
2479                 pae_support = 0;
2480                 NX_support = 0;
2481                 amd64_support = 0;
2482         }
2483 #endif
2484 
2485         /*
2486          * initialize the simple memory allocator
2487          */
2488         init_mem_alloc();
2489 
2490 #if !defined(__xpv) && !defined(_BOOT_TARGET_amd64)
2491         /*
2492          * disable PAE on 32 bit h/w w/o NX and < 4Gig of memory
2493          */
2494         if (max_mem < FOUR_GIG && NX_support == 0)
2495                 pae_support = 0;
2496 #endif
2497 
2498         /*
2499          * configure mmu information
2500          */
2501         if (pae_support) {
2502                 shift_amt = shift_amt_pae;
2503                 ptes_per_table = 512;
2504                 pte_size = 8;
2505                 lpagesize = TWO_MEG;
2506 #if defined(_BOOT_TARGET_amd64)
2507                 top_level = 3;
2508 #else
2509                 top_level = 2;
2510 #endif
2511         } else {
2512                 pae_support = 0;
2513                 NX_support = 0;
2514                 shift_amt = shift_amt_nopae;
2515                 ptes_per_table = 1024;
2516                 pte_size = 4;
2517                 lpagesize = FOUR_MEG;
2518                 top_level = 1;
2519         }
2520 
2521         DBG(PAT_support);
2522         DBG(pge_support);
2523         DBG(NX_support);
2524         DBG(largepage_support);
2525         DBG(amd64_support);
2526         DBG(top_level);
2527         DBG(pte_size);
2528         DBG(ptes_per_table);
2529         DBG(lpagesize);
2530 
2531 #if defined(__xpv)
2532         ktext_phys = ONE_GIG;           /* from UNIX Mapfile */
2533 #else
2534         ktext_phys = FOUR_MEG;          /* from UNIX Mapfile */
2535 #endif
2536 
2537 #if !defined(__xpv) && defined(_BOOT_TARGET_amd64)
2538         /*
2539          * For grub, copy kernel bits from the ELF64 file to final place.
2540          */
2541         DBG_MSG("\nAllocating nucleus pages.\n");
2542         ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG);
2543 
2544         if (ktext_phys == 0)
2545                 dboot_panic("failed to allocate aligned kernel memory");
2546         DBG(load_addr);
2547         if (dboot_elfload64(load_addr) != 0)
2548                 dboot_panic("failed to parse kernel ELF image, rebooting");
2549 #endif
2550 
2551         DBG(ktext_phys);
2552 
2553         /*
2554          * Allocate page tables.
2555          */
2556         build_page_tables();
2557 
2558         /*
2559          * return to assembly code to switch to running kernel
2560          */
2561         entry_addr_low = (uint32_t)target_kernel_text;
2562         DBG(entry_addr_low);
2563         bi->bi_use_largepage = largepage_support;
2564         bi->bi_use_pae = pae_support;
2565         bi->bi_use_pge = pge_support;
2566         bi->bi_use_nx = NX_support;
2567 
2568 #if defined(__xpv)
2569 
2570         bi->bi_next_paddr = next_avail_addr - mfn_base;
2571         DBG(bi->bi_next_paddr);
2572         bi->bi_next_vaddr = (native_ptr_t)(uintptr_t)next_avail_addr;
2573         DBG(bi->bi_next_vaddr);
2574 
2575         /*
2576          * unmap unused pages in start area to make them available for DMA
2577          */
2578         while (next_avail_addr < scratch_end) {
2579                 (void) HYPERVISOR_update_va_mapping(next_avail_addr,
2580                     0, UVMF_INVLPG | UVMF_LOCAL);
2581                 next_avail_addr += MMU_PAGESIZE;
2582         }
2583 
2584         bi->bi_xen_start_info = (native_ptr_t)(uintptr_t)xen_info;
2585         DBG((uintptr_t)HYPERVISOR_shared_info);
2586         bi->bi_shared_info = (native_ptr_t)HYPERVISOR_shared_info;
2587         bi->bi_top_page_table = (uintptr_t)top_page_table - mfn_base;
2588 
2589 #else /* __xpv */
2590 
2591         bi->bi_next_paddr = next_avail_addr;
2592         DBG(bi->bi_next_paddr);
2593         bi->bi_next_vaddr = (native_ptr_t)(uintptr_t)next_avail_addr;
2594         DBG(bi->bi_next_vaddr);
2595         bi->bi_mb_version = multiboot_version;
2596 
2597         switch (multiboot_version) {
2598         case 1:
2599                 bi->bi_mb_info = (native_ptr_t)(uintptr_t)mb_info;
2600                 break;
2601         case 2:
2602                 bi->bi_mb_info = (native_ptr_t)(uintptr_t)mb2_info;
2603                 break;
2604         default:
2605                 dboot_panic("Unknown multiboot version: %d\n",
2606                     multiboot_version);
2607                 break;
2608         }
2609         bi->bi_top_page_table = (uintptr_t)top_page_table;
2610 
2611 #endif /* __xpv */
2612 
2613         bi->bi_kseg_size = FOUR_MEG;
2614         DBG(bi->bi_kseg_size);
2615 
2616 #ifndef __xpv
2617         if (map_debug)
2618                 dump_tables();
2619 #endif
2620 
2621 #ifndef __xpv
2622         /* Update boot info with FB data */
2623         fb->cursor.origin.x = fb_info.cursor.origin.x;
2624         fb->cursor.origin.y = fb_info.cursor.origin.y;
2625         fb->cursor.pos.x = fb_info.cursor.pos.x;
2626         fb->cursor.pos.y = fb_info.cursor.pos.y;
2627         fb->cursor.visible = fb_info.cursor.visible;
2628 #endif
2629 
2630         DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n");
2631 }