--- old/usr/src/uts/common/vm/seg_kmem.c
+++ new/usr/src/uts/common/vm/seg_kmem.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright 2016 Joyent, Inc.
24 24 */
25 25
26 26 #include <sys/types.h>
27 27 #include <sys/t_lock.h>
28 28 #include <sys/param.h>
29 29 #include <sys/sysmacros.h>
30 30 #include <sys/tuneable.h>
31 31 #include <sys/systm.h>
32 32 #include <sys/vm.h>
33 33 #include <sys/kmem.h>
34 34 #include <sys/vmem.h>
35 35 #include <sys/mman.h>
36 36 #include <sys/cmn_err.h>
37 37 #include <sys/debug.h>
38 38 #include <sys/dumphdr.h>
39 39 #include <sys/bootconf.h>
40 40 #include <sys/lgrp.h>
41 41 #include <vm/seg_kmem.h>
42 42 #include <vm/hat.h>
43 43 #include <vm/page.h>
44 44 #include <vm/vm_dep.h>
45 45 #include <vm/faultcode.h>
46 46 #include <sys/promif.h>
47 47 #include <vm/seg_kp.h>
48 48 #include <sys/bitmap.h>
49 49 #include <sys/mem_cage.h>
50 50
51 51 #ifdef __sparc
52 52 #include <sys/ivintr.h>
53 53 #include <sys/panic.h>
54 54 #endif
55 55
56 56 /*
57 57 * seg_kmem is the primary kernel memory segment driver. It
58 58 * maps the kernel heap [kernelheap, ekernelheap), module text,
59 59 * and all memory which was allocated before the VM was initialized
60 60 * into kas.
61 61 *
62 62 * Pages which belong to seg_kmem are hashed into &kvp vnode at
63 63 * an offset equal to (u_offset_t)virt_addr, and have p_lckcnt >= 1.
64 64 * They must never be paged out since segkmem_fault() is a no-op to
65 65 * prevent recursive faults.
66 66 *
67 67 * Currently, seg_kmem pages are sharelocked (p_sharelock == 1) on
68 68 * __x86 and are unlocked (p_sharelock == 0) on __sparc. Once __x86
69 69 * supports relocation the #ifdef kludges can be removed.
70 70 *
71 71 * seg_kmem pages may be subject to relocation by page_relocate(),
72 72 * provided that the HAT supports it; if this is so, segkmem_reloc
73 73 * will be set to a nonzero value. All boot time allocated memory as
74 74 * well as static memory is considered off limits to relocation.
75 75 * Pages are "relocatable" if p_state does not have P_NORELOC set, so
76 76 * we request P_NORELOC pages for memory that isn't safe to relocate.
77 77 *
78 78 * The kernel heap is logically divided up into four pieces:
79 79 *
80 80 * heap32_arena is for allocations that require 32-bit absolute
81 81 * virtual addresses (e.g. code that uses 32-bit pointers/offsets).
82 82 *
83 83 * heap_core is for allocations that require 2GB *relative*
84 84 * offsets; in other words all memory from heap_core is within
85 85 * 2GB of all other memory from the same arena. This is a requirement
86 86 * of the addressing modes of some processors in supervisor code.
87 87 *
88 88 * heap_arena is the general heap arena.
89 89 *
90 90 * static_arena is the static memory arena. Allocations from it
91 91 * are not subject to relocation so it is safe to use the memory
92 92 * physical address as well as the virtual address (e.g. the VA to
93 93 * PA translations are static). Caches may import from static_arena;
94 94 * all other static memory allocations should use static_alloc_arena.
95 95 *
96 96 * On some platforms which have limited virtual address space, seg_kmem
97 97 * may share [kernelheap, ekernelheap) with seg_kp; if this is so,
98 98 * segkp_bitmap is non-NULL, and each bit represents a page of virtual
99 99 * address space which is actually seg_kp mapped.
100 100 */
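/*
 * For example (an illustrative sketch, not code from this file; "len" is
 * a hypothetical size): a caller that needs memory whose VA-to-PA
 * translation must never change would allocate it from
 * static_alloc_arena, while ordinary wired heap memory comes from
 * heap_arena (typically via kmem_alloc()):
 *
 *	void *stable = vmem_alloc(static_alloc_arena, len, VM_SLEEP);
 *	...
 *	vmem_free(static_alloc_arena, stable, len);
 */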
101 101
102 102 extern ulong_t *segkp_bitmap; /* Is set if segkp is from the kernel heap */
103 103
104 104 char *kernelheap; /* start of primary kernel heap */
105 105 char *ekernelheap; /* end of primary kernel heap */
106 106 struct seg kvseg; /* primary kernel heap segment */
107 107 struct seg kvseg_core; /* "core" kernel heap segment */
108 108 struct seg kzioseg; /* Segment for zio mappings */
109 109 vmem_t *heap_arena; /* primary kernel heap arena */
110 110 vmem_t *heap_core_arena; /* core kernel heap arena */
111 111 char *heap_core_base; /* start of core kernel heap arena */
112 112 char *heap_lp_base; /* start of kernel large page heap arena */
113 113 char *heap_lp_end; /* end of kernel large page heap arena */
114 114 vmem_t *hat_memload_arena; /* HAT translation data */
115 115 struct seg kvseg32; /* 32-bit kernel heap segment */
116 116 vmem_t *heap32_arena; /* 32-bit kernel heap arena */
117 117 vmem_t *heaptext_arena; /* heaptext arena */
118 118 struct as kas; /* kernel address space */
119 119 int segkmem_reloc; /* enable/disable relocatable segkmem pages */
120 120 vmem_t *static_arena; /* arena for caches to import static memory */
121 121 vmem_t *static_alloc_arena; /* arena for allocating static memory */
122 122 vmem_t *zio_arena = NULL; /* arena for allocating zio memory */
123 123 vmem_t *zio_alloc_arena = NULL; /* arena for allocating zio memory */
124 124
125 125 /*
126 126 * seg_kmem driver can map part of the kernel heap with large pages.
127 127 * Currently this functionality is implemented for sparc platforms only.
128 128 *
129 129 * The large page size "segkmem_lpsize" for kernel heap is selected in the
130 130 * platform specific code. It can also be modified via /etc/system file.
131 131 * Setting segkmem_lpsize to PAGESIZE in /etc/system disables usage of large
132 132 * pages for kernel heap. "segkmem_lpshift" is adjusted appropriately to
133 133 * match segkmem_lpsize.
134 134 *
135 135 * At boot time we carve from kernel heap arena a range of virtual addresses
136 136 * that will be used for large page mappings. This range [heap_lp_base,
137 137 * heap_lp_end) is set up as a separate vmem arena - "heap_lp_arena". We also
138 138  * create "kmem_lp_arena" that caches memory already backed by large
139 139 * pages. kmem_lp_arena imports virtual segments from heap_lp_arena.
140 140 */
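/*
 * For example (a hypothetical /etc/system fragment; 0x2000 assumes an
 * 8K base page, as on sun4u), large pages for the kernel heap can be
 * disabled by forcing segkmem_lpsize down to PAGESIZE:
 *
 *	set segkmem_lpsize = 0x2000
 */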
141 141
142 142 size_t segkmem_lpsize;
143 143 static uint_t segkmem_lpshift = PAGESHIFT;
144 144 int segkmem_lpszc = 0;
145 145
146 146 size_t segkmem_kmemlp_quantum = 0x400000; /* 4MB */
147 147 size_t segkmem_heaplp_quantum;
148 148 vmem_t *heap_lp_arena;
149 149 static vmem_t *kmem_lp_arena;
150 150 static vmem_t *segkmem_ppa_arena;
151 151 static segkmem_lpcb_t segkmem_lpcb;
152 152
153 153 /*
154 154 * We use "segkmem_kmemlp_max" to limit the total amount of physical memory
155 155 * consumed by the large page heap. By default this parameter is set to 1/8 of
156 156 * physmem but can be adjusted through /etc/system either directly or
157 157 * indirectly by setting "segkmem_kmemlp_pcnt" to the percent of physmem
158 158 * we allow for large page heap.
159 159 */
160 160 size_t segkmem_kmemlp_max;
161 161 static uint_t segkmem_kmemlp_pcnt;
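
/*
 * For example (a hypothetical /etc/system fragment), to let the large
 * page heap consume up to a quarter of physmem instead of the default
 * 1/8:
 *
 *	set segkmem_kmemlp_pcnt = 25
 */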
162 162
163 163 /*
164 164  * Getting large pages for the kernel heap can be problematic due to
165 165  * physical memory fragmentation. That's why we allow preallocating
166 166 * "segkmem_kmemlp_min" bytes at boot time.
167 167 */
168 168 static size_t segkmem_kmemlp_min;
169 169
170 170 /*
171 171  * Throttling is used to avoid expensive attempts to allocate large
172 172  * pages for the kernel heap when many successive attempts to do so fail.
173 173 */
174 174 static ulong_t segkmem_lpthrottle_max = 0x400000;
175 175 static ulong_t segkmem_lpthrottle_start = 0x40;
176 176 static ulong_t segkmem_use_lpthrottle = 1;
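
/*
 * An observation on how the defaults above behave in segkmem_alloc_lp():
 * once the failure counter exceeds segkmem_lpthrottle_start (0x40),
 * large page allocation is retried only when the counter is a power of
 * two (0x80, 0x100, ...), so the interval between attempts doubles after
 * each failure until a successful allocation resets the counter or it is
 * clamped back to segkmem_lpthrottle_max / 4.
 */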
177 177
178 178 /*
179 179 * Freed pages accumulate on a garbage list until segkmem is ready,
180 180  * at which point we call segkmem_gc() to free them all.
181 181 */
182 182 typedef struct segkmem_gc_list {
183 183 struct segkmem_gc_list *gc_next;
184 184 vmem_t *gc_arena;
185 185 size_t gc_size;
186 186 } segkmem_gc_list_t;
187 187
188 188 static segkmem_gc_list_t *segkmem_gc_list;
189 189
190 190 /*
191 191 * Allocations from the hat_memload arena add VM_MEMLOAD to their
192 192 * vmflags so that segkmem_xalloc() can inform the hat layer that it needs
193 193 * to take steps to prevent infinite recursion. HAT allocations also
194 194 * must be non-relocatable to prevent recursive page faults.
195 195 */
196 196 static void *
197 197 hat_memload_alloc(vmem_t *vmp, size_t size, int flags)
198 198 {
199 199 flags |= (VM_MEMLOAD | VM_NORELOC);
200 200 return (segkmem_alloc(vmp, size, flags));
201 201 }
202 202
203 203 /*
204 204 * Allocations from static_arena arena (or any other arena that uses
205 205 * segkmem_alloc_permanent()) require non-relocatable (permanently
206 206 * wired) memory pages, since these pages are referenced by physical
207 207 * as well as virtual address.
208 208 */
209 209 void *
210 210 segkmem_alloc_permanent(vmem_t *vmp, size_t size, int flags)
211 211 {
212 212 return (segkmem_alloc(vmp, size, flags | VM_NORELOC));
213 213 }
214 214
215 215 /*
216 216 * Initialize kernel heap boundaries.
217 217 */
218 218 void
219 219 kernelheap_init(
220 220 void *heap_start,
221 221 void *heap_end,
222 222 char *first_avail,
223 223 void *core_start,
224 224 void *core_end)
225 225 {
226 226 uintptr_t textbase;
227 227 size_t core_size;
228 228 size_t heap_size;
229 229 vmem_t *heaptext_parent;
230 230 size_t heap_lp_size = 0;
231 231 #ifdef __sparc
232 232 size_t kmem64_sz = kmem64_aligned_end - kmem64_base;
233 233 #endif /* __sparc */
234 234
235 235 kernelheap = heap_start;
236 236 ekernelheap = heap_end;
237 237
238 238 #ifdef __sparc
239 239 heap_lp_size = (((uintptr_t)heap_end - (uintptr_t)heap_start) / 4);
240 240 /*
241 241 * Bias heap_lp start address by kmem64_sz to reduce collisions
242 242 * in 4M kernel TSB between kmem64 area and heap_lp
243 243 */
244 244 kmem64_sz = P2ROUNDUP(kmem64_sz, MMU_PAGESIZE256M);
245 245 if (kmem64_sz <= heap_lp_size / 2)
246 246 heap_lp_size -= kmem64_sz;
247 247 heap_lp_base = ekernelheap - heap_lp_size;
248 248 heap_lp_end = heap_lp_base + heap_lp_size;
249 249 #endif /* __sparc */
250 250
251 251 /*
252 252 * If this platform has a 'core' heap area, then the space for
253 253 * overflow module text should be carved out of the end of that
254 254 * heap. Otherwise, it gets carved out of the general purpose
255 255 * heap.
256 256 */
257 257 core_size = (uintptr_t)core_end - (uintptr_t)core_start;
258 258 if (core_size > 0) {
259 259 ASSERT(core_size >= HEAPTEXT_SIZE);
260 260 textbase = (uintptr_t)core_end - HEAPTEXT_SIZE;
261 261 core_size -= HEAPTEXT_SIZE;
262 262 }
263 263 #ifndef __sparc
264 264 else {
265 265 ekernelheap -= HEAPTEXT_SIZE;
266 266 textbase = (uintptr_t)ekernelheap;
267 267 }
268 268 #endif
269 269
270 270 heap_size = (uintptr_t)ekernelheap - (uintptr_t)kernelheap;
271 271 heap_arena = vmem_init("heap", kernelheap, heap_size, PAGESIZE,
272 272 segkmem_alloc, segkmem_free);
273 273
274 274 if (core_size > 0) {
275 275 heap_core_arena = vmem_create("heap_core", core_start,
276 276 core_size, PAGESIZE, NULL, NULL, NULL, 0, VM_SLEEP);
277 277 heap_core_base = core_start;
278 278 } else {
279 279 heap_core_arena = heap_arena;
280 280 heap_core_base = kernelheap;
281 281 }
282 282
283 283 /*
284 284 	 * Reserve space for the large page heap. If large pages for the
285 285 	 * kernel heap are enabled, the large page heap arena will be created
286 286 	 * later in the boot sequence in segkmem_heap_lp_init(). Otherwise the
287 287 	 * allocated range will be returned back to the heap_arena.
288 288 */
289 289 if (heap_lp_size) {
290 290 (void) vmem_xalloc(heap_arena, heap_lp_size, PAGESIZE, 0, 0,
291 291 heap_lp_base, heap_lp_end,
292 292 VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
293 293 }
294 294
295 295 /*
296 296 * Remove the already-spoken-for memory range [kernelheap, first_avail).
297 297 */
298 298 (void) vmem_xalloc(heap_arena, first_avail - kernelheap, PAGESIZE,
299 299 0, 0, kernelheap, first_avail, VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
300 300
301 301 #ifdef __sparc
302 302 heap32_arena = vmem_create("heap32", (void *)SYSBASE32,
303 303 SYSLIMIT32 - SYSBASE32 - HEAPTEXT_SIZE, PAGESIZE, NULL,
304 304 NULL, NULL, 0, VM_SLEEP);
305 305 /*
306 306 	 * The PROM claims the physical and virtual resources used by panicbuf
307 307 	 * and intr_vec_table. So reserve space for panicbuf, intr_vec_table,
308 308 	 * and the reserved interrupt vector data structures from the 32-bit heap.
309 309 */
310 310 (void) vmem_xalloc(heap32_arena, PANICBUFSIZE, PAGESIZE, 0, 0,
311 311 panicbuf, panicbuf + PANICBUFSIZE,
312 312 VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
313 313
314 314 (void) vmem_xalloc(heap32_arena, IVSIZE, PAGESIZE, 0, 0,
315 315 intr_vec_table, (caddr_t)intr_vec_table + IVSIZE,
316 316 VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
317 317
318 318 textbase = SYSLIMIT32 - HEAPTEXT_SIZE;
319 319 heaptext_parent = NULL;
320 320 #else /* __sparc */
321 321 heap32_arena = heap_core_arena;
322 322 heaptext_parent = heap_core_arena;
323 323 #endif /* __sparc */
324 324
325 325 heaptext_arena = vmem_create("heaptext", (void *)textbase,
326 326 HEAPTEXT_SIZE, PAGESIZE, NULL, NULL, heaptext_parent, 0, VM_SLEEP);
327 327
328 328 /*
329 329 * Create a set of arenas for memory with static translations
330 330 * (e.g. VA -> PA translations cannot change). Since using
331 331 * kernel pages by physical address implies it isn't safe to
332 332 * walk across page boundaries, the static_arena quantum must
333 333 * be PAGESIZE. Any kmem caches that require static memory
334 334 * should source from static_arena, while direct allocations
335 335 * should only use static_alloc_arena.
336 336 */
337 337 static_arena = vmem_create("static", NULL, 0, PAGESIZE,
338 338 segkmem_alloc_permanent, segkmem_free, heap_arena, 0, VM_SLEEP);
339 339 static_alloc_arena = vmem_create("static_alloc", NULL, 0,
340 340 sizeof (uint64_t), vmem_alloc, vmem_free, static_arena,
341 341 0, VM_SLEEP);
342 342
343 343 /*
344 344 * Create an arena for translation data (ptes, hmes, or hblks).
345 345 * We need an arena for this because hat_memload() is essential
346 346 * to vmem_populate() (see comments in common/os/vmem.c).
347 347 *
348 348 * Note: any kmem cache that allocates from hat_memload_arena
349 349 * must be created as a KMC_NOHASH cache (i.e. no external slab
350 350 * and bufctl structures to allocate) so that slab creation doesn't
351 351 * require anything more than a single vmem_alloc().
352 352 */
353 353 hat_memload_arena = vmem_create("hat_memload", NULL, 0, PAGESIZE,
354 354 hat_memload_alloc, segkmem_free, heap_arena, 0,
355 355 VM_SLEEP | VMC_POPULATOR | VMC_DUMPSAFE);
356 356 }
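
/*
 * For example (a hypothetical cache; the name and bufsize are
 * illustrative only), a kmem cache sourcing translation data from
 * hat_memload_arena must be created KMC_NOHASH, per the comment above,
 * so that slab creation needs nothing beyond a single vmem_alloc():
 *
 *	cache = kmem_cache_create("hme_cache", hme_size, 0,
 *	    NULL, NULL, NULL, NULL, hat_memload_arena, KMC_NOHASH);
 */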
357 357
358 358 void
359 359 boot_mapin(caddr_t addr, size_t size)
360 360 {
361 361 caddr_t eaddr;
362 362 page_t *pp;
363 363 pfn_t pfnum;
364 364
365 365 if (page_resv(btop(size), KM_NOSLEEP) == 0)
366 366 panic("boot_mapin: page_resv failed");
367 367
368 368 for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
369 369 pfnum = va_to_pfn(addr);
370 370 if (pfnum == PFN_INVALID)
371 371 continue;
372 372 if ((pp = page_numtopp_nolock(pfnum)) == NULL)
373 373 panic("boot_mapin(): No pp for pfnum = %lx", pfnum);
374 374
375 375 /*
376 376 		 * We must break up any large pages that may have constituent
377 377 		 * pages being utilized for BOP_ALLOC()s before calling
378 378 		 * page_numtopp(). The locking code (i.e. page_reclaim())
379 379 		 * can't handle them.
380 380 */
381 381 if (pp->p_szc != 0)
382 382 page_boot_demote(pp);
383 383
384 384 pp = page_numtopp(pfnum, SE_EXCL);
385 385 if (pp == NULL || PP_ISFREE(pp))
386 386 			panic("boot_mapin: pp is NULL or free");
387 387
388 388 /*
389 389 * If the cage is on but doesn't yet contain this page,
390 390 * mark it as non-relocatable.
391 391 */
392 392 if (kcage_on && !PP_ISNORELOC(pp)) {
393 393 PP_SETNORELOC(pp);
394 394 PLCNT_XFER_NORELOC(pp);
395 395 }
396 396
397 397 (void) page_hashin(pp, &kvp, (u_offset_t)(uintptr_t)addr, NULL);
398 398 pp->p_lckcnt = 1;
399 399 #if defined(__x86)
400 400 page_downgrade(pp);
401 401 #else
402 402 page_unlock(pp);
403 403 #endif
404 404 }
405 405 }
406 406
407 407 /*
408 408 * Get pages from boot and hash them into the kernel's vp.
409 409 * Used after page structs have been allocated, but before segkmem is ready.
410 410 */
411 411 void *
412 412 boot_alloc(void *inaddr, size_t size, uint_t align)
413 413 {
414 414 caddr_t addr = inaddr;
415 415
416 416 if (bootops == NULL)
417 417 prom_panic("boot_alloc: attempt to allocate memory after "
418 418 "BOP_GONE");
419 419
420 420 size = ptob(btopr(size));
421 421 #ifdef __sparc
422 422 if (bop_alloc_chunk(addr, size, align) != (caddr_t)addr)
423 423 panic("boot_alloc: bop_alloc_chunk failed");
424 424 #else
425 425 if (BOP_ALLOC(bootops, addr, size, align) != addr)
426 426 panic("boot_alloc: BOP_ALLOC failed");
427 427 #endif
428 428 boot_mapin((caddr_t)addr, size);
429 429 return (addr);
430 430 }
431 431
432 432 static void
433 433 segkmem_badop()
434 434 {
435 435 panic("segkmem_badop");
436 436 }
437 437
438 438 #define SEGKMEM_BADOP(t) (t(*)())segkmem_badop
439 439
440 440 /*ARGSUSED*/
441 441 static faultcode_t
442 442 segkmem_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t size,
443 443 enum fault_type type, enum seg_rw rw)
444 444 {
445 445 pgcnt_t npages;
446 446 spgcnt_t pg;
447 447 page_t *pp;
448 448 struct vnode *vp = seg->s_data;
449 449
450 450 ASSERT(RW_READ_HELD(&seg->s_as->a_lock));
451 451
452 452 if (seg->s_as != &kas || size > seg->s_size ||
453 453 addr < seg->s_base || addr + size > seg->s_base + seg->s_size)
454 454 panic("segkmem_fault: bad args");
455 455
456 456 /*
457 457 	 * If it is one of the segkp pages, call segkp_fault.
458 458 */
459 459 if (segkp_bitmap && seg == &kvseg &&
460 460 BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
461 461 return (SEGOP_FAULT(hat, segkp, addr, size, type, rw));
462 462
463 463 if (rw != S_READ && rw != S_WRITE && rw != S_OTHER)
464 464 return (FC_NOSUPPORT);
465 465
466 466 npages = btopr(size);
467 467
468 468 switch (type) {
469 469 case F_SOFTLOCK: /* lock down already-loaded translations */
470 470 for (pg = 0; pg < npages; pg++) {
471 471 pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr,
472 472 SE_SHARED);
473 473 if (pp == NULL) {
474 474 /*
475 475 * Hmm, no page. Does a kernel mapping
476 476 * exist for it?
477 477 */
478 478 if (!hat_probe(kas.a_hat, addr)) {
479 479 addr -= PAGESIZE;
480 480 while (--pg >= 0) {
481 481 pp = page_find(vp, (u_offset_t)
482 482 (uintptr_t)addr);
483 483 if (pp)
484 484 page_unlock(pp);
485 485 addr -= PAGESIZE;
486 486 }
487 487 return (FC_NOMAP);
488 488 }
489 489 }
490 490 addr += PAGESIZE;
491 491 }
492 492 if (rw == S_OTHER)
493 493 hat_reserve(seg->s_as, addr, size);
494 494 return (0);
495 495 case F_SOFTUNLOCK:
496 496 while (npages--) {
497 497 pp = page_find(vp, (u_offset_t)(uintptr_t)addr);
498 498 if (pp)
499 499 page_unlock(pp);
500 500 addr += PAGESIZE;
501 501 }
502 502 return (0);
503 503 default:
504 504 return (FC_NOSUPPORT);
505 505 }
506 506 /*NOTREACHED*/
507 507 }
508 508
509 509 static int
510 510 segkmem_setprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
511 511 {
512 512 ASSERT(RW_LOCK_HELD(&seg->s_as->a_lock));
513 513
514 514 if (seg->s_as != &kas || size > seg->s_size ||
515 515 addr < seg->s_base || addr + size > seg->s_base + seg->s_size)
516 516 panic("segkmem_setprot: bad args");
517 517
518 518 /*
519 519 	 * If it is one of the segkp pages, call segkp.
520 520 */
521 521 if (segkp_bitmap && seg == &kvseg &&
522 522 BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
523 523 return (SEGOP_SETPROT(segkp, addr, size, prot));
524 524
525 525 if (prot == 0)
526 526 hat_unload(kas.a_hat, addr, size, HAT_UNLOAD);
527 527 else
528 528 hat_chgprot(kas.a_hat, addr, size, prot);
529 529 return (0);
530 530 }
531 531
532 532 /*
533 533 * This is a dummy segkmem function overloaded to call segkp
534 534 * when segkp is under the heap.
535 535 */
536 536 /* ARGSUSED */
537 537 static int
538 538 segkmem_checkprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
539 539 {
540 540 ASSERT(RW_LOCK_HELD(&seg->s_as->a_lock));
541 541
542 542 if (seg->s_as != &kas)
543 543 segkmem_badop();
544 544
545 545 /*
546 546 	 * If it is one of the segkp pages, call into segkp.
547 547 */
548 548 if (segkp_bitmap && seg == &kvseg &&
549 549 BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
550 550 return (SEGOP_CHECKPROT(segkp, addr, size, prot));
551 551
552 552 segkmem_badop();
553 553 return (0);
554 554 }
555 555
556 556 /*
557 557 * This is a dummy segkmem function overloaded to call segkp
558 558 * when segkp is under the heap.
559 559 */
560 560 /* ARGSUSED */
561 561 static int
562 562 segkmem_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
563 563 {
564 564 ASSERT(RW_LOCK_HELD(&seg->s_as->a_lock));
565 565
566 566 if (seg->s_as != &kas)
567 567 segkmem_badop();
568 568
569 569 /*
570 570 	 * If it is one of the segkp pages, call into segkp.
571 571 */
572 572 if (segkp_bitmap && seg == &kvseg &&
573 573 BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
574 574 return (SEGOP_KLUSTER(segkp, addr, delta));
575 575
576 576 segkmem_badop();
577 577 return (0);
578 578 }
579 579
580 580 static void
581 581 segkmem_xdump_range(void *arg, void *start, size_t size)
582 582 {
583 583 struct as *as = arg;
584 584 caddr_t addr = start;
585 585 caddr_t addr_end = addr + size;
586 586
587 587 while (addr < addr_end) {
588 588 pfn_t pfn = hat_getpfnum(kas.a_hat, addr);
589 589 if (pfn != PFN_INVALID && pfn <= physmax && pf_is_memory(pfn))
590 590 dump_addpage(as, addr, pfn);
591 591 addr += PAGESIZE;
592 592 dump_timeleft = dump_timeout;
593 593 }
594 594 }
595 595
596 596 static void
597 597 segkmem_dump_range(void *arg, void *start, size_t size)
598 598 {
599 599 caddr_t addr = start;
600 600 caddr_t addr_end = addr + size;
601 601
602 602 /*
603 603 * If we are about to start dumping the range of addresses we
604 604 	 * carved out of the kernel heap for the large page heap, walk
605 605 	 * heap_lp_arena to find which segments are actually populated.
606 606 */
607 607 if (SEGKMEM_USE_LARGEPAGES &&
608 608 addr == heap_lp_base && addr_end == heap_lp_end &&
609 609 vmem_size(heap_lp_arena, VMEM_ALLOC) < size) {
610 610 vmem_walk(heap_lp_arena, VMEM_ALLOC | VMEM_REENTRANT,
611 611 segkmem_xdump_range, arg);
612 612 } else {
613 613 segkmem_xdump_range(arg, start, size);
614 614 }
615 615 }
616 616
617 617 static void
618 618 segkmem_dump(struct seg *seg)
619 619 {
620 620 /*
621 621 * The kernel's heap_arena (represented by kvseg) is a very large
622 622 * VA space, most of which is typically unused. To speed up dumping
623 623 * we use vmem_walk() to quickly find the pieces of heap_arena that
624 624 * are actually in use. We do the same for heap32_arena and
625 625 * heap_core.
626 626 *
627 627 * We specify VMEM_REENTRANT to vmem_walk() because dump_addpage()
628 628 * may ultimately need to allocate memory. Reentrant walks are
629 629 * necessarily imperfect snapshots. The kernel heap continues
630 630 * to change during a live crash dump, for example. For a normal
631 631 * crash dump, however, we know that there won't be any other threads
632 632 * messing with the heap. Therefore, at worst, we may fail to dump
633 633 * the pages that get allocated by the act of dumping; but we will
634 634 * always dump every page that was allocated when the walk began.
635 635 *
636 636 * The other segkmem segments are dense (fully populated), so there's
637 637 * no need to use this technique when dumping them.
638 638 *
639 639 * Note: when adding special dump handling for any new sparsely-
640 640 * populated segments, be sure to add similar handling to the ::kgrep
641 641 * code in mdb.
642 642 */
643 643 if (seg == &kvseg) {
644 644 vmem_walk(heap_arena, VMEM_ALLOC | VMEM_REENTRANT,
645 645 segkmem_dump_range, seg->s_as);
646 646 #ifndef __sparc
647 647 vmem_walk(heaptext_arena, VMEM_ALLOC | VMEM_REENTRANT,
648 648 segkmem_dump_range, seg->s_as);
649 649 #endif
650 650 } else if (seg == &kvseg_core) {
651 651 vmem_walk(heap_core_arena, VMEM_ALLOC | VMEM_REENTRANT,
652 652 segkmem_dump_range, seg->s_as);
653 653 } else if (seg == &kvseg32) {
654 654 vmem_walk(heap32_arena, VMEM_ALLOC | VMEM_REENTRANT,
655 655 segkmem_dump_range, seg->s_as);
656 656 vmem_walk(heaptext_arena, VMEM_ALLOC | VMEM_REENTRANT,
657 657 segkmem_dump_range, seg->s_as);
658 658 } else if (seg == &kzioseg) {
659 659 /*
660 660 * We don't want to dump pages attached to kzioseg since they
661 661 * contain file data from ZFS. If this page's segment is
662 662 		 * kzioseg, return instead of writing it to the dump device.
663 663 */
664 664 return;
665 665 } else {
666 666 segkmem_dump_range(seg->s_as, seg->s_base, seg->s_size);
667 667 }
668 668 }
669 669
670 670 /*
671 671 * lock/unlock kmem pages over a given range [addr, addr+len).
672 672 * Returns a shadow list of pages in ppp. If there are holes
673 673 * in the range (e.g. some of the kernel mappings do not have
674 674 * underlying page_ts) returns ENOTSUP so that as_pagelock()
675 675 * will handle the range via as_fault(F_SOFTLOCK).
676 676 */
677 677 /*ARGSUSED*/
678 678 static int
679 679 segkmem_pagelock(struct seg *seg, caddr_t addr, size_t len,
680 680 page_t ***ppp, enum lock_type type, enum seg_rw rw)
681 681 {
682 682 page_t **pplist, *pp;
683 683 pgcnt_t npages;
684 684 spgcnt_t pg;
685 685 size_t nb;
686 686 struct vnode *vp = seg->s_data;
687 687
688 688 ASSERT(ppp != NULL);
689 689
690 690 /*
691 691 	 * If it is one of the segkp pages, call into segkp.
692 692 */
693 693 if (segkp_bitmap && seg == &kvseg &&
694 694 BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
695 695 return (SEGOP_PAGELOCK(segkp, addr, len, ppp, type, rw));
696 696
697 697 npages = btopr(len);
698 698 nb = sizeof (page_t *) * npages;
699 699
700 700 if (type == L_PAGEUNLOCK) {
701 701 pplist = *ppp;
702 702 ASSERT(pplist != NULL);
703 703
704 704 for (pg = 0; pg < npages; pg++) {
705 705 pp = pplist[pg];
706 706 page_unlock(pp);
707 707 }
708 708 kmem_free(pplist, nb);
709 709 return (0);
710 710 }
711 711
712 712 ASSERT(type == L_PAGELOCK);
713 713
714 714 pplist = kmem_alloc(nb, KM_NOSLEEP);
715 715 if (pplist == NULL) {
716 716 *ppp = NULL;
717 717 return (ENOTSUP); /* take the slow path */
718 718 }
719 719
720 720 for (pg = 0; pg < npages; pg++) {
721 721 pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr, SE_SHARED);
722 722 if (pp == NULL) {
723 723 while (--pg >= 0)
724 724 page_unlock(pplist[pg]);
725 725 kmem_free(pplist, nb);
726 726 *ppp = NULL;
727 727 return (ENOTSUP);
728 728 }
729 729 pplist[pg] = pp;
730 730 addr += PAGESIZE;
731 731 }
732 732
733 733 *ppp = pplist;
734 734 return (0);
735 735 }
736 736
737 737 /*
738 738 * This is a dummy segkmem function overloaded to call segkp
739 739 * when segkp is under the heap.
740 740 */
741 741 /* ARGSUSED */
742 742 static int
743 743 segkmem_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
744 744 {
745 745 ASSERT(RW_LOCK_HELD(&seg->s_as->a_lock));
746 746
747 747 if (seg->s_as != &kas)
748 748 segkmem_badop();
749 749
750 750 /*
751 751 	 * If it is one of the segkp pages, call into segkp.
752 752 */
753 753 if (segkp_bitmap && seg == &kvseg &&
754 754 BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
755 755 return (SEGOP_GETMEMID(segkp, addr, memidp));
756 756
757 757 segkmem_badop();
758 758 return (0);
759 759 }
760 760
761 761 /*ARGSUSED*/
762 762 static lgrp_mem_policy_info_t *
763 763 segkmem_getpolicy(struct seg *seg, caddr_t addr)
764 764 {
765 765 return (NULL);
766 766 }
767 767
768 768 /*ARGSUSED*/
769 769 static int
770 770 segkmem_capable(struct seg *seg, segcapability_t capability)
771 771 {
772 772 if (capability == S_CAPABILITY_NOMINFLT)
773 773 return (1);
774 774 return (0);
775 775 }
776 776
777 777 struct seg_ops segkmem_ops = {
778 778 SEGKMEM_BADOP(int), /* dup */
779 779 SEGKMEM_BADOP(int), /* unmap */
780 780 SEGKMEM_BADOP(void), /* free */
781 781 segkmem_fault,
782 782 SEGKMEM_BADOP(faultcode_t), /* faulta */
783 783 segkmem_setprot,
784 784 segkmem_checkprot,
785 785 segkmem_kluster,
786 786 SEGKMEM_BADOP(size_t), /* swapout */
787 787 SEGKMEM_BADOP(int), /* sync */
788 788 SEGKMEM_BADOP(size_t), /* incore */
789 789 SEGKMEM_BADOP(int), /* lockop */
790 790 SEGKMEM_BADOP(int), /* getprot */
791 791 SEGKMEM_BADOP(u_offset_t), /* getoffset */
792 792 SEGKMEM_BADOP(int), /* gettype */
793 793 SEGKMEM_BADOP(int), /* getvp */
794 794 SEGKMEM_BADOP(int), /* advise */
795 795 segkmem_dump,
796 796 segkmem_pagelock,
797 797 SEGKMEM_BADOP(int), /* setpgsz */
798 798 segkmem_getmemid,
799 799 segkmem_getpolicy, /* getpolicy */
800 800 segkmem_capable, /* capable */
801 801 seg_inherit_notsup /* inherit */
802 802 };
803 803
804 804 int
805 805 segkmem_zio_create(struct seg *seg)
806 806 {
807 807 ASSERT(seg->s_as == &kas && RW_WRITE_HELD(&kas.a_lock));
808 808 seg->s_ops = &segkmem_ops;
809 809 seg->s_data = &zvp;
810 810 kas.a_size += seg->s_size;
811 811 return (0);
812 812 }
813 813
814 814 int
815 815 segkmem_create(struct seg *seg)
816 816 {
817 817 ASSERT(seg->s_as == &kas && RW_WRITE_HELD(&kas.a_lock));
818 818 seg->s_ops = &segkmem_ops;
819 819 seg->s_data = &kvp;
820 820 kas.a_size += seg->s_size;
821 821 return (0);
822 822 }
823 823
824 824 /*ARGSUSED*/
825 825 page_t *
826 826 segkmem_page_create(void *addr, size_t size, int vmflag, void *arg)
827 827 {
828 828 struct seg kseg;
829 829 int pgflags;
830 830 struct vnode *vp = arg;
831 831
832 832 if (vp == NULL)
833 833 vp = &kvp;
834 834
835 835 kseg.s_as = &kas;
836 836 pgflags = PG_EXCL;
837 837
838 838 if (segkmem_reloc == 0 || (vmflag & VM_NORELOC))
839 839 pgflags |= PG_NORELOC;
840 840 if ((vmflag & VM_NOSLEEP) == 0)
841 841 pgflags |= PG_WAIT;
842 842 if (vmflag & VM_PANIC)
843 843 pgflags |= PG_PANIC;
844 844 if (vmflag & VM_PUSHPAGE)
845 845 pgflags |= PG_PUSHPAGE;
846 846 if (vmflag & VM_NORMALPRI) {
847 847 ASSERT(vmflag & VM_NOSLEEP);
848 848 pgflags |= PG_NORMALPRI;
849 849 }
850 850
851 851 return (page_create_va(vp, (u_offset_t)(uintptr_t)addr, size,
852 852 pgflags, &kseg, addr));
853 853 }
854 854
855 855 /*
856 856 * Allocate pages to back the virtual address range [addr, addr + size).
857 857 * If addr is NULL, allocate the virtual address space as well.
858 858 */
859 859 void *
860 860 segkmem_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag, uint_t attr,
861 861 page_t *(*page_create_func)(void *, size_t, int, void *), void *pcarg)
862 862 {
863 863 page_t *ppl;
864 864 caddr_t addr = inaddr;
865 865 pgcnt_t npages = btopr(size);
866 866 int allocflag;
867 867
868 868 if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
869 869 return (NULL);
870 870
871 871 ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
872 872
873 873 if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
874 874 if (inaddr == NULL)
875 875 vmem_free(vmp, addr, size);
876 876 return (NULL);
877 877 }
878 878
879 879 ppl = page_create_func(addr, size, vmflag, pcarg);
880 880 if (ppl == NULL) {
881 881 if (inaddr == NULL)
882 882 vmem_free(vmp, addr, size);
883 883 page_unresv(npages);
884 884 return (NULL);
885 885 }
886 886
887 887 /*
888 888 * Under certain conditions, we need to let the HAT layer know
889 889 * that it cannot safely allocate memory. Allocations from
890 890 * the hat_memload vmem arena always need this, to prevent
891 891 * infinite recursion.
892 892 *
893 893 * In addition, the x86 hat cannot safely do memory
894 894 * allocations while in vmem_populate(), because there
895 895 * is no simple bound on its usage.
896 896 */
897 897 if (vmflag & VM_MEMLOAD)
898 898 allocflag = HAT_NO_KALLOC;
899 899 #if defined(__x86)
900 900 else if (vmem_is_populator())
901 901 allocflag = HAT_NO_KALLOC;
902 902 #endif
903 903 else
904 904 allocflag = 0;
905 905
906 906 while (ppl != NULL) {
907 907 page_t *pp = ppl;
908 908 page_sub(&ppl, pp);
909 909 ASSERT(page_iolock_assert(pp));
910 910 ASSERT(PAGE_EXCL(pp));
911 911 page_io_unlock(pp);
912 912 hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset, pp,
913 913 (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
914 914 HAT_LOAD_LOCK | allocflag);
915 915 pp->p_lckcnt = 1;
916 916 #if defined(__x86)
917 917 page_downgrade(pp);
918 918 #else
919 919 if (vmflag & SEGKMEM_SHARELOCKED)
920 920 page_downgrade(pp);
921 921 else
922 922 page_unlock(pp);
923 923 #endif
924 924 }
925 925
926 926 return (addr);
927 927 }
928 928
929 929 static void *
930 930 segkmem_alloc_vn(vmem_t *vmp, size_t size, int vmflag, struct vnode *vp)
931 931 {
932 932 void *addr;
933 933 segkmem_gc_list_t *gcp, **prev_gcpp;
934 934
935 935 ASSERT(vp != NULL);
936 936
937 937 if (kvseg.s_base == NULL) {
938 938 #ifndef __sparc
939 939 if (bootops->bsys_alloc == NULL)
940 940 halt("Memory allocation between bop_alloc() and "
941 941 "kmem_alloc().\n");
942 942 #endif
943 943
944 944 /*
945 945 * There's not a lot of memory to go around during boot,
946 946 * so recycle it if we can.
947 947 */
948 948 for (prev_gcpp = &segkmem_gc_list; (gcp = *prev_gcpp) != NULL;
949 949 prev_gcpp = &gcp->gc_next) {
950 950 if (gcp->gc_arena == vmp && gcp->gc_size == size) {
951 951 *prev_gcpp = gcp->gc_next;
952 952 return (gcp);
953 953 }
954 954 }
955 955
956 956 addr = vmem_alloc(vmp, size, vmflag | VM_PANIC);
957 957 if (boot_alloc(addr, size, BO_NO_ALIGN) != addr)
958 958 panic("segkmem_alloc: boot_alloc failed");
959 959 return (addr);
960 960 }
961 961 return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
962 962 segkmem_page_create, vp));
963 963 }
964 964
965 965 void *
966 966 segkmem_alloc(vmem_t *vmp, size_t size, int vmflag)
967 967 {
968 968 return (segkmem_alloc_vn(vmp, size, vmflag, &kvp));
969 969 }
970 970
971 971 void *
972 972 segkmem_zio_alloc(vmem_t *vmp, size_t size, int vmflag)
973 973 {
974 974 return (segkmem_alloc_vn(vmp, size, vmflag, &zvp));
975 975 }
976 976
977 977 /*
978 978 * Any changes to this routine must also be carried over to
979 979 * devmap_free_pages() in the seg_dev driver. This is because
980 980 * we currently don't have a special kernel segment for non-paged
981 981 * kernel memory that is exported by drivers to user space.
982 982 */
983 983 static void
984 984 segkmem_free_vn(vmem_t *vmp, void *inaddr, size_t size, struct vnode *vp,
985 985 void (*func)(page_t *))
986 986 {
987 987 page_t *pp;
988 988 caddr_t addr = inaddr;
989 989 caddr_t eaddr;
990 990 pgcnt_t npages = btopr(size);
991 991
992 992 ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
993 993 ASSERT(vp != NULL);
994 994
995 995 if (kvseg.s_base == NULL) {
996 996 segkmem_gc_list_t *gc = inaddr;
997 997 gc->gc_arena = vmp;
998 998 gc->gc_size = size;
999 999 gc->gc_next = segkmem_gc_list;
1000 1000 segkmem_gc_list = gc;
1001 1001 return;
1002 1002 }
1003 1003
1004 1004 hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1005 1005
1006 1006 for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
1007 1007 #if defined(__x86)
1008 1008 pp = page_find(vp, (u_offset_t)(uintptr_t)addr);
1009 1009 if (pp == NULL)
1010 1010 panic("segkmem_free: page not found");
1011 1011 if (!page_tryupgrade(pp)) {
1012 1012 /*
1013 1013 * Some other thread has a sharelock. Wait for
1014 1014 * it to drop the lock so we can free this page.
1015 1015 */
1016 1016 page_unlock(pp);
1017 1017 pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr,
1018 1018 SE_EXCL);
1019 1019 }
1020 1020 #else
1021 1021 pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
1022 1022 #endif
1023 1023 if (pp == NULL)
1024 1024 panic("segkmem_free: page not found");
1025 1025 /* Clear p_lckcnt so page_destroy() doesn't update availrmem */
1026 1026 pp->p_lckcnt = 0;
1027 1027 if (func)
1028 1028 func(pp);
1029 1029 else
1030 1030 page_destroy(pp, 0);
1031 1031 }
1032 1032 if (func == NULL)
1033 1033 page_unresv(npages);
1034 1034
1035 1035 if (vmp != NULL)
1036 1036 vmem_free(vmp, inaddr, size);
1037 1037
1038 1038 }
1039 1039
1040 1040 void
1041 1041 segkmem_xfree(vmem_t *vmp, void *inaddr, size_t size, void (*func)(page_t *))
1042 1042 {
1043 1043 segkmem_free_vn(vmp, inaddr, size, &kvp, func);
1044 1044 }
1045 1045
1046 1046 void
1047 1047 segkmem_free(vmem_t *vmp, void *inaddr, size_t size)
1048 1048 {
1049 1049 segkmem_free_vn(vmp, inaddr, size, &kvp, NULL);
1050 1050 }
1051 1051
1052 1052 void
1053 1053 segkmem_zio_free(vmem_t *vmp, void *inaddr, size_t size)
1054 1054 {
1055 1055 segkmem_free_vn(vmp, inaddr, size, &zvp, NULL);
1056 1056 }
1057 1057
1058 1058 void
1059 1059 segkmem_gc(void)
1060 1060 {
1061 1061 ASSERT(kvseg.s_base != NULL);
1062 1062 while (segkmem_gc_list != NULL) {
1063 1063 segkmem_gc_list_t *gc = segkmem_gc_list;
1064 1064 segkmem_gc_list = gc->gc_next;
1065 1065 segkmem_free(gc->gc_arena, gc, gc->gc_size);
1066 1066 }
1067 1067 }
1068 1068
1069 1069 /*
1070 1070 * Legacy entry points from here to end of file.
1071 1071 */
1072 1072 void
1073 1073 segkmem_mapin(struct seg *seg, void *addr, size_t size, uint_t vprot,
1074 1074 pfn_t pfn, uint_t flags)
1075 1075 {
1076 1076 hat_unload(seg->s_as->a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1077 1077 hat_devload(seg->s_as->a_hat, addr, size, pfn, vprot,
1078 1078 flags | HAT_LOAD_LOCK);
1079 1079 }
1080 1080
1081 1081 void
1082 1082 segkmem_mapout(struct seg *seg, void *addr, size_t size)
1083 1083 {
1084 1084 hat_unload(seg->s_as->a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1085 1085 }
1086 1086
1087 1087 void *
1088 1088 kmem_getpages(pgcnt_t npages, int kmflag)
1089 1089 {
1090 1090 return (kmem_alloc(ptob(npages), kmflag));
1091 1091 }
1092 1092
1093 1093 void
1094 1094 kmem_freepages(void *addr, pgcnt_t npages)
1095 1095 {
1096 1096 kmem_free(addr, ptob(npages));
1097 1097 }
1098 1098
1099 1099 /*
1100 1100 * segkmem_page_create_large() allocates a large page to be used for the kmem
1101 1101 * caches. If kpr is enabled we ask for a relocatable page unless requested
1102 1102  * otherwise. If kpr is disabled we have to ask for a non-reloc page.
1103 1103 */
1104 1104 static page_t *
1105 1105 segkmem_page_create_large(void *addr, size_t size, int vmflag, void *arg)
1106 1106 {
1107 1107 int pgflags;
1108 1108
1109 1109 pgflags = PG_EXCL;
1110 1110
1111 1111 if (segkmem_reloc == 0 || (vmflag & VM_NORELOC))
1112 1112 pgflags |= PG_NORELOC;
1113 1113 if (!(vmflag & VM_NOSLEEP))
1114 1114 pgflags |= PG_WAIT;
1115 1115 if (vmflag & VM_PUSHPAGE)
1116 1116 pgflags |= PG_PUSHPAGE;
1117 1117 if (vmflag & VM_NORMALPRI)
1118 1118 pgflags |= PG_NORMALPRI;
1119 1119
1120 1120 return (page_create_va_large(&kvp, (u_offset_t)(uintptr_t)addr, size,
1121 1121 pgflags, &kvseg, addr, arg));
1122 1122 }
1123 1123
1124 1124 /*
1125 1125 * Allocate a large page to back the virtual address range
1126 1126 * [addr, addr + size). If addr is NULL, allocate the virtual address
1127 1127 * space as well.
1128 1128 */
1129 1129 static void *
1130 1130 segkmem_xalloc_lp(vmem_t *vmp, void *inaddr, size_t size, int vmflag,
1131 1131 uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *),
1132 1132 void *pcarg)
1133 1133 {
1134 1134 caddr_t addr = inaddr, pa;
1135 1135 size_t lpsize = segkmem_lpsize;
1136 1136 pgcnt_t npages = btopr(size);
1137 1137 pgcnt_t nbpages = btop(lpsize);
1138 1138 pgcnt_t nlpages = size >> segkmem_lpshift;
1139 1139 size_t ppasize = nbpages * sizeof (page_t *);
1140 1140 page_t *pp, *rootpp, **ppa, *pplist = NULL;
1141 1141 int i;
1142 1142
1143 1143 vmflag |= VM_NOSLEEP;
1144 1144
1145 1145 if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
1146 1146 return (NULL);
1147 1147 }
1148 1148
1149 1149 /*
1150 1150 	 * Allocate an array we need for hat_memload_array().
1151 1151 	 * We use a separate arena to avoid recursion.
1152 1152 	 * We will not need this array once hat_memload_array() learns pp++.
1153 1153 */
1154 1154 if ((ppa = vmem_alloc(segkmem_ppa_arena, ppasize, vmflag)) == NULL) {
1155 1155 goto fail_array_alloc;
1156 1156 }
1157 1157
1158 1158 if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
1159 1159 goto fail_vmem_alloc;
1160 1160
1161 1161 ASSERT(((uintptr_t)addr & (lpsize - 1)) == 0);
1162 1162
1163 1163 /* create all the pages */
1164 1164 for (pa = addr, i = 0; i < nlpages; i++, pa += lpsize) {
1165 1165 if ((pp = page_create_func(pa, lpsize, vmflag, pcarg)) == NULL)
1166 1166 goto fail_page_create;
1167 1167 page_list_concat(&pplist, &pp);
1168 1168 }
1169 1169
1170 1170 	/* at this point we have all the resources to complete the request */
1171 1171 while ((rootpp = pplist) != NULL) {
1172 1172 for (i = 0; i < nbpages; i++) {
1173 1173 ASSERT(pplist != NULL);
1174 1174 pp = pplist;
1175 1175 page_sub(&pplist, pp);
1176 1176 ASSERT(page_iolock_assert(pp));
1177 1177 page_io_unlock(pp);
1178 1178 ppa[i] = pp;
1179 1179 }
1180 1180 /*
1181 1181 * Load the locked entry. It's OK to preload the entry into the
1182 1182 * TSB since we now support large mappings in the kernel TSB.
1183 1183 */
1184 1184 hat_memload_array(kas.a_hat,
1185 1185 (caddr_t)(uintptr_t)rootpp->p_offset, lpsize,
1186 1186 ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
1187 1187 HAT_LOAD_LOCK);
1188 1188
1189 1189 for (--i; i >= 0; --i) {
1190 1190 ppa[i]->p_lckcnt = 1;
1191 1191 page_unlock(ppa[i]);
1192 1192 }
1193 1193 }
1194 1194
1195 1195 vmem_free(segkmem_ppa_arena, ppa, ppasize);
1196 1196 return (addr);
1197 1197
1198 1198 fail_page_create:
1199 1199 while ((rootpp = pplist) != NULL) {
1200 1200 for (i = 0, pp = pplist; i < nbpages; i++, pp = pplist) {
1201 1201 ASSERT(pp != NULL);
1202 1202 page_sub(&pplist, pp);
1203 1203 ASSERT(page_iolock_assert(pp));
1204 1204 page_io_unlock(pp);
1205 1205 }
1206 1206 page_destroy_pages(rootpp);
1207 1207 }
1208 1208
1209 1209 if (inaddr == NULL)
1210 1210 vmem_free(vmp, addr, size);
1211 1211
1212 1212 fail_vmem_alloc:
1213 1213 vmem_free(segkmem_ppa_arena, ppa, ppasize);
1214 1214
1215 1215 fail_array_alloc:
1216 1216 page_unresv(npages);
1217 1217
1218 1218 return (NULL);
1219 1219 }
1220 1220
1221 1221 static void
1222 1222 segkmem_free_one_lp(caddr_t addr, size_t size)
1223 1223 {
1224 1224 page_t *pp, *rootpp = NULL;
1225 1225 pgcnt_t pgs_left = btopr(size);
1226 1226
1227 1227 ASSERT(size == segkmem_lpsize);
1228 1228
1229 1229 hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1230 1230
1231 1231 for (; pgs_left > 0; addr += PAGESIZE, pgs_left--) {
1232 1232 pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
1233 1233 if (pp == NULL)
1234 1234 panic("segkmem_free_one_lp: page not found");
1235 1235 ASSERT(PAGE_EXCL(pp));
1236 1236 pp->p_lckcnt = 0;
1237 1237 if (rootpp == NULL)
1238 1238 rootpp = pp;
1239 1239 }
1240 1240 ASSERT(rootpp != NULL);
1241 1241 page_destroy_pages(rootpp);
1242 1242
1243 1243 /* page_unresv() is done by the caller */
1244 1244 }
1245 1245
1246 1246 /*
1247 1247 * This function is called to import new spans into the vmem arenas like
1248 1248 * kmem_default_arena and kmem_oversize_arena. It first tries to import
1249 1249  * spans from the large page arena, kmem_lp_arena. To do this it may have
1250 1250  * to "upgrade" the requested size to the kmem_lp_arena quantum. If it
1251 1251  * cannot satisfy the upgraded request, it falls back to regular
1252 1252  * segkmem_alloc(), which satisfies the request by importing from "*vmp".
1253 1253  */
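/*
 * For example, with the default 4MB segkmem_kmemlp_quantum, a 5MB
 * request is upgraded to asize = P2ROUNDUP(5MB, 4MB) = 8MB before the
 * import is attempted; the caller learns the actual size via *sizep.
 */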
1254 1254 /*ARGSUSED*/
1255 1255 void *
1256 1256 segkmem_alloc_lp(vmem_t *vmp, size_t *sizep, size_t align, int vmflag)
1257 1257 {
1258 1258 size_t size;
1259 1259 kthread_t *t = curthread;
1260 1260 segkmem_lpcb_t *lpcb = &segkmem_lpcb;
1261 1261
1262 1262 ASSERT(sizep != NULL);
1263 1263
1264 1264 size = *sizep;
1265 1265
1266 1266 if (lpcb->lp_uselp && !(t->t_flag & T_PANIC) &&
1267 1267 !(vmflag & SEGKMEM_SHARELOCKED)) {
1268 1268
1269 1269 size_t kmemlp_qnt = segkmem_kmemlp_quantum;
1270 1270 size_t asize = P2ROUNDUP(size, kmemlp_qnt);
1271 1271 void *addr = NULL;
1272 1272 ulong_t *lpthrtp = &lpcb->lp_throttle;
1273 1273 ulong_t lpthrt = *lpthrtp;
1274 1274 int dowakeup = 0;
1275 1275 int doalloc = 1;
1276 1276
1277 1277 ASSERT(kmem_lp_arena != NULL);
1278 1278 ASSERT(asize >= size);
1279 1279
1280 1280 if (lpthrt != 0) {
1281 1281 /* try to update the throttle value */
1282 1282 lpthrt = atomic_inc_ulong_nv(lpthrtp);
1283 1283 if (lpthrt >= segkmem_lpthrottle_max) {
1284 1284 lpthrt = atomic_cas_ulong(lpthrtp, lpthrt,
1285 1285 segkmem_lpthrottle_max / 4);
1286 1286 }
1287 1287
1288 1288 /*
1289 1289 			 * When we get above the throttle start, do an exponential
1290 1290 			 * backoff on trying large pages and reaping.
1291 1291 */
1292 1292 if (lpthrt > segkmem_lpthrottle_start &&
1293 1293 !ISP2(lpthrt)) {
1294 1294 lpcb->allocs_throttled++;
1295 1295 lpthrt--;
1296 1296 if (ISP2(lpthrt))
1297 1297 kmem_reap();
1298 1298 return (segkmem_alloc(vmp, size, vmflag));
1299 1299 }
1300 1300 }
1301 1301
1302 1302 if (!(vmflag & VM_NOSLEEP) &&
1303 1303 segkmem_heaplp_quantum >= (8 * kmemlp_qnt) &&
1304 1304 vmem_size(kmem_lp_arena, VMEM_FREE) <= kmemlp_qnt &&
1305 1305 asize < (segkmem_heaplp_quantum - kmemlp_qnt)) {
1306 1306
1307 1307 /*
1308 1308 			 * We are low on free memory in kmem_lp_arena, so
1309 1309 			 * we let only one thread allocate a heap_lp
1310 1310 			 * quantum-sized chunk that everybody is going to
1311 1311 			 * share.
1312 1312 */
1313 1313 mutex_enter(&lpcb->lp_lock);
1314 1314
1315 1315 if (lpcb->lp_wait) {
1316 1316
1317 1317 /* we are not the first one - wait */
1318 1318 cv_wait(&lpcb->lp_cv, &lpcb->lp_lock);
1319 1319 if (vmem_size(kmem_lp_arena, VMEM_FREE) <
1320 1320 kmemlp_qnt) {
1321 1321 doalloc = 0;
1322 1322 }
1323 1323 } else if (vmem_size(kmem_lp_arena, VMEM_FREE) <=
1324 1324 kmemlp_qnt) {
1325 1325
1326 1326 /*
1327 1327 * we are the first one, make sure we import
1328 1328 * a large page
1329 1329 */
1330 1330 if (asize == kmemlp_qnt)
1331 1331 asize += kmemlp_qnt;
1332 1332 dowakeup = 1;
1333 1333 lpcb->lp_wait = 1;
1334 1334 }
1335 1335
1336 1336 mutex_exit(&lpcb->lp_lock);
1337 1337 }
1338 1338
1339 1339 /*
1340 1340 		 * The VM_ABORT flag prevents sleeps in vmem_xalloc() when
1341 1341 * large pages are not available. In that case this allocation
1342 1342 * attempt will fail and we will retry allocation with small
1343 1343 * pages. We also do not want to panic if this allocation fails
1344 1344 * because we are going to retry.
1345 1345 */
1346 1346 if (doalloc) {
1347 1347 addr = vmem_alloc(kmem_lp_arena, asize,
1348 1348 (vmflag | VM_ABORT) & ~VM_PANIC);
1349 1349
1350 1350 if (dowakeup) {
1351 1351 mutex_enter(&lpcb->lp_lock);
1352 1352 ASSERT(lpcb->lp_wait != 0);
1353 1353 lpcb->lp_wait = 0;
1354 1354 cv_broadcast(&lpcb->lp_cv);
1355 1355 mutex_exit(&lpcb->lp_lock);
1356 1356 }
1357 1357 }
1358 1358
1359 1359 if (addr != NULL) {
1360 1360 *sizep = asize;
1361 1361 *lpthrtp = 0;
1362 1362 return (addr);
1363 1363 }
1364 1364
1365 1365 if (vmflag & VM_NOSLEEP)
1366 1366 lpcb->nosleep_allocs_failed++;
1367 1367 else
1368 1368 lpcb->sleep_allocs_failed++;
1369 1369 lpcb->alloc_bytes_failed += size;
1370 1370
1371 1371 		/* if large page throttling has not started yet, start it */
1372 1372 if (segkmem_use_lpthrottle && lpthrt == 0) {
1373 1373 lpthrt = atomic_cas_ulong(lpthrtp, lpthrt, 1);
1374 1374 }
1375 1375 }
1376 1376 return (segkmem_alloc(vmp, size, vmflag));
1377 1377 }
1378 1378
1379 1379 void
1380 1380 segkmem_free_lp(vmem_t *vmp, void *inaddr, size_t size)
1381 1381 {
1382 1382 if (kmem_lp_arena == NULL || !IS_KMEM_VA_LARGEPAGE((caddr_t)inaddr)) {
1383 1383 segkmem_free(vmp, inaddr, size);
1384 1384 } else {
1385 1385 vmem_free(kmem_lp_arena, inaddr, size);
1386 1386 }
1387 1387 }
1388 1388
1389 1389 /*
1390 1390  * segkmem_alloc_lpi() imports virtual memory from the large page heap
1391 1391  * arena into the kmem_lp arena. In the process it maps the imported
1392 1392  * segment with large pages.
1393 1393 */
1394 1394 static void *
1395 1395 segkmem_alloc_lpi(vmem_t *vmp, size_t size, int vmflag)
1396 1396 {
1397 1397 segkmem_lpcb_t *lpcb = &segkmem_lpcb;
1398 1398 void *addr;
1399 1399
1400 1400 ASSERT(size != 0);
1401 1401 ASSERT(vmp == heap_lp_arena);
1402 1402
1403 1403 	/* do not allow the large page heap to grow beyond its limit */
1404 1404 if (vmem_size(vmp, VMEM_ALLOC) >= segkmem_kmemlp_max) {
1405 1405 lpcb->allocs_limited++;
1406 1406 return (NULL);
1407 1407 }
1408 1408
1409 1409 addr = segkmem_xalloc_lp(vmp, NULL, size, vmflag, 0,
1410 1410 segkmem_page_create_large, NULL);
1411 1411 return (addr);
1412 1412 }
1413 1413
1414 1414 /*
1415 1415  * segkmem_free_lpi() returns virtual memory back into the large page heap
1416 1416  * arena from the kmem_lp arena. Before doing this it unmaps the segment
1417 1417  * and frees the large pages used to map it.
1418 1418 */
1419 1419 static void
1420 1420 segkmem_free_lpi(vmem_t *vmp, void *inaddr, size_t size)
1421 1421 {
1422 1422 pgcnt_t nlpages = size >> segkmem_lpshift;
1423 1423 size_t lpsize = segkmem_lpsize;
1424 1424 caddr_t addr = inaddr;
1425 1425 pgcnt_t npages = btopr(size);
1426 1426 int i;
1427 1427
1428 1428 ASSERT(vmp == heap_lp_arena);
1429 1429 ASSERT(IS_KMEM_VA_LARGEPAGE(addr));
1430 1430 ASSERT(((uintptr_t)inaddr & (lpsize - 1)) == 0);
1431 1431
1432 1432 for (i = 0; i < nlpages; i++) {
1433 1433 segkmem_free_one_lp(addr, lpsize);
1434 1434 addr += lpsize;
1435 1435 }
1436 1436
1437 1437 page_unresv(npages);
1438 1438
1439 1439 vmem_free(vmp, inaddr, size);
1440 1440 }
1441 1441
1442 1442 /*
1443 1443  * This function is called at system boot time by kmem_init right after
1444 1444  * the /etc/system file has been read. Based on the hardware configuration
1445 1445  * and /etc/system settings, it checks whether the system is going to use
1446 1446  * large pages. The initialization necessary to actually start using
1447 1447  * large pages happens later, after segkmem_heap_lp_init() is called.
1448 1448 */
1449 1449 int
1450 1450 segkmem_lpsetup()
1451 1451 {
1452 1452 int use_large_pages = 0;
1453 1453
1454 1454 #ifdef __sparc
1455 1455
1456 1456 size_t memtotal = physmem * PAGESIZE;
1457 1457
1458 1458 if (heap_lp_base == NULL) {
1459 1459 segkmem_lpsize = PAGESIZE;
1460 1460 return (0);
1461 1461 }
1462 1462
1463 1463 	/* get a platform-dependent value of the large page size for kernel heap */
1464 1464 segkmem_lpsize = get_segkmem_lpsize(segkmem_lpsize);
1465 1465
1466 1466 if (segkmem_lpsize <= PAGESIZE) {
1467 1467 /*
1468 1468 		 * Put the virtual space reserved for the large page kernel
1469 1469 		 * heap back into the regular heap.
1470 1470 */
1471 1471 vmem_xfree(heap_arena, heap_lp_base,
1472 1472 heap_lp_end - heap_lp_base);
1473 1473 heap_lp_base = NULL;
1474 1474 heap_lp_end = NULL;
1475 1475 segkmem_lpsize = PAGESIZE;
1476 1476 return (0);
1477 1477 }
1478 1478
1479 1479 /* set heap_lp quantum if necessary */
1480 1480 if (segkmem_heaplp_quantum == 0 || !ISP2(segkmem_heaplp_quantum) ||
1481 1481 P2PHASE(segkmem_heaplp_quantum, segkmem_lpsize)) {
1482 1482 segkmem_heaplp_quantum = segkmem_lpsize;
1483 1483 }
1484 1484
1485 1485 /* set kmem_lp quantum if necessary */
1486 1486 if (segkmem_kmemlp_quantum == 0 || !ISP2(segkmem_kmemlp_quantum) ||
1487 1487 segkmem_kmemlp_quantum > segkmem_heaplp_quantum) {
1488 1488 segkmem_kmemlp_quantum = segkmem_heaplp_quantum;
1489 1489 }
1490 1490
1491 1491 /* set total amount of memory allowed for large page kernel heap */
1492 1492 if (segkmem_kmemlp_max == 0) {
1493 1493 if (segkmem_kmemlp_pcnt == 0 || segkmem_kmemlp_pcnt > 100)
1494 1494 segkmem_kmemlp_pcnt = 12;
1495 1495 segkmem_kmemlp_max = (memtotal * segkmem_kmemlp_pcnt) / 100;
1496 1496 }
1497 1497 segkmem_kmemlp_max = P2ROUNDUP(segkmem_kmemlp_max,
1498 1498 segkmem_heaplp_quantum);
1499 1499
1500 1500 	/* fix lp kmem preallocation request if necessary */
1501 1501 if (segkmem_kmemlp_min) {
1502 1502 segkmem_kmemlp_min = P2ROUNDUP(segkmem_kmemlp_min,
1503 1503 segkmem_heaplp_quantum);
1504 1504 if (segkmem_kmemlp_min > segkmem_kmemlp_max)
1505 1505 segkmem_kmemlp_min = segkmem_kmemlp_max;
1506 1506 }
1507 1507
1508 1508 use_large_pages = 1;
1509 1509 segkmem_lpszc = page_szc(segkmem_lpsize);
1510 1510 segkmem_lpshift = page_get_shift(segkmem_lpszc);
1511 1511
1512 1512 #endif
1513 1513 return (use_large_pages);
1514 1514 }
1515 1515
1516 1516 void
1517 1517 segkmem_zio_init(void *zio_mem_base, size_t zio_mem_size)
1518 1518 {
1519 1519 ASSERT(zio_mem_base != NULL);
1520 1520 ASSERT(zio_mem_size != 0);
1521 1521
1522 1522 /*
1523 1523 * To reduce VA space fragmentation, we set up quantum caches for the
1524 1524 * smaller sizes; we chose 32k because that translates to 128k VA
1525 1525 * slabs, which matches nicely with the common 128k zio_data bufs.
1526 1526 */
1527 1527 zio_arena = vmem_create("zfs_file_data", zio_mem_base, zio_mem_size,
1528 1528 PAGESIZE, NULL, NULL, NULL, 32 * 1024, VM_SLEEP);
1529 1529
1530 1530 zio_alloc_arena = vmem_create("zfs_file_data_buf", NULL, 0, PAGESIZE,
1531 1531 segkmem_zio_alloc, segkmem_zio_free, zio_arena, 0, VM_SLEEP);
1532 1532
1533 1533 ASSERT(zio_arena != NULL);
1534 1534 ASSERT(zio_alloc_arena != NULL);
1535 1535 }
1536 1536
1537 1537 #ifdef __sparc
1538 1538
1539 1539
1540 1540 static void *
1541 1541 segkmem_alloc_ppa(vmem_t *vmp, size_t size, int vmflag)
1542 1542 {
1543 1543 size_t ppaquantum = btopr(segkmem_lpsize) * sizeof (page_t *);
1544 1544 void *addr;
1545 1545
1546 1546 if (ppaquantum <= PAGESIZE)
1547 1547 return (segkmem_alloc(vmp, size, vmflag));
1548 1548
1549 1549 ASSERT((size & (ppaquantum - 1)) == 0);
1550 1550
1551 1551 addr = vmem_xalloc(vmp, size, ppaquantum, 0, 0, NULL, NULL, vmflag);
1552 1552 if (addr != NULL && segkmem_xalloc(vmp, addr, size, vmflag, 0,
1553 1553 segkmem_page_create, NULL) == NULL) {
1554 1554 vmem_xfree(vmp, addr, size);
1555 1555 addr = NULL;
1556 1556 }
1557 1557
1558 1558 return (addr);
1559 1559 }
1560 1560
1561 1561 static void
1562 1562 segkmem_free_ppa(vmem_t *vmp, void *addr, size_t size)
1563 1563 {
1564 1564 size_t ppaquantum = btopr(segkmem_lpsize) * sizeof (page_t *);
1565 1565
1566 1566 ASSERT(addr != NULL);
1567 1567
1568 1568 if (ppaquantum <= PAGESIZE) {
1569 1569 segkmem_free(vmp, addr, size);
1570 1570 } else {
1571 1571 segkmem_free(NULL, addr, size);
1572 1572 vmem_xfree(vmp, addr, size);
1573 1573 }
1574 1574 }
1575 1575
1576 1576 void
1577 1577 segkmem_heap_lp_init()
1578 1578 {
1579 1579 segkmem_lpcb_t *lpcb = &segkmem_lpcb;
1580 1580 size_t heap_lp_size = heap_lp_end - heap_lp_base;
1581 1581 size_t lpsize = segkmem_lpsize;
1582 1582 size_t ppaquantum;
1583 1583 void *addr;
1584 1584
1585 1585 if (segkmem_lpsize <= PAGESIZE) {
1586 1586 ASSERT(heap_lp_base == NULL);
1587 1587 ASSERT(heap_lp_end == NULL);
1588 1588 return;
1589 1589 }
1590 1590
1591 1591 ASSERT(segkmem_heaplp_quantum >= lpsize);
1592 1592 ASSERT((segkmem_heaplp_quantum & (lpsize - 1)) == 0);
1593 1593 ASSERT(lpcb->lp_uselp == 0);
1594 1594 ASSERT(heap_lp_base != NULL);
1595 1595 ASSERT(heap_lp_end != NULL);
1596 1596 ASSERT(heap_lp_base < heap_lp_end);
1597 1597 ASSERT(heap_lp_arena == NULL);
1598 1598 ASSERT(((uintptr_t)heap_lp_base & (lpsize - 1)) == 0);
1599 1599 ASSERT(((uintptr_t)heap_lp_end & (lpsize - 1)) == 0);
1600 1600
1601 1601 /* create large page heap arena */
1602 1602 heap_lp_arena = vmem_create("heap_lp", heap_lp_base, heap_lp_size,
1603 1603 segkmem_heaplp_quantum, NULL, NULL, NULL, 0, VM_SLEEP);
1604 1604
1605 1605 ASSERT(heap_lp_arena != NULL);
1606 1606
1607 1607 /* This arena caches memory already mapped by large pages */
1608 1608 kmem_lp_arena = vmem_create("kmem_lp", NULL, 0, segkmem_kmemlp_quantum,
1609 1609 segkmem_alloc_lpi, segkmem_free_lpi, heap_lp_arena, 0, VM_SLEEP);
1610 1610
1611 1611 ASSERT(kmem_lp_arena != NULL);
1612 1612
1613 1613 mutex_init(&lpcb->lp_lock, NULL, MUTEX_DEFAULT, NULL);
1614 1614 cv_init(&lpcb->lp_cv, NULL, CV_DEFAULT, NULL);
1615 1615
1616 1616 /*
1617 1617 	 * This arena is used for the array of page_t pointers necessary
1618 1618 	 * to call hat_memload_array().
1619 1619 */
1620 1620 ppaquantum = btopr(lpsize) * sizeof (page_t *);
1621 1621 segkmem_ppa_arena = vmem_create("segkmem_ppa", NULL, 0, ppaquantum,
1622 1622 segkmem_alloc_ppa, segkmem_free_ppa, heap_arena, ppaquantum,
1623 1623 VM_SLEEP);
1624 1624
1625 1625 ASSERT(segkmem_ppa_arena != NULL);
1626 1626
1627 1627 	/* preallocate some memory for the lp kernel heap */
1628 1628 if (segkmem_kmemlp_min) {
1629 1629
1630 1630 ASSERT(P2PHASE(segkmem_kmemlp_min,
1631 1631 segkmem_heaplp_quantum) == 0);
1632 1632
1633 1633 if ((addr = segkmem_alloc_lpi(heap_lp_arena,
1634 1634 segkmem_kmemlp_min, VM_SLEEP)) != NULL) {
1635 1635
1636 1636 addr = vmem_add(kmem_lp_arena, addr,
1637 1637 segkmem_kmemlp_min, VM_SLEEP);
1638 1638 ASSERT(addr != NULL);
1639 1639 }
1640 1640 }
1641 1641
1642 1642 lpcb->lp_uselp = 1;
1643 1643 }
1644 1644
1645 1645 #endif