OS-5192 need faster clock_gettime
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
Reviewed by: Ryan Zezeski <ryan@zinascii.com>
    
--- old/usr/src/uts/common/vm/seg_kmem.c
+++ new/usr/src/uts/common/vm/seg_kmem.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
       23 + * Copyright 2016 Joyent, Inc.
  23   24   */
  24   25  
  25   26  #include <sys/types.h>
  26   27  #include <sys/t_lock.h>
  27   28  #include <sys/param.h>
  28   29  #include <sys/sysmacros.h>
  29   30  #include <sys/tuneable.h>
  30   31  #include <sys/systm.h>
  31   32  #include <sys/vm.h>
  32   33  #include <sys/kmem.h>
  33   34  #include <sys/vmem.h>
  34   35  #include <sys/mman.h>
  35   36  #include <sys/cmn_err.h>
  36   37  #include <sys/debug.h>
  37   38  #include <sys/dumphdr.h>
  38   39  #include <sys/bootconf.h>
  39   40  #include <sys/lgrp.h>
  40   41  #include <vm/seg_kmem.h>
  41   42  #include <vm/hat.h>
  42   43  #include <vm/page.h>
  43   44  #include <vm/vm_dep.h>
  44   45  #include <vm/faultcode.h>
  45   46  #include <sys/promif.h>
  46   47  #include <vm/seg_kp.h>
  47   48  #include <sys/bitmap.h>
  48   49  #include <sys/mem_cage.h>
  49   50  
  50   51  #ifdef __sparc
  51   52  #include <sys/ivintr.h>
  52   53  #include <sys/panic.h>
  53   54  #endif
  54   55  
  55   56  /*
  56   57   * seg_kmem is the primary kernel memory segment driver.  It
  57   58   * maps the kernel heap [kernelheap, ekernelheap), module text,
  58   59   * and all memory which was allocated before the VM was initialized
  59   60   * into kas.
  60   61   *
  61   62   * Pages which belong to seg_kmem are hashed into &kvp vnode at
  62   63   * an offset equal to (u_offset_t)virt_addr, and have p_lckcnt >= 1.
  63   64   * They must never be paged out since segkmem_fault() is a no-op to
  64   65   * prevent recursive faults.
  65   66   *
  66   67   * Currently, seg_kmem pages are sharelocked (p_sharelock == 1) on
  67   68   * __x86 and are unlocked (p_sharelock == 0) on __sparc.  Once __x86
  68   69   * supports relocation the #ifdef kludges can be removed.
  69   70   *
  70   71   * seg_kmem pages may be subject to relocation by page_relocate(),
  71   72   * provided that the HAT supports it; if this is so, segkmem_reloc
  72   73   * will be set to a nonzero value. All boot time allocated memory as
  73   74   * well as static memory is considered off limits to relocation.
  74   75   * Pages are "relocatable" if p_state does not have P_NORELOC set, so
  75   76   * we request P_NORELOC pages for memory that isn't safe to relocate.
  76   77   *
  77   78   * The kernel heap is logically divided up into four pieces:
  78   79   *
  79   80   *   heap32_arena is for allocations that require 32-bit absolute
  80   81   *   virtual addresses (e.g. code that uses 32-bit pointers/offsets).
  81   82   *
  82   83   *   heap_core is for allocations that require 2GB *relative*
  83   84   *   offsets; in other words all memory from heap_core is within
  84   85   *   2GB of all other memory from the same arena. This is a requirement
  85   86   *   of the addressing modes of some processors in supervisor code.
  86   87   *
  87   88   *   heap_arena is the general heap arena.
  88   89   *
  89   90   *   static_arena is the static memory arena.  Allocations from it
  90   91   *   are not subject to relocation so it is safe to use the memory
  91   92   *   physical address as well as the virtual address (e.g. the VA to
  92   93   *   PA translations are static).  Caches may import from static_arena;
  93   94   *   all other static memory allocations should use static_alloc_arena.
  94   95   *
  95   96   * On some platforms which have limited virtual address space, seg_kmem
  96   97   * may share [kernelheap, ekernelheap) with seg_kp; if this is so,
  97   98   * segkp_bitmap is non-NULL, and each bit represents a page of virtual
  98   99   * address space which is actually seg_kp mapped.
  99  100   */
 100  101  
 101  102  extern ulong_t *segkp_bitmap;   /* Is set if segkp is from the kernel heap */
 102  103  
 103  104  char *kernelheap;               /* start of primary kernel heap */
 104  105  char *ekernelheap;              /* end of primary kernel heap */
 105  106  struct seg kvseg;               /* primary kernel heap segment */
 106  107  struct seg kvseg_core;          /* "core" kernel heap segment */
 107  108  struct seg kzioseg;             /* Segment for zio mappings */
 108  109  vmem_t *heap_arena;             /* primary kernel heap arena */
 109  110  vmem_t *heap_core_arena;        /* core kernel heap arena */
 110  111  char *heap_core_base;           /* start of core kernel heap arena */
 111  112  char *heap_lp_base;             /* start of kernel large page heap arena */
 112  113  char *heap_lp_end;              /* end of kernel large page heap arena */
 113  114  vmem_t *hat_memload_arena;      /* HAT translation data */
 114  115  struct seg kvseg32;             /* 32-bit kernel heap segment */
 115  116  vmem_t *heap32_arena;           /* 32-bit kernel heap arena */
 116  117  vmem_t *heaptext_arena;         /* heaptext arena */
 117  118  struct as kas;                  /* kernel address space */
 118  119  int segkmem_reloc;              /* enable/disable relocatable segkmem pages */
 119  120  vmem_t *static_arena;           /* arena for caches to import static memory */
 120  121  vmem_t *static_alloc_arena;     /* arena for allocating static memory */
 121  122  vmem_t *zio_arena = NULL;       /* arena for allocating zio memory */
 122  123  vmem_t *zio_alloc_arena = NULL; /* arena for allocating zio memory */
 123  124  
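
The heap_arena declared above is the general-purpose source that other kernel arenas import from, with segkmem_alloc() and segkmem_free() as the backing functions (static_arena and hat_memload_arena below are created exactly this way). A minimal sketch of that pattern follows; the arena name and the helper functions are hypothetical and not part of this change.

    /* Hypothetical client arena importing page-sized spans from heap_arena. */
    vmem_t *example_arena;

    void
    example_arena_init(void)
    {
            example_arena = vmem_create("example", NULL, 0, PAGESIZE,
                segkmem_alloc, segkmem_free, heap_arena, 0, VM_SLEEP);
    }

    void *
    example_getpage(void)
    {
            /* One page of mapped kernel memory; release with vmem_free(). */
            return (vmem_alloc(example_arena, PAGESIZE, VM_SLEEP));
    }
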
 124  125  /*
 125  126   * seg_kmem driver can map part of the kernel heap with large pages.
 126  127   * Currently this functionality is implemented for sparc platforms only.
 127  128   *
 128  129   * The large page size "segkmem_lpsize" for kernel heap is selected in the
 129  130   * platform specific code. It can also be modified via /etc/system file.
 130  131   * Setting segkmem_lpsize to PAGESIZE in /etc/system disables usage of large
 131  132   * pages for kernel heap. "segkmem_lpshift" is adjusted appropriately to
 132  133   * match segkmem_lpsize.
 133  134   *
 134  135   * At boot time we carve from kernel heap arena a range of virtual addresses
 135  136   * that will be used for large page mappings. This range [heap_lp_base,
 136  137   * heap_lp_end) is set up as a separate vmem arena - "heap_lp_arena". We also
  137  138   * create "kmem_lp_arena" that caches memory already backed by large
 138  139   * pages. kmem_lp_arena imports virtual segments from heap_lp_arena.
 139  140   */
 140  141  
 141  142  size_t  segkmem_lpsize;
 142  143  static  uint_t  segkmem_lpshift = PAGESHIFT;
 143  144  int     segkmem_lpszc = 0;
 144  145  
 145  146  size_t  segkmem_kmemlp_quantum = 0x400000;      /* 4MB */
 146  147  size_t  segkmem_heaplp_quantum;
 147  148  vmem_t *heap_lp_arena;
 148  149  static  vmem_t *kmem_lp_arena;
 149  150  static  vmem_t *segkmem_ppa_arena;
 150  151  static  segkmem_lpcb_t segkmem_lpcb;
 151  152  
 152  153  /*
 153  154   * We use "segkmem_kmemlp_max" to limit the total amount of physical memory
 154  155   * consumed by the large page heap. By default this parameter is set to 1/8 of
 155  156   * physmem but can be adjusted through /etc/system either directly or
 156  157   * indirectly by setting "segkmem_kmemlp_pcnt" to the percent of physmem
 157  158   * we allow for large page heap.
 158  159   */
 159  160  size_t  segkmem_kmemlp_max;
 160  161  static  uint_t  segkmem_kmemlp_pcnt;
 161  162  
 162  163  /*
 163  164   * Getting large pages for kernel heap could be problematic due to
  164  165   * physical memory fragmentation. That's why we allow preallocation of
 165  166   * "segkmem_kmemlp_min" bytes at boot time.
 166  167   */
 167  168  static  size_t  segkmem_kmemlp_min;
 168  169  
 169  170  /*
  170  171   * Throttling is used to avoid expensive attempts to allocate large pages
  171  172   * for the kernel heap when many successive attempts to do so fail.
 172  173   */
 173  174  static  ulong_t segkmem_lpthrottle_max = 0x400000;
 174  175  static  ulong_t segkmem_lpthrottle_start = 0x40;
 175  176  static  ulong_t segkmem_use_lpthrottle = 1;
 176  177  
 177  178  /*
 178  179   * Freed pages accumulate on a garbage list until segkmem is ready,
 179  180   * at which point we call segkmem_gc() to free it all.
 180  181   */
 181  182  typedef struct segkmem_gc_list {
 182  183          struct segkmem_gc_list  *gc_next;
 183  184          vmem_t                  *gc_arena;
 184  185          size_t                  gc_size;
 185  186  } segkmem_gc_list_t;
 186  187  
 187  188  static segkmem_gc_list_t *segkmem_gc_list;
 188  189  
 189  190  /*
 190  191   * Allocations from the hat_memload arena add VM_MEMLOAD to their
 191  192   * vmflags so that segkmem_xalloc() can inform the hat layer that it needs
 192  193   * to take steps to prevent infinite recursion.  HAT allocations also
 193  194   * must be non-relocatable to prevent recursive page faults.
 194  195   */
 195  196  static void *
 196  197  hat_memload_alloc(vmem_t *vmp, size_t size, int flags)
 197  198  {
 198  199          flags |= (VM_MEMLOAD | VM_NORELOC);
 199  200          return (segkmem_alloc(vmp, size, flags));
 200  201  }
 201  202  
 202  203  /*
 203  204   * Allocations from static_arena arena (or any other arena that uses
 204  205   * segkmem_alloc_permanent()) require non-relocatable (permanently
 205  206   * wired) memory pages, since these pages are referenced by physical
 206  207   * as well as virtual address.
 207  208   */
 208  209  void *
 209  210  segkmem_alloc_permanent(vmem_t *vmp, size_t size, int flags)
 210  211  {
 211  212          return (segkmem_alloc(vmp, size, flags | VM_NORELOC));
 212  213  }
 213  214  
 214  215  /*
 215  216   * Initialize kernel heap boundaries.
 216  217   */
 217  218  void
 218  219  kernelheap_init(
 219  220          void *heap_start,
 220  221          void *heap_end,
 221  222          char *first_avail,
 222  223          void *core_start,
 223  224          void *core_end)
 224  225  {
 225  226          uintptr_t textbase;
 226  227          size_t core_size;
 227  228          size_t heap_size;
 228  229          vmem_t *heaptext_parent;
 229  230          size_t  heap_lp_size = 0;
 230  231  #ifdef __sparc
 231  232          size_t kmem64_sz = kmem64_aligned_end - kmem64_base;
 232  233  #endif  /* __sparc */
 233  234  
 234  235          kernelheap = heap_start;
 235  236          ekernelheap = heap_end;
 236  237  
 237  238  #ifdef __sparc
 238  239          heap_lp_size = (((uintptr_t)heap_end - (uintptr_t)heap_start) / 4);
 239  240          /*
 240  241           * Bias heap_lp start address by kmem64_sz to reduce collisions
 241  242           * in 4M kernel TSB between kmem64 area and heap_lp
 242  243           */
 243  244          kmem64_sz = P2ROUNDUP(kmem64_sz, MMU_PAGESIZE256M);
 244  245          if (kmem64_sz <= heap_lp_size / 2)
 245  246                  heap_lp_size -= kmem64_sz;
 246  247          heap_lp_base = ekernelheap - heap_lp_size;
 247  248          heap_lp_end = heap_lp_base + heap_lp_size;
 248  249  #endif  /* __sparc */
 249  250  
 250  251          /*
 251  252           * If this platform has a 'core' heap area, then the space for
 252  253           * overflow module text should be carved out of the end of that
 253  254           * heap.  Otherwise, it gets carved out of the general purpose
 254  255           * heap.
 255  256           */
 256  257          core_size = (uintptr_t)core_end - (uintptr_t)core_start;
 257  258          if (core_size > 0) {
 258  259                  ASSERT(core_size >= HEAPTEXT_SIZE);
 259  260                  textbase = (uintptr_t)core_end - HEAPTEXT_SIZE;
 260  261                  core_size -= HEAPTEXT_SIZE;
 261  262          }
 262  263  #ifndef __sparc
 263  264          else {
 264  265                  ekernelheap -= HEAPTEXT_SIZE;
 265  266                  textbase = (uintptr_t)ekernelheap;
 266  267          }
 267  268  #endif
 268  269  
 269  270          heap_size = (uintptr_t)ekernelheap - (uintptr_t)kernelheap;
 270  271          heap_arena = vmem_init("heap", kernelheap, heap_size, PAGESIZE,
 271  272              segkmem_alloc, segkmem_free);
 272  273  
 273  274          if (core_size > 0) {
 274  275                  heap_core_arena = vmem_create("heap_core", core_start,
 275  276                      core_size, PAGESIZE, NULL, NULL, NULL, 0, VM_SLEEP);
 276  277                  heap_core_base = core_start;
 277  278          } else {
 278  279                  heap_core_arena = heap_arena;
 279  280                  heap_core_base = kernelheap;
 280  281          }
 281  282  
 282  283          /*
  283  284           * Reserve space for the large page heap. If large pages for the
  284  285           * kernel heap are enabled, the large page heap arena will be created
  285  286           * later in the boot sequence in segkmem_heap_lp_init(). Otherwise the
  286  287           * allocated range will be returned to the heap_arena.
 287  288           */
 288  289          if (heap_lp_size) {
 289  290                  (void) vmem_xalloc(heap_arena, heap_lp_size, PAGESIZE, 0, 0,
 290  291                      heap_lp_base, heap_lp_end,
 291  292                      VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
 292  293          }
 293  294  
 294  295          /*
 295  296           * Remove the already-spoken-for memory range [kernelheap, first_avail).
 296  297           */
 297  298          (void) vmem_xalloc(heap_arena, first_avail - kernelheap, PAGESIZE,
 298  299              0, 0, kernelheap, first_avail, VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
 299  300  
 300  301  #ifdef __sparc
 301  302          heap32_arena = vmem_create("heap32", (void *)SYSBASE32,
 302  303              SYSLIMIT32 - SYSBASE32 - HEAPTEXT_SIZE, PAGESIZE, NULL,
 303  304              NULL, NULL, 0, VM_SLEEP);
 304  305          /*
 305  306           * Prom claims the physical and virtual resources used by panicbuf
  306  307           * and intr_vec_table. So reserve space for panicbuf, intr_vec_table,
  307  308           * and reserved interrupt vector data structures from the 32-bit heap.
 308  309           */
 309  310          (void) vmem_xalloc(heap32_arena, PANICBUFSIZE, PAGESIZE, 0, 0,
 310  311              panicbuf, panicbuf + PANICBUFSIZE,
 311  312              VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
 312  313  
 313  314          (void) vmem_xalloc(heap32_arena, IVSIZE, PAGESIZE, 0, 0,
 314  315              intr_vec_table, (caddr_t)intr_vec_table + IVSIZE,
 315  316              VM_NOSLEEP | VM_BESTFIT | VM_PANIC);
 316  317  
 317  318          textbase = SYSLIMIT32 - HEAPTEXT_SIZE;
 318  319          heaptext_parent = NULL;
 319  320  #else   /* __sparc */
 320  321          heap32_arena = heap_core_arena;
 321  322          heaptext_parent = heap_core_arena;
 322  323  #endif  /* __sparc */
 323  324  
 324  325          heaptext_arena = vmem_create("heaptext", (void *)textbase,
 325  326              HEAPTEXT_SIZE, PAGESIZE, NULL, NULL, heaptext_parent, 0, VM_SLEEP);
 326  327  
 327  328          /*
 328  329           * Create a set of arenas for memory with static translations
 329  330           * (e.g. VA -> PA translations cannot change).  Since using
 330  331           * kernel pages by physical address implies it isn't safe to
 331  332           * walk across page boundaries, the static_arena quantum must
 332  333           * be PAGESIZE.  Any kmem caches that require static memory
 333  334           * should source from static_arena, while direct allocations
 334  335           * should only use static_alloc_arena.
 335  336           */
 336  337          static_arena = vmem_create("static", NULL, 0, PAGESIZE,
 337  338              segkmem_alloc_permanent, segkmem_free, heap_arena, 0, VM_SLEEP);
 338  339          static_alloc_arena = vmem_create("static_alloc", NULL, 0,
 339  340              sizeof (uint64_t), vmem_alloc, vmem_free, static_arena,
 340  341              0, VM_SLEEP);
 341  342  
 342  343          /*
 343  344           * Create an arena for translation data (ptes, hmes, or hblks).
 344  345           * We need an arena for this because hat_memload() is essential
 345  346           * to vmem_populate() (see comments in common/os/vmem.c).
 346  347           *
 347  348           * Note: any kmem cache that allocates from hat_memload_arena
 348  349           * must be created as a KMC_NOHASH cache (i.e. no external slab
 349  350           * and bufctl structures to allocate) so that slab creation doesn't
 350  351           * require anything more than a single vmem_alloc().
 351  352           */
 352  353          hat_memload_arena = vmem_create("hat_memload", NULL, 0, PAGESIZE,
 353  354              hat_memload_alloc, segkmem_free, heap_arena, 0,
 354  355              VM_SLEEP | VMC_POPULATOR | VMC_DUMPSAFE);
 355  356  }
 356  357  
 357  358  void
 358  359  boot_mapin(caddr_t addr, size_t size)
 359  360  {
 360  361          caddr_t  eaddr;
 361  362          page_t  *pp;
 362  363          pfn_t    pfnum;
 363  364  
 364  365          if (page_resv(btop(size), KM_NOSLEEP) == 0)
 365  366                  panic("boot_mapin: page_resv failed");
 366  367  
 367  368          for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
 368  369                  pfnum = va_to_pfn(addr);
 369  370                  if (pfnum == PFN_INVALID)
 370  371                          continue;
 371  372                  if ((pp = page_numtopp_nolock(pfnum)) == NULL)
 372  373                          panic("boot_mapin(): No pp for pfnum = %lx", pfnum);
 373  374  
 374  375                  /*
 375  376                   * must break up any large pages that may have constituent
 376  377                   * pages being utilized for BOP_ALLOC()'s before calling
  377  378                   * page_numtopp(). The locking code (i.e. page_reclaim())
  378  379                   * can't handle them.
 379  380                   */
 380  381                  if (pp->p_szc != 0)
 381  382                          page_boot_demote(pp);
 382  383  
 383  384                  pp = page_numtopp(pfnum, SE_EXCL);
 384  385                  if (pp == NULL || PP_ISFREE(pp))
 385  386                          panic("boot_alloc: pp is NULL or free");
 386  387  
 387  388                  /*
 388  389                   * If the cage is on but doesn't yet contain this page,
 389  390                   * mark it as non-relocatable.
 390  391                   */
 391  392                  if (kcage_on && !PP_ISNORELOC(pp)) {
 392  393                          PP_SETNORELOC(pp);
 393  394                          PLCNT_XFER_NORELOC(pp);
 394  395                  }
 395  396  
 396  397                  (void) page_hashin(pp, &kvp, (u_offset_t)(uintptr_t)addr, NULL);
 397  398                  pp->p_lckcnt = 1;
 398  399  #if defined(__x86)
 399  400                  page_downgrade(pp);
 400  401  #else
 401  402                  page_unlock(pp);
 402  403  #endif
 403  404          }
 404  405  }
 405  406  
 406  407  /*
 407  408   * Get pages from boot and hash them into the kernel's vp.
 408  409   * Used after page structs have been allocated, but before segkmem is ready.
 409  410   */
 410  411  void *
 411  412  boot_alloc(void *inaddr, size_t size, uint_t align)
 412  413  {
 413  414          caddr_t addr = inaddr;
 414  415  
 415  416          if (bootops == NULL)
 416  417                  prom_panic("boot_alloc: attempt to allocate memory after "
 417  418                      "BOP_GONE");
 418  419  
 419  420          size = ptob(btopr(size));
 420  421  #ifdef __sparc
 421  422          if (bop_alloc_chunk(addr, size, align) != (caddr_t)addr)
 422  423                  panic("boot_alloc: bop_alloc_chunk failed");
 423  424  #else
 424  425          if (BOP_ALLOC(bootops, addr, size, align) != addr)
 425  426                  panic("boot_alloc: BOP_ALLOC failed");
 426  427  #endif
 427  428          boot_mapin((caddr_t)addr, size);
 428  429          return (addr);
 429  430  }
 430  431  
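
boot_alloc() is only usable while bootops is still live, and the caller supplies the virtual address it wants backed; the pages are then hashed into &kvp by boot_mapin(). A hedged sketch of a caller follows; the function name and address argument are illustrative only.

    /* Illustrative early-boot caller: back `want_va' with one page through
     * the boot allocator (BO_NO_ALIGN as in segkmem_alloc_vn() below). */
    static caddr_t
    example_early_buffer(caddr_t want_va)
    {
            return (boot_alloc(want_va, PAGESIZE, BO_NO_ALIGN));
    }
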
 431  432  static void
 432  433  segkmem_badop()
 433  434  {
 434  435          panic("segkmem_badop");
 435  436  }
 436  437  
 437  438  #define SEGKMEM_BADOP(t)        (t(*)())segkmem_badop
 438  439  
 439  440  /*ARGSUSED*/
 440  441  static faultcode_t
 441  442  segkmem_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t size,
 442  443          enum fault_type type, enum seg_rw rw)
 443  444  {
 444  445          pgcnt_t npages;
 445  446          spgcnt_t pg;
 446  447          page_t *pp;
 447  448          struct vnode *vp = seg->s_data;
 448  449  
 449  450          ASSERT(RW_READ_HELD(&seg->s_as->a_lock));
 450  451  
 451  452          if (seg->s_as != &kas || size > seg->s_size ||
 452  453              addr < seg->s_base || addr + size > seg->s_base + seg->s_size)
 453  454                  panic("segkmem_fault: bad args");
 454  455  
 455  456          /*
 456  457           * If it is one of segkp pages, call segkp_fault.
 457  458           */
 458  459          if (segkp_bitmap && seg == &kvseg &&
 459  460              BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
 460  461                  return (SEGOP_FAULT(hat, segkp, addr, size, type, rw));
 461  462  
 462  463          if (rw != S_READ && rw != S_WRITE && rw != S_OTHER)
 463  464                  return (FC_NOSUPPORT);
 464  465  
 465  466          npages = btopr(size);
 466  467  
 467  468          switch (type) {
 468  469          case F_SOFTLOCK:        /* lock down already-loaded translations */
 469  470                  for (pg = 0; pg < npages; pg++) {
 470  471                          pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr,
 471  472                              SE_SHARED);
 472  473                          if (pp == NULL) {
 473  474                                  /*
 474  475                                   * Hmm, no page. Does a kernel mapping
 475  476                                   * exist for it?
 476  477                                   */
 477  478                                  if (!hat_probe(kas.a_hat, addr)) {
 478  479                                          addr -= PAGESIZE;
 479  480                                          while (--pg >= 0) {
 480  481                                                  pp = page_find(vp, (u_offset_t)
 481  482                                                      (uintptr_t)addr);
 482  483                                                  if (pp)
 483  484                                                          page_unlock(pp);
 484  485                                                  addr -= PAGESIZE;
 485  486                                          }
 486  487                                          return (FC_NOMAP);
 487  488                                  }
 488  489                          }
 489  490                          addr += PAGESIZE;
 490  491                  }
 491  492                  if (rw == S_OTHER)
 492  493                          hat_reserve(seg->s_as, addr, size);
 493  494                  return (0);
 494  495          case F_SOFTUNLOCK:
 495  496                  while (npages--) {
 496  497                          pp = page_find(vp, (u_offset_t)(uintptr_t)addr);
 497  498                          if (pp)
 498  499                                  page_unlock(pp);
 499  500                          addr += PAGESIZE;
 500  501                  }
 501  502                  return (0);
 502  503          default:
 503  504                  return (FC_NOSUPPORT);
 504  505          }
 505  506          /*NOTREACHED*/
 506  507  }
 507  508  
 508  509  static int
 509  510  segkmem_setprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
 510  511  {
 511  512          ASSERT(RW_LOCK_HELD(&seg->s_as->a_lock));
 512  513  
 513  514          if (seg->s_as != &kas || size > seg->s_size ||
 514  515              addr < seg->s_base || addr + size > seg->s_base + seg->s_size)
 515  516                  panic("segkmem_setprot: bad args");
 516  517  
 517  518          /*
 518  519           * If it is one of segkp pages, call segkp.
 519  520           */
 520  521          if (segkp_bitmap && seg == &kvseg &&
 521  522              BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
 522  523                  return (SEGOP_SETPROT(segkp, addr, size, prot));
 523  524  
 524  525          if (prot == 0)
 525  526                  hat_unload(kas.a_hat, addr, size, HAT_UNLOAD);
 526  527          else
 527  528                  hat_chgprot(kas.a_hat, addr, size, prot);
 528  529          return (0);
 529  530  }
 530  531  
 531  532  /*
 532  533   * This is a dummy segkmem function overloaded to call segkp
 533  534   * when segkp is under the heap.
 534  535   */
 535  536  /* ARGSUSED */
 536  537  static int
 537  538  segkmem_checkprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
 538  539  {
 539  540          ASSERT(RW_LOCK_HELD(&seg->s_as->a_lock));
 540  541  
 541  542          if (seg->s_as != &kas)
 542  543                  segkmem_badop();
 543  544  
 544  545          /*
 545  546           * If it is one of segkp pages, call into segkp.
 546  547           */
 547  548          if (segkp_bitmap && seg == &kvseg &&
 548  549              BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
 549  550                  return (SEGOP_CHECKPROT(segkp, addr, size, prot));
 550  551  
 551  552          segkmem_badop();
 552  553          return (0);
 553  554  }
 554  555  
 555  556  /*
 556  557   * This is a dummy segkmem function overloaded to call segkp
 557  558   * when segkp is under the heap.
 558  559   */
 559  560  /* ARGSUSED */
 560  561  static int
 561  562  segkmem_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
 562  563  {
 563  564          ASSERT(RW_LOCK_HELD(&seg->s_as->a_lock));
 564  565  
 565  566          if (seg->s_as != &kas)
 566  567                  segkmem_badop();
 567  568  
 568  569          /*
 569  570           * If it is one of segkp pages, call into segkp.
 570  571           */
 571  572          if (segkp_bitmap && seg == &kvseg &&
 572  573              BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
 573  574                  return (SEGOP_KLUSTER(segkp, addr, delta));
 574  575  
 575  576          segkmem_badop();
 576  577          return (0);
 577  578  }
 578  579  
 579  580  static void
 580  581  segkmem_xdump_range(void *arg, void *start, size_t size)
 581  582  {
 582  583          struct as *as = arg;
 583  584          caddr_t addr = start;
 584  585          caddr_t addr_end = addr + size;
 585  586  
 586  587          while (addr < addr_end) {
 587  588                  pfn_t pfn = hat_getpfnum(kas.a_hat, addr);
 588  589                  if (pfn != PFN_INVALID && pfn <= physmax && pf_is_memory(pfn))
 589  590                          dump_addpage(as, addr, pfn);
 590  591                  addr += PAGESIZE;
 591  592                  dump_timeleft = dump_timeout;
 592  593          }
 593  594  }
 594  595  
 595  596  static void
 596  597  segkmem_dump_range(void *arg, void *start, size_t size)
 597  598  {
 598  599          caddr_t addr = start;
 599  600          caddr_t addr_end = addr + size;
 600  601  
 601  602          /*
 602  603           * If we are about to start dumping the range of addresses we
  603  604           * carved out of the kernel heap for the large page heap, walk
  604  605           * heap_lp_arena to find which segments are actually populated.
 605  606           */
 606  607          if (SEGKMEM_USE_LARGEPAGES &&
 607  608              addr == heap_lp_base && addr_end == heap_lp_end &&
 608  609              vmem_size(heap_lp_arena, VMEM_ALLOC) < size) {
 609  610                  vmem_walk(heap_lp_arena, VMEM_ALLOC | VMEM_REENTRANT,
 610  611                      segkmem_xdump_range, arg);
 611  612          } else {
 612  613                  segkmem_xdump_range(arg, start, size);
 613  614          }
 614  615  }
 615  616  
 616  617  static void
 617  618  segkmem_dump(struct seg *seg)
 618  619  {
 619  620          /*
 620  621           * The kernel's heap_arena (represented by kvseg) is a very large
 621  622           * VA space, most of which is typically unused.  To speed up dumping
 622  623           * we use vmem_walk() to quickly find the pieces of heap_arena that
 623  624           * are actually in use.  We do the same for heap32_arena and
 624  625           * heap_core.
 625  626           *
 626  627           * We specify VMEM_REENTRANT to vmem_walk() because dump_addpage()
 627  628           * may ultimately need to allocate memory.  Reentrant walks are
 628  629           * necessarily imperfect snapshots.  The kernel heap continues
 629  630           * to change during a live crash dump, for example.  For a normal
 630  631           * crash dump, however, we know that there won't be any other threads
 631  632           * messing with the heap.  Therefore, at worst, we may fail to dump
 632  633           * the pages that get allocated by the act of dumping; but we will
 633  634           * always dump every page that was allocated when the walk began.
 634  635           *
 635  636           * The other segkmem segments are dense (fully populated), so there's
 636  637           * no need to use this technique when dumping them.
 637  638           *
 638  639           * Note: when adding special dump handling for any new sparsely-
 639  640           * populated segments, be sure to add similar handling to the ::kgrep
 640  641           * code in mdb.
 641  642           */
 642  643          if (seg == &kvseg) {
 643  644                  vmem_walk(heap_arena, VMEM_ALLOC | VMEM_REENTRANT,
 644  645                      segkmem_dump_range, seg->s_as);
 645  646  #ifndef __sparc
 646  647                  vmem_walk(heaptext_arena, VMEM_ALLOC | VMEM_REENTRANT,
 647  648                      segkmem_dump_range, seg->s_as);
 648  649  #endif
 649  650          } else if (seg == &kvseg_core) {
 650  651                  vmem_walk(heap_core_arena, VMEM_ALLOC | VMEM_REENTRANT,
 651  652                      segkmem_dump_range, seg->s_as);
 652  653          } else if (seg == &kvseg32) {
 653  654                  vmem_walk(heap32_arena, VMEM_ALLOC | VMEM_REENTRANT,
 654  655                      segkmem_dump_range, seg->s_as);
 655  656                  vmem_walk(heaptext_arena, VMEM_ALLOC | VMEM_REENTRANT,
 656  657                      segkmem_dump_range, seg->s_as);
 657  658          } else if (seg == &kzioseg) {
 658  659                  /*
 659  660                   * We don't want to dump pages attached to kzioseg since they
 660  661                   * contain file data from ZFS.  If this page's segment is
 661  662                   * kzioseg return instead of writing it to the dump device.
 662  663                   */
 663  664                  return;
 664  665          } else {
 665  666                  segkmem_dump_range(seg->s_as, seg->s_base, seg->s_size);
 666  667          }
 667  668  }
 668  669  
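
The comment in segkmem_dump() relies on vmem_walk() visiting only the allocated spans of a sparse arena. The sketch below shows the same walking pattern with a callback of the same shape as segkmem_xdump_range(); the function names are hypothetical. For a plain byte total, vmem_size(arena, VMEM_ALLOC), as segkmem_dump_range() uses for heap_lp_arena, already suffices without a walk.

    /* Hypothetical walker: accumulate the size of each allocated span. */
    static void
    example_count_range(void *arg, void *start, size_t size)
    {
            *(size_t *)arg += size;
    }

    static size_t
    example_heap_inuse(void)
    {
            size_t total = 0;

            vmem_walk(heap_arena, VMEM_ALLOC, example_count_range, &total);
            return (total);
    }
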
 669  670  /*
 670  671   * lock/unlock kmem pages over a given range [addr, addr+len).
 671  672   * Returns a shadow list of pages in ppp. If there are holes
 672  673   * in the range (e.g. some of the kernel mappings do not have
 673  674   * underlying page_ts) returns ENOTSUP so that as_pagelock()
 674  675   * will handle the range via as_fault(F_SOFTLOCK).
 675  676   */
 676  677  /*ARGSUSED*/
 677  678  static int
 678  679  segkmem_pagelock(struct seg *seg, caddr_t addr, size_t len,
 679  680          page_t ***ppp, enum lock_type type, enum seg_rw rw)
 680  681  {
 681  682          page_t **pplist, *pp;
 682  683          pgcnt_t npages;
 683  684          spgcnt_t pg;
 684  685          size_t nb;
 685  686          struct vnode *vp = seg->s_data;
 686  687  
 687  688          ASSERT(ppp != NULL);
 688  689  
 689  690          /*
 690  691           * If it is one of segkp pages, call into segkp.
 691  692           */
 692  693          if (segkp_bitmap && seg == &kvseg &&
 693  694              BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
 694  695                  return (SEGOP_PAGELOCK(segkp, addr, len, ppp, type, rw));
 695  696  
 696  697          npages = btopr(len);
 697  698          nb = sizeof (page_t *) * npages;
 698  699  
 699  700          if (type == L_PAGEUNLOCK) {
 700  701                  pplist = *ppp;
 701  702                  ASSERT(pplist != NULL);
 702  703  
 703  704                  for (pg = 0; pg < npages; pg++) {
 704  705                          pp = pplist[pg];
 705  706                          page_unlock(pp);
 706  707                  }
 707  708                  kmem_free(pplist, nb);
 708  709                  return (0);
 709  710          }
 710  711  
 711  712          ASSERT(type == L_PAGELOCK);
 712  713  
 713  714          pplist = kmem_alloc(nb, KM_NOSLEEP);
 714  715          if (pplist == NULL) {
 715  716                  *ppp = NULL;
 716  717                  return (ENOTSUP);       /* take the slow path */
 717  718          }
 718  719  
 719  720          for (pg = 0; pg < npages; pg++) {
 720  721                  pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr, SE_SHARED);
 721  722                  if (pp == NULL) {
 722  723                          while (--pg >= 0)
 723  724                                  page_unlock(pplist[pg]);
 724  725                          kmem_free(pplist, nb);
 725  726                          *ppp = NULL;
 726  727                          return (ENOTSUP);
 727  728                  }
 728  729                  pplist[pg] = pp;
 729  730                  addr += PAGESIZE;
 730  731          }
 731  732  
 732  733          *ppp = pplist;
 733  734          return (0);
 734  735  }
 735  736  
 736  737  /*
 737  738   * This is a dummy segkmem function overloaded to call segkp
 738  739   * when segkp is under the heap.
 739  740   */
 740  741  /* ARGSUSED */
 741  742  static int
 742  743  segkmem_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
 743  744  {
 744  745          ASSERT(RW_LOCK_HELD(&seg->s_as->a_lock));
 745  746  
 746  747          if (seg->s_as != &kas)
 747  748                  segkmem_badop();
 748  749  
 749  750          /*
 750  751           * If it is one of segkp pages, call into segkp.
 751  752           */
 752  753          if (segkp_bitmap && seg == &kvseg &&
 753  754              BT_TEST(segkp_bitmap, btop((uintptr_t)(addr - seg->s_base))))
 754  755                  return (SEGOP_GETMEMID(segkp, addr, memidp));
 755  756  
 756  757          segkmem_badop();
 757  758          return (0);
 758  759  }
 759  760  
 760  761  /*ARGSUSED*/
 761  762  static lgrp_mem_policy_info_t *
 762  763  segkmem_getpolicy(struct seg *seg, caddr_t addr)
 763  764  {
 764  765          return (NULL);
 765  766  }
 766  767  
 767  768  /*ARGSUSED*/
 768  769  static int
 769  770  segkmem_capable(struct seg *seg, segcapability_t capability)
 770  771  {
 771  772          if (capability == S_CAPABILITY_NOMINFLT)
 772  773                  return (1);
 773  774          return (0);
 774  775  }
 775  776  
 776      -static struct seg_ops segkmem_ops = {
      777 +struct seg_ops segkmem_ops = {
 777  778          SEGKMEM_BADOP(int),             /* dup */
 778  779          SEGKMEM_BADOP(int),             /* unmap */
 779  780          SEGKMEM_BADOP(void),            /* free */
 780  781          segkmem_fault,
 781  782          SEGKMEM_BADOP(faultcode_t),     /* faulta */
 782  783          segkmem_setprot,
 783  784          segkmem_checkprot,
 784  785          segkmem_kluster,
 785  786          SEGKMEM_BADOP(size_t),          /* swapout */
 786  787          SEGKMEM_BADOP(int),             /* sync */
 787  788          SEGKMEM_BADOP(size_t),          /* incore */
 788  789          SEGKMEM_BADOP(int),             /* lockop */
 789  790          SEGKMEM_BADOP(int),             /* getprot */
 790  791          SEGKMEM_BADOP(u_offset_t),      /* getoffset */
 791  792          SEGKMEM_BADOP(int),             /* gettype */
 792  793          SEGKMEM_BADOP(int),             /* getvp */
 793  794          SEGKMEM_BADOP(int),             /* advise */
 794  795          segkmem_dump,
 795  796          segkmem_pagelock,
 796  797          SEGKMEM_BADOP(int),             /* setpgsz */
 797  798          segkmem_getmemid,
 798  799          segkmem_getpolicy,              /* getpolicy */
 799  800          segkmem_capable,                /* capable */
 800  801          seg_inherit_notsup              /* inherit */
 801  802  };
 802  803  
 803  804  int
 804  805  segkmem_zio_create(struct seg *seg)
 805  806  {
 806  807          ASSERT(seg->s_as == &kas && RW_WRITE_HELD(&kas.a_lock));
 807  808          seg->s_ops = &segkmem_ops;
 808  809          seg->s_data = &zvp;
 809  810          kas.a_size += seg->s_size;
 810  811          return (0);
 811  812  }
 812  813  
 813  814  int
 814  815  segkmem_create(struct seg *seg)
 815  816  {
 816  817          ASSERT(seg->s_as == &kas && RW_WRITE_HELD(&kas.a_lock));
 817  818          seg->s_ops = &segkmem_ops;
 818  819          seg->s_data = &kvp;
 819  820          kas.a_size += seg->s_size;
 820  821          return (0);
 821  822  }
 822  823  
 823  824  /*ARGSUSED*/
 824  825  page_t *
 825  826  segkmem_page_create(void *addr, size_t size, int vmflag, void *arg)
 826  827  {
 827  828          struct seg kseg;
 828  829          int pgflags;
 829  830          struct vnode *vp = arg;
 830  831  
 831  832          if (vp == NULL)
 832  833                  vp = &kvp;
 833  834  
 834  835          kseg.s_as = &kas;
 835  836          pgflags = PG_EXCL;
 836  837  
 837  838          if (segkmem_reloc == 0 || (vmflag & VM_NORELOC))
 838  839                  pgflags |= PG_NORELOC;
 839  840          if ((vmflag & VM_NOSLEEP) == 0)
 840  841                  pgflags |= PG_WAIT;
 841  842          if (vmflag & VM_PANIC)
 842  843                  pgflags |= PG_PANIC;
 843  844          if (vmflag & VM_PUSHPAGE)
 844  845                  pgflags |= PG_PUSHPAGE;
 845  846          if (vmflag & VM_NORMALPRI) {
 846  847                  ASSERT(vmflag & VM_NOSLEEP);
 847  848                  pgflags |= PG_NORMALPRI;
 848  849          }
 849  850  
 850  851          return (page_create_va(vp, (u_offset_t)(uintptr_t)addr, size,
 851  852              pgflags, &kseg, addr));
 852  853  }
 853  854  
 854  855  /*
 855  856   * Allocate pages to back the virtual address range [addr, addr + size).
 856  857   * If addr is NULL, allocate the virtual address space as well.
 857  858   */
 858  859  void *
 859  860  segkmem_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag, uint_t attr,
 860  861          page_t *(*page_create_func)(void *, size_t, int, void *), void *pcarg)
 861  862  {
 862  863          page_t *ppl;
 863  864          caddr_t addr = inaddr;
 864  865          pgcnt_t npages = btopr(size);
 865  866          int allocflag;
 866  867  
 867  868          if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
 868  869                  return (NULL);
 869  870  
 870  871          ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
 871  872  
 872  873          if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
 873  874                  if (inaddr == NULL)
 874  875                          vmem_free(vmp, addr, size);
 875  876                  return (NULL);
 876  877          }
 877  878  
 878  879          ppl = page_create_func(addr, size, vmflag, pcarg);
 879  880          if (ppl == NULL) {
 880  881                  if (inaddr == NULL)
 881  882                          vmem_free(vmp, addr, size);
 882  883                  page_unresv(npages);
 883  884                  return (NULL);
 884  885          }
 885  886  
 886  887          /*
 887  888           * Under certain conditions, we need to let the HAT layer know
 888  889           * that it cannot safely allocate memory.  Allocations from
 889  890           * the hat_memload vmem arena always need this, to prevent
 890  891           * infinite recursion.
 891  892           *
 892  893           * In addition, the x86 hat cannot safely do memory
 893  894           * allocations while in vmem_populate(), because there
 894  895           * is no simple bound on its usage.
 895  896           */
 896  897          if (vmflag & VM_MEMLOAD)
 897  898                  allocflag = HAT_NO_KALLOC;
 898  899  #if defined(__x86)
 899  900          else if (vmem_is_populator())
 900  901                  allocflag = HAT_NO_KALLOC;
 901  902  #endif
 902  903          else
 903  904                  allocflag = 0;
 904  905  
 905  906          while (ppl != NULL) {
 906  907                  page_t *pp = ppl;
 907  908                  page_sub(&ppl, pp);
 908  909                  ASSERT(page_iolock_assert(pp));
 909  910                  ASSERT(PAGE_EXCL(pp));
 910  911                  page_io_unlock(pp);
 911  912                  hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset, pp,
 912  913                      (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
 913  914                      HAT_LOAD_LOCK | allocflag);
 914  915                  pp->p_lckcnt = 1;
 915  916  #if defined(__x86)
 916  917                  page_downgrade(pp);
 917  918  #else
 918  919                  if (vmflag & SEGKMEM_SHARELOCKED)
 919  920                          page_downgrade(pp);
 920  921                  else
 921  922                          page_unlock(pp);
 922  923  #endif
 923  924          }
 924  925  
 925  926          return (addr);
 926  927  }
 927  928  
 928  929  static void *
 929  930  segkmem_alloc_vn(vmem_t *vmp, size_t size, int vmflag, struct vnode *vp)
 930  931  {
 931  932          void *addr;
 932  933          segkmem_gc_list_t *gcp, **prev_gcpp;
 933  934  
 934  935          ASSERT(vp != NULL);
 935  936  
 936  937          if (kvseg.s_base == NULL) {
 937  938  #ifndef __sparc
 938  939                  if (bootops->bsys_alloc == NULL)
 939  940                          halt("Memory allocation between bop_alloc() and "
 940  941                              "kmem_alloc().\n");
 941  942  #endif
 942  943  
 943  944                  /*
 944  945                   * There's not a lot of memory to go around during boot,
 945  946                   * so recycle it if we can.
 946  947                   */
 947  948                  for (prev_gcpp = &segkmem_gc_list; (gcp = *prev_gcpp) != NULL;
 948  949                      prev_gcpp = &gcp->gc_next) {
 949  950                          if (gcp->gc_arena == vmp && gcp->gc_size == size) {
 950  951                                  *prev_gcpp = gcp->gc_next;
 951  952                                  return (gcp);
 952  953                          }
 953  954                  }
 954  955  
 955  956                  addr = vmem_alloc(vmp, size, vmflag | VM_PANIC);
 956  957                  if (boot_alloc(addr, size, BO_NO_ALIGN) != addr)
 957  958                          panic("segkmem_alloc: boot_alloc failed");
 958  959                  return (addr);
 959  960          }
 960  961          return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
 961  962              segkmem_page_create, vp));
 962  963  }
 963  964  
 964  965  void *
 965  966  segkmem_alloc(vmem_t *vmp, size_t size, int vmflag)
 966  967  {
 967  968          return (segkmem_alloc_vn(vmp, size, vmflag, &kvp));
 968  969  }
 969  970  
 970  971  void *
 971  972  segkmem_zio_alloc(vmem_t *vmp, size_t size, int vmflag)
 972  973  {
 973  974          return (segkmem_alloc_vn(vmp, size, vmflag, &zvp));
 974  975  }
 975  976  
 976  977  /*
 977  978   * Any changes to this routine must also be carried over to
 978  979   * devmap_free_pages() in the seg_dev driver. This is because
 979  980   * we currently don't have a special kernel segment for non-paged
 980  981   * kernel memory that is exported by drivers to user space.
 981  982   */
 982  983  static void
 983  984  segkmem_free_vn(vmem_t *vmp, void *inaddr, size_t size, struct vnode *vp,
 984  985      void (*func)(page_t *))
 985  986  {
 986  987          page_t *pp;
 987  988          caddr_t addr = inaddr;
 988  989          caddr_t eaddr;
 989  990          pgcnt_t npages = btopr(size);
 990  991  
 991  992          ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
 992  993          ASSERT(vp != NULL);
 993  994  
 994  995          if (kvseg.s_base == NULL) {
 995  996                  segkmem_gc_list_t *gc = inaddr;
 996  997                  gc->gc_arena = vmp;
 997  998                  gc->gc_size = size;
 998  999                  gc->gc_next = segkmem_gc_list;
 999 1000                  segkmem_gc_list = gc;
1000 1001                  return;
1001 1002          }
1002 1003  
1003 1004          hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1004 1005  
1005 1006          for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
1006 1007  #if defined(__x86)
1007 1008                  pp = page_find(vp, (u_offset_t)(uintptr_t)addr);
1008 1009                  if (pp == NULL)
1009 1010                          panic("segkmem_free: page not found");
1010 1011                  if (!page_tryupgrade(pp)) {
1011 1012                          /*
1012 1013                           * Some other thread has a sharelock. Wait for
1013 1014                           * it to drop the lock so we can free this page.
1014 1015                           */
1015 1016                          page_unlock(pp);
1016 1017                          pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr,
1017 1018                              SE_EXCL);
1018 1019                  }
1019 1020  #else
1020 1021                  pp = page_lookup(vp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
1021 1022  #endif
1022 1023                  if (pp == NULL)
1023 1024                          panic("segkmem_free: page not found");
1024 1025                  /* Clear p_lckcnt so page_destroy() doesn't update availrmem */
1025 1026                  pp->p_lckcnt = 0;
1026 1027                  if (func)
1027 1028                          func(pp);
1028 1029                  else
1029 1030                          page_destroy(pp, 0);
1030 1031          }
1031 1032          if (func == NULL)
1032 1033                  page_unresv(npages);
1033 1034  
1034 1035          if (vmp != NULL)
1035 1036                  vmem_free(vmp, inaddr, size);
1036 1037  
1037 1038  }
1038 1039  
1039 1040  void
1040 1041  segkmem_xfree(vmem_t *vmp, void *inaddr, size_t size, void (*func)(page_t *))
1041 1042  {
1042 1043          segkmem_free_vn(vmp, inaddr, size, &kvp, func);
1043 1044  }
1044 1045  
1045 1046  void
1046 1047  segkmem_free(vmem_t *vmp, void *inaddr, size_t size)
1047 1048  {
1048 1049          segkmem_free_vn(vmp, inaddr, size, &kvp, NULL);
1049 1050  }
1050 1051  
1051 1052  void
1052 1053  segkmem_zio_free(vmem_t *vmp, void *inaddr, size_t size)
1053 1054  {
1054 1055          segkmem_free_vn(vmp, inaddr, size, &zvp, NULL);
1055 1056  }
1056 1057  
1057 1058  void
1058 1059  segkmem_gc(void)
1059 1060  {
1060 1061          ASSERT(kvseg.s_base != NULL);
1061 1062          while (segkmem_gc_list != NULL) {
1062 1063                  segkmem_gc_list_t *gc = segkmem_gc_list;
1063 1064                  segkmem_gc_list = gc->gc_next;
1064 1065                  segkmem_free(gc->gc_arena, gc, gc->gc_size);
1065 1066          }
1066 1067  }
1067 1068  
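
To restate the garbage-list mechanism: a free issued before kvseg is set up only queues the span on segkmem_gc_list (see segkmem_free_vn() above), and segkmem_gc() later destroys everything queued. The sketch below illustrates that ordering; the function and variable names are illustrative, and in practice the two calls come from different points in startup.

    /* Illustrative ordering of an early free against segkmem_gc(). */
    static void
    example_boot_free(void *early_span, size_t early_size)
    {
            /* kvseg.s_base is still NULL: this only threads the span onto
             * segkmem_gc_list; nothing is unloaded or destroyed yet. */
            segkmem_free(heap_arena, early_span, early_size);

            /* ... kernelheap_init() and segkmem_create() run ... */

            /* Now every queued span is actually freed back to its arena. */
            segkmem_gc();
    }
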
1068 1069  /*
1069 1070   * Legacy entry points from here to end of file.
1070 1071   */
1071 1072  void
1072 1073  segkmem_mapin(struct seg *seg, void *addr, size_t size, uint_t vprot,
1073 1074      pfn_t pfn, uint_t flags)
1074 1075  {
1075 1076          hat_unload(seg->s_as->a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1076 1077          hat_devload(seg->s_as->a_hat, addr, size, pfn, vprot,
1077 1078              flags | HAT_LOAD_LOCK);
1078 1079  }
1079 1080  
1080 1081  void
1081 1082  segkmem_mapout(struct seg *seg, void *addr, size_t size)
1082 1083  {
1083 1084          hat_unload(seg->s_as->a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1084 1085  }
1085 1086  
1086 1087  void *
1087 1088  kmem_getpages(pgcnt_t npages, int kmflag)
1088 1089  {
1089 1090          return (kmem_alloc(ptob(npages), kmflag));
1090 1091  }
1091 1092  
1092 1093  void
1093 1094  kmem_freepages(void *addr, pgcnt_t npages)
1094 1095  {
1095 1096          kmem_free(addr, ptob(npages));
1096 1097  }
1097 1098  
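
These legacy wrappers simply convert a page count to bytes and defer to kmem_alloc() and kmem_free(). A minimal, hypothetical usage pair:

    /* Allocate and release four pages through the legacy interfaces. */
    static void
    example_legacy_pages(void)
    {
            void *buf = kmem_getpages(4, KM_SLEEP);

            /* ... use the 4 * PAGESIZE buffer ... */

            kmem_freepages(buf, 4);
    }
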
1098 1099  /*
1099 1100   * segkmem_page_create_large() allocates a large page to be used for the kmem
1100 1101   * caches. If kpr is enabled we ask for a relocatable page unless requested
 1101 1102   * otherwise. If kpr is disabled we have to ask for a non-reloc page.
1102 1103   */
1103 1104  static page_t *
1104 1105  segkmem_page_create_large(void *addr, size_t size, int vmflag, void *arg)
1105 1106  {
1106 1107          int pgflags;
1107 1108  
1108 1109          pgflags = PG_EXCL;
1109 1110  
1110 1111          if (segkmem_reloc == 0 || (vmflag & VM_NORELOC))
1111 1112                  pgflags |= PG_NORELOC;
1112 1113          if (!(vmflag & VM_NOSLEEP))
1113 1114                  pgflags |= PG_WAIT;
1114 1115          if (vmflag & VM_PUSHPAGE)
1115 1116                  pgflags |= PG_PUSHPAGE;
1116 1117          if (vmflag & VM_NORMALPRI)
1117 1118                  pgflags |= PG_NORMALPRI;
1118 1119  
1119 1120          return (page_create_va_large(&kvp, (u_offset_t)(uintptr_t)addr, size,
1120 1121              pgflags, &kvseg, addr, arg));
1121 1122  }
1122 1123  
1123 1124  /*
1124 1125   * Allocate a large page to back the virtual address range
1125 1126   * [addr, addr + size).  If addr is NULL, allocate the virtual address
1126 1127   * space as well.
1127 1128   */
1128 1129  static void *
1129 1130  segkmem_xalloc_lp(vmem_t *vmp, void *inaddr, size_t size, int vmflag,
1130 1131      uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *),
1131 1132      void *pcarg)
1132 1133  {
1133 1134          caddr_t addr = inaddr, pa;
1134 1135          size_t  lpsize = segkmem_lpsize;
1135 1136          pgcnt_t npages = btopr(size);
1136 1137          pgcnt_t nbpages = btop(lpsize);
1137 1138          pgcnt_t nlpages = size >> segkmem_lpshift;
1138 1139          size_t  ppasize = nbpages * sizeof (page_t *);
1139 1140          page_t *pp, *rootpp, **ppa, *pplist = NULL;
1140 1141          int i;
1141 1142  
1142 1143          vmflag |= VM_NOSLEEP;
1143 1144  
1144 1145          if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
1145 1146                  return (NULL);
1146 1147          }
1147 1148  
1148 1149          /*
1149 1150           * allocate an array we need for hat_memload_array.
1150 1151           * we use a separate arena to avoid recursion.
1151 1152           * we will not need this array when hat_memload_array learns pp++
1152 1153           */
1153 1154          if ((ppa = vmem_alloc(segkmem_ppa_arena, ppasize, vmflag)) == NULL) {
1154 1155                  goto fail_array_alloc;
1155 1156          }
1156 1157  
1157 1158          if (inaddr == NULL && (addr = vmem_alloc(vmp, size, vmflag)) == NULL)
1158 1159                  goto fail_vmem_alloc;
1159 1160  
1160 1161          ASSERT(((uintptr_t)addr & (lpsize - 1)) == 0);
1161 1162  
1162 1163          /* create all the pages */
1163 1164          for (pa = addr, i = 0; i < nlpages; i++, pa += lpsize) {
1164 1165                  if ((pp = page_create_func(pa, lpsize, vmflag, pcarg)) == NULL)
1165 1166                          goto fail_page_create;
1166 1167                  page_list_concat(&pplist, &pp);
1167 1168          }
1168 1169  
 1169 1170          /* at this point we have all the resources to complete the request */
1170 1171          while ((rootpp = pplist) != NULL) {
1171 1172                  for (i = 0; i < nbpages; i++) {
1172 1173                          ASSERT(pplist != NULL);
1173 1174                          pp = pplist;
1174 1175                          page_sub(&pplist, pp);
1175 1176                          ASSERT(page_iolock_assert(pp));
1176 1177                          page_io_unlock(pp);
1177 1178                          ppa[i] = pp;
1178 1179                  }
1179 1180                  /*
1180 1181                   * Load the locked entry. It's OK to preload the entry into the
1181 1182                   * TSB since we now support large mappings in the kernel TSB.
1182 1183                   */
1183 1184                  hat_memload_array(kas.a_hat,
1184 1185                      (caddr_t)(uintptr_t)rootpp->p_offset, lpsize,
1185 1186                      ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr,
1186 1187                      HAT_LOAD_LOCK);
1187 1188  
1188 1189                  for (--i; i >= 0; --i) {
1189 1190                          ppa[i]->p_lckcnt = 1;
1190 1191                          page_unlock(ppa[i]);
1191 1192                  }
1192 1193          }
1193 1194  
1194 1195          vmem_free(segkmem_ppa_arena, ppa, ppasize);
1195 1196          return (addr);
1196 1197  
1197 1198  fail_page_create:
1198 1199          while ((rootpp = pplist) != NULL) {
1199 1200                  for (i = 0, pp = pplist; i < nbpages; i++, pp = pplist) {
1200 1201                          ASSERT(pp != NULL);
1201 1202                          page_sub(&pplist, pp);
1202 1203                          ASSERT(page_iolock_assert(pp));
1203 1204                          page_io_unlock(pp);
1204 1205                  }
1205 1206                  page_destroy_pages(rootpp);
1206 1207          }
1207 1208  
1208 1209          if (inaddr == NULL)
1209 1210                  vmem_free(vmp, addr, size);
1210 1211  
1211 1212  fail_vmem_alloc:
1212 1213          vmem_free(segkmem_ppa_arena, ppa, ppasize);
1213 1214  
1214 1215  fail_array_alloc:
1215 1216          page_unresv(npages);
1216 1217  
1217 1218          return (NULL);
1218 1219  }
1219 1220  
1220 1221  static void
1221 1222  segkmem_free_one_lp(caddr_t addr, size_t size)
1222 1223  {
1223 1224          page_t          *pp, *rootpp = NULL;
1224 1225          pgcnt_t         pgs_left = btopr(size);
1225 1226  
1226 1227          ASSERT(size == segkmem_lpsize);
1227 1228  
1228 1229          hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
1229 1230  
1230 1231          for (; pgs_left > 0; addr += PAGESIZE, pgs_left--) {
1231 1232                  pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
1232 1233                  if (pp == NULL)
1233 1234                          panic("segkmem_free_one_lp: page not found");
1234 1235                  ASSERT(PAGE_EXCL(pp));
1235 1236                  pp->p_lckcnt = 0;
1236 1237                  if (rootpp == NULL)
1237 1238                          rootpp = pp;
1238 1239          }
1239 1240          ASSERT(rootpp != NULL);
1240 1241          page_destroy_pages(rootpp);
1241 1242  
1242 1243          /* page_unresv() is done by the caller */
1243 1244  }
1244 1245  
1245 1246  /*
1246 1247   * This function is called to import new spans into the vmem arenas like
1247 1248   * kmem_default_arena and kmem_oversize_arena. It first tries to import
1248 1249   * spans from the large page arena, kmem_lp_arena. To do this it might have
1249 1250   * to "upgrade" the requested size to the kmem_lp_arena quantum. If it cannot
1250 1251   * satisfy the upgraded request, it falls back to regular segkmem_alloc(),
1251 1252   * which satisfies the request by importing from the "*vmp" arena.
1252 1253   */
1253 1254  /*ARGSUSED*/
1254 1255  void *
1255 1256  segkmem_alloc_lp(vmem_t *vmp, size_t *sizep, size_t align, int vmflag)
1256 1257  {
1257 1258          size_t size;
1258 1259          kthread_t *t = curthread;
1259 1260          segkmem_lpcb_t *lpcb = &segkmem_lpcb;
1260 1261  
1261 1262          ASSERT(sizep != NULL);
1262 1263  
1263 1264          size = *sizep;
1264 1265  
1265 1266          if (lpcb->lp_uselp && !(t->t_flag & T_PANIC) &&
1266 1267              !(vmflag & SEGKMEM_SHARELOCKED)) {
1267 1268  
1268 1269                  size_t kmemlp_qnt = segkmem_kmemlp_quantum;
1269 1270                  size_t asize = P2ROUNDUP(size, kmemlp_qnt);
1270 1271                  void  *addr = NULL;
1271 1272                  ulong_t *lpthrtp = &lpcb->lp_throttle;
1272 1273                  ulong_t lpthrt = *lpthrtp;
1273 1274                  int     dowakeup = 0;
1274 1275                  int     doalloc = 1;
1275 1276  
1276 1277                  ASSERT(kmem_lp_arena != NULL);
1277 1278                  ASSERT(asize >= size);
1278 1279  
1279 1280                  if (lpthrt != 0) {
1280 1281                          /* try to update the throttle value */
1281 1282                          lpthrt = atomic_inc_ulong_nv(lpthrtp);
1282 1283                          if (lpthrt >= segkmem_lpthrottle_max) {
1283 1284                                  lpthrt = atomic_cas_ulong(lpthrtp, lpthrt,
1284 1285                                      segkmem_lpthrottle_max / 4);
1285 1286                          }
1286 1287  
1287 1288                          /*
1288 1289                           * when above the throttle start, back off
1289 1290                           * exponentially on large pages and reaping
1290 1291                           */
1291 1292                          if (lpthrt > segkmem_lpthrottle_start &&
1292 1293                              !ISP2(lpthrt)) {
1293 1294                                  lpcb->allocs_throttled++;
1294 1295                                  lpthrt--;
1295 1296                                  if (ISP2(lpthrt))
1296 1297                                          kmem_reap();
1297 1298                                  return (segkmem_alloc(vmp, size, vmflag));
1298 1299                          }
1299 1300                  }
1300 1301  
1301 1302                  if (!(vmflag & VM_NOSLEEP) &&
1302 1303                      segkmem_heaplp_quantum >= (8 * kmemlp_qnt) &&
1303 1304                      vmem_size(kmem_lp_arena, VMEM_FREE) <= kmemlp_qnt &&
1304 1305                      asize < (segkmem_heaplp_quantum - kmemlp_qnt)) {
1305 1306  
1306 1307                          /*
1307 1308                           * we are low on free memory in kmem_lp_arena;
1308 1309                           * let only one thread allocate a heap_lp
1309 1310                           * quantum-sized chunk that everybody is going
1310 1311                           * to share
1311 1312                           */
1312 1313                          mutex_enter(&lpcb->lp_lock);
1313 1314  
1314 1315                          if (lpcb->lp_wait) {
1315 1316  
1316 1317                                  /* we are not the first one - wait */
1317 1318                                  cv_wait(&lpcb->lp_cv, &lpcb->lp_lock);
1318 1319                                  if (vmem_size(kmem_lp_arena, VMEM_FREE) <
1319 1320                                      kmemlp_qnt)  {
1320 1321                                          doalloc = 0;
1321 1322                                  }
1322 1323                          } else if (vmem_size(kmem_lp_arena, VMEM_FREE) <=
1323 1324                              kmemlp_qnt) {
1324 1325  
1325 1326                                  /*
1326 1327                                   * we are the first one, make sure we import
1327 1328                                   * a large page
1328 1329                                   */
1329 1330                                  if (asize == kmemlp_qnt)
1330 1331                                          asize += kmemlp_qnt;
1331 1332                                  dowakeup = 1;
1332 1333                                  lpcb->lp_wait = 1;
1333 1334                          }
1334 1335  
1335 1336                          mutex_exit(&lpcb->lp_lock);
1336 1337                  }
1337 1338  
1338 1339                  /*
1339 1340                   * VM_ABORT flag prevents sleeps in vmem_xalloc when
1340 1341                   * large pages are not available. In that case this allocation
1341 1342                   * attempt will fail and we will retry allocation with small
1342 1343                   * pages. We also do not want to panic if this allocation fails
1343 1344                   * because we are going to retry.
1344 1345                   */
1345 1346                  if (doalloc) {
1346 1347                          addr = vmem_alloc(kmem_lp_arena, asize,
1347 1348                              (vmflag | VM_ABORT) & ~VM_PANIC);
1348 1349  
1349 1350                          if (dowakeup) {
1350 1351                                  mutex_enter(&lpcb->lp_lock);
1351 1352                                  ASSERT(lpcb->lp_wait != 0);
1352 1353                                  lpcb->lp_wait = 0;
1353 1354                                  cv_broadcast(&lpcb->lp_cv);
1354 1355                                  mutex_exit(&lpcb->lp_lock);
1355 1356                          }
1356 1357                  }
1357 1358  
1358 1359                  if (addr != NULL) {
1359 1360                          *sizep = asize;
1360 1361                          *lpthrtp = 0;
1361 1362                          return (addr);
1362 1363                  }
1363 1364  
1364 1365                  if (vmflag & VM_NOSLEEP)
1365 1366                          lpcb->nosleep_allocs_failed++;
1366 1367                  else
1367 1368                          lpcb->sleep_allocs_failed++;
1368 1369                  lpcb->alloc_bytes_failed += size;
1369 1370  
1370 1371                  /* if large page throttling has not started yet, start it */
1371 1372                  if (segkmem_use_lpthrottle && lpthrt == 0) {
1372 1373                          lpthrt = atomic_cas_ulong(lpthrtp, lpthrt, 1);
1373 1374                  }
1374 1375          }
1375 1376          return (segkmem_alloc(vmp, size, vmflag));
1376 1377  }
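
Two pieces of arithmetic drive segkmem_alloc_lp() above: the request is rounded up to the kmem_lp arena quantum, and once the throttle counter is above the start value, large page attempts (and kmem_reap() calls) are gated on the counter being a power of two, which backs them off exponentially. The user-space sketch below reuses the illumos P2ROUNDUP/ISP2 definitions, but the sizes and the loop are hypothetical illustration only.

#include <stdio.h>
#include <stddef.h>

/* same definitions as the illumos P2ROUNDUP/ISP2 macros */
#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))
#define	ISP2(x)			(((x) & ((x) - 1)) == 0)

int
main(void)
{
	size_t quantum = 8 * 1024 * 1024;	/* hypothetical arena quantum */
	size_t request = 5 * 1024 * 1024;	/* hypothetical request */
	unsigned long throttle;

	printf("request %zu rounds up to %zu\n",
	    request, (size_t)P2ROUNDUP(request, quantum));

	/*
	 * Power-of-two values of a growing counter become exponentially
	 * rarer, so gating retries and reaps on ISP2(counter) spaces
	 * them out exponentially.
	 */
	for (throttle = 1; throttle <= 32; throttle++) {
		if (ISP2(throttle))
			printf("throttle %lu: large page attempt allowed\n",
			    throttle);
	}
	return (0);
}
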
1377 1378  
1378 1379  void
1379 1380  segkmem_free_lp(vmem_t *vmp, void *inaddr, size_t size)
1380 1381  {
1381 1382          if (kmem_lp_arena == NULL || !IS_KMEM_VA_LARGEPAGE((caddr_t)inaddr)) {
1382 1383                  segkmem_free(vmp, inaddr, size);
1383 1384          } else {
1384 1385                  vmem_free(kmem_lp_arena, inaddr, size);
1385 1386          }
1386 1387  }
1387 1388  
1388 1389  /*
1389 1390   * segkmem_alloc_lpi() imports virtual memory from the large page heap arena
1390 1391   * into the kmem_lp arena. In the process it maps the imported segment with
1391 1392   * large pages.
1392 1393   */
1393 1394  static void *
1394 1395  segkmem_alloc_lpi(vmem_t *vmp, size_t size, int vmflag)
1395 1396  {
1396 1397          segkmem_lpcb_t *lpcb = &segkmem_lpcb;
1397 1398          void  *addr;
1398 1399  
1399 1400          ASSERT(size != 0);
1400 1401          ASSERT(vmp == heap_lp_arena);
1401 1402  
1402 1403          /* do not allow the large page heap to grow beyond its limit */
1403 1404          if (vmem_size(vmp, VMEM_ALLOC) >= segkmem_kmemlp_max) {
1404 1405                  lpcb->allocs_limited++;
1405 1406                  return (NULL);
1406 1407          }
1407 1408  
1408 1409          addr = segkmem_xalloc_lp(vmp, NULL, size, vmflag, 0,
1409 1410              segkmem_page_create_large, NULL);
1410 1411          return (addr);
1411 1412  }
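
segkmem_alloc_lpi() above enforces a hard cap: once the large page heap arena already has segkmem_kmemlp_max bytes allocated, the import fails and the caller falls back to small pages. A toy user-space sketch of that cap check follows; import_span, import_max and the counters are hypothetical names, not the vmem API.

#include <stdio.h>
#include <stdlib.h>

static size_t imported_bytes;			/* like vmem_size(vmp, VMEM_ALLOC) */
static size_t import_max = 64 * 1024 * 1024;	/* like segkmem_kmemlp_max */
static unsigned long allocs_limited;		/* like lpcb->allocs_limited */

static void *
import_span(size_t size)
{
	if (imported_bytes >= import_max) {
		allocs_limited++;
		return (NULL);		/* caller falls back to small pages */
	}
	imported_bytes += size;
	return (malloc(size));
}

int
main(void)
{
	void *span = import_span(8 * 1024 * 1024);

	printf("import %s, imported_bytes=%zu\n",
	    span != NULL ? "ok" : "limited", imported_bytes);
	free(span);
	return (0);
}
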
1412 1413  
1413 1414  /*
1414 1415   * segkmem_free_lpi() returns virtual memory from the kmem_lp arena back into
1415 1416   * the large page heap arena. Before doing this it unmaps the segment and
1416 1417   * frees the large pages used to map it.
1417 1418   */
1418 1419  static void
1419 1420  segkmem_free_lpi(vmem_t *vmp, void *inaddr, size_t size)
1420 1421  {
1421 1422          pgcnt_t         nlpages = size >> segkmem_lpshift;
1422 1423          size_t          lpsize = segkmem_lpsize;
1423 1424          caddr_t         addr = inaddr;
1424 1425          pgcnt_t         npages = btopr(size);
1425 1426          int             i;
1426 1427  
1427 1428          ASSERT(vmp == heap_lp_arena);
1428 1429          ASSERT(IS_KMEM_VA_LARGEPAGE(addr));
1429 1430          ASSERT(((uintptr_t)inaddr & (lpsize - 1)) == 0);
1430 1431  
1431 1432          for (i = 0; i < nlpages; i++) {
1432 1433                  segkmem_free_one_lp(addr, lpsize);
1433 1434                  addr += lpsize;
1434 1435          }
1435 1436  
1436 1437          page_unresv(npages);
1437 1438  
1438 1439          vmem_free(vmp, inaddr, size);
1439 1440  }
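
As the comment above describes, the span is torn down one large page at a time before its virtual memory is returned to heap_lp_arena, and the large page count is simply size >> segkmem_lpshift. A small user-space illustration of that walk, with hypothetical page sizes and addresses:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	const unsigned lpshift = 22;			/* hypothetical 4MB large page */
	const size_t lpsize = (size_t)1 << lpshift;
	const size_t pagesize = 8192;			/* hypothetical base page */
	size_t span_size = 16 * 1024 * 1024;		/* 16MB span from kmem_lp */
	uintptr_t addr = 0x10000000UL;			/* hypothetical base VA */
	size_t nlpages = span_size >> lpshift;
	size_t i;

	for (i = 0; i < nlpages; i++) {
		printf("tear down large page %zu at 0x%lx\n",
		    i, (unsigned long)addr);
		addr += lpsize;
	}
	/* the whole span then unreserves its base pages and returns the VA */
	printf("unreserve %zu base pages\n", span_size / pagesize);
	return (0);
}
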
1440 1441  
1441 1442  /*
1442 1443   * This function is called at system boot time by kmem_init, right after the
1443 1444   * /etc/system file has been read. Based on the hardware configuration and
1444 1445   * /etc/system settings, it decides whether the system is going to use large
1445 1446   * pages. The initialization necessary to actually start using large pages
1446 1447   * happens later in the boot process, after segkmem_heap_lp_init() is called.
1447 1448   */
1448 1449  int
1449 1450  segkmem_lpsetup()
1450 1451  {
1451 1452          int use_large_pages = 0;
1452 1453  
1453 1454  #ifdef __sparc
1454 1455  
1455 1456          size_t memtotal = physmem * PAGESIZE;
1456 1457  
1457 1458          if (heap_lp_base == NULL) {
1458 1459                  segkmem_lpsize = PAGESIZE;
1459 1460                  return (0);
1460 1461          }
1461 1462  
1462 1463          /* get the platform-dependent large page size for the kernel heap */
1463 1464          segkmem_lpsize = get_segkmem_lpsize(segkmem_lpsize);
1464 1465  
1465 1466          if (segkmem_lpsize <= PAGESIZE) {
1466 1467                  /*
1467 1468                   * return the virtual space reserved for the large page
1468 1469                   * kernel heap back to the regular heap
1469 1470                   */
1470 1471                  vmem_xfree(heap_arena, heap_lp_base,
1471 1472                      heap_lp_end - heap_lp_base);
1472 1473                  heap_lp_base = NULL;
1473 1474                  heap_lp_end = NULL;
1474 1475                  segkmem_lpsize = PAGESIZE;
1475 1476                  return (0);
1476 1477          }
1477 1478  
1478 1479          /* set heap_lp quantum if necessary */
1479 1480          if (segkmem_heaplp_quantum == 0 || !ISP2(segkmem_heaplp_quantum) ||
1480 1481              P2PHASE(segkmem_heaplp_quantum, segkmem_lpsize)) {
1481 1482                  segkmem_heaplp_quantum = segkmem_lpsize;
1482 1483          }
1483 1484  
1484 1485          /* set kmem_lp quantum if necessary */
1485 1486          if (segkmem_kmemlp_quantum == 0 || !ISP2(segkmem_kmemlp_quantum) ||
1486 1487              segkmem_kmemlp_quantum > segkmem_heaplp_quantum) {
1487 1488                  segkmem_kmemlp_quantum = segkmem_heaplp_quantum;
1488 1489          }
1489 1490  
1490 1491          /* set total amount of memory allowed for large page kernel heap */
1491 1492          if (segkmem_kmemlp_max == 0) {
1492 1493                  if (segkmem_kmemlp_pcnt == 0 || segkmem_kmemlp_pcnt > 100)
1493 1494                          segkmem_kmemlp_pcnt = 12;
1494 1495                  segkmem_kmemlp_max = (memtotal * segkmem_kmemlp_pcnt) / 100;
1495 1496          }
1496 1497          segkmem_kmemlp_max = P2ROUNDUP(segkmem_kmemlp_max,
1497 1498              segkmem_heaplp_quantum);
1498 1499  
1499 1500          /* fix the lp kmem preallocation request if necessary */
1500 1501          if (segkmem_kmemlp_min) {
1501 1502                  segkmem_kmemlp_min = P2ROUNDUP(segkmem_kmemlp_min,
1502 1503                      segkmem_heaplp_quantum);
1503 1504                  if (segkmem_kmemlp_min > segkmem_kmemlp_max)
1504 1505                          segkmem_kmemlp_min = segkmem_kmemlp_max;
1505 1506          }
1506 1507  
1507 1508          use_large_pages = 1;
1508 1509          segkmem_lpszc = page_szc(segkmem_lpsize);
1509 1510          segkmem_lpshift = page_get_shift(segkmem_lpszc);
1510 1511  
1511 1512  #endif
1512 1513          return (use_large_pages);
1513 1514  }
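
The tunable checks in segkmem_lpsetup() reduce to power-of-two arithmetic: a quantum must be a nonzero power of two and a multiple of the large page size, and the large page heap cap defaults to a percentage of physical memory rounded up to the heap_lp quantum. The sketch below reuses the illumos macro definitions; the sizes are made-up example values, not real platform settings.

#include <stdio.h>
#include <stddef.h>

#define	ISP2(x)			(((x) & ((x) - 1)) == 0)
#define	P2PHASE(x, align)	((x) & ((align) - 1))
#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))

int
main(void)
{
	size_t lpsize = 4 * 1024 * 1024;		/* hypothetical 4MB large page */
	size_t heaplp_quantum = 6 * 1024 * 1024;	/* deliberately bad tunable */
	size_t memtotal = (size_t)256 * 1024 * 1024;	/* hypothetical physmem */
	size_t kmemlp_pcnt = 12;
	size_t kmemlp_max;

	/* quantum must be a nonzero power of two and a multiple of lpsize */
	if (heaplp_quantum == 0 || !ISP2(heaplp_quantum) ||
	    P2PHASE(heaplp_quantum, lpsize))
		heaplp_quantum = lpsize;

	/* cap the large page heap at a percentage of memory, rounded up */
	kmemlp_max = P2ROUNDUP((memtotal * kmemlp_pcnt) / 100, heaplp_quantum);

	printf("quantum=%zu max=%zu\n", heaplp_quantum, kmemlp_max);
	return (0);
}
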
1514 1515  
1515 1516  void
1516 1517  segkmem_zio_init(void *zio_mem_base, size_t zio_mem_size)
1517 1518  {
1518 1519          ASSERT(zio_mem_base != NULL);
1519 1520          ASSERT(zio_mem_size != 0);
1520 1521  
1521 1522          /*
1522 1523           * To reduce VA space fragmentation, we set up quantum caches for the
1523 1524           * smaller sizes;  we chose 32k because that translates to 128k VA
1524 1525           * slabs, which matches nicely with the common 128k zio_data bufs.
1525 1526           */
1526 1527          zio_arena = vmem_create("zfs_file_data", zio_mem_base, zio_mem_size,
1527 1528              PAGESIZE, NULL, NULL, NULL, 32 * 1024, VM_SLEEP);
1528 1529  
1529 1530          zio_alloc_arena = vmem_create("zfs_file_data_buf", NULL, 0, PAGESIZE,
1530 1531              segkmem_zio_alloc, segkmem_zio_free, zio_arena, 0, VM_SLEEP);
1531 1532  
1532 1533          ASSERT(zio_arena != NULL);
1533 1534          ASSERT(zio_alloc_arena != NULL);
1534 1535  }
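
The 32 * 1024 passed as qcache_max above is what enables the quantum caches the comment describes: requests at or below that size are served from caches that carve larger slabs out of the arena, so the arena itself sees fewer, larger spans. The toy model below is much simpler than the real vmem quantum caches; the 128K slab size comes from the comment, and everything else is hypothetical.

#include <stdio.h>
#include <stdlib.h>

#define	SLAB_SIZE	(128 * 1024)
#define	QCACHE_MAX	(32 * 1024)

static char *slab;
static size_t slab_used = SLAB_SIZE;	/* force a slab on the first request */
static int slabs_taken;

static void *
qcache_alloc(size_t size)
{
	void *p;

	if (size > QCACHE_MAX)
		return (malloc(size));	/* "arena" path for large requests */

	if (slab_used + size > SLAB_SIZE) {
		/* take one large span from the "arena" */
		if ((slab = malloc(SLAB_SIZE)) == NULL)
			return (NULL);
		slab_used = 0;
		slabs_taken++;
	}
	p = slab + slab_used;
	slab_used += size;
	return (p);
}

int
main(void)
{
	int i;

	for (i = 0; i < 8; i++)
		(void) qcache_alloc(32 * 1024);
	printf("8 x 32K requests consumed %d slab(s) from the arena\n",
	    slabs_taken);
	return (0);
}
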
1535 1536  
1536 1537  #ifdef __sparc
1537 1538  
1538 1539  
1539 1540  static void *
1540 1541  segkmem_alloc_ppa(vmem_t *vmp, size_t size, int vmflag)
1541 1542  {
1542 1543          size_t ppaquantum = btopr(segkmem_lpsize) * sizeof (page_t *);
1543 1544          void   *addr;
1544 1545  
1545 1546          if (ppaquantum <= PAGESIZE)
1546 1547                  return (segkmem_alloc(vmp, size, vmflag));
1547 1548  
1548 1549          ASSERT((size & (ppaquantum - 1)) == 0);
1549 1550  
1550 1551          addr = vmem_xalloc(vmp, size, ppaquantum, 0, 0, NULL, NULL, vmflag);
1551 1552          if (addr != NULL && segkmem_xalloc(vmp, addr, size, vmflag, 0,
1552 1553              segkmem_page_create, NULL) == NULL) {
1553 1554                  vmem_xfree(vmp, addr, size);
1554 1555                  addr = NULL;
1555 1556          }
1556 1557  
1557 1558          return (addr);
1558 1559  }
1559 1560  
1560 1561  static void
1561 1562  segkmem_free_ppa(vmem_t *vmp, void *addr, size_t size)
1562 1563  {
1563 1564          size_t ppaquantum = btopr(segkmem_lpsize) * sizeof (page_t *);
1564 1565  
1565 1566          ASSERT(addr != NULL);
1566 1567  
1567 1568          if (ppaquantum <= PAGESIZE) {
1568 1569                  segkmem_free(vmp, addr, size);
1569 1570          } else {
1570 1571                  segkmem_free(NULL, addr, size);
1571 1572                  vmem_xfree(vmp, addr, size);
1572 1573          }
1573 1574  }
1574 1575  
1575 1576  void
1576 1577  segkmem_heap_lp_init()
1577 1578  {
1578 1579          segkmem_lpcb_t *lpcb = &segkmem_lpcb;
1579 1580          size_t heap_lp_size = heap_lp_end - heap_lp_base;
1580 1581          size_t lpsize = segkmem_lpsize;
1581 1582          size_t ppaquantum;
1582 1583          void   *addr;
1583 1584  
1584 1585          if (segkmem_lpsize <= PAGESIZE) {
1585 1586                  ASSERT(heap_lp_base == NULL);
1586 1587                  ASSERT(heap_lp_end == NULL);
1587 1588                  return;
1588 1589          }
1589 1590  
1590 1591          ASSERT(segkmem_heaplp_quantum >= lpsize);
1591 1592          ASSERT((segkmem_heaplp_quantum & (lpsize - 1)) == 0);
1592 1593          ASSERT(lpcb->lp_uselp == 0);
1593 1594          ASSERT(heap_lp_base != NULL);
1594 1595          ASSERT(heap_lp_end != NULL);
1595 1596          ASSERT(heap_lp_base < heap_lp_end);
1596 1597          ASSERT(heap_lp_arena == NULL);
1597 1598          ASSERT(((uintptr_t)heap_lp_base & (lpsize - 1)) == 0);
1598 1599          ASSERT(((uintptr_t)heap_lp_end & (lpsize - 1)) == 0);
1599 1600  
1600 1601          /* create large page heap arena */
1601 1602          heap_lp_arena = vmem_create("heap_lp", heap_lp_base, heap_lp_size,
1602 1603              segkmem_heaplp_quantum, NULL, NULL, NULL, 0, VM_SLEEP);
1603 1604  
1604 1605          ASSERT(heap_lp_arena != NULL);
1605 1606  
1606 1607          /* This arena caches memory already mapped by large pages */
1607 1608          kmem_lp_arena = vmem_create("kmem_lp", NULL, 0, segkmem_kmemlp_quantum,
1608 1609              segkmem_alloc_lpi, segkmem_free_lpi, heap_lp_arena, 0, VM_SLEEP);
1609 1610  
1610 1611          ASSERT(kmem_lp_arena != NULL);
1611 1612  
1612 1613          mutex_init(&lpcb->lp_lock, NULL, MUTEX_DEFAULT, NULL);
1613 1614          cv_init(&lpcb->lp_cv, NULL, CV_DEFAULT, NULL);
1614 1615  
1615 1616          /*
1616 1617           * this arena is used for the array of page_t pointers necessary
1617 1618           * to call hat_memload_array()
1618 1619           */
1619 1620          ppaquantum = btopr(lpsize) * sizeof (page_t *);
1620 1621          segkmem_ppa_arena = vmem_create("segkmem_ppa", NULL, 0, ppaquantum,
1621 1622              segkmem_alloc_ppa, segkmem_free_ppa, heap_arena, ppaquantum,
1622 1623              VM_SLEEP);
1623 1624  
1624 1625          ASSERT(segkmem_ppa_arena != NULL);
1625 1626  
1626 1627          /* preallocate some memory for the lp kernel heap */
1627 1628          if (segkmem_kmemlp_min) {
1628 1629  
1629 1630                  ASSERT(P2PHASE(segkmem_kmemlp_min,
1630 1631                      segkmem_heaplp_quantum) == 0);
1631 1632  
1632 1633                  if ((addr = segkmem_alloc_lpi(heap_lp_arena,
1633 1634                      segkmem_kmemlp_min, VM_SLEEP)) != NULL) {
1634 1635  
1635 1636                          addr = vmem_add(kmem_lp_arena, addr,
1636 1637                              segkmem_kmemlp_min, VM_SLEEP);
1637 1638                          ASSERT(addr != NULL);
1638 1639                  }
1639 1640          }
1640 1641  
1641 1642          lpcb->lp_uselp = 1;
1642 1643  }
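
The ppaquantum computed above sizes the page_t pointer array that segkmem_xalloc_lp() passes to hat_memload_array(): one pointer for every base page covered by a large page. A quick user-space check of that arithmetic, assuming an 8K base page and a 4MB large page (example values only):

#include <stdio.h>
#include <stddef.h>

int
main(void)
{
	const size_t pagesize = 8192;			/* hypothetical base page */
	const size_t lpsize = 4 * 1024 * 1024;		/* hypothetical large page */
	size_t nbpages = (lpsize + pagesize - 1) / pagesize;	/* like btopr() */
	size_t ppaquantum = nbpages * sizeof (void *);

	printf("%zu base pages per large page -> %zu-byte ppa array\n",
	    nbpages, ppaquantum);
	return (0);
}
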
1643 1644  
1644 1645  #endif
  