/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/*      All Rights Reserved   */

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

/*
 * UNIX machine dependent virtual memory support.
 */

#include <sys/vm.h>
#include <sys/exec.h>
#include <sys/cmn_err.h>
#include <sys/cpu_module.h>
#include <sys/cpu.h>
#include <sys/elf_SPARC.h>
#include <sys/archsystm.h>
#include <vm/hat_sfmmu.h>
#include <sys/memnode.h>
#include <sys/mem_cage.h>
#include <vm/vm_dep.h>
#include <sys/random.h>

#if defined(__sparcv9) && defined(SF_ERRATA_57)
caddr_t errata57_limit;
#endif

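/*
 * Page coloring parameters for the virtually indexed caches, together
 * with the tunables that control the coloring policy.
 */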
uint_t page_colors = 0;
uint_t page_colors_mask = 0;
uint_t page_coloring_shift = 0;
volatile int consistent_coloring;
int update_proc_pgcolorbase_after_fork = 0;

uint_t mmu_page_sizes = DEFAULT_MMU_PAGE_SIZES;
uint_t max_mmu_page_sizes = MMU_PAGE_SIZES;
uint_t mmu_hashcnt = DEFAULT_MAX_HASHCNT;
uint_t max_mmu_hashcnt = MAX_HASHCNT;
size_t mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE;

/*
 * The sun4u hardware mapping sizes which will always be supported are
 * 8K, 64K, 512K and 4M.  If sun4u based machines need to support other
 * page sizes, platform or cpu specific routines need to modify the value.
 * The base pagesize (p_szc == 0) must always be supported by the hardware.
 */
int mmu_exported_pagesize_mask = (1 << TTE8K) | (1 << TTE64K) |
        (1 << TTE512K) | (1 << TTE4M);
uint_t mmu_exported_page_sizes;

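/*
 * Translation tables between the internal page size codes (szc) and the
 * page size codes exported to user level.
 */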
uint_t szc_2_userszc[MMU_PAGE_SIZES];
uint_t userszc_2_szc[MMU_PAGE_SIZES];

extern uint_t vac_colors_mask;
extern int vac_shift;

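/*
 * Table of page sizes the hardware can map.  Each entry gives the page
 * size, the corresponding shift, and how many base (MMU_PAGESIZE) pages
 * it spans; an all-zero entry terminates the table.
 */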
hw_pagesize_t hw_page_array[] = {
        {MMU_PAGESIZE, MMU_PAGESHIFT, 0, MMU_PAGESIZE >> MMU_PAGESHIFT},
        {MMU_PAGESIZE64K, MMU_PAGESHIFT64K, 0,
            MMU_PAGESIZE64K >> MMU_PAGESHIFT},
        {MMU_PAGESIZE512K, MMU_PAGESHIFT512K, 0,
            MMU_PAGESIZE512K >> MMU_PAGESHIFT},
        {MMU_PAGESIZE4M, MMU_PAGESHIFT4M, 0, MMU_PAGESIZE4M >> MMU_PAGESHIFT},
        {MMU_PAGESIZE32M, MMU_PAGESHIFT32M, 0,
            MMU_PAGESIZE32M >> MMU_PAGESHIFT},
        {MMU_PAGESIZE256M, MMU_PAGESHIFT256M, 0,
            MMU_PAGESIZE256M >> MMU_PAGESHIFT},
        {0, 0, 0, 0}
};

/*
 * Maximum page size used to map 64-bit memory segment kmem64_base..kmem64_end
 */
int     max_bootlp_tteszc = TTE4M;

 106  * use_text_pgsz64k and use_text_pgsz512k allow the user to turn on these
 107  * additional text page sizes for USIII-IV+ and OPL by changing the default
 108  * values via /etc/system.
 109  */
 110 int     use_text_pgsz64K = 0;
 111 int     use_text_pgsz512K = 0;

/*
 * Maximum and default segment size tunables for user heap, stack, private
 * and shared anonymous memory, and user text and initialized data.
 */
size_t max_uheap_lpsize = MMU_PAGESIZE4M;
size_t default_uheap_lpsize = MMU_PAGESIZE;
size_t max_ustack_lpsize = MMU_PAGESIZE4M;
size_t default_ustack_lpsize = MMU_PAGESIZE;
size_t max_privmap_lpsize = MMU_PAGESIZE4M;
size_t max_uidata_lpsize = MMU_PAGESIZE;
size_t max_utext_lpsize = MMU_PAGESIZE4M;
size_t max_shm_lpsize = MMU_PAGESIZE4M;

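/*
 * Bump the data segment large-page limits that are still at the 4M
 * default up to the given ISM page size; used by platforms that support
 * larger page sizes.
 */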
void
adjust_data_maxlpsize(size_t ismpagesize)
{
        if (max_uheap_lpsize == MMU_PAGESIZE4M) {
                max_uheap_lpsize = ismpagesize;
        }
        if (max_ustack_lpsize == MMU_PAGESIZE4M) {
                max_ustack_lpsize = ismpagesize;
        }
        if (max_privmap_lpsize == MMU_PAGESIZE4M) {
                max_privmap_lpsize = ismpagesize;
        }
        if (max_shm_lpsize == MMU_PAGESIZE4M) {
                max_shm_lpsize = ismpagesize;
        }
}

/*
 * The maximum amount a randomized mapping will be slewed.  We should perhaps
 * arrange things so these tunables can be separate for mmap, mmapobj, and
 * ld.so
 */
size_t aslr_max_map_skew = 256 * 1024 * 1024; /* 256MB */

/*
 * map_addr_proc() is the routine called when the system is to
 * choose an address for the user.  We will pick an address
 * range which is just below the current stack limit.  The
 * algorithm used for cache consistency on machines with virtual
 * address caches is such that offset 0 in the vnode is always
 * on a shm_alignment-aligned address.  Unfortunately, this
 * means that vnodes which are demand paged will not be mapped
 * cache consistently with the executable images.  When the
 * cache alignment for a given object is inconsistent, the
 * lower level code must manage the translations so that this
 * is not seen here (at the cost of efficiency, of course).
 *
 * Every mapping will have a redzone of a single page on either side of
 * the request. This is done to leave one page unmapped between segments.
 * This is not required, but it's useful for the user because if their
 * program strays across a segment boundary, it will catch a fault
 * immediately, making debugging a little easier.  Currently the redzone
 * is mandatory.
 *
 * addrp is a value/result parameter.
 *      On input it is a hint from the user to be used in a completely
 *      machine dependent fashion.  For MAP_ALIGN, addrp contains the
 *      minimal alignment, which must be some "power of two" multiple of
 *      pagesize.
 *
 *      On output it is NULL if no address can be found in the current
 *      process's address space or else an address that is currently
 *      not mapped for len bytes with a page of red zone on either side.
 *      If vacalign is true, then the selected address will obey the alignment
 *      constraints of a vac machine based on the given off value.
 */
/*ARGSUSED4*/
void
map_addr_proc(caddr_t *addrp, size_t len, offset_t off, int vacalign,
    caddr_t userlimit, struct proc *p, uint_t flags)
{
        struct as *as = p->p_as;
        caddr_t addr;
        caddr_t base;
        size_t slen;
        uintptr_t align_amount;
        int allow_largepage_alignment = 1;

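        /*
         * The search range runs from the process brk base (p_brkbase) up
         * to the lower of userlimit and the bottom of the stack limit.
         */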
        base = p->p_brkbase;
        if (userlimit < as->a_userlimit) {
                /*
                 * This happens when a program wants to map something in
                 * a range that's accessible to a program in a smaller
                 * address space.  For example, a 64-bit program might
                 * be calling mmap32(2) to guarantee that the returned
                 * address is below 4Gbytes.
                 */
                ASSERT(userlimit > base);
                slen = userlimit - base;
        } else {
                slen = p->p_usrstack - base -
                    ((p->p_stk_ctl + PAGEOFFSET) & PAGEMASK);
        }

        /* Make len be a multiple of PAGESIZE */
        len = (len + PAGEOFFSET) & PAGEMASK;

        /*
         *  If the request is larger than the size of a particular
         *  mmu level, then we use that level to map the request.
         *  But this requires that both the virtual and the physical
         *  addresses be aligned with respect to that level, so we
         *  do the virtual bit of nastiness here.
         *
         *  For 32-bit processes, only those which have specified
         *  MAP_ALIGN or an addr will be aligned on a page size > 4MB. Otherwise
         *  we can potentially waste up to 256MB of the 4G process address
         *  space just for alignment.
         */
        if (p->p_model == DATAMODEL_ILP32 && ((flags & MAP_ALIGN) == 0 ||
            ((uintptr_t)*addrp) != 0)) {
                allow_largepage_alignment = 0;
        }
        if ((mmu_page_sizes == max_mmu_page_sizes) &&
            allow_largepage_alignment &&
            (len >= MMU_PAGESIZE256M)) {     /* 256MB mappings */
                align_amount = MMU_PAGESIZE256M;
        } else if ((mmu_page_sizes == max_mmu_page_sizes) &&
            allow_largepage_alignment &&
            (len >= MMU_PAGESIZE32M)) {      /* 32MB mappings */
                align_amount = MMU_PAGESIZE32M;
        } else if (len >= MMU_PAGESIZE4M) {  /* 4MB mappings */
                align_amount = MMU_PAGESIZE4M;
        } else if (len >= MMU_PAGESIZE512K) { /* 512KB mappings */
                align_amount = MMU_PAGESIZE512K;
        } else if (len >= MMU_PAGESIZE64K) { /* 64KB mappings */
                align_amount = MMU_PAGESIZE64K;
        } else {
                /*
                 * Align virtual addresses on a 64K boundary to ensure
                 * that ELF shared libraries are mapped with the appropriate
                 * alignment constraints by the run-time linker.
                 */
                align_amount = ELF_SPARC_MAXPGSZ;
                if ((flags & MAP_ALIGN) && ((uintptr_t)*addrp != 0) &&
                    ((uintptr_t)*addrp < align_amount))
                        align_amount = (uintptr_t)*addrp;
        }

        /*
         * 64-bit processes require 1024K alignment of ELF shared libraries.
         */
        if (p->p_model == DATAMODEL_LP64)
                align_amount = MAX(align_amount, ELF_SPARCV9_MAXPGSZ);
#ifdef VAC
        if (vac && vacalign && (align_amount < shm_alignment))
                align_amount = shm_alignment;
#endif

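        /*
         * An explicit MAP_ALIGN request larger than the alignment chosen
         * above takes precedence.
         */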
        if ((flags & MAP_ALIGN) && ((uintptr_t)*addrp > align_amount)) {
                align_amount = (uintptr_t)*addrp;
        }

        ASSERT(ISP2(align_amount));
        ASSERT(align_amount == 0 || align_amount >= PAGESIZE);

        /*
         * Look for a large enough hole starting below the stack limit.
         * After finding it, use the upper part.
         */
        as_purge(as);
        off = off & (align_amount - 1);

        if (as_gap_aligned(as, len, &base, &slen, AH_HI, NULL, align_amount,
            PAGESIZE, off) == 0) {
                caddr_t as_addr;

                /*
                 * addr is the highest possible address to use since we have
                 * a PAGESIZE redzone at the beginning and end.
                 */
                addr = base + slen - (PAGESIZE + len);
                as_addr = addr;
                /*
                 * Round address DOWN to the alignment amount and
                 * add the offset in.
                 * If addr is greater than as_addr, len would not be large
                 * enough to include the redzone, so we must adjust down
                 * by the alignment amount.
                 */
                addr = (caddr_t)((uintptr_t)addr & (~(align_amount - 1l)));
                addr += (long)off;
                if (addr > as_addr) {
                        addr -= align_amount;
                }

                /*
                 * If randomization is requested, slew the allocation
                 * backwards, within the same gap, by a random amount.
                 */
                if (flags & _MAP_RANDOMIZE) {
                        uint32_t slew;
                        uint32_t maxslew;

                        (void) random_get_pseudo_bytes((uint8_t *)&slew,
                            sizeof (slew));

                        maxslew = MIN(aslr_max_map_skew, (addr - base));
#if defined(SF_ERRATA_57)
                        /*
                         * Don't allow ASLR to cause mappings to fail below
                         * because of SF erratum #57
                         */
                        maxslew = MIN(maxslew, (addr - errata57_limit));
#endif

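                        /*
                         * Round the slew down to a multiple of align_amount
                         * so the final address still honors the alignment
                         * and offset constraints established above.
                         */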
                        slew = slew % maxslew;
                        addr -= P2ALIGN(slew, align_amount);
                }

                ASSERT(addr > base);
                ASSERT(addr + len < base + slen);
                ASSERT(((uintptr_t)addr & (align_amount - 1l)) ==
                    ((uintptr_t)(off)));
                *addrp = addr;

#if defined(SF_ERRATA_57)
                if (AS_TYPE_64BIT(as) && addr < errata57_limit) {
                        *addrp = NULL;
                }
#endif
        } else {
                *addrp = NULL;  /* no more virtual space */
        }
}

/*
 * Platform-dependent page scrub call.
 */
void
pagescrub(page_t *pp, uint_t off, uint_t len)
{
        /*
         * For now, we rely on the fact that pagezero() will
         * always clear UEs.
         */
        pagezero(pp, off, len);
}

/*ARGSUSED*/
void
sync_data_memory(caddr_t va, size_t len)
{
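        /*
         * Flushing the E$ pushes any cached copies of the range out to
         * memory; coarse, but sufficient for this platform.
         */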
        cpu_flush_ecache();
}

/*
 * platform specific large pages for kernel heap support
 */
void
mmu_init_kcontext()
{
        extern void set_kcontextreg();

        if (kcontextreg)
                set_kcontextreg();
}

void
contig_mem_init(void)
{
        /* not applicable to sun4u */
}

/*ARGSUSED*/
caddr_t
contig_mem_prealloc(caddr_t alloc_base, pgcnt_t npages)
{
        /* not applicable to sun4u */
        return (alloc_base);
}