1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright (c) 2015 Joyent, Inc.
  25  */
  26 
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  28 /*        All Rights Reserved   */
  29 
  30 
  31 #include <sys/types.h>
  32 #include <sys/bitmap.h>
  33 #include <sys/sysmacros.h>
  34 #include <sys/kmem.h>
  35 #include <sys/param.h>
  36 #include <sys/systm.h>
  37 #include <sys/user.h>
  38 #include <sys/unistd.h>
  39 #include <sys/errno.h>
  40 #include <sys/proc.h>
  41 #include <sys/mman.h>
  42 #include <sys/tuneable.h>
  43 #include <sys/cmn_err.h>
  44 #include <sys/cred.h>
  45 #include <sys/vmsystm.h>
  46 #include <sys/debug.h>
  47 #include <sys/policy.h>
  48 
  49 #include <vm/as.h>
  50 #include <vm/seg.h>
  51 
  52 static uint_t mem_getpgszc(size_t);
  53 
  54 /*
  55  * Memory control operations
  56  */
  57 int
  58 memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask)
  59 {
  60         struct as *as = ttoproc(curthread)->p_as;
  61         struct proc *p = ttoproc(curthread);
  62         size_t pgsz;
  63         uint_t szc, oszc, pgcmd;
  64         int error = 0;
  65         faultcode_t fc;
  66         uintptr_t iarg;
  67         STRUCT_DECL(memcntl_mha, mha);
  68 
  69         if (mask)
  70                 return (set_errno(EINVAL));
  71         if ((cmd == MC_LOCKAS) || (cmd == MC_UNLOCKAS)) {
  72                 if ((addr != 0) || (len != 0)) {
  73                         return (set_errno(EINVAL));
  74                 }
  75         } else if (cmd != MC_HAT_ADVISE) {
  76                 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0) {
  77                         return (set_errno(EINVAL));
  78                 }
  79                 /*
  80                  * We're only concerned with the address range
  81                  * here, not the protections.  The protections
  82                  * are only used as a "filter" in this code,
  83                  * they aren't set or modified here.
  84                  */
  85                 if (valid_usr_range(addr, len, 0, as,
  86                     as->a_userlimit) != RANGE_OKAY) {
  87                         return (set_errno(ENOMEM));
  88                 }
  89         }
  90 
  91         if (cmd == MC_HAT_ADVISE) {
  92                 if (attr != 0 || mask != 0) {
  93                         return (set_errno(EINVAL));
  94                 }
  95 
  96         } else {
  97                 if ((VALID_ATTR & attr) != attr) {
  98                         return (set_errno(EINVAL));
  99                 }
 100                 if ((attr & SHARED) && (attr & PRIVATE)) {
 101                         return (set_errno(EINVAL));
 102                 }
 103                 if (((cmd == MC_LOCKAS) || (cmd == MC_LOCK) ||
 104                     (cmd == MC_UNLOCKAS) || (cmd == MC_UNLOCK)) &&
 105                     (error = secpolicy_lock_memory(CRED())) != 0)
 106                         return (set_errno(error));
 107         }
 108         if (attr) {
 109                 attr |= PROT_USER;
 110         }
 111 
 112         switch (cmd) {
 113         case MC_SYNC:
 114                 /*
 115                  * MS_SYNC used to be defined to be zero but is now non-zero.
 116                  * For binary compatibility we still accept zero
 117                  * (the absence of MS_ASYNC) to mean the same thing.
 118                  */
 119                 iarg = (uintptr_t)arg;
 120                 if ((iarg & ~MS_INVALIDATE) == 0)
 121                         iarg |= MS_SYNC;
 122 
 123                 if (((iarg & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) != 0) ||
 124                     ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))) {
 125                         error = set_errno(EINVAL);
 126                 } else {
 127                         error = as_ctl(as, addr, len, cmd, attr, iarg, NULL, 0);
 128                         if (error) {
 129                                 (void) set_errno(error);
 130                         }
 131                 }
 132                 return (error);
 133         case MC_LOCKAS:
 134                 if ((uintptr_t)arg & ~(MCL_FUTURE|MCL_CURRENT) ||
 135                     (uintptr_t)arg == 0) {
 136                         return (set_errno(EINVAL));
 137                 }
 138                 break;
 139         case MC_LOCK:
 140         case MC_UNLOCKAS:
 141         case MC_UNLOCK:
 142                 break;
 143         case MC_HAT_ADVISE:
 144                 /*
 145                  * Set prefered page size.
 146                  */
 147                 STRUCT_INIT(mha, get_udatamodel());
 148                 if (copyin(arg, STRUCT_BUF(mha), STRUCT_SIZE(mha))) {
 149                         return (set_errno(EFAULT));
 150                 }
 151 
 152                 pgcmd = STRUCT_FGET(mha, mha_cmd);
 153 
 154                 /*
 155                  * Currently only MHA_MAPSIZE_VA, MHA_MAPSIZE_STACK
 156                  * and MHA_MAPSIZE_BSSBRK are supported. Only one
 157                  * command may be specified at a time.
 158                  */
 159                 if ((~(MHA_MAPSIZE_VA|MHA_MAPSIZE_STACK|MHA_MAPSIZE_BSSBRK) &
 160                     pgcmd) || pgcmd == 0 || !ISP2(pgcmd) ||
 161                     STRUCT_FGET(mha, mha_flags))
 162                         return (set_errno(EINVAL));
 163 
 164                 pgsz = STRUCT_FGET(mha, mha_pagesize);
 165 
 166                 /*
 167                  * call platform specific map_pgsz() routine to get the
 168                  * optimal pgsz if pgsz is 0.
 169                  *
 170                  * For stack and heap operations addr and len must be zero.
 171                  */
 172                 if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) {
 173                         if (addr != NULL || len != 0) {
 174                                 return (set_errno(EINVAL));
 175                         }
 176 
 177                         /*
 178                          * Disable autompss for this process unless pgsz == 0,
 179                          * which means the system should pick.  In the
 180                          * pgsz == 0 case, leave the SAUTOLPG setting alone, as
 181                          * we don't want to enable it when someone has
 182                          * disabled automatic large page selection for the
 183                          * whole system.
 184                          */
 185                         mutex_enter(&p->p_lock);
 186                         if (pgsz != 0) {
 187                                 p->p_flag &= ~SAUTOLPG;
 188                         }
 189                         mutex_exit(&p->p_lock);
 190 
 191                         as_rangelock(as);
 192 
 193                         if (pgsz == 0) {
 194                                 int     type;
 195 
 196                                 if (pgcmd == MHA_MAPSIZE_BSSBRK)
 197                                         type = MAPPGSZ_HEAP;
 198                                 else
 199                                         type = MAPPGSZ_STK;
 200 
 201                                 pgsz = map_pgsz(type, p, 0, 0, 1);
 202                         }
 203                 } else {
 204                         /*
 205                          * addr and len must be valid for range specified.
 206                          */
 207                         if (valid_usr_range(addr, len, 0, as,
 208                             as->a_userlimit) != RANGE_OKAY) {
 209                                 return (set_errno(ENOMEM));
 210                         }
 211                         /*
 212                          * Note that we don't disable automatic large page
 213                          * selection for anon segments based on use of
 214                          * memcntl().
 215                          */
 216                         if (pgsz == 0) {
 217                                 error = as_set_default_lpsize(as, addr, len);
 218                                 if (error) {
 219                                         (void) set_errno(error);
 220                                 }
 221                                 return (error);
 222                         }
 223 
 224                         /*
 225                          * addr and len must be prefered page size aligned
 226                          */
 227                         if (!IS_P2ALIGNED(addr, pgsz) ||
 228                             !IS_P2ALIGNED(len, pgsz)) {
 229                                 return (set_errno(EINVAL));
 230                         }
 231                 }
 232 
 233                 szc = mem_getpgszc(pgsz);
 234                 if (szc == (uint_t)-1) {
 235                         if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK))
 236                             != 0) {
 237                                 as_rangeunlock(as);
 238                         }
 239                         return (set_errno(EINVAL));
 240                 }
 241 
 242                 /*
 243                  * For stack and heap operations we first need to pad
 244                  * out existing range (create new mappings) to the new
 245                  * prefered page size boundary. Also the start of the
 246                  * .bss for the heap or user's stack base may not be on
 247                  * the new prefered page size boundary. For these cases
 248                  * we align the base of the request on the new prefered
 249                  * page size.
 250                  */
 251                 if (pgcmd & MHA_MAPSIZE_BSSBRK) {
 252                         if (szc == p->p_brkpageszc) {
 253                                 as_rangeunlock(as);
 254                                 return (0);
 255                         }
 256                         if (szc > p->p_brkpageszc) {
 257                                 error = brk_internal(p->p_brkbase
 258                                     + p->p_brksize, szc);
 259                                 if (error) {
 260                                         as_rangeunlock(as);
 261                                         return (set_errno(error));
 262                                 }
 263                         }
 264                         /*
 265                          * It is possible for brk_internal to silently fail to
 266                          * promote the heap size, so don't panic or ASSERT.
 267                          */
 268                         if (!IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz)) {
 269                                 as_rangeunlock(as);
 270                                 return (set_errno(ENOMEM));
 271                         }
 272                         oszc = p->p_brkpageszc;
 273                         p->p_brkpageszc = szc;
 274 
 275                         addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
 276                             pgsz);
 277                         len = (p->p_brkbase + p->p_brksize) - addr;
 278                         ASSERT(IS_P2ALIGNED(len, pgsz));
 279                         /*
 280                          * Perhaps no existing pages to promote.
 281                          */
 282                         if (len == 0) {
 283                                 as_rangeunlock(as);
 284                                 return (0);
 285                         }
 286                 }
 287                 /*
 288                  * The code below, as does grow.c, assumes stacks always grow
 289                  * downward.
 290                  */
 291                 if (pgcmd & MHA_MAPSIZE_STACK) {
 292                         if (szc == p->p_stkpageszc) {
 293                                 as_rangeunlock(as);
 294                                 return (0);
 295                         }
 296 
 297                         if (szc > p->p_stkpageszc) {
 298                                 error = grow_internal(p->p_usrstack -
 299                                     p->p_stksize, szc);
 300                                 if (error) {
 301                                         as_rangeunlock(as);
 302                                         return (set_errno(error));
 303                                 }
 304                         }
 305                         /*
 306                          * It is possible for grow_internal to silently fail to
 307                          * promote the stack size, so don't panic or ASSERT.
 308                          */
 309                         if (!IS_P2ALIGNED(p->p_usrstack - p->p_stksize, pgsz)) {
 310                                 as_rangeunlock(as);
 311                                 return (set_errno(ENOMEM));
 312                         }
 313                         oszc = p->p_stkpageszc;
 314                         p->p_stkpageszc = szc;
 315 
 316                         addr = p->p_usrstack - p->p_stksize;
 317                         len = P2ALIGN(p->p_stksize, pgsz);
 318 
 319                         /*
 320                          * Perhaps nothing to promote.
 321                          */
 322                         if (len == 0 || addr >= p->p_usrstack ||
 323                             (addr + len) < addr) {
 324                                 as_rangeunlock(as);
 325                                 return (0);
 326                         }
 327                 }
 328                 ASSERT(IS_P2ALIGNED(addr, pgsz));
 329                 ASSERT(IS_P2ALIGNED(len, pgsz));
 330                 error = as_setpagesize(as, addr, len, szc, B_TRUE);
 331 
 332                 /*
 333                  * On stack or heap failures restore original
 334                  * pg size code.
 335                  */
 336                 if (error) {
 337                         if ((pgcmd & MHA_MAPSIZE_BSSBRK) != 0) {
 338                                 p->p_brkpageszc = oszc;
 339                         }
 340                         if ((pgcmd & MHA_MAPSIZE_STACK) != 0) {
 341                                 p->p_stkpageszc = oszc;
 342                         }
 343                         (void) set_errno(error);
 344                 }
 345                 if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) {
 346                         as_rangeunlock(as);
 347                 }
 348                 return (error);
 349         case MC_ADVISE:
 350                 if ((uintptr_t)arg == MADV_FREE ||
 351                     (uintptr_t)arg == MADV_PURGE) {
 352                         len &= PAGEMASK;
 353                 }
 354                 switch ((uintptr_t)arg) {
 355                 case MADV_WILLNEED:
 356                         fc = as_faulta(as, addr, len);
 357                         if (fc) {
 358                                 if (FC_CODE(fc) == FC_OBJERR)
 359                                         error = set_errno(FC_ERRNO(fc));
 360                                 else if (FC_CODE(fc) == FC_NOMAP)
 361                                         error = set_errno(ENOMEM);
 362                                 else
 363                                         error = set_errno(EINVAL);
 364                                 return (error);
 365                         }
 366                         break;
 367 
 368                 case MADV_DONTNEED:
 369                         /*
 370                          * For now, don't need is turned into an as_ctl(MC_SYNC)
 371                          * operation flagged for async invalidate.
 372                          */
 373                         error = as_ctl(as, addr, len, MC_SYNC, attr,
 374                             MS_ASYNC | MS_INVALIDATE, NULL, 0);
 375                         if (error)
 376                                 (void) set_errno(error);
 377                         return (error);
 378 
 379                 default:
 380                         error = as_ctl(as, addr, len, cmd, attr,
 381                             (uintptr_t)arg, NULL, 0);
 382                         if (error)
 383                                 (void) set_errno(error);
 384                         return (error);
 385                 }
 386                 break;
 387         case MC_INHERIT_ZERO:
 388                 if (arg != 0 || attr != 0 || mask != 0)
 389                         return (set_errno(EINVAL));
 390                 break;
 391         default:
 392                 return (set_errno(EINVAL));
 393         }
 394 
 395         error = as_ctl(as, addr, len, cmd, attr, (uintptr_t)arg, NULL, 0);
 396 
 397         if (error)
 398                 (void) set_errno(error);
 399         return (error);
 400 }
 401 
 402 /*
 403  * Return page size code for page size passed in. If
 404  * matching page size not found or supported, return -1.
 405  */
 406 static uint_t
 407 mem_getpgszc(size_t pgsz) {
 408         return ((uint_t)page_szc_user_filtered(pgsz));
 409 }