--- old/usr/src/uts/common/syscall/memcntl.c
+++ new/usr/src/uts/common/syscall/memcntl.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   * Copyright (c) 2015 Joyent, Inc.
  25   25   */
  26   26  
  27   27  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  28   28  /*        All Rights Reserved   */
  29   29  
  30   30  
  31   31  #include <sys/types.h>
  32   32  #include <sys/bitmap.h>
  33   33  #include <sys/sysmacros.h>
  34   34  #include <sys/kmem.h>
  35   35  #include <sys/param.h>
  36   36  #include <sys/systm.h>
  37   37  #include <sys/user.h>
  38   38  #include <sys/unistd.h>
  39   39  #include <sys/errno.h>
  40   40  #include <sys/proc.h>
  41   41  #include <sys/mman.h>
  42   42  #include <sys/tuneable.h>
  43   43  #include <sys/cmn_err.h>
  44   44  #include <sys/cred.h>
  45   45  #include <sys/vmsystm.h>
  46   46  #include <sys/debug.h>
  47   47  #include <sys/policy.h>
  48   48  
  49   49  #include <vm/as.h>
  50   50  #include <vm/seg.h>
  51   51  
  52   52  static uint_t mem_getpgszc(size_t);
  53   53  
  54   54  /*
  55   55   * Memory control operations
  56   56   */
  57   57  int
  58   58  memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask)
  59   59  {
  60   60          struct as *as = ttoproc(curthread)->p_as;
  61   61          struct proc *p = ttoproc(curthread);
  62   62          size_t pgsz;
  63   63          uint_t szc, oszc, pgcmd;
  64   64          int error = 0;
  65   65          faultcode_t fc;
  66   66          uintptr_t iarg;
  67   67          STRUCT_DECL(memcntl_mha, mha);
  68   68  
  69   69          if (mask)
  70   70                  return (set_errno(EINVAL));
  71   71          if ((cmd == MC_LOCKAS) || (cmd == MC_UNLOCKAS)) {
  72   72                  if ((addr != 0) || (len != 0)) {
  73   73                          return (set_errno(EINVAL));
  74   74                  }
  75   75          } else if (cmd != MC_HAT_ADVISE) {
  76   76                  if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0) {
  77   77                          return (set_errno(EINVAL));
  78   78                  }
  79   79                  /*
  80   80                   * We're only concerned with the address range
  81   81                   * here, not the protections.  The protections
  82   82                   * are only used as a "filter" in this code,
  83   83                   * they aren't set or modified here.
  84   84                   */
  85   85                  if (valid_usr_range(addr, len, 0, as,
  86   86                      as->a_userlimit) != RANGE_OKAY) {
  87   87                          return (set_errno(ENOMEM));
  88   88                  }
  89   89          }
  90   90  
  91   91          if (cmd == MC_HAT_ADVISE) {
  92   92                  if (attr != 0 || mask != 0) {
  93   93                          return (set_errno(EINVAL));
  94   94                  }
  95   95  
  96   96          } else {
  97   97                  if ((VALID_ATTR & attr) != attr) {
  98   98                          return (set_errno(EINVAL));
  99   99                  }
 100  100                  if ((attr & SHARED) && (attr & PRIVATE)) {
 101  101                          return (set_errno(EINVAL));
 102  102                  }
 103  103                  if (((cmd == MC_LOCKAS) || (cmd == MC_LOCK) ||
 104  104                      (cmd == MC_UNLOCKAS) || (cmd == MC_UNLOCK)) &&
 105  105                      (error = secpolicy_lock_memory(CRED())) != 0)
 106  106                          return (set_errno(error));
 107  107          }
 108  108          if (attr) {
 109  109                  attr |= PROT_USER;
 110  110          }
 111  111  
 112  112          switch (cmd) {
 113  113          case MC_SYNC:
 114  114                  /*
 115  115                   * MS_SYNC used to be defined to be zero but is now non-zero.
 116  116                   * For binary compatibility we still accept zero
 117  117                   * (the absence of MS_ASYNC) to mean the same thing.
 118  118                   * Binary compatibility is not an issue for MS_INVALCURPROC.
 119  119                   */
 120  120                  iarg = (uintptr_t)arg;
 121  121                  if ((iarg & ~MS_INVALIDATE) == 0)
 122  122                          iarg |= MS_SYNC;
 123  123  
 124  124                  if (((iarg &
 125  125                      ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE|MS_INVALCURPROC)) != 0) ||
 126  126                      ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC)) ||
 127  127                      ((iarg & (MS_INVALIDATE|MS_INVALCURPROC)) ==
 128  128                      (MS_INVALIDATE|MS_INVALCURPROC))) {
 129  129                          error = set_errno(EINVAL);
 130  130                  } else {
 131  131                          error = as_ctl(as, addr, len, cmd, attr, iarg, NULL, 0);
 132  132                          if (error) {
 133  133                                  (void) set_errno(error);
 134  134                          }
 135  135                  }
 136  136                  return (error);
 137  137          case MC_LOCKAS:
 138  138                  if ((uintptr_t)arg & ~(MCL_FUTURE|MCL_CURRENT) ||
 139  139                      (uintptr_t)arg == 0) {
 140  140                          return (set_errno(EINVAL));
 141  141                  }
 142  142                  break;
 143  143          case MC_LOCK:
 144  144          case MC_UNLOCKAS:
 145  145          case MC_UNLOCK:
 146  146                  break;
 147  147          case MC_HAT_ADVISE:
 148  148                  /*
  149  149                   * Set preferred page size.
 150  150                   */
 151  151                  STRUCT_INIT(mha, get_udatamodel());
 152  152                  if (copyin(arg, STRUCT_BUF(mha), STRUCT_SIZE(mha))) {
 153  153                          return (set_errno(EFAULT));
 154  154                  }
 155  155  
 156  156                  pgcmd = STRUCT_FGET(mha, mha_cmd);
 157  157  
 158  158                  /*
 159  159                   * Currently only MHA_MAPSIZE_VA, MHA_MAPSIZE_STACK
 160  160                   * and MHA_MAPSIZE_BSSBRK are supported. Only one
 161  161                   * command may be specified at a time.
 162  162                   */
 163  163                  if ((~(MHA_MAPSIZE_VA|MHA_MAPSIZE_STACK|MHA_MAPSIZE_BSSBRK) &
 164  164                      pgcmd) || pgcmd == 0 || !ISP2(pgcmd) ||
 165  165                      STRUCT_FGET(mha, mha_flags))
 166  166                          return (set_errno(EINVAL));
 167  167  
 168  168                  pgsz = STRUCT_FGET(mha, mha_pagesize);
 169  169  
 170  170                  /*
  171  171                   * Call the platform-specific map_pgsz() routine to get the
  172  172                   * optimal pgsz if pgsz is 0.
 173  173                   *
 174  174                   * For stack and heap operations addr and len must be zero.
 175  175                   */
 176  176                  if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) {
 177  177                          if (addr != NULL || len != 0) {
 178  178                                  return (set_errno(EINVAL));
 179  179                          }
 180  180  
 181  181                          /*
 182  182                           * Disable autompss for this process unless pgsz == 0,
 183  183                           * which means the system should pick.  In the
 184  184                           * pgsz == 0 case, leave the SAUTOLPG setting alone, as
 185  185                           * we don't want to enable it when someone has
 186  186                           * disabled automatic large page selection for the
 187  187                           * whole system.
 188  188                           */
 189  189                          mutex_enter(&p->p_lock);
 190  190                          if (pgsz != 0) {
 191  191                                  p->p_flag &= ~SAUTOLPG;
 192  192                          }
 193  193                          mutex_exit(&p->p_lock);
 194  194  
 195  195                          as_rangelock(as);
 196  196  
 197  197                          if (pgsz == 0) {
 198  198                                  int     type;
 199  199  
 200  200                                  if (pgcmd == MHA_MAPSIZE_BSSBRK)
 201  201                                          type = MAPPGSZ_HEAP;
 202  202                                  else
 203  203                                          type = MAPPGSZ_STK;
 204  204  
 205  205                                  pgsz = map_pgsz(type, p, 0, 0, 1);
 206  206                          }
 207  207                  } else {
 208  208                          /*
  209  209                           * addr and len must be valid for the range specified.
 210  210                           */
 211  211                          if (valid_usr_range(addr, len, 0, as,
 212  212                              as->a_userlimit) != RANGE_OKAY) {
 213  213                                  return (set_errno(ENOMEM));
 214  214                          }
 215  215                          /*
 216  216                           * Note that we don't disable automatic large page
 217  217                           * selection for anon segments based on use of
 218  218                           * memcntl().
 219  219                           */
 220  220                          if (pgsz == 0) {
 221  221                                  error = as_set_default_lpsize(as, addr, len);
 222  222                                  if (error) {
 223  223                                          (void) set_errno(error);
 224  224                                  }
 225  225                                  return (error);
 226  226                          }
 227  227  
 228  228                          /*
  229  229                           * addr and len must be preferred page size aligned
 230  230                           */
 231  231                          if (!IS_P2ALIGNED(addr, pgsz) ||
 232  232                              !IS_P2ALIGNED(len, pgsz)) {
 233  233                                  return (set_errno(EINVAL));
 234  234                          }
 235  235                  }
 236  236  
 237  237                  szc = mem_getpgszc(pgsz);
 238  238                  if (szc == (uint_t)-1) {
 239  239                          if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK))
 240  240                              != 0) {
 241  241                                  as_rangeunlock(as);
 242  242                          }
 243  243                          return (set_errno(EINVAL));
 244  244                  }
 245  245  
 246  246                  /*
 247  247                   * For stack and heap operations we first need to pad
 248  248                   * out existing range (create new mappings) to the new
  249  249                   * preferred page size boundary. Also the start of the
  250  250                   * .bss for the heap or user's stack base may not be on
  251  251                   * the new preferred page size boundary. For these cases
  252  252                   * we align the base of the request on the new preferred
 253  253                   * page size.
 254  254                   */
 255  255                  if (pgcmd & MHA_MAPSIZE_BSSBRK) {
 256  256                          if (szc == p->p_brkpageszc) {
 257  257                                  as_rangeunlock(as);
 258  258                                  return (0);
 259  259                          }
 260  260                          if (szc > p->p_brkpageszc) {
 261  261                                  error = brk_internal(p->p_brkbase
 262  262                                      + p->p_brksize, szc);
 263  263                                  if (error) {
 264  264                                          as_rangeunlock(as);
 265  265                                          return (set_errno(error));
 266  266                                  }
 267  267                          }
 268  268                          /*
 269  269                           * It is possible for brk_internal to silently fail to
 270  270                           * promote the heap size, so don't panic or ASSERT.
 271  271                           */
 272  272                          if (!IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz)) {
 273  273                                  as_rangeunlock(as);
 274  274                                  return (set_errno(ENOMEM));
 275  275                          }
 276  276                          oszc = p->p_brkpageszc;
 277  277                          p->p_brkpageszc = szc;
 278  278  
 279  279                          addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
 280  280                              pgsz);
 281  281                          len = (p->p_brkbase + p->p_brksize) - addr;
 282  282                          ASSERT(IS_P2ALIGNED(len, pgsz));
 283  283                          /*
 284  284                           * Perhaps no existing pages to promote.
 285  285                           */
 286  286                          if (len == 0) {
 287  287                                  as_rangeunlock(as);
 288  288                                  return (0);
 289  289                          }
 290  290                  }
 291  291                  /*
 292  292                   * The code below, as does grow.c, assumes stacks always grow
 293  293                   * downward.
 294  294                   */
 295  295                  if (pgcmd & MHA_MAPSIZE_STACK) {
 296  296                          if (szc == p->p_stkpageszc) {
 297  297                                  as_rangeunlock(as);
 298  298                                  return (0);
 299  299                          }
 300  300  
 301  301                          if (szc > p->p_stkpageszc) {
 302  302                                  error = grow_internal(p->p_usrstack -
 303  303                                      p->p_stksize, szc);
 304  304                                  if (error) {
 305  305                                          as_rangeunlock(as);
 306  306                                          return (set_errno(error));
 307  307                                  }
 308  308                          }
 309  309                          /*
 310  310                           * It is possible for grow_internal to silently fail to
 311  311                           * promote the stack size, so don't panic or ASSERT.
 312  312                           */
 313  313                          if (!IS_P2ALIGNED(p->p_usrstack - p->p_stksize, pgsz)) {
 314  314                                  as_rangeunlock(as);
 315  315                                  return (set_errno(ENOMEM));
 316  316                          }
 317  317                          oszc = p->p_stkpageszc;
 318  318                          p->p_stkpageszc = szc;
 319  319  
 320  320                          addr = p->p_usrstack - p->p_stksize;
 321  321                          len = P2ALIGN(p->p_stksize, pgsz);
 322  322  
 323  323                          /*
 324  324                           * Perhaps nothing to promote.
 325  325                           */
 326  326                          if (len == 0 || addr >= p->p_usrstack ||
 327  327                              (addr + len) < addr) {
 328  328                                  as_rangeunlock(as);
 329  329                                  return (0);
 330  330                          }
 331  331                  }
 332  332                  ASSERT(IS_P2ALIGNED(addr, pgsz));
 333  333                  ASSERT(IS_P2ALIGNED(len, pgsz));
 334  334                  error = as_setpagesize(as, addr, len, szc, B_TRUE);
 335  335  
 336  336                  /*
 337  337                   * On stack or heap failures restore original
 338  338                   * pg size code.
 339  339                   */
 340  340                  if (error) {
 341  341                          if ((pgcmd & MHA_MAPSIZE_BSSBRK) != 0) {
 342  342                                  p->p_brkpageszc = oszc;
 343  343                          }
 344  344                          if ((pgcmd & MHA_MAPSIZE_STACK) != 0) {
 345  345                                  p->p_stkpageszc = oszc;
 346  346                          }
 347  347                          (void) set_errno(error);
 348  348                  }
 349  349                  if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) {
 350  350                          as_rangeunlock(as);
 351  351                  }
 352  352                  return (error);
 353  353          case MC_ADVISE:
 354  354                  if ((uintptr_t)arg == MADV_FREE ||
 355  355                      (uintptr_t)arg == MADV_PURGE) {
 356  356                          len &= PAGEMASK;
 357  357                  }
 358  358                  switch ((uintptr_t)arg) {
 359  359                  case MADV_WILLNEED:
 360  360                          fc = as_faulta(as, addr, len);
 361  361                          if (fc) {
 362  362                                  if (FC_CODE(fc) == FC_OBJERR)
 363  363                                          error = set_errno(FC_ERRNO(fc));
 364  364                                  else if (FC_CODE(fc) == FC_NOMAP)
 365  365                                          error = set_errno(ENOMEM);
 366  366                                  else
 367  367                                          error = set_errno(EINVAL);
 368  368                                  return (error);
 369  369                          }
 370  370                          break;
 371  371  
 372  372                  case MADV_DONTNEED:
 373  373                          /*
 374  374                           * For now, don't need is turned into an as_ctl(MC_SYNC)
 375  375                           * operation flagged for async invalidate.
 376  376                           */
 377  377                          error = as_ctl(as, addr, len, MC_SYNC, attr,
 378  378                              MS_ASYNC | MS_INVALIDATE, NULL, 0);
 379  379                          if (error)
 380  380                                  (void) set_errno(error);
 381  381                          return (error);
 382  382  
 383  383                  default:
 384  384                          error = as_ctl(as, addr, len, cmd, attr,
 385  385                              (uintptr_t)arg, NULL, 0);
 386  386                          if (error)
 387  387                                  (void) set_errno(error);
 388  388                          return (error);
 389  389                  }
 390  390                  break;
 391  391          case MC_INHERIT_ZERO:
 392  392                  if (arg != 0 || attr != 0 || mask != 0)
 393  393                          return (set_errno(EINVAL));
 394  394                  break;
 395  395          default:
 396  396                  return (set_errno(EINVAL));
 397  397          }
 398  398  
 399  399          error = as_ctl(as, addr, len, cmd, attr, (uintptr_t)arg, NULL, 0);
 400  400  
 401  401          if (error)
 402  402                  (void) set_errno(error);
 403  403          return (error);
 404  404  }
 405  405  
 406  406  /*
  407  407   * Return the page size code for the page size passed in. If no
  408  408   * matching page size is found or supported, return -1.
 409  409   */
 410  410  static uint_t
 411  411  mem_getpgszc(size_t pgsz) {
 412  412          return ((uint_t)page_szc_user_filtered(pgsz));
 413  413  }
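
For reference, the file above implements the memcntl(2) system call. Below is a minimal userland sketch of the MC_HAT_ADVISE path exercised by the kernel code above; the struct memcntl_mha fields and the MHA_MAPSIZE_BSSBRK command come straight from that code, but the 2 MB page size is only an assumption for illustration (a real program would query the supported sizes, e.g. with getpagesizes(3C)).

/*
 * Sketch: advise the kernel to use a larger preferred page size for the
 * heap (.bss/brk) via memcntl(MC_HAT_ADVISE).  Per the kernel code above,
 * addr and len must be NULL/0 for MHA_MAPSIZE_BSSBRK and MHA_MAPSIZE_STACK,
 * and mha_flags must be 0 or the call fails with EINVAL.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>

int
main(void)
{
	struct memcntl_mha mha;

	mha.mha_cmd = MHA_MAPSIZE_BSSBRK;	/* exactly one command at a time */
	mha.mha_flags = 0;			/* must be zero */
	mha.mha_pagesize = 2 * 1024 * 1024;	/* assumed 2 MB large page */

	if (memcntl(NULL, 0, MC_HAT_ADVISE, (caddr_t)&mha, 0, 0) == -1) {
		perror("memcntl(MC_HAT_ADVISE)");
		return (1);
	}
	return (0);
}

Passing mha_pagesize as 0 instead lets the kernel pick a size via map_pgsz() and, as the code above notes, leaves the process's SAUTOLPG (automatic large page selection) setting untouched.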
  