Print this page
    
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/os/shm.c
          +++ new/usr/src/uts/common/os/shm.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright 2016 Joyent, Inc.
  25   25   */
  26   26  
  27   27  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  28   28  /*        All Rights Reserved   */
  29   29  
  30   30  /*
  31   31   * University Copyright- Copyright (c) 1982, 1986, 1988
  32   32   * The Regents of the University of California
  33   33   * All Rights Reserved
  34   34   *
  35   35   * University Acknowledgment- Portions of this document are derived from
  36   36   * software developed by the University of California, Berkeley, and its
  37   37   * contributors.
  38   38   */
  39   39  
  40   40  /*
  41   41   * Inter-Process Communication Shared Memory Facility.
  42   42   *
  43   43   * See os/ipc.c for a description of common IPC functionality.
  44   44   *
  45   45   * Resource controls
  46   46   * -----------------
  47   47   *
  48   48   * Control:      zone.max-shm-ids (rc_zone_shmmni)
  49   49   * Description:  Maximum number of shared memory ids allowed a zone.
  50   50   *
  51   51   *   When shmget() is used to allocate a shared memory segment, one id
  52   52   *   is allocated.  If the id allocation doesn't succeed, shmget()
  53   53   *   fails and errno is set to ENOSPC.  Upon successful shmctl(,
  54   54   *   IPC_RMID) the id is deallocated.
  55   55   *
  56   56   * Control:      project.max-shm-ids (rc_project_shmmni)
  57   57   * Description:  Maximum number of shared memory ids allowed a project.
  58   58   *
  59   59   *   When shmget() is used to allocate a shared memory segment, one id
  60   60   *   is allocated.  If the id allocation doesn't succeed, shmget()
  61   61   *   fails and errno is set to ENOSPC.  Upon successful shmctl(,
  62   62   *   IPC_RMID) the id is deallocated.
  63   63   *
  64   64   * Control:      zone.max-shm-memory (rc_zone_shmmax)
  65   65   * Description:  Total amount of shared memory allowed a zone.
  66   66   *
  67   67   *   When shmget() is used to allocate a shared memory segment, the
  68   68   *   segment's size is allocated against this limit.  If the space
  69   69   *   allocation doesn't succeed, shmget() fails and errno is set to
  70   70   *   EINVAL.  The size will be deallocated once the last process has
  71   71   *   detached the segment and the segment has been successfully
  72   72   *   shmctl(, IPC_RMID)ed.
  73   73   *
  74   74   * Control:      project.max-shm-memory (rc_project_shmmax)
  75   75   * Description:  Total amount of shared memory allowed a project.
  76   76   *
  77   77   *   When shmget() is used to allocate a shared memory segment, the
  78   78   *   segment's size is allocated against this limit.  If the space
  79   79   *   allocation doesn't succeed, shmget() fails and errno is set to
  80   80   *   EINVAL.  The size will be deallocated once the last process has
  81   81   *   detached the segment and the segment has been successfully
  82   82   *   shmctl(, IPC_RMID)ed.
  83   83   */
  84   84  
  85   85  #include <sys/types.h>
  86   86  #include <sys/param.h>
  87   87  #include <sys/cred.h>
  88   88  #include <sys/errno.h>
  89   89  #include <sys/time.h>
  90   90  #include <sys/kmem.h>
  91   91  #include <sys/user.h>
  92   92  #include <sys/proc.h>
  93   93  #include <sys/systm.h>
  94   94  #include <sys/prsystm.h>
  95   95  #include <sys/sysmacros.h>
  96   96  #include <sys/tuneable.h>
  97   97  #include <sys/vm.h>
  98   98  #include <sys/mman.h>
  99   99  #include <sys/swap.h>
 100  100  #include <sys/cmn_err.h>
 101  101  #include <sys/debug.h>
 102  102  #include <sys/lwpchan_impl.h>
 103  103  #include <sys/avl.h>
 104  104  #include <sys/modctl.h>
 105  105  #include <sys/syscall.h>
 106  106  #include <sys/task.h>
 107  107  #include <sys/project.h>
 108  108  #include <sys/policy.h>
 109  109  #include <sys/zone.h>
 110  110  #include <sys/rctl.h>
 111  111  
 112  112  #include <sys/ipc.h>
 113  113  #include <sys/ipc_impl.h>
 114  114  #include <sys/shm.h>
 115  115  #include <sys/shm_impl.h>
 116  116  
 117  117  #include <vm/hat.h>
 118  118  #include <vm/seg.h>
 119  119  #include <vm/as.h>
 120  120  #include <vm/seg_vn.h>
 121  121  #include <vm/anon.h>
 122  122  #include <vm/page.h>
 123  123  #include <vm/vpage.h>
 124  124  #include <vm/seg_spt.h>
 125  125  
 126  126  #include <c2/audit.h>
 127  127  
/* Forward declarations for routines private to this file. */
static int shmem_lock(kshmid_t *sp, struct anon_map *amp);
static void shmem_unlock(kshmid_t *sp, struct anon_map *amp);
static void sa_add(struct proc *pp, caddr_t addr, size_t len, ulong_t flags,
	kshmid_t *id);
static void shm_rm_amp(kshmid_t *sp);
static void shm_dtor(kipc_perm_t *);
static void shm_rmid(kipc_perm_t *);
static void shm_remove_zone(zoneid_t, void *);
 136  136  
 137  137  /*
 138  138   * Semantics for share_page_table and ism_off:
 139  139   *
 140  140   * These are hooks in /etc/system - only for internal testing purpose.
 141  141   *
 142  142   * Setting share_page_table automatically turns on the SHM_SHARE_MMU (ISM) flag
 143  143   * in a call to shmat(2). In other words, with share_page_table set, you always
 144  144   * get ISM, even if say, DISM is specified. It should really be called "ism_on".
 145  145   *
 146  146   * Setting ism_off turns off the SHM_SHARE_MMU flag from the flags passed to
 147  147   * shmat(2).
 148  148   *
 149  149   * If both share_page_table and ism_off are set, share_page_table prevails.
 150  150   *
 151  151   * Although these tunables should probably be removed, they do have some
 152  152   * external exposure; as long as they exist, they should at least work sensibly.
 153  153   */
 154  154  
 155  155  int share_page_table;
 156  156  int ism_off;
 157  157  
 158  158  /*
 159  159   * The following tunables are obsolete.  Though for compatibility we
 160  160   * still read and interpret shminfo_shmmax and shminfo_shmmni (see
 161  161   * os/project.c), the preferred mechanism for administrating the IPC
 162  162   * Shared Memory facility is through the resource controls described at
 163  163   * the top of this file.
 164  164   */
 165  165  size_t  shminfo_shmmax = 0x800000;      /* (obsolete) */
 166  166  int     shminfo_shmmni = 100;           /* (obsolete) */
 167  167  size_t  shminfo_shmmin = 1;             /* (obsolete) */
 168  168  int     shminfo_shmseg = 6;             /* (obsolete) */
 169  169  
 170  170  extern rctl_hndl_t rc_zone_shmmax;
 171  171  extern rctl_hndl_t rc_zone_shmmni;
 172  172  extern rctl_hndl_t rc_project_shmmax;
 173  173  extern rctl_hndl_t rc_project_shmmni;
 174  174  static ipc_service_t *shm_svc;
 175  175  static zone_key_t shm_zone_key;
 176  176  
 177  177  /*
 178  178   * Module linkage information for the kernel.
 179  179   */
static uintptr_t shmsys(int, uintptr_t, uintptr_t, uintptr_t);

/*
 * shmsys(2) system call entry: four arguments, module may not be
 * unloaded while installed (SE_NOUNLOAD; see also _fini() below).
 */
static struct sysent ipcshm_sysent = {
	4,
#ifdef	_SYSCALL32_IMPL
	SE_ARGC | SE_NOUNLOAD | SE_64RVAL,
#else	/* _SYSCALL32_IMPL */
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
#endif	/* _SYSCALL32_IMPL */
	(int (*)())shmsys
};

#ifdef	_SYSCALL32_IMPL
/* Entry for 32-bit callers on a 64-bit kernel; 32-bit return value. */
static struct sysent ipcshm_sysent32 = {
	4,
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
	(int (*)())shmsys
};
#endif	/* _SYSCALL32_IMPL */

static struct modlsys modlsys = {
	&mod_syscallops, "System V shared memory", &ipcshm_sysent
};

#ifdef	_SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32, "32-bit System V shared memory", &ipcshm_sysent32
};
#endif	/* _SYSCALL32_IMPL */

/* Ties the (one or two) syscall tables into a single loadable module. */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef	_SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
 218  218  
 219  219  
 220  220  int
 221  221  _init(void)
 222  222  {
 223  223          int result;
 224  224  
 225  225          shm_svc = ipcs_create("shmids", rc_project_shmmni, rc_zone_shmmni,
 226  226              sizeof (kshmid_t), shm_dtor, shm_rmid, AT_IPC_SHM,
 227  227              offsetof(ipc_rqty_t, ipcq_shmmni));
 228  228          zone_key_create(&shm_zone_key, NULL, shm_remove_zone, NULL);
 229  229  
 230  230          if ((result = mod_install(&modlinkage)) == 0)
 231  231                  return (0);
 232  232  
 233  233          (void) zone_key_delete(shm_zone_key);
 234  234          ipcs_destroy(shm_svc);
 235  235  
 236  236          return (result);
 237  237  }
 238  238  
 239  239  int
 240  240  _fini(void)
 241  241  {
 242  242          return (EBUSY);
 243  243  }
 244  244  
 245  245  int
 246  246  _info(struct modinfo *modinfop)
 247  247  {
 248  248          return (mod_info(&modlinkage, modinfop));
 249  249  }
 250  250  
/*
 * Shmat (attach shared segment) system call.
 *
 * Looks up the segment by id, validates permissions and flags, chooses
 * (or validates) an attach address, and maps the segment into the
 * calling process's address space -- either through the shared page
 * table path (ISM/DISM, via sptcreate()/segspt_shmattach) or as a
 * plain shared segvn mapping.  On success the chosen attach address is
 * returned through *rvp and the id's reference count is bumped.
 */
static int
shmat(int shmid, caddr_t uaddr, int uflags, uintptr_t *rvp)
{
	kshmid_t *sp;	/* shared memory header ptr */
	size_t	size;
	int	error = 0;
	proc_t *pp = curproc;
	struct as *as = pp->p_as;
	struct segvn_crargs	crargs;	/* segvn create arguments */
	kmutex_t	*lock;
	struct seg	*segspt = NULL;
	caddr_t		addr = uaddr;
	int		flags = (uflags & SHMAT_VALID_FLAGS_MASK);
	int		useISM;
	uchar_t		prot = PROT_ALL;
	int result;

	if ((lock = ipc_lookup(shm_svc, shmid, (kipc_perm_t **)&sp)) == NULL)
		return (EINVAL);
	/* Read access is always required; write access unless SHM_RDONLY. */
	if (error = ipcperm_access(&sp->shm_perm, SHM_R, CRED()))
		goto errret;
	if ((flags & SHM_RDONLY) == 0 &&
	    (error = ipcperm_access(&sp->shm_perm, SHM_W, CRED())))
		goto errret;
	if (spt_invalid(flags)) {
		error = EINVAL;
		goto errret;
	}
	/* Apply the /etc/system test hooks (see comment block above). */
	if (ism_off)
		flags = flags & ~SHM_SHARE_MMU;
	if (share_page_table) {
		flags = flags & ~SHM_PAGEABLE;
		flags = flags | SHM_SHARE_MMU;
	}
	/* ISM (locked) or DISM (pageable) attach; both require SHM_W. */
	useISM = (spt_locked(flags) || spt_pageable(flags));
	if (useISM && (error = ipcperm_access(&sp->shm_perm, SHM_W, CRED())))
		goto errret;
	if (useISM && isspt(sp)) {
		uint_t newsptflags = flags | spt_flags(sp->shm_sptseg);
		/*
		 * If trying to change an existing {D}ISM segment from ISM
		 * to DISM or vice versa, return error. Note that this
		 * validation of flags needs to be done after the effect of
		 * tunables such as ism_off and share_page_table, for
		 * semantics that are consistent with the tunables' settings.
		 */
		if (spt_invalid(newsptflags)) {
			error = EINVAL;
			goto errret;
		}
	}
	/* Snapshot the segment size under the anon map's lock. */
	ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
	size = sp->shm_amp->size;
	ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);

	/* somewhere to record spt info for final detach */
	if (sp->shm_sptinfo == NULL)
		sp->shm_sptinfo = kmem_zalloc(sizeof (sptinfo_t), KM_SLEEP);

	as_rangelock(as);

	if (useISM) {
		/*
		 * Handle ISM
		 */
		uint_t	share_szc;
		size_t	share_size;
		struct	shm_data ssd;
		uintptr_t align_hint;
		long	curprot;

		/*
		 * Pick a share pagesize to use, if (!isspt(sp)).
		 * Otherwise use the already chosen page size.
		 *
		 * For the initial shmat (!isspt(sp)), where sptcreate is
		 * called, map_pgsz is called to recommend a [D]ISM pagesize,
		 * important for systems which offer more than one potential
		 * [D]ISM pagesize.
		 * If the shmat is just to attach to an already created
		 * [D]ISM segment, then use the previously selected page size.
		 */
		if (!isspt(sp)) {
			share_size = map_pgsz(MAPPGSZ_ISM, pp, addr, size, 0);
			if (share_size == 0) {
				as_rangeunlock(as);
				error = EINVAL;
				goto errret;
			}
			share_szc = page_szc(share_size);
		} else {
			share_szc = sp->shm_sptseg->s_szc;
			share_size = page_get_pagesize(share_szc);
		}
		/* Round the mapping size up to the share pagesize. */
		size = P2ROUNDUP(size, share_size);

		align_hint = share_size;
#if defined(__i386) || defined(__amd64)
		/*
		 * For x86, we want to share as much of the page table tree
		 * as possible. We use a large align_hint at first, but
		 * if that fails, then the code below retries with align_hint
		 * set to share_size.
		 *
		 * The explicit extern here is due to the difficulties
		 * of getting to platform dependent includes. When/if the
		 * platform dependent bits of this function are cleaned up,
		 * another way of doing this should found.
		 */
		{
			extern uint_t ptes_per_table;

			while (size >= ptes_per_table * (uint64_t)align_hint)
				align_hint *= ptes_per_table;
		}
#endif /* __i386 || __amd64 */

#if defined(__sparcv9)
		if (addr == 0 &&
		    pp->p_model == DATAMODEL_LP64 && AS_TYPE_64BIT(as)) {
			/*
			 * If no address has been passed in, and this is a
			 * 64-bit process, we'll try to find an address
			 * in the predict-ISM zone.
			 */
			caddr_t predbase = (caddr_t)PREDISM_1T_BASE;
			size_t len = PREDISM_BOUND - PREDISM_1T_BASE;

			as_purge(as);
			if (as_gap(as, size + share_size, &predbase, &len,
			    AH_LO, (caddr_t)NULL) != -1) {
				/*
				 * We found an address which looks like a
				 * candidate.  We want to round it up, and
				 * then check that it's a valid user range.
				 * This assures that we won't fail below.
				 */
				addr = (caddr_t)P2ROUNDUP((uintptr_t)predbase,
				    share_size);

				if (valid_usr_range(addr, size, prot,
				    as, as->a_userlimit) != RANGE_OKAY) {
					addr = 0;
				}
			}
		}
#endif /* __sparcv9 */

		if (addr == 0) {
			/*
			 * No address supplied: let map_addr() pick one,
			 * first with the large align_hint, then retrying
			 * once with plain share_size alignment.
			 */
			for (;;) {
				addr = (caddr_t)align_hint;
				map_addr(&addr, size, 0ll, 1, MAP_ALIGN);
				if (addr != NULL || align_hint == share_size)
					break;
				align_hint = share_size;
			}
			if (addr == NULL) {
				as_rangeunlock(as);
				error = ENOMEM;
				goto errret;
			}
			ASSERT(((uintptr_t)addr & (align_hint - 1)) == 0);
		} else {
			/* Use the user-supplied attach address */
			caddr_t base;
			size_t len;

			/*
			 * Check that the address range
			 *  1) is properly aligned
			 *  2) is correct in unix terms
			 *  3) is within an unmapped address segment
			 */
			base = addr;
			len = size;		/* use spt aligned size */
			/* XXX - in SunOS, is sp->shm_segsz */
			if ((uintptr_t)base & (share_size - 1)) {
				error = EINVAL;
				as_rangeunlock(as);
				goto errret;
			}
			result = valid_usr_range(base, len, prot, as,
			    as->a_userlimit);
			if (result == RANGE_BADPROT) {
				/*
				 * We try to accomodate processors which
				 * may not support execute permissions on
				 * all ISM segments by trying the check
				 * again but without PROT_EXEC.
				 */
				prot &= ~PROT_EXEC;
				result = valid_usr_range(base, len, prot, as,
				    as->a_userlimit);
			}
			as_purge(as);
			if (result != RANGE_OKAY ||
			    as_gap(as, len, &base, &len, AH_LO,
			    (caddr_t)NULL) != 0) {
				error = EINVAL;
				as_rangeunlock(as);
				goto errret;
			}
		}

		curprot = sp->shm_opts & SHM_PROT_MASK;
		if (!isspt(sp)) {
			/* First ISM attach: create the shared spt segment. */
			error = sptcreate(size, &segspt, sp->shm_amp, prot,
			    flags, share_szc);
			if (error) {
				as_rangeunlock(as);
				goto errret;
			}
			sp->shm_sptinfo->sptas = segspt->s_as;
			sp->shm_sptseg = segspt;
			sp->shm_opts = (sp->shm_opts & ~SHM_PROT_MASK) | prot;
		} else if ((prot & curprot) != curprot) {
			/*
			 * Ensure we're attaching to an ISM segment with
			 * fewer or equal permissions than what we're
			 * allowed.  Fail if the segment has more
			 * permissions than what we're allowed.
			 */
			error = EACCES;
			as_rangeunlock(as);
			goto errret;
		}

		ssd.shm_sptseg = sp->shm_sptseg;
		ssd.shm_sptas = sp->shm_sptinfo->sptas;
		ssd.shm_amp = sp->shm_amp;
		error = as_map(as, addr, size, segspt_shmattach, &ssd);
		if (error == 0)
			sp->shm_ismattch++; /* keep count of ISM attaches */
	} else {

		/*
		 * Normal case.
		 */
		if (flags & SHM_RDONLY)
			prot &= ~PROT_WRITE;

		if (addr == 0) {
			/* Let the system pick the attach address */
			map_addr(&addr, size, 0ll, 1, 0);
			if (addr == NULL) {
				as_rangeunlock(as);
				error = ENOMEM;
				goto errret;
			}
		} else {
			/* Use the user-supplied attach address */
			caddr_t base;
			size_t len;

			/* SHM_RND: round the address down to a SHMLBA boundary. */
			if (flags & SHM_RND)
				addr = (caddr_t)((uintptr_t)addr &
				    ~(SHMLBA - 1));
			/*
			 * Check that the address range
			 *  1) is properly aligned
			 *  2) is correct in unix terms
			 *  3) is within an unmapped address segment
			 */
			base = addr;
			len = size;		/* use aligned size */
			/* XXX - in SunOS, is sp->shm_segsz */
			if ((uintptr_t)base & PAGEOFFSET) {
				error = EINVAL;
				as_rangeunlock(as);
				goto errret;
			}
			result = valid_usr_range(base, len, prot, as,
			    as->a_userlimit);
			if (result == RANGE_BADPROT) {
				/* Retry without PROT_EXEC, as in the ISM case. */
				prot &= ~PROT_EXEC;
				result = valid_usr_range(base, len, prot, as,
				    as->a_userlimit);
			}
			as_purge(as);
			if (result != RANGE_OKAY ||
			    as_gap(as, len, &base, &len,
			    AH_LO, (caddr_t)NULL) != 0) {
				error = EINVAL;
				as_rangeunlock(as);
				goto errret;
			}
		}

		/* Initialize the create arguments and map the segment */
		crargs = *(struct segvn_crargs *)zfod_argsp;
		crargs.offset = 0;
		crargs.type = MAP_SHARED;
		crargs.amp = sp->shm_amp;
		crargs.prot = prot;
		crargs.maxprot = crargs.prot;
		crargs.flags = 0;

		error = as_map(as, addr, size, segvn_create, &crargs);
	}

	as_rangeunlock(as);
	if (error)
		goto errret;

	/* record shmem range for the detach */
	sa_add(pp, addr, (size_t)size, useISM ? SHMSA_ISM : 0, sp);
	*rvp = (uintptr_t)addr;

	/* Update attach time/pid and take a reference for this attachment. */
	sp->shm_atime = gethrestime_sec();
	sp->shm_lpid = pp->p_pid;
	ipc_hold(shm_svc, (kipc_perm_t *)sp);

	/*
	 * Tell machine specific code that lwp has mapped shared memory
	 */
	LWP_MMODEL_SHARED_AS(addr, size);

errret:
	mutex_exit(lock);
	return (error);
}
 575  575  
/*
 * Destructor for a shared memory id, invoked by the IPC framework
 * (registered via ipcs_create() in _init()).  Tears down any shared
 * page table state, drops page locks, releases the anon map, and
 * returns the segment's size to the project and zone rctl accounting.
 */
static void
shm_dtor(kipc_perm_t *perm)
{
	kshmid_t *sp = (kshmid_t *)perm;
	uint_t cnt;
	size_t rsize;

	ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
	anonmap_purge(sp->shm_amp);
	ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);

	if (sp->shm_sptinfo) {
		if (isspt(sp)) {
			/* ISM/DISM: destroy the shared spt address space. */
			sptdestroy(sp->shm_sptinfo->sptas, sp->shm_amp);
			sp->shm_lkcnt = 0;
		}
		kmem_free(sp->shm_sptinfo, sizeof (sptinfo_t));
	}

	/* Release any SHM_LOCK page locks still held on the segment. */
	if (sp->shm_lkcnt > 0) {
		shmem_unlock(sp, sp->shm_amp);
		sp->shm_lkcnt = 0;
	}

	/* Drop what must be the final reference on the anon map. */
	ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
	cnt = --sp->shm_amp->refcnt;
	ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
	ASSERT(cnt == 0);
	shm_rm_amp(sp);

	if (sp->shm_perm.ipc_id != IPC_ID_INVAL) {
		/* Return the rounded-up size to the shmmax rctl accounting. */
		rsize = ptob(btopr(sp->shm_segsz));
		ipcs_lock(shm_svc);
		sp->shm_perm.ipc_proj->kpj_data.kpd_shmmax -= rsize;
		sp->shm_perm.ipc_zone_ref.zref_zone->zone_shmmax -= rsize;
		ipcs_unlock(shm_svc);
	}
}
 614  614  
/* ARGSUSED */
/*
 * IPC_RMID callback registered with the IPC framework in _init().
 * Nothing is needed here: the actual teardown happens in shm_dtor()
 * once the id's references are gone.
 */
static void
shm_rmid(kipc_perm_t *perm)
{
	/* nothing to do */
}
 621  621  
 622  622  /*
 623  623   * Shmctl system call.
 624  624   */
 625  625  /* ARGSUSED */
 626  626  static int
 627  627  shmctl(int shmid, int cmd, void *arg)
 628  628  {
 629  629          kshmid_t                *sp;    /* shared memory header ptr */
 630  630          STRUCT_DECL(shmid_ds, ds);      /* for SVR4 IPC_SET */
 631  631          int                     error = 0;
 632  632          struct cred             *cr = CRED();
 633  633          kmutex_t                *lock;
 634  634          model_t                 mdl = get_udatamodel();
 635  635          struct shmid_ds64       ds64;
 636  636          shmatt_t                nattch;
 637  637  
 638  638          STRUCT_INIT(ds, mdl);
 639  639  
 640  640          /*
 641  641           * Perform pre- or non-lookup actions (e.g. copyins, RMID).
 642  642           */
 643  643          switch (cmd) {
 644  644          case IPC_SET:
 645  645                  if (copyin(arg, STRUCT_BUF(ds), STRUCT_SIZE(ds)))
 646  646                          return (EFAULT);
 647  647                  break;
 648  648  
 649  649          case IPC_SET64:
 650  650                  if (copyin(arg, &ds64, sizeof (struct shmid_ds64)))
 651  651                          return (EFAULT);
 652  652                  break;
 653  653  
 654  654          case IPC_RMID:
 655  655                  return (ipc_rmid(shm_svc, shmid, cr));
 656  656          }
 657  657  
 658  658          if ((lock = ipc_lookup(shm_svc, shmid, (kipc_perm_t **)&sp)) == NULL)
 659  659                  return (EINVAL);
 660  660  
 661  661          switch (cmd) {
 662  662          /* Set ownership and permissions. */
 663  663          case IPC_SET:
 664  664                  if (error = ipcperm_set(shm_svc, cr, &sp->shm_perm,
 665  665                      &STRUCT_BUF(ds)->shm_perm, mdl))
 666  666                                  break;
 667  667                  sp->shm_ctime = gethrestime_sec();
 668  668                  break;
 669  669  
 670  670          case IPC_STAT:
 671  671                  if (error = ipcperm_access(&sp->shm_perm, SHM_R, cr))
 672  672                          break;
 673  673  
 674  674                  nattch = sp->shm_perm.ipc_ref - 1;
 675  675  
 676  676                  ipcperm_stat(&STRUCT_BUF(ds)->shm_perm, &sp->shm_perm, mdl);
 677  677                  STRUCT_FSET(ds, shm_segsz, sp->shm_segsz);
 678  678                  STRUCT_FSETP(ds, shm_amp, NULL);        /* kernel addr */
 679  679                  STRUCT_FSET(ds, shm_lkcnt, sp->shm_lkcnt);
 680  680                  STRUCT_FSET(ds, shm_lpid, sp->shm_lpid);
 681  681                  STRUCT_FSET(ds, shm_cpid, sp->shm_cpid);
 682  682                  STRUCT_FSET(ds, shm_nattch, nattch);
 683  683                  STRUCT_FSET(ds, shm_cnattch, sp->shm_ismattch);
 684  684                  STRUCT_FSET(ds, shm_atime, sp->shm_atime);
 685  685                  STRUCT_FSET(ds, shm_dtime, sp->shm_dtime);
 686  686                  STRUCT_FSET(ds, shm_ctime, sp->shm_ctime);
 687  687  
 688  688                  mutex_exit(lock);
 689  689                  if (copyout(STRUCT_BUF(ds), arg, STRUCT_SIZE(ds)))
 690  690                          return (EFAULT);
 691  691  
 692  692                  return (0);
 693  693  
 694  694          case IPC_SET64:
 695  695                  if (error = ipcperm_set64(shm_svc, cr,
 696  696                      &sp->shm_perm, &ds64.shmx_perm))
 697  697                          break;
 698  698                  sp->shm_ctime = gethrestime_sec();
 699  699                  break;
 700  700  
 701  701          case IPC_STAT64:
 702  702                  nattch = sp->shm_perm.ipc_ref - 1;
 703  703  
 704  704                  ipcperm_stat64(&ds64.shmx_perm, &sp->shm_perm);
 705  705                  ds64.shmx_segsz = sp->shm_segsz;
 706  706                  ds64.shmx_lkcnt = sp->shm_lkcnt;
 707  707                  ds64.shmx_lpid = sp->shm_lpid;
 708  708                  ds64.shmx_cpid = sp->shm_cpid;
 709  709                  ds64.shmx_nattch = nattch;
 710  710                  ds64.shmx_cnattch = sp->shm_ismattch;
 711  711                  ds64.shmx_atime = sp->shm_atime;
 712  712                  ds64.shmx_dtime = sp->shm_dtime;
 713  713                  ds64.shmx_ctime = sp->shm_ctime;
 714  714  
 715  715                  mutex_exit(lock);
 716  716                  if (copyout(&ds64, arg, sizeof (struct shmid_ds64)))
 717  717                          return (EFAULT);
 718  718  
 719  719                  return (0);
 720  720  
 721  721          /* Lock segment in memory */
 722  722          case SHM_LOCK:
 723  723                  if ((error = secpolicy_lock_memory(cr)) != 0)
 724  724                          break;
 725  725  
 726  726                  /* protect against overflow */
 727  727                  if (sp->shm_lkcnt >= USHRT_MAX) {
 728  728                          error = ENOMEM;
 729  729                          break;
 730  730                  }
 731  731                  if (!isspt(sp) && (sp->shm_lkcnt++ == 0)) {
 732  732                          if (error = shmem_lock(sp, sp->shm_amp)) {
 733  733                                  ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock,
 734  734                                      RW_WRITER);
 735  735                                  cmn_err(CE_NOTE, "shmctl - couldn't lock %ld"
 736  736                                      " pages into memory", sp->shm_amp->size);
 737  737                                  ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
 738  738                                  error = ENOMEM;
 739  739                                  sp->shm_lkcnt--;
 740  740                          }
 741  741                  }
 742  742                  break;
 743  743  
 744  744          /* Unlock segment */
 745  745          case SHM_UNLOCK:
 746  746                  if ((error = secpolicy_lock_memory(cr)) != 0)
 747  747                          break;
 748  748  
 749  749                  if (sp->shm_lkcnt && (--sp->shm_lkcnt == 0)) {
 750  750                          shmem_unlock(sp, sp->shm_amp);
 751  751                  }
 752  752                  break;
 753  753  
 754  754          /* Stage segment for removal, but don't remove until last detach */
 755  755          case SHM_RMID:
 756  756                  if ((error = secpolicy_ipc_owner(cr, (kipc_perm_t *)sp)) != 0)
 757  757                          break;
 758  758  
 759  759                  /*
 760  760                   * If attached, just mark it as a pending remove, otherwise
 761  761                   * we must perform the normal ipc_rmid now.
 762  762                   */
 763  763                  if ((sp->shm_perm.ipc_ref - 1) > 0) {
 764  764                          sp->shm_opts |= SHM_RM_PENDING;
 765  765                  } else {
 766  766                          mutex_exit(lock);
 767  767                          return (ipc_rmid(shm_svc, shmid, cr));
 768  768                  }
 769  769                  break;
 770  770  
 771  771          default:
 772  772                  error = EINVAL;
 773  773                  break;
 774  774          }
 775  775          mutex_exit(lock);
 776  776          return (error);
 777  777  }
 778  778  
/*
 * Detach one shared memory segment (described by the segacct record
 * 'sap') from process 'pp': remove the mapping from the process address
 * space, update detach-time bookkeeping on the kshmid, perform a pending
 * SHM_RMID if this was the last detach system-wide, and drop the ipc
 * reference taken at attach time.  Frees 'sap' before returning.
 */
static void
shm_detach(proc_t *pp, segacct_t *sap)
{
	kshmid_t	*sp = sap->sa_id;
	size_t		len = sap->sa_len;
	caddr_t		addr = sap->sa_addr;

	/*
	 * Discard lwpchan mappings.
	 */
	if (pp->p_lcp != NULL)
		lwpchan_delete_mapping(pp, addr, addr + len);
	(void) as_unmap(pp->p_as, addr, len);

	/*
	 * Perform some detach-time accounting.
	 */
	(void) ipc_lock(shm_svc, sp->shm_perm.ipc_id);
	if (sap->sa_flags & SHMSA_ISM)
		sp->shm_ismattch--;
	sp->shm_dtime = gethrestime_sec();
	sp->shm_lpid = pp->p_pid;
	if ((sp->shm_opts & SHM_RM_PENDING) != 0 &&
	    sp->shm_perm.ipc_ref == 2) {
		/*
		 * If this is the last detach of the segment across the whole
		 * system then now we can perform the delayed IPC_RMID.
		 * The ipc_ref count has 1 for the original 'get' and one for
		 * each 'attach' (see 'stat' handling in shmctl).
		 */
		sp->shm_opts &= ~SHM_RM_PENDING;
		mutex_enter(&shm_svc->ipcs_lock);
		ipc_rmsvc(shm_svc, (kipc_perm_t *)sp);	/* Drops lock */
		ASSERT(!MUTEX_HELD(&shm_svc->ipcs_lock));
		ASSERT(((kipc_perm_t *)sp)->ipc_ref > 0);

		/* Lock was dropped, need to retake it for following rele. */
		(void) ipc_lock(shm_svc, sp->shm_perm.ipc_id);
	}
	ipc_rele(shm_svc, (kipc_perm_t *)sp);	/* Drops lock */

	kmem_free(sap, sizeof (segacct_t));
}
 822  822  
 823  823  static int
 824  824  shmdt(caddr_t addr)
 825  825  {
 826  826          proc_t *pp = curproc;
 827  827          segacct_t *sap, template;
 828  828  
 829  829          mutex_enter(&pp->p_lock);
 830  830          prbarrier(pp);                  /* block /proc.  See shmgetid(). */
 831  831  
 832  832          template.sa_addr = addr;
 833  833          template.sa_len = 0;
 834  834          if ((pp->p_segacct == NULL) ||
 835  835              ((sap = avl_find(pp->p_segacct, &template, NULL)) == NULL)) {
 836  836                  mutex_exit(&pp->p_lock);
 837  837                  return (EINVAL);
 838  838          }
 839  839          if (sap->sa_addr != addr) {
 840  840                  mutex_exit(&pp->p_lock);
 841  841                  return (EINVAL);
 842  842          }
 843  843          avl_remove(pp->p_segacct, sap);
 844  844          mutex_exit(&pp->p_lock);
 845  845  
 846  846          shm_detach(pp, sap);
 847  847  
 848  848          return (0);
 849  849  }
 850  850  
/*
 * Remove all shared memory segments associated with a given zone.
 * Called by zone_shutdown when the zone is halted.
 */
/*ARGSUSED1*/
static void
shm_remove_zone(zoneid_t zoneid, void *arg)
{
	/* 'arg' is unused; the signature is fixed by the zone callback API. */
	ipc_remove_zone(shm_svc, zoneid);
}
 861  861  
 862  862  /*
 863  863   * Shmget (create new shmem) system call.
 864  864   */
 865  865  static int
 866  866  shmget(key_t key, size_t size, int shmflg, uintptr_t *rvp)
 867  867  {
 868  868          proc_t          *pp = curproc;
 869  869          kshmid_t        *sp;
 870  870          kmutex_t        *lock;
 871  871          int             error;
 872  872  
 873  873  top:
 874  874          if (error = ipc_get(shm_svc, key, shmflg, (kipc_perm_t **)&sp, &lock))
 875  875                  return (error);
 876  876  
 877  877          if (!IPC_FREE(&sp->shm_perm)) {
 878  878                  /*
 879  879                   * A segment with the requested key exists.
 880  880                   */
 881  881                  if (size > sp->shm_segsz) {
 882  882                          mutex_exit(lock);
 883  883                          return (EINVAL);
 884  884                  }
 885  885          } else {
 886  886                  /*
 887  887                   * A new segment should be created.
 888  888                   */
 889  889                  size_t npages = btopr(size);
 890  890                  size_t rsize = ptob(npages);
 891  891  
 892  892                  /*
 893  893                   * Check rsize and the per-project and per-zone limit on
 894  894                   * shared memory.  Checking rsize handles both the size == 0
 895  895                   * case and the size < ULONG_MAX & PAGEMASK case (i.e.
 896  896                   * rounding up wraps a size_t).
 897  897                   */
 898  898                  if (rsize == 0 ||
 899  899                      (rctl_test(rc_project_shmmax,
 900  900                      pp->p_task->tk_proj->kpj_rctls, pp, rsize,
 901  901                      RCA_SAFE) & RCT_DENY) ||
 902  902                      (rctl_test(rc_zone_shmmax,
 903  903                      pp->p_zone->zone_rctls, pp, rsize,
 904  904                      RCA_SAFE) & RCT_DENY)) {
 905  905  
 906  906                          mutex_exit(&pp->p_lock);
 907  907                          mutex_exit(lock);
 908  908                          ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
 909  909                          return (EINVAL);
 910  910                  }
 911  911                  mutex_exit(&pp->p_lock);
 912  912                  mutex_exit(lock);
 913  913  
 914  914                  if (anon_resv(rsize) == 0) {
 915  915                          ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
 916  916                          return (ENOMEM);
 917  917                  }
 918  918  
 919  919                  /*
 920  920                   * If any new failure points are introduced between the
 921  921                   * the above anon_resv() and the below ipc_commit_begin(),
 922  922                   * these failure points will need to unreserve the anon
 923  923                   * reserved using anon_unresv().
 924  924                   *
 925  925                   * Once ipc_commit_begin() is called, the anon reserved
 926  926                   * above will be automatically unreserved by future calls to
 927  927                   * ipcs_cleanup() -> shm_dtor() -> shm_rm_amp().  If
 928  928                   * ipc_commit_begin() fails, it internally calls shm_dtor(),
 929  929                   * unreserving the above anon, and freeing the below amp.
 930  930                   */
 931  931  
 932  932                  sp->shm_amp = anonmap_alloc(rsize, rsize, ANON_SLEEP);
 933  933                  sp->shm_amp->a_sp = sp;
 934  934                  /*
 935  935                   * Store the original user's requested size, in bytes,
 936  936                   * rather than the page-aligned size.  The former is
 937  937                   * used for IPC_STAT and shmget() lookups.  The latter
 938  938                   * is saved in the anon_map structure and is used for
 939  939                   * calls to the vm layer.
 940  940                   */
 941  941                  sp->shm_segsz = size;
 942  942                  sp->shm_atime = sp->shm_dtime = 0;
 943  943                  sp->shm_ctime = gethrestime_sec();
 944  944                  sp->shm_lpid = (pid_t)0;
 945  945                  sp->shm_cpid = curproc->p_pid;
 946  946                  sp->shm_ismattch = 0;
 947  947                  sp->shm_sptinfo = NULL;
 948  948                  /*
 949  949                   * Check limits one last time, push id into global
 950  950                   * visibility, and update resource usage counts.
 951  951                   */
 952  952                  if (error = ipc_commit_begin(shm_svc, key, shmflg,
 953  953                      (kipc_perm_t *)sp)) {
 954  954                          if (error == EAGAIN)
 955  955                                  goto top;
 956  956                          return (error);
 957  957                  }
 958  958  
 959  959                  if ((rctl_test(rc_project_shmmax,
 960  960                      sp->shm_perm.ipc_proj->kpj_rctls, pp, rsize,
 961  961                      RCA_SAFE) & RCT_DENY) ||
 962  962                      (rctl_test(rc_zone_shmmax,
 963  963                      sp->shm_perm.ipc_zone_ref.zref_zone->zone_rctls, pp, rsize,
 964  964                      RCA_SAFE) & RCT_DENY)) {
 965  965                          ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
 966  966                          return (EINVAL);
 967  967                  }
 968  968                  sp->shm_perm.ipc_proj->kpj_data.kpd_shmmax += rsize;
 969  969                  sp->shm_perm.ipc_zone_ref.zref_zone->zone_shmmax += rsize;
 970  970  
 971  971                  lock = ipc_commit_end(shm_svc, &sp->shm_perm);
 972  972          }
 973  973  
 974  974          if (AU_AUDITING())
 975  975                  audit_ipcget(AT_IPC_SHM, (void *)sp);
 976  976  
 977  977          *rvp = (uintptr_t)(sp->shm_perm.ipc_id);
 978  978  
 979  979          mutex_exit(lock);
 980  980          return (0);
 981  981  }
 982  982  
/*
 * shmids system call: copy out the list of active shared memory ids.
 * All real work is delegated to the common ipc_ids() helper.
 */
static int
shmids(int *buf, uint_t nids, uint_t *pnids)
{
	return (ipc_ids(shm_svc, buf, nids, pnids));
}
 991  991  
 992  992  /*
 993  993   * System entry point for shmat, shmctl, shmdt, and shmget system calls.
 994  994   */
 995  995  static uintptr_t
 996  996  shmsys(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2)
 997  997  {
 998  998          int     error;
 999  999          uintptr_t r_val = 0;
1000 1000  
1001 1001          switch (opcode) {
1002 1002          case SHMAT:
1003 1003                  error = shmat((int)a0, (caddr_t)a1, (int)a2, &r_val);
1004 1004                  break;
1005 1005          case SHMCTL:
1006 1006                  error = shmctl((int)a0, (int)a1, (void *)a2);
1007 1007                  break;
1008 1008          case SHMDT:
1009 1009                  error = shmdt((caddr_t)a0);
1010 1010                  break;
1011 1011          case SHMGET:
1012 1012                  error = shmget((key_t)a0, (size_t)a1, (int)a2, &r_val);
1013 1013                  break;
1014 1014          case SHMIDS:
1015 1015                  error = shmids((int *)a0, (uint_t)a1, (uint_t *)a2);
1016 1016                  break;
1017 1017          default:
1018 1018                  error = EINVAL;
1019 1019                  break;
1020 1020          }
1021 1021  
1022 1022          if (error)
1023 1023                  return ((uintptr_t)set_errno(error));
1024 1024  
1025 1025          return (r_val);
1026 1026  }
1027 1027  
1028 1028  /*
1029 1029   * segacct_t comparator
1030 1030   * This works as expected, with one minor change: the first of two real
1031 1031   * segments with equal addresses is considered to be 'greater than' the
1032 1032   * second.  We only return equal when searching using a template, in
1033 1033   * which case we explicitly set the template segment's length to 0
1034 1034   * (which is invalid for a real segment).
1035 1035   */
1036 1036  static int
1037 1037  shm_sacompar(const void *x, const void *y)
1038 1038  {
1039 1039          segacct_t *sa1 = (segacct_t *)x;
1040 1040          segacct_t *sa2 = (segacct_t *)y;
1041 1041  
1042 1042          if (sa1->sa_addr < sa2->sa_addr) {
1043 1043                  return (-1);
1044 1044          } else if (sa2->sa_len != 0) {
1045 1045                  if (sa1->sa_addr >= sa2->sa_addr + sa2->sa_len) {
1046 1046                          return (1);
1047 1047                  } else if (sa1->sa_len != 0) {
1048 1048                          return (1);
1049 1049                  } else {
1050 1050                          return (0);
1051 1051                  }
1052 1052          } else if (sa1->sa_addr > sa2->sa_addr) {
1053 1053                  return (1);
1054 1054          } else {
1055 1055                  return (0);
1056 1056          }
1057 1057  }
1058 1058  
/*
 * Add a segment-accounting record for the attach of [addr, addr + len)
 * to process 'pp', creating the per-process AVL tree on first use.
 */
static void
sa_add(struct proc *pp, caddr_t addr, size_t len, ulong_t flags, kshmid_t *id)
{
	segacct_t *nsap;
	avl_tree_t *tree = NULL;
	avl_index_t where;

	/*
	 * Allocate up front: KM_SLEEP allocations cannot be performed
	 * while holding p_lock (see the comment in shmfork()).
	 */
	nsap = kmem_alloc(sizeof (segacct_t), KM_SLEEP);
	nsap->sa_addr = addr;
	nsap->sa_len  = len;
	nsap->sa_flags = flags;
	nsap->sa_id = id;

	/* Speculatively allocate the tree too, before taking p_lock. */
	if (pp->p_segacct == NULL)
		tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);

	mutex_enter(&pp->p_lock);
	prbarrier(pp);			/* block /proc.  See shmgetid(). */

	if (pp->p_segacct == NULL) {
		avl_create(tree, shm_sacompar, sizeof (segacct_t),
		    offsetof(segacct_t, sa_tree));
		pp->p_segacct = tree;
	} else if (tree) {
		/* Someone else created the tree meanwhile; discard ours. */
		kmem_free(tree, sizeof (avl_tree_t));
	}

	/*
	 * We can ignore the result of avl_find, as the comparator will
	 * never return equal for segments with non-zero length.  This
	 * is a necessary hack to get around the fact that we do, in
	 * fact, have duplicate keys.
	 */
	(void) avl_find(pp->p_segacct, nsap, &where);
	avl_insert(pp->p_segacct, nsap, where);

	mutex_exit(&pp->p_lock);
}
1100 1100  
/*
 * Duplicate parent's segacct records in child.
 * Each inherited attach takes its own ipc reference on the kshmid and
 * bumps the ISM attach count where applicable, mirroring shmat().
 */
void
shmfork(struct proc *ppp, struct proc *cpp)
{
	segacct_t *sap;
	kshmid_t *sp;
	kmutex_t *mp;

	ASSERT(ppp->p_segacct != NULL);

	/*
	 * We are the only lwp running in the parent so nobody can
	 * mess with our p_segacct list.  Thus it is safe to traverse
	 * the list without holding p_lock.  This is essential because
	 * we can't hold p_lock during a KM_SLEEP allocation.
	 */
	for (sap = (segacct_t *)avl_first(ppp->p_segacct); sap != NULL;
	    sap = (segacct_t *)AVL_NEXT(ppp->p_segacct, sap)) {
		sa_add(cpp, sap->sa_addr, sap->sa_len, sap->sa_flags,
		    sap->sa_id);
		sp = sap->sa_id;
		mp = ipc_lock(shm_svc, sp->shm_perm.ipc_id);
		if (sap->sa_flags & SHMSA_ISM)
			sp->shm_ismattch++;
		ipc_hold(shm_svc, (kipc_perm_t *)sp);
		mutex_exit(mp);
	}
}
1131 1131  
1132 1132  /*
1133 1133   * Detach shared memory segments from exiting process.
1134 1134   */
1135 1135  void
1136 1136  shmexit(struct proc *pp)
1137 1137  {
1138 1138          segacct_t *sap;
1139 1139          avl_tree_t *tree;
1140 1140          void *cookie = NULL;
1141 1141  
1142 1142          ASSERT(pp->p_segacct != NULL);
1143 1143  
1144 1144          mutex_enter(&pp->p_lock);
1145 1145          prbarrier(pp);
1146 1146          tree = pp->p_segacct;
1147 1147          pp->p_segacct = NULL;
1148 1148          mutex_exit(&pp->p_lock);
1149 1149  
1150 1150          while ((sap = avl_destroy_nodes(tree, &cookie)) != NULL)
1151 1151                  (void) shm_detach(pp, sap);
1152 1152  
1153 1153          avl_destroy(tree);
1154 1154          kmem_free(tree, sizeof (avl_tree_t));
1155 1155  }
1156 1156  
1157 1157  /*
1158 1158   * At this time pages should be in memory, so just lock them.
1159 1159   */
1160 1160  static void
1161 1161  lock_again(size_t npages, kshmid_t *sp, struct anon_map *amp)
1162 1162  {
1163 1163          struct anon *ap;
1164 1164          struct page *pp;
1165 1165          struct vnode *vp;
1166 1166          u_offset_t off;
1167 1167          ulong_t anon_idx;
1168 1168          anon_sync_obj_t cookie;
1169 1169  
1170 1170          mutex_enter(&sp->shm_mlock);
1171 1171          ANON_LOCK_ENTER(&->a_rwlock, RW_READER);
1172 1172          for (anon_idx = 0; npages != 0; anon_idx++, npages--) {
1173 1173  
1174 1174                  anon_array_enter(amp, anon_idx, &cookie);
1175 1175                  ap = anon_get_ptr(amp->ahp, anon_idx);
1176 1176                  ASSERT(ap != NULL);
1177 1177                  swap_xlate(ap, &vp, &off);
1178 1178                  anon_array_exit(&cookie);
1179 1179  
1180 1180                  pp = page_lookup(vp, off, SE_SHARED);
1181 1181                  if (pp == NULL) {
1182 1182                          panic("lock_again: page not in the system");
1183 1183                          /*NOTREACHED*/
1184 1184                  }
1185 1185                  /* page should already be locked by caller */
1186 1186                  ASSERT(pp->p_lckcnt > 0);
1187 1187                  (void) page_pp_lock(pp, 0, 0);
1188 1188                  page_unlock(pp);
1189 1189          }
1190 1190          ANON_LOCK_EXIT(&->a_rwlock);
1191 1191          mutex_exit(&sp->shm_mlock);
1192 1192  }
1193 1193  
/*
 * Attach the shared memory segment to the process
 * address space and lock the pages.
 *
 * Uses a throwaway kernel address space: the amp is mapped there,
 * MC_LOCK'ed (which faults the pages in and locks them), given an
 * extra per-page lock via lock_again(), and then unmapped again so
 * only the page locks survive.  Returns 0 or an as_map/as_ctl error.
 */
static int
shmem_lock(kshmid_t *sp, struct anon_map *amp)
{
	size_t npages = btopr(amp->size);
	struct as *as;
	struct segvn_crargs crargs;
	uint_t error;

	/*
	 * A later ISM/DISM attach may increase the size of the amp, so
	 * cache the number of pages locked for the future shmem_unlock()
	 */
	sp->shm_lkpages = npages;

	as = as_alloc();
	/* Initialize the create arguments and map the segment */
	crargs = *(struct segvn_crargs *)zfod_argsp;	/* structure copy */
	crargs.offset = (u_offset_t)0;
	crargs.type = MAP_SHARED;
	crargs.amp = amp;
	crargs.prot = PROT_ALL;
	crargs.maxprot = crargs.prot;
	crargs.flags = 0;
	error = as_map(as, 0x0, amp->size, segvn_create, &crargs);
	if (!error) {
		if ((error = as_ctl(as, 0x0, amp->size, MC_LOCK, 0, 0,
		    NULL, 0)) == 0) {
			lock_again(npages, sp, amp);
		}
		/* The scratch mapping is no longer needed. */
		(void) as_unmap(as, 0x0, amp->size);
	}
	as_free(as);
	return (error);
}
1232 1232  
1233 1233  
1234 1234  /*
1235 1235   * Unlock shared memory
1236 1236   */
1237 1237  static void
1238 1238  shmem_unlock(kshmid_t *sp, struct anon_map *amp)
1239 1239  {
1240 1240          struct anon *ap;
1241 1241          pgcnt_t npages = sp->shm_lkpages;
1242 1242          struct vnode *vp;
1243 1243          struct page *pp;
1244 1244          u_offset_t off;
1245 1245          ulong_t anon_idx;
1246 1246          size_t unlocked_bytes = 0;
1247 1247          kproject_t      *proj;
1248 1248          anon_sync_obj_t cookie;
1249 1249  
1250 1250          proj = sp->shm_perm.ipc_proj;
1251 1251          mutex_enter(&sp->shm_mlock);
1252 1252          ANON_LOCK_ENTER(&->a_rwlock, RW_READER);
1253 1253          for (anon_idx = 0; anon_idx < npages; anon_idx++) {
1254 1254  
1255 1255                  anon_array_enter(amp, anon_idx, &cookie);
1256 1256                  if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
1257 1257                          panic("shmem_unlock: null app");
1258 1258                          /*NOTREACHED*/
1259 1259                  }
1260 1260                  swap_xlate(ap, &vp, &off);
1261 1261                  anon_array_exit(&cookie);
1262 1262                  pp = page_lookup(vp, off, SE_SHARED);
1263 1263                  if (pp == NULL) {
1264 1264                          panic("shmem_unlock: page not in the system");
1265 1265                          /*NOTREACHED*/
1266 1266                  }
1267 1267                  /*
1268 1268                   * Page should at least have once lock from previous
1269 1269                   * shmem_lock
1270 1270                   */
1271 1271                  ASSERT(pp->p_lckcnt > 0);
1272 1272                  page_pp_unlock(pp, 0, 0);
1273 1273                  if (pp->p_lckcnt == 0)
1274 1274                          unlocked_bytes += PAGESIZE;
1275 1275  
1276 1276                  page_unlock(pp);
1277 1277          }
1278 1278  
1279 1279          if (unlocked_bytes > 0) {
1280 1280                  rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0);
1281 1281          }
1282 1282  
1283 1283          ANON_LOCK_EXIT(&->a_rwlock);
1284 1284          mutex_exit(&sp->shm_mlock);
1285 1285  }
1286 1286  
1287 1287  /*
1288 1288   * We call this routine when we have removed all references to this
1289 1289   * amp.  This means all shmdt()s and the IPC_RMID have been done.
1290 1290   */
1291 1291  static void
1292 1292  shm_rm_amp(kshmid_t *sp)
1293 1293  {
1294 1294          struct anon_map *amp = sp->shm_amp;
1295 1295          zone_t *zone;
1296 1296  
1297 1297          zone = sp->shm_perm.ipc_zone_ref.zref_zone;
1298 1298          ASSERT(zone != NULL);
1299 1299          /*
1300 1300           * Free up the anon_map.
1301 1301           */
1302 1302          lgrp_shm_policy_fini(amp, NULL);
1303 1303          ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER);
1304 1304          if (amp->a_szc != 0) {
1305 1305                  anon_shmap_free_pages(amp, 0, amp->size);
1306 1306          } else {
1307 1307                  anon_free(amp->ahp, 0, amp->size);
1308 1308          }
1309 1309          ANON_LOCK_EXIT(&->a_rwlock);
1310 1310          anon_unresv_zone(amp->swresv, zone);
1311 1311          anonmap_free(amp);
1312 1312  }
1313 1313  
1314 1314  /*
1315 1315   * Return the shared memory id for the process's virtual address.
1316 1316   * Return SHMID_NONE if addr is not within a SysV shared memory segment.
1317 1317   * Return SHMID_FREE if addr's SysV shared memory segment's id has been freed.
1318 1318   *
1319 1319   * shmgetid() is called from code in /proc with the process locked but
1320 1320   * with pp->p_lock not held.  The address space lock is held, so we
1321 1321   * cannot grab pp->p_lock here due to lock-ordering constraints.
1322 1322   * Because of all this, modifications to the p_segacct list must only
1323 1323   * be made after calling prbarrier() to ensure the process is not locked.
1324 1324   * See shmdt() and sa_add(), above. shmgetid() may also be called on a
1325 1325   * thread's own process without the process locked.
1326 1326   */
1327 1327  int
1328 1328  shmgetid(proc_t *pp, caddr_t addr)
1329 1329  {
1330 1330          segacct_t *sap, template;
1331 1331  
1332 1332          ASSERT(MUTEX_NOT_HELD(&pp->p_lock));
1333 1333          ASSERT((pp->p_proc_flag & P_PR_LOCK) || pp == curproc);
1334 1334  
1335 1335          if (pp->p_segacct == NULL)
1336 1336                  return (SHMID_NONE);
1337 1337  
1338 1338          template.sa_addr = addr;
1339 1339          template.sa_len = 0;
1340 1340          if ((sap = avl_find(pp->p_segacct, &template, NULL)) == NULL)
1341 1341                  return (SHMID_NONE);
1342 1342  
1343 1343          if (IPC_FREE(&sap->sa_id->shm_perm))
1344 1344                  return (SHMID_FREE);
1345 1345  
1346 1346          return (sap->sa_id->shm_perm.ipc_id);
1347 1347  }
  
    | 
      ↓ open down ↓ | 
    1347 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX