Print this page
    
OS-5223 removed shm segment is no longer available
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/os/shm.c
          +++ new/usr/src/uts/common/os/shm.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
       24 + * Copyright 2016 Joyent, Inc.
  24   25   */
  25   26  
  26   27  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  27   28  /*        All Rights Reserved   */
  28   29  
  29   30  /*
  30   31   * University Copyright- Copyright (c) 1982, 1986, 1988
  31   32   * The Regents of the University of California
  32   33   * All Rights Reserved
  33   34   *
  34   35   * University Acknowledgment- Portions of this document are derived from
  35   36   * software developed by the University of California, Berkeley, and its
  36   37   * contributors.
  37   38   */
  38   39  
  39   40  /*
  40   41   * Inter-Process Communication Shared Memory Facility.
  41   42   *
  42   43   * See os/ipc.c for a description of common IPC functionality.
  43   44   *
  44   45   * Resource controls
  45   46   * -----------------
  46   47   *
  47   48   * Control:      zone.max-shm-ids (rc_zone_shmmni)
  48   49   * Description:  Maximum number of shared memory ids allowed a zone.
  49   50   *
  50   51   *   When shmget() is used to allocate a shared memory segment, one id
  51   52   *   is allocated.  If the id allocation doesn't succeed, shmget()
  52   53   *   fails and errno is set to ENOSPC.  Upon successful shmctl(,
  53   54   *   IPC_RMID) the id is deallocated.
  54   55   *
  55   56   * Control:      project.max-shm-ids (rc_project_shmmni)
  56   57   * Description:  Maximum number of shared memory ids allowed a project.
  57   58   *
  58   59   *   When shmget() is used to allocate a shared memory segment, one id
  59   60   *   is allocated.  If the id allocation doesn't succeed, shmget()
  60   61   *   fails and errno is set to ENOSPC.  Upon successful shmctl(,
  61   62   *   IPC_RMID) the id is deallocated.
  62   63   *
  63   64   * Control:      zone.max-shm-memory (rc_zone_shmmax)
  64   65   * Description:  Total amount of shared memory allowed a zone.
  65   66   *
  66   67   *   When shmget() is used to allocate a shared memory segment, the
  67   68   *   segment's size is allocated against this limit.  If the space
  68   69   *   allocation doesn't succeed, shmget() fails and errno is set to
  69   70   *   EINVAL.  The size will be deallocated once the last process has
  70   71   *   detached the segment and the segment has been successfully
  71   72   *   shmctl(, IPC_RMID)ed.
  72   73   *
  73   74   * Control:      project.max-shm-memory (rc_project_shmmax)
  74   75   * Description:  Total amount of shared memory allowed a project.
  75   76   *
  76   77   *   When shmget() is used to allocate a shared memory segment, the
  77   78   *   segment's size is allocated against this limit.  If the space
  78   79   *   allocation doesn't succeed, shmget() fails and errno is set to
  79   80   *   EINVAL.  The size will be deallocated once the last process has
  80   81   *   detached the segment and the segment has been successfully
  81   82   *   shmctl(, IPC_RMID)ed.
  82   83   */
  83   84  
  84   85  #include <sys/types.h>
  85   86  #include <sys/param.h>
  86   87  #include <sys/cred.h>
  87   88  #include <sys/errno.h>
  88   89  #include <sys/time.h>
  89   90  #include <sys/kmem.h>
  90   91  #include <sys/user.h>
  91   92  #include <sys/proc.h>
  92   93  #include <sys/systm.h>
  93   94  #include <sys/prsystm.h>
  94   95  #include <sys/sysmacros.h>
  95   96  #include <sys/tuneable.h>
  96   97  #include <sys/vm.h>
  97   98  #include <sys/mman.h>
  98   99  #include <sys/swap.h>
  99  100  #include <sys/cmn_err.h>
 100  101  #include <sys/debug.h>
 101  102  #include <sys/lwpchan_impl.h>
 102  103  #include <sys/avl.h>
 103  104  #include <sys/modctl.h>
 104  105  #include <sys/syscall.h>
 105  106  #include <sys/task.h>
 106  107  #include <sys/project.h>
 107  108  #include <sys/policy.h>
 108  109  #include <sys/zone.h>
 109  110  #include <sys/rctl.h>
 110  111  
 111  112  #include <sys/ipc.h>
 112  113  #include <sys/ipc_impl.h>
 113  114  #include <sys/shm.h>
 114  115  #include <sys/shm_impl.h>
 115  116  
 116  117  #include <vm/hat.h>
 117  118  #include <vm/seg.h>
 118  119  #include <vm/as.h>
 119  120  #include <vm/seg_vn.h>
 120  121  #include <vm/anon.h>
 121  122  #include <vm/page.h>
 122  123  #include <vm/vpage.h>
 123  124  #include <vm/seg_spt.h>
 124  125  
 125  126  #include <c2/audit.h>
 126  127  
 127  128  static int shmem_lock(kshmid_t *sp, struct anon_map *amp);
 128  129  static void shmem_unlock(kshmid_t *sp, struct anon_map *amp);
 129  130  static void sa_add(struct proc *pp, caddr_t addr, size_t len, ulong_t flags,
 130  131          kshmid_t *id);
 131  132  static void shm_rm_amp(kshmid_t *sp);
 132  133  static void shm_dtor(kipc_perm_t *);
 133  134  static void shm_rmid(kipc_perm_t *);
 134  135  static void shm_remove_zone(zoneid_t, void *);
 135  136  
 136  137  /*
 137  138   * Semantics for share_page_table and ism_off:
 138  139   *
 139  140   * These are hooks in /etc/system - only for internal testing purpose.
 140  141   *
 141  142   * Setting share_page_table automatically turns on the SHM_SHARE_MMU (ISM) flag
 142  143   * in a call to shmat(2). In other words, with share_page_table set, you always
 143  144   * get ISM, even if say, DISM is specified. It should really be called "ism_on".
 144  145   *
 145  146   * Setting ism_off turns off the SHM_SHARE_MMU flag from the flags passed to
 146  147   * shmat(2).
 147  148   *
 148  149   * If both share_page_table and ism_off are set, share_page_table prevails.
 149  150   *
 150  151   * Although these tunables should probably be removed, they do have some
 151  152   * external exposure; as long as they exist, they should at least work sensibly.
 152  153   */
 153  154  
 154  155  int share_page_table;
 155  156  int ism_off;
 156  157  
 157  158  /*
 158  159   * The following tunables are obsolete.  Though for compatibility we
 159  160   * still read and interpret shminfo_shmmax and shminfo_shmmni (see
 160  161   * os/project.c), the preferred mechanism for administrating the IPC
 161  162   * Shared Memory facility is through the resource controls described at
 162  163   * the top of this file.
 163  164   */
 164  165  size_t  shminfo_shmmax = 0x800000;      /* (obsolete) */
 165  166  int     shminfo_shmmni = 100;           /* (obsolete) */
 166  167  size_t  shminfo_shmmin = 1;             /* (obsolete) */
 167  168  int     shminfo_shmseg = 6;             /* (obsolete) */
 168  169  
 169  170  extern rctl_hndl_t rc_zone_shmmax;
 170  171  extern rctl_hndl_t rc_zone_shmmni;
 171  172  extern rctl_hndl_t rc_project_shmmax;
 172  173  extern rctl_hndl_t rc_project_shmmni;
 173  174  static ipc_service_t *shm_svc;
 174  175  static zone_key_t shm_zone_key;
 175  176  
 176  177  /*
 177  178   * Module linkage information for the kernel.
 178  179   */
 179  180  static uintptr_t shmsys(int, uintptr_t, uintptr_t, uintptr_t);
 180  181  
 181  182  static struct sysent ipcshm_sysent = {
 182  183          4,
 183  184  #ifdef  _SYSCALL32_IMPL
 184  185          SE_ARGC | SE_NOUNLOAD | SE_64RVAL,
 185  186  #else   /* _SYSCALL32_IMPL */
 186  187          SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
 187  188  #endif  /* _SYSCALL32_IMPL */
 188  189          (int (*)())shmsys
 189  190  };
 190  191  
 191  192  #ifdef  _SYSCALL32_IMPL
 192  193  static struct sysent ipcshm_sysent32 = {
 193  194          4,
 194  195          SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
 195  196          (int (*)())shmsys
 196  197  };
 197  198  #endif  /* _SYSCALL32_IMPL */
 198  199  
 199  200  static struct modlsys modlsys = {
 200  201          &mod_syscallops, "System V shared memory", &ipcshm_sysent
 201  202  };
 202  203  
 203  204  #ifdef  _SYSCALL32_IMPL
 204  205  static struct modlsys modlsys32 = {
 205  206          &mod_syscallops32, "32-bit System V shared memory", &ipcshm_sysent32
 206  207  };
 207  208  #endif  /* _SYSCALL32_IMPL */
 208  209  
 209  210  static struct modlinkage modlinkage = {
 210  211          MODREV_1,
 211  212          &modlsys,
 212  213  #ifdef  _SYSCALL32_IMPL
 213  214          &modlsys32,
 214  215  #endif
 215  216          NULL
 216  217  };
 217  218  
 218  219  
 219  220  int
 220  221  _init(void)
 221  222  {
 222  223          int result;
 223  224  
 224  225          shm_svc = ipcs_create("shmids", rc_project_shmmni, rc_zone_shmmni,
 225  226              sizeof (kshmid_t), shm_dtor, shm_rmid, AT_IPC_SHM,
 226  227              offsetof(ipc_rqty_t, ipcq_shmmni));
 227  228          zone_key_create(&shm_zone_key, NULL, shm_remove_zone, NULL);
 228  229  
 229  230          if ((result = mod_install(&modlinkage)) == 0)
 230  231                  return (0);
 231  232  
 232  233          (void) zone_key_delete(shm_zone_key);
 233  234          ipcs_destroy(shm_svc);
 234  235  
 235  236          return (result);
 236  237  }
 237  238  
 238  239  int
 239  240  _fini(void)
 240  241  {
 241  242          return (EBUSY);
 242  243  }
 243  244  
 244  245  int
 245  246  _info(struct modinfo *modinfop)
 246  247  {
 247  248          return (mod_info(&modlinkage, modinfop));
 248  249  }
 249  250  
 250  251  /*
 251  252   * Shmat (attach shared segment) system call.
 252  253   */
 253  254  static int
 254  255  shmat(int shmid, caddr_t uaddr, int uflags, uintptr_t *rvp)
 255  256  {
 256  257          kshmid_t *sp;   /* shared memory header ptr */
 257  258          size_t  size;
 258  259          int     error = 0;
 259  260          proc_t *pp = curproc;
 260  261          struct as *as = pp->p_as;
 261  262          struct segvn_crargs     crargs; /* segvn create arguments */
 262  263          kmutex_t        *lock;
 263  264          struct seg      *segspt = NULL;
 264  265          caddr_t         addr = uaddr;
 265  266          int             flags = (uflags & SHMAT_VALID_FLAGS_MASK);
 266  267          int             useISM;
 267  268          uchar_t         prot = PROT_ALL;
 268  269          int result;
 269  270  
 270  271          if ((lock = ipc_lookup(shm_svc, shmid, (kipc_perm_t **)&sp)) == NULL)
 271  272                  return (EINVAL);
 272  273          if (error = ipcperm_access(&sp->shm_perm, SHM_R, CRED()))
 273  274                  goto errret;
 274  275          if ((flags & SHM_RDONLY) == 0 &&
 275  276              (error = ipcperm_access(&sp->shm_perm, SHM_W, CRED())))
 276  277                  goto errret;
 277  278          if (spt_invalid(flags)) {
 278  279                  error = EINVAL;
 279  280                  goto errret;
 280  281          }
 281  282          if (ism_off)
 282  283                  flags = flags & ~SHM_SHARE_MMU;
 283  284          if (share_page_table) {
 284  285                  flags = flags & ~SHM_PAGEABLE;
 285  286                  flags = flags | SHM_SHARE_MMU;
 286  287          }
 287  288          useISM = (spt_locked(flags) || spt_pageable(flags));
 288  289          if (useISM && (error = ipcperm_access(&sp->shm_perm, SHM_W, CRED())))
 289  290                  goto errret;
 290  291          if (useISM && isspt(sp)) {
 291  292                  uint_t newsptflags = flags | spt_flags(sp->shm_sptseg);
 292  293                  /*
 293  294                   * If trying to change an existing {D}ISM segment from ISM
 294  295                   * to DISM or vice versa, return error. Note that this
 295  296                   * validation of flags needs to be done after the effect of
 296  297                   * tunables such as ism_off and share_page_table, for
 297  298                   * semantics that are consistent with the tunables' settings.
 298  299                   */
 299  300                  if (spt_invalid(newsptflags)) {
 300  301                          error = EINVAL;
 301  302                          goto errret;
 302  303                  }
 303  304          }
 304  305          ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
 305  306          size = sp->shm_amp->size;
 306  307          ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
 307  308  
 308  309          /* somewhere to record spt info for final detach */
 309  310          if (sp->shm_sptinfo == NULL)
 310  311                  sp->shm_sptinfo = kmem_zalloc(sizeof (sptinfo_t), KM_SLEEP);
 311  312  
  
    | 
      ↓ open down ↓ | 
    278 lines elided | 
    
      ↑ open up ↑ | 
  
 312  313          as_rangelock(as);
 313  314  
 314  315          if (useISM) {
 315  316                  /*
 316  317                   * Handle ISM
 317  318                   */
 318  319                  uint_t  share_szc;
 319  320                  size_t  share_size;
 320  321                  struct  shm_data ssd;
 321  322                  uintptr_t align_hint;
      323 +                long    curprot;
 322  324  
 323  325                  /*
 324  326                   * Pick a share pagesize to use, if (!isspt(sp)).
 325  327                   * Otherwise use the already chosen page size.
 326  328                   *
 327  329                   * For the initial shmat (!isspt(sp)), where sptcreate is
 328  330                   * called, map_pgsz is called to recommend a [D]ISM pagesize,
 329  331                   * important for systems which offer more than one potential
 330  332                   * [D]ISM pagesize.
 331  333                   * If the shmat is just to attach to an already created
 332  334                   * [D]ISM segment, then use the previously selected page size.
 333  335                   */
 334  336                  if (!isspt(sp)) {
 335  337                          share_size = map_pgsz(MAPPGSZ_ISM, pp, addr, size, 0);
 336  338                          if (share_size == 0) {
 337  339                                  as_rangeunlock(as);
 338  340                                  error = EINVAL;
 339  341                                  goto errret;
 340  342                          }
 341  343                          share_szc = page_szc(share_size);
 342  344                  } else {
 343  345                          share_szc = sp->shm_sptseg->s_szc;
 344  346                          share_size = page_get_pagesize(share_szc);
 345  347                  }
 346  348                  size = P2ROUNDUP(size, share_size);
 347  349  
 348  350                  align_hint = share_size;
 349  351  #if defined(__i386) || defined(__amd64)
 350  352                  /*
 351  353                   * For x86, we want to share as much of the page table tree
 352  354                   * as possible. We use a large align_hint at first, but
 353  355                   * if that fails, then the code below retries with align_hint
 354  356                   * set to share_size.
 355  357                   *
 356  358                   * The explicit extern here is due to the difficulties
 357  359                   * of getting to platform dependent includes. When/if the
 358  360                   * platform dependent bits of this function are cleaned up,
 359  361                   * another way of doing this should found.
 360  362                   */
 361  363                  {
 362  364                          extern uint_t ptes_per_table;
 363  365  
 364  366                          while (size >= ptes_per_table * (uint64_t)align_hint)
 365  367                                  align_hint *= ptes_per_table;
 366  368                  }
 367  369  #endif /* __i386 || __amd64 */
 368  370  
 369  371  #if defined(__sparcv9)
 370  372                  if (addr == 0 &&
 371  373                      pp->p_model == DATAMODEL_LP64 && AS_TYPE_64BIT(as)) {
 372  374                          /*
 373  375                           * If no address has been passed in, and this is a
 374  376                           * 64-bit process, we'll try to find an address
 375  377                           * in the predict-ISM zone.
 376  378                           */
 377  379                          caddr_t predbase = (caddr_t)PREDISM_1T_BASE;
 378  380                          size_t len = PREDISM_BOUND - PREDISM_1T_BASE;
 379  381  
 380  382                          as_purge(as);
 381  383                          if (as_gap(as, size + share_size, &predbase, &len,
 382  384                              AH_LO, (caddr_t)NULL) != -1) {
 383  385                                  /*
 384  386                                   * We found an address which looks like a
 385  387                                   * candidate.  We want to round it up, and
 386  388                                   * then check that it's a valid user range.
 387  389                                   * This assures that we won't fail below.
 388  390                                   */
 389  391                                  addr = (caddr_t)P2ROUNDUP((uintptr_t)predbase,
 390  392                                      share_size);
 391  393  
 392  394                                  if (valid_usr_range(addr, size, prot,
 393  395                                      as, as->a_userlimit) != RANGE_OKAY) {
 394  396                                          addr = 0;
 395  397                                  }
 396  398                          }
 397  399                  }
 398  400  #endif /* __sparcv9 */
 399  401  
 400  402                  if (addr == 0) {
 401  403                          for (;;) {
 402  404                                  addr = (caddr_t)align_hint;
 403  405                                  map_addr(&addr, size, 0ll, 1, MAP_ALIGN);
 404  406                                  if (addr != NULL || align_hint == share_size)
 405  407                                          break;
 406  408                                  align_hint = share_size;
 407  409                          }
 408  410                          if (addr == NULL) {
 409  411                                  as_rangeunlock(as);
 410  412                                  error = ENOMEM;
 411  413                                  goto errret;
 412  414                          }
 413  415                          ASSERT(((uintptr_t)addr & (align_hint - 1)) == 0);
 414  416                  } else {
 415  417                          /* Use the user-supplied attach address */
 416  418                          caddr_t base;
 417  419                          size_t len;
 418  420  
 419  421                          /*
 420  422                           * Check that the address range
 421  423                           *  1) is properly aligned
 422  424                           *  2) is correct in unix terms
 423  425                           *  3) is within an unmapped address segment
 424  426                           */
 425  427                          base = addr;
 426  428                          len = size;             /* use spt aligned size */
 427  429                          /* XXX - in SunOS, is sp->shm_segsz */
 428  430                          if ((uintptr_t)base & (share_size - 1)) {
 429  431                                  error = EINVAL;
 430  432                                  as_rangeunlock(as);
 431  433                                  goto errret;
 432  434                          }
 433  435                          result = valid_usr_range(base, len, prot, as,
 434  436                              as->a_userlimit);
 435  437                          if (result == RANGE_BADPROT) {
 436  438                                  /*
 437  439                                   * We try to accomodate processors which
 438  440                                   * may not support execute permissions on
 439  441                                   * all ISM segments by trying the check
 440  442                                   * again but without PROT_EXEC.
 441  443                                   */
 442  444                                  prot &= ~PROT_EXEC;
 443  445                                  result = valid_usr_range(base, len, prot, as,
 444  446                                      as->a_userlimit);
 445  447                          }
  
    | 
      ↓ open down ↓ | 
    114 lines elided | 
    
      ↑ open up ↑ | 
  
 446  448                          as_purge(as);
 447  449                          if (result != RANGE_OKAY ||
 448  450                              as_gap(as, len, &base, &len, AH_LO,
 449  451                              (caddr_t)NULL) != 0) {
 450  452                                  error = EINVAL;
 451  453                                  as_rangeunlock(as);
 452  454                                  goto errret;
 453  455                          }
 454  456                  }
 455  457  
      458 +                curprot = sp->shm_opts & SHM_PROT_MASK;
 456  459                  if (!isspt(sp)) {
 457  460                          error = sptcreate(size, &segspt, sp->shm_amp, prot,
 458  461                              flags, share_szc);
 459  462                          if (error) {
 460  463                                  as_rangeunlock(as);
 461  464                                  goto errret;
 462  465                          }
 463  466                          sp->shm_sptinfo->sptas = segspt->s_as;
 464  467                          sp->shm_sptseg = segspt;
 465      -                        sp->shm_sptprot = prot;
 466      -                } else if ((prot & sp->shm_sptprot) != sp->shm_sptprot) {
      468 +                        sp->shm_opts = (sp->shm_opts & ~SHM_PROT_MASK) | prot;
      469 +                } else if ((prot & curprot) != curprot) {
 467  470                          /*
 468  471                           * Ensure we're attaching to an ISM segment with
 469  472                           * fewer or equal permissions than what we're
 470  473                           * allowed.  Fail if the segment has more
 471  474                           * permissions than what we're allowed.
 472  475                           */
 473  476                          error = EACCES;
 474  477                          as_rangeunlock(as);
 475  478                          goto errret;
 476  479                  }
 477  480  
 478  481                  ssd.shm_sptseg = sp->shm_sptseg;
 479  482                  ssd.shm_sptas = sp->shm_sptinfo->sptas;
 480  483                  ssd.shm_amp = sp->shm_amp;
 481  484                  error = as_map(as, addr, size, segspt_shmattach, &ssd);
 482  485                  if (error == 0)
 483  486                          sp->shm_ismattch++; /* keep count of ISM attaches */
 484  487          } else {
 485  488  
 486  489                  /*
 487  490                   * Normal case.
 488  491                   */
 489  492                  if (flags & SHM_RDONLY)
 490  493                          prot &= ~PROT_WRITE;
 491  494  
 492  495                  if (addr == 0) {
 493  496                          /* Let the system pick the attach address */
 494  497                          map_addr(&addr, size, 0ll, 1, 0);
 495  498                          if (addr == NULL) {
 496  499                                  as_rangeunlock(as);
 497  500                                  error = ENOMEM;
 498  501                                  goto errret;
 499  502                          }
 500  503                  } else {
 501  504                          /* Use the user-supplied attach address */
 502  505                          caddr_t base;
 503  506                          size_t len;
 504  507  
 505  508                          if (flags & SHM_RND)
 506  509                                  addr = (caddr_t)((uintptr_t)addr &
 507  510                                      ~(SHMLBA - 1));
 508  511                          /*
 509  512                           * Check that the address range
 510  513                           *  1) is properly aligned
 511  514                           *  2) is correct in unix terms
 512  515                           *  3) is within an unmapped address segment
 513  516                           */
 514  517                          base = addr;
 515  518                          len = size;             /* use aligned size */
 516  519                          /* XXX - in SunOS, is sp->shm_segsz */
 517  520                          if ((uintptr_t)base & PAGEOFFSET) {
 518  521                                  error = EINVAL;
 519  522                                  as_rangeunlock(as);
 520  523                                  goto errret;
 521  524                          }
 522  525                          result = valid_usr_range(base, len, prot, as,
 523  526                              as->a_userlimit);
 524  527                          if (result == RANGE_BADPROT) {
 525  528                                  prot &= ~PROT_EXEC;
 526  529                                  result = valid_usr_range(base, len, prot, as,
 527  530                                      as->a_userlimit);
 528  531                          }
 529  532                          as_purge(as);
 530  533                          if (result != RANGE_OKAY ||
 531  534                              as_gap(as, len, &base, &len,
 532  535                              AH_LO, (caddr_t)NULL) != 0) {
 533  536                                  error = EINVAL;
 534  537                                  as_rangeunlock(as);
 535  538                                  goto errret;
 536  539                          }
 537  540                  }
 538  541  
 539  542                  /* Initialize the create arguments and map the segment */
 540  543                  crargs = *(struct segvn_crargs *)zfod_argsp;
 541  544                  crargs.offset = 0;
 542  545                  crargs.type = MAP_SHARED;
 543  546                  crargs.amp = sp->shm_amp;
 544  547                  crargs.prot = prot;
 545  548                  crargs.maxprot = crargs.prot;
 546  549                  crargs.flags = 0;
 547  550  
 548  551                  error = as_map(as, addr, size, segvn_create, &crargs);
 549  552          }
 550  553  
 551  554          as_rangeunlock(as);
 552  555          if (error)
 553  556                  goto errret;
 554  557  
 555  558          /* record shmem range for the detach */
 556  559          sa_add(pp, addr, (size_t)size, useISM ? SHMSA_ISM : 0, sp);
 557  560          *rvp = (uintptr_t)addr;
 558  561  
 559  562          sp->shm_atime = gethrestime_sec();
 560  563          sp->shm_lpid = pp->p_pid;
 561  564          ipc_hold(shm_svc, (kipc_perm_t *)sp);
 562  565  
 563  566          /*
 564  567           * Tell machine specific code that lwp has mapped shared memory
 565  568           */
 566  569          LWP_MMODEL_SHARED_AS(addr, size);
 567  570  
 568  571  errret:
 569  572          mutex_exit(lock);
 570  573          return (error);
 571  574  }
 572  575  
 573  576  static void
 574  577  shm_dtor(kipc_perm_t *perm)
 575  578  {
 576  579          kshmid_t *sp = (kshmid_t *)perm;
 577  580          uint_t cnt;
 578  581          size_t rsize;
 579  582  
 580  583          ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
 581  584          anonmap_purge(sp->shm_amp);
 582  585          ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
 583  586  
 584  587          if (sp->shm_sptinfo) {
 585  588                  if (isspt(sp)) {
 586  589                          sptdestroy(sp->shm_sptinfo->sptas, sp->shm_amp);
 587  590                          sp->shm_lkcnt = 0;
 588  591                  }
 589  592                  kmem_free(sp->shm_sptinfo, sizeof (sptinfo_t));
 590  593          }
 591  594  
 592  595          if (sp->shm_lkcnt > 0) {
 593  596                  shmem_unlock(sp, sp->shm_amp);
 594  597                  sp->shm_lkcnt = 0;
 595  598          }
 596  599  
 597  600          ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
 598  601          cnt = --sp->shm_amp->refcnt;
 599  602          ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
 600  603          ASSERT(cnt == 0);
 601  604          shm_rm_amp(sp);
 602  605  
 603  606          if (sp->shm_perm.ipc_id != IPC_ID_INVAL) {
 604  607                  rsize = ptob(btopr(sp->shm_segsz));
 605  608                  ipcs_lock(shm_svc);
 606  609                  sp->shm_perm.ipc_proj->kpj_data.kpd_shmmax -= rsize;
 607  610                  sp->shm_perm.ipc_zone_ref.zref_zone->zone_shmmax -= rsize;
 608  611                  ipcs_unlock(shm_svc);
 609  612          }
 610  613  }
 611  614  
 612  615  /* ARGSUSED */
 613  616  static void
 614  617  shm_rmid(kipc_perm_t *perm)
 615  618  {
 616  619          /* nothing to do */
 617  620  }
 618  621  
 619  622  /*
 620  623   * Shmctl system call.
 621  624   */
 622  625  /* ARGSUSED */
 623  626  static int
 624  627  shmctl(int shmid, int cmd, void *arg)
 625  628  {
 626  629          kshmid_t                *sp;    /* shared memory header ptr */
 627  630          STRUCT_DECL(shmid_ds, ds);      /* for SVR4 IPC_SET */
 628  631          int                     error = 0;
 629  632          struct cred             *cr = CRED();
 630  633          kmutex_t                *lock;
 631  634          model_t                 mdl = get_udatamodel();
 632  635          struct shmid_ds64       ds64;
 633  636          shmatt_t                nattch;
 634  637  
 635  638          STRUCT_INIT(ds, mdl);
 636  639  
 637  640          /*
 638  641           * Perform pre- or non-lookup actions (e.g. copyins, RMID).
 639  642           */
 640  643          switch (cmd) {
 641  644          case IPC_SET:
 642  645                  if (copyin(arg, STRUCT_BUF(ds), STRUCT_SIZE(ds)))
 643  646                          return (EFAULT);
 644  647                  break;
 645  648  
 646  649          case IPC_SET64:
 647  650                  if (copyin(arg, &ds64, sizeof (struct shmid_ds64)))
 648  651                          return (EFAULT);
 649  652                  break;
 650  653  
 651  654          case IPC_RMID:
 652  655                  return (ipc_rmid(shm_svc, shmid, cr));
 653  656          }
 654  657  
 655  658          if ((lock = ipc_lookup(shm_svc, shmid, (kipc_perm_t **)&sp)) == NULL)
 656  659                  return (EINVAL);
 657  660  
 658  661          switch (cmd) {
 659  662          /* Set ownership and permissions. */
 660  663          case IPC_SET:
 661  664                  if (error = ipcperm_set(shm_svc, cr, &sp->shm_perm,
 662  665                      &STRUCT_BUF(ds)->shm_perm, mdl))
 663  666                                  break;
 664  667                  sp->shm_ctime = gethrestime_sec();
 665  668                  break;
 666  669  
 667  670          case IPC_STAT:
 668  671                  if (error = ipcperm_access(&sp->shm_perm, SHM_R, cr))
 669  672                          break;
 670  673  
 671  674                  nattch = sp->shm_perm.ipc_ref - 1;
 672  675  
 673  676                  ipcperm_stat(&STRUCT_BUF(ds)->shm_perm, &sp->shm_perm, mdl);
 674  677                  STRUCT_FSET(ds, shm_segsz, sp->shm_segsz);
 675  678                  STRUCT_FSETP(ds, shm_amp, NULL);        /* kernel addr */
 676  679                  STRUCT_FSET(ds, shm_lkcnt, sp->shm_lkcnt);
 677  680                  STRUCT_FSET(ds, shm_lpid, sp->shm_lpid);
 678  681                  STRUCT_FSET(ds, shm_cpid, sp->shm_cpid);
 679  682                  STRUCT_FSET(ds, shm_nattch, nattch);
 680  683                  STRUCT_FSET(ds, shm_cnattch, sp->shm_ismattch);
 681  684                  STRUCT_FSET(ds, shm_atime, sp->shm_atime);
 682  685                  STRUCT_FSET(ds, shm_dtime, sp->shm_dtime);
 683  686                  STRUCT_FSET(ds, shm_ctime, sp->shm_ctime);
 684  687  
 685  688                  mutex_exit(lock);
 686  689                  if (copyout(STRUCT_BUF(ds), arg, STRUCT_SIZE(ds)))
 687  690                          return (EFAULT);
 688  691  
 689  692                  return (0);
 690  693  
 691  694          case IPC_SET64:
 692  695                  if (error = ipcperm_set64(shm_svc, cr,
 693  696                      &sp->shm_perm, &ds64.shmx_perm))
 694  697                          break;
 695  698                  sp->shm_ctime = gethrestime_sec();
 696  699                  break;
 697  700  
 698  701          case IPC_STAT64:
 699  702                  nattch = sp->shm_perm.ipc_ref - 1;
 700  703  
 701  704                  ipcperm_stat64(&ds64.shmx_perm, &sp->shm_perm);
 702  705                  ds64.shmx_segsz = sp->shm_segsz;
 703  706                  ds64.shmx_lkcnt = sp->shm_lkcnt;
 704  707                  ds64.shmx_lpid = sp->shm_lpid;
 705  708                  ds64.shmx_cpid = sp->shm_cpid;
 706  709                  ds64.shmx_nattch = nattch;
 707  710                  ds64.shmx_cnattch = sp->shm_ismattch;
 708  711                  ds64.shmx_atime = sp->shm_atime;
 709  712                  ds64.shmx_dtime = sp->shm_dtime;
 710  713                  ds64.shmx_ctime = sp->shm_ctime;
 711  714  
 712  715                  mutex_exit(lock);
 713  716                  if (copyout(&ds64, arg, sizeof (struct shmid_ds64)))
 714  717                          return (EFAULT);
 715  718  
 716  719                  return (0);
 717  720  
 718  721          /* Lock segment in memory */
 719  722          case SHM_LOCK:
 720  723                  if ((error = secpolicy_lock_memory(cr)) != 0)
 721  724                          break;
 722  725  
 723  726                  /* protect against overflow */
 724  727                  if (sp->shm_lkcnt >= USHRT_MAX) {
 725  728                          error = ENOMEM;
 726  729                          break;
 727  730                  }
 728  731                  if (!isspt(sp) && (sp->shm_lkcnt++ == 0)) {
 729  732                          if (error = shmem_lock(sp, sp->shm_amp)) {
 730  733                                  ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock,
 731  734                                      RW_WRITER);
 732  735                                  cmn_err(CE_NOTE, "shmctl - couldn't lock %ld"
 733  736                                      " pages into memory", sp->shm_amp->size);
 734  737                                  ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
 735  738                                  error = ENOMEM;
 736  739                                  sp->shm_lkcnt--;
 737  740                          }
 738  741                  }
 739  742                  break;
 740  743  
  
    | 
      ↓ open down ↓ | 
    264 lines elided | 
    
      ↑ open up ↑ | 
  
 741  744          /* Unlock segment */
 742  745          case SHM_UNLOCK:
 743  746                  if ((error = secpolicy_lock_memory(cr)) != 0)
 744  747                          break;
 745  748  
 746  749                  if (sp->shm_lkcnt && (--sp->shm_lkcnt == 0)) {
 747  750                          shmem_unlock(sp, sp->shm_amp);
 748  751                  }
 749  752                  break;
 750  753  
      754 +        /* Stage segment for removal, but don't remove until last detach */
      755 +        case SHM_RMID:
      756 +                if ((error = secpolicy_ipc_owner(cr, (kipc_perm_t *)sp)) != 0)
      757 +                        break;
      758 +
      759 +                /*
      760 +                 * If attached, just mark it as a pending remove, otherwise
      761 +                 * we must perform the normal ipc_rmid now.
      762 +                 */
      763 +                if ((sp->shm_perm.ipc_ref - 1) > 0) {
      764 +                        sp->shm_opts |= SHM_RM_PENDING;
      765 +                } else {
      766 +                        mutex_exit(lock);
      767 +                        return (ipc_rmid(shm_svc, shmid, cr));
      768 +                }
      769 +                break;
      770 +
 751  771          default:
 752  772                  error = EINVAL;
 753  773                  break;
 754  774          }
 755  775          mutex_exit(lock);
 756  776          return (error);
 757  777  }
 758  778  
 759  779  static void
 760  780  shm_detach(proc_t *pp, segacct_t *sap)
 761  781  {
 762  782          kshmid_t        *sp = sap->sa_id;
 763  783          size_t          len = sap->sa_len;
 764  784          caddr_t         addr = sap->sa_addr;
 765  785  
 766  786          /*
 767  787           * Discard lwpchan mappings.
 768  788           */
 769  789          if (pp->p_lcp != NULL)
 770  790                  lwpchan_delete_mapping(pp, addr, addr + len);
  
    | 
      ↓ open down ↓ | 
    10 lines elided | 
    
      ↑ open up ↑ | 
  
 771  791          (void) as_unmap(pp->p_as, addr, len);
 772  792  
 773  793          /*
 774  794           * Perform some detach-time accounting.
 775  795           */
 776  796          (void) ipc_lock(shm_svc, sp->shm_perm.ipc_id);
 777  797          if (sap->sa_flags & SHMSA_ISM)
 778  798                  sp->shm_ismattch--;
 779  799          sp->shm_dtime = gethrestime_sec();
 780  800          sp->shm_lpid = pp->p_pid;
      801 +        if ((sp->shm_opts & SHM_RM_PENDING) != 0 &&
      802 +            sp->shm_perm.ipc_ref == 2) {
      803 +                /*
      804 +                 * If this is the last detach of the segment across the whole
      805 +                 * system then now we can perform the delayed IPC_RMID.
      806 +                 * The ipc_ref count has 1 for the original 'get' and one for
      807 +                 * each 'attach' (see 'stat' handling in shmctl).
      808 +                 */
      809 +                sp->shm_opts &= ~SHM_RM_PENDING;
      810 +                mutex_enter(&shm_svc->ipcs_lock);
      811 +                ipc_rmsvc(shm_svc, (kipc_perm_t *)sp);  /* Drops lock */
      812 +                ASSERT(!MUTEX_HELD(&shm_svc->ipcs_lock));
      813 +                ASSERT(((kipc_perm_t *)sp)->ipc_ref > 0);
      814 +
      815 +                /* Lock was dropped, need to retake it for following rele. */
      816 +                (void) ipc_lock(shm_svc, sp->shm_perm.ipc_id);
      817 +        }
 781  818          ipc_rele(shm_svc, (kipc_perm_t *)sp);   /* Drops lock */
 782  819  
 783  820          kmem_free(sap, sizeof (segacct_t));
 784  821  }
 785  822  
 786  823  static int
 787  824  shmdt(caddr_t addr)
 788  825  {
 789  826          proc_t *pp = curproc;
 790  827          segacct_t *sap, template;
 791  828  
 792  829          mutex_enter(&pp->p_lock);
 793  830          prbarrier(pp);                  /* block /proc.  See shmgetid(). */
 794  831  
 795  832          template.sa_addr = addr;
 796  833          template.sa_len = 0;
 797  834          if ((pp->p_segacct == NULL) ||
 798  835              ((sap = avl_find(pp->p_segacct, &template, NULL)) == NULL)) {
 799  836                  mutex_exit(&pp->p_lock);
 800  837                  return (EINVAL);
 801  838          }
 802  839          if (sap->sa_addr != addr) {
 803  840                  mutex_exit(&pp->p_lock);
 804  841                  return (EINVAL);
 805  842          }
 806  843          avl_remove(pp->p_segacct, sap);
 807  844          mutex_exit(&pp->p_lock);
 808  845  
 809  846          shm_detach(pp, sap);
 810  847  
 811  848          return (0);
 812  849  }
 813  850  
 814  851  /*
 815  852   * Remove all shared memory segments associated with a given zone.
 816  853   * Called by zone_shutdown when the zone is halted.
 817  854   */
 818  855  /*ARGSUSED1*/
 819  856  static void
 820  857  shm_remove_zone(zoneid_t zoneid, void *arg)
 821  858  {
 822  859          ipc_remove_zone(shm_svc, zoneid);
 823  860  }
 824  861  
 825  862  /*
 826  863   * Shmget (create new shmem) system call.
 827  864   */
 828  865  static int
 829  866  shmget(key_t key, size_t size, int shmflg, uintptr_t *rvp)
 830  867  {
 831  868          proc_t          *pp = curproc;
 832  869          kshmid_t        *sp;
 833  870          kmutex_t        *lock;
 834  871          int             error;
 835  872  
 836  873  top:
 837  874          if (error = ipc_get(shm_svc, key, shmflg, (kipc_perm_t **)&sp, &lock))
 838  875                  return (error);
 839  876  
 840  877          if (!IPC_FREE(&sp->shm_perm)) {
 841  878                  /*
 842  879                   * A segment with the requested key exists.
 843  880                   */
 844  881                  if (size > sp->shm_segsz) {
 845  882                          mutex_exit(lock);
 846  883                          return (EINVAL);
 847  884                  }
 848  885          } else {
 849  886                  /*
 850  887                   * A new segment should be created.
 851  888                   */
 852  889                  size_t npages = btopr(size);
 853  890                  size_t rsize = ptob(npages);
 854  891  
 855  892                  /*
 856  893                   * Check rsize and the per-project and per-zone limit on
 857  894                   * shared memory.  Checking rsize handles both the size == 0
 858  895                   * case and the size < ULONG_MAX & PAGEMASK case (i.e.
 859  896                   * rounding up wraps a size_t).
 860  897                   */
 861  898                  if (rsize == 0 ||
 862  899                      (rctl_test(rc_project_shmmax,
 863  900                      pp->p_task->tk_proj->kpj_rctls, pp, rsize,
 864  901                      RCA_SAFE) & RCT_DENY) ||
 865  902                      (rctl_test(rc_zone_shmmax,
 866  903                      pp->p_zone->zone_rctls, pp, rsize,
 867  904                      RCA_SAFE) & RCT_DENY)) {
 868  905  
 869  906                          mutex_exit(&pp->p_lock);
 870  907                          mutex_exit(lock);
 871  908                          ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
 872  909                          return (EINVAL);
 873  910                  }
 874  911                  mutex_exit(&pp->p_lock);
 875  912                  mutex_exit(lock);
 876  913  
 877  914                  if (anon_resv(rsize) == 0) {
 878  915                          ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
 879  916                          return (ENOMEM);
 880  917                  }
 881  918  
 882  919                  /*
 883  920                   * If any new failure points are introduced between the
 884  921                   * the above anon_resv() and the below ipc_commit_begin(),
 885  922                   * these failure points will need to unreserve the anon
 886  923                   * reserved using anon_unresv().
 887  924                   *
 888  925                   * Once ipc_commit_begin() is called, the anon reserved
 889  926                   * above will be automatically unreserved by future calls to
 890  927                   * ipcs_cleanup() -> shm_dtor() -> shm_rm_amp().  If
 891  928                   * ipc_commit_begin() fails, it internally calls shm_dtor(),
 892  929                   * unreserving the above anon, and freeing the below amp.
 893  930                   */
 894  931  
 895  932                  sp->shm_amp = anonmap_alloc(rsize, rsize, ANON_SLEEP);
 896  933                  sp->shm_amp->a_sp = sp;
 897  934                  /*
 898  935                   * Store the original user's requested size, in bytes,
 899  936                   * rather than the page-aligned size.  The former is
 900  937                   * used for IPC_STAT and shmget() lookups.  The latter
 901  938                   * is saved in the anon_map structure and is used for
 902  939                   * calls to the vm layer.
 903  940                   */
 904  941                  sp->shm_segsz = size;
 905  942                  sp->shm_atime = sp->shm_dtime = 0;
 906  943                  sp->shm_ctime = gethrestime_sec();
 907  944                  sp->shm_lpid = (pid_t)0;
 908  945                  sp->shm_cpid = curproc->p_pid;
 909  946                  sp->shm_ismattch = 0;
 910  947                  sp->shm_sptinfo = NULL;
 911  948                  /*
 912  949                   * Check limits one last time, push id into global
 913  950                   * visibility, and update resource usage counts.
 914  951                   */
 915  952                  if (error = ipc_commit_begin(shm_svc, key, shmflg,
 916  953                      (kipc_perm_t *)sp)) {
 917  954                          if (error == EAGAIN)
 918  955                                  goto top;
 919  956                          return (error);
 920  957                  }
 921  958  
 922  959                  if ((rctl_test(rc_project_shmmax,
 923  960                      sp->shm_perm.ipc_proj->kpj_rctls, pp, rsize,
 924  961                      RCA_SAFE) & RCT_DENY) ||
 925  962                      (rctl_test(rc_zone_shmmax,
 926  963                      sp->shm_perm.ipc_zone_ref.zref_zone->zone_rctls, pp, rsize,
 927  964                      RCA_SAFE) & RCT_DENY)) {
 928  965                          ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
 929  966                          return (EINVAL);
 930  967                  }
 931  968                  sp->shm_perm.ipc_proj->kpj_data.kpd_shmmax += rsize;
 932  969                  sp->shm_perm.ipc_zone_ref.zref_zone->zone_shmmax += rsize;
 933  970  
 934  971                  lock = ipc_commit_end(shm_svc, &sp->shm_perm);
 935  972          }
 936  973  
 937  974          if (AU_AUDITING())
 938  975                  audit_ipcget(AT_IPC_SHM, (void *)sp);
 939  976  
 940  977          *rvp = (uintptr_t)(sp->shm_perm.ipc_id);
 941  978  
 942  979          mutex_exit(lock);
 943  980          return (0);
 944  981  }
 945  982  
 946  983  /*
 947  984   * shmids system call.
 948  985   */
 949  986  static int
 950  987  shmids(int *buf, uint_t nids, uint_t *pnids)
 951  988  {
 952  989          return (ipc_ids(shm_svc, buf, nids, pnids));
 953  990  }
 954  991  
 955  992  /*
 956  993   * System entry point for shmat, shmctl, shmdt, and shmget system calls.
 957  994   */
 958  995  static uintptr_t
 959  996  shmsys(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2)
 960  997  {
 961  998          int     error;
 962  999          uintptr_t r_val = 0;
 963 1000  
 964 1001          switch (opcode) {
 965 1002          case SHMAT:
 966 1003                  error = shmat((int)a0, (caddr_t)a1, (int)a2, &r_val);
 967 1004                  break;
 968 1005          case SHMCTL:
 969 1006                  error = shmctl((int)a0, (int)a1, (void *)a2);
 970 1007                  break;
 971 1008          case SHMDT:
 972 1009                  error = shmdt((caddr_t)a0);
 973 1010                  break;
 974 1011          case SHMGET:
 975 1012                  error = shmget((key_t)a0, (size_t)a1, (int)a2, &r_val);
 976 1013                  break;
 977 1014          case SHMIDS:
 978 1015                  error = shmids((int *)a0, (uint_t)a1, (uint_t *)a2);
 979 1016                  break;
 980 1017          default:
 981 1018                  error = EINVAL;
 982 1019                  break;
 983 1020          }
 984 1021  
 985 1022          if (error)
 986 1023                  return ((uintptr_t)set_errno(error));
 987 1024  
 988 1025          return (r_val);
 989 1026  }
 990 1027  
 991 1028  /*
 992 1029   * segacct_t comparator
 993 1030   * This works as expected, with one minor change: the first of two real
 994 1031   * segments with equal addresses is considered to be 'greater than' the
 995 1032   * second.  We only return equal when searching using a template, in
 996 1033   * which case we explicitly set the template segment's length to 0
 997 1034   * (which is invalid for a real segment).
 998 1035   */
 999 1036  static int
1000 1037  shm_sacompar(const void *x, const void *y)
1001 1038  {
1002 1039          segacct_t *sa1 = (segacct_t *)x;
1003 1040          segacct_t *sa2 = (segacct_t *)y;
1004 1041  
1005 1042          if (sa1->sa_addr < sa2->sa_addr) {
1006 1043                  return (-1);
1007 1044          } else if (sa2->sa_len != 0) {
1008 1045                  if (sa1->sa_addr >= sa2->sa_addr + sa2->sa_len) {
1009 1046                          return (1);
1010 1047                  } else if (sa1->sa_len != 0) {
1011 1048                          return (1);
1012 1049                  } else {
1013 1050                          return (0);
1014 1051                  }
1015 1052          } else if (sa1->sa_addr > sa2->sa_addr) {
1016 1053                  return (1);
1017 1054          } else {
1018 1055                  return (0);
1019 1056          }
1020 1057  }
1021 1058  
1022 1059  /*
1023 1060   * add this record to the segacct list.
1024 1061   */
1025 1062  static void
1026 1063  sa_add(struct proc *pp, caddr_t addr, size_t len, ulong_t flags, kshmid_t *id)
1027 1064  {
1028 1065          segacct_t *nsap;
1029 1066          avl_tree_t *tree = NULL;
1030 1067          avl_index_t where;
1031 1068  
1032 1069          nsap = kmem_alloc(sizeof (segacct_t), KM_SLEEP);
1033 1070          nsap->sa_addr = addr;
1034 1071          nsap->sa_len  = len;
1035 1072          nsap->sa_flags = flags;
1036 1073          nsap->sa_id = id;
1037 1074  
1038 1075          if (pp->p_segacct == NULL)
1039 1076                  tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
1040 1077  
1041 1078          mutex_enter(&pp->p_lock);
1042 1079          prbarrier(pp);                  /* block /proc.  See shmgetid(). */
1043 1080  
1044 1081          if (pp->p_segacct == NULL) {
1045 1082                  avl_create(tree, shm_sacompar, sizeof (segacct_t),
1046 1083                      offsetof(segacct_t, sa_tree));
1047 1084                  pp->p_segacct = tree;
1048 1085          } else if (tree) {
1049 1086                  kmem_free(tree, sizeof (avl_tree_t));
1050 1087          }
1051 1088  
1052 1089          /*
1053 1090           * We can ignore the result of avl_find, as the comparator will
1054 1091           * never return equal for segments with non-zero length.  This
1055 1092           * is a necessary hack to get around the fact that we do, in
1056 1093           * fact, have duplicate keys.
1057 1094           */
1058 1095          (void) avl_find(pp->p_segacct, nsap, &where);
1059 1096          avl_insert(pp->p_segacct, nsap, where);
1060 1097  
1061 1098          mutex_exit(&pp->p_lock);
1062 1099  }
1063 1100  
1064 1101  /*
1065 1102   * Duplicate parent's segacct records in child.
1066 1103   */
1067 1104  void
1068 1105  shmfork(struct proc *ppp, struct proc *cpp)
1069 1106  {
1070 1107          segacct_t *sap;
1071 1108          kshmid_t *sp;
1072 1109          kmutex_t *mp;
1073 1110  
1074 1111          ASSERT(ppp->p_segacct != NULL);
1075 1112  
1076 1113          /*
1077 1114           * We are the only lwp running in the parent so nobody can
1078 1115           * mess with our p_segacct list.  Thus it is safe to traverse
1079 1116           * the list without holding p_lock.  This is essential because
1080 1117           * we can't hold p_lock during a KM_SLEEP allocation.
1081 1118           */
1082 1119          for (sap = (segacct_t *)avl_first(ppp->p_segacct); sap != NULL;
1083 1120              sap = (segacct_t *)AVL_NEXT(ppp->p_segacct, sap)) {
1084 1121                  sa_add(cpp, sap->sa_addr, sap->sa_len, sap->sa_flags,
1085 1122                      sap->sa_id);
1086 1123                  sp = sap->sa_id;
1087 1124                  mp = ipc_lock(shm_svc, sp->shm_perm.ipc_id);
1088 1125                  if (sap->sa_flags & SHMSA_ISM)
1089 1126                          sp->shm_ismattch++;
1090 1127                  ipc_hold(shm_svc, (kipc_perm_t *)sp);
1091 1128                  mutex_exit(mp);
1092 1129          }
1093 1130  }
1094 1131  
1095 1132  /*
1096 1133   * Detach shared memory segments from exiting process.
1097 1134   */
1098 1135  void
1099 1136  shmexit(struct proc *pp)
1100 1137  {
1101 1138          segacct_t *sap;
1102 1139          avl_tree_t *tree;
1103 1140          void *cookie = NULL;
1104 1141  
1105 1142          ASSERT(pp->p_segacct != NULL);
1106 1143  
1107 1144          mutex_enter(&pp->p_lock);
1108 1145          prbarrier(pp);
1109 1146          tree = pp->p_segacct;
1110 1147          pp->p_segacct = NULL;
1111 1148          mutex_exit(&pp->p_lock);
1112 1149  
1113 1150          while ((sap = avl_destroy_nodes(tree, &cookie)) != NULL)
1114 1151                  (void) shm_detach(pp, sap);
1115 1152  
1116 1153          avl_destroy(tree);
1117 1154          kmem_free(tree, sizeof (avl_tree_t));
1118 1155  }
1119 1156  
1120 1157  /*
1121 1158   * At this time pages should be in memory, so just lock them.
1122 1159   */
1123 1160  static void
1124 1161  lock_again(size_t npages, kshmid_t *sp, struct anon_map *amp)
1125 1162  {
1126 1163          struct anon *ap;
1127 1164          struct page *pp;
1128 1165          struct vnode *vp;
1129 1166          u_offset_t off;
1130 1167          ulong_t anon_idx;
1131 1168          anon_sync_obj_t cookie;
1132 1169  
1133 1170          mutex_enter(&sp->shm_mlock);
1134 1171          ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
1135 1172          for (anon_idx = 0; npages != 0; anon_idx++, npages--) {
1136 1173  
1137 1174                  anon_array_enter(amp, anon_idx, &cookie);
1138 1175                  ap = anon_get_ptr(amp->ahp, anon_idx);
1139 1176                  ASSERT(ap != NULL);
1140 1177                  swap_xlate(ap, &vp, &off);
1141 1178                  anon_array_exit(&cookie);
1142 1179  
1143 1180                  pp = page_lookup(vp, off, SE_SHARED);
1144 1181                  if (pp == NULL) {
1145 1182                          panic("lock_again: page not in the system");
1146 1183                          /*NOTREACHED*/
1147 1184                  }
1148 1185                  /* page should already be locked by caller */
1149 1186                  ASSERT(pp->p_lckcnt > 0);
1150 1187                  (void) page_pp_lock(pp, 0, 0);
1151 1188                  page_unlock(pp);
1152 1189          }
1153 1190          ANON_LOCK_EXIT(&amp->a_rwlock);
1154 1191          mutex_exit(&sp->shm_mlock);
1155 1192  }
1156 1193  
1157 1194  /*
1158 1195   * Attach the shared memory segment to the process
1159 1196   * address space and lock the pages.
1160 1197   */
1161 1198  static int
1162 1199  shmem_lock(kshmid_t *sp, struct anon_map *amp)
1163 1200  {
1164 1201          size_t npages = btopr(amp->size);
1165 1202          struct as *as;
1166 1203          struct segvn_crargs crargs;
1167 1204          uint_t error;
1168 1205  
1169 1206          /*
1170 1207           * A later ISM/DISM attach may increase the size of the amp, so
1171 1208           * cache the number of pages locked for the future shmem_unlock()
1172 1209           */
1173 1210          sp->shm_lkpages = npages;
1174 1211  
1175 1212          as = as_alloc();
1176 1213          /* Initialize the create arguments and map the segment */
1177 1214          crargs = *(struct segvn_crargs *)zfod_argsp;    /* structure copy */
1178 1215          crargs.offset = (u_offset_t)0;
1179 1216          crargs.type = MAP_SHARED;
1180 1217          crargs.amp = amp;
1181 1218          crargs.prot = PROT_ALL;
1182 1219          crargs.maxprot = crargs.prot;
1183 1220          crargs.flags = 0;
1184 1221          error = as_map(as, 0x0, amp->size, segvn_create, &crargs);
1185 1222          if (!error) {
1186 1223                  if ((error = as_ctl(as, 0x0, amp->size, MC_LOCK, 0, 0,
1187 1224                      NULL, 0)) == 0) {
1188 1225                          lock_again(npages, sp, amp);
1189 1226                  }
1190 1227                  (void) as_unmap(as, 0x0, amp->size);
1191 1228          }
1192 1229          as_free(as);
1193 1230          return (error);
1194 1231  }
1195 1232  
1196 1233  
1197 1234  /*
1198 1235   * Unlock shared memory
1199 1236   */
1200 1237  static void
1201 1238  shmem_unlock(kshmid_t *sp, struct anon_map *amp)
1202 1239  {
1203 1240          struct anon *ap;
1204 1241          pgcnt_t npages = sp->shm_lkpages;
1205 1242          struct vnode *vp;
1206 1243          struct page *pp;
1207 1244          u_offset_t off;
1208 1245          ulong_t anon_idx;
1209 1246          size_t unlocked_bytes = 0;
1210 1247          kproject_t      *proj;
1211 1248          anon_sync_obj_t cookie;
1212 1249  
1213 1250          proj = sp->shm_perm.ipc_proj;
1214 1251          mutex_enter(&sp->shm_mlock);
1215 1252          ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
1216 1253          for (anon_idx = 0; anon_idx < npages; anon_idx++) {
1217 1254  
1218 1255                  anon_array_enter(amp, anon_idx, &cookie);
1219 1256                  if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
1220 1257                          panic("shmem_unlock: null app");
1221 1258                          /*NOTREACHED*/
1222 1259                  }
1223 1260                  swap_xlate(ap, &vp, &off);
1224 1261                  anon_array_exit(&cookie);
1225 1262                  pp = page_lookup(vp, off, SE_SHARED);
1226 1263                  if (pp == NULL) {
1227 1264                          panic("shmem_unlock: page not in the system");
1228 1265                          /*NOTREACHED*/
1229 1266                  }
1230 1267                  /*
1231 1268                   * Page should at least have once lock from previous
1232 1269                   * shmem_lock
1233 1270                   */
1234 1271                  ASSERT(pp->p_lckcnt > 0);
1235 1272                  page_pp_unlock(pp, 0, 0);
1236 1273                  if (pp->p_lckcnt == 0)
1237 1274                          unlocked_bytes += PAGESIZE;
1238 1275  
1239 1276                  page_unlock(pp);
1240 1277          }
1241 1278  
1242 1279          if (unlocked_bytes > 0) {
1243 1280                  rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0);
1244 1281          }
1245 1282  
1246 1283          ANON_LOCK_EXIT(&amp->a_rwlock);
1247 1284          mutex_exit(&sp->shm_mlock);
1248 1285  }
1249 1286  
1250 1287  /*
1251 1288   * We call this routine when we have removed all references to this
1252 1289   * amp.  This means all shmdt()s and the IPC_RMID have been done.
1253 1290   */
1254 1291  static void
1255 1292  shm_rm_amp(kshmid_t *sp)
1256 1293  {
1257 1294          struct anon_map *amp = sp->shm_amp;
1258 1295          zone_t *zone;
1259 1296  
1260 1297          zone = sp->shm_perm.ipc_zone_ref.zref_zone;
1261 1298          ASSERT(zone != NULL);
1262 1299          /*
1263 1300           * Free up the anon_map.
1264 1301           */
1265 1302          lgrp_shm_policy_fini(amp, NULL);
1266 1303          ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
1267 1304          if (amp->a_szc != 0) {
1268 1305                  anon_shmap_free_pages(amp, 0, amp->size);
1269 1306          } else {
1270 1307                  anon_free(amp->ahp, 0, amp->size);
1271 1308          }
1272 1309          ANON_LOCK_EXIT(&amp->a_rwlock);
1273 1310          anon_unresv_zone(amp->swresv, zone);
1274 1311          anonmap_free(amp);
1275 1312  }
1276 1313  
1277 1314  /*
1278 1315   * Return the shared memory id for the process's virtual address.
1279 1316   * Return SHMID_NONE if addr is not within a SysV shared memory segment.
1280 1317   * Return SHMID_FREE if addr's SysV shared memory segment's id has been freed.
1281 1318   *
1282 1319   * shmgetid() is called from code in /proc with the process locked but
1283 1320   * with pp->p_lock not held.  The address space lock is held, so we
1284 1321   * cannot grab pp->p_lock here due to lock-ordering constraints.
1285 1322   * Because of all this, modifications to the p_segacct list must only
1286 1323   * be made after calling prbarrier() to ensure the process is not locked.
1287 1324   * See shmdt() and sa_add(), above. shmgetid() may also be called on a
1288 1325   * thread's own process without the process locked.
1289 1326   */
1290 1327  int
1291 1328  shmgetid(proc_t *pp, caddr_t addr)
1292 1329  {
1293 1330          segacct_t *sap, template;
1294 1331  
1295 1332          ASSERT(MUTEX_NOT_HELD(&pp->p_lock));
1296 1333          ASSERT((pp->p_proc_flag & P_PR_LOCK) || pp == curproc);
1297 1334  
1298 1335          if (pp->p_segacct == NULL)
1299 1336                  return (SHMID_NONE);
1300 1337  
1301 1338          template.sa_addr = addr;
1302 1339          template.sa_len = 0;
1303 1340          if ((sap = avl_find(pp->p_segacct, &template, NULL)) == NULL)
1304 1341                  return (SHMID_NONE);
1305 1342  
1306 1343          if (IPC_FREE(&sap->sa_id->shm_perm))
1307 1344                  return (SHMID_FREE);
1308 1345  
1309 1346          return (sap->sa_id->shm_perm.ipc_id);
1310 1347  }
  
    | 
      ↓ open down ↓ | 
    520 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX