Print this page
    
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/os/ipc.c
          +++ new/usr/src/uts/common/os/ipc.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright 2016 Joyent, Inc.
  24   24   */
  25   25  
  26   26  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T         */
  27   27  /*      All Rights Reserved                                     */
  28   28  
  29   29  
  30   30  /*
  31   31   * Common Inter-Process Communication routines.
  32   32   *
  33   33   * Overview
  34   34   * --------
  35   35   *
  36   36   * The System V inter-process communication (IPC) facilities provide
  37   37   * three services, message queues, semaphore arrays, and shared memory
  38   38   * segments, which are managed using filesystem-like namespaces.
  39   39   * Unlike a filesystem, these namespaces aren't mounted and accessible
  40   40   * via a path -- a special API is used to interact with the different
  41   41   * facilities (nothing precludes a VFS-based interface, but the
  42   42   * standards require the special APIs).  Furthermore, these special
  43   43   * APIs don't use file descriptors, nor do they have an equivalent.
  44   44   * This means that every operation which acts on an object needs to
  45   45   * perform the equivalent of a lookup, which in turn means that every
  46   46   * operation can fail if the specified object doesn't exist in the
  47   47   * facility's namespace.
  48   48   *
  49   49   * Objects
  50   50   * -------
  51   51   *
  52   52   * Each object in a namespace has a unique ID, which is assigned by the
  53   53   * system and is used to identify the object when performing operations
  54   54   * on it.  An object can also have a key, which is selected by the user
  55   55   * at allocation time and is used as a primitive rendezvous mechanism.
  56   56   * An object without a key is said to have a "private" key.
  57   57   *
  58   58   * To perform an operation on an object given its key, one must first
  59   59   * perform a lookup and obtain its ID.  The ID is then used to identify
  60   60   * the object when performing the operation.  If the object has a
  61   61   * private key, the ID must be known or obtained by other means.
  62   62   *
  63   63   * Each object in the namespace has a creator uid and gid, as well as
  64   64   * an owner uid and gid.  Both are initialized with the ruid and rgid
  65   65   * of the process which created the object.  The creator or current
  66   66   * owner has the ability to change the owner of the object.
  67   67   *
  68   68   * Each object in the namespace has a set of file-like permissions,
  69   69   * which, in conjunction with the creator and owner uid and gid,
  70   70   * control read and write access to the object (execute is ignored).
  71   71   *
  72   72   * Each object also has a creator project and zone, which are used to
  73   73   * account for its resource usage.
  74   74   *
  75   75   * Operations
  76   76   * ----------
  77   77   *
  78   78   * There are five operations which all three facilities have in
  79   79   * common: GET, SET, STAT, RMID, and IDS.
  80   80   *
  81   81   * GET, like open, is used to allocate a new object or obtain an
  82   82   * existing one (using its key).  It takes a key, a set of flags and
  83   83   * mode bits, and optionally facility-specific arguments.  If the key
  84   84   * is IPC_PRIVATE, a new object with the requested mode bits and
  85   85   * facility-specific attributes is created.  If the key isn't
  86   86   * IPC_PRIVATE, the GET will attempt to look up the specified key and
  87   87   * either return that or create a new key depending on the state of the
  88   88   * IPC_CREAT and IPC_EXCL flags, much like open.  If GET needs to
  89   89   * allocate an object, it can fail if there is insufficient space in
  90   90   * the namespace (the maximum number of ids for the facility has been
  91   91   * exceeded) or if the facility-specific initialization fails.  If GET
  92   92   * finds an object it can return, it can still fail if that object's
  93   93   * permissions or facility-specific attributes are less than those
  94   94   * requested.
  95   95   *
  96   96   * SET is used to adjust facility-specific parameters of an object, in
  97   97   * addition to the owner uid and gid, and mode bits.  It can fail if
  98   98   * the caller isn't the creator or owner.
  99   99   *
 100  100   * STAT is used to obtain information about an object including the
 101  101   * general object attributes described above as well as facility-specific
 102  102   * information.  It can fail if the caller doesn't have read
 103  103   * permission.
 104  104   *
 105  105   * RMID removes an object from the namespace.  Subsequent operations
 106  106   * using the object's ID or key will fail (until another object is
 107  107   * created with the same key or ID).  Since an RMID may be performed
 108  108   * asynchronously with other operations, it is possible that other
 109  109   * threads and/or processes will have references to the object.  While
 110  110   * a facility may have actions which need to be performed at RMID time,
 111  111   * only when all references are dropped can the object be destroyed.
 112  112   * RMID will fail if the caller isn't the creator or owner.
 113  113   *
 114  114   * IDS obtains a list of all IDs in a facility's namespace.  There are
 115  115   * no facility-specific behaviors of IDS.
 116  116   *
 117  117   * Design
 118  118   * ------
 119  119   *
 120  120   * Because some IPC facilities provide services whose operations must
 121  121   * scale, a mechanism which allows fast, concurrent access to
 122  122   * individual objects is needed.  Of primary importance is object
 123  123   * lookup based on ID (SET, STAT, others).  Allocation (GET),
 124  124   * deallocation (RMID), ID enumeration (IDS), and key lookups (GET) are
 125  125   * lesser concerns, but should be implemented in such a way that ID
 126  126   * lookup isn't affected (at least not in the common case).
 127  127   *
 128  128   * Starting from the bottom up, each object is represented by a
 129  129   * structure, the first member of which must be a kipc_perm_t.  The
 130  130   * kipc_perm_t contains the information described above in "Objects", a
 131  131   * reference count (since the object may continue to exist after it has
 132  132   * been removed from the namespace), as well as some additional
 133  133   * metadata used to manage data structure membership.  These objects
 134  134   * are dynamically allocated.
 135  135   *
 136  136   * Above the objects is a power-of-two sized table of ID slots.  Each
 137  137   * slot contains a pointer to an object, a sequence number, and a
 138  138   * lock.  An object's ID is a function of its slot's index in the table
 139  139   * and its slot's sequence number.  Every time a slot is released (via
 140  140   * RMID) its sequence number is increased.  Strictly speaking, the
 141  141   * sequence number is unnecessary.  However, checking the sequence
 142  142   * number after a lookup provides a certain degree of robustness
 143  143   * against the use of stale IDs (useful since nothing else does).  When
 144  144   * the table fills up, it is resized (see Locking, below).
 145  145   *
 146  146   * Of an ID's 31 bits (an ID is, as defined by the standards, a signed
 147  147   * int) the top IPC_SEQ_BITS are used for the sequence number with the
 148  148   * remainder holding the index into the table.  The size of the table
 149  149   * is therefore bounded at 2 ^ (31 - IPC_SEQ_BITS) slots.
 150  150   *
 151  151   * Managing this table is the ipc_service structure.  It contains a
 152  152   * pointer to the dynamically allocated ID table, a namespace-global
 153  153   * lock, an id_space for managing the free space in the table, and
 154  154   * sundry other metadata necessary for the maintenance of the
 155  155   * namespace.  An AVL tree of all keyed objects in the table (sorted by
 156  156   * key) is used for key lookups.  An unordered doubly linked list of
 157  157   * all objects in the namespace (keyed or not) is maintained to
 158  158   * facilitate ID enumeration.
 159  159   *
 160  160   * To help visualize these relationships, here's a picture of a
 161  161   * namespace with a table of size 8 containing three objects
 162  162   * (IPC_SEQ_BITS = 28):
 163  163   *
 164  164   *
 165  165   * +-ipc_service_t--+
 166  166   * | table          *---\
 167  167   * | keys           *---+----------------------\
 168  168   * | all ids        *--\|                      |
 169  169   * |                |  ||                      |
 170  170   * +----------------+  ||                      |
 171  171   *                     ||                      |
 172  172   * /-------------------/|                      |
 173  173   * |    /---------------/                      |
 174  174   * |    |                                      |
 175  175   * |    v                                      |
 176  176   * |  +-0------+-1------+-2------+-3------+-4--+---+-5------+-6------+-7------+
 177  177   * |  | Seq=3  |        |        | Seq=1  |    :   |        |        | Seq=6  |
 178  178   * |  |        |        |        |        |    :   |        |        |        |
 179  179   * |  +-*------+--------+--------+-*------+----+---+--------+--------+-*------+
 180  180   * |    |                          |           |                       |
 181  181   * |    |                      /---/           |      /----------------/
 182  182   * |    |                      |               |      |
 183  183   * |    v                      v               |      v
 184  184   * |  +-kipc_perm_t-+        +-kipc_perm_t-+   |    +-kipc_perm_t-+
 185  185   * |  | id=0x30     |        | id=0x13     |   |    | id=0x67     |
 186  186   * |  | key=0xfeed  |        | key=0xbeef  |   |    | key=0xcafe  |
 187  187   * \->| [list]      |<------>| [list]      |<------>| [list]      |
 188  188   * /->| [avl left]  x   /--->| [avl left]  x   \--->| [avl left]  *---\
 189  189   * |  | [avl right] x   |    | [avl right] x        | [avl right] *---+-\
 190  190   * |  |             |   |    |             |        |             |   | |
 191  191   * |  +-------------+   |    +-------------+        +-------------+   | |
 192  192   * |                    \---------------------------------------------/ |
 193  193   * \--------------------------------------------------------------------/
 194  194   *
 195  195   * Locking
 196  196   * -------
 197  197   *
 198  198   * There are three locks (or sets of locks) which are used to ensure
 199  199   * correctness: the slot locks, the namespace lock, and p_lock (needed
 200  200   * when checking resource controls).  Their ordering is
 201  201   *
 202  202   *   namespace lock -> slot lock 0 -> ... -> slot lock t -> p_lock
 203  203   *
 204  204   * Generally speaking, the namespace lock is used to protect allocation
 205  205   * and removal from the namespace, ID enumeration, and resizing the ID
 206  206   * table.  Specifically:
 207  207   *
 208  208   * - write access to all fields of the ipc_service structure
 209  209   * - read access to all variable fields of ipc_service except
 210  210   *   ipcs_tabsz (table size) and ipcs_table (the table pointer)
 211  211   * - read/write access to ipc_avl, ipc_list in visible objects'
 212  212   *   kipc_perm structures (i.e. objects which have been removed from
 213  213   *   the namespace don't have this restriction)
 214  214   * - write access to ipct_seq and ipct_data in the table entries
 215  215   *
 216  216   * A slot lock by itself is meaningless (except when resizing).  Of
 217  217   * greater interest conceptually is the notion of an ID lock -- a
 218  218   * "virtual lock" which refers to whichever slot lock an object's ID
 219  219   * currently hashes to.
 220  220   *
 221  221   * An ID lock protects all objects with that ID.  Normally there will
 222  222   * only be one such object: the one pointed to by the locked slot.
 223  223   * However, if an object is removed from the namespace but retains
 224  224   * references (e.g. an attached shared memory segment which has been
 225  225   * RMIDed), it continues to use the lock associated with its original
 226  226   * ID.  While this can result in increased contention, operations which
 227  227   * require taking the ID lock of removed objects are infrequent.
 228  228   *
 229  229   * Specifically, an ID lock protects the contents of an object's
 230  230   * structure, including the contents of the embedded kipc_perm
 231  231   * structure (but excluding those fields protected by the namespace
 232  232   * lock).  It also protects the ipct_seq and ipct_data fields in its
 233  233   * slot (it is really a slot lock, after all).
 234  234   *
 235  235   * Recall that the table is resizable.  To avoid requiring every ID
 236  236   * lookup to take a global lock, a scheme much like that employed for
 237  237   * file descriptors (see the comment above UF_ENTER in user.h) is
 238  238   * used.  Note that the sequence number and data pointer are protected
 239  239   * by both the namespace lock and their slot lock.  When the table is
 240  240   * resized, the following operations take place:
 241  241   *
 242  242   *   1) A new table is allocated.
 243  243   *   2) The global lock is taken.
 244  244   *   3) All old slots are locked, in order.
 245  245   *   4) The first half of the new slots are locked.
 246  246   *   5) All table entries are copied to the new table, and cleared from
 247  247   *      the old table.
 248  248   *   6) The ipc_service structure is updated to point to the new table.
 249  249   *   7) The ipc_service structure is updated with the new table size.
 250  250   *   8) All slot locks (old and new) are dropped.
 251  251   *
 252  252   * Because the slot locks are embedded in the table, ID lookups and
 253  253   * other operations which require taking a slot lock need to verify
 254  254   * that the lock taken wasn't part of a stale table.  This is
 255  255   * accomplished by checking the table size before and after
 256  256   * dereferencing the table pointer and taking the lock: if the size
 257  257   * changes, the lock must be dropped and reacquired.  It is this
 258  258   * additional work which distinguishes an ID lock from a slot lock.
 259  259   *
 260  260   * Because we can't guarantee that threads aren't accessing the old
 261  261   * tables' locks, they are never deallocated.  To prevent spurious
 262  262   * reports of memory leaks, a pointer to the discarded table is stored
 263  263   * in the new one in step 5.  (Theoretically ipcs_destroy will delete
 264  264   * the discarded tables, but it is only ever called from a failed _init
 265  265   * invocation; i.e. when there aren't any.)
 266  266   *
 267  267   * Interfaces
 268  268   * ----------
 269  269   *
 270  270   * The following interfaces are provided by the ipc module for use by
 271  271   * the individual IPC facilities:
 272  272   *
 273  273   * ipcperm_access
 274  274   *
 275  275   *   Given an object and a cred structure, determines if the requested
 276  276   *   access type is allowed.
 277  277   *
 278  278   * ipcperm_set, ipcperm_stat,
 279  279   * ipcperm_set64, ipcperm_stat64
 280  280   *
 281  281   *   Performs the common portion of a STAT or SET operation.  All
 282  282   *   (except stat and stat64) can fail, so they should be called before
 283  283   *   any facility-specific non-reversible changes are made to an
 284  284   *   object.  Similarly, the set operations have side effects, so they
 285  285   *   should only be called once the possibility of a facility-specific
 286  286   *   failure is eliminated.
 287  287   *
 288  288   * ipcs_create
 289  289   *
 290  290   *   Creates an IPC namespace for use by an IPC facility.
 291  291   *
 292  292   * ipcs_destroy
 293  293   *
 294  294   *   Destroys an IPC namespace.
 295  295   *
 296  296   * ipcs_lock, ipcs_unlock
 297  297   *
 298  298   *   Takes the namespace lock.  Ideally such access wouldn't be
 299  299   *   necessary, but there may be facility-specific data protected by
 300  300   *   this lock (e.g. project-wide resource consumption).
 301  301   *
 302  302   * ipc_lock
 303  303   *
 304  304   *   Takes the lock associated with an ID.  Can't fail.
 305  305   *
 306  306   * ipc_relock
 307  307   *
 308  308   *   Like ipc_lock, but takes a pointer to a held lock.  Drops the lock
 309  309   *   unless it is the one that would have been returned by ipc_lock.
 310  310   *   Used after calls to cv_wait.
 311  311   *
 312  312   * ipc_lookup
 313  313   *
 314  314   *   Performs an ID lookup, returns with the ID lock held.  Fails if
 315  315   *   the ID doesn't exist in the namespace.
 316  316   *
 317  317   * ipc_hold
 318  318   *
 319  319   *   Takes a reference on an object.
 320  320   *
 321  321   * ipc_rele
 322  322   *
 323  323   *   Releases a reference on an object, and drops the object's lock.
 324  324   *   Calls the object's destructor if last reference is being
 325  325   *   released.
 326  326   *
 327  327   * ipc_rele_locked
 328  328   *
 329  329   *   Releases a reference on an object.  Doesn't drop lock, and may
 330  330   *   only be called when there is more than one reference to the
 331  331   *   object.
 332  332   *
 333  333   * ipc_get, ipc_commit_begin, ipc_commit_end, ipc_cleanup
 334  334   *
 335  335   *   Components of a GET operation.  ipc_get performs a key lookup,
 336  336   *   allocating an object if the key isn't found (returning with the
 337  337   *   namespace lock and p_lock held), and returning the existing object
 338  338   *   if it is (with the object lock held).  ipc_get doesn't modify the
 339  339   *   namespace.
 340  340   *
 341  341   *   ipc_commit_begin begins the process of inserting an object
 342  342   *   allocated by ipc_get into the namespace, and can fail.  If
 343  343   *   successful, it returns with the namespace lock and p_lock held.
 344  344   *   ipc_commit_end completes the process of inserting an object into
 345  345   *   the namespace and can't fail.  The facility can call ipc_cleanup
 346  346   *   at any time following a successful ipc_get and before
 347  347   *   ipc_commit_end or a failed ipc_commit_begin to fail the
 348  348   *   allocation.  Pseudocode for the suggested GET implementation:
 349  349   *
 350  350   *   top:
 351  351   *
 352  352   *     ipc_get
 353  353   *
 354  354   *     if failure
 355  355   *       return
 356  356   *
 357  357   *     if found {
 358  358   *
 359  359   *       if object meets criteria
 360  360   *         unlock object and return success
 361  361   *       else
 362  362   *         unlock object and return failure
 363  363   *
 364  364   *     } else {
 365  365   *
 366  366   *       perform resource control tests
 367  367   *       drop namespace lock, p_lock
 368  368   *       if failure
 369  369   *         ipc_cleanup
 370  370   *
 371  371   *       perform facility-specific initialization
 372  372   *       if failure {
 373  373   *         facility-specific cleanup
 374  374   *         ipc_cleanup
 375  375   *       }
 376  376   *
 377  377   *       ( At this point the object should be destructible using the
 378  378   *         destructor given to ipcs_create )
 379  379   *
 380  380   *       ipc_commit_begin
 381  381   *       if retry
 382  382   *         goto top
 383  383   *       else if failure
 384  384   *         return
 385  385   *
 386  386   *       perform facility-specific resource control tests/allocations
 387  387   *       if failure
 388  388   *         ipc_cleanup
 389  389   *
 390  390   *       ipc_commit_end
 391  391   *       perform any infallible post-creation actions, unlock, and return
 392  392   *
 393  393   *     }
 394  394   *
 395  395   * ipc_rmid
 396  396   *
 397  397   *   Performs the common portion of an RMID operation -- looks up an ID,
 398  398   *   removes it, and calls a facility-specific function to do
 399  399   *   RMID-time cleanup on the private portions of the object.
 400  400   *
 401  401   * ipc_ids
 402  402   *
 403  403   *   Performs the common portion of an IDS operation.
 404  404   *
 405  405   */
 406  406  
 407  407  #include <sys/types.h>
 408  408  #include <sys/param.h>
 409  409  #include <sys/cred.h>
 410  410  #include <sys/policy.h>
 411  411  #include <sys/proc.h>
 412  412  #include <sys/user.h>
 413  413  #include <sys/ipc.h>
 414  414  #include <sys/ipc_impl.h>
 415  415  #include <sys/errno.h>
 416  416  #include <sys/systm.h>
 417  417  #include <sys/list.h>
 418  418  #include <sys/atomic.h>
 419  419  #include <sys/zone.h>
 420  420  #include <sys/task.h>
 421  421  #include <sys/modctl.h>
 422  422  
 423  423  #include <c2/audit.h>
 424  424  
 425  425  static struct modlmisc modlmisc = {
 426  426          &mod_miscops,
 427  427          "common ipc code",
 428  428  };
 429  429  
 430  430  static struct modlinkage modlinkage = {
 431  431          MODREV_1, (void *)&modlmisc, NULL
 432  432  };
 433  433  
 434  434  
 435  435  int
 436  436  _init(void)
 437  437  {
 438  438          return (mod_install(&modlinkage));
 439  439  }
 440  440  
 441  441  int
 442  442  _fini(void)
 443  443  {
 444  444          return (mod_remove(&modlinkage));
 445  445  }
 446  446  
 447  447  int
 448  448  _info(struct modinfo *modinfop)
 449  449  {
 450  450          return (mod_info(&modlinkage, modinfop));
 451  451  }
 452  452  
 453  453  
 454  454  /*
 455  455   * Check message, semaphore, or shared memory access permissions.
 456  456   *
 457  457   * This routine verifies the requested access permission for the current
 458  458   * process.  The zone ids are compared, and the appropriate bits are
 459  459   * checked corresponding to owner, group (including the list of
 460  460   * supplementary groups), or everyone.  Zero is returned on success.
 461  461   * On failure, the security policy is asked to check to override the
 462  462   * permissions check; the policy will either return 0 for access granted
 463  463   * or EACCES.
 464  464   *
 465  465   * Access to objects in other zones requires that the caller be in the
 466  466   * global zone and have the appropriate IPC_DAC_* privilege, regardless
 467  467   * of whether the uid or gid match those of the object.  Note that
 468  468   * cross-zone accesses will normally never get here since they'll
 469  469   * fail in ipc_lookup or ipc_get.
 470  470   *
 471  471   * The arguments must be set up as follows:
 472  472   *      p - Pointer to permission structure to verify
 473  473   *      mode - Desired access permissions
 474  474   */
 475  475  int
 476  476  ipcperm_access(kipc_perm_t *p, int mode, cred_t *cr)
 477  477  {
 478  478          int shifts = 0;
 479  479          uid_t uid = crgetuid(cr);
 480  480          zoneid_t zoneid = getzoneid();
 481  481  
 482  482          if (p->ipc_zoneid == zoneid) {
 483  483                  if (uid != p->ipc_uid && uid != p->ipc_cuid) {
 484  484                          shifts += 3;
 485  485                          if (!groupmember(p->ipc_gid, cr) &&
 486  486                              !groupmember(p->ipc_cgid, cr))
 487  487                                  shifts += 3;
 488  488                  }
 489  489  
 490  490                  mode &= ~(p->ipc_mode << shifts);
 491  491  
 492  492                  if (mode == 0)
 493  493                          return (0);
 494  494          } else if (zoneid != GLOBAL_ZONEID)
 495  495                  return (EACCES);
 496  496  
 497  497          return (secpolicy_ipc_access(cr, p, mode));
 498  498  }
 499  499  
 500  500  /*
 501  501   * There are two versions of the ipcperm_set/stat functions:
 502  502   *   ipcperm_???        - for use with IPC_SET/STAT
 503  503   *   ipcperm_???64      - for use with IPC_SET64/STAT64
 504  504   *
 505  505   * These functions encapsulate the common portions (copying, permission
 506  506   * checks, and auditing) of the set/stat operations.  All, except for
 507  507   * stat and stat64 which are void, return 0 on success or a non-zero
 508  508   * errno value on error.
 509  509   */
 510  510  
 511  511  int
 512  512  ipcperm_set(ipc_service_t *service, struct cred *cr,
 513  513      kipc_perm_t *kperm, struct ipc_perm *perm, model_t model)
 514  514  {
 515  515          STRUCT_HANDLE(ipc_perm, lperm);
 516  516          uid_t uid;
 517  517          gid_t gid;
 518  518          mode_t mode;
 519  519          zone_t *zone;
 520  520  
 521  521          ASSERT(IPC_LOCKED(service, kperm));
 522  522  
 523  523          STRUCT_SET_HANDLE(lperm, model, perm);
 524  524          uid = STRUCT_FGET(lperm, uid);
 525  525          gid = STRUCT_FGET(lperm, gid);
 526  526          mode = STRUCT_FGET(lperm, mode);
 527  527  
 528  528          if (secpolicy_ipc_owner(cr, kperm) != 0)
 529  529                  return (EPERM);
 530  530  
 531  531          zone = crgetzone(cr);
 532  532          if (!VALID_UID(uid, zone) || !VALID_GID(gid, zone))
 533  533                  return (EINVAL);
 534  534  
 535  535          kperm->ipc_uid = uid;
 536  536          kperm->ipc_gid = gid;
 537  537          kperm->ipc_mode = (mode & 0777) | (kperm->ipc_mode & ~0777);
 538  538  
 539  539          if (AU_AUDITING())
 540  540                  audit_ipcget(service->ipcs_atype, kperm);
 541  541  
 542  542          return (0);
 543  543  }
 544  544  
 545  545  void
 546  546  ipcperm_stat(struct ipc_perm *perm, kipc_perm_t *kperm, model_t model)
 547  547  {
 548  548          STRUCT_HANDLE(ipc_perm, lperm);
 549  549  
 550  550          STRUCT_SET_HANDLE(lperm, model, perm);
 551  551          STRUCT_FSET(lperm, uid, kperm->ipc_uid);
 552  552          STRUCT_FSET(lperm, gid, kperm->ipc_gid);
 553  553          STRUCT_FSET(lperm, cuid, kperm->ipc_cuid);
 554  554          STRUCT_FSET(lperm, cgid, kperm->ipc_cgid);
 555  555          STRUCT_FSET(lperm, mode, kperm->ipc_mode);
 556  556          STRUCT_FSET(lperm, seq, 0);
 557  557          STRUCT_FSET(lperm, key, kperm->ipc_key);
 558  558  }
 559  559  
 560  560  int
 561  561  ipcperm_set64(ipc_service_t *service, struct cred *cr,
 562  562      kipc_perm_t *kperm, ipc_perm64_t *perm64)
 563  563  {
 564  564          zone_t *zone;
 565  565  
 566  566          ASSERT(IPC_LOCKED(service, kperm));
 567  567  
 568  568          if (secpolicy_ipc_owner(cr, kperm) != 0)
 569  569                  return (EPERM);
 570  570  
 571  571          zone = crgetzone(cr);
 572  572          if (!VALID_UID(perm64->ipcx_uid, zone) ||
 573  573              !VALID_GID(perm64->ipcx_gid, zone))
 574  574                  return (EINVAL);
 575  575  
 576  576          kperm->ipc_uid = perm64->ipcx_uid;
 577  577          kperm->ipc_gid = perm64->ipcx_gid;
 578  578          kperm->ipc_mode = (perm64->ipcx_mode & 0777) |
 579  579              (kperm->ipc_mode & ~0777);
 580  580  
 581  581          if (AU_AUDITING())
 582  582                  audit_ipcget(service->ipcs_atype, kperm);
 583  583  
 584  584          return (0);
 585  585  }
 586  586  
 587  587  void
 588  588  ipcperm_stat64(ipc_perm64_t *perm64, kipc_perm_t *kperm)
 589  589  {
 590  590          perm64->ipcx_uid = kperm->ipc_uid;
 591  591          perm64->ipcx_gid = kperm->ipc_gid;
 592  592          perm64->ipcx_cuid = kperm->ipc_cuid;
 593  593          perm64->ipcx_cgid = kperm->ipc_cgid;
 594  594          perm64->ipcx_mode = kperm->ipc_mode;
 595  595          perm64->ipcx_key = kperm->ipc_key;
 596  596          perm64->ipcx_projid = kperm->ipc_proj->kpj_id;
 597  597          perm64->ipcx_zoneid = kperm->ipc_zoneid;
 598  598  }
 599  599  
 600  600  
 601  601  /*
 602  602   * ipc key comparator.
 603  603   */
 604  604  static int
 605  605  ipc_key_compar(const void *a, const void *b)
 606  606  {
 607  607          kipc_perm_t *aperm = (kipc_perm_t *)a;
 608  608          kipc_perm_t *bperm = (kipc_perm_t *)b;
 609  609          int ak = aperm->ipc_key;
 610  610          int bk = bperm->ipc_key;
 611  611          zoneid_t az;
 612  612          zoneid_t bz;
 613  613  
 614  614          ASSERT(ak != IPC_PRIVATE);
 615  615          ASSERT(bk != IPC_PRIVATE);
 616  616  
 617  617          /*
 618  618           * Compare key first, then zoneid.  This optimizes performance for
 619  619           * systems with only one zone, since the zone checks will only be
 620  620           * made when the keys match.
 621  621           */
 622  622          if (ak < bk)
 623  623                  return (-1);
 624  624          if (ak > bk)
 625  625                  return (1);
 626  626  
 627  627          /* keys match */
 628  628          az = aperm->ipc_zoneid;
 629  629          bz = bperm->ipc_zoneid;
 630  630          if (az < bz)
 631  631                  return (-1);
 632  632          if (az > bz)
 633  633                  return (1);
 634  634          return (0);
 635  635  }
 636  636  
 637  637  /*
 638  638   * Create an ipc service.
 639  639   */
 640  640  ipc_service_t *
 641  641  ipcs_create(const char *name, rctl_hndl_t proj_rctl, rctl_hndl_t zone_rctl,
 642  642      size_t size, ipc_func_t *dtor, ipc_func_t *rmid, int audit_type,
 643  643      size_t rctl_offset)
 644  644  {
 645  645          ipc_service_t *result;
 646  646  
 647  647          result = kmem_alloc(sizeof (ipc_service_t), KM_SLEEP);
 648  648  
 649  649          mutex_init(&result->ipcs_lock, NULL, MUTEX_ADAPTIVE, NULL);
 650  650          result->ipcs_count = 0;
 651  651          avl_create(&result->ipcs_keys, ipc_key_compar, size, 0);
 652  652          result->ipcs_tabsz = IPC_IDS_MIN;
 653  653          result->ipcs_table =
 654  654              kmem_zalloc(IPC_IDS_MIN * sizeof (ipc_slot_t), KM_SLEEP);
 655  655          result->ipcs_ssize = size;
 656  656          result->ipcs_ids = id_space_create(name, 0, IPC_IDS_MIN);
 657  657          result->ipcs_dtor = dtor;
 658  658          result->ipcs_rmid = rmid;
 659  659          result->ipcs_proj_rctl = proj_rctl;
 660  660          result->ipcs_zone_rctl = zone_rctl;
 661  661          result->ipcs_atype = audit_type;
 662  662          ASSERT(rctl_offset < sizeof (ipc_rqty_t));
 663  663          result->ipcs_rctlofs = rctl_offset;
 664  664          list_create(&result->ipcs_usedids, sizeof (kipc_perm_t),
 665  665              offsetof(kipc_perm_t, ipc_list));
 666  666  
 667  667          return (result);
 668  668  }
 669  669  
 670  670  /*
 671  671   * Destroy an ipc service.
 672  672   */
 673  673  void
 674  674  ipcs_destroy(ipc_service_t *service)
 675  675  {
 676  676          ipc_slot_t *slot, *next;
 677  677  
 678  678          mutex_enter(&service->ipcs_lock);
 679  679  
 680  680          ASSERT(service->ipcs_count == 0);
 681  681          avl_destroy(&service->ipcs_keys);
 682  682          list_destroy(&service->ipcs_usedids);
 683  683          id_space_destroy(service->ipcs_ids);
 684  684  
 685  685          for (slot = service->ipcs_table; slot; slot = next) {
 686  686                  next = slot[0].ipct_chain;
 687  687                  kmem_free(slot, service->ipcs_tabsz * sizeof (ipc_slot_t));
 688  688                  service->ipcs_tabsz >>= 1;
 689  689          }
 690  690  
 691  691          mutex_destroy(&service->ipcs_lock);
 692  692          kmem_free(service, sizeof (ipc_service_t));
 693  693  }
 694  694  
 695  695  /*
 696  696   * Takes the service lock.
 697  697   */
 698  698  void
 699  699  ipcs_lock(ipc_service_t *service)
 700  700  {
 701  701          mutex_enter(&service->ipcs_lock);
 702  702  }
 703  703  
 704  704  /*
 705  705   * Releases the service lock.
 706  706   */
 707  707  void
 708  708  ipcs_unlock(ipc_service_t *service)
 709  709  {
 710  710          mutex_exit(&service->ipcs_lock);
 711  711  }
 712  712  
 713  713  
 714  714  /*
 715  715   * Locks the specified ID.  Returns the ID's ID table index.
 716  716   */
 717  717  static int
 718  718  ipc_lock_internal(ipc_service_t *service, uint_t id)
 719  719  {
 720  720          uint_t  tabsz;
 721  721          uint_t  index;
 722  722          kmutex_t *mutex;
 723  723  
 724  724          for (;;) {
 725  725                  tabsz = service->ipcs_tabsz;
 726  726                  membar_consumer();
 727  727                  index = id & (tabsz - 1);
 728  728                  mutex = &service->ipcs_table[index].ipct_lock;
 729  729                  mutex_enter(mutex);
 730  730                  if (tabsz == service->ipcs_tabsz)
 731  731                          break;
 732  732                  mutex_exit(mutex);
 733  733          }
 734  734  
 735  735          return (index);
 736  736  }
 737  737  
 738  738  /*
 739  739   * Locks the specified ID.  Returns a pointer to the ID's lock.
 740  740   */
 741  741  kmutex_t *
 742  742  ipc_lock(ipc_service_t *service, int id)
 743  743  {
 744  744          uint_t index;
 745  745  
 746  746          /*
 747  747           * These assertions don't reflect requirements of the code
 748  748           * which follows, but they should never fail nonetheless.
 749  749           */
 750  750          ASSERT(id >= 0);
 751  751          ASSERT(IPC_INDEX(id) < service->ipcs_tabsz);
 752  752          index = ipc_lock_internal(service, id);
 753  753  
 754  754          return (&service->ipcs_table[index].ipct_lock);
 755  755  }
 756  756  
 757  757  /*
 758  758   * Checks to see if the held lock provided is the current lock for the
 759  759   * specified id.  If so, we return it instead of dropping it and
 760  760   * returning the result of ipc_lock.  This is intended to speed up cv
 761  761   * wakeups where we are left holding a lock which could be stale, but
 762  762   * probably isn't.
 763  763   */
 764  764  kmutex_t *
 765  765  ipc_relock(ipc_service_t *service, int id, kmutex_t *lock)
 766  766  {
 767  767          ASSERT(id >= 0);
 768  768          ASSERT(IPC_INDEX(id) < service->ipcs_tabsz);
 769  769          ASSERT(MUTEX_HELD(lock));
 770  770  
 771  771          if (&service->ipcs_table[IPC_INDEX(id)].ipct_lock == lock)
 772  772                  return (lock);
 773  773  
 774  774          mutex_exit(lock);
 775  775          return (ipc_lock(service, id));
 776  776  }
 777  777  
 778  778  /*
 779  779   * Performs an ID lookup.  If the ID doesn't exist or has been removed,
 780  780   * or isn't visible to the caller (because of zones), NULL is returned.
 781  781   * Otherwise, a pointer to the ID's perm structure and held ID lock are
 782  782   * returned.
 783  783   */
 784  784  kmutex_t *
 785  785  ipc_lookup(ipc_service_t *service, int id, kipc_perm_t **perm)
 786  786  {
 787  787          kipc_perm_t *result;
 788  788          uint_t index;
 789  789  
 790  790          /*
 791  791           * There is no need to check to see if id is in-range (i.e.
 792  792           * positive and fits into the table).  If it is out-of-range,
 793  793           * the id simply won't match the object's.
 794  794           */
 795  795  
 796  796          index = ipc_lock_internal(service, id);
 797  797          result = service->ipcs_table[index].ipct_data;
 798  798          if (result == NULL || result->ipc_id != (uint_t)id ||
 799  799              !HASZONEACCESS(curproc, result->ipc_zoneid)) {
 800  800                  mutex_exit(&service->ipcs_table[index].ipct_lock);
 801  801                  return (NULL);
 802  802          }
 803  803  
 804  804          ASSERT(IPC_SEQ(id) == service->ipcs_table[index].ipct_seq);
 805  805  
 806  806          *perm = result;
 807  807          if (AU_AUDITING())
 808  808                  audit_ipc(service->ipcs_atype, id, result);
 809  809  
 810  810          return (&service->ipcs_table[index].ipct_lock);
 811  811  }
 812  812  
 813  813  /*
 814  814   * Increase the reference count on an ID.
 815  815   */
 816  816  /*ARGSUSED*/
 817  817  void
 818  818  ipc_hold(ipc_service_t *s, kipc_perm_t *perm)
 819  819  {
 820  820          ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
 821  821          ASSERT(IPC_LOCKED(s, perm));
 822  822          perm->ipc_ref++;
 823  823  }
 824  824  
 825  825  /*
 826  826   * Decrease the reference count on an ID and drops the ID's lock.
 827  827   * Destroys the ID if the new reference count is zero.
 828  828   */
 829  829  void
 830  830  ipc_rele(ipc_service_t *s, kipc_perm_t *perm)
 831  831  {
 832  832          int nref;
 833  833  
 834  834          ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
 835  835          ASSERT(IPC_LOCKED(s, perm));
 836  836          ASSERT(perm->ipc_ref > 0);
 837  837  
 838  838          nref = --perm->ipc_ref;
 839  839          mutex_exit(&s->ipcs_table[IPC_INDEX(perm->ipc_id)].ipct_lock);
 840  840  
 841  841          if (nref == 0) {
 842  842                  ASSERT(IPC_FREE(perm));         /* ipc_rmid clears IPC_ALLOC */
 843  843                  s->ipcs_dtor(perm);
 844  844                  project_rele(perm->ipc_proj);
 845  845                  zone_rele_ref(&perm->ipc_zone_ref, ZONE_REF_IPC);
 846  846                  kmem_free(perm, s->ipcs_ssize);
 847  847          }
 848  848  }
 849  849  
 850  850  /*
 851  851   * Decrease the reference count on an ID, but don't drop the ID lock.
 852  852   * Used in cases where one thread needs to remove many references (on
 853  853   * behalf of other parties).
 854  854   */
 855  855  void
 856  856  ipc_rele_locked(ipc_service_t *s, kipc_perm_t *perm)
 857  857  {
 858  858          ASSERT(perm->ipc_ref > 1);
 859  859          ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
 860  860          ASSERT(IPC_LOCKED(s, perm));
 861  861  
 862  862          perm->ipc_ref--;
 863  863  }
 864  864  
 865  865  
 866  866  /*
 867  867   * Internal function to grow the service ID table.
 868  868   */
 869  869  static int
 870  870  ipc_grow(ipc_service_t *service)
 871  871  {
 872  872          ipc_slot_t *new, *old;
 873  873          int i, oldsize, newsize;
 874  874  
 875  875          ASSERT(MUTEX_HELD(&service->ipcs_lock));
 876  876          ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
 877  877  
 878  878          if (service->ipcs_tabsz == IPC_IDS_MAX)
 879  879                  return (ENOSPC);
 880  880  
 881  881          oldsize = service->ipcs_tabsz;
 882  882          newsize = oldsize << 1;
 883  883          new = kmem_zalloc(newsize * sizeof (ipc_slot_t), KM_NOSLEEP);
 884  884          if (new == NULL)
 885  885                  return (ENOSPC);
 886  886  
 887  887          old = service->ipcs_table;
 888  888          for (i = 0; i < oldsize; i++) {
 889  889                  mutex_enter(&old[i].ipct_lock);
 890  890                  mutex_enter(&new[i].ipct_lock);
 891  891  
 892  892                  new[i].ipct_seq = old[i].ipct_seq;
 893  893                  new[i].ipct_data = old[i].ipct_data;
 894  894                  old[i].ipct_data = NULL;
 895  895          }
 896  896  
 897  897          new[0].ipct_chain = old;
 898  898          service->ipcs_table = new;
 899  899          membar_producer();
 900  900          service->ipcs_tabsz = newsize;
 901  901  
 902  902          for (i = 0; i < oldsize; i++) {
 903  903                  mutex_exit(&old[i].ipct_lock);
 904  904                  mutex_exit(&new[i].ipct_lock);
 905  905          }
 906  906  
 907  907          id_space_extend(service->ipcs_ids, oldsize, service->ipcs_tabsz);
 908  908  
 909  909          return (0);
 910  910  }
 911  911  
 912  912  
 913  913  static int
 914  914  ipc_keylookup(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp)
 915  915  {
 916  916          kipc_perm_t *perm = NULL;
 917  917          avl_index_t where;
 918  918          kipc_perm_t template;
 919  919  
 920  920          ASSERT(MUTEX_HELD(&service->ipcs_lock));
 921  921  
 922  922          template.ipc_key = key;
 923  923          template.ipc_zoneid = getzoneid();
 924  924          if (perm = avl_find(&service->ipcs_keys, &template, &where)) {
 925  925                  ASSERT(!IPC_FREE(perm));
 926  926                  if ((flag & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
 927  927                          return (EEXIST);
 928  928                  if ((flag & 0777) & ~perm->ipc_mode) {
 929  929                          if (AU_AUDITING())
 930  930                                  audit_ipcget(NULL, (void *)perm);
 931  931                          return (EACCES);
 932  932                  }
 933  933                  *permp = perm;
 934  934                  return (0);
 935  935          } else if (flag & IPC_CREAT) {
 936  936                  *permp = NULL;
 937  937                  return (0);
 938  938          }
 939  939          return (ENOENT);
 940  940  }
 941  941  
 942  942  static int
 943  943  ipc_alloc_test(ipc_service_t *service, proc_t *pp)
 944  944  {
 945  945          ASSERT(MUTEX_HELD(&service->ipcs_lock));
 946  946  
 947  947          /*
 948  948           * Resizing the table first would result in a cleaner code
 949  949           * path, but would also allow a user to (permanently) double
 950  950           * the id table size in cases where the allocation would be
 951  951           * denied.  Hence we test the rctl first.
 952  952           */
 953  953  retry:
 954  954          mutex_enter(&pp->p_lock);
 955  955          if ((rctl_test(service->ipcs_proj_rctl, pp->p_task->tk_proj->kpj_rctls,
 956  956              pp, 1, RCA_SAFE) & RCT_DENY) ||
 957  957              (rctl_test(service->ipcs_zone_rctl, pp->p_zone->zone_rctls,
 958  958              pp, 1, RCA_SAFE) & RCT_DENY)) {
 959  959                  mutex_exit(&pp->p_lock);
 960  960                  return (ENOSPC);
 961  961          }
 962  962  
 963  963          if (service->ipcs_count == service->ipcs_tabsz) {
 964  964                  int error;
 965  965  
 966  966                  mutex_exit(&pp->p_lock);
 967  967                  if (error = ipc_grow(service))
 968  968                          return (error);
 969  969                  goto retry;
 970  970          }
 971  971  
 972  972          return (0);
 973  973  }
 974  974  
 975  975  /*
 976  976   * Given a key, search for or create the associated identifier.
 977  977   *
 978  978   * If IPC_CREAT is specified and the key isn't found, or if the key is
 979  979   * equal to IPC_PRIVATE, we return 0 and place a pointer to a newly
 980  980   * allocated object structure in permp.  A pointer to the held service
 981  981   * lock is placed in lockp.  ipc_mode's IPC_ALLOC bit is clear.
 982  982   *
 983  983   * If the key is found and no error conditions arise, we return 0 and
 984  984   * place a pointer to the existing object structure in permp.  A
 985  985   * pointer to the held ID lock is placed in lockp.  ipc_mode's
 986  986   * IPC_ALLOC bit is set.
 987  987   *
 988  988   * Otherwise, a non-zero errno value is returned.
 989  989   */
 990  990  int
 991  991  ipc_get(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp,
 992  992      kmutex_t **lockp)
 993  993  {
 994  994          kipc_perm_t     *perm = NULL;
 995  995          proc_t          *pp = curproc;
 996  996          int             error, index;
 997  997          cred_t          *cr = CRED();
 998  998  
 999  999          if (key != IPC_PRIVATE) {
1000 1000  
1001 1001                  mutex_enter(&service->ipcs_lock);
1002 1002                  error = ipc_keylookup(service, key, flag, &perm);
1003 1003                  if (perm != NULL)
1004 1004                          index = ipc_lock_internal(service, perm->ipc_id);
1005 1005                  mutex_exit(&service->ipcs_lock);
1006 1006  
1007 1007                  if (error) {
1008 1008                          ASSERT(perm == NULL);
1009 1009                          return (error);
1010 1010                  }
1011 1011  
1012 1012                  if (perm) {
1013 1013                          ASSERT(!IPC_FREE(perm));
1014 1014                          *permp = perm;
1015 1015                          *lockp = &service->ipcs_table[index].ipct_lock;
1016 1016                          return (0);
1017 1017                  }
1018 1018  
1019 1019                  /* Key not found; fall through */
1020 1020          }
1021 1021  
1022 1022          perm = kmem_zalloc(service->ipcs_ssize, KM_SLEEP);
1023 1023  
1024 1024          mutex_enter(&service->ipcs_lock);
1025 1025          if (error = ipc_alloc_test(service, pp)) {
1026 1026                  mutex_exit(&service->ipcs_lock);
1027 1027                  kmem_free(perm, service->ipcs_ssize);
1028 1028                  return (error);
1029 1029          }
1030 1030  
1031 1031          perm->ipc_cuid = perm->ipc_uid = crgetuid(cr);
1032 1032          perm->ipc_cgid = perm->ipc_gid = crgetgid(cr);
1033 1033          perm->ipc_zoneid = getzoneid();
1034 1034          perm->ipc_mode = flag & 0777;
1035 1035          perm->ipc_key = key;
1036 1036          perm->ipc_ref = 1;
1037 1037          perm->ipc_id = IPC_ID_INVAL;
1038 1038          *permp = perm;
1039 1039          *lockp = &service->ipcs_lock;
1040 1040  
1041 1041          return (0);
1042 1042  }
1043 1043  
1044 1044  /*
1045 1045   * Attempts to add the a newly created ID to the global namespace.  If
1046 1046   * creating it would cause an error, we return the error.  If there is
1047 1047   * the possibility that we could obtain the existing ID and return it
1048 1048   * to the user, we return EAGAIN.  Otherwise, we return 0 with p_lock
1049 1049   * and the service lock held.
1050 1050   *
1051 1051   * Since this should be only called after all initialization has been
1052 1052   * completed, on failure we automatically invoke the destructor for the
1053 1053   * object and deallocate the memory associated with it.
1054 1054   */
1055 1055  int
1056 1056  ipc_commit_begin(ipc_service_t *service, key_t key, int flag,
1057 1057      kipc_perm_t *newperm)
1058 1058  {
1059 1059          kipc_perm_t *perm;
1060 1060          int error;
1061 1061          proc_t *pp = curproc;
1062 1062  
1063 1063          ASSERT(newperm->ipc_ref == 1);
1064 1064          ASSERT(IPC_FREE(newperm));
1065 1065  
1066 1066          /*
1067 1067           * Set ipc_proj and ipc_zone_ref so that future calls to ipc_cleanup()
1068 1068           * clean up the necessary state.  This must be done before the
1069 1069           * potential call to ipcs_dtor() below.
1070 1070           */
1071 1071          newperm->ipc_proj = pp->p_task->tk_proj;
1072 1072          zone_init_ref(&newperm->ipc_zone_ref);
1073 1073          zone_hold_ref(pp->p_zone, &newperm->ipc_zone_ref, ZONE_REF_IPC);
1074 1074  
1075 1075          mutex_enter(&service->ipcs_lock);
1076 1076          /*
1077 1077           * Ensure that no-one has raced with us and created the key.
1078 1078           */
1079 1079          if ((key != IPC_PRIVATE) &&
1080 1080              (((error = ipc_keylookup(service, key, flag, &perm)) != 0) ||
1081 1081              (perm != NULL))) {
1082 1082                  error = error ? error : EAGAIN;
1083 1083                  goto errout;
1084 1084          }
1085 1085  
1086 1086          /*
1087 1087           * Ensure that no-one has raced with us and used the last of
1088 1088           * the permissible ids, or the last of the free spaces in the
1089 1089           * id table.
1090 1090           */
1091 1091          if (error = ipc_alloc_test(service, pp))
1092 1092                  goto errout;
1093 1093  
1094 1094          ASSERT(MUTEX_HELD(&service->ipcs_lock));
1095 1095          ASSERT(MUTEX_HELD(&pp->p_lock));
1096 1096  
1097 1097          return (0);
1098 1098  errout:
1099 1099          mutex_exit(&service->ipcs_lock);
1100 1100          service->ipcs_dtor(newperm);
1101 1101          zone_rele_ref(&newperm->ipc_zone_ref, ZONE_REF_IPC);
1102 1102          kmem_free(newperm, service->ipcs_ssize);
1103 1103          return (error);
1104 1104  }
1105 1105  
1106 1106  /*
1107 1107   * Commit the ID allocation transaction.  Called with p_lock and the
1108 1108   * service lock held, both of which are dropped.  Returns the held ID
1109 1109   * lock so the caller can extract the ID and perform ipcget auditing.
1110 1110   */
1111 1111  kmutex_t *
1112 1112  ipc_commit_end(ipc_service_t *service, kipc_perm_t *perm)
1113 1113  {
1114 1114          ipc_slot_t *slot;
1115 1115          avl_index_t where;
1116 1116          int index;
1117 1117          void *loc;
1118 1118  
1119 1119          ASSERT(MUTEX_HELD(&service->ipcs_lock));
1120 1120          ASSERT(MUTEX_HELD(&curproc->p_lock));
1121 1121  
1122 1122          (void) project_hold(perm->ipc_proj);
1123 1123          mutex_exit(&curproc->p_lock);
1124 1124  
1125 1125          /*
1126 1126           * Pick out our slot.
1127 1127           */
1128 1128          service->ipcs_count++;
1129 1129          index = id_alloc(service->ipcs_ids);
1130 1130          ASSERT(index < service->ipcs_tabsz);
1131 1131          slot = &service->ipcs_table[index];
1132 1132          mutex_enter(&slot->ipct_lock);
1133 1133          ASSERT(slot->ipct_data == NULL);
1134 1134  
1135 1135          /*
1136 1136           * Update the perm structure.
1137 1137           */
1138 1138          perm->ipc_mode |= IPC_ALLOC;
1139 1139          perm->ipc_id = (slot->ipct_seq << IPC_SEQ_SHIFT) | index;
1140 1140  
1141 1141          /*
1142 1142           * Push into global visibility.
1143 1143           */
1144 1144          slot->ipct_data = perm;
1145 1145          if (perm->ipc_key != IPC_PRIVATE) {
1146 1146                  loc = avl_find(&service->ipcs_keys, perm, &where);
1147 1147                  ASSERT(loc == NULL);
1148 1148                  avl_insert(&service->ipcs_keys, perm, where);
1149 1149          }
1150 1150          list_insert_head(&service->ipcs_usedids, perm);
1151 1151  
1152 1152          /*
1153 1153           * Update resource consumption.
1154 1154           */
1155 1155          IPC_PROJ_USAGE(perm, service) += 1;
1156 1156          IPC_ZONE_USAGE(perm, service) += 1;
1157 1157  
1158 1158          mutex_exit(&service->ipcs_lock);
1159 1159          return (&slot->ipct_lock);
1160 1160  }
1161 1161  
1162 1162  /*
1163 1163   * Clean up function, in case the allocation fails.  If called between
1164 1164   * ipc_lookup and ipc_commit_begin, perm->ipc_proj will be 0 and we
1165 1165   * merely free the perm structure.  If called after ipc_commit_begin,
1166 1166   * we also drop locks and call the ID's destructor.
1167 1167   */
1168 1168  void
1169 1169  ipc_cleanup(ipc_service_t *service, kipc_perm_t *perm)
1170 1170  {
1171 1171          ASSERT(IPC_FREE(perm));
1172 1172          if (perm->ipc_proj) {
1173 1173                  mutex_exit(&curproc->p_lock);
1174 1174                  mutex_exit(&service->ipcs_lock);
1175 1175                  service->ipcs_dtor(perm);
1176 1176          }
1177 1177          if (perm->ipc_zone_ref.zref_zone != NULL)
1178 1178                  zone_rele_ref(&perm->ipc_zone_ref, ZONE_REF_IPC);
1179 1179          kmem_free(perm, service->ipcs_ssize);
1180 1180  }
1181 1181  
1182 1182  
1183 1183  /*
1184 1184   * Common code to remove an IPC object.  This should be called after
1185 1185   * all permissions checks have been performed, and with the service
1186 1186   * and ID locked.  Note that this does not remove the object from
1187 1187   * the ipcs_usedids list (this needs to be done by the caller before
1188 1188   * dropping the service lock).
1189 1189   */
1190 1190  static void
1191 1191  ipc_remove(ipc_service_t *service, kipc_perm_t *perm)
1192 1192  {
1193 1193          int id = perm->ipc_id;
1194 1194          int index;
1195 1195  
1196 1196          ASSERT(MUTEX_HELD(&service->ipcs_lock));
1197 1197          ASSERT(IPC_LOCKED(service, perm));
1198 1198  
1199 1199          index = IPC_INDEX(id);
1200 1200  
1201 1201          service->ipcs_table[index].ipct_data = NULL;
1202 1202  
1203 1203          if (perm->ipc_key != IPC_PRIVATE)
1204 1204                  avl_remove(&service->ipcs_keys, perm);
1205 1205          list_remove(&service->ipcs_usedids, perm);
1206 1206          perm->ipc_mode &= ~IPC_ALLOC;
1207 1207  
1208 1208          id_free(service->ipcs_ids, index);
1209 1209  
1210 1210          if (service->ipcs_table[index].ipct_seq++ == IPC_SEQ_MASK)
1211 1211                  service->ipcs_table[index].ipct_seq = 0;
1212 1212          service->ipcs_count--;
1213 1213          ASSERT(IPC_PROJ_USAGE(perm, service) > 0);
1214 1214          ASSERT(IPC_ZONE_USAGE(perm, service) > 0);
1215 1215          IPC_PROJ_USAGE(perm, service) -= 1;
1216 1216          IPC_ZONE_USAGE(perm, service) -= 1;
1217 1217          ASSERT(service->ipcs_count || ((IPC_PROJ_USAGE(perm, service) == 0) &&
1218 1218              (IPC_ZONE_USAGE(perm, service) == 0)));
1219 1219  }
1220 1220  
1221 1221  /*
1222 1222   * Perform actual IPC_RMID, either via ipc_rmid or due to a delayed *_RMID.
1223 1223   */
1224 1224  void
1225 1225  ipc_rmsvc(ipc_service_t *service, kipc_perm_t *perm)
1226 1226  {
1227 1227          ASSERT(service->ipcs_count > 0);
1228 1228          ASSERT(MUTEX_HELD(&service->ipcs_lock));
1229 1229  
1230 1230          ipc_remove(service, perm);
1231 1231          mutex_exit(&service->ipcs_lock);
1232 1232  
1233 1233          /* perform any per-service removal actions */
1234 1234          service->ipcs_rmid(perm);
1235 1235  
1236 1236          ipc_rele(service, perm);
1237 1237  }
1238 1238  
1239 1239  /*
1240 1240   * Common code to perform an IPC_RMID.  Returns an errno value on
1241 1241   * failure, 0 on success.
1242 1242   */
1243 1243  int
1244 1244  ipc_rmid(ipc_service_t *service, int id, cred_t *cr)
1245 1245  {
1246 1246          kipc_perm_t *perm;
1247 1247          kmutex_t *lock;
1248 1248  
1249 1249          mutex_enter(&service->ipcs_lock);
1250 1250  
1251 1251          lock = ipc_lookup(service, id, &perm);
1252 1252          if (lock == NULL) {
1253 1253                  mutex_exit(&service->ipcs_lock);
1254 1254                  return (EINVAL);
1255 1255          }
1256 1256  
1257 1257          ASSERT(service->ipcs_count > 0);
1258 1258  
1259 1259          if (secpolicy_ipc_owner(cr, perm) != 0) {
1260 1260                  mutex_exit(lock);
1261 1261                  mutex_exit(&service->ipcs_lock);
1262 1262                  return (EPERM);
1263 1263          }
1264 1264  
1265 1265          /*
1266 1266           * Nothing can fail from this point on.
1267 1267           */
1268 1268          ipc_rmsvc(service, perm);
1269 1269  
1270 1270          return (0);
1271 1271  }
1272 1272  
1273 1273  /*
1274 1274   * Implementation for shmids, semids, and msgids.  buf is the address
1275 1275   * of the user buffer, nids is the size, and pnids is a pointer to
1276 1276   * where we write the actual number of ids that [would] have been
1277 1277   * copied out.
1278 1278   */
1279 1279  int
1280 1280  ipc_ids(ipc_service_t *service, int *buf, uint_t nids, uint_t *pnids)
1281 1281  {
1282 1282          kipc_perm_t *perm;
1283 1283          size_t  idsize = 0;
1284 1284          int     error = 0;
1285 1285          int     idcount;
1286 1286          int     *ids;
1287 1287          int     numids = 0;
1288 1288          zoneid_t zoneid = getzoneid();
1289 1289          int     global = INGLOBALZONE(curproc);
1290 1290  
1291 1291          if (buf == NULL)
1292 1292                  nids = 0;
1293 1293  
1294 1294          /*
1295 1295           * Get an accurate count of the total number of ids, and allocate a
1296 1296           * staging buffer.  Since ipcs_count is always sane, we don't have
1297 1297           * to take ipcs_lock for our first guess.  If there are no ids, or
1298 1298           * we're in the global zone and the number of ids is greater than
1299 1299           * the size of the specified buffer, we shunt to the end.  Otherwise,
1300 1300           * we go through the id list looking for (and counting) what is
1301 1301           * visible in the specified zone.
1302 1302           */
1303 1303          idcount = service->ipcs_count;
1304 1304          for (;;) {
1305 1305                  if ((global && idcount > nids) || idcount == 0) {
1306 1306                          numids = idcount;
1307 1307                          nids = 0;
1308 1308                          goto out;
1309 1309                  }
1310 1310  
1311 1311                  idsize = idcount * sizeof (int);
1312 1312                  ids = kmem_alloc(idsize, KM_SLEEP);
1313 1313  
1314 1314                  mutex_enter(&service->ipcs_lock);
1315 1315                  if (idcount >= service->ipcs_count)
1316 1316                          break;
1317 1317                  idcount = service->ipcs_count;
1318 1318                  mutex_exit(&service->ipcs_lock);
1319 1319  
1320 1320                  if (idsize != 0) {
1321 1321                          kmem_free(ids, idsize);
1322 1322                          idsize = 0;
1323 1323                  }
1324 1324          }
1325 1325  
1326 1326          for (perm = list_head(&service->ipcs_usedids); perm != NULL;
1327 1327              perm = list_next(&service->ipcs_usedids, perm)) {
1328 1328                  ASSERT(!IPC_FREE(perm));
1329 1329                  if (global || perm->ipc_zoneid == zoneid)
1330 1330                          ids[numids++] = perm->ipc_id;
1331 1331          }
1332 1332          mutex_exit(&service->ipcs_lock);
1333 1333  
1334 1334          /*
1335 1335           * If there isn't enough space to hold all of the ids, just
1336 1336           * return the number of ids without copying out any of them.
1337 1337           */
1338 1338          if (nids < numids)
1339 1339                  nids = 0;
1340 1340  
1341 1341  out:
1342 1342          if (suword32(pnids, (uint32_t)numids) ||
1343 1343              (nids != 0 && copyout(ids, buf, numids * sizeof (int))))
1344 1344                  error = EFAULT;
1345 1345          if (idsize != 0)
1346 1346                  kmem_free(ids, idsize);
1347 1347          return (error);
1348 1348  }
1349 1349  
1350 1350  /*
1351 1351   * Destroy IPC objects from the given service that are associated with
1352 1352   * the given zone.
1353 1353   *
1354 1354   * We can't hold on to the service lock when freeing objects, so we
1355 1355   * first search the service and move all the objects to a private
1356 1356   * list, then walk through and free them after dropping the lock.
1357 1357   */
1358 1358  void
1359 1359  ipc_remove_zone(ipc_service_t *service, zoneid_t zoneid)
1360 1360  {
1361 1361          kipc_perm_t *perm, *next;
1362 1362          list_t rmlist;
1363 1363          kmutex_t *lock;
1364 1364  
1365 1365          list_create(&rmlist, sizeof (kipc_perm_t),
1366 1366              offsetof(kipc_perm_t, ipc_list));
1367 1367  
1368 1368          mutex_enter(&service->ipcs_lock);
1369 1369          for (perm = list_head(&service->ipcs_usedids); perm != NULL;
1370 1370              perm = next) {
1371 1371                  next = list_next(&service->ipcs_usedids, perm);
1372 1372                  if (perm->ipc_zoneid != zoneid)
1373 1373                          continue;
1374 1374  
1375 1375                  /*
1376 1376                   * Remove the object from the service, then put it on
1377 1377                   * the removal list so we can defer the call to
1378 1378                   * ipc_rele (which will actually free the structure).
1379 1379                   * We need to do this since the destructor may grab
1380 1380                   * the service lock.
1381 1381                   */
1382 1382                  ASSERT(!IPC_FREE(perm));
1383 1383                  lock = ipc_lock(service, perm->ipc_id);
1384 1384                  ipc_remove(service, perm);
1385 1385                  mutex_exit(lock);
1386 1386                  list_insert_tail(&rmlist, perm);
1387 1387          }
1388 1388          mutex_exit(&service->ipcs_lock);
1389 1389  
1390 1390          /*
1391 1391           * Now that we've dropped the service lock, loop through the
1392 1392           * private list freeing removed objects.
1393 1393           */
1394 1394          for (perm = list_head(&rmlist); perm != NULL; perm = next) {
1395 1395                  next = list_next(&rmlist, perm);
1396 1396                  list_remove(&rmlist, perm);
1397 1397  
1398 1398                  (void) ipc_lock(service, perm->ipc_id);
1399 1399  
1400 1400                  /* perform any per-service removal actions */
1401 1401                  service->ipcs_rmid(perm);
1402 1402  
1403 1403                  /* release reference */
1404 1404                  ipc_rele(service, perm);
1405 1405          }
1406 1406  
1407 1407          list_destroy(&rmlist);
1408 1408  }
  
    | 
      ↓ open down ↓ | 
    1408 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX