1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2016 Joyent, Inc.
  24  */
  25 
  26 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T             */
  27 /*      All Rights Reserved                                     */
  28 
  29 
  30 /*
  31  * Common Inter-Process Communication routines.
  32  *
  33  * Overview
  34  * --------
  35  *
  36  * The System V inter-process communication (IPC) facilities provide
  37  * three services, message queues, semaphore arrays, and shared memory
 * segments, which are managed using filesystem-like namespaces.
  39  * Unlike a filesystem, these namespaces aren't mounted and accessible
  40  * via a path -- a special API is used to interact with the different
  41  * facilities (nothing precludes a VFS-based interface, but the
  42  * standards require the special APIs).  Furthermore, these special
  43  * APIs don't use file descriptors, nor do they have an equivalent.
  44  * This means that every operation which acts on an object needs to
 * perform the equivalent of a lookup, which in turn means that every
  46  * operation can fail if the specified object doesn't exist in the
  47  * facility's namespace.
  48  *
  49  * Objects
  50  * -------
  51  *
  52  * Each object in a namespace has a unique ID, which is assigned by the
  53  * system and is used to identify the object when performing operations
  54  * on it.  An object can also have a key, which is selected by the user
  55  * at allocation time and is used as a primitive rendezvous mechanism.
  56  * An object without a key is said to have a "private" key.
  57  *
  58  * To perform an operation on an object given its key, one must first
  59  * perform a lookup and obtain its ID.  The ID is then used to identify
  60  * the object when performing the operation.  If the object has a
  61  * private key, the ID must be known or obtained by other means.
  62  *
  63  * Each object in the namespace has a creator uid and gid, as well as
  64  * an owner uid and gid.  Both are initialized with the ruid and rgid
  65  * of the process which created the object.  The creator or current
  66  * owner has the ability to change the owner of the object.
  67  *
  68  * Each object in the namespace has a set of file-like permissions,
  69  * which, in conjunction with the creator and owner uid and gid,
  70  * control read and write access to the object (execute is ignored).
  71  *
  72  * Each object also has a creator project and zone, which are used to
  73  * account for its resource usage.
  74  *
  75  * Operations
  76  * ----------
  77  *
  78  * There are five operations which all three facilities have in
  79  * common: GET, SET, STAT, RMID, and IDS.
  80  *
  81  * GET, like open, is used to allocate a new object or obtain an
  82  * existing one (using its key).  It takes a key, a set of flags and
  83  * mode bits, and optionally facility-specific arguments.  If the key
  84  * is IPC_PRIVATE, a new object with the requested mode bits and
  85  * facility-specific attributes is created.  If the key isn't
 * IPC_PRIVATE, the GET will attempt to look up the specified key and
 * either return the matching object or create a new object with that
 * key, depending on the state of the IPC_CREAT and IPC_EXCL flags,
 * much like open.  If GET needs to
  89  * allocate an object, it can fail if there is insufficient space in
  90  * the namespace (the maximum number of ids for the facility has been
  91  * exceeded) or if the facility-specific initialization fails.  If GET
  92  * finds an object it can return, it can still fail if that object's
  93  * permissions or facility-specific attributes are less than those
  94  * requested.
  95  *
  96  * SET is used to adjust facility-specific parameters of an object, in
  97  * addition to the owner uid and gid, and mode bits.  It can fail if
  98  * the caller isn't the creator or owner.
  99  *
 * STAT is used to obtain information about an object, including the
 * general object attributes described above as well as
 * facility-specific information.  It can fail if the caller doesn't
 * have read permission.
 104  *
 105  * RMID removes an object from the namespace.  Subsequent operations
 106  * using the object's ID or key will fail (until another object is
 107  * created with the same key or ID).  Since an RMID may be performed
 108  * asynchronously with other operations, it is possible that other
 109  * threads and/or processes will have references to the object.  While
 110  * a facility may have actions which need to be performed at RMID time,
 111  * only when all references are dropped can the object be destroyed.
 112  * RMID will fail if the caller isn't the creator or owner.
 113  *
 114  * IDS obtains a list of all IDs in a facility's namespace.  There are
 115  * no facility-specific behaviors of IDS.
 116  *
 117  * Design
 118  * ------
 119  *
 120  * Because some IPC facilities provide services whose operations must
 121  * scale, a mechanism which allows fast, concurrent access to
 122  * individual objects is needed.  Of primary importance is object
 123  * lookup based on ID (SET, STAT, others).  Allocation (GET),
 124  * deallocation (RMID), ID enumeration (IDS), and key lookups (GET) are
 125  * lesser concerns, but should be implemented in such a way that ID
 126  * lookup isn't affected (at least not in the common case).
 127  *
 128  * Starting from the bottom up, each object is represented by a
 129  * structure, the first member of which must be a kipc_perm_t.  The
 130  * kipc_perm_t contains the information described above in "Objects", a
 131  * reference count (since the object may continue to exist after it has
 132  * been removed from the namespace), as well as some additional
 133  * metadata used to manage data structure membership.  These objects
 134  * are dynamically allocated.
 135  *
 136  * Above the objects is a power-of-two sized table of ID slots.  Each
 137  * slot contains a pointer to an object, a sequence number, and a
 138  * lock.  An object's ID is a function of its slot's index in the table
 139  * and its slot's sequence number.  Every time a slot is released (via
 140  * RMID) its sequence number is increased.  Strictly speaking, the
 141  * sequence number is unnecessary.  However, checking the sequence
 142  * number after a lookup provides a certain degree of robustness
 143  * against the use of stale IDs (useful since nothing else does).  When
 144  * the table fills up, it is resized (see Locking, below).
 145  *
 146  * Of an ID's 31 bits (an ID is, as defined by the standards, a signed
 147  * int) the top IPC_SEQ_BITS are used for the sequence number with the
 148  * remainder holding the index into the table.  The size of the table
 149  * is therefore bounded at 2 ^ (31 - IPC_SEQ_BITS) slots.
 150  *
 151  * Managing this table is the ipc_service structure.  It contains a
 152  * pointer to the dynamically allocated ID table, a namespace-global
 153  * lock, an id_space for managing the free space in the table, and
 154  * sundry other metadata necessary for the maintenance of the
 155  * namespace.  An AVL tree of all keyed objects in the table (sorted by
 156  * key) is used for key lookups.  An unordered doubly linked list of
 157  * all objects in the namespace (keyed or not) is maintained to
 158  * facilitate ID enumeration.
 159  *
 160  * To help visualize these relationships, here's a picture of a
 161  * namespace with a table of size 8 containing three objects
 162  * (IPC_SEQ_BITS = 28):
 163  *
 164  *
 165  * +-ipc_service_t--+
 166  * | table          *---\
 167  * | keys           *---+----------------------\
 168  * | all ids        *--\|                      |
 169  * |                |  ||                      |
 170  * +----------------+  ||                      |
 171  *                     ||                      |
 172  * /-------------------/|                      |
 173  * |    /---------------/                      |
 174  * |    |                                      |
 175  * |    v                                      |
 176  * |  +-0------+-1------+-2------+-3------+-4--+---+-5------+-6------+-7------+
 177  * |  | Seq=3  |        |        | Seq=1  |    :   |        |        | Seq=6  |
 178  * |  |        |        |        |        |    :   |        |        |        |
 179  * |  +-*------+--------+--------+-*------+----+---+--------+--------+-*------+
 180  * |    |                          |           |                       |
 181  * |    |                      /---/           |      /----------------/
 182  * |    |                      |               |      |
 183  * |    v                      v               |      v
 184  * |  +-kipc_perm_t-+        +-kipc_perm_t-+   |    +-kipc_perm_t-+
 185  * |  | id=0x30     |        | id=0x13     |   |    | id=0x67     |
 186  * |  | key=0xfeed  |        | key=0xbeef  |   |    | key=0xcafe  |
 187  * \->| [list]      |<------>| [list]      |<------>| [list]      |
 188  * /->| [avl left]  x   /--->| [avl left]  x   \--->| [avl left]  *---\
 189  * |  | [avl right] x   |    | [avl right] x        | [avl right] *---+-\
 190  * |  |             |   |    |             |        |             |   | |
 191  * |  +-------------+   |    +-------------+        +-------------+   | |
 192  * |                    \---------------------------------------------/ |
 193  * \--------------------------------------------------------------------/
 194  *
 195  * Locking
 196  * -------
 197  *
 198  * There are three locks (or sets of locks) which are used to ensure
 199  * correctness: the slot locks, the namespace lock, and p_lock (needed
 200  * when checking resource controls).  Their ordering is
 201  *
 202  *   namespace lock -> slot lock 0 -> ... -> slot lock t -> p_lock
 203  *
 204  * Generally speaking, the namespace lock is used to protect allocation
 205  * and removal from the namespace, ID enumeration, and resizing the ID
 206  * table.  Specifically:
 207  *
 208  * - write access to all fields of the ipc_service structure
 209  * - read access to all variable fields of ipc_service except
 210  *   ipcs_tabsz (table size) and ipcs_table (the table pointer)
 211  * - read/write access to ipc_avl, ipc_list in visible objects'
 212  *   kipc_perm structures (i.e. objects which have been removed from
 213  *   the namespace don't have this restriction)
 214  * - write access to ipct_seq and ipct_data in the table entries
 215  *
 216  * A slot lock by itself is meaningless (except when resizing).  Of
 217  * greater interest conceptually is the notion of an ID lock -- a
 218  * "virtual lock" which refers to whichever slot lock an object's ID
 219  * currently hashes to.
 220  *
 221  * An ID lock protects all objects with that ID.  Normally there will
 222  * only be one such object: the one pointed to by the locked slot.
 223  * However, if an object is removed from the namespace but retains
 224  * references (e.g. an attached shared memory segment which has been
 225  * RMIDed), it continues to use the lock associated with its original
 226  * ID.  While this can result in increased contention, operations which
 227  * require taking the ID lock of removed objects are infrequent.
 228  *
 229  * Specifically, an ID lock protects the contents of an object's
 230  * structure, including the contents of the embedded kipc_perm
 231  * structure (but excluding those fields protected by the namespace
 232  * lock).  It also protects the ipct_seq and ipct_data fields in its
 233  * slot (it is really a slot lock, after all).
 234  *
 235  * Recall that the table is resizable.  To avoid requiring every ID
 236  * lookup to take a global lock, a scheme much like that employed for
 237  * file descriptors (see the comment above UF_ENTER in user.h) is
 238  * used.  Note that the sequence number and data pointer are protected
 239  * by both the namespace lock and their slot lock.  When the table is
 240  * resized, the following operations take place:
 241  *
 242  *   1) A new table is allocated.
 243  *   2) The global lock is taken.
 244  *   3) All old slots are locked, in order.
 245  *   4) The first half of the new slots are locked.
 246  *   5) All table entries are copied to the new table, and cleared from
 247  *      the old table.
 248  *   6) The ipc_service structure is updated to point to the new table.
 249  *   7) The ipc_service structure is updated with the new table size.
 250  *   8) All slot locks (old and new) are dropped.
 251  *
 252  * Because the slot locks are embedded in the table, ID lookups and
 * other operations which require taking a slot lock need to verify
 254  * that the lock taken wasn't part of a stale table.  This is
 255  * accomplished by checking the table size before and after
 256  * dereferencing the table pointer and taking the lock: if the size
 257  * changes, the lock must be dropped and reacquired.  It is this
 258  * additional work which distinguishes an ID lock from a slot lock.
 259  *
 260  * Because we can't guarantee that threads aren't accessing the old
 261  * tables' locks, they are never deallocated.  To prevent spurious
 262  * reports of memory leaks, a pointer to the discarded table is stored
 263  * in the new one in step 5.  (Theoretically ipcs_destroy will delete
 264  * the discarded tables, but it is only ever called from a failed _init
 265  * invocation; i.e. when there aren't any.)
 266  *
 267  * Interfaces
 268  * ----------
 269  *
 270  * The following interfaces are provided by the ipc module for use by
 271  * the individual IPC facilities:
 272  *
 273  * ipcperm_access
 274  *
 275  *   Given an object and a cred structure, determines if the requested
 276  *   access type is allowed.
 277  *
 278  * ipcperm_set, ipcperm_stat,
 279  * ipcperm_set64, ipcperm_stat64
 280  *
 *   Performs the common portion of a STAT or SET operation.  All
 282  *   (except stat and stat64) can fail, so they should be called before
 283  *   any facility-specific non-reversible changes are made to an
 284  *   object.  Similarly, the set operations have side effects, so they
 285  *   should only be called once the possibility of a facility-specific
 286  *   failure is eliminated.
 287  *
 288  * ipcs_create
 289  *
 290  *   Creates an IPC namespace for use by an IPC facility.
 291  *
 292  * ipcs_destroy
 293  *
 294  *   Destroys an IPC namespace.
 295  *
 296  * ipcs_lock, ipcs_unlock
 297  *
 298  *   Takes the namespace lock.  Ideally such access wouldn't be
 299  *   necessary, but there may be facility-specific data protected by
 300  *   this lock (e.g. project-wide resource consumption).
 301  *
 302  * ipc_lock
 303  *
 304  *   Takes the lock associated with an ID.  Can't fail.
 305  *
 306  * ipc_relock
 307  *
 308  *   Like ipc_lock, but takes a pointer to a held lock.  Drops the lock
 309  *   unless it is the one that would have been returned by ipc_lock.
 310  *   Used after calls to cv_wait.
 311  *
 312  * ipc_lookup
 313  *
 314  *   Performs an ID lookup, returns with the ID lock held.  Fails if
 315  *   the ID doesn't exist in the namespace.
 316  *
 317  * ipc_hold
 318  *
 319  *   Takes a reference on an object.
 320  *
 321  * ipc_rele
 322  *
 323  *   Releases a reference on an object, and drops the object's lock.
 324  *   Calls the object's destructor if last reference is being
 325  *   released.
 326  *
 327  * ipc_rele_locked
 328  *
 329  *   Releases a reference on an object.  Doesn't drop lock, and may
 330  *   only be called when there is more than one reference to the
 331  *   object.
 332  *
 333  * ipc_get, ipc_commit_begin, ipc_commit_end, ipc_cleanup
 334  *
 335  *   Components of a GET operation.  ipc_get performs a key lookup,
 336  *   allocating an object if the key isn't found (returning with the
 337  *   namespace lock and p_lock held), and returning the existing object
 338  *   if it is (with the object lock held).  ipc_get doesn't modify the
 339  *   namespace.
 340  *
 341  *   ipc_commit_begin begins the process of inserting an object
 342  *   allocated by ipc_get into the namespace, and can fail.  If
 343  *   successful, it returns with the namespace lock and p_lock held.
 344  *   ipc_commit_end completes the process of inserting an object into
 345  *   the namespace and can't fail.  The facility can call ipc_cleanup
 346  *   at any time following a successful ipc_get and before
 347  *   ipc_commit_end or a failed ipc_commit_begin to fail the
 348  *   allocation.  Pseudocode for the suggested GET implementation:
 349  *
 350  *   top:
 351  *
 352  *     ipc_get
 353  *
 354  *     if failure
 355  *       return
 356  *
 357  *     if found {
 358  *
 359  *       if object meets criteria
 360  *         unlock object and return success
 361  *       else
 362  *         unlock object and return failure
 363  *
 364  *     } else {
 365  *
 366  *       perform resource control tests
 367  *       drop namespace lock, p_lock
 368  *       if failure
 369  *         ipc_cleanup
 370  *
 371  *       perform facility-specific initialization
 372  *       if failure {
 373  *         facility-specific cleanup
 374  *         ipc_cleanup
 375  *       }
 376  *
 377  *       ( At this point the object should be destructible using the
 378  *         destructor given to ipcs_create )
 379  *
 380  *       ipc_commit_begin
 381  *       if retry
 382  *         goto top
 383  *       else if failure
 384  *         return
 385  *
 386  *       perform facility-specific resource control tests/allocations
 387  *       if failure
 388  *         ipc_cleanup
 389  *
 390  *       ipc_commit_end
 391  *       perform any infallible post-creation actions, unlock, and return
 392  *
 393  *     }
 394  *
 395  * ipc_rmid
 396  *
 *   Performs the common portion of an RMID operation -- looks up an ID,
 *   removes it, and calls a facility-specific function to do
 *   RMID-time cleanup on the private portions of the object.
 400  *
 401  * ipc_ids
 402  *
 403  *   Performs the common portion of an IDS operation.
 404  *
 405  */
 406 
 407 #include <sys/types.h>
 408 #include <sys/param.h>
 409 #include <sys/cred.h>
 410 #include <sys/policy.h>
 411 #include <sys/proc.h>
 412 #include <sys/user.h>
 413 #include <sys/ipc.h>
 414 #include <sys/ipc_impl.h>
 415 #include <sys/errno.h>
 416 #include <sys/systm.h>
 417 #include <sys/list.h>
 418 #include <sys/atomic.h>
 419 #include <sys/zone.h>
 420 #include <sys/task.h>
 421 #include <sys/modctl.h>
 422 
 423 #include <c2/audit.h>
 424 
 425 static struct modlmisc modlmisc = {
 426         &mod_miscops,
 427         "common ipc code",
 428 };
 429 
 430 static struct modlinkage modlinkage = {
 431         MODREV_1, (void *)&modlmisc, NULL
 432 };
 433 
 434 
 435 int
 436 _init(void)
 437 {
 438         return (mod_install(&modlinkage));
 439 }
 440 
 441 int
 442 _fini(void)
 443 {
 444         return (mod_remove(&modlinkage));
 445 }
 446 
 447 int
 448 _info(struct modinfo *modinfop)
 449 {
 450         return (mod_info(&modlinkage, modinfop));
 451 }
 452 
 453 
 454 /*
 455  * Check message, semaphore, or shared memory access permissions.
 456  *
 457  * This routine verifies the requested access permission for the current
 458  * process.  The zone ids are compared, and the appropriate bits are
 459  * checked corresponding to owner, group (including the list of
 460  * supplementary groups), or everyone.  Zero is returned on success.
 461  * On failure, the security policy is asked to check to override the
 462  * permissions check; the policy will either return 0 for access granted
 463  * or EACCES.
 464  *
 465  * Access to objects in other zones requires that the caller be in the
 466  * global zone and have the appropriate IPC_DAC_* privilege, regardless
 467  * of whether the uid or gid match those of the object.  Note that
 468  * cross-zone accesses will normally never get here since they'll
 469  * fail in ipc_lookup or ipc_get.
 470  *
 471  * The arguments must be set up as follows:
 472  *      p - Pointer to permission structure to verify
 473  *      mode - Desired access permissions
 474  */
 475 int
 476 ipcperm_access(kipc_perm_t *p, int mode, cred_t *cr)
 477 {
 478         int shifts = 0;
 479         uid_t uid = crgetuid(cr);
 480         zoneid_t zoneid = getzoneid();
 481 
 482         if (p->ipc_zoneid == zoneid) {
 483                 if (uid != p->ipc_uid && uid != p->ipc_cuid) {
 484                         shifts += 3;
 485                         if (!groupmember(p->ipc_gid, cr) &&
 486                             !groupmember(p->ipc_cgid, cr))
 487                                 shifts += 3;
 488                 }
 489 
 490                 mode &= ~(p->ipc_mode << shifts);
 491 
 492                 if (mode == 0)
 493                         return (0);
 494         } else if (zoneid != GLOBAL_ZONEID)
 495                 return (EACCES);
 496 
 497         return (secpolicy_ipc_access(cr, p, mode));
 498 }
 499 
/*
 * There are two versions of the ipcperm_set/stat functions:
 *   ipcperm_???        - for use with IPC_SET/STAT
 *   ipcperm_???64      - for use with IPC_SET64/STAT64
 *
 * These functions encapsulate the common portions (copying, permission
 * checks, and auditing) of the set/stat operations.  All, except for
 * ipcperm_stat and ipcperm_stat64 which are void, return 0 on success
 * or a non-zero errno value on error.
 */
 510 
 511 int
 512 ipcperm_set(ipc_service_t *service, struct cred *cr,
 513     kipc_perm_t *kperm, struct ipc_perm *perm, model_t model)
 514 {
 515         STRUCT_HANDLE(ipc_perm, lperm);
 516         uid_t uid;
 517         gid_t gid;
 518         mode_t mode;
 519         zone_t *zone;
 520 
 521         ASSERT(IPC_LOCKED(service, kperm));
 522 
 523         STRUCT_SET_HANDLE(lperm, model, perm);
 524         uid = STRUCT_FGET(lperm, uid);
 525         gid = STRUCT_FGET(lperm, gid);
 526         mode = STRUCT_FGET(lperm, mode);
 527 
 528         if (secpolicy_ipc_owner(cr, kperm) != 0)
 529                 return (EPERM);
 530 
 531         zone = crgetzone(cr);
 532         if (!VALID_UID(uid, zone) || !VALID_GID(gid, zone))
 533                 return (EINVAL);
 534 
 535         kperm->ipc_uid = uid;
 536         kperm->ipc_gid = gid;
 537         kperm->ipc_mode = (mode & 0777) | (kperm->ipc_mode & ~0777);
 538 
 539         if (AU_AUDITING())
 540                 audit_ipcget(service->ipcs_atype, kperm);
 541 
 542         return (0);
 543 }
 544 
 545 void
 546 ipcperm_stat(struct ipc_perm *perm, kipc_perm_t *kperm, model_t model)
 547 {
 548         STRUCT_HANDLE(ipc_perm, lperm);
 549 
 550         STRUCT_SET_HANDLE(lperm, model, perm);
 551         STRUCT_FSET(lperm, uid, kperm->ipc_uid);
 552         STRUCT_FSET(lperm, gid, kperm->ipc_gid);
 553         STRUCT_FSET(lperm, cuid, kperm->ipc_cuid);
 554         STRUCT_FSET(lperm, cgid, kperm->ipc_cgid);
 555         STRUCT_FSET(lperm, mode, kperm->ipc_mode);
 556         STRUCT_FSET(lperm, seq, 0);
 557         STRUCT_FSET(lperm, key, kperm->ipc_key);
 558 }
 559 
 560 int
 561 ipcperm_set64(ipc_service_t *service, struct cred *cr,
 562     kipc_perm_t *kperm, ipc_perm64_t *perm64)
 563 {
 564         zone_t *zone;
 565 
 566         ASSERT(IPC_LOCKED(service, kperm));
 567 
 568         if (secpolicy_ipc_owner(cr, kperm) != 0)
 569                 return (EPERM);
 570 
 571         zone = crgetzone(cr);
 572         if (!VALID_UID(perm64->ipcx_uid, zone) ||
 573             !VALID_GID(perm64->ipcx_gid, zone))
 574                 return (EINVAL);
 575 
 576         kperm->ipc_uid = perm64->ipcx_uid;
 577         kperm->ipc_gid = perm64->ipcx_gid;
 578         kperm->ipc_mode = (perm64->ipcx_mode & 0777) |
 579             (kperm->ipc_mode & ~0777);
 580 
 581         if (AU_AUDITING())
 582                 audit_ipcget(service->ipcs_atype, kperm);
 583 
 584         return (0);
 585 }
 586 
 587 void
 588 ipcperm_stat64(ipc_perm64_t *perm64, kipc_perm_t *kperm)
 589 {
 590         perm64->ipcx_uid = kperm->ipc_uid;
 591         perm64->ipcx_gid = kperm->ipc_gid;
 592         perm64->ipcx_cuid = kperm->ipc_cuid;
 593         perm64->ipcx_cgid = kperm->ipc_cgid;
 594         perm64->ipcx_mode = kperm->ipc_mode;
 595         perm64->ipcx_key = kperm->ipc_key;
 596         perm64->ipcx_projid = kperm->ipc_proj->kpj_id;
 597         perm64->ipcx_zoneid = kperm->ipc_zoneid;
 598 }
 599 
 600 
 601 /*
 602  * ipc key comparator.
 603  */
 604 static int
 605 ipc_key_compar(const void *a, const void *b)
 606 {
 607         kipc_perm_t *aperm = (kipc_perm_t *)a;
 608         kipc_perm_t *bperm = (kipc_perm_t *)b;
 609         int ak = aperm->ipc_key;
 610         int bk = bperm->ipc_key;
 611         zoneid_t az;
 612         zoneid_t bz;
 613 
 614         ASSERT(ak != IPC_PRIVATE);
 615         ASSERT(bk != IPC_PRIVATE);
 616 
 617         /*
 618          * Compare key first, then zoneid.  This optimizes performance for
 619          * systems with only one zone, since the zone checks will only be
 620          * made when the keys match.
 621          */
 622         if (ak < bk)
 623                 return (-1);
 624         if (ak > bk)
 625                 return (1);
 626 
 627         /* keys match */
 628         az = aperm->ipc_zoneid;
 629         bz = bperm->ipc_zoneid;
 630         if (az < bz)
 631                 return (-1);
 632         if (az > bz)
 633                 return (1);
 634         return (0);
 635 }
 636 
 637 /*
 638  * Create an ipc service.
 639  */
 640 ipc_service_t *
 641 ipcs_create(const char *name, rctl_hndl_t proj_rctl, rctl_hndl_t zone_rctl,
 642     size_t size, ipc_func_t *dtor, ipc_func_t *rmid, int audit_type,
 643     size_t rctl_offset)
 644 {
 645         ipc_service_t *result;
 646 
 647         result = kmem_alloc(sizeof (ipc_service_t), KM_SLEEP);
 648 
 649         mutex_init(&result->ipcs_lock, NULL, MUTEX_ADAPTIVE, NULL);
 650         result->ipcs_count = 0;
 651         avl_create(&result->ipcs_keys, ipc_key_compar, size, 0);
 652         result->ipcs_tabsz = IPC_IDS_MIN;
 653         result->ipcs_table =
 654             kmem_zalloc(IPC_IDS_MIN * sizeof (ipc_slot_t), KM_SLEEP);
 655         result->ipcs_ssize = size;
 656         result->ipcs_ids = id_space_create(name, 0, IPC_IDS_MIN);
 657         result->ipcs_dtor = dtor;
 658         result->ipcs_rmid = rmid;
 659         result->ipcs_proj_rctl = proj_rctl;
 660         result->ipcs_zone_rctl = zone_rctl;
 661         result->ipcs_atype = audit_type;
 662         ASSERT(rctl_offset < sizeof (ipc_rqty_t));
 663         result->ipcs_rctlofs = rctl_offset;
 664         list_create(&result->ipcs_usedids, sizeof (kipc_perm_t),
 665             offsetof(kipc_perm_t, ipc_list));
 666 
 667         return (result);
 668 }
 669 
 670 /*
 671  * Destroy an ipc service.
 672  */
 673 void
 674 ipcs_destroy(ipc_service_t *service)
 675 {
 676         ipc_slot_t *slot, *next;
 677 
 678         mutex_enter(&service->ipcs_lock);
 679 
 680         ASSERT(service->ipcs_count == 0);
 681         avl_destroy(&service->ipcs_keys);
 682         list_destroy(&service->ipcs_usedids);
 683         id_space_destroy(service->ipcs_ids);
 684 
 685         for (slot = service->ipcs_table; slot; slot = next) {
 686                 next = slot[0].ipct_chain;
 687                 kmem_free(slot, service->ipcs_tabsz * sizeof (ipc_slot_t));
 688                 service->ipcs_tabsz >>= 1;
 689         }
 690 
 691         mutex_destroy(&service->ipcs_lock);
 692         kmem_free(service, sizeof (ipc_service_t));
 693 }
 694 
 695 /*
 696  * Takes the service lock.
 697  */
 698 void
 699 ipcs_lock(ipc_service_t *service)
 700 {
 701         mutex_enter(&service->ipcs_lock);
 702 }
 703 
 704 /*
 705  * Releases the service lock.
 706  */
 707 void
 708 ipcs_unlock(ipc_service_t *service)
 709 {
 710         mutex_exit(&service->ipcs_lock);
 711 }
 712 
 713 
 714 /*
 715  * Locks the specified ID.  Returns the ID's ID table index.
 716  */
 717 static int
 718 ipc_lock_internal(ipc_service_t *service, uint_t id)
 719 {
 720         uint_t  tabsz;
 721         uint_t  index;
 722         kmutex_t *mutex;
 723 
 724         for (;;) {
 725                 tabsz = service->ipcs_tabsz;
 726                 membar_consumer();
 727                 index = id & (tabsz - 1);
 728                 mutex = &service->ipcs_table[index].ipct_lock;
 729                 mutex_enter(mutex);
 730                 if (tabsz == service->ipcs_tabsz)
 731                         break;
 732                 mutex_exit(mutex);
 733         }
 734 
 735         return (index);
 736 }
 737 
 738 /*
 739  * Locks the specified ID.  Returns a pointer to the ID's lock.
 740  */
 741 kmutex_t *
 742 ipc_lock(ipc_service_t *service, int id)
 743 {
 744         uint_t index;
 745 
 746         /*
 747          * These assertions don't reflect requirements of the code
 748          * which follows, but they should never fail nonetheless.
 749          */
 750         ASSERT(id >= 0);
 751         ASSERT(IPC_INDEX(id) < service->ipcs_tabsz);
 752         index = ipc_lock_internal(service, id);
 753 
 754         return (&service->ipcs_table[index].ipct_lock);
 755 }
 756 
 757 /*
 758  * Checks to see if the held lock provided is the current lock for the
 759  * specified id.  If so, we return it instead of dropping it and
 760  * returning the result of ipc_lock.  This is intended to speed up cv
 761  * wakeups where we are left holding a lock which could be stale, but
 762  * probably isn't.
 763  */
 764 kmutex_t *
 765 ipc_relock(ipc_service_t *service, int id, kmutex_t *lock)
 766 {
 767         ASSERT(id >= 0);
 768         ASSERT(IPC_INDEX(id) < service->ipcs_tabsz);
 769         ASSERT(MUTEX_HELD(lock));
 770 
 771         if (&service->ipcs_table[IPC_INDEX(id)].ipct_lock == lock)
 772                 return (lock);
 773 
 774         mutex_exit(lock);
 775         return (ipc_lock(service, id));
 776 }
 777 
 778 /*
 779  * Performs an ID lookup.  If the ID doesn't exist or has been removed,
 780  * or isn't visible to the caller (because of zones), NULL is returned.
 781  * Otherwise, a pointer to the ID's perm structure and held ID lock are
 782  * returned.
 783  */
 784 kmutex_t *
 785 ipc_lookup(ipc_service_t *service, int id, kipc_perm_t **perm)
 786 {
 787         kipc_perm_t *result;
 788         uint_t index;
 789 
 790         /*
 791          * There is no need to check to see if id is in-range (i.e.
 792          * positive and fits into the table).  If it is out-of-range,
 793          * the id simply won't match the object's.
 794          */
 795 
 796         index = ipc_lock_internal(service, id);
 797         result = service->ipcs_table[index].ipct_data;
 798         if (result == NULL || result->ipc_id != (uint_t)id ||
 799             !HASZONEACCESS(curproc, result->ipc_zoneid)) {
 800                 mutex_exit(&service->ipcs_table[index].ipct_lock);
 801                 return (NULL);
 802         }
 803 
 804         ASSERT(IPC_SEQ(id) == service->ipcs_table[index].ipct_seq);
 805 
 806         *perm = result;
 807         if (AU_AUDITING())
 808                 audit_ipc(service->ipcs_atype, id, result);
 809 
 810         return (&service->ipcs_table[index].ipct_lock);
 811 }
 812 
 813 /*
 814  * Increase the reference count on an ID.
 815  */
 816 /*ARGSUSED*/
 817 void
 818 ipc_hold(ipc_service_t *s, kipc_perm_t *perm)
 819 {
 820         ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
 821         ASSERT(IPC_LOCKED(s, perm));
 822         perm->ipc_ref++;
 823 }
 824 
 825 /*
 826  * Decrease the reference count on an ID and drops the ID's lock.
 827  * Destroys the ID if the new reference count is zero.
 828  */
 829 void
 830 ipc_rele(ipc_service_t *s, kipc_perm_t *perm)
 831 {
 832         int nref;
 833 
 834         ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
 835         ASSERT(IPC_LOCKED(s, perm));
 836         ASSERT(perm->ipc_ref > 0);
 837 
 838         nref = --perm->ipc_ref;
 839         mutex_exit(&s->ipcs_table[IPC_INDEX(perm->ipc_id)].ipct_lock);
 840 
 841         if (nref == 0) {
 842                 ASSERT(IPC_FREE(perm));         /* ipc_rmid clears IPC_ALLOC */
 843                 s->ipcs_dtor(perm);
 844                 project_rele(perm->ipc_proj);
 845                 zone_rele_ref(&perm->ipc_zone_ref, ZONE_REF_IPC);
 846                 kmem_free(perm, s->ipcs_ssize);
 847         }
 848 }
 849 
 850 /*
 851  * Decrease the reference count on an ID, but don't drop the ID lock.
 852  * Used in cases where one thread needs to remove many references (on
 853  * behalf of other parties).
 854  */
 855 void
 856 ipc_rele_locked(ipc_service_t *s, kipc_perm_t *perm)
 857 {
 858         ASSERT(perm->ipc_ref > 1);
 859         ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
 860         ASSERT(IPC_LOCKED(s, perm));
 861 
 862         perm->ipc_ref--;
 863 }
 864 
 865 
 866 /*
 867  * Internal function to grow the service ID table.
 868  */
 869 static int
 870 ipc_grow(ipc_service_t *service)
 871 {
 872         ipc_slot_t *new, *old;
 873         int i, oldsize, newsize;
 874 
 875         ASSERT(MUTEX_HELD(&service->ipcs_lock));
 876         ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
 877 
 878         if (service->ipcs_tabsz == IPC_IDS_MAX)
 879                 return (ENOSPC);
 880 
 881         oldsize = service->ipcs_tabsz;
 882         newsize = oldsize << 1;
 883         new = kmem_zalloc(newsize * sizeof (ipc_slot_t), KM_NOSLEEP);
 884         if (new == NULL)
 885                 return (ENOSPC);
 886 
 887         old = service->ipcs_table;
 888         for (i = 0; i < oldsize; i++) {
 889                 mutex_enter(&old[i].ipct_lock);
 890                 mutex_enter(&new[i].ipct_lock);
 891 
 892                 new[i].ipct_seq = old[i].ipct_seq;
 893                 new[i].ipct_data = old[i].ipct_data;
 894                 old[i].ipct_data = NULL;
 895         }
 896 
 897         new[0].ipct_chain = old;
 898         service->ipcs_table = new;
 899         membar_producer();
 900         service->ipcs_tabsz = newsize;
 901 
 902         for (i = 0; i < oldsize; i++) {
 903                 mutex_exit(&old[i].ipct_lock);
 904                 mutex_exit(&new[i].ipct_lock);
 905         }
 906 
 907         id_space_extend(service->ipcs_ids, oldsize, service->ipcs_tabsz);
 908 
 909         return (0);
 910 }
 911 
 912 
 913 static int
 914 ipc_keylookup(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp)
 915 {
 916         kipc_perm_t *perm = NULL;
 917         avl_index_t where;
 918         kipc_perm_t template;
 919 
 920         ASSERT(MUTEX_HELD(&service->ipcs_lock));
 921 
 922         template.ipc_key = key;
 923         template.ipc_zoneid = getzoneid();
 924         if (perm = avl_find(&service->ipcs_keys, &template, &where)) {
 925                 ASSERT(!IPC_FREE(perm));
 926                 if ((flag & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
 927                         return (EEXIST);
 928                 if ((flag & 0777) & ~perm->ipc_mode) {
 929                         if (AU_AUDITING())
 930                                 audit_ipcget(NULL, (void *)perm);
 931                         return (EACCES);
 932                 }
 933                 *permp = perm;
 934                 return (0);
 935         } else if (flag & IPC_CREAT) {
 936                 *permp = NULL;
 937                 return (0);
 938         }
 939         return (ENOENT);
 940 }
 941 
 942 static int
 943 ipc_alloc_test(ipc_service_t *service, proc_t *pp)
 944 {
 945         ASSERT(MUTEX_HELD(&service->ipcs_lock));
 946 
 947         /*
 948          * Resizing the table first would result in a cleaner code
 949          * path, but would also allow a user to (permanently) double
 950          * the id table size in cases where the allocation would be
 951          * denied.  Hence we test the rctl first.
 952          */
 953 retry:
 954         mutex_enter(&pp->p_lock);
 955         if ((rctl_test(service->ipcs_proj_rctl, pp->p_task->tk_proj->kpj_rctls,
 956             pp, 1, RCA_SAFE) & RCT_DENY) ||
 957             (rctl_test(service->ipcs_zone_rctl, pp->p_zone->zone_rctls,
 958             pp, 1, RCA_SAFE) & RCT_DENY)) {
 959                 mutex_exit(&pp->p_lock);
 960                 return (ENOSPC);
 961         }
 962 
 963         if (service->ipcs_count == service->ipcs_tabsz) {
 964                 int error;
 965 
 966                 mutex_exit(&pp->p_lock);
 967                 if (error = ipc_grow(service))
 968                         return (error);
 969                 goto retry;
 970         }
 971 
 972         return (0);
 973 }
 974 
 975 /*
 976  * Given a key, search for or create the associated identifier.
 977  *
 978  * If IPC_CREAT is specified and the key isn't found, or if the key is
 979  * equal to IPC_PRIVATE, we return 0 and place a pointer to a newly
 980  * allocated object structure in permp.  A pointer to the held service
 981  * lock is placed in lockp.  ipc_mode's IPC_ALLOC bit is clear.
 982  *
 983  * If the key is found and no error conditions arise, we return 0 and
 984  * place a pointer to the existing object structure in permp.  A
 985  * pointer to the held ID lock is placed in lockp.  ipc_mode's
 986  * IPC_ALLOC bit is set.
 987  *
 988  * Otherwise, a non-zero errno value is returned.
 989  */
 990 int
 991 ipc_get(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp,
 992     kmutex_t **lockp)
 993 {
 994         kipc_perm_t     *perm = NULL;
 995         proc_t          *pp = curproc;
 996         int             error, index;
 997         cred_t          *cr = CRED();
 998 
 999         if (key != IPC_PRIVATE) {
1000 
1001                 mutex_enter(&service->ipcs_lock);
1002                 error = ipc_keylookup(service, key, flag, &perm);
1003                 if (perm != NULL)
1004                         index = ipc_lock_internal(service, perm->ipc_id);
1005                 mutex_exit(&service->ipcs_lock);
1006 
1007                 if (error) {
1008                         ASSERT(perm == NULL);
1009                         return (error);
1010                 }
1011 
1012                 if (perm) {
1013                         ASSERT(!IPC_FREE(perm));
1014                         *permp = perm;
1015                         *lockp = &service->ipcs_table[index].ipct_lock;
1016                         return (0);
1017                 }
1018 
1019                 /* Key not found; fall through */
1020         }
1021 
1022         perm = kmem_zalloc(service->ipcs_ssize, KM_SLEEP);
1023 
1024         mutex_enter(&service->ipcs_lock);
1025         if (error = ipc_alloc_test(service, pp)) {
1026                 mutex_exit(&service->ipcs_lock);
1027                 kmem_free(perm, service->ipcs_ssize);
1028                 return (error);
1029         }
1030 
1031         perm->ipc_cuid = perm->ipc_uid = crgetuid(cr);
1032         perm->ipc_cgid = perm->ipc_gid = crgetgid(cr);
1033         perm->ipc_zoneid = getzoneid();
1034         perm->ipc_mode = flag & 0777;
1035         perm->ipc_key = key;
1036         perm->ipc_ref = 1;
1037         perm->ipc_id = IPC_ID_INVAL;
1038         *permp = perm;
1039         *lockp = &service->ipcs_lock;
1040 
1041         return (0);
1042 }
1043 
1044 /*
1045  * Attempts to add the a newly created ID to the global namespace.  If
1046  * creating it would cause an error, we return the error.  If there is
1047  * the possibility that we could obtain the existing ID and return it
1048  * to the user, we return EAGAIN.  Otherwise, we return 0 with p_lock
1049  * and the service lock held.
1050  *
1051  * Since this should be only called after all initialization has been
1052  * completed, on failure we automatically invoke the destructor for the
1053  * object and deallocate the memory associated with it.
1054  */
1055 int
1056 ipc_commit_begin(ipc_service_t *service, key_t key, int flag,
1057     kipc_perm_t *newperm)
1058 {
1059         kipc_perm_t *perm;
1060         int error;
1061         proc_t *pp = curproc;
1062 
1063         ASSERT(newperm->ipc_ref == 1);
1064         ASSERT(IPC_FREE(newperm));
1065 
1066         /*
1067          * Set ipc_proj and ipc_zone_ref so that future calls to ipc_cleanup()
1068          * clean up the necessary state.  This must be done before the
1069          * potential call to ipcs_dtor() below.
1070          */
1071         newperm->ipc_proj = pp->p_task->tk_proj;
1072         zone_init_ref(&newperm->ipc_zone_ref);
1073         zone_hold_ref(pp->p_zone, &newperm->ipc_zone_ref, ZONE_REF_IPC);
1074 
1075         mutex_enter(&service->ipcs_lock);
1076         /*
1077          * Ensure that no-one has raced with us and created the key.
1078          */
1079         if ((key != IPC_PRIVATE) &&
1080             (((error = ipc_keylookup(service, key, flag, &perm)) != 0) ||
1081             (perm != NULL))) {
1082                 error = error ? error : EAGAIN;
1083                 goto errout;
1084         }
1085 
1086         /*
1087          * Ensure that no-one has raced with us and used the last of
1088          * the permissible ids, or the last of the free spaces in the
1089          * id table.
1090          */
1091         if (error = ipc_alloc_test(service, pp))
1092                 goto errout;
1093 
1094         ASSERT(MUTEX_HELD(&service->ipcs_lock));
1095         ASSERT(MUTEX_HELD(&pp->p_lock));
1096 
1097         return (0);
1098 errout:
1099         mutex_exit(&service->ipcs_lock);
1100         service->ipcs_dtor(newperm);
1101         zone_rele_ref(&newperm->ipc_zone_ref, ZONE_REF_IPC);
1102         kmem_free(newperm, service->ipcs_ssize);
1103         return (error);
1104 }
1105 
1106 /*
1107  * Commit the ID allocation transaction.  Called with p_lock and the
1108  * service lock held, both of which are dropped.  Returns the held ID
1109  * lock so the caller can extract the ID and perform ipcget auditing.
1110  */
1111 kmutex_t *
1112 ipc_commit_end(ipc_service_t *service, kipc_perm_t *perm)
1113 {
1114         ipc_slot_t *slot;
1115         avl_index_t where;
1116         int index;
1117         void *loc;
1118 
1119         ASSERT(MUTEX_HELD(&service->ipcs_lock));
1120         ASSERT(MUTEX_HELD(&curproc->p_lock));
1121 
1122         (void) project_hold(perm->ipc_proj);
1123         mutex_exit(&curproc->p_lock);
1124 
1125         /*
1126          * Pick out our slot.
1127          */
1128         service->ipcs_count++;
1129         index = id_alloc(service->ipcs_ids);
1130         ASSERT(index < service->ipcs_tabsz);
1131         slot = &service->ipcs_table[index];
1132         mutex_enter(&slot->ipct_lock);
1133         ASSERT(slot->ipct_data == NULL);
1134 
1135         /*
1136          * Update the perm structure.
1137          */
1138         perm->ipc_mode |= IPC_ALLOC;
1139         perm->ipc_id = (slot->ipct_seq << IPC_SEQ_SHIFT) | index;
1140 
1141         /*
1142          * Push into global visibility.
1143          */
1144         slot->ipct_data = perm;
1145         if (perm->ipc_key != IPC_PRIVATE) {
1146                 loc = avl_find(&service->ipcs_keys, perm, &where);
1147                 ASSERT(loc == NULL);
1148                 avl_insert(&service->ipcs_keys, perm, where);
1149         }
1150         list_insert_head(&service->ipcs_usedids, perm);
1151 
1152         /*
1153          * Update resource consumption.
1154          */
1155         IPC_PROJ_USAGE(perm, service) += 1;
1156         IPC_ZONE_USAGE(perm, service) += 1;
1157 
1158         mutex_exit(&service->ipcs_lock);
1159         return (&slot->ipct_lock);
1160 }
1161 
1162 /*
1163  * Clean up function, in case the allocation fails.  If called between
1164  * ipc_lookup and ipc_commit_begin, perm->ipc_proj will be 0 and we
1165  * merely free the perm structure.  If called after ipc_commit_begin,
1166  * we also drop locks and call the ID's destructor.
1167  */
1168 void
1169 ipc_cleanup(ipc_service_t *service, kipc_perm_t *perm)
1170 {
1171         ASSERT(IPC_FREE(perm));
1172         if (perm->ipc_proj) {
1173                 mutex_exit(&curproc->p_lock);
1174                 mutex_exit(&service->ipcs_lock);
1175                 service->ipcs_dtor(perm);
1176         }
1177         if (perm->ipc_zone_ref.zref_zone != NULL)
1178                 zone_rele_ref(&perm->ipc_zone_ref, ZONE_REF_IPC);
1179         kmem_free(perm, service->ipcs_ssize);
1180 }
1181 
1182 
1183 /*
1184  * Common code to remove an IPC object.  This should be called after
1185  * all permissions checks have been performed, and with the service
1186  * and ID locked.  Note that this does not remove the object from
1187  * the ipcs_usedids list (this needs to be done by the caller before
1188  * dropping the service lock).
1189  */
1190 static void
1191 ipc_remove(ipc_service_t *service, kipc_perm_t *perm)
1192 {
1193         int id = perm->ipc_id;
1194         int index;
1195 
1196         ASSERT(MUTEX_HELD(&service->ipcs_lock));
1197         ASSERT(IPC_LOCKED(service, perm));
1198 
1199         index = IPC_INDEX(id);
1200 
1201         service->ipcs_table[index].ipct_data = NULL;
1202 
1203         if (perm->ipc_key != IPC_PRIVATE)
1204                 avl_remove(&service->ipcs_keys, perm);
1205         list_remove(&service->ipcs_usedids, perm);
1206         perm->ipc_mode &= ~IPC_ALLOC;
1207 
1208         id_free(service->ipcs_ids, index);
1209 
1210         if (service->ipcs_table[index].ipct_seq++ == IPC_SEQ_MASK)
1211                 service->ipcs_table[index].ipct_seq = 0;
1212         service->ipcs_count--;
1213         ASSERT(IPC_PROJ_USAGE(perm, service) > 0);
1214         ASSERT(IPC_ZONE_USAGE(perm, service) > 0);
1215         IPC_PROJ_USAGE(perm, service) -= 1;
1216         IPC_ZONE_USAGE(perm, service) -= 1;
1217         ASSERT(service->ipcs_count || ((IPC_PROJ_USAGE(perm, service) == 0) &&
1218             (IPC_ZONE_USAGE(perm, service) == 0)));
1219 }
1220 
1221 /*
1222  * Perform actual IPC_RMID, either via ipc_rmid or due to a delayed *_RMID.
1223  */
1224 void
1225 ipc_rmsvc(ipc_service_t *service, kipc_perm_t *perm)
1226 {
1227         ASSERT(service->ipcs_count > 0);
1228         ASSERT(MUTEX_HELD(&service->ipcs_lock));
1229 
1230         ipc_remove(service, perm);
1231         mutex_exit(&service->ipcs_lock);
1232 
1233         /* perform any per-service removal actions */
1234         service->ipcs_rmid(perm);
1235 
1236         ipc_rele(service, perm);
1237 }
1238 
1239 /*
1240  * Common code to perform an IPC_RMID.  Returns an errno value on
1241  * failure, 0 on success.
1242  */
1243 int
1244 ipc_rmid(ipc_service_t *service, int id, cred_t *cr)
1245 {
1246         kipc_perm_t *perm;
1247         kmutex_t *lock;
1248 
1249         mutex_enter(&service->ipcs_lock);
1250 
1251         lock = ipc_lookup(service, id, &perm);
1252         if (lock == NULL) {
1253                 mutex_exit(&service->ipcs_lock);
1254                 return (EINVAL);
1255         }
1256 
1257         ASSERT(service->ipcs_count > 0);
1258 
1259         if (secpolicy_ipc_owner(cr, perm) != 0) {
1260                 mutex_exit(lock);
1261                 mutex_exit(&service->ipcs_lock);
1262                 return (EPERM);
1263         }
1264 
1265         /*
1266          * Nothing can fail from this point on.
1267          */
1268         ipc_rmsvc(service, perm);
1269 
1270         return (0);
1271 }
1272 
1273 /*
1274  * Implementation for shmids, semids, and msgids.  buf is the address
1275  * of the user buffer, nids is the size, and pnids is a pointer to
1276  * where we write the actual number of ids that [would] have been
1277  * copied out.
1278  */
1279 int
1280 ipc_ids(ipc_service_t *service, int *buf, uint_t nids, uint_t *pnids)
1281 {
1282         kipc_perm_t *perm;
1283         size_t  idsize = 0;
1284         int     error = 0;
1285         int     idcount;
1286         int     *ids;
1287         int     numids = 0;
1288         zoneid_t zoneid = getzoneid();
1289         int     global = INGLOBALZONE(curproc);
1290 
1291         if (buf == NULL)
1292                 nids = 0;
1293 
1294         /*
1295          * Get an accurate count of the total number of ids, and allocate a
1296          * staging buffer.  Since ipcs_count is always sane, we don't have
1297          * to take ipcs_lock for our first guess.  If there are no ids, or
1298          * we're in the global zone and the number of ids is greater than
1299          * the size of the specified buffer, we shunt to the end.  Otherwise,
1300          * we go through the id list looking for (and counting) what is
1301          * visible in the specified zone.
1302          */
1303         idcount = service->ipcs_count;
1304         for (;;) {
1305                 if ((global && idcount > nids) || idcount == 0) {
1306                         numids = idcount;
1307                         nids = 0;
1308                         goto out;
1309                 }
1310 
1311                 idsize = idcount * sizeof (int);
1312                 ids = kmem_alloc(idsize, KM_SLEEP);
1313 
1314                 mutex_enter(&service->ipcs_lock);
1315                 if (idcount >= service->ipcs_count)
1316                         break;
1317                 idcount = service->ipcs_count;
1318                 mutex_exit(&service->ipcs_lock);
1319 
1320                 if (idsize != 0) {
1321                         kmem_free(ids, idsize);
1322                         idsize = 0;
1323                 }
1324         }
1325 
1326         for (perm = list_head(&service->ipcs_usedids); perm != NULL;
1327             perm = list_next(&service->ipcs_usedids, perm)) {
1328                 ASSERT(!IPC_FREE(perm));
1329                 if (global || perm->ipc_zoneid == zoneid)
1330                         ids[numids++] = perm->ipc_id;
1331         }
1332         mutex_exit(&service->ipcs_lock);
1333 
1334         /*
1335          * If there isn't enough space to hold all of the ids, just
1336          * return the number of ids without copying out any of them.
1337          */
1338         if (nids < numids)
1339                 nids = 0;
1340 
1341 out:
1342         if (suword32(pnids, (uint32_t)numids) ||
1343             (nids != 0 && copyout(ids, buf, numids * sizeof (int))))
1344                 error = EFAULT;
1345         if (idsize != 0)
1346                 kmem_free(ids, idsize);
1347         return (error);
1348 }
1349 
1350 /*
1351  * Destroy IPC objects from the given service that are associated with
1352  * the given zone.
1353  *
1354  * We can't hold on to the service lock when freeing objects, so we
1355  * first search the service and move all the objects to a private
1356  * list, then walk through and free them after dropping the lock.
1357  */
1358 void
1359 ipc_remove_zone(ipc_service_t *service, zoneid_t zoneid)
1360 {
1361         kipc_perm_t *perm, *next;
1362         list_t rmlist;
1363         kmutex_t *lock;
1364 
1365         list_create(&rmlist, sizeof (kipc_perm_t),
1366             offsetof(kipc_perm_t, ipc_list));
1367 
1368         mutex_enter(&service->ipcs_lock);
1369         for (perm = list_head(&service->ipcs_usedids); perm != NULL;
1370             perm = next) {
1371                 next = list_next(&service->ipcs_usedids, perm);
1372                 if (perm->ipc_zoneid != zoneid)
1373                         continue;
1374 
1375                 /*
1376                  * Remove the object from the service, then put it on
1377                  * the removal list so we can defer the call to
1378                  * ipc_rele (which will actually free the structure).
1379                  * We need to do this since the destructor may grab
1380                  * the service lock.
1381                  */
1382                 ASSERT(!IPC_FREE(perm));
1383                 lock = ipc_lock(service, perm->ipc_id);
1384                 ipc_remove(service, perm);
1385                 mutex_exit(lock);
1386                 list_insert_tail(&rmlist, perm);
1387         }
1388         mutex_exit(&service->ipcs_lock);
1389 
1390         /*
1391          * Now that we've dropped the service lock, loop through the
1392          * private list freeing removed objects.
1393          */
1394         for (perm = list_head(&rmlist); perm != NULL; perm = next) {
1395                 next = list_next(&rmlist, perm);
1396                 list_remove(&rmlist, perm);
1397 
1398                 (void) ipc_lock(service, perm->ipc_id);
1399 
1400                 /* perform any per-service removal actions */
1401                 service->ipcs_rmid(perm);
1402 
1403                 /* release reference */
1404                 ipc_rele(service, perm);
1405         }
1406 
1407         list_destroy(&rmlist);
1408 }