1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2016 Joyent, Inc.
24 */
25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29
30 /*
31 * Common Inter-Process Communication routines.
32 *
33 * Overview
34 * --------
35 *
36 * The System V inter-process communication (IPC) facilities provide
37 * three services, message queues, semaphore arrays, and shared memory
38 * segments, which are managed using filesystem-like namespaces.
39 * Unlike a filesystem, these namespaces aren't mounted and accessible
40 * via a path -- a special API is used to interact with the different
41 * facilities (nothing precludes a VFS-based interface, but the
42 * standards require the special APIs). Furthermore, these special
43 * APIs don't use file descriptors, nor do they have an equivalent.
44 * This means that every operation which acts on an object needs to
45 * perform the equivalent of a lookup, which in turn means that every
46 * operation can fail if the specified object doesn't exist in the
47 * facility's namespace.
48 *
49 * Objects
50 * -------
51 *
52 * Each object in a namespace has a unique ID, which is assigned by the
53 * system and is used to identify the object when performing operations
54 * on it. An object can also have a key, which is selected by the user
55 * at allocation time and is used as a primitive rendezvous mechanism.
56 * An object without a key is said to have a "private" key.
57 *
58 * To perform an operation on an object given its key, one must first
59 * perform a lookup and obtain its ID. The ID is then used to identify
60 * the object when performing the operation. If the object has a
61 * private key, the ID must be known or obtained by other means.
62 *
63 * Each object in the namespace has a creator uid and gid, as well as
64 * an owner uid and gid. Both are initialized with the ruid and rgid
65 * of the process which created the object. The creator or current
66 * owner has the ability to change the owner of the object.
67 *
68 * Each object in the namespace has a set of file-like permissions,
69 * which, in conjunction with the creator and owner uid and gid,
70 * control read and write access to the object (execute is ignored).
71 *
72 * Each object also has a creator project and zone, which are used to
73 * account for its resource usage.
74 *
75 * Operations
76 * ----------
77 *
78 * There are five operations which all three facilities have in
79 * common: GET, SET, STAT, RMID, and IDS.
80 *
81 * GET, like open, is used to allocate a new object or obtain an
82 * existing one (using its key). It takes a key, a set of flags and
83 * mode bits, and optionally facility-specific arguments. If the key
84 * is IPC_PRIVATE, a new object with the requested mode bits and
85 * facility-specific attributes is created. If the key isn't
86 * IPC_PRIVATE, the GET will attempt to look up the specified key and
87 * either return that or create a new key depending on the state of the
88 * IPC_CREAT and IPC_EXCL flags, much like open. If GET needs to
89 * allocate an object, it can fail if there is insufficient space in
90 * the namespace (the maximum number of ids for the facility has been
91 * exceeded) or if the facility-specific initialization fails. If GET
92 * finds an object it can return, it can still fail if that object's
93 * permissions or facility-specific attributes are less than those
94 * requested.
95 *
96 * SET is used to adjust facility-specific parameters of an object, in
97 * addition to the owner uid and gid, and mode bits. It can fail if
98 * the caller isn't the creator or owner.
99 *
100 * STAT is used to obtain information about an object including the
101 * general object attributes described above as well as facility-specific
102 * information. It can fail if the caller doesn't have read
103 * permission.
104 *
105 * RMID removes an object from the namespace. Subsequent operations
106 * using the object's ID or key will fail (until another object is
107 * created with the same key or ID). Since an RMID may be performed
108 * asynchronously with other operations, it is possible that other
109 * threads and/or processes will have references to the object. While
110 * a facility may have actions which need to be performed at RMID time,
111 * only when all references are dropped can the object be destroyed.
112 * RMID will fail if the caller isn't the creator or owner.
113 *
114 * IDS obtains a list of all IDs in a facility's namespace. There are
115 * no facility-specific behaviors of IDS.
116 *
117 * Design
118 * ------
119 *
120 * Because some IPC facilities provide services whose operations must
121 * scale, a mechanism which allows fast, concurrent access to
122 * individual objects is needed. Of primary importance is object
123 * lookup based on ID (SET, STAT, others). Allocation (GET),
124 * deallocation (RMID), ID enumeration (IDS), and key lookups (GET) are
125 * lesser concerns, but should be implemented in such a way that ID
126 * lookup isn't affected (at least not in the common case).
127 *
128 * Starting from the bottom up, each object is represented by a
129 * structure, the first member of which must be a kipc_perm_t. The
130 * kipc_perm_t contains the information described above in "Objects", a
131 * reference count (since the object may continue to exist after it has
132 * been removed from the namespace), as well as some additional
133 * metadata used to manage data structure membership. These objects
134 * are dynamically allocated.
135 *
136 * Above the objects is a power-of-two sized table of ID slots. Each
137 * slot contains a pointer to an object, a sequence number, and a
138 * lock. An object's ID is a function of its slot's index in the table
139 * and its slot's sequence number. Every time a slot is released (via
140 * RMID) its sequence number is increased. Strictly speaking, the
141 * sequence number is unnecessary. However, checking the sequence
142 * number after a lookup provides a certain degree of robustness
143 * against the use of stale IDs (useful since nothing else does). When
144 * the table fills up, it is resized (see Locking, below).
145 *
146 * Of an ID's 31 bits (an ID is, as defined by the standards, a signed
147 * int) the top IPC_SEQ_BITS are used for the sequence number with the
148 * remainder holding the index into the table. The size of the table
149 * is therefore bounded at 2 ^ (31 - IPC_SEQ_BITS) slots.
150 *
151 * Managing this table is the ipc_service structure. It contains a
152 * pointer to the dynamically allocated ID table, a namespace-global
153 * lock, an id_space for managing the free space in the table, and
154 * sundry other metadata necessary for the maintenance of the
155 * namespace. An AVL tree of all keyed objects in the table (sorted by
156 * key) is used for key lookups. An unordered doubly linked list of
157 * all objects in the namespace (keyed or not) is maintained to
158 * facilitate ID enumeration.
159 *
160 * To help visualize these relationships, here's a picture of a
161 * namespace with a table of size 8 containing three objects
162 * (IPC_SEQ_BITS = 28):
163 *
164 *
165 * +-ipc_service_t--+
166 * | table *---\
167 * | keys *---+----------------------\
168 * | all ids *--\| |
169 * | | || |
170 * +----------------+ || |
171 * || |
172 * /-------------------/| |
173 * | /---------------/ |
174 * | | |
175 * | v |
176 * | +-0------+-1------+-2------+-3------+-4--+---+-5------+-6------+-7------+
177 * | | Seq=3 | | | Seq=1 | : | | | Seq=6 |
178 * | | | | | | : | | | |
179 * | +-*------+--------+--------+-*------+----+---+--------+--------+-*------+
180 * | | | | |
181 * | | /---/ | /----------------/
182 * | | | | |
183 * | v v | v
184 * | +-kipc_perm_t-+ +-kipc_perm_t-+ | +-kipc_perm_t-+
185 * | | id=0x30 | | id=0x13 | | | id=0x67 |
186 * | | key=0xfeed | | key=0xbeef | | | key=0xcafe |
187 * \->| [list] |<------>| [list] |<------>| [list] |
188 * /->| [avl left] x /--->| [avl left] x \--->| [avl left] *---\
189 * | | [avl right] x | | [avl right] x | [avl right] *---+-\
190 * | | | | | | | | | |
191 * | +-------------+ | +-------------+ +-------------+ | |
192 * | \---------------------------------------------/ |
193 * \--------------------------------------------------------------------/
194 *
195 * Locking
196 * -------
197 *
198 * There are three locks (or sets of locks) which are used to ensure
199 * correctness: the slot locks, the namespace lock, and p_lock (needed
200 * when checking resource controls). Their ordering is
201 *
202 * namespace lock -> slot lock 0 -> ... -> slot lock t -> p_lock
203 *
204 * Generally speaking, the namespace lock is used to protect allocation
205 * and removal from the namespace, ID enumeration, and resizing the ID
206 * table. Specifically:
207 *
208 * - write access to all fields of the ipc_service structure
209 * - read access to all variable fields of ipc_service except
210 * ipcs_tabsz (table size) and ipcs_table (the table pointer)
211 * - read/write access to ipc_avl, ipc_list in visible objects'
212 * kipc_perm structures (i.e. objects which have been removed from
213 * the namespace don't have this restriction)
214 * - write access to ipct_seq and ipct_data in the table entries
215 *
216 * A slot lock by itself is meaningless (except when resizing). Of
217 * greater interest conceptually is the notion of an ID lock -- a
218 * "virtual lock" which refers to whichever slot lock an object's ID
219 * currently hashes to.
220 *
221 * An ID lock protects all objects with that ID. Normally there will
222 * only be one such object: the one pointed to by the locked slot.
223 * However, if an object is removed from the namespace but retains
224 * references (e.g. an attached shared memory segment which has been
225 * RMIDed), it continues to use the lock associated with its original
226 * ID. While this can result in increased contention, operations which
227 * require taking the ID lock of removed objects are infrequent.
228 *
229 * Specifically, an ID lock protects the contents of an object's
230 * structure, including the contents of the embedded kipc_perm
231 * structure (but excluding those fields protected by the namespace
232 * lock). It also protects the ipct_seq and ipct_data fields in its
233 * slot (it is really a slot lock, after all).
234 *
235 * Recall that the table is resizable. To avoid requiring every ID
236 * lookup to take a global lock, a scheme much like that employed for
237 * file descriptors (see the comment above UF_ENTER in user.h) is
238 * used. Note that the sequence number and data pointer are protected
239 * by both the namespace lock and their slot lock. When the table is
240 * resized, the following operations take place:
241 *
242 * 1) A new table is allocated.
243 * 2) The global lock is taken.
244 * 3) All old slots are locked, in order.
245 * 4) The first half of the new slots are locked.
246 * 5) All table entries are copied to the new table, and cleared from
247 * the old table.
248 * 6) The ipc_service structure is updated to point to the new table.
249 * 7) The ipc_service structure is updated with the new table size.
250 * 8) All slot locks (old and new) are dropped.
251 *
252 * Because the slot locks are embedded in the table, ID lookups and
253 * other operations which require taking a slot lock need to verify
254 * that the lock taken wasn't part of a stale table. This is
255 * accomplished by checking the table size before and after
256 * dereferencing the table pointer and taking the lock: if the size
257 * changes, the lock must be dropped and reacquired. It is this
258 * additional work which distinguishes an ID lock from a slot lock.
259 *
260 * Because we can't guarantee that threads aren't accessing the old
261 * tables' locks, they are never deallocated. To prevent spurious
262 * reports of memory leaks, a pointer to the discarded table is stored
263 * in the new one in step 5. (Theoretically ipcs_destroy will delete
264 * the discarded tables, but it is only ever called from a failed _init
265 * invocation; i.e. when there aren't any.)
266 *
267 * Interfaces
268 * ----------
269 *
270 * The following interfaces are provided by the ipc module for use by
271 * the individual IPC facilities:
272 *
273 * ipcperm_access
274 *
275 * Given an object and a cred structure, determines if the requested
276 * access type is allowed.
277 *
278 * ipcperm_set, ipcperm_stat,
279 * ipcperm_set64, ipcperm_stat64
280 *
281 * Performs the common portion of a STAT or SET operation. All
282 * (except stat and stat64) can fail, so they should be called before
283 * any facility-specific non-reversible changes are made to an
284 * object. Similarly, the set operations have side effects, so they
285 * should only be called once the possibility of a facility-specific
286 * failure is eliminated.
287 *
288 * ipcs_create
289 *
290 * Creates an IPC namespace for use by an IPC facility.
291 *
292 * ipcs_destroy
293 *
294 * Destroys an IPC namespace.
295 *
296 * ipcs_lock, ipcs_unlock
297 *
298 * Takes the namespace lock. Ideally such access wouldn't be
299 * necessary, but there may be facility-specific data protected by
300 * this lock (e.g. project-wide resource consumption).
301 *
302 * ipc_lock
303 *
304 * Takes the lock associated with an ID. Can't fail.
305 *
306 * ipc_relock
307 *
308 * Like ipc_lock, but takes a pointer to a held lock. Drops the lock
309 * unless it is the one that would have been returned by ipc_lock.
310 * Used after calls to cv_wait.
311 *
312 * ipc_lookup
313 *
314 * Performs an ID lookup, returns with the ID lock held. Fails if
315 * the ID doesn't exist in the namespace.
316 *
317 * ipc_hold
318 *
319 * Takes a reference on an object.
320 *
321 * ipc_rele
322 *
323 * Releases a reference on an object, and drops the object's lock.
324 * Calls the object's destructor if last reference is being
325 * released.
326 *
327 * ipc_rele_locked
328 *
329 * Releases a reference on an object. Doesn't drop lock, and may
330 * only be called when there is more than one reference to the
331 * object.
332 *
333 * ipc_get, ipc_commit_begin, ipc_commit_end, ipc_cleanup
334 *
335 * Components of a GET operation. ipc_get performs a key lookup,
336 * allocating an object if the key isn't found (returning with the
337 * namespace lock and p_lock held), and returning the existing object
338 * if it is (with the object lock held). ipc_get doesn't modify the
339 * namespace.
340 *
341 * ipc_commit_begin begins the process of inserting an object
342 * allocated by ipc_get into the namespace, and can fail. If
343 * successful, it returns with the namespace lock and p_lock held.
344 * ipc_commit_end completes the process of inserting an object into
345 * the namespace and can't fail. The facility can call ipc_cleanup
346 * at any time following a successful ipc_get and before
347 * ipc_commit_end or a failed ipc_commit_begin to fail the
348 * allocation. Pseudocode for the suggested GET implementation:
349 *
350 * top:
351 *
352 * ipc_get
353 *
354 * if failure
355 * return
356 *
357 * if found {
358 *
359 * if object meets criteria
360 * unlock object and return success
361 * else
362 * unlock object and return failure
363 *
364 * } else {
365 *
366 * perform resource control tests
367 * drop namespace lock, p_lock
368 * if failure
369 * ipc_cleanup
370 *
371 * perform facility-specific initialization
372 * if failure {
373 * facility-specific cleanup
374 * ipc_cleanup
375 * }
376 *
377 * ( At this point the object should be destructible using the
378 * destructor given to ipcs_create )
379 *
380 * ipc_commit_begin
381 * if retry
382 * goto top
383 * else if failure
384 * return
385 *
386 * perform facility-specific resource control tests/allocations
387 * if failure
388 * ipc_cleanup
389 *
390 * ipc_commit_end
391 * perform any infallible post-creation actions, unlock, and return
392 *
393 * }
394 *
395 * ipc_rmid
396 *
397 * Performs the common portion of an RMID operation -- looks up an ID,
398 * removes it, and calls a facility-specific function to do
399 * RMID-time cleanup on the private portions of the object.
400 *
401 * ipc_ids
402 *
403 * Performs the common portion of an IDS operation.
404 *
405 */
406
407 #include <sys/types.h>
408 #include <sys/param.h>
409 #include <sys/cred.h>
410 #include <sys/policy.h>
411 #include <sys/proc.h>
412 #include <sys/user.h>
413 #include <sys/ipc.h>
414 #include <sys/ipc_impl.h>
415 #include <sys/errno.h>
416 #include <sys/systm.h>
417 #include <sys/list.h>
418 #include <sys/atomic.h>
419 #include <sys/zone.h>
420 #include <sys/task.h>
421 #include <sys/modctl.h>
422
423 #include <c2/audit.h>
424
425 static struct modlmisc modlmisc = {
426 &mod_miscops,
427 "common ipc code",
428 };
429
430 static struct modlinkage modlinkage = {
431 MODREV_1, (void *)&modlmisc, NULL
432 };
433
434
435 int
436 _init(void)
437 {
438 return (mod_install(&modlinkage));
439 }
440
441 int
442 _fini(void)
443 {
444 return (mod_remove(&modlinkage));
445 }
446
447 int
448 _info(struct modinfo *modinfop)
449 {
450 return (mod_info(&modlinkage, modinfop));
451 }
452
453
454 /*
455 * Check message, semaphore, or shared memory access permissions.
456 *
457 * This routine verifies the requested access permission for the current
458 * process. The zone ids are compared, and the appropriate bits are
459 * checked corresponding to owner, group (including the list of
460 * supplementary groups), or everyone. Zero is returned on success.
461 * On failure, the security policy is asked to check to override the
462 * permissions check; the policy will either return 0 for access granted
463 * or EACCES.
464 *
465 * Access to objects in other zones requires that the caller be in the
466 * global zone and have the appropriate IPC_DAC_* privilege, regardless
467 * of whether the uid or gid match those of the object. Note that
468 * cross-zone accesses will normally never get here since they'll
469 * fail in ipc_lookup or ipc_get.
470 *
471 * The arguments must be set up as follows:
472 * p - Pointer to permission structure to verify
473 * mode - Desired access permissions
474 */
475 int
476 ipcperm_access(kipc_perm_t *p, int mode, cred_t *cr)
477 {
478 int shifts = 0;
479 uid_t uid = crgetuid(cr);
480 zoneid_t zoneid = getzoneid();
481
482 if (p->ipc_zoneid == zoneid) {
483 if (uid != p->ipc_uid && uid != p->ipc_cuid) {
484 shifts += 3;
485 if (!groupmember(p->ipc_gid, cr) &&
486 !groupmember(p->ipc_cgid, cr))
487 shifts += 3;
488 }
489
490 mode &= ~(p->ipc_mode << shifts);
491
492 if (mode == 0)
493 return (0);
494 } else if (zoneid != GLOBAL_ZONEID)
495 return (EACCES);
496
497 return (secpolicy_ipc_access(cr, p, mode));
498 }
499
500 /*
501 * There are two versions of the ipcperm_set/stat functions:
502 * ipcperm_??? - for use with IPC_SET/STAT
503 * ipcperm_???64 - for use with IPC_SET64/STAT64
504 *
505 * These functions encapsulate the common portions (copying, permission
506 * checks, and auditing) of the set/stat operations. All, except for
507 * ipcperm_stat and ipcperm_stat64 which are void, return 0 on success
508 * or a non-zero errno value on error.
509 */
510
511 int
512 ipcperm_set(ipc_service_t *service, struct cred *cr,
513 kipc_perm_t *kperm, struct ipc_perm *perm, model_t model)
514 {
515 STRUCT_HANDLE(ipc_perm, lperm);
516 uid_t uid;
517 gid_t gid;
518 mode_t mode;
519 zone_t *zone;
520
521 ASSERT(IPC_LOCKED(service, kperm));
522
523 STRUCT_SET_HANDLE(lperm, model, perm);
524 uid = STRUCT_FGET(lperm, uid);
525 gid = STRUCT_FGET(lperm, gid);
526 mode = STRUCT_FGET(lperm, mode);
527
528 if (secpolicy_ipc_owner(cr, kperm) != 0)
529 return (EPERM);
530
531 zone = crgetzone(cr);
532 if (!VALID_UID(uid, zone) || !VALID_GID(gid, zone))
533 return (EINVAL);
534
535 kperm->ipc_uid = uid;
536 kperm->ipc_gid = gid;
537 kperm->ipc_mode = (mode & 0777) | (kperm->ipc_mode & ~0777);
538
539 if (AU_AUDITING())
540 audit_ipcget(service->ipcs_atype, kperm);
541
542 return (0);
543 }
544
545 void
546 ipcperm_stat(struct ipc_perm *perm, kipc_perm_t *kperm, model_t model)
547 {
548 STRUCT_HANDLE(ipc_perm, lperm);
549
550 STRUCT_SET_HANDLE(lperm, model, perm);
551 STRUCT_FSET(lperm, uid, kperm->ipc_uid);
552 STRUCT_FSET(lperm, gid, kperm->ipc_gid);
553 STRUCT_FSET(lperm, cuid, kperm->ipc_cuid);
554 STRUCT_FSET(lperm, cgid, kperm->ipc_cgid);
555 STRUCT_FSET(lperm, mode, kperm->ipc_mode);
556 STRUCT_FSET(lperm, seq, 0);
557 STRUCT_FSET(lperm, key, kperm->ipc_key);
558 }
559
560 int
561 ipcperm_set64(ipc_service_t *service, struct cred *cr,
562 kipc_perm_t *kperm, ipc_perm64_t *perm64)
563 {
564 zone_t *zone;
565
566 ASSERT(IPC_LOCKED(service, kperm));
567
568 if (secpolicy_ipc_owner(cr, kperm) != 0)
569 return (EPERM);
570
571 zone = crgetzone(cr);
572 if (!VALID_UID(perm64->ipcx_uid, zone) ||
573 !VALID_GID(perm64->ipcx_gid, zone))
574 return (EINVAL);
575
576 kperm->ipc_uid = perm64->ipcx_uid;
577 kperm->ipc_gid = perm64->ipcx_gid;
578 kperm->ipc_mode = (perm64->ipcx_mode & 0777) |
579 (kperm->ipc_mode & ~0777);
580
581 if (AU_AUDITING())
582 audit_ipcget(service->ipcs_atype, kperm);
583
584 return (0);
585 }
586
587 void
588 ipcperm_stat64(ipc_perm64_t *perm64, kipc_perm_t *kperm)
589 {
590 perm64->ipcx_uid = kperm->ipc_uid;
591 perm64->ipcx_gid = kperm->ipc_gid;
592 perm64->ipcx_cuid = kperm->ipc_cuid;
593 perm64->ipcx_cgid = kperm->ipc_cgid;
594 perm64->ipcx_mode = kperm->ipc_mode;
595 perm64->ipcx_key = kperm->ipc_key;
596 perm64->ipcx_projid = kperm->ipc_proj->kpj_id;
597 perm64->ipcx_zoneid = kperm->ipc_zoneid;
598 }
599
600
601 /*
602 * ipc key comparator.
603 */
604 static int
605 ipc_key_compar(const void *a, const void *b)
606 {
607 kipc_perm_t *aperm = (kipc_perm_t *)a;
608 kipc_perm_t *bperm = (kipc_perm_t *)b;
609 int ak = aperm->ipc_key;
610 int bk = bperm->ipc_key;
611 zoneid_t az;
612 zoneid_t bz;
613
614 ASSERT(ak != IPC_PRIVATE);
615 ASSERT(bk != IPC_PRIVATE);
616
617 /*
618 * Compare key first, then zoneid. This optimizes performance for
619 * systems with only one zone, since the zone checks will only be
620 * made when the keys match.
621 */
622 if (ak < bk)
623 return (-1);
624 if (ak > bk)
625 return (1);
626
627 /* keys match */
628 az = aperm->ipc_zoneid;
629 bz = bperm->ipc_zoneid;
630 if (az < bz)
631 return (-1);
632 if (az > bz)
633 return (1);
634 return (0);
635 }
636
637 /*
638 * Create an ipc service.
639 */
640 ipc_service_t *
641 ipcs_create(const char *name, rctl_hndl_t proj_rctl, rctl_hndl_t zone_rctl,
642 size_t size, ipc_func_t *dtor, ipc_func_t *rmid, int audit_type,
643 size_t rctl_offset)
644 {
645 ipc_service_t *result;
646
647 result = kmem_alloc(sizeof (ipc_service_t), KM_SLEEP);
648
649 mutex_init(&result->ipcs_lock, NULL, MUTEX_ADAPTIVE, NULL);
650 result->ipcs_count = 0;
651 avl_create(&result->ipcs_keys, ipc_key_compar, size, 0);
652 result->ipcs_tabsz = IPC_IDS_MIN;
653 result->ipcs_table =
654 kmem_zalloc(IPC_IDS_MIN * sizeof (ipc_slot_t), KM_SLEEP);
655 result->ipcs_ssize = size;
656 result->ipcs_ids = id_space_create(name, 0, IPC_IDS_MIN);
657 result->ipcs_dtor = dtor;
658 result->ipcs_rmid = rmid;
659 result->ipcs_proj_rctl = proj_rctl;
660 result->ipcs_zone_rctl = zone_rctl;
661 result->ipcs_atype = audit_type;
662 ASSERT(rctl_offset < sizeof (ipc_rqty_t));
663 result->ipcs_rctlofs = rctl_offset;
664 list_create(&result->ipcs_usedids, sizeof (kipc_perm_t),
665 offsetof(kipc_perm_t, ipc_list));
666
667 return (result);
668 }
669
670 /*
671 * Destroy an ipc service.
672 */
673 void
674 ipcs_destroy(ipc_service_t *service)
675 {
676 ipc_slot_t *slot, *next;
677
678 mutex_enter(&service->ipcs_lock);
679
680 ASSERT(service->ipcs_count == 0);
681 avl_destroy(&service->ipcs_keys);
682 list_destroy(&service->ipcs_usedids);
683 id_space_destroy(service->ipcs_ids);
684
685 for (slot = service->ipcs_table; slot; slot = next) {
686 next = slot[0].ipct_chain;
687 kmem_free(slot, service->ipcs_tabsz * sizeof (ipc_slot_t));
688 service->ipcs_tabsz >>= 1;
689 }
690
691 mutex_destroy(&service->ipcs_lock);
692 kmem_free(service, sizeof (ipc_service_t));
693 }
694
695 /*
696 * Takes the service lock.
697 */
698 void
699 ipcs_lock(ipc_service_t *service)
700 {
701 mutex_enter(&service->ipcs_lock);
702 }
703
704 /*
705 * Releases the service lock.
706 */
707 void
708 ipcs_unlock(ipc_service_t *service)
709 {
710 mutex_exit(&service->ipcs_lock);
711 }
712
713
714 /*
715 * Locks the specified ID. Returns the ID's ID table index.
716 */
717 static int
718 ipc_lock_internal(ipc_service_t *service, uint_t id)
719 {
720 uint_t tabsz;
721 uint_t index;
722 kmutex_t *mutex;
723
724 for (;;) {
725 tabsz = service->ipcs_tabsz;
726 membar_consumer();
727 index = id & (tabsz - 1);
728 mutex = &service->ipcs_table[index].ipct_lock;
729 mutex_enter(mutex);
730 if (tabsz == service->ipcs_tabsz)
731 break;
732 mutex_exit(mutex);
733 }
734
735 return (index);
736 }
737
738 /*
739 * Locks the specified ID. Returns a pointer to the ID's lock.
740 */
741 kmutex_t *
742 ipc_lock(ipc_service_t *service, int id)
743 {
744 uint_t index;
745
746 /*
747 * These assertions don't reflect requirements of the code
748 * which follows, but they should never fail nonetheless.
749 */
750 ASSERT(id >= 0);
751 ASSERT(IPC_INDEX(id) < service->ipcs_tabsz);
752 index = ipc_lock_internal(service, id);
753
754 return (&service->ipcs_table[index].ipct_lock);
755 }
756
757 /*
758 * Checks to see if the held lock provided is the current lock for the
759 * specified id. If so, we return it instead of dropping it and
760 * returning the result of ipc_lock. This is intended to speed up cv
761 * wakeups where we are left holding a lock which could be stale, but
762 * probably isn't.
763 */
764 kmutex_t *
765 ipc_relock(ipc_service_t *service, int id, kmutex_t *lock)
766 {
767 ASSERT(id >= 0);
768 ASSERT(IPC_INDEX(id) < service->ipcs_tabsz);
769 ASSERT(MUTEX_HELD(lock));
770
771 if (&service->ipcs_table[IPC_INDEX(id)].ipct_lock == lock)
772 return (lock);
773
774 mutex_exit(lock);
775 return (ipc_lock(service, id));
776 }
777
778 /*
779 * Performs an ID lookup. If the ID doesn't exist or has been removed,
780 * or isn't visible to the caller (because of zones), NULL is returned.
781 * Otherwise, a pointer to the ID's perm structure and held ID lock are
782 * returned.
783 */
784 kmutex_t *
785 ipc_lookup(ipc_service_t *service, int id, kipc_perm_t **perm)
786 {
787 kipc_perm_t *result;
788 uint_t index;
789
790 /*
791 * There is no need to check to see if id is in-range (i.e.
792 * positive and fits into the table). If it is out-of-range,
793 * the id simply won't match the object's.
794 */
795
796 index = ipc_lock_internal(service, id);
797 result = service->ipcs_table[index].ipct_data;
798 if (result == NULL || result->ipc_id != (uint_t)id ||
799 !HASZONEACCESS(curproc, result->ipc_zoneid)) {
800 mutex_exit(&service->ipcs_table[index].ipct_lock);
801 return (NULL);
802 }
803
804 ASSERT(IPC_SEQ(id) == service->ipcs_table[index].ipct_seq);
805
806 *perm = result;
807 if (AU_AUDITING())
808 audit_ipc(service->ipcs_atype, id, result);
809
810 return (&service->ipcs_table[index].ipct_lock);
811 }
812
813 /*
814 * Increase the reference count on an ID.
815 */
816 /*ARGSUSED*/
817 void
818 ipc_hold(ipc_service_t *s, kipc_perm_t *perm)
819 {
820 ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
821 ASSERT(IPC_LOCKED(s, perm));
822 perm->ipc_ref++;
823 }
824
825 /*
826 * Decrease the reference count on an ID and drops the ID's lock.
827 * Destroys the ID if the new reference count is zero.
828 */
829 void
830 ipc_rele(ipc_service_t *s, kipc_perm_t *perm)
831 {
832 int nref;
833
834 ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
835 ASSERT(IPC_LOCKED(s, perm));
836 ASSERT(perm->ipc_ref > 0);
837
838 nref = --perm->ipc_ref;
839 mutex_exit(&s->ipcs_table[IPC_INDEX(perm->ipc_id)].ipct_lock);
840
841 if (nref == 0) {
842 ASSERT(IPC_FREE(perm)); /* ipc_rmid clears IPC_ALLOC */
843 s->ipcs_dtor(perm);
844 project_rele(perm->ipc_proj);
845 zone_rele_ref(&perm->ipc_zone_ref, ZONE_REF_IPC);
846 kmem_free(perm, s->ipcs_ssize);
847 }
848 }
849
850 /*
851 * Decrease the reference count on an ID, but don't drop the ID lock.
852 * Used in cases where one thread needs to remove many references (on
853 * behalf of other parties).
854 */
855 void
856 ipc_rele_locked(ipc_service_t *s, kipc_perm_t *perm)
857 {
858 ASSERT(perm->ipc_ref > 1);
859 ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
860 ASSERT(IPC_LOCKED(s, perm));
861
862 perm->ipc_ref--;
863 }
864
865
866 /*
867 * Internal function to grow the service ID table.
868 */
869 static int
870 ipc_grow(ipc_service_t *service)
871 {
872 ipc_slot_t *new, *old;
873 int i, oldsize, newsize;
874
875 ASSERT(MUTEX_HELD(&service->ipcs_lock));
876 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
877
878 if (service->ipcs_tabsz == IPC_IDS_MAX)
879 return (ENOSPC);
880
881 oldsize = service->ipcs_tabsz;
882 newsize = oldsize << 1;
883 new = kmem_zalloc(newsize * sizeof (ipc_slot_t), KM_NOSLEEP);
884 if (new == NULL)
885 return (ENOSPC);
886
887 old = service->ipcs_table;
888 for (i = 0; i < oldsize; i++) {
889 mutex_enter(&old[i].ipct_lock);
890 mutex_enter(&new[i].ipct_lock);
891
892 new[i].ipct_seq = old[i].ipct_seq;
893 new[i].ipct_data = old[i].ipct_data;
894 old[i].ipct_data = NULL;
895 }
896
897 new[0].ipct_chain = old;
898 service->ipcs_table = new;
899 membar_producer();
900 service->ipcs_tabsz = newsize;
901
902 for (i = 0; i < oldsize; i++) {
903 mutex_exit(&old[i].ipct_lock);
904 mutex_exit(&new[i].ipct_lock);
905 }
906
907 id_space_extend(service->ipcs_ids, oldsize, service->ipcs_tabsz);
908
909 return (0);
910 }
911
912
913 static int
914 ipc_keylookup(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp)
915 {
916 kipc_perm_t *perm = NULL;
917 avl_index_t where;
918 kipc_perm_t template;
919
920 ASSERT(MUTEX_HELD(&service->ipcs_lock));
921
922 template.ipc_key = key;
923 template.ipc_zoneid = getzoneid();
924 if (perm = avl_find(&service->ipcs_keys, &template, &where)) {
925 ASSERT(!IPC_FREE(perm));
926 if ((flag & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
927 return (EEXIST);
928 if ((flag & 0777) & ~perm->ipc_mode) {
929 if (AU_AUDITING())
930 audit_ipcget(NULL, (void *)perm);
931 return (EACCES);
932 }
933 *permp = perm;
934 return (0);
935 } else if (flag & IPC_CREAT) {
936 *permp = NULL;
937 return (0);
938 }
939 return (ENOENT);
940 }
941
942 static int
943 ipc_alloc_test(ipc_service_t *service, proc_t *pp)
944 {
945 ASSERT(MUTEX_HELD(&service->ipcs_lock));
946
947 /*
948 * Resizing the table first would result in a cleaner code
949 * path, but would also allow a user to (permanently) double
950 * the id table size in cases where the allocation would be
951 * denied. Hence we test the rctl first.
952 */
953 retry:
954 mutex_enter(&pp->p_lock);
955 if ((rctl_test(service->ipcs_proj_rctl, pp->p_task->tk_proj->kpj_rctls,
956 pp, 1, RCA_SAFE) & RCT_DENY) ||
957 (rctl_test(service->ipcs_zone_rctl, pp->p_zone->zone_rctls,
958 pp, 1, RCA_SAFE) & RCT_DENY)) {
959 mutex_exit(&pp->p_lock);
960 return (ENOSPC);
961 }
962
963 if (service->ipcs_count == service->ipcs_tabsz) {
964 int error;
965
966 mutex_exit(&pp->p_lock);
967 if (error = ipc_grow(service))
968 return (error);
969 goto retry;
970 }
971
972 return (0);
973 }
974
975 /*
976 * Given a key, search for or create the associated identifier.
977 *
978 * If IPC_CREAT is specified and the key isn't found, or if the key is
979 * equal to IPC_PRIVATE, we return 0 and place a pointer to a newly
980 * allocated object structure in permp. A pointer to the held service
981 * lock is placed in lockp. ipc_mode's IPC_ALLOC bit is clear.
982 *
983 * If the key is found and no error conditions arise, we return 0 and
984 * place a pointer to the existing object structure in permp. A
985 * pointer to the held ID lock is placed in lockp. ipc_mode's
986 * IPC_ALLOC bit is set.
987 *
988 * Otherwise, a non-zero errno value is returned.
989 */
990 int
991 ipc_get(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp,
992 kmutex_t **lockp)
993 {
994 kipc_perm_t *perm = NULL;
995 proc_t *pp = curproc;
996 int error, index;
997 cred_t *cr = CRED();
998
999 if (key != IPC_PRIVATE) {
1000
1001 mutex_enter(&service->ipcs_lock);
1002 error = ipc_keylookup(service, key, flag, &perm);
1003 if (perm != NULL)
1004 index = ipc_lock_internal(service, perm->ipc_id);
1005 mutex_exit(&service->ipcs_lock);
1006
1007 if (error) {
1008 ASSERT(perm == NULL);
1009 return (error);
1010 }
1011
1012 if (perm) {
1013 ASSERT(!IPC_FREE(perm));
1014 *permp = perm;
1015 *lockp = &service->ipcs_table[index].ipct_lock;
1016 return (0);
1017 }
1018
1019 /* Key not found; fall through */
1020 }
1021
1022 perm = kmem_zalloc(service->ipcs_ssize, KM_SLEEP);
1023
1024 mutex_enter(&service->ipcs_lock);
1025 if (error = ipc_alloc_test(service, pp)) {
1026 mutex_exit(&service->ipcs_lock);
1027 kmem_free(perm, service->ipcs_ssize);
1028 return (error);
1029 }
1030
1031 perm->ipc_cuid = perm->ipc_uid = crgetuid(cr);
1032 perm->ipc_cgid = perm->ipc_gid = crgetgid(cr);
1033 perm->ipc_zoneid = getzoneid();
1034 perm->ipc_mode = flag & 0777;
1035 perm->ipc_key = key;
1036 perm->ipc_ref = 1;
1037 perm->ipc_id = IPC_ID_INVAL;
1038 *permp = perm;
1039 *lockp = &service->ipcs_lock;
1040
1041 return (0);
1042 }
1043
1044 /*
1045 * Attempts to add the a newly created ID to the global namespace. If
1046 * creating it would cause an error, we return the error. If there is
1047 * the possibility that we could obtain the existing ID and return it
1048 * to the user, we return EAGAIN. Otherwise, we return 0 with p_lock
1049 * and the service lock held.
1050 *
1051 * Since this should be only called after all initialization has been
1052 * completed, on failure we automatically invoke the destructor for the
1053 * object and deallocate the memory associated with it.
1054 */
1055 int
1056 ipc_commit_begin(ipc_service_t *service, key_t key, int flag,
1057 kipc_perm_t *newperm)
1058 {
1059 kipc_perm_t *perm;
1060 int error;
1061 proc_t *pp = curproc;
1062
1063 ASSERT(newperm->ipc_ref == 1);
1064 ASSERT(IPC_FREE(newperm));
1065
1066 /*
1067 * Set ipc_proj and ipc_zone_ref so that future calls to ipc_cleanup()
1068 * clean up the necessary state. This must be done before the
1069 * potential call to ipcs_dtor() below.
1070 */
1071 newperm->ipc_proj = pp->p_task->tk_proj;
1072 zone_init_ref(&newperm->ipc_zone_ref);
1073 zone_hold_ref(pp->p_zone, &newperm->ipc_zone_ref, ZONE_REF_IPC);
1074
1075 mutex_enter(&service->ipcs_lock);
1076 /*
1077 * Ensure that no-one has raced with us and created the key.
1078 */
1079 if ((key != IPC_PRIVATE) &&
1080 (((error = ipc_keylookup(service, key, flag, &perm)) != 0) ||
1081 (perm != NULL))) {
1082 error = error ? error : EAGAIN;
1083 goto errout;
1084 }
1085
1086 /*
1087 * Ensure that no-one has raced with us and used the last of
1088 * the permissible ids, or the last of the free spaces in the
1089 * id table.
1090 */
1091 if (error = ipc_alloc_test(service, pp))
1092 goto errout;
1093
1094 ASSERT(MUTEX_HELD(&service->ipcs_lock));
1095 ASSERT(MUTEX_HELD(&pp->p_lock));
1096
1097 return (0);
1098 errout:
1099 mutex_exit(&service->ipcs_lock);
1100 service->ipcs_dtor(newperm);
1101 zone_rele_ref(&newperm->ipc_zone_ref, ZONE_REF_IPC);
1102 kmem_free(newperm, service->ipcs_ssize);
1103 return (error);
1104 }
1105
1106 /*
1107 * Commit the ID allocation transaction. Called with p_lock and the
1108 * service lock held, both of which are dropped. Returns the held ID
1109 * lock so the caller can extract the ID and perform ipcget auditing.
1110 */
1111 kmutex_t *
1112 ipc_commit_end(ipc_service_t *service, kipc_perm_t *perm)
1113 {
1114 ipc_slot_t *slot;
1115 avl_index_t where;
1116 int index;
1117 void *loc;
1118
1119 ASSERT(MUTEX_HELD(&service->ipcs_lock));
1120 ASSERT(MUTEX_HELD(&curproc->p_lock));
1121
1122 (void) project_hold(perm->ipc_proj);
1123 mutex_exit(&curproc->p_lock);
1124
1125 /*
1126 * Pick out our slot.
1127 */
1128 service->ipcs_count++;
1129 index = id_alloc(service->ipcs_ids);
1130 ASSERT(index < service->ipcs_tabsz);
1131 slot = &service->ipcs_table[index];
1132 mutex_enter(&slot->ipct_lock);
1133 ASSERT(slot->ipct_data == NULL);
1134
1135 /*
1136 * Update the perm structure.
1137 */
1138 perm->ipc_mode |= IPC_ALLOC;
1139 perm->ipc_id = (slot->ipct_seq << IPC_SEQ_SHIFT) | index;
1140
1141 /*
1142 * Push into global visibility.
1143 */
1144 slot->ipct_data = perm;
1145 if (perm->ipc_key != IPC_PRIVATE) {
1146 loc = avl_find(&service->ipcs_keys, perm, &where);
1147 ASSERT(loc == NULL);
1148 avl_insert(&service->ipcs_keys, perm, where);
1149 }
1150 list_insert_head(&service->ipcs_usedids, perm);
1151
1152 /*
1153 * Update resource consumption.
1154 */
1155 IPC_PROJ_USAGE(perm, service) += 1;
1156 IPC_ZONE_USAGE(perm, service) += 1;
1157
1158 mutex_exit(&service->ipcs_lock);
1159 return (&slot->ipct_lock);
1160 }
1161
1162 /*
1163 * Clean up function, in case the allocation fails. If called between
1164 * ipc_lookup and ipc_commit_begin, perm->ipc_proj will be 0 and we
1165 * merely free the perm structure. If called after ipc_commit_begin,
1166 * we also drop locks and call the ID's destructor.
1167 */
1168 void
1169 ipc_cleanup(ipc_service_t *service, kipc_perm_t *perm)
1170 {
1171 ASSERT(IPC_FREE(perm));
1172 if (perm->ipc_proj) {
1173 mutex_exit(&curproc->p_lock);
1174 mutex_exit(&service->ipcs_lock);
1175 service->ipcs_dtor(perm);
1176 }
1177 if (perm->ipc_zone_ref.zref_zone != NULL)
1178 zone_rele_ref(&perm->ipc_zone_ref, ZONE_REF_IPC);
1179 kmem_free(perm, service->ipcs_ssize);
1180 }
1181
1182
1183 /*
1184 * Common code to remove an IPC object. This should be called after
1185 * all permissions checks have been performed, and with the service
1186 * and ID locked. Note that this does not remove the object from
1187 * the ipcs_usedids list (this needs to be done by the caller before
1188 * dropping the service lock).
1189 */
1190 static void
1191 ipc_remove(ipc_service_t *service, kipc_perm_t *perm)
1192 {
1193 int id = perm->ipc_id;
1194 int index;
1195
1196 ASSERT(MUTEX_HELD(&service->ipcs_lock));
1197 ASSERT(IPC_LOCKED(service, perm));
1198
1199 index = IPC_INDEX(id);
1200
1201 service->ipcs_table[index].ipct_data = NULL;
1202
1203 if (perm->ipc_key != IPC_PRIVATE)
1204 avl_remove(&service->ipcs_keys, perm);
1205 list_remove(&service->ipcs_usedids, perm);
1206 perm->ipc_mode &= ~IPC_ALLOC;
1207
1208 id_free(service->ipcs_ids, index);
1209
1210 if (service->ipcs_table[index].ipct_seq++ == IPC_SEQ_MASK)
1211 service->ipcs_table[index].ipct_seq = 0;
1212 service->ipcs_count--;
1213 ASSERT(IPC_PROJ_USAGE(perm, service) > 0);
1214 ASSERT(IPC_ZONE_USAGE(perm, service) > 0);
1215 IPC_PROJ_USAGE(perm, service) -= 1;
1216 IPC_ZONE_USAGE(perm, service) -= 1;
1217 ASSERT(service->ipcs_count || ((IPC_PROJ_USAGE(perm, service) == 0) &&
1218 (IPC_ZONE_USAGE(perm, service) == 0)));
1219 }
1220
1221 /*
1222 * Perform actual IPC_RMID, either via ipc_rmid or due to a delayed *_RMID.
1223 */
1224 void
1225 ipc_rmsvc(ipc_service_t *service, kipc_perm_t *perm)
1226 {
1227 ASSERT(service->ipcs_count > 0);
1228 ASSERT(MUTEX_HELD(&service->ipcs_lock));
1229
1230 ipc_remove(service, perm);
1231 mutex_exit(&service->ipcs_lock);
1232
1233 /* perform any per-service removal actions */
1234 service->ipcs_rmid(perm);
1235
1236 ipc_rele(service, perm);
1237 }
1238
1239 /*
1240 * Common code to perform an IPC_RMID. Returns an errno value on
1241 * failure, 0 on success.
1242 */
1243 int
1244 ipc_rmid(ipc_service_t *service, int id, cred_t *cr)
1245 {
1246 kipc_perm_t *perm;
1247 kmutex_t *lock;
1248
1249 mutex_enter(&service->ipcs_lock);
1250
1251 lock = ipc_lookup(service, id, &perm);
1252 if (lock == NULL) {
1253 mutex_exit(&service->ipcs_lock);
1254 return (EINVAL);
1255 }
1256
1257 ASSERT(service->ipcs_count > 0);
1258
1259 if (secpolicy_ipc_owner(cr, perm) != 0) {
1260 mutex_exit(lock);
1261 mutex_exit(&service->ipcs_lock);
1262 return (EPERM);
1263 }
1264
1265 /*
1266 * Nothing can fail from this point on.
1267 */
1268 ipc_rmsvc(service, perm);
1269
1270 return (0);
1271 }
1272
1273 /*
1274 * Implementation for shmids, semids, and msgids. buf is the address
1275 * of the user buffer, nids is the size, and pnids is a pointer to
1276 * where we write the actual number of ids that [would] have been
1277 * copied out.
1278 */
1279 int
1280 ipc_ids(ipc_service_t *service, int *buf, uint_t nids, uint_t *pnids)
1281 {
1282 kipc_perm_t *perm;
1283 size_t idsize = 0;
1284 int error = 0;
1285 int idcount;
1286 int *ids;
1287 int numids = 0;
1288 zoneid_t zoneid = getzoneid();
1289 int global = INGLOBALZONE(curproc);
1290
1291 if (buf == NULL)
1292 nids = 0;
1293
1294 /*
1295 * Get an accurate count of the total number of ids, and allocate a
1296 * staging buffer. Since ipcs_count is always sane, we don't have
1297 * to take ipcs_lock for our first guess. If there are no ids, or
1298 * we're in the global zone and the number of ids is greater than
1299 * the size of the specified buffer, we shunt to the end. Otherwise,
1300 * we go through the id list looking for (and counting) what is
1301 * visible in the specified zone.
1302 */
1303 idcount = service->ipcs_count;
1304 for (;;) {
1305 if ((global && idcount > nids) || idcount == 0) {
1306 numids = idcount;
1307 nids = 0;
1308 goto out;
1309 }
1310
1311 idsize = idcount * sizeof (int);
1312 ids = kmem_alloc(idsize, KM_SLEEP);
1313
1314 mutex_enter(&service->ipcs_lock);
1315 if (idcount >= service->ipcs_count)
1316 break;
1317 idcount = service->ipcs_count;
1318 mutex_exit(&service->ipcs_lock);
1319
1320 if (idsize != 0) {
1321 kmem_free(ids, idsize);
1322 idsize = 0;
1323 }
1324 }
1325
1326 for (perm = list_head(&service->ipcs_usedids); perm != NULL;
1327 perm = list_next(&service->ipcs_usedids, perm)) {
1328 ASSERT(!IPC_FREE(perm));
1329 if (global || perm->ipc_zoneid == zoneid)
1330 ids[numids++] = perm->ipc_id;
1331 }
1332 mutex_exit(&service->ipcs_lock);
1333
1334 /*
1335 * If there isn't enough space to hold all of the ids, just
1336 * return the number of ids without copying out any of them.
1337 */
1338 if (nids < numids)
1339 nids = 0;
1340
1341 out:
1342 if (suword32(pnids, (uint32_t)numids) ||
1343 (nids != 0 && copyout(ids, buf, numids * sizeof (int))))
1344 error = EFAULT;
1345 if (idsize != 0)
1346 kmem_free(ids, idsize);
1347 return (error);
1348 }
1349
1350 /*
1351 * Destroy IPC objects from the given service that are associated with
1352 * the given zone.
1353 *
1354 * We can't hold on to the service lock when freeing objects, so we
1355 * first search the service and move all the objects to a private
1356 * list, then walk through and free them after dropping the lock.
1357 */
1358 void
1359 ipc_remove_zone(ipc_service_t *service, zoneid_t zoneid)
1360 {
1361 kipc_perm_t *perm, *next;
1362 list_t rmlist;
1363 kmutex_t *lock;
1364
1365 list_create(&rmlist, sizeof (kipc_perm_t),
1366 offsetof(kipc_perm_t, ipc_list));
1367
1368 mutex_enter(&service->ipcs_lock);
1369 for (perm = list_head(&service->ipcs_usedids); perm != NULL;
1370 perm = next) {
1371 next = list_next(&service->ipcs_usedids, perm);
1372 if (perm->ipc_zoneid != zoneid)
1373 continue;
1374
1375 /*
1376 * Remove the object from the service, then put it on
1377 * the removal list so we can defer the call to
1378 * ipc_rele (which will actually free the structure).
1379 * We need to do this since the destructor may grab
1380 * the service lock.
1381 */
1382 ASSERT(!IPC_FREE(perm));
1383 lock = ipc_lock(service, perm->ipc_id);
1384 ipc_remove(service, perm);
1385 mutex_exit(lock);
1386 list_insert_tail(&rmlist, perm);
1387 }
1388 mutex_exit(&service->ipcs_lock);
1389
1390 /*
1391 * Now that we've dropped the service lock, loop through the
1392 * private list freeing removed objects.
1393 */
1394 for (perm = list_head(&rmlist); perm != NULL; perm = next) {
1395 next = list_next(&rmlist, perm);
1396 list_remove(&rmlist, perm);
1397
1398 (void) ipc_lock(service, perm->ipc_id);
1399
1400 /* perform any per-service removal actions */
1401 service->ipcs_rmid(perm);
1402
1403 /* release reference */
1404 ipc_rele(service, perm);
1405 }
1406
1407 list_destroy(&rmlist);
1408 }