1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2016 Joyent, Inc.
25 */
26
27 /*
28 * Contracts
29 * ---------
30 *
31 * Contracts are a primitive which enrich the relationships between
32 * processes and system resources. The primary purpose of contracts is
33 * to provide a means for the system to negotiate the departure from a
34 * binding relationship (e.g. pages locked in memory or a thread bound
35 * to processor), but they can also be used as a purely asynchronous
36 * error reporting mechanism as they are with process contracts.
37 *
38 * More information on how one interfaces with contracts and what
39 * contracts can do for you can be found in:
40 * PSARC 2003/193 Solaris Contracts
41 * PSARC 2004/460 Contracts addendum
42 *
43 * This file contains the core contracts framework. By itself it is
 * useless: it depends on the contracts filesystem (ctfs) to provide an
45 * interface to user processes and individual contract types to
46 * implement the process/resource relationships.
47 *
48 * Data structure overview
49 * -----------------------
50 *
51 * A contract is represented by a contract_t, which itself points to an
52 * encapsulating contract-type specific contract object. A contract_t
53 * contains the contract's static identity (including its terms), its
54 * linkage to various bookkeeping structures, the contract-specific
55 * event queue, and a reference count.
56 *
57 * A contract template is represented by a ct_template_t, which, like a
58 * contract, points to an encapsulating contract-type specific template
59 * object. A ct_template_t contains the template's terms.
60 *
61 * An event queue is represented by a ct_equeue_t, and consists of a
62 * list of events, a list of listeners, and a list of listeners who are
63 * waiting for new events (affectionately referred to as "tail
64 * listeners"). There are three queue types, defined by ct_listnum_t
65 * (an enum). An event may be on one of each type of queue
66 * simultaneously; the list linkage used by a queue is determined by
67 * its type.
68 *
69 * An event is represented by a ct_kevent_t, which contains mostly
70 * static event data (e.g. id, payload). It also has an array of
71 * ct_member_t structures, each of which contains a list_node_t and
72 * represent the event's linkage in a specific event queue.
73 *
74 * Each open of an event endpoint results in the creation of a new
75 * listener, represented by a ct_listener_t. In addition to linkage
76 * into the aforementioned lists in the event_queue, a ct_listener_t
77 * contains a pointer to the ct_kevent_t it is currently positioned at
78 * as well as a set of status flags and other administrative data.
79 *
80 * Each process has a list of contracts it owns, p_ct_held; a pointer
81 * to the process contract it is a member of, p_ct_process; the linkage
82 * for that membership, p_ct_member; and an array of event queue
83 * structures representing the process bundle queues.
84 *
85 * Each LWP has an array of its active templates, lwp_ct_active; and
86 * the most recently created contracts, lwp_ct_latest.
87 *
88 * A process contract has a list of member processes and a list of
89 * inherited contracts.
90 *
91 * There is a system-wide list of all contracts, as well as per-type
92 * lists of contracts.
93 *
94 * Lock ordering overview
95 * ----------------------
96 *
97 * Locks at the top are taken first:
98 *
99 * ct_evtlock
100 * regent ct_lock
101 * member ct_lock
102 * pidlock
103 * p_lock
104 * contract ctq_lock contract_lock
105 * pbundle ctq_lock
106 * cte_lock
107 * ct_reflock
108 *
109 * contract_lock and ctq_lock/cte_lock are not currently taken at the
110 * same time.
111 *
112 * Reference counting and locking
113 * ------------------------------
114 *
115 * A contract has a reference count, protected by ct_reflock.
116 * (ct_reflock is also used in a couple other places where atomic
117 * access to a variable is needed in an innermost context). A process
118 * maintains a hold on each contract it owns. A process contract has a
 * hold on each contract it has inherited.  Each event has a hold on
120 * the contract which generated it. Process contract templates have
121 * holds on the contracts referred to by their transfer terms. CTFS
122 * contract directory nodes have holds on contracts. Lastly, various
123 * code paths may temporarily take holds on contracts to prevent them
124 * from disappearing while other processing is going on. It is
125 * important to note that the global contract lists do not hold
126 * references on contracts; a contract is removed from these structures
127 * atomically with the release of its last reference.
128 *
129 * At a given point in time, a contract can either be owned by a
130 * process, inherited by a regent process contract, or orphaned. A
131 * contract_t's owner and regent pointers, ct_owner and ct_regent, are
132 * protected by its ct_lock. The linkage in the holder's (holder =
133 * owner or regent) list of contracts, ct_ctlist, is protected by
134 * whatever lock protects the holder's data structure. In order for
135 * these two directions to remain consistent, changing the holder of a
136 * contract requires that both locks be held.
137 *
138 * Events also have reference counts. There is one hold on an event
139 * per queue it is present on, in addition to those needed for the
140 * usual sundry reasons. Individual listeners are associated with
141 * specific queues, and increase a queue-specific reference count
142 * stored in the ct_member_t structure.
143 *
144 * The dynamic contents of an event (reference count and flags) are
145 * protected by its cte_lock, while the contents of the embedded
146 * ct_member_t structures are protected by the locks of the queues they
147 * are linked into. A ct_listener_t's contents are also protected by
148 * its event queue's ctq_lock.
149 *
150 * Resource controls
151 * -----------------
152 *
153 * Control: project.max-contracts (rc_project_contract)
154 * Description: Maximum number of contracts allowed a project.
155 *
156 * When a contract is created, the project's allocation is tested and
157 * (assuming success) increased. When the last reference to a
158 * contract is released, the creating project's allocation is
159 * decreased.
160 */
161
162 #include <sys/mutex.h>
163 #include <sys/debug.h>
164 #include <sys/types.h>
165 #include <sys/param.h>
166 #include <sys/kmem.h>
167 #include <sys/thread.h>
168 #include <sys/id_space.h>
169 #include <sys/avl.h>
170 #include <sys/list.h>
171 #include <sys/sysmacros.h>
172 #include <sys/proc.h>
173 #include <sys/ctfs.h>
174 #include <sys/contract_impl.h>
175 #include <sys/contract/process_impl.h>
176 #include <sys/dditypes.h>
177 #include <sys/contract/device_impl.h>
178 #include <sys/systm.h>
179 #include <sys/atomic.h>
180 #include <sys/cmn_err.h>
181 #include <sys/model.h>
182 #include <sys/policy.h>
183 #include <sys/zone.h>
184 #include <sys/task.h>
185 #include <sys/ddi.h>
186 #include <sys/sunddi.h>
187
188 extern rctl_hndl_t rc_project_contract;
189
190 static id_space_t *contract_ids;
191 static avl_tree_t contract_avl;
192 static kmutex_t contract_lock;
193
194 int ct_ntypes = CTT_MAXTYPE;
195 static ct_type_t *ct_types_static[CTT_MAXTYPE];
196 ct_type_t **ct_types = ct_types_static;
197 int ct_debug;
198
199 static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int);
200 static void cte_queue_destroy(ct_equeue_t *);
201 static void cte_queue_drain(ct_equeue_t *, int);
202 static void cte_trim(ct_equeue_t *, contract_t *);
203 static void cte_copy(ct_equeue_t *, ct_equeue_t *);
204
205 /*
206 * contract_compar
207 *
208 * A contract comparator which sorts on contract ID.
209 */
210 int
211 contract_compar(const void *x, const void *y)
212 {
213 const contract_t *ct1 = x;
214 const contract_t *ct2 = y;
215
216 if (ct1->ct_id < ct2->ct_id)
217 return (-1);
218 if (ct1->ct_id > ct2->ct_id)
219 return (1);
220 return (0);
221 }
222
/*
 * contract_init
 *
 * Initializes the contract subsystem, the specific contract types, and
 * process 0.  Called once at boot before any contracts can be created.
 */
void
contract_init(void)
{
	/*
	 * Initialize contract subsystem.  Contract IDs start at 1; 0 is
	 * never a valid contract ID.
	 */
	contract_ids = id_space_create("contracts", 1, INT_MAX);
	avl_create(&contract_avl, contract_compar, sizeof (contract_t),
	    offsetof(contract_t, ct_ctavl));
	mutex_init(&contract_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Initialize contract types.
	 */
	contract_process_init();
	contract_device_init();

	/*
	 * Initialize p0/lwp0 contract state.  p0 never runs through
	 * the normal process creation path, so its held-contract AVL
	 * must be created by hand here.
	 */
	avl_create(&p0.p_ct_held, contract_compar, sizeof (contract_t),
	    offsetof(contract_t, ct_ctlist));
}
252
/*
 * contract_dtor
 *
 * Performs basic destruction of the common portions of a contract.
 * Called from the failure path of contract_ctor and from
 * contract_rele.  The embedded event queue and vnode list are torn
 * down before the contract's locks are destroyed.
 */
static void
contract_dtor(contract_t *ct)
{
	cte_queue_destroy(&ct->ct_events);
	list_destroy(&ct->ct_vnodes);
	mutex_destroy(&ct->ct_reflock);
	mutex_destroy(&ct->ct_lock);
	mutex_destroy(&ct->ct_evtlock);
}
269
/*
 * contract_ctor
 *
 * Called by a contract type to initialize a contract.  Fails if the
 * max-contract resource control would have been exceeded.  After a
 * successful call to contract_ctor, the contract is unlocked and
 * visible in all namespaces; any type-specific initialization should
 * be completed before calling contract_ctor.  Returns 0 on success.
 *
 * Because not all callers can tolerate failure, a 0 value for canfail
 * instructs contract_ctor to ignore the project.max-contracts resource
 * control.  Obviously, this "out" should only be employed by callers
 * who are sufficiently constrained in other ways (e.g. newproc).
 */
int
contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data,
    ctflags_t flags, proc_t *author, int canfail)
{
	avl_index_t where;
	klwp_t *curlwp = ttolwp(curthread);

	/*
	 * It's possible that author is not curproc if the zone is creating
	 * a new process as a child of zsched.
	 */

	mutex_init(&ct->ct_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ct->ct_reflock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ct->ct_evtlock, NULL, MUTEX_DEFAULT, NULL);
	ct->ct_id = id_alloc(contract_ids);

	cte_queue_create(&ct->ct_events, CTEL_CONTRACT, 20, 0);
	list_create(&ct->ct_vnodes, sizeof (contract_vnode_t),
	    offsetof(contract_vnode_t, ctv_node));

	/*
	 * Instance data.  Static terms are copied out of the creating
	 * template; the event queue and bookkeeping fields start empty.
	 */
	ct->ct_ref = 2;		/* one for the holder, one for "latest" */
	ct->ct_cuid = crgetuid(CRED());
	ct->ct_type = type;
	ct->ct_data = data;
	gethrestime(&ct->ct_ctime);
	ct->ct_state = CTS_OWNED;
	ct->ct_flags = flags;
	ct->ct_regent = author->p_ct_process ?
	    &author->p_ct_process->conp_contract : NULL;
	ct->ct_ev_info = tmpl->ctmpl_ev_info;
	ct->ct_ev_crit = tmpl->ctmpl_ev_crit;
	ct->ct_cookie = tmpl->ctmpl_cookie;
	ct->ct_owner = author;
	ct->ct_ntime.ctm_total = -1;	/* -1: no negotiation in progress */
	ct->ct_qtime.ctm_total = -1;
	ct->ct_nevent = NULL;

	/*
	 * Test project.max-contracts.  On denial, everything allocated
	 * above must be unwound; marking the event queue CTQ_DEAD lets
	 * cte_queue_destroy (via contract_dtor) tear it down.
	 */
	mutex_enter(&author->p_lock);
	mutex_enter(&contract_lock);
	if (canfail && rctl_test(rc_project_contract,
	    author->p_task->tk_proj->kpj_rctls, author, 1,
	    RCA_SAFE) & RCT_DENY) {
		id_free(contract_ids, ct->ct_id);
		mutex_exit(&contract_lock);
		mutex_exit(&author->p_lock);
		ct->ct_events.ctq_flags |= CTQ_DEAD;
		contract_dtor(ct);
		return (1);
	}
	ct->ct_proj = author->p_task->tk_proj;
	ct->ct_proj->kpj_data.kpd_contract++;
	(void) project_hold(ct->ct_proj);
	mutex_exit(&contract_lock);

	/*
	 * Insert into holder's avl of contracts.
	 * We use an avl not because order is important, but because
	 * readdir of /proc/contracts requires we be able to use a
	 * scalar as an index into the process's list of contracts
	 */
	ct->ct_zoneid = author->p_zone->zone_id;
	ct->ct_czuniqid = ct->ct_mzuniqid = author->p_zone->zone_uniqid;
	VERIFY(avl_find(&author->p_ct_held, ct, &where) == NULL);
	avl_insert(&author->p_ct_held, ct, where);
	mutex_exit(&author->p_lock);

	/*
	 * Insert into global contract AVL
	 */
	mutex_enter(&contract_lock);
	VERIFY(avl_find(&contract_avl, ct, &where) == NULL);
	avl_insert(&contract_avl, ct, where);
	mutex_exit(&contract_lock);

	/*
	 * Insert into type AVL
	 */
	mutex_enter(&type->ct_type_lock);
	VERIFY(avl_find(&type->ct_type_avl, ct, &where) == NULL);
	avl_insert(&type->ct_type_avl, ct, where);
	type->ct_type_timestruc = ct->ct_ctime;
	mutex_exit(&type->ct_type_lock);

	/*
	 * Release the LWP's previous "latest" contract of this type (if
	 * any) and record this one; this consumes the second reference
	 * taken above.
	 */
	if (curlwp->lwp_ct_latest[type->ct_type_index])
		contract_rele(curlwp->lwp_ct_latest[type->ct_type_index]);
	curlwp->lwp_ct_latest[type->ct_type_index] = ct;

	return (0);
}
380
/*
 * contract_rele
 *
 * Releases a reference to a contract.  If the caller had the last
 * reference, the contract is removed from all namespaces, its
 * allocation against the max-contracts resource control is released,
 * and the contract type's free entry point is invoked for any
 * type-specific deconstruction and to (presumably) free the object.
 */
void
contract_rele(contract_t *ct)
{
	uint64_t nref;

	/*
	 * Capture the post-decrement count under ct_reflock so the
	 * zero-detection below races with no other releaser.
	 */
	mutex_enter(&ct->ct_reflock);
	ASSERT(ct->ct_ref > 0);
	nref = --ct->ct_ref;
	mutex_exit(&ct->ct_reflock);
	if (nref == 0) {
		/*
		 * ct_owner is cleared when it drops its reference.
		 */
		ASSERT(ct->ct_owner == NULL);
		ASSERT(ct->ct_evcnt == 0);

		/*
		 * Remove from global contract AVL
		 */
		mutex_enter(&contract_lock);
		avl_remove(&contract_avl, ct);
		mutex_exit(&contract_lock);

		/*
		 * Remove from type AVL
		 */
		mutex_enter(&ct->ct_type->ct_type_lock);
		avl_remove(&ct->ct_type->ct_type_avl, ct);
		mutex_exit(&ct->ct_type->ct_type_lock);

		/*
		 * Release the contract's ID
		 */
		id_free(contract_ids, ct->ct_id);

		/*
		 * Release project hold and give back the allocation
		 * counted against project.max-contracts at creation.
		 */
		mutex_enter(&contract_lock);
		ct->ct_proj->kpj_data.kpd_contract--;
		project_rele(ct->ct_proj);
		mutex_exit(&contract_lock);

		/*
		 * Free the contract; contop_free performs any
		 * type-specific teardown and frees the enclosing object.
		 */
		contract_dtor(ct);
		ct->ct_type->ct_type_ops->contop_free(ct);
	}
}
440
/*
 * contract_hold
 *
 * Adds a reference to a contract.  The caller must already hold a
 * reference or otherwise know the contract cannot disappear.
 */
void
contract_hold(contract_t *ct)
{
	mutex_enter(&ct->ct_reflock);
	ASSERT(ct->ct_ref < UINT64_MAX);	/* overflow would be a bug */
	ct->ct_ref++;
	mutex_exit(&ct->ct_reflock);
}
454
/*
 * contract_getzuniqid
 *
 * Get a contract's zone unique ID.  Needed because 64-bit reads and
 * writes aren't atomic on x86.  Since there are contexts where we are
 * unable to take ct_lock, we instead use ct_reflock; in actuality any
 * lock would do.
 */
uint64_t
contract_getzuniqid(contract_t *ct)
{
	uint64_t zuniqid;

	mutex_enter(&ct->ct_reflock);
	zuniqid = ct->ct_mzuniqid;
	mutex_exit(&ct->ct_reflock);

	return (zuniqid);
}
474
/*
 * contract_setzuniqid
 *
 * Sets a contract's zone unique ID.  See contract_getzuniqid for why
 * ct_reflock (rather than ct_lock) protects this field.
 */
void
contract_setzuniqid(contract_t *ct, uint64_t zuniqid)
{
	mutex_enter(&ct->ct_reflock);
	ct->ct_mzuniqid = zuniqid;
	mutex_exit(&ct->ct_reflock);
}
487
/*
 * contract_abandon
 *
 * Abandons the specified contract.  If "explicit" is clear, the
 * contract was implicitly abandoned (by process exit) and should be
 * inherited if its terms allow it and its owner was a member of a
 * regent contract.  Otherwise, the contract type's abandon entry point
 * is invoked to either destroy or orphan the contract.
 *
 * Returns 0 on success or EINVAL if p does not own the contract.
 */
int
contract_abandon(contract_t *ct, proc_t *p, int explicit)
{
	ct_equeue_t *q = NULL;
	contract_t *parent = &p->p_ct_process->conp_contract;
	int inherit = 0;

	VERIFY(p == curproc);

	mutex_enter(&ct->ct_lock);

	/*
	 * Multiple contract locks are taken contract -> subcontract.
	 * Check if the contract will be inherited so we can acquire
	 * all the necessary locks before making sensitive changes.
	 * ct_lock must be dropped and reacquired to respect that
	 * ordering (regent before member).
	 */
	if (!explicit && (ct->ct_flags & CTF_INHERIT) &&
	    contract_process_accept(parent)) {
		mutex_exit(&ct->ct_lock);
		mutex_enter(&parent->ct_lock);
		mutex_enter(&ct->ct_lock);
		inherit = 1;
	}

	/* Re-check ownership: it may have changed while ct_lock was dropped. */
	if (ct->ct_owner != p) {
		mutex_exit(&ct->ct_lock);
		if (inherit)
			mutex_exit(&parent->ct_lock);
		return (EINVAL);
	}

	mutex_enter(&p->p_lock);
	if (explicit)
		avl_remove(&p->p_ct_held, ct);
	ct->ct_owner = NULL;
	mutex_exit(&p->p_lock);

	/*
	 * Since we can't call cte_trim with the contract lock held,
	 * we grab the queue pointer here.
	 */
	if (p->p_ct_equeue)
		q = p->p_ct_equeue[ct->ct_type->ct_type_index];

	/*
	 * contop_abandon may destroy the contract so we rely on it to
	 * drop ct_lock.  We retain a reference on the contract so that
	 * the cte_trim which follows functions properly.  Even though
	 * cte_trim doesn't dereference the contract pointer, it is
	 * still necessary to retain a reference to the contract so
	 * that we don't trim events which are sent by a subsequently
	 * allocated contract infortuitously located at the same address.
	 */
	contract_hold(ct);

	if (inherit) {
		ct->ct_state = CTS_INHERITED;
		VERIFY(ct->ct_regent == parent);
		contract_process_take(parent, ct);

		/*
		 * We are handing off the process's reference to the
		 * parent contract.  For this reason, the order in
		 * which we drop the contract locks is also important.
		 */
		mutex_exit(&ct->ct_lock);
		mutex_exit(&parent->ct_lock);
	} else {
		ct->ct_regent = NULL;
		ct->ct_type->ct_type_ops->contop_abandon(ct);
	}

	/*
	 * ct_lock has been dropped; we can safely trim the event
	 * queue now.
	 */
	if (q) {
		mutex_enter(&q->ctq_lock);
		cte_trim(q, ct);
		mutex_exit(&q->ctq_lock);
	}

	contract_rele(ct);

	return (0);
}
583
/*
 * contract_newct
 *
 * Dispatches to the contract type's "new contract" entry point;
 * returns whatever contop_newct returns.
 */
int
contract_newct(contract_t *ct)
{
	return (ct->ct_type->ct_type_ops->contop_newct(ct));
}
589
/*
 * contract_adopt
 *
 * Adopts a contract.  After a successful call to this routine, the
 * previously inherited contract will belong to the calling process,
 * and its events will have been appended to its new owner's process
 * bundle queue.
 *
 * Returns 0 on success, EINVAL if the contract is not adoptable by p,
 * or EBUSY if the contract was adopted by someone else first.
 */
int
contract_adopt(contract_t *ct, proc_t *p)
{
	avl_index_t where;
	ct_equeue_t *q;
	contract_t *parent;

	ASSERT(p == curproc);

	/*
	 * Ensure the process has an event queue.  Checked by ASSERTs
	 * below.
	 */
	(void) contract_type_pbundle(ct->ct_type, p);

	mutex_enter(&ct->ct_lock);
	parent = ct->ct_regent;
	/*
	 * Only an inherited contract whose regent is our process
	 * contract, created in our zone, may be adopted.
	 */
	if (ct->ct_state != CTS_INHERITED ||
	    &p->p_ct_process->conp_contract != parent ||
	    p->p_zone->zone_uniqid != ct->ct_czuniqid) {
		mutex_exit(&ct->ct_lock);
		return (EINVAL);
	}

	/*
	 * Multiple contract locks are taken contract -> subcontract.
	 */
	mutex_exit(&ct->ct_lock);
	mutex_enter(&parent->ct_lock);
	mutex_enter(&ct->ct_lock);

	/*
	 * It is possible that the contract was adopted by someone else
	 * while its lock was dropped.  It isn't possible for the
	 * contract to have been inherited by a different regent
	 * contract.
	 */
	if (ct->ct_state != CTS_INHERITED) {
		mutex_exit(&parent->ct_lock);
		mutex_exit(&ct->ct_lock);
		return (EBUSY);
	}
	ASSERT(ct->ct_regent == parent);

	ct->ct_state = CTS_OWNED;

	contract_process_adopt(ct, p);

	mutex_enter(&p->p_lock);
	ct->ct_owner = p;
	VERIFY(avl_find(&p->p_ct_held, ct, &where) == NULL);
	avl_insert(&p->p_ct_held, ct, where);
	mutex_exit(&p->p_lock);

	/*
	 * Copy the contract's events onto the new owner's process
	 * bundle queue (created above by contract_type_pbundle).
	 */
	ASSERT(ct->ct_owner->p_ct_equeue);
	ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
	q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
	cte_copy(&ct->ct_events, q);
	mutex_exit(&ct->ct_lock);

	return (0);
}
660
/*
 * contract_ack
 *
 * Acknowledges receipt of a critical event.  Returns 0 on success,
 * ESRCH if no matching unacknowledged critical event was found, or
 * the contract type's ack/nack entry point result for negotiation
 * events.
 */
int
contract_ack(contract_t *ct, uint64_t evid, int ack)
{
	ct_kevent_t *ev;
	list_t *queue = &ct->ct_events.ctq_events;
	int error = ESRCH;
	int nego = 0;
	uint_t evtype;

	ASSERT(ack == CT_ACK || ack == CT_NACK);

	mutex_enter(&ct->ct_lock);
	mutex_enter(&ct->ct_events.ctq_lock);
	/*
	 * We are probably ACKing something near the head of the queue.
	 */
	for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
		if (ev->cte_id == evid) {
			if (ev->cte_flags & CTE_NEG)
				nego = 1;
			else if (ack == CT_NACK)
				break;	/* only negotiation events can be NACKed */
			if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
				/* unacknowledged critical event: mark it */
				ev->cte_flags |= CTE_ACK;
				ct->ct_evcnt--;
				evtype = ev->cte_type;	/* valid iff error == 0 */
				error = 0;
			}
			break;
		}
	}
	mutex_exit(&ct->ct_events.ctq_lock);
	mutex_exit(&ct->ct_lock);

	/*
	 * Not all critical events are negotiation events, however
	 * every negotiation event is a critical event.  NEGEND events
	 * are critical events but are not negotiation events
	 */
	if (error || !nego)
		return (error);

	/* Negotiation event: hand the decision to the contract type. */
	if (ack == CT_ACK)
		error = ct->ct_type->ct_type_ops->contop_ack(ct, evtype, evid);
	else
		error = ct->ct_type->ct_type_ops->contop_nack(ct, evtype, evid);

	return (error);
}
715
/*
 * contract_ack_inval
 *
 * Stub ack entry point for contract types which must never be asked
 * to acknowledge; panics if invoked.
 */
/*ARGSUSED*/
int
contract_ack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
{
	cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u",
	    ct->ct_id);
	return (ENOSYS);	/* not reached */
}
724
725 /*ARGSUSED*/
726 int
727 contract_qack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
728 {
729 cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u",
730 ct->ct_id);
731 return (ENOSYS);
732 }
733
/*
 * contract_qack_notsup
 *
 * Stub qack entry point for contract types which do not support
 * negotiation-quantum extension; always returns ERANGE.
 */
/*ARGSUSED*/
int
contract_qack_notsup(contract_t *ct, uint_t evtype, uint64_t evid)
{
	return (ERANGE);
}
740
/*
 * contract_qack
 *
 * Asks that negotiations be extended by another time quantum.
 * Returns ESRCH if evid does not name an unacknowledged negotiation
 * event; otherwise returns the contract type's qack entry point
 * result.
 */
int
contract_qack(contract_t *ct, uint64_t evid)
{
	ct_kevent_t *ev;
	list_t *queue = &ct->ct_events.ctq_events;
	int nego = 0;
	uint_t evtype;

	mutex_enter(&ct->ct_lock);
	mutex_enter(&ct->ct_events.ctq_lock);

	for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
		if (ev->cte_id == evid) {
			/* must be a negotiation event not yet ACKed */
			if ((ev->cte_flags & (CTE_NEG | CTE_ACK)) == CTE_NEG) {
				evtype = ev->cte_type;	/* valid iff nego */
				nego = 1;
			}
			break;
		}
	}
	mutex_exit(&ct->ct_events.ctq_lock);
	mutex_exit(&ct->ct_lock);

	/*
	 * Only a negotiated event (which is by definition also a critical
	 * event) which has not yet been acknowledged can provide
	 * time quanta to a negotiating owner process.
	 */
	if (!nego)
		return (ESRCH);

	return (ct->ct_type->ct_type_ops->contop_qack(ct, evtype, evid));
}
779
/*
 * contract_orphan
 *
 * Icky-poo.  This is a process-contract special, used to ACK all
 * critical messages when a contract is orphaned.  Caller must hold
 * ct_lock; the contract must not already be in the orphan state.
 */
void
contract_orphan(contract_t *ct)
{
	ct_kevent_t *ev;
	list_t *queue = &ct->ct_events.ctq_events;

	ASSERT(MUTEX_HELD(&ct->ct_lock));
	ASSERT(ct->ct_state != CTS_ORPHAN);

	mutex_enter(&ct->ct_events.ctq_lock);
	ct->ct_state = CTS_ORPHAN;
	/* ACK every unacknowledged critical event on the queue. */
	for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
		if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
			ev->cte_flags |= CTE_ACK;
			ct->ct_evcnt--;
		}
	}
	mutex_exit(&ct->ct_events.ctq_lock);

	/* All critical events are now accounted for. */
	ASSERT(ct->ct_evcnt == 0);
}
807
/*
 * contract_destroy
 *
 * Explicit contract destruction.  Called when contract is empty.
 * The contract will actually stick around until all of its events are
 * removed from the bundle and process bundle queues, and all fds
 * which refer to it are closed.  See contract_dtor if you are looking
 * for what destroys the contract structure.  Caller must hold ct_lock;
 * it is dropped and reacquired internally to respect lock ordering
 * with the type bundle queue's ctq_lock.
 */
void
contract_destroy(contract_t *ct)
{
	ASSERT(MUTEX_HELD(&ct->ct_lock));
	ASSERT(ct->ct_state != CTS_DEAD);
	ASSERT(ct->ct_owner == NULL);

	ct->ct_state = CTS_DEAD;
	cte_queue_drain(&ct->ct_events, 1);
	mutex_exit(&ct->ct_lock);
	mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
	cte_trim(&ct->ct_type->ct_type_events, ct);
	mutex_exit(&ct->ct_type->ct_type_events.ctq_lock);
	mutex_enter(&ct->ct_lock);
	ct->ct_type->ct_type_ops->contop_destroy(ct);
	mutex_exit(&ct->ct_lock);
	contract_rele(ct);
}
835
836 /*
837 * contract_vnode_get
838 *
839 * Obtains the contract directory vnode for this contract, if there is
840 * one. The caller must VN_RELE the vnode when they are through using
841 * it.
842 */
843 vnode_t *
844 contract_vnode_get(contract_t *ct, vfs_t *vfsp)
845 {
846 contract_vnode_t *ctv;
847 vnode_t *vp = NULL;
848
849 mutex_enter(&ct->ct_lock);
850 for (ctv = list_head(&ct->ct_vnodes); ctv != NULL;
851 ctv = list_next(&ct->ct_vnodes, ctv))
852 if (ctv->ctv_vnode->v_vfsp == vfsp) {
853 vp = ctv->ctv_vnode;
854 VN_HOLD(vp);
855 break;
856 }
857 mutex_exit(&ct->ct_lock);
858 return (vp);
859 }
860
/*
 * contract_vnode_set
 *
 * Sets the contract directory vnode for this contract.  We don't hold
 * a reference on the vnode because we don't want to prevent it from
 * being freed.  The vnode's inactive entry point will take care of
 * notifying us when it should be removed.
 */
void
contract_vnode_set(contract_t *ct, contract_vnode_t *ctv, vnode_t *vnode)
{
	mutex_enter(&ct->ct_lock);
	ctv->ctv_vnode = vnode;		/* unheld reference; see above */
	list_insert_head(&ct->ct_vnodes, ctv);
	mutex_exit(&ct->ct_lock);
}
877
/*
 * contract_vnode_clear
 *
 * Removes this vnode as the contract directory vnode for this
 * contract.  Called from a contract directory's inactive entry point,
 * this may return 0 indicating that the vnode gained another reference
 * because of a simultaneous call to contract_vnode_get.
 */
int
contract_vnode_clear(contract_t *ct, contract_vnode_t *ctv)
{
	vnode_t *vp = ctv->ctv_vnode;
	int result;

	/*
	 * ct_lock is taken before v_lock; holding both lets us decide
	 * atomically with respect to contract_vnode_get whether the
	 * vnode is truly inactive.
	 */
	mutex_enter(&ct->ct_lock);
	mutex_enter(&vp->v_lock);
	if (vp->v_count == 1) {
		/* last reference: detach the vnode from the contract */
		list_remove(&ct->ct_vnodes, ctv);
		result = 1;
	} else {
		/* raced with contract_vnode_get; just drop our count */
		vp->v_count--;
		result = 0;
	}
	mutex_exit(&vp->v_lock);
	mutex_exit(&ct->ct_lock);

	return (result);
}
906
/*
 * contract_exit
 *
 * Abandons all contracts held by process p, and drains process p's
 * bundle queues.  Called on process exit.
 */
void
contract_exit(proc_t *p)
{
	contract_t *ct;
	void *cookie = NULL;
	int i;

	ASSERT(p == curproc);

	/*
	 * Abandon held contracts.  contract_abandon knows enough not
	 * to remove the contract from the list a second time.  We are
	 * exiting, so no locks are needed here.  But because
	 * contract_abandon will take p_lock, we need to make sure we
	 * aren't holding it.
	 */
	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
	while ((ct = avl_destroy_nodes(&p->p_ct_held, &cookie)) != NULL)
		VERIFY(contract_abandon(ct, p, 0) == 0);

	/*
	 * Drain pbundles.  Because a process bundle queue could have
	 * been passed to another process, they may not be freed right
	 * away.
	 */
	if (p->p_ct_equeue) {
		for (i = 0; i < CTT_MAXTYPE; i++)
			if (p->p_ct_equeue[i])
				cte_queue_drain(p->p_ct_equeue[i], 0);
		kmem_free(p->p_ct_equeue, CTT_MAXTYPE * sizeof (ct_equeue_t *));
	}
}
945
946 static int
947 get_time_left(struct ct_time *t)
948 {
949 clock_t ticks_elapsed;
950 int secs_elapsed;
951
952 if (t->ctm_total == -1)
953 return (-1);
954
955 ticks_elapsed = ddi_get_lbolt() - t->ctm_start;
956 secs_elapsed = t->ctm_total - (drv_hztousec(ticks_elapsed)/MICROSEC);
957 return (secs_elapsed > 0 ? secs_elapsed : 0);
958 }
959
/*
 * contract_status_common
 *
 * Populates a ct_status structure.  Used by contract types in their
 * status entry points and ctfs when only common information is
 * requested.  Caller must hold ct_lock.  "model" selects the data
 * model (ILP32/LP64) used to lay out the status structure.
 */
void
contract_status_common(contract_t *ct, zone_t *zone, void *status,
    model_t model)
{
	STRUCT_HANDLE(ct_status, lstatus);

	STRUCT_SET_HANDLE(lstatus, model, status);
	ASSERT(MUTEX_HELD(&ct->ct_lock));
	/*
	 * The global zone, and the zone the contract was created in,
	 * see real holder/state information; other zones get a
	 * virtualized view below.
	 */
	if (zone->zone_uniqid == GLOBAL_ZONEUNIQID ||
	    zone->zone_uniqid == ct->ct_czuniqid) {
		zone_t *czone;
		zoneid_t zoneid = -1;

		/*
		 * Contracts don't have holds on the zones they were
		 * created by.  If the contract's zone no longer
		 * exists, we say its zoneid is -1.
		 */
		if (zone->zone_uniqid == ct->ct_czuniqid ||
		    ct->ct_czuniqid == GLOBAL_ZONEUNIQID) {
			zoneid = ct->ct_zoneid;
		} else if ((czone = zone_find_by_id(ct->ct_zoneid)) != NULL) {
			/*
			 * A zone with the same id exists; make sure it
			 * is the same incarnation before using its id.
			 */
			if (czone->zone_uniqid == ct->ct_mzuniqid)
				zoneid = ct->ct_zoneid;
			zone_rele(czone);
		}

		STRUCT_FSET(lstatus, ctst_zoneid, zoneid);
		STRUCT_FSET(lstatus, ctst_holder,
		    (ct->ct_state == CTS_OWNED) ? ct->ct_owner->p_pid :
		    (ct->ct_state == CTS_INHERITED) ? ct->ct_regent->ct_id : 0);
		STRUCT_FSET(lstatus, ctst_state, ct->ct_state);
	} else {
		/*
		 * We are looking at a contract which was created by a
		 * process outside of our zone.  We provide fake zone,
		 * holder, and state information.
		 */

		STRUCT_FSET(lstatus, ctst_zoneid, zone->zone_id);
		/*
		 * Since "zone" can't disappear until the calling ctfs
		 * is unmounted, zone_zsched must be valid.
		 */
		STRUCT_FSET(lstatus, ctst_holder, (ct->ct_state < CTS_ORPHAN) ?
		    zone->zone_zsched->p_pid : 0);
		STRUCT_FSET(lstatus, ctst_state, (ct->ct_state < CTS_ORPHAN) ?
		    CTS_OWNED : ct->ct_state);
	}
	/* Common (non-virtualized) fields. */
	STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt);
	STRUCT_FSET(lstatus, ctst_ntime, get_time_left(&ct->ct_ntime));
	STRUCT_FSET(lstatus, ctst_qtime, get_time_left(&ct->ct_qtime));
	STRUCT_FSET(lstatus, ctst_nevid,
	    ct->ct_nevent ? ct->ct_nevent->cte_id : 0);
	STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit);
	STRUCT_FSET(lstatus, ctst_informative, ct->ct_ev_info);
	STRUCT_FSET(lstatus, ctst_cookie, ct->ct_cookie);
	STRUCT_FSET(lstatus, ctst_type, ct->ct_type->ct_type_index);
	STRUCT_FSET(lstatus, ctst_id, ct->ct_id);
}
1027
1028 /*
1029 * contract_checkcred
1030 *
1031 * Determines if the specified contract is owned by a process with the
1032 * same effective uid as the specified credential. The caller must
1033 * ensure that the uid spaces are the same. Returns 1 on success.
1034 */
1035 static int
1036 contract_checkcred(contract_t *ct, const cred_t *cr)
1037 {
1038 proc_t *p;
1039 int fail = 1;
1040
1041 mutex_enter(&ct->ct_lock);
1042 if ((p = ct->ct_owner) != NULL) {
1043 mutex_enter(&p->p_crlock);
1044 fail = crgetuid(cr) != crgetuid(p->p_cred);
1045 mutex_exit(&p->p_crlock);
1046 }
1047 mutex_exit(&ct->ct_lock);
1048
1049 return (!fail);
1050 }
1051
1052 /*
1053 * contract_owned
1054 *
1055 * Determines if the specified credential can view an event generated
1056 * by the specified contract. If locked is set, the contract's ct_lock
1057 * is held and the caller will need to do additional work to determine
1058 * if they truly can see the event. Returns 1 on success.
1059 */
1060 int
1061 contract_owned(contract_t *ct, const cred_t *cr, int locked)
1062 {
1063 int owner, cmatch, zmatch;
1064 uint64_t zuniqid, mzuniqid;
1065 uid_t euid;
1066
1067 ASSERT(locked || MUTEX_NOT_HELD(&ct->ct_lock));
1068
1069 zuniqid = curproc->p_zone->zone_uniqid;
1070 mzuniqid = contract_getzuniqid(ct);
1071 euid = crgetuid(cr);
1072
1073 /*
1074 * owner: we own the contract
1075 * cmatch: we are in the creator's (and holder's) zone and our
1076 * uid matches the creator's or holder's
1077 * zmatch: we are in the effective zone of a contract created
1078 * in the global zone, and our uid matches that of the
1079 * virtualized holder's (zsched/kcred)
1080 */
1081 owner = (ct->ct_owner == curproc);
1082 cmatch = (zuniqid == ct->ct_czuniqid) &&
1083 ((ct->ct_cuid == euid) || (!locked && contract_checkcred(ct, cr)));
1084 zmatch = (ct->ct_czuniqid != mzuniqid) && (zuniqid == mzuniqid) &&
1085 (crgetuid(kcred) == euid);
1086
1087 return (owner || cmatch || zmatch);
1088 }
1089
1090
1091 /*
1092 * contract_type_init
1093 *
1094 * Called by contract types to register themselves with the contracts
1095 * framework.
1096 */
1097 ct_type_t *
1098 contract_type_init(ct_typeid_t type, const char *name, contops_t *ops,
1099 ct_f_default_t *dfault)
1100 {
1101 ct_type_t *result;
1102
1103 ASSERT(type < CTT_MAXTYPE);
1104
1105 result = kmem_alloc(sizeof (ct_type_t), KM_SLEEP);
1106
1107 mutex_init(&result->ct_type_lock, NULL, MUTEX_DEFAULT, NULL);
1108 avl_create(&result->ct_type_avl, contract_compar, sizeof (contract_t),
1109 offsetof(contract_t, ct_cttavl));
1110 cte_queue_create(&result->ct_type_events, CTEL_BUNDLE, 20, 0);
1111 result->ct_type_name = name;
1112 result->ct_type_ops = ops;
1113 result->ct_type_default = dfault;
1114 result->ct_type_evid = 0;
1115 gethrestime(&result->ct_type_timestruc);
1116 result->ct_type_index = type;
1117
1118 ct_types[type] = result;
1119
1120 return (result);
1121 }
1122
1123 /*
1124 * contract_type_count
1125 *
1126 * Obtains the number of contracts of a particular type.
1127 */
1128 int
1129 contract_type_count(ct_type_t *type)
1130 {
1131 ulong_t count;
1132
1133 mutex_enter(&type->ct_type_lock);
1134 count = avl_numnodes(&type->ct_type_avl);
1135 mutex_exit(&type->ct_type_lock);
1136
1137 return (count);
1138 }
1139
1140 /*
1141 * contract_type_max
1142 *
 * Obtains the maximum contract id of a particular type.
1144 */
1145 ctid_t
1146 contract_type_max(ct_type_t *type)
1147 {
1148 contract_t *ct;
1149 ctid_t res;
1150
1151 mutex_enter(&type->ct_type_lock);
1152 ct = avl_last(&type->ct_type_avl);
1153 res = ct ? ct->ct_id : -1;
1154 mutex_exit(&type->ct_type_lock);
1155
1156 return (res);
1157 }
1158
1159 /*
1160 * contract_max
1161 *
1162 * Obtains the maximum contract id.
1163 */
1164 ctid_t
1165 contract_max(void)
1166 {
1167 contract_t *ct;
1168 ctid_t res;
1169
1170 mutex_enter(&contract_lock);
1171 ct = avl_last(&contract_avl);
1172 res = ct ? ct->ct_id : -1;
1173 mutex_exit(&contract_lock);
1174
1175 return (res);
1176 }
1177
1178 /*
1179 * contract_lookup_common
1180 *
1181 * Common code for contract_lookup and contract_type_lookup. Takes a
1182 * pointer to an AVL tree to search in. Should be called with the
1183 * appropriate tree-protecting lock held (unfortunately unassertable).
1184 */
1185 static ctid_t
1186 contract_lookup_common(avl_tree_t *tree, uint64_t zuniqid, ctid_t current)
1187 {
1188 contract_t template, *ct;
1189 avl_index_t where;
1190 ctid_t res;
1191
1192 template.ct_id = current;
1193 ct = avl_find(tree, &template, &where);
1194 if (ct == NULL)
1195 ct = avl_nearest(tree, where, AVL_AFTER);
1196 if (zuniqid != GLOBAL_ZONEUNIQID)
1197 while (ct && (contract_getzuniqid(ct) != zuniqid))
1198 ct = AVL_NEXT(tree, ct);
1199 res = ct ? ct->ct_id : -1;
1200
1201 return (res);
1202 }
1203
1204 /*
1205 * contract_type_lookup
1206 *
1207 * Returns the next type contract after the specified id, visible from
1208 * the specified zone.
1209 */
1210 ctid_t
1211 contract_type_lookup(ct_type_t *type, uint64_t zuniqid, ctid_t current)
1212 {
1213 ctid_t res;
1214
1215 mutex_enter(&type->ct_type_lock);
1216 res = contract_lookup_common(&type->ct_type_avl, zuniqid, current);
1217 mutex_exit(&type->ct_type_lock);
1218
1219 return (res);
1220 }
1221
1222 /*
1223 * contract_lookup
1224 *
1225 * Returns the next contract after the specified id, visible from the
1226 * specified zone.
1227 */
1228 ctid_t
1229 contract_lookup(uint64_t zuniqid, ctid_t current)
1230 {
1231 ctid_t res;
1232
1233 mutex_enter(&contract_lock);
1234 res = contract_lookup_common(&contract_avl, zuniqid, current);
1235 mutex_exit(&contract_lock);
1236
1237 return (res);
1238 }
1239
1240 /*
1241 * contract_plookup
1242 *
1243 * Returns the next contract held by process p after the specified id,
1244 * visible from the specified zone. Made complicated by the fact that
1245 * contracts visible in a zone but held by processes outside of the
1246 * zone need to appear as being held by zsched to zone members.
1247 */
1248 ctid_t
1249 contract_plookup(proc_t *p, ctid_t current, uint64_t zuniqid)
1250 {
1251 contract_t template, *ct;
1252 avl_index_t where;
1253 ctid_t res;
1254
1255 template.ct_id = current;
1256 if (zuniqid != GLOBAL_ZONEUNIQID &&
1257 (p->p_flag & (SSYS|SZONETOP)) == (SSYS|SZONETOP)) {
1258 /* This is inelegant. */
1259 mutex_enter(&contract_lock);
1260 ct = avl_find(&contract_avl, &template, &where);
1261 if (ct == NULL)
1262 ct = avl_nearest(&contract_avl, where, AVL_AFTER);
1263 while (ct && !(ct->ct_state < CTS_ORPHAN &&
1264 contract_getzuniqid(ct) == zuniqid &&
1265 ct->ct_czuniqid == GLOBAL_ZONEUNIQID))
1266 ct = AVL_NEXT(&contract_avl, ct);
1267 res = ct ? ct->ct_id : -1;
1268 mutex_exit(&contract_lock);
1269 } else {
1270 mutex_enter(&p->p_lock);
1271 ct = avl_find(&p->p_ct_held, &template, &where);
1272 if (ct == NULL)
1273 ct = avl_nearest(&p->p_ct_held, where, AVL_AFTER);
1274 res = ct ? ct->ct_id : -1;
1275 mutex_exit(&p->p_lock);
1276 }
1277
1278 return (res);
1279 }
1280
1281 /*
1282 * contract_ptr_common
1283 *
1284 * Common code for contract_ptr and contract_type_ptr. Takes a pointer
1285 * to an AVL tree to search in. Should be called with the appropriate
1286 * tree-protecting lock held (unfortunately unassertable).
1287 */
1288 static contract_t *
1289 contract_ptr_common(avl_tree_t *tree, ctid_t id, uint64_t zuniqid)
1290 {
1291 contract_t template, *ct;
1292
1293 template.ct_id = id;
1294 ct = avl_find(tree, &template, NULL);
1295 if (ct == NULL || (zuniqid != GLOBAL_ZONEUNIQID &&
1296 contract_getzuniqid(ct) != zuniqid)) {
1297 return (NULL);
1298 }
1299
1300 /*
1301 * Check to see if a thread is in the window in contract_rele
1302 * between dropping the reference count and removing the
1303 * contract from the type AVL.
1304 */
1305 mutex_enter(&ct->ct_reflock);
1306 if (ct->ct_ref) {
1307 ct->ct_ref++;
1308 mutex_exit(&ct->ct_reflock);
1309 } else {
1310 mutex_exit(&ct->ct_reflock);
1311 ct = NULL;
1312 }
1313
1314 return (ct);
1315 }
1316
1317 /*
1318 * contract_type_ptr
1319 *
1320 * Returns a pointer to the contract with the specified id. The
1321 * contract is held, so the caller needs to release the reference when
1322 * it is through with the contract.
1323 */
1324 contract_t *
1325 contract_type_ptr(ct_type_t *type, ctid_t id, uint64_t zuniqid)
1326 {
1327 contract_t *ct;
1328
1329 mutex_enter(&type->ct_type_lock);
1330 ct = contract_ptr_common(&type->ct_type_avl, id, zuniqid);
1331 mutex_exit(&type->ct_type_lock);
1332
1333 return (ct);
1334 }
1335
1336 /*
1337 * contract_ptr
1338 *
1339 * Returns a pointer to the contract with the specified id. The
1340 * contract is held, so the caller needs to release the reference when
1341 * it is through with the contract.
1342 */
1343 contract_t *
1344 contract_ptr(ctid_t id, uint64_t zuniqid)
1345 {
1346 contract_t *ct;
1347
1348 mutex_enter(&contract_lock);
1349 ct = contract_ptr_common(&contract_avl, id, zuniqid);
1350 mutex_exit(&contract_lock);
1351
1352 return (ct);
1353 }
1354
1355 /*
1356 * contract_type_time
1357 *
1358 * Obtains the last time a contract of a particular type was created.
1359 */
1360 void
1361 contract_type_time(ct_type_t *type, timestruc_t *time)
1362 {
1363 mutex_enter(&type->ct_type_lock);
1364 *time = type->ct_type_timestruc;
1365 mutex_exit(&type->ct_type_lock);
1366 }
1367
1368 /*
1369 * contract_type_bundle
1370 *
1371 * Obtains a type's bundle queue.
1372 */
1373 ct_equeue_t *
1374 contract_type_bundle(ct_type_t *type)
1375 {
1376 return (&type->ct_type_events);
1377 }
1378
1379 /*
1380 * contract_type_pbundle
1381 *
 * Obtains a process's bundle queue.  If one doesn't exist, one is
1383 * created. Often used simply to ensure that a bundle queue is
1384 * allocated.
1385 */
ct_equeue_t *
contract_type_pbundle(ct_type_t *type, proc_t *pp)
{
	/*
	 * If there isn't an array of bundle queues, allocate one.
	 * The allocation happens before p_lock is taken because
	 * kmem_zalloc(KM_SLEEP) may block; if another thread installed
	 * an array first, we simply free our copy.
	 */
	if (pp->p_ct_equeue == NULL) {
		size_t size = CTT_MAXTYPE * sizeof (ct_equeue_t *);
		ct_equeue_t **qa = kmem_zalloc(size, KM_SLEEP);

		mutex_enter(&pp->p_lock);
		if (pp->p_ct_equeue)
			kmem_free(qa, size);
		else
			pp->p_ct_equeue = qa;
		mutex_exit(&pp->p_lock);
	}

	/*
	 * If there isn't a bundle queue of the required type, allocate
	 * one.  Same race-resolution scheme as above: the loser's
	 * freshly created (and reference counted) queue is drained,
	 * which disposes of it since it has no listeners.
	 */
	if (pp->p_ct_equeue[type->ct_type_index] == NULL) {
		ct_equeue_t *q = kmem_zalloc(sizeof (ct_equeue_t), KM_SLEEP);
		cte_queue_create(q, CTEL_PBUNDLE, 20, 1);

		mutex_enter(&pp->p_lock);
		if (pp->p_ct_equeue[type->ct_type_index])
			cte_queue_drain(q, 0);
		else
			pp->p_ct_equeue[type->ct_type_index] = q;
		mutex_exit(&pp->p_lock);
	}

	return (pp->p_ct_equeue[type->ct_type_index]);
}
1422
1423 /*
1424 * ctparam_copyin
1425 *
1426 * copyin a ct_param_t for CT_TSET or CT_TGET commands.
1427 * If ctparam_copyout() is not called after ctparam_copyin(), then
1428 * the caller must kmem_free() the buffer pointed by kparam->ctpm_kbuf.
1429 *
1430 * The copyin/out of ct_param_t is not done in ctmpl_set() and ctmpl_get()
1431 * because prctioctl() calls ctmpl_set() and ctmpl_get() while holding a
1432 * process lock.
1433 */
1434 int
1435 ctparam_copyin(const void *uaddr, ct_kparam_t *kparam, int flag, int cmd)
1436 {
1437 uint32_t size;
1438 void *ubuf;
1439 ct_param_t *param = &kparam->param;
1440 STRUCT_DECL(ct_param, uarg);
1441
1442 STRUCT_INIT(uarg, flag);
1443 if (copyin(uaddr, STRUCT_BUF(uarg), STRUCT_SIZE(uarg)))
1444 return (EFAULT);
1445 size = STRUCT_FGET(uarg, ctpm_size);
1446 ubuf = STRUCT_FGETP(uarg, ctpm_value);
1447
1448 if (size > CT_PARAM_MAX_SIZE || size == 0)
1449 return (EINVAL);
1450
1451 kparam->ctpm_kbuf = kmem_alloc(size, KM_SLEEP);
1452 if (cmd == CT_TSET) {
1453 if (copyin(ubuf, kparam->ctpm_kbuf, size)) {
1454 kmem_free(kparam->ctpm_kbuf, size);
1455 return (EFAULT);
1456 }
1457 }
1458 param->ctpm_id = STRUCT_FGET(uarg, ctpm_id);
1459 param->ctpm_size = size;
1460 param->ctpm_value = ubuf;
1461 kparam->ret_size = 0;
1462
1463 return (0);
1464 }
1465
1466 /*
1467 * ctparam_copyout
1468 *
1469 * copyout a ct_kparam_t and frees the buffer pointed by the member
1470 * ctpm_kbuf of ct_kparam_t
1471 */
1472 int
1473 ctparam_copyout(ct_kparam_t *kparam, void *uaddr, int flag)
1474 {
1475 int r = 0;
1476 ct_param_t *param = &kparam->param;
1477 STRUCT_DECL(ct_param, uarg);
1478
1479 STRUCT_INIT(uarg, flag);
1480
1481 STRUCT_FSET(uarg, ctpm_id, param->ctpm_id);
1482 STRUCT_FSET(uarg, ctpm_size, kparam->ret_size);
1483 STRUCT_FSETP(uarg, ctpm_value, param->ctpm_value);
1484 if (copyout(STRUCT_BUF(uarg), uaddr, STRUCT_SIZE(uarg))) {
1485 r = EFAULT;
1486 goto error;
1487 }
1488 if (copyout(kparam->ctpm_kbuf, param->ctpm_value,
1489 MIN(kparam->ret_size, param->ctpm_size))) {
1490 r = EFAULT;
1491 }
1492
1493 error:
1494 kmem_free(kparam->ctpm_kbuf, param->ctpm_size);
1495
1496 return (r);
1497 }
1498
1499 /*
1500 * ctmpl_free
1501 *
1502 * Frees a template.
1503 */
1504 void
1505 ctmpl_free(ct_template_t *template)
1506 {
1507 mutex_destroy(&template->ctmpl_lock);
1508 template->ctmpl_ops->ctop_free(template);
1509 }
1510
1511 /*
1512 * ctmpl_dup
1513 *
1514 * Creates a copy of a template.
1515 */
1516 ct_template_t *
1517 ctmpl_dup(ct_template_t *template)
1518 {
1519 ct_template_t *new;
1520
1521 if (template == NULL)
1522 return (NULL);
1523
1524 new = template->ctmpl_ops->ctop_dup(template);
1525 /*
1526 * ctmpl_lock was taken by ctop_dup's call to ctmpl_copy and
1527 * should have remain held until now.
1528 */
1529 mutex_exit(&template->ctmpl_lock);
1530
1531 return (new);
1532 }
1533
1534 /*
1535 * ctmpl_set
1536 *
1537 * Sets the requested terms of a template.
1538 */
int
ctmpl_set(ct_template_t *template, ct_kparam_t *kparam, const cred_t *cr)
{
	int result = 0;
	ct_param_t *param = &kparam->param;
	uint64_t param_value;

	/*
	 * The three common terms are fixed-size uint64_t values; fetch
	 * the value up front.  param_value is only read below in the
	 * switch cases for which it is assigned here.
	 */
	if (param->ctpm_id == CTP_COOKIE ||
	    param->ctpm_id == CTP_EV_INFO ||
	    param->ctpm_id == CTP_EV_CRITICAL) {
		if (param->ctpm_size < sizeof (uint64_t)) {
			return (EINVAL);
		} else {
			param_value = *(uint64_t *)kparam->ctpm_kbuf;
		}
	}

	mutex_enter(&template->ctmpl_lock);
	switch (param->ctpm_id) {
	case CTP_COOKIE:
		template->ctmpl_cookie = param_value;
		break;
	case CTP_EV_INFO:
		/* Only events defined by the contract type may be set. */
		if (param_value & ~(uint64_t)template->ctmpl_ops->allevents)
			result = EINVAL;
		else
			template->ctmpl_ev_info = param_value;
		break;
	case CTP_EV_CRITICAL:
		/* Likewise, the critical set must be a subset of allevents. */
		if (param_value & ~(uint64_t)template->ctmpl_ops->allevents) {
			result = EINVAL;
			break;
		} else if ((~template->ctmpl_ev_crit & param_value) == 0) {
			/*
			 * Assume that a pure reduction of the critical
			 * set is allowed by the contract type.
			 */
			template->ctmpl_ev_crit = param_value;
			break;
		}
		/*
		 * There may be restrictions on what we can make
		 * critical, so we defer to the judgement of the
		 * contract type.
		 */
		/* FALLTHROUGH */
	default:
		/* Type-specific terms are handled by the type's ctop_set. */
		result = template->ctmpl_ops->ctop_set(template, kparam, cr);
	}
	mutex_exit(&template->ctmpl_lock);

	return (result);
}
1592
1593 /*
1594 * ctmpl_get
1595 *
1596 * Obtains the requested terms from a template.
1597 *
1598 * If the term requested is a variable-sized term and the buffer
1599 * provided is too small for the data, we truncate the data and return
1600 * the buffer size necessary to fit the term in kparam->ret_size. If the
1601 * term requested is fix-sized (uint64_t) and the buffer provided is too
1602 * small, we return EINVAL. This should never happen if you're using
1603 * libcontract(3LIB), only if you call ioctl with a hand constructed
1604 * ct_param_t argument.
1605 *
1606 * Currently, only contract specific parameters have variable-sized
1607 * parameters.
1608 */
int
ctmpl_get(ct_template_t *template, ct_kparam_t *kparam)
{
	int result = 0;
	ct_param_t *param = &kparam->param;
	uint64_t *param_value;

	/*
	 * The three common terms are fixed-size uint64_t values; the
	 * caller's buffer must be at least that big.  param_value is
	 * only dereferenced below in the cases for which it is
	 * assigned here.
	 */
	if (param->ctpm_id == CTP_COOKIE ||
	    param->ctpm_id == CTP_EV_INFO ||
	    param->ctpm_id == CTP_EV_CRITICAL) {
		if (param->ctpm_size < sizeof (uint64_t)) {
			return (EINVAL);
		} else {
			param_value = kparam->ctpm_kbuf;
			/* Tell ctparam_copyout how much data to return. */
			kparam->ret_size = sizeof (uint64_t);
		}
	}

	mutex_enter(&template->ctmpl_lock);
	switch (param->ctpm_id) {
	case CTP_COOKIE:
		*param_value = template->ctmpl_cookie;
		break;
	case CTP_EV_INFO:
		*param_value = template->ctmpl_ev_info;
		break;
	case CTP_EV_CRITICAL:
		*param_value = template->ctmpl_ev_crit;
		break;
	default:
		/* Type-specific terms are handled by the type's ctop_get. */
		result = template->ctmpl_ops->ctop_get(template, kparam);
	}
	mutex_exit(&template->ctmpl_lock);

	return (result);
}
1645
1646 /*
1647 * ctmpl_makecurrent
1648 *
1649 * Used by ctmpl_activate and ctmpl_clear to set the current thread's
1650 * active template. Frees the old active template, if there was one.
1651 */
1652 static void
1653 ctmpl_makecurrent(ct_template_t *template, ct_template_t *new)
1654 {
1655 klwp_t *curlwp = ttolwp(curthread);
1656 proc_t *p = curproc;
1657 ct_template_t *old;
1658
1659 mutex_enter(&p->p_lock);
1660 old = curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index];
1661 curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index] = new;
1662 mutex_exit(&p->p_lock);
1663
1664 if (old)
1665 ctmpl_free(old);
1666 }
1667
1668 /*
1669 * ctmpl_activate
1670 *
 * Copies the specified template as the current thread's active
1672 * template of that type.
1673 */
1674 void
1675 ctmpl_activate(ct_template_t *template)
1676 {
1677 ctmpl_makecurrent(template, ctmpl_dup(template));
1678 }
1679
1680 /*
1681 * ctmpl_clear
1682 *
 * Clears the current thread's active template of the same type as
1684 * the specified template.
1685 */
1686 void
1687 ctmpl_clear(ct_template_t *template)
1688 {
1689 ctmpl_makecurrent(template, NULL);
1690 }
1691
1692 /*
1693 * ctmpl_create
1694 *
1695 * Creates a new contract using the specified template.
1696 */
1697 int
1698 ctmpl_create(ct_template_t *template, ctid_t *ctidp)
1699 {
1700 return (template->ctmpl_ops->ctop_create(template, ctidp));
1701 }
1702
1703 /*
1704 * ctmpl_init
1705 *
1706 * Initializes the common portion of a new contract template.
1707 */
1708 void
1709 ctmpl_init(ct_template_t *new, ctmplops_t *ops, ct_type_t *type, void *data)
1710 {
1711 mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
1712 new->ctmpl_ops = ops;
1713 new->ctmpl_type = type;
1714 new->ctmpl_data = data;
1715 new->ctmpl_ev_info = new->ctmpl_ev_crit = 0;
1716 new->ctmpl_cookie = 0;
1717 }
1718
1719 /*
1720 * ctmpl_copy
1721 *
1722 * Copies the common portions of a contract template. Intended for use
1723 * by a contract type's ctop_dup template op. Returns with the old
 * template's lock held, which should remain held until the
1725 * template op returns (it is dropped by ctmpl_dup).
1726 */
1727 void
1728 ctmpl_copy(ct_template_t *new, ct_template_t *old)
1729 {
1730 mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
1731 mutex_enter(&old->ctmpl_lock);
1732 new->ctmpl_ops = old->ctmpl_ops;
1733 new->ctmpl_type = old->ctmpl_type;
1734 new->ctmpl_ev_crit = old->ctmpl_ev_crit;
1735 new->ctmpl_ev_info = old->ctmpl_ev_info;
1736 new->ctmpl_cookie = old->ctmpl_cookie;
1737 }
1738
1739 /*
1740 * ctmpl_create_inval
1741 *
1742 * Returns EINVAL. Provided for the convenience of those contract
1743 * types which don't support ct_tmpl_create(3contract) and would
1744 * otherwise need to create their own stub for the ctop_create template
1745 * op.
1746 */
/*ARGSUSED*/
int
ctmpl_create_inval(ct_template_t *template, ctid_t *ctidp)
{
	/* This contract type does not support template-based creation. */
	return (EINVAL);
}
1753
1754
1755 /*
1756 * cte_queue_create
1757 *
1758 * Initializes a queue of a particular type. If dynamic is set, the
1759 * queue is to be freed when its last listener is removed after being
1760 * drained.
1761 */
1762 static void
1763 cte_queue_create(ct_equeue_t *q, ct_listnum_t list, int maxinf, int dynamic)
1764 {
1765 mutex_init(&q->ctq_lock, NULL, MUTEX_DEFAULT, NULL);
1766 q->ctq_listno = list;
1767 list_create(&q->ctq_events, sizeof (ct_kevent_t),
1768 offsetof(ct_kevent_t, cte_nodes[list].ctm_node));
1769 list_create(&q->ctq_listeners, sizeof (ct_listener_t),
1770 offsetof(ct_listener_t, ctl_allnode));
1771 list_create(&q->ctq_tail, sizeof (ct_listener_t),
1772 offsetof(ct_listener_t, ctl_tailnode));
1773 gethrestime(&q->ctq_atime);
1774 q->ctq_nlisteners = 0;
1775 q->ctq_nreliable = 0;
1776 q->ctq_ninf = 0;
1777 q->ctq_max = maxinf;
1778
1779 /*
1780 * Bundle queues and contract queues are embedded in other
1781 * structures and are implicitly referenced counted by virtue
1782 * of their vnodes' indirect hold on their contracts. Process
1783 * bundle queues are dynamically allocated and may persist
1784 * after the death of the process, so they must be explicitly
1785 * reference counted.
1786 */
1787 q->ctq_flags = dynamic ? CTQ_REFFED : 0;
1788 }
1789
1790 /*
1791 * cte_queue_destroy
1792 *
1793 * Destroys the specified queue. The queue is freed if referenced
1794 * counted.
1795 */
1796 static void
1797 cte_queue_destroy(ct_equeue_t *q)
1798 {
1799 ASSERT(q->ctq_flags & CTQ_DEAD);
1800 ASSERT(q->ctq_nlisteners == 0);
1801 ASSERT(q->ctq_nreliable == 0);
1802 list_destroy(&q->ctq_events);
1803 list_destroy(&q->ctq_listeners);
1804 list_destroy(&q->ctq_tail);
1805 mutex_destroy(&q->ctq_lock);
1806 if (q->ctq_flags & CTQ_REFFED)
1807 kmem_free(q, sizeof (ct_equeue_t));
1808 }
1809
1810 /*
1811 * cte_hold
1812 *
1813 * Takes a hold on the specified event.
1814 */
1815 static void
1816 cte_hold(ct_kevent_t *e)
1817 {
1818 mutex_enter(&e->cte_lock);
1819 ASSERT(e->cte_refs > 0);
1820 e->cte_refs++;
1821 mutex_exit(&e->cte_lock);
1822 }
1823
1824 /*
1825 * cte_rele
1826 *
1827 * Releases a hold on the specified event. If the caller had the last
1828 * reference, frees the event and releases its hold on the contract
1829 * that generated it.
1830 */
1831 static void
1832 cte_rele(ct_kevent_t *e)
1833 {
1834 mutex_enter(&e->cte_lock);
1835 ASSERT(e->cte_refs > 0);
1836 if (--e->cte_refs) {
1837 mutex_exit(&e->cte_lock);
1838 return;
1839 }
1840
1841 contract_rele(e->cte_contract);
1842
1843 mutex_destroy(&e->cte_lock);
1844 nvlist_free(e->cte_data);
1845 nvlist_free(e->cte_gdata);
1846 kmem_free(e, sizeof (ct_kevent_t));
1847 }
1848
1849 /*
1850 * cte_qrele
1851 *
1852 * Remove this listener's hold on the specified event, removing and
1853 * releasing the queue's hold on the event if appropriate.
1854 */
1855 static void
1856 cte_qrele(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
1857 {
1858 ct_member_t *member = &e->cte_nodes[q->ctq_listno];
1859
1860 ASSERT(MUTEX_HELD(&q->ctq_lock));
1861
1862 if (l->ctl_flags & CTLF_RELIABLE)
1863 member->ctm_nreliable--;
1864 if ((--member->ctm_refs == 0) && member->ctm_trimmed) {
1865 member->ctm_trimmed = 0;
1866 list_remove(&q->ctq_events, e);
1867 cte_rele(e);
1868 }
1869 }
1870
1871 /*
1872 * cte_qmove
1873 *
1874 * Move this listener to the specified event in the queue.
1875 */
static ct_kevent_t *
cte_qmove(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
{
	ct_kevent_t *olde;

	ASSERT(MUTEX_HELD(&q->ctq_lock));
	ASSERT(l->ctl_equeue == q);

	/*
	 * A NULL position means the listener was parked on the queue's
	 * tail list waiting for events; take it off before moving.
	 */
	if ((olde = l->ctl_position) == NULL)
		list_remove(&q->ctq_tail, l);

	/* Skip events cte_trim has already marked for removal. */
	while (e != NULL && e->cte_nodes[q->ctq_listno].ctm_trimmed)
		e = list_next(&q->ctq_events, e);

	if (e != NULL) {
		/*
		 * Reference the new position so it isn't removed out
		 * from under us (reliably, if we are a reliable
		 * listener).
		 */
		e->cte_nodes[q->ctq_listno].ctm_refs++;
		if (l->ctl_flags & CTLF_RELIABLE)
			e->cte_nodes[q->ctq_listno].ctm_nreliable++;
	} else {
		/* Ran off the end of the queue; wait on the tail list. */
		list_insert_tail(&q->ctq_tail, l);
	}

	l->ctl_position = e;
	/* Release the reference on the old position, if there was one. */
	if (olde)
		cte_qrele(q, l, olde);

	return (e);
}
1904
1905 /*
1906 * cte_checkcred
1907 *
1908 * Determines if the specified event's contract is owned by a process
1909 * with the same effective uid as the specified credential. Called
1910 * after a failed call to contract_owned with locked set. Because it
1911 * drops the queue lock, its caller (cte_qreadable) needs to make sure
1912 * we're still in the same place after we return. Returns 1 on
1913 * success.
1914 */
1915 static int
1916 cte_checkcred(ct_equeue_t *q, ct_kevent_t *e, const cred_t *cr)
1917 {
1918 int result;
1919 contract_t *ct = e->cte_contract;
1920
1921 cte_hold(e);
1922 mutex_exit(&q->ctq_lock);
1923 result = curproc->p_zone->zone_uniqid == ct->ct_czuniqid &&
1924 contract_checkcred(ct, cr);
1925 mutex_enter(&q->ctq_lock);
1926 cte_rele(e);
1927
1928 return (result);
1929 }
1930
1931 /*
1932 * cte_qreadable
1933 *
1934 * Ensures that the listener is pointing to a valid event that the
1935 * caller has the credentials to read. Returns 0 if we can read the
1936 * event we're pointing to.
1937 */
static int
cte_qreadable(ct_equeue_t *q, ct_listener_t *l, const cred_t *cr,
    uint64_t zuniqid, int crit)
{
	ct_kevent_t *e, *next;
	contract_t *ct;

	ASSERT(MUTEX_HELD(&q->ctq_lock));
	ASSERT(l->ctl_equeue == q);

	/* A copyout is already in progress on this listener. */
	if (l->ctl_flags & CTLF_COPYOUT)
		return (1);

	/*
	 * Walk forward from the listener's current position until we
	 * find an event it may read, or run off the end of the queue
	 * (cte_qmove then parks the listener on the tail list).
	 */
	next = l->ctl_position;
	while (e = cte_qmove(q, l, next)) {
		ct = e->cte_contract;
		/*
		 * Check obvious things first. If we are looking for a
		 * critical message, is this one? If we aren't in the
		 * global zone, is this message meant for us?
		 */
		if ((crit && (e->cte_flags & (CTE_INFO | CTE_ACK))) ||
		    (cr != NULL && zuniqid != GLOBAL_ZONEUNIQID &&
		    zuniqid != contract_getzuniqid(ct))) {

			next = list_next(&q->ctq_events, e);

		/*
		 * Next, see if our effective uid equals that of owner
		 * or author of the contract.  Since we are holding the
		 * queue lock, contract_owned can't always check if we
		 * have the same effective uid as the contract's
		 * owner.  If it comes to that, it fails and we take
		 * the slow(er) path.
		 */
		} else if (cr != NULL && !contract_owned(ct, cr, B_TRUE)) {

			/*
			 * At this point we either don't have any claim
			 * to this contract or we match the effective
			 * uid of the owner but couldn't tell.  We
			 * first test for a NULL holder so that events
			 * from orphans and inherited contracts avoid
			 * the penalty phase.
			 */
			if (e->cte_contract->ct_owner == NULL &&
			    !secpolicy_contract_observer_choice(cr))
				next = list_next(&q->ctq_events, e);

			/*
			 * cte_checkcred will juggle locks to see if we
			 * have the same uid as the event's contract's
			 * current owner.  If it succeeds, we have to
			 * make sure we are in the same point in the
			 * queue.
			 */
			else if (cte_checkcred(q, e, cr) &&
			    l->ctl_position == e)
				break;

			/*
			 * cte_checkcred failed; see if we're in the
			 * same place.
			 */
			else if (l->ctl_position == e)
				if (secpolicy_contract_observer_choice(cr))
					break;
				else
					next = list_next(&q->ctq_events, e);

			/*
			 * cte_checkcred failed, and our position was
			 * changed.  Start from there.
			 */
			else
				next = l->ctl_position;
		} else {
			/* Readable event found. */
			break;
		}
	}

	/*
	 * We check for CTLF_COPYOUT again in case we dropped the queue
	 * lock in cte_checkcred.
	 */
	return ((l->ctl_flags & CTLF_COPYOUT) || (l->ctl_position == NULL));
}
2025
2026 /*
2027 * cte_qwakeup
2028 *
2029 * Wakes up any waiting listeners and points them at the specified event.
2030 */
2031 static void
2032 cte_qwakeup(ct_equeue_t *q, ct_kevent_t *e)
2033 {
2034 ct_listener_t *l;
2035
2036 ASSERT(MUTEX_HELD(&q->ctq_lock));
2037
2038 while (l = list_head(&q->ctq_tail)) {
2039 list_remove(&q->ctq_tail, l);
2040 e->cte_nodes[q->ctq_listno].ctm_refs++;
2041 if (l->ctl_flags & CTLF_RELIABLE)
2042 e->cte_nodes[q->ctq_listno].ctm_nreliable++;
2043 l->ctl_position = e;
2044 cv_signal(&l->ctl_cv);
2045 pollwakeup(&l->ctl_pollhead, POLLIN);
2046 }
2047 }
2048
2049 /*
2050 * cte_copy
2051 *
2052 * Copies events from the specified contract event queue to the
2053 * end of the specified process bundle queue. Only called from
2054 * contract_adopt.
2055 *
2056 * We copy to the end of the target queue instead of mixing the events
2057 * in their proper order because otherwise the act of adopting a
2058 * contract would require a process to reset all process bundle
2059 * listeners it needed to see the new events. This would, in turn,
2060 * require the process to keep track of which preexisting events had
2061 * already been processed.
2062 */
static void
cte_copy(ct_equeue_t *q, ct_equeue_t *newq)
{
	ct_kevent_t *e, *first = NULL;

	VERIFY(q->ctq_listno == CTEL_CONTRACT);
	VERIFY(newq->ctq_listno == CTEL_PBUNDLE);

	/* Source (contract) queue lock is taken before the target's. */
	mutex_enter(&q->ctq_lock);
	mutex_enter(&newq->ctq_lock);

	/*
	 * For now, only copy critical events.
	 */
	for (e = list_head(&q->ctq_events); e != NULL;
	    e = list_next(&q->ctq_events, e)) {
		if ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
			/* Remember the first event we enqueue... */
			if (first == NULL)
				first = e;
			/*
			 * It is possible for adoption to race with an owner's
			 * cte_publish_all(); we must only enqueue events that
			 * have not already been enqueued.
			 */
			if (!list_link_active((list_node_t *)
			    ((uintptr_t)e + newq->ctq_events.list_offset))) {
				list_insert_tail(&newq->ctq_events, e);
				cte_hold(e);
			}
		}
	}

	mutex_exit(&q->ctq_lock);

	/* ...so that waiting listeners can be pointed at it. */
	if (first)
		cte_qwakeup(newq, first);

	mutex_exit(&newq->ctq_lock);
}
2102
2103 /*
2104 * cte_trim
2105 *
2106 * Trims unneeded events from an event queue. Algorithm works as
2107 * follows:
2108 *
2109 * Removes all informative and acknowledged critical events until the
2110 * first referenced event is found.
2111 *
2112 * If a contract is specified, removes all events (regardless of
2113 * acknowledgement) generated by that contract until the first event
2114 * referenced by a reliable listener is found. Reference events are
2115 * removed by marking them "trimmed". Such events will be removed
2116 * when the last reference is dropped and will be skipped by future
2117 * listeners.
2118 *
2119 * This is pretty basic. Ideally this should remove from the middle of
2120 * the list (i.e. beyond the first referenced event), and even
2121 * referenced events.
2122 */
static void
cte_trim(ct_equeue_t *q, contract_t *ct)
{
	ct_kevent_t *e, *next;
	int flags, stopper;
	int start = 1;	/* still in the leading run of removable events */

	VERIFY(MUTEX_HELD(&q->ctq_lock));

	for (e = list_head(&q->ctq_events); e != NULL; e = next) {
		next = list_next(&q->ctq_events, e);
		flags = e->cte_flags;
		/*
		 * stopper: a non-pbundle queue event still referenced
		 * by a reliable listener; it may not be trimmed.
		 */
		stopper = (q->ctq_listno != CTEL_PBUNDLE) &&
		    (e->cte_nodes[q->ctq_listno].ctm_nreliable > 0);
		if (e->cte_nodes[q->ctq_listno].ctm_refs == 0) {
			if ((start && (flags & (CTE_INFO | CTE_ACK))) ||
			    (e->cte_contract == ct)) {
				/*
				 * Toss informative and ACKed critical messages.
				 */
				list_remove(&q->ctq_events, e);
				cte_rele(e);
			}
		} else if ((e->cte_contract == ct) && !stopper) {
			/* Referenced: defer removal to the last cte_qrele. */
			ASSERT(q->ctq_nlisteners != 0);
			e->cte_nodes[q->ctq_listno].ctm_trimmed = 1;
		} else if (ct && !stopper) {
			/* Another contract's referenced event; keep going
			 * but stop the unconditional leading trim. */
			start = 0;
		} else {
			/*
			 * Don't free messages past the first reader.
			 */
			break;
		}
	}
}
2159
2160 /*
2161 * cte_queue_drain
2162 *
2163 * Drain all events from the specified queue, and mark it dead. If
2164 * "ack" is set, acknowledge any critical events we find along the
2165 * way.
2166 */
static void
cte_queue_drain(ct_equeue_t *q, int ack)
{
	ct_kevent_t *e, *next;
	ct_listener_t *l;

	mutex_enter(&q->ctq_lock);

	/* Remove and release every event on the queue. */
	for (e = list_head(&q->ctq_events); e != NULL; e = next) {
		next = list_next(&q->ctq_events, e);
		if (ack && ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0)) {
			/*
			 * Make sure critical messages are eventually
			 * removed from the bundle queues.  The caller
			 * must hold the generating contract's ct_lock
			 * (asserted below) so ct_evcnt can be safely
			 * decremented.
			 */
			mutex_enter(&e->cte_lock);
			e->cte_flags |= CTE_ACK;
			mutex_exit(&e->cte_lock);
			ASSERT(MUTEX_HELD(&e->cte_contract->ct_lock));
			e->cte_contract->ct_evcnt--;
		}
		list_remove(&q->ctq_events, e);
		/* Clear this queue's bookkeeping before dropping the hold. */
		e->cte_nodes[q->ctq_listno].ctm_refs = 0;
		e->cte_nodes[q->ctq_listno].ctm_nreliable = 0;
		e->cte_nodes[q->ctq_listno].ctm_trimmed = 0;
		cte_rele(e);
	}

	/*
	 * This is necessary only because of CTEL_PBUNDLE listeners;
	 * the events they point to can move from one pbundle to
	 * another.  Fortunately, this only happens if the contract is
	 * inherited, which (in turn) only happens if the process
	 * exits, which means it's an all-or-nothing deal.  If this
	 * wasn't the case, we would instead need to keep track of
	 * listeners on a per-event basis, not just a per-queue basis.
	 * This would have the side benefit of letting us clean up
	 * trimmed events sooner (i.e. immediately), but would
	 * unfortunately make events even bigger than they already
	 * are.
	 */
	for (l = list_head(&q->ctq_listeners); l;
	    l = list_next(&q->ctq_listeners, l)) {
		l->ctl_flags |= CTLF_DEAD;
		if (l->ctl_position) {
			l->ctl_position = NULL;
			list_insert_tail(&q->ctq_tail, l);
		}
		cv_broadcast(&l->ctl_cv);
	}

	/*
	 * Disallow events.
	 */
	q->ctq_flags |= CTQ_DEAD;

	/*
	 * If we represent the last reference to a reference counted
	 * process bundle queue, free it.  (cte_queue_destroy consumes
	 * the queue lock in that case.)
	 */
	if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_nlisteners == 0))
		cte_queue_destroy(q);
	else
		mutex_exit(&q->ctq_lock);
}
2232
2233 /*
2234 * cte_publish
2235 *
2236 * Publishes an event to a specific queue. Only called by
2237 * cte_publish_all.
2238 */
static void
cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp, boolean_t mayexist)
{
	ASSERT(MUTEX_HELD(&q->ctq_lock));

	/*
	 * On every path out of this function ctq_lock is dropped, and
	 * on every path which does not enqueue the event one of the
	 * caller's references to e is consumed via cte_rele().
	 */
	q->ctq_atime = *tsp;

	/*
	 * If this event may already exist on this queue, check to see if it
	 * is already there and return if so.
	 * (The list_node_t for this queue lives at ctq_events.list_offset
	 * bytes into the event, hence the pointer arithmetic.)
	 */
	if (mayexist && list_link_active((list_node_t *)((uintptr_t)e +
	    q->ctq_events.list_offset))) {
		mutex_exit(&q->ctq_lock);
		cte_rele(e);
		return;
	}

	/*
	 * Don't publish if the event is informative and there aren't
	 * any listeners, or if the queue has been shut down.
	 */
	if (((q->ctq_nlisteners == 0) && (e->cte_flags & (CTE_INFO|CTE_ACK))) ||
	    (q->ctq_flags & CTQ_DEAD)) {
		mutex_exit(&q->ctq_lock);
		cte_rele(e);
		return;
	}

	/*
	 * Enqueue event
	 */
	VERIFY(!list_link_active((list_node_t *)
	    ((uintptr_t)e + q->ctq_events.list_offset)));
	list_insert_tail(&q->ctq_events, e);

	/*
	 * Check for waiting listeners
	 */
	cte_qwakeup(q, e);

	/*
	 * Trim unnecessary events from the queue.
	 */
	cte_trim(q, NULL);
	mutex_exit(&q->ctq_lock);
}
2286
2287 /*
2288 * cte_publish_all
2289 *
2290 * Publish an event to all necessary event queues. The event, e, must
2291 * be zallocated by the caller, and the event's flags and type must be
2292 * set. The rest of the event's fields are initialized here.
2293 */
uint64_t
cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata)
{
	ct_equeue_t *q;
	timespec_t ts;
	uint64_t evid;
	ct_kevent_t *negev;
	int negend;

	e->cte_contract = ct;
	e->cte_data = data;
	e->cte_gdata = gdata;
	/*
	 * One reference for each of the three queues the event may be
	 * published to; each cte_publish() call consumes one (either by
	 * enqueueing the event or by releasing it).
	 */
	e->cte_refs = 3;
	/* event ids are allocated per contract type */
	evid = e->cte_id = atomic_inc_64_nv(&ct->ct_type->ct_type_evid);
	contract_hold(ct);

	/*
	 * For a negotiation event we set the ct->ct_nevent field of the
	 * contract for the duration of the negotiation
	 */
	negend = 0;
	if (e->cte_flags & CTE_NEG) {
		cte_hold(e);
		ct->ct_nevent = e;
	} else if (e->cte_type == CT_EV_NEGEND) {
		/* the matching release happens below, after publication */
		negend = 1;
	}

	gethrestime(&ts);

	/*
	 * ct_evtlock simply (and only) ensures that two events sent
	 * from the same contract are delivered to all queues in the
	 * same order.
	 */
	mutex_enter(&ct->ct_evtlock);

	/*
	 * CTEL_CONTRACT - First deliver to the contract queue, acking
	 * the event if the contract has been orphaned.
	 */
	mutex_enter(&ct->ct_lock);
	mutex_enter(&ct->ct_events.ctq_lock);
	if ((e->cte_flags & CTE_INFO) == 0) {
		/* critical event: count it unless nobody can ack it */
		if (ct->ct_state >= CTS_ORPHAN)
			e->cte_flags |= CTE_ACK;
		else
			ct->ct_evcnt++;
	}
	mutex_exit(&ct->ct_lock);
	/* cte_publish drops ctq_lock */
	cte_publish(&ct->ct_events, e, &ts, B_FALSE);

	/*
	 * CTEL_BUNDLE - Next deliver to the contract type's bundle
	 * queue.
	 */
	mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
	cte_publish(&ct->ct_type->ct_type_events, e, &ts, B_FALSE);

	/*
	 * CTEL_PBUNDLE - Finally, if the contract has an owner,
	 * deliver to the owner's process bundle queue.
	 */
	mutex_enter(&ct->ct_lock);
	if (ct->ct_owner) {
		/*
		 * proc_exit doesn't free event queues until it has
		 * abandoned all contracts.
		 */
		ASSERT(ct->ct_owner->p_ct_equeue);
		ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
		q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
		/* take the queue lock before dropping ct_lock */
		mutex_enter(&q->ctq_lock);
		mutex_exit(&ct->ct_lock);

		/*
		 * It is possible for this code to race with adoption; we
		 * publish the event indicating that the event may already
		 * be enqueued because adoption beat us to it (in which case
		 * cte_publish() does nothing).
		 */
		cte_publish(q, e, &ts, B_TRUE);
	} else {
		/* no pbundle delivery: drop the third reference ourselves */
		mutex_exit(&ct->ct_lock);
		cte_rele(e);
	}

	if (negend) {
		/* negotiation is over; release the held negotiation event */
		mutex_enter(&ct->ct_lock);
		negev = ct->ct_nevent;
		ct->ct_nevent = NULL;
		cte_rele(negev);
		mutex_exit(&ct->ct_lock);
	}

	mutex_exit(&ct->ct_evtlock);

	return (evid);
}
2393
2394 /*
2395 * cte_add_listener
2396 *
2397 * Add a new listener to an event queue.
2398 */
2399 void
2400 cte_add_listener(ct_equeue_t *q, ct_listener_t *l)
2401 {
2402 cv_init(&l->ctl_cv, NULL, CV_DEFAULT, NULL);
2403 l->ctl_equeue = q;
2404 l->ctl_position = NULL;
2405 l->ctl_flags = 0;
2406
2407 mutex_enter(&q->ctq_lock);
2408 list_insert_head(&q->ctq_tail, l);
2409 list_insert_head(&q->ctq_listeners, l);
2410 q->ctq_nlisteners++;
2411 mutex_exit(&q->ctq_lock);
2412 }
2413
2414 /*
2415 * cte_remove_listener
2416 *
2417 * Remove a listener from an event queue. No other queue activities
2418 * (e.g. cte_get event) may be in progress at this endpoint when this
2419 * is called.
2420 */
void
cte_remove_listener(ct_listener_t *l)
{
	ct_equeue_t *q = l->ctl_equeue;
	ct_kevent_t *e;

	mutex_enter(&q->ctq_lock);

	/* no copyout or reset may be pending on this endpoint */
	ASSERT((l->ctl_flags & (CTLF_COPYOUT|CTLF_RESET)) == 0);

	/*
	 * Detach from our current position: either release our
	 * reference on the event we point at, or remove ourselves
	 * from the tail list if we point at nothing.
	 */
	if ((e = l->ctl_position) != NULL)
		cte_qrele(q, l, e);
	else
		list_remove(&q->ctq_tail, l);
	l->ctl_position = NULL;

	q->ctq_nlisteners--;
	list_remove(&q->ctq_listeners, l);

	if (l->ctl_flags & CTLF_RELIABLE)
		q->ctq_nreliable--;

	/*
	 * If we are the last listener of a dead reference counted
	 * queue (i.e. a process bundle) we free it. Otherwise we just
	 * trim any events which may have been kept around for our
	 * benefit.
	 * (cte_queue_destroy consumes ctq_lock; both paths leave the
	 * lock dropped.)
	 */
	if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_flags & CTQ_DEAD) &&
	    (q->ctq_nlisteners == 0)) {
		cte_queue_destroy(q);
	} else {
		cte_trim(q, NULL);
		mutex_exit(&q->ctq_lock);
	}
}
2457
2458 /*
2459 * cte_reset_listener
2460 *
2461 * Moves a listener's queue pointer to the beginning of the queue.
2462 */
2463 void
2464 cte_reset_listener(ct_listener_t *l)
2465 {
2466 ct_equeue_t *q = l->ctl_equeue;
2467
2468 mutex_enter(&q->ctq_lock);
2469
2470 /*
2471 * We allow an asynchronous reset because it doesn't make a
2472 * whole lot of sense to make reset block or fail. We already
2473 * have most of the mechanism needed thanks to queue trimming,
2474 * so implementing it isn't a big deal.
2475 */
2476 if (l->ctl_flags & CTLF_COPYOUT)
2477 l->ctl_flags |= CTLF_RESET;
2478
2479 (void) cte_qmove(q, l, list_head(&q->ctq_events));
2480
2481 /*
2482 * Inform blocked readers.
2483 */
2484 cv_broadcast(&l->ctl_cv);
2485 pollwakeup(&l->ctl_pollhead, POLLIN);
2486 mutex_exit(&q->ctq_lock);
2487 }
2488
2489 /*
2490 * cte_next_event
2491 *
2492 * Moves the event pointer for the specified listener to the next event
2493 * on the queue. To avoid races, this movement only occurs if the
2494 * specified event id matches that of the current event. This is used
2495 * primarily to skip events that have been read but whose extended data
2496 * haven't been copied out.
2497 */
2498 int
2499 cte_next_event(ct_listener_t *l, uint64_t id)
2500 {
2501 ct_equeue_t *q = l->ctl_equeue;
2502 ct_kevent_t *old;
2503
2504 mutex_enter(&q->ctq_lock);
2505
2506 if (l->ctl_flags & CTLF_COPYOUT)
2507 l->ctl_flags |= CTLF_RESET;
2508
2509 if (((old = l->ctl_position) != NULL) && (old->cte_id == id))
2510 (void) cte_qmove(q, l, list_next(&q->ctq_events, old));
2511
2512 mutex_exit(&q->ctq_lock);
2513
2514 return (0);
2515 }
2516
2517 /*
2518 * cte_get_event
2519 *
2520 * Reads an event from an event endpoint. If "nonblock" is clear, we
2521 * block until a suitable event is ready. If "crit" is set, we only
2522 * read critical events. Note that while "cr" is the caller's cred,
2523 * "zuniqid" is the unique id of the zone the calling contract
2524 * filesystem was mounted in.
2525 */
int
cte_get_event(ct_listener_t *l, int nonblock, void *uaddr, const cred_t *cr,
    uint64_t zuniqid, int crit)
{
	ct_equeue_t *q = l->ctl_equeue;
	ct_kevent_t *temp;
	int result = 0;
	int partial = 0;	/* set if user buffer too small for data */
	size_t size, gsize, len;
	model_t mdl = get_udatamodel();
	STRUCT_DECL(ct_event, ev);
	STRUCT_INIT(ev, mdl);

	/*
	 * cte_qreadable checks for CTLF_COPYOUT as well as ensures
	 * that there exists, and we are pointing to, an appropriate
	 * event. It may temporarily drop ctq_lock, but that doesn't
	 * really matter to us.
	 */
	mutex_enter(&q->ctq_lock);
	while (cte_qreadable(q, l, cr, zuniqid, crit)) {
		if (nonblock) {
			result = EAGAIN;
			goto error;
		}
		if (q->ctq_flags & CTQ_DEAD) {
			result = EIDRM;
			goto error;
		}
		result = cv_wait_sig(&l->ctl_cv, &q->ctq_lock);
		if (result == 0) {
			/* cv_wait_sig returns 0 when interrupted */
			result = EINTR;
			goto error;
		}
	}
	temp = l->ctl_position;
	/*
	 * Hold the event and flag that a copyout is in progress so the
	 * event and our position survive while we drop ctq_lock.
	 */
	cte_hold(temp);
	l->ctl_flags |= CTLF_COPYOUT;
	mutex_exit(&q->ctq_lock);

	/*
	 * We now have an event. Copy in the user event structure to
	 * see how much space we have to work with.
	 */
	result = copyin(uaddr, STRUCT_BUF(ev), STRUCT_SIZE(ev));
	if (result)
		goto copyerr;

	/*
	 * Determine what data we have and what the user should be
	 * allowed to see.  Global-zone data is only visible through a
	 * ctfs mounted in the global zone.
	 */
	size = gsize = 0;
	if (temp->cte_data) {
		VERIFY(nvlist_size(temp->cte_data, &size,
		    NV_ENCODE_NATIVE) == 0);
		ASSERT(size != 0);
	}
	if (zuniqid == GLOBAL_ZONEUNIQID && temp->cte_gdata) {
		VERIFY(nvlist_size(temp->cte_gdata, &gsize,
		    NV_ENCODE_NATIVE) == 0);
		ASSERT(gsize != 0);
	}

	/*
	 * If we have enough space, copy out the extended event data.
	 */
	len = size + gsize;
	if (len) {
		if (STRUCT_FGET(ev, ctev_nbytes) >= len) {
			char *buf = kmem_alloc(len, KM_SLEEP);

			/* pack the contract data, then the global data */
			if (size)
				VERIFY(nvlist_pack(temp->cte_data, &buf, &size,
				    NV_ENCODE_NATIVE, KM_SLEEP) == 0);
			if (gsize) {
				char *tmp = buf + size;

				VERIFY(nvlist_pack(temp->cte_gdata, &tmp,
				    &gsize, NV_ENCODE_NATIVE, KM_SLEEP) == 0);
			}

			/* This shouldn't have changed */
			ASSERT(size + gsize == len);
			result = copyout(buf, STRUCT_FGETP(ev, ctev_buffer),
			    len);
			kmem_free(buf, len);
			if (result)
				goto copyerr;
		} else {
			/* not enough room; report sizes but don't advance */
			partial = 1;
		}
	}

	/*
	 * Copy out the common event data.
	 */
	STRUCT_FSET(ev, ctev_id, temp->cte_contract->ct_id);
	STRUCT_FSET(ev, ctev_evid, temp->cte_id);
	STRUCT_FSET(ev, ctev_cttype,
	    temp->cte_contract->ct_type->ct_type_index);
	STRUCT_FSET(ev, ctev_flags, temp->cte_flags &
	    (CTE_ACK|CTE_INFO|CTE_NEG));
	STRUCT_FSET(ev, ctev_type, temp->cte_type);
	STRUCT_FSET(ev, ctev_nbytes, len);
	STRUCT_FSET(ev, ctev_goffset, size);
	result = copyout(STRUCT_BUF(ev), uaddr, STRUCT_SIZE(ev));

copyerr:
	/*
	 * Only move our location in the queue if all copyouts were
	 * successful, the caller provided enough space for the entire
	 * event, and our endpoint wasn't reset or otherwise moved by
	 * another thread.
	 */
	mutex_enter(&q->ctq_lock);
	if (result)
		result = EFAULT;
	else if (!partial && ((l->ctl_flags & CTLF_RESET) == 0) &&
	    (l->ctl_position == temp))
		(void) cte_qmove(q, l, list_next(&q->ctq_events, temp));
	l->ctl_flags &= ~(CTLF_COPYOUT|CTLF_RESET);
	/*
	 * Signal any readers blocked on our CTLF_COPYOUT.
	 */
	cv_signal(&l->ctl_cv);
	cte_rele(temp);

error:
	mutex_exit(&q->ctq_lock);
	return (result);
}
2658
2659 /*
2660 * cte_set_reliable
2661 *
2662 * Requests that events be reliably delivered to an event endpoint.
2663 * Unread informative and acknowledged critical events will not be
2664 * removed from the queue until this listener reads or skips them.
2665 * Because a listener could maliciously request reliable delivery and
2666 * then do nothing, this requires that PRIV_CONTRACT_EVENT be in the
2667 * caller's effective set.
2668 */
2669 int
2670 cte_set_reliable(ct_listener_t *l, const cred_t *cr)
2671 {
2672 ct_equeue_t *q = l->ctl_equeue;
2673 int error;
2674
2675 if ((error = secpolicy_contract_event(cr)) != 0)
2676 return (error);
2677
2678 mutex_enter(&q->ctq_lock);
2679 if ((l->ctl_flags & CTLF_RELIABLE) == 0) {
2680 l->ctl_flags |= CTLF_RELIABLE;
2681 q->ctq_nreliable++;
2682 if (l->ctl_position != NULL)
2683 l->ctl_position->cte_nodes[q->ctq_listno].
2684 ctm_nreliable++;
2685 }
2686 mutex_exit(&q->ctq_lock);
2687
2688 return (0);
2689 }