/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2016 Joyent, Inc.
 */

/*
 * Contracts
 * ---------
 *
 * Contracts are a primitive that enriches the relationships between
 * processes and system resources.  The primary purpose of contracts is
 * to provide a means for the system to negotiate the departure from a
 * binding relationship (e.g. pages locked in memory or a thread bound
 * to a processor), but they can also be used as a purely asynchronous
 * error reporting mechanism, as they are with process contracts.
 *
 * More information on how one interfaces with contracts and what
 * contracts can do for you can be found in:
 *   PSARC 2003/193 Solaris Contracts
 *   PSARC 2004/460 Contracts addendum
 *
 * This file contains the core contracts framework.  By itself it is
 * useless: it depends on the contracts filesystem (ctfs) to provide an
 * interface to user processes and on individual contract types to
 * implement the process/resource relationships.
 *
 * Data structure overview
 * -----------------------
 *
 * A contract is represented by a contract_t, which itself points to an
 * encapsulating contract-type specific contract object.  A contract_t
 * contains the contract's static identity (including its terms), its
 * linkage to various bookkeeping structures, the contract-specific
 * event queue, and a reference count.
 *
 * A contract template is represented by a ct_template_t, which, like a
 * contract, points to an encapsulating contract-type specific template
 * object.  A ct_template_t contains the template's terms.
 *
 * An event queue is represented by a ct_equeue_t, and consists of a
 * list of events, a list of listeners, and a list of listeners who are
 * waiting for new events (affectionately referred to as "tail
 * listeners").  There are three queue types, defined by ct_listnum_t
 * (an enum).  An event may be on one of each type of queue
 * simultaneously; the list linkage used by a queue is determined by
 * its type.
 *
 * An event is represented by a ct_kevent_t, which contains mostly
 * static event data (e.g. id, payload).  It also has an array of
 * ct_member_t structures, each of which contains a list_node_t and
 * represents the event's linkage in a specific event queue.
 *
 * Each open of an event endpoint results in the creation of a new
 * listener, represented by a ct_listener_t.  In addition to linkage
 * into the aforementioned lists in the event queue, a ct_listener_t
 * contains a pointer to the ct_kevent_t it is currently positioned at
 * as well as a set of status flags and other administrative data.
 *
 * Each process has a list of contracts it owns, p_ct_held; a pointer
 * to the process contract it is a member of, p_ct_process; the linkage
 * for that membership, p_ct_member; and an array of event queue
 * structures representing the process bundle queues.
 *
 * Each LWP has an array of its active templates, lwp_ct_active, and an
 * array of its most recently created contracts, lwp_ct_latest.
 *
 * A process contract has a list of member processes and a list of
 * inherited contracts.
 *
 * There is a system-wide list of all contracts, as well as per-type
 * lists of contracts.
 *
 * Lock ordering overview
 * ----------------------
 *
 * Locks at the top are taken first:
 *
 *                   ct_evtlock
 *                   regent ct_lock
 *                   member ct_lock
 *                   pidlock
 *                   p_lock
 *    contract ctq_lock         contract_lock
 *    pbundle ctq_lock
 *    cte_lock
 *                   ct_reflock
 *
 * contract_lock and ctq_lock/cte_lock are not currently taken at the
 * same time.
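 *
 * For example, contract_adopt (below) must lock a regent contract
 * before the member contract it inherited, never the other way
 * around; a minimal sketch of code honoring this ordering:
 *
 *    mutex_enter(&parent->ct_lock);      (regent ct_lock)
 *    mutex_enter(&ct->ct_lock);          (member ct_lock)
 *    ...
 *    mutex_exit(&ct->ct_lock);
 *    mutex_exit(&parent->ct_lock);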
 *
 * Reference counting and locking
 * ------------------------------
 *
 * A contract has a reference count, protected by ct_reflock.
 * (ct_reflock is also used in a couple of other places where atomic
 * access to a variable is needed in an innermost context.)  A process
 * maintains a hold on each contract it owns.  A process contract has a
 * hold on each contract it has inherited.  Each event has a hold on
 * the contract which generated it.  Process contract templates have
 * holds on the contracts referred to by their transfer terms.  CTFS
 * contract directory nodes have holds on contracts.  Lastly, various
 * code paths may temporarily take holds on contracts to prevent them
 * from disappearing while other processing is going on.  It is
 * important to note that the global contract lists do not hold
 * references on contracts; a contract is removed from these structures
 * atomically with the release of its last reference.
 *
 * At a given point in time, a contract can either be owned by a
 * process, inherited by a regent process contract, or orphaned.  A
 * contract_t's owner and regent pointers, ct_owner and ct_regent, are
 * protected by its ct_lock.  The linkage in the holder's (holder =
 * owner or regent) list of contracts, ct_ctlist, is protected by
 * whatever lock protects the holder's data structure.  In order for
 * these two directions to remain consistent, changing the holder of a
 * contract requires that both locks be held.
 *
 * Events also have reference counts.  There is one hold on an event
 * per queue it is present on, in addition to those needed for the
 * usual sundry reasons.  Individual listeners are associated with
 * specific queues, and increase a queue-specific reference count
 * stored in the ct_member_t structure.
 *
 * The dynamic contents of an event (reference count and flags) are
 * protected by its cte_lock, while the contents of the embedded
 * ct_member_t structures are protected by the locks of the queues they
 * are linked into.  A ct_listener_t's contents are also protected by
 * its event queue's ctq_lock.
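 *
 * A minimal sketch of the temporary-hold pattern described above,
 * using the contract_hold and contract_rele primitives defined in
 * this file:
 *
 *    contract_hold(ct);    (prevents the contract from disappearing)
 *    ...                   (inspect or update the contract)
 *    contract_rele(ct);    (may free the contract if this was the
 *                           last reference)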
 *
 * Resource controls
 * -----------------
 *
 * Control:      project.max-contracts (rc_project_contract)
 * Description:  Maximum number of contracts allowed to a project.
 *
 *   When a contract is created, the project's allocation is tested and
 *   (assuming success) increased.  When the last reference to a
 *   contract is released, the creating project's allocation is
 *   decreased.
 */

#include <sys/mutex.h>
#include <sys/debug.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/thread.h>
#include <sys/id_space.h>
#include <sys/avl.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/ctfs.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
#include <sys/dditypes.h>
#include <sys/contract/device_impl.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/cmn_err.h>
#include <sys/model.h>
#include <sys/policy.h>
#include <sys/zone.h>
#include <sys/task.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

extern rctl_hndl_t rc_project_contract;

static id_space_t       *contract_ids;
static avl_tree_t       contract_avl;
static kmutex_t         contract_lock;

int                     ct_ntypes = CTT_MAXTYPE;
static ct_type_t        *ct_types_static[CTT_MAXTYPE];
ct_type_t               **ct_types = ct_types_static;
int                     ct_debug;

static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int);
static void cte_queue_destroy(ct_equeue_t *);
static void cte_queue_drain(ct_equeue_t *, int);
static void cte_trim(ct_equeue_t *, contract_t *);
static void cte_copy(ct_equeue_t *, ct_equeue_t *);

/*
 * contract_compar
 *
 * A contract comparator which sorts on contract ID.
 */
int
contract_compar(const void *x, const void *y)
{
        const contract_t *ct1 = x;
        const contract_t *ct2 = y;

        if (ct1->ct_id < ct2->ct_id)
                return (-1);
        if (ct1->ct_id > ct2->ct_id)
                return (1);
        return (0);
}

/*
 * contract_init
 *
 * Initializes the contract subsystem, the specific contract types, and
 * process 0.
 */
void
contract_init(void)
{
        /*
         * Initialize contract subsystem.
         */
        contract_ids = id_space_create("contracts", 1, INT_MAX);
        avl_create(&contract_avl, contract_compar, sizeof (contract_t),
            offsetof(contract_t, ct_ctavl));
        mutex_init(&contract_lock, NULL, MUTEX_DEFAULT, NULL);

        /*
         * Initialize contract types.
         */
        contract_process_init();
        contract_device_init();

        /*
         * Initialize p0/lwp0 contract state.
         */
        avl_create(&p0.p_ct_held, contract_compar, sizeof (contract_t),
            offsetof(contract_t, ct_ctlist));
}

/*
 * contract_dtor
 *
 * Performs basic destruction of the common portions of a contract.
 * Called from the failure path of contract_ctor and from
 * contract_rele.
 */
static void
contract_dtor(contract_t *ct)
{
        cte_queue_destroy(&ct->ct_events);
        list_destroy(&ct->ct_vnodes);
        mutex_destroy(&ct->ct_reflock);
        mutex_destroy(&ct->ct_lock);
        mutex_destroy(&ct->ct_evtlock);
}

/*
 * contract_ctor
 *
 * Called by a contract type to initialize a contract.  Fails if the
 * max-contracts resource control would have been exceeded.  After a
 * successful call to contract_ctor, the contract is unlocked and
 * visible in all namespaces; any type-specific initialization should
 * be completed before calling contract_ctor.  Returns 0 on success.
 *
 * Because not all callers can tolerate failure, a 0 value for canfail
 * instructs contract_ctor to ignore the project.max-contracts resource
 * control.  Obviously, this "out" should only be employed by callers
 * who are sufficiently constrained in other ways (e.g. newproc).
 */
int
contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data,
    ctflags_t flags, proc_t *author, int canfail)
{
        avl_index_t where;
        klwp_t *curlwp = ttolwp(curthread);

        /*
         * It's possible that author is not curproc if the zone is creating
         * a new process as a child of zsched.
         */

        mutex_init(&ct->ct_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&ct->ct_reflock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&ct->ct_evtlock, NULL, MUTEX_DEFAULT, NULL);
        ct->ct_id = id_alloc(contract_ids);

        cte_queue_create(&ct->ct_events, CTEL_CONTRACT, 20, 0);
        list_create(&ct->ct_vnodes, sizeof (contract_vnode_t),
            offsetof(contract_vnode_t, ctv_node));

        /*
         * Instance data
         */
        ct->ct_ref = 2;              /* one for the holder, one for "latest" */
        ct->ct_cuid = crgetuid(CRED());
        ct->ct_type = type;
        ct->ct_data = data;
        gethrestime(&ct->ct_ctime);
        ct->ct_state = CTS_OWNED;
        ct->ct_flags = flags;
        ct->ct_regent = author->p_ct_process ?
            &author->p_ct_process->conp_contract : NULL;
        ct->ct_ev_info = tmpl->ctmpl_ev_info;
        ct->ct_ev_crit = tmpl->ctmpl_ev_crit;
        ct->ct_cookie = tmpl->ctmpl_cookie;
        ct->ct_owner = author;
        ct->ct_ntime.ctm_total = -1;
        ct->ct_qtime.ctm_total = -1;
        ct->ct_nevent = NULL;

        /*
         * Test project.max-contracts.
         */
        mutex_enter(&author->p_lock);
        mutex_enter(&contract_lock);
        if (canfail && rctl_test(rc_project_contract,
            author->p_task->tk_proj->kpj_rctls, author, 1,
            RCA_SAFE) & RCT_DENY) {
                id_free(contract_ids, ct->ct_id);
                mutex_exit(&contract_lock);
                mutex_exit(&author->p_lock);
                ct->ct_events.ctq_flags |= CTQ_DEAD;
                contract_dtor(ct);
                return (1);
        }
        ct->ct_proj = author->p_task->tk_proj;
        ct->ct_proj->kpj_data.kpd_contract++;
        (void) project_hold(ct->ct_proj);
        mutex_exit(&contract_lock);

        /*
         * Insert into the holder's AVL tree of contracts.  We use an
         * AVL tree not because order is important, but because readdir
         * of /proc/contracts requires that we be able to use a scalar
         * as an index into the process's list of contracts.
         */
        ct->ct_zoneid = author->p_zone->zone_id;
        ct->ct_czuniqid = ct->ct_mzuniqid = author->p_zone->zone_uniqid;
        VERIFY(avl_find(&author->p_ct_held, ct, &where) == NULL);
        avl_insert(&author->p_ct_held, ct, where);
        mutex_exit(&author->p_lock);

        /*
         * Insert into the global contract AVL.
         */
        mutex_enter(&contract_lock);
        VERIFY(avl_find(&contract_avl, ct, &where) == NULL);
        avl_insert(&contract_avl, ct, where);
        mutex_exit(&contract_lock);

        /*
         * Insert into the type AVL.
         */
        mutex_enter(&type->ct_type_lock);
        VERIFY(avl_find(&type->ct_type_avl, ct, &where) == NULL);
        avl_insert(&type->ct_type_avl, ct, where);
        type->ct_type_timestruc = ct->ct_ctime;
        mutex_exit(&type->ct_type_lock);

        if (curlwp->lwp_ct_latest[type->ct_type_index])
                contract_rele(curlwp->lwp_ct_latest[type->ct_type_index]);
        curlwp->lwp_ct_latest[type->ct_type_index] = ct;

        return (0);
}
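
/*
 * A hedged sketch of how a contract type's create entry point might
 * use contract_ctor; foo_contract_t, foo_type, and foo_tmpl are
 * hypothetical names, not an existing contract type:
 *
 *    foo_contract_t *foo = kmem_zalloc(sizeof (foo_contract_t), KM_SLEEP);
 *    ... (type-specific initialization of *foo goes here) ...
 *    if (contract_ctor(&foo->foo_contract, foo_type, &foo_tmpl->ctmpl,
 *        foo, 0, curproc, 1)) {
 *            kmem_free(foo, sizeof (foo_contract_t));
 *            return (NULL);    (project.max-contracts was exceeded)
 *    }
 */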

/*
 * contract_rele
 *
 * Releases a reference to a contract.  If the caller had the last
 * reference, the contract is removed from all namespaces, its
 * allocation against the max-contracts resource control is released,
 * and the contract type's free entry point is invoked for any
 * type-specific deconstruction and to (presumably) free the object.
 */
void
contract_rele(contract_t *ct)
{
        uint64_t nref;

        mutex_enter(&ct->ct_reflock);
        ASSERT(ct->ct_ref > 0);
        nref = --ct->ct_ref;
        mutex_exit(&ct->ct_reflock);
        if (nref == 0) {
                /*
                 * ct_owner is cleared when it drops its reference.
                 */
                ASSERT(ct->ct_owner == NULL);
                ASSERT(ct->ct_evcnt == 0);

                /*
                 * Remove from the global contract AVL.
                 */
                mutex_enter(&contract_lock);
                avl_remove(&contract_avl, ct);
                mutex_exit(&contract_lock);

                /*
                 * Remove from the type AVL.
                 */
                mutex_enter(&ct->ct_type->ct_type_lock);
                avl_remove(&ct->ct_type->ct_type_avl, ct);
                mutex_exit(&ct->ct_type->ct_type_lock);

                /*
                 * Release the contract's ID.
                 */
                id_free(contract_ids, ct->ct_id);

                /*
                 * Release the project hold.
                 */
                mutex_enter(&contract_lock);
                ct->ct_proj->kpj_data.kpd_contract--;
                project_rele(ct->ct_proj);
                mutex_exit(&contract_lock);

                /*
                 * Free the contract.
                 */
                contract_dtor(ct);
                ct->ct_type->ct_type_ops->contop_free(ct);
        }
}

/*
 * contract_hold
 *
 * Adds a reference to a contract.
 */
void
contract_hold(contract_t *ct)
{
        mutex_enter(&ct->ct_reflock);
        ASSERT(ct->ct_ref < UINT64_MAX);
        ct->ct_ref++;
        mutex_exit(&ct->ct_reflock);
}
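
/*
 * A minimal usage sketch: the lookup routines later in this file
 * (contract_ptr, contract_type_ptr) return the contract held, so a
 * caller pairs the lookup with contract_rele:
 *
 *    contract_t *ct = contract_ptr(id, zuniqid);
 *    if (ct != NULL) {
 *            ... (use the contract) ...
 *            contract_rele(ct);
 *    }
 */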

/*
 * contract_getzuniqid
 *
 * Gets a contract's zone unique ID.  Needed because 64-bit reads and
 * writes aren't atomic on x86.  Since there are contexts where we are
 * unable to take ct_lock, we instead use ct_reflock; in actuality any
 * lock would do.
 */
uint64_t
contract_getzuniqid(contract_t *ct)
{
        uint64_t zuniqid;

        mutex_enter(&ct->ct_reflock);
        zuniqid = ct->ct_mzuniqid;
        mutex_exit(&ct->ct_reflock);

        return (zuniqid);
}

/*
 * contract_setzuniqid
 *
 * Sets a contract's zone unique ID.  See contract_getzuniqid.
 */
void
contract_setzuniqid(contract_t *ct, uint64_t zuniqid)
{
        mutex_enter(&ct->ct_reflock);
        ct->ct_mzuniqid = zuniqid;
        mutex_exit(&ct->ct_reflock);
}

/*
 * contract_abandon
 *
 * Abandons the specified contract.  If "explicit" is clear, the
 * contract was implicitly abandoned (by process exit) and should be
 * inherited if its terms allow it and its owner was a member of a
 * regent contract.  Otherwise, the contract type's abandon entry point
 * is invoked to either destroy or orphan the contract.
 */
int
contract_abandon(contract_t *ct, proc_t *p, int explicit)
{
        ct_equeue_t *q = NULL;
        contract_t *parent = &p->p_ct_process->conp_contract;
        int inherit = 0;

        VERIFY(p == curproc);

        mutex_enter(&ct->ct_lock);

        /*
         * Multiple contract locks are taken contract -> subcontract.
         * Check if the contract will be inherited so we can acquire
         * all the necessary locks before making sensitive changes.
         */
        if (!explicit && (ct->ct_flags & CTF_INHERIT) &&
            contract_process_accept(parent)) {
                mutex_exit(&ct->ct_lock);
                mutex_enter(&parent->ct_lock);
                mutex_enter(&ct->ct_lock);
                inherit = 1;
        }

        if (ct->ct_owner != p) {
                mutex_exit(&ct->ct_lock);
                if (inherit)
                        mutex_exit(&parent->ct_lock);
                return (EINVAL);
        }

        mutex_enter(&p->p_lock);
        if (explicit)
                avl_remove(&p->p_ct_held, ct);
        ct->ct_owner = NULL;
        mutex_exit(&p->p_lock);

        /*
         * Since we can't call cte_trim with the contract lock held,
         * we grab the queue pointer here.
         */
        if (p->p_ct_equeue)
                q = p->p_ct_equeue[ct->ct_type->ct_type_index];

        /*
         * contop_abandon may destroy the contract, so we rely on it to
         * drop ct_lock.  We retain a reference on the contract so that
         * the cte_trim which follows functions properly.  Even though
         * cte_trim doesn't dereference the contract pointer, it is
         * still necessary to retain a reference to the contract so
         * that we don't trim events which are sent by a subsequently
         * allocated contract that happens to be located at the same
         * address.
         */
        contract_hold(ct);

        if (inherit) {
                ct->ct_state = CTS_INHERITED;
                VERIFY(ct->ct_regent == parent);
                contract_process_take(parent, ct);

                /*
                 * We are handing off the process's reference to the
                 * parent contract.  For this reason, the order in
                 * which we drop the contract locks is also important.
                 */
                mutex_exit(&ct->ct_lock);
                mutex_exit(&parent->ct_lock);
        } else {
                ct->ct_regent = NULL;
                ct->ct_type->ct_type_ops->contop_abandon(ct);
        }

        /*
         * ct_lock has been dropped; we can safely trim the event
         * queue now.
         */
        if (q) {
                mutex_enter(&q->ctq_lock);
                cte_trim(q, ct);
                mutex_exit(&q->ctq_lock);
        }

        contract_rele(ct);

        return (0);
}
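
/*
 * For example, contract_exit (below) abandons each held contract
 * implicitly on process exit:
 *
 *    VERIFY(contract_abandon(ct, p, 0) == 0);
 *
 * whereas an explicit abandonment on behalf of ct_abandon(3CONTRACT)
 * would pass explicit == 1, which also removes the contract from the
 * owner's p_ct_held tree.
 */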

int
contract_newct(contract_t *ct)
{
        return (ct->ct_type->ct_type_ops->contop_newct(ct));
}

/*
 * contract_adopt
 *
 * Adopts a contract.  After a successful call to this routine, the
 * previously inherited contract will belong to the calling process,
 * and its events will have been appended to its new owner's process
 * bundle queue.
 */
int
contract_adopt(contract_t *ct, proc_t *p)
{
        avl_index_t where;
        ct_equeue_t *q;
        contract_t *parent;

        ASSERT(p == curproc);

        /*
         * Ensure the process has an event queue.  Checked by ASSERTs
         * below.
         */
        (void) contract_type_pbundle(ct->ct_type, p);

        mutex_enter(&ct->ct_lock);
        parent = ct->ct_regent;
        if (ct->ct_state != CTS_INHERITED ||
            &p->p_ct_process->conp_contract != parent ||
            p->p_zone->zone_uniqid != ct->ct_czuniqid) {
                mutex_exit(&ct->ct_lock);
                return (EINVAL);
        }

        /*
         * Multiple contract locks are taken contract -> subcontract.
         */
        mutex_exit(&ct->ct_lock);
        mutex_enter(&parent->ct_lock);
        mutex_enter(&ct->ct_lock);

        /*
         * It is possible that the contract was adopted by someone else
         * while its lock was dropped.  It isn't possible for the
         * contract to have been inherited by a different regent
         * contract.
         */
        if (ct->ct_state != CTS_INHERITED) {
                mutex_exit(&parent->ct_lock);
                mutex_exit(&ct->ct_lock);
                return (EBUSY);
        }
        ASSERT(ct->ct_regent == parent);

        ct->ct_state = CTS_OWNED;

        contract_process_adopt(ct, p);

        mutex_enter(&p->p_lock);
        ct->ct_owner = p;
        VERIFY(avl_find(&p->p_ct_held, ct, &where) == NULL);
        avl_insert(&p->p_ct_held, ct, where);
        mutex_exit(&p->p_lock);

        ASSERT(ct->ct_owner->p_ct_equeue);
        ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
        q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
        cte_copy(&ct->ct_events, q);
        mutex_exit(&ct->ct_lock);

        return (0);
}

/*
 * contract_ack
 *
 * Acknowledges receipt of a critical event.
 */
int
contract_ack(contract_t *ct, uint64_t evid, int ack)
{
        ct_kevent_t *ev;
        list_t *queue = &ct->ct_events.ctq_events;
        int error = ESRCH;
        int nego = 0;
        uint_t evtype;

        ASSERT(ack == CT_ACK || ack == CT_NACK);

        mutex_enter(&ct->ct_lock);
        mutex_enter(&ct->ct_events.ctq_lock);
        /*
         * We are probably ACKing something near the head of the queue.
         */
        for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
                if (ev->cte_id == evid) {
                        if (ev->cte_flags & CTE_NEG)
                                nego = 1;
                        else if (ack == CT_NACK)
                                break;
                        if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
                                ev->cte_flags |= CTE_ACK;
                                ct->ct_evcnt--;
                                evtype = ev->cte_type;
                                error = 0;
                        }
                        break;
                }
        }
        mutex_exit(&ct->ct_events.ctq_lock);
        mutex_exit(&ct->ct_lock);

        /*
         * Not all critical events are negotiation events, but every
         * negotiation event is a critical event.  NEGEND events are
         * critical events but are not negotiation events.
         */
        if (error || !nego)
                return (error);

        if (ack == CT_ACK)
                error = ct->ct_type->ct_type_ops->contop_ack(ct, evtype, evid);
        else
                error = ct->ct_type->ct_type_ops->contop_nack(ct, evtype, evid);

        return (error);
}

/*ARGSUSED*/
int
contract_ack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
{
        cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u",
            ct->ct_id);
        return (ENOSYS);
}

/*ARGSUSED*/
int
contract_qack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
{
        cmn_err(CE_PANIC, "contract_qack_inval: unsupported call: ctid: %u",
            ct->ct_id);
        return (ENOSYS);
}

/*ARGSUSED*/
int
contract_qack_notsup(contract_t *ct, uint_t evtype, uint64_t evid)
{
        return (ERANGE);
}

/*
 * contract_qack
 *
 * Asks that negotiations be extended by another time quantum.
 */
int
contract_qack(contract_t *ct, uint64_t evid)
{
        ct_kevent_t *ev;
        list_t *queue = &ct->ct_events.ctq_events;
        int nego = 0;
        uint_t evtype;

        mutex_enter(&ct->ct_lock);
        mutex_enter(&ct->ct_events.ctq_lock);

        for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
                if (ev->cte_id == evid) {
                        if ((ev->cte_flags & (CTE_NEG | CTE_ACK)) == CTE_NEG) {
                                evtype = ev->cte_type;
                                nego = 1;
                        }
                        break;
                }
        }
        mutex_exit(&ct->ct_events.ctq_lock);
        mutex_exit(&ct->ct_lock);

        /*
         * Only a negotiated event (by definition also a critical
         * event) that has not yet been acknowledged can provide
         * time quanta to a negotiating owner process.
         */
        if (!nego)
                return (ESRCH);

        return (ct->ct_type->ct_type_ops->contop_qack(ct, evtype, evid));
}

/*
 * contract_orphan
 *
 * Icky-poo.  This is a process-contract special, used to ACK all
 * critical messages when a contract is orphaned.
 */
void
contract_orphan(contract_t *ct)
{
        ct_kevent_t *ev;
        list_t *queue = &ct->ct_events.ctq_events;

        ASSERT(MUTEX_HELD(&ct->ct_lock));
        ASSERT(ct->ct_state != CTS_ORPHAN);

        mutex_enter(&ct->ct_events.ctq_lock);
        ct->ct_state = CTS_ORPHAN;
        for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
                if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
                        ev->cte_flags |= CTE_ACK;
                        ct->ct_evcnt--;
                }
        }
        mutex_exit(&ct->ct_events.ctq_lock);

        ASSERT(ct->ct_evcnt == 0);
}

/*
 * contract_destroy
 *
 * Explicit contract destruction.  Called when the contract is empty.
 * The contract will actually stick around until all of its events are
 * removed from the bundle and process bundle queues, and all fds
 * which refer to it are closed.  See contract_dtor if you are looking
 * for what destroys the contract structure.
 */
void
contract_destroy(contract_t *ct)
{
        ASSERT(MUTEX_HELD(&ct->ct_lock));
        ASSERT(ct->ct_state != CTS_DEAD);
        ASSERT(ct->ct_owner == NULL);

        ct->ct_state = CTS_DEAD;
        cte_queue_drain(&ct->ct_events, 1);
        mutex_exit(&ct->ct_lock);
        mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
        cte_trim(&ct->ct_type->ct_type_events, ct);
        mutex_exit(&ct->ct_type->ct_type_events.ctq_lock);
        mutex_enter(&ct->ct_lock);
        ct->ct_type->ct_type_ops->contop_destroy(ct);
        mutex_exit(&ct->ct_lock);
        contract_rele(ct);
}

/*
 * contract_vnode_get
 *
 * Obtains the contract directory vnode for this contract, if there is
 * one.  The caller must VN_RELE the vnode when they are through using
 * it.
 */
vnode_t *
contract_vnode_get(contract_t *ct, vfs_t *vfsp)
{
        contract_vnode_t *ctv;
        vnode_t *vp = NULL;

        mutex_enter(&ct->ct_lock);
        for (ctv = list_head(&ct->ct_vnodes); ctv != NULL;
            ctv = list_next(&ct->ct_vnodes, ctv))
                if (ctv->ctv_vnode->v_vfsp == vfsp) {
                        vp = ctv->ctv_vnode;
                        VN_HOLD(vp);
                        break;
                }
        mutex_exit(&ct->ct_lock);
        return (vp);
}

/*
 * contract_vnode_set
 *
 * Sets the contract directory vnode for this contract.  We don't hold
 * a reference on the vnode because we don't want to prevent it from
 * being freed.  The vnode's inactive entry point will take care of
 * notifying us when it should be removed.
 */
void
contract_vnode_set(contract_t *ct, contract_vnode_t *ctv, vnode_t *vnode)
{
        mutex_enter(&ct->ct_lock);
        ctv->ctv_vnode = vnode;
        list_insert_head(&ct->ct_vnodes, ctv);
        mutex_exit(&ct->ct_lock);
}

/*
 * contract_vnode_clear
 *
 * Removes this vnode as the contract directory vnode for this
 * contract.  Called from a contract directory's inactive entry point,
 * this may return 0 indicating that the vnode gained another reference
 * because of a simultaneous call to contract_vnode_get.
 */
int
contract_vnode_clear(contract_t *ct, contract_vnode_t *ctv)
{
        vnode_t *vp = ctv->ctv_vnode;
        int result;

        mutex_enter(&ct->ct_lock);
        mutex_enter(&vp->v_lock);
        if (vp->v_count == 1) {
                list_remove(&ct->ct_vnodes, ctv);
                result = 1;
        } else {
                vp->v_count--;
                result = 0;
        }
        mutex_exit(&vp->v_lock);
        mutex_exit(&ct->ct_lock);

        return (result);
}

/*
 * contract_exit
 *
 * Abandons all contracts held by process p, and drains process p's
 * bundle queues.  Called on process exit.
 */
void
contract_exit(proc_t *p)
{
        contract_t *ct;
        void *cookie = NULL;
        int i;

        ASSERT(p == curproc);

        /*
         * Abandon held contracts.  contract_abandon knows enough not
         * to remove the contract from the list a second time.  We are
         * exiting, so no locks are needed here.  But because
         * contract_abandon will take p_lock, we need to make sure we
         * aren't holding it.
         */
        ASSERT(MUTEX_NOT_HELD(&p->p_lock));
        while ((ct = avl_destroy_nodes(&p->p_ct_held, &cookie)) != NULL)
                VERIFY(contract_abandon(ct, p, 0) == 0);

        /*
         * Drain pbundles.  Because a process bundle queue could have
         * been passed to another process, they may not be freed right
         * away.
         */
        if (p->p_ct_equeue) {
                for (i = 0; i < CTT_MAXTYPE; i++)
                        if (p->p_ct_equeue[i])
                                cte_queue_drain(p->p_ct_equeue[i], 0);
                kmem_free(p->p_ct_equeue, CTT_MAXTYPE * sizeof (ct_equeue_t *));
        }
}

static int
get_time_left(struct ct_time *t)
{
        clock_t ticks_elapsed;
        int secs_left;

        if (t->ctm_total == -1)
                return (-1);

        ticks_elapsed = ddi_get_lbolt() - t->ctm_start;
        secs_left = t->ctm_total - (drv_hztousec(ticks_elapsed) / MICROSEC);
        return (secs_left > 0 ? secs_left : 0);
}
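
/*
 * Worked example: if a negotiation was granted ctm_total = 30 seconds
 * and ctm_start was recorded 10 seconds' worth of lbolt ticks ago,
 * drv_hztousec(ticks_elapsed) / MICROSEC is 10 and get_time_left
 * returns 20; once more than 30 seconds have elapsed it returns 0,
 * and a ctm_total of -1 (no pending negotiation) passes through as -1.
 */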

/*
 * contract_status_common
 *
 * Populates a ct_status structure.  Used by contract types in their
 * status entry points and by ctfs when only common information is
 * requested.
 */
void
contract_status_common(contract_t *ct, zone_t *zone, void *status,
    model_t model)
{
        STRUCT_HANDLE(ct_status, lstatus);

        STRUCT_SET_HANDLE(lstatus, model, status);
        ASSERT(MUTEX_HELD(&ct->ct_lock));
        if (zone->zone_uniqid == GLOBAL_ZONEUNIQID ||
            zone->zone_uniqid == ct->ct_czuniqid) {
                zone_t *czone;
                zoneid_t zoneid = -1;

                /*
                 * Contracts don't have holds on the zones they were
                 * created by.  If the contract's zone no longer
                 * exists, we say its zoneid is -1.
                 */
                if (zone->zone_uniqid == ct->ct_czuniqid ||
                    ct->ct_czuniqid == GLOBAL_ZONEUNIQID) {
                        zoneid = ct->ct_zoneid;
                } else if ((czone = zone_find_by_id(ct->ct_zoneid)) != NULL) {
                        if (czone->zone_uniqid == ct->ct_mzuniqid)
                                zoneid = ct->ct_zoneid;
                        zone_rele(czone);
                }

                STRUCT_FSET(lstatus, ctst_zoneid, zoneid);
                STRUCT_FSET(lstatus, ctst_holder,
                    (ct->ct_state == CTS_OWNED) ? ct->ct_owner->p_pid :
                    (ct->ct_state == CTS_INHERITED) ? ct->ct_regent->ct_id : 0);
                STRUCT_FSET(lstatus, ctst_state, ct->ct_state);
        } else {
                /*
                 * We are looking at a contract which was created by a
                 * process outside of our zone.  We provide fake zone,
                 * holder, and state information.
                 */

                STRUCT_FSET(lstatus, ctst_zoneid, zone->zone_id);
                /*
                 * Since "zone" can't disappear until the calling ctfs
                 * is unmounted, zone_zsched must be valid.
                 */
                STRUCT_FSET(lstatus, ctst_holder, (ct->ct_state < CTS_ORPHAN) ?
                    zone->zone_zsched->p_pid : 0);
                STRUCT_FSET(lstatus, ctst_state, (ct->ct_state < CTS_ORPHAN) ?
                    CTS_OWNED : ct->ct_state);
        }
        STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt);
        STRUCT_FSET(lstatus, ctst_ntime, get_time_left(&ct->ct_ntime));
        STRUCT_FSET(lstatus, ctst_qtime, get_time_left(&ct->ct_qtime));
        STRUCT_FSET(lstatus, ctst_nevid,
            ct->ct_nevent ? ct->ct_nevent->cte_id : 0);
        STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit);
        STRUCT_FSET(lstatus, ctst_informative, ct->ct_ev_info);
        STRUCT_FSET(lstatus, ctst_cookie, ct->ct_cookie);
        STRUCT_FSET(lstatus, ctst_type, ct->ct_type->ct_type_index);
        STRUCT_FSET(lstatus, ctst_id, ct->ct_id);
}

/*
 * contract_checkcred
 *
 * Determines if the specified contract is owned by a process with the
 * same effective uid as the specified credential.  The caller must
 * ensure that the uid spaces are the same.  Returns 1 on success.
 */
static int
contract_checkcred(contract_t *ct, const cred_t *cr)
{
        proc_t *p;
        int fail = 1;

        mutex_enter(&ct->ct_lock);
        if ((p = ct->ct_owner) != NULL) {
                mutex_enter(&p->p_crlock);
                fail = crgetuid(cr) != crgetuid(p->p_cred);
                mutex_exit(&p->p_crlock);
        }
        mutex_exit(&ct->ct_lock);

        return (!fail);
}

/*
 * contract_owned
 *
 * Determines if the specified credential can view an event generated
 * by the specified contract.  If locked is set, the contract's ct_lock
 * is held and the caller will need to do additional work to determine
 * if they truly can see the event.  Returns 1 on success.
 */
int
contract_owned(contract_t *ct, const cred_t *cr, int locked)
{
        int owner, cmatch, zmatch;
        uint64_t zuniqid, mzuniqid;
        uid_t euid;

        ASSERT(locked || MUTEX_NOT_HELD(&ct->ct_lock));

        zuniqid = curproc->p_zone->zone_uniqid;
        mzuniqid = contract_getzuniqid(ct);
        euid = crgetuid(cr);

        /*
         * owner: we own the contract
         * cmatch: we are in the creator's (and holder's) zone and our
         *   uid matches the creator's or holder's
         * zmatch: we are in the effective zone of a contract created
         *   in the global zone, and our uid matches that of the
         *   virtualized holder's (zsched/kcred)
         */
        owner = (ct->ct_owner == curproc);
        cmatch = (zuniqid == ct->ct_czuniqid) &&
            ((ct->ct_cuid == euid) || (!locked && contract_checkcred(ct, cr)));
        zmatch = (ct->ct_czuniqid != mzuniqid) && (zuniqid == mzuniqid) &&
            (crgetuid(kcred) == euid);

        return (owner || cmatch || zmatch);
}


/*
 * contract_type_init
 *
 * Called by contract types to register themselves with the contracts
 * framework.
 */
ct_type_t *
contract_type_init(ct_typeid_t type, const char *name, contops_t *ops,
    ct_f_default_t *dfault)
{
        ct_type_t *result;

        ASSERT(type < CTT_MAXTYPE);

        result = kmem_alloc(sizeof (ct_type_t), KM_SLEEP);

        mutex_init(&result->ct_type_lock, NULL, MUTEX_DEFAULT, NULL);
        avl_create(&result->ct_type_avl, contract_compar, sizeof (contract_t),
            offsetof(contract_t, ct_cttavl));
        cte_queue_create(&result->ct_type_events, CTEL_BUNDLE, 20, 0);
        result->ct_type_name = name;
        result->ct_type_ops = ops;
        result->ct_type_default = dfault;
        result->ct_type_evid = 0;
        gethrestime(&result->ct_type_timestruc);
        result->ct_type_index = type;

        ct_types[type] = result;

        return (result);
}

/*
 * contract_type_count
 *
 * Obtains the number of contracts of a particular type.
 */
int
contract_type_count(ct_type_t *type)
{
        ulong_t count;

        mutex_enter(&type->ct_type_lock);
        count = avl_numnodes(&type->ct_type_avl);
        mutex_exit(&type->ct_type_lock);

        return (count);
}

/*
 * contract_type_max
 *
 * Obtains the maximum contract id of a particular type.
 */
ctid_t
contract_type_max(ct_type_t *type)
{
        contract_t *ct;
        ctid_t res;

        mutex_enter(&type->ct_type_lock);
        ct = avl_last(&type->ct_type_avl);
        res = ct ? ct->ct_id : -1;
        mutex_exit(&type->ct_type_lock);

        return (res);
}

/*
 * contract_max
 *
 * Obtains the maximum contract id.
 */
ctid_t
contract_max(void)
{
        contract_t *ct;
        ctid_t res;

        mutex_enter(&contract_lock);
        ct = avl_last(&contract_avl);
        res = ct ? ct->ct_id : -1;
        mutex_exit(&contract_lock);

        return (res);
}

/*
 * contract_lookup_common
 *
 * Common code for contract_lookup and contract_type_lookup.  Takes a
 * pointer to an AVL tree to search in.  Should be called with the
 * appropriate tree-protecting lock held (unfortunately unassertable).
 */
static ctid_t
contract_lookup_common(avl_tree_t *tree, uint64_t zuniqid, ctid_t current)
{
        contract_t template, *ct;
        avl_index_t where;
        ctid_t res;

        template.ct_id = current;
        ct = avl_find(tree, &template, &where);
        if (ct == NULL)
                ct = avl_nearest(tree, where, AVL_AFTER);
        if (zuniqid != GLOBAL_ZONEUNIQID)
                while (ct && (contract_getzuniqid(ct) != zuniqid))
                        ct = AVL_NEXT(tree, ct);
        res = ct ? ct->ct_id : -1;

        return (res);
}

/*
 * contract_type_lookup
 *
 * Returns the next contract of the given type after the specified id,
 * visible from the specified zone.
 */
ctid_t
contract_type_lookup(ct_type_t *type, uint64_t zuniqid, ctid_t current)
{
        ctid_t res;

        mutex_enter(&type->ct_type_lock);
        res = contract_lookup_common(&type->ct_type_avl, zuniqid, current);
        mutex_exit(&type->ct_type_lock);

        return (res);
}

/*
 * contract_lookup
 *
 * Returns the next contract after the specified id, visible from the
 * specified zone.
 */
ctid_t
contract_lookup(uint64_t zuniqid, ctid_t current)
{
        ctid_t res;

        mutex_enter(&contract_lock);
        res = contract_lookup_common(&contract_avl, zuniqid, current);
        mutex_exit(&contract_lock);

        return (res);
}

/*
 * contract_plookup
 *
 * Returns the next contract held by process p after the specified id,
 * visible from the specified zone.  Made complicated by the fact that
 * contracts visible in a zone but held by processes outside of the
 * zone need to appear as being held by zsched to zone members.
 */
ctid_t
contract_plookup(proc_t *p, ctid_t current, uint64_t zuniqid)
{
        contract_t template, *ct;
        avl_index_t where;
        ctid_t res;

        template.ct_id = current;
        if (zuniqid != GLOBAL_ZONEUNIQID &&
            (p->p_flag & (SSYS|SZONETOP)) == (SSYS|SZONETOP)) {
                /* This is inelegant. */
                mutex_enter(&contract_lock);
                ct = avl_find(&contract_avl, &template, &where);
                if (ct == NULL)
                        ct = avl_nearest(&contract_avl, where, AVL_AFTER);
                while (ct && !(ct->ct_state < CTS_ORPHAN &&
                    contract_getzuniqid(ct) == zuniqid &&
                    ct->ct_czuniqid == GLOBAL_ZONEUNIQID))
                        ct = AVL_NEXT(&contract_avl, ct);
                res = ct ? ct->ct_id : -1;
                mutex_exit(&contract_lock);
        } else {
                mutex_enter(&p->p_lock);
                ct = avl_find(&p->p_ct_held, &template, &where);
                if (ct == NULL)
                        ct = avl_nearest(&p->p_ct_held, where, AVL_AFTER);
                res = ct ? ct->ct_id : -1;
                mutex_exit(&p->p_lock);
        }

        return (res);
}

/*
 * contract_ptr_common
 *
 * Common code for contract_ptr and contract_type_ptr.  Takes a pointer
 * to an AVL tree to search in.  Should be called with the appropriate
 * tree-protecting lock held (unfortunately unassertable).
 */
static contract_t *
contract_ptr_common(avl_tree_t *tree, ctid_t id, uint64_t zuniqid)
{
        contract_t template, *ct;

        template.ct_id = id;
        ct = avl_find(tree, &template, NULL);
        if (ct == NULL || (zuniqid != GLOBAL_ZONEUNIQID &&
            contract_getzuniqid(ct) != zuniqid)) {
                return (NULL);
        }

        /*
         * Check to see if a thread is in the window in contract_rele
         * between dropping the reference count and removing the
         * contract from the type AVL.
         */
        mutex_enter(&ct->ct_reflock);
        if (ct->ct_ref) {
                ct->ct_ref++;
                mutex_exit(&ct->ct_reflock);
        } else {
                mutex_exit(&ct->ct_reflock);
                ct = NULL;
        }

        return (ct);
}

/*
 * contract_type_ptr
 *
 * Returns a pointer to the contract with the specified id.  The
 * contract is held, so the caller needs to release the reference when
 * it is through with the contract.
 */
contract_t *
contract_type_ptr(ct_type_t *type, ctid_t id, uint64_t zuniqid)
{
        contract_t *ct;

        mutex_enter(&type->ct_type_lock);
        ct = contract_ptr_common(&type->ct_type_avl, id, zuniqid);
        mutex_exit(&type->ct_type_lock);

        return (ct);
}

/*
 * contract_ptr
 *
 * Returns a pointer to the contract with the specified id.  The
 * contract is held, so the caller needs to release the reference when
 * it is through with the contract.
 */
contract_t *
contract_ptr(ctid_t id, uint64_t zuniqid)
{
        contract_t *ct;

        mutex_enter(&contract_lock);
        ct = contract_ptr_common(&contract_avl, id, zuniqid);
        mutex_exit(&contract_lock);

        return (ct);
}

/*
 * contract_type_time
 *
 * Obtains the last time a contract of a particular type was created.
 */
void
contract_type_time(ct_type_t *type, timestruc_t *time)
{
        mutex_enter(&type->ct_type_lock);
        *time = type->ct_type_timestruc;
        mutex_exit(&type->ct_type_lock);
}

/*
 * contract_type_bundle
 *
 * Obtains a type's bundle queue.
 */
ct_equeue_t *
contract_type_bundle(ct_type_t *type)
{
        return (&type->ct_type_events);
}

/*
 * contract_type_pbundle
 *
 * Obtains a process's bundle queue.  If one doesn't exist, one is
 * created.  Often used simply to ensure that a bundle queue is
 * allocated.
 */
ct_equeue_t *
contract_type_pbundle(ct_type_t *type, proc_t *pp)
{
        /*
         * If there isn't an array of bundle queues, allocate one.  The
         * array is allocated before p_lock is taken; if another thread
         * wins the race to install its array, ours is simply freed.
         */
        if (pp->p_ct_equeue == NULL) {
                size_t size = CTT_MAXTYPE * sizeof (ct_equeue_t *);
                ct_equeue_t **qa = kmem_zalloc(size, KM_SLEEP);

                mutex_enter(&pp->p_lock);
                if (pp->p_ct_equeue)
                        kmem_free(qa, size);
                else
                        pp->p_ct_equeue = qa;
                mutex_exit(&pp->p_lock);
        }

        /*
         * If there isn't a bundle queue of the required type, allocate
         * one, again resolving any allocation race under p_lock.
         */
        if (pp->p_ct_equeue[type->ct_type_index] == NULL) {
                ct_equeue_t *q = kmem_zalloc(sizeof (ct_equeue_t), KM_SLEEP);
                cte_queue_create(q, CTEL_PBUNDLE, 20, 1);

                mutex_enter(&pp->p_lock);
                if (pp->p_ct_equeue[type->ct_type_index])
                        cte_queue_drain(q, 0);
                else
                        pp->p_ct_equeue[type->ct_type_index] = q;
                mutex_exit(&pp->p_lock);
        }

        return (pp->p_ct_equeue[type->ct_type_index]);
}

/*
 * ctparam_copyin
 *
 * Copies in a ct_param_t for the CT_TSET or CT_TGET commands.  If
 * ctparam_copyout() is not called after ctparam_copyin(), then the
 * caller must kmem_free() the buffer pointed to by kparam->ctpm_kbuf.
 *
 * The copyin/copyout of ct_param_t is not done in ctmpl_set() and
 * ctmpl_get() because prctioctl() calls ctmpl_set() and ctmpl_get()
 * while holding a process lock.
 */
int
ctparam_copyin(const void *uaddr, ct_kparam_t *kparam, int flag, int cmd)
{
        uint32_t size;
        void *ubuf;
        ct_param_t *param = &kparam->param;
        STRUCT_DECL(ct_param, uarg);

        STRUCT_INIT(uarg, flag);
        if (copyin(uaddr, STRUCT_BUF(uarg), STRUCT_SIZE(uarg)))
                return (EFAULT);
        size = STRUCT_FGET(uarg, ctpm_size);
        ubuf = STRUCT_FGETP(uarg, ctpm_value);

        if (size > CT_PARAM_MAX_SIZE || size == 0)
                return (EINVAL);

        kparam->ctpm_kbuf = kmem_alloc(size, KM_SLEEP);
        if (cmd == CT_TSET) {
                if (copyin(ubuf, kparam->ctpm_kbuf, size)) {
                        kmem_free(kparam->ctpm_kbuf, size);
                        return (EFAULT);
                }
        }
        param->ctpm_id = STRUCT_FGET(uarg, ctpm_id);
        param->ctpm_size = size;
        param->ctpm_value = ubuf;
        kparam->ret_size = 0;

        return (0);
}
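
/*
 * A minimal sketch of the expected pairing (error handling elided;
 * uaddr, flag, and template stand in for the caller's arguments):
 *
 *    ct_kparam_t kparam;
 *
 *    if (ctparam_copyin(uaddr, &kparam, flag, CT_TGET) == 0) {
 *            (void) ctmpl_get(template, &kparam);
 *            error = ctparam_copyout(&kparam, uaddr, flag);
 *    }
 *
 * If ctparam_copyout is never reached, the caller must instead
 * kmem_free(kparam.ctpm_kbuf, kparam.param.ctpm_size).
 */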
1465 
1466 /*
1467  * ctparam_copyout
1468  *
1469  * copyout a ct_kparam_t and frees the buffer pointed by the member
1470  * ctpm_kbuf of ct_kparam_t
1471  */
1472 int
1473 ctparam_copyout(ct_kparam_t *kparam, void *uaddr, int flag)
1474 {
1475         int r = 0;
1476         ct_param_t *param = &kparam->param;
1477         STRUCT_DECL(ct_param, uarg);
1478 
1479         STRUCT_INIT(uarg, flag);
1480 
1481         STRUCT_FSET(uarg, ctpm_id, param->ctpm_id);
1482         STRUCT_FSET(uarg, ctpm_size, kparam->ret_size);
1483         STRUCT_FSETP(uarg, ctpm_value, param->ctpm_value);
1484         if (copyout(STRUCT_BUF(uarg), uaddr, STRUCT_SIZE(uarg))) {
1485                 r = EFAULT;
1486                 goto error;
1487         }
1488         if (copyout(kparam->ctpm_kbuf, param->ctpm_value,
1489             MIN(kparam->ret_size, param->ctpm_size))) {
1490                 r = EFAULT;
1491         }
1492 
1493 error:
1494         kmem_free(kparam->ctpm_kbuf, param->ctpm_size);
1495 
1496         return (r);
1497 }
1498 
1499 /*
1500  * ctmpl_free
1501  *
1502  * Frees a template.
1503  */
1504 void
1505 ctmpl_free(ct_template_t *template)
1506 {
1507         mutex_destroy(&template->ctmpl_lock);
1508         template->ctmpl_ops->ctop_free(template);
1509 }
1510 
1511 /*
1512  * ctmpl_dup
1513  *
1514  * Creates a copy of a template.
1515  */
1516 ct_template_t *
1517 ctmpl_dup(ct_template_t *template)
1518 {
1519         ct_template_t *new;
1520 
1521         if (template == NULL)
1522                 return (NULL);
1523 
1524         new = template->ctmpl_ops->ctop_dup(template);
1525         /*
1526          * ctmpl_lock was taken by ctop_dup's call to ctmpl_copy and
1527          * should have remain held until now.
1528          */
1529         mutex_exit(&template->ctmpl_lock);
1530 
1531         return (new);
1532 }
1533 
1534 /*
1535  * ctmpl_set
1536  *
1537  * Sets the requested terms of a template.
1538  */
1539 int
1540 ctmpl_set(ct_template_t *template, ct_kparam_t *kparam, const cred_t *cr)
1541 {
1542         int result = 0;
1543         ct_param_t *param = &kparam->param;
1544         uint64_t param_value;
1545 
1546         if (param->ctpm_id == CTP_COOKIE ||
1547             param->ctpm_id == CTP_EV_INFO ||
1548             param->ctpm_id == CTP_EV_CRITICAL) {
1549                 if (param->ctpm_size < sizeof (uint64_t)) {
1550                         return (EINVAL);
1551                 } else {
1552                         param_value = *(uint64_t *)kparam->ctpm_kbuf;
1553                 }
1554         }
1555 
1556         mutex_enter(&template->ctmpl_lock);
1557         switch (param->ctpm_id) {
1558         case CTP_COOKIE:
1559                 template->ctmpl_cookie = param_value;
1560                 break;
1561         case CTP_EV_INFO:
1562                 if (param_value & ~(uint64_t)template->ctmpl_ops->allevents)
1563                         result = EINVAL;
1564                 else
1565                         template->ctmpl_ev_info = param_value;
1566                 break;
1567         case CTP_EV_CRITICAL:
1568                 if (param_value & ~(uint64_t)template->ctmpl_ops->allevents) {
1569                         result = EINVAL;
1570                         break;
1571                 } else if ((~template->ctmpl_ev_crit & param_value) == 0) {
1572                         /*
1573                          * Assume that a pure reduction of the critical
1574                          * set is allowed by the contract type.
1575                          */
1576                         template->ctmpl_ev_crit = param_value;
1577                         break;
1578                 }
1579                 /*
1580                  * There may be restrictions on what we can make
1581                  * critical, so we defer to the judgement of the
1582                  * contract type.
1583                  */
1584                 /* FALLTHROUGH */
1585         default:
1586                 result = template->ctmpl_ops->ctop_set(template, kparam, cr);
1587         }
1588         mutex_exit(&template->ctmpl_lock);
1589 
1590         return (result);
1591 }
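
     /*
      * A minimal sketch of a corresponding ctop_set op (CTFP_FOO,
      * ctmpl_foo_t, and ctf_foo are invented for illustration).  It is
      * invoked with ctmpl_lock held:
      *
      *     static int
      *     ctmpl_foo_set(ct_template_t *tmpl, ct_kparam_t *kparam,
      *         const cred_t *cr)
      *     {
      *             ctmpl_foo_t *ctf = tmpl->ctmpl_data;
      *             ct_param_t *param = &kparam->param;
      *
      *             if (param->ctpm_id != CTFP_FOO)
      *                     return (EINVAL);
      *             if (param->ctpm_size < sizeof (uint64_t))
      *                     return (EINVAL);
      *             ctf->ctf_foo = *(uint64_t *)kparam->ctpm_kbuf;
      *             return (0);
      *     }
      *
      * A real op must also handle CTP_EV_CRITICAL, which falls through to
      * it when the critical set is being expanded.
      */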
1592 
1593 /*
1594  * ctmpl_get
1595  *
1596  * Obtains the requested terms from a template.
1597  *
1598  * If the term requested is a variable-sized term and the buffer
1599  * provided is too small for the data, we truncate the data and return,
1600  * in kparam->ret_size, the buffer size necessary to fit the term.  If
1601  * the term requested is fixed-size (uint64_t) and the buffer provided
1602  * is too small, we return EINVAL.  This should never happen if you're
1603  * using libcontract(3LIB), only if you call ioctl with a
1604  * hand-constructed ct_param_t argument.
1605  *
1606  * Currently, only contract-type-specific terms have variable-sized
1607  * values.
1608  */
1609 int
1610 ctmpl_get(ct_template_t *template, ct_kparam_t *kparam)
1611 {
1612         int result = 0;
1613         ct_param_t *param = &kparam->param;
1614         uint64_t *param_value;
1615 
1616         if (param->ctpm_id == CTP_COOKIE ||
1617             param->ctpm_id == CTP_EV_INFO ||
1618             param->ctpm_id == CTP_EV_CRITICAL) {
1619                 if (param->ctpm_size < sizeof (uint64_t)) {
1620                         return (EINVAL);
1621                 } else {
1622                         param_value = kparam->ctpm_kbuf;
1623                         kparam->ret_size = sizeof (uint64_t);
1624                 }
1625         }
1626 
1627         mutex_enter(&template->ctmpl_lock);
1628         switch (param->ctpm_id) {
1629         case CTP_COOKIE:
1630                 *param_value = template->ctmpl_cookie;
1631                 break;
1632         case CTP_EV_INFO:
1633                 *param_value = template->ctmpl_ev_info;
1634                 break;
1635         case CTP_EV_CRITICAL:
1636                 *param_value = template->ctmpl_ev_crit;
1637                 break;
1638         default:
1639                 result = template->ctmpl_ops->ctop_get(template, kparam);
1640         }
1641         mutex_exit(&template->ctmpl_lock);
1642 
1643         return (result);
1644 }
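
     /*
      * A sketch of a ctop_get op returning a variable-sized term under the
      * truncation rules described above (CTFP_BLOB, ctf_blob, and
      * ctf_blobsz are invented for illustration):
      *
      *     static int
      *     ctmpl_foo_get(ct_template_t *tmpl, ct_kparam_t *kparam)
      *     {
      *             ctmpl_foo_t *ctf = tmpl->ctmpl_data;
      *             ct_param_t *param = &kparam->param;
      *
      *             if (param->ctpm_id != CTFP_BLOB)
      *                     return (EINVAL);
      *             kparam->ret_size = ctf->ctf_blobsz;
      *             bcopy(ctf->ctf_blob, kparam->ctpm_kbuf,
      *                 MIN(ctf->ctf_blobsz, param->ctpm_size));
      *             return (0);
      *     }
      *
      * ctparam_copyout then copies out MIN(ret_size, ctpm_size) bytes and
      * reports the full ret_size, letting the user retry with a larger
      * buffer.
      */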
1645 
1646 /*
1647  * ctmpl_makecurrent
1648  *
1649  * Used by ctmpl_activate and ctmpl_clear to set the current thread's
1650  * active template.  Frees the old active template, if there was one.
1651  */
1652 static void
1653 ctmpl_makecurrent(ct_template_t *template, ct_template_t *new)
1654 {
1655         klwp_t *curlwp = ttolwp(curthread);
1656         proc_t *p = curproc;
1657         ct_template_t *old;
1658 
1659         mutex_enter(&p->p_lock);
1660         old = curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index];
1661         curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index] = new;
1662         mutex_exit(&p->p_lock);
1663 
1664         if (old)
1665                 ctmpl_free(old);
1666 }
1667 
1668 /*
1669  * ctmpl_activate
1670  *
1671  * Copies the specified template and installs the copy as the current
1672  * thread's active template of that type.
1673  */
1674 void
1675 ctmpl_activate(ct_template_t *template)
1676 {
1677         ctmpl_makecurrent(template, ctmpl_dup(template));
1678 }
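
     /*
      * From user level this is reached via ct_tmpl_activate(3contract); a
      * rough sketch using the process contract type (error handling
      * elided):
      *
      *     int fd = open64("/system/contract/process/template", O_RDWR);
      *
      *     (void) ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
      *     (void) ct_tmpl_activate(fd);
      *
      * A subsequent fork() then creates a contract from the active
      * template.
      */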
1679 
1680 /*
1681  * ctmpl_clear
1682  *
1683  * Clears the current thread's active template of the same type as
1684  * the specified template.
1685  */
1686 void
1687 ctmpl_clear(ct_template_t *template)
1688 {
1689         ctmpl_makecurrent(template, NULL);
1690 }
1691 
1692 /*
1693  * ctmpl_create
1694  *
1695  * Creates a new contract using the specified template.
1696  */
1697 int
1698 ctmpl_create(ct_template_t *template, ctid_t *ctidp)
1699 {
1700         return (template->ctmpl_ops->ctop_create(template, ctidp));
1701 }
1702 
1703 /*
1704  * ctmpl_init
1705  *
1706  * Initializes the common portion of a new contract template.
1707  */
1708 void
1709 ctmpl_init(ct_template_t *new, ctmplops_t *ops, ct_type_t *type, void *data)
1710 {
1711         mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
1712         new->ctmpl_ops = ops;
1713         new->ctmpl_type = type;
1714         new->ctmpl_data = data;
1715         new->ctmpl_ev_info = new->ctmpl_ev_crit = 0;
1716         new->ctmpl_cookie = 0;
1717 }
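
     /*
      * A hypothetical contract type's template constructor, showing the
      * intended use (ctmpl_foo_t and foo_type are invented; ctmpl_foo_ops
      * is sketched below ctmpl_create_inval):
      *
      *     static ct_template_t *
      *     ctmpl_foo_alloc(ct_type_t *foo_type)
      *     {
      *             ctmpl_foo_t *new;
      *
      *             new = kmem_alloc(sizeof (ctmpl_foo_t), KM_SLEEP);
      *             ctmpl_init(&new->ctf_ctmpl, &ctmpl_foo_ops, foo_type,
      *                 new);
      *             new->ctf_foo = 0;
      *             return (&new->ctf_ctmpl);
      *     }
      *
      * Passing "new" as the data argument lets the template ops recover
      * the enclosing type-specific template via ctmpl_data.
      */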
1718 
1719 /*
1720  * ctmpl_copy
1721  *
1722  * Copies the common portions of a contract template.  Intended for use
1723  * by a contract type's ctop_dup template op.  Returns with the old
1724  * template's lock held, which should remain held until the
1725  * template op returns (it is dropped by ctmpl_dup).
1726  */
1727 void
1728 ctmpl_copy(ct_template_t *new, ct_template_t *old)
1729 {
1730         mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
1731         mutex_enter(&old->ctmpl_lock);
1732         new->ctmpl_ops = old->ctmpl_ops;
1733         new->ctmpl_type = old->ctmpl_type;
1734         new->ctmpl_ev_crit = old->ctmpl_ev_crit;
1735         new->ctmpl_ev_info = old->ctmpl_ev_info;
1736         new->ctmpl_cookie = old->ctmpl_cookie;
1737 }
1738 
1739 /*
1740  * ctmpl_create_inval
1741  *
1742  * Returns EINVAL.  Provided for the convenience of those contract
1743  * types which don't support ct_tmpl_create(3contract) and would
1744  * otherwise need to create their own stub for the ctop_create template
1745  * op.
1746  */
1747 /*ARGSUSED*/
1748 int
1749 ctmpl_create_inval(ct_template_t *template, ctid_t *ctidp)
1750 {
1751         return (EINVAL);
1752 }
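
     /*
      * For example, a contract type whose contracts are created as a side
      * effect of some other operation might build its template ops vector
      * with the stub; the ctmpl_foo_* ops and CT_FOO_ALLEVENT are the
      * hypothetical names used in the sketches above (ctmpl_foo_free is
      * assumed):
      *
      *     static ctmplops_t ctmpl_foo_ops = {
      *             .ctop_dup = ctmpl_foo_dup,
      *             .ctop_free = ctmpl_foo_free,
      *             .ctop_set = ctmpl_foo_set,
      *             .ctop_get = ctmpl_foo_get,
      *             .ctop_create = ctmpl_create_inval,
      *             .allevents = CT_FOO_ALLEVENT
      *     };
      */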
1753 
1754 
1755 /*
1756  * cte_queue_create
1757  *
1758  * Initializes a queue of a particular type.  If dynamic is set, the
1759  * queue is to be freed when its last listener is removed after being
1760  * drained.
1761  */
1762 static void
1763 cte_queue_create(ct_equeue_t *q, ct_listnum_t list, int maxinf, int dynamic)
1764 {
1765         mutex_init(&q->ctq_lock, NULL, MUTEX_DEFAULT, NULL);
1766         q->ctq_listno = list;
1767         list_create(&q->ctq_events, sizeof (ct_kevent_t),
1768             offsetof(ct_kevent_t, cte_nodes[list].ctm_node));
1769         list_create(&q->ctq_listeners, sizeof (ct_listener_t),
1770             offsetof(ct_listener_t, ctl_allnode));
1771         list_create(&q->ctq_tail, sizeof (ct_listener_t),
1772             offsetof(ct_listener_t, ctl_tailnode));
1773         gethrestime(&q->ctq_atime);
1774         q->ctq_nlisteners = 0;
1775         q->ctq_nreliable = 0;
1776         q->ctq_ninf = 0;
1777         q->ctq_max = maxinf;
1778 
1779         /*
1780          * Bundle queues and contract queues are embedded in other
1781          * structures and are implicitly reference counted by virtue
1782          * of their vnodes' indirect hold on their contracts.  Process
1783          * bundle queues are dynamically allocated and may persist
1784          * after the death of the process, so they must be explicitly
1785          * reference counted.
1786          */
1787         q->ctq_flags = dynamic ? CTQ_REFFED : 0;
1788 }
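
     /*
      * The two patterns this implies, in rough sketch (the maxinf value of
      * 20 is arbitrary here):
      *
      *     (embedded, e.g. in a contract, freed along with it)
      *     cte_queue_create(&ct->ct_events, CTEL_CONTRACT, 20, 0);
      *
      *     (dynamically allocated process bundle, reference counted)
      *     q = kmem_zalloc(sizeof (ct_equeue_t), KM_SLEEP);
      *     cte_queue_create(q, CTEL_PBUNDLE, 20, 1);
      */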
1789 
1790 /*
1791  * cte_queue_destroy
1792  *
1793  * Destroys the specified queue.  The queue is freed if it is
1794  * reference counted.
1795  */
1796 static void
1797 cte_queue_destroy(ct_equeue_t *q)
1798 {
1799         ASSERT(q->ctq_flags & CTQ_DEAD);
1800         ASSERT(q->ctq_nlisteners == 0);
1801         ASSERT(q->ctq_nreliable == 0);
1802         list_destroy(&q->ctq_events);
1803         list_destroy(&q->ctq_listeners);
1804         list_destroy(&q->ctq_tail);
1805         mutex_destroy(&q->ctq_lock);
1806         if (q->ctq_flags & CTQ_REFFED)
1807                 kmem_free(q, sizeof (ct_equeue_t));
1808 }
1809 
1810 /*
1811  * cte_hold
1812  *
1813  * Takes a hold on the specified event.
1814  */
1815 static void
1816 cte_hold(ct_kevent_t *e)
1817 {
1818         mutex_enter(&e->cte_lock);
1819         ASSERT(e->cte_refs > 0);
1820         e->cte_refs++;
1821         mutex_exit(&e->cte_lock);
1822 }
1823 
1824 /*
1825  * cte_rele
1826  *
1827  * Releases a hold on the specified event.  If the caller had the last
1828  * reference, frees the event and releases its hold on the contract
1829  * that generated it.
1830  */
1831 static void
1832 cte_rele(ct_kevent_t *e)
1833 {
1834         mutex_enter(&e->cte_lock);
1835         ASSERT(e->cte_refs > 0);
1836         if (--e->cte_refs) {
1837                 mutex_exit(&e->cte_lock);
1838                 return;
1839         }
1840 
1841         contract_rele(e->cte_contract);
1842 
1843         mutex_destroy(&e->cte_lock);
1844         nvlist_free(e->cte_data);
1845         nvlist_free(e->cte_gdata);
1846         kmem_free(e, sizeof (ct_kevent_t));
1847 }
1848 
1849 /*
1850  * cte_qrele
1851  *
1852  * Remove this listener's hold on the specified event, removing and
1853  * releasing the queue's hold on the event if appropriate.
1854  */
1855 static void
1856 cte_qrele(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
1857 {
1858         ct_member_t *member = &e->cte_nodes[q->ctq_listno];
1859 
1860         ASSERT(MUTEX_HELD(&q->ctq_lock));
1861 
1862         if (l->ctl_flags & CTLF_RELIABLE)
1863                 member->ctm_nreliable--;
1864         if ((--member->ctm_refs == 0) && member->ctm_trimmed) {
1865                 member->ctm_trimmed = 0;
1866                 list_remove(&q->ctq_events, e);
1867                 cte_rele(e);
1868         }
1869 }
1870 
1871 /*
1872  * cte_qmove
1873  *
1874  * Move this listener to the specified event in the queue.
1875  */
1876 static ct_kevent_t *
1877 cte_qmove(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
1878 {
1879         ct_kevent_t *olde;
1880 
1881         ASSERT(MUTEX_HELD(&q->ctq_lock));
1882         ASSERT(l->ctl_equeue == q);
1883 
1884         if ((olde = l->ctl_position) == NULL)
1885                 list_remove(&q->ctq_tail, l);
1886 
1887         while (e != NULL && e->cte_nodes[q->ctq_listno].ctm_trimmed)
1888                 e = list_next(&q->ctq_events, e);
1889 
1890         if (e != NULL) {
1891                 e->cte_nodes[q->ctq_listno].ctm_refs++;
1892                 if (l->ctl_flags & CTLF_RELIABLE)
1893                         e->cte_nodes[q->ctq_listno].ctm_nreliable++;
1894         } else {
1895                 list_insert_tail(&q->ctq_tail, l);
1896         }
1897 
1898         l->ctl_position = e;
1899         if (olde)
1900                 cte_qrele(q, l, olde);
1901 
1902         return (e);
1903 }
1904 
1905 /*
1906  * cte_checkcred
1907  *
1908  * Determines if the specified event's contract is owned by a process
1909  * with the same effective uid as the specified credential.  Called
1910  * after a failed call to contract_owned with locked set.  Because it
1911  * drops the queue lock, its caller (cte_qreadable) needs to make sure
1912  * we're still in the same place after we return.  Returns 1 on
1913  * success.
1914  */
1915 static int
1916 cte_checkcred(ct_equeue_t *q, ct_kevent_t *e, const cred_t *cr)
1917 {
1918         int result;
1919         contract_t *ct = e->cte_contract;
1920 
1921         cte_hold(e);
1922         mutex_exit(&q->ctq_lock);
1923         result = curproc->p_zone->zone_uniqid == ct->ct_czuniqid &&
1924             contract_checkcred(ct, cr);
1925         mutex_enter(&q->ctq_lock);
1926         cte_rele(e);
1927 
1928         return (result);
1929 }
1930 
1931 /*
1932  * cte_qreadable
1933  *
1934  * Ensures that the listener is pointing to a valid event that the
1935  * caller has the credentials to read.  Returns 0 if we can read the
1936  * event we're pointing to.
1937  */
1938 static int
1939 cte_qreadable(ct_equeue_t *q, ct_listener_t *l, const cred_t *cr,
1940     uint64_t zuniqid, int crit)
1941 {
1942         ct_kevent_t *e, *next;
1943         contract_t *ct;
1944 
1945         ASSERT(MUTEX_HELD(&q->ctq_lock));
1946         ASSERT(l->ctl_equeue == q);
1947 
1948         if (l->ctl_flags & CTLF_COPYOUT)
1949                 return (1);
1950 
1951         next = l->ctl_position;
1952         while (e = cte_qmove(q, l, next)) {
1953                 ct = e->cte_contract;
1954                 /*
1955                  * Check obvious things first.  If we are looking for a
1956                  * critical message, is this one?  If we aren't in the
1957                  * global zone, is this message meant for us?
1958                  */
1959                 if ((crit && (e->cte_flags & (CTE_INFO | CTE_ACK))) ||
1960                     (cr != NULL && zuniqid != GLOBAL_ZONEUNIQID &&
1961                     zuniqid != contract_getzuniqid(ct))) {
1962 
1963                         next = list_next(&q->ctq_events, e);
1964 
1965                 /*
1966                  * Next, see if our effective uid equals that of owner
1967                  * or author of the contract.  Since we are holding the
1968                  * queue lock, contract_owned can't always check if we
1969                  * have the same effective uid as the contract's
1970                  * owner.  If it comes to that, it fails and we take
1971                  * the slow(er) path.
1972                  */
1973                 } else if (cr != NULL && !contract_owned(ct, cr, B_TRUE)) {
1974 
1975                         /*
1976                          * At this point we either don't have any claim
1977                          * to this contract or we match the effective
1978                          * uid of the owner but couldn't tell.  We
1979                          * first test for a NULL holder so that events
1980                          * from orphans and inherited contracts avoid
1981                          * the penalty phase.
1982                          */
1983                         if (e->cte_contract->ct_owner == NULL &&
1984                             !secpolicy_contract_observer_choice(cr))
1985                                 next = list_next(&q->ctq_events, e);
1986 
1987                         /*
1988                          * cte_checkcred will juggle locks to see if we
1989                          * have the same uid as the event's contract's
1990                          * current owner.  If it succeeds, we have to
1991                          * make sure we are in the same point in the
1992                          * queue.
1993                          */
1994                         else if (cte_checkcred(q, e, cr) &&
1995                             l->ctl_position == e)
1996                                 break;
1997 
1998                         /*
1999                          * cte_checkcred failed; see if we're in the
2000                          * same place.
2001                          */
2002                         else if (l->ctl_position == e)
2003                                 if (secpolicy_contract_observer_choice(cr))
2004                                         break;
2005                                 else
2006                                         next = list_next(&q->ctq_events, e);
2007 
2008                         /*
2009                          * cte_checkcred failed, and our position was
2010                          * changed.  Start from there.
2011                          */
2012                         else
2013                                 next = l->ctl_position;
2014                 } else {
2015                         break;
2016                 }
2017         }
2018 
2019         /*
2020          * We check for CTLF_COPYOUT again in case we dropped the queue
2021          * lock in cte_checkcred.
2022          */
2023         return ((l->ctl_flags & CTLF_COPYOUT) || (l->ctl_position == NULL));
2024 }
2025 
2026 /*
2027  * cte_qwakeup
2028  *
2029  * Wakes up any waiting listeners and points them at the specified event.
2030  */
2031 static void
2032 cte_qwakeup(ct_equeue_t *q, ct_kevent_t *e)
2033 {
2034         ct_listener_t *l;
2035 
2036         ASSERT(MUTEX_HELD(&q->ctq_lock));
2037 
2038         while (l = list_head(&q->ctq_tail)) {
2039                 list_remove(&q->ctq_tail, l);
2040                 e->cte_nodes[q->ctq_listno].ctm_refs++;
2041                 if (l->ctl_flags & CTLF_RELIABLE)
2042                         e->cte_nodes[q->ctq_listno].ctm_nreliable++;
2043                 l->ctl_position = e;
2044                 cv_signal(&l->ctl_cv);
2045                 pollwakeup(&l->ctl_pollhead, POLLIN);
2046         }
2047 }
2048 
2049 /*
2050  * cte_copy
2051  *
2052  * Copies events from the specified contract event queue to the
2053  * end of the specified process bundle queue.  Only called from
2054  * contract_adopt.
2055  *
2056  * We copy to the end of the target queue instead of mixing the events
2057  * in their proper order because otherwise the act of adopting a
2058  * contract would require a process to reset any process bundle
2059  * listeners it was using to see the new events.  This would, in turn,
2060  * require the process to keep track of which preexisting events had
2061  * already been processed.
2062  */
2063 static void
2064 cte_copy(ct_equeue_t *q, ct_equeue_t *newq)
2065 {
2066         ct_kevent_t *e, *first = NULL;
2067 
2068         VERIFY(q->ctq_listno == CTEL_CONTRACT);
2069         VERIFY(newq->ctq_listno == CTEL_PBUNDLE);
2070 
2071         mutex_enter(&q->ctq_lock);
2072         mutex_enter(&newq->ctq_lock);
2073 
2074         /*
2075          * For now, only copy critical events.
2076          */
2077         for (e = list_head(&q->ctq_events); e != NULL;
2078             e = list_next(&q->ctq_events, e)) {
2079                 if ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
2080                         if (first == NULL)
2081                                 first = e;
2082                         /*
2083                          * It is possible for adoption to race with an owner's
2084                          * cte_publish_all(); we must only enqueue events that
2085                          * have not already been enqueued.
2086                          */
2087                         if (!list_link_active((list_node_t *)
2088                             ((uintptr_t)e + newq->ctq_events.list_offset))) {
2089                                 list_insert_tail(&newq->ctq_events, e);
2090                                 cte_hold(e);
2091                         }
2092                 }
2093         }
2094 
2095         mutex_exit(&q->ctq_lock);
2096 
2097         if (first)
2098                 cte_qwakeup(newq, first);
2099 
2100         mutex_exit(&newq->ctq_lock);
2101 }
2102 
2103 /*
2104  * cte_trim
2105  *
2106  * Trims unneeded events from an event queue.  Algorithm works as
2107  * follows:
2108  *
2109  *   Removes all informative and acknowledged critical events until the
2110  *   first referenced event is found.
2111  *
2112  *   If a contract is specified, removes all events (regardless of
2113  *   acknowledgement) generated by that contract until the first event
2114  *   referenced by a reliable listener is found.  Referenced events are
2115  *   removed by marking them "trimmed".  Such events will be removed
2116  *   when the last reference is dropped and will be skipped by future
2117  *   listeners.
2118  *
2119  * This is pretty basic.  Ideally it would also remove events from the
2120  * middle of the list (i.e. beyond the first referenced event), and
2121  * even referenced events.
2122  */
2123 static void
2124 cte_trim(ct_equeue_t *q, contract_t *ct)
2125 {
2126         ct_kevent_t *e, *next;
2127         int flags, stopper;
2128         int start = 1;
2129 
2130         VERIFY(MUTEX_HELD(&q->ctq_lock));
2131 
2132         for (e = list_head(&q->ctq_events); e != NULL; e = next) {
2133                 next = list_next(&q->ctq_events, e);
2134                 flags = e->cte_flags;
2135                 stopper = (q->ctq_listno != CTEL_PBUNDLE) &&
2136                     (e->cte_nodes[q->ctq_listno].ctm_nreliable > 0);
2137                 if (e->cte_nodes[q->ctq_listno].ctm_refs == 0) {
2138                         if ((start && (flags & (CTE_INFO | CTE_ACK))) ||
2139                             (e->cte_contract == ct)) {
2140                                 /*
2141                                  * Toss informative and ACKed critical messages.
2142                                  */
2143                                 list_remove(&q->ctq_events, e);
2144                                 cte_rele(e);
2145                         }
2146                 } else if ((e->cte_contract == ct) && !stopper) {
2147                         ASSERT(q->ctq_nlisteners != 0);
2148                         e->cte_nodes[q->ctq_listno].ctm_trimmed = 1;
2149                 } else if (ct && !stopper) {
2150                         start = 0;
2151                 } else {
2152                         /*
2153                          * Don't free messages past the first reader.
2154                          */
2155                         break;
2156                 }
2157         }
2158 }
2159 
2160 /*
2161  * cte_queue_drain
2162  *
2163  * Drain all events from the specified queue, and mark it dead.  If
2164  * "ack" is set, acknowledge any critical events we find along the
2165  * way.
2166  */
2167 static void
2168 cte_queue_drain(ct_equeue_t *q, int ack)
2169 {
2170         ct_kevent_t *e, *next;
2171         ct_listener_t *l;
2172 
2173         mutex_enter(&q->ctq_lock);
2174 
2175         for (e = list_head(&q->ctq_events); e != NULL; e = next) {
2176                 next = list_next(&q->ctq_events, e);
2177                 if (ack && ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0)) {
2178                         /*
2179                          * Make sure critical messages are eventually
2180                          * removed from the bundle queues.
2181                          */
2182                         mutex_enter(&e->cte_lock);
2183                         e->cte_flags |= CTE_ACK;
2184                         mutex_exit(&e->cte_lock);
2185                         ASSERT(MUTEX_HELD(&e->cte_contract->ct_lock));
2186                         e->cte_contract->ct_evcnt--;
2187                 }
2188                 list_remove(&q->ctq_events, e);
2189                 e->cte_nodes[q->ctq_listno].ctm_refs = 0;
2190                 e->cte_nodes[q->ctq_listno].ctm_nreliable = 0;
2191                 e->cte_nodes[q->ctq_listno].ctm_trimmed = 0;
2192                 cte_rele(e);
2193         }
2194 
2195         /*
2196          * This is necessary only because of CTEL_PBUNDLE listeners;
2197          * the events they point to can move from one pbundle to
2198          * another.  Fortunately, this only happens if the contract is
2199          * inherited, which (in turn) only happens if the process
2200          * exits, which means it's an all-or-nothing deal.  If this
2201          * wasn't the case, we would instead need to keep track of
2202          * listeners on a per-event basis, not just a per-queue basis.
2203          * This would have the side benefit of letting us clean up
2204          * trimmed events sooner (i.e. immediately), but would
2205          * unfortunately make events even bigger than they already
2206          * are.
2207          */
2208         for (l = list_head(&q->ctq_listeners); l;
2209             l = list_next(&q->ctq_listeners, l)) {
2210                 l->ctl_flags |= CTLF_DEAD;
2211                 if (l->ctl_position) {
2212                         l->ctl_position = NULL;
2213                         list_insert_tail(&q->ctq_tail, l);
2214                 }
2215                 cv_broadcast(&l->ctl_cv);
2216         }
2217 
2218         /*
2219          * Disallow events.
2220          */
2221         q->ctq_flags |= CTQ_DEAD;
2222 
2223         /*
2224          * If we represent the last reference to a reference counted
2225          * process bundle queue, free it.
2226          */
2227         if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_nlisteners == 0))
2228                 cte_queue_destroy(q);
2229         else
2230                 mutex_exit(&q->ctq_lock);
2231 }
2232 
2233 /*
2234  * cte_publish
2235  *
2236  * Publishes an event to a specific queue.  Only called by
2237  * cte_publish_all.
2238  */
2239 static void
2240 cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp, boolean_t mayexist)
2241 {
2242         ASSERT(MUTEX_HELD(&q->ctq_lock));
2243 
2244         q->ctq_atime = *tsp;
2245 
2246         /*
2247          * If this event may already exist on this queue, check to see if it
2248          * is already there and return if so.
2249          */
2250         if (mayexist && list_link_active((list_node_t *)((uintptr_t)e +
2251             q->ctq_events.list_offset))) {
2252                 mutex_exit(&q->ctq_lock);
2253                 cte_rele(e);
2254                 return;
2255         }
2256 
2257         /*
2258          * Don't publish if the event is informative and there aren't
2259          * any listeners, or if the queue has been shut down.
2260          */
2261         if (((q->ctq_nlisteners == 0) && (e->cte_flags & (CTE_INFO|CTE_ACK))) ||
2262             (q->ctq_flags & CTQ_DEAD)) {
2263                 mutex_exit(&q->ctq_lock);
2264                 cte_rele(e);
2265                 return;
2266         }
2267 
2268         /*
2269          * Enqueue event
2270          */
2271         VERIFY(!list_link_active((list_node_t *)
2272             ((uintptr_t)e + q->ctq_events.list_offset)));
2273         list_insert_tail(&q->ctq_events, e);
2274 
2275         /*
2276          * Check for waiting listeners
2277          */
2278         cte_qwakeup(q, e);
2279 
2280         /*
2281          * Trim unnecessary events from the queue.
2282          */
2283         cte_trim(q, NULL);
2284         mutex_exit(&q->ctq_lock);
2285 }
2286 
2287 /*
2288  * cte_publish_all
2289  *
2290  * Publish an event to all necessary event queues.  The event, e, must
2291  * be zallocated by the caller, and the event's flags and type must be
2292  * set.  The rest of the event's fields are initialized here.
2293  */
2294 uint64_t
2295 cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata)
2296 {
2297         ct_equeue_t *q;
2298         timespec_t ts;
2299         uint64_t evid;
2300         ct_kevent_t *negev;
2301         int negend;
2302 
2303         e->cte_contract = ct;
2304         e->cte_data = data;
2305         e->cte_gdata = gdata;
2306         e->cte_refs = 3;
2307         evid = e->cte_id = atomic_inc_64_nv(&ct->ct_type->ct_type_evid);
2308         contract_hold(ct);
2309 
2310         /*
2311          * For a negotiation event we set the ct->ct_nevent field of the
2312          * contract for the duration of the negotiation.
2313          */
2314         negend = 0;
2315         if (e->cte_flags & CTE_NEG) {
2316                 cte_hold(e);
2317                 ct->ct_nevent = e;
2318         } else if (e->cte_type == CT_EV_NEGEND) {
2319                 negend = 1;
2320         }
2321 
2322         gethrestime(&ts);
2323 
2324         /*
2325          * ct_evtlock simply (and only) ensures that two events sent
2326          * from the same contract are delivered to all queues in the
2327          * same order.
2328          */
2329         mutex_enter(&ct->ct_evtlock);
2330 
2331         /*
2332          * CTEL_CONTRACT - First deliver to the contract queue, acking
2333          * the event if the contract has been orphaned.
2334          */
2335         mutex_enter(&ct->ct_lock);
2336         mutex_enter(&ct->ct_events.ctq_lock);
2337         if ((e->cte_flags & CTE_INFO) == 0) {
2338                 if (ct->ct_state >= CTS_ORPHAN)
2339                         e->cte_flags |= CTE_ACK;
2340                 else
2341                         ct->ct_evcnt++;
2342         }
2343         mutex_exit(&ct->ct_lock);
2344         cte_publish(&ct->ct_events, e, &ts, B_FALSE);
2345 
2346         /*
2347          * CTEL_BUNDLE - Next deliver to the contract type's bundle
2348          * queue.
2349          */
2350         mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
2351         cte_publish(&ct->ct_type->ct_type_events, e, &ts, B_FALSE);
2352 
2353         /*
2354          * CTEL_PBUNDLE - Finally, if the contract has an owner,
2355          * deliver to the owner's process bundle queue.
2356          */
2357         mutex_enter(&ct->ct_lock);
2358         if (ct->ct_owner) {
2359                 /*
2360                  * proc_exit doesn't free event queues until it has
2361                  * abandoned all contracts.
2362                  */
2363                 ASSERT(ct->ct_owner->p_ct_equeue);
2364                 ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
2365                 q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
2366                 mutex_enter(&q->ctq_lock);
2367                 mutex_exit(&ct->ct_lock);
2368 
2369                 /*
2370                  * It is possible for this code to race with adoption; we
2371                  * publish the event indicating that the event may already
2372                  * be enqueued because adoption beat us to it (in which case
2373                  * cte_publish() does nothing).
2374                  */
2375                 cte_publish(q, e, &ts, B_TRUE);
2376         } else {
2377                 mutex_exit(&ct->ct_lock);
2378                 cte_rele(e);
2379         }
2380 
2381         if (negend) {
2382                 mutex_enter(&ct->ct_lock);
2383                 negev = ct->ct_nevent;
2384                 ct->ct_nevent = NULL;
2385                 cte_rele(negev);
2386                 mutex_exit(&ct->ct_lock);
2387         }
2388 
2389         mutex_exit(&ct->ct_evtlock);
2390 
2391         return (evid);
2392 }
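
     /*
      * A sketch of how a contract type might publish an informative event
      * (CT_EV_FOO and the nvlist payload are invented for illustration):
      *
      *     ct_kevent_t *e;
      *     nvlist_t *nvl;
      *
      *     e = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
      *     e->cte_flags = CTE_INFO;
      *     e->cte_type = CT_EV_FOO;
      *     (void) nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
      *     (void) nvlist_add_uint32(nvl, "foo", 1);
      *     (void) cte_publish_all(ct, e, nvl, NULL);
      *
      * cte_publish_all consumes the event and the nvlists; the caller must
      * not free them or otherwise touch the event afterwards.
      */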
2393 
2394 /*
2395  * cte_add_listener
2396  *
2397  * Add a new listener to an event queue.
2398  */
2399 void
2400 cte_add_listener(ct_equeue_t *q, ct_listener_t *l)
2401 {
2402         cv_init(&l->ctl_cv, NULL, CV_DEFAULT, NULL);
2403         l->ctl_equeue = q;
2404         l->ctl_position = NULL;
2405         l->ctl_flags = 0;
2406 
2407         mutex_enter(&q->ctq_lock);
2408         list_insert_head(&q->ctq_tail, l);
2409         list_insert_head(&q->ctq_listeners, l);
2410         q->ctq_nlisteners++;
2411         mutex_exit(&q->ctq_lock);
2412 }
2413 
2414 /*
2415  * cte_remove_listener
2416  *
2417  * Remove a listener from an event queue.  No other queue activities
2418  * (e.g. cte_get_event) may be in progress at this endpoint when this
2419  * is called.
2420  */
2421 void
2422 cte_remove_listener(ct_listener_t *l)
2423 {
2424         ct_equeue_t *q = l->ctl_equeue;
2425         ct_kevent_t *e;
2426 
2427         mutex_enter(&q->ctq_lock);
2428 
2429         ASSERT((l->ctl_flags & (CTLF_COPYOUT|CTLF_RESET)) == 0);
2430 
2431         if ((e = l->ctl_position) != NULL)
2432                 cte_qrele(q, l, e);
2433         else
2434                 list_remove(&q->ctq_tail, l);
2435         l->ctl_position = NULL;
2436 
2437         q->ctq_nlisteners--;
2438         list_remove(&q->ctq_listeners, l);
2439 
2440         if (l->ctl_flags & CTLF_RELIABLE)
2441                 q->ctq_nreliable--;
2442 
2443         /*
2444          * If we are the last listener of a dead reference counted
2445          * queue (i.e. a process bundle) we free it.  Otherwise we just
2446          * trim any events which may have been kept around for our
2447          * benefit.
2448          */
2449         if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_flags & CTQ_DEAD) &&
2450             (q->ctq_nlisteners == 0)) {
2451                 cte_queue_destroy(q);
2452         } else {
2453                 cte_trim(q, NULL);
2454                 mutex_exit(&q->ctq_lock);
2455         }
2456 }
2457 
2458 /*
2459  * cte_reset_listener
2460  *
2461  * Moves a listener's queue pointer to the beginning of the queue.
2462  */
2463 void
2464 cte_reset_listener(ct_listener_t *l)
2465 {
2466         ct_equeue_t *q = l->ctl_equeue;
2467 
2468         mutex_enter(&q->ctq_lock);
2469 
2470         /*
2471          * We allow an asynchronous reset because it doesn't make a
2472          * whole lot of sense to make reset block or fail.  We already
2473          * have most of the mechanism needed thanks to queue trimming,
2474          * so implementing it isn't a big deal.
2475          */
2476         if (l->ctl_flags & CTLF_COPYOUT)
2477                 l->ctl_flags |= CTLF_RESET;
2478 
2479         (void) cte_qmove(q, l, list_head(&q->ctq_events));
2480 
2481         /*
2482          * Inform blocked readers.
2483          */
2484         cv_broadcast(&l->ctl_cv);
2485         pollwakeup(&l->ctl_pollhead, POLLIN);
2486         mutex_exit(&q->ctq_lock);
2487 }
2488 
2489 /*
2490  * cte_next_event
2491  *
2492  * Moves the event pointer for the specified listener to the next event
2493  * on the queue.  To avoid races, this movement only occurs if the
2494  * specified event id matches that of the current event.  This is used
2495  * primarily to skip events that have been read but whose extended data
2496  * haven't been copied out.
2497  */
2498 int
2499 cte_next_event(ct_listener_t *l, uint64_t id)
2500 {
2501         ct_equeue_t *q = l->ctl_equeue;
2502         ct_kevent_t *old;
2503 
2504         mutex_enter(&q->ctq_lock);
2505 
2506         if (l->ctl_flags & CTLF_COPYOUT)
2507                 l->ctl_flags |= CTLF_RESET;
2508 
2509         if (((old = l->ctl_position) != NULL) && (old->cte_id == id))
2510                 (void) cte_qmove(q, l, list_next(&q->ctq_events, old));
2511 
2512         mutex_exit(&q->ctq_lock);
2513 
2514         return (0);
2515 }
2516 
2517 /*
2518  * cte_get_event
2519  *
2520  * Reads an event from an event endpoint.  If "nonblock" is clear, we
2521  * block until a suitable event is ready.  If "crit" is set, we only
2522  * read critical events.  Note that while "cr" is the caller's cred,
2523  * "zuniqid" is the unique id of the zone the calling contract
2524  * filesystem was mounted in.
2525  */
2526 int
2527 cte_get_event(ct_listener_t *l, int nonblock, void *uaddr, const cred_t *cr,
2528     uint64_t zuniqid, int crit)
2529 {
2530         ct_equeue_t *q = l->ctl_equeue;
2531         ct_kevent_t *temp;
2532         int result = 0;
2533         int partial = 0;
2534         size_t size, gsize, len;
2535         model_t mdl = get_udatamodel();
2536         STRUCT_DECL(ct_event, ev);
2537         STRUCT_INIT(ev, mdl);
2538 
2539         /*
2540          * cte_qreadable checks for CTLF_COPYOUT as well as ensures
2541          * that there exists, and we are pointing to, an appropriate
2542          * event.  It may temporarily drop ctq_lock, but that doesn't
2543          * really matter to us.
2544          */
2545         mutex_enter(&q->ctq_lock);
2546         while (cte_qreadable(q, l, cr, zuniqid, crit)) {
2547                 if (nonblock) {
2548                         result = EAGAIN;
2549                         goto error;
2550                 }
2551                 if (q->ctq_flags & CTQ_DEAD) {
2552                         result = EIDRM;
2553                         goto error;
2554                 }
2555                 result = cv_wait_sig(&l->ctl_cv, &q->ctq_lock);
2556                 if (result == 0) {
2557                         result = EINTR;
2558                         goto error;
2559                 }
2560         }
2561         temp = l->ctl_position;
2562         cte_hold(temp);
2563         l->ctl_flags |= CTLF_COPYOUT;
2564         mutex_exit(&q->ctq_lock);
2565 
2566         /*
2567          * We now have an event.  Copy in the user event structure to
2568          * see how much space we have to work with.
2569          */
2570         result = copyin(uaddr, STRUCT_BUF(ev), STRUCT_SIZE(ev));
2571         if (result)
2572                 goto copyerr;
2573 
2574         /*
2575          * Determine what data we have and what the user should be
2576          * allowed to see.
2577          */
2578         size = gsize = 0;
2579         if (temp->cte_data) {
2580                 VERIFY(nvlist_size(temp->cte_data, &size,
2581                     NV_ENCODE_NATIVE) == 0);
2582                 ASSERT(size != 0);
2583         }
2584         if (zuniqid == GLOBAL_ZONEUNIQID && temp->cte_gdata) {
2585                 VERIFY(nvlist_size(temp->cte_gdata, &gsize,
2586                     NV_ENCODE_NATIVE) == 0);
2587                 ASSERT(gsize != 0);
2588         }
2589 
2590         /*
2591          * If we have enough space, copy out the extended event data.
2592          */
2593         len = size + gsize;
2594         if (len) {
2595                 if (STRUCT_FGET(ev, ctev_nbytes) >= len) {
2596                         char *buf = kmem_alloc(len, KM_SLEEP);
2597 
2598                         if (size)
2599                                 VERIFY(nvlist_pack(temp->cte_data, &buf, &size,
2600                                     NV_ENCODE_NATIVE, KM_SLEEP) == 0);
2601                         if (gsize) {
2602                                 char *tmp = buf + size;
2603 
2604                                 VERIFY(nvlist_pack(temp->cte_gdata, &tmp,
2605                                     &gsize, NV_ENCODE_NATIVE, KM_SLEEP) == 0);
2606                         }
2607 
2608                         /* This shouldn't have changed */
2609                         ASSERT(size + gsize == len);
2610                         result = copyout(buf, STRUCT_FGETP(ev, ctev_buffer),
2611                             len);
2612                         kmem_free(buf, len);
2613                         if (result)
2614                                 goto copyerr;
2615                 } else {
2616                         partial = 1;
2617                 }
2618         }
2619 
2620         /*
2621          * Copy out the common event data.
2622          */
2623         STRUCT_FSET(ev, ctev_id, temp->cte_contract->ct_id);
2624         STRUCT_FSET(ev, ctev_evid, temp->cte_id);
2625         STRUCT_FSET(ev, ctev_cttype,
2626             temp->cte_contract->ct_type->ct_type_index);
2627         STRUCT_FSET(ev, ctev_flags, temp->cte_flags &
2628             (CTE_ACK|CTE_INFO|CTE_NEG));
2629         STRUCT_FSET(ev, ctev_type, temp->cte_type);
2630         STRUCT_FSET(ev, ctev_nbytes, len);
2631         STRUCT_FSET(ev, ctev_goffset, size);
2632         result = copyout(STRUCT_BUF(ev), uaddr, STRUCT_SIZE(ev));
2633 
2634 copyerr:
2635         /*
2636          * Only move our location in the queue if all copyouts were
2637          * successful, the caller provided enough space for the entire
2638          * event, and our endpoint wasn't reset or otherwise moved by
2639          * another thread.
2640          */
2641         mutex_enter(&q->ctq_lock);
2642         if (result)
2643                 result = EFAULT;
2644         else if (!partial && ((l->ctl_flags & CTLF_RESET) == 0) &&
2645             (l->ctl_position == temp))
2646                 (void) cte_qmove(q, l, list_next(&q->ctq_events, temp));
2647         l->ctl_flags &= ~(CTLF_COPYOUT|CTLF_RESET);
2648         /*
2649          * Signal any readers blocked on our CTLF_COPYOUT.
2650          */
2651         cv_signal(&l->ctl_cv);
2652         cte_rele(temp);
2653 
2654 error:
2655         mutex_exit(&q->ctq_lock);
2656         return (result);
2657 }
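
     /*
      * From user level this is normally reached through
      * ct_event_read(3contract) on a ctfs event endpoint; a minimal
      * consumer might look like this (error handling elided):
      *
      *     int fd = open64("/system/contract/process/pbundle", O_RDONLY);
      *     ct_evthdl_t ev;
      *
      *     while (ct_event_read(fd, &ev) == 0) {
      *             (void) printf("event 0x%llx\n",
      *                 (unsigned long long)ct_event_get_evid(ev));
      *             ct_event_free(ev);
      *     }
      */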
2658 
2659 /*
2660  * cte_set_reliable
2661  *
2662  * Requests that events be reliably delivered to an event endpoint.
2663  * Unread informative and acknowledged critical events will not be
2664  * removed from the queue until this listener reads or skips them.
2665  * Because a listener could maliciously request reliable delivery and
2666  * then do nothing, this requires that PRIV_CONTRACT_EVENT be in the
2667  * caller's effective set.
2668  */
2669 int
2670 cte_set_reliable(ct_listener_t *l, const cred_t *cr)
2671 {
2672         ct_equeue_t *q = l->ctl_equeue;
2673         int error;
2674 
2675         if ((error = secpolicy_contract_event(cr)) != 0)
2676                 return (error);
2677 
2678         mutex_enter(&q->ctq_lock);
2679         if ((l->ctl_flags & CTLF_RELIABLE) == 0) {
2680                 l->ctl_flags |= CTLF_RELIABLE;
2681                 q->ctq_nreliable++;
2682                 if (l->ctl_position != NULL)
2683                         l->ctl_position->cte_nodes[q->ctq_listno].
2684                             ctm_nreliable++;
2685         }
2686         mutex_exit(&q->ctq_lock);
2687 
2688         return (0);
2689 }
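
     /*
      * From user level this is reached via ct_event_reliable(3contract),
      * e.g. on a process bundle endpoint (the open must succeed and the
      * caller needs PRIV_CONTRACT_EVENT):
      *
      *     int fd = open64("/system/contract/process/bundle", O_RDONLY);
      *
      *     if (ct_event_reliable(fd) != 0)
      *             (void) fprintf(stderr, "not privileged\n");
      */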