OS-4825 cgroup user agent should be launched from the kernel
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>

          --- old/usr/src/uts/common/os/contract.c
          +++ new/usr/src/uts/common/os/contract.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
       24 + * Copyright 2016 Joyent, Inc.
  24   25   */
  25   26  
  26   27  /*
  27   28   * Contracts
  28   29   * ---------
  29   30   *
  30   31   * Contracts are a primitive which enrich the relationships between
  31   32   * processes and system resources.  The primary purpose of contracts is
  32   33   * to provide a means for the system to negotiate the departure from a
  33   34   * binding relationship (e.g. pages locked in memory or a thread bound
  34   35   * to processor), but they can also be used as a purely asynchronous
  35   36   * error reporting mechanism as they are with process contracts.
  36   37   *
  37   38   * More information on how one interfaces with contracts and what
  38   39   * contracts can do for you can be found in:
  39   40   *   PSARC 2003/193 Solaris Contracts
  40   41   *   PSARC 2004/460 Contracts addendum
  41   42   *
  42   43   * This file contains the core contracts framework.  By itself it is
  43   44   * useless: it depends on the contracts filesystem (ctfs) to provide an
  44   45   * interface to user processes and individual contract types to
  45   46   * implement the process/resource relationships.
  46   47   *
  47   48   * Data structure overview
  48   49   * -----------------------
  49   50   *
  50   51   * A contract is represented by a contract_t, which itself points to an
  51   52   * encapsulating contract-type specific contract object.  A contract_t
  52   53   * contains the contract's static identity (including its terms), its
  53   54   * linkage to various bookkeeping structures, the contract-specific
  54   55   * event queue, and a reference count.
  55   56   *
  56   57   * A contract template is represented by a ct_template_t, which, like a
  57   58   * contract, points to an encapsulating contract-type specific template
  58   59   * object.  A ct_template_t contains the template's terms.
  59   60   *
  60   61   * An event queue is represented by a ct_equeue_t, and consists of a
  61   62   * list of events, a list of listeners, and a list of listeners who are
  62   63   * waiting for new events (affectionately referred to as "tail
  63   64   * listeners").  There are three queue types, defined by ct_listnum_t
  64   65   * (an enum).  An event may be on one of each type of queue
  65   66   * simultaneously; the list linkage used by a queue is determined by
  66   67   * its type.
  67   68   *
  68   69   * An event is represented by a ct_kevent_t, which contains mostly
  69   70   * static event data (e.g. id, payload).  It also has an array of
  70   71   * ct_member_t structures, each of which contains a list_node_t and
  71   72   * represents the event's linkage in a specific event queue.
  72   73   *
  73   74   * Each open of an event endpoint results in the creation of a new
  74   75   * listener, represented by a ct_listener_t.  In addition to linkage
  75   76   * into the aforementioned lists in the event_queue, a ct_listener_t
  76   77   * contains a pointer to the ct_kevent_t it is currently positioned at
  77   78   * as well as a set of status flags and other administrative data.
  78   79   *
  79   80   * Each process has a list of contracts it owns, p_ct_held; a pointer
  80   81   * to the process contract it is a member of, p_ct_process; the linkage
  81   82   * for that membership, p_ct_member; and an array of event queue
  82   83   * structures representing the process bundle queues.
  83   84   *
  84   85   * Each LWP has an array of its active templates, lwp_ct_active; and
  85   86   * the most recently created contracts, lwp_ct_latest.
  86   87   *
  87   88   * A process contract has a list of member processes and a list of
  88   89   * inherited contracts.
  89   90   *
  90   91   * There is a system-wide list of all contracts, as well as per-type
  91   92   * lists of contracts.
  92   93   *
  93   94   * Lock ordering overview
  94   95   * ----------------------
  95   96   *
  96   97   * Locks at the top are taken first:
  97   98   *
  98   99   *                   ct_evtlock
  99  100   *                   regent ct_lock
 100  101   *                   member ct_lock
 101  102   *                   pidlock
 102  103   *                   p_lock
 103  104   *    contract ctq_lock         contract_lock
 104  105   *    pbundle ctq_lock
 105  106   *    cte_lock
 106  107   *                   ct_reflock
 107  108   *
 108  109   * contract_lock and ctq_lock/cte_lock are not currently taken at the
 109  110   * same time.
 110  111   *
 111  112   * Reference counting and locking
 112  113   * ------------------------------
 113  114   *
 114  115   * A contract has a reference count, protected by ct_reflock.
 115  116   * (ct_reflock is also used in a couple other places where atomic
 116  117   * access to a variable is needed in an innermost context).  A process
 117  118   * maintains a hold on each contract it owns.  A process contract has a
 118  119   * hold on each contract it has inherited.  Each event has a hold on
 119  120   * the contract which generated it.  Process contract templates have
 120  121   * holds on the contracts referred to by their transfer terms.  CTFS
 121  122   * contract directory nodes have holds on contracts.  Lastly, various
 122  123   * code paths may temporarily take holds on contracts to prevent them
 123  124   * from disappearing while other processing is going on.  It is
 124  125   * important to note that the global contract lists do not hold
 125  126   * references on contracts; a contract is removed from these structures
 126  127   * atomically with the release of its last reference.
 127  128   *
 128  129   * At a given point in time, a contract can either be owned by a
 129  130   * process, inherited by a regent process contract, or orphaned.  A
 130  131   * contract_t's owner and regent pointers, ct_owner and ct_regent, are
 131  132   * protected by its ct_lock.  The linkage in the holder's (holder =
 132  133   * owner or regent) list of contracts, ct_ctlist, is protected by
 133  134   * whatever lock protects the holder's data structure.  In order for
 134  135   * these two directions to remain consistent, changing the holder of a
 135  136   * contract requires that both locks be held.
 136  137   *
 137  138   * Events also have reference counts.  There is one hold on an event
 138  139   * per queue it is present on, in addition to those needed for the
 139  140   * usual sundry reasons.  Individual listeners are associated with
 140  141   * specific queues, and increase a queue-specific reference count
 141  142   * stored in the ct_member_t structure.
 142  143   *
 143  144   * The dynamic contents of an event (reference count and flags) are
 144  145   * protected by its cte_lock, while the contents of the embedded
 145  146   * ct_member_t structures are protected by the locks of the queues they
 146  147   * are linked into.  A ct_listener_t's contents are also protected by
 147  148   * its event queue's ctq_lock.
 148  149   *
 149  150   * Resource controls
 150  151   * -----------------
 151  152   *
 152  153   * Control:      project.max-contracts (rc_project_contract)
 153  154   * Description:  Maximum number of contracts allowed to a project.
 154  155   *
 155  156   *   When a contract is created, the project's allocation is tested and
 156  157   *   (assuming success) increased.  When the last reference to a
 157  158   *   contract is released, the creating project's allocation is
 158  159   *   decreased.
 159  160   */
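
The lock-ordering and reference-counting rules above condense into a small idiom, sketched below. This is illustrative only: the helper and its surrounding logic are hypothetical, while contract_hold(), contract_rele(), and the lock names are the real ones defined in this file.

/*
 * Illustrative sketch (hypothetical helper): re-homing a member
 * contract under its regent.  A hold pins the contract first
 * (contract_hold takes only the innermost ct_reflock); the remaining
 * locks are then taken top-down per the ordering diagram above:
 * regent ct_lock, member ct_lock, then the holder's p_lock.
 */
static void
example_rehome(contract_t *regent, contract_t *member, proc_t *p)
{
	contract_hold(member);			/* keep member from vanishing */

	mutex_enter(&regent->ct_lock);		/* regent before member */
	mutex_enter(&member->ct_lock);
	mutex_enter(&p->p_lock);		/* holder's lock after ct_lock */

	member->ct_owner = NULL;		/* both locks held, as required */

	mutex_exit(&p->p_lock);
	mutex_exit(&member->ct_lock);
	mutex_exit(&regent->ct_lock);

	contract_rele(member);			/* may drop the last reference */
}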
 160  161  
 161  162  #include <sys/mutex.h>
 162  163  #include <sys/debug.h>
 163  164  #include <sys/types.h>
 164  165  #include <sys/param.h>
 165  166  #include <sys/kmem.h>
 166  167  #include <sys/thread.h>
 167  168  #include <sys/id_space.h>
 168  169  #include <sys/avl.h>
 169  170  #include <sys/list.h>
 170  171  #include <sys/sysmacros.h>
 171  172  #include <sys/proc.h>
 172  173  #include <sys/ctfs.h>
 173  174  #include <sys/contract_impl.h>
 174  175  #include <sys/contract/process_impl.h>
 175  176  #include <sys/dditypes.h>
 176  177  #include <sys/contract/device_impl.h>
 177  178  #include <sys/systm.h>
 178  179  #include <sys/atomic.h>
 179  180  #include <sys/cmn_err.h>
 180  181  #include <sys/model.h>
 181  182  #include <sys/policy.h>
 182  183  #include <sys/zone.h>
 183  184  #include <sys/task.h>
 184  185  #include <sys/ddi.h>
 185  186  #include <sys/sunddi.h>
 186  187  
 187  188  extern rctl_hndl_t rc_project_contract;
 188  189  
 189  190  static id_space_t       *contract_ids;
 190  191  static avl_tree_t       contract_avl;
 191  192  static kmutex_t         contract_lock;
 192  193  
 193  194  int                     ct_ntypes = CTT_MAXTYPE;
 194  195  static ct_type_t        *ct_types_static[CTT_MAXTYPE];
 195  196  ct_type_t               **ct_types = ct_types_static;
 196  197  int                     ct_debug;
 197  198  
 198  199  static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int);
 199  200  static void cte_queue_destroy(ct_equeue_t *);
 200  201  static void cte_queue_drain(ct_equeue_t *, int);
 201  202  static void cte_trim(ct_equeue_t *, contract_t *);
 202  203  static void cte_copy(ct_equeue_t *, ct_equeue_t *);
 203  204  
 204  205  /*
 205  206   * contract_compar
 206  207   *
 207  208   * A contract comparator which sorts on contract ID.
 208  209   */
 209  210  int
 210  211  contract_compar(const void *x, const void *y)
 211  212  {
 212  213          const contract_t *ct1 = x;
 213  214          const contract_t *ct2 = y;
 214  215  
 215  216          if (ct1->ct_id < ct2->ct_id)
 216  217                  return (-1);
 217  218          if (ct1->ct_id > ct2->ct_id)
 218  219                  return (1);
 219  220          return (0);
 220  221  }
 221  222  
 222  223  /*
 223  224   * contract_init
 224  225   *
 225  226   * Initializes the contract subsystem, the specific contract types, and
 226  227   * process 0.
 227  228   */
 228  229  void
 229  230  contract_init(void)
 230  231  {
 231  232          /*
 232  233           * Initialize contract subsystem.
 233  234           */
 234  235          contract_ids = id_space_create("contracts", 1, INT_MAX);
 235  236          avl_create(&contract_avl, contract_compar, sizeof (contract_t),
 236  237              offsetof(contract_t, ct_ctavl));
 237  238          mutex_init(&contract_lock, NULL, MUTEX_DEFAULT, NULL);
 238  239  
 239  240          /*
 240  241           * Initialize contract types.
 241  242           */
 242  243          contract_process_init();
 243  244          contract_device_init();
 244  245  
 245  246          /*
 246  247           * Initialize p0/lwp0 contract state.
 247  248           */
 248  249          avl_create(&p0.p_ct_held, contract_compar, sizeof (contract_t),
 249  250              offsetof(contract_t, ct_ctlist));
 250  251  }
 251  252  
 252  253  /*
 253  254   * contract_dtor
 254  255   *
 255  256   * Performs basic destruction of the common portions of a contract.
 256  257   * Called from the failure path of contract_ctor and from
 257  258   * contract_rele.
 258  259   */
 259  260  static void
 260  261  contract_dtor(contract_t *ct)
 261  262  {
 262  263          cte_queue_destroy(&ct->ct_events);
 263  264          list_destroy(&ct->ct_vnodes);
 264  265          mutex_destroy(&ct->ct_reflock);
 265  266          mutex_destroy(&ct->ct_lock);
 266  267          mutex_destroy(&ct->ct_evtlock);
 267  268  }
 268  269  
 269  270  /*
 270  271   * contract_ctor
 271  272   *
 272  273   * Called by a contract type to initialize a contract.  Fails if the
 273  274   * max-contract resource control would have been exceeded.  After a
 274  275   * successful call to contract_ctor, the contract is unlocked and
 275  276   * visible in all namespaces; any type-specific initialization should
 276  277   * be completed before calling contract_ctor.  Returns 0 on success.
 277  278   *
 278  279   * Because not all callers can tolerate failure, a 0 value for canfail
  279  280   * instructs contract_ctor to ignore the project.max-contracts resource
 280  281   * control.  Obviously, this "out" should only be employed by callers
 281  282   * who are sufficiently constrained in other ways (e.g. newproc).
 282  283   */
 283  284  int
 284  285  contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data,
 285  286      ctflags_t flags, proc_t *author, int canfail)
 286  287  {
 287  288          avl_index_t where;
 288  289          klwp_t *curlwp = ttolwp(curthread);
 289  290  
 290      -        ASSERT(author == curproc);
      291 +        /*
      292 +         * It's possible that author is not curproc if the zone is creating
      293 +         * a new process as a child of zsched.
      294 +         */
 291  295  
 292  296          mutex_init(&ct->ct_lock, NULL, MUTEX_DEFAULT, NULL);
 293  297          mutex_init(&ct->ct_reflock, NULL, MUTEX_DEFAULT, NULL);
 294  298          mutex_init(&ct->ct_evtlock, NULL, MUTEX_DEFAULT, NULL);
 295  299          ct->ct_id = id_alloc(contract_ids);
 296  300  
 297  301          cte_queue_create(&ct->ct_events, CTEL_CONTRACT, 20, 0);
 298  302          list_create(&ct->ct_vnodes, sizeof (contract_vnode_t),
 299  303              offsetof(contract_vnode_t, ctv_node));
 300  304  
 301  305          /*
 302  306           * Instance data
 303  307           */
 304  308          ct->ct_ref = 2;         /* one for the holder, one for "latest" */
 305  309          ct->ct_cuid = crgetuid(CRED());
 306  310          ct->ct_type = type;
 307  311          ct->ct_data = data;
 308  312          gethrestime(&ct->ct_ctime);
 309  313          ct->ct_state = CTS_OWNED;
 310  314          ct->ct_flags = flags;
 311  315          ct->ct_regent = author->p_ct_process ?
 312  316              &author->p_ct_process->conp_contract : NULL;
 313  317          ct->ct_ev_info = tmpl->ctmpl_ev_info;
 314  318          ct->ct_ev_crit = tmpl->ctmpl_ev_crit;
 315  319          ct->ct_cookie = tmpl->ctmpl_cookie;
 316  320          ct->ct_owner = author;
 317  321          ct->ct_ntime.ctm_total = -1;
 318  322          ct->ct_qtime.ctm_total = -1;
 319  323          ct->ct_nevent = NULL;
 320  324  
 321  325          /*
 322  326           * Test project.max-contracts.
 323  327           */
 324  328          mutex_enter(&author->p_lock);
 325  329          mutex_enter(&contract_lock);
 326  330          if (canfail && rctl_test(rc_project_contract,
 327  331              author->p_task->tk_proj->kpj_rctls, author, 1,
 328  332              RCA_SAFE) & RCT_DENY) {
 329  333                  id_free(contract_ids, ct->ct_id);
 330  334                  mutex_exit(&contract_lock);
 331  335                  mutex_exit(&author->p_lock);
 332  336                  ct->ct_events.ctq_flags |= CTQ_DEAD;
 333  337                  contract_dtor(ct);
 334  338                  return (1);
 335  339          }
 336  340          ct->ct_proj = author->p_task->tk_proj;
 337  341          ct->ct_proj->kpj_data.kpd_contract++;
 338  342          (void) project_hold(ct->ct_proj);
 339  343          mutex_exit(&contract_lock);
 340  344  
 341  345          /*
 342  346           * Insert into holder's avl of contracts.
 343  347           * We use an avl not because order is important, but because
 344  348           * readdir of /proc/contracts requires we be able to use a
 345  349           * scalar as an index into the process's list of contracts
 346  350           */
 347  351          ct->ct_zoneid = author->p_zone->zone_id;
 348  352          ct->ct_czuniqid = ct->ct_mzuniqid = author->p_zone->zone_uniqid;
 349  353          VERIFY(avl_find(&author->p_ct_held, ct, &where) == NULL);
 350  354          avl_insert(&author->p_ct_held, ct, where);
 351  355          mutex_exit(&author->p_lock);
 352  356  
 353  357          /*
 354  358           * Insert into global contract AVL
 355  359           */
 356  360          mutex_enter(&contract_lock);
 357  361          VERIFY(avl_find(&contract_avl, ct, &where) == NULL);
 358  362          avl_insert(&contract_avl, ct, where);
 359  363          mutex_exit(&contract_lock);
 360  364  
 361  365          /*
 362  366           * Insert into type AVL
 363  367           */
 364  368          mutex_enter(&type->ct_type_lock);
 365  369          VERIFY(avl_find(&type->ct_type_avl, ct, &where) == NULL);
 366  370          avl_insert(&type->ct_type_avl, ct, where);
 367  371          type->ct_type_timestruc = ct->ct_ctime;
 368  372          mutex_exit(&type->ct_type_lock);
 369  373  
 370  374          if (curlwp->lwp_ct_latest[type->ct_type_index])
 371  375                  contract_rele(curlwp->lwp_ct_latest[type->ct_type_index]);
 372  376          curlwp->lwp_ct_latest[type->ct_type_index] = ct;
 373  377  
 374  378          return (0);
 375  379  }
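
As a usage sketch (all example_* names here are hypothetical; the real callers are the process and device contract types), a type's create path finishes its own initialization before calling contract_ctor, since the contract becomes visible in all namespaces on success:

/*
 * Hypothetical caller of contract_ctor.  canfail = 1 permits
 * project.max-contracts to deny the allocation; on failure
 * contract_ctor has already torn down the common state, so the
 * caller need only free its own wrapper.
 */
static ct_type_t *example_type;		/* hypothetical, from contract_type_init */

static int
example_create(ct_template_t *tmpl, proc_t *author, example_contract_t **ectp)
{
	example_contract_t *ect = kmem_zalloc(sizeof (*ect), KM_SLEEP);

	/* ... type-specific initialization of ect ... */

	if (contract_ctor(&ect->ect_contract, example_type, tmpl, ect, 0,
	    author, 1) != 0) {
		kmem_free(ect, sizeof (*ect));
		return (EAGAIN);
	}
	*ectp = ect;
	return (0);
}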
 376  380  
 377  381  /*
 378  382   * contract_rele
 379  383   *
 380  384   * Releases a reference to a contract.  If the caller had the last
 381  385   * reference, the contract is removed from all namespaces, its
 382  386   * allocation against the max-contracts resource control is released,
 383  387   * and the contract type's free entry point is invoked for any
 384  388   * type-specific deconstruction and to (presumably) free the object.
 385  389   */
 386  390  void
 387  391  contract_rele(contract_t *ct)
 388  392  {
 389  393          uint64_t nref;
 390  394  
 391  395          mutex_enter(&ct->ct_reflock);
 392  396          ASSERT(ct->ct_ref > 0);
 393  397          nref = --ct->ct_ref;
 394  398          mutex_exit(&ct->ct_reflock);
 395  399          if (nref == 0) {
 396  400                  /*
 397  401                   * ct_owner is cleared when it drops its reference.
 398  402                   */
 399  403                  ASSERT(ct->ct_owner == NULL);
 400  404                  ASSERT(ct->ct_evcnt == 0);
 401  405  
 402  406                  /*
 403  407                   * Remove from global contract AVL
 404  408                   */
 405  409                  mutex_enter(&contract_lock);
 406  410                  avl_remove(&contract_avl, ct);
 407  411                  mutex_exit(&contract_lock);
 408  412  
 409  413                  /*
 410  414                   * Remove from type AVL
 411  415                   */
 412  416                  mutex_enter(&ct->ct_type->ct_type_lock);
 413  417                  avl_remove(&ct->ct_type->ct_type_avl, ct);
 414  418                  mutex_exit(&ct->ct_type->ct_type_lock);
 415  419  
 416  420                  /*
 417  421                   * Release the contract's ID
 418  422                   */
 419  423                  id_free(contract_ids, ct->ct_id);
 420  424  
 421  425                  /*
 422  426                   * Release project hold
 423  427                   */
 424  428                  mutex_enter(&contract_lock);
 425  429                  ct->ct_proj->kpj_data.kpd_contract--;
 426  430                  project_rele(ct->ct_proj);
 427  431                  mutex_exit(&contract_lock);
 428  432  
 429  433                  /*
 430  434                   * Free the contract
 431  435                   */
 432  436                  contract_dtor(ct);
 433  437                  ct->ct_type->ct_type_ops->contop_free(ct);
 434  438          }
 435  439  }
 436  440  
 437  441  /*
 438  442   * contract_hold
 439  443   *
 440  444   * Adds a reference to a contract
 441  445   */
 442  446  void
 443  447  contract_hold(contract_t *ct)
 444  448  {
 445  449          mutex_enter(&ct->ct_reflock);
 446  450          ASSERT(ct->ct_ref < UINT64_MAX);
 447  451          ct->ct_ref++;
 448  452          mutex_exit(&ct->ct_reflock);
 449  453  }
 450  454  
 451  455  /*
 452  456   * contract_getzuniqid
 453  457   *
 454  458   * Get a contract's zone unique ID.  Needed because 64-bit reads and
 455  459   * writes aren't atomic on x86.  Since there are contexts where we are
 456  460   * unable to take ct_lock, we instead use ct_reflock; in actuality any
 457  461   * lock would do.
 458  462   */
 459  463  uint64_t
 460  464  contract_getzuniqid(contract_t *ct)
 461  465  {
 462  466          uint64_t zuniqid;
 463  467  
 464  468          mutex_enter(&ct->ct_reflock);
 465  469          zuniqid = ct->ct_mzuniqid;
 466  470          mutex_exit(&ct->ct_reflock);
 467  471  
 468  472          return (zuniqid);
 469  473  }
 470  474  
 471  475  /*
 472  476   * contract_setzuniqid
 473  477   *
 474  478   * Sets a contract's zone unique ID.  See contract_getzuniqid.
 475  479   */
 476  480  void
 477  481  contract_setzuniqid(contract_t *ct, uint64_t zuniqid)
 478  482  {
 479  483          mutex_enter(&ct->ct_reflock);
 480  484          ct->ct_mzuniqid = zuniqid;
 481  485          mutex_exit(&ct->ct_reflock);
 482  486  }
 483  487  
 484  488  /*
 485  489   * contract_abandon
 486  490   *
 487  491   * Abandons the specified contract.  If "explicit" is clear, the
 488  492   * contract was implicitly abandoned (by process exit) and should be
 489  493   * inherited if its terms allow it and its owner was a member of a
 490  494   * regent contract.  Otherwise, the contract type's abandon entry point
 491  495   * is invoked to either destroy or orphan the contract.
 492  496   */
 493  497  int
 494  498  contract_abandon(contract_t *ct, proc_t *p, int explicit)
 495  499  {
 496  500          ct_equeue_t *q = NULL;
 497  501          contract_t *parent = &p->p_ct_process->conp_contract;
 498  502          int inherit = 0;
 499  503  
 500  504          VERIFY(p == curproc);
 501  505  
 502  506          mutex_enter(&ct->ct_lock);
 503  507  
 504  508          /*
 505  509           * Multiple contract locks are taken contract -> subcontract.
 506  510           * Check if the contract will be inherited so we can acquire
 507  511           * all the necessary locks before making sensitive changes.
 508  512           */
 509  513          if (!explicit && (ct->ct_flags & CTF_INHERIT) &&
 510  514              contract_process_accept(parent)) {
 511  515                  mutex_exit(&ct->ct_lock);
 512  516                  mutex_enter(&parent->ct_lock);
 513  517                  mutex_enter(&ct->ct_lock);
 514  518                  inherit = 1;
 515  519          }
 516  520  
 517  521          if (ct->ct_owner != p) {
 518  522                  mutex_exit(&ct->ct_lock);
 519  523                  if (inherit)
 520  524                          mutex_exit(&parent->ct_lock);
 521  525                  return (EINVAL);
 522  526          }
 523  527  
 524  528          mutex_enter(&p->p_lock);
 525  529          if (explicit)
 526  530                  avl_remove(&p->p_ct_held, ct);
 527  531          ct->ct_owner = NULL;
 528  532          mutex_exit(&p->p_lock);
 529  533  
 530  534          /*
 531  535           * Since we can't call cte_trim with the contract lock held,
 532  536           * we grab the queue pointer here.
 533  537           */
 534  538          if (p->p_ct_equeue)
 535  539                  q = p->p_ct_equeue[ct->ct_type->ct_type_index];
 536  540  
 537  541          /*
 538  542           * contop_abandon may destroy the contract so we rely on it to
 539  543           * drop ct_lock.  We retain a reference on the contract so that
 540  544           * the cte_trim which follows functions properly.  Even though
 541  545           * cte_trim doesn't dereference the contract pointer, it is
 542  546           * still necessary to retain a reference to the contract so
 543  547           * that we don't trim events which are sent by a subsequently
 544  548           * allocated contract infortuitously located at the same address.
 545  549           */
 546  550          contract_hold(ct);
 547  551  
 548  552          if (inherit) {
 549  553                  ct->ct_state = CTS_INHERITED;
 550  554                  VERIFY(ct->ct_regent == parent);
 551  555                  contract_process_take(parent, ct);
 552  556  
 553  557                  /*
 554  558                   * We are handing off the process's reference to the
 555  559                   * parent contract.  For this reason, the order in
 556  560                   * which we drop the contract locks is also important.
 557  561                   */
 558  562                  mutex_exit(&ct->ct_lock);
 559  563                  mutex_exit(&parent->ct_lock);
 560  564          } else {
 561  565                  ct->ct_regent = NULL;
 562  566                  ct->ct_type->ct_type_ops->contop_abandon(ct);
 563  567          }
 564  568  
 565  569          /*
 566  570           * ct_lock has been dropped; we can safely trim the event
 567  571           * queue now.
 568  572           */
 569  573          if (q) {
 570  574                  mutex_enter(&q->ctq_lock);
 571  575                  cte_trim(q, ct);
 572  576                  mutex_exit(&q->ctq_lock);
 573  577          }
 574  578  
 575  579          contract_rele(ct);
 576  580  
 577  581          return (0);
 578  582  }
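
The locking preamble above (repeated in contract_adopt below) is an instance of a drop-and-revalidate idiom, shown here in isolation with a hypothetical predicate:

/*
 * Illustrative idiom only.  Because a regent's ct_lock must be taken
 * before a member's, a decision reached under the member's lock alone
 * forces us to back out, reacquire in order, and then revalidate,
 * since the state may have changed while ct_lock was dropped.
 */
mutex_enter(&ct->ct_lock);
if (will_need_parent(ct)) {		/* hypothetical predicate */
	mutex_exit(&ct->ct_lock);	/* back out... */
	mutex_enter(&parent->ct_lock);	/* ...parent first... */
	mutex_enter(&ct->ct_lock);	/* ...member second */
	/* recheck, e.g. that ct->ct_owner is still us */
}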
 579  583  
 580  584  int
 581  585  contract_newct(contract_t *ct)
 582  586  {
 583  587          return (ct->ct_type->ct_type_ops->contop_newct(ct));
 584  588  }
 585  589  
 586  590  /*
 587  591   * contract_adopt
 588  592   *
 589  593   * Adopts a contract.  After a successful call to this routine, the
 590  594   * previously inherited contract will belong to the calling process,
 591  595   * and its events will have been appended to its new owner's process
 592  596   * bundle queue.
 593  597   */
 594  598  int
 595  599  contract_adopt(contract_t *ct, proc_t *p)
 596  600  {
 597  601          avl_index_t where;
 598  602          ct_equeue_t *q;
 599  603          contract_t *parent;
 600  604  
 601  605          ASSERT(p == curproc);
 602  606  
 603  607          /*
 604  608           * Ensure the process has an event queue.  Checked by ASSERTs
 605  609           * below.
 606  610           */
 607  611          (void) contract_type_pbundle(ct->ct_type, p);
 608  612  
 609  613          mutex_enter(&ct->ct_lock);
 610  614          parent = ct->ct_regent;
 611  615          if (ct->ct_state != CTS_INHERITED ||
 612  616              &p->p_ct_process->conp_contract != parent ||
 613  617              p->p_zone->zone_uniqid != ct->ct_czuniqid) {
 614  618                  mutex_exit(&ct->ct_lock);
 615  619                  return (EINVAL);
 616  620          }
 617  621  
 618  622          /*
 619  623           * Multiple contract locks are taken contract -> subcontract.
 620  624           */
 621  625          mutex_exit(&ct->ct_lock);
 622  626          mutex_enter(&parent->ct_lock);
 623  627          mutex_enter(&ct->ct_lock);
 624  628  
 625  629          /*
 626  630           * It is possible that the contract was adopted by someone else
 627  631           * while its lock was dropped.  It isn't possible for the
 628  632           * contract to have been inherited by a different regent
 629  633           * contract.
 630  634           */
 631  635          if (ct->ct_state != CTS_INHERITED) {
 632  636                  mutex_exit(&parent->ct_lock);
 633  637                  mutex_exit(&ct->ct_lock);
 634  638                  return (EBUSY);
 635  639          }
 636  640          ASSERT(ct->ct_regent == parent);
 637  641  
 638  642          ct->ct_state = CTS_OWNED;
 639  643  
 640  644          contract_process_adopt(ct, p);
 641  645  
 642  646          mutex_enter(&p->p_lock);
 643  647          ct->ct_owner = p;
 644  648          VERIFY(avl_find(&p->p_ct_held, ct, &where) == NULL);
 645  649          avl_insert(&p->p_ct_held, ct, where);
 646  650          mutex_exit(&p->p_lock);
 647  651  
 648  652          ASSERT(ct->ct_owner->p_ct_equeue);
 649  653          ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
 650  654          q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
 651  655          cte_copy(&ct->ct_events, q);
 652  656          mutex_exit(&ct->ct_lock);
 653  657  
 654  658          return (0);
 655  659  }
 656  660  
 657  661  /*
 658  662   * contract_ack
 659  663   *
 660  664   * Acknowledges receipt of a critical event.
 661  665   */
 662  666  int
 663  667  contract_ack(contract_t *ct, uint64_t evid, int ack)
 664  668  {
 665  669          ct_kevent_t *ev;
 666  670          list_t *queue = &ct->ct_events.ctq_events;
 667  671          int error = ESRCH;
 668  672          int nego = 0;
 669  673          uint_t evtype;
 670  674  
 671  675          ASSERT(ack == CT_ACK || ack == CT_NACK);
 672  676  
 673  677          mutex_enter(&ct->ct_lock);
 674  678          mutex_enter(&ct->ct_events.ctq_lock);
 675  679          /*
 676  680           * We are probably ACKing something near the head of the queue.
 677  681           */
 678  682          for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
 679  683                  if (ev->cte_id == evid) {
 680  684                          if (ev->cte_flags & CTE_NEG)
 681  685                                  nego = 1;
 682  686                          else if (ack == CT_NACK)
 683  687                                  break;
 684  688                          if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
 685  689                                  ev->cte_flags |= CTE_ACK;
 686  690                                  ct->ct_evcnt--;
 687  691                                  evtype = ev->cte_type;
 688  692                                  error = 0;
 689  693                          }
 690  694                          break;
 691  695                  }
 692  696          }
 693  697          mutex_exit(&ct->ct_events.ctq_lock);
 694  698          mutex_exit(&ct->ct_lock);
 695  699  
 696  700          /*
 697  701           * Not all critical events are negotiation events; however,
 698  702           * every negotiation event is a critical event.  NEGEND events
 699  703           * are critical events but are not negotiation events.
 700  704           */
 701  705          if (error || !nego)
 702  706                  return (error);
 703  707  
 704  708          if (ack == CT_ACK)
 705  709                  error = ct->ct_type->ct_type_ops->contop_ack(ct, evtype, evid);
 706  710          else
 707  711                  error = ct->ct_type->ct_type_ops->contop_nack(ct, evtype, evid);
 708  712  
 709  713          return (error);
 710  714  }
 711  715  
 712  716  /*ARGSUSED*/
 713  717  int
 714  718  contract_ack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
 715  719  {
 716  720          cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u",
 717  721              ct->ct_id);
 718  722          return (ENOSYS);
 719  723  }
 720  724  
 721  725  /*ARGSUSED*/
 722  726  int
 723  727  contract_qack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
 724  728  {
 725  729          cmn_err(CE_PANIC, "contract_qack_inval: unsupported call: ctid: %u",
 726  730              ct->ct_id);
 727  731          return (ENOSYS);
 728  732  }
 729  733  
 730  734  /*ARGSUSED*/
 731  735  int
 732  736  contract_qack_notsup(contract_t *ct, uint_t evtype, uint64_t evid)
 733  737  {
 734  738          return (ERANGE);
 735  739  }
 736  740  
 737  741  /*
 738  742   * contract_qack
 739  743   *
 740  744   * Asks that negotiations be extended by another time quantum
 741  745   */
 742  746  int
 743  747  contract_qack(contract_t *ct, uint64_t evid)
 744  748  {
 745  749          ct_kevent_t *ev;
 746  750          list_t *queue = &ct->ct_events.ctq_events;
 747  751          int nego = 0;
 748  752          uint_t evtype;
 749  753  
 750  754          mutex_enter(&ct->ct_lock);
 751  755          mutex_enter(&ct->ct_events.ctq_lock);
 752  756  
 753  757          for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
 754  758                  if (ev->cte_id == evid) {
 755  759                          if ((ev->cte_flags & (CTE_NEG | CTE_ACK)) == CTE_NEG) {
 756  760                                  evtype = ev->cte_type;
 757  761                                  nego = 1;
 758  762                          }
 759  763                          break;
 760  764                  }
 761  765          }
 762  766          mutex_exit(&ct->ct_events.ctq_lock);
 763  767          mutex_exit(&ct->ct_lock);
 764  768  
 765  769          /*
 766  770           * Only a negotiated event (which is by definition also a critical
 767  771           * event) which has not yet been acknowledged can provide
 768  772           * time quanta to a negotiating owner process.
 769  773           */
 770  774          if (!nego)
 771  775                  return (ESRCH);
 772  776  
 773  777          return (ct->ct_type->ct_type_ops->contop_qack(ct, evtype, evid));
 774  778  }
 775  779  
 776  780  /*
 777  781   * contract_orphan
 778  782   *
 779  783   * Icky-poo.  This is a process-contract special, used to ACK all
 780  784   * critical messages when a contract is orphaned.
 781  785   */
 782  786  void
 783  787  contract_orphan(contract_t *ct)
 784  788  {
 785  789          ct_kevent_t *ev;
 786  790          list_t *queue = &ct->ct_events.ctq_events;
 787  791  
 788  792          ASSERT(MUTEX_HELD(&ct->ct_lock));
 789  793          ASSERT(ct->ct_state != CTS_ORPHAN);
 790  794  
 791  795          mutex_enter(&ct->ct_events.ctq_lock);
 792  796          ct->ct_state = CTS_ORPHAN;
 793  797          for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
 794  798                  if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
 795  799                          ev->cte_flags |= CTE_ACK;
 796  800                          ct->ct_evcnt--;
 797  801                  }
 798  802          }
 799  803          mutex_exit(&ct->ct_events.ctq_lock);
 800  804  
 801  805          ASSERT(ct->ct_evcnt == 0);
 802  806  }
 803  807  
 804  808  /*
 805  809   * contract_destroy
 806  810   *
 807  811   * Explicit contract destruction.  Called when contract is empty.
 808  812   * The contract will actually stick around until all of its events are
 809  813   * removed from the bundle and process bundle queues, and all fds
 810  814   * which refer to it are closed.  See contract_dtor if you are looking
 811  815   * for what destroys the contract structure.
 812  816   */
 813  817  void
 814  818  contract_destroy(contract_t *ct)
 815  819  {
 816  820          ASSERT(MUTEX_HELD(&ct->ct_lock));
 817  821          ASSERT(ct->ct_state != CTS_DEAD);
 818  822          ASSERT(ct->ct_owner == NULL);
 819  823  
 820  824          ct->ct_state = CTS_DEAD;
 821  825          cte_queue_drain(&ct->ct_events, 1);
 822  826          mutex_exit(&ct->ct_lock);
 823  827          mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
 824  828          cte_trim(&ct->ct_type->ct_type_events, ct);
 825  829          mutex_exit(&ct->ct_type->ct_type_events.ctq_lock);
 826  830          mutex_enter(&ct->ct_lock);
 827  831          ct->ct_type->ct_type_ops->contop_destroy(ct);
 828  832          mutex_exit(&ct->ct_lock);
 829  833          contract_rele(ct);
 830  834  }
 831  835  
 832  836  /*
 833  837   * contract_vnode_get
 834  838   *
 835  839   * Obtains the contract directory vnode for this contract, if there is
 836  840   * one.  The caller must VN_RELE the vnode when they are through using
 837  841   * it.
 838  842   */
 839  843  vnode_t *
 840  844  contract_vnode_get(contract_t *ct, vfs_t *vfsp)
 841  845  {
 842  846          contract_vnode_t *ctv;
 843  847          vnode_t *vp = NULL;
 844  848  
 845  849          mutex_enter(&ct->ct_lock);
 846  850          for (ctv = list_head(&ct->ct_vnodes); ctv != NULL;
 847  851              ctv = list_next(&ct->ct_vnodes, ctv))
 848  852                  if (ctv->ctv_vnode->v_vfsp == vfsp) {
 849  853                          vp = ctv->ctv_vnode;
 850  854                          VN_HOLD(vp);
 851  855                          break;
 852  856                  }
 853  857          mutex_exit(&ct->ct_lock);
 854  858          return (vp);
 855  859  }
 856  860  
 857  861  /*
 858  862   * contract_vnode_set
 859  863   *
 860  864   * Sets the contract directory vnode for this contract.  We don't hold
 861  865   * a reference on the vnode because we don't want to prevent it from
 862  866   * being freed.  The vnode's inactive entry point will take care of
 863  867   * notifying us when it should be removed.
 864  868   */
 865  869  void
 866  870  contract_vnode_set(contract_t *ct, contract_vnode_t *ctv, vnode_t *vnode)
 867  871  {
 868  872          mutex_enter(&ct->ct_lock);
 869  873          ctv->ctv_vnode = vnode;
 870  874          list_insert_head(&ct->ct_vnodes, ctv);
 871  875          mutex_exit(&ct->ct_lock);
 872  876  }
 873  877  
 874  878  /*
 875  879   * contract_vnode_clear
 876  880   *
 877  881   * Removes this vnode as the contract directory vnode for this
 878  882   * contract.  Called from a contract directory's inactive entry point,
 879  883   * this may return 0 indicating that the vnode gained another reference
 880  884   * because of a simultaneous call to contract_vnode_get.
 881  885   */
 882  886  int
 883  887  contract_vnode_clear(contract_t *ct, contract_vnode_t *ctv)
 884  888  {
 885  889          vnode_t *vp = ctv->ctv_vnode;
 886  890          int result;
 887  891  
 888  892          mutex_enter(&ct->ct_lock);
 889  893          mutex_enter(&vp->v_lock);
 890  894          if (vp->v_count == 1) {
 891  895                  list_remove(&ct->ct_vnodes, ctv);
 892  896                  result = 1;
 893  897          } else {
 894  898                  vp->v_count--;
 895  899                  result = 0;
 896  900          }
 897  901          mutex_exit(&vp->v_lock);
 898  902          mutex_exit(&ct->ct_lock);
 899  903  
 900  904          return (result);
 901  905  }
 902  906  
 903  907  /*
 904  908   * contract_exit
 905  909   *
 906  910   * Abandons all contracts held by process p, and drains process p's
 907  911   * bundle queues.  Called on process exit.
 908  912   */
 909  913  void
 910  914  contract_exit(proc_t *p)
 911  915  {
 912  916          contract_t *ct;
 913  917          void *cookie = NULL;
 914  918          int i;
 915  919  
 916  920          ASSERT(p == curproc);
 917  921  
 918  922          /*
 919  923           * Abandon held contracts.  contract_abandon knows enough not
 920  924           * to remove the contract from the list a second time.  We are
 921  925           * exiting, so no locks are needed here.  But because
 922  926           * contract_abandon will take p_lock, we need to make sure we
 923  927           * aren't holding it.
 924  928           */
 925  929          ASSERT(MUTEX_NOT_HELD(&p->p_lock));
 926  930          while ((ct = avl_destroy_nodes(&p->p_ct_held, &cookie)) != NULL)
 927  931                  VERIFY(contract_abandon(ct, p, 0) == 0);
 928  932  
 929  933          /*
 930  934           * Drain pbundles.  Because a process bundle queue could have
 931  935           * been passed to another process, they may not be freed right
 932  936           * away.
 933  937           */
 934  938          if (p->p_ct_equeue) {
 935  939                  for (i = 0; i < CTT_MAXTYPE; i++)
 936  940                          if (p->p_ct_equeue[i])
 937  941                                  cte_queue_drain(p->p_ct_equeue[i], 0);
 938  942                  kmem_free(p->p_ct_equeue, CTT_MAXTYPE * sizeof (ct_equeue_t *));
 939  943          }
 940  944  }
 941  945  
 942  946  static int
 943  947  get_time_left(struct ct_time *t)
 944  948  {
 945  949          clock_t ticks_elapsed;
 946  950          int secs_elapsed;
 947  951  
 948  952          if (t->ctm_total == -1)
 949  953                  return (-1);
 950  954  
 951  955          ticks_elapsed = ddi_get_lbolt() - t->ctm_start;
 952  956          secs_elapsed = t->ctm_total - (drv_hztousec(ticks_elapsed)/MICROSEC);
 953  957          return (secs_elapsed > 0 ? secs_elapsed : 0);
 954  958  }
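
To make the conversion concrete: drv_hztousec() turns clock ticks into microseconds, so dividing by MICROSEC yields whole seconds. A worked example, assuming hz = 100:

/*
 * ticks_elapsed = 500 ticks
 * drv_hztousec(500) = 5000000 microseconds
 * 5000000 / MICROSEC = 5 seconds elapsed
 * With ctm_total = 30, get_time_left() returns 30 - 5 = 25.
 * (Despite its name, secs_elapsed holds the seconds remaining.)
 */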
 955  959  
 956  960  /*
 957  961   * contract_status_common
 958  962   *
 959  963   * Populates a ct_status structure.  Used by contract types in their
 960  964   * status entry points and ctfs when only common information is
 961  965   * requested.
 962  966   */
 963  967  void
 964  968  contract_status_common(contract_t *ct, zone_t *zone, void *status,
 965  969      model_t model)
 966  970  {
 967  971          STRUCT_HANDLE(ct_status, lstatus);
 968  972  
 969  973          STRUCT_SET_HANDLE(lstatus, model, status);
 970  974          ASSERT(MUTEX_HELD(&ct->ct_lock));
 971  975          if (zone->zone_uniqid == GLOBAL_ZONEUNIQID ||
 972  976              zone->zone_uniqid == ct->ct_czuniqid) {
 973  977                  zone_t *czone;
 974  978                  zoneid_t zoneid = -1;
 975  979  
 976  980                  /*
 977  981                   * Contracts don't have holds on the zones they were
 978  982                   * created by.  If the contract's zone no longer
 979  983                   * exists, we say its zoneid is -1.
 980  984                   */
 981  985                  if (zone->zone_uniqid == ct->ct_czuniqid ||
 982  986                      ct->ct_czuniqid == GLOBAL_ZONEUNIQID) {
 983  987                          zoneid = ct->ct_zoneid;
 984  988                  } else if ((czone = zone_find_by_id(ct->ct_zoneid)) != NULL) {
 985  989                          if (czone->zone_uniqid == ct->ct_mzuniqid)
 986  990                                  zoneid = ct->ct_zoneid;
 987  991                          zone_rele(czone);
 988  992                  }
 989  993  
 990  994                  STRUCT_FSET(lstatus, ctst_zoneid, zoneid);
 991  995                  STRUCT_FSET(lstatus, ctst_holder,
 992  996                      (ct->ct_state == CTS_OWNED) ? ct->ct_owner->p_pid :
 993  997                      (ct->ct_state == CTS_INHERITED) ? ct->ct_regent->ct_id : 0);
 994  998                  STRUCT_FSET(lstatus, ctst_state, ct->ct_state);
 995  999          } else {
 996 1000                  /*
 997 1001                   * We are looking at a contract which was created by a
 998 1002                   * process outside of our zone.  We provide fake zone,
 999 1003                   * holder, and state information.
1000 1004                   */
1001 1005  
1002 1006                  STRUCT_FSET(lstatus, ctst_zoneid, zone->zone_id);
1003 1007                  /*
1004 1008                   * Since "zone" can't disappear until the calling ctfs
1005 1009                   * is unmounted, zone_zsched must be valid.
1006 1010                   */
1007 1011                  STRUCT_FSET(lstatus, ctst_holder, (ct->ct_state < CTS_ORPHAN) ?
1008 1012                      zone->zone_zsched->p_pid : 0);
1009 1013                  STRUCT_FSET(lstatus, ctst_state, (ct->ct_state < CTS_ORPHAN) ?
1010 1014                      CTS_OWNED : ct->ct_state);
1011 1015          }
1012 1016          STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt);
1013 1017          STRUCT_FSET(lstatus, ctst_ntime, get_time_left(&ct->ct_ntime));
1014 1018          STRUCT_FSET(lstatus, ctst_qtime, get_time_left(&ct->ct_qtime));
1015 1019          STRUCT_FSET(lstatus, ctst_nevid,
1016 1020              ct->ct_nevent ? ct->ct_nevent->cte_id : 0);
1017 1021          STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit);
1018 1022          STRUCT_FSET(lstatus, ctst_informative, ct->ct_ev_info);
1019 1023          STRUCT_FSET(lstatus, ctst_cookie, ct->ct_cookie);
1020 1024          STRUCT_FSET(lstatus, ctst_type, ct->ct_type->ct_type_index);
1021 1025          STRUCT_FSET(lstatus, ctst_id, ct->ct_id);
1022 1026  }
1023 1027  
1024 1028  /*
1025 1029   * contract_checkcred
1026 1030   *
1027 1031   * Determines if the specified contract is owned by a process with the
1028 1032   * same effective uid as the specified credential.  The caller must
1029 1033   * ensure that the uid spaces are the same.  Returns 1 on success.
1030 1034   */
1031 1035  static int
1032 1036  contract_checkcred(contract_t *ct, const cred_t *cr)
1033 1037  {
1034 1038          proc_t *p;
1035 1039          int fail = 1;
1036 1040  
1037 1041          mutex_enter(&ct->ct_lock);
1038 1042          if ((p = ct->ct_owner) != NULL) {
1039 1043                  mutex_enter(&p->p_crlock);
1040 1044                  fail = crgetuid(cr) != crgetuid(p->p_cred);
1041 1045                  mutex_exit(&p->p_crlock);
1042 1046          }
1043 1047          mutex_exit(&ct->ct_lock);
1044 1048  
1045 1049          return (!fail);
1046 1050  }
1047 1051  
1048 1052  /*
1049 1053   * contract_owned
1050 1054   *
1051 1055   * Determines if the specified credential can view an event generated
1052 1056   * by the specified contract.  If locked is set, the contract's ct_lock
1053 1057   * is held and the caller will need to do additional work to determine
1054 1058   * if they truly can see the event.  Returns 1 on success.
1055 1059   */
1056 1060  int
1057 1061  contract_owned(contract_t *ct, const cred_t *cr, int locked)
1058 1062  {
1059 1063          int owner, cmatch, zmatch;
1060 1064          uint64_t zuniqid, mzuniqid;
1061 1065          uid_t euid;
1062 1066  
1063 1067          ASSERT(locked || MUTEX_NOT_HELD(&ct->ct_lock));
1064 1068  
1065 1069          zuniqid = curproc->p_zone->zone_uniqid;
1066 1070          mzuniqid = contract_getzuniqid(ct);
1067 1071          euid = crgetuid(cr);
1068 1072  
1069 1073          /*
1070 1074           * owner: we own the contract
1071 1075           * cmatch: we are in the creator's (and holder's) zone and our
1072 1076           *   uid matches the creator's or holder's
1073 1077           * zmatch: we are in the effective zone of a contract created
1074 1078           *   in the global zone, and our uid matches that of the
1075 1079           *   virtualized holder's (zsched/kcred)
1076 1080           */
1077 1081          owner = (ct->ct_owner == curproc);
1078 1082          cmatch = (zuniqid == ct->ct_czuniqid) &&
1079 1083              ((ct->ct_cuid == euid) || (!locked && contract_checkcred(ct, cr)));
1080 1084          zmatch = (ct->ct_czuniqid != mzuniqid) && (zuniqid == mzuniqid) &&
1081 1085              (crgetuid(kcred) == euid);
1082 1086  
1083 1087          return (owner || cmatch || zmatch);
1084 1088  }
1085 1089  
1086 1090  
1087 1091  /*
1088 1092   * contract_type_init
1089 1093   *
1090 1094   * Called by contract types to register themselves with the contracts
1091 1095   * framework.
1092 1096   */
1093 1097  ct_type_t *
1094 1098  contract_type_init(ct_typeid_t type, const char *name, contops_t *ops,
1095 1099      ct_f_default_t *dfault)
1096 1100  {
1097 1101          ct_type_t *result;
1098 1102  
1099 1103          ASSERT(type < CTT_MAXTYPE);
1100 1104  
1101 1105          result = kmem_alloc(sizeof (ct_type_t), KM_SLEEP);
1102 1106  
1103 1107          mutex_init(&result->ct_type_lock, NULL, MUTEX_DEFAULT, NULL);
1104 1108          avl_create(&result->ct_type_avl, contract_compar, sizeof (contract_t),
1105 1109              offsetof(contract_t, ct_cttavl));
1106 1110          cte_queue_create(&result->ct_type_events, CTEL_BUNDLE, 20, 0);
1107 1111          result->ct_type_name = name;
1108 1112          result->ct_type_ops = ops;
1109 1113          result->ct_type_default = dfault;
1110 1114          result->ct_type_evid = 0;
1111 1115          gethrestime(&result->ct_type_timestruc);
1112 1116          result->ct_type_index = type;
1113 1117  
1114 1118          ct_types[type] = result;
1115 1119  
1116 1120          return (result);
1117 1121  }
1118 1122  
1119 1123  /*
1120 1124   * contract_type_count
1121 1125   *
1122 1126   * Obtains the number of contracts of a particular type.
1123 1127   */
1124 1128  int
1125 1129  contract_type_count(ct_type_t *type)
1126 1130  {
1127 1131          ulong_t count;
1128 1132  
1129 1133          mutex_enter(&type->ct_type_lock);
1130 1134          count = avl_numnodes(&type->ct_type_avl);
1131 1135          mutex_exit(&type->ct_type_lock);
1132 1136  
1133 1137          return (count);
1134 1138  }
1135 1139  
1136 1140  /*
1137 1141   * contract_type_max
1138 1142   *
 1139 1143   * Obtains the maximum contract id of a particular type.
1140 1144   */
1141 1145  ctid_t
1142 1146  contract_type_max(ct_type_t *type)
1143 1147  {
1144 1148          contract_t *ct;
1145 1149          ctid_t res;
1146 1150  
1147 1151          mutex_enter(&type->ct_type_lock);
1148 1152          ct = avl_last(&type->ct_type_avl);
1149 1153          res = ct ? ct->ct_id : -1;
1150 1154          mutex_exit(&type->ct_type_lock);
1151 1155  
1152 1156          return (res);
1153 1157  }
1154 1158  
1155 1159  /*
1156 1160   * contract_max
1157 1161   *
1158 1162   * Obtains the maximum contract id.
1159 1163   */
1160 1164  ctid_t
1161 1165  contract_max(void)
1162 1166  {
1163 1167          contract_t *ct;
1164 1168          ctid_t res;
1165 1169  
1166 1170          mutex_enter(&contract_lock);
1167 1171          ct = avl_last(&contract_avl);
1168 1172          res = ct ? ct->ct_id : -1;
1169 1173          mutex_exit(&contract_lock);
1170 1174  
1171 1175          return (res);
1172 1176  }
1173 1177  
1174 1178  /*
1175 1179   * contract_lookup_common
1176 1180   *
1177 1181   * Common code for contract_lookup and contract_type_lookup.  Takes a
1178 1182   * pointer to an AVL tree to search in.  Should be called with the
1179 1183   * appropriate tree-protecting lock held (unfortunately unassertable).
1180 1184   */
1181 1185  static ctid_t
1182 1186  contract_lookup_common(avl_tree_t *tree, uint64_t zuniqid, ctid_t current)
1183 1187  {
1184 1188          contract_t template, *ct;
1185 1189          avl_index_t where;
1186 1190          ctid_t res;
1187 1191  
1188 1192          template.ct_id = current;
1189 1193          ct = avl_find(tree, &template, &where);
1190 1194          if (ct == NULL)
1191 1195                  ct = avl_nearest(tree, where, AVL_AFTER);
1192 1196          if (zuniqid != GLOBAL_ZONEUNIQID)
1193 1197                  while (ct && (contract_getzuniqid(ct) != zuniqid))
1194 1198                          ct = AVL_NEXT(tree, ct);
1195 1199          res = ct ? ct->ct_id : -1;
1196 1200  
1197 1201          return (res);
1198 1202  }
1199 1203  
1200 1204  /*
1201 1205   * contract_type_lookup
1202 1206   *
1203 1207   * Returns the next type contract after the specified id, visible from
1204 1208   * the specified zone.
1205 1209   */
1206 1210  ctid_t
1207 1211  contract_type_lookup(ct_type_t *type, uint64_t zuniqid, ctid_t current)
1208 1212  {
1209 1213          ctid_t res;
1210 1214  
1211 1215          mutex_enter(&type->ct_type_lock);
1212 1216          res = contract_lookup_common(&type->ct_type_avl, zuniqid, current);
1213 1217          mutex_exit(&type->ct_type_lock);
1214 1218  
1215 1219          return (res);
1216 1220  }
1217 1221  
1218 1222  /*
1219 1223   * contract_lookup
1220 1224   *
1221 1225   * Returns the next contract after the specified id, visible from the
1222 1226   * specified zone.
1223 1227   */
1224 1228  ctid_t
1225 1229  contract_lookup(uint64_t zuniqid, ctid_t current)
1226 1230  {
1227 1231          ctid_t res;
1228 1232  
1229 1233          mutex_enter(&contract_lock);
1230 1234          res = contract_lookup_common(&contract_avl, zuniqid, current);
1231 1235          mutex_exit(&contract_lock);
1232 1236  
1233 1237          return (res);
1234 1238  }
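
Because contract_lookup returns the id of the first visible contract at or after "current" (or -1 when none remain), a caller can walk the namespace by restarting one past each hit; directory iteration in ctfs is this kind of consumer. A minimal sketch, with a hypothetical loop body:

	ctid_t id = 0;

	while ((id = contract_lookup(zuniqid, id)) != -1) {
		/* ... visit contract "id" ... */
		id++;
	}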
1235 1239  
1236 1240  /*
1237 1241   * contract_plookup
1238 1242   *
1239 1243   * Returns the next contract held by process p after the specified id,
1240 1244   * visible from the specified zone.  Made complicated by the fact that
1241 1245   * contracts visible in a zone but held by processes outside of the
1242 1246   * zone need to appear as being held by zsched to zone members.
1243 1247   */
1244 1248  ctid_t
1245 1249  contract_plookup(proc_t *p, ctid_t current, uint64_t zuniqid)
1246 1250  {
1247 1251          contract_t template, *ct;
1248 1252          avl_index_t where;
1249 1253          ctid_t res;
1250 1254  
1251 1255          template.ct_id = current;
1252 1256          if (zuniqid != GLOBAL_ZONEUNIQID &&
1253 1257              (p->p_flag & (SSYS|SZONETOP)) == (SSYS|SZONETOP)) {
1254 1258                  /* This is inelegant. */
1255 1259                  mutex_enter(&contract_lock);
1256 1260                  ct = avl_find(&contract_avl, &template, &where);
1257 1261                  if (ct == NULL)
1258 1262                          ct = avl_nearest(&contract_avl, where, AVL_AFTER);
1259 1263                  while (ct && !(ct->ct_state < CTS_ORPHAN &&
1260 1264                      contract_getzuniqid(ct) == zuniqid &&
1261 1265                      ct->ct_czuniqid == GLOBAL_ZONEUNIQID))
1262 1266                          ct = AVL_NEXT(&contract_avl, ct);
1263 1267                  res = ct ? ct->ct_id : -1;
1264 1268                  mutex_exit(&contract_lock);
1265 1269          } else {
1266 1270                  mutex_enter(&p->p_lock);
1267 1271                  ct = avl_find(&p->p_ct_held, &template, &where);
1268 1272                  if (ct == NULL)
1269 1273                          ct = avl_nearest(&p->p_ct_held, where, AVL_AFTER);
1270 1274                  res = ct ? ct->ct_id : -1;
1271 1275                  mutex_exit(&p->p_lock);
1272 1276          }
1273 1277  
1274 1278          return (res);
1275 1279  }
1276 1280  
1277 1281  /*
1278 1282   * contract_ptr_common
1279 1283   *
1280 1284   * Common code for contract_ptr and contract_type_ptr.  Takes a pointer
1281 1285   * to an AVL tree to search in.  Should be called with the appropriate
1282 1286   * tree-protecting lock held (unfortunately unassertable).
1283 1287   */
1284 1288  static contract_t *
1285 1289  contract_ptr_common(avl_tree_t *tree, ctid_t id, uint64_t zuniqid)
1286 1290  {
1287 1291          contract_t template, *ct;
1288 1292  
1289 1293          template.ct_id = id;
1290 1294          ct = avl_find(tree, &template, NULL);
1291 1295          if (ct == NULL || (zuniqid != GLOBAL_ZONEUNIQID &&
1292 1296              contract_getzuniqid(ct) != zuniqid)) {
1293 1297                  return (NULL);
1294 1298          }
1295 1299  
1296 1300          /*
1297 1301           * Check to see if a thread is in the window in contract_rele
1298 1302           * between dropping the reference count and removing the
1299 1303           * contract from the type AVL.
1300 1304           */
1301 1305          mutex_enter(&ct->ct_reflock);
1302 1306          if (ct->ct_ref) {
1303 1307                  ct->ct_ref++;
1304 1308                  mutex_exit(&ct->ct_reflock);
1305 1309          } else {
1306 1310                  mutex_exit(&ct->ct_reflock);
1307 1311                  ct = NULL;
1308 1312          }
1309 1313  
1310 1314          return (ct);
1311 1315  }
1312 1316  
1313 1317  /*
1314 1318   * contract_type_ptr
1315 1319   *
1316 1320   * Returns a pointer to the contract with the specified id.  The
1317 1321   * contract is held, so the caller needs to release the reference when
1318 1322   * it is through with the contract.
1319 1323   */
1320 1324  contract_t *
1321 1325  contract_type_ptr(ct_type_t *type, ctid_t id, uint64_t zuniqid)
1322 1326  {
1323 1327          contract_t *ct;
1324 1328  
1325 1329          mutex_enter(&type->ct_type_lock);
1326 1330          ct = contract_ptr_common(&type->ct_type_avl, id, zuniqid);
1327 1331          mutex_exit(&type->ct_type_lock);
1328 1332  
1329 1333          return (ct);
1330 1334  }
1331 1335  
1332 1336  /*
1333 1337   * contract_ptr
1334 1338   *
1335 1339   * Returns a pointer to the contract with the specified id.  The
1336 1340   * contract is held, so the caller needs to release the reference when
1337 1341   * it is through with the contract.
1338 1342   */
1339 1343  contract_t *
1340 1344  contract_ptr(ctid_t id, uint64_t zuniqid)
1341 1345  {
1342 1346          contract_t *ct;
1343 1347  
1344 1348          mutex_enter(&contract_lock);
1345 1349          ct = contract_ptr_common(&contract_avl, id, zuniqid);
1346 1350          mutex_exit(&contract_lock);
1347 1351  
1348 1352          return (ct);
1349 1353  }
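
Both contract_ptr() and contract_type_ptr() return with the reference count bumped, so every successful lookup must be paired with contract_rele(). A minimal sketch of the intended discipline (the consumer function is hypothetical):

        contract_t *ct;

        if ((ct = contract_ptr(id, GLOBAL_ZONEUNIQID)) != NULL) {
                /* ct cannot be torn down while we hold this reference */
                inspect_contract(ct);           /* hypothetical consumer */
                contract_rele(ct);
        }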
1350 1354  
1351 1355  /*
1352 1356   * contract_type_time
1353 1357   *
1354 1358   * Obtains the last time a contract of a particular type was created.
1355 1359   */
1356 1360  void
1357 1361  contract_type_time(ct_type_t *type, timestruc_t *time)
1358 1362  {
1359 1363          mutex_enter(&type->ct_type_lock);
1360 1364          *time = type->ct_type_timestruc;
1361 1365          mutex_exit(&type->ct_type_lock);
1362 1366  }
1363 1367  
1364 1368  /*
1365 1369   * contract_type_bundle
1366 1370   *
1367 1371   * Obtains a type's bundle queue.
1368 1372   */
1369 1373  ct_equeue_t *
1370 1374  contract_type_bundle(ct_type_t *type)
1371 1375  {
1372 1376          return (&type->ct_type_events);
1373 1377  }
1374 1378  
1375 1379  /*
1376 1380   * contract_type_pbundle
1377 1381   *
1378 1382   * Obtains a process's bundle queue.  If one doesn't exist, one is
1379 1383   * created.  Often used simply to ensure that a bundle queue is
1380 1384   * allocated.
1381 1385   */
1382 1386  ct_equeue_t *
1383 1387  contract_type_pbundle(ct_type_t *type, proc_t *pp)
1384 1388  {
1385 1389          /*
1386 1390           * If there isn't an array of bundle queues, allocate one.
1387 1391           */
1388 1392          if (pp->p_ct_equeue == NULL) {
1389 1393                  size_t size = CTT_MAXTYPE * sizeof (ct_equeue_t *);
1390 1394                  ct_equeue_t **qa = kmem_zalloc(size, KM_SLEEP);
1391 1395  
1392 1396                  mutex_enter(&pp->p_lock);
1393 1397                  if (pp->p_ct_equeue)
1394 1398                          kmem_free(qa, size);
1395 1399                  else
1396 1400                          pp->p_ct_equeue = qa;
1397 1401                  mutex_exit(&pp->p_lock);
1398 1402          }
1399 1403  
1400 1404          /*
1401 1405           * If there isn't a bundle queue of the required type, allocate
1402 1406           * one.
1403 1407           */
1404 1408          if (pp->p_ct_equeue[type->ct_type_index] == NULL) {
1405 1409                  ct_equeue_t *q = kmem_zalloc(sizeof (ct_equeue_t), KM_SLEEP);
1406 1410                  cte_queue_create(q, CTEL_PBUNDLE, 20, 1);
1407 1411  
1408 1412                  mutex_enter(&pp->p_lock);
1409 1413                  if (pp->p_ct_equeue[type->ct_type_index])
1410 1414                          cte_queue_drain(q, 0);
1411 1415                  else
1412 1416                          pp->p_ct_equeue[type->ct_type_index] = q;
1413 1417                  mutex_exit(&pp->p_lock);
1414 1418          }
1415 1419  
1416 1420          return (pp->p_ct_equeue[type->ct_type_index]);
1417 1421  }
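
Both halves of contract_type_pbundle() use the same idiom: perform the KM_SLEEP allocation with no locks held, then re-check under p_lock and discard the allocation if another thread installed one first. A user-level analogue of the pattern using pthreads (the names are illustrative only, and allocation failure is ignored for brevity):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;
static int *slot;

int *
slot_get(void)
{
        if (slot == NULL) {
                int *p = calloc(1, sizeof (int));   /* may block; no lock held */

                pthread_mutex_lock(&slot_lock);
                if (slot != NULL)
                        free(p);        /* lost the race; use the winner's */
                else
                        slot = p;
                pthread_mutex_unlock(&slot_lock);
        }
        return (slot);
}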
1418 1422  
1419 1423  /*
1420 1424   * ctparam_copyin
1421 1425   *
1422 1426   * copyin a ct_param_t for CT_TSET or CT_TGET commands.
1423 1427   * If ctparam_copyout() is not called after ctparam_copyin(), then
1424 1428   * the caller must kmem_free() the buffer pointed to by kparam->ctpm_kbuf.
1425 1429   *
1426 1430   * The copyin/out of ct_param_t is not done in ctmpl_set() and ctmpl_get()
1427 1431   * because prctioctl() calls ctmpl_set() and ctmpl_get() while holding a
1428 1432   * process lock.
1429 1433   */
1430 1434  int
1431 1435  ctparam_copyin(const void *uaddr, ct_kparam_t *kparam, int flag, int cmd)
1432 1436  {
1433 1437          uint32_t size;
1434 1438          void *ubuf;
1435 1439          ct_param_t *param = &kparam->param;
1436 1440          STRUCT_DECL(ct_param, uarg);
1437 1441  
1438 1442          STRUCT_INIT(uarg, flag);
1439 1443          if (copyin(uaddr, STRUCT_BUF(uarg), STRUCT_SIZE(uarg)))
1440 1444                  return (EFAULT);
1441 1445          size = STRUCT_FGET(uarg, ctpm_size);
1442 1446          ubuf = STRUCT_FGETP(uarg, ctpm_value);
1443 1447  
1444 1448          if (size > CT_PARAM_MAX_SIZE || size == 0)
1445 1449                  return (EINVAL);
1446 1450  
1447 1451          kparam->ctpm_kbuf = kmem_alloc(size, KM_SLEEP);
1448 1452          if (cmd == CT_TSET) {
1449 1453                  if (copyin(ubuf, kparam->ctpm_kbuf, size)) {
1450 1454                          kmem_free(kparam->ctpm_kbuf, size);
1451 1455                          return (EFAULT);
1452 1456                  }
1453 1457          }
1454 1458          param->ctpm_id = STRUCT_FGET(uarg, ctpm_id);
1455 1459          param->ctpm_size = size;
1456 1460          param->ctpm_value = ubuf;
1457 1461          kparam->ret_size = 0;
1458 1462  
1459 1463          return (0);
1460 1464  }
1461 1465  
1462 1466  /*
1463 1467   * ctparam_copyout
1464 1468   *
1465 1469   * copyout a ct_kparam_t and free the buffer pointed to by the member
1466 1470   * ctpm_kbuf of ct_kparam_t.
1467 1471   */
1468 1472  int
1469 1473  ctparam_copyout(ct_kparam_t *kparam, void *uaddr, int flag)
1470 1474  {
1471 1475          int r = 0;
1472 1476          ct_param_t *param = &kparam->param;
1473 1477          STRUCT_DECL(ct_param, uarg);
1474 1478  
1475 1479          STRUCT_INIT(uarg, flag);
1476 1480  
1477 1481          STRUCT_FSET(uarg, ctpm_id, param->ctpm_id);
1478 1482          STRUCT_FSET(uarg, ctpm_size, kparam->ret_size);
1479 1483          STRUCT_FSETP(uarg, ctpm_value, param->ctpm_value);
1480 1484          if (copyout(STRUCT_BUF(uarg), uaddr, STRUCT_SIZE(uarg))) {
1481 1485                  r = EFAULT;
1482 1486                  goto error;
1483 1487          }
1484 1488          if (copyout(kparam->ctpm_kbuf, param->ctpm_value,
1485 1489              MIN(kparam->ret_size, param->ctpm_size))) {
1486 1490                  r = EFAULT;
1487 1491          }
1488 1492  
1489 1493  error:
1490 1494          kmem_free(kparam->ctpm_kbuf, param->ctpm_size);
1491 1495  
1492 1496          return (r);
1493 1497  }
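
The ownership rule spelled out above is worth restating as code: ctparam_copyin() hands its caller a kmem buffer, and either ctparam_copyout() frees it or the caller must. A hypothetical handler fragment in the style of ctfs's prctioctl():

        ct_kparam_t kparam;
        int error;

        if ((error = ctparam_copyin(uaddr, &kparam, flag, cmd)) != 0)
                return (error);

        error = ctmpl_get(template, &kparam);
        if (error != 0) {
                /* ctparam_copyout() was not reached; we free the buffer */
                kmem_free(kparam.ctpm_kbuf, kparam.param.ctpm_size);
                return (error);
        }

        return (ctparam_copyout(&kparam, uaddr, flag));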
1494 1498  
1495 1499  /*
1496 1500   * ctmpl_free
1497 1501   *
1498 1502   * Frees a template.
1499 1503   */
1500 1504  void
1501 1505  ctmpl_free(ct_template_t *template)
1502 1506  {
1503 1507          mutex_destroy(&template->ctmpl_lock);
1504 1508          template->ctmpl_ops->ctop_free(template);
1505 1509  }
1506 1510  
1507 1511  /*
1508 1512   * ctmpl_dup
1509 1513   *
1510 1514   * Creates a copy of a template.
1511 1515   */
1512 1516  ct_template_t *
1513 1517  ctmpl_dup(ct_template_t *template)
1514 1518  {
1515 1519          ct_template_t *new;
1516 1520  
1517 1521          if (template == NULL)
1518 1522                  return (NULL);
1519 1523  
1520 1524          new = template->ctmpl_ops->ctop_dup(template);
1521 1525          /*
1522 1526           * ctmpl_lock was taken by ctop_dup's call to ctmpl_copy and
1523 1527   * should have remained held until now.
1524 1528           */
1525 1529          mutex_exit(&template->ctmpl_lock);
1526 1530  
1527 1531          return (new);
1528 1532  }
1529 1533  
1530 1534  /*
1531 1535   * ctmpl_set
1532 1536   *
1533 1537   * Sets the requested terms of a template.
1534 1538   */
1535 1539  int
1536 1540  ctmpl_set(ct_template_t *template, ct_kparam_t *kparam, const cred_t *cr)
1537 1541  {
1538 1542          int result = 0;
1539 1543          ct_param_t *param = &kparam->param;
1540 1544          uint64_t param_value;
1541 1545  
1542 1546          if (param->ctpm_id == CTP_COOKIE ||
1543 1547              param->ctpm_id == CTP_EV_INFO ||
1544 1548              param->ctpm_id == CTP_EV_CRITICAL) {
1545 1549                  if (param->ctpm_size < sizeof (uint64_t)) {
1546 1550                          return (EINVAL);
1547 1551                  } else {
1548 1552                          param_value = *(uint64_t *)kparam->ctpm_kbuf;
1549 1553                  }
1550 1554          }
1551 1555  
1552 1556          mutex_enter(&template->ctmpl_lock);
1553 1557          switch (param->ctpm_id) {
1554 1558          case CTP_COOKIE:
1555 1559                  template->ctmpl_cookie = param_value;
1556 1560                  break;
1557 1561          case CTP_EV_INFO:
1558 1562                  if (param_value & ~(uint64_t)template->ctmpl_ops->allevents)
1559 1563                          result = EINVAL;
1560 1564                  else
1561 1565                          template->ctmpl_ev_info = param_value;
1562 1566                  break;
1563 1567          case CTP_EV_CRITICAL:
1564 1568                  if (param_value & ~(uint64_t)template->ctmpl_ops->allevents) {
1565 1569                          result = EINVAL;
1566 1570                          break;
1567 1571                  } else if ((~template->ctmpl_ev_crit & param_value) == 0) {
1568 1572                          /*
1569 1573                           * Assume that a pure reduction of the critical
1570 1574                           * set is allowed by the contract type.
1571 1575                           */
1572 1576                          template->ctmpl_ev_crit = param_value;
1573 1577                          break;
1574 1578                  }
1575 1579                  /*
1576 1580                   * There may be restrictions on what we can make
1577 1581                   * critical, so we defer to the judgement of the
1578 1582                   * contract type.
1579 1583                   */
1580 1584                  /* FALLTHROUGH */
1581 1585          default:
1582 1586                  result = template->ctmpl_ops->ctop_set(template, kparam, cr);
1583 1587          }
1584 1588          mutex_exit(&template->ctmpl_lock);
1585 1589  
1586 1590          return (result);
1587 1591  }
1588 1592  
1589 1593  /*
1590 1594   * ctmpl_get
1591 1595   *
1592 1596   * Obtains the requested terms from a template.
1593 1597   *
1594 1598   * If the term requested is a variable-sized term and the buffer
1595 1599   * provided is too small for the data, we truncate the data and return
1596 1600   * the buffer size necessary to fit the term in kparam->ret_size. If the
1597 1601   * term requested is fixed-size (uint64_t) and the buffer provided is too
1598 1602   * small, we return EINVAL.  This should never happen if you're using
1599 1603   * libcontract(3LIB), only if you call ioctl with a hand-constructed
1600 1604   * ct_param_t argument.
1601 1605   *
1602 1606   * Currently, only contract specific parameters have variable-sized
1603 1607   * parameters.
1604 1608   */
1605 1609  int
1606 1610  ctmpl_get(ct_template_t *template, ct_kparam_t *kparam)
1607 1611  {
1608 1612          int result = 0;
1609 1613          ct_param_t *param = &kparam->param;
1610 1614          uint64_t *param_value;
1611 1615  
1612 1616          if (param->ctpm_id == CTP_COOKIE ||
1613 1617              param->ctpm_id == CTP_EV_INFO ||
1614 1618              param->ctpm_id == CTP_EV_CRITICAL) {
1615 1619                  if (param->ctpm_size < sizeof (uint64_t)) {
1616 1620                          return (EINVAL);
1617 1621                  } else {
1618 1622                          param_value = kparam->ctpm_kbuf;
1619 1623                          kparam->ret_size = sizeof (uint64_t);
1620 1624                  }
1621 1625          }
1622 1626  
1623 1627          mutex_enter(&template->ctmpl_lock);
1624 1628          switch (param->ctpm_id) {
1625 1629          case CTP_COOKIE:
1626 1630                  *param_value = template->ctmpl_cookie;
1627 1631                  break;
1628 1632          case CTP_EV_INFO:
1629 1633                  *param_value = template->ctmpl_ev_info;
1630 1634                  break;
1631 1635          case CTP_EV_CRITICAL:
1632 1636                  *param_value = template->ctmpl_ev_crit;
1633 1637                  break;
1634 1638          default:
1635 1639                  result = template->ctmpl_ops->ctop_get(template, kparam);
1636 1640          }
1637 1641          mutex_exit(&template->ctmpl_lock);
1638 1642  
1639 1643          return (result);
1640 1644  }
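
From user space these terms are reached through libcontract(3LIB), whose template calls issue the CT_TSET/CT_TGET ioctls that land in ctmpl_set() and ctmpl_get(). A minimal sketch, error handling omitted (the event constant assumes a process-contract template):

#include <libcontract.h>
#include <sys/contract/process.h>
#include <fcntl.h>

void
set_terms(void)
{
        int fd = open("/system/contract/process/template", O_RDWR);
        uint64_t cookie;

        (void) ct_tmpl_set_cookie(fd, 0xf00dULL);        /* CTP_COOKIE */
        (void) ct_tmpl_set_critical(fd, CT_PR_EV_CORE);  /* CTP_EV_CRITICAL */
        (void) ct_tmpl_get_cookie(fd, &cookie);          /* CTP_COOKIE, CT_TGET */
}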
1641 1645  
1642 1646  /*
1643 1647   * ctmpl_makecurrent
1644 1648   *
1645 1649   * Used by ctmpl_activate and ctmpl_clear to set the current thread's
1646 1650   * active template.  Frees the old active template, if there was one.
1647 1651   */
1648 1652  static void
1649 1653  ctmpl_makecurrent(ct_template_t *template, ct_template_t *new)
1650 1654  {
1651 1655          klwp_t *curlwp = ttolwp(curthread);
1652 1656          proc_t *p = curproc;
1653 1657          ct_template_t *old;
1654 1658  
1655 1659          mutex_enter(&p->p_lock);
1656 1660          old = curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index];
1657 1661          curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index] = new;
1658 1662          mutex_exit(&p->p_lock);
1659 1663  
1660 1664          if (old)
1661 1665                  ctmpl_free(old);
1662 1666  }
1663 1667  
1664 1668  /*
1665 1669   * ctmpl_activate
1666 1670   *
1667 1671   * Copies the specified template as the current thread's active
1668 1672   * template of that type.
1669 1673   */
1670 1674  void
1671 1675  ctmpl_activate(ct_template_t *template)
1672 1676  {
1673 1677          ctmpl_makecurrent(template, ctmpl_dup(template));
1674 1678  }
1675 1679  
1676 1680  /*
1677 1681   * ctmpl_clear
1678 1682   *
1679 1683   * Clears the current thread's active template of the same type as
1680 1684   * the specified template.
1681 1685   */
1682 1686  void
1683 1687  ctmpl_clear(ct_template_t *template)
1684 1688  {
1685 1689          ctmpl_makecurrent(template, NULL);
1686 1690  }
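
The user-level counterparts are ct_tmpl_activate(3CONTRACT) and ct_tmpl_clear(3CONTRACT): between the two calls, operations performed by the calling thread (fork(), in the case of process contracts) are governed by the activated template's terms. A minimal sketch, error handling omitted:

#include <libcontract.h>
#include <fcntl.h>
#include <unistd.h>

void
fork_in_new_contract(void)
{
        int fd = open("/system/contract/process/template", O_RDWR);

        (void) ct_tmpl_activate(fd);    /* terms now apply to our fork()s */
        if (fork() == 0) {
                /* child: runs in a freshly created process contract */
                _exit(0);
        }
        (void) ct_tmpl_clear(fd);       /* parent: stop using the template */
}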
1687 1691  
1688 1692  /*
1689 1693   * ctmpl_create
1690 1694   *
1691 1695   * Creates a new contract using the specified template.
1692 1696   */
1693 1697  int
1694 1698  ctmpl_create(ct_template_t *template, ctid_t *ctidp)
1695 1699  {
1696 1700          return (template->ctmpl_ops->ctop_create(template, ctidp));
1697 1701  }
1698 1702  
1699 1703  /*
1700 1704   * ctmpl_init
1701 1705   *
1702 1706   * Initializes the common portion of a new contract template.
1703 1707   */
1704 1708  void
1705 1709  ctmpl_init(ct_template_t *new, ctmplops_t *ops, ct_type_t *type, void *data)
1706 1710  {
1707 1711          mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
1708 1712          new->ctmpl_ops = ops;
1709 1713          new->ctmpl_type = type;
1710 1714          new->ctmpl_data = data;
1711 1715          new->ctmpl_ev_info = new->ctmpl_ev_crit = 0;
1712 1716          new->ctmpl_cookie = 0;
1713 1717  }
1714 1718  
1715 1719  /*
1716 1720   * ctmpl_copy
1717 1721   *
1718 1722   * Copies the common portions of a contract template.  Intended for use
1719 1723   * by a contract type's ctop_dup template op.  Returns with the old
1720 1724   * template's lock held, which should remain held until the
1721 1725   * template op returns (it is dropped by ctmpl_dup).
1722 1726   */
1723 1727  void
1724 1728  ctmpl_copy(ct_template_t *new, ct_template_t *old)
1725 1729  {
1726 1730          mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
1727 1731          mutex_enter(&old->ctmpl_lock);
1728 1732          new->ctmpl_ops = old->ctmpl_ops;
1729 1733          new->ctmpl_type = old->ctmpl_type;
1730 1734          new->ctmpl_ev_crit = old->ctmpl_ev_crit;
1731 1735          new->ctmpl_ev_info = old->ctmpl_ev_info;
1732 1736          new->ctmpl_cookie = old->ctmpl_cookie;
1733 1737  }
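
A hypothetical contract type's ctop_dup op, showing the handoff: ctmpl_copy() leaves the old template's lock held so the type-specific terms can be copied consistently, and ctmpl_dup() drops the lock once this op returns. All mytmpl names are invented for the sketch:

typedef struct mytmpl {
        ct_template_t   myt_ctmpl;      /* common portion, embedded first */
        int             myt_param;      /* a type-specific term */
} mytmpl_t;

static ct_template_t *
mytmpl_dup(ct_template_t *old)
{
        mytmpl_t *new = kmem_alloc(sizeof (mytmpl_t), KM_SLEEP);

        ctmpl_copy(&new->myt_ctmpl, old);       /* old's lock is now held */
        new->myt_ctmpl.ctmpl_data = new;
        new->myt_param = ((mytmpl_t *)old->ctmpl_data)->myt_param;

        return (&new->myt_ctmpl);       /* ctmpl_dup() drops old's lock */
}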
1734 1738  
1735 1739  /*
1736 1740   * ctmpl_create_inval
1737 1741   *
1738 1742   * Returns EINVAL.  Provided for the convenience of those contract
1739 1743   * types which don't support ct_tmpl_create(3contract) and would
1740 1744   * otherwise need to create their own stub for the ctop_create template
1741 1745   * op.
1742 1746   */
1743 1747  /*ARGSUSED*/
1744 1748  int
1745 1749  ctmpl_create_inval(ct_template_t *template, ctid_t *ctidp)
1746 1750  {
1747 1751          return (EINVAL);
1748 1752  }
1749 1753  
1750 1754  
1751 1755  /*
1752 1756   * cte_queue_create
1753 1757   *
1754 1758   * Initializes a queue of a particular type.  If dynamic is set, the
1755 1759   * queue is to be freed when its last listener is removed after being
1756 1760   * drained.
1757 1761   */
1758 1762  static void
1759 1763  cte_queue_create(ct_equeue_t *q, ct_listnum_t list, int maxinf, int dynamic)
1760 1764  {
1761 1765          mutex_init(&q->ctq_lock, NULL, MUTEX_DEFAULT, NULL);
1762 1766          q->ctq_listno = list;
1763 1767          list_create(&q->ctq_events, sizeof (ct_kevent_t),
1764 1768              offsetof(ct_kevent_t, cte_nodes[list].ctm_node));
1765 1769          list_create(&q->ctq_listeners, sizeof (ct_listener_t),
1766 1770              offsetof(ct_listener_t, ctl_allnode));
1767 1771          list_create(&q->ctq_tail, sizeof (ct_listener_t),
1768 1772              offsetof(ct_listener_t, ctl_tailnode));
1769 1773          gethrestime(&q->ctq_atime);
1770 1774          q->ctq_nlisteners = 0;
1771 1775          q->ctq_nreliable = 0;
1772 1776          q->ctq_ninf = 0;
1773 1777          q->ctq_max = maxinf;
1774 1778  
1775 1779          /*
1776 1780           * Bundle queues and contract queues are embedded in other
1777 1781   * structures and are implicitly reference counted by virtue
1778 1782           * of their vnodes' indirect hold on their contracts.  Process
1779 1783           * bundle queues are dynamically allocated and may persist
1780 1784           * after the death of the process, so they must be explicitly
1781 1785           * reference counted.
1782 1786           */
1783 1787          q->ctq_flags = dynamic ? CTQ_REFFED : 0;
1784 1788  }
1785 1789  
1786 1790  /*
1787 1791   * cte_queue_destroy
1788 1792   *
1789 1793   * Destroys the specified queue.  The queue is freed if it is
1790 1794   * reference counted.
1791 1795   */
1792 1796  static void
1793 1797  cte_queue_destroy(ct_equeue_t *q)
1794 1798  {
1795 1799          ASSERT(q->ctq_flags & CTQ_DEAD);
1796 1800          ASSERT(q->ctq_nlisteners == 0);
1797 1801          ASSERT(q->ctq_nreliable == 0);
1798 1802          list_destroy(&q->ctq_events);
1799 1803          list_destroy(&q->ctq_listeners);
1800 1804          list_destroy(&q->ctq_tail);
1801 1805          mutex_destroy(&q->ctq_lock);
1802 1806          if (q->ctq_flags & CTQ_REFFED)
1803 1807                  kmem_free(q, sizeof (ct_equeue_t));
1804 1808  }
1805 1809  
1806 1810  /*
1807 1811   * cte_hold
1808 1812   *
1809 1813   * Takes a hold on the specified event.
1810 1814   */
1811 1815  static void
1812 1816  cte_hold(ct_kevent_t *e)
1813 1817  {
1814 1818          mutex_enter(&e->cte_lock);
1815 1819          ASSERT(e->cte_refs > 0);
1816 1820          e->cte_refs++;
1817 1821          mutex_exit(&e->cte_lock);
1818 1822  }
1819 1823  
1820 1824  /*
1821 1825   * cte_rele
1822 1826   *
1823 1827   * Releases a hold on the specified event.  If the caller had the last
1824 1828   * reference, frees the event and releases its hold on the contract
1825 1829   * that generated it.
1826 1830   */
1827 1831  static void
1828 1832  cte_rele(ct_kevent_t *e)
1829 1833  {
1830 1834          mutex_enter(&e->cte_lock);
1831 1835          ASSERT(e->cte_refs > 0);
1832 1836          if (--e->cte_refs) {
1833 1837                  mutex_exit(&e->cte_lock);
1834 1838                  return;
1835 1839          }
1836 1840  
1837 1841          contract_rele(e->cte_contract);
1838 1842  
1839 1843          mutex_destroy(&e->cte_lock);
1840 1844          nvlist_free(e->cte_data);
1841 1845          nvlist_free(e->cte_gdata);
1842 1846          kmem_free(e, sizeof (ct_kevent_t));
1843 1847  }
1844 1848  
1845 1849  /*
1846 1850   * cte_qrele
1847 1851   *
1848 1852   * Remove this listener's hold on the specified event, removing and
1849 1853   * releasing the queue's hold on the event if appropriate.
1850 1854   */
1851 1855  static void
1852 1856  cte_qrele(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
1853 1857  {
1854 1858          ct_member_t *member = &e->cte_nodes[q->ctq_listno];
1855 1859  
1856 1860          ASSERT(MUTEX_HELD(&q->ctq_lock));
1857 1861  
1858 1862          if (l->ctl_flags & CTLF_RELIABLE)
1859 1863                  member->ctm_nreliable--;
1860 1864          if ((--member->ctm_refs == 0) && member->ctm_trimmed) {
1861 1865                  member->ctm_trimmed = 0;
1862 1866                  list_remove(&q->ctq_events, e);
1863 1867                  cte_rele(e);
1864 1868          }
1865 1869  }
1866 1870  
1867 1871  /*
1868 1872   * cte_qmove
1869 1873   *
1870 1874   * Move this listener to the specified event in the queue.
1871 1875   */
1872 1876  static ct_kevent_t *
1873 1877  cte_qmove(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
1874 1878  {
1875 1879          ct_kevent_t *olde;
1876 1880  
1877 1881          ASSERT(MUTEX_HELD(&q->ctq_lock));
1878 1882          ASSERT(l->ctl_equeue == q);
1879 1883  
1880 1884          if ((olde = l->ctl_position) == NULL)
1881 1885                  list_remove(&q->ctq_tail, l);
1882 1886  
1883 1887          while (e != NULL && e->cte_nodes[q->ctq_listno].ctm_trimmed)
1884 1888                  e = list_next(&q->ctq_events, e);
1885 1889  
1886 1890          if (e != NULL) {
1887 1891                  e->cte_nodes[q->ctq_listno].ctm_refs++;
1888 1892                  if (l->ctl_flags & CTLF_RELIABLE)
1889 1893                          e->cte_nodes[q->ctq_listno].ctm_nreliable++;
1890 1894          } else {
1891 1895                  list_insert_tail(&q->ctq_tail, l);
1892 1896          }
1893 1897  
1894 1898          l->ctl_position = e;
1895 1899          if (olde)
1896 1900                  cte_qrele(q, l, olde);
1897 1901  
1898 1902          return (e);
1899 1903  }
1900 1904  
1901 1905  /*
1902 1906   * cte_checkcred
1903 1907   *
1904 1908   * Determines if the specified event's contract is owned by a process
1905 1909   * with the same effective uid as the specified credential.  Called
1906 1910   * after a failed call to contract_owned with locked set.  Because it
1907 1911   * drops the queue lock, its caller (cte_qreadable) needs to make sure
1908 1912   * we're still in the same place after we return.  Returns 1 on
1909 1913   * success.
1910 1914   */
1911 1915  static int
1912 1916  cte_checkcred(ct_equeue_t *q, ct_kevent_t *e, const cred_t *cr)
1913 1917  {
1914 1918          int result;
1915 1919          contract_t *ct = e->cte_contract;
1916 1920  
1917 1921          cte_hold(e);
1918 1922          mutex_exit(&q->ctq_lock);
1919 1923          result = curproc->p_zone->zone_uniqid == ct->ct_czuniqid &&
1920 1924              contract_checkcred(ct, cr);
1921 1925          mutex_enter(&q->ctq_lock);
1922 1926          cte_rele(e);
1923 1927  
1924 1928          return (result);
1925 1929  }
1926 1930  
1927 1931  /*
1928 1932   * cte_qreadable
1929 1933   *
1930 1934   * Ensures that the listener is pointing to a valid event that the
1931 1935   * caller has the credentials to read.  Returns 0 if we can read the
1932 1936   * event we're pointing to.
1933 1937   */
1934 1938  static int
1935 1939  cte_qreadable(ct_equeue_t *q, ct_listener_t *l, const cred_t *cr,
1936 1940      uint64_t zuniqid, int crit)
1937 1941  {
1938 1942          ct_kevent_t *e, *next;
1939 1943          contract_t *ct;
1940 1944  
1941 1945          ASSERT(MUTEX_HELD(&q->ctq_lock));
1942 1946          ASSERT(l->ctl_equeue == q);
1943 1947  
1944 1948          if (l->ctl_flags & CTLF_COPYOUT)
1945 1949                  return (1);
1946 1950  
1947 1951          next = l->ctl_position;
1948 1952          while (e = cte_qmove(q, l, next)) {
1949 1953                  ct = e->cte_contract;
1950 1954                  /*
1951 1955                   * Check obvious things first.  If we are looking for a
1952 1956                   * critical message, is this one?  If we aren't in the
1953 1957                   * global zone, is this message meant for us?
1954 1958                   */
1955 1959                  if ((crit && (e->cte_flags & (CTE_INFO | CTE_ACK))) ||
1956 1960                      (cr != NULL && zuniqid != GLOBAL_ZONEUNIQID &&
1957 1961                      zuniqid != contract_getzuniqid(ct))) {
1958 1962  
1959 1963                          next = list_next(&q->ctq_events, e);
1960 1964  
1961 1965                  /*
1962 1966                   * Next, see if our effective uid equals that of owner
1963 1967                   * or author of the contract.  Since we are holding the
1964 1968                   * queue lock, contract_owned can't always check if we
1965 1969                   * have the same effective uid as the contract's
1966 1970                   * owner.  If it comes to that, it fails and we take
1967 1971                   * the slow(er) path.
1968 1972                   */
1969 1973                  } else if (cr != NULL && !contract_owned(ct, cr, B_TRUE)) {
1970 1974  
1971 1975                          /*
1972 1976                           * At this point we either don't have any claim
1973 1977                           * to this contract or we match the effective
1974 1978                           * uid of the owner but couldn't tell.  We
1975 1979                           * first test for a NULL holder so that events
1976 1980                           * from orphans and inherited contracts avoid
1977 1981                           * the penalty phase.
1978 1982                           */
1979 1983                          if (e->cte_contract->ct_owner == NULL &&
1980 1984                              !secpolicy_contract_observer_choice(cr))
1981 1985                                  next = list_next(&q->ctq_events, e);
1982 1986  
1983 1987                          /*
1984 1988                           * cte_checkcred will juggle locks to see if we
1985 1989                           * have the same uid as the event's contract's
1986 1990                           * current owner.  If it succeeds, we have to
1987 1991                           * make sure we are in the same point in the
1988 1992                           * queue.
1989 1993                           */
1990 1994                          else if (cte_checkcred(q, e, cr) &&
1991 1995                              l->ctl_position == e)
1992 1996                                  break;
1993 1997  
1994 1998                          /*
1995 1999                           * cte_checkcred failed; see if we're in the
1996 2000                           * same place.
1997 2001                           */
1998 2002                          else if (l->ctl_position == e)
1999 2003                                  if (secpolicy_contract_observer_choice(cr))
2000 2004                                          break;
2001 2005                                  else
2002 2006                                          next = list_next(&q->ctq_events, e);
2003 2007  
2004 2008                          /*
2005 2009                           * cte_checkcred failed, and our position was
2006 2010                           * changed.  Start from there.
2007 2011                           */
2008 2012                          else
2009 2013                                  next = l->ctl_position;
2010 2014                  } else {
2011 2015                          break;
2012 2016                  }
2013 2017          }
2014 2018  
2015 2019          /*
2016 2020           * We check for CTLF_COPYOUT again in case we dropped the queue
2017 2021           * lock in cte_checkcred.
2018 2022           */
2019 2023          return ((l->ctl_flags & CTLF_COPYOUT) || (l->ctl_position == NULL));
2020 2024  }
2021 2025  
2022 2026  /*
2023 2027   * cte_qwakeup
2024 2028   *
2025 2029   * Wakes up any waiting listeners and points them at the specified event.
2026 2030   */
2027 2031  static void
2028 2032  cte_qwakeup(ct_equeue_t *q, ct_kevent_t *e)
2029 2033  {
2030 2034          ct_listener_t *l;
2031 2035  
2032 2036          ASSERT(MUTEX_HELD(&q->ctq_lock));
2033 2037  
2034 2038          while (l = list_head(&q->ctq_tail)) {
2035 2039                  list_remove(&q->ctq_tail, l);
2036 2040                  e->cte_nodes[q->ctq_listno].ctm_refs++;
2037 2041                  if (l->ctl_flags & CTLF_RELIABLE)
2038 2042                          e->cte_nodes[q->ctq_listno].ctm_nreliable++;
2039 2043                  l->ctl_position = e;
2040 2044                  cv_signal(&l->ctl_cv);
2041 2045                  pollwakeup(&l->ctl_pollhead, POLLIN);
2042 2046          }
2043 2047  }
2044 2048  
2045 2049  /*
2046 2050   * cte_copy
2047 2051   *
2048 2052   * Copies events from the specified contract event queue to the
2049 2053   * end of the specified process bundle queue.  Only called from
2050 2054   * contract_adopt.
2051 2055   *
2052 2056   * We copy to the end of the target queue instead of mixing the events
2053 2057   * in their proper order because otherwise the act of adopting a
2054 2058   * contract would require a process to reset all process bundle
2055 2059   * listeners it needed to see the new events.  This would, in turn,
2056 2060   * require the process to keep track of which preexisting events had
2057 2061   * already been processed.
2058 2062   */
2059 2063  static void
2060 2064  cte_copy(ct_equeue_t *q, ct_equeue_t *newq)
2061 2065  {
2062 2066          ct_kevent_t *e, *first = NULL;
2063 2067  
2064 2068          VERIFY(q->ctq_listno == CTEL_CONTRACT);
2065 2069          VERIFY(newq->ctq_listno == CTEL_PBUNDLE);
2066 2070  
2067 2071          mutex_enter(&q->ctq_lock);
2068 2072          mutex_enter(&newq->ctq_lock);
2069 2073  
2070 2074          /*
2071 2075           * For now, only copy critical events.
2072 2076           */
2073 2077          for (e = list_head(&q->ctq_events); e != NULL;
2074 2078              e = list_next(&q->ctq_events, e)) {
2075 2079                  if ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
2076 2080                          if (first == NULL)
2077 2081                                  first = e;
2078 2082                          /*
2079 2083                           * It is possible for adoption to race with an owner's
2080 2084                           * cte_publish_all(); we must only enqueue events that
2081 2085                           * have not already been enqueued.
2082 2086                           */
2083 2087                          if (!list_link_active((list_node_t *)
2084 2088                              ((uintptr_t)e + newq->ctq_events.list_offset))) {
2085 2089                                  list_insert_tail(&newq->ctq_events, e);
2086 2090                                  cte_hold(e);
2087 2091                          }
2088 2092                  }
2089 2093          }
2090 2094  
2091 2095          mutex_exit(&q->ctq_lock);
2092 2096  
2093 2097          if (first)
2094 2098                  cte_qwakeup(newq, first);
2095 2099  
2096 2100          mutex_exit(&newq->ctq_lock);
2097 2101  }
2098 2102  
2099 2103  /*
2100 2104   * cte_trim
2101 2105   *
2102 2106   * Trims unneeded events from an event queue.  Algorithm works as
2103 2107   * follows:
2104 2108   *
2105 2109   *   Removes all informative and acknowledged critical events until the
2106 2110   *   first referenced event is found.
2107 2111   *
2108 2112   *   If a contract is specified, removes all events (regardless of
2109 2113   *   acknowledgement) generated by that contract until the first event
2110 2114   *   referenced by a reliable listener is found.  Referenced events are
2111 2115   *   removed by marking them "trimmed".  Such events will be removed
2112 2116   *   when the last reference is dropped and will be skipped by future
2113 2117   *   listeners.
2114 2118   *
2115 2119   * This is pretty basic.  Ideally this should remove from the middle of
2116 2120   * the list (i.e. beyond the first referenced event), and even
2117 2121   * referenced events.
2118 2122   */
2119 2123  static void
2120 2124  cte_trim(ct_equeue_t *q, contract_t *ct)
2121 2125  {
2122 2126          ct_kevent_t *e, *next;
2123 2127          int flags, stopper;
2124 2128          int start = 1;
2125 2129  
2126 2130          VERIFY(MUTEX_HELD(&q->ctq_lock));
2127 2131  
2128 2132          for (e = list_head(&q->ctq_events); e != NULL; e = next) {
2129 2133                  next = list_next(&q->ctq_events, e);
2130 2134                  flags = e->cte_flags;
2131 2135                  stopper = (q->ctq_listno != CTEL_PBUNDLE) &&
2132 2136                      (e->cte_nodes[q->ctq_listno].ctm_nreliable > 0);
2133 2137                  if (e->cte_nodes[q->ctq_listno].ctm_refs == 0) {
2134 2138                          if ((start && (flags & (CTE_INFO | CTE_ACK))) ||
2135 2139                              (e->cte_contract == ct)) {
2136 2140                                  /*
2137 2141                                   * Toss informative and ACKed critical messages.
2138 2142                                   */
2139 2143                                  list_remove(&q->ctq_events, e);
2140 2144                                  cte_rele(e);
2141 2145                          }
2142 2146                  } else if ((e->cte_contract == ct) && !stopper) {
2143 2147                          ASSERT(q->ctq_nlisteners != 0);
2144 2148                          e->cte_nodes[q->ctq_listno].ctm_trimmed = 1;
2145 2149                  } else if (ct && !stopper) {
2146 2150                          start = 0;
2147 2151                  } else {
2148 2152                          /*
2149 2153                           * Don't free messages past the first reader.
2150 2154                           */
2151 2155                          break;
2152 2156                  }
2153 2157          }
2154 2158  }
2155 2159  
2156 2160  /*
2157 2161   * cte_queue_drain
2158 2162   *
2159 2163   * Drain all events from the specified queue, and mark it dead.  If
2160 2164   * "ack" is set, acknowledge any critical events we find along the
2161 2165   * way.
2162 2166   */
2163 2167  static void
2164 2168  cte_queue_drain(ct_equeue_t *q, int ack)
2165 2169  {
2166 2170          ct_kevent_t *e, *next;
2167 2171          ct_listener_t *l;
2168 2172  
2169 2173          mutex_enter(&q->ctq_lock);
2170 2174  
2171 2175          for (e = list_head(&q->ctq_events); e != NULL; e = next) {
2172 2176                  next = list_next(&q->ctq_events, e);
2173 2177                  if (ack && ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0)) {
2174 2178                          /*
2175 2179                           * Make sure critical messages are eventually
2176 2180                           * removed from the bundle queues.
2177 2181                           */
2178 2182                          mutex_enter(&e->cte_lock);
2179 2183                          e->cte_flags |= CTE_ACK;
2180 2184                          mutex_exit(&e->cte_lock);
2181 2185                          ASSERT(MUTEX_HELD(&e->cte_contract->ct_lock));
2182 2186                          e->cte_contract->ct_evcnt--;
2183 2187                  }
2184 2188                  list_remove(&q->ctq_events, e);
2185 2189                  e->cte_nodes[q->ctq_listno].ctm_refs = 0;
2186 2190                  e->cte_nodes[q->ctq_listno].ctm_nreliable = 0;
2187 2191                  e->cte_nodes[q->ctq_listno].ctm_trimmed = 0;
2188 2192                  cte_rele(e);
2189 2193          }
2190 2194  
2191 2195          /*
2192 2196           * This is necessary only because of CTEL_PBUNDLE listeners;
2193 2197           * the events they point to can move from one pbundle to
2194 2198           * another.  Fortunately, this only happens if the contract is
2195 2199           * inherited, which (in turn) only happens if the process
2196 2200           * exits, which means it's an all-or-nothing deal.  If this
2197 2201           * wasn't the case, we would instead need to keep track of
2198 2202           * listeners on a per-event basis, not just a per-queue basis.
2199 2203           * This would have the side benefit of letting us clean up
2200 2204           * trimmed events sooner (i.e. immediately), but would
2201 2205           * unfortunately make events even bigger than they already
2202 2206           * are.
2203 2207           */
2204 2208          for (l = list_head(&q->ctq_listeners); l;
2205 2209              l = list_next(&q->ctq_listeners, l)) {
2206 2210                  l->ctl_flags |= CTLF_DEAD;
2207 2211                  if (l->ctl_position) {
2208 2212                          l->ctl_position = NULL;
2209 2213                          list_insert_tail(&q->ctq_tail, l);
2210 2214                  }
2211 2215                  cv_broadcast(&l->ctl_cv);
2212 2216          }
2213 2217  
2214 2218          /*
2215 2219           * Disallow events.
2216 2220           */
2217 2221          q->ctq_flags |= CTQ_DEAD;
2218 2222  
2219 2223          /*
2220 2224           * If we represent the last reference to a reference counted
2221 2225           * process bundle queue, free it.
2222 2226           */
2223 2227          if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_nlisteners == 0))
2224 2228                  cte_queue_destroy(q);
2225 2229          else
2226 2230                  mutex_exit(&q->ctq_lock);
2227 2231  }
2228 2232  
2229 2233  /*
2230 2234   * cte_publish
2231 2235   *
2232 2236   * Publishes an event to a specific queue.  Only called by
2233 2237   * cte_publish_all.
2234 2238   */
2235 2239  static void
2236 2240  cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp, boolean_t mayexist)
2237 2241  {
2238 2242          ASSERT(MUTEX_HELD(&q->ctq_lock));
2239 2243  
2240 2244          q->ctq_atime = *tsp;
2241 2245  
2242 2246          /*
2243 2247           * If this event may already exist on this queue, check to see if it
2244 2248           * is already there and return if so.
2245 2249           */
2246 2250          if (mayexist && list_link_active((list_node_t *)((uintptr_t)e +
2247 2251              q->ctq_events.list_offset))) {
2248 2252                  mutex_exit(&q->ctq_lock);
2249 2253                  cte_rele(e);
2250 2254                  return;
2251 2255          }
2252 2256  
2253 2257          /*
2254 2258           * Don't publish if the event is informative and there aren't
2255 2259           * any listeners, or if the queue has been shut down.
2256 2260           */
2257 2261          if (((q->ctq_nlisteners == 0) && (e->cte_flags & (CTE_INFO|CTE_ACK))) ||
2258 2262              (q->ctq_flags & CTQ_DEAD)) {
2259 2263                  mutex_exit(&q->ctq_lock);
2260 2264                  cte_rele(e);
2261 2265                  return;
2262 2266          }
2263 2267  
2264 2268          /*
2265 2269           * Enqueue event
2266 2270           */
2267 2271          VERIFY(!list_link_active((list_node_t *)
2268 2272              ((uintptr_t)e + q->ctq_events.list_offset)));
2269 2273          list_insert_tail(&q->ctq_events, e);
2270 2274  
2271 2275          /*
2272 2276           * Check for waiting listeners
2273 2277           */
2274 2278          cte_qwakeup(q, e);
2275 2279  
2276 2280          /*
2277 2281           * Trim unnecessary events from the queue.
2278 2282           */
2279 2283          cte_trim(q, NULL);
2280 2284          mutex_exit(&q->ctq_lock);
2281 2285  }
2282 2286  
2283 2287  /*
2284 2288   * cte_publish_all
2285 2289   *
2286 2290   * Publish an event to all necessary event queues.  The event, e, must
2287 2291   * be zallocated by the caller, and the event's flags and type must be
2288 2292   * set.  The rest of the event's fields are initialized here.
2289 2293   */
2290 2294  uint64_t
2291 2295  cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata)
2292 2296  {
2293 2297          ct_equeue_t *q;
2294 2298          timespec_t ts;
2295 2299          uint64_t evid;
2296 2300          ct_kevent_t *negev;
2297 2301          int negend;
2298 2302  
2299 2303          e->cte_contract = ct;
2300 2304          e->cte_data = data;
2301 2305          e->cte_gdata = gdata;
2302 2306          e->cte_refs = 3;
2303 2307          evid = e->cte_id = atomic_inc_64_nv(&ct->ct_type->ct_type_evid);
2304 2308          contract_hold(ct);
2305 2309  
2306 2310          /*
2307 2311           * For a negotiation event we set the ct->ct_nevent field of the
2308 2312           * contract for the duration of the negotiation
2309 2313           */
2310 2314          negend = 0;
2311 2315          if (e->cte_flags & CTE_NEG) {
2312 2316                  cte_hold(e);
2313 2317                  ct->ct_nevent = e;
2314 2318          } else if (e->cte_type == CT_EV_NEGEND) {
2315 2319                  negend = 1;
2316 2320          }
2317 2321  
2318 2322          gethrestime(&ts);
2319 2323  
2320 2324          /*
2321 2325           * ct_evtlock simply (and only) ensures that two events sent
2322 2326           * from the same contract are delivered to all queues in the
2323 2327           * same order.
2324 2328           */
2325 2329          mutex_enter(&ct->ct_evtlock);
2326 2330  
2327 2331          /*
2328 2332           * CTEL_CONTRACT - First deliver to the contract queue, acking
2329 2333           * the event if the contract has been orphaned.
2330 2334           */
2331 2335          mutex_enter(&ct->ct_lock);
2332 2336          mutex_enter(&ct->ct_events.ctq_lock);
2333 2337          if ((e->cte_flags & CTE_INFO) == 0) {
2334 2338                  if (ct->ct_state >= CTS_ORPHAN)
2335 2339                          e->cte_flags |= CTE_ACK;
2336 2340                  else
2337 2341                          ct->ct_evcnt++;
2338 2342          }
2339 2343          mutex_exit(&ct->ct_lock);
2340 2344          cte_publish(&ct->ct_events, e, &ts, B_FALSE);
2341 2345  
2342 2346          /*
2343 2347           * CTEL_BUNDLE - Next deliver to the contract type's bundle
2344 2348           * queue.
2345 2349           */
2346 2350          mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
2347 2351          cte_publish(&ct->ct_type->ct_type_events, e, &ts, B_FALSE);
2348 2352  
2349 2353          /*
2350 2354           * CTEL_PBUNDLE - Finally, if the contract has an owner,
2351 2355           * deliver to the owner's process bundle queue.
2352 2356           */
2353 2357          mutex_enter(&ct->ct_lock);
2354 2358          if (ct->ct_owner) {
2355 2359                  /*
2356 2360                   * proc_exit doesn't free event queues until it has
2357 2361                   * abandoned all contracts.
2358 2362                   */
2359 2363                  ASSERT(ct->ct_owner->p_ct_equeue);
2360 2364                  ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
2361 2365                  q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
2362 2366                  mutex_enter(&q->ctq_lock);
2363 2367                  mutex_exit(&ct->ct_lock);
2364 2368  
2365 2369                  /*
2366 2370                   * It is possible for this code to race with adoption; we
2367 2371                   * publish the event indicating that the event may already
2368 2372                   * be enqueued because adoption beat us to it (in which case
2369 2373                   * cte_publish() does nothing).
2370 2374                   */
2371 2375                  cte_publish(q, e, &ts, B_TRUE);
2372 2376          } else {
2373 2377                  mutex_exit(&ct->ct_lock);
2374 2378                  cte_rele(e);
2375 2379          }
2376 2380  
2377 2381          if (negend) {
2378 2382                  mutex_enter(&ct->ct_lock);
2379 2383                  negev = ct->ct_nevent;
2380 2384                  ct->ct_nevent = NULL;
2381 2385                  cte_rele(negev);
2382 2386                  mutex_exit(&ct->ct_lock);
2383 2387          }
2384 2388  
2385 2389          mutex_exit(&ct->ct_evtlock);
2386 2390  
2387 2391          return (evid);
2388 2392  }
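
The initial count of three references matches the three cte_publish() calls above, one per queue (contract, type bundle, pbundle); when the contract has no owner, the third reference is released rather than published. The pbundle delivery is what a process sees on its process bundle endpoint; a minimal user-level reader, error handling omitted:

#include <libcontract.h>
#include <fcntl.h>
#include <stdio.h>

void
watch_pbundle(void)
{
        int fd = open("/system/contract/process/pbundle", O_RDONLY);
        ct_evthdl_t ev;

        while (ct_event_read(fd, &ev) == 0) {   /* blocks for the next event */
                (void) printf("contract %ld: event type %u\n",
                    (long)ct_event_get_ctid(ev), ct_event_get_type(ev));
                ct_event_free(ev);
        }
}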
2389 2393  
2390 2394  /*
2391 2395   * cte_add_listener
2392 2396   *
2393 2397   * Add a new listener to an event queue.
2394 2398   */
2395 2399  void
2396 2400  cte_add_listener(ct_equeue_t *q, ct_listener_t *l)
2397 2401  {
2398 2402          cv_init(&l->ctl_cv, NULL, CV_DEFAULT, NULL);
2399 2403          l->ctl_equeue = q;
2400 2404          l->ctl_position = NULL;
2401 2405          l->ctl_flags = 0;
2402 2406  
2403 2407          mutex_enter(&q->ctq_lock);
2404 2408          list_insert_head(&q->ctq_tail, l);
2405 2409          list_insert_head(&q->ctq_listeners, l);
2406 2410          q->ctq_nlisteners++;
2407 2411          mutex_exit(&q->ctq_lock);
2408 2412  }
2409 2413  
2410 2414  /*
2411 2415   * cte_remove_listener
2412 2416   *
2413 2417   * Remove a listener from an event queue.  No other queue activities
2414 2418   * (e.g. cte_get_event) may be in progress at this endpoint when this
2415 2419   * is called.
2416 2420   */
2417 2421  void
2418 2422  cte_remove_listener(ct_listener_t *l)
2419 2423  {
2420 2424          ct_equeue_t *q = l->ctl_equeue;
2421 2425          ct_kevent_t *e;
2422 2426  
2423 2427          mutex_enter(&q->ctq_lock);
2424 2428  
2425 2429          ASSERT((l->ctl_flags & (CTLF_COPYOUT|CTLF_RESET)) == 0);
2426 2430  
2427 2431          if ((e = l->ctl_position) != NULL)
2428 2432                  cte_qrele(q, l, e);
2429 2433          else
2430 2434                  list_remove(&q->ctq_tail, l);
2431 2435          l->ctl_position = NULL;
2432 2436  
2433 2437          q->ctq_nlisteners--;
2434 2438          list_remove(&q->ctq_listeners, l);
2435 2439  
2436 2440          if (l->ctl_flags & CTLF_RELIABLE)
2437 2441                  q->ctq_nreliable--;
2438 2442  
2439 2443          /*
2440 2444           * If we are the last listener of a dead reference counted
2441 2445           * queue (i.e. a process bundle) we free it.  Otherwise we just
2442 2446           * trim any events which may have been kept around for our
2443 2447           * benefit.
2444 2448           */
2445 2449          if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_flags & CTQ_DEAD) &&
2446 2450              (q->ctq_nlisteners == 0)) {
2447 2451                  cte_queue_destroy(q);
2448 2452          } else {
2449 2453                  cte_trim(q, NULL);
2450 2454                  mutex_exit(&q->ctq_lock);
2451 2455          }
2452 2456  }
2453 2457  
2454 2458  /*
2455 2459   * cte_reset_listener
2456 2460   *
2457 2461   * Moves a listener's queue pointer to the beginning of the queue.
2458 2462   */
2459 2463  void
2460 2464  cte_reset_listener(ct_listener_t *l)
2461 2465  {
2462 2466          ct_equeue_t *q = l->ctl_equeue;
2463 2467  
2464 2468          mutex_enter(&q->ctq_lock);
2465 2469  
2466 2470          /*
2467 2471           * We allow an asynchronous reset because it doesn't make a
2468 2472           * whole lot of sense to make reset block or fail.  We already
2469 2473           * have most of the mechanism needed thanks to queue trimming,
2470 2474           * so implementing it isn't a big deal.
2471 2475           */
2472 2476          if (l->ctl_flags & CTLF_COPYOUT)
2473 2477                  l->ctl_flags |= CTLF_RESET;
2474 2478  
2475 2479          (void) cte_qmove(q, l, list_head(&q->ctq_events));
2476 2480  
2477 2481          /*
2478 2482           * Inform blocked readers.
2479 2483           */
2480 2484          cv_broadcast(&l->ctl_cv);
2481 2485          pollwakeup(&l->ctl_pollhead, POLLIN);
2482 2486          mutex_exit(&q->ctq_lock);
2483 2487  }
2484 2488  
2485 2489  /*
2486 2490   * cte_next_event
2487 2491   *
2488 2492   * Moves the event pointer for the specified listener to the next event
2489 2493   * on the queue.  To avoid races, this movement only occurs if the
2490 2494   * specified event id matches that of the current event.  This is used
2491 2495   * primarily to skip events that have been read but whose extended data
2492 2496   * haven't been copied out.
2493 2497   */
2494 2498  int
2495 2499  cte_next_event(ct_listener_t *l, uint64_t id)
2496 2500  {
2497 2501          ct_equeue_t *q = l->ctl_equeue;
2498 2502          ct_kevent_t *old;
2499 2503  
2500 2504          mutex_enter(&q->ctq_lock);
2501 2505  
2502 2506          if (l->ctl_flags & CTLF_COPYOUT)
2503 2507                  l->ctl_flags |= CTLF_RESET;
2504 2508  
2505 2509          if (((old = l->ctl_position) != NULL) && (old->cte_id == id))
2506 2510                  (void) cte_qmove(q, l, list_next(&q->ctq_events, old));
2507 2511  
2508 2512          mutex_exit(&q->ctq_lock);
2509 2513  
2510 2514          return (0);
2511 2515  }
2512 2516  
2513 2517  /*
2514 2518   * cte_get_event
2515 2519   *
2516 2520   * Reads an event from an event endpoint.  If "nonblock" is clear, we
2517 2521   * block until a suitable event is ready.  If "crit" is set, we only
2518 2522   * read critical events.  Note that while "cr" is the caller's cred,
2519 2523   * "zuniqid" is the unique id of the zone the calling contract
2520 2524   * filesystem was mounted in.
2521 2525   */
2522 2526  int
2523 2527  cte_get_event(ct_listener_t *l, int nonblock, void *uaddr, const cred_t *cr,
2524 2528      uint64_t zuniqid, int crit)
2525 2529  {
2526 2530          ct_equeue_t *q = l->ctl_equeue;
2527 2531          ct_kevent_t *temp;
2528 2532          int result = 0;
2529 2533          int partial = 0;
2530 2534          size_t size, gsize, len;
2531 2535          model_t mdl = get_udatamodel();
2532 2536          STRUCT_DECL(ct_event, ev);
2533 2537          STRUCT_INIT(ev, mdl);
2534 2538  
2535 2539          /*
2536 2540           * cte_qreadable checks for CTLF_COPYOUT as well as ensures
2537 2541           * that there exists, and we are pointing to, an appropriate
2538 2542           * event.  It may temporarily drop ctq_lock, but that doesn't
2539 2543           * really matter to us.
2540 2544           */
2541 2545          mutex_enter(&q->ctq_lock);
2542 2546          while (cte_qreadable(q, l, cr, zuniqid, crit)) {
2543 2547                  if (nonblock) {
2544 2548                          result = EAGAIN;
2545 2549                          goto error;
2546 2550                  }
2547 2551                  if (q->ctq_flags & CTQ_DEAD) {
2548 2552                          result = EIDRM;
2549 2553                          goto error;
2550 2554                  }
2551 2555                  result = cv_wait_sig(&l->ctl_cv, &q->ctq_lock);
2552 2556                  if (result == 0) {
2553 2557                          result = EINTR;
2554 2558                          goto error;
2555 2559                  }
2556 2560          }
2557 2561          temp = l->ctl_position;
2558 2562          cte_hold(temp);
2559 2563          l->ctl_flags |= CTLF_COPYOUT;
2560 2564          mutex_exit(&q->ctq_lock);
2561 2565  
2562 2566          /*
2563 2567           * We now have an event.  Copy in the user event structure to
2564 2568           * see how much space we have to work with.
2565 2569           */
2566 2570          result = copyin(uaddr, STRUCT_BUF(ev), STRUCT_SIZE(ev));
2567 2571          if (result)
2568 2572                  goto copyerr;
2569 2573  
2570 2574          /*
2571 2575           * Determine what data we have and what the user should be
2572 2576           * allowed to see.
2573 2577           */
2574 2578          size = gsize = 0;
2575 2579          if (temp->cte_data) {
2576 2580                  VERIFY(nvlist_size(temp->cte_data, &size,
2577 2581                      NV_ENCODE_NATIVE) == 0);
2578 2582                  ASSERT(size != 0);
2579 2583          }
2580 2584          if (zuniqid == GLOBAL_ZONEUNIQID && temp->cte_gdata) {
2581 2585                  VERIFY(nvlist_size(temp->cte_gdata, &gsize,
2582 2586                      NV_ENCODE_NATIVE) == 0);
2583 2587                  ASSERT(gsize != 0);
2584 2588          }
2585 2589  
2586 2590          /*
2587 2591           * If we have enough space, copy out the extended event data.
2588 2592           */
2589 2593          len = size + gsize;
2590 2594          if (len) {
2591 2595                  if (STRUCT_FGET(ev, ctev_nbytes) >= len) {
2592 2596                          char *buf = kmem_alloc(len, KM_SLEEP);
2593 2597  
2594 2598                          if (size)
2595 2599                                  VERIFY(nvlist_pack(temp->cte_data, &buf, &size,
2596 2600                                      NV_ENCODE_NATIVE, KM_SLEEP) == 0);
2597 2601                          if (gsize) {
2598 2602                                  char *tmp = buf + size;
2599 2603  
2600 2604                                  VERIFY(nvlist_pack(temp->cte_gdata, &tmp,
2601 2605                                      &gsize, NV_ENCODE_NATIVE, KM_SLEEP) == 0);
2602 2606                          }
2603 2607  
2604 2608                          /* This shouldn't have changed */
2605 2609                          ASSERT(size + gsize == len);
2606 2610                          result = copyout(buf, STRUCT_FGETP(ev, ctev_buffer),
2607 2611                              len);
2608 2612                          kmem_free(buf, len);
2609 2613                          if (result)
2610 2614                                  goto copyerr;
2611 2615                  } else {
2612 2616                          partial = 1;
2613 2617                  }
2614 2618          }
2615 2619  
2616 2620          /*
2617 2621           * Copy out the common event data.
2618 2622           */
2619 2623          STRUCT_FSET(ev, ctev_id, temp->cte_contract->ct_id);
2620 2624          STRUCT_FSET(ev, ctev_evid, temp->cte_id);
2621 2625          STRUCT_FSET(ev, ctev_cttype,
2622 2626              temp->cte_contract->ct_type->ct_type_index);
2623 2627          STRUCT_FSET(ev, ctev_flags, temp->cte_flags &
2624 2628              (CTE_ACK|CTE_INFO|CTE_NEG));
2625 2629          STRUCT_FSET(ev, ctev_type, temp->cte_type);
2626 2630          STRUCT_FSET(ev, ctev_nbytes, len);
2627 2631          STRUCT_FSET(ev, ctev_goffset, size);
2628 2632          result = copyout(STRUCT_BUF(ev), uaddr, STRUCT_SIZE(ev));
2629 2633  
2630 2634  copyerr:
2631 2635          /*
2632 2636           * Only move our location in the queue if all copyouts were
2633 2637           * successful, the caller provided enough space for the entire
2634 2638           * event, and our endpoint wasn't reset or otherwise moved by
2635 2639           * another thread.
2636 2640           */
2637 2641          mutex_enter(&q->ctq_lock);
2638 2642          if (result)
2639 2643                  result = EFAULT;
2640 2644          else if (!partial && ((l->ctl_flags & CTLF_RESET) == 0) &&
2641 2645              (l->ctl_position == temp))
2642 2646                  (void) cte_qmove(q, l, list_next(&q->ctq_events, temp));
2643 2647          l->ctl_flags &= ~(CTLF_COPYOUT|CTLF_RESET);
2644 2648          /*
2645 2649           * Signal any readers blocked on our CTLF_COPYOUT.
2646 2650           */
2647 2651          cv_signal(&l->ctl_cv);
2648 2652          cte_rele(temp);
2649 2653  
2650 2654  error:
2651 2655          mutex_exit(&q->ctq_lock);
2652 2656          return (result);
2653 2657  }
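
libcontract(3LIB)'s read calls map onto these arguments: ct_event_read() performs a blocking read of any visible event, while ct_event_read_critical() sets crit so informative and acknowledged events are skipped. Both return the errno values produced here (EAGAIN for a nonblocking descriptor, EINTR, EIDRM, EFAULT). A sketch of a critical-event loop on the type bundle, error handling trimmed:

#include <libcontract.h>
#include <fcntl.h>
#include <errno.h>

void
watch_bundle(void)
{
        int fd = open("/system/contract/process/bundle", O_RDONLY);
        ct_evthdl_t ev;
        int err;

        for (;;) {
                err = ct_event_read_critical(fd, &ev);
                if (err == EINTR)
                        continue;       /* cv_wait_sig() was interrupted */
                if (err != 0)
                        break;          /* e.g. EIDRM once the queue is dead */
                /* ct_event_get_evid(ev) corresponds to cte_id above */
                ct_event_free(ev);
        }
}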
2654 2658  
2655 2659  /*
2656 2660   * cte_set_reliable
2657 2661   *
2658 2662   * Requests that events be reliably delivered to an event endpoint.
2659 2663   * Unread informative and acknowledged critical events will not be
2660 2664   * removed from the queue until this listener reads or skips them.
2661 2665   * Because a listener could maliciously request reliable delivery and
2662 2666   * then do nothing, this requires that PRIV_CONTRACT_EVENT be in the
2663 2667   * caller's effective set.
2664 2668   */
2665 2669  int
2666 2670  cte_set_reliable(ct_listener_t *l, const cred_t *cr)
2667 2671  {
2668 2672          ct_equeue_t *q = l->ctl_equeue;
2669 2673          int error;
2670 2674  
2671 2675          if ((error = secpolicy_contract_event(cr)) != 0)
2672 2676                  return (error);
2673 2677  
2674 2678          mutex_enter(&q->ctq_lock);
2675 2679          if ((l->ctl_flags & CTLF_RELIABLE) == 0) {
2676 2680                  l->ctl_flags |= CTLF_RELIABLE;
2677 2681                  q->ctq_nreliable++;
2678 2682                  if (l->ctl_position != NULL)
2679 2683                          l->ctl_position->cte_nodes[q->ctq_listno].
2680 2684                              ctm_nreliable++;
2681 2685          }
2682 2686          mutex_exit(&q->ctq_lock);
2683 2687  
2684 2688          return (0);
2685 2689  }
  
    2385 lines elided