          --- old/usr/src/uts/common/os/contract.c
          +++ new/usr/src/uts/common/os/contract.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   * Copyright 2016 Joyent, Inc.
  25   25   */
  26   26  
  27   27  /*
  28   28   * Contracts
  29   29   * ---------
  30   30   *
  31   31   * Contracts are a primitive which enrich the relationships between
  32   32   * processes and system resources.  The primary purpose of contracts is
  33   33   * to provide a means for the system to negotiate the departure from a
  34   34   * binding relationship (e.g. pages locked in memory or a thread bound
   35   35   * to a processor), but they can also be used as a purely asynchronous
  36   36   * error reporting mechanism as they are with process contracts.
  37   37   *
  38   38   * More information on how one interfaces with contracts and what
  39   39   * contracts can do for you can be found in:
  40   40   *   PSARC 2003/193 Solaris Contracts
  41   41   *   PSARC 2004/460 Contracts addendum
  42   42   *
  43   43   * This file contains the core contracts framework.  By itself it is
   44   44   * useless: it depends on the contracts filesystem (ctfs) to provide an
  45   45   * interface to user processes and individual contract types to
  46   46   * implement the process/resource relationships.
  47   47   *
  48   48   * Data structure overview
  49   49   * -----------------------
  50   50   *
  51   51   * A contract is represented by a contract_t, which itself points to an
  52   52   * encapsulating contract-type specific contract object.  A contract_t
  53   53   * contains the contract's static identity (including its terms), its
  54   54   * linkage to various bookkeeping structures, the contract-specific
  55   55   * event queue, and a reference count.
  56   56   *
  57   57   * A contract template is represented by a ct_template_t, which, like a
  58   58   * contract, points to an encapsulating contract-type specific template
  59   59   * object.  A ct_template_t contains the template's terms.
  60   60   *
  61   61   * An event queue is represented by a ct_equeue_t, and consists of a
  62   62   * list of events, a list of listeners, and a list of listeners who are
  63   63   * waiting for new events (affectionately referred to as "tail
  64   64   * listeners").  There are three queue types, defined by ct_listnum_t
  65   65   * (an enum).  An event may be on one of each type of queue
  66   66   * simultaneously; the list linkage used by a queue is determined by
  67   67   * its type.
  68   68   *
  69   69   * An event is represented by a ct_kevent_t, which contains mostly
  70   70   * static event data (e.g. id, payload).  It also has an array of
  71   71   * ct_member_t structures, each of which contains a list_node_t and
   72   72   * represents the event's linkage in a specific event queue.
  73   73   *
  74   74   * Each open of an event endpoint results in the creation of a new
  75   75   * listener, represented by a ct_listener_t.  In addition to linkage
  76   76   * into the aforementioned lists in the event_queue, a ct_listener_t
  77   77   * contains a pointer to the ct_kevent_t it is currently positioned at
  78   78   * as well as a set of status flags and other administrative data.
  79   79   *
  80   80   * Each process has a list of contracts it owns, p_ct_held; a pointer
  81   81   * to the process contract it is a member of, p_ct_process; the linkage
  82   82   * for that membership, p_ct_member; and an array of event queue
  83   83   * structures representing the process bundle queues.
  84   84   *
   85   85   * Each LWP has an array of its active templates, lwp_ct_active, and
   86   86   * an array of the contracts it most recently created, lwp_ct_latest.
  87   87   *
  88   88   * A process contract has a list of member processes and a list of
  89   89   * inherited contracts.
  90   90   *
  91   91   * There is a system-wide list of all contracts, as well as per-type
  92   92   * lists of contracts.
  93   93   *
  94   94   * Lock ordering overview
  95   95   * ----------------------
  96   96   *
  97   97   * Locks at the top are taken first:
  98   98   *
  99   99   *                   ct_evtlock
 100  100   *                   regent ct_lock
 101  101   *                   member ct_lock
 102  102   *                   pidlock
 103  103   *                   p_lock
 104  104   *    contract ctq_lock         contract_lock
 105  105   *    pbundle ctq_lock
 106  106   *    cte_lock
 107  107   *                   ct_reflock
 108  108   *
 109  109   * contract_lock and ctq_lock/cte_lock are not currently taken at the
 110  110   * same time.
 111  111   *
 112  112   * Reference counting and locking
 113  113   * ------------------------------
 114  114   *
 115  115   * A contract has a reference count, protected by ct_reflock.
  116  116   * (ct_reflock is also used in a couple of other places where atomic
 117  117   * access to a variable is needed in an innermost context).  A process
 118  118   * maintains a hold on each contract it owns.  A process contract has a
  119  119   * hold on each contract it has inherited.  Each event has a hold on
 120  120   * the contract which generated it.  Process contract templates have
 121  121   * holds on the contracts referred to by their transfer terms.  CTFS
 122  122   * contract directory nodes have holds on contracts.  Lastly, various
 123  123   * code paths may temporarily take holds on contracts to prevent them
 124  124   * from disappearing while other processing is going on.  It is
 125  125   * important to note that the global contract lists do not hold
 126  126   * references on contracts; a contract is removed from these structures
 127  127   * atomically with the release of its last reference.
 128  128   *
 129  129   * At a given point in time, a contract can either be owned by a
 130  130   * process, inherited by a regent process contract, or orphaned.  A
  131  131   * contract_t's owner and regent pointers, ct_owner and ct_regent, are
 132  132   * protected by its ct_lock.  The linkage in the holder's (holder =
 133  133   * owner or regent) list of contracts, ct_ctlist, is protected by
 134  134   * whatever lock protects the holder's data structure.  In order for
 135  135   * these two directions to remain consistent, changing the holder of a
 136  136   * contract requires that both locks be held.
 137  137   *
 138  138   * Events also have reference counts.  There is one hold on an event
 139  139   * per queue it is present on, in addition to those needed for the
 140  140   * usual sundry reasons.  Individual listeners are associated with
 141  141   * specific queues, and increase a queue-specific reference count
 142  142   * stored in the ct_member_t structure.
 143  143   *
 144  144   * The dynamic contents of an event (reference count and flags) are
 145  145   * protected by its cte_lock, while the contents of the embedded
 146  146   * ct_member_t structures are protected by the locks of the queues they
 147  147   * are linked into.  A ct_listener_t's contents are also protected by
 148  148   * its event queue's ctq_lock.
 149  149   *
 150  150   * Resource controls
 151  151   * -----------------
 152  152   *
 153  153   * Control:      project.max-contracts (rc_project_contract)
  154  154   * Description:  Maximum number of contracts allowed to a project.
 155  155   *
 156  156   *   When a contract is created, the project's allocation is tested and
 157  157   *   (assuming success) increased.  When the last reference to a
 158  158   *   contract is released, the creating project's allocation is
 159  159   *   decreased.
 160  160   */
 161  161  
 162  162  #include <sys/mutex.h>
 163  163  #include <sys/debug.h>
 164  164  #include <sys/types.h>
 165  165  #include <sys/param.h>
 166  166  #include <sys/kmem.h>
 167  167  #include <sys/thread.h>
 168  168  #include <sys/id_space.h>
 169  169  #include <sys/avl.h>
 170  170  #include <sys/list.h>
 171  171  #include <sys/sysmacros.h>
 172  172  #include <sys/proc.h>
 173  173  #include <sys/ctfs.h>
 174  174  #include <sys/contract_impl.h>
 175  175  #include <sys/contract/process_impl.h>
 176  176  #include <sys/dditypes.h>
 177  177  #include <sys/contract/device_impl.h>
 178  178  #include <sys/systm.h>
 179  179  #include <sys/atomic.h>
 180  180  #include <sys/cmn_err.h>
 181  181  #include <sys/model.h>
 182  182  #include <sys/policy.h>
 183  183  #include <sys/zone.h>
 184  184  #include <sys/task.h>
 185  185  #include <sys/ddi.h>
 186  186  #include <sys/sunddi.h>
 187  187  
 188  188  extern rctl_hndl_t rc_project_contract;
 189  189  
 190  190  static id_space_t       *contract_ids;
 191  191  static avl_tree_t       contract_avl;
 192  192  static kmutex_t         contract_lock;
 193  193  
 194  194  int                     ct_ntypes = CTT_MAXTYPE;
 195  195  static ct_type_t        *ct_types_static[CTT_MAXTYPE];
 196  196  ct_type_t               **ct_types = ct_types_static;
 197  197  int                     ct_debug;
 198  198  
 199  199  static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int);
 200  200  static void cte_queue_destroy(ct_equeue_t *);
 201  201  static void cte_queue_drain(ct_equeue_t *, int);
 202  202  static void cte_trim(ct_equeue_t *, contract_t *);
 203  203  static void cte_copy(ct_equeue_t *, ct_equeue_t *);
 204  204  
 205  205  /*
 206  206   * contract_compar
 207  207   *
 208  208   * A contract comparator which sorts on contract ID.
 209  209   */
 210  210  int
 211  211  contract_compar(const void *x, const void *y)
 212  212  {
 213  213          const contract_t *ct1 = x;
 214  214          const contract_t *ct2 = y;
 215  215  
 216  216          if (ct1->ct_id < ct2->ct_id)
 217  217                  return (-1);
 218  218          if (ct1->ct_id > ct2->ct_id)
 219  219                  return (1);
 220  220          return (0);
 221  221  }
 222  222  
 223  223  /*
 224  224   * contract_init
 225  225   *
 226  226   * Initializes the contract subsystem, the specific contract types, and
 227  227   * process 0.
 228  228   */
 229  229  void
 230  230  contract_init(void)
 231  231  {
 232  232          /*
 233  233           * Initialize contract subsystem.
 234  234           */
 235  235          contract_ids = id_space_create("contracts", 1, INT_MAX);
 236  236          avl_create(&contract_avl, contract_compar, sizeof (contract_t),
 237  237              offsetof(contract_t, ct_ctavl));
 238  238          mutex_init(&contract_lock, NULL, MUTEX_DEFAULT, NULL);
 239  239  
 240  240          /*
 241  241           * Initialize contract types.
 242  242           */
 243  243          contract_process_init();
 244  244          contract_device_init();
 245  245  
 246  246          /*
 247  247           * Initialize p0/lwp0 contract state.
 248  248           */
 249  249          avl_create(&p0.p_ct_held, contract_compar, sizeof (contract_t),
 250  250              offsetof(contract_t, ct_ctlist));
 251  251  }
 252  252  
 253  253  /*
 254  254   * contract_dtor
 255  255   *
 256  256   * Performs basic destruction of the common portions of a contract.
 257  257   * Called from the failure path of contract_ctor and from
 258  258   * contract_rele.
 259  259   */
 260  260  static void
 261  261  contract_dtor(contract_t *ct)
 262  262  {
 263  263          cte_queue_destroy(&ct->ct_events);
 264  264          list_destroy(&ct->ct_vnodes);
 265  265          mutex_destroy(&ct->ct_reflock);
 266  266          mutex_destroy(&ct->ct_lock);
 267  267          mutex_destroy(&ct->ct_evtlock);
 268  268  }
 269  269  
 270  270  /*
 271  271   * contract_ctor
 272  272   *
 273  273   * Called by a contract type to initialize a contract.  Fails if the
 274  274   * max-contract resource control would have been exceeded.  After a
 275  275   * successful call to contract_ctor, the contract is unlocked and
 276  276   * visible in all namespaces; any type-specific initialization should
 277  277   * be completed before calling contract_ctor.  Returns 0 on success.
 278  278   *
 279  279   * Because not all callers can tolerate failure, a 0 value for canfail
 280  280   * instructs contract_ctor to ignore the project.max-contracts resource
 281  281   * control.  Obviously, this "out" should only be employed by callers
 282  282   * who are sufficiently constrained in other ways (e.g. newproc).
 283  283   */
 284  284  int
 285  285  contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data,
 286  286      ctflags_t flags, proc_t *author, int canfail)
 287  287  {
 288  288          avl_index_t where;
 289  289          klwp_t *curlwp = ttolwp(curthread);
 290  290  
 291  291          /*
 292  292           * It's possible that author is not curproc if the zone is creating
 293  293           * a new process as a child of zsched.
 294  294           */
 295  295  
 296  296          mutex_init(&ct->ct_lock, NULL, MUTEX_DEFAULT, NULL);
 297  297          mutex_init(&ct->ct_reflock, NULL, MUTEX_DEFAULT, NULL);
 298  298          mutex_init(&ct->ct_evtlock, NULL, MUTEX_DEFAULT, NULL);
 299  299          ct->ct_id = id_alloc(contract_ids);
 300  300  
 301  301          cte_queue_create(&ct->ct_events, CTEL_CONTRACT, 20, 0);
 302  302          list_create(&ct->ct_vnodes, sizeof (contract_vnode_t),
 303  303              offsetof(contract_vnode_t, ctv_node));
 304  304  
 305  305          /*
 306  306           * Instance data
 307  307           */
 308  308          ct->ct_ref = 2;         /* one for the holder, one for "latest" */
 309  309          ct->ct_cuid = crgetuid(CRED());
 310  310          ct->ct_type = type;
 311  311          ct->ct_data = data;
 312  312          gethrestime(&ct->ct_ctime);
 313  313          ct->ct_state = CTS_OWNED;
 314  314          ct->ct_flags = flags;
 315  315          ct->ct_regent = author->p_ct_process ?
 316  316              &author->p_ct_process->conp_contract : NULL;
 317  317          ct->ct_ev_info = tmpl->ctmpl_ev_info;
 318  318          ct->ct_ev_crit = tmpl->ctmpl_ev_crit;
 319  319          ct->ct_cookie = tmpl->ctmpl_cookie;
 320  320          ct->ct_owner = author;
 321  321          ct->ct_ntime.ctm_total = -1;
 322  322          ct->ct_qtime.ctm_total = -1;
 323  323          ct->ct_nevent = NULL;
 324  324  
 325  325          /*
 326  326           * Test project.max-contracts.
 327  327           */
 328  328          mutex_enter(&author->p_lock);
 329  329          mutex_enter(&contract_lock);
 330  330          if (canfail && rctl_test(rc_project_contract,
 331  331              author->p_task->tk_proj->kpj_rctls, author, 1,
 332  332              RCA_SAFE) & RCT_DENY) {
 333  333                  id_free(contract_ids, ct->ct_id);
 334  334                  mutex_exit(&contract_lock);
 335  335                  mutex_exit(&author->p_lock);
 336  336                  ct->ct_events.ctq_flags |= CTQ_DEAD;
 337  337                  contract_dtor(ct);
 338  338                  return (1);
 339  339          }
 340  340          ct->ct_proj = author->p_task->tk_proj;
 341  341          ct->ct_proj->kpj_data.kpd_contract++;
 342  342          (void) project_hold(ct->ct_proj);
 343  343          mutex_exit(&contract_lock);
 344  344  
 345  345          /*
 346  346           * Insert into holder's avl of contracts.
 347  347           * We use an avl not because order is important, but because
  348  348   * readdir of /proc/contracts requires that we be able to use a
  349  349   * scalar as an index into the process's list of contracts.
 350  350           */
 351  351          ct->ct_zoneid = author->p_zone->zone_id;
 352  352          ct->ct_czuniqid = ct->ct_mzuniqid = author->p_zone->zone_uniqid;
 353  353          VERIFY(avl_find(&author->p_ct_held, ct, &where) == NULL);
 354  354          avl_insert(&author->p_ct_held, ct, where);
 355  355          mutex_exit(&author->p_lock);
 356  356  
 357  357          /*
 358  358           * Insert into global contract AVL
 359  359           */
 360  360          mutex_enter(&contract_lock);
 361  361          VERIFY(avl_find(&contract_avl, ct, &where) == NULL);
 362  362          avl_insert(&contract_avl, ct, where);
 363  363          mutex_exit(&contract_lock);
 364  364  
 365  365          /*
 366  366           * Insert into type AVL
 367  367           */
 368  368          mutex_enter(&type->ct_type_lock);
 369  369          VERIFY(avl_find(&type->ct_type_avl, ct, &where) == NULL);
 370  370          avl_insert(&type->ct_type_avl, ct, where);
 371  371          type->ct_type_timestruc = ct->ct_ctime;
 372  372          mutex_exit(&type->ct_type_lock);
 373  373  
 374  374          if (curlwp->lwp_ct_latest[type->ct_type_index])
 375  375                  contract_rele(curlwp->lwp_ct_latest[type->ct_type_index]);
 376  376          curlwp->lwp_ct_latest[type->ct_type_index] = ct;
 377  377  
 378  378          return (0);
 379  379  }
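
            /*
             * Illustrative sketch (hypothetical; the foo_* names are not
             * real interfaces): how a contract type's create path might
             * wrap contract_ctor().  Type-specific initialization is done
             * before the call, since contract_ctor() makes the contract
             * visible, and canfail = 1 lets project.max-contracts deny
             * the creation.
             *
             *	static int
             *	foo_create(ct_template_t *tmpl, proc_t *author,
             *	    foo_contract_t **foop)
             *	{
             *		foo_contract_t *foo;
             *
             *		foo = kmem_zalloc(sizeof (foo_contract_t), KM_SLEEP);
             *		if (contract_ctor(&foo->foo_contract, foo_type, tmpl,
             *		    foo, 0, author, 1) != 0) {
             *			kmem_free(foo, sizeof (foo_contract_t));
             *			return (EAGAIN);
             *		}
             *		*foop = foo;
             *		return (0);
             *	}
             */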
 380  380  
 381  381  /*
 382  382   * contract_rele
 383  383   *
 384  384   * Releases a reference to a contract.  If the caller had the last
 385  385   * reference, the contract is removed from all namespaces, its
 386  386   * allocation against the max-contracts resource control is released,
 387  387   * and the contract type's free entry point is invoked for any
 388  388   * type-specific deconstruction and to (presumably) free the object.
 389  389   */
 390  390  void
 391  391  contract_rele(contract_t *ct)
 392  392  {
 393  393          uint64_t nref;
 394  394  
 395  395          mutex_enter(&ct->ct_reflock);
 396  396          ASSERT(ct->ct_ref > 0);
 397  397          nref = --ct->ct_ref;
 398  398          mutex_exit(&ct->ct_reflock);
 399  399          if (nref == 0) {
 400  400                  /*
 401  401                   * ct_owner is cleared when it drops its reference.
 402  402                   */
 403  403                  ASSERT(ct->ct_owner == NULL);
 404  404                  ASSERT(ct->ct_evcnt == 0);
 405  405  
 406  406                  /*
 407  407                   * Remove from global contract AVL
 408  408                   */
 409  409                  mutex_enter(&contract_lock);
 410  410                  avl_remove(&contract_avl, ct);
 411  411                  mutex_exit(&contract_lock);
 412  412  
 413  413                  /*
 414  414                   * Remove from type AVL
 415  415                   */
 416  416                  mutex_enter(&ct->ct_type->ct_type_lock);
 417  417                  avl_remove(&ct->ct_type->ct_type_avl, ct);
 418  418                  mutex_exit(&ct->ct_type->ct_type_lock);
 419  419  
 420  420                  /*
 421  421                   * Release the contract's ID
 422  422                   */
 423  423                  id_free(contract_ids, ct->ct_id);
 424  424  
 425  425                  /*
 426  426                   * Release project hold
 427  427                   */
 428  428                  mutex_enter(&contract_lock);
 429  429                  ct->ct_proj->kpj_data.kpd_contract--;
 430  430                  project_rele(ct->ct_proj);
 431  431                  mutex_exit(&contract_lock);
 432  432  
 433  433                  /*
 434  434                   * Free the contract
 435  435                   */
 436  436                  contract_dtor(ct);
 437  437                  ct->ct_type->ct_type_ops->contop_free(ct);
 438  438          }
 439  439  }
 440  440  
 441  441  /*
 442  442   * contract_hold
 443  443   *
  444  444   * Adds a reference to a contract.
 445  445   */
 446  446  void
 447  447  contract_hold(contract_t *ct)
 448  448  {
 449  449          mutex_enter(&ct->ct_reflock);
 450  450          ASSERT(ct->ct_ref < UINT64_MAX);
 451  451          ct->ct_ref++;
 452  452          mutex_exit(&ct->ct_reflock);
 453  453  }
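
            /*
             * Illustrative sketch (hypothetical fragment): the temporary
             * hold pattern described under "Reference counting and
             * locking" above.  A hold taken while the contract is known
             * to be valid keeps it from disappearing across a window
             * where ct_lock must be dropped.
             *
             *	mutex_enter(&ct->ct_lock);
             *	...
             *	contract_hold(ct);
             *	mutex_exit(&ct->ct_lock);
             *	... work that cannot be done under ct_lock ...
             *	contract_rele(ct);	may drop the last reference
             */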
 454  454  
 455  455  /*
 456  456   * contract_getzuniqid
 457  457   *
 458  458   * Get a contract's zone unique ID.  Needed because 64-bit reads and
 459  459   * writes aren't atomic on x86.  Since there are contexts where we are
 460  460   * unable to take ct_lock, we instead use ct_reflock; in actuality any
 461  461   * lock would do.
 462  462   */
 463  463  uint64_t
 464  464  contract_getzuniqid(contract_t *ct)
 465  465  {
 466  466          uint64_t zuniqid;
 467  467  
 468  468          mutex_enter(&ct->ct_reflock);
 469  469          zuniqid = ct->ct_mzuniqid;
 470  470          mutex_exit(&ct->ct_reflock);
 471  471  
 472  472          return (zuniqid);
 473  473  }
 474  474  
 475  475  /*
 476  476   * contract_setzuniqid
 477  477   *
  478  478   * Sets a contract's zone unique ID.  See contract_getzuniqid.
 479  479   */
 480  480  void
 481  481  contract_setzuniqid(contract_t *ct, uint64_t zuniqid)
 482  482  {
 483  483          mutex_enter(&ct->ct_reflock);
 484  484          ct->ct_mzuniqid = zuniqid;
 485  485          mutex_exit(&ct->ct_reflock);
 486  486  }
 487  487  
 488  488  /*
 489  489   * contract_abandon
 490  490   *
 491  491   * Abandons the specified contract.  If "explicit" is clear, the
 492  492   * contract was implicitly abandoned (by process exit) and should be
 493  493   * inherited if its terms allow it and its owner was a member of a
 494  494   * regent contract.  Otherwise, the contract type's abandon entry point
 495  495   * is invoked to either destroy or orphan the contract.
 496  496   */
 497  497  int
 498  498  contract_abandon(contract_t *ct, proc_t *p, int explicit)
 499  499  {
 500  500          ct_equeue_t *q = NULL;
 501  501          contract_t *parent = &p->p_ct_process->conp_contract;
 502  502          int inherit = 0;
 503  503  
 504  504          VERIFY(p == curproc);
 505  505  
 506  506          mutex_enter(&ct->ct_lock);
 507  507  
 508  508          /*
 509  509           * Multiple contract locks are taken contract -> subcontract.
 510  510           * Check if the contract will be inherited so we can acquire
 511  511           * all the necessary locks before making sensitive changes.
 512  512           */
 513  513          if (!explicit && (ct->ct_flags & CTF_INHERIT) &&
 514  514              contract_process_accept(parent)) {
 515  515                  mutex_exit(&ct->ct_lock);
 516  516                  mutex_enter(&parent->ct_lock);
 517  517                  mutex_enter(&ct->ct_lock);
 518  518                  inherit = 1;
 519  519          }
 520  520  
 521  521          if (ct->ct_owner != p) {
 522  522                  mutex_exit(&ct->ct_lock);
 523  523                  if (inherit)
 524  524                          mutex_exit(&parent->ct_lock);
 525  525                  return (EINVAL);
 526  526          }
 527  527  
 528  528          mutex_enter(&p->p_lock);
 529  529          if (explicit)
 530  530                  avl_remove(&p->p_ct_held, ct);
 531  531          ct->ct_owner = NULL;
 532  532          mutex_exit(&p->p_lock);
 533  533  
 534  534          /*
 535  535           * Since we can't call cte_trim with the contract lock held,
 536  536           * we grab the queue pointer here.
 537  537           */
 538  538          if (p->p_ct_equeue)
 539  539                  q = p->p_ct_equeue[ct->ct_type->ct_type_index];
 540  540  
 541  541          /*
 542  542           * contop_abandon may destroy the contract so we rely on it to
 543  543           * drop ct_lock.  We retain a reference on the contract so that
 544  544           * the cte_trim which follows functions properly.  Even though
 545  545           * cte_trim doesn't dereference the contract pointer, it is
 546  546           * still necessary to retain a reference to the contract so
 547  547           * that we don't trim events which are sent by a subsequently
 548  548           * allocated contract infortuitously located at the same address.
 549  549           */
 550  550          contract_hold(ct);
 551  551  
 552  552          if (inherit) {
 553  553                  ct->ct_state = CTS_INHERITED;
 554  554                  VERIFY(ct->ct_regent == parent);
 555  555                  contract_process_take(parent, ct);
 556  556  
 557  557                  /*
 558  558                   * We are handing off the process's reference to the
 559  559                   * parent contract.  For this reason, the order in
 560  560                   * which we drop the contract locks is also important.
 561  561                   */
 562  562                  mutex_exit(&ct->ct_lock);
 563  563                  mutex_exit(&parent->ct_lock);
 564  564          } else {
 565  565                  ct->ct_regent = NULL;
 566  566                  ct->ct_type->ct_type_ops->contop_abandon(ct);
 567  567          }
 568  568  
 569  569          /*
 570  570           * ct_lock has been dropped; we can safely trim the event
 571  571           * queue now.
 572  572           */
 573  573          if (q) {
 574  574                  mutex_enter(&q->ctq_lock);
 575  575                  cte_trim(q, ct);
 576  576                  mutex_exit(&q->ctq_lock);
 577  577          }
 578  578  
 579  579          contract_rele(ct);
 580  580  
 581  581          return (0);
 582  582  }
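
            /*
             * Illustrative sketch (hypothetical fragments): the two ways
             * contracts reach this function.  ctfs abandons explicitly on
             * behalf of the owner, while contract_exit() below abandons
             * implicitly on process exit, which is what allows
             * inheritance by a regent process contract.
             *
             *	error = contract_abandon(ct, curproc, 1);	explicit
             *	error = contract_abandon(ct, curproc, 0);	implicit
             */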
 583  583  
 584  584  int
 585  585  contract_newct(contract_t *ct)
 586  586  {
 587  587          return (ct->ct_type->ct_type_ops->contop_newct(ct));
 588  588  }
 589  589  
 590  590  /*
 591  591   * contract_adopt
 592  592   *
 593  593   * Adopts a contract.  After a successful call to this routine, the
 594  594   * previously inherited contract will belong to the calling process,
 595  595   * and its events will have been appended to its new owner's process
 596  596   * bundle queue.
 597  597   */
 598  598  int
 599  599  contract_adopt(contract_t *ct, proc_t *p)
 600  600  {
 601  601          avl_index_t where;
 602  602          ct_equeue_t *q;
 603  603          contract_t *parent;
 604  604  
 605  605          ASSERT(p == curproc);
 606  606  
 607  607          /*
 608  608           * Ensure the process has an event queue.  Checked by ASSERTs
 609  609           * below.
 610  610           */
 611  611          (void) contract_type_pbundle(ct->ct_type, p);
 612  612  
 613  613          mutex_enter(&ct->ct_lock);
 614  614          parent = ct->ct_regent;
 615  615          if (ct->ct_state != CTS_INHERITED ||
 616  616              &p->p_ct_process->conp_contract != parent ||
 617  617              p->p_zone->zone_uniqid != ct->ct_czuniqid) {
 618  618                  mutex_exit(&ct->ct_lock);
 619  619                  return (EINVAL);
 620  620          }
 621  621  
 622  622          /*
 623  623           * Multiple contract locks are taken contract -> subcontract.
 624  624           */
 625  625          mutex_exit(&ct->ct_lock);
 626  626          mutex_enter(&parent->ct_lock);
 627  627          mutex_enter(&ct->ct_lock);
 628  628  
 629  629          /*
 630  630           * It is possible that the contract was adopted by someone else
 631  631           * while its lock was dropped.  It isn't possible for the
 632  632           * contract to have been inherited by a different regent
 633  633           * contract.
 634  634           */
 635  635          if (ct->ct_state != CTS_INHERITED) {
 636  636                  mutex_exit(&parent->ct_lock);
 637  637                  mutex_exit(&ct->ct_lock);
 638  638                  return (EBUSY);
 639  639          }
 640  640          ASSERT(ct->ct_regent == parent);
 641  641  
 642  642          ct->ct_state = CTS_OWNED;
 643  643  
 644  644          contract_process_adopt(ct, p);
 645  645  
 646  646          mutex_enter(&p->p_lock);
 647  647          ct->ct_owner = p;
 648  648          VERIFY(avl_find(&p->p_ct_held, ct, &where) == NULL);
 649  649          avl_insert(&p->p_ct_held, ct, where);
 650  650          mutex_exit(&p->p_lock);
 651  651  
 652  652          ASSERT(ct->ct_owner->p_ct_equeue);
 653  653          ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
 654  654          q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
 655  655          cte_copy(&ct->ct_events, q);
 656  656          mutex_exit(&ct->ct_lock);
 657  657  
 658  658          return (0);
 659  659  }
 660  660  
 661  661  /*
 662  662   * contract_ack
 663  663   *
 664  664   * Acknowledges receipt of a critical event.
 665  665   */
 666  666  int
 667  667  contract_ack(contract_t *ct, uint64_t evid, int ack)
 668  668  {
 669  669          ct_kevent_t *ev;
 670  670          list_t *queue = &ct->ct_events.ctq_events;
 671  671          int error = ESRCH;
 672  672          int nego = 0;
 673  673          uint_t evtype;
 674  674  
 675  675          ASSERT(ack == CT_ACK || ack == CT_NACK);
 676  676  
 677  677          mutex_enter(&ct->ct_lock);
 678  678          mutex_enter(&ct->ct_events.ctq_lock);
 679  679          /*
 680  680           * We are probably ACKing something near the head of the queue.
 681  681           */
 682  682          for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
 683  683                  if (ev->cte_id == evid) {
 684  684                          if (ev->cte_flags & CTE_NEG)
 685  685                                  nego = 1;
 686  686                          else if (ack == CT_NACK)
 687  687                                  break;
 688  688                          if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
 689  689                                  ev->cte_flags |= CTE_ACK;
 690  690                                  ct->ct_evcnt--;
 691  691                                  evtype = ev->cte_type;
 692  692                                  error = 0;
 693  693                          }
 694  694                          break;
 695  695                  }
 696  696          }
 697  697          mutex_exit(&ct->ct_events.ctq_lock);
 698  698          mutex_exit(&ct->ct_lock);
 699  699  
 700  700          /*
  701  701   * Not all critical events are negotiation events; however,
  702  702   * every negotiation event is a critical event.  NEGEND events
  703  703   * are critical events but are not negotiation events.
 704  704           */
 705  705          if (error || !nego)
 706  706                  return (error);
 707  707  
 708  708          if (ack == CT_ACK)
 709  709                  error = ct->ct_type->ct_type_ops->contop_ack(ct, evtype, evid);
 710  710          else
 711  711                  error = ct->ct_type->ct_type_ops->contop_nack(ct, evtype, evid);
 712  712  
 713  713          return (error);
 714  714  }
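
            /*
             * Illustrative sketch (hypothetical; example_ack is not a
             * real function): acknowledging a critical event given only
             * a contract id, using contract_ptr() (defined below) to
             * obtain a held reference.
             *
             *	static int
             *	example_ack(ctid_t id, uint64_t zuniqid, uint64_t evid)
             *	{
             *		contract_t *ct = contract_ptr(id, zuniqid);
             *		int error;
             *
             *		if (ct == NULL)
             *			return (ESRCH);
             *		error = contract_ack(ct, evid, CT_ACK);
             *		contract_rele(ct);
             *		return (error);
             *	}
             */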
 715  715  
 716  716  /*ARGSUSED*/
 717  717  int
 718  718  contract_ack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
 719  719  {
 720  720          cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u",
 721  721              ct->ct_id);
 722  722          return (ENOSYS);
 723  723  }
 724  724  
 725  725  /*ARGSUSED*/
 726  726  int
 727  727  contract_qack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
 728  728  {
  729  729          cmn_err(CE_PANIC, "contract_qack_inval: unsupported call: ctid: %u",
 730  730              ct->ct_id);
 731  731          return (ENOSYS);
 732  732  }
 733  733  
 734  734  /*ARGSUSED*/
 735  735  int
 736  736  contract_qack_notsup(contract_t *ct, uint_t evtype, uint64_t evid)
 737  737  {
 738  738          return (ERANGE);
 739  739  }
 740  740  
 741  741  /*
 742  742   * contract_qack
 743  743   *
  744  744   * Asks that negotiations be extended by another time quantum.
 745  745   */
 746  746  int
 747  747  contract_qack(contract_t *ct, uint64_t evid)
 748  748  {
 749  749          ct_kevent_t *ev;
 750  750          list_t *queue = &ct->ct_events.ctq_events;
 751  751          int nego = 0;
 752  752          uint_t evtype;
 753  753  
 754  754          mutex_enter(&ct->ct_lock);
 755  755          mutex_enter(&ct->ct_events.ctq_lock);
 756  756  
 757  757          for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
 758  758                  if (ev->cte_id == evid) {
 759  759                          if ((ev->cte_flags & (CTE_NEG | CTE_ACK)) == CTE_NEG) {
 760  760                                  evtype = ev->cte_type;
 761  761                                  nego = 1;
 762  762                          }
 763  763                          break;
 764  764                  }
 765  765          }
 766  766          mutex_exit(&ct->ct_events.ctq_lock);
 767  767          mutex_exit(&ct->ct_lock);
 768  768  
 769  769          /*
  770  770   * Only a negotiation event (which is by definition also a critical
  771  771   * event) that has not yet been acknowledged can provide time
  772  772   * quanta to a negotiating owner process.
 773  773           */
 774  774          if (!nego)
 775  775                  return (ESRCH);
 776  776  
 777  777          return (ct->ct_type->ct_type_ops->contop_qack(ct, evtype, evid));
 778  778  }
 779  779  
 780  780  /*
 781  781   * contract_orphan
 782  782   *
 783  783   * Icky-poo.  This is a process-contract special, used to ACK all
 784  784   * critical messages when a contract is orphaned.
 785  785   */
 786  786  void
 787  787  contract_orphan(contract_t *ct)
 788  788  {
 789  789          ct_kevent_t *ev;
 790  790          list_t *queue = &ct->ct_events.ctq_events;
 791  791  
 792  792          ASSERT(MUTEX_HELD(&ct->ct_lock));
 793  793          ASSERT(ct->ct_state != CTS_ORPHAN);
 794  794  
 795  795          mutex_enter(&ct->ct_events.ctq_lock);
 796  796          ct->ct_state = CTS_ORPHAN;
 797  797          for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
 798  798                  if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
 799  799                          ev->cte_flags |= CTE_ACK;
 800  800                          ct->ct_evcnt--;
 801  801                  }
 802  802          }
 803  803          mutex_exit(&ct->ct_events.ctq_lock);
 804  804  
 805  805          ASSERT(ct->ct_evcnt == 0);
 806  806  }
 807  807  
 808  808  /*
 809  809   * contract_destroy
 810  810   *
  811  811   * Explicit contract destruction.  Called when the contract is empty.
  812  812   * The contract will actually stick around until all of its events are
  813  813   * removed from the bundle and process bundle queues, and all fds
 814  814   * which refer to it are closed.  See contract_dtor if you are looking
 815  815   * for what destroys the contract structure.
 816  816   */
 817  817  void
 818  818  contract_destroy(contract_t *ct)
 819  819  {
 820  820          ASSERT(MUTEX_HELD(&ct->ct_lock));
 821  821          ASSERT(ct->ct_state != CTS_DEAD);
 822  822          ASSERT(ct->ct_owner == NULL);
 823  823  
 824  824          ct->ct_state = CTS_DEAD;
 825  825          cte_queue_drain(&ct->ct_events, 1);
 826  826          mutex_exit(&ct->ct_lock);
 827  827          mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
 828  828          cte_trim(&ct->ct_type->ct_type_events, ct);
 829  829          mutex_exit(&ct->ct_type->ct_type_events.ctq_lock);
 830  830          mutex_enter(&ct->ct_lock);
 831  831          ct->ct_type->ct_type_ops->contop_destroy(ct);
 832  832          mutex_exit(&ct->ct_lock);
 833  833          contract_rele(ct);
 834  834  }
 835  835  
 836  836  /*
 837  837   * contract_vnode_get
 838  838   *
 839  839   * Obtains the contract directory vnode for this contract, if there is
  840  840   * one.  The caller must VN_RELE the vnode when done using
  841  841   * it.
 842  842   */
 843  843  vnode_t *
 844  844  contract_vnode_get(contract_t *ct, vfs_t *vfsp)
 845  845  {
 846  846          contract_vnode_t *ctv;
 847  847          vnode_t *vp = NULL;
 848  848  
 849  849          mutex_enter(&ct->ct_lock);
 850  850          for (ctv = list_head(&ct->ct_vnodes); ctv != NULL;
 851  851              ctv = list_next(&ct->ct_vnodes, ctv))
 852  852                  if (ctv->ctv_vnode->v_vfsp == vfsp) {
 853  853                          vp = ctv->ctv_vnode;
 854  854                          VN_HOLD(vp);
 855  855                          break;
 856  856                  }
 857  857          mutex_exit(&ct->ct_lock);
 858  858          return (vp);
 859  859  }
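
            /*
             * Illustrative sketch (hypothetical fragment): balancing the
             * hold taken by contract_vnode_get().
             *
             *	vnode_t *vp;
             *
             *	if ((vp = contract_vnode_get(ct, vfsp)) != NULL) {
             *		... use vp ...
             *		VN_RELE(vp);
             *	}
             */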
 860  860  
 861  861  /*
 862  862   * contract_vnode_set
 863  863   *
 864  864   * Sets the contract directory vnode for this contract.  We don't hold
 865  865   * a reference on the vnode because we don't want to prevent it from
 866  866   * being freed.  The vnode's inactive entry point will take care of
 867  867   * notifying us when it should be removed.
 868  868   */
 869  869  void
 870  870  contract_vnode_set(contract_t *ct, contract_vnode_t *ctv, vnode_t *vnode)
 871  871  {
 872  872          mutex_enter(&ct->ct_lock);
 873  873          ctv->ctv_vnode = vnode;
 874  874          list_insert_head(&ct->ct_vnodes, ctv);
 875  875          mutex_exit(&ct->ct_lock);
 876  876  }
 877  877  
 878  878  /*
 879  879   * contract_vnode_clear
 880  880   *
 881  881   * Removes this vnode as the contract directory vnode for this
 882  882   * contract.  Called from a contract directory's inactive entry point,
 883  883   * this may return 0 indicating that the vnode gained another reference
 884  884   * because of a simultaneous call to contract_vnode_get.
 885  885   */
 886  886  int
 887  887  contract_vnode_clear(contract_t *ct, contract_vnode_t *ctv)
 888  888  {
 889  889          vnode_t *vp = ctv->ctv_vnode;
 890  890          int result;
 891  891  
 892  892          mutex_enter(&ct->ct_lock);
 893  893          mutex_enter(&vp->v_lock);
 894  894          if (vp->v_count == 1) {
 895  895                  list_remove(&ct->ct_vnodes, ctv);
 896  896                  result = 1;
 897  897          } else {
 898  898                  vp->v_count--;
 899  899                  result = 0;
 900  900          }
 901  901          mutex_exit(&vp->v_lock);
 902  902          mutex_exit(&ct->ct_lock);
 903  903  
 904  904          return (result);
 905  905  }
 906  906  
 907  907  /*
 908  908   * contract_exit
 909  909   *
 910  910   * Abandons all contracts held by process p, and drains process p's
 911  911   * bundle queues.  Called on process exit.
 912  912   */
 913  913  void
 914  914  contract_exit(proc_t *p)
 915  915  {
 916  916          contract_t *ct;
 917  917          void *cookie = NULL;
 918  918          int i;
 919  919  
 920  920          ASSERT(p == curproc);
 921  921  
 922  922          /*
 923  923           * Abandon held contracts.  contract_abandon knows enough not
 924  924           * to remove the contract from the list a second time.  We are
 925  925           * exiting, so no locks are needed here.  But because
 926  926           * contract_abandon will take p_lock, we need to make sure we
 927  927           * aren't holding it.
 928  928           */
 929  929          ASSERT(MUTEX_NOT_HELD(&p->p_lock));
 930  930          while ((ct = avl_destroy_nodes(&p->p_ct_held, &cookie)) != NULL)
 931  931                  VERIFY(contract_abandon(ct, p, 0) == 0);
 932  932  
 933  933          /*
 934  934           * Drain pbundles.  Because a process bundle queue could have
 935  935           * been passed to another process, they may not be freed right
 936  936           * away.
 937  937           */
 938  938          if (p->p_ct_equeue) {
 939  939                  for (i = 0; i < CTT_MAXTYPE; i++)
 940  940                          if (p->p_ct_equeue[i])
 941  941                                  cte_queue_drain(p->p_ct_equeue[i], 0);
 942  942                  kmem_free(p->p_ct_equeue, CTT_MAXTYPE * sizeof (ct_equeue_t *));
 943  943          }
 944  944  }
 945  945  
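            /*
             * get_time_left
             *
             * Returns the number of seconds remaining in the interval
             * described by t, 0 if the interval has expired, or -1 if
             * the interval is unlimited (ctm_total == -1).
             */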
 946  946  static int
 947  947  get_time_left(struct ct_time *t)
 948  948  {
 949  949          clock_t ticks_elapsed;
  950  950          int secs_remaining;
 951  951  
 952  952          if (t->ctm_total == -1)
 953  953                  return (-1);
 954  954  
 955  955          ticks_elapsed = ddi_get_lbolt() - t->ctm_start;
  956  956          secs_remaining = t->ctm_total - (drv_hztousec(ticks_elapsed)/MICROSEC);
  957  957          return (secs_remaining > 0 ? secs_remaining : 0);
 958  958  }
 959  959  
 960  960  /*
 961  961   * contract_status_common
 962  962   *
 963  963   * Populates a ct_status structure.  Used by contract types in their
 964  964   * status entry points and ctfs when only common information is
 965  965   * requested.
 966  966   */
 967  967  void
 968  968  contract_status_common(contract_t *ct, zone_t *zone, void *status,
 969  969      model_t model)
 970  970  {
 971  971          STRUCT_HANDLE(ct_status, lstatus);
 972  972  
 973  973          STRUCT_SET_HANDLE(lstatus, model, status);
 974  974          ASSERT(MUTEX_HELD(&ct->ct_lock));
 975  975          if (zone->zone_uniqid == GLOBAL_ZONEUNIQID ||
 976  976              zone->zone_uniqid == ct->ct_czuniqid) {
 977  977                  zone_t *czone;
 978  978                  zoneid_t zoneid = -1;
 979  979  
 980  980                  /*
 981  981                   * Contracts don't have holds on the zones they were
 982  982                   * created by.  If the contract's zone no longer
 983  983                   * exists, we say its zoneid is -1.
 984  984                   */
 985  985                  if (zone->zone_uniqid == ct->ct_czuniqid ||
 986  986                      ct->ct_czuniqid == GLOBAL_ZONEUNIQID) {
 987  987                          zoneid = ct->ct_zoneid;
 988  988                  } else if ((czone = zone_find_by_id(ct->ct_zoneid)) != NULL) {
 989  989                          if (czone->zone_uniqid == ct->ct_mzuniqid)
 990  990                                  zoneid = ct->ct_zoneid;
 991  991                          zone_rele(czone);
 992  992                  }
 993  993  
 994  994                  STRUCT_FSET(lstatus, ctst_zoneid, zoneid);
 995  995                  STRUCT_FSET(lstatus, ctst_holder,
 996  996                      (ct->ct_state == CTS_OWNED) ? ct->ct_owner->p_pid :
 997  997                      (ct->ct_state == CTS_INHERITED) ? ct->ct_regent->ct_id : 0);
 998  998                  STRUCT_FSET(lstatus, ctst_state, ct->ct_state);
 999  999          } else {
1000 1000                  /*
1001 1001                   * We are looking at a contract which was created by a
1002 1002                   * process outside of our zone.  We provide fake zone,
1003 1003                   * holder, and state information.
1004 1004                   */
1005 1005  
1006 1006                  STRUCT_FSET(lstatus, ctst_zoneid, zone->zone_id);
1007 1007                  /*
1008 1008                   * Since "zone" can't disappear until the calling ctfs
1009 1009                   * is unmounted, zone_zsched must be valid.
1010 1010                   */
1011 1011                  STRUCT_FSET(lstatus, ctst_holder, (ct->ct_state < CTS_ORPHAN) ?
1012 1012                      zone->zone_zsched->p_pid : 0);
1013 1013                  STRUCT_FSET(lstatus, ctst_state, (ct->ct_state < CTS_ORPHAN) ?
1014 1014                      CTS_OWNED : ct->ct_state);
1015 1015          }
1016 1016          STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt);
1017 1017          STRUCT_FSET(lstatus, ctst_ntime, get_time_left(&ct->ct_ntime));
1018 1018          STRUCT_FSET(lstatus, ctst_qtime, get_time_left(&ct->ct_qtime));
1019 1019          STRUCT_FSET(lstatus, ctst_nevid,
1020 1020              ct->ct_nevent ? ct->ct_nevent->cte_id : 0);
1021 1021          STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit);
1022 1022          STRUCT_FSET(lstatus, ctst_informative, ct->ct_ev_info);
1023 1023          STRUCT_FSET(lstatus, ctst_cookie, ct->ct_cookie);
1024 1024          STRUCT_FSET(lstatus, ctst_type, ct->ct_type->ct_type_index);
1025 1025          STRUCT_FSET(lstatus, ctst_id, ct->ct_id);
1026 1026  }
1027 1027  
1028 1028  /*
1029 1029   * contract_checkcred
1030 1030   *
1031 1031   * Determines if the specified contract is owned by a process with the
1032 1032   * same effective uid as the specified credential.  The caller must
1033 1033   * ensure that the uid spaces are the same.  Returns 1 on success.
1034 1034   */
1035 1035  static int
1036 1036  contract_checkcred(contract_t *ct, const cred_t *cr)
1037 1037  {
1038 1038          proc_t *p;
1039 1039          int fail = 1;
1040 1040  
1041 1041          mutex_enter(&ct->ct_lock);
1042 1042          if ((p = ct->ct_owner) != NULL) {
1043 1043                  mutex_enter(&p->p_crlock);
1044 1044                  fail = crgetuid(cr) != crgetuid(p->p_cred);
1045 1045                  mutex_exit(&p->p_crlock);
1046 1046          }
1047 1047          mutex_exit(&ct->ct_lock);
1048 1048  
1049 1049          return (!fail);
1050 1050  }
1051 1051  
1052 1052  /*
1053 1053   * contract_owned
1054 1054   *
1055 1055   * Determines if the specified credential can view an event generated
1056 1056   * by the specified contract.  If locked is set, the contract's ct_lock
1057 1057   * is held and the caller will need to do additional work to determine
 1058 1058   * whether it truly can see the event.  Returns 1 on success.
1059 1059   */
1060 1060  int
1061 1061  contract_owned(contract_t *ct, const cred_t *cr, int locked)
1062 1062  {
1063 1063          int owner, cmatch, zmatch;
1064 1064          uint64_t zuniqid, mzuniqid;
1065 1065          uid_t euid;
1066 1066  
1067 1067          ASSERT(locked || MUTEX_NOT_HELD(&ct->ct_lock));
1068 1068  
1069 1069          zuniqid = curproc->p_zone->zone_uniqid;
1070 1070          mzuniqid = contract_getzuniqid(ct);
1071 1071          euid = crgetuid(cr);
1072 1072  
1073 1073          /*
1074 1074           * owner: we own the contract
1075 1075           * cmatch: we are in the creator's (and holder's) zone and our
1076 1076           *   uid matches the creator's or holder's
1077 1077           * zmatch: we are in the effective zone of a contract created
1078 1078           *   in the global zone, and our uid matches that of the
1079 1079           *   virtualized holder's (zsched/kcred)
1080 1080           */
1081 1081          owner = (ct->ct_owner == curproc);
1082 1082          cmatch = (zuniqid == ct->ct_czuniqid) &&
1083 1083              ((ct->ct_cuid == euid) || (!locked && contract_checkcred(ct, cr)));
1084 1084          zmatch = (ct->ct_czuniqid != mzuniqid) && (zuniqid == mzuniqid) &&
1085 1085              (crgetuid(kcred) == euid);
1086 1086  
1087 1087          return (owner || cmatch || zmatch);
1088 1088  }
1089 1089  
1090 1090  
1091 1091  /*
1092 1092   * contract_type_init
1093 1093   *
1094 1094   * Called by contract types to register themselves with the contracts
1095 1095   * framework.
1096 1096   */
1097 1097  ct_type_t *
1098 1098  contract_type_init(ct_typeid_t type, const char *name, contops_t *ops,
1099 1099      ct_f_default_t *dfault)
1100 1100  {
1101 1101          ct_type_t *result;
1102 1102  
1103 1103          ASSERT(type < CTT_MAXTYPE);
1104 1104  
1105 1105          result = kmem_alloc(sizeof (ct_type_t), KM_SLEEP);
1106 1106  
1107 1107          mutex_init(&result->ct_type_lock, NULL, MUTEX_DEFAULT, NULL);
1108 1108          avl_create(&result->ct_type_avl, contract_compar, sizeof (contract_t),
1109 1109              offsetof(contract_t, ct_cttavl));
1110 1110          cte_queue_create(&result->ct_type_events, CTEL_BUNDLE, 20, 0);
1111 1111          result->ct_type_name = name;
1112 1112          result->ct_type_ops = ops;
1113 1113          result->ct_type_default = dfault;
1114 1114          result->ct_type_evid = 0;
1115 1115          gethrestime(&result->ct_type_timestruc);
1116 1116          result->ct_type_index = type;
1117 1117  
1118 1118          ct_types[type] = result;
1119 1119  
1120 1120          return (result);
1121 1121  }
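
            /*
             * Illustrative sketch (hypothetical; the foo_* names and
             * CTT_FOO are not real): how a contract type registers
             * itself at boot, in the style of contract_process_init()
             * and contract_device_init() called from contract_init()
             * above.  foo_contops is a contops_t filled in with the
             * type's entry points; foo_default constructs its default
             * terms.
             *
             *	static ct_type_t *foo_type;
             *
             *	void
             *	contract_foo_init(void)
             *	{
             *		foo_type = contract_type_init(CTT_FOO, "foo",
             *		    &foo_contops, foo_default);
             *	}
             */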
1122 1122  
1123 1123  /*
1124 1124   * contract_type_count
1125 1125   *
1126 1126   * Obtains the number of contracts of a particular type.
1127 1127   */
1128 1128  int
1129 1129  contract_type_count(ct_type_t *type)
1130 1130  {
1131 1131          ulong_t count;
1132 1132  
1133 1133          mutex_enter(&type->ct_type_lock);
1134 1134          count = avl_numnodes(&type->ct_type_avl);
1135 1135          mutex_exit(&type->ct_type_lock);
1136 1136  
1137 1137          return (count);
1138 1138  }
1139 1139  
1140 1140  /*
1141 1141   * contract_type_max
1142 1142   *
 1143 1143   * Obtains the maximum contract id of a particular type.
1144 1144   */
1145 1145  ctid_t
1146 1146  contract_type_max(ct_type_t *type)
1147 1147  {
1148 1148          contract_t *ct;
1149 1149          ctid_t res;
1150 1150  
1151 1151          mutex_enter(&type->ct_type_lock);
1152 1152          ct = avl_last(&type->ct_type_avl);
1153 1153          res = ct ? ct->ct_id : -1;
1154 1154          mutex_exit(&type->ct_type_lock);
1155 1155  
1156 1156          return (res);
1157 1157  }
1158 1158  
1159 1159  /*
1160 1160   * contract_max
1161 1161   *
1162 1162   * Obtains the maximum contract id.
1163 1163   */
1164 1164  ctid_t
1165 1165  contract_max(void)
1166 1166  {
1167 1167          contract_t *ct;
1168 1168          ctid_t res;
1169 1169  
1170 1170          mutex_enter(&contract_lock);
1171 1171          ct = avl_last(&contract_avl);
1172 1172          res = ct ? ct->ct_id : -1;
1173 1173          mutex_exit(&contract_lock);
1174 1174  
1175 1175          return (res);
1176 1176  }
1177 1177  
1178 1178  /*
1179 1179   * contract_lookup_common
1180 1180   *
1181 1181   * Common code for contract_lookup and contract_type_lookup.  Takes a
1182 1182   * pointer to an AVL tree to search in.  Should be called with the
1183 1183   * appropriate tree-protecting lock held (unfortunately unassertable).
1184 1184   */
1185 1185  static ctid_t
1186 1186  contract_lookup_common(avl_tree_t *tree, uint64_t zuniqid, ctid_t current)
1187 1187  {
1188 1188          contract_t template, *ct;
1189 1189          avl_index_t where;
1190 1190          ctid_t res;
1191 1191  
1192 1192          template.ct_id = current;
1193 1193          ct = avl_find(tree, &template, &where);
1194 1194          if (ct == NULL)
1195 1195                  ct = avl_nearest(tree, where, AVL_AFTER);
1196 1196          if (zuniqid != GLOBAL_ZONEUNIQID)
1197 1197                  while (ct && (contract_getzuniqid(ct) != zuniqid))
1198 1198                          ct = AVL_NEXT(tree, ct);
1199 1199          res = ct ? ct->ct_id : -1;
1200 1200  
1201 1201          return (res);
1202 1202  }
1203 1203  
1204 1204  /*
1205 1205   * contract_type_lookup
1206 1206   *
 1207 1207   * Returns the id of the first contract of the given type at or
 1208 1208   * after the specified id that is visible from the specified zone.
1209 1209   */
1210 1210  ctid_t
1211 1211  contract_type_lookup(ct_type_t *type, uint64_t zuniqid, ctid_t current)
1212 1212  {
1213 1213          ctid_t res;
1214 1214  
1215 1215          mutex_enter(&type->ct_type_lock);
1216 1216          res = contract_lookup_common(&type->ct_type_avl, zuniqid, current);
1217 1217          mutex_exit(&type->ct_type_lock);
1218 1218  
1219 1219          return (res);
1220 1220  }
1221 1221  
1222 1222  /*
1223 1223   * contract_lookup
1224 1224   *
 1225 1225   * Returns the id of the first contract at or after the specified
 1226 1226   * id that is visible from the specified zone.
1227 1227   */
1228 1228  ctid_t
1229 1229  contract_lookup(uint64_t zuniqid, ctid_t current)
1230 1230  {
1231 1231          ctid_t res;
1232 1232  
1233 1233          mutex_enter(&contract_lock);
1234 1234          res = contract_lookup_common(&contract_avl, zuniqid, current);
1235 1235          mutex_exit(&contract_lock);
1236 1236  
1237 1237          return (res);
1238 1238  }
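
            /*
             * Illustrative sketch (hypothetical fragment): because the
             * lookup returns the first id at or after "current", walking
             * every contract visible from a zone looks like:
             *
             *	ctid_t id = 0;
             *
             *	while ((id = contract_lookup(zuniqid, id)) != -1) {
             *		... visit contract id ...
             *		id++;
             *	}
             */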
1239 1239  
1240 1240  /*
1241 1241   * contract_plookup
1242 1242   *
 1243 1243   * Returns the id of the first contract held by process p at or
 1244 1244   * after the specified id, visible from the specified zone.  Made
 1245 1245   * complicated by the fact that contracts visible in a zone but held
 1246 1246   * by processes outside of it must appear to be held by zsched.
1247 1247   */
1248 1248  ctid_t
1249 1249  contract_plookup(proc_t *p, ctid_t current, uint64_t zuniqid)
1250 1250  {
1251 1251          contract_t template, *ct;
1252 1252          avl_index_t where;
1253 1253          ctid_t res;
1254 1254  
1255 1255          template.ct_id = current;
1256 1256          if (zuniqid != GLOBAL_ZONEUNIQID &&
1257 1257              (p->p_flag & (SSYS|SZONETOP)) == (SSYS|SZONETOP)) {
1258 1258                  /* This is inelegant. */
1259 1259                  mutex_enter(&contract_lock);
1260 1260                  ct = avl_find(&contract_avl, &template, &where);
1261 1261                  if (ct == NULL)
1262 1262                          ct = avl_nearest(&contract_avl, where, AVL_AFTER);
1263 1263                  while (ct && !(ct->ct_state < CTS_ORPHAN &&
1264 1264                      contract_getzuniqid(ct) == zuniqid &&
1265 1265                      ct->ct_czuniqid == GLOBAL_ZONEUNIQID))
1266 1266                          ct = AVL_NEXT(&contract_avl, ct);
1267 1267                  res = ct ? ct->ct_id : -1;
1268 1268                  mutex_exit(&contract_lock);
1269 1269          } else {
1270 1270                  mutex_enter(&p->p_lock);
1271 1271                  ct = avl_find(&p->p_ct_held, &template, &where);
1272 1272                  if (ct == NULL)
1273 1273                          ct = avl_nearest(&p->p_ct_held, where, AVL_AFTER);
1274 1274                  res = ct ? ct->ct_id : -1;
1275 1275                  mutex_exit(&p->p_lock);
1276 1276          }
1277 1277  
1278 1278          return (res);
1279 1279  }
1280 1280  
1281 1281  /*
1282 1282   * contract_ptr_common
1283 1283   *
1284 1284   * Common code for contract_ptr and contract_type_ptr.  Takes a pointer
1285 1285   * to an AVL tree to search in.  Should be called with the appropriate
1286 1286   * tree-protecting lock held (unfortunately unassertable).
1287 1287   */
1288 1288  static contract_t *
1289 1289  contract_ptr_common(avl_tree_t *tree, ctid_t id, uint64_t zuniqid)
1290 1290  {
1291 1291          contract_t template, *ct;
1292 1292  
1293 1293          template.ct_id = id;
1294 1294          ct = avl_find(tree, &template, NULL);
1295 1295          if (ct == NULL || (zuniqid != GLOBAL_ZONEUNIQID &&
1296 1296              contract_getzuniqid(ct) != zuniqid)) {
1297 1297                  return (NULL);
1298 1298          }
1299 1299  
1300 1300          /*
1301 1301           * Check to see if a thread is in the window in contract_rele
1302 1302           * between dropping the reference count and removing the
1303 1303           * contract from the type AVL.
1304 1304           */
1305 1305          mutex_enter(&ct->ct_reflock);
1306 1306          if (ct->ct_ref) {
1307 1307                  ct->ct_ref++;
1308 1308                  mutex_exit(&ct->ct_reflock);
1309 1309          } else {
1310 1310                  mutex_exit(&ct->ct_reflock);
1311 1311                  ct = NULL;
1312 1312          }
1313 1313  
1314 1314          return (ct);
1315 1315  }
1316 1316  
1317 1317  /*
1318 1318   * contract_type_ptr
1319 1319   *
1320 1320   * Returns a pointer to the contract with the specified id.  The
1321 1321   * contract is held, so the caller needs to release the reference when
1322 1322   * it is through with the contract.
1323 1323   */
1324 1324  contract_t *
1325 1325  contract_type_ptr(ct_type_t *type, ctid_t id, uint64_t zuniqid)
1326 1326  {
1327 1327          contract_t *ct;
1328 1328  
1329 1329          mutex_enter(&type->ct_type_lock);
1330 1330          ct = contract_ptr_common(&type->ct_type_avl, id, zuniqid);
1331 1331          mutex_exit(&type->ct_type_lock);
1332 1332  
1333 1333          return (ct);
1334 1334  }
1335 1335  
1336 1336  /*
1337 1337   * contract_ptr
1338 1338   *
1339 1339   * Returns a pointer to the contract with the specified id.  The
1340 1340   * contract is held, so the caller needs to release the reference when
1341 1341   * it is through with the contract.
1342 1342   */
1343 1343  contract_t *
1344 1344  contract_ptr(ctid_t id, uint64_t zuniqid)
1345 1345  {
1346 1346          contract_t *ct;
1347 1347  
1348 1348          mutex_enter(&contract_lock);
1349 1349          ct = contract_ptr_common(&contract_avl, id, zuniqid);
1350 1350          mutex_exit(&contract_lock);
1351 1351  
1352 1352          return (ct);
1353 1353  }
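
           /*
            * Illustrative sketch (hypothetical caller): the hold taken by
            * contract_ptr() and contract_type_ptr() must be paired with a
            * contract_rele() once the caller is done with the contract:
            *
            *       contract_t *ct;
            *
            *       if ((ct = contract_ptr(id, zuniqid)) != NULL) {
            *               (use ct)
            *               contract_rele(ct);
            *       }
            */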
1354 1354  
1355 1355  /*
1356 1356   * contract_type_time
1357 1357   *
1358 1358   * Obtains the last time a contract of a particular type was created.
1359 1359   */
1360 1360  void
1361 1361  contract_type_time(ct_type_t *type, timestruc_t *time)
1362 1362  {
1363 1363          mutex_enter(&type->ct_type_lock);
1364 1364          *time = type->ct_type_timestruc;
1365 1365          mutex_exit(&type->ct_type_lock);
1366 1366  }
1367 1367  
1368 1368  /*
1369 1369   * contract_type_bundle
1370 1370   *
1371 1371   * Obtains a type's bundle queue.
1372 1372   */
1373 1373  ct_equeue_t *
1374 1374  contract_type_bundle(ct_type_t *type)
1375 1375  {
1376 1376          return (&type->ct_type_events);
1377 1377  }
1378 1378  
1379 1379  /*
1380 1380   * contract_type_pbundle
1381 1381   *
1382 1382   * Obtains a process's bundle queue.  If one doesn't exist, one is
1383 1383   * created.  Often used simply to ensure that a bundle queue is
1384 1384   * allocated.
1385 1385   */
1386 1386  ct_equeue_t *
1387 1387  contract_type_pbundle(ct_type_t *type, proc_t *pp)
1388 1388  {
1389 1389          /*
1390 1390           * If there isn't an array of bundle queues, allocate one.
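                    * The array is allocated before p_lock is taken so that
                    * the KM_SLEEP allocation never sleeps with the process
                    * lock held; if another thread installs an array first,
                    * ours is freed below.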
1391 1391           */
1392 1392          if (pp->p_ct_equeue == NULL) {
1393 1393                  size_t size = CTT_MAXTYPE * sizeof (ct_equeue_t *);
1394 1394                  ct_equeue_t **qa = kmem_zalloc(size, KM_SLEEP);
1395 1395  
1396 1396                  mutex_enter(&pp->p_lock);
1397 1397                  if (pp->p_ct_equeue)
1398 1398                          kmem_free(qa, size);
1399 1399                  else
1400 1400                          pp->p_ct_equeue = qa;
1401 1401                  mutex_exit(&pp->p_lock);
1402 1402          }
1403 1403  
1404 1404          /*
1405 1405           * If there isn't a bundle queue of the required type, allocate
1406 1406           * one.
1407 1407           */
1408 1408          if (pp->p_ct_equeue[type->ct_type_index] == NULL) {
1409 1409                  ct_equeue_t *q = kmem_zalloc(sizeof (ct_equeue_t), KM_SLEEP);
1410 1410                  cte_queue_create(q, CTEL_PBUNDLE, 20, 1);
1411 1411  
1412 1412                  mutex_enter(&pp->p_lock);
1413 1413                  if (pp->p_ct_equeue[type->ct_type_index])
1414 1414                          cte_queue_drain(q, 0);
1415 1415                  else
1416 1416                          pp->p_ct_equeue[type->ct_type_index] = q;
1417 1417                  mutex_exit(&pp->p_lock);
1418 1418          }
1419 1419  
1420 1420          return (pp->p_ct_equeue[type->ct_type_index]);
1421 1421  }
1422 1422  
1423 1423  /*
1424 1424   * ctparam_copyin
1425 1425   *
1426 1426   * Copies in a ct_param_t for the CT_TSET or CT_TGET commands.
1427 1427   * If ctparam_copyout() is not called after ctparam_copyin(), then
1428 1428   * the caller must kmem_free() the buffer pointed to by kparam->ctpm_kbuf.
1429 1429   *
1430 1430   * The copyin/out of ct_param_t is not done in ctmpl_set() and ctmpl_get()
1431 1431   * because prctioctl() calls ctmpl_set() and ctmpl_get() while holding a
1432 1432   * process lock.
1433 1433   */
1434 1434  int
1435 1435  ctparam_copyin(const void *uaddr, ct_kparam_t *kparam, int flag, int cmd)
1436 1436  {
1437 1437          uint32_t size;
1438 1438          void *ubuf;
1439 1439          ct_param_t *param = &kparam->param;
1440 1440          STRUCT_DECL(ct_param, uarg);
1441 1441  
1442 1442          STRUCT_INIT(uarg, flag);
1443 1443          if (copyin(uaddr, STRUCT_BUF(uarg), STRUCT_SIZE(uarg)))
1444 1444                  return (EFAULT);
1445 1445          size = STRUCT_FGET(uarg, ctpm_size);
1446 1446          ubuf = STRUCT_FGETP(uarg, ctpm_value);
1447 1447  
1448 1448          if (size > CT_PARAM_MAX_SIZE || size == 0)
1449 1449                  return (EINVAL);
1450 1450  
1451 1451          kparam->ctpm_kbuf = kmem_alloc(size, KM_SLEEP);
1452 1452          if (cmd == CT_TSET) {
1453 1453                  if (copyin(ubuf, kparam->ctpm_kbuf, size)) {
1454 1454                          kmem_free(kparam->ctpm_kbuf, size);
1455 1455                          return (EFAULT);
1456 1456                  }
1457 1457          }
1458 1458          param->ctpm_id = STRUCT_FGET(uarg, ctpm_id);
1459 1459          param->ctpm_size = size;
1460 1460          param->ctpm_value = ubuf;
1461 1461          kparam->ret_size = 0;
1462 1462  
1463 1463          return (0);
1464 1464  }
1465 1465  
1466 1466  /*
1467 1467   * ctparam_copyout
1468 1468   *
1469 1469   * Copies out a ct_kparam_t and frees the buffer pointed to by the
1470 1470   * ctpm_kbuf member of ct_kparam_t.
1471 1471   */
1472 1472  int
1473 1473  ctparam_copyout(ct_kparam_t *kparam, void *uaddr, int flag)
1474 1474  {
1475 1475          int r = 0;
1476 1476          ct_param_t *param = &kparam->param;
1477 1477          STRUCT_DECL(ct_param, uarg);
1478 1478  
1479 1479          STRUCT_INIT(uarg, flag);
1480 1480  
1481 1481          STRUCT_FSET(uarg, ctpm_id, param->ctpm_id);
1482 1482          STRUCT_FSET(uarg, ctpm_size, kparam->ret_size);
1483 1483          STRUCT_FSETP(uarg, ctpm_value, param->ctpm_value);
1484 1484          if (copyout(STRUCT_BUF(uarg), uaddr, STRUCT_SIZE(uarg))) {
1485 1485                  r = EFAULT;
1486 1486                  goto error;
1487 1487          }
1488 1488          if (copyout(kparam->ctpm_kbuf, param->ctpm_value,
1489 1489              MIN(kparam->ret_size, param->ctpm_size))) {
1490 1490                  r = EFAULT;
1491 1491          }
1492 1492  
1493 1493  error:
1494 1494          kmem_free(kparam->ctpm_kbuf, param->ctpm_size);
1495 1495  
1496 1496          return (r);
1497 1497  }
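
           /*
            * Illustrative sketch (hypothetical caller) of the pairing
            * described above: if ctparam_copyout() is not reached after a
            * successful ctparam_copyin(), the caller frees ctpm_kbuf:
            *
            *       ct_kparam_t kparam;
            *       int error;
            *
            *       error = ctparam_copyin(uaddr, &kparam, flag, CT_TGET);
            *       if (error != 0)
            *               return (error);
            *       if ((error = ctmpl_get(template, &kparam)) != 0) {
            *               kmem_free(kparam.ctpm_kbuf,
            *                   kparam.param.ctpm_size);
            *               return (error);
            *       }
            *       return (ctparam_copyout(&kparam, uaddr, flag));
            */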
1498 1498  
1499 1499  /*
1500 1500   * ctmpl_free
1501 1501   *
1502 1502   * Frees a template.
1503 1503   */
1504 1504  void
1505 1505  ctmpl_free(ct_template_t *template)
1506 1506  {
1507 1507          mutex_destroy(&template->ctmpl_lock);
1508 1508          template->ctmpl_ops->ctop_free(template);
1509 1509  }
1510 1510  
1511 1511  /*
1512 1512   * ctmpl_dup
1513 1513   *
1514 1514   * Creates a copy of a template.
1515 1515   */
1516 1516  ct_template_t *
1517 1517  ctmpl_dup(ct_template_t *template)
1518 1518  {
1519 1519          ct_template_t *new;
1520 1520  
1521 1521          if (template == NULL)
1522 1522                  return (NULL);
1523 1523  
1524 1524          new = template->ctmpl_ops->ctop_dup(template);
1525 1525          /*
1526 1526           * ctmpl_lock was taken by ctop_dup's call to ctmpl_copy and
1527 1527   * should have remained held until now.
1528 1528           */
1529 1529          mutex_exit(&template->ctmpl_lock);
1530 1530  
1531 1531          return (new);
1532 1532  }
1533 1533  
1534 1534  /*
1535 1535   * ctmpl_set
1536 1536   *
1537 1537   * Sets the requested terms of a template.
1538 1538   */
1539 1539  int
1540 1540  ctmpl_set(ct_template_t *template, ct_kparam_t *kparam, const cred_t *cr)
1541 1541  {
1542 1542          int result = 0;
1543 1543          ct_param_t *param = &kparam->param;
1544 1544          uint64_t param_value;
1545 1545  
1546 1546          if (param->ctpm_id == CTP_COOKIE ||
1547 1547              param->ctpm_id == CTP_EV_INFO ||
1548 1548              param->ctpm_id == CTP_EV_CRITICAL) {
1549 1549                  if (param->ctpm_size < sizeof (uint64_t)) {
1550 1550                          return (EINVAL);
1551 1551                  } else {
1552 1552                          param_value = *(uint64_t *)kparam->ctpm_kbuf;
1553 1553                  }
1554 1554          }
1555 1555  
1556 1556          mutex_enter(&template->ctmpl_lock);
1557 1557          switch (param->ctpm_id) {
1558 1558          case CTP_COOKIE:
1559 1559                  template->ctmpl_cookie = param_value;
1560 1560                  break;
1561 1561          case CTP_EV_INFO:
1562 1562                  if (param_value & ~(uint64_t)template->ctmpl_ops->allevents)
1563 1563                          result = EINVAL;
1564 1564                  else
1565 1565                          template->ctmpl_ev_info = param_value;
1566 1566                  break;
1567 1567          case CTP_EV_CRITICAL:
1568 1568                  if (param_value & ~(uint64_t)template->ctmpl_ops->allevents) {
1569 1569                          result = EINVAL;
1570 1570                          break;
1571 1571                  } else if ((~template->ctmpl_ev_crit & param_value) == 0) {
1572 1572                          /*
1573 1573                           * Assume that a pure reduction of the critical
1574 1574                           * set is allowed by the contract type.
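                            * For example, if ctmpl_ev_crit is 0x7 and
                            * param_value is 0x3, (~0x7 & 0x3) == 0; the
                            * request only clears bits.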
1575 1575                           */
1576 1576                          template->ctmpl_ev_crit = param_value;
1577 1577                          break;
1578 1578                  }
1579 1579                  /*
1580 1580                   * There may be restrictions on what we can make
1581 1581                   * critical, so we defer to the judgement of the
1582 1582                   * contract type.
1583 1583                   */
1584 1584                  /* FALLTHROUGH */
1585 1585          default:
1586 1586                  result = template->ctmpl_ops->ctop_set(template, kparam, cr);
1587 1587          }
1588 1588          mutex_exit(&template->ctmpl_lock);
1589 1589  
1590 1590          return (result);
1591 1591  }
1592 1592  
1593 1593  /*
1594 1594   * ctmpl_get
1595 1595   *
1596 1596   * Obtains the requested terms from a template.
1597 1597   *
1598 1598   * If the term requested is a variable-sized term and the buffer
1599 1599   * provided is too small for the data, we truncate the data and return
1600 1600   * the buffer size necessary to fit the term in kparam->ret_size. If the
1601 1601   * term requested is fixed-size (uint64_t) and the buffer provided is too
1602 1602   * small, we return EINVAL.  This should never happen if you're using
1603 1603   * libcontract(3LIB), only if you call ioctl with a hand constructed
1604 1604   * ct_param_t argument.
1605 1605   *
1606 1606   * Currently, only contract-specific parameters have variable-sized
1607 1607   * values.
1608 1608   */
1609 1609  int
1610 1610  ctmpl_get(ct_template_t *template, ct_kparam_t *kparam)
1611 1611  {
1612 1612          int result = 0;
1613 1613          ct_param_t *param = &kparam->param;
1614 1614          uint64_t *param_value;
1615 1615  
1616 1616          if (param->ctpm_id == CTP_COOKIE ||
1617 1617              param->ctpm_id == CTP_EV_INFO ||
1618 1618              param->ctpm_id == CTP_EV_CRITICAL) {
1619 1619                  if (param->ctpm_size < sizeof (uint64_t)) {
1620 1620                          return (EINVAL);
1621 1621                  } else {
1622 1622                          param_value = kparam->ctpm_kbuf;
1623 1623                          kparam->ret_size = sizeof (uint64_t);
1624 1624                  }
1625 1625          }
1626 1626  
1627 1627          mutex_enter(&template->ctmpl_lock);
1628 1628          switch (param->ctpm_id) {
1629 1629          case CTP_COOKIE:
1630 1630                  *param_value = template->ctmpl_cookie;
1631 1631                  break;
1632 1632          case CTP_EV_INFO:
1633 1633                  *param_value = template->ctmpl_ev_info;
1634 1634                  break;
1635 1635          case CTP_EV_CRITICAL:
1636 1636                  *param_value = template->ctmpl_ev_crit;
1637 1637                  break;
1638 1638          default:
1639 1639                  result = template->ctmpl_ops->ctop_get(template, kparam);
1640 1640          }
1641 1641          mutex_exit(&template->ctmpl_lock);
1642 1642  
1643 1643          return (result);
1644 1644  }
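
           /*
            * For example (illustrative): if a contract-specific term
            * occupies 64 bytes but the caller supplied a 32-byte buffer,
            * the first 32 bytes are copied out and kparam->ret_size is set
            * to 64 so the caller can retry with a large enough buffer.
            */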
1645 1645  
1646 1646  /*
1647 1647   * ctmpl_makecurrent
1648 1648   *
1649 1649   * Used by ctmpl_activate and ctmpl_clear to set the current thread's
1650 1650   * active template.  Frees the old active template, if there was one.
1651 1651   */
1652 1652  static void
1653 1653  ctmpl_makecurrent(ct_template_t *template, ct_template_t *new)
1654 1654  {
1655 1655          klwp_t *curlwp = ttolwp(curthread);
1656 1656          proc_t *p = curproc;
1657 1657          ct_template_t *old;
1658 1658  
1659 1659          mutex_enter(&p->p_lock);
1660 1660          old = curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index];
1661 1661          curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index] = new;
1662 1662          mutex_exit(&p->p_lock);
1663 1663  
1664 1664          if (old)
1665 1665                  ctmpl_free(old);
1666 1666  }
1667 1667  
1668 1668  /*
1669 1669   * ctmpl_activate
1670 1670   *
1671 1671   * Copies the specified template and installs the copy as the
1672 1672   * current thread's active template of that type.
1673 1673   */
1674 1674  void
1675 1675  ctmpl_activate(ct_template_t *template)
1676 1676  {
1677 1677          ctmpl_makecurrent(template, ctmpl_dup(template));
1678 1678  }
1679 1679  
1680 1680  /*
1681 1681   * ctmpl_clear
1682 1682   *
1683 1683   * Clears the current thread's active template of the same type as
1684 1684   * the specified template.
1685 1685   */
1686 1686  void
1687 1687  ctmpl_clear(ct_template_t *template)
1688 1688  {
1689 1689          ctmpl_makecurrent(template, NULL);
1690 1690  }
1691 1691  
1692 1692  /*
1693 1693   * ctmpl_create
1694 1694   *
1695 1695   * Creates a new contract using the specified template.
1696 1696   */
1697 1697  int
1698 1698  ctmpl_create(ct_template_t *template, ctid_t *ctidp)
1699 1699  {
1700 1700          return (template->ctmpl_ops->ctop_create(template, ctidp));
1701 1701  }
1702 1702  
1703 1703  /*
1704 1704   * ctmpl_init
1705 1705   *
1706 1706   * Initializes the common portion of a new contract template.
1707 1707   */
1708 1708  void
1709 1709  ctmpl_init(ct_template_t *new, ctmplops_t *ops, ct_type_t *type, void *data)
1710 1710  {
1711 1711          mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
1712 1712          new->ctmpl_ops = ops;
1713 1713          new->ctmpl_type = type;
1714 1714          new->ctmpl_data = data;
1715 1715          new->ctmpl_ev_info = new->ctmpl_ev_crit = 0;
1716 1716          new->ctmpl_cookie = 0;
1717 1717  }
1718 1718  
1719 1719  /*
1720 1720   * ctmpl_copy
1721 1721   *
1722 1722   * Copies the common portions of a contract template.  Intended for use
1723 1723   * by a contract type's ctop_dup template op.  Returns with the old
1724 1724   * template's lock held, which should remain held until the
1725 1725   * template op returns (it is dropped by ctmpl_dup).
1726 1726   */
1727 1727  void
1728 1728  ctmpl_copy(ct_template_t *new, ct_template_t *old)
1729 1729  {
1730 1730          mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
1731 1731          mutex_enter(&old->ctmpl_lock);
1732 1732          new->ctmpl_ops = old->ctmpl_ops;
1733 1733          new->ctmpl_type = old->ctmpl_type;
1734 1734          new->ctmpl_ev_crit = old->ctmpl_ev_crit;
1735 1735          new->ctmpl_ev_info = old->ctmpl_ev_info;
1736 1736          new->ctmpl_cookie = old->ctmpl_cookie;
1737 1737  }
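
           /*
            * Illustrative sketch of a contract type's ctop_dup op built on
            * ctmpl_copy(), for a hypothetical type "foo" (the names are
            * not from this file).  ctmpl_copy() leaves old->ctmpl_lock
            * held; ctmpl_dup() drops it after this op returns:
            *
            *       static ct_template_t *
            *       foo_dup(ct_template_t *old)
            *       {
            *               foo_template_t *new;
            *
            *               new = kmem_alloc(sizeof (foo_template_t),
            *                   KM_SLEEP);
            *               ctmpl_copy(&new->foo_ctmpl, old);
            *               new->foo_ctmpl.ctmpl_data = new;
            *               (copy type-specific terms from old->ctmpl_data)
            *               return (&new->foo_ctmpl);
            *       }
            */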
1738 1738  
1739 1739  /*
1740 1740   * ctmpl_create_inval
1741 1741   *
1742 1742   * Returns EINVAL.  Provided for the convenience of those contract
1743 1743   * types which don't support ct_tmpl_create(3contract) and would
1744 1744   * otherwise need to create their own stub for the ctop_create template
1745 1745   * op.
1746 1746   */
1747 1747  /*ARGSUSED*/
1748 1748  int
1749 1749  ctmpl_create_inval(ct_template_t *template, ctid_t *ctidp)
1750 1750  {
1751 1751          return (EINVAL);
1752 1752  }
1753 1753  
1754 1754  
1755 1755  /*
1756 1756   * cte_queue_create
1757 1757   *
1758 1758   * Initializes a queue of a particular type.  If dynamic is set, the
1759 1759   * queue is to be freed when its last listener is removed after being
1760 1760   * drained.
1761 1761   */
1762 1762  static void
1763 1763  cte_queue_create(ct_equeue_t *q, ct_listnum_t list, int maxinf, int dynamic)
1764 1764  {
1765 1765          mutex_init(&q->ctq_lock, NULL, MUTEX_DEFAULT, NULL);
1766 1766          q->ctq_listno = list;
1767 1767          list_create(&q->ctq_events, sizeof (ct_kevent_t),
1768 1768              offsetof(ct_kevent_t, cte_nodes[list].ctm_node));
1769 1769          list_create(&q->ctq_listeners, sizeof (ct_listener_t),
1770 1770              offsetof(ct_listener_t, ctl_allnode));
1771 1771          list_create(&q->ctq_tail, sizeof (ct_listener_t),
1772 1772              offsetof(ct_listener_t, ctl_tailnode));
1773 1773          gethrestime(&q->ctq_atime);
1774 1774          q->ctq_nlisteners = 0;
1775 1775          q->ctq_nreliable = 0;
1776 1776          q->ctq_ninf = 0;
1777 1777          q->ctq_max = maxinf;
1778 1778  
1779 1779          /*
1780 1780           * Bundle queues and contract queues are embedded in other
1781 1781           * structures and are implicitly reference counted by virtue
1782 1782           * of their vnodes' indirect hold on their contracts.  Process
1783 1783           * bundle queues are dynamically allocated and may persist
1784 1784           * after the death of the process, so they must be explicitly
1785 1785           * reference counted.
1786 1786           */
1787 1787          q->ctq_flags = dynamic ? CTQ_REFFED : 0;
1788 1788  }
1789 1789  
1790 1790  /*
1791 1791   * cte_queue_destroy
1792 1792   *
1793 1793   * Destroys the specified queue.  The queue is freed if it is
1794 1794   * reference counted.
1795 1795   */
1796 1796  static void
1797 1797  cte_queue_destroy(ct_equeue_t *q)
1798 1798  {
1799 1799          ASSERT(q->ctq_flags & CTQ_DEAD);
1800 1800          ASSERT(q->ctq_nlisteners == 0);
1801 1801          ASSERT(q->ctq_nreliable == 0);
1802 1802          list_destroy(&q->ctq_events);
1803 1803          list_destroy(&q->ctq_listeners);
1804 1804          list_destroy(&q->ctq_tail);
1805 1805          mutex_destroy(&q->ctq_lock);
1806 1806          if (q->ctq_flags & CTQ_REFFED)
1807 1807                  kmem_free(q, sizeof (ct_equeue_t));
1808 1808  }
1809 1809  
1810 1810  /*
1811 1811   * cte_hold
1812 1812   *
1813 1813   * Takes a hold on the specified event.
1814 1814   */
1815 1815  static void
1816 1816  cte_hold(ct_kevent_t *e)
1817 1817  {
1818 1818          mutex_enter(&e->cte_lock);
1819 1819          ASSERT(e->cte_refs > 0);
1820 1820          e->cte_refs++;
1821 1821          mutex_exit(&e->cte_lock);
1822 1822  }
1823 1823  
1824 1824  /*
1825 1825   * cte_rele
1826 1826   *
1827 1827   * Releases a hold on the specified event.  If the caller had the last
1828 1828   * reference, frees the event and releases its hold on the contract
1829 1829   * that generated it.
1830 1830   */
1831 1831  static void
1832 1832  cte_rele(ct_kevent_t *e)
1833 1833  {
1834 1834          mutex_enter(&e->cte_lock);
1835 1835          ASSERT(e->cte_refs > 0);
1836 1836          if (--e->cte_refs) {
1837 1837                  mutex_exit(&e->cte_lock);
1838 1838                  return;
1839 1839          }
1840 1840  
1841 1841          contract_rele(e->cte_contract);
1842 1842  
1843 1843          mutex_destroy(&e->cte_lock);
1844 1844          nvlist_free(e->cte_data);
1845 1845          nvlist_free(e->cte_gdata);
1846 1846          kmem_free(e, sizeof (ct_kevent_t));
1847 1847  }
1848 1848  
1849 1849  /*
1850 1850   * cte_qrele
1851 1851   *
1852 1852   * Remove this listener's hold on the specified event, removing and
1853 1853   * releasing the queue's hold on the event if appropriate.
1854 1854   */
1855 1855  static void
1856 1856  cte_qrele(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
1857 1857  {
1858 1858          ct_member_t *member = &e->cte_nodes[q->ctq_listno];
1859 1859  
1860 1860          ASSERT(MUTEX_HELD(&q->ctq_lock));
1861 1861  
1862 1862          if (l->ctl_flags & CTLF_RELIABLE)
1863 1863                  member->ctm_nreliable--;
1864 1864          if ((--member->ctm_refs == 0) && member->ctm_trimmed) {
1865 1865                  member->ctm_trimmed = 0;
1866 1866                  list_remove(&q->ctq_events, e);
1867 1867                  cte_rele(e);
1868 1868          }
1869 1869  }
1870 1870  
1871 1871  /*
1872 1872   * cte_qmove
1873 1873   *
1874 1874   * Move this listener to the specified event in the queue.
1875 1875   */
1876 1876  static ct_kevent_t *
1877 1877  cte_qmove(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
1878 1878  {
1879 1879          ct_kevent_t *olde;
1880 1880  
1881 1881          ASSERT(MUTEX_HELD(&q->ctq_lock));
1882 1882          ASSERT(l->ctl_equeue == q);
1883 1883  
1884 1884          if ((olde = l->ctl_position) == NULL)
1885 1885                  list_remove(&q->ctq_tail, l);
1886 1886  
1887 1887          while (e != NULL && e->cte_nodes[q->ctq_listno].ctm_trimmed)
1888 1888                  e = list_next(&q->ctq_events, e);
1889 1889  
1890 1890          if (e != NULL) {
1891 1891                  e->cte_nodes[q->ctq_listno].ctm_refs++;
1892 1892                  if (l->ctl_flags & CTLF_RELIABLE)
1893 1893                          e->cte_nodes[q->ctq_listno].ctm_nreliable++;
1894 1894          } else {
1895 1895                  list_insert_tail(&q->ctq_tail, l);
1896 1896          }
1897 1897  
1898 1898          l->ctl_position = e;
1899 1899          if (olde)
1900 1900                  cte_qrele(q, l, olde);
1901 1901  
1902 1902          return (e);
1903 1903  }
1904 1904  
1905 1905  /*
1906 1906   * cte_checkcred
1907 1907   *
1908 1908   * Determines if the specified event's contract is owned by a process
1909 1909   * with the same effective uid as the specified credential.  Called
1910 1910   * after a failed call to contract_owned with locked set.  Because it
1911 1911   * drops the queue lock, its caller (cte_qreadable) needs to make sure
1912 1912   * we're still in the same place after we return.  Returns 1 on
1913 1913   * success.
1914 1914   */
1915 1915  static int
1916 1916  cte_checkcred(ct_equeue_t *q, ct_kevent_t *e, const cred_t *cr)
1917 1917  {
1918 1918          int result;
1919 1919          contract_t *ct = e->cte_contract;
1920 1920  
1921 1921          cte_hold(e);
1922 1922          mutex_exit(&q->ctq_lock);
1923 1923          result = curproc->p_zone->zone_uniqid == ct->ct_czuniqid &&
1924 1924              contract_checkcred(ct, cr);
1925 1925          mutex_enter(&q->ctq_lock);
1926 1926          cte_rele(e);
1927 1927  
1928 1928          return (result);
1929 1929  }
1930 1930  
1931 1931  /*
1932 1932   * cte_qreadable
1933 1933   *
1934 1934   * Ensures that the listener is pointing to a valid event that the
1935 1935   * caller has the credentials to read.  Returns 0 if we can read the
1936 1936   * event we're pointing to.
1937 1937   */
1938 1938  static int
1939 1939  cte_qreadable(ct_equeue_t *q, ct_listener_t *l, const cred_t *cr,
1940 1940      uint64_t zuniqid, int crit)
1941 1941  {
1942 1942          ct_kevent_t *e, *next;
1943 1943          contract_t *ct;
1944 1944  
1945 1945          ASSERT(MUTEX_HELD(&q->ctq_lock));
1946 1946          ASSERT(l->ctl_equeue == q);
1947 1947  
1948 1948          if (l->ctl_flags & CTLF_COPYOUT)
1949 1949                  return (1);
1950 1950  
1951 1951          next = l->ctl_position;
1952 1952          while (e = cte_qmove(q, l, next)) {
1953 1953                  ct = e->cte_contract;
1954 1954                  /*
1955 1955                   * Check obvious things first.  If we are looking for a
1956 1956                   * critical message, is this one?  If we aren't in the
1957 1957                   * global zone, is this message meant for us?
1958 1958                   */
1959 1959                  if ((crit && (e->cte_flags & (CTE_INFO | CTE_ACK))) ||
1960 1960                      (cr != NULL && zuniqid != GLOBAL_ZONEUNIQID &&
1961 1961                      zuniqid != contract_getzuniqid(ct))) {
1962 1962  
1963 1963                          next = list_next(&q->ctq_events, e);
1964 1964  
1965 1965                  /*
1966 1966                   * Next, see if our effective uid equals that of owner
1967 1967                   * or author of the contract.  Since we are holding the
1968 1968                   * queue lock, contract_owned can't always check if we
1969 1969                   * have the same effective uid as the contract's
1970 1970                   * owner.  If it comes to that, it fails and we take
1971 1971                   * the slow(er) path.
1972 1972                   */
1973 1973                  } else if (cr != NULL && !contract_owned(ct, cr, B_TRUE)) {
1974 1974  
1975 1975                          /*
1976 1976                           * At this point we either don't have any claim
1977 1977                           * to this contract or we match the effective
1978 1978                           * uid of the owner but couldn't tell.  We
1979 1979                           * first test for a NULL holder so that events
1980 1980                           * from orphans and inherited contracts avoid
1981 1981                           * the penalty phase.
1982 1982                           */
1983 1983                          if (e->cte_contract->ct_owner == NULL &&
1984 1984                              !secpolicy_contract_observer_choice(cr))
1985 1985                                  next = list_next(&q->ctq_events, e);
1986 1986  
1987 1987                          /*
1988 1988                           * cte_checkcred will juggle locks to see if we
1989 1989                           * have the same uid as the event's contract's
1990 1990                           * current owner.  If it succeeds, we have to
1991 1991                           * make sure we are in the same point in the
1992 1992                           * queue.
1993 1993                           */
1994 1994                          else if (cte_checkcred(q, e, cr) &&
1995 1995                              l->ctl_position == e)
1996 1996                                  break;
1997 1997  
1998 1998                          /*
1999 1999                           * cte_checkcred failed; see if we're in the
2000 2000                           * same place.
2001 2001                           */
2002 2002                          else if (l->ctl_position == e)
2003 2003                                  if (secpolicy_contract_observer_choice(cr))
2004 2004                                          break;
2005 2005                                  else
2006 2006                                          next = list_next(&q->ctq_events, e);
2007 2007  
2008 2008                          /*
2009 2009                           * cte_checkcred failed, and our position was
2010 2010                           * changed.  Start from there.
2011 2011                           */
2012 2012                          else
2013 2013                                  next = l->ctl_position;
2014 2014                  } else {
2015 2015                          break;
2016 2016                  }
2017 2017          }
2018 2018  
2019 2019          /*
2020 2020           * We check for CTLF_COPYOUT again in case we dropped the queue
2021 2021           * lock in cte_checkcred.
2022 2022           */
2023 2023          return ((l->ctl_flags & CTLF_COPYOUT) || (l->ctl_position == NULL));
2024 2024  }
2025 2025  
2026 2026  /*
2027 2027   * cte_qwakeup
2028 2028   *
2029 2029   * Wakes up any waiting listeners and points them at the specified event.
2030 2030   */
2031 2031  static void
2032 2032  cte_qwakeup(ct_equeue_t *q, ct_kevent_t *e)
2033 2033  {
2034 2034          ct_listener_t *l;
2035 2035  
2036 2036          ASSERT(MUTEX_HELD(&q->ctq_lock));
2037 2037  
2038 2038          while (l = list_head(&q->ctq_tail)) {
2039 2039                  list_remove(&q->ctq_tail, l);
2040 2040                  e->cte_nodes[q->ctq_listno].ctm_refs++;
2041 2041                  if (l->ctl_flags & CTLF_RELIABLE)
2042 2042                          e->cte_nodes[q->ctq_listno].ctm_nreliable++;
2043 2043                  l->ctl_position = e;
2044 2044                  cv_signal(&l->ctl_cv);
2045 2045                  pollwakeup(&l->ctl_pollhead, POLLIN);
2046 2046          }
2047 2047  }
2048 2048  
2049 2049  /*
2050 2050   * cte_copy
2051 2051   *
2052 2052   * Copies events from the specified contract event queue to the
2053 2053   * end of the specified process bundle queue.  Only called from
2054 2054   * contract_adopt.
2055 2055   *
2056 2056   * We copy to the end of the target queue instead of mixing the events
2057 2057   * in their proper order because otherwise the act of adopting a
2058 2058   * contract would require a process to reset all of its process
2059 2059   * bundle listeners to see the new events.  This would, in turn,
2060 2060   * require the process to keep track of which preexisting events had
2061 2061   * already been processed.
2062 2062   */
2063 2063  static void
2064 2064  cte_copy(ct_equeue_t *q, ct_equeue_t *newq)
2065 2065  {
2066 2066          ct_kevent_t *e, *first = NULL;
2067 2067  
2068 2068          VERIFY(q->ctq_listno == CTEL_CONTRACT);
2069 2069          VERIFY(newq->ctq_listno == CTEL_PBUNDLE);
2070 2070  
2071 2071          mutex_enter(&q->ctq_lock);
2072 2072          mutex_enter(&newq->ctq_lock);
2073 2073  
2074 2074          /*
2075 2075           * For now, only copy critical events.
2076 2076           */
2077 2077          for (e = list_head(&q->ctq_events); e != NULL;
2078 2078              e = list_next(&q->ctq_events, e)) {
2079 2079                  if ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
2080 2080                          if (first == NULL)
2081 2081                                  first = e;
2082 2082                          /*
2083 2083                           * It is possible for adoption to race with an owner's
2084 2084                           * cte_publish_all(); we must only enqueue events that
2085 2085                           * have not already been enqueued.
2086 2086                           */
2087 2087                          if (!list_link_active((list_node_t *)
2088 2088                              ((uintptr_t)e + newq->ctq_events.list_offset))) {
2089 2089                                  list_insert_tail(&newq->ctq_events, e);
2090 2090                                  cte_hold(e);
2091 2091                          }
2092 2092                  }
2093 2093          }
2094 2094  
2095 2095          mutex_exit(&q->ctq_lock);
2096 2096  
2097 2097          if (first)
2098 2098                  cte_qwakeup(newq, first);
2099 2099  
2100 2100          mutex_exit(&newq->ctq_lock);
2101 2101  }
2102 2102  
2103 2103  /*
2104 2104   * cte_trim
2105 2105   *
2106 2106   * Trims unneeded events from an event queue.  Algorithm works as
2107 2107   * follows:
2108 2108   *
2109 2109   *   Removes all informative and acknowledged critical events until the
2110 2110   *   first referenced event is found.
2111 2111   *
2112 2112   *   If a contract is specified, removes all events (regardless of
2113 2113   *   acknowledgement) generated by that contract until the first event
2114 2114   * referenced by a reliable listener is found.  Referenced events are
2115 2115   *   removed by marking them "trimmed".  Such events will be removed
2116 2116   *   when the last reference is dropped and will be skipped by future
2117 2117   *   listeners.
2118 2118   *
2119 2119   * This is pretty basic.  Ideally this should remove from the middle of
2120 2120   * the list (i.e. beyond the first referenced event), and even
2121 2121   * referenced events.
2122 2122   */
2123 2123  static void
2124 2124  cte_trim(ct_equeue_t *q, contract_t *ct)
2125 2125  {
2126 2126          ct_kevent_t *e, *next;
2127 2127          int flags, stopper;
2128 2128          int start = 1;
2129 2129  
2130 2130          VERIFY(MUTEX_HELD(&q->ctq_lock));
2131 2131  
2132 2132          for (e = list_head(&q->ctq_events); e != NULL; e = next) {
2133 2133                  next = list_next(&q->ctq_events, e);
2134 2134                  flags = e->cte_flags;
2135 2135                  stopper = (q->ctq_listno != CTEL_PBUNDLE) &&
2136 2136                      (e->cte_nodes[q->ctq_listno].ctm_nreliable > 0);
2137 2137                  if (e->cte_nodes[q->ctq_listno].ctm_refs == 0) {
2138 2138                          if ((start && (flags & (CTE_INFO | CTE_ACK))) ||
2139 2139                              (e->cte_contract == ct)) {
2140 2140                                  /*
2141 2141                                   * Toss informative and ACKed critical messages.
2142 2142                                   */
2143 2143                                  list_remove(&q->ctq_events, e);
2144 2144                                  cte_rele(e);
2145 2145                          }
2146 2146                  } else if ((e->cte_contract == ct) && !stopper) {
2147 2147                          ASSERT(q->ctq_nlisteners != 0);
2148 2148                          e->cte_nodes[q->ctq_listno].ctm_trimmed = 1;
2149 2149                  } else if (ct && !stopper) {
2150 2150                          start = 0;
2151 2151                  } else {
2152 2152                          /*
2153 2153                           * Don't free messages past the first reader.
2154 2154                           */
2155 2155                          break;
2156 2156                  }
2157 2157          }
2158 2158  }
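
           /*
            * Worked example (illustrative): with ct == NULL and a queue of
            * unreferenced events
            *
            *       [INFO] [CRIT|ACK] [CRIT] [INFO]
            *
            * both INFO events and the acknowledged critical event are
            * freed, while the unacknowledged critical event remains on the
            * queue; trimming stops at the first referenced event.
            */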
2159 2159  
2160 2160  /*
2161 2161   * cte_queue_drain
2162 2162   *
2163 2163   * Drain all events from the specified queue, and mark it dead.  If
2164 2164   * "ack" is set, acknowledge any critical events we find along the
2165 2165   * way.
2166 2166   */
2167 2167  static void
2168 2168  cte_queue_drain(ct_equeue_t *q, int ack)
2169 2169  {
2170 2170          ct_kevent_t *e, *next;
2171 2171          ct_listener_t *l;
2172 2172  
2173 2173          mutex_enter(&q->ctq_lock);
2174 2174  
2175 2175          for (e = list_head(&q->ctq_events); e != NULL; e = next) {
2176 2176                  next = list_next(&q->ctq_events, e);
2177 2177                  if (ack && ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0)) {
2178 2178                          /*
2179 2179                           * Make sure critical messages are eventually
2180 2180                           * removed from the bundle queues.
2181 2181                           */
2182 2182                          mutex_enter(&e->cte_lock);
2183 2183                          e->cte_flags |= CTE_ACK;
2184 2184                          mutex_exit(&e->cte_lock);
2185 2185                          ASSERT(MUTEX_HELD(&e->cte_contract->ct_lock));
2186 2186                          e->cte_contract->ct_evcnt--;
2187 2187                  }
2188 2188                  list_remove(&q->ctq_events, e);
2189 2189                  e->cte_nodes[q->ctq_listno].ctm_refs = 0;
2190 2190                  e->cte_nodes[q->ctq_listno].ctm_nreliable = 0;
2191 2191                  e->cte_nodes[q->ctq_listno].ctm_trimmed = 0;
2192 2192                  cte_rele(e);
2193 2193          }
2194 2194  
2195 2195          /*
2196 2196           * This is necessary only because of CTEL_PBUNDLE listeners;
2197 2197           * the events they point to can move from one pbundle to
2198 2198           * another.  Fortunately, this only happens if the contract is
2199 2199           * inherited, which (in turn) only happens if the process
2200 2200           * exits, which means it's an all-or-nothing deal.  If this
2201 2201           * wasn't the case, we would instead need to keep track of
2202 2202           * listeners on a per-event basis, not just a per-queue basis.
2203 2203           * This would have the side benefit of letting us clean up
2204 2204           * trimmed events sooner (i.e. immediately), but would
2205 2205           * unfortunately make events even bigger than they already
2206 2206           * are.
2207 2207           */
2208 2208          for (l = list_head(&q->ctq_listeners); l;
2209 2209              l = list_next(&q->ctq_listeners, l)) {
2210 2210                  l->ctl_flags |= CTLF_DEAD;
2211 2211                  if (l->ctl_position) {
2212 2212                          l->ctl_position = NULL;
2213 2213                          list_insert_tail(&q->ctq_tail, l);
2214 2214                  }
2215 2215                  cv_broadcast(&l->ctl_cv);
2216 2216          }
2217 2217  
2218 2218          /*
2219 2219           * Disallow events.
2220 2220           */
2221 2221          q->ctq_flags |= CTQ_DEAD;
2222 2222  
2223 2223          /*
2224 2224           * If we represent the last reference to a reference counted
2225 2225           * process bundle queue, free it.
2226 2226           */
2227 2227          if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_nlisteners == 0))
2228 2228                  cte_queue_destroy(q);
2229 2229          else
2230 2230                  mutex_exit(&q->ctq_lock);
2231 2231  }
2232 2232  
2233 2233  /*
2234 2234   * cte_publish
2235 2235   *
2236 2236   * Publishes an event to a specific queue.  Only called by
2237 2237   * cte_publish_all.
2238 2238   */
2239 2239  static void
2240 2240  cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp, boolean_t mayexist)
2241 2241  {
2242 2242          ASSERT(MUTEX_HELD(&q->ctq_lock));
2243 2243  
2244 2244          q->ctq_atime = *tsp;
2245 2245  
2246 2246          /*
2247 2247           * If this event may already exist on this queue, check to see if it
2248 2248           * is already there and return if so.
2249 2249           */
2250 2250          if (mayexist && list_link_active((list_node_t *)((uintptr_t)e +
2251 2251              q->ctq_events.list_offset))) {
2252 2252                  mutex_exit(&q->ctq_lock);
2253 2253                  cte_rele(e);
2254 2254                  return;
2255 2255          }
2256 2256  
2257 2257          /*
2258 2258           * Don't publish if the event is informative and there aren't
2259 2259           * any listeners, or if the queue has been shut down.
2260 2260           */
2261 2261          if (((q->ctq_nlisteners == 0) && (e->cte_flags & (CTE_INFO|CTE_ACK))) ||
2262 2262              (q->ctq_flags & CTQ_DEAD)) {
2263 2263                  mutex_exit(&q->ctq_lock);
2264 2264                  cte_rele(e);
2265 2265                  return;
2266 2266          }
2267 2267  
2268 2268          /*
2269 2269           * Enqueue event
2270 2270           */
2271 2271          VERIFY(!list_link_active((list_node_t *)
2272 2272              ((uintptr_t)e + q->ctq_events.list_offset)));
2273 2273          list_insert_tail(&q->ctq_events, e);
2274 2274  
2275 2275          /*
2276 2276           * Check for waiting listeners
2277 2277           */
2278 2278          cte_qwakeup(q, e);
2279 2279  
2280 2280          /*
2281 2281           * Trim unnecessary events from the queue.
2282 2282           */
2283 2283          cte_trim(q, NULL);
2284 2284          mutex_exit(&q->ctq_lock);
2285 2285  }
2286 2286  
2287 2287  /*
2288 2288   * cte_publish_all
2289 2289   *
2290 2290   * Publish an event to all necessary event queues.  The event, e, must
2291 2291   * be zallocated by the caller, and the event's flags and type must be
2292 2292   * set.  The rest of the event's fields are initialized here.
2293 2293   */
2294 2294  uint64_t
2295 2295  cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata)
2296 2296  {
2297 2297          ct_equeue_t *q;
2298 2298          timespec_t ts;
2299 2299          uint64_t evid;
2300 2300          ct_kevent_t *negev;
2301 2301          int negend;
2302 2302  
2303 2303          e->cte_contract = ct;
2304 2304          e->cte_data = data;
2305 2305          e->cte_gdata = gdata;
2306 2306          e->cte_refs = 3;
2307 2307          evid = e->cte_id = atomic_inc_64_nv(&ct->ct_type->ct_type_evid);
2308 2308          contract_hold(ct);
2309 2309  
2310 2310          /*
2311 2311           * For a negotiation event we set the ct->ct_nevent field of the
2312 2312           * contract for the duration of the negotiation
2313 2313           */
2314 2314          negend = 0;
2315 2315          if (e->cte_flags & CTE_NEG) {
2316 2316                  cte_hold(e);
2317 2317                  ct->ct_nevent = e;
2318 2318          } else if (e->cte_type == CT_EV_NEGEND) {
2319 2319                  negend = 1;
2320 2320          }
2321 2321  
2322 2322          gethrestime(&ts);
2323 2323  
2324 2324          /*
2325 2325           * ct_evtlock simply (and only) ensures that two events sent
2326 2326           * from the same contract are delivered to all queues in the
2327 2327           * same order.
2328 2328           */
2329 2329          mutex_enter(&ct->ct_evtlock);
2330 2330  
2331 2331          /*
2332 2332           * CTEL_CONTRACT - First deliver to the contract queue, acking
2333 2333           * the event if the contract has been orphaned.
2334 2334           */
2335 2335          mutex_enter(&ct->ct_lock);
2336 2336          mutex_enter(&ct->ct_events.ctq_lock);
2337 2337          if ((e->cte_flags & CTE_INFO) == 0) {
2338 2338                  if (ct->ct_state >= CTS_ORPHAN)
2339 2339                          e->cte_flags |= CTE_ACK;
2340 2340                  else
2341 2341                          ct->ct_evcnt++;
2342 2342          }
2343 2343          mutex_exit(&ct->ct_lock);
2344 2344          cte_publish(&ct->ct_events, e, &ts, B_FALSE);
2345 2345  
2346 2346          /*
2347 2347           * CTEL_BUNDLE - Next deliver to the contract type's bundle
2348 2348           * queue.
2349 2349           */
2350 2350          mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
2351 2351          cte_publish(&ct->ct_type->ct_type_events, e, &ts, B_FALSE);
2352 2352  
2353 2353          /*
2354 2354           * CTEL_PBUNDLE - Finally, if the contract has an owner,
2355 2355           * deliver to the owner's process bundle queue.
2356 2356           */
2357 2357          mutex_enter(&ct->ct_lock);
2358 2358          if (ct->ct_owner) {
2359 2359                  /*
2360 2360                   * proc_exit doesn't free event queues until it has
2361 2361                   * abandoned all contracts.
2362 2362                   */
2363 2363                  ASSERT(ct->ct_owner->p_ct_equeue);
2364 2364                  ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
2365 2365                  q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
2366 2366                  mutex_enter(&q->ctq_lock);
2367 2367                  mutex_exit(&ct->ct_lock);
2368 2368  
2369 2369                  /*
2370 2370                   * It is possible for this code to race with adoption; we
2371 2371                   * publish the event indicating that the event may already
2372 2372                   * be enqueued because adoption beat us to it (in which case
2373 2373                  * cte_publish() does nothing).
2374 2374                   */
2375 2375                  cte_publish(q, e, &ts, B_TRUE);
2376 2376          } else {
2377 2377                  mutex_exit(&ct->ct_lock);
2378 2378                  cte_rele(e);
2379 2379          }
2380 2380  
2381 2381          if (negend) {
2382 2382                  mutex_enter(&ct->ct_lock);
2383 2383                  negev = ct->ct_nevent;
2384 2384                  ct->ct_nevent = NULL;
2385 2385                  cte_rele(negev);
2386 2386                  mutex_exit(&ct->ct_lock);
2387 2387          }
2388 2388  
2389 2389          mutex_exit(&ct->ct_evtlock);
2390 2390  
2391 2391          return (evid);
2392 2392  }
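
           /*
            * Illustrative sketch (hypothetical contract type; the event
            * type and nvlist contents are made up) of the caller-side
            * pattern cte_publish_all() expects:
            *
            *       ct_kevent_t *e;
            *       nvlist_t *nvl;
            *       uint64_t evid;
            *
            *       e = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
            *       e->cte_flags = 0;       (critical: neither INFO nor ACK)
            *       e->cte_type = FOO_EV_SOMETHING;
            *       VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
            *       VERIFY(nvlist_add_uint32(nvl, "value", value) == 0);
            *       evid = cte_publish_all(ct, e, nvl, NULL);
            */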
2393 2393  
2394 2394  /*
2395 2395   * cte_add_listener
2396 2396   *
2397 2397   * Add a new listener to an event queue.
2398 2398   */
2399 2399  void
2400 2400  cte_add_listener(ct_equeue_t *q, ct_listener_t *l)
2401 2401  {
2402 2402          cv_init(&l->ctl_cv, NULL, CV_DEFAULT, NULL);
2403 2403          l->ctl_equeue = q;
2404 2404          l->ctl_position = NULL;
2405 2405          l->ctl_flags = 0;
2406 2406  
2407 2407          mutex_enter(&q->ctq_lock);
2408 2408          list_insert_head(&q->ctq_tail, l);
2409 2409          list_insert_head(&q->ctq_listeners, l);
2410 2410          q->ctq_nlisteners++;
2411 2411          mutex_exit(&q->ctq_lock);
2412 2412  }
2413 2413  
2414 2414  /*
2415 2415   * cte_remove_listener
2416 2416   *
2417 2417   * Remove a listener from an event queue.  No other queue activities
2418 2418   * (e.g. cte_get_event) may be in progress at this endpoint when this
2419 2419   * is called.
2420 2420   */
2421 2421  void
2422 2422  cte_remove_listener(ct_listener_t *l)
2423 2423  {
2424 2424          ct_equeue_t *q = l->ctl_equeue;
2425 2425          ct_kevent_t *e;
2426 2426  
2427 2427          mutex_enter(&q->ctq_lock);
2428 2428  
2429 2429          ASSERT((l->ctl_flags & (CTLF_COPYOUT|CTLF_RESET)) == 0);
2430 2430  
2431 2431          if ((e = l->ctl_position) != NULL)
2432 2432                  cte_qrele(q, l, e);
2433 2433          else
2434 2434                  list_remove(&q->ctq_tail, l);
2435 2435          l->ctl_position = NULL;
2436 2436  
2437 2437          q->ctq_nlisteners--;
2438 2438          list_remove(&q->ctq_listeners, l);
2439 2439  
2440 2440          if (l->ctl_flags & CTLF_RELIABLE)
2441 2441                  q->ctq_nreliable--;
2442 2442  
2443 2443          /*
2444 2444           * If we are the last listener of a dead reference-counted
2445 2445           * queue (i.e. a process bundle) we free it.  Otherwise we just
2446 2446           * trim any events which may have been kept around for our
2447 2447           * benefit.
2448 2448           */
2449 2449          if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_flags & CTQ_DEAD) &&
2450 2450              (q->ctq_nlisteners == 0)) {
2451 2451                  cte_queue_destroy(q);
2452 2452          } else {
2453 2453                  cte_trim(q, NULL);
2454 2454                  mutex_exit(&q->ctq_lock);
2455 2455          }
2456 2456  }
2457 2457  
2458 2458  /*
2459 2459   * cte_reset_listener
2460 2460   *
2461 2461   * Moves a listener's queue pointer to the beginning of the queue.
2462 2462   */
2463 2463  void
2464 2464  cte_reset_listener(ct_listener_t *l)
2465 2465  {
2466 2466          ct_equeue_t *q = l->ctl_equeue;
2467 2467  
2468 2468          mutex_enter(&q->ctq_lock);
2469 2469  
2470 2470          /*
2471 2471           * We allow an asynchronous reset because it doesn't make a
2472 2472           * whole lot of sense to make reset block or fail.  We already
2473 2473           * have most of the mechanism needed thanks to queue trimming,
2474 2474           * so implementing it isn't a big deal.
2475 2475           */
2476 2476          if (l->ctl_flags & CTLF_COPYOUT)
2477 2477                  l->ctl_flags |= CTLF_RESET;
2478 2478  
2479 2479          (void) cte_qmove(q, l, list_head(&q->ctq_events));
2480 2480  
2481 2481          /*
2482 2482           * Inform blocked readers.
2483 2483           */
2484 2484          cv_broadcast(&l->ctl_cv);
2485 2485          pollwakeup(&l->ctl_pollhead, POLLIN);
2486 2486          mutex_exit(&q->ctq_lock);
2487 2487  }
2488 2488  
2489 2489  /*
2490 2490   * cte_next_event
2491 2491   *
2492 2492   * Moves the event pointer for the specified listener to the next event
2493 2493   * on the queue.  To avoid races, this movement only occurs if the
2494 2494   * specified event id matches that of the current event.  This is used
2495 2495   * primarily to skip events that have been read but whose extended data
2496 2496   * haven't been copied out.
2497 2497   */
2498 2498  int
2499 2499  cte_next_event(ct_listener_t *l, uint64_t id)
2500 2500  {
2501 2501          ct_equeue_t *q = l->ctl_equeue;
2502 2502          ct_kevent_t *old;
2503 2503  
2504 2504          mutex_enter(&q->ctq_lock);
2505 2505  
2506 2506          if (l->ctl_flags & CTLF_COPYOUT)
2507 2507                  l->ctl_flags |= CTLF_RESET;
2508 2508  
2509 2509          if (((old = l->ctl_position) != NULL) && (old->cte_id == id))
2510 2510                  (void) cte_qmove(q, l, list_next(&q->ctq_events, old));
2511 2511  
2512 2512          mutex_exit(&q->ctq_lock);
2513 2513  
2514 2514          return (0);
2515 2515  }
2516 2516  
2517 2517  /*
2518 2518   * cte_get_event
2519 2519   *
2520 2520   * Reads an event from an event endpoint.  If "nonblock" is clear, we
2521 2521   * block until a suitable event is ready.  If "crit" is set, we only
2522 2522   * read critical events.  Note that while "cr" is the caller's cred,
2523 2523   * "zuniqid" is the unique id of the zone the calling contract
2524 2524   * filesystem was mounted in.
2525 2525   */
2526 2526  int
2527 2527  cte_get_event(ct_listener_t *l, int nonblock, void *uaddr, const cred_t *cr,
2528 2528      uint64_t zuniqid, int crit)
2529 2529  {
2530 2530          ct_equeue_t *q = l->ctl_equeue;
2531 2531          ct_kevent_t *temp;
2532 2532          int result = 0;
2533 2533          int partial = 0;
2534 2534          size_t size, gsize, len;
2535 2535          model_t mdl = get_udatamodel();
2536 2536          STRUCT_DECL(ct_event, ev);
2537 2537          STRUCT_INIT(ev, mdl);
2538 2538  
2539 2539          /*
2540 2540           * cte_qreadable checks for CTLF_COPYOUT as well as ensures
2541 2541           * that there exists, and we are pointing to, an appropriate
2542 2542           * event.  It may temporarily drop ctq_lock, but that doesn't
2543 2543           * really matter to us.
2544 2544           */
2545 2545          mutex_enter(&q->ctq_lock);
2546 2546          while (cte_qreadable(q, l, cr, zuniqid, crit)) {
2547 2547                  if (nonblock) {
2548 2548                          result = EAGAIN;
2549 2549                          goto error;
2550 2550                  }
2551 2551                  if (q->ctq_flags & CTQ_DEAD) {
2552 2552                          result = EIDRM;
2553 2553                          goto error;
2554 2554                  }
2555 2555                  result = cv_wait_sig(&l->ctl_cv, &q->ctq_lock);
2556 2556                  if (result == 0) {
2557 2557                          result = EINTR;
2558 2558                          goto error;
2559 2559                  }
2560 2560          }
2561 2561          temp = l->ctl_position;
2562 2562          cte_hold(temp);
2563 2563          l->ctl_flags |= CTLF_COPYOUT;
2564 2564          mutex_exit(&q->ctq_lock);
2565 2565  
2566 2566          /*
2567 2567           * We now have an event.  Copy in the user event structure to
2568 2568           * see how much space we have to work with.
2569 2569           */
2570 2570          result = copyin(uaddr, STRUCT_BUF(ev), STRUCT_SIZE(ev));
2571 2571          if (result)
2572 2572                  goto copyerr;
2573 2573  
2574 2574          /*
2575 2575           * Determine what data we have and what the user should be
2576 2576           * allowed to see.
2577 2577           */
2578 2578          size = gsize = 0;
2579 2579          if (temp->cte_data) {
2580 2580                  VERIFY(nvlist_size(temp->cte_data, &size,
2581 2581                      NV_ENCODE_NATIVE) == 0);
2582 2582                  ASSERT(size != 0);
2583 2583          }
2584 2584          if (zuniqid == GLOBAL_ZONEUNIQID && temp->cte_gdata) {
2585 2585                  VERIFY(nvlist_size(temp->cte_gdata, &gsize,
2586 2586                      NV_ENCODE_NATIVE) == 0);
2587 2587                  ASSERT(gsize != 0);
2588 2588          }
2589 2589  
2590 2590          /*
2591 2591           * If we have enough space, copy out the extended event data.
2592 2592           */
2593 2593          len = size + gsize;
2594 2594          if (len) {
2595 2595                  if (STRUCT_FGET(ev, ctev_nbytes) >= len) {
2596 2596                          char *buf = kmem_alloc(len, KM_SLEEP);
2597 2597  
2598 2598                          if (size)
2599 2599                                  VERIFY(nvlist_pack(temp->cte_data, &buf, &size,
2600 2600                                      NV_ENCODE_NATIVE, KM_SLEEP) == 0);
2601 2601                          if (gsize) {
2602 2602                                  char *tmp = buf + size;
2603 2603  
2604 2604                                  VERIFY(nvlist_pack(temp->cte_gdata, &tmp,
2605 2605                                      &gsize, NV_ENCODE_NATIVE, KM_SLEEP) == 0);
2606 2606                          }
2607 2607  
2608 2608                          /* This shouldn't have changed */
2609 2609                          ASSERT(size + gsize == len);
2610 2610                          result = copyout(buf, STRUCT_FGETP(ev, ctev_buffer),
2611 2611                              len);
2612 2612                          kmem_free(buf, len);
2613 2613                          if (result)
2614 2614                                  goto copyerr;
2615 2615                  } else {
2616 2616                          partial = 1;
2617 2617                  }
2618 2618          }
2619 2619  
2620 2620          /*
2621 2621           * Copy out the common event data.
2622 2622           */
2623 2623          STRUCT_FSET(ev, ctev_id, temp->cte_contract->ct_id);
2624 2624          STRUCT_FSET(ev, ctev_evid, temp->cte_id);
2625 2625          STRUCT_FSET(ev, ctev_cttype,
2626 2626              temp->cte_contract->ct_type->ct_type_index);
2627 2627          STRUCT_FSET(ev, ctev_flags, temp->cte_flags &
2628 2628              (CTE_ACK|CTE_INFO|CTE_NEG));
2629 2629          STRUCT_FSET(ev, ctev_type, temp->cte_type);
2630 2630          STRUCT_FSET(ev, ctev_nbytes, len);
2631 2631          STRUCT_FSET(ev, ctev_goffset, size);
2632 2632          result = copyout(STRUCT_BUF(ev), uaddr, STRUCT_SIZE(ev));
2633 2633  
2634 2634  copyerr:
2635 2635          /*
2636 2636           * Only move our location in the queue if all copyouts were
2637 2637           * successful, the caller provided enough space for the entire
2638 2638           * event, and our endpoint wasn't reset or otherwise moved by
2639 2639           * another thread.
2640 2640           */
2641 2641          mutex_enter(&q->ctq_lock);
2642 2642          if (result)
2643 2643                  result = EFAULT;
2644 2644          else if (!partial && ((l->ctl_flags & CTLF_RESET) == 0) &&
2645 2645              (l->ctl_position == temp))
2646 2646                  (void) cte_qmove(q, l, list_next(&q->ctq_events, temp));
2647 2647          l->ctl_flags &= ~(CTLF_COPYOUT|CTLF_RESET);
2648 2648          /*
2649 2649           * Signal any readers blocked on our CTLF_COPYOUT.
2650 2650           */
2651 2651          cv_signal(&l->ctl_cv);
2652 2652          cte_rele(temp);
2653 2653  
2654 2654  error:
2655 2655          mutex_exit(&q->ctq_lock);
2656 2656          return (result);
2657 2657  }
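
           /*
            * Illustrative note: on a partial read (the extended data did
            * not fit) the endpoint's position is not advanced, so a reader
            * may either retry with a buffer of at least ctev_nbytes bytes
            * or explicitly skip the event using the id it just read:
            *
            *       (void) cte_next_event(l, evid);
            */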
2658 2658  
2659 2659  /*
2660 2660   * cte_set_reliable
2661 2661   *
2662 2662   * Requests that events be reliably delivered to an event endpoint.
2663 2663   * Unread informative and acknowledged critical events will not be
2664 2664   * removed from the queue until this listener reads or skips them.
2665 2665   * Because a listener could maliciously request reliable delivery and
2666 2666   * then do nothing, this requires that PRIV_CONTRACT_EVENT be in the
2667 2667   * caller's effective set.
2668 2668   */
2669 2669  int
2670 2670  cte_set_reliable(ct_listener_t *l, const cred_t *cr)
2671 2671  {
2672 2672          ct_equeue_t *q = l->ctl_equeue;
2673 2673          int error;
2674 2674  
2675 2675          if ((error = secpolicy_contract_event(cr)) != 0)
2676 2676                  return (error);
2677 2677  
2678 2678          mutex_enter(&q->ctq_lock);
2679 2679          if ((l->ctl_flags & CTLF_RELIABLE) == 0) {
2680 2680                  l->ctl_flags |= CTLF_RELIABLE;
2681 2681                  q->ctq_nreliable++;
2682 2682                  if (l->ctl_position != NULL)
2683 2683                          l->ctl_position->cte_nodes[q->ctq_listno].
2684 2684                              ctm_nreliable++;
2685 2685          }
2686 2686          mutex_exit(&q->ctq_lock);
2687 2687  
2688 2688          return (0);
2689 2689  }
  