          --- old/usr/src/uts/common/os/lwp.c
          +++ new/usr/src/uts/common/os/lwp.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25   25   */
  26   26  
  27   27  /*
  28   28   * Copyright 2016, Joyent, Inc.
  29   29   */
  30   30  
  31   31  #include <sys/param.h>
  32   32  #include <sys/types.h>
  33   33  #include <sys/sysmacros.h>
  34   34  #include <sys/systm.h>
  35   35  #include <sys/thread.h>
  36   36  #include <sys/proc.h>
  37   37  #include <sys/task.h>
  38   38  #include <sys/project.h>
  39   39  #include <sys/signal.h>
  40   40  #include <sys/errno.h>
  41   41  #include <sys/vmparam.h>
  42   42  #include <sys/stack.h>
  43   43  #include <sys/procfs.h>
  44   44  #include <sys/prsystm.h>
  45   45  #include <sys/cpuvar.h>
  46   46  #include <sys/kmem.h>
  47   47  #include <sys/vtrace.h>
  48   48  #include <sys/door.h>
  49   49  #include <vm/seg_kp.h>
  50   50  #include <sys/debug.h>
  51   51  #include <sys/tnf.h>
  52   52  #include <sys/schedctl.h>
  53   53  #include <sys/poll.h>
  54   54  #include <sys/copyops.h>
  55   55  #include <sys/lwp_upimutex_impl.h>
  56   56  #include <sys/cpupart.h>
  57   57  #include <sys/lgrp.h>
  58   58  #include <sys/rctl.h>
  59   59  #include <sys/contract_impl.h>
  60   60  #include <sys/contract/process.h>
  61   61  #include <sys/contract/process_impl.h>
  62   62  #include <sys/cpc_impl.h>
  63   63  #include <sys/sdt.h>
  64   64  #include <sys/cmn_err.h>
  65   65  #include <sys/brand.h>
  66   66  #include <sys/cyclic.h>
  67   67  #include <sys/pool.h>
  68   68  
  69   69  /* hash function for the lwpid hash table, p->p_tidhash[] */
  70   70  #define TIDHASH(tid, hash_sz)   ((tid) & ((hash_sz) - 1))
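
            /*
             * An illustration, assuming the power-of-two hash sizes that the
             * growth code in lwp_create() below maintains: with hash_sz = 8
             * the mask is 7, so tid 13 (binary 1101) falls in bucket 5
             * (binary 101), i.e. TIDHASH(13, 8) == 5.
             */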
  71   71  
  72   72  void *segkp_lwp;                /* cookie for pool of segkp resources */
  73   73  extern void reapq_move_lq_to_tq(kthread_t *);
  74   74  extern void freectx_ctx(struct ctxop *);
  75   75  
  76   76  /*
  77   77   * Create a kernel thread associated with a particular system process.  Give
  78   78   * it an LWP so that microstate accounting will be available for it.
  79   79   */
  80   80  kthread_t *
  81   81  lwp_kernel_create(proc_t *p, void (*proc)(), void *arg, int state, pri_t pri)
  82   82  {
  83   83          klwp_t *lwp;
  84   84  
  85   85          VERIFY((p->p_flag & SSYS) != 0);
  86   86  
  87   87          lwp = lwp_create(proc, arg, 0, p, state, pri, &t0.t_hold, syscid, 0);
  88   88  
  89   89          VERIFY(lwp != NULL);
  90   90  
  91   91          return (lwptot(lwp));
  92   92  }
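
            /*
             * A minimal usage sketch (hypothetical caller; "worker" and
             * minclsyspri are illustrative, not taken from this file).  A
             * system process, i.e. one with SSYS set in p_flag as the VERIFY
             * above demands, could start an accounted worker thread with:
             *
             *        t = lwp_kernel_create(p, worker, NULL, TS_RUN, minclsyspri);
             */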
  93   93  
  94   94  /*
  95   95   * Create a thread that appears to be stopped at sys_rtt.
  96   96   */
  97   97  klwp_t *
  98   98  lwp_create(void (*proc)(), caddr_t arg, size_t len, proc_t *p,
  99   99      int state, int pri, const k_sigset_t *smask, int cid, id_t lwpid)
 100  100  {
 101  101          klwp_t *lwp = NULL;
 102  102          kthread_t *t;
 103  103          kthread_t *tx;
 104  104          cpupart_t *oldpart = NULL;
 105  105          size_t  stksize;
 106  106          caddr_t lwpdata = NULL;
 107  107          processorid_t   binding;
 108  108          int err = 0;
 109  109          kproject_t *oldkpj, *newkpj;
 110  110          void *bufp = NULL;
 111  111          klwp_t *curlwp;
 112  112          lwpent_t *lep;
 113  113          lwpdir_t *old_dir = NULL;
 114  114          uint_t old_dirsz = 0;
 115  115          tidhash_t *old_hash = NULL;
 116  116          uint_t old_hashsz = 0;
 117  117          ret_tidhash_t *ret_tidhash = NULL;
 118  118          int i;
 119  119          int rctlfail = 0;
 120  120          void *brand_data = NULL;
 121  121          struct ctxop *ctx = NULL;
 122  122  
 123  123          ASSERT(cid != sysdccid);        /* system threads must start in SYS */
 124  124  
 125  125          ASSERT(p != &p0);               /* No new LWPs in p0. */
 126  126  
 127  127          mutex_enter(&p->p_lock);
 128  128          mutex_enter(&p->p_zone->zone_nlwps_lock);
 129  129          /*
 130  130           * don't enforce rctl limits on system processes
 131  131           */
 132  132          if (!CLASS_KERNEL(cid)) {
 133  133                  if (p->p_task->tk_nlwps >= p->p_task->tk_nlwps_ctl)
 134  134                          if (rctl_test(rc_task_lwps, p->p_task->tk_rctls, p,
 135  135                              1, 0) & RCT_DENY)
 136  136                                  rctlfail = 1;
 137  137                  if (p->p_task->tk_proj->kpj_nlwps >=
 138  138                      p->p_task->tk_proj->kpj_nlwps_ctl)
 139  139                          if (rctl_test(rc_project_nlwps,
 140  140                              p->p_task->tk_proj->kpj_rctls, p, 1, 0)
 141  141                              & RCT_DENY)
 142  142                                  rctlfail = 1;
 143  143                  if (p->p_zone->zone_nlwps >= p->p_zone->zone_nlwps_ctl)
 144  144                          if (rctl_test(rc_zone_nlwps, p->p_zone->zone_rctls, p,
 145  145                              1, 0) & RCT_DENY)
 146  146                                  rctlfail = 1;
 147  147          }
 148  148          if (rctlfail) {
 149  149                  mutex_exit(&p->p_zone->zone_nlwps_lock);
 150  150                  mutex_exit(&p->p_lock);
 151  151                  atomic_inc_32(&p->p_zone->zone_ffcap);
 152  152                  return (NULL);
 153  153          }
 154  154          p->p_task->tk_nlwps++;
 155  155          p->p_task->tk_proj->kpj_nlwps++;
 156  156          p->p_zone->zone_nlwps++;
 157  157          mutex_exit(&p->p_zone->zone_nlwps_lock);
 158  158          mutex_exit(&p->p_lock);
 159  159  
 160  160          curlwp = ttolwp(curthread);
 161  161          if (curlwp == NULL || (stksize = curlwp->lwp_childstksz) == 0)
 162  162                  stksize = lwp_default_stksize;
 163  163  
 164  164          if (CLASS_KERNEL(cid)) {
 165  165                  /*
 166  166                   * Since we are creating an LWP in an SSYS process, we do not
 167  167                   * inherit anything from the current thread's LWP.  We set
 168  168                   * stksize and lwpdata to 0 in order to let thread_create()
 169  169                   * allocate a regular kernel thread stack for this thread.
 170  170                   */
 171  171                  curlwp = NULL;
 172  172                  stksize = 0;
 173  173                  lwpdata = NULL;
 174  174  
 175  175          } else if (stksize == lwp_default_stksize) {
 176  176                  /*
 177  177                   * Try to reuse an <lwp,stack> from the LWP deathrow.
 178  178                   */
 179  179                  if (lwp_reapcnt > 0) {
 180  180                          mutex_enter(&reaplock);
 181  181                          if ((t = lwp_deathrow) != NULL) {
 182  182                                  ASSERT(t->t_swap);
 183  183                                  lwp_deathrow = t->t_forw;
 184  184                                  lwp_reapcnt--;
 185  185                                  lwpdata = t->t_swap;
 186  186                                  lwp = t->t_lwp;
 187  187                                  ctx = t->t_ctx;
 188  188                                  t->t_swap = NULL;
 189  189                                  t->t_lwp = NULL;
 190  190                                  t->t_ctx = NULL;
 191  191                                  reapq_move_lq_to_tq(t);
 192  192                          }
 193  193                          mutex_exit(&reaplock);
 194  194                          if (lwp != NULL) {
 195  195                                  lwp_stk_fini(lwp);
 196  196                          }
 197  197                          if (ctx != NULL) {
 198  198                                  freectx_ctx(ctx);
 199  199                          }
 200  200                  }
 201  201                  if (lwpdata == NULL &&
 202  202                      (lwpdata = (caddr_t)segkp_cache_get(segkp_lwp)) == NULL) {
 203  203                          mutex_enter(&p->p_lock);
 204  204                          mutex_enter(&p->p_zone->zone_nlwps_lock);
 205  205                          p->p_task->tk_nlwps--;
 206  206                          p->p_task->tk_proj->kpj_nlwps--;
 207  207                          p->p_zone->zone_nlwps--;
 208  208                          mutex_exit(&p->p_zone->zone_nlwps_lock);
 209  209                          mutex_exit(&p->p_lock);
 210  210                          atomic_inc_32(&p->p_zone->zone_ffnomem);
 211  211                          return (NULL);
 212  212                  }
 213  213          } else {
 214  214                  stksize = roundup(stksize, PAGESIZE);
 215  215                  if ((lwpdata = (caddr_t)segkp_get(segkp, stksize,
 216  216                      (KPD_NOWAIT | KPD_HASREDZONE | KPD_LOCKED))) == NULL) {
 217  217                          mutex_enter(&p->p_lock);
 218  218                          mutex_enter(&p->p_zone->zone_nlwps_lock);
 219  219                          p->p_task->tk_nlwps--;
 220  220                          p->p_task->tk_proj->kpj_nlwps--;
 221  221                          p->p_zone->zone_nlwps--;
 222  222                          mutex_exit(&p->p_zone->zone_nlwps_lock);
 223  223                          mutex_exit(&p->p_lock);
 224  224                          atomic_inc_32(&p->p_zone->zone_ffnomem);
 225  225                          return (NULL);
 226  226                  }
 227  227          }
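
                    /*
                     * Recapping the three allocation paths above: kernel-class
                     * lwps let thread_create() allocate an ordinary kernel
                     * stack; default-sized user lwps reuse an <lwp,stack> pair
                     * from lwp_deathrow or take one from the segkp_lwp cache;
                     * any other size comes straight from segkp_get(), rounded
                     * up to a page and given a redzone.
                     */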
 228  228  
 229  229          /*
 230  230           * Create a thread, initializing the stack pointer
 231  231           */
 232  232          t = thread_create(lwpdata, stksize, NULL, NULL, 0, p, TS_STOPPED, pri);
 233  233  
 234  234          /*
 235  235           * If a non-NULL stack base is passed in, thread_create() assumes
 236  236           * that the stack might be statically allocated (as opposed to being
 237  237           * allocated from segkp), and so it does not set t_swap.  Since
 238  238           * the lwpdata was allocated from segkp, we must set t_swap to point
 239  239           * to it ourselves.
 240  240           *
 241  241           * This would be less confusing if t_swap had a better name; it really
 242  242           * indicates that the stack is allocated from segkp, regardless of
 243  243           * whether or not it is swappable.
 244  244           */
 245  245          if (lwpdata != NULL) {
 246  246                  ASSERT(!CLASS_KERNEL(cid));
 247  247                  ASSERT(t->t_swap == NULL);
 248  248                  t->t_swap = lwpdata;    /* Start of page-able data */
 249  249          }
 250  250  
 251  251          /*
 252  252           * If the stack and lwp can be reused, mark the thread as such.
 253  253           * When we get to reapq_add() from resume_from_zombie(), these
 254  254           * threads will go onto lwp_deathrow instead of thread_deathrow.
 255  255           */
 256  256          if (!CLASS_KERNEL(cid) && stksize == lwp_default_stksize)
 257  257                  t->t_flag |= T_LWPREUSE;
 258  258  
 259  259          if (lwp == NULL)
 260  260                  lwp = kmem_cache_alloc(lwp_cache, KM_SLEEP);
 261  261          bzero(lwp, sizeof (*lwp));
 262  262          t->t_lwp = lwp;
 263  263  
 264  264          t->t_hold = *smask;
 265  265          lwp->lwp_thread = t;
 266  266          lwp->lwp_procp = p;
 267  267          lwp->lwp_sigaltstack.ss_flags = SS_DISABLE;
 268  268          if (curlwp != NULL && curlwp->lwp_childstksz != 0)
 269  269                  lwp->lwp_childstksz = curlwp->lwp_childstksz;
 270  270  
 271  271          t->t_stk = lwp_stk_init(lwp, t->t_stk);
 272  272          thread_load(t, proc, arg, len);
 273  273  
 274  274          /*
 275  275           * Allocate the SIGPROF buffer if ITIMER_REALPROF is in effect.
 276  276           */
 277  277          if (p->p_rprof_cyclic != CYCLIC_NONE)
 278  278                  t->t_rprof = kmem_zalloc(sizeof (struct rprof), KM_SLEEP);
 279  279  
 280  280          if (cid != NOCLASS)
 281  281                  (void) CL_ALLOC(&bufp, cid, KM_SLEEP);
 282  282  
 283  283          /*
 284  284           * Allocate an lwp directory entry for the new lwp.
 285  285           */
 286  286          lep = kmem_zalloc(sizeof (*lep), KM_SLEEP);
 287  287  
 288  288          /*
 289  289           * If necessary, speculatively allocate lwp brand data.  This is done
 290  290           * ahead of time so p_lock need not be dropped during lwp branding.
 291  291           */
 292  292          if (PROC_IS_BRANDED(p) && BROP(p)->b_lwpdata_alloc != NULL) {
 293  293                  if ((brand_data = BROP(p)->b_lwpdata_alloc(p)) == NULL) {
 294  294                          mutex_enter(&p->p_lock);
 295  295                          err = 1;
 296  296                          atomic_inc_32(&p->p_zone->zone_ffmisc);
 297  297                          goto error;
 298  298                  }
 299  299          }
 300  300  
 301  301          mutex_enter(&p->p_lock);
 302  302  grow:
 303  303          /*
 304  304           * Grow the lwp (thread) directory and lwpid hash table if necessary.
 305  305           * A note on the growth algorithm:
 306  306           *      The new lwp directory size is computed as:
 307  307           *              new = 2 * old + 2
 308  308           *      Starting with an initial size of 2 (see exec_common()),
 309  309           *      this yields numbers that are a power of two minus 2:
 310  310           *              2, 6, 14, 30, 62, 126, 254, 510, 1022, ...
 311  311           *      The size of the lwpid hash table must be a power of two
 312  312           *      and must be commensurate in size with the lwp directory
 313  313           *      so that hash bucket chains remain short.  Therefore,
 314  314           *      the lwpid hash table size is computed as:
 315  315           *              hashsz = (dirsz + 2) / 2
 316  316           *      which leads to these hash table sizes corresponding to
 317  317           *      the above directory sizes:
 318  318           *              2, 4, 8, 16, 32, 64, 128, 256, 512, ...
 319  319           * A note on growing the hash table:
 320  320           *      For performance reasons, code in lwp_unpark() does not
 321  321           *      acquire curproc->p_lock when searching the hash table.
 322  322           *      Rather, it calls lwp_hash_lookup_and_lock() which
 323  323           *      acquires only the individual hash bucket lock, taking
 324  324           *      care to deal with reallocation of the hash table
 325  325           *      during the time it takes to acquire the lock.
 326  326           *
 327  327           *      This is sufficient to protect the integrity of the
 328  328           *      hash table, but it requires us to acquire all of the
 329  329           *      old hash bucket locks before growing the hash table
 330  330           *      and to release them afterwards.  It also requires us
 331  331           *      not to free the old hash table because some thread
 332  332           *      in lwp_hash_lookup_and_lock() might still be trying
 333  333           *      to acquire the old bucket lock.
 334  334           *
 335  335           *      So we adopt the tactic of keeping all of the retired
 336  336           *      hash tables on a linked list, so they can be safely
 337  337           *      freed when the process exits or execs.
 338  338           *
  339  339           *      Because the hash table doubles each time it grows, the
  340  340           *      retired tables sum to (largest - 2) entries, keeping the
  341  341           *      total for all tables slightly below twice the largest.
 342  342           */
 343  343          while (p->p_lwpfree == NULL) {
 344  344                  uint_t dirsz = p->p_lwpdir_sz;
 345  345                  lwpdir_t *new_dir;
 346  346                  uint_t new_dirsz;
 347  347                  lwpdir_t *ldp;
 348  348                  tidhash_t *new_hash;
 349  349                  uint_t new_hashsz;
 350  350  
 351  351                  mutex_exit(&p->p_lock);
 352  352  
 353  353                  /*
 354  354                   * Prepare to remember the old p_tidhash for later
 355  355                   * kmem_free()ing when the process exits or execs.
 356  356                   */
 357  357                  if (ret_tidhash == NULL)
 358  358                          ret_tidhash = kmem_zalloc(sizeof (ret_tidhash_t),
 359  359                              KM_SLEEP);
 360  360                  if (old_dir != NULL)
 361  361                          kmem_free(old_dir, old_dirsz * sizeof (*old_dir));
 362  362                  if (old_hash != NULL)
 363  363                          kmem_free(old_hash, old_hashsz * sizeof (*old_hash));
 364  364  
 365  365                  new_dirsz = 2 * dirsz + 2;
 366  366                  new_dir = kmem_zalloc(new_dirsz * sizeof (lwpdir_t), KM_SLEEP);
 367  367                  for (ldp = new_dir, i = 1; i < new_dirsz; i++, ldp++)
 368  368                          ldp->ld_next = ldp + 1;
 369  369                  new_hashsz = (new_dirsz + 2) / 2;
 370  370                  new_hash = kmem_zalloc(new_hashsz * sizeof (tidhash_t),
 371  371                      KM_SLEEP);
 372  372  
 373  373                  mutex_enter(&p->p_lock);
 374  374                  if (p == curproc)
 375  375                          prbarrier(p);
 376  376  
 377  377                  if (dirsz != p->p_lwpdir_sz || p->p_lwpfree != NULL) {
 378  378                          /*
 379  379                           * Someone else beat us to it or some lwp exited.
 380  380                           * Set up to free our memory and take a lap.
 381  381                           */
 382  382                          old_dir = new_dir;
 383  383                          old_dirsz = new_dirsz;
 384  384                          old_hash = new_hash;
 385  385                          old_hashsz = new_hashsz;
 386  386                  } else {
 387  387                          /*
 388  388                           * For the benefit of lwp_hash_lookup_and_lock(),
 389  389                           * called from lwp_unpark(), which searches the
 390  390                           * tid hash table without acquiring p->p_lock,
 391  391                           * we must acquire all of the tid hash table
 392  392                           * locks before replacing p->p_tidhash.
 393  393                           */
 394  394                          old_hash = p->p_tidhash;
 395  395                          old_hashsz = p->p_tidhash_sz;
 396  396                          for (i = 0; i < old_hashsz; i++) {
 397  397                                  mutex_enter(&old_hash[i].th_lock);
 398  398                                  mutex_enter(&new_hash[i].th_lock);
 399  399                          }
 400  400  
 401  401                          /*
 402  402                           * We simply hash in all of the old directory entries.
 403  403                           * This works because the old directory has no empty
 404  404                           * slots and the new hash table starts out empty.
 405  405                           * This reproduces the original directory ordering
 406  406                           * (required for /proc directory semantics).
 407  407                           */
 408  408                          old_dir = p->p_lwpdir;
 409  409                          old_dirsz = p->p_lwpdir_sz;
 410  410                          p->p_lwpdir = new_dir;
 411  411                          p->p_lwpfree = new_dir;
 412  412                          p->p_lwpdir_sz = new_dirsz;
 413  413                          for (ldp = old_dir, i = 0; i < old_dirsz; i++, ldp++)
 414  414                                  lwp_hash_in(p, ldp->ld_entry,
 415  415                                      new_hash, new_hashsz, 0);
 416  416  
 417  417                          /*
 418  418                           * Remember the old hash table along with all
 419  419                           * of the previously-remembered hash tables.
 420  420                           * We will free them at process exit or exec.
 421  421                           */
 422  422                          ret_tidhash->rth_tidhash = old_hash;
 423  423                          ret_tidhash->rth_tidhash_sz = old_hashsz;
 424  424                          ret_tidhash->rth_next = p->p_ret_tidhash;
 425  425                          p->p_ret_tidhash = ret_tidhash;
 426  426  
 427  427                          /*
 428  428                           * Now establish the new tid hash table.
 429  429                           * As soon as we assign p->p_tidhash,
 430  430                           * code in lwp_unpark() can start using it.
 431  431                           */
 432  432                          membar_producer();
 433  433                          p->p_tidhash = new_hash;
 434  434  
 435  435                          /*
 436  436                           * It is necessary that p_tidhash reach global
 437  437                           * visibility before p_tidhash_sz.  Otherwise,
 438  438                           * code in lwp_hash_lookup_and_lock() could
 439  439                           * index into the old p_tidhash using the new
 440  440                           * p_tidhash_sz and thereby access invalid data.
 441  441                           */
 442  442                          membar_producer();
 443  443                          p->p_tidhash_sz = new_hashsz;
 444  444  
 445  445                          /*
 446  446                           * Release the locks; allow lwp_unpark() to carry on.
 447  447                           */
 448  448                          for (i = 0; i < old_hashsz; i++) {
 449  449                                  mutex_exit(&old_hash[i].th_lock);
 450  450                                  mutex_exit(&new_hash[i].th_lock);
 451  451                          }
 452  452  
 453  453                          /*
 454  454                           * Avoid freeing these objects below.
 455  455                           */
 456  456                          ret_tidhash = NULL;
 457  457                          old_hash = NULL;
 458  458                          old_hashsz = 0;
 459  459                  }
 460  460          }
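
                    /*
                     * A note on the assumed reader side (the body of
                     * lwp_hash_lookup_and_lock() is not shown here): to benefit
                     * from the ordering above, the reader should load
                     * p_tidhash_sz first, execute a matching membar_consumer(),
                     * then load p_tidhash, and re-check both under the bucket
                     * lock, retrying if either changed in the meantime.
                     */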
 461  461  
 462  462          /*
 463  463           * Block the process against /proc while we manipulate p->p_tlist,
 464  464           * unless lwp_create() was called by /proc for the PCAGENT operation.
 465  465           * We want to do this early enough so that we don't drop p->p_lock
 466  466           * until the thread is put on the p->p_tlist.
 467  467           */
 468  468          if (p == curproc) {
 469  469                  prbarrier(p);
 470  470                  /*
 471  471                   * If the current lwp has been requested to stop, do so now.
 472  472                   * Otherwise we have a race condition between /proc attempting
 473  473                   * to stop the process and this thread creating a new lwp
 474  474                   * that was not seen when the /proc PCSTOP request was issued.
 475  475                   * We rely on stop() to call prbarrier(p) before returning.
 476  476                   */
 477  477                  while ((curthread->t_proc_flag & TP_PRSTOP) &&
 478  478                      !ttolwp(curthread)->lwp_nostop) {
 479  479                          /*
  480  480                           * We called pool_barrier_enter() before coming
  481  481                           * here to lwp_create().  We have to call
 482  482                           * pool_barrier_exit() before stopping.
 483  483                           */
 484  484                          pool_barrier_exit();
 485  485                          prbarrier(p);
 486  486                          stop(PR_REQUESTED, 0);
 487  487                          /*
 488  488                           * And we have to repeat the call to
  489  489                           * pool_barrier_enter() after stopping.
 490  490                           */
 491  491                          pool_barrier_enter();
 492  492                          prbarrier(p);
 493  493                  }
 494  494  
 495  495                  /*
  496  496                   * If the process is exiting, there could be a race
  497  497                   * between agent lwp creation and the new lwp currently
  498  498                   * being created.  To prevent this race, lwp creation
  499  499                   * fails if the process is exiting.
 500  500                   */
 501  501                  if (p->p_flag & (SEXITLWPS|SKILLED)) {
 502  502                          err = 1;
 503  503                          goto error;
 504  504                  }
 505  505  
 506  506                  /*
 507  507                   * Since we might have dropped p->p_lock, the
 508  508                   * lwp directory free list might have changed.
 509  509                   */
 510  510                  if (p->p_lwpfree == NULL)
 511  511                          goto grow;
 512  512          }
 513  513  
 514  514          kpreempt_disable();     /* can't grab cpu_lock here */
 515  515  
 516  516          /*
 517  517           * Inherit processor and processor set bindings from curthread.
 518  518           *
 519  519           * For kernel LWPs, we do not inherit processor set bindings at
 520  520           * process creation time (i.e. when p != curproc).  After the
 521  521           * kernel process is created, any subsequent LWPs must be created
 522  522           * by threads in the kernel process, at which point we *will*
 523  523           * inherit processor set bindings.
 524  524           */
 525  525          if (CLASS_KERNEL(cid) && p != curproc) {
 526  526                  t->t_bind_cpu = binding = PBIND_NONE;
 527  527                  t->t_cpupart = oldpart = &cp_default;
 528  528                  t->t_bind_pset = PS_NONE;
 529  529                  t->t_bindflag = (uchar_t)default_binding_mode;
 530  530          } else {
 531  531                  binding = curthread->t_bind_cpu;
 532  532                  t->t_bind_cpu = binding;
 533  533                  oldpart = t->t_cpupart;
 534  534                  t->t_cpupart = curthread->t_cpupart;
 535  535                  t->t_bind_pset = curthread->t_bind_pset;
 536  536                  t->t_bindflag = curthread->t_bindflag |
 537  537                      (uchar_t)default_binding_mode;
 538  538          }
 539  539  
 540  540          /*
 541  541           * thread_create() initializes this thread's home lgroup to the root.
 542  542           * Choose a more suitable lgroup, since this thread is associated
 543  543           * with an lwp.
 544  544           */
 545  545          ASSERT(oldpart != NULL);
 546  546          if (binding != PBIND_NONE && t->t_affinitycnt == 0) {
 547  547                  t->t_bound_cpu = cpu[binding];
 548  548                  if (t->t_lpl != t->t_bound_cpu->cpu_lpl)
 549  549                          lgrp_move_thread(t, t->t_bound_cpu->cpu_lpl, 1);
 550  550          } else if (CLASS_KERNEL(cid)) {
 551  551                  /*
 552  552                   * Kernel threads are always in the root lgrp.
 553  553                   */
 554  554                  lgrp_move_thread(t,
 555  555                      &t->t_cpupart->cp_lgrploads[LGRP_ROOTID], 1);
 556  556          } else {
 557  557                  lgrp_move_thread(t, lgrp_choose(t, t->t_cpupart), 1);
 558  558          }
 559  559  
 560  560          kpreempt_enable();
 561  561  
 562  562          /*
 563  563           * make sure lpl points to our own partition
 564  564           */
 565  565          ASSERT(t->t_lpl >= t->t_cpupart->cp_lgrploads);
 566  566          ASSERT(t->t_lpl < t->t_cpupart->cp_lgrploads +
 567  567              t->t_cpupart->cp_nlgrploads);
 568  568  
 569  569          /*
 570  570           * It is safe to point the thread to the new project without holding it
 571  571           * since we're holding the target process' p_lock here and therefore
 572  572           * we're guaranteed that it will not move to another project.
 573  573           */
 574  574          newkpj = p->p_task->tk_proj;
 575  575          oldkpj = ttoproj(t);
 576  576          if (newkpj != oldkpj) {
 577  577                  t->t_proj = newkpj;
 578  578                  (void) project_hold(newkpj);
 579  579                  project_rele(oldkpj);
 580  580          }
 581  581  
 582  582          if (cid != NOCLASS) {
 583  583                  /*
 584  584                   * If the lwp is being created in the current process
 585  585                   * and matches the current thread's scheduling class,
 586  586                   * we should propagate the current thread's scheduling
 587  587                   * parameters by calling CL_FORK.  Otherwise just use
 588  588                   * the defaults by calling CL_ENTERCLASS.
 589  589                   */
 590  590                  if (p != curproc || curthread->t_cid != cid) {
 591  591                          err = CL_ENTERCLASS(t, cid, NULL, NULL, bufp);
 592  592                          t->t_pri = pri; /* CL_ENTERCLASS may have changed it */
 593  593                          /*
 594  594                           * We don't call schedctl_set_cidpri(t) here
 595  595                           * because the schedctl data is not yet set
 596  596                           * up for the newly-created lwp.
 597  597                           */
 598  598                  } else {
 599  599                          t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
 600  600                          err = CL_FORK(curthread, t, bufp);
 601  601                          t->t_cid = cid;
 602  602                  }
 603  603                  if (err) {
 604  604                          atomic_inc_32(&p->p_zone->zone_ffmisc);
 605  605                          goto error;
 606  606                  } else {
 607  607                          bufp = NULL;
 608  608                  }
 609  609          }
 610  610  
 611  611          /*
 612  612           * If we were given an lwpid then use it, else allocate one.
 613  613           */
 614  614          if (lwpid != 0)
 615  615                  t->t_tid = lwpid;
 616  616          else {
 617  617                  /*
 618  618                   * lwp/thread id 0 is never valid; reserved for special checks.
 619  619                   * lwp/thread id 1 is reserved for the main thread.
 620  620                   * Start again at 2 when INT_MAX has been reached
 621  621                   * (id_t is a signed 32-bit integer).
 622  622                   */
 623  623                  id_t prev_id = p->p_lwpid;      /* last allocated tid */
 624  624  
 625  625                  do {                    /* avoid lwpid duplication */
 626  626                          if (p->p_lwpid == INT_MAX) {
 627  627                                  p->p_flag |= SLWPWRAP;
 628  628                                  p->p_lwpid = 1;
 629  629                          }
 630  630                          if ((t->t_tid = ++p->p_lwpid) == prev_id) {
 631  631                                  /*
 632  632                                   * All lwpids are allocated; fail the request.
 633  633                                   */
 634  634                                  err = 1;
 635  635                                  atomic_inc_32(&p->p_zone->zone_ffnoproc);
 636  636                                  goto error;
 637  637                          }
 638  638                          /*
 639  639                           * We only need to worry about colliding with an id
 640  640                           * that's already in use if this process has
 641  641                           * cycled through all available lwp ids.
 642  642                           */
 643  643                          if ((p->p_flag & SLWPWRAP) == 0)
 644  644                                  break;
 645  645                  } while (lwp_hash_lookup(p, t->t_tid) != NULL);
 646  646          }
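
                    /*
                     * To make the wraparound concrete: once p_lwpid reaches
                     * INT_MAX, the next allocation sets SLWPWRAP, resets
                     * p_lwpid to 1, and tries tid 2 next.  With SLWPWRAP set,
                     * each candidate is probed in the tid hash table and
                     * skipped while still in use; only if the search comes all
                     * the way back around to prev_id are all 2^31 - 2 usable
                     * ids taken, and the creation fails.
                     */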
 647  647  
 648  648  
 649  649          if (t->t_tid == 1) {
 650  650                  kpreempt_disable();
 651  651                  ASSERT(t->t_lpl != NULL);
 652  652                  p->p_t1_lgrpid = t->t_lpl->lpl_lgrpid;
 653  653                  kpreempt_enable();
 654  654                  if (p->p_tr_lgrpid != LGRP_NONE &&
 655  655                      p->p_tr_lgrpid != p->p_t1_lgrpid) {
 656  656                          lgrp_update_trthr_migrations(1);
 657  657                  }
 658  658          }
 659  659  
 660  660          t->t_waitfor = -1;
 661  661  
 662  662          /*
 663  663           * Turn microstate accounting on for thread if on for process.
 664  664           */
 665  665          if (p->p_flag & SMSACCT)
 666  666                  t->t_proc_flag |= TP_MSACCT;
 667  667  
 668  668          /*
 669  669           * If the process has watchpoints, mark the new thread as such.
 670  670           */
 671  671          if (pr_watch_active(p))
 672  672                  watch_enable(t);
 673  673  
 674  674          /*
 675  675           * The lwp is being created in the stopped state.
 676  676           * We set all the necessary flags to indicate that fact here.
 677  677           * We omit the TS_CREATE flag from t_schedflag so that the lwp
 678  678           * cannot be set running until the caller is finished with it,
 679  679           * even if lwp_continue() is called on it after we drop p->p_lock.
 680  680           * When the caller is finished with the newly-created lwp,
 681  681           * the caller must call lwp_create_done() to allow the lwp
  682  682           * to be set running.  If the TP_HOLDLWP flag is left set, the
 683  683           * lwp will suspend itself after reaching system call exit.
 684  684           */
 685  685          init_mstate(t, LMS_STOPPED);
 686  686          t->t_proc_flag |= TP_HOLDLWP;
 687  687          t->t_schedflag |= (TS_ALLSTART & ~(TS_CSTART | TS_CREATE));
 688  688          t->t_whystop = PR_SUSPENDED;
 689  689          t->t_whatstop = SUSPEND_NORMAL;
 690  690          t->t_sig_check = 1;     /* ensure that TP_HOLDLWP is honored */
 691  691  
 692  692          /*
 693  693           * Set system call processing flags in case tracing or profiling
 694  694           * is set.  The first system call will evaluate these and turn
 695  695           * them off if they aren't needed.
 696  696           */
 697  697          t->t_pre_sys = 1;
 698  698          t->t_post_sys = 1;
 699  699  
 700  700          /*
 701  701           * Perform lwp branding
 702  702           *
 703  703           * The b_initlwp hook is _not_ allowed to drop p->p_lock as it must be
 704  704           * continuously held between when the tidhash is sized and when the lwp
 705  705           * is inserted into it.  Operations requiring p->p_lock to be
 706  706           * temporarily dropped can be performed in b_initlwp_post.
 707  707           */
 708  708          if (PROC_IS_BRANDED(p)) {
 709  709                  BROP(p)->b_initlwp(lwp, brand_data);
 710  710                  /*
 711  711                   * The b_initlwp hook is expected to consume any preallocated
 712  712                   * brand_data in a way that prepares it for deallocation by the
 713  713                   * b_freelwp hook.
 714  714                   */
 715  715                  brand_data = NULL;
 716  716          }
 717  717  
 718  718          /*
 719  719           * Insert the new thread into the list of all threads.
 720  720           */
 721  721          p->p_lwpcnt++;
 722  722          if ((tx = p->p_tlist) == NULL) {
 723  723                  t->t_back = t;
 724  724                  t->t_forw = t;
 725  725                  p->p_tlist = t;
 726  726          } else {
 727  727                  t->t_forw = tx;
 728  728                  t->t_back = tx->t_back;
 729  729                  tx->t_back->t_forw = t;
 730  730                  tx->t_back = t;
 731  731          }
 732  732  
 733  733          /*
 734  734           * Insert the new lwp into an lwp directory slot position
 735  735           * and into the lwpid hash table.
 736  736           */
 737  737          lep->le_thread = t;
 738  738          lep->le_lwpid = t->t_tid;
 739  739          lep->le_start = t->t_start;
 740  740          lwp_hash_in(p, lep, p->p_tidhash, p->p_tidhash_sz, 1);
 741  741  
 742  742          /*
 743  743           * Complete lwp branding
 744  744           */
 745  745          if (PROC_IS_BRANDED(p) && BROP(p)->b_initlwp_post != NULL) {
 746  746                  BROP(p)->b_initlwp_post(lwp);
 747  747          }
 748  748  
 749  749          if (state == TS_RUN) {
 750  750                  /*
 751  751                   * We set the new lwp running immediately.
 752  752                   */
 753  753                  t->t_proc_flag &= ~TP_HOLDLWP;
 754  754                  lwp_create_done(t);
 755  755          }
 756  756  
 757  757  error:
 758  758          if (err) {
 759  759                  if (CLASS_KERNEL(cid)) {
 760  760                          /*
 761  761                           * This should only happen if a system process runs
 762  762                           * out of lwpids, which shouldn't occur.
 763  763                           */
 764  764                          panic("Failed to create a system LWP");
 765  765                  }
 766  766                  /*
 767  767                   * We have failed to create an lwp, so decrement the number
 768  768                   * of lwps in the task and let the lgroup load averages know
 769  769                   * that this thread isn't going to show up.
 770  770                   */
 771  771                  kpreempt_disable();
 772  772                  lgrp_move_thread(t, NULL, 1);
 773  773                  kpreempt_enable();
 774  774  
 775  775                  ASSERT(MUTEX_HELD(&p->p_lock));
 776  776                  mutex_enter(&p->p_zone->zone_nlwps_lock);
 777  777                  p->p_task->tk_nlwps--;
 778  778                  p->p_task->tk_proj->kpj_nlwps--;
 779  779                  p->p_zone->zone_nlwps--;
 780  780                  mutex_exit(&p->p_zone->zone_nlwps_lock);
 781  781                  if (cid != NOCLASS && bufp != NULL)
 782  782                          CL_FREE(cid, bufp);
 783  783  
 784  784                  if (brand_data != NULL) {
 785  785                          BROP(p)->b_lwpdata_free(brand_data);
 786  786                  }
 787  787  
 788  788                  mutex_exit(&p->p_lock);
 789  789                  t->t_state = TS_FREE;
 790  790                  thread_rele(t);
 791  791  
 792  792                  /*
 793  793                   * We need to remove t from the list of all threads
 794  794                   * because thread_exit()/lwp_exit() isn't called on t.
 795  795                   */
 796  796                  mutex_enter(&pidlock);
 797  797                  ASSERT(t != t->t_next);         /* t0 never exits */
 798  798                  t->t_next->t_prev = t->t_prev;
 799  799                  t->t_prev->t_next = t->t_next;
 800  800                  mutex_exit(&pidlock);
 801  801  
 802  802                  thread_free(t);
 803  803                  kmem_free(lep, sizeof (*lep));
 804  804                  lwp = NULL;
 805  805          } else {
 806  806                  mutex_exit(&p->p_lock);
 807  807          }
 808  808  
 809  809          if (old_dir != NULL)
 810  810                  kmem_free(old_dir, old_dirsz * sizeof (*old_dir));
 811  811          if (old_hash != NULL)
 812  812                  kmem_free(old_hash, old_hashsz * sizeof (*old_hash));
 813  813          if (ret_tidhash != NULL)
 814  814                  kmem_free(ret_tidhash, sizeof (ret_tidhash_t));
 815  815  
 816  816          DTRACE_PROC1(lwp__create, kthread_t *, t);
 817  817          return (lwp);
 818  818  }
 819  819  
 820  820  /*
 821  821   * lwp_create_done() is called by the caller of lwp_create() to set the
 822  822   * newly-created lwp running after the caller has finished manipulating it.
 823  823   */
 824  824  void
 825  825  lwp_create_done(kthread_t *t)
 826  826  {
 827  827          proc_t *p = ttoproc(t);
 828  828  
 829  829          ASSERT(MUTEX_HELD(&p->p_lock));
 830  830  
 831  831          /*
 832  832           * We set the TS_CREATE and TS_CSTART flags and call setrun_locked().
 833  833           * (The absence of the TS_CREATE flag prevents the lwp from running
 834  834           * until we are finished with it, even if lwp_continue() is called on
 835  835           * it by some other lwp in the process or elsewhere in the kernel.)
 836  836           */
 837  837          thread_lock(t);
 838  838          ASSERT(t->t_state == TS_STOPPED && !(t->t_schedflag & TS_CREATE));
 839  839          /*
 840  840           * If TS_CSTART is set, lwp_continue(t) has been called and
 841  841           * has already incremented p_lwprcnt; avoid doing this twice.
 842  842           */
 843  843          if (!(t->t_schedflag & TS_CSTART))
 844  844                  p->p_lwprcnt++;
 845  845          t->t_schedflag |= (TS_CSTART | TS_CREATE);
 846  846          setrun_locked(t);
 847  847          thread_unlock(t);
 848  848  }
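
            /*
             * A sketch of the overall calling protocol (illustrative; func,
             * arg, smask, cid and pri stand in for the caller's values):
             *
             *        lwp = lwp_create(func, arg, 0, p, TS_STOPPED, pri,
             *            &smask, cid, 0);
             *        ... manipulate the stopped lwp; p->p_lock is not held ...
             *        mutex_enter(&p->p_lock);
             *        lwp_create_done(lwptot(lwp));
             *        mutex_exit(&p->p_lock);
             */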
 849  849  
 850  850  /*
 851  851   * Copy an LWP's active templates, and clear the latest contracts.
 852  852   */
 853  853  void
 854  854  lwp_ctmpl_copy(klwp_t *dst, klwp_t *src)
 855  855  {
 856  856          int i;
 857  857  
 858  858          for (i = 0; i < ct_ntypes; i++) {
 859  859                  ct_template_t *tmpl = src->lwp_ct_active[i];
 860  860  
 861  861                  /*
  862  862                   * If the process contract template is set up to be
  863  863                   * preserved across exec and we are forking, perform an
  864  864                   * implicit template_clear now.  This ensures that future
  865  865                   * children of this child will remain in the same contract
  866  866                   * unless they're explicitly set up differently.  We know
  867  867                   * we're forking if the two LWPs belong to different processes.
 868  868                   */
 869  869                  if (i == CTT_PROCESS && tmpl != NULL) {
 870  870                          ctmpl_process_t *ctp = tmpl->ctmpl_data;
 871  871  
 872  872                          if (dst->lwp_procp != src->lwp_procp &&
 873  873                              (ctp->ctp_params & CT_PR_KEEP_EXEC) != 0)
 874  874                                  tmpl = NULL;
 875  875                  }
 876  876  
 877  877                  dst->lwp_ct_active[i] = ctmpl_dup(tmpl);
 878  878                  dst->lwp_ct_latest[i] = NULL;
 879  879  
 880  880          }
 881  881  }
 882  882  
 883  883  /*
 884  884   * Clear an LWP's contract template state.
 885  885   */
 886  886  void
 887  887  lwp_ctmpl_clear(klwp_t *lwp, boolean_t is_exec)
 888  888  {
 889  889          ct_template_t *tmpl;
 890  890          int i;
 891  891  
 892  892          for (i = 0; i < ct_ntypes; i++) {
 893  893                  if (lwp->lwp_ct_latest[i] != NULL) {
 894  894                          contract_rele(lwp->lwp_ct_latest[i]);
 895  895                          lwp->lwp_ct_latest[i] = NULL;
 896  896                  }
 897  897  
 898  898                  if ((tmpl = lwp->lwp_ct_active[i]) != NULL) {
 899  899                          /*
 900  900                           * If we're exec-ing a new program and the process
  901  901                           * contract template is set up to be preserved across
 902  902                           * exec, then don't clear it.
 903  903                           */
 904  904                          if (is_exec && i == CTT_PROCESS) {
 905  905                                  ctmpl_process_t *ctp = tmpl->ctmpl_data;
 906  906  
 907  907                                  if ((ctp->ctp_params & CT_PR_KEEP_EXEC) != 0)
 908  908                                          continue;
 909  909                          }
 910  910  
 911  911                          ctmpl_free(tmpl);
 912  912                          lwp->lwp_ct_active[i] = NULL;
 913  913                  }
 914  914          }
 915  915  }
 916  916  
 917  917  /*
 918  918   * Individual lwp exit.
 919  919   * If this is the last lwp, exit the whole process.
 920  920   */
 921  921  void
 922  922  lwp_exit(void)
 923  923  {
 924  924          kthread_t *t = curthread;
 925  925          klwp_t *lwp = ttolwp(t);
 926  926          proc_t *p = ttoproc(t);
 927  927  
 928  928          ASSERT(MUTEX_HELD(&p->p_lock));
 929  929  
 930  930          mutex_exit(&p->p_lock);
 931  931  
 932  932  #if defined(__sparc)
 933  933          /*
  934  934           * Ensure that the user stack is fully abandoned.
 935  935           */
 936  936          trash_user_windows();
 937  937  #endif
 938  938  
 939  939          tsd_exit();                     /* free thread specific data */
 940  940  
 941  941          kcpc_passivate();               /* Clean up performance counter state */
 942  942  
 943  943          pollcleanup();
 944  944  
 945  945          if (t->t_door)
 946  946                  door_slam();
 947  947  
 948  948          if (t->t_schedctl != NULL)
 949  949                  schedctl_lwp_cleanup(t);
 950  950  
 951  951          if (t->t_upimutex != NULL)
 952  952                  upimutex_cleanup();
 953  953  
 954  954          lwp_pcb_exit();
 955  955  
 956  956          mutex_enter(&p->p_lock);
 957  957          lwp_cleanup();
 958  958  
 959  959          /*
 960  960           * When this process is dumping core, its lwps are held here
 961  961           * until the core dump is finished. Then exitlwps() is called
 962  962           * again to release these lwps so that they can finish exiting.
 963  963           */
 964  964          if (p->p_flag & SCOREDUMP)
 965  965                  stop(PR_SUSPENDED, SUSPEND_NORMAL);
 966  966  
 967  967          /*
 968  968           * Block the process against /proc now that we have really acquired
 969  969           * p->p_lock (to decrement p_lwpcnt and manipulate p_tlist at least).
 970  970           */
 971  971          prbarrier(p);
 972  972  
 973  973          /*
 974  974           * Call proc_exit() if this is the last non-daemon lwp in the process.
 975  975           */
 976  976          if (!(t->t_proc_flag & TP_DAEMON) &&
 977  977              p->p_lwpcnt == p->p_lwpdaemon + 1) {
 978  978                  mutex_exit(&p->p_lock);
 979  979                  if (proc_exit(CLD_EXITED, 0) == 0) {
 980  980                          /* Restarting init. */
 981  981                          return;
 982  982                  }
 983  983  
 984  984                  /*
 985  985                   * proc_exit() returns a non-zero value when some other
 986  986                   * lwp got there first.  We just have to continue in
 987  987                   * lwp_exit().
 988  988                   */
 989  989                  mutex_enter(&p->p_lock);
 990  990                  ASSERT(curproc->p_flag & SEXITLWPS);
 991  991                  prbarrier(p);
 992  992          }
 993  993  
 994  994          DTRACE_PROC(lwp__exit);
 995  995  
 996  996          /*
  997  997           * Perform any brand-specific exit processing, then release any
  998  998           * brand data associated with the lwp.
 999  999           */
1000 1000          if (PROC_IS_BRANDED(p)) {
1001 1001                  mutex_exit(&p->p_lock);
1002 1002                  BROP(p)->b_lwpexit(lwp);
1003 1003                  BROP(p)->b_freelwp(lwp);
1004 1004                  mutex_enter(&p->p_lock);
1005 1005                  prbarrier(p);
1006 1006          }
1007 1007  
1008 1008          /*
1009 1009           * If the lwp is a detached lwp or if the process is exiting,
1010 1010           * remove (lwp_hash_out()) the lwp from the lwp directory.
1011 1011           * Otherwise null out the lwp's le_thread pointer in the lwp
1012 1012           * directory so that other threads will see it as a zombie lwp.
1013 1013           */
1014 1014          prlwpexit(t);           /* notify /proc */
1015 1015          if (!(t->t_proc_flag & TP_TWAIT) || (p->p_flag & SEXITLWPS))
1016 1016                  lwp_hash_out(p, t->t_tid);
1017 1017          else {
1018 1018                  ASSERT(!(t->t_proc_flag & TP_DAEMON));
1019 1019                  p->p_lwpdir[t->t_dslot].ld_entry->le_thread = NULL;
1020 1020                  p->p_zombcnt++;
1021 1021                  cv_broadcast(&p->p_lwpexit);
1022 1022          }
1023 1023          if (t->t_proc_flag & TP_DAEMON) {
1024 1024                  p->p_lwpdaemon--;
1025 1025                  t->t_proc_flag &= ~TP_DAEMON;
1026 1026          }
1027 1027          t->t_proc_flag &= ~TP_TWAIT;
1028 1028  
1029 1029          /*
1030 1030           * Maintain accurate lwp count for task.max-lwps resource control.
1031 1031           */
1032 1032          mutex_enter(&p->p_zone->zone_nlwps_lock);
1033 1033          p->p_task->tk_nlwps--;
1034 1034          p->p_task->tk_proj->kpj_nlwps--;
1035 1035          p->p_zone->zone_nlwps--;
1036 1036          mutex_exit(&p->p_zone->zone_nlwps_lock);
1037 1037  
1038 1038          CL_EXIT(t);             /* tell the scheduler that t is exiting */
1039 1039          ASSERT(p->p_lwpcnt != 0);
1040 1040          p->p_lwpcnt--;
1041 1041  
1042 1042          /*
1043 1043           * If all remaining non-daemon lwps are waiting in lwp_wait(),
1044 1044           * wake them up so someone can return EDEADLK.
 1045 1045           * (See the block comment preceding lwp_wait().)
1046 1046           */
1047 1047          if (p->p_lwpcnt == p->p_lwpdaemon + (p->p_lwpwait - p->p_lwpdwait))
1048 1048                  cv_broadcast(&p->p_lwpexit);
1049 1049  
1050 1050          t->t_proc_flag |= TP_LWPEXIT;
1051 1051          term_mstate(t);
1052 1052  
1053 1053  #ifndef NPROBE
1054 1054          /* Kernel probe */
1055 1055          if (t->t_tnf_tpdp)
1056 1056                  tnf_thread_exit();
1057 1057  #endif /* NPROBE */
1058 1058  
1059 1059          t->t_forw->t_back = t->t_back;
1060 1060          t->t_back->t_forw = t->t_forw;
1061 1061          if (t == p->p_tlist)
1062 1062                  p->p_tlist = t->t_forw;
1063 1063  
1064 1064          /*
1065 1065           * Clean up the signal state.
1066 1066           */
1067 1067          if (t->t_sigqueue != NULL)
1068 1068                  sigdelq(p, t, 0);
1069 1069          if (lwp->lwp_curinfo != NULL) {
1070 1070                  siginfofree(lwp->lwp_curinfo);
1071 1071                  lwp->lwp_curinfo = NULL;
1072 1072          }
1073 1073  
1074 1074          /*
1075 1075           * If we have spymaster information (that is, if we're an agent LWP),
1076 1076           * free that now.
1077 1077           */
1078 1078          if (lwp->lwp_spymaster != NULL) {
1079 1079                  kmem_free(lwp->lwp_spymaster, sizeof (psinfo_t));
1080 1080                  lwp->lwp_spymaster = NULL;
1081 1081          }
1082 1082  
1083 1083          thread_rele(t);
1084 1084  
1085 1085          /*
1086 1086           * Terminated lwps are associated with process zero and are put onto
1087 1087           * death-row by resume().  Avoid preemption after resetting t->t_procp.
1088 1088           */
1089 1089          t->t_preempt++;
1090 1090  
1091 1091          if (t->t_ctx != NULL)
1092 1092                  exitctx(t);
1093 1093          if (p->p_pctx != NULL)
1094 1094                  exitpctx(p);
1095 1095  
1096 1096          t->t_procp = &p0;
1097 1097  
1098 1098          /*
1099 1099           * Notify the HAT about the change of address space
1100 1100           */
1101 1101          hat_thread_exit(t);
1102 1102          /*
 1103 1103           * If this is the last running lwp in the process and some lwp
 1104 1104           * is waiting for that condition to become true, or if this
 1105 1105           * thread was being suspended, wake the waiting lwp.
1106 1106           *
1107 1107           * Also, if the process is exiting, we may have a thread waiting in
1108 1108           * exitlwps() that needs to be notified.
1109 1109           */
1110 1110          if (--p->p_lwprcnt == 0 || (t->t_proc_flag & TP_HOLDLWP) ||
1111 1111              (p->p_flag & SEXITLWPS))
1112 1112                  cv_broadcast(&p->p_holdlwps);
1113 1113  
1114 1114          /*
1115 1115           * Need to drop p_lock so we can reacquire pidlock.
1116 1116           */
1117 1117          mutex_exit(&p->p_lock);
1118 1118          mutex_enter(&pidlock);
1119 1119  
1120 1120          ASSERT(t != t->t_next);         /* t0 never exits */
1121 1121          t->t_next->t_prev = t->t_prev;
1122 1122          t->t_prev->t_next = t->t_next;
1123 1123          cv_broadcast(&t->t_joincv);     /* wake up anyone in thread_join */
1124 1124          mutex_exit(&pidlock);
1125 1125  
1126 1126          t->t_state = TS_ZOMB;
1127 1127          swtch_from_zombie();
1128 1128          /* never returns */
1129 1129  }
1130 1130  
1131 1131  
1132 1132  /*
1133 1133   * Cleanup function for an exiting lwp.
1134 1134   * Called both from lwp_exit() and from proc_exit().
1135 1135   * p->p_lock is repeatedly released and grabbed in this function.
1136 1136   */
1137 1137  void
1138 1138  lwp_cleanup(void)
1139 1139  {
1140 1140          kthread_t *t = curthread;
1141 1141          proc_t *p = ttoproc(t);
1142 1142  
1143 1143          ASSERT(MUTEX_HELD(&p->p_lock));
1144 1144  
1145 1145          /* untimeout any lwp-bound realtime timers */
1146 1146          if (p->p_itimer != NULL)
1147 1147                  timer_lwpexit();
1148 1148  
1149 1149          /*
1150 1150           * If this is the /proc agent lwp that is exiting, readjust p_lwpid
1151 1151           * so it appears that the agent never existed, and clear p_agenttp.
1152 1152           */
1153 1153          if (t == p->p_agenttp) {
1154 1154                  ASSERT(t->t_tid == p->p_lwpid);
1155 1155                  p->p_lwpid--;
1156 1156                  p->p_agenttp = NULL;
1157 1157          }
1158 1158  
1159 1159          /*
1160 1160           * Do lgroup bookkeeping to account for thread exiting.
1161 1161           */
1162 1162          kpreempt_disable();
1163 1163          lgrp_move_thread(t, NULL, 1);
1164 1164          if (t->t_tid == 1) {
1165 1165                  p->p_t1_lgrpid = LGRP_NONE;
1166 1166          }
1167 1167          kpreempt_enable();
1168 1168  
1169 1169          lwp_ctmpl_clear(ttolwp(t), B_FALSE);
1170 1170  }
1171 1171  
1172 1172  int
1173 1173  lwp_suspend(kthread_t *t)
1174 1174  {
1175 1175          int tid;
1176 1176          proc_t *p = ttoproc(t);
1177 1177  
1178 1178          ASSERT(MUTEX_HELD(&p->p_lock));
1179 1179  
1180 1180          /*
1181 1181           * Set the thread's TP_HOLDLWP flag so it will stop in holdlwp().
1182 1182           * If an lwp is stopping itself, there is no need to wait.
1183 1183           */
1184 1184  top:
1185 1185          t->t_proc_flag |= TP_HOLDLWP;
1186 1186          if (t == curthread) {
1187 1187                  t->t_sig_check = 1;
1188 1188          } else {
1189 1189                  /*
1190 1190                   * Make sure the lwp stops promptly.
1191 1191                   */
1192 1192                  thread_lock(t);
1193 1193                  t->t_sig_check = 1;
1194 1194                  /*
1195 1195                   * XXX Should use virtual stop like /proc does instead of
1196 1196                   * XXX waking the thread to get it to stop.
1197 1197                   */
1198 1198                  if (ISWAKEABLE(t) || ISWAITING(t)) {
1199 1199                          setrun_locked(t);
1200 1200                  } else if (t->t_state == TS_ONPROC && t->t_cpu != CPU) {
1201 1201                          poke_cpu(t->t_cpu->cpu_id);
1202 1202                  }
1203 1203  
1204 1204                  tid = t->t_tid;  /* remember thread ID */
1205 1205                  /*
1206 1206                   * Wait for lwp to stop
1207 1207                   */
1208 1208                  while (!SUSPENDED(t)) {
1209 1209                          /*
1210 1210                           * Drop the thread lock before waiting and reacquire it
1211 1211                           * afterwards, so the thread can change its t_state
1212 1212                           * field.
1213 1213                           */
1214 1214                          thread_unlock(t);
1215 1215  
1216 1216                          /*
1217 1217                           * Check if aborted by exitlwps().
1218 1218                           */
1219 1219                          if (p->p_flag & SEXITLWPS)
1220 1220                                  lwp_exit();
1221 1221  
1222 1222                          /*
1223 1223                           * Cooperate with jobcontrol signals and /proc stopping
1224 1224                           * by calling cv_wait_sig() to wait for the target
1225 1225                           * lwp to stop.  Just using cv_wait() can lead to
1226 1226                           * deadlock because, if some other lwp has been
1227 1227                           * stopped by either of these mechanisms, then
1228 1228                           * p_lwprcnt will never become zero if we do a cv_wait().
1229 1229                           */
1230 1230                          if (!cv_wait_sig(&p->p_holdlwps, &p->p_lock))
1231 1231                                  return (EINTR);
1232 1232  
1233 1233                          /*
1234 1234                           * Check to see if thread died while we were
1235 1235                           * waiting for it to suspend.
1236 1236                           */
1237 1237                          if (idtot(p, tid) == NULL)
1238 1238                                  return (ESRCH);
1239 1239  
1240 1240                          thread_lock(t);
1241 1241                          /*
1242 1242                           * If the TP_HOLDLWP flag went away, lwp_continue()
1243 1243                           * or vfork() must have been called while we were
1244 1244                           * waiting, so start over again.
1245 1245                           */
1246 1246                          if ((t->t_proc_flag & TP_HOLDLWP) == 0) {
1247 1247                                  thread_unlock(t);
1248 1248                                  goto top;
1249 1249                          }
1250 1250                  }
1251 1251                  thread_unlock(t);
1252 1252          }
1253 1253          return (0);
1254 1254  }
1255 1255  
1256 1256  /*
1257 1257   * continue an lwp that has been stopped by lwp_suspend().
1258 1258   */
1259 1259  void
1260 1260  lwp_continue(kthread_t *t)
1261 1261  {
1262 1262          proc_t *p = ttoproc(t);
1263 1263          int was_suspended = t->t_proc_flag & TP_HOLDLWP;
1264 1264  
1265 1265          ASSERT(MUTEX_HELD(&p->p_lock));
1266 1266  
1267 1267          t->t_proc_flag &= ~TP_HOLDLWP;
1268 1268          thread_lock(t);
1269 1269          if (SUSPENDED(t) &&
1270 1270              !(p->p_flag & (SHOLDFORK | SHOLDFORK1 | SHOLDWATCH))) {
1271 1271                  p->p_lwprcnt++;
1272 1272                  t->t_schedflag |= TS_CSTART;
1273 1273                  setrun_locked(t);
1274 1274          }
1275 1275          thread_unlock(t);
1276 1276          /*
1277 1277           * Wake up anyone waiting for this thread to be suspended.
1278 1278           */
1279 1279          if (was_suspended)
1280 1280                  cv_broadcast(&p->p_holdlwps);
1281 1281  }
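
           /*
            * A minimal usage sketch for the suspend/continue pair (illustrative,
            * not the literal syslwp_suspend() code).  The caller is assumed to
            * have already found the target thread t in the current process p;
            * both routines require p->p_lock to be held:
            *
            *      mutex_enter(&p->p_lock);
            *      error = lwp_suspend(t);         -- returns 0, EINTR or ESRCH
            *      if (error == 0) {
            *              ... operate on the stopped lwp ...
            *              lwp_continue(t);
            *      }
            *      mutex_exit(&p->p_lock);
            */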
1282 1282  
1283 1283  /*
1284 1284   * ********************************
1285 1285   *  Miscellaneous lwp routines    *
1286 1286   * ********************************
1287 1287   */
1288 1288  /*
1289 1289   * When a process is undergoing a forkall(), its p_flag is set to SHOLDFORK.
1290 1290   * This will cause the process's lwps to stop at a hold point.  A hold
1291 1291   * point is where a kernel thread has a flat stack.  This is at the
1292 1292   * return from a system call and at the return from a user level trap.
1293 1293   *
1294 1294   * When a process is undergoing a fork1() or vfork(), its p_flag is set to
1295 1295   * SHOLDFORK1.  This will cause the process's lwps to stop at a modified
1296 1296   * hold point.  The lwps in the process are not being cloned, so they
1297 1297   * are held at the usual hold points and also within issig_forreal().
1298 1298   * This has the side-effect that their system calls do not return
1299 1299   * showing EINTR.
1300 1300   *
1301 1301   * An lwp can also be held.  This is identified by the TP_HOLDLWP flag on
1302 1302   * the thread.  The TP_HOLDLWP flag is set in lwp_suspend(), where the active
1303 1303   * lwp is waiting for the target lwp to be stopped.
1304 1304   */
1305 1305  void
1306 1306  holdlwp(void)
1307 1307  {
1308 1308          proc_t *p = curproc;
1309 1309          kthread_t *t = curthread;
1310 1310  
1311 1311          mutex_enter(&p->p_lock);
1312 1312          /*
1313 1313           * Don't terminate immediately if the process is dumping core.
1314 1314           * Once the process has dumped core, all lwps are terminated.
1315 1315           */
1316 1316          if (!(p->p_flag & SCOREDUMP)) {
1317 1317                  if ((p->p_flag & SEXITLWPS) || (t->t_proc_flag & TP_EXITLWP))
1318 1318                          lwp_exit();
1319 1319          }
1320 1320          if (!(ISHOLD(p)) && !(p->p_flag & (SHOLDFORK1 | SHOLDWATCH))) {
1321 1321                  mutex_exit(&p->p_lock);
1322 1322                  return;
1323 1323          }
1324 1324          /*
1325 1325           * stop() decrements p->p_lwprcnt and cv_signal()s &p->p_holdlwps
1326 1326           * when p->p_lwprcnt becomes zero.
1327 1327           */
1328 1328          stop(PR_SUSPENDED, SUSPEND_NORMAL);
1329 1329          if (p->p_flag & SEXITLWPS)
1330 1330                  lwp_exit();
1331 1331          mutex_exit(&p->p_lock);
1332 1332  }
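
           /*
            * A sketch of how a hold point is expected to reach this function
            * (modeled on the syscall-return path; simplified, not the literal
            * code):
            *
            *      if (ISHOLD(p))
            *              holdlwp();
            */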
1333 1333  
1334 1334  /*
1335 1335   * Have all lwps within the process hold at a point where they are
1336 1336   * cloneable (SHOLDFORK) or just safe w.r.t. fork1 (SHOLDFORK1).
1337 1337   */
1338 1338  int
1339 1339  holdlwps(int holdflag)
1340 1340  {
1341 1341          proc_t *p = curproc;
1342 1342  
1343 1343          ASSERT(holdflag == SHOLDFORK || holdflag == SHOLDFORK1);
1344 1344          mutex_enter(&p->p_lock);
1345 1345          schedctl_finish_sigblock(curthread);
1346 1346  again:
1347 1347          while (p->p_flag & (SEXITLWPS | SHOLDFORK | SHOLDFORK1 | SHOLDWATCH)) {
1348 1348                  /*
1349 1349                   * If another lwp is doing a forkall() or proc_exit(), bail out.
1350 1350                   */
1351 1351                  if (p->p_flag & (SEXITLWPS | SHOLDFORK)) {
1352 1352                          mutex_exit(&p->p_lock);
1353 1353                          return (0);
1354 1354                  }
1355 1355                  /*
1356 1356                   * Another lwp is doing a fork1() or is undergoing
1357 1357                   * watchpoint activity.  We hold here for it to complete.
1358 1358                   */
1359 1359                  stop(PR_SUSPENDED, SUSPEND_NORMAL);
1360 1360          }
1361 1361          p->p_flag |= holdflag;
1362 1362          pokelwps(p);
1363 1363          --p->p_lwprcnt;
1364 1364          /*
1365 1365           * Wait for the process to become quiescent (p->p_lwprcnt == 0).
1366 1366           */
1367 1367          while (p->p_lwprcnt > 0) {
1368 1368                  /*
1369 1369                   * Check if aborted by exitlwps().
1370 1370                   * Also check if SHOLDWATCH is set; it takes precedence.
1371 1371                   */
1372 1372                  if (p->p_flag & (SEXITLWPS | SHOLDWATCH)) {
1373 1373                          p->p_lwprcnt++;
1374 1374                          p->p_flag &= ~holdflag;
1375 1375                          cv_broadcast(&p->p_holdlwps);
1376 1376                          goto again;
1377 1377                  }
1378 1378                  /*
1379 1379                   * Cooperate with jobcontrol signals and /proc stopping.
1380 1380                   * If some other lwp has stopped by either of these
1381 1381                   * mechanisms, then p_lwprcnt will never become zero
1382 1382                   * and the process will appear deadlocked unless we
1383 1383                   * stop here in sympathy with the other lwp before
1384 1384                   * doing the cv_wait() below.
1385 1385                   *
1386 1386                   * If the other lwp stops after we do the cv_wait(), it
1387 1387                   * will wake us up to loop around and do the sympathy stop.
1388 1388                   *
1389 1389                   * Since stop() drops p->p_lock, we must start from
1390 1390                   * the top again on returning from stop().
1391 1391                   */
1392 1392                  if (p->p_stopsig || (curthread->t_proc_flag & TP_PRSTOP)) {
1393 1393                          int whystop = p->p_stopsig? PR_JOBCONTROL :
1394 1394                              PR_REQUESTED;
1395 1395                          p->p_lwprcnt++;
1396 1396                          p->p_flag &= ~holdflag;
1397 1397                          stop(whystop, p->p_stopsig);
1398 1398                          goto again;
1399 1399                  }
1400 1400                  cv_wait(&p->p_holdlwps, &p->p_lock);
1401 1401          }
1402 1402          p->p_lwprcnt++;
1403 1403          p->p_flag &= ~holdflag;
1404 1404          mutex_exit(&p->p_lock);
1405 1405          return (1);
1406 1406  }
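
           /*
            * A sketch of the expected caller pattern (simplified from the fork
            * paths; not the literal fork code).  A zero return means another
            * lwp is exiting the process or doing a forkall(), so the caller
            * must back out:
            *
            *      if (!holdlwps(SHOLDFORK1))
            *              return (EINTR);         -- aborted; do not proceed
            *      ... do the work that requires the other lwps to be held ...
            *      mutex_enter(&p->p_lock);
            *      continuelwps(p);                -- release the stopped lwps
            *      mutex_exit(&p->p_lock);
            */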
1407 1407  
1408 1408  /*
1409 1409   * See comments for holdwatch(), below.
1410 1410   */
1411 1411  static int
1412 1412  holdcheck(int clearflags)
1413 1413  {
1414 1414          proc_t *p = curproc;
1415 1415  
1416 1416          /*
1417 1417           * If we are trying to exit, that takes precedence over anything else.
1418 1418           */
1419 1419          if (p->p_flag & SEXITLWPS) {
1420 1420                  p->p_lwprcnt++;
1421 1421                  p->p_flag &= ~clearflags;
1422 1422                  lwp_exit();
1423 1423          }
1424 1424  
1425 1425          /*
1426 1426           * If another thread is calling fork1(), stop the current thread so the
1427 1427           * other can complete.
1428 1428           */
1429 1429          if (p->p_flag & SHOLDFORK1) {
1430 1430                  p->p_lwprcnt++;
1431 1431                  stop(PR_SUSPENDED, SUSPEND_NORMAL);
1432 1432                  if (p->p_flag & SEXITLWPS) {
1433 1433                          p->p_flag &= ~clearflags;
1434 1434                          lwp_exit();
1435 1435                  }
1436 1436                  return (-1);
1437 1437          }
1438 1438  
1439 1439          /*
1440 1440           * If another thread is calling fork(), then indicate we are doing
1441 1441           * watchpoint activity.  This will cause holdlwps() above to stop the
1442 1442           * forking thread, at which point we can continue with watchpoint
1443 1443           * activity.
1444 1444           */
1445 1445          if (p->p_flag & SHOLDFORK) {
1446 1446                  p->p_lwprcnt++;
1447 1447                  while (p->p_flag & SHOLDFORK) {
1448 1448                          p->p_flag |= SHOLDWATCH;
1449 1449                          cv_broadcast(&p->p_holdlwps);
1450 1450                          cv_wait(&p->p_holdlwps, &p->p_lock);
1451 1451                          p->p_flag &= ~SHOLDWATCH;
1452 1452                  }
1453 1453                  return (-1);
1454 1454          }
1455 1455  
1456 1456          return (0);
1457 1457  }
1458 1458  
1459 1459  /*
1460 1460   * Stop all lwps within the process, holding themselves in the kernel while the
1461 1461   * active lwp undergoes watchpoint activity.  This is more complicated than
1462 1462   * expected because stop() relies on calling holdwatch() in order to copyin data
1463 1463   * from the user's address space.  A double barrier is used to prevent an
1464 1464   * infinite loop.
1465 1465   *
1466 1466   *      o The first thread into holdwatch() is the 'master' thread and does
1467 1467   *        the following:
1468 1468   *
1469 1469   *              - Sets SHOLDWATCH on the current process
1470 1470   *              - Sets TP_WATCHSTOP on the current thread
1471 1471   *              - Waits for all threads to be either stopped or have
1472 1472   *                TP_WATCHSTOP set.
1473 1473   *              - Sets the SWATCHOK flag on the process
1474 1474   *              - Unsets TP_WATCHSTOP
1475 1475   *              - Waits for the other threads to completely stop
1476 1476   *              - Unsets SWATCHOK
1477 1477   *
1478 1478   *      o If SHOLDWATCH is already set when we enter this function, then another
1479 1479   *        thread is already trying to stop this thread.  This 'slave' thread
1480 1480   *        does the following:
1481 1481   *
1482 1482   *              - Sets TP_WATCHSTOP on the current thread
1483 1483   *              - Waits for SWATCHOK flag to be set
1484 1484   *              - Calls stop()
1485 1485   *
1486 1486   *      o If SWATCHOK is set on the process, then this function immediately
1487 1487   *        returns, as we must have been called via stop().
1488 1488   *
1489 1489   * In addition, there are other flags that take precedence over SHOLDWATCH:
1490 1490   *
1491 1491   *      o If SEXITLWPS is set, exit immediately.
1492 1492   *
1493 1493   *      o If SHOLDFORK1 is set, wait for fork1() to complete.
1494 1494   *
1495 1495   *      o If SHOLDFORK is set, then watchpoint activity takes precedence.  In
1496 1496   *        this case, set SHOLDWATCH, signalling the forking thread to stop first.
1497 1497   *
1498 1498   *      o If the process is being stopped via /proc (TP_PRSTOP is set), then we
1499 1499   *        stop the current thread.
1500 1500   *
1501 1501   * Returns 0 if all threads have been quiesced.  Returns non-zero if not all
1502 1502   * threads were stopped, or the list of watched pages has changed.
1503 1503   */
1504 1504  int
1505 1505  holdwatch(void)
1506 1506  {
1507 1507          proc_t *p = curproc;
1508 1508          kthread_t *t = curthread;
1509 1509          int ret = 0;
1510 1510  
1511 1511          mutex_enter(&p->p_lock);
1512 1512  
1513 1513          p->p_lwprcnt--;
1514 1514  
1515 1515          /*
1516 1516           * Check for bail-out conditions as outlined above.
1517 1517           */
1518 1518          if (holdcheck(0) != 0) {
1519 1519                  mutex_exit(&p->p_lock);
1520 1520                  return (-1);
1521 1521          }
1522 1522  
1523 1523          if (!(p->p_flag & SHOLDWATCH)) {
1524 1524                  /*
1525 1525                   * We are the master watchpoint thread.  Set SHOLDWATCH and poke
1526 1526                   * the other threads.
1527 1527                   */
1528 1528                  p->p_flag |= SHOLDWATCH;
1529 1529                  pokelwps(p);
1530 1530  
1531 1531                  /*
1532 1532                   * Wait for all threads to be stopped or have TP_WATCHSTOP set.
1533 1533                   */
1534 1534                  while (pr_allstopped(p, 1) > 0) {
1535 1535                          if (holdcheck(SHOLDWATCH) != 0) {
1536 1536                                  p->p_flag &= ~SHOLDWATCH;
1537 1537                                  mutex_exit(&p->p_lock);
1538 1538                                  return (-1);
1539 1539                          }
1540 1540  
1541 1541                          cv_wait(&p->p_holdlwps, &p->p_lock);
1542 1542                  }
1543 1543  
1544 1544                  /*
1545 1545                   * All threads are now stopped or in the process of stopping.
1546 1546                   * Set SWATCHOK and let them stop completely.
1547 1547                   */
1548 1548                  p->p_flag |= SWATCHOK;
1549 1549                  t->t_proc_flag &= ~TP_WATCHSTOP;
1550 1550                  cv_broadcast(&p->p_holdlwps);
1551 1551  
1552 1552                  while (pr_allstopped(p, 0) > 0) {
1553 1553                          /*
1554 1554                           * At first glance, it may appear that we don't need a
1555 1555                           * call to holdcheck() here.  But if the process gets a
1556 1556                           * SIGKILL signal, one of our stopped threads may have
1557 1557                           * been awakened and is waiting in exitlwps(), which
1558 1558                           * takes precedence over watchpoints.
1559 1559                           */
1560 1560                          if (holdcheck(SHOLDWATCH | SWATCHOK) != 0) {
1561 1561                                  p->p_flag &= ~(SHOLDWATCH | SWATCHOK);
1562 1562                                  mutex_exit(&p->p_lock);
1563 1563                                  return (-1);
1564 1564                          }
1565 1565  
1566 1566                          cv_wait(&p->p_holdlwps, &p->p_lock);
1567 1567                  }
1568 1568  
1569 1569                  /*
1570 1570                   * All threads are now completely stopped.
1571 1571                   */
1572 1572                  p->p_flag &= ~SWATCHOK;
1573 1573                  p->p_flag &= ~SHOLDWATCH;
1574 1574                  p->p_lwprcnt++;
1575 1575  
1576 1576          } else if (!(p->p_flag & SWATCHOK)) {
1577 1577  
1578 1578                  /*
1579 1579                   * SHOLDWATCH is set, so another thread is trying to do
1580 1580                   * watchpoint activity.  Indicate this thread is stopping, and
1581 1581                   * wait for the OK from the master thread.
1582 1582                   */
1583 1583                  t->t_proc_flag |= TP_WATCHSTOP;
1584 1584                  cv_broadcast(&p->p_holdlwps);
1585 1585  
1586 1586                  while (!(p->p_flag & SWATCHOK)) {
1587 1587                          if (holdcheck(0) != 0) {
1588 1588                                  t->t_proc_flag &= ~TP_WATCHSTOP;
1589 1589                                  mutex_exit(&p->p_lock);
1590 1590                                  return (-1);
1591 1591                          }
1592 1592  
1593 1593                          cv_wait(&p->p_holdlwps, &p->p_lock);
1594 1594                  }
1595 1595  
1596 1596                  /*
1597 1597                   * Once the master thread has given the OK, this thread can
1598 1598                   * actually call stop().
1599 1599                   */
1600 1600                  t->t_proc_flag &= ~TP_WATCHSTOP;
1601 1601                  p->p_lwprcnt++;
1602 1602  
1603 1603                  stop(PR_SUSPENDED, SUSPEND_NORMAL);
1604 1604  
1605 1605                  /*
1606 1606                   * It's not OK to do watchpoint activity; notify the caller
1607 1607                   * to retry.
1608 1608                   */
1609 1609                  ret = -1;
1610 1610  
1611 1611          } else {
1612 1612  
1613 1613                  /*
1614 1614                   * The only way we can hit the case where SHOLDWATCH is set and
1615 1615                   * SWATCHOK is set is if we are triggering this from within a
1616 1616                   * stop() call.  Assert that this is the case.
1617 1617                   */
1618 1618  
1619 1619                  ASSERT(t->t_proc_flag & TP_STOPPING);
1620 1620                  p->p_lwprcnt++;
1621 1621          }
1622 1622  
1623 1623          mutex_exit(&p->p_lock);
1624 1624  
1625 1625          return (ret);
1626 1626  }
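
           /*
            * A sketch of the retry discipline holdwatch() imposes on callers in
            * the watchpoint code (illustrative, not the literal caller): a
            * non-zero return means the lwps were not quiesced or the watched
            * page list may have changed, so the operation is simply retried.
            *
            *      while (holdwatch() != 0)
            *              ;                       -- quiesce failed; try again
            *      ... perform watchpoint activity; the other lwps are stopped ...
            *      mutex_enter(&p->p_lock);
            *      continuelwps(p);
            *      mutex_exit(&p->p_lock);
            */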
1627 1627  
1628 1628  /*
1629 1629   * force all interruptible lwps to trap into the kernel.
1630 1630   */
1631 1631  void
1632 1632  pokelwps(proc_t *p)
1633 1633  {
1634 1634          kthread_t *t;
1635 1635  
1636 1636          ASSERT(MUTEX_HELD(&p->p_lock));
1637 1637  
1638 1638          t = p->p_tlist;
1639 1639          do {
1640 1640                  if (t == curthread)
1641 1641                          continue;
1642 1642                  thread_lock(t);
1643 1643                  aston(t);       /* make thread trap or do post_syscall */
1644 1644                  if (ISWAKEABLE(t) || ISWAITING(t)) {
1645 1645                          setrun_locked(t);
1646 1646                  } else if (t->t_state == TS_STOPPED) {
1647 1647                          /*
1648 1648                           * Ensure that proc_exit() is not blocked by lwps
1649 1649                           * that were stopped via jobcontrol or /proc.
1650 1650                           */
1651 1651                          if (p->p_flag & SEXITLWPS) {
1652 1652                                  p->p_stopsig = 0;
1653 1653                                  t->t_schedflag |= (TS_XSTART | TS_PSTART);
1654 1654                                  setrun_locked(t);
1655 1655                          }
1656 1656                          /*
1657 1657                           * If we are holding lwps for a forkall(),
1658 1658                           * force lwps that have been suspended via
1659 1659                           * lwp_suspend() and are suspended inside
1660 1660                           * of a system call to proceed to their
1661 1661                           * holdlwp() points where they are clonable.
1662 1662                           */
1663 1663                          if ((p->p_flag & SHOLDFORK) && SUSPENDED(t)) {
1664 1664                                  if ((t->t_schedflag & TS_CSTART) == 0) {
1665 1665                                          p->p_lwprcnt++;
1666 1666                                          t->t_schedflag |= TS_CSTART;
1667 1667                                          setrun_locked(t);
1668 1668                                  }
1669 1669                          }
1670 1670                  } else if (t->t_state == TS_ONPROC) {
1671 1671                          if (t->t_cpu != CPU)
1672 1672                                  poke_cpu(t->t_cpu->cpu_id);
1673 1673                  }
1674 1674                  thread_unlock(t);
1675 1675          } while ((t = t->t_forw) != p->p_tlist);
1676 1676  }
1677 1677  
1678 1678  /*
1679 1679   * undo the effects of holdlwps() or holdwatch().
1680 1680   */
1681 1681  void
1682 1682  continuelwps(proc_t *p)
1683 1683  {
1684 1684          kthread_t *t;
1685 1685  
1686 1686          /*
1687 1687           * If this flag is set, then the original holdwatch() didn't actually
1688 1688           * stop the process.  See comments for holdwatch().
1689 1689           */
1690 1690          if (p->p_flag & SWATCHOK) {
1691 1691                  ASSERT(curthread->t_proc_flag & TP_STOPPING);
1692 1692                  return;
1693 1693          }
1694 1694  
1695 1695          ASSERT(MUTEX_HELD(&p->p_lock));
1696 1696          ASSERT((p->p_flag & (SHOLDFORK | SHOLDFORK1 | SHOLDWATCH)) == 0);
1697 1697  
1698 1698          t = p->p_tlist;
1699 1699          do {
1700 1700                  thread_lock(t);         /* SUSPENDED looks at t_schedflag */
1701 1701                  if (SUSPENDED(t) && !(t->t_proc_flag & TP_HOLDLWP)) {
1702 1702                          p->p_lwprcnt++;
1703 1703                          t->t_schedflag |= TS_CSTART;
1704 1704                          setrun_locked(t);
1705 1705                  }
1706 1706                  thread_unlock(t);
1707 1707          } while ((t = t->t_forw) != p->p_tlist);
1708 1708  }
1709 1709  
1710 1710  /*
1711 1711   * Force all LWPs in the current process other than the caller to exit,
1712 1712   * and then cv_wait() on p_holdlwps for them to exit.  The exitlwps() function
1713 1713   * is typically used in these situations:
1714 1714   *
1715 1715   *   (a) prior to an exec() system call
1716 1716   *   (b) prior to dumping a core file
1717 1717   *   (c) prior to a uadmin() shutdown
1718 1718   *
1719 1719   * If the 'coredump' flag is set, other LWPs are quiesced but not destroyed.
1720 1720   * Multiple threads in the process can call this function at one time by
1721 1721   * triggering execs or core dumps simultaneously, so the SEXITLWPS bit is used
1722 1722   * to declare one particular thread the winner who gets to kill the others.
1723 1723   * If a thread wins the exitlwps() dance, zero is returned; otherwise an
1724 1724   * appropriate errno value is returned to caller for its system call to return.
1725 1725   */
1726 1726  int
1727 1727  exitlwps(int coredump)
1728 1728  {
1729 1729          proc_t *p = curproc;
1730 1730          int heldcnt;
1731 1731  
1732 1732          if (curthread->t_door)
1733 1733                  door_slam();
1734 1734          if (p->p_door_list)
1735 1735                  door_revoke_all();
1736 1736          if (curthread->t_schedctl != NULL)
1737 1737                  schedctl_lwp_cleanup(curthread);
1738 1738  
1739 1739          /*
1740 1740           * Ensure that before starting to wait for other lwps to exit,
1741 1741           * cleanup all upimutexes held by curthread. Otherwise, some other
1742 1742           * lwp could be waiting (uninterruptibly) for a upimutex held by
1743 1743           * curthread, and the call to pokelwps() below would deadlock.
1744 1744           * Even if a blocked upimutex_lock is made interruptible,
1745 1745           * curthread's upimutexes need to be unlocked: do it here.
1746 1746           */
1747 1747          if (curthread->t_upimutex != NULL)
1748 1748                  upimutex_cleanup();
1749 1749  
1750 1750          /*
1751 1751           * Grab p_lock in order to check and set SEXITLWPS to declare a winner.
1752 1752           * We must also block any further /proc access from this point forward.
1753 1753           */
1754 1754          mutex_enter(&p->p_lock);
1755 1755          prbarrier(p);
1756 1756  
1757 1757          if (p->p_flag & SEXITLWPS) {
1758 1758                  mutex_exit(&p->p_lock);
1759 1759                  aston(curthread);       /* force a trip through post_syscall */
1760 1760                  return (set_errno(EINTR));
1761 1761          }
1762 1762  
1763 1763          p->p_flag |= SEXITLWPS;
1764 1764          if (coredump)           /* tell other lwps to stop, not exit */
1765 1765                  p->p_flag |= SCOREDUMP;
1766 1766  
1767 1767          /*
1768 1768           * Give precedence to exitlwps() if a holdlwps() is
1769 1769           * in progress. The lwp doing the holdlwps() operation
1770 1770           * is aborted when it is awakened.
1771 1771           */
1772 1772          while (p->p_flag & (SHOLDFORK | SHOLDFORK1 | SHOLDWATCH)) {
1773 1773                  cv_broadcast(&p->p_holdlwps);
1774 1774                  cv_wait(&p->p_holdlwps, &p->p_lock);
1775 1775                  prbarrier(p);
1776 1776          }
1777 1777          p->p_flag |= SHOLDFORK;
1778 1778          pokelwps(p);
1779 1779  
1780 1780          /*
1781 1781           * Wait for process to become quiescent.
1782 1782           */
1783 1783          --p->p_lwprcnt;
1784 1784          while (p->p_lwprcnt > 0) {
1785 1785                  cv_wait(&p->p_holdlwps, &p->p_lock);
1786 1786                  prbarrier(p);
1787 1787          }
1788 1788          p->p_lwprcnt++;
1789 1789          ASSERT(p->p_lwprcnt == 1);
1790 1790  
1791 1791          /*
1792 1792           * The SCOREDUMP flag puts the process into a quiescent
1793 1793           * state.  The process's lwps remain attached to this
1794 1794           * process until exitlwps() is called again without the
1795 1795           * 'coredump' flag set, at which point the lwps are terminated
1796 1796           * and the process can exit.
1797 1797           */
1798 1798          if (coredump) {
1799 1799                  p->p_flag &= ~(SCOREDUMP | SHOLDFORK | SEXITLWPS);
1800 1800                  goto out;
1801 1801          }
1802 1802  
1803 1803          /*
1804 1804           * Determine if there are any lwps left dangling in
1805 1805           * the stopped state.  This happens when exitlwps()
1806 1806           * aborts a holdlwps() operation.
1807 1807           */
1808 1808          p->p_flag &= ~SHOLDFORK;
1809 1809          if ((heldcnt = p->p_lwpcnt) > 1) {
1810 1810                  kthread_t *t;
1811 1811                  for (t = curthread->t_forw; --heldcnt > 0; t = t->t_forw) {
1812 1812                          t->t_proc_flag &= ~TP_TWAIT;
1813 1813                          lwp_continue(t);
1814 1814                  }
1815 1815          }
1816 1816  
1817 1817          /*
1818 1818           * Wait for all other lwps to exit.
1819 1819           */
1820 1820          --p->p_lwprcnt;
1821 1821          while (p->p_lwpcnt > 1) {
1822 1822                  cv_wait(&p->p_holdlwps, &p->p_lock);
1823 1823                  prbarrier(p);
1824 1824          }
1825 1825          ++p->p_lwprcnt;
1826 1826          ASSERT(p->p_lwpcnt == 1 && p->p_lwprcnt == 1);
1827 1827  
1828 1828          p->p_flag &= ~SEXITLWPS;
1829 1829          curthread->t_proc_flag &= ~TP_TWAIT;
1830 1830  
1831 1831  out:
1832 1832          if (!coredump && p->p_zombcnt) {        /* cleanup the zombie lwps */
1833 1833                  lwpdir_t *ldp;
1834 1834                  lwpent_t *lep;
1835 1835                  int i;
1836 1836  
1837 1837                  for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
1838 1838                          lep = ldp->ld_entry;
1839 1839                          if (lep != NULL && lep->le_thread != curthread) {
1840 1840                                  ASSERT(lep->le_thread == NULL);
1841 1841                                  p->p_zombcnt--;
1842 1842                                  lwp_hash_out(p, lep->le_lwpid);
1843 1843                          }
1844 1844                  }
1845 1845                  ASSERT(p->p_zombcnt == 0);
1846 1846          }
1847 1847  
1848 1848          /*
1849 1849           * If some other LWP in the process wanted us to suspend ourself,
1850 1850           * then we will not do it.  The other LWP is now terminated and
1851 1851           * no one will ever continue us again if we suspend ourself.
1852 1852           */
1853 1853          curthread->t_proc_flag &= ~TP_HOLDLWP;
1854 1854          p->p_flag &= ~(SHOLDFORK | SHOLDFORK1 | SHOLDWATCH | SLWPWRAP);
1855 1855          mutex_exit(&p->p_lock);
1856 1856          return (0);
1857 1857  }
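
           /*
            * A sketch of how an exec or shutdown path is expected to call this
            * (simplified, not the literal callers).  Only the winner of the
            * SEXITLWPS race proceeds; a loser returns with errno already set
            * via set_errno():
            *
            *      if (exitlwps(0) != 0)
            *              return (EINTR);         -- another lwp won the race
            *      ... this is now the only lwp in the process; safe to exec ...
            *
            * A core-dump path instead passes a non-zero 'coredump' argument,
            * which quiesces but preserves the other lwps; exitlwps(0) is
            * called again later if the process actually exits.
            */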
1858 1858  
1859 1859  /*
1860 1860   * duplicate an lwp.
1861 1861   */
1862 1862  klwp_t *
1863 1863  forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid)
1864 1864  {
1865 1865          klwp_t *clwp;
1866 1866          void *tregs, *tfpu;
1867 1867          kthread_t *t = lwptot(lwp);
1868 1868          kthread_t *ct;
1869 1869          proc_t *p = lwptoproc(lwp);
1870 1870          int cid;
1871 1871          void *bufp;
1872 1872          void *brand_data;
1873 1873          int val;
1874 1874  
1875 1875          ASSERT(p == curproc);
1876 1876          ASSERT(t == curthread || (SUSPENDED(t) && lwp->lwp_asleep == 0));
1877 1877  
1878 1878  #if defined(__sparc)
1879 1879          if (t == curthread)
1880 1880                  (void) flush_user_windows_to_stack(NULL);
1881 1881  #endif
1882 1882  
1883 1883          if (t == curthread)
1884 1884                  /* copy args out of registers first */
1885 1885                  (void) save_syscall_args();
1886 1886  
1887 1887          clwp = lwp_create(cp->p_lwpcnt == 0 ? lwp_rtt_initial : lwp_rtt,
1888 1888              NULL, 0, cp, TS_STOPPED, t->t_pri, &t->t_hold, NOCLASS, lwpid);
1889 1889          if (clwp == NULL)
1890 1890                  return (NULL);
1891 1891  
1892 1892          /*
1893 1893           * most of the parent's lwp can be copied to its duplicate,
1894 1894           * except for the fields that are unique to each lwp, like
1895 1895           * lwp_thread, lwp_procp, lwp_regs, and lwp_ap.
1896 1896           */
1897 1897          ct = clwp->lwp_thread;
1898 1898          tregs = clwp->lwp_regs;
1899 1899          tfpu = clwp->lwp_fpu;
1900 1900          brand_data = clwp->lwp_brand;
1901 1901  
1902 1902          /*
1903 1903           * Copy parent lwp to child lwp.  Hold child's p_lock to prevent
1904 1904           * mstate_aggr_state() from reading stale mstate entries copied
1905 1905           * from lwp to clwp.
1906 1906           */
1907 1907          mutex_enter(&cp->p_lock);
1908 1908          *clwp = *lwp;
1909 1909  
1910 1910          /* clear microstate and resource usage data in new lwp */
1911 1911          init_mstate(ct, LMS_STOPPED);
1912 1912          bzero(&clwp->lwp_ru, sizeof (clwp->lwp_ru));
1913 1913          mutex_exit(&cp->p_lock);
1914 1914  
1915 1915          /* fix up child's lwp */
1916 1916  
1917 1917          clwp->lwp_pcb.pcb_flags = 0;
1918 1918  #if defined(__sparc)
1919 1919          clwp->lwp_pcb.pcb_step = STEP_NONE;
1920 1920  #endif
1921 1921          clwp->lwp_cursig = 0;
1922 1922          clwp->lwp_extsig = 0;
1923 1923          clwp->lwp_curinfo = (struct sigqueue *)0;
1924 1924          clwp->lwp_thread = ct;
1925 1925          ct->t_sysnum = t->t_sysnum;
1926 1926          clwp->lwp_regs = tregs;
1927 1927          clwp->lwp_fpu = tfpu;
1928 1928          clwp->lwp_brand = brand_data;
1929 1929          clwp->lwp_ap = clwp->lwp_arg;
1930 1930          clwp->lwp_procp = cp;
1931 1931          bzero(clwp->lwp_timer, sizeof (clwp->lwp_timer));
1932 1932          clwp->lwp_lastfault = 0;
1933 1933          clwp->lwp_lastfaddr = 0;
1934 1934  
1935 1935          /* copy parent's struct regs to child. */
1936 1936          lwp_forkregs(lwp, clwp);
1937 1937  
1938 1938          /*
1939 1939           * Fork thread context ops, if any.
1940 1940           */
1941 1941          if (t->t_ctx)
1942 1942                  forkctx(t, ct);
1943 1943  
1944 1944          /* fix door state in the child */
1945 1945          if (t->t_door)
1946 1946                  door_fork(t, ct);
1947 1947  
1948 1948          /* copy current contract templates, clear latest contracts */
1949 1949          lwp_ctmpl_copy(clwp, lwp);
1950 1950  
1951 1951          mutex_enter(&cp->p_lock);
1952 1952          /* lwp_create() set the TP_HOLDLWP flag */
1953 1953          if (!(t->t_proc_flag & TP_HOLDLWP))
1954 1954                  ct->t_proc_flag &= ~TP_HOLDLWP;
1955 1955          if (cp->p_flag & SMSACCT)
1956 1956                  ct->t_proc_flag |= TP_MSACCT;
1957 1957          mutex_exit(&cp->p_lock);
1958 1958  
1959 1959          /* Allow brand to propagate brand-specific state */
1960 1960          if (PROC_IS_BRANDED(p))
1961 1961                  BROP(p)->b_forklwp(lwp, clwp);
1962 1962  
1963 1963  retry:
1964 1964          cid = t->t_cid;
1965 1965  
1966 1966          val = CL_ALLOC(&bufp, cid, KM_SLEEP);
1967 1967          ASSERT(val == 0);
1968 1968  
1969 1969          mutex_enter(&p->p_lock);
1970 1970          if (cid != t->t_cid) {
1971 1971                  /*
1972 1972                   * Someone just changed this thread's scheduling class,
1973 1973                   * so try pre-allocating the buffer again.  Hopefully we
1974 1974                   * don't hit this often.
1975 1975                   */
1976 1976                  mutex_exit(&p->p_lock);
1977 1977                  CL_FREE(cid, bufp);
1978 1978                  goto retry;
1979 1979          }
1980 1980  
1981 1981          ct->t_unpark = t->t_unpark;
1982 1982          ct->t_clfuncs = t->t_clfuncs;
1983 1983          CL_FORK(t, ct, bufp);
1984 1984          ct->t_cid = t->t_cid;   /* after data allocated so prgetpsinfo works */
1985 1985          mutex_exit(&p->p_lock);
1986 1986  
1987 1987          return (clwp);
1988 1988  }
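
           /*
            * The class-data retry loop above is a pre-allocate-then-verify
            * pattern: CL_ALLOC() may sleep, so it runs without p->p_lock held,
            * and the thread's scheduling class may change meanwhile; the buffer
            * is committed only once t_cid has been re-checked under p->p_lock.
            * In outline (equivalent to the goto-based code above):
            *
            *      for (;;) {
            *              cid = t->t_cid;
            *              (void) CL_ALLOC(&bufp, cid, KM_SLEEP);
            *              mutex_enter(&p->p_lock);
            *              if (cid == t->t_cid)
            *                      break;          -- class unchanged; commit
            *              mutex_exit(&p->p_lock);
            *              CL_FREE(cid, bufp);     -- lost the race; retry
            *      }
            */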
1989 1989  
1990 1990  /*
1991 1991   * Add a new lwp entry to the lwp directory and to the lwpid hash table.
1992 1992   */
1993 1993  void
1994 1994  lwp_hash_in(proc_t *p, lwpent_t *lep, tidhash_t *tidhash, uint_t tidhash_sz,
1995 1995      int do_lock)
1996 1996  {
1997 1997          tidhash_t *thp = &tidhash[TIDHASH(lep->le_lwpid, tidhash_sz)];
1998 1998          lwpdir_t **ldpp;
1999 1999          lwpdir_t *ldp;
2000 2000          kthread_t *t;
2001 2001  
2002 2002          /*
2003 2003           * Allocate a directory element from the free list.
2004 2004           * Code elsewhere guarantees a free slot.
2005 2005           */
2006 2006          ldp = p->p_lwpfree;
2007 2007          p->p_lwpfree = ldp->ld_next;
2008 2008          ASSERT(ldp->ld_entry == NULL);
2009 2009          ldp->ld_entry = lep;
2010 2010  
2011 2011          if (do_lock)
2012 2012                  mutex_enter(&thp->th_lock);
2013 2013  
2014 2014          /*
2015 2015           * Insert it into the lwpid hash table.
2016 2016           */
2017 2017          ldpp = &thp->th_list;
2018 2018          ldp->ld_next = *ldpp;
2019 2019          *ldpp = ldp;
2020 2020  
2021 2021          /*
2022 2022           * Set the active thread's directory slot entry.
2023 2023           */
2024 2024          if ((t = lep->le_thread) != NULL) {
2025 2025                  ASSERT(lep->le_lwpid == t->t_tid);
2026 2026                  t->t_dslot = (int)(ldp - p->p_lwpdir);
2027 2027          }
2028 2028  
2029 2029          if (do_lock)
2030 2030                  mutex_exit(&thp->th_lock);
2031 2031  }
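
           /*
            * For illustration, assuming the tid hash size is a power of two so
            * that TIDHASH() reduces to masking the low bits of the lwpid: with
            * tidhash_sz == 8, lwpids 1, 9 and 17 all select bucket 1 and are
            * chained through ld_next, most recently inserted first:
            *
            *      TIDHASH(1, 8)  == (1  & 7) == 1
            *      TIDHASH(9, 8)  == (9  & 7) == 1
            *      TIDHASH(17, 8) == (17 & 7) == 1
            */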
2032 2032  
2033 2033  /*
2034 2034   * Remove an lwp from the lwpid hash table and free its directory entry.
2035 2035   * This is done when a detached lwp exits in lwp_exit() or
2036 2036   * when a non-detached lwp is waited for in lwp_wait() or
2037 2037   * when a zombie lwp is detached in lwp_detach().
2038 2038   */
2039 2039  void
2040 2040  lwp_hash_out(proc_t *p, id_t lwpid)
2041 2041  {
2042 2042          tidhash_t *thp = &p->p_tidhash[TIDHASH(lwpid, p->p_tidhash_sz)];
2043 2043          lwpdir_t **ldpp;
2044 2044          lwpdir_t *ldp;
2045 2045          lwpent_t *lep;
2046 2046  
2047 2047          mutex_enter(&thp->th_lock);
2048 2048          for (ldpp = &thp->th_list;
2049 2049              (ldp = *ldpp) != NULL; ldpp = &ldp->ld_next) {
2050 2050                  lep = ldp->ld_entry;
2051 2051                  if (lep->le_lwpid == lwpid) {
2052 2052                          prlwpfree(p, lep);      /* /proc deals with le_trace */
2053 2053                          *ldpp = ldp->ld_next;
2054 2054                          ldp->ld_entry = NULL;
2055 2055                          ldp->ld_next = p->p_lwpfree;
2056 2056                          p->p_lwpfree = ldp;
2057 2057                          kmem_free(lep, sizeof (*lep));
2058 2058                          break;
2059 2059                  }
2060 2060          }
2061 2061          mutex_exit(&thp->th_lock);
2062 2062  }
2063 2063  
2064 2064  /*
2065 2065   * Lookup an lwp in the lwpid hash table by lwpid.
2066 2066   */
2067 2067  lwpdir_t *
2068 2068  lwp_hash_lookup(proc_t *p, id_t lwpid)
2069 2069  {
2070 2070          tidhash_t *thp;
2071 2071          lwpdir_t *ldp;
2072 2072  
2073 2073          /*
2074 2074           * The process may be exiting, after p_tidhash has been set to NULL in
2075 2075           * proc_exit() but before prfree() has been called.  Return failure in
2076 2076           * this case.
2077 2077           */
2078 2078          if (p->p_tidhash == NULL)
2079 2079                  return (NULL);
2080 2080  
2081 2081          thp = &p->p_tidhash[TIDHASH(lwpid, p->p_tidhash_sz)];
2082 2082          for (ldp = thp->th_list; ldp != NULL; ldp = ldp->ld_next) {
2083 2083                  if (ldp->ld_entry->le_lwpid == lwpid)
2084 2084                          return (ldp);
2085 2085          }
2086 2086  
2087 2087          return (NULL);
2088 2088  }
2089 2089  
2090 2090  /*
2091 2091   * Same as lwp_hash_lookup(), but acquire and return
2092 2092   * the tid hash table entry lock on success.
2093 2093   */
2094 2094  lwpdir_t *
2095 2095  lwp_hash_lookup_and_lock(proc_t *p, id_t lwpid, kmutex_t **mpp)
2096 2096  {
2097 2097          tidhash_t *tidhash;
2098 2098          uint_t tidhash_sz;
2099 2099          tidhash_t *thp;
2100 2100          lwpdir_t *ldp;
2101 2101  
2102 2102  top:
2103 2103          tidhash_sz = p->p_tidhash_sz;
2104 2104          membar_consumer();
2105 2105          if ((tidhash = p->p_tidhash) == NULL)
2106 2106                  return (NULL);
2107 2107  
2108 2108          thp = &tidhash[TIDHASH(lwpid, tidhash_sz)];
2109 2109          mutex_enter(&thp->th_lock);
2110 2110  
2111 2111          /*
2112 2112           * Since we are not holding p->p_lock, the tid hash table
2113 2113           * may have changed.  If so, start over.  If not, then
2114 2114           * it cannot change until after we drop &thp->th_lock.
2115 2115           */
2116 2116          if (tidhash != p->p_tidhash || tidhash_sz != p->p_tidhash_sz) {
2117 2117                  mutex_exit(&thp->th_lock);
2118 2118                  goto top;
2119 2119          }
2120 2120  
2121 2121          for (ldp = thp->th_list; ldp != NULL; ldp = ldp->ld_next) {
2122 2122                  if (ldp->ld_entry->le_lwpid == lwpid) {
2123 2123                          *mpp = &thp->th_lock;
2124 2124                          return (ldp);
2125 2125                  }
2126 2126          }
2127 2127  
2128 2128          mutex_exit(&thp->th_lock);
2129 2129          return (NULL);
2130 2130  }
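
           /*
            * A minimal sketch of the expected caller pattern: on success the
            * bucket lock is returned through *mpp and the caller must drop it
            * when done with the directory entry.
            *
            *      kmutex_t *mp;
            *      lwpdir_t *ldp;
            *
            *      if ((ldp = lwp_hash_lookup_and_lock(p, lwpid, &mp)) != NULL) {
            *              ... use ldp->ld_entry; stable while mp is held ...
            *              mutex_exit(mp);
            *      }
            */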
2131 2131  
2132 2132  /*
2133 2133   * Update the indicated LWP usage statistic for the current LWP.
2134 2134   */
2135 2135  void
2136 2136  lwp_stat_update(lwp_stat_id_t lwp_stat_id, long inc)
2137 2137  {
2138 2138          klwp_t *lwp = ttolwp(curthread);
2139 2139  
2140 2140          if (lwp == NULL)
2141 2141                  return;
2142 2142  
2143 2143          switch (lwp_stat_id) {
2144 2144          case LWP_STAT_INBLK:
2145 2145                  lwp->lwp_ru.inblock += inc;
2146 2146                  break;
2147 2147          case LWP_STAT_OUBLK:
2148 2148                  lwp->lwp_ru.oublock += inc;
2149 2149                  break;
2150 2150          case LWP_STAT_MSGRCV:
2151 2151                  lwp->lwp_ru.msgrcv += inc;
2152 2152                  break;
2153 2153          case LWP_STAT_MSGSND:
2154 2154                  lwp->lwp_ru.msgsnd += inc;
2155 2155                  break;
2156 2156          default:
2157 2157                  panic("lwp_stat_update: invalid lwp_stat_id 0x%x", lwp_stat_id);
2158 2158          }
2159 2159  }
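
           /*
            * For example, a block-I/O completion path might account one input
            * block against the current lwp (a hypothetical caller):
            *
            *      lwp_stat_update(LWP_STAT_INBLK, 1);
            *
            * The update is silently skipped for kernel threads with no lwp.
            */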
  