re #13613 rb4516 Tunables need volatile keyword
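
The change below adds the volatile qualifier to the stack-size tunables
default_stksize and lwp_default_stksize, which forces the compiler to reload
them from memory on every use. Without the qualifier, a value patched into
the running kernel (for example with "mdb -kw") may never be observed by code
that the compiler has optimized around a cached or constant-folded read. The
following minimal sketch illustrates the effect; my_tunable and
spin_until_set() are hypothetical names used only for illustration and are
not part of this webrev.

    /* May be rewritten externally at run time, e.g. via a kernel debugger. */
    volatile int my_tunable = 0;

    void
    spin_until_set(void)
    {
            /*
             * Without volatile, the compiler may legally load my_tunable
             * once and spin on a cached value forever; with volatile, each
             * iteration performs a fresh load from memory.
             */
            while (my_tunable == 0)
                    continue;
    }

The same reasoning applies to straight-line reads such as the default_stksize
checks in thread_init() below: volatile keeps those reads from being cached
or folded away, at the cost of slightly less aggressive optimization around
each access.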
    
      
          --- old/usr/src/uts/common/disp/thread.c
          +++ new/usr/src/uts/common/disp/thread.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
       24 + * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  24   25   * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
  25   26   */
  26   27  
  27   28  #include <sys/types.h>
  28   29  #include <sys/param.h>
  29   30  #include <sys/sysmacros.h>
  30   31  #include <sys/signal.h>
  31   32  #include <sys/stack.h>
  32   33  #include <sys/pcb.h>
  33   34  #include <sys/user.h>
  34   35  #include <sys/systm.h>
  35   36  #include <sys/sysinfo.h>
  36   37  #include <sys/errno.h>
  37   38  #include <sys/cmn_err.h>
  38   39  #include <sys/cred.h>
  39   40  #include <sys/resource.h>
  40   41  #include <sys/task.h>
  41   42  #include <sys/project.h>
  42   43  #include <sys/proc.h>
  43   44  #include <sys/debug.h>
  44   45  #include <sys/disp.h>
  45   46  #include <sys/class.h>
  46   47  #include <vm/seg_kmem.h>
  47   48  #include <vm/seg_kp.h>
  48   49  #include <sys/machlock.h>
  49   50  #include <sys/kmem.h>
  50   51  #include <sys/varargs.h>
  51   52  #include <sys/turnstile.h>
  52   53  #include <sys/poll.h>
  53   54  #include <sys/vtrace.h>
  54   55  #include <sys/callb.h>
  55   56  #include <c2/audit.h>
  56   57  #include <sys/tnf.h>
  57   58  #include <sys/sobject.h>
  58   59  #include <sys/cpupart.h>
  59   60  #include <sys/pset.h>
  60   61  #include <sys/door.h>
  61   62  #include <sys/spl.h>
  62   63  #include <sys/copyops.h>
  63   64  #include <sys/rctl.h>
  64   65  #include <sys/brand.h>
  65   66  #include <sys/pool.h>
  66   67  #include <sys/zone.h>
  67   68  #include <sys/tsol/label.h>
  68   69  #include <sys/tsol/tndb.h>
  69   70  #include <sys/cpc_impl.h>
  70   71  #include <sys/sdt.h>
  71   72  #include <sys/reboot.h>
  72   73  #include <sys/kdi.h>
  73   74  #include <sys/schedctl.h>
  74   75  #include <sys/waitq.h>
  75   76  #include <sys/cpucaps.h>
  76   77  #include <sys/kiconv.h>
  77   78  
  78   79  struct kmem_cache *thread_cache;        /* cache of free threads */
  79   80  struct kmem_cache *lwp_cache;           /* cache of free lwps */
  80   81  struct kmem_cache *turnstile_cache;     /* cache of free turnstiles */
  81   82  
  82   83  /*
  83   84   * allthreads is only for use by kmem_readers.  All kernel loops can use
  84   85   * the current thread as a start/end point.
  85   86   */
  86   87  kthread_t *allthreads = &t0;    /* circular list of all threads */
  87   88  
  88   89  static kcondvar_t reaper_cv;            /* synchronization var */
  89   90  kthread_t       *thread_deathrow;       /* circular list of reapable threads */
  90   91  kthread_t       *lwp_deathrow;          /* circular list of reapable threads */
  91   92  kmutex_t        reaplock;               /* protects lwp and thread deathrows */
  92   93  int     thread_reapcnt = 0;             /* number of threads on deathrow */
  93   94  int     lwp_reapcnt = 0;                /* number of lwps on deathrow */
  94   95  int     reaplimit = 16;                 /* delay reaping until reaplimit */
  95   96  
  96   97  thread_free_lock_t      *thread_free_lock;
  97   98                                          /* protects tick thread from reaper */
  98   99  
  99  100  extern int nthread;
 100  101  
 101  102  /* System Scheduling classes. */
 102  103  id_t    syscid;                         /* system scheduling class ID */
 103  104  id_t    sysdccid = CLASS_UNUSED;        /* reset when SDC loads */
 104  105  
 105  106  void    *segkp_thread;                  /* cookie for segkp pool */
 106  107  
 107  108  int lwp_cache_sz = 32;
 108  109  int t_cache_sz = 8;
 109  110  static kt_did_t next_t_id = 1;
 110  111  
 111  112  /* Default mode for thread binding to CPUs and processor sets */
 112  113  int default_binding_mode = TB_ALLHARD;
 113  114  
 114  115  /*
 115  116   * Min/Max stack sizes for stack size parameters
 116  117   */
 117  118  #define MAX_STKSIZE     (32 * DEFAULTSTKSZ)
 118  119  #define MIN_STKSIZE     DEFAULTSTKSZ
 119  120  
 120  121  /*
 121  122   * default_stksize overrides lwp_default_stksize if it is set.
 122  123   */
 123      -int     default_stksize;
 124      -int     lwp_default_stksize;
      124 +volatile int    default_stksize;
      125 +volatile int    lwp_default_stksize;
 125  126  
 126  127  static zone_key_t zone_thread_key;
 127  128  
 128  129  unsigned int kmem_stackinfo;            /* stackinfo feature on-off */
 129  130  kmem_stkinfo_t *kmem_stkinfo_log;       /* stackinfo circular log */
 130  131  static kmutex_t kmem_stkinfo_lock;      /* protects kmem_stkinfo_log */
 131  132  
 132  133  /*
 133  134   * forward declarations for internal thread specific data (tsd)
 134  135   */
 135  136  static void *tsd_realloc(void *, size_t, size_t);
 136  137  
 137  138  void thread_reaper(void);
 138  139  
 139  140  /* forward declarations for stackinfo feature */
 140  141  static void stkinfo_begin(kthread_t *);
 141  142  static void stkinfo_end(kthread_t *);
 142  143  static size_t stkinfo_percent(caddr_t, caddr_t, caddr_t);
 143  144  
 144  145  /*ARGSUSED*/
 145  146  static int
 146  147  turnstile_constructor(void *buf, void *cdrarg, int kmflags)
 147  148  {
 148  149          bzero(buf, sizeof (turnstile_t));
 149  150          return (0);
 150  151  }
 151  152  
 152  153  /*ARGSUSED*/
 153  154  static void
 154  155  turnstile_destructor(void *buf, void *cdrarg)
 155  156  {
 156  157          turnstile_t *ts = buf;
 157  158  
 158  159          ASSERT(ts->ts_free == NULL);
 159  160          ASSERT(ts->ts_waiters == 0);
 160  161          ASSERT(ts->ts_inheritor == NULL);
 161  162          ASSERT(ts->ts_sleepq[0].sq_first == NULL);
 162  163          ASSERT(ts->ts_sleepq[1].sq_first == NULL);
 163  164  }
 164  165  
 165  166  void
 166  167  thread_init(void)
 167  168  {
 168  169          kthread_t *tp;
 169  170          extern char sys_name[];
 170  171          extern void idle();
 171  172          struct cpu *cpu = CPU;
 172  173          int i;
 173  174          kmutex_t *lp;
 174  175  
 175  176          mutex_init(&reaplock, NULL, MUTEX_SPIN, (void *)ipltospl(DISP_LEVEL));
 176  177          thread_free_lock =
 177  178              kmem_alloc(sizeof (thread_free_lock_t) * THREAD_FREE_NUM, KM_SLEEP);
 178  179          for (i = 0; i < THREAD_FREE_NUM; i++) {
 179  180                  lp = &thread_free_lock[i].tf_lock;
 180  181                  mutex_init(lp, NULL, MUTEX_DEFAULT, NULL);
 181  182          }
 182  183  
 183  184  #if defined(__i386) || defined(__amd64)
 184  185          thread_cache = kmem_cache_create("thread_cache", sizeof (kthread_t),
 185  186              PTR24_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
 186  187  
 187  188          /*
 188  189           * "struct _klwp" includes a "struct pcb", which includes a
 189  190           * "struct fpu", which needs to be 64-byte aligned on amd64
 190  191           * (and even on i386) for xsave/xrstor.
 191  192           */
 192  193          lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t),
 193  194              64, NULL, NULL, NULL, NULL, NULL, 0);
 194  195  #else
 195  196          /*
 196  197           * Allocate thread structures from static_arena.  This prevents
 197  198           * issues where a thread tries to relocate its own thread
 198  199           * structure and touches it after the mapping has been suspended.
 199  200           */
 200  201          thread_cache = kmem_cache_create("thread_cache", sizeof (kthread_t),
 201  202              PTR24_ALIGN, NULL, NULL, NULL, NULL, static_arena, 0);
 202  203  
 203  204          lwp_stk_cache_init();
 204  205  
 205  206          lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t),
 206  207              0, NULL, NULL, NULL, NULL, NULL, 0);
 207  208  #endif
 208  209  
 209  210          turnstile_cache = kmem_cache_create("turnstile_cache",
 210  211              sizeof (turnstile_t), 0,
 211  212              turnstile_constructor, turnstile_destructor, NULL, NULL, NULL, 0);
 212  213  
 213  214          label_init();
 214  215          cred_init();
 215  216  
 216  217          /*
 217  218           * Initialize various resource management facilities.
 218  219           */
 219  220          rctl_init();
 220  221          cpucaps_init();
 221  222          /*
 222  223           * Zone_init() should be called before project_init() so that project ID
 223  224           * for the first project is initialized correctly.
 224  225           */
 225  226          zone_init();
 226  227          project_init();
 227  228          brand_init();
 228  229          kiconv_init();
 229  230          task_init();
 230  231          tcache_init();
 231  232          pool_init();
 232  233  
 233  234          curthread->t_ts = kmem_cache_alloc(turnstile_cache, KM_SLEEP);
 234  235  
 235  236          /*
 236  237           * Originally, we had two parameters to set default stack
 237  238           * size: one for lwp's (lwp_default_stksize), and one for
 238  239           * kernel-only threads (DEFAULTSTKSZ, a.k.a. _defaultstksz).
 239  240           * Now we have a third parameter that overrides both if it is
 240  241           * set to a legal stack size, called default_stksize.
 241  242           */
 242  243  
 243  244          if (default_stksize == 0) {
 244  245                  default_stksize = DEFAULTSTKSZ;
 245  246          } else if (default_stksize % PAGESIZE != 0 ||
 246  247              default_stksize > MAX_STKSIZE ||
 247  248              default_stksize < MIN_STKSIZE) {
 248  249                  cmn_err(CE_WARN, "Illegal stack size. Using %d",
 249  250                      (int)DEFAULTSTKSZ);
 250  251                  default_stksize = DEFAULTSTKSZ;
 251  252          } else {
 252  253                  lwp_default_stksize = default_stksize;
 253  254          }
 254  255  
 255  256          if (lwp_default_stksize == 0) {
 256  257                  lwp_default_stksize = default_stksize;
 257  258          } else if (lwp_default_stksize % PAGESIZE != 0 ||
 258  259              lwp_default_stksize > MAX_STKSIZE ||
 259  260              lwp_default_stksize < MIN_STKSIZE) {
 260  261                  cmn_err(CE_WARN, "Illegal stack size. Using %d",
 261  262                      default_stksize);
 262  263                  lwp_default_stksize = default_stksize;
 263  264          }
 264  265  
 265  266          segkp_lwp = segkp_cache_init(segkp, lwp_cache_sz,
 266  267              lwp_default_stksize,
 267  268              (KPD_NOWAIT | KPD_HASREDZONE | KPD_LOCKED));
 268  269  
 269  270          segkp_thread = segkp_cache_init(segkp, t_cache_sz,
 270  271              default_stksize, KPD_HASREDZONE | KPD_LOCKED | KPD_NO_ANON);
 271  272  
 272  273          (void) getcid(sys_name, &syscid);
 273  274          curthread->t_cid = syscid;      /* current thread is t0 */
 274  275  
 275  276          /*
 276  277           * Set up the first CPU's idle thread.
 277  278           * It runs whenever the CPU has nothing worthwhile to do.
 278  279           */
 279  280          tp = thread_create(NULL, 0, idle, NULL, 0, &p0, TS_STOPPED, -1);
 280  281          cpu->cpu_idle_thread = tp;
 281  282          tp->t_preempt = 1;
 282  283          tp->t_disp_queue = cpu->cpu_disp;
 283  284          ASSERT(tp->t_disp_queue != NULL);
 284  285          tp->t_bound_cpu = cpu;
 285  286          tp->t_affinitycnt = 1;
 286  287  
 287  288          /*
 288  289           * Registering a thread in the callback table is usually
 289  290           * done in the initialization code of the thread. In this
 290  291           * case, we do it right after thread creation to avoid
 291  292           * blocking idle thread while registering itself. It also
 292  293           * avoids the possibility of reregistration in case a CPU
 293  294           * restarts its idle thread.
 294  295           */
 295  296          CALLB_CPR_INIT_SAFE(tp, "idle");
 296  297  
 297  298          /*
 298  299           * Create the thread_reaper daemon. From this point on, exited
 299  300           * threads will get reaped.
 300  301           */
 301  302          (void) thread_create(NULL, 0, (void (*)())thread_reaper,
 302  303              NULL, 0, &p0, TS_RUN, minclsyspri);
 303  304  
 304  305          /*
 305  306           * Finish initializing the kernel memory allocator now that
 306  307           * thread_create() is available.
 307  308           */
 308  309          kmem_thread_init();
 309  310  
 310  311          if (boothowto & RB_DEBUG)
 311  312                  kdi_dvec_thravail();
 312  313  }
 313  314  
 314  315  /*
 315  316   * Create a thread.
 316  317   *
 317  318   * thread_create() blocks for memory if necessary.  It never fails.
 318  319   *
 319  320   * If stk is NULL, the thread is created at the base of the stack
 320  321   * and cannot be swapped.
 321  322   */
 322  323  kthread_t *
 323  324  thread_create(
 324  325          caddr_t stk,
 325  326          size_t  stksize,
 326  327          void    (*proc)(),
 327  328          void    *arg,
 328  329          size_t  len,
 329  330          proc_t   *pp,
 330  331          int     state,
 331  332          pri_t   pri)
 332  333  {
 333  334          kthread_t *t;
 334  335          extern struct classfuncs sys_classfuncs;
 335  336          turnstile_t *ts;
 336  337  
 337  338          /*
 338  339           * Every thread keeps a turnstile around in case it needs to block.
 339  340           * The only reason the turnstile is not simply part of the thread
 340  341           * structure is that we may have to break the association whenever
 341  342           * more than one thread blocks on a given synchronization object.
 342  343           * From a memory-management standpoint, turnstiles are like the
 343  344           * "attached mblks" that hang off dblks in the streams allocator.
 344  345           */
 345  346          ts = kmem_cache_alloc(turnstile_cache, KM_SLEEP);
 346  347  
 347  348          if (stk == NULL) {
 348  349                  /*
 349  350                   * alloc both thread and stack in segkp chunk
 350  351                   */
 351  352  
 352  353                  if (stksize < default_stksize)
 353  354                          stksize = default_stksize;
 354  355  
 355  356                  if (stksize == default_stksize) {
 356  357                          stk = (caddr_t)segkp_cache_get(segkp_thread);
 357  358                  } else {
 358  359                          stksize = roundup(stksize, PAGESIZE);
 359  360                          stk = (caddr_t)segkp_get(segkp, stksize,
 360  361                              (KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED));
 361  362                  }
 362  363  
 363  364                  ASSERT(stk != NULL);
 364  365  
 365  366                  /*
 366  367                   * The machine-dependent mutex code may require that
 367  368                   * thread pointers (since they may be used for mutex owner
 368  369                   * fields) have certain alignment requirements.
 369  370                   * PTR24_ALIGN is the size of the alignment quanta.
 370  371                   * XXX - assumes stack grows toward low addresses.
 371  372                   */
 372  373                  if (stksize <= sizeof (kthread_t) + PTR24_ALIGN)
 373  374                          cmn_err(CE_PANIC, "thread_create: proposed stack size"
 374  375                              " too small to hold thread.");
 375  376  #ifdef STACK_GROWTH_DOWN
 376  377                  stksize -= SA(sizeof (kthread_t) + PTR24_ALIGN - 1);
 377  378                  stksize &= -PTR24_ALIGN;        /* make thread aligned */
 378  379                  t = (kthread_t *)(stk + stksize);
 379  380                  bzero(t, sizeof (kthread_t));
 380  381                  if (audit_active)
 381  382                          audit_thread_create(t);
 382  383                  t->t_stk = stk + stksize;
 383  384                  t->t_stkbase = stk;
 384  385  #else   /* stack grows to larger addresses */
 385  386                  stksize -= SA(sizeof (kthread_t));
 386  387                  t = (kthread_t *)(stk);
 387  388                  bzero(t, sizeof (kthread_t));
 388  389                  t->t_stk = stk + sizeof (kthread_t);
 389  390                  t->t_stkbase = stk + stksize + sizeof (kthread_t);
 390  391  #endif  /* STACK_GROWTH_DOWN */
 391  392                  t->t_flag |= T_TALLOCSTK;
 392  393                  t->t_swap = stk;
 393  394          } else {
 394  395                  t = kmem_cache_alloc(thread_cache, KM_SLEEP);
 395  396                  bzero(t, sizeof (kthread_t));
 396  397                  ASSERT(((uintptr_t)t & (PTR24_ALIGN - 1)) == 0);
 397  398                  if (audit_active)
 398  399                          audit_thread_create(t);
 399  400                  /*
 400  401                   * Initialize t_stk to the kernel stack pointer to use
 401  402                   * upon entry to the kernel
 402  403                   */
 403  404  #ifdef STACK_GROWTH_DOWN
 404  405                  t->t_stk = stk + stksize;
 405  406                  t->t_stkbase = stk;
 406  407  #else
 407  408                  t->t_stk = stk;                 /* 3b2-like */
 408  409                  t->t_stkbase = stk + stksize;
 409  410  #endif /* STACK_GROWTH_DOWN */
 410  411          }
 411  412  
 412  413          if (kmem_stackinfo != 0) {
 413  414                  stkinfo_begin(t);
 414  415          }
 415  416  
 416  417          t->t_ts = ts;
 417  418  
 418  419          /*
 419  420           * p_cred could be NULL if it thread_create is called before cred_init
 420  421           * is called in main.
 421  422           */
 422  423          mutex_enter(&pp->p_crlock);
 423  424          if (pp->p_cred)
 424  425                  crhold(t->t_cred = pp->p_cred);
 425  426          mutex_exit(&pp->p_crlock);
 426  427          t->t_start = gethrestime_sec();
 427  428          t->t_startpc = proc;
 428  429          t->t_procp = pp;
 429  430          t->t_clfuncs = &sys_classfuncs.thread;
 430  431          t->t_cid = syscid;
 431  432          t->t_pri = pri;
 432  433          t->t_stime = ddi_get_lbolt();
 433  434          t->t_schedflag = TS_LOAD | TS_DONT_SWAP;
 434  435          t->t_bind_cpu = PBIND_NONE;
 435  436          t->t_bindflag = (uchar_t)default_binding_mode;
 436  437          t->t_bind_pset = PS_NONE;
 437  438          t->t_plockp = &pp->p_lock;
 438  439          t->t_copyops = NULL;
 439  440          t->t_taskq = NULL;
 440  441          t->t_anttime = 0;
 441  442          t->t_hatdepth = 0;
 442  443  
 443  444          t->t_dtrace_vtime = 1;  /* assure vtimestamp is always non-zero */
 444  445  
 445  446          CPU_STATS_ADDQ(CPU, sys, nthreads, 1);
 446  447  #ifndef NPROBE
 447  448          /* Kernel probe */
 448  449          tnf_thread_create(t);
 449  450  #endif /* NPROBE */
 450  451          LOCK_INIT_CLEAR(&t->t_lock);
 451  452  
 452  453          /*
 453  454           * Callers who give us a NULL proc must do their own
 454  455           * stack initialization.  e.g. lwp_create()
 455  456           */
 456  457          if (proc != NULL) {
 457  458                  t->t_stk = thread_stk_init(t->t_stk);
 458  459                  thread_load(t, proc, arg, len);
 459  460          }
 460  461  
 461  462          /*
 462  463           * Put a hold on project0. If this thread is actually in a
 463  464           * different project, then t_proj will be changed later in
 464  465           * lwp_create().  All kernel-only threads must be in project 0.
 465  466           */
 466  467          t->t_proj = project_hold(proj0p);
 467  468  
 468  469          lgrp_affinity_init(&t->t_lgrp_affinity);
 469  470  
 470  471          mutex_enter(&pidlock);
 471  472          nthread++;
 472  473          t->t_did = next_t_id++;
 473  474          t->t_prev = curthread->t_prev;
 474  475          t->t_next = curthread;
 475  476  
 476  477          /*
 477  478           * Add the thread to the list of all threads, and initialize
 478  479           * its t_cpu pointer.  We need to block preemption since
 479  480           * cpu_offline walks the thread list looking for threads
 480  481           * with t_cpu pointing to the CPU being offlined.  We want
 481  482           * to make sure that the list is consistent and that if t_cpu
 482  483           * is set, the thread is on the list.
 483  484           */
 484  485          kpreempt_disable();
 485  486          curthread->t_prev->t_next = t;
 486  487          curthread->t_prev = t;
 487  488  
 488  489          /*
 489  490           * Threads should never have a NULL t_cpu pointer so assign it
 490  491           * here.  If the thread is being created with state TS_RUN a
 491  492           * better CPU may be chosen when it is placed on the run queue.
 492  493           *
 493  494           * We need to keep kernel preemption disabled when setting all
 494  495           * three fields to keep them in sync.  Also, always create in
 495  496           * the default partition since that's where kernel threads go
 496  497           * (if this isn't a kernel thread, t_cpupart will be changed
 497  498           * in lwp_create before setting the thread runnable).
 498  499           */
 499  500          t->t_cpupart = &cp_default;
 500  501  
 501  502          /*
 502  503           * For now, affiliate this thread with the root lgroup.
 503  504           * Since the kernel does not (presently) allocate its memory
 504  505           * in a locality aware fashion, the root is an appropriate home.
 505  506           * If this thread is later associated with an lwp, it will have
 506  507           * its lgroup re-assigned at that time.
 507  508           */
 508  509          lgrp_move_thread(t, &cp_default.cp_lgrploads[LGRP_ROOTID], 1);
 509  510  
 510  511          /*
 511  512           * Inherit the current cpu.  If this cpu isn't part of the chosen
 512  513           * lgroup, a new cpu will be chosen by cpu_choose when the thread
 513  514           * is ready to run.
 514  515           */
 515  516          if (CPU->cpu_part == &cp_default)
 516  517                  t->t_cpu = CPU;
 517  518          else
 518  519                  t->t_cpu = disp_lowpri_cpu(cp_default.cp_cpulist, t->t_lpl,
 519  520                      t->t_pri, NULL);
 520  521  
 521  522          t->t_disp_queue = t->t_cpu->cpu_disp;
 522  523          kpreempt_enable();
 523  524  
 524  525          /*
 525  526           * Initialize thread state and the dispatcher lock pointer.
 526  527           * Need to hold onto pidlock to block allthreads walkers until
 527  528           * the state is set.
 528  529           */
 529  530          switch (state) {
 530  531          case TS_RUN:
 531  532                  curthread->t_oldspl = splhigh();        /* get dispatcher spl */
 532  533                  THREAD_SET_STATE(t, TS_STOPPED, &transition_lock);
 533  534                  CL_SETRUN(t);
 534  535                  thread_unlock(t);
 535  536                  break;
 536  537  
 537  538          case TS_ONPROC:
 538  539                  THREAD_ONPROC(t, t->t_cpu);
 539  540                  break;
 540  541  
 541  542          case TS_FREE:
 542  543                  /*
 543  544                   * Free state will be used for intr threads.
 544  545                   * The interrupt routine must set the thread dispatcher
 545  546                   * lock pointer (t_lockp) if starting on a CPU
 546  547                   * other than the current one.
 547  548                   */
 548  549                  THREAD_FREEINTR(t, CPU);
 549  550                  break;
 550  551  
 551  552          case TS_STOPPED:
 552  553                  THREAD_SET_STATE(t, TS_STOPPED, &stop_lock);
 553  554                  break;
 554  555  
 555  556          default:                        /* TS_SLEEP, TS_ZOMB or TS_TRANS */
 556  557                  cmn_err(CE_PANIC, "thread_create: invalid state %d", state);
 557  558          }
 558  559          mutex_exit(&pidlock);
 559  560          return (t);
 560  561  }
 561  562  
 562  563  /*
 563  564   * Move thread to project0 and take care of project reference counters.
 564  565   */
 565  566  void
 566  567  thread_rele(kthread_t *t)
 567  568  {
 568  569          kproject_t *kpj;
 569  570  
 570  571          thread_lock(t);
 571  572  
 572  573          ASSERT(t == curthread || t->t_state == TS_FREE || t->t_procp == &p0);
 573  574          kpj = ttoproj(t);
 574  575          t->t_proj = proj0p;
 575  576  
 576  577          thread_unlock(t);
 577  578  
 578  579          if (kpj != proj0p) {
 579  580                  project_rele(kpj);
 580  581                  (void) project_hold(proj0p);
 581  582          }
 582  583  }
 583  584  
 584  585  void
 585  586  thread_exit(void)
 586  587  {
 587  588          kthread_t *t = curthread;
 588  589  
 589  590          if ((t->t_proc_flag & TP_ZTHREAD) != 0)
 590  591                  cmn_err(CE_PANIC, "thread_exit: zthread_exit() not called");
 591  592  
 592  593          tsd_exit();             /* Clean up this thread's TSD */
 593  594  
 594  595          kcpc_passivate();       /* clean up performance counter state */
 595  596  
 596  597          /*
 597  598           * No kernel thread should have called poll() without arranging
 598  599           * to call pollcleanup() here.
 599  600           */
 600  601          ASSERT(t->t_pollstate == NULL);
 601  602          ASSERT(t->t_schedctl == NULL);
 602  603          if (t->t_door)
 603  604                  door_slam();    /* in case thread did an upcall */
 604  605  
 605  606  #ifndef NPROBE
 606  607          /* Kernel probe */
 607  608          if (t->t_tnf_tpdp)
 608  609                  tnf_thread_exit();
 609  610  #endif /* NPROBE */
 610  611  
 611  612          thread_rele(t);
 612  613          t->t_preempt++;
 613  614  
 614  615          /*
 615  616           * remove thread from the all threads list so that
 616  617           * death-row can use the same pointers.
 617  618           */
 618  619          mutex_enter(&pidlock);
 619  620          t->t_next->t_prev = t->t_prev;
 620  621          t->t_prev->t_next = t->t_next;
 621  622          ASSERT(allthreads != t);        /* t0 never exits */
 622  623          cv_broadcast(&t->t_joincv);     /* wake up anyone in thread_join */
 623  624          mutex_exit(&pidlock);
 624  625  
 625  626          if (t->t_ctx != NULL)
 626  627                  exitctx(t);
 627  628          if (t->t_procp->p_pctx != NULL)
 628  629                  exitpctx(t->t_procp);
 629  630  
 630  631          if (kmem_stackinfo != 0) {
 631  632                  stkinfo_end(t);
 632  633          }
 633  634  
 634  635          t->t_state = TS_ZOMB;   /* set zombie thread */
 635  636  
 636  637          swtch_from_zombie();    /* give up the CPU */
 637  638          /* NOTREACHED */
 638  639  }
 639  640  
 640  641  /*
 641  642   * Check to see if the specified thread is active (defined as being on
 642  643   * the thread list).  This is certainly a slow way to do this; if there's
 643  644   * ever a reason to speed it up, we could maintain a hash table of active
 644  645   * threads indexed by their t_did.
 645  646   */
 646  647  static kthread_t *
 647  648  did_to_thread(kt_did_t tid)
 648  649  {
 649  650          kthread_t *t;
 650  651  
 651  652          ASSERT(MUTEX_HELD(&pidlock));
 652  653          for (t = curthread->t_next; t != curthread; t = t->t_next) {
 653  654                  if (t->t_did == tid)
 654  655                          break;
 655  656          }
 656  657          if (t->t_did == tid)
 657  658                  return (t);
 658  659          else
 659  660                  return (NULL);
 660  661  }
 661  662  
 662  663  /*
 663  664   * Wait for specified thread to exit.  Returns immediately if the thread
 664  665   * could not be found, meaning that it has either already exited or never
 665  666   * existed.
 666  667   */
 667  668  void
 668  669  thread_join(kt_did_t tid)
 669  670  {
 670  671          kthread_t *t;
 671  672  
 672  673          ASSERT(tid != curthread->t_did);
 673  674          ASSERT(tid != t0.t_did);
 674  675  
 675  676          mutex_enter(&pidlock);
 676  677          /*
 677  678           * Make sure we check that the thread is on the thread list
 678  679           * before blocking on it; otherwise we could end up blocking on
 679  680           * a cv that's already been freed.  In other words, don't cache
 680  681           * the thread pointer across calls to cv_wait.
 681  682           *
 682  683           * The choice of loop invariant means that whenever a thread
 683  684           * is taken off the allthreads list, a cv_broadcast must be
 684  685           * performed on that thread's t_joincv to wake up any waiters.
 685  686           * The broadcast doesn't have to happen right away, but it
 686  687           * shouldn't be postponed indefinitely (e.g., by doing it in
 687  688           * thread_free which may only be executed when the deathrow
 688  689           * queue is processed).
 689  690           */
 690  691          while (t = did_to_thread(tid))
 691  692                  cv_wait(&t->t_joincv, &pidlock);
 692  693          mutex_exit(&pidlock);
 693  694  }
 694  695  
 695  696  void
 696  697  thread_free_prevent(kthread_t *t)
 697  698  {
 698  699          kmutex_t *lp;
 699  700  
 700  701          lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
 701  702          mutex_enter(lp);
 702  703  }
 703  704  
 704  705  void
 705  706  thread_free_allow(kthread_t *t)
 706  707  {
 707  708          kmutex_t *lp;
 708  709  
 709  710          lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
 710  711          mutex_exit(lp);
 711  712  }
 712  713  
 713  714  static void
 714  715  thread_free_barrier(kthread_t *t)
 715  716  {
 716  717          kmutex_t *lp;
 717  718  
 718  719          lp = &thread_free_lock[THREAD_FREE_HASH(t)].tf_lock;
 719  720          mutex_enter(lp);
 720  721          mutex_exit(lp);
 721  722  }
 722  723  
 723  724  void
 724  725  thread_free(kthread_t *t)
 725  726  {
 726  727          boolean_t allocstk = (t->t_flag & T_TALLOCSTK);
 727  728          klwp_t *lwp = t->t_lwp;
 728  729          caddr_t swap = t->t_swap;
 729  730  
 730  731          ASSERT(t != &t0 && t->t_state == TS_FREE);
 731  732          ASSERT(t->t_door == NULL);
 732  733          ASSERT(t->t_schedctl == NULL);
 733  734          ASSERT(t->t_pollstate == NULL);
 734  735  
 735  736          t->t_pri = 0;
 736  737          t->t_pc = 0;
 737  738          t->t_sp = 0;
 738  739          t->t_wchan0 = NULL;
 739  740          t->t_wchan = NULL;
 740  741          if (t->t_cred != NULL) {
 741  742                  crfree(t->t_cred);
 742  743                  t->t_cred = 0;
 743  744          }
 744  745          if (t->t_pdmsg) {
 745  746                  kmem_free(t->t_pdmsg, strlen(t->t_pdmsg) + 1);
 746  747                  t->t_pdmsg = NULL;
 747  748          }
 748  749          if (audit_active)
 749  750                  audit_thread_free(t);
 750  751  #ifndef NPROBE
 751  752          if (t->t_tnf_tpdp)
 752  753                  tnf_thread_free(t);
 753  754  #endif /* NPROBE */
 754  755          if (t->t_cldata) {
 755  756                  CL_EXITCLASS(t->t_cid, (caddr_t *)t->t_cldata);
 756  757          }
 757  758          if (t->t_rprof != NULL) {
 758  759                  kmem_free(t->t_rprof, sizeof (*t->t_rprof));
 759  760                  t->t_rprof = NULL;
 760  761          }
 761  762          t->t_lockp = NULL;      /* nothing should try to lock this thread now */
 762  763          if (lwp)
 763  764                  lwp_freeregs(lwp, 0);
 764  765          if (t->t_ctx)
 765  766                  freectx(t, 0);
 766  767          t->t_stk = NULL;
 767  768          if (lwp)
 768  769                  lwp_stk_fini(lwp);
 769  770          lock_clear(&t->t_lock);
 770  771  
 771  772          if (t->t_ts->ts_waiters > 0)
 772  773                  panic("thread_free: turnstile still active");
 773  774  
 774  775          kmem_cache_free(turnstile_cache, t->t_ts);
 775  776  
 776  777          free_afd(&t->t_activefd);
 777  778  
 778  779          /*
 779  780           * Barrier for the tick accounting code.  The tick accounting code
 780  781           * holds this lock to keep the thread from going away while it's
 781  782           * looking at it.
 782  783           */
 783  784          thread_free_barrier(t);
 784  785  
 785  786          ASSERT(ttoproj(t) == proj0p);
 786  787          project_rele(ttoproj(t));
 787  788  
 788  789          lgrp_affinity_free(&t->t_lgrp_affinity);
 789  790  
 790  791          mutex_enter(&pidlock);
 791  792          nthread--;
 792  793          mutex_exit(&pidlock);
 793  794  
 794  795          /*
 795  796           * Free thread, lwp and stack.  This needs to be done carefully, since
 796  797           * if T_TALLOCSTK is set, the thread is part of the stack.
 797  798           */
 798  799          t->t_lwp = NULL;
 799  800          t->t_swap = NULL;
 800  801  
 801  802          if (swap) {
 802  803                  segkp_release(segkp, swap);
 803  804          }
 804  805          if (lwp) {
 805  806                  kmem_cache_free(lwp_cache, lwp);
 806  807          }
 807  808          if (!allocstk) {
 808  809                  kmem_cache_free(thread_cache, t);
 809  810          }
 810  811  }
 811  812  
 812  813  /*
 813  814   * Removes threads associated with the given zone from a deathrow queue.
 814  815   * tp is a pointer to the head of the deathrow queue, and countp is a
 815  816   * pointer to the current deathrow count.  Returns a linked list of
 816  817   * threads removed from the list.
 817  818   */
 818  819  static kthread_t *
 819  820  thread_zone_cleanup(kthread_t **tp, int *countp, zoneid_t zoneid)
 820  821  {
 821  822          kthread_t *tmp, *list = NULL;
 822  823          cred_t *cr;
 823  824  
 824  825          ASSERT(MUTEX_HELD(&reaplock));
 825  826          while (*tp != NULL) {
 826  827                  if ((cr = (*tp)->t_cred) != NULL && crgetzoneid(cr) == zoneid) {
 827  828                          tmp = *tp;
 828  829                          *tp = tmp->t_forw;
 829  830                          tmp->t_forw = list;
 830  831                          list = tmp;
 831  832                          (*countp)--;
 832  833                  } else {
 833  834                          tp = &(*tp)->t_forw;
 834  835                  }
 835  836          }
 836  837          return (list);
 837  838  }
 838  839  
 839  840  static void
 840  841  thread_reap_list(kthread_t *t)
 841  842  {
 842  843          kthread_t *next;
 843  844  
 844  845          while (t != NULL) {
 845  846                  next = t->t_forw;
 846  847                  thread_free(t);
 847  848                  t = next;
 848  849          }
 849  850  }
 850  851  
 851  852  /* ARGSUSED */
 852  853  static void
 853  854  thread_zone_destroy(zoneid_t zoneid, void *unused)
 854  855  {
 855  856          kthread_t *t, *l;
 856  857  
 857  858          mutex_enter(&reaplock);
 858  859          /*
 859  860           * Pull threads and lwps associated with zone off deathrow lists.
 860  861           */
 861  862          t = thread_zone_cleanup(&thread_deathrow, &thread_reapcnt, zoneid);
 862  863          l = thread_zone_cleanup(&lwp_deathrow, &lwp_reapcnt, zoneid);
 863  864          mutex_exit(&reaplock);
 864  865  
 865  866          /*
 866  867           * Guard against race condition in mutex_owner_running:
 867  868           *      thread=owner(mutex)
 868  869           *      <interrupt>
 869  870           *                              thread exits mutex
 870  871           *                              thread exits
 871  872           *                              thread reaped
 872  873           *                              thread struct freed
 873  874           * cpu = thread->t_cpu <- BAD POINTER DEREFERENCE.
 874  875           * A cross call to all cpus will cause the interrupt handler
 875  876           * to reset the PC if it is in mutex_owner_running, refreshing
 876  877           * stale thread pointers.
 877  878           */
 878  879          mutex_sync();   /* sync with mutex code */
 879  880  
 880  881          /*
 881  882           * Reap threads
 882  883           */
 883  884          thread_reap_list(t);
 884  885  
 885  886          /*
 886  887           * Reap lwps
 887  888           */
 888  889          thread_reap_list(l);
 889  890  }
 890  891  
 891  892  /*
 892  893   * cleanup zombie threads that are on deathrow.
 893  894   */
 894  895  void
 895  896  thread_reaper()
 896  897  {
 897  898          kthread_t *t, *l;
 898  899          callb_cpr_t cprinfo;
 899  900  
 900  901          /*
 901  902           * Register callback to clean up threads when zone is destroyed.
 902  903           */
 903  904          zone_key_create(&zone_thread_key, NULL, NULL, thread_zone_destroy);
 904  905  
 905  906          CALLB_CPR_INIT(&cprinfo, &reaplock, callb_generic_cpr, "t_reaper");
 906  907          for (;;) {
 907  908                  mutex_enter(&reaplock);
 908  909                  while (thread_deathrow == NULL && lwp_deathrow == NULL) {
 909  910                          CALLB_CPR_SAFE_BEGIN(&cprinfo);
 910  911                          cv_wait(&reaper_cv, &reaplock);
 911  912                          CALLB_CPR_SAFE_END(&cprinfo, &reaplock);
 912  913                  }
 913  914                  /*
 914  915                   * mutex_sync() needs to be called when reaping, but
 915  916                   * not too often.  We limit reaping rate to once
 916  917                   * per second.  Reaplimit is max rate at which threads can
 917  918                   * be freed. Does not impact thread destruction/creation.
 918  919                   */
 919  920                  t = thread_deathrow;
 920  921                  l = lwp_deathrow;
 921  922                  thread_deathrow = NULL;
 922  923                  lwp_deathrow = NULL;
 923  924                  thread_reapcnt = 0;
 924  925                  lwp_reapcnt = 0;
 925  926                  mutex_exit(&reaplock);
 926  927  
 927  928                  /*
 928  929                   * Guard against race condition in mutex_owner_running:
 929  930                   *      thread=owner(mutex)
 930  931                   *      <interrupt>
 931  932                   *                              thread exits mutex
 932  933                   *                              thread exits
 933  934                   *                              thread reaped
 934  935                   *                              thread struct freed
 935  936                   * cpu = thread->t_cpu <- BAD POINTER DEREFERENCE.
 936  937                   * A cross call to all cpus will cause the interrupt handler
 937  938                   * to reset the PC if it is in mutex_owner_running, refreshing
 938  939                   * stale thread pointers.
 939  940                   */
 940  941                  mutex_sync();   /* sync with mutex code */
 941  942                  /*
 942  943                   * Reap threads
 943  944                   */
 944  945                  thread_reap_list(t);
 945  946  
 946  947                  /*
 947  948                   * Reap lwps
 948  949                   */
 949  950                  thread_reap_list(l);
 950  951                  delay(hz);
 951  952          }
 952  953  }
 953  954  
 954  955  /*
 955  956   * This is called by lwp_create(), etc., to put a lwp_deathrow thread onto
 956  957   * thread_deathrow. The thread's state has already been changed to TS_FREE
 957  958   * to indicate that it is reapable. The caller already holds the reaplock,
 958  959   * and the thread was already freed.
 959  960   */
 960  961  void
 961  962  reapq_move_lq_to_tq(kthread_t *t)
 962  963  {
 963  964          ASSERT(t->t_state == TS_FREE);
 964  965          ASSERT(MUTEX_HELD(&reaplock));
 965  966          t->t_forw = thread_deathrow;
 966  967          thread_deathrow = t;
 967  968          thread_reapcnt++;
 968  969          if (lwp_reapcnt + thread_reapcnt > reaplimit)
 969  970                  cv_signal(&reaper_cv);  /* wake the reaper */
 970  971  }
 971  972  
 972  973  /*
 973  974   * This is called by resume() to put a zombie thread onto deathrow.
 974  975   * The thread's state is changed to TS_FREE to indicate that it is reapable.
 975  976   * This is called from the idle thread so it must not block - just spin.
 976  977   */
 977  978  void
 978  979  reapq_add(kthread_t *t)
 979  980  {
 980  981          mutex_enter(&reaplock);
 981  982  
 982  983          /*
 983  984           * lwp_deathrow contains threads with lwp linkage and
 984  985           * swappable thread stacks which have the default stacksize.
 985  986           * These threads' lwps and stacks may be reused by lwp_create().
 986  987           *
 987  988           * Anything else goes on thread_deathrow(), where it will eventually
 988  989           * be thread_free()d.
 989  990           */
 990  991          if (t->t_flag & T_LWPREUSE) {
 991  992                  ASSERT(ttolwp(t) != NULL);
 992  993                  t->t_forw = lwp_deathrow;
 993  994                  lwp_deathrow = t;
 994  995                  lwp_reapcnt++;
 995  996          } else {
 996  997                  t->t_forw = thread_deathrow;
 997  998                  thread_deathrow = t;
 998  999                  thread_reapcnt++;
 999 1000          }
1000 1001          if (lwp_reapcnt + thread_reapcnt > reaplimit)
1001 1002                  cv_signal(&reaper_cv);  /* wake the reaper */
1002 1003          t->t_state = TS_FREE;
1003 1004          lock_clear(&t->t_lock);
1004 1005  
1005 1006          /*
1006 1007           * Before we return, we need to grab and drop the thread lock for
1007 1008           * the dead thread.  At this point, the current thread is the idle
1008 1009           * thread, and the dead thread's CPU lock points to the current
1009 1010           * CPU -- and we must grab and drop the lock to synchronize with
1010 1011           * a racing thread walking a blocking chain that the zombie thread
1011 1012           * was recently in.  By this point, that blocking chain is (by
1012 1013           * definition) stale:  the dead thread is not holding any locks, and
1013 1014           * is therefore not in any blocking chains -- but if we do not regrab
1014 1015           * our lock before freeing the dead thread's data structures, the
1015 1016           * thread walking the (stale) blocking chain will die on memory
1016 1017           * corruption when it attempts to drop the dead thread's lock.  We
1017 1018           * only need do this once because there is no way for the dead thread
1018 1019           * to ever again be on a blocking chain:  once we have grabbed and
1019 1020           * dropped the thread lock, we are guaranteed that anyone that could
1020 1021           * have seen this thread in a blocking chain can no longer see it.
1021 1022           */
1022 1023          thread_lock(t);
1023 1024          thread_unlock(t);
1024 1025  
1025 1026          mutex_exit(&reaplock);
1026 1027  }
1027 1028  
1028 1029  /*
1029 1030   * Install thread context ops for the current thread.
1030 1031   */
1031 1032  void
1032 1033  installctx(
1033 1034          kthread_t *t,
1034 1035          void    *arg,
1035 1036          void    (*save)(void *),
1036 1037          void    (*restore)(void *),
1037 1038          void    (*fork)(void *, void *),
1038 1039          void    (*lwp_create)(void *, void *),
1039 1040          void    (*exit)(void *),
1040 1041          void    (*free)(void *, int))
1041 1042  {
1042 1043          struct ctxop *ctx;
1043 1044  
1044 1045          ctx = kmem_alloc(sizeof (struct ctxop), KM_SLEEP);
1045 1046          ctx->save_op = save;
1046 1047          ctx->restore_op = restore;
1047 1048          ctx->fork_op = fork;
1048 1049          ctx->lwp_create_op = lwp_create;
1049 1050          ctx->exit_op = exit;
1050 1051          ctx->free_op = free;
1051 1052          ctx->arg = arg;
1052 1053          ctx->next = t->t_ctx;
1053 1054          t->t_ctx = ctx;
1054 1055  }
1055 1056  
1056 1057  /*
1057 1058   * Remove the thread context ops from a thread.
1058 1059   */
1059 1060  int
1060 1061  removectx(
1061 1062          kthread_t *t,
1062 1063          void    *arg,
1063 1064          void    (*save)(void *),
1064 1065          void    (*restore)(void *),
1065 1066          void    (*fork)(void *, void *),
1066 1067          void    (*lwp_create)(void *, void *),
1067 1068          void    (*exit)(void *),
1068 1069          void    (*free)(void *, int))
1069 1070  {
1070 1071          struct ctxop *ctx, *prev_ctx;
1071 1072  
1072 1073          /*
1073 1074           * The incoming kthread_t (which is the thread for which the
1074 1075           * context ops will be removed) should be one of the following:
1075 1076           *
1076 1077           * a) the current thread,
1077 1078           *
1078 1079           * b) a thread of a process that's being forked (SIDL),
1079 1080           *
1080 1081           * c) a thread that belongs to the same process as the current
1081 1082           *    thread and for which the current thread is the agent thread,
1082 1083           *
1083 1084           * d) a thread that is TS_STOPPED which is indicative of it
1084 1085           *    being (if curthread is not an agent) a thread being created
1085 1086           *    as part of an lwp creation.
1086 1087           */
1087 1088          ASSERT(t == curthread || ttoproc(t)->p_stat == SIDL ||
1088 1089              ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
1089 1090  
1090 1091          /*
1091 1092           * Serialize modifications to t->t_ctx to prevent the agent thread
1092 1093           * and the target thread from racing with each other during lwp exit.
1093 1094           */
1094 1095          mutex_enter(&t->t_ctx_lock);
1095 1096          prev_ctx = NULL;
1096 1097          kpreempt_disable();
1097 1098          for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next) {
1098 1099                  if (ctx->save_op == save && ctx->restore_op == restore &&
1099 1100                      ctx->fork_op == fork && ctx->lwp_create_op == lwp_create &&
1100 1101                      ctx->exit_op == exit && ctx->free_op == free &&
1101 1102                      ctx->arg == arg) {
1102 1103                          if (prev_ctx)
1103 1104                                  prev_ctx->next = ctx->next;
1104 1105                          else
1105 1106                                  t->t_ctx = ctx->next;
1106 1107                          mutex_exit(&t->t_ctx_lock);
1107 1108                          if (ctx->free_op != NULL)
1108 1109                                  (ctx->free_op)(ctx->arg, 0);
1109 1110                          kmem_free(ctx, sizeof (struct ctxop));
1110 1111                          kpreempt_enable();
1111 1112                          return (1);
1112 1113                  }
1113 1114                  prev_ctx = ctx;
1114 1115          }
1115 1116          mutex_exit(&t->t_ctx_lock);
1116 1117          kpreempt_enable();
1117 1118  
1118 1119          return (0);
1119 1120  }
1120 1121  
1121 1122  void
1122 1123  savectx(kthread_t *t)
1123 1124  {
1124 1125          struct ctxop *ctx;
1125 1126  
1126 1127          ASSERT(t == curthread);
1127 1128          for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
1128 1129                  if (ctx->save_op != NULL)
1129 1130                          (ctx->save_op)(ctx->arg);
1130 1131  }
1131 1132  
1132 1133  void
1133 1134  restorectx(kthread_t *t)
1134 1135  {
1135 1136          struct ctxop *ctx;
1136 1137  
1137 1138          ASSERT(t == curthread);
1138 1139          for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
1139 1140                  if (ctx->restore_op != NULL)
1140 1141                          (ctx->restore_op)(ctx->arg);
1141 1142  }
1142 1143  
1143 1144  void
1144 1145  forkctx(kthread_t *t, kthread_t *ct)
1145 1146  {
1146 1147          struct ctxop *ctx;
1147 1148  
1148 1149          for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
1149 1150                  if (ctx->fork_op != NULL)
1150 1151                          (ctx->fork_op)(t, ct);
1151 1152  }
1152 1153  
1153 1154  /*
1154 1155   * Note that this operator is only invoked via the _lwp_create
1155 1156   * system call.  The system may have other reasons to create lwps
1156 1157   * e.g. the agent lwp or the doors unreferenced lwp.
1157 1158   */
1158 1159  void
1159 1160  lwp_createctx(kthread_t *t, kthread_t *ct)
1160 1161  {
1161 1162          struct ctxop *ctx;
1162 1163  
1163 1164          for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
1164 1165                  if (ctx->lwp_create_op != NULL)
1165 1166                          (ctx->lwp_create_op)(t, ct);
1166 1167  }
1167 1168  
1168 1169  /*
1169 1170   * exitctx is called from thread_exit() and lwp_exit() to perform any actions
1170 1171   * needed when the thread/LWP leaves the processor for the last time. This
1171 1172   * routine is not intended to deal with freeing memory; freectx() is used for
1172 1173   * that purpose during thread_free(). This routine is provided to allow for
1173 1174   * clean-up that can't wait until thread_free().
1174 1175   */
1175 1176  void
1176 1177  exitctx(kthread_t *t)
1177 1178  {
1178 1179          struct ctxop *ctx;
1179 1180  
1180 1181          for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
1181 1182                  if (ctx->exit_op != NULL)
1182 1183                          (ctx->exit_op)(t);
1183 1184  }
1184 1185  
1185 1186  /*
1186 1187   * freectx is called from thread_free() and exec() to get
1187 1188   * rid of old thread context ops.
1188 1189   */
1189 1190  void
1190 1191  freectx(kthread_t *t, int isexec)
1191 1192  {
1192 1193          struct ctxop *ctx;
1193 1194  
1194 1195          kpreempt_disable();
1195 1196          while ((ctx = t->t_ctx) != NULL) {
1196 1197                  t->t_ctx = ctx->next;
1197 1198                  if (ctx->free_op != NULL)
1198 1199                          (ctx->free_op)(ctx->arg, isexec);
1199 1200                  kmem_free(ctx, sizeof (struct ctxop));
1200 1201          }
1201 1202          kpreempt_enable();
1202 1203  }
1203 1204  
1204 1205  /*
1205 1206   * freectx_ctx is called from lwp_create() when lwp is reused from
1206 1207   * lwp_deathrow and its thread structure is added to thread_deathrow.
1207 1208   * The thread structure to which this ctx was attached may already have
1208 1209   * been freed by the thread reaper, so free_op implementations shouldn't
1209 1210   * rely on that thread structure still being around.
1210 1211   */
1211 1212  void
1212 1213  freectx_ctx(struct ctxop *ctx)
1213 1214  {
1214 1215          struct ctxop *nctx;
1215 1216  
1216 1217          ASSERT(ctx != NULL);
1217 1218  
1218 1219          kpreempt_disable();
1219 1220          do {
1220 1221                  nctx = ctx->next;
1221 1222                  if (ctx->free_op != NULL)
1222 1223                          (ctx->free_op)(ctx->arg, 0);
1223 1224                  kmem_free(ctx, sizeof (struct ctxop));
1224 1225          } while ((ctx = nctx) != NULL);
1225 1226          kpreempt_enable();
1226 1227  }
1227 1228  
1228 1229  /*
1229 1230   * Set the thread running; arrange for it to be swapped in if necessary.
1230 1231   */
1231 1232  void
1232 1233  setrun_locked(kthread_t *t)
1233 1234  {
1234 1235          ASSERT(THREAD_LOCK_HELD(t));
1235 1236          if (t->t_state == TS_SLEEP) {
1236 1237                  /*
1237 1238                   * Take off sleep queue.
1238 1239                   */
1239 1240                  SOBJ_UNSLEEP(t->t_sobj_ops, t);
1240 1241          } else if (t->t_state & (TS_RUN | TS_ONPROC)) {
1241 1242                  /*
1242 1243                   * Already on dispatcher queue.
1243 1244                   */
1244 1245                  return;
1245 1246          } else if (t->t_state == TS_WAIT) {
1246 1247                  waitq_setrun(t);
1247 1248          } else if (t->t_state == TS_STOPPED) {
1248 1249                  /*
1249 1250                   * All of the sending of SIGCONT (TC_XSTART) and /proc
1250 1251                   * (TC_PSTART) and lwp_continue() (TC_CSTART) must have
1251 1252                   * requested that the thread be run.
1252 1253                   * Just calling setrun() is not sufficient to set a stopped
1253 1254                   * thread running.  TP_TXSTART is always set if the thread
1254 1255                   * is not stopped by a jobcontrol stop signal.
1255 1256                   * TP_TPSTART is always set if /proc is not controlling it.
1256 1257                   * TP_TCSTART is always set if lwp_suspend() didn't stop it.
1257 1258                   * The thread won't be stopped unless one of these
1258 1259                   * three mechanisms did it.
1259 1260                   *
1260 1261                   * These flags must be set before calling setrun_locked(t).
1261 1262                   * They can't be passed as arguments because the streams
1262 1263                   * code calls setrun() indirectly and the mechanism for
1263 1264                   * doing so admits only one argument.  Note that the
1264 1265                   * thread must be locked in order to change t_schedflags.
1265 1266                   */
1266 1267                  if ((t->t_schedflag & TS_ALLSTART) != TS_ALLSTART)
1267 1268                          return;
1268 1269                  /*
1269 1270                   * Process is no longer stopped (a thread is running).
1270 1271                   */
1271 1272                  t->t_whystop = 0;
1272 1273                  t->t_whatstop = 0;
1273 1274                  /*
1274 1275                   * Strictly speaking, we do not have to clear these
1275 1276                   * flags here; they are cleared on entry to stop().
1276 1277                   * However, they are confusing when doing kernel
1277 1278                   * debugging or when they are revealed by ps(1).
1278 1279                   */
1279 1280                  t->t_schedflag &= ~TS_ALLSTART;
1280 1281                  THREAD_TRANSITION(t);   /* drop stopped-thread lock */
1281 1282                  ASSERT(t->t_lockp == &transition_lock);
1282 1283                  ASSERT(t->t_wchan0 == NULL && t->t_wchan == NULL);
1283 1284                  /*
1284 1285                   * Let the class put the process on the dispatcher queue.
1285 1286                   */
1286 1287                  CL_SETRUN(t);
1287 1288          }
1288 1289  }
1289 1290  
1290 1291  void
1291 1292  setrun(kthread_t *t)
1292 1293  {
1293 1294          thread_lock(t);
1294 1295          setrun_locked(t);
1295 1296          thread_unlock(t);
1296 1297  }
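As a rough sketch of the flag discipline setrun_locked() enforces for TS_STOPPED threads, a continuing mechanism sets its own start flag under the thread lock before calling in, and the thread is dispatched only once every bit in TS_ALLSTART is present. The helper name below is hypothetical, and TS_CSTART merely stands in for whichever TS_*START bit the calling mechanism owns:

	static void
	example_continue(kthread_t *t)
	{
		thread_lock(t);			/* t_schedflag may only change under the thread lock */
		t->t_schedflag |= TS_CSTART;	/* this mechanism's start flag */
		setrun_locked(t);		/* no-op until TS_ALLSTART is fully set */
		thread_unlock(t);
	}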
1297 1298  
1298 1299  /*
1299 1300   * Unpin an interrupted thread.
1300 1301   *      When an interrupt occurs, the interrupt is handled on the stack
1301 1302   *      of an interrupt thread, taken from a pool linked to the CPU structure.
1302 1303   *
1303 1304   *      When swtch() is switching away from an interrupt thread because it
1304 1305   *      blocked or was preempted, this routine is called to complete the
1305 1306   *      saving of the interrupted thread state, and returns the interrupted
1306 1307   *      thread pointer so it may be resumed.
1307 1308   *
1308 1309   *      Called by swtch() only at high spl.
1309 1310   */
1310 1311  kthread_t *
1311 1312  thread_unpin()
1312 1313  {
1313 1314          kthread_t       *t = curthread; /* current thread */
1314 1315          kthread_t       *itp;           /* interrupted thread */
1315 1316          int             i;              /* interrupt level */
1316 1317          extern int      intr_passivate();
1317 1318  
1318 1319          ASSERT(t->t_intr != NULL);
1319 1320  
1320 1321          itp = t->t_intr;                /* interrupted thread */
1321 1322          t->t_intr = NULL;               /* clear interrupt ptr */
1322 1323  
1323 1324          /*
1324 1325           * Get state from interrupt thread for the one
1325 1326           * it interrupted.
1326 1327           */
1327 1328  
1328 1329          i = intr_passivate(t, itp);
1329 1330  
1330 1331          TRACE_5(TR_FAC_INTR, TR_INTR_PASSIVATE,
1331 1332              "intr_passivate:level %d curthread %p (%T) ithread %p (%T)",
1332 1333              i, t, t, itp, itp);
1333 1334  
1334 1335          /*
1335 1336           * Dissociate the current thread from the interrupted thread's LWP.
1336 1337           */
1337 1338          t->t_lwp = NULL;
1338 1339  
1339 1340          /*
1340 1341           * Interrupt handlers above the level that spinlocks block must
1341 1342           * not block.
1342 1343           */
1343 1344  #if DEBUG
1344 1345          if (i < 0 || i > LOCK_LEVEL)
1345 1346                  cmn_err(CE_PANIC, "thread_unpin: ipl out of range %x", i);
1346 1347  #endif
1347 1348  
1348 1349          /*
1349 1350           * Compute the CPU's base interrupt level based on the active
1350 1351           * interrupts.
1351 1352           */
1352 1353          ASSERT(CPU->cpu_intr_actv & (1 << i));
1353 1354          set_base_spl();
1354 1355  
1355 1356          return (itp);
1356 1357  }
1357 1358  
1358 1359  /*
1359 1360   * Create and initialize an interrupt thread.
1360 1361   *      The new thread is linked onto the CPU's pool of interrupt threads.
1361 1362   *      Called at spl7() or better.
1362 1363   */
1363 1364  void
1364 1365  thread_create_intr(struct cpu *cp)
1365 1366  {
1366 1367          kthread_t *tp;
1367 1368  
1368 1369          tp = thread_create(NULL, 0,
1369 1370              (void (*)())thread_create_intr, NULL, 0, &p0, TS_ONPROC, 0);
1370 1371  
1371 1372          /*
1372 1373           * Set the thread in the TS_FREE state.  The state will change
1373 1374           * to TS_ONPROC only while the interrupt is active.  Think of these
1374 1375           * as being on a private free list for the CPU.  Being TS_FREE keeps
1375 1376           * inactive interrupt threads out of debugger thread lists.
1376 1377           *
1377 1378           * We cannot call thread_create with TS_FREE because of the current
1378 1379           * checks there for ONPROC.  Fix this when thread_create takes flags.
1379 1380           */
1380 1381          THREAD_FREEINTR(tp, cp);
1381 1382  
1382 1383          /*
1383 1384           * Nobody should ever reference the credentials of an interrupt
1384 1385           * thread so make it NULL to catch any such references.
1385 1386           */
1386 1387          tp->t_cred = NULL;
1387 1388          tp->t_flag |= T_INTR_THREAD;
1388 1389          tp->t_cpu = cp;
1389 1390          tp->t_bound_cpu = cp;
1390 1391          tp->t_disp_queue = cp->cpu_disp;
1391 1392          tp->t_affinitycnt = 1;
1392 1393          tp->t_preempt = 1;
1393 1394  
1394 1395          /*
1395 1396           * Don't make a user-requested binding on this thread so that
1396 1397           * the processor can be offlined.
1397 1398           */
1398 1399          tp->t_bind_cpu = PBIND_NONE;    /* no USER-requested binding */
1399 1400          tp->t_bind_pset = PS_NONE;
1400 1401  
1401 1402  #if defined(__i386) || defined(__amd64)
1402 1403          tp->t_stk -= STACK_ALIGN;
1403 1404          *(tp->t_stk) = 0;               /* terminate intr thread stack */
1404 1405  #endif
1405 1406  
1406 1407          /*
1407 1408           * Link onto CPU's interrupt pool.
1408 1409           */
1409 1410          tp->t_link = cp->cpu_intr_thread;
1410 1411          cp->cpu_intr_thread = tp;
1411 1412  }
1412 1413  
1413 1414  /*
1414 1415   * TSD -- THREAD SPECIFIC DATA
1415 1416   */
1416 1417  static kmutex_t         tsd_mutex;       /* linked list spin lock */
1417 1418  static uint_t           tsd_nkeys;       /* size of destructor array */
1418 1419  /* per-key destructor funcs */
1419 1420  static void             (**tsd_destructor)(void *);
1420 1421  /* list of tsd_thread's */
1421 1422  static struct tsd_thread        *tsd_list;
1422 1423  
1423 1424  /*
1424 1425   * Default destructor
1425 1426   *      Needed because NULL destructor means that the key is unused
1426 1427   */
1427 1428  /* ARGSUSED */
1428 1429  void
1429 1430  tsd_defaultdestructor(void *value)
1430 1431  {}
1431 1432  
1432 1433  /*
1433 1434   * Create a key (index into per thread array)
1434 1435   *      Locks out tsd_create, tsd_destroy, and tsd_exit
1435 1436   *      May allocate memory with lock held
1436 1437   */
1437 1438  void
1438 1439  tsd_create(uint_t *keyp, void (*destructor)(void *))
1439 1440  {
1440 1441          int     i;
1441 1442          uint_t  nkeys;
1442 1443  
1443 1444          /*
1444 1445           * if key is allocated, do nothing
1445 1446           */
1446 1447          mutex_enter(&tsd_mutex);
1447 1448          if (*keyp) {
1448 1449                  mutex_exit(&tsd_mutex);
1449 1450                  return;
1450 1451          }
1451 1452          /*
1452 1453           * find an unused key
1453 1454           */
1454 1455          if (destructor == NULL)
1455 1456                  destructor = tsd_defaultdestructor;
1456 1457  
1457 1458          for (i = 0; i < tsd_nkeys; ++i)
1458 1459                  if (tsd_destructor[i] == NULL)
1459 1460                          break;
1460 1461  
1461 1462          /*
1462 1463           * if no unused keys, increase the size of the destructor array
1463 1464           */
1464 1465          if (i == tsd_nkeys) {
1465 1466                  if ((nkeys = (tsd_nkeys << 1)) == 0)
1466 1467                          nkeys = 1;
1467 1468                  tsd_destructor =
1468 1469                      (void (**)(void *))tsd_realloc((void *)tsd_destructor,
1469 1470                      (size_t)(tsd_nkeys * sizeof (void (*)(void *))),
1470 1471                      (size_t)(nkeys * sizeof (void (*)(void *))));
1471 1472                  tsd_nkeys = nkeys;
1472 1473          }
1473 1474  
1474 1475          /*
1475 1476           * allocate the next available unused key
1476 1477           */
1477 1478          tsd_destructor[i] = destructor;
1478 1479          *keyp = i + 1;
1479 1480          mutex_exit(&tsd_mutex);
1480 1481  }
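The TSD interfaces above are normally consumed in a create/set/get pattern. The sketch below shows how a hypothetical module might do so; the struct, key, and function names are illustrative and not part of this file:

	struct example_state {
		int	es_count;
	};

	static uint_t example_tsd_key;		/* zero means "not yet created" */

	static void
	example_tsd_destructor(void *value)
	{
		kmem_free(value, sizeof (struct example_state));
	}

	static void
	example_tsd_use(void)
	{
		struct example_state *sp;

		tsd_create(&example_tsd_key, example_tsd_destructor);

		if ((sp = tsd_get(example_tsd_key)) == NULL) {
			sp = kmem_zalloc(sizeof (*sp), KM_SLEEP);
			(void) tsd_set(example_tsd_key, sp);
		}
		sp->es_count++;			/* per-thread state */
	}

The destructor runs from tsd_exit() as each thread exits, or from tsd_destroy() if the key is torn down first.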
1481 1482  
1482 1483  /*
1483 1484   * Destroy a key -- this is for unloadable modules
1484 1485   *
1485 1486   * Assumes that the caller is preventing tsd_set and tsd_get
1486 1487   * Locks out tsd_create, tsd_destroy, and tsd_exit
1487 1488   * May free memory with lock held
1488 1489   */
1489 1490  void
1490 1491  tsd_destroy(uint_t *keyp)
1491 1492  {
1492 1493          uint_t key;
1493 1494          struct tsd_thread *tsd;
1494 1495  
1495 1496          /*
1496 1497           * protect the key namespace and our destructor lists
1497 1498           */
1498 1499          mutex_enter(&tsd_mutex);
1499 1500          key = *keyp;
1500 1501          *keyp = 0;
1501 1502  
1502 1503          ASSERT(key <= tsd_nkeys);
1503 1504  
1504 1505          /*
1505 1506           * if the key is valid
1506 1507           */
1507 1508          if (key != 0) {
1508 1509                  uint_t k = key - 1;
1509 1510                  /*
1510 1511                   * for every thread with TSD, call key's destructor
1511 1512                   */
1512 1513                  for (tsd = tsd_list; tsd; tsd = tsd->ts_next) {
1513 1514                          /*
1514 1515                           * no TSD for key in this thread
1515 1516                           */
1516 1517                          if (key > tsd->ts_nkeys)
1517 1518                                  continue;
1518 1519                          /*
1519 1520                           * call destructor for key
1520 1521                           */
1521 1522                          if (tsd->ts_value[k] && tsd_destructor[k])
1522 1523                                  (*tsd_destructor[k])(tsd->ts_value[k]);
1523 1524                          /*
1524 1525                           * reset value for key
1525 1526                           */
1526 1527                          tsd->ts_value[k] = NULL;
1527 1528                  }
1528 1529                  /*
1529 1530                   * actually free the key (NULL destructor == unused)
1530 1531                   */
1531 1532                  tsd_destructor[k] = NULL;
1532 1533          }
1533 1534  
1534 1535          mutex_exit(&tsd_mutex);
1535 1536  }
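An unloadable module would typically pair the sketch above with a teardown path that releases the key once no thread can still reach it, for example from its _fini() routine (again purely illustrative, reusing the hypothetical example_tsd_key):

	static void
	example_tsd_teardown(void)
	{
		/* runs the destructor on every thread's value, then frees the key slot */
		tsd_destroy(&example_tsd_key);
	}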
1536 1537  
1537 1538  /*
1538 1539   * Quickly return the per thread value that was stored with the specified key
1539 1540   * Assumes the caller is protecting key from tsd_create and tsd_destroy
1540 1541   */
1541 1542  void *
1542 1543  tsd_get(uint_t key)
1543 1544  {
1544 1545          return (tsd_agent_get(curthread, key));
1545 1546  }
1546 1547  
1547 1548  /*
1548 1549   * Set a per thread value indexed with the specified key
1549 1550   */
1550 1551  int
1551 1552  tsd_set(uint_t key, void *value)
1552 1553  {
1553 1554          return (tsd_agent_set(curthread, key, value));
1554 1555  }
1555 1556  
1556 1557  /*
1557 1558   * Like tsd_get(), except that the agent lwp can get the tsd of
1558 1559   * another thread in the same process (the agent thread only runs when the
1559 1560   * process is completely stopped by /proc), or syslwp is creating a new lwp.
1560 1561   */
1561 1562  void *
1562 1563  tsd_agent_get(kthread_t *t, uint_t key)
1563 1564  {
1564 1565          struct tsd_thread *tsd = t->t_tsd;
1565 1566  
1566 1567          ASSERT(t == curthread ||
1567 1568              ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
1568 1569  
1569 1570          if (key && tsd != NULL && key <= tsd->ts_nkeys)
1570 1571                  return (tsd->ts_value[key - 1]);
1571 1572          return (NULL);
1572 1573  }
1573 1574  
1574 1575  /*
1575 1576   * Like tsd_set(), except that the agent lwp can set the tsd of
1576 1577   * another thread in the same process, or syslwp can set the tsd
1577 1578   * of a thread it's in the middle of creating.
1578 1579   *
1579 1580   * Assumes the caller is protecting key from tsd_create and tsd_destroy
1580 1581   * May lock out tsd_destroy (and tsd_create), may allocate memory with
1581 1582   * lock held
1582 1583   */
1583 1584  int
1584 1585  tsd_agent_set(kthread_t *t, uint_t key, void *value)
1585 1586  {
1586 1587          struct tsd_thread *tsd = t->t_tsd;
1587 1588  
1588 1589          ASSERT(t == curthread ||
1589 1590              ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
1590 1591  
1591 1592          if (key == 0)
1592 1593                  return (EINVAL);
1593 1594          if (tsd == NULL)
1594 1595                  tsd = t->t_tsd = kmem_zalloc(sizeof (*tsd), KM_SLEEP);
1595 1596          if (key <= tsd->ts_nkeys) {
1596 1597                  tsd->ts_value[key - 1] = value;
1597 1598                  return (0);
1598 1599          }
1599 1600  
1600 1601          ASSERT(key <= tsd_nkeys);
1601 1602  
1602 1603          /*
1603 1604           * lock out tsd_destroy()
1604 1605           */
1605 1606          mutex_enter(&tsd_mutex);
1606 1607          if (tsd->ts_nkeys == 0) {
1607 1608                  /*
1608 1609                   * Link onto list of threads with TSD
1609 1610                   */
1610 1611                  if ((tsd->ts_next = tsd_list) != NULL)
1611 1612                          tsd_list->ts_prev = tsd;
1612 1613                  tsd_list = tsd;
1613 1614          }
1614 1615  
1615 1616          /*
1616 1617           * Allocate thread local storage and set the value for key
1617 1618           */
1618 1619          tsd->ts_value = tsd_realloc(tsd->ts_value,
1619 1620              tsd->ts_nkeys * sizeof (void *),
1620 1621              key * sizeof (void *));
1621 1622          tsd->ts_nkeys = key;
1622 1623          tsd->ts_value[key - 1] = value;
1623 1624          mutex_exit(&tsd_mutex);
1624 1625  
1625 1626          return (0);
1626 1627  }
1627 1628  
1628 1629  
1629 1630  /*
1630 1631   * Return the per thread value that was stored with the specified key
1631 1632   *      If necessary, create the key and the value
1632 1633   *      Assumes the caller is protecting *keyp from tsd_destroy
1633 1634   */
1634 1635  void *
1635 1636  tsd_getcreate(uint_t *keyp, void (*destroy)(void *), void *(*allocate)(void))
1636 1637  {
1637 1638          void *value;
1638 1639          uint_t key = *keyp;
1639 1640          struct tsd_thread *tsd = curthread->t_tsd;
1640 1641  
1641 1642          if (tsd == NULL)
1642 1643                  tsd = curthread->t_tsd = kmem_zalloc(sizeof (*tsd), KM_SLEEP);
1643 1644          if (key && key <= tsd->ts_nkeys && (value = tsd->ts_value[key - 1]))
1644 1645                  return (value);
1645 1646          if (key == 0)
1646 1647                  tsd_create(keyp, destroy);
1647 1648          (void) tsd_set(*keyp, value = (*allocate)());
1648 1649  
1649 1650          return (value);
1650 1651  }
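tsd_getcreate() collapses the create-then-set dance from the earlier sketch into one lazy lookup: the allocate callback runs only when the calling thread has no value yet. A hypothetical accessor built on the illustrative names introduced above might look like:

	static void *
	example_state_alloc(void)
	{
		return (kmem_zalloc(sizeof (struct example_state), KM_SLEEP));
	}

	static struct example_state *
	example_state(void)
	{
		return (tsd_getcreate(&example_tsd_key, example_tsd_destructor,
		    example_state_alloc));
	}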
1651 1652  
1652 1653  /*
1653 1654   * Called from thread_exit() to run the destructor function for each tsd
1654 1655   *      Locks out tsd_create and tsd_destroy
1655 1656   *      Assumes that the destructor *DOES NOT* use tsd
1656 1657   */
1657 1658  void
1658 1659  tsd_exit(void)
1659 1660  {
1660 1661          int i;
1661 1662          struct tsd_thread *tsd = curthread->t_tsd;
1662 1663  
1663 1664          if (tsd == NULL)
1664 1665                  return;
1665 1666  
1666 1667          if (tsd->ts_nkeys == 0) {
1667 1668                  kmem_free(tsd, sizeof (*tsd));
1668 1669                  curthread->t_tsd = NULL;
1669 1670                  return;
1670 1671          }
1671 1672  
1672 1673          /*
1673 1674           * lock out tsd_create and tsd_destroy, call
1674 1675           * the destructor, and mark the value as destroyed.
1675 1676           */
1676 1677          mutex_enter(&tsd_mutex);
1677 1678  
1678 1679          for (i = 0; i < tsd->ts_nkeys; i++) {
1679 1680                  if (tsd->ts_value[i] && tsd_destructor[i])
1680 1681                          (*tsd_destructor[i])(tsd->ts_value[i]);
1681 1682                  tsd->ts_value[i] = NULL;
1682 1683          }
1683 1684  
1684 1685          /*
1685 1686           * remove from linked list of threads with TSD
1686 1687           */
1687 1688          if (tsd->ts_next)
1688 1689                  tsd->ts_next->ts_prev = tsd->ts_prev;
1689 1690          if (tsd->ts_prev)
1690 1691                  tsd->ts_prev->ts_next = tsd->ts_next;
1691 1692          if (tsd_list == tsd)
1692 1693                  tsd_list = tsd->ts_next;
1693 1694  
1694 1695          mutex_exit(&tsd_mutex);
1695 1696  
1696 1697          /*
1697 1698           * free up the TSD
1698 1699           */
1699 1700          kmem_free(tsd->ts_value, tsd->ts_nkeys * sizeof (void *));
1700 1701          kmem_free(tsd, sizeof (struct tsd_thread));
1701 1702          curthread->t_tsd = NULL;
1702 1703  }
1703 1704  
1704 1705  /*
1705 1706   * realloc
1706 1707   */
1707 1708  static void *
1708 1709  tsd_realloc(void *old, size_t osize, size_t nsize)
1709 1710  {
1710 1711          void *new;
1711 1712  
1712 1713          new = kmem_zalloc(nsize, KM_SLEEP);
1713 1714          if (old) {
1714 1715                  bcopy(old, new, osize);
1715 1716                  kmem_free(old, osize);
1716 1717          }
1717 1718          return (new);
1718 1719  }
1719 1720  
1720 1721  /*
1721 1722   * Return non-zero if an interrupt is being serviced.
1722 1723   */
1723 1724  int
1724 1725  servicing_interrupt()
1725 1726  {
1726 1727          int onintr = 0;
1727 1728  
1728 1729          /* Are we an interrupt thread */
1729 1730          if (curthread->t_flag & T_INTR_THREAD)
1730 1731                  return (1);
1731 1732          /* Are we servicing a high level interrupt? */
1732 1733          if (CPU_ON_INTR(CPU)) {
1733 1734                  kpreempt_disable();
1734 1735                  onintr = CPU_ON_INTR(CPU);
1735 1736                  kpreempt_enable();
1736 1737          }
1737 1738          return (onintr);
1738 1739  }
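A common consumer pattern for servicing_interrupt() is to avoid sleeping while on an interrupt stack, for example when choosing a kmem allocation flag. The helper below is a sketch, not an interface defined in this file:

	static void *
	example_alloc(size_t size)
	{
		/* KM_NOSLEEP may return NULL; interrupt-context callers must cope */
		return (kmem_zalloc(size,
		    servicing_interrupt() ? KM_NOSLEEP : KM_SLEEP));
	}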
1739 1740  
1740 1741  
1741 1742  /*
1742 1743   * Change the dispatch priority of a thread in the system.
1743 1744   * Used when raising or lowering a thread's priority.
1744 1745   * (E.g., priority inheritance)
1745 1746   *
1746 1747   * Since threads are queued according to their priority, we
1747 1748   * must check the thread's state to determine whether it
1748 1749   * is on a queue somewhere. If it is, we've got to:
1749 1750   *
1750 1751   *      o Dequeue the thread.
1751 1752   *      o Change its effective priority.
1752 1753   *      o Enqueue the thread.
1753 1754   *
1754 1755   * Assumptions: The thread whose priority we wish to change
1755 1756   * must be locked before we call thread_change_(e)pri().
1756 1757   * The thread_change_(e)pri() function doesn't drop the thread
1757 1758   * lock--that must be done by its caller.
1758 1759   */
1759 1760  void
1760 1761  thread_change_epri(kthread_t *t, pri_t disp_pri)
1761 1762  {
1762 1763          uint_t  state;
1763 1764  
1764 1765          ASSERT(THREAD_LOCK_HELD(t));
1765 1766  
1766 1767          /*
1767 1768           * If the inherited priority hasn't actually changed,
1768 1769           * just return.
1769 1770           */
1770 1771          if (t->t_epri == disp_pri)
1771 1772                  return;
1772 1773  
1773 1774          state = t->t_state;
1774 1775  
1775 1776          /*
1776 1777           * If it's not on a queue, change the priority with impunity.
1777 1778           */
1778 1779          if ((state & (TS_SLEEP | TS_RUN | TS_WAIT)) == 0) {
1779 1780                  t->t_epri = disp_pri;
1780 1781                  if (state == TS_ONPROC) {
1781 1782                          cpu_t *cp = t->t_disp_queue->disp_cpu;
1782 1783  
1783 1784                          if (t == cp->cpu_dispthread)
1784 1785                                  cp->cpu_dispatch_pri = DISP_PRIO(t);
1785 1786                  }
1786 1787          } else if (state == TS_SLEEP) {
1787 1788                  /*
1788 1789                   * Take the thread out of its sleep queue.
1789 1790                   * Change the inherited priority.
1790 1791                   * Re-enqueue the thread.
1791 1792                   * Each synchronization object exports a function
1792 1793                   * to do this in an appropriate manner.
1793 1794                   */
1794 1795                  SOBJ_CHANGE_EPRI(t->t_sobj_ops, t, disp_pri);
1795 1796          } else if (state == TS_WAIT) {
1796 1797                  /*
1797 1798                   * Re-enqueue a thread on the wait queue if its
1798 1799                   * effective priority needs to change.
1799 1800                   */
1800 1801                  if (disp_pri != t->t_epri)
1801 1802                          waitq_change_pri(t, disp_pri);
1802 1803          } else {
1803 1804                  /*
1804 1805                   * The thread is on a run queue.
1805 1806                   * Note: setbackdq() may not put the thread
1806 1807                   * back on the same run queue where it originally
1807 1808                   * resided.
1808 1809                   */
1809 1810                  (void) dispdeq(t);
1810 1811                  t->t_epri = disp_pri;
1811 1812                  setbackdq(t);
1812 1813          }
1813 1814          schedctl_set_cidpri(t);
1814 1815  }
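The locking contract spelled out above can be summarized with a minimal, hypothetical caller: the thread lock is taken before the call and released by the caller, because thread_change_epri() never drops it:

	static void
	example_inherit_pri(kthread_t *t, pri_t new_epri)
	{
		thread_lock(t);
		thread_change_epri(t, new_epri);	/* requeues t if necessary */
		thread_unlock(t);
	}

Real priority-inheritance callers (e.g. the turnstile code) already hold the thread lock as part of a larger protocol; this sketch only illustrates the lock ownership rule.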
1815 1816  
1816 1817  /*
1817 1818   * Function: Change the t_pri field of a thread.
1818 1819   * Side Effects: Adjust the thread ordering on a run queue
1819 1820   *               or sleep queue, if necessary.
1820 1821   * Returns: 1 if the thread was on a run queue, else 0.
1821 1822   */
1822 1823  int
1823 1824  thread_change_pri(kthread_t *t, pri_t disp_pri, int front)
1824 1825  {
1825 1826          uint_t  state;
1826 1827          int     on_rq = 0;
1827 1828  
1828 1829          ASSERT(THREAD_LOCK_HELD(t));
1829 1830  
1830 1831          state = t->t_state;
1831 1832          THREAD_WILLCHANGE_PRI(t, disp_pri);
1832 1833  
1833 1834          /*
1834 1835           * If it's not on a queue, change the priority with impunity.
1835 1836           */
1836 1837          if ((state & (TS_SLEEP | TS_RUN | TS_WAIT)) == 0) {
1837 1838                  t->t_pri = disp_pri;
1838 1839  
1839 1840                  if (state == TS_ONPROC) {
1840 1841                          cpu_t *cp = t->t_disp_queue->disp_cpu;
1841 1842  
1842 1843                          if (t == cp->cpu_dispthread)
1843 1844                                  cp->cpu_dispatch_pri = DISP_PRIO(t);
1844 1845                  }
1845 1846          } else if (state == TS_SLEEP) {
1846 1847                  /*
1847 1848                   * If the priority has changed, take the thread out of
1848 1849                   * its sleep queue and change the priority.
1849 1850                   * Re-enqueue the thread.
1850 1851                   * Each synchronization object exports a function
1851 1852                   * to do this in an appropriate manner.
1852 1853                   */
1853 1854                  if (disp_pri != t->t_pri)
1854 1855                          SOBJ_CHANGE_PRI(t->t_sobj_ops, t, disp_pri);
1855 1856          } else if (state == TS_WAIT) {
1856 1857                  /*
1857 1858                   * Re-enqueue a thread on the wait queue if its
1858 1859                   * priority needs to change.
1859 1860                   */
1860 1861                  if (disp_pri != t->t_pri)
1861 1862                          waitq_change_pri(t, disp_pri);
1862 1863          } else {
1863 1864                  /*
1864 1865                   * The thread is on a run queue.
1865 1866                   * Note: setbackdq() may not put the thread
1866 1867                   * back on the same run queue where it originally
1867 1868                   * resided.
1868 1869                   *
1869 1870                   * We still requeue the thread even if the priority
1870 1871                   * is unchanged to preserve round-robin (and other)
1871 1872                   * effects between threads of the same priority.
1872 1873                   */
1873 1874                  on_rq = dispdeq(t);
1874 1875                  ASSERT(on_rq);
1875 1876                  t->t_pri = disp_pri;
1876 1877                  if (front) {
1877 1878                          setfrontdq(t);
1878 1879                  } else {
1879 1880                          setbackdq(t);
1880 1881                  }
1881 1882          }
1882 1883          schedctl_set_cidpri(t);
1883 1884          return (on_rq);
1884 1885  }
1885 1886  
1886 1887  /*
1887 1888   * If the tunable kmem_stackinfo is set, fill the kernel thread stack with a
1888 1889   * specific pattern.
1889 1890   */
1890 1891  static void
1891 1892  stkinfo_begin(kthread_t *t)
1892 1893  {
1893 1894          caddr_t start;  /* stack start */
1894 1895          caddr_t end;    /* stack end  */
1895 1896          uint64_t *ptr;  /* pattern pointer */
1896 1897  
1897 1898          /*
1898 1899           * Stack grows up or down, see thread_create(),
1899 1900           * compute stack memory area start and end (start < end).
1900 1901           */
1901 1902          if (t->t_stk > t->t_stkbase) {
1902 1903                  /* stack grows down */
1903 1904                  start = t->t_stkbase;
1904 1905                  end = t->t_stk;
1905 1906          } else {
1906 1907                  /* stack grows up */
1907 1908                  start = t->t_stk;
1908 1909                  end = t->t_stkbase;
1909 1910          }
1910 1911  
1911 1912          /*
1912 1913           * Stackinfo pattern size is 8 bytes. Ensure proper 8 bytes
1913 1914           * alignment for start and end in stack area boundaries
1914 1915           * (protection against corrupt t_stkbase/t_stk data).
1915 1916           */
1916 1917          if ((((uintptr_t)start) & 0x7) != 0) {
1917 1918                  start = (caddr_t)((((uintptr_t)start) & (~0x7)) + 8);
1918 1919          }
1919 1920          end = (caddr_t)(((uintptr_t)end) & (~0x7));
1920 1921  
1921 1922          if ((end <= start) || (end - start) > (1024 * 1024)) {
1922 1923                  /* negative or stack size > 1 meg, assume bogus */
1923 1924                  return;
1924 1925          }
1925 1926  
1926 1927          /* fill stack area with a pattern (instead of zeros) */
1927 1928          ptr = (uint64_t *)((void *)start);
1928 1929          while (ptr < (uint64_t *)((void *)end)) {
1929 1930                  *ptr++ = KMEM_STKINFO_PATTERN;
1930 1931          }
1931 1932  }
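The open-coded 8-byte rounding above (and the identical copy in stkinfo_end()) is simply align-up for start and align-down for end. Assuming the P2ROUNDUP()/P2ALIGN() macros from <sys/sysmacros.h>, the same arithmetic could be written as the following illustrative fragment, shown only to clarify the computation:

	start = (caddr_t)P2ROUNDUP((uintptr_t)start, 8);	/* round start up */
	end = (caddr_t)P2ALIGN((uintptr_t)end, 8);		/* round end down */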
1932 1933  
1933 1934  
1934 1935  /*
1935 1936   * If the tunable kmem_stackinfo is set, create the stackinfo log if it does
1936 1937   * not already exist, compute the percentage of kernel stack actually used,
1937 1938   * and record it in the log if it ranks among the highest usages seen so far.
1938 1939   */
1939 1940  static void
1940 1941  stkinfo_end(kthread_t *t)
1941 1942  {
1942 1943          caddr_t start;  /* stack start */
1943 1944          caddr_t end;    /* stack end  */
1944 1945          uint64_t *ptr;  /* pattern pointer */
1945 1946          size_t stksz;   /* stack size */
1946 1947          size_t smallest = 0;
1947 1948          size_t percent = 0;
1948 1949          uint_t index = 0;
1949 1950          uint_t i;
1950 1951          static size_t smallest_percent = (size_t)-1;
1951 1952          static uint_t full = 0;
1952 1953  
1953 1954          /* create the stackinfo log, if doesn't already exist */
1954 1955          mutex_enter(&kmem_stkinfo_lock);
1955 1956          if (kmem_stkinfo_log == NULL) {
1956 1957                  kmem_stkinfo_log = (kmem_stkinfo_t *)
1957 1958                      kmem_zalloc(KMEM_STKINFO_LOG_SIZE *
1958 1959                      (sizeof (kmem_stkinfo_t)), KM_NOSLEEP);
1959 1960                  if (kmem_stkinfo_log == NULL) {
1960 1961                          mutex_exit(&kmem_stkinfo_lock);
1961 1962                          return;
1962 1963                  }
1963 1964          }
1964 1965          mutex_exit(&kmem_stkinfo_lock);
1965 1966  
1966 1967          /*
1967 1968           * Stack grows up or down, see thread_create(),
1968 1969           * compute stack memory area start and end (start < end).
1969 1970           */
1970 1971          if (t->t_stk > t->t_stkbase) {
1971 1972                  /* stack grows down */
1972 1973                  start = t->t_stkbase;
1973 1974                  end = t->t_stk;
1974 1975          } else {
1975 1976                  /* stack grows up */
1976 1977                  start = t->t_stk;
1977 1978                  end = t->t_stkbase;
1978 1979          }
1979 1980  
1980 1981          /* stack size as found in kthread_t */
1981 1982          stksz = end - start;
1982 1983  
1983 1984          /*
1984 1985           * Stackinfo pattern size is 8 bytes. Ensure proper 8 bytes
1985 1986           * alignment for start and end in stack area boundaries
1986 1987           * (protection against corrupt t_stkbase/t_stk data).
1987 1988           */
1988 1989          if ((((uintptr_t)start) & 0x7) != 0) {
1989 1990                  start = (caddr_t)((((uintptr_t)start) & (~0x7)) + 8);
1990 1991          }
1991 1992          end = (caddr_t)(((uintptr_t)end) & (~0x7));
1992 1993  
1993 1994          if ((end <= start) || (end - start) > (1024 * 1024)) {
1994 1995                  /* negative or stack size > 1 meg, assume bogus */
1995 1996                  return;
1996 1997          }
1997 1998  
1998 1999          /* search until no pattern in the stack */
1999 2000          if (t->t_stk > t->t_stkbase) {
2000 2001                  /* stack grows down */
2001 2002  #if defined(__i386) || defined(__amd64)
2002 2003                  /*
2003 2004                   * 6 longs are pushed on stack, see thread_load(). Skip
2004 2005                   * them, so if kthread has never run, percent is zero.
2005 2006                   * 8-byte alignment is preserved for a 32-bit kernel,
2006 2007                   * 6 x 4 = 24, 24 is a multiple of 8.
2007 2008                   *
2008 2009                   */
2009 2010                  end -= (6 * sizeof (long));
2010 2011  #endif
2011 2012                  ptr = (uint64_t *)((void *)start);
2012 2013                  while (ptr < (uint64_t *)((void *)end)) {
2013 2014                          if (*ptr != KMEM_STKINFO_PATTERN) {
2014 2015                                  percent = stkinfo_percent(end,
2015 2016                                      start, (caddr_t)ptr);
2016 2017                                  break;
2017 2018                          }
2018 2019                          ptr++;
2019 2020                  }
2020 2021          } else {
2021 2022                  /* stack grows up */
2022 2023                  ptr = (uint64_t *)((void *)end);
2023 2024                  ptr--;
2024 2025                  while (ptr >= (uint64_t *)((void *)start)) {
2025 2026                          if (*ptr != KMEM_STKINFO_PATTERN) {
2026 2027                                  percent = stkinfo_percent(start,
2027 2028                                      end, (caddr_t)ptr);
2028 2029                                  break;
2029 2030                          }
2030 2031                          ptr--;
2031 2032                  }
2032 2033          }
2033 2034  
2034 2035          DTRACE_PROBE3(stack__usage, kthread_t *, t,
2035 2036              size_t, stksz, size_t, percent);
2036 2037  
2037 2038          if (percent == 0) {
2038 2039                  return;
2039 2040          }
2040 2041  
2041 2042          mutex_enter(&kmem_stkinfo_lock);
2042 2043          if (full == KMEM_STKINFO_LOG_SIZE && percent < smallest_percent) {
2043 2044                  /*
2044 2045                   * The log is full and already contains the highest values
2045 2046                   */
2046 2047                  mutex_exit(&kmem_stkinfo_lock);
2047 2048                  return;
2048 2049          }
2049 2050  
2050 2051          /* keep a log of the highest used stack */
2051 2052          for (i = 0; i < KMEM_STKINFO_LOG_SIZE; i++) {
2052 2053                  if (kmem_stkinfo_log[i].percent == 0) {
2053 2054                          index = i;
2054 2055                          full++;
2055 2056                          break;
2056 2057                  }
2057 2058                  if (smallest == 0) {
2058 2059                          smallest = kmem_stkinfo_log[i].percent;
2059 2060                          index = i;
2060 2061                          continue;
2061 2062                  }
2062 2063                  if (kmem_stkinfo_log[i].percent < smallest) {
2063 2064                          smallest = kmem_stkinfo_log[i].percent;
2064 2065                          index = i;
2065 2066                  }
2066 2067          }
2067 2068  
2068 2069          if (percent >= kmem_stkinfo_log[index].percent) {
2069 2070                  kmem_stkinfo_log[index].kthread = (caddr_t)t;
2070 2071                  kmem_stkinfo_log[index].t_startpc = (caddr_t)t->t_startpc;
2071 2072                  kmem_stkinfo_log[index].start = start;
2072 2073                  kmem_stkinfo_log[index].stksz = stksz;
2073 2074                  kmem_stkinfo_log[index].percent = percent;
2074 2075                  kmem_stkinfo_log[index].t_tid = t->t_tid;
2075 2076                  kmem_stkinfo_log[index].cmd[0] = '\0';
2076 2077                  if (t->t_tid != 0) {
2077 2078                          stksz = strlen((t->t_procp)->p_user.u_comm);
2078 2079                          if (stksz >= KMEM_STKINFO_STR_SIZE) {
2079 2080                                  stksz = KMEM_STKINFO_STR_SIZE - 1;
2080 2081                                  kmem_stkinfo_log[index].cmd[stksz] = '\0';
2081 2082                          } else {
2082 2083                                  stksz += 1;
2083 2084                          }
2084 2085                          (void) memcpy(kmem_stkinfo_log[index].cmd,
2085 2086                              (t->t_procp)->p_user.u_comm, stksz);
2086 2087                  }
2087 2088                  if (percent < smallest_percent) {
2088 2089                          smallest_percent = percent;
2089 2090                  }
2090 2091          }
2091 2092          mutex_exit(&kmem_stkinfo_lock);
2092 2093  }
2093 2094  
2094 2095  /*
2095 2096   * If the tunable kmem_stackinfo is set, compute the stack utilization percentage.
2096 2097   */
2097 2098  static size_t
2098 2099  stkinfo_percent(caddr_t t_stk, caddr_t t_stkbase, caddr_t sp)
2099 2100  {
2100 2101          size_t percent;
2101 2102          size_t s;
2102 2103  
2103 2104          if (t_stk > t_stkbase) {
2104 2105                  /* stack grows down */
2105 2106                  if (sp > t_stk) {
2106 2107                          return (0);
2107 2108                  }
2108 2109                  if (sp < t_stkbase) {
2109 2110                          return (100);
2110 2111                  }
2111 2112                  percent = t_stk - sp + 1;
2112 2113                  s = t_stk - t_stkbase + 1;
2113 2114          } else {
2114 2115                  /* stack grows up */
2115 2116                  if (sp < t_stk) {
2116 2117                          return (0);
2117 2118                  }
2118 2119                  if (sp > t_stkbase) {
2119 2120                          return (100);
2120 2121                  }
2121 2122                  percent = sp - t_stk + 1;
2122 2123                  s = t_stkbase - t_stk + 1;
2123 2124          }
2124 2125          percent = ((100 * percent) / s) + 1;
2125 2126          if (percent > 100) {
2126 2127                  percent = 100;
2127 2128          }
2128 2129          return (percent);
2129 2130  }
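A worked example of the formula, with hypothetical addresses and a downward-growing stack: if t_stk is 0x2000, t_stkbase is 0x1000, and the deepest word not matching the pattern sits at sp = 0x1c00, then

	percent = 0x2000 - 0x1c00 + 1 = 1025		/* bytes used */
	s       = 0x2000 - 0x1000 + 1 = 4097		/* total stack bytes */
	result  = (100 * 1025) / 4097 + 1 = 26		/* rounded up, capped at 100 */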
  