io-lx-public-vs-joyent Wdiff usr/src/uts/common/os/exit.c

Print this page

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/os/exit.c
          +++ new/usr/src/uts/common/os/exit.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each

↓ open down ↓

13 lines elided

↑ open up ↑

  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  24      - * Copyright 2015 Joyent, Inc. All rights reserved.
       24 + * Copyright 2014 Joyent, Inc. All rights reserved.
  25   25   */
  26   26  
  27   27  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  28   28  
  29   29  #include <sys/types.h>
  30   30  #include <sys/param.h>
  31   31  #include <sys/sysmacros.h>
  32   32  #include <sys/systm.h>
  33   33  #include <sys/cred.h>
  34   34  #include <sys/user.h>

  35   35  #include <sys/errno.h>
  36   36  #include <sys/proc.h>
  37   37  #include <sys/ucontext.h>
  38   38  #include <sys/procfs.h>
  39   39  #include <sys/vnode.h>
  40   40  #include <sys/acct.h>
  41   41  #include <sys/var.h>
  42   42  #include <sys/cmn_err.h>
  43   43  #include <sys/debug.h>
  44   44  #include <sys/wait.h>
  45   45  #include <sys/siginfo.h>
  46   46  #include <sys/procset.h>
  47   47  #include <sys/class.h>
  48   48  #include <sys/file.h>
  49   49  #include <sys/session.h>
  50   50  #include <sys/kmem.h>
  51   51  #include <sys/vtrace.h>
  52   52  #include <sys/prsystm.h>
  53   53  #include <sys/ipc.h>
  54   54  #include <sys/sem_impl.h>
  55   55  #include <c2/audit.h>
  56   56  #include <sys/aio_impl.h>
  57   57  #include <vm/as.h>
  58   58  #include <sys/poll.h>
  59   59  #include <sys/door.h>
  60   60  #include <sys/lwpchan_impl.h>
  61   61  #include <sys/utrap.h>
  62   62  #include <sys/task.h>
  63   63  #include <sys/exacct.h>
  64   64  #include <sys/cyclic.h>
  65   65  #include <sys/schedctl.h>
  66   66  #include <sys/rctl.h>
  67   67  #include <sys/contract_impl.h>
  68   68  #include <sys/contract/process_impl.h>
  69   69  #include <sys/list.h>
  70   70  #include <sys/dtrace.h>
  71   71  #include <sys/pool.h>
  72   72  #include <sys/sdt.h>
  73   73  #include <sys/corectl.h>
  74   74  #include <sys/brand.h>
  75   75  #include <sys/libc_kernel.h>
  76   76  
  77   77  /*
  78   78   * convert code/data pair into old style wait status
            *
            * Only the low 8 bits of "data" are used.  The result depends on "code":
            *   CLD_EXITED                - low byte of data shifted left by 8
            *   CLD_DUMPED                - low byte of data with WCOREFLG or'ed in
            *   CLD_KILLED                - low byte of data, unchanged
            *   CLD_TRAPPED, CLD_STOPPED  - low byte of data shifted left by 8,
            *                               with WSTOPFLG or'ed in
            *   CLD_CONTINUED             - WCONTFLG alone; data is discarded
            * Any other value of "code" is reported through
            * cmn_err(CE_PANIC, ...).
  79   79   */
  80   80  int
  81   81  wstat(int code, int data)
  82   82  {
                   /* Start from the low 8 bits of data (0377 octal == 0xff). */
  83   83          int stat = (data & 0377);
  84   84  
  85   85          switch (code) {
  86   86          case CLD_EXITED:
  87   87                  stat <<= 8;
  88   88                  break;
  89   89          case CLD_DUMPED:
  90   90                  stat |= WCOREFLG;
  91   91                  break;
  92   92          case CLD_KILLED:
                           /* The masked value is returned as is. */
  93   93                  break;
  94   94          case CLD_TRAPPED:
  95   95          case CLD_STOPPED:
  96   96                  stat <<= 8;
  97   97                  stat |= WSTOPFLG;
  98   98                  break;
  99   99          case CLD_CONTINUED:
                           /* Overwrites, rather than or's into, the masked data. */
 100  100                  stat = WCONTFLG;
 101  101                  break;
 102  102          default:
 103  103                  cmn_err(CE_PANIC, "wstat: bad code");
 104  104                  /* NOTREACHED */
 105  105          }
 106  106          return (stat);
 107  107  }
 108  108  
           /*
            * Format a human-readable description of how a process terminated.
            *
            * The text is written into "buf" with snprintf(), bounded by "bufsz",
            * and "buf" itself is returned so the call can be used directly as a
            * printf-style argument.  "why" selects the message (CLD_EXITED,
            * CLD_KILLED or CLD_DUMPED) and "what" is the number printed in it;
            * any other "why" produces a generic message that prints both values.
            *
            * Note the argument order is (what, why).
            */
 109  109  static char *
 110  110  exit_reason(char *buf, size_t bufsz, int what, int why)
 111  111  {
 112  112          switch (why) {
 113  113          case CLD_EXITED:
 114  114                  (void) snprintf(buf, bufsz, "exited with status %d", what);
 115  115                  break;
 116  116          case CLD_KILLED:
 117  117                  (void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
 118  118                  break;
 119  119          case CLD_DUMPED:
 120  120                  (void) snprintf(buf, bufsz, "core dumped on signal %d", what);
 121  121                  break;
 122  122          default:
 123  123                  (void) snprintf(buf, bufsz, "encountered unknown error "
 124  124                      "(%d, %d)", why, what);
 125  125                  break;
 126  126          }
 127  127  
 128  128          return (buf);
 129  129  }
 130  130  
 131  131  /*
 132  132   * exit system call: pass back caller's arg.
            *
            * Calls exit() with CLD_EXITED as the reason ("why") and rval as the
            * accompanying value ("what").
 133  133   */
 134  134  void
 135  135  rexit(int rval)
 136  136  {
 137  137          exit(CLD_EXITED, rval);
 138  138  }
 139  139  
 140  140  /*
 141  141   * Called by proc_exit() when a zone's init exits, presumably because
 142  142   * it failed.  As long as the given zone is still in the "running"
 143  143   * state, we will re-exec() init, but first we need to reset things
 144  144   * which are usually inherited across exec() but will break init's
 145  145   * assumption that it is being exec()'d from a virgin process.  Most
 146  146   * importantly this includes closing all file descriptors (exec only
 147  147   * closes those marked close-on-exec) and resetting signals (exec only
 148  148   * resets handled signals, and we need to clear any signals which
 149  149   * killed init).  Anything else that exec(2) says would be inherited,
 150  150   * but would affect the execution of init, needs to be reset.
            *
            * Arguments: "what" and "why" are used only to build the warning
            * message via exit_reason().  Note the order is (what, why), the
            * reverse of exit() and proc_exit().
            *
            * Returns 0 if exec_init() returned 0, otherwise -1 after warnings
            * have been logged.
 151  151   */
 152  152  static int
 153  153  restart_init(int what, int why)
 154  154  {
 155  155          kthread_t *t = curthread;
 156  156          klwp_t *lwp = ttolwp(t);
 157  157          proc_t *p = ttoproc(t);
 158  158          user_t *up = PTOU(p);
 159  159  
 160  160          vnode_t *oldcd, *oldrd;
 161  161          int i, err;
 162  162          char reason_buf[64];
 163  163  
 164  164          /*
 165  165           * Let zone admin (and global zone admin if this is for a non-global
 166  166           * zone) know that init has failed and will be restarted.
 167  167           */
 168  168          zcmn_err(p->p_zone->zone_id, CE_WARN,
 169  169              "init(1M) %s: restarting automatically",
 170  170              exit_reason(reason_buf, sizeof (reason_buf), what, why));
 171  171  
                   /* reason_buf still holds the text formatted by exit_reason() above. */
 172  172          if (!INGLOBALZONE(p)) {
 173  173                  cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
 174  174                      "restarting automatically",
 175  175                      p->p_zone->zone_name, p->p_pid, reason_buf);
 176  176          }
 177  177  
 178  178          /*
 179  179           * Remove any fpollinfo_t's for this (last) thread from our file
 180  180           * descriptors so closeall() can ASSERT() that they're all gone.
 181  181           * Then close all open file descriptors in the process.
 182  182           */
 183  183          pollcleanup();
 184  184          closeall(P_FINFO(p));
 185  185  
 186  186          /*
 187  187           * Grab p_lock and begin clearing miscellaneous global process
 188  188           * state that needs to be reset before we exec the new init(1M).
 189  189           */
 190  190  
 191  191          mutex_enter(&p->p_lock);
 192  192          prbarrier(p);
 193  193  
                   /*
                    * Drop the SKILLED, SEXTKILLED, SEXITING and SDOCORE flags and
                    * reset u_cmask to CMASK.
                    */
 194  194          p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
 195  195          up->u_cmask = CMASK;
 196  196  
                   /* Empty this thread's t_hold, t_sig and t_extsig signal sets. */
 197  197          sigemptyset(&t->t_hold);
 198  198          sigemptyset(&t->t_sig);
 199  199          sigemptyset(&t->t_extsig);
 200  200  
                   /* Empty the process-level p_sig and p_extsig signal sets. */
 201  201          sigemptyset(&p->p_sig);
 202  202          sigemptyset(&p->p_extsig);
 203  203  
                   /*
                    * sigdelq() is called once for this thread and once with a NULL
                    * thread (presumably the process-wide queue -- confirm against
                    * sigdelq()).
                    */
 204  204          sigdelq(p, t, 0);
 205  205          sigdelq(p, NULL, 0);
 206  206  
 207  207          if (p->p_killsqp) {
 208  208                  siginfofree(p->p_killsqp);
 209  209                  p->p_killsqp = NULL;
 210  210          }
 211  211  
 212  212          /*
 213  213           * Reset any signals that are ignored back to the default disposition.
 214  214           * Other u_signal members will be cleared when exec calls sigdefault().
                    * u_signal[] and u_sigmask[] are indexed by signal number minus one.
 215  215           */
 216  216          for (i = 1; i < NSIG; i++) {
 217  217                  if (up->u_signal[i - 1] == SIG_IGN) {
 218  218                          up->u_signal[i - 1] = SIG_DFL;
 219  219                          sigemptyset(&up->u_sigmask[i - 1]);
 220  220                  }
 221  221          }
 222  222  
 223  223          /*
 224  224           * Clear the current signal, any signal info associated with it, and
 225  225           * any signal information from contracts and/or contract templates.
 226  226           */
 227  227          lwp->lwp_cursig = 0;
 228  228          lwp->lwp_extsig = 0;
 229  229          if (lwp->lwp_curinfo != NULL) {
 230  230                  siginfofree(lwp->lwp_curinfo);
 231  231                  lwp->lwp_curinfo = NULL;
 232  232          }
 233  233          lwp_ctmpl_clear(lwp, B_FALSE);
 234  234  
 235  235          /*
 236  236           * Reset both the process root directory and the current working
 237  237           * directory to the root of the zone just as we do during boot.
                    * One hold is taken on zone_rootvp for each of the two pointers
                    * stored below; the previous vnodes are remembered in oldrd and
                    * oldcd and released only after p_lock has been dropped.
 238  238           */
 239  239          VN_HOLD(p->p_zone->zone_rootvp);
 240  240          oldrd = up->u_rdir;
 241  241          up->u_rdir = p->p_zone->zone_rootvp;
 242  242  
 243  243          VN_HOLD(p->p_zone->zone_rootvp);
 244  244          oldcd = up->u_cdir;
 245  245          up->u_cdir = p->p_zone->zone_rootvp;
 246  246  
                   /* Release the u_cwd refstr, if one is set. */
 247  247          if (up->u_cwd != NULL) {
 248  248                  refstr_rele(up->u_cwd);
 249  249                  up->u_cwd = NULL;
 250  250          }
 251  251  
 252  252          mutex_exit(&p->p_lock);
 253  253  
 254  254          if (oldrd != NULL)
 255  255                  VN_RELE(oldrd);
 256  256          if (oldcd != NULL)
 257  257                  VN_RELE(oldcd);
 258  258  
 259  259          /* Free the controlling tty.  (freectty() always assumes curproc.) */
 260  260          ASSERT(p == curproc);
 261  261          (void) freectty(B_TRUE);
 262  262  
 263  263          /*
 264  264           * Now exec() the new init(1M) on top of the current process.  If we
 265  265           * succeed, the caller will treat this like a successful system call.
 266  266           * If we fail, we issue messages and the caller will proceed with exit.
 267  267           */
 268  268          err = exec_init(p->p_zone->zone_initname, NULL);
 269  269  
 270  270          if (err == 0)
 271  271                  return (0);
 272  272  
                   /*
                    * exec_init() failed: warn through zcmn_err() for the zone and,
                    * for a non-global zone, through cmn_err() as well.
                    */
 273  273          zcmn_err(p->p_zone->zone_id, CE_WARN,
 274  274              "failed to restart init(1M) (err=%d): system reboot required", err);
 275  275  
 276  276          if (!INGLOBALZONE(p)) {
 277  277                  cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
 278  278                      "(pid %d, err=%d): zoneadm(1M) boot required",
 279  279                      p->p_zone->zone_name, p->p_pid, err);
 280  280          }
 281  281  
 282  282          return (-1);
 283  283  }
 284  284  
 285  285  /*
 286  286   * Release resources.
 287  287   * Enter zombie state.
 288  288   * Wake up parent and init processes,
 289  289   * and dispose of children.
            *
            * "why" and "what" are handed to proc_exit() unchanged.  If proc_exit()
            * returns 0 this function simply returns to its caller; otherwise it
            * takes curproc->p_lock and calls lwp_exit().
 290  290   */
 291  291  void
 292  292  exit(int why, int what)
 293  293  {
 294  294          /*
 295  295           * If proc_exit() fails, then some other lwp in the process
 296  296           * got there first.  We just have to call lwp_exit() to allow
 297  297           * the other lwp to finish exiting the process.  Otherwise we're
 298  298           * restarting init, and should return.
 299  299           */
 300  300          if (proc_exit(why, what) != 0) {
                           /* lwp_exit() is entered with p_lock held; it is not dropped here. */
 301  301                  mutex_enter(&curproc->p_lock);
 302  302                  ASSERT(curproc->p_flag & SEXITLWPS);
 303  303                  lwp_exit();
 304  304                  /* NOTREACHED */
 305  305          }
 306  306  }
 307  307  
 308  308  /*
 309  309   * Set the SEXITING flag on the process, after making sure /proc does
 310  310   * not have it locked.  This is done in more places than proc_exit(),
 311  311   * so it is a separate function.
            *
            * p->p_lock is acquired and released internally, so the whole
            * sequence runs with that lock held.
 312  312   */
 313  313  void
 314  314  proc_is_exiting(proc_t *p)
 315  315  {
 316  316          mutex_enter(&p->p_lock);
                   /* prbarrier() runs under p_lock, before the flag is changed. */
 317  317          prbarrier(p);
 318  318          p->p_flag |= SEXITING;
 319  319          mutex_exit(&p->p_lock);
 320  320  }
 321  321  
 322  322  /*
 323  323   * Return value:
 324  324   *   1 - exitlwps() failed, call (or continue) lwp_exit()
 325  325   *   0 - restarting init.  Return through system call path
 326  326   */
 327  327  int
 328  328  proc_exit(int why, int what)
 329  329  {
 330  330          kthread_t *t = curthread;
 331  331          klwp_t *lwp = ttolwp(t);
 332  332          proc_t *p = ttoproc(t);
 333  333          zone_t *z = p->p_zone;
 334  334          timeout_id_t tmp_id;
 335  335          int rv;
 336  336          proc_t *q;
 337  337          task_t *tk;
 338  338          vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
 339  339          sigqueue_t *sqp;
 340  340          lwpdir_t *lwpdir;
 341  341          uint_t lwpdir_sz;
 342  342          tidhash_t *tidhash;
 343  343          uint_t tidhash_sz;
 344  344          ret_tidhash_t *ret_tidhash;
 345  345          refstr_t *cwd;
 346  346          hrtime_t hrutime, hrstime;
 347  347          int evaporate;
 348  348  
 349  349          /*
 350  350           * Stop and discard the process's lwps except for the current one,
 351  351           * unless some other lwp beat us to it.  If exitlwps() fails then
 352  352           * return and the calling lwp will call (or continue in) lwp_exit().
 353  353           */
 354  354          proc_is_exiting(p);
 355  355          if (exitlwps(0) != 0)
 356  356                  return (1);
 357  357  
 358  358          mutex_enter(&p->p_lock);
 359  359          if (p->p_ttime > 0) {
 360  360                  /*
 361  361                   * Account any remaining ticks charged to this process
 362  362                   * on its way out.
 363  363                   */
 364  364                  (void) task_cpu_time_incr(p->p_task, p->p_ttime);
 365  365                  p->p_ttime = 0;
 366  366          }
 367  367          mutex_exit(&p->p_lock);
 368  368  
 369  369          /*
 370  370           * Don't let init exit unless zone_start_init() failed its exec, or
 371  371           * we are shutting down the zone or the machine.
 372  372           *
 373  373           * Since we are single threaded, we don't need to lock the
 374  374           * following accesses to zone_proc_initpid.
 375  375           */
 376  376          if (p->p_pid == z->zone_proc_initpid) {
 377  377                  if (z->zone_boot_err == 0 &&
 378  378                      zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
 379  379                      zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
 380  380  
 381  381                          /*
 382  382                           * If the init process should be restarted, the
 383  383                           * "zone_restart_init" member will be set.  Some init
 384  384                           * programs in branded zones do not tolerate a restart
 385  385                           * in the traditional manner; setting the
 386  386                           * "zone_reboot_on_init_exit" member will cause the
 387  387                           * entire zone to be rebooted instead.  If neither of
 388  388                           * these flags is set the zone will shut down.
 389  389                           */
 390  390                          if (z->zone_reboot_on_init_exit == B_TRUE &&
 391  391                              z->zone_restart_init == B_TRUE) {
 392  392                                  /*
 393  393                                   * Trigger a zone reboot and continue
 394  394                                   * with exit processing.
 395  395                                   */
 396  396                                  z->zone_init_status = wstat(why, what);
 397  397                                  (void) zone_kadmin(A_REBOOT, 0, NULL,
 398  398                                      zone_kcred());
 399  399  
 400  400                          } else {
 401  401                                  if (z->zone_restart_init == B_TRUE) {
 402  402                                          if (restart_init(what, why) == 0)
 403  403                                                  return (0);
 404  404                                  }
 405  405  
 406  406                                  z->zone_init_status = wstat(why, what);
 407  407                                  (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
 408  408                                      zone_kcred());
 409  409                          }
 410  410                  }
 411  411  
 412  412                  /*
 413  413                   * Since we didn't or couldn't restart init, we clear
 414  414                   * the zone's init state and proceed with exit
 415  415                   * processing.
 416  416                   */
 417  417                  z->zone_proc_initpid = -1;
 418  418          }
 419  419  
 420  420          /*
 421  421           * Delay firing probes (and performing brand cleanup) until after the
 422  422           * zone_proc_initpid check. Cases which result in zone shutdown or
 423  423           * restart via zone_kadmin eventually result in a call back to
 424  424           * proc_exit.
 425  425           */
 426  426          DTRACE_PROC(lwp__exit);
 427  427          DTRACE_PROC1(exit, int, why);
 428  428  
 429  429          /*
 430  430           * Will perform any brand specific proc exit processing. Since this
 431  431           * is always the last lwp, will also perform lwp exit/free and proc
 432  432           * exit. Brand data will be freed when the process is reaped.
 433  433           */
 434  434          if (PROC_IS_BRANDED(p)) {
 435  435                  BROP(p)->b_lwpexit(lwp);
 436  436                  BROP(p)->b_proc_exit(p);
 437  437                  /*
 438  438                   * To ensure that b_proc_exit has access to brand-specific data
 439  439                   * contained by the one remaining lwp, call the freelwp hook as
 440  440                   * the last part of this clean-up process.
 441  441                   */
 442  442                  BROP(p)->b_freelwp(lwp);
 443  443                  lwp_detach_brand_hdlrs(lwp);
 444  444          }
 445  445  
 446  446          lwp_pcb_exit();
 447  447  
 448  448          /*
 449  449           * Allocate a sigqueue now, before we grab locks.
 450  450           * It will be given to sigcld(), below.
 451  451           * Special case:  If we will be making the process disappear
 452  452           * without a trace because it is either:
 453  453           *      * an exiting SSYS process, or
 454  454           *      * a posix_spawn() vfork child who requests it,
 455  455           * we don't bother to allocate a useless sigqueue.
 456  456           */
 457  457          evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
 458  458              why == CLD_EXITED && what == _EVAPORATE);
 459  459          if (!evaporate)
 460  460                  sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
 461  461  
 462  462          /*
 463  463           * revoke any doors created by the process.
 464  464           */
 465  465          if (p->p_door_list)
 466  466                  door_exit();
 467  467  
 468  468          /*
 469  469           * Release schedctl data structures.
 470  470           */
 471  471          if (p->p_pagep)
 472  472                  schedctl_proc_cleanup();
 473  473  
 474  474          /*
 475  475           * make sure all pending kaio has completed.
 476  476           */
 477  477          if (p->p_aio)
 478  478                  aio_cleanup_exit();
 479  479  
 480  480          /*
 481  481           * discard the lwpchan cache.
 482  482           */
 483  483          if (p->p_lcp != NULL)
 484  484                  lwpchan_destroy_cache(0);
 485  485  
 486  486          /*
 487  487           * Clean up any DTrace helper actions or probes for the process.
 488  488           */
 489  489          if (p->p_dtrace_helpers != NULL) {
 490  490                  ASSERT(dtrace_helpers_cleanup != NULL);
 491  491                  (*dtrace_helpers_cleanup)();
 492  492          }
 493  493  
 494  494          /*
 495  495           * Clean up any signalfd state for the process.
 496  496           */
 497  497          if (p->p_sigfd != NULL) {
 498  498                  VERIFY(sigfd_exit_helper != NULL);
 499  499                  (*sigfd_exit_helper)();
 500  500          }
 501  501  
 502  502          /* untimeout the realtime timers */
 503  503          if (p->p_itimer != NULL)
 504  504                  timer_exit();
 505  505  
 506  506          if ((tmp_id = p->p_alarmid) != 0) {
 507  507                  p->p_alarmid = 0;
 508  508                  (void) untimeout(tmp_id);
 509  509          }
 510  510  
 511  511          /*
 512  512           * Remove any fpollinfo_t's for this (last) thread from our file
 513  513           * descriptors so closeall() can ASSERT() that they're all gone.
 514  514           */
 515  515          pollcleanup();
 516  516  
 517  517          if (p->p_rprof_cyclic != CYCLIC_NONE) {
 518  518                  mutex_enter(&cpu_lock);
 519  519                  cyclic_remove(p->p_rprof_cyclic);
 520  520                  mutex_exit(&cpu_lock);
 521  521          }
 522  522  
 523  523          mutex_enter(&p->p_lock);
 524  524  
 525  525          /*
 526  526           * Clean up any DTrace probes associated with this process.
 527  527           */
 528  528          if (p->p_dtrace_probes) {
 529  529                  ASSERT(dtrace_fasttrap_exit_ptr != NULL);
 530  530                  dtrace_fasttrap_exit_ptr(p);
 531  531          }
 532  532  
 533  533          while ((tmp_id = p->p_itimerid) != 0) {
 534  534                  p->p_itimerid = 0;
 535  535                  mutex_exit(&p->p_lock);
 536  536                  (void) untimeout(tmp_id);
 537  537                  mutex_enter(&p->p_lock);
 538  538          }
 539  539  
 540  540          lwp_cleanup();
 541  541  
 542  542          /*
 543  543           * We are about to exit; prevent our resource associations from
 544  544           * being changed.
 545  545           */
 546  546          pool_barrier_enter();
 547  547  
 548  548          /*
 549  549           * Block the process against /proc now that we have really
 550  550           * acquired p->p_lock (to manipulate p_tlist at least).
 551  551           */
 552  552          prbarrier(p);
 553  553  
 554  554          sigfillset(&p->p_ignore);
 555  555          sigemptyset(&p->p_siginfo);
 556  556          sigemptyset(&p->p_sig);
 557  557          sigemptyset(&p->p_extsig);
 558  558          sigemptyset(&t->t_sig);
 559  559          sigemptyset(&t->t_extsig);
 560  560          sigemptyset(&p->p_sigmask);
 561  561          sigdelq(p, t, 0);
 562  562          lwp->lwp_cursig = 0;
 563  563          lwp->lwp_extsig = 0;
 564  564          p->p_flag &= ~(SKILLED | SEXTKILLED);
 565  565          if (lwp->lwp_curinfo) {
 566  566                  siginfofree(lwp->lwp_curinfo);
 567  567                  lwp->lwp_curinfo = NULL;
 568  568          }
 569  569  
 570  570          t->t_proc_flag |= TP_LWPEXIT;
 571  571          ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
 572  572          prlwpexit(t);           /* notify /proc */
 573  573          lwp_hash_out(p, t->t_tid);
 574  574          prexit(p);
 575  575  
 576  576          p->p_lwpcnt = 0;
 577  577          p->p_tlist = NULL;
 578  578          sigqfree(p);
 579  579          term_mstate(t);
 580  580          p->p_mterm = gethrtime();
 581  581  
 582  582          exec_vp = p->p_exec;
 583  583          execdir_vp = p->p_execdir;
 584  584          p->p_exec = NULLVP;
 585  585          p->p_execdir = NULLVP;
 586  586          mutex_exit(&p->p_lock);
 587  587  
 588  588          pr_free_watched_pages(p);
 589  589  
 590  590          closeall(P_FINFO(p));
 591  591  
 592  592          /* Free the controlling tty.  (freectty() always assumes curproc.) */
 593  593          ASSERT(p == curproc);
 594  594          (void) freectty(B_TRUE);
 595  595  
 596  596  #if defined(__sparc)
 597  597          if (p->p_utraps != NULL)
 598  598                  utrap_free(p);
 599  599  #endif
 600  600          if (p->p_semacct)                       /* IPC semaphore exit */
 601  601                  semexit(p);
 602  602          rv = wstat(why, what);
 603  603  
 604  604          acct(rv & 0xff);
 605  605          exacct_commit_proc(p, rv);
 606  606  
 607  607          /*
 608  608           * Release any resources associated with C2 auditing
 609  609           */
 610  610          if (AU_AUDITING()) {
 611  611                  /*
 612  612                   * audit exit system call
 613  613                   */
 614  614                  audit_exit(why, what);
 615  615          }
 616  616  
 617  617          /*
 618  618           * Free address space.
 619  619           */
 620  620          relvm();
 621  621  
 622  622          if (exec_vp) {
 623  623                  /*
 624  624                   * Close this executable which has been opened when the process
 625  625                   * was created by getproc().
 626  626                   */
 627  627                  (void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL);
 628  628                  VN_RELE(exec_vp);
 629  629          }
 630  630          if (execdir_vp)
 631  631                  VN_RELE(execdir_vp);
 632  632  
 633  633          /*
 634  634           * Release held contracts.
 635  635           */
 636  636          contract_exit(p);
 637  637  
 638  638          /*
 639  639           * Depart our encapsulating process contract.
 640  640           */
 641  641          if ((p->p_flag & SSYS) == 0) {
 642  642                  ASSERT(p->p_ct_process);
 643  643                  contract_process_exit(p->p_ct_process, p, rv);
 644  644          }
 645  645  
 646  646          /*
 647  647           * Remove pool association, and block if requested by pool_do_bind.
 648  648           */
 649  649          mutex_enter(&p->p_lock);
 650  650          ASSERT(p->p_pool->pool_ref > 0);
 651  651          atomic_dec_32(&p->p_pool->pool_ref);
 652  652          p->p_pool = pool_default;
 653  653          /*
 654  654           * Now that our address space has been freed and all other threads
 655  655           * in this process have exited, set the PEXITED pool flag.  This
 656  656           * tells the pools subsystems to ignore this process if it was
 657  657           * requested to rebind this process to a new pool.
 658  658           */
 659  659          p->p_poolflag |= PEXITED;
 660  660          pool_barrier_exit();
 661  661          mutex_exit(&p->p_lock);
 662  662  
 663  663          mutex_enter(&pidlock);
 664  664  
 665  665          /*
 666  666           * Delete this process from the newstate list of its parent. We
 667  667           * will put it in the right place in the sigcld in the end.
 668  668           */
 669  669          delete_ns(p->p_parent, p);
 670  670  
 671  671          /*
 672  672           * Reassign the orphans to the next of kin.
 673  673           * Don't rearrange init's orphanage.
 674  674           */
 675  675          if ((q = p->p_orphan) != NULL && p != proc_init) {
 676  676  
 677  677                  proc_t *nokp = p->p_nextofkin;
 678  678  
 679  679                  for (;;) {
 680  680                          q->p_nextofkin = nokp;
 681  681                          if (q->p_nextorph == NULL)
 682  682                                  break;
 683  683                          q = q->p_nextorph;
 684  684                  }
 685  685                  q->p_nextorph = nokp->p_orphan;
 686  686                  nokp->p_orphan = p->p_orphan;
 687  687                  p->p_orphan = NULL;
 688  688          }
 689  689  
 690  690          /*
 691  691           * Reassign the children to init.
 692  692           * Don't try to assign init's children to init.
 693  693           */
 694  694          if ((q = p->p_child) != NULL && p != proc_init) {
 695  695                  struct proc     *np;
 696  696                  struct proc     *initp = proc_init;
 697  697                  pid_t           zone_initpid = 1;
 698  698                  struct proc     *zoneinitp = NULL;
 699  699                  boolean_t       setzonetop = B_FALSE;
 700  700  
 701  701                  if (!INGLOBALZONE(curproc)) {
 702  702                          zone_initpid = curproc->p_zone->zone_proc_initpid;
 703  703  
 704  704                          ASSERT(MUTEX_HELD(&pidlock));
 705  705                          zoneinitp = prfind(zone_initpid);
 706  706                          if (zoneinitp != NULL) {
 707  707                                  initp = zoneinitp;
 708  708                          } else {
 709  709                                  zone_initpid = 1;
 710  710                                  setzonetop = B_TRUE;
 711  711                          }
 712  712                  }
 713  713  
 714  714                  pgdetach(p);
 715  715  
 716  716                  do {
 717  717                          np = q->p_sibling;
 718  718                          /*
 719  719                           * Delete it from its current parent new state
 720  720                           * list and add it to init new state list
 721  721                           */
 722  722                          delete_ns(q->p_parent, q);
 723  723  
 724  724                          q->p_ppid = zone_initpid;
 725  725  
 726  726                          q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
 727  727                          if (setzonetop) {
 728  728                                  mutex_enter(&q->p_lock);
 729  729                                  q->p_flag |= SZONETOP;
 730  730                                  mutex_exit(&q->p_lock);
 731  731                          }
 732  732                          q->p_parent = initp;
 733  733  
 734  734                          /*
 735  735                           * Since q will be the first child,
 736  736                           * it will not have a previous sibling.
 737  737                           */
 738  738                          q->p_psibling = NULL;
 739  739                          if (initp->p_child) {
 740  740                                  initp->p_child->p_psibling = q;
 741  741                          }
 742  742                          q->p_sibling = initp->p_child;
 743  743                          initp->p_child = q;
 744  744                          if (q->p_proc_flag & P_PR_PTRACE) {
 745  745                                  mutex_enter(&q->p_lock);
 746  746                                  sigtoproc(q, NULL, SIGKILL);
 747  747                                  mutex_exit(&q->p_lock);
 748  748                          }
 749  749                          /*
 750  750                           * sigcld() will add the child to parents
 751  751                           * newstate list.
 752  752                           */
 753  753                          if (q->p_stat == SZOMB)
 754  754                                  sigcld(q, NULL);
 755  755                  } while ((q = np) != NULL);
 756  756  
 757  757                  p->p_child = NULL;
 758  758                  ASSERT(p->p_child_ns == NULL);
 759  759          }
 760  760  
 761  761          TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);
 762  762  
 763  763          mutex_enter(&p->p_lock);
 764  764          CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */
 765  765  
 766  766          /*
 767  767           * Have our task accummulate our resource usage data before they
 768  768           * become contaminated by p_cacct etc., and before we renounce
 769  769           * membership of the task.
 770  770           *
 771  771           * We do this regardless of whether or not task accounting is active.
 772  772           * This is to avoid having nonsense data reported for this task if
 773  773           * task accounting is subsequently enabled. The overhead is minimal;
 774  774           * by this point, this process has accounted for the usage of all its
 775  775           * LWPs. We nonetheless do the work here, and under the protection of
 776  776           * pidlock, so that the movement of the process's usage to the task
 777  777           * happens at the same time as the removal of the process from the
 778  778           * task, from the point of view of exacct_snapshot_task_usage().
 779  779           */
 780  780          exacct_update_task_mstate(p);
 781  781  
 782  782          hrutime = mstate_aggr_state(p, LMS_USER);
 783  783          hrstime = mstate_aggr_state(p, LMS_SYSTEM);
 784  784          p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
 785  785          p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;
 786  786  
 787  787          p->p_acct[LMS_USER]     += p->p_cacct[LMS_USER];
 788  788          p->p_acct[LMS_SYSTEM]   += p->p_cacct[LMS_SYSTEM];
 789  789          p->p_acct[LMS_TRAP]     += p->p_cacct[LMS_TRAP];
 790  790          p->p_acct[LMS_TFAULT]   += p->p_cacct[LMS_TFAULT];
 791  791          p->p_acct[LMS_DFAULT]   += p->p_cacct[LMS_DFAULT];
 792  792          p->p_acct[LMS_KFAULT]   += p->p_cacct[LMS_KFAULT];
 793  793          p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
 794  794          p->p_acct[LMS_SLEEP]    += p->p_cacct[LMS_SLEEP];
 795  795          p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU];
 796  796          p->p_acct[LMS_STOPPED]  += p->p_cacct[LMS_STOPPED];
 797  797  
 798  798          p->p_ru.minflt  += p->p_cru.minflt;
 799  799          p->p_ru.majflt  += p->p_cru.majflt;
 800  800          p->p_ru.nswap   += p->p_cru.nswap;
 801  801          p->p_ru.inblock += p->p_cru.inblock;
 802  802          p->p_ru.oublock += p->p_cru.oublock;
 803  803          p->p_ru.msgsnd  += p->p_cru.msgsnd;
 804  804          p->p_ru.msgrcv  += p->p_cru.msgrcv;
 805  805          p->p_ru.nsignals += p->p_cru.nsignals;
 806  806          p->p_ru.nvcsw   += p->p_cru.nvcsw;
 807  807          p->p_ru.nivcsw  += p->p_cru.nivcsw;
 808  808          p->p_ru.sysc    += p->p_cru.sysc;
 809  809          p->p_ru.ioch    += p->p_cru.ioch;
 810  810  
 811  811          p->p_stat = SZOMB;
 812  812          p->p_proc_flag &= ~P_PR_PTRACE;
 813  813          p->p_wdata = what;
 814  814          p->p_wcode = (char)why;
 815  815  
 816  816          cdir = PTOU(p)->u_cdir;
 817  817          rdir = PTOU(p)->u_rdir;
 818  818          cwd = PTOU(p)->u_cwd;
 819  819  
 820  820          ASSERT(cdir != NULL || p->p_parent == &p0);
 821  821  
 822  822          /*
 823  823           * Release resource controls, as they are no longer enforceable.
 824  824           */
 825  825          rctl_set_free(p->p_rctls);
 826  826  
 827  827          /*
 828  828           * Decrement tk_nlwps counter for our task.max-lwps resource control.
 829  829           * An extended accounting record, if that facility is active, is
 830  830           * scheduled to be written.  We cannot give up task and project
 831  831           * membership at this point because that would allow zombies to escape
 832  832           * from the max-processes resource controls.  Zombies stay in their
 833  833           * current task and project until the process table slot is released
 834  834           * in freeproc().
 835  835           */
 836  836          tk = p->p_task;
 837  837  
 838  838          mutex_enter(&p->p_zone->zone_nlwps_lock);
 839  839          tk->tk_nlwps--;
 840  840          tk->tk_proj->kpj_nlwps--;
 841  841          p->p_zone->zone_nlwps--;
 842  842          mutex_exit(&p->p_zone->zone_nlwps_lock);
 843  843  
 844  844          /*
 845  845           * Clear the lwp directory and the lwpid hash table
 846  846           * now that /proc can't bother us any more.
 847  847           * We free the memory below, after dropping p->p_lock.
 848  848           */
 849  849          lwpdir = p->p_lwpdir;
 850  850          lwpdir_sz = p->p_lwpdir_sz;
 851  851          tidhash = p->p_tidhash;
 852  852          tidhash_sz = p->p_tidhash_sz;
 853  853          ret_tidhash = p->p_ret_tidhash;
 854  854          p->p_lwpdir = NULL;
 855  855          p->p_lwpfree = NULL;
 856  856          p->p_lwpdir_sz = 0;
 857  857          p->p_tidhash = NULL;
 858  858          p->p_tidhash_sz = 0;
 859  859          p->p_ret_tidhash = NULL;
 860  860  
 861  861          /*
 862  862           * If the process has context ops installed, call the exit routine
 863  863           * on behalf of this last remaining thread. Normally exitpctx() is
 864  864           * called during thread_exit() or lwp_exit(), but because this is the
 865  865           * last thread in the process, we must call it here. By the time
 866  866           * thread_exit() is called (below), the association with the relevant
 867  867           * process has been lost.
 868  868           *
 869  869           * We also free the context here.
 870  870           */
 871  871          if (p->p_pctx) {
 872  872                  kpreempt_disable();
 873  873                  exitpctx(p);
 874  874                  kpreempt_enable();
 875  875  
 876  876                  freepctx(p, 0);
 877  877          }
 878  878  
 879  879          /*
 880  880           * curthread's proc pointer is changed to point to the 'sched'
 881  881           * process for the corresponding zone, except in the case when
 882  882           * the exiting process is in fact a zsched instance, in which
 883  883           * case the proc pointer is set to p0.  We do so, so that the
 884  884           * process still points at the right zone when we call the VN_RELE()
 885  885           * below.
 886  886           *
 887  887           * This is because curthread's original proc pointer can be freed as
 888  888           * soon as the child sends a SIGCLD to its parent.  We use zsched so
 889  889           * that for user processes, even in the final moments of death, the
 890  890           * process is still associated with its zone.
 891  891           */
 892  892          if (p != t->t_procp->p_zone->zone_zsched)
 893  893                  t->t_procp = t->t_procp->p_zone->zone_zsched;
 894  894          else
 895  895                  t->t_procp = &p0;
 896  896  
 897  897          mutex_exit(&p->p_lock);
 898  898          if (!evaporate) {
 899  899                  /*
 900  900                   * The brand specific code only happens when the brand has a
 901  901                   * function to call in place of sigcld and the parent of the
 902  902                   * exiting process is not the global zone init. If the parent
 903  903                   * is the global zone init, then the process was reparented,
 904  904                   * and we don't want brand code delivering possibly strange
 905  905                   * signals to init. Also, init is not branded, so any brand
 906  906                   * specific exit data will not be picked up by init anyway.
 907  907                   */
 908  908                  if (PROC_IS_BRANDED(p) &&
 909  909                      BROP(p)->b_exit_with_sig != NULL &&
 910  910                      p->p_ppid != 1) {
 911  911                          /*
 912  912                           * The code for _fini that could unload the brand_t
 913  913                           * blocks until the count of zones using the module
 914  914                           * reaches zero. Zones decrement the refcount on their
 915  915                           * brands only after all user tasks in that zone have
 916  916                           * exited and been waited on. The decrement on the
 917  917                           * brand's refcount happen in zone_destroy(). That
 918  918                           * depends on zone_shutdown() having been completed.
 919  919                           * zone_shutdown() includes a call to zone_empty(),
 920  920                           * where the zone waits for itself to reach the state
 921  921                           * ZONE_IS_EMPTY. This state is only set in either
 922  922                           * zone_shutdown(), when there are no user processes as
 923  923                           * the zone enters this function, or in
 924  924                           * zone_task_rele(). zone_task_rele() is called from
 925  925                           * code triggered by waiting on processes, not by the
 926  926                           * processes exiting through proc_exit().  This means
 927  927                           * all the branded processes that could exist for a
 928  928                           * specific brand_t must exit and get reaped before the
 929  929                           * refcount on the brand_t can reach 0. _fini will
 930  930                           * never unload the corresponding brand module before
 931  931                           * proc_exit finishes execution for all processes
 932  932                           * branded with a particular brand_t, which makes the
 933  933                           * operation below safe to do. Brands that wish to use
 934  934                           * this mechanism must wait in _fini as described
 935  935                           * above.
 936  936                           */
 937  937                          BROP(p)->b_exit_with_sig(p, sqp);
 938  938                  } else {
 939  939                          p->p_pidflag &= ~CLDPEND;
 940  940                          sigcld(p, sqp);
 941  941                  }
 942  942  
 943  943          } else {
 944  944                  /*
 945  945                   * Do what sigcld() would do if the disposition
 946  946                   * of the SIGCHLD signal were set to be ignored.
 947  947                   */
 948  948                  cv_broadcast(&p->p_srwchan_cv);
 949  949                  freeproc(p);
 950  950          }
 951  951          mutex_exit(&pidlock);
 952  952  
 953  953          /*
 954  954           * We don't release u_cdir and u_rdir until SZOMB is set.
 955  955           * This protects us against dofusers().
 956  956           */
 957  957          if (cdir)
 958  958                  VN_RELE(cdir);
 959  959          if (rdir)
 960  960                  VN_RELE(rdir);
 961  961          if (cwd)
 962  962                  refstr_rele(cwd);
 963  963  
 964  964          /*
 965  965           * task_rele() may ultimately cause the zone to go away (or
 966  966           * may cause the last user process in a zone to go away, which
 967  967           * signals zsched to go away).  So prior to this call, we must
 968  968           * no longer point at zsched.
 969  969           */
 970  970          t->t_procp = &p0;
 971  971  
 972  972          kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
 973  973          kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
 974  974          while (ret_tidhash != NULL) {
 975  975                  ret_tidhash_t *next = ret_tidhash->rth_next;
 976  976                  kmem_free(ret_tidhash->rth_tidhash,
 977  977                      ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
 978  978                  kmem_free(ret_tidhash, sizeof (*ret_tidhash));
 979  979                  ret_tidhash = next;
 980  980          }
 981  981  
 982  982          thread_exit();
 983  983          /* NOTREACHED */
 984  984  }
 985  985  
 986  986  /*
 987  987   * Format siginfo structure for wait system calls.
 988  988   */
 989  989  void
 990  990  winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
 991  991  {
 992  992          ASSERT(MUTEX_HELD(&pidlock));
 993  993  
 994  994          bzero(ip, sizeof (k_siginfo_t));
 995  995          ip->si_signo = SIGCLD;
 996  996          ip->si_code = pp->p_wcode;
 997  997          ip->si_pid = pp->p_pid;
 998  998          ip->si_ctid = PRCTID(pp);
 999  999          ip->si_zoneid = pp->p_zone->zone_id;
1000 1000          ip->si_status = pp->p_wdata;
1001 1001          ip->si_stime = pp->p_stime;
1002 1002          ip->si_utime = pp->p_utime;
1003 1003  
1004 1004          if (waitflag) {
1005 1005                  pp->p_wcode = 0;
1006 1006                  pp->p_wdata = 0;
1007 1007                  pp->p_pidflag &= ~CLDPEND;
1008 1008          }
1009 1009  }
1010 1010  
1011 1011  /*
1012 1012   * Wait system call.
1013 1013   * Search for a terminated (zombie) child,
1014 1014   * finally lay it to rest, and collect its status.
1015 1015   * Look also for stopped children,
1016 1016   * and pass back status from them.
1017 1017   */
1018 1018  int
1019 1019  waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
1020 1020  {
1021 1021          proc_t *cp, *pp;
1022 1022          int waitflag = !(options & WNOWAIT);
1023 1023          boolean_t have_brand_helper = B_FALSE;
1024 1024  
1025 1025          /*
1026 1026           * Obsolete flag, defined here only for binary compatibility
1027 1027           * with old statically linked executables.  Delete this when
1028 1028           * we no longer care about these old and broken applications.
1029 1029           */
1030 1030  #define _WNOCHLD        0400
1031 1031          options &= ~_WNOCHLD;
1032 1032  
1033 1033          if (options == 0 || (options & ~WOPTMASK))
1034 1034                  return (EINVAL);
1035 1035  
1036 1036          switch (idtype) {
1037 1037          case P_PID:
1038 1038          case P_PGID:
1039 1039                  if (id < 0 || id >= maxpid)
1040 1040                          return (EINVAL);
1041 1041                  /* FALLTHROUGH */
1042 1042          case P_ALL:
1043 1043                  break;
1044 1044          default:
1045 1045                  return (EINVAL);
1046 1046          }
1047 1047  
1048 1048          pp = ttoproc(curthread);
1049 1049  
1050 1050          /*
1051 1051           * Anytime you are looking for a process, you take pidlock to prevent
1052 1052           * things from changing as you look.
1053 1053           */
1054 1054          mutex_enter(&pidlock);
1055 1055  
1056 1056          /*
1057 1057           * if we are only looking for exited processes and child_ns list
1058 1058           * is empty no reason to look at all children.
1059 1059           */
1060 1060          if (idtype == P_ALL &&
1061 1061              (options & ~WNOWAIT) == (WNOHANG | WEXITED) &&
1062 1062              pp->p_child_ns == NULL) {
1063 1063                  if (pp->p_child) {
1064 1064                          mutex_exit(&pidlock);
1065 1065                          bzero(ip, sizeof (k_siginfo_t));
1066 1066                          return (0);
1067 1067                  }
1068 1068                  mutex_exit(&pidlock);
1069 1069                  return (ECHILD);
1070 1070          }
1071 1071  
1072 1072          if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) {
1073 1073                  have_brand_helper = B_TRUE;
1074 1074          }
1075 1075  
1076 1076          while (pp->p_child != NULL || have_brand_helper) {
1077 1077                  boolean_t brand_wants_wait = B_FALSE;
1078 1078                  int proc_gone = 0;
1079 1079                  int found = 0;
1080 1080  
1081 1081                  /*
1082 1082                   * Give the brand a chance to return synthetic results from
1083 1083                   * this waitid() call before we do the real thing.
1084 1084                   */
1085 1085                  if (have_brand_helper) {
1086 1086                          int ret;
1087 1087  
1088 1088                          if (BROP(pp)->b_waitid_helper(idtype, id, ip, options,
1089 1089                              &brand_wants_wait, &ret) == 0) {
1090 1090                                  mutex_exit(&pidlock);
1091 1091                                  return (ret);
1092 1092                          }
1093 1093  
1094 1094                          if (pp->p_child == NULL) {
1095 1095                                  goto no_real_children;
1096 1096                          }
1097 1097                  }
1098 1098  
1099 1099                  /*
1100 1100                   * Look for interesting children in the newstate list.
1101 1101                   */
1102 1102                  VERIFY(pp->p_child != NULL);
1103 1103                  for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
1104 1104                          if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
1105 1105                                  continue;
1106 1106                          if (idtype == P_PID && id != cp->p_pid)
1107 1107                                  continue;
1108 1108                          if (idtype == P_PGID && id != cp->p_pgrp)
1109 1109                                  continue;
1110 1110                          if (PROC_IS_BRANDED(pp)) {
1111 1111                                  if (BROP(pp)->b_wait_filter != NULL &&
1112 1112                                      BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
1113 1113                                          continue;
1114 1114                          }
1115 1115  
1116 1116                          switch (cp->p_wcode) {
1117 1117  
1118 1118                          case CLD_TRAPPED:
1119 1119                          case CLD_STOPPED:
1120 1120                          case CLD_CONTINUED:
1121 1121                                  cmn_err(CE_PANIC,
1122 1122                                      "waitid: wrong state %d on the p_newstate"
1123 1123                                      " list", cp->p_wcode);
1124 1124                                  break;
1125 1125  
1126 1126                          case CLD_EXITED:
1127 1127                          case CLD_DUMPED:
1128 1128                          case CLD_KILLED:
1129 1129                                  if (!(options & WEXITED)) {
1130 1130                                          /*
1131 1131                                           * Count how many are already gone
1132 1132                                           * for good.
1133 1133                                           */
1134 1134                                          proc_gone++;
1135 1135                                          break;
1136 1136                                  }
1137 1137                                  if (!waitflag) {
1138 1138                                          winfo(cp, ip, 0);
1139 1139                                  } else {
1140 1140                                          winfo(cp, ip, 1);
1141 1141                                          freeproc(cp);
1142 1142                                  }
1143 1143                                  mutex_exit(&pidlock);
1144 1144                                  if (waitflag) {         /* accept SIGCLD */
1145 1145                                          sigcld_delete(ip);
1146 1146                                          sigcld_repost();
1147 1147                                  }
1148 1148                                  return (0);
1149 1149                          }
1150 1150  
1151 1151                          if (idtype == P_PID)
1152 1152                                  break;
1153 1153                  }
1154 1154  
1155 1155                  /*
1156 1156                   * Wow! None of the threads on the p_sibling_ns list were
1157 1157                   * interesting threads. Check all the kids!
1158 1158                   */
1159 1159                  for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
1160 1160                          if (idtype == P_PID && id != cp->p_pid)
1161 1161                                  continue;
1162 1162                          if (idtype == P_PGID && id != cp->p_pgrp)
1163 1163                                  continue;
1164 1164                          if (PROC_IS_BRANDED(pp)) {
1165 1165                                  if (BROP(pp)->b_wait_filter != NULL &&
1166 1166                                      BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
1167 1167                                          continue;
1168 1168                          }
1169 1169  
1170 1170                          switch (cp->p_wcode) {
1171 1171                          case CLD_TRAPPED:
1172 1172                                  if (!(options & WTRAPPED))
1173 1173                                          break;
1174 1174                                  winfo(cp, ip, waitflag);
1175 1175                                  mutex_exit(&pidlock);
1176 1176                                  if (waitflag) {         /* accept SIGCLD */
1177 1177                                          sigcld_delete(ip);
1178 1178                                          sigcld_repost();
1179 1179                                  }
1180 1180                                  return (0);
1181 1181  
1182 1182                          case CLD_STOPPED:
1183 1183                                  if (!(options & WSTOPPED))
1184 1184                                          break;
1185 1185                                  /* Is it still stopped? */
1186 1186                                  mutex_enter(&cp->p_lock);
1187 1187                                  if (!jobstopped(cp)) {
1188 1188                                          mutex_exit(&cp->p_lock);
1189 1189                                          break;
1190 1190                                  }
1191 1191                                  mutex_exit(&cp->p_lock);
1192 1192                                  winfo(cp, ip, waitflag);
1193 1193                                  mutex_exit(&pidlock);
1194 1194                                  if (waitflag) {         /* accept SIGCLD */
1195 1195                                          sigcld_delete(ip);
1196 1196                                          sigcld_repost();
1197 1197                                  }
1198 1198                                  return (0);
1199 1199  
1200 1200                          case CLD_CONTINUED:
1201 1201                                  if (!(options & WCONTINUED))
1202 1202                                          break;
1203 1203                                  winfo(cp, ip, waitflag);
1204 1204                                  mutex_exit(&pidlock);
1205 1205                                  if (waitflag) {         /* accept SIGCLD */
1206 1206                                          sigcld_delete(ip);
1207 1207                                          sigcld_repost();
1208 1208                                  }
1209 1209                                  return (0);
1210 1210  
1211 1211                          case CLD_EXITED:
1212 1212                          case CLD_DUMPED:
1213 1213                          case CLD_KILLED:
1214 1214                                  if (idtype != P_PID &&
1215 1215                                      (cp->p_pidflag & CLDWAITPID))
1216 1216                                          continue;
1217 1217                                  /*
1218 1218                                   * Don't complain if a process was found in
1219 1219                                   * the first loop but we broke out of the loop
1220 1220                                   * because of the arguments passed to us.
1221 1221                                   */
1222 1222                                  if (proc_gone == 0) {
1223 1223                                          cmn_err(CE_PANIC,
1224 1224                                              "waitid: wrong state on the"
1225 1225                                              " p_child list");
1226 1226                                  } else {
1227 1227                                          break;
1228 1228                                  }
1229 1229                          }
1230 1230  
1231 1231                          found++;
1232 1232  
1233 1233                          if (idtype == P_PID)
1234 1234                                  break;
1235 1235                  }
1236 1236  
1237 1237  no_real_children:
1238 1238                  /*
1239 1239                   * If we found no interesting processes at all,
1240 1240                   * break out and return ECHILD.
1241 1241                   */
1242 1242                  if (!brand_wants_wait && (found + proc_gone == 0))
1243 1243                          break;
1244 1244  
1245 1245                  if (options & WNOHANG) {
1246 1246                          mutex_exit(&pidlock);
1247 1247                          bzero(ip, sizeof (k_siginfo_t));
1248 1248                          /*
1249 1249                           * We should set ip->si_signo = SIGCLD,
1250 1250                           * but there is an SVVS test that expects
1251 1251                           * ip->si_signo to be zero in this case.
1252 1252                           */
1253 1253                          return (0);
1254 1254                  }
1255 1255  
1256 1256                  /*
1257 1257                   * If we found no processes of interest that could
1258 1258                   * change state while we wait, we don't wait at all.
1259 1259                   * Get out with ECHILD according to SVID.
1260 1260                   */
1261 1261                  if (!brand_wants_wait && (found == proc_gone))
1262 1262                          break;
1263 1263  
1264 1264                  if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
1265 1265                          mutex_exit(&pidlock);
1266 1266                          return (EINTR);
1267 1267                  }
1268 1268          }
1269 1269          mutex_exit(&pidlock);
1270 1270          return (ECHILD);
1271 1271  }
1272 1272  
1273 1273  int
1274 1274  waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1275 1275  {
1276 1276          int error;
1277 1277          k_siginfo_t info;
1278 1278  
1279 1279          if (error = waitid(idtype, id, &info, options))
1280 1280                  return (set_errno(error));
1281 1281          if (copyout(&info, infop, sizeof (k_siginfo_t)))
1282 1282                  return (set_errno(EFAULT));
1283 1283          return (0);
1284 1284  }
1285 1285  
1286 1286  #ifdef _SYSCALL32_IMPL
1287 1287  
1288 1288  int
1289 1289  waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1290 1290  {
1291 1291          int error;
1292 1292          k_siginfo_t info;
1293 1293          siginfo32_t info32;
1294 1294  
1295 1295          if (error = waitid(idtype, id, &info, options))
1296 1296                  return (set_errno(error));
1297 1297          siginfo_kto32(&info, &info32);
1298 1298          if (copyout(&info32, infop, sizeof (info32)))
1299 1299                  return (set_errno(EFAULT));
1300 1300          return (0);
1301 1301  }
1302 1302  
1303 1303  #endif  /* _SYSCALL32_IMPL */
1304 1304  
1305 1305  void
1306 1306  proc_detach(proc_t *p)
1307 1307  {
1308 1308          proc_t *q;
1309 1309  
1310 1310          ASSERT(MUTEX_HELD(&pidlock));
1311 1311  
1312 1312          q = p->p_parent;
1313 1313          ASSERT(q != NULL);
1314 1314  
1315 1315          /*
1316 1316           * Take it off the newstate list of its parent
1317 1317           */
1318 1318          delete_ns(q, p);
1319 1319  
1320 1320          if (q->p_child == p) {
1321 1321                  q->p_child = p->p_sibling;
1322 1322                  /*
1323 1323                   * If the parent has no children, it better not
1324 1324                   * have any with new states either!
1325 1325                   */
1326 1326                  ASSERT(q->p_child ? 1 : q->p_child_ns == NULL);
1327 1327          }
1328 1328  
1329 1329          if (p->p_sibling) {
1330 1330                  p->p_sibling->p_psibling = p->p_psibling;
1331 1331          }
1332 1332  
1333 1333          if (p->p_psibling) {
1334 1334                  p->p_psibling->p_sibling = p->p_sibling;
1335 1335          }
1336 1336  }
1337 1337  
1338 1338  /*
1339 1339   * Remove zombie children from the process table.
1340 1340   */
1341 1341  void
1342 1342  freeproc(proc_t *p)
1343 1343  {
1344 1344          proc_t *q;
1345 1345          task_t *tk;
1346 1346  
1347 1347          ASSERT(p->p_stat == SZOMB);
1348 1348          ASSERT(p->p_tlist == NULL);
1349 1349          ASSERT(MUTEX_HELD(&pidlock));
1350 1350  
1351 1351          sigdelq(p, NULL, 0);
1352 1352          if (p->p_killsqp) {
1353 1353                  siginfofree(p->p_killsqp);
1354 1354                  p->p_killsqp = NULL;
1355 1355          }
1356 1356  
1357 1357          /* Clear any remaining brand data */
1358 1358          if (PROC_IS_BRANDED(p)) {
1359 1359                  brand_clearbrand(p, B_FALSE);
1360 1360          }
1361 1361  
1362 1362  
1363 1363          prfree(p);      /* inform /proc */
1364 1364  
1365 1365          /*
1366 1366           * Don't free the init processes.
1367 1367           * Other dying processes will access it.
1368 1368           */
1369 1369          if (p == proc_init)
1370 1370                  return;
1371 1371  
1372 1372  
1373 1373          /*
1374 1374           * We wait until now to free the cred structure because a
1375 1375           * zombie process's credentials may be examined by /proc.
1376 1376           * No cred locking needed because there are no threads at this point.
1377 1377           */
1378 1378          upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
1379 1379          crfree(p->p_cred);
1380 1380          if (p->p_corefile != NULL) {
1381 1381                  corectl_path_rele(p->p_corefile);
1382 1382                  p->p_corefile = NULL;
1383 1383          }
1384 1384          if (p->p_content != NULL) {
1385 1385                  corectl_content_rele(p->p_content);
1386 1386                  p->p_content = NULL;
1387 1387          }
1388 1388  
1389 1389          if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) ||
1390 1390              (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) {
1391 1391                  /*
1392 1392                   * This should still do the right thing since p_utime/stime
1393 1393                   * get set to the correct value on process exit, so it
1394 1394                   * should get properly updated
1395 1395                   */
1396 1396                  p->p_nextofkin->p_cutime += p->p_utime;
1397 1397                  p->p_nextofkin->p_cstime += p->p_stime;
1398 1398  
1399 1399                  p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER];
1400 1400                  p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM];
1401 1401                  p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP];
1402 1402                  p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT];
1403 1403                  p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT];
1404 1404                  p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT];
1405 1405                  p->p_nextofkin->p_cacct[LMS_USER_LOCK]
1406 1406                      += p->p_acct[LMS_USER_LOCK];
1407 1407                  p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP];
1408 1408                  p->p_nextofkin->p_cacct[LMS_WAIT_CPU]
1409 1409                      += p->p_acct[LMS_WAIT_CPU];
1410 1410                  p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED];
1411 1411  
1412 1412                  p->p_nextofkin->p_cru.minflt    += p->p_ru.minflt;
1413 1413                  p->p_nextofkin->p_cru.majflt    += p->p_ru.majflt;
1414 1414                  p->p_nextofkin->p_cru.nswap     += p->p_ru.nswap;
1415 1415                  p->p_nextofkin->p_cru.inblock   += p->p_ru.inblock;
1416 1416                  p->p_nextofkin->p_cru.oublock   += p->p_ru.oublock;
1417 1417                  p->p_nextofkin->p_cru.msgsnd    += p->p_ru.msgsnd;
1418 1418                  p->p_nextofkin->p_cru.msgrcv    += p->p_ru.msgrcv;
1419 1419                  p->p_nextofkin->p_cru.nsignals  += p->p_ru.nsignals;
1420 1420                  p->p_nextofkin->p_cru.nvcsw     += p->p_ru.nvcsw;
1421 1421                  p->p_nextofkin->p_cru.nivcsw    += p->p_ru.nivcsw;
1422 1422                  p->p_nextofkin->p_cru.sysc      += p->p_ru.sysc;
1423 1423                  p->p_nextofkin->p_cru.ioch      += p->p_ru.ioch;
1424 1424  
1425 1425          }
1426 1426  
1427 1427          q = p->p_nextofkin;
1428 1428          if (q && q->p_orphan == p)
1429 1429                  q->p_orphan = p->p_nextorph;
1430 1430          else if (q) {
1431 1431                  for (q = q->p_orphan; q; q = q->p_nextorph)
1432 1432                          if (q->p_nextorph == p)
1433 1433                                  break;
1434 1434                  ASSERT(q && q->p_nextorph == p);
1435 1435                  q->p_nextorph = p->p_nextorph;
1436 1436          }
1437 1437  
1438 1438          /*
1439 1439           * The process table slot is being freed, so it is now safe to give up
1440 1440           * task and project membership.
1441 1441           */
1442 1442          mutex_enter(&p->p_lock);
1443 1443          tk = p->p_task;
1444 1444          task_detach(p);
1445 1445          mutex_exit(&p->p_lock);
1446 1446  
1447 1447          proc_detach(p);
1448 1448          pid_exit(p, tk);        /* frees pid and proc structure */
1449 1449  
1450 1450          task_rele(tk);
1451 1451  }
1452 1452  
1453 1453  /*
1454 1454   * Delete process "child" from the newstate list of process "parent"
1455 1455   */
1456 1456  void
1457 1457  delete_ns(proc_t *parent, proc_t *child)
1458 1458  {
1459 1459          proc_t **ns;
1460 1460  
1461 1461          ASSERT(MUTEX_HELD(&pidlock));
1462 1462          ASSERT(child->p_parent == parent);
1463 1463          for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) {
1464 1464                  if (*ns == child) {
1465 1465  
1466 1466                          ASSERT((*ns)->p_parent == parent);
1467 1467  
1468 1468                          *ns = child->p_sibling_ns;
1469 1469                          child->p_sibling_ns = NULL;
1470 1470                          return;
1471 1471                  }
1472 1472          }
1473 1473  }
1474 1474  
1475 1475  /*
1476 1476   * Add process "child" to the new state list of process "parent"
1477 1477   */
1478 1478  void
1479 1479  add_ns(proc_t *parent, proc_t *child)
1480 1480  {
1481 1481          ASSERT(child->p_sibling_ns == NULL);
1482 1482          child->p_sibling_ns = parent->p_child_ns;
1483 1483          parent->p_child_ns = child;
1484 1484  }

↓ open down ↓

1450 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX