1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2014 Joyent, Inc. All rights reserved.
  25  */
  26 
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  28 
  29 #include <sys/types.h>
  30 #include <sys/param.h>
  31 #include <sys/sysmacros.h>
  32 #include <sys/systm.h>
  33 #include <sys/cred.h>
  34 #include <sys/user.h>
  35 #include <sys/errno.h>
  36 #include <sys/proc.h>
  37 #include <sys/ucontext.h>
  38 #include <sys/procfs.h>
  39 #include <sys/vnode.h>
  40 #include <sys/acct.h>
  41 #include <sys/var.h>
  42 #include <sys/cmn_err.h>
  43 #include <sys/debug.h>
  44 #include <sys/wait.h>
  45 #include <sys/siginfo.h>
  46 #include <sys/procset.h>
  47 #include <sys/class.h>
  48 #include <sys/file.h>
  49 #include <sys/session.h>
  50 #include <sys/kmem.h>
  51 #include <sys/vtrace.h>
  52 #include <sys/prsystm.h>
  53 #include <sys/ipc.h>
  54 #include <sys/sem_impl.h>
  55 #include <c2/audit.h>
  56 #include <sys/aio_impl.h>
  57 #include <vm/as.h>
  58 #include <sys/poll.h>
  59 #include <sys/door.h>
  60 #include <sys/lwpchan_impl.h>
  61 #include <sys/utrap.h>
  62 #include <sys/task.h>
  63 #include <sys/exacct.h>
  64 #include <sys/cyclic.h>
  65 #include <sys/schedctl.h>
  66 #include <sys/rctl.h>
  67 #include <sys/contract_impl.h>
  68 #include <sys/contract/process_impl.h>
  69 #include <sys/list.h>
  70 #include <sys/dtrace.h>
  71 #include <sys/pool.h>
  72 #include <sys/sdt.h>
  73 #include <sys/corectl.h>
  74 #include <sys/brand.h>
  75 #include <sys/libc_kernel.h>
  76 
  77 /*
  78  * convert code/data pair into old style wait status
  79  */
  80 int
  81 wstat(int code, int data)
  82 {
  83         int stat = (data & 0377);
  84 
  85         switch (code) {
  86         case CLD_EXITED:
  87                 stat <<= 8;
  88                 break;
  89         case CLD_DUMPED:
  90                 stat |= WCOREFLG;
  91                 break;
  92         case CLD_KILLED:
  93                 break;
  94         case CLD_TRAPPED:
  95         case CLD_STOPPED:
  96                 stat <<= 8;
  97                 stat |= WSTOPFLG;
  98                 break;
  99         case CLD_CONTINUED:
 100                 stat = WCONTFLG;
 101                 break;
 102         default:
 103                 cmn_err(CE_PANIC, "wstat: bad code");
 104                 /* NOTREACHED */
 105         }
 106         return (stat);
 107 }
 108 
 109 static char *
 110 exit_reason(char *buf, size_t bufsz, int what, int why)
 111 {
 112         switch (why) {
 113         case CLD_EXITED:
 114                 (void) snprintf(buf, bufsz, "exited with status %d", what);
 115                 break;
 116         case CLD_KILLED:
 117                 (void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
 118                 break;
 119         case CLD_DUMPED:
 120                 (void) snprintf(buf, bufsz, "core dumped on signal %d", what);
 121                 break;
 122         default:
 123                 (void) snprintf(buf, bufsz, "encountered unknown error "
 124                     "(%d, %d)", why, what);
 125                 break;
 126         }
 127 
 128         return (buf);
 129 }
 130 
 131 /*
 132  * exit system call: pass back caller's arg.
 133  */
 134 void
 135 rexit(int rval)
 136 {
 137         exit(CLD_EXITED, rval);
 138 }
 139 
 140 /*
 141  * Called by proc_exit() when a zone's init exits, presumably because
 142  * it failed.  As long as the given zone is still in the "running"
 143  * state, we will re-exec() init, but first we need to reset things
 144  * which are usually inherited across exec() but will break init's
 145  * assumption that it is being exec()'d from a virgin process.  Most
 146  * importantly this includes closing all file descriptors (exec only
 147  * closes those marked close-on-exec) and resetting signals (exec only
 148  * resets handled signals, and we need to clear any signals which
 149  * killed init).  Anything else that exec(2) says would be inherited,
 150  * but would affect the execution of init, needs to be reset.
 151  */
 152 static int
 153 restart_init(int what, int why)
 154 {
 155         kthread_t *t = curthread;
 156         klwp_t *lwp = ttolwp(t);
 157         proc_t *p = ttoproc(t);
 158         user_t *up = PTOU(p);
 159 
 160         vnode_t *oldcd, *oldrd;
 161         int i, err;
 162         char reason_buf[64];
 163 
 164         /*
 165          * Let zone admin (and global zone admin if this is for a non-global
 166          * zone) know that init has failed and will be restarted.
 167          */
 168         zcmn_err(p->p_zone->zone_id, CE_WARN,
 169             "init(1M) %s: restarting automatically",
 170             exit_reason(reason_buf, sizeof (reason_buf), what, why));
 171 
 172         if (!INGLOBALZONE(p)) {
 173                 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
 174                     "restarting automatically",
 175                     p->p_zone->zone_name, p->p_pid, reason_buf);
 176         }
 177 
 178         /*
 179          * Remove any fpollinfo_t's for this (last) thread from our file
 180          * descriptors so closeall() can ASSERT() that they're all gone.
 181          * Then close all open file descriptors in the process.
 182          */
 183         pollcleanup();
 184         closeall(P_FINFO(p));
 185 
 186         /*
 187          * Grab p_lock and begin clearing miscellaneous global process
 188          * state that needs to be reset before we exec the new init(1M).
 189          */
 190 
 191         mutex_enter(&p->p_lock);
 192         prbarrier(p);
 193 
 194         p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
 195         up->u_cmask = CMASK;
 196 
 197         sigemptyset(&t->t_hold);
 198         sigemptyset(&t->t_sig);
 199         sigemptyset(&t->t_extsig);
 200 
 201         sigemptyset(&p->p_sig);
 202         sigemptyset(&p->p_extsig);
 203 
 204         sigdelq(p, t, 0);
 205         sigdelq(p, NULL, 0);
 206 
 207         if (p->p_killsqp) {
 208                 siginfofree(p->p_killsqp);
 209                 p->p_killsqp = NULL;
 210         }
 211 
 212         /*
 213          * Reset any signals that are ignored back to the default disposition.
 214          * Other u_signal members will be cleared when exec calls sigdefault().
 215          */
 216         for (i = 1; i < NSIG; i++) {
 217                 if (up->u_signal[i - 1] == SIG_IGN) {
 218                         up->u_signal[i - 1] = SIG_DFL;
 219                         sigemptyset(&up->u_sigmask[i - 1]);
 220                 }
 221         }
 222 
 223         /*
 224          * Clear the current signal, any signal info associated with it, and
 225          * any signal information from contracts and/or contract templates.
 226          */
 227         lwp->lwp_cursig = 0;
 228         lwp->lwp_extsig = 0;
 229         if (lwp->lwp_curinfo != NULL) {
 230                 siginfofree(lwp->lwp_curinfo);
 231                 lwp->lwp_curinfo = NULL;
 232         }
 233         lwp_ctmpl_clear(lwp, B_FALSE);
 234 
 235         /*
 236          * Reset both the process root directory and the current working
 237          * directory to the root of the zone just as we do during boot.
 238          */
 239         VN_HOLD(p->p_zone->zone_rootvp);
 240         oldrd = up->u_rdir;
 241         up->u_rdir = p->p_zone->zone_rootvp;
 242 
 243         VN_HOLD(p->p_zone->zone_rootvp);
 244         oldcd = up->u_cdir;
 245         up->u_cdir = p->p_zone->zone_rootvp;
 246 
 247         if (up->u_cwd != NULL) {
 248                 refstr_rele(up->u_cwd);
 249                 up->u_cwd = NULL;
 250         }
 251 
 252         mutex_exit(&p->p_lock);
 253 
 254         if (oldrd != NULL)
 255                 VN_RELE(oldrd);
 256         if (oldcd != NULL)
 257                 VN_RELE(oldcd);
 258 
 259         /* Free the controlling tty.  (freectty() always assumes curproc.) */
 260         ASSERT(p == curproc);
 261         (void) freectty(B_TRUE);
 262 
 263         /*
 264          * Now exec() the new init(1M) on top of the current process.  If we
 265          * succeed, the caller will treat this like a successful system call.
 266          * If we fail, we issue messages and the caller will proceed with exit.
 267          */
 268         err = exec_init(p->p_zone->zone_initname, NULL);
 269 
 270         if (err == 0)
 271                 return (0);
 272 
 273         zcmn_err(p->p_zone->zone_id, CE_WARN,
 274             "failed to restart init(1M) (err=%d): system reboot required", err);
 275 
 276         if (!INGLOBALZONE(p)) {
 277                 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
 278                     "(pid %d, err=%d): zoneadm(1M) boot required",
 279                     p->p_zone->zone_name, p->p_pid, err);
 280         }
 281 
 282         return (-1);
 283 }
 284 
 285 /*
 286  * Release resources.
 287  * Enter zombie state.
 288  * Wake up parent and init processes,
 289  * and dispose of children.
 290  */
 291 void
 292 exit(int why, int what)
 293 {
 294         /*
 295          * If proc_exit() fails, then some other lwp in the process
 296          * got there first.  We just have to call lwp_exit() to allow
 297          * the other lwp to finish exiting the process.  Otherwise we're
 298          * restarting init, and should return.
 299          */
 300         if (proc_exit(why, what) != 0) {
 301                 mutex_enter(&curproc->p_lock);
 302                 ASSERT(curproc->p_flag & SEXITLWPS);
 303                 lwp_exit();
 304                 /* NOTREACHED */
 305         }
 306 }
 307 
 308 /*
 309  * Set the SEXITING flag on the process, after making sure /proc does
 310  * not have it locked.  This is done in more places than proc_exit(),
 311  * so it is a separate function.
 312  */
 313 void
 314 proc_is_exiting(proc_t *p)
 315 {
 316         mutex_enter(&p->p_lock);
 317         prbarrier(p);
 318         p->p_flag |= SEXITING;
 319         mutex_exit(&p->p_lock);
 320 }
 321 
 322 /*
 323  * Return value:
 324  *   1 - exitlwps() failed, call (or continue) lwp_exit()
 325  *   0 - restarting init.  Return through system call path
 326  */
 327 int
 328 proc_exit(int why, int what)
 329 {
 330         kthread_t *t = curthread;
 331         klwp_t *lwp = ttolwp(t);
 332         proc_t *p = ttoproc(t);
 333         zone_t *z = p->p_zone;
 334         timeout_id_t tmp_id;
 335         int rv;
 336         proc_t *q;
 337         task_t *tk;
 338         vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
 339         sigqueue_t *sqp;
 340         lwpdir_t *lwpdir;
 341         uint_t lwpdir_sz;
 342         tidhash_t *tidhash;
 343         uint_t tidhash_sz;
 344         ret_tidhash_t *ret_tidhash;
 345         refstr_t *cwd;
 346         hrtime_t hrutime, hrstime;
 347         int evaporate;
 348 
 349         /*
 350          * Stop and discard the process's lwps except for the current one,
 351          * unless some other lwp beat us to it.  If exitlwps() fails then
 352          * return and the calling lwp will call (or continue in) lwp_exit().
 353          */
 354         proc_is_exiting(p);
 355         if (exitlwps(0) != 0)
 356                 return (1);
 357 
 358         mutex_enter(&p->p_lock);
 359         if (p->p_ttime > 0) {
 360                 /*
 361                  * Account any remaining ticks charged to this process
 362                  * on its way out.
 363                  */
 364                 (void) task_cpu_time_incr(p->p_task, p->p_ttime);
 365                 p->p_ttime = 0;
 366         }
 367         mutex_exit(&p->p_lock);
 368 
 369         /*
 370          * Don't let init exit unless zone_start_init() failed its exec, or
 371          * we are shutting down the zone or the machine.
 372          *
 373          * Since we are single threaded, we don't need to lock the
 374          * following accesses to zone_proc_initpid.
 375          */
 376         if (p->p_pid == z->zone_proc_initpid) {
 377                 if (z->zone_boot_err == 0 &&
 378                     zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
 379                     zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
 380 
 381                         /*
 382                          * If the init process should be restarted, the
 383                          * "zone_restart_init" member will be set.  Some init
 384                          * programs in branded zones do not tolerate a restart
 385                          * in the traditional manner; setting the
 386                          * "zone_reboot_on_init_exit" member will cause the
 387                          * entire zone to be rebooted instead.  If neither of
 388                          * these flags is set the zone will shut down.
 389                          */
 390                         if (z->zone_reboot_on_init_exit == B_TRUE &&
 391                             z->zone_restart_init == B_TRUE) {
 392                                 /*
 393                                  * Trigger a zone reboot and continue
 394                                  * with exit processing.
 395                                  */
 396                                 z->zone_init_status = wstat(why, what);
 397                                 (void) zone_kadmin(A_REBOOT, 0, NULL,
 398                                     zone_kcred());
 399 
 400                         } else {
 401                                 if (z->zone_restart_init == B_TRUE) {
 402                                         if (restart_init(what, why) == 0)
 403                                                 return (0);
 404                                 }
 405 
 406                                 z->zone_init_status = wstat(why, what);
 407                                 (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
 408                                     zone_kcred());
 409                         }
 410                 }
 411 
 412                 /*
 413                  * Since we didn't or couldn't restart init, we clear
 414                  * the zone's init state and proceed with exit
 415                  * processing.
 416                  */
 417                 z->zone_proc_initpid = -1;
 418         }
 419 
 420         /*
 421          * Delay firing probes (and performing brand cleanup) until after the
 422          * zone_proc_initpid check. Cases which result in zone shutdown or
 423          * restart via zone_kadmin eventually result in a call back to
 424          * proc_exit.
 425          */
 426         DTRACE_PROC(lwp__exit);
 427         DTRACE_PROC1(exit, int, why);
 428 
 429         /*
 430          * Will perform any brand specific proc exit processing. Since this
 431          * is always the last lwp, will also perform lwp exit/free and proc
 432          * exit. Brand data will be freed when the process is reaped.
 433          */
 434         if (PROC_IS_BRANDED(p)) {
 435                 BROP(p)->b_lwpexit(lwp);
 436                 BROP(p)->b_proc_exit(p);
 437                 /*
 438                  * To ensure that b_proc_exit has access to brand-specific data
 439                  * contained by the one remaining lwp, call the freelwp hook as
 440                  * the last part of this clean-up process.
 441                  */
 442                 BROP(p)->b_freelwp(lwp);
 443                 lwp_detach_brand_hdlrs(lwp);
 444         }
 445 
 446         lwp_pcb_exit();
 447 
 448         /*
 449          * Allocate a sigqueue now, before we grab locks.
 450          * It will be given to sigcld(), below.
 451          * Special case:  If we will be making the process disappear
 452          * without a trace because it is either:
 453          *      * an exiting SSYS process, or
 454          *      * a posix_spawn() vfork child who requests it,
 455          * we don't bother to allocate a useless sigqueue.
 456          */
 457         evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
 458             why == CLD_EXITED && what == _EVAPORATE);
 459         if (!evaporate)
 460                 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
 461 
 462         /*
 463          * revoke any doors created by the process.
 464          */
 465         if (p->p_door_list)
 466                 door_exit();
 467 
 468         /*
 469          * Release schedctl data structures.
 470          */
 471         if (p->p_pagep)
 472                 schedctl_proc_cleanup();
 473 
 474         /*
 475          * make sure all pending kaio has completed.
 476          */
 477         if (p->p_aio)
 478                 aio_cleanup_exit();
 479 
 480         /*
 481          * discard the lwpchan cache.
 482          */
 483         if (p->p_lcp != NULL)
 484                 lwpchan_destroy_cache(0);
 485 
 486         /*
 487          * Clean up any DTrace helper actions or probes for the process.
 488          */
 489         if (p->p_dtrace_helpers != NULL) {
 490                 ASSERT(dtrace_helpers_cleanup != NULL);
 491                 (*dtrace_helpers_cleanup)();
 492         }
 493 
 494         /*
 495          * Clean up any signalfd state for the process.
 496          */
 497         if (p->p_sigfd != NULL) {
 498                 VERIFY(sigfd_exit_helper != NULL);
 499                 (*sigfd_exit_helper)();
 500         }
 501 
 502         /* untimeout the realtime timers */
 503         if (p->p_itimer != NULL)
 504                 timer_exit();
 505 
 506         if ((tmp_id = p->p_alarmid) != 0) {
 507                 p->p_alarmid = 0;
 508                 (void) untimeout(tmp_id);
 509         }
 510 
 511         /*
 512          * Remove any fpollinfo_t's for this (last) thread from our file
 513          * descriptors so closeall() can ASSERT() that they're all gone.
 514          */
 515         pollcleanup();
 516 
 517         if (p->p_rprof_cyclic != CYCLIC_NONE) {
 518                 mutex_enter(&cpu_lock);
 519                 cyclic_remove(p->p_rprof_cyclic);
 520                 mutex_exit(&cpu_lock);
 521         }
 522 
 523         mutex_enter(&p->p_lock);
 524 
 525         /*
 526          * Clean up any DTrace probes associated with this process.
 527          */
 528         if (p->p_dtrace_probes) {
 529                 ASSERT(dtrace_fasttrap_exit_ptr != NULL);
 530                 dtrace_fasttrap_exit_ptr(p);
 531         }
 532 
 533         while ((tmp_id = p->p_itimerid) != 0) {
 534                 p->p_itimerid = 0;
 535                 mutex_exit(&p->p_lock);
 536                 (void) untimeout(tmp_id);
 537                 mutex_enter(&p->p_lock);
 538         }
 539 
 540         lwp_cleanup();
 541 
 542         /*
 543          * We are about to exit; prevent our resource associations from
 544          * being changed.
 545          */
 546         pool_barrier_enter();
 547 
 548         /*
 549          * Block the process against /proc now that we have really
 550          * acquired p->p_lock (to manipulate p_tlist at least).
 551          */
 552         prbarrier(p);
 553 
 554         sigfillset(&p->p_ignore);
 555         sigemptyset(&p->p_siginfo);
 556         sigemptyset(&p->p_sig);
 557         sigemptyset(&p->p_extsig);
 558         sigemptyset(&t->t_sig);
 559         sigemptyset(&t->t_extsig);
 560         sigemptyset(&p->p_sigmask);
 561         sigdelq(p, t, 0);
 562         lwp->lwp_cursig = 0;
 563         lwp->lwp_extsig = 0;
 564         p->p_flag &= ~(SKILLED | SEXTKILLED);
 565         if (lwp->lwp_curinfo) {
 566                 siginfofree(lwp->lwp_curinfo);
 567                 lwp->lwp_curinfo = NULL;
 568         }
 569 
 570         t->t_proc_flag |= TP_LWPEXIT;
 571         ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
 572         prlwpexit(t);           /* notify /proc */
 573         lwp_hash_out(p, t->t_tid);
 574         prexit(p);
 575 
 576         p->p_lwpcnt = 0;
 577         p->p_tlist = NULL;
 578         sigqfree(p);
 579         term_mstate(t);
 580         p->p_mterm = gethrtime();
 581 
 582         exec_vp = p->p_exec;
 583         execdir_vp = p->p_execdir;
 584         p->p_exec = NULLVP;
 585         p->p_execdir = NULLVP;
 586         mutex_exit(&p->p_lock);
 587 
 588         pr_free_watched_pages(p);
 589 
 590         closeall(P_FINFO(p));
 591 
 592         /* Free the controlling tty.  (freectty() always assumes curproc.) */
 593         ASSERT(p == curproc);
 594         (void) freectty(B_TRUE);
 595 
 596 #if defined(__sparc)
 597         if (p->p_utraps != NULL)
 598                 utrap_free(p);
 599 #endif
 600         if (p->p_semacct)                    /* IPC semaphore exit */
 601                 semexit(p);
 602         rv = wstat(why, what);
 603 
 604         acct(rv & 0xff);
 605         exacct_commit_proc(p, rv);
 606 
 607         /*
 608          * Release any resources associated with C2 auditing
 609          */
 610         if (AU_AUDITING()) {
 611                 /*
 612                  * audit exit system call
 613                  */
 614                 audit_exit(why, what);
 615         }
 616 
 617         /*
 618          * Free address space.
 619          */
 620         relvm();
 621 
 622         if (exec_vp) {
 623                 /*
 624                  * Close this executable which has been opened when the process
 625                  * was created by getproc().
 626                  */
 627                 (void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL);
 628                 VN_RELE(exec_vp);
 629         }
 630         if (execdir_vp)
 631                 VN_RELE(execdir_vp);
 632 
 633         /*
 634          * Release held contracts.
 635          */
 636         contract_exit(p);
 637 
 638         /*
 639          * Depart our encapsulating process contract.
 640          */
 641         if ((p->p_flag & SSYS) == 0) {
 642                 ASSERT(p->p_ct_process);
 643                 contract_process_exit(p->p_ct_process, p, rv);
 644         }
 645 
 646         /*
 647          * Remove pool association, and block if requested by pool_do_bind.
 648          */
 649         mutex_enter(&p->p_lock);
 650         ASSERT(p->p_pool->pool_ref > 0);
 651         atomic_dec_32(&p->p_pool->pool_ref);
 652         p->p_pool = pool_default;
 653         /*
 654          * Now that our address space has been freed and all other threads
 655          * in this process have exited, set the PEXITED pool flag.  This
 656          * tells the pools subsystems to ignore this process if it was
 657          * requested to rebind this process to a new pool.
 658          */
 659         p->p_poolflag |= PEXITED;
 660         pool_barrier_exit();
 661         mutex_exit(&p->p_lock);
 662 
 663         mutex_enter(&pidlock);
 664 
 665         /*
 666          * Delete this process from the newstate list of its parent. We
 667          * will put it in the right place in the sigcld in the end.
 668          */
 669         delete_ns(p->p_parent, p);
 670 
 671         /*
 672          * Reassign the orphans to the next of kin.
 673          * Don't rearrange init's orphanage.
 674          */
 675         if ((q = p->p_orphan) != NULL && p != proc_init) {
 676 
 677                 proc_t *nokp = p->p_nextofkin;
 678 
 679                 for (;;) {
 680                         q->p_nextofkin = nokp;
 681                         if (q->p_nextorph == NULL)
 682                                 break;
 683                         q = q->p_nextorph;
 684                 }
 685                 q->p_nextorph = nokp->p_orphan;
 686                 nokp->p_orphan = p->p_orphan;
 687                 p->p_orphan = NULL;
 688         }
 689 
 690         /*
 691          * Reassign the children to init.
 692          * Don't try to assign init's children to init.
 693          */
 694         if ((q = p->p_child) != NULL && p != proc_init) {
 695                 struct proc     *np;
 696                 struct proc     *initp = proc_init;
 697                 pid_t           zone_initpid = 1;
 698                 struct proc     *zoneinitp = NULL;
 699                 boolean_t       setzonetop = B_FALSE;
 700 
 701                 if (!INGLOBALZONE(curproc)) {
 702                         zone_initpid = curproc->p_zone->zone_proc_initpid;
 703 
 704                         ASSERT(MUTEX_HELD(&pidlock));
 705                         zoneinitp = prfind(zone_initpid);
 706                         if (zoneinitp != NULL) {
 707                                 initp = zoneinitp;
 708                         } else {
 709                                 zone_initpid = 1;
 710                                 setzonetop = B_TRUE;
 711                         }
 712                 }
 713 
 714                 pgdetach(p);
 715 
 716                 do {
 717                         np = q->p_sibling;
 718                         /*
 719                          * Delete it from its current parent new state
 720                          * list and add it to init new state list
 721                          */
 722                         delete_ns(q->p_parent, q);
 723 
 724                         q->p_ppid = zone_initpid;
 725 
 726                         q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
 727                         if (setzonetop) {
 728                                 mutex_enter(&q->p_lock);
 729                                 q->p_flag |= SZONETOP;
 730                                 mutex_exit(&q->p_lock);
 731                         }
 732                         q->p_parent = initp;
 733 
 734                         /*
 735                          * Since q will be the first child,
 736                          * it will not have a previous sibling.
 737                          */
 738                         q->p_psibling = NULL;
 739                         if (initp->p_child) {
 740                                 initp->p_child->p_psibling = q;
 741                         }
 742                         q->p_sibling = initp->p_child;
 743                         initp->p_child = q;
 744                         if (q->p_proc_flag & P_PR_PTRACE) {
 745                                 mutex_enter(&q->p_lock);
 746                                 sigtoproc(q, NULL, SIGKILL);
 747                                 mutex_exit(&q->p_lock);
 748                         }
 749                         /*
 750                          * sigcld() will add the child to parents
 751                          * newstate list.
 752                          */
 753                         if (q->p_stat == SZOMB)
 754                                 sigcld(q, NULL);
 755                 } while ((q = np) != NULL);
 756 
 757                 p->p_child = NULL;
 758                 ASSERT(p->p_child_ns == NULL);
 759         }
 760 
 761         TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);
 762 
 763         mutex_enter(&p->p_lock);
 764         CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */
 765 
 766         /*
         * Have our task accumulate our resource usage data before they
 768          * become contaminated by p_cacct etc., and before we renounce
 769          * membership of the task.
 770          *
 771          * We do this regardless of whether or not task accounting is active.
 772          * This is to avoid having nonsense data reported for this task if
 773          * task accounting is subsequently enabled. The overhead is minimal;
 774          * by this point, this process has accounted for the usage of all its
 775          * LWPs. We nonetheless do the work here, and under the protection of
 776          * pidlock, so that the movement of the process's usage to the task
 777          * happens at the same time as the removal of the process from the
 778          * task, from the point of view of exacct_snapshot_task_usage().
 779          */
 780         exacct_update_task_mstate(p);
 781 
 782         hrutime = mstate_aggr_state(p, LMS_USER);
 783         hrstime = mstate_aggr_state(p, LMS_SYSTEM);
 784         p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
 785         p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;
 786 
 787         p->p_acct[LMS_USER]  += p->p_cacct[LMS_USER];
 788         p->p_acct[LMS_SYSTEM]        += p->p_cacct[LMS_SYSTEM];
 789         p->p_acct[LMS_TRAP]  += p->p_cacct[LMS_TRAP];
 790         p->p_acct[LMS_TFAULT]        += p->p_cacct[LMS_TFAULT];
 791         p->p_acct[LMS_DFAULT]        += p->p_cacct[LMS_DFAULT];
 792         p->p_acct[LMS_KFAULT]        += p->p_cacct[LMS_KFAULT];
 793         p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
 794         p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP];
 795         p->p_acct[LMS_WAIT_CPU]      += p->p_cacct[LMS_WAIT_CPU];
 796         p->p_acct[LMS_STOPPED]       += p->p_cacct[LMS_STOPPED];
 797 
 798         p->p_ru.minflt       += p->p_cru.minflt;
 799         p->p_ru.majflt       += p->p_cru.majflt;
 800         p->p_ru.nswap        += p->p_cru.nswap;
 801         p->p_ru.inblock      += p->p_cru.inblock;
 802         p->p_ru.oublock      += p->p_cru.oublock;
 803         p->p_ru.msgsnd       += p->p_cru.msgsnd;
 804         p->p_ru.msgrcv       += p->p_cru.msgrcv;
 805         p->p_ru.nsignals += p->p_cru.nsignals;
 806         p->p_ru.nvcsw        += p->p_cru.nvcsw;
 807         p->p_ru.nivcsw       += p->p_cru.nivcsw;
 808         p->p_ru.sysc += p->p_cru.sysc;
 809         p->p_ru.ioch += p->p_cru.ioch;
 810 
 811         p->p_stat = SZOMB;
 812         p->p_proc_flag &= ~P_PR_PTRACE;
 813         p->p_wdata = what;
 814         p->p_wcode = (char)why;
 815 
 816         cdir = PTOU(p)->u_cdir;
 817         rdir = PTOU(p)->u_rdir;
 818         cwd = PTOU(p)->u_cwd;
 819 
 820         ASSERT(cdir != NULL || p->p_parent == &p0);
 821 
 822         /*
 823          * Release resource controls, as they are no longer enforceable.
 824          */
 825         rctl_set_free(p->p_rctls);
 826 
 827         /*
 828          * Decrement tk_nlwps counter for our task.max-lwps resource control.
 829          * An extended accounting record, if that facility is active, is
 830          * scheduled to be written.  We cannot give up task and project
 831          * membership at this point because that would allow zombies to escape
 832          * from the max-processes resource controls.  Zombies stay in their
 833          * current task and project until the process table slot is released
 834          * in freeproc().
 835          */
 836         tk = p->p_task;
 837 
 838         mutex_enter(&p->p_zone->zone_nlwps_lock);
 839         tk->tk_nlwps--;
 840         tk->tk_proj->kpj_nlwps--;
 841         p->p_zone->zone_nlwps--;
 842         mutex_exit(&p->p_zone->zone_nlwps_lock);
 843 
 844         /*
 845          * Clear the lwp directory and the lwpid hash table
 846          * now that /proc can't bother us any more.
 847          * We free the memory below, after dropping p->p_lock.
 848          */
 849         lwpdir = p->p_lwpdir;
 850         lwpdir_sz = p->p_lwpdir_sz;
 851         tidhash = p->p_tidhash;
 852         tidhash_sz = p->p_tidhash_sz;
 853         ret_tidhash = p->p_ret_tidhash;
 854         p->p_lwpdir = NULL;
 855         p->p_lwpfree = NULL;
 856         p->p_lwpdir_sz = 0;
 857         p->p_tidhash = NULL;
 858         p->p_tidhash_sz = 0;
 859         p->p_ret_tidhash = NULL;
 860 
 861         /*
 862          * If the process has context ops installed, call the exit routine
 863          * on behalf of this last remaining thread. Normally exitpctx() is
 864          * called during thread_exit() or lwp_exit(), but because this is the
 865          * last thread in the process, we must call it here. By the time
 866          * thread_exit() is called (below), the association with the relevant
 867          * process has been lost.
 868          *
 869          * We also free the context here.
 870          */
 871         if (p->p_pctx) {
 872                 kpreempt_disable();
 873                 exitpctx(p);
 874                 kpreempt_enable();
 875 
 876                 freepctx(p, 0);
 877         }
 878 
 879         /*
 880          * curthread's proc pointer is changed to point to the 'sched'
 881          * process for the corresponding zone, except in the case when
 882          * the exiting process is in fact a zsched instance, in which
 883          * case the proc pointer is set to p0.  We do so, so that the
 884          * process still points at the right zone when we call the VN_RELE()
 885          * below.
 886          *
 887          * This is because curthread's original proc pointer can be freed as
 888          * soon as the child sends a SIGCLD to its parent.  We use zsched so
 889          * that for user processes, even in the final moments of death, the
 890          * process is still associated with its zone.
 891          */
 892         if (p != t->t_procp->p_zone->zone_zsched)
 893                 t->t_procp = t->t_procp->p_zone->zone_zsched;
 894         else
 895                 t->t_procp = &p0;
 896 
 897         mutex_exit(&p->p_lock);
 898         if (!evaporate) {
 899                 /*
 900                  * The brand specific code only happens when the brand has a
 901                  * function to call in place of sigcld and the parent of the
 902                  * exiting process is not the global zone init. If the parent
 903                  * is the global zone init, then the process was reparented,
 904                  * and we don't want brand code delivering possibly strange
 905                  * signals to init. Also, init is not branded, so any brand
 906                  * specific exit data will not be picked up by init anyway.
 907                  */
 908                 if (PROC_IS_BRANDED(p) &&
 909                     BROP(p)->b_exit_with_sig != NULL &&
 910                     p->p_ppid != 1) {
 911                         /*
 912                          * The code for _fini that could unload the brand_t
 913                          * blocks until the count of zones using the module
 914                          * reaches zero. Zones decrement the refcount on their
 915                          * brands only after all user tasks in that zone have
 916                          * exited and been waited on. The decrement on the
 917                          * brand's refcount happen in zone_destroy(). That
 918                          * depends on zone_shutdown() having been completed.
 919                          * zone_shutdown() includes a call to zone_empty(),
 920                          * where the zone waits for itself to reach the state
 921                          * ZONE_IS_EMPTY. This state is only set in either
 922                          * zone_shutdown(), when there are no user processes as
 923                          * the zone enters this function, or in
 924                          * zone_task_rele(). zone_task_rele() is called from
 925                          * code triggered by waiting on processes, not by the
 926                          * processes exiting through proc_exit().  This means
 927                          * all the branded processes that could exist for a
 928                          * specific brand_t must exit and get reaped before the
 929                          * refcount on the brand_t can reach 0. _fini will
 930                          * never unload the corresponding brand module before
 931                          * proc_exit finishes execution for all processes
 932                          * branded with a particular brand_t, which makes the
 933                          * operation below safe to do. Brands that wish to use
 934                          * this mechanism must wait in _fini as described
 935                          * above.
 936                          */
 937                         BROP(p)->b_exit_with_sig(p, sqp);
 938                 } else {
 939                         p->p_pidflag &= ~CLDPEND;
 940                         sigcld(p, sqp);
 941                 }
 942 
 943         } else {
 944                 /*
 945                  * Do what sigcld() would do if the disposition
 946                  * of the SIGCHLD signal were set to be ignored.
 947                  */
 948                 cv_broadcast(&p->p_srwchan_cv);
 949                 freeproc(p);
 950         }
 951         mutex_exit(&pidlock);
 952 
 953         /*
 954          * We don't release u_cdir and u_rdir until SZOMB is set.
 955          * This protects us against dofusers().
 956          */
 957         if (cdir)
 958                 VN_RELE(cdir);
 959         if (rdir)
 960                 VN_RELE(rdir);
 961         if (cwd)
 962                 refstr_rele(cwd);
 963 
 964         /*
 965          * task_rele() may ultimately cause the zone to go away (or
 966          * may cause the last user process in a zone to go away, which
 967          * signals zsched to go away).  So prior to this call, we must
 968          * no longer point at zsched.
 969          */
 970         t->t_procp = &p0;
 971 
 972         kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
 973         kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
 974         while (ret_tidhash != NULL) {
 975                 ret_tidhash_t *next = ret_tidhash->rth_next;
 976                 kmem_free(ret_tidhash->rth_tidhash,
 977                     ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
 978                 kmem_free(ret_tidhash, sizeof (*ret_tidhash));
 979                 ret_tidhash = next;
 980         }
 981 
 982         thread_exit();
 983         /* NOTREACHED */
 984 }
 985 
 986 /*
 987  * Format siginfo structure for wait system calls.
 988  */
 989 void
 990 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
 991 {
 992         ASSERT(MUTEX_HELD(&pidlock));
 993 
 994         bzero(ip, sizeof (k_siginfo_t));
 995         ip->si_signo = SIGCLD;
 996         ip->si_code = pp->p_wcode;
 997         ip->si_pid = pp->p_pid;
 998         ip->si_ctid = PRCTID(pp);
 999         ip->si_zoneid = pp->p_zone->zone_id;
1000         ip->si_status = pp->p_wdata;
1001         ip->si_stime = pp->p_stime;
1002         ip->si_utime = pp->p_utime;
1003 
1004         if (waitflag) {
1005                 pp->p_wcode = 0;
1006                 pp->p_wdata = 0;
1007                 pp->p_pidflag &= ~CLDPEND;
1008         }
1009 }
1010 
/*
 * Wait system call.
 * Search for a terminated (zombie) child,
 * finally lay it to rest, and collect its status.
 * Look also for stopped children,
 * and pass back status from them.
 *
 * idtype and id select which children are of interest (P_PID, P_PGID or
 * P_ALL), ip receives the status of the child that is reported, and
 * options is a mask of W* flags.
 *
 * Returns 0 when a child was reported through ip, or when WNOHANG was
 * given and candidates exist but none has anything to report (in which
 * case *ip is zeroed).  Returns EINVAL for a bad idtype, id or options,
 * ECHILD when there is nothing to wait for, and EINTR when
 * cv_wait_sig_swap() returns zero.  For a branded process, the brand's
 * b_waitid_helper may supply the return value instead.
 */
int
waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
{
        proc_t *cp, *pp;
        /* Non-zero unless WNOWAIT was requested, i.e. the status is consumed. */
        int waitflag = !(options & WNOWAIT);
        boolean_t have_brand_helper = B_FALSE;

        /*
         * Obsolete flag, defined here only for binary compatibility
         * with old statically linked executables.  Delete this when
         * we no longer care about these old and broken applications.
         */
#define _WNOCHLD        0400
        options &= ~_WNOCHLD;

        /* At least one option is required and none may lie outside WOPTMASK. */
        if (options == 0 || (options & ~WOPTMASK))
                return (EINVAL);

        switch (idtype) {
        case P_PID:
        case P_PGID:
                if (id < 0 || id >= maxpid)
                        return (EINVAL);
                /* FALLTHROUGH */
        case P_ALL:
                break;
        default:
                return (EINVAL);
        }

        pp = ttoproc(curthread);

        /*
         * Anytime you are looking for a process, you take pidlock to prevent
         * things from changing as you look.
         */
        mutex_enter(&pidlock);

        /*
         * if we are only looking for exited processes and child_ns list
         * is empty no reason to look at all children.
         */
        if (idtype == P_ALL &&
            (options & ~WNOWAIT) == (WNOHANG | WEXITED) &&
            pp->p_child_ns == NULL) {
                if (pp->p_child) {
                        /* Children exist but none has a new state to report. */
                        mutex_exit(&pidlock);
                        bzero(ip, sizeof (k_siginfo_t));
                        return (0);
                }
                mutex_exit(&pidlock);
                return (ECHILD);
        }

        if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) {
                have_brand_helper = B_TRUE;
        }

        /*
         * Each pass scans the children once; the loop is re-entered only
         * after cv_wait_sig_swap() at the bottom returns non-zero.
         */
        while (pp->p_child != NULL || have_brand_helper) {
                boolean_t brand_wants_wait = B_FALSE;
                int proc_gone = 0;
                int found = 0;

                /*
                 * Give the brand a chance to return synthetic results from
                 * this waitid() call before we do the real thing.
                 */
                if (have_brand_helper) {
                        int ret;

                        if (BROP(pp)->b_waitid_helper(idtype, id, ip, options,
                            &brand_wants_wait, &ret) == 0) {
                                mutex_exit(&pidlock);
                                return (ret);
                        }

                        /*
                         * The loop may have been entered solely because of
                         * the helper; skip the scans if there are no children.
                         */
                        if (pp->p_child == NULL) {
                                goto no_real_children;
                        }
                }

                /*
                 * Look for interesting children in the newstate list.
                 */
                VERIFY(pp->p_child != NULL);
                for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
                        /*
                         * Children flagged CLDWAITPID are only considered
                         * for P_PID requests.
                         */
                        if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
                                continue;
                        if (idtype == P_PID && id != cp->p_pid)
                                continue;
                        if (idtype == P_PGID && id != cp->p_pgrp)
                                continue;
                        /* Let the brand veto this child. */
                        if (PROC_IS_BRANDED(pp)) {
                                if (BROP(pp)->b_wait_filter != NULL &&
                                    BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
                                        continue;
                        }

                        switch (cp->p_wcode) {

                        case CLD_TRAPPED:
                        case CLD_STOPPED:
                        case CLD_CONTINUED:
                                /* These states must not be on this list. */
                                cmn_err(CE_PANIC,
                                    "waitid: wrong state %d on the p_newstate"
                                    " list", cp->p_wcode);
                                break;

                        case CLD_EXITED:
                        case CLD_DUMPED:
                        case CLD_KILLED:
                                if (!(options & WEXITED)) {
                                        /*
                                         * Count how many are already gone
                                         * for good.
                                         */
                                        proc_gone++;
                                        break;
                                }
                                if (!waitflag) {
                                        /* WNOWAIT: report it, leave it be. */
                                        winfo(cp, ip, 0);
                                } else {
                                        /* Consume the status and free cp. */
                                        winfo(cp, ip, 1);
                                        freeproc(cp);
                                }
                                mutex_exit(&pidlock);
                                if (waitflag) {         /* accept SIGCLD */
                                        sigcld_delete(ip);
                                        sigcld_repost();
                                }
                                return (0);
                        }

                        /* This was the child with the requested pid. */
                        if (idtype == P_PID)
                                break;
                }

                /*
                 * None of the processes on the p_child_ns list were
                 * interesting. Check all the kids!
                 */
                for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
                        if (idtype == P_PID && id != cp->p_pid)
                                continue;
                        if (idtype == P_PGID && id != cp->p_pgrp)
                                continue;
                        if (PROC_IS_BRANDED(pp)) {
                                if (BROP(pp)->b_wait_filter != NULL &&
                                    BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
                                        continue;
                        }

                        /*
                         * A "break" out of this switch falls through to the
                         * found++ below; a "continue" moves on to the next
                         * child without counting this one.
                         */
                        switch (cp->p_wcode) {
                        case CLD_TRAPPED:
                                if (!(options & WTRAPPED))
                                        break;
                                winfo(cp, ip, waitflag);
                                mutex_exit(&pidlock);
                                if (waitflag) {         /* accept SIGCLD */
                                        sigcld_delete(ip);
                                        sigcld_repost();
                                }
                                return (0);

                        case CLD_STOPPED:
                                if (!(options & WSTOPPED))
                                        break;
                                /* Is it still stopped? */
                                mutex_enter(&cp->p_lock);
                                if (!jobstopped(cp)) {
                                        mutex_exit(&cp->p_lock);
                                        break;
                                }
                                mutex_exit(&cp->p_lock);
                                winfo(cp, ip, waitflag);
                                mutex_exit(&pidlock);
                                if (waitflag) {         /* accept SIGCLD */
                                        sigcld_delete(ip);
                                        sigcld_repost();
                                }
                                return (0);

                        case CLD_CONTINUED:
                                if (!(options & WCONTINUED))
                                        break;
                                winfo(cp, ip, waitflag);
                                mutex_exit(&pidlock);
                                if (waitflag) {         /* accept SIGCLD */
                                        sigcld_delete(ip);
                                        sigcld_repost();
                                }
                                return (0);

                        case CLD_EXITED:
                        case CLD_DUMPED:
                        case CLD_KILLED:
                                if (idtype != P_PID &&
                                    (cp->p_pidflag & CLDWAITPID))
                                        continue;
                                /*
                                 * Don't complain if a process was found in
                                 * the first loop but we broke out of the loop
                                 * because of the arguments passed to us.
                                 */
                                if (proc_gone == 0) {
                                        cmn_err(CE_PANIC,
                                            "waitid: wrong state on the"
                                            " p_child list");
                                } else {
                                        break;
                                }
                        }

                        /* A matching child with nothing to report this pass. */
                        found++;

                        /* This was the child with the requested pid. */
                        if (idtype == P_PID)
                                break;
                }

no_real_children:
                /*
                 * If we found no interesting processes at all,
                 * break out and return ECHILD.
                 */
                if (!brand_wants_wait && (found + proc_gone == 0))
                        break;

                if (options & WNOHANG) {
                        mutex_exit(&pidlock);
                        bzero(ip, sizeof (k_siginfo_t));
                        /*
                         * We should set ip->si_signo = SIGCLD,
                         * but there is an SVVS test that expects
                         * ip->si_signo to be zero in this case.
                         */
                        return (0);
                }

                /*
                 * If we found no processes of interest that could
                 * change state while we wait, we don't wait at all.
                 * Get out with ECHILD according to SVID.
                 */
                if (!brand_wants_wait && (found == proc_gone))
                        break;

                /*
                 * Sleep on our own p_cv; a zero return from
                 * cv_wait_sig_swap() is reported to the caller as EINTR.
                 */
                if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
                        mutex_exit(&pidlock);
                        return (EINTR);
                }
        }
        mutex_exit(&pidlock);
        return (ECHILD);
}
1272 
1273 int
1274 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1275 {
1276         int error;
1277         k_siginfo_t info;
1278 
1279         if (error = waitid(idtype, id, &info, options))
1280                 return (set_errno(error));
1281         if (copyout(&info, infop, sizeof (k_siginfo_t)))
1282                 return (set_errno(EFAULT));
1283         return (0);
1284 }
1285 
1286 #ifdef _SYSCALL32_IMPL
1287 
1288 int
1289 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1290 {
1291         int error;
1292         k_siginfo_t info;
1293         siginfo32_t info32;
1294 
1295         if (error = waitid(idtype, id, &info, options))
1296                 return (set_errno(error));
1297         siginfo_kto32(&info, &info32);
1298         if (copyout(&info32, infop, sizeof (info32)))
1299                 return (set_errno(EFAULT));
1300         return (0);
1301 }
1302 
1303 #endif  /* _SYSCALL32_IMPL */
1304 
1305 void
1306 proc_detach(proc_t *p)
1307 {
1308         proc_t *q;
1309 
1310         ASSERT(MUTEX_HELD(&pidlock));
1311 
1312         q = p->p_parent;
1313         ASSERT(q != NULL);
1314 
1315         /*
1316          * Take it off the newstate list of its parent
1317          */
1318         delete_ns(q, p);
1319 
1320         if (q->p_child == p) {
1321                 q->p_child = p->p_sibling;
1322                 /*
1323                  * If the parent has no children, it better not
1324                  * have any with new states either!
1325                  */
1326                 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL);
1327         }
1328 
1329         if (p->p_sibling) {
1330                 p->p_sibling->p_psibling = p->p_psibling;
1331         }
1332 
1333         if (p->p_psibling) {
1334                 p->p_psibling->p_sibling = p->p_sibling;
1335         }
1336 }
1337 
1338 /*
1339  * Remove zombie children from the process table.
1340  */
1341 void
1342 freeproc(proc_t *p)
1343 {
1344         proc_t *q;
1345         task_t *tk;
1346 
1347         ASSERT(p->p_stat == SZOMB);
1348         ASSERT(p->p_tlist == NULL);
1349         ASSERT(MUTEX_HELD(&pidlock));
1350 
1351         sigdelq(p, NULL, 0);
1352         if (p->p_killsqp) {
1353                 siginfofree(p->p_killsqp);
1354                 p->p_killsqp = NULL;
1355         }
1356 
1357         /* Clear any remaining brand data */
1358         if (PROC_IS_BRANDED(p)) {
1359                 brand_clearbrand(p, B_FALSE);
1360         }
1361 
1362 
1363         prfree(p);      /* inform /proc */
1364 
1365         /*
1366          * Don't free the init processes.
1367          * Other dying processes will access it.
1368          */
1369         if (p == proc_init)
1370                 return;
1371 
1372 
1373         /*
1374          * We wait until now to free the cred structure because a
1375          * zombie process's credentials may be examined by /proc.
1376          * No cred locking needed because there are no threads at this point.
1377          */
1378         upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
1379         crfree(p->p_cred);
1380         if (p->p_corefile != NULL) {
1381                 corectl_path_rele(p->p_corefile);
1382                 p->p_corefile = NULL;
1383         }
1384         if (p->p_content != NULL) {
1385                 corectl_content_rele(p->p_content);
1386                 p->p_content = NULL;
1387         }
1388 
1389         if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) ||
1390             (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) {
1391                 /*
1392                  * This should still do the right thing since p_utime/stime
1393                  * get set to the correct value on process exit, so it
1394                  * should get properly updated
1395                  */
1396                 p->p_nextofkin->p_cutime += p->p_utime;
1397                 p->p_nextofkin->p_cstime += p->p_stime;
1398 
1399                 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER];
1400                 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM];
1401                 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP];
1402                 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT];
1403                 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT];
1404                 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT];
1405                 p->p_nextofkin->p_cacct[LMS_USER_LOCK]
1406                     += p->p_acct[LMS_USER_LOCK];
1407                 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP];
1408                 p->p_nextofkin->p_cacct[LMS_WAIT_CPU]
1409                     += p->p_acct[LMS_WAIT_CPU];
1410                 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED];
1411 
1412                 p->p_nextofkin->p_cru.minflt      += p->p_ru.minflt;
1413                 p->p_nextofkin->p_cru.majflt      += p->p_ru.majflt;
1414                 p->p_nextofkin->p_cru.nswap       += p->p_ru.nswap;
1415                 p->p_nextofkin->p_cru.inblock     += p->p_ru.inblock;
1416                 p->p_nextofkin->p_cru.oublock     += p->p_ru.oublock;
1417                 p->p_nextofkin->p_cru.msgsnd      += p->p_ru.msgsnd;
1418                 p->p_nextofkin->p_cru.msgrcv      += p->p_ru.msgrcv;
1419                 p->p_nextofkin->p_cru.nsignals    += p->p_ru.nsignals;
1420                 p->p_nextofkin->p_cru.nvcsw       += p->p_ru.nvcsw;
1421                 p->p_nextofkin->p_cru.nivcsw      += p->p_ru.nivcsw;
1422                 p->p_nextofkin->p_cru.sysc        += p->p_ru.sysc;
1423                 p->p_nextofkin->p_cru.ioch        += p->p_ru.ioch;
1424 
1425         }
1426 
1427         q = p->p_nextofkin;
1428         if (q && q->p_orphan == p)
1429                 q->p_orphan = p->p_nextorph;
1430         else if (q) {
1431                 for (q = q->p_orphan; q; q = q->p_nextorph)
1432                         if (q->p_nextorph == p)
1433                                 break;
1434                 ASSERT(q && q->p_nextorph == p);
1435                 q->p_nextorph = p->p_nextorph;
1436         }
1437 
1438         /*
1439          * The process table slot is being freed, so it is now safe to give up
1440          * task and project membership.
1441          */
1442         mutex_enter(&p->p_lock);
1443         tk = p->p_task;
1444         task_detach(p);
1445         mutex_exit(&p->p_lock);
1446 
1447         proc_detach(p);
1448         pid_exit(p, tk);        /* frees pid and proc structure */
1449 
1450         task_rele(tk);
1451 }
1452 
1453 /*
1454  * Delete process "child" from the newstate list of process "parent"
1455  */
1456 void
1457 delete_ns(proc_t *parent, proc_t *child)
1458 {
1459         proc_t **ns;
1460 
1461         ASSERT(MUTEX_HELD(&pidlock));
1462         ASSERT(child->p_parent == parent);
1463         for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) {
1464                 if (*ns == child) {
1465 
1466                         ASSERT((*ns)->p_parent == parent);
1467 
1468                         *ns = child->p_sibling_ns;
1469                         child->p_sibling_ns = NULL;
1470                         return;
1471                 }
1472         }
1473 }
1474 
1475 /*
1476  * Add process "child" to the new state list of process "parent"
1477  */
1478 void
1479 add_ns(proc_t *parent, proc_t *child)
1480 {
1481         ASSERT(child->p_sibling_ns == NULL);
1482         child->p_sibling_ns = parent->p_child_ns;
1483         parent->p_child_ns = child;
1484 }