zones-merge-changes Old usr/src/uts/common/os/exit.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2018 Joyent, Inc.
  25  * Copyright 2020 Oxide Computer Company
  26  * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
  27  */
  28 
  29 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  30 
  31 #include <sys/types.h>
  32 #include <sys/param.h>
  33 #include <sys/sysmacros.h>
  34 #include <sys/systm.h>
  35 #include <sys/cred.h>
  36 #include <sys/user.h>
  37 #include <sys/errno.h>
  38 #include <sys/proc.h>
  39 #include <sys/ucontext.h>
  40 #include <sys/procfs.h>
  41 #include <sys/vnode.h>
  42 #include <sys/acct.h>
  43 #include <sys/var.h>
  44 #include <sys/cmn_err.h>
  45 #include <sys/debug.h>
  46 #include <sys/wait.h>
  47 #include <sys/siginfo.h>
  48 #include <sys/procset.h>
  49 #include <sys/class.h>
  50 #include <sys/file.h>
  51 #include <sys/session.h>
  52 #include <sys/kmem.h>
  53 #include <sys/vtrace.h>
  54 #include <sys/prsystm.h>
  55 #include <sys/ipc.h>
  56 #include <sys/sem_impl.h>
  57 #include <c2/audit.h>
  58 #include <sys/aio_impl.h>
  59 #include <vm/as.h>
  60 #include <sys/poll.h>
  61 #include <sys/door.h>
  62 #include <sys/lwpchan_impl.h>
  63 #include <sys/utrap.h>
  64 #include <sys/task.h>
  65 #include <sys/exacct.h>
  66 #include <sys/cyclic.h>
  67 #include <sys/schedctl.h>
  68 #include <sys/rctl.h>
  69 #include <sys/contract_impl.h>
  70 #include <sys/contract/process_impl.h>
  71 #include <sys/list.h>
  72 #include <sys/dtrace.h>
  73 #include <sys/pool.h>
  74 #include <sys/sdt.h>
  75 #include <sys/corectl.h>
  76 #include <sys/core.h>
  77 #include <sys/brand.h>
  78 #include <sys/libc_kernel.h>
  79 
  80 /*
  81  * convert code/data pair into old style wait status
  82  */
  83 int
  84 wstat(int code, int data)
  85 {
  86         int stat = (data & 0377);
  87 
  88         switch (code) {
  89         case CLD_EXITED:
  90                 stat <<= 8;
  91                 break;
  92         case CLD_DUMPED:
  93                 stat |= WCOREFLG;
  94                 break;
  95         case CLD_KILLED:
  96                 break;
  97         case CLD_TRAPPED:
  98         case CLD_STOPPED:
  99                 stat <<= 8;
 100                 stat |= WSTOPFLG;
 101                 break;
 102         case CLD_CONTINUED:
 103                 stat = WCONTFLG;
 104                 break;
 105         default:
 106                 cmn_err(CE_PANIC, "wstat: bad code");
 107                 /* NOTREACHED */
 108         }
 109         return (stat);
 110 }
 111 
 112 static char *
 113 exit_reason(char *buf, size_t bufsz, int what, int why)
 114 {
 115         switch (why) {
 116         case CLD_EXITED:
 117                 (void) snprintf(buf, bufsz, "exited with status %d", what);
 118                 break;
 119         case CLD_KILLED:
 120                 (void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
 121                 break;
 122         case CLD_DUMPED:
 123                 (void) snprintf(buf, bufsz, "core dumped on signal %d", what);
 124                 break;
 125         default:
 126                 (void) snprintf(buf, bufsz, "encountered unknown error "
 127                     "(%d, %d)", why, what);
 128                 break;
 129         }
 130 
 131         return (buf);
 132 }
 133 
 134 /*
 135  * exit system call: pass back caller's arg.
 136  */
 137 void
 138 rexit(int rval)
 139 {
 140         exit(CLD_EXITED, rval);
 141 }
 142 
 143 /*
 144  * Bump the init_restarts kstat and let interested parties know about the
 145  * restart.
 146  */
 147 static void
 148 restart_init_notify(zone_t *zone)
 149 {
 150         nvlist_t *nvl = NULL;
 151 
 152         zone->zone_proc_init_restarts++;
 153 
 154         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0 &&
 155             nvlist_add_uint32(nvl, ZONE_CB_RESTARTS,
 156             zone->zone_proc_init_restarts) == 0) {
 157                 zone_sysevent_publish(zone, ZONE_EVENT_INIT_CLASS,
 158                     ZONE_EVENT_INIT_RESTART_SC, nvl);
 159         }
 160 
 161         nvlist_free(nvl);
 162 }
 163 
 164 /*
 165  * Called by proc_exit() when a zone's init exits, presumably because
 166  * it failed.  As long as the given zone is still in the "running"
 167  * state, we will re-exec() init, but first we need to reset things
 168  * which are usually inherited across exec() but will break init's
 169  * assumption that it is being exec()'d from a virgin process.  Most
 170  * importantly this includes closing all file descriptors (exec only
 171  * closes those marked close-on-exec) and resetting signals (exec only
 172  * resets handled signals, and we need to clear any signals which
 173  * killed init).  Anything else that exec(2) says would be inherited,
 174  * but would affect the execution of init, needs to be reset.
 175  */
 176 static int
 177 restart_init(int what, int why)
 178 {
 179         kthread_t *t = curthread;
 180         klwp_t *lwp = ttolwp(t);
 181         proc_t *p = ttoproc(t);
 182         proc_t *pp = p->p_zone->zone_zsched;
 183         user_t *up = PTOU(p);
 184 
 185         vnode_t *oldcd, *oldrd;
 186         int i, err;
 187         char reason_buf[64];
 188 
 189         /*
 190          * Let zone admin (and global zone admin if this is for a non-global
 191          * zone) know that init has failed and will be restarted.
 192          */
 193         zcmn_err(p->p_zone->zone_id, CE_WARN,
 194             "init(1M) %s: restarting automatically",
 195             exit_reason(reason_buf, sizeof (reason_buf), what, why));
 196 
 197         if (!INGLOBALZONE(p)) {
 198                 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
 199                     "restarting automatically",
 200                     p->p_zone->zone_name, p->p_pid, reason_buf);
 201         }
 202 
 203         /*
 204          * Remove any fpollinfo_t's for this (last) thread from our file
 205          * descriptors so closeall() can ASSERT() that they're all gone.
 206          * Then close all open file descriptors in the process.
 207          */
 208         pollcleanup();
 209         closeall(P_FINFO(p));
 210 
 211         /*
 212          * Grab p_lock and begin clearing miscellaneous global process
 213          * state that needs to be reset before we exec the new init(1M).
 214          */
 215 
 216         mutex_enter(&p->p_lock);
 217         prbarrier(p);
 218 
 219         p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
 220         up->u_cmask = CMASK;
 221 
 222         sigemptyset(&t->t_hold);
 223         sigemptyset(&t->t_sig);
 224         sigemptyset(&t->t_extsig);
 225 
 226         sigemptyset(&p->p_sig);
 227         sigemptyset(&p->p_extsig);
 228 
 229         sigdelq(p, t, 0);
 230         sigdelq(p, NULL, 0);
 231 
 232         if (p->p_killsqp) {
 233                 siginfofree(p->p_killsqp);
 234                 p->p_killsqp = NULL;
 235         }
 236 
 237         /*
 238          * Reset any signals that are ignored back to the default disposition.
 239          * Other u_signal members will be cleared when exec calls sigdefault().
 240          */
 241         for (i = 1; i < NSIG; i++) {
 242                 if (up->u_signal[i - 1] == SIG_IGN) {
 243                         up->u_signal[i - 1] = SIG_DFL;
 244                         sigemptyset(&up->u_sigmask[i - 1]);
 245                 }
 246         }
 247 
 248         /*
 249          * Clear the current signal, any signal info associated with it, and
 250          * any signal information from contracts and/or contract templates.
 251          */
 252         lwp->lwp_cursig = 0;
 253         lwp->lwp_extsig = 0;
 254         if (lwp->lwp_curinfo != NULL) {
 255                 siginfofree(lwp->lwp_curinfo);
 256                 lwp->lwp_curinfo = NULL;
 257         }
 258         lwp_ctmpl_clear(lwp, B_FALSE);
 259 
 260         /*
 261          * Reset both the process root directory and the current working
 262          * directory to the root of the zone just as we do during boot.
 263          */
 264         VN_HOLD(p->p_zone->zone_rootvp);
 265         oldrd = up->u_rdir;
 266         up->u_rdir = p->p_zone->zone_rootvp;
 267 
 268         VN_HOLD(p->p_zone->zone_rootvp);
 269         oldcd = up->u_cdir;
 270         up->u_cdir = p->p_zone->zone_rootvp;
 271 
 272         if (up->u_cwd != NULL) {
 273                 refstr_rele(up->u_cwd);
 274                 up->u_cwd = NULL;
 275         }
 276 
 277         /* Reset security flags */
 278         mutex_enter(&pp->p_lock);
 279         p->p_secflags = pp->p_secflags;
 280         mutex_exit(&pp->p_lock);
 281 
 282         mutex_exit(&p->p_lock);
 283 
 284         if (oldrd != NULL)
 285                 VN_RELE(oldrd);
 286         if (oldcd != NULL)
 287                 VN_RELE(oldcd);
 288 
 289         /*
 290          * It's possible that a zone's init will have become privilege aware
 291          * and modified privilege sets; reset them.
 292          */
 293         cred_t *oldcr, *newcr;
 294 
 295         mutex_enter(&p->p_crlock);
 296         oldcr = p->p_cred;
 297         mutex_enter(&pp->p_crlock);
 298         crhold(newcr = p->p_cred = pp->p_cred);
 299         mutex_exit(&pp->p_crlock);
 300         mutex_exit(&p->p_crlock);
 301         crfree(oldcr);
 302         /* Additional hold for the current thread - expected by crset() */
 303         crhold(newcr);
 304         crset(p, newcr);
 305 
 306         /* Free the controlling tty.  (freectty() always assumes curproc.) */
 307         ASSERT(p == curproc);
 308         (void) freectty(B_TRUE);
 309 
 310         restart_init_notify(p->p_zone);
 311 
 312         /*
 313          * Now exec() the new init(1M) on top of the current process.  If we
 314          * succeed, the caller will treat this like a successful system call.
 315          * If we fail, we issue messages and the caller will proceed with exit.
 316          */
 317         err = exec_init(p->p_zone->zone_initname, NULL);
 318 
 319         if (err == 0)
 320                 return (0);
 321 
 322         zcmn_err(p->p_zone->zone_id, CE_WARN,
 323             "failed to restart init(1M) (err=%d): system reboot required", err);
 324 
 325         if (!INGLOBALZONE(p)) {
 326                 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
 327                     "(pid %d, err=%d): zoneadm(1M) boot required",
 328                     p->p_zone->zone_name, p->p_pid, err);
 329         }
 330 
 331         return (-1);
 332 }
 333 
 334 /*
 335  * Release resources.
 336  * Enter zombie state.
 337  * Wake up parent and init processes,
 338  * and dispose of children.
 339  */
 340 void
 341 exit(int why, int what)
 342 {
 343         /*
 344          * If proc_exit() fails, then some other lwp in the process
 345          * got there first.  We just have to call lwp_exit() to allow
 346          * the other lwp to finish exiting the process.  Otherwise we're
 347          * restarting init, and should return.
 348          */
 349         if (proc_exit(why, what) != 0) {
 350                 mutex_enter(&curproc->p_lock);
 351                 ASSERT(curproc->p_flag & SEXITLWPS);
 352                 lwp_exit();
 353                 /* NOTREACHED */
 354         }
 355 }
 356 
 357 /*
 358  * Set the SEXITING flag on the process, after making sure /proc does
 359  * not have it locked.  This is done in more places than proc_exit(),
 360  * so it is a separate function.
 361  */
 362 void
 363 proc_is_exiting(proc_t *p)
 364 {
 365         mutex_enter(&p->p_lock);
 366         prbarrier(p);
 367         p->p_flag |= SEXITING;
 368         mutex_exit(&p->p_lock);
 369 }
 370 
 371 /*
 372  * Return true if zone's init is restarted, false if exit processing should
 373  * proceeed.
 374  */
 375 static boolean_t
 376 zone_init_exit(zone_t *z, int why, int what)
 377 {
 378         /*
 379          * Typically we don't let the zone's init exit unless zone_start_init()
 380          * failed its exec, or we are shutting down the zone or the machine,
 381          * although the various flags handled within this function will control
 382          * the behavior.
 383          *
 384          * Since we are single threaded, we don't need to lock the following
 385          * accesses to zone_proc_initpid.
 386          */
 387         if (z->zone_boot_err != 0 ||
 388             zone_status_get(z) >= ZONE_IS_SHUTTING_DOWN ||
 389             zone_status_get(global_zone) >= ZONE_IS_SHUTTING_DOWN) {
 390                 /*
 391                  * Clear the zone's init pid and proceed with exit processing.
 392                  */
 393                 z->zone_proc_initpid = -1;
 394                 return (B_FALSE);
 395         }
 396 
 397         /*
 398          * There are a variety of configuration flags on the zone to control
 399          * init exit behavior.
 400          *
 401          * If the init process should be restarted, the "zone_restart_init"
 402          * member will be set.
 403          */
 404         if (!z->zone_restart_init) {
 405                 /*
 406                  * The zone has been setup to halt when init exits.
 407                  */
 408                 z->zone_init_status = wstat(why, what);
 409                 (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, zone_kcred());
 410                 z->zone_proc_initpid = -1;
 411                 return (B_FALSE);
 412         }
 413 
 414         /*
 415          * At this point we know we're configured to restart init, but there
 416          * are various modifiers to that behavior.
 417          */
 418 
 419         if (z->zone_reboot_on_init_exit) {
 420                 /*
 421                  * Some init programs in branded zones do not tolerate a
 422                  * restart in the traditional manner; setting
 423                  * "zone_reboot_on_init_exit" will cause the entire zone to be
 424                  * rebooted instead.
 425                  */
 426 
 427                 if (z->zone_restart_init_0) {
 428                         /*
 429                          * Some init programs in branded zones only want to
 430                          * restart if they exit 0, otherwise the zone should
 431                          * shutdown. Setting the "zone_restart_init_0" member
 432                          * controls this behavior.
 433                          */
 434                         if (why == CLD_EXITED && what == 0) {
 435                                 /* Trigger a zone reboot */
 436                                 (void) zone_kadmin(A_REBOOT, 0, NULL,
 437                                     zone_kcred());
 438                         } else {
 439                                 /* Shutdown instead of reboot */
 440                                 (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
 441                                     zone_kcred());
 442                         }
 443                 } else {
 444                         /* Trigger a zone reboot */
 445                         (void) zone_kadmin(A_REBOOT, 0, NULL, zone_kcred());
 446                 }
 447 
 448                 z->zone_init_status = wstat(why, what);
 449                 z->zone_proc_initpid = -1;
 450                 return (B_FALSE);
 451         }
 452 
 453         if (z->zone_restart_init_0) {
 454                 /*
 455                  * Some init programs in branded zones only want to restart if
 456                  * they exit 0, otherwise the zone should shutdown. Setting the
 457                  * "zone_restart_init_0" member controls this behavior.
 458                  *
 459                  * In this case we only restart init if it exited successfully.
 460                  */
 461                 if (why == CLD_EXITED && what == 0 &&
 462                     restart_init(what, why) == 0) {
 463                         return (B_TRUE);
 464                 }
 465         } else {
 466                 /*
 467                  * No restart modifiers on the zone, attempt to restart init.
 468                  */
 469                 if (restart_init(what, why) == 0) {
 470                         return (B_TRUE);
 471                 }
 472         }
 473 
 474 
 475         /*
 476          * The restart failed, the zone will shut down.
 477          */
 478         z->zone_init_status = wstat(why, what);
 479         (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, zone_kcred());
 480         z->zone_proc_initpid = -1;
 481         return (B_FALSE);
 482 }
 483 
 484 /*
 485  * Return value:
 486  *   1 - exitlwps() failed, call (or continue) lwp_exit()
 487  *   0 - restarting init.  Return through system call path
 488  */
 489 int
 490 proc_exit(int why, int what)
 491 {
 492         kthread_t *t = curthread;
 493         klwp_t *lwp = ttolwp(t);
 494         proc_t *p = ttoproc(t);
 495         zone_t *z = p->p_zone;
 496         timeout_id_t tmp_id;
 497         int rv;
 498         proc_t *q;
 499         task_t *tk;
 500         vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
 501         sigqueue_t *sqp;
 502         lwpdir_t *lwpdir;
 503         uint_t lwpdir_sz;
 504         tidhash_t *tidhash;
 505         uint_t tidhash_sz;
 506         ret_tidhash_t *ret_tidhash;
 507         refstr_t *cwd;
 508         hrtime_t hrutime, hrstime;
 509         int evaporate;
 510 
 511         /*
 512          * Stop and discard the process's lwps except for the current one,
 513          * unless some other lwp beat us to it.  If exitlwps() fails then
 514          * return and the calling lwp will call (or continue in) lwp_exit().
 515          */
 516         proc_is_exiting(p);
 517         if (exitlwps(0) != 0)
 518                 return (1);
 519 
 520         mutex_enter(&p->p_lock);
 521         if (p->p_ttime > 0) {
 522                 /*
 523                  * Account any remaining ticks charged to this process
 524                  * on its way out.
 525                  */
 526                 (void) task_cpu_time_incr(p->p_task, p->p_ttime);
 527                 p->p_ttime = 0;
 528         }
 529         mutex_exit(&p->p_lock);
 530 
 531         if (p->p_pid == z->zone_proc_initpid) {
 532                 /* If zone's init restarts, we're done here. */
 533                 if (zone_init_exit(z, why, what))
 534                         return (0);
 535         }
 536 
 537         /*
 538          * Delay firing probes (and performing brand cleanup) until after the
 539          * zone_proc_initpid check. Cases which result in zone shutdown or
 540          * restart via zone_kadmin eventually result in a call back to
 541          * proc_exit.
 542          */
 543         DTRACE_PROC(lwp__exit);
 544         DTRACE_PROC1(exit, int, why);
 545 
 546         /*
 547          * Will perform any brand specific proc exit processing. Since this
 548          * is always the last lwp, will also perform lwp exit/free and proc
 549          * exit. Brand data will be freed when the process is reaped.
 550          */
 551         if (PROC_IS_BRANDED(p)) {
 552                 BROP(p)->b_lwpexit(lwp);
 553                 BROP(p)->b_proc_exit(p);
 554                 /*
 555                  * To ensure that b_proc_exit has access to brand-specific data
 556                  * contained by the one remaining lwp, call the freelwp hook as
 557                  * the last part of this clean-up process.
 558                  */
 559                 BROP(p)->b_freelwp(lwp);
 560                 lwp_detach_brand_hdlrs(lwp);
 561         }
 562 
 563         lwp_pcb_exit();
 564 
 565         /*
 566          * Allocate a sigqueue now, before we grab locks.
 567          * It will be given to sigcld(), below.
 568          * Special case:  If we will be making the process disappear
 569          * without a trace because it is either:
 570          *      * an exiting SSYS process, or
 571          *      * a posix_spawn() vfork child who requests it,
 572          * we don't bother to allocate a useless sigqueue.
 573          */
 574         evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
 575             why == CLD_EXITED && what == _EVAPORATE);
 576         if (!evaporate)
 577                 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
 578 
 579         /*
 580          * revoke any doors created by the process.
 581          */
 582         if (p->p_door_list)
 583                 door_exit();
 584 
 585         /*
 586          * Release schedctl data structures.
 587          */
 588         if (p->p_pagep)
 589                 schedctl_proc_cleanup();
 590 
 591         /*
 592          * make sure all pending kaio has completed.
 593          */
 594         if (p->p_aio)
 595                 aio_cleanup_exit();
 596 
 597         /*
 598          * discard the lwpchan cache.
 599          */
 600         if (p->p_lcp != NULL)
 601                 lwpchan_destroy_cache(0);
 602 
 603         /*
 604          * Clean up any DTrace helper actions or probes for the process.
 605          */
 606         if (p->p_dtrace_helpers != NULL) {
 607                 ASSERT(dtrace_helpers_cleanup != NULL);
 608                 (*dtrace_helpers_cleanup)(p);
 609         }
 610 
 611         /*
 612          * Clean up any signalfd state for the process.
 613          */
 614         if (p->p_sigfd != NULL) {
 615                 VERIFY(sigfd_exit_helper != NULL);
 616                 (*sigfd_exit_helper)();
 617         }
 618 
 619         /* untimeout the realtime timers */
 620         if (p->p_itimer != NULL)
 621                 timer_exit();
 622 
 623         if ((tmp_id = p->p_alarmid) != 0) {
 624                 p->p_alarmid = 0;
 625                 (void) untimeout(tmp_id);
 626         }
 627 
 628         /*
 629          * If we had generated any upanic(2) state, free that now.
 630          */
 631         if (p->p_upanic != NULL) {
 632                 kmem_free(p->p_upanic, PRUPANIC_BUFLEN);
 633                 p->p_upanic = NULL;
 634         }
 635 
 636         /*
 637          * Remove any fpollinfo_t's for this (last) thread from our file
 638          * descriptors so closeall() can ASSERT() that they're all gone.
 639          */
 640         pollcleanup();
 641 
 642         if (p->p_rprof_cyclic != CYCLIC_NONE) {
 643                 mutex_enter(&cpu_lock);
 644                 cyclic_remove(p->p_rprof_cyclic);
 645                 mutex_exit(&cpu_lock);
 646         }
 647 
 648         mutex_enter(&p->p_lock);
 649 
 650         /*
 651          * Clean up any DTrace probes associated with this process.
 652          */
 653         if (p->p_dtrace_probes) {
 654                 ASSERT(dtrace_fasttrap_exit_ptr != NULL);
 655                 dtrace_fasttrap_exit_ptr(p);
 656         }
 657 
 658         while ((tmp_id = p->p_itimerid) != 0) {
 659                 p->p_itimerid = 0;
 660                 mutex_exit(&p->p_lock);
 661                 (void) untimeout(tmp_id);
 662                 mutex_enter(&p->p_lock);
 663         }
 664 
 665         lwp_cleanup();
 666 
 667         /*
 668          * We are about to exit; prevent our resource associations from
 669          * being changed.
 670          */
 671         pool_barrier_enter();
 672 
 673         /*
 674          * Block the process against /proc now that we have really
 675          * acquired p->p_lock (to manipulate p_tlist at least).
 676          */
 677         prbarrier(p);
 678 
 679         sigfillset(&p->p_ignore);
 680         sigemptyset(&p->p_siginfo);
 681         sigemptyset(&p->p_sig);
 682         sigemptyset(&p->p_extsig);
 683         sigemptyset(&t->t_sig);
 684         sigemptyset(&t->t_extsig);
 685         sigemptyset(&p->p_sigmask);
 686         sigdelq(p, t, 0);
 687         lwp->lwp_cursig = 0;
 688         lwp->lwp_extsig = 0;
 689         p->p_flag &= ~(SKILLED | SEXTKILLED);
 690         if (lwp->lwp_curinfo) {
 691                 siginfofree(lwp->lwp_curinfo);
 692                 lwp->lwp_curinfo = NULL;
 693         }
 694 
 695         t->t_proc_flag |= TP_LWPEXIT;
 696         ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
 697         prlwpexit(t);           /* notify /proc */
 698         lwp_hash_out(p, t->t_tid);
 699         prexit(p);
 700 
 701         p->p_lwpcnt = 0;
 702         p->p_tlist = NULL;
 703         sigqfree(p);
 704         term_mstate(t);
 705         p->p_mterm = gethrtime();
 706 
 707         exec_vp = p->p_exec;
 708         execdir_vp = p->p_execdir;
 709         p->p_exec = NULLVP;
 710         p->p_execdir = NULLVP;
 711         mutex_exit(&p->p_lock);
 712 
 713         pr_free_watched_pages(p);
 714 
 715         closeall(P_FINFO(p));
 716 
 717         /* Free the controlling tty.  (freectty() always assumes curproc.) */
 718         ASSERT(p == curproc);
 719         (void) freectty(B_TRUE);
 720 
 721 #if defined(__sparc)
 722         if (p->p_utraps != NULL)
 723                 utrap_free(p);
 724 #endif
 725         if (p->p_semacct)                    /* IPC semaphore exit */
 726                 semexit(p);
 727         rv = wstat(why, what);
 728 
 729         acct(rv);
 730         exacct_commit_proc(p, rv);
 731 
 732         /*
 733          * Release any resources associated with C2 auditing
 734          */
 735         if (AU_AUDITING()) {
 736                 /*
 737                  * audit exit system call
 738                  */
 739                 audit_exit(why, what);
 740         }
 741 
 742         /*
 743          * Free address space.
 744          */
 745         relvm();
 746 
 747         if (exec_vp) {
 748                 /*
 749                  * Close this executable which has been opened when the process
 750                  * was created by getproc().
 751                  */
 752                 (void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL);
 753                 VN_RELE(exec_vp);
 754         }
 755         if (execdir_vp)
 756                 VN_RELE(execdir_vp);
 757 
 758         /*
 759          * Release held contracts.
 760          */
 761         contract_exit(p);
 762 
 763         /*
 764          * Depart our encapsulating process contract.
 765          */
 766         if ((p->p_flag & SSYS) == 0) {
 767                 ASSERT(p->p_ct_process);
 768                 contract_process_exit(p->p_ct_process, p, rv);
 769         }
 770 
 771         /*
 772          * Remove pool association, and block if requested by pool_do_bind.
 773          */
 774         mutex_enter(&p->p_lock);
 775         ASSERT(p->p_pool->pool_ref > 0);
 776         atomic_dec_32(&p->p_pool->pool_ref);
 777         p->p_pool = pool_default;
 778         /*
 779          * Now that our address space has been freed and all other threads
 780          * in this process have exited, set the PEXITED pool flag.  This
 781          * tells the pools subsystems to ignore this process if it was
 782          * requested to rebind this process to a new pool.
 783          */
 784         p->p_poolflag |= PEXITED;
 785         pool_barrier_exit();
 786         mutex_exit(&p->p_lock);
 787 
 788         mutex_enter(&pidlock);
 789 
 790         /*
 791          * Delete this process from the newstate list of its parent. We
 792          * will put it in the right place in the sigcld in the end.
 793          */
 794         delete_ns(p->p_parent, p);
 795 
 796         /*
 797          * Reassign the orphans to the next of kin.
 798          * Don't rearrange init's orphanage.
 799          */
 800         if ((q = p->p_orphan) != NULL && p != proc_init) {
 801 
 802                 proc_t *nokp = p->p_nextofkin;
 803 
 804                 for (;;) {
 805                         q->p_nextofkin = nokp;
 806                         if (q->p_nextorph == NULL)
 807                                 break;
 808                         q = q->p_nextorph;
 809                 }
 810                 q->p_nextorph = nokp->p_orphan;
 811                 nokp->p_orphan = p->p_orphan;
 812                 p->p_orphan = NULL;
 813         }
 814 
 815         /*
 816          * Reassign the children to init.
 817          * Don't try to assign init's children to init.
 818          */
 819         if ((q = p->p_child) != NULL && p != proc_init) {
 820                 struct proc     *np;
 821                 struct proc     *initp = proc_init;
 822                 pid_t           zone_initpid = 1;
 823                 struct proc     *zoneinitp = NULL;
 824                 boolean_t       setzonetop = B_FALSE;
 825 
 826                 if (!INGLOBALZONE(curproc)) {
 827                         zone_initpid = curproc->p_zone->zone_proc_initpid;
 828 
 829                         ASSERT(MUTEX_HELD(&pidlock));
 830                         zoneinitp = prfind(zone_initpid);
 831                         if (zoneinitp != NULL) {
 832                                 initp = zoneinitp;
 833                         } else {
 834                                 zone_initpid = 1;
 835                                 setzonetop = B_TRUE;
 836                         }
 837                 }
 838 
 839                 pgdetach(p);
 840 
 841                 do {
 842                         np = q->p_sibling;
 843                         /*
 844                          * Delete it from its current parent new state
 845                          * list and add it to init new state list
 846                          */
 847                         delete_ns(q->p_parent, q);
 848 
 849                         q->p_ppid = zone_initpid;
 850 
 851                         q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
 852                         if (setzonetop) {
 853                                 mutex_enter(&q->p_lock);
 854                                 q->p_flag |= SZONETOP;
 855                                 mutex_exit(&q->p_lock);
 856                         }
 857                         q->p_parent = initp;
 858 
 859                         /*
 860                          * Since q will be the first child,
 861                          * it will not have a previous sibling.
 862                          */
 863                         q->p_psibling = NULL;
 864                         if (initp->p_child) {
 865                                 initp->p_child->p_psibling = q;
 866                         }
 867                         q->p_sibling = initp->p_child;
 868                         initp->p_child = q;
 869                         if (q->p_proc_flag & P_PR_PTRACE) {
 870                                 mutex_enter(&q->p_lock);
 871                                 sigtoproc(q, NULL, SIGKILL);
 872                                 mutex_exit(&q->p_lock);
 873                         }
 874                         /*
 875                          * sigcld() will add the child to parents
 876                          * newstate list.
 877                          */
 878                         if (q->p_stat == SZOMB)
 879                                 sigcld(q, NULL);
 880                 } while ((q = np) != NULL);
 881 
 882                 p->p_child = NULL;
 883                 ASSERT(p->p_child_ns == NULL);
 884         }
 885 
 886         TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);
 887 
 888         mutex_enter(&p->p_lock);
 889         CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */
 890 
 891         /*
 892          * Have our task accumulate our resource usage data before they
 893          * become contaminated by p_cacct etc., and before we renounce
 894          * membership of the task.
 895          *
 896          * We do this regardless of whether or not task accounting is active.
 897          * This is to avoid having nonsense data reported for this task if
 898          * task accounting is subsequently enabled. The overhead is minimal;
 899          * by this point, this process has accounted for the usage of all its
 900          * LWPs. We nonetheless do the work here, and under the protection of
 901          * pidlock, so that the movement of the process's usage to the task
 902          * happens at the same time as the removal of the process from the
 903          * task, from the point of view of exacct_snapshot_task_usage().
 904          */
 905         exacct_update_task_mstate(p);
 906 
 907         hrutime = mstate_aggr_state(p, LMS_USER);
 908         hrstime = mstate_aggr_state(p, LMS_SYSTEM);
 909         p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
 910         p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;
 911 
 912         p->p_acct[LMS_USER]  += p->p_cacct[LMS_USER];
 913         p->p_acct[LMS_SYSTEM]        += p->p_cacct[LMS_SYSTEM];
 914         p->p_acct[LMS_TRAP]  += p->p_cacct[LMS_TRAP];
 915         p->p_acct[LMS_TFAULT]        += p->p_cacct[LMS_TFAULT];
 916         p->p_acct[LMS_DFAULT]        += p->p_cacct[LMS_DFAULT];
 917         p->p_acct[LMS_KFAULT]        += p->p_cacct[LMS_KFAULT];
 918         p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
 919         p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP];
 920         p->p_acct[LMS_WAIT_CPU]      += p->p_cacct[LMS_WAIT_CPU];
 921         p->p_acct[LMS_STOPPED]       += p->p_cacct[LMS_STOPPED];
 922 
 923         p->p_ru.minflt       += p->p_cru.minflt;
 924         p->p_ru.majflt       += p->p_cru.majflt;
 925         p->p_ru.nswap        += p->p_cru.nswap;
 926         p->p_ru.inblock      += p->p_cru.inblock;
 927         p->p_ru.oublock      += p->p_cru.oublock;
 928         p->p_ru.msgsnd       += p->p_cru.msgsnd;
 929         p->p_ru.msgrcv       += p->p_cru.msgrcv;
 930         p->p_ru.nsignals += p->p_cru.nsignals;
 931         p->p_ru.nvcsw        += p->p_cru.nvcsw;
 932         p->p_ru.nivcsw       += p->p_cru.nivcsw;
 933         p->p_ru.sysc += p->p_cru.sysc;
 934         p->p_ru.ioch += p->p_cru.ioch;
 935 
 936         p->p_stat = SZOMB;
 937         p->p_proc_flag &= ~P_PR_PTRACE;
 938         p->p_wdata = what;
 939         p->p_wcode = (char)why;
 940 
 941         cdir = PTOU(p)->u_cdir;
 942         rdir = PTOU(p)->u_rdir;
 943         cwd = PTOU(p)->u_cwd;
 944 
 945         ASSERT(cdir != NULL || p->p_parent == &p0);
 946 
 947         /*
 948          * Release resource controls, as they are no longer enforceable.
 949          */
 950         rctl_set_free(p->p_rctls);
 951 
 952         /*
 953          * Decrement tk_nlwps counter for our task.max-lwps resource control.
 954          * An extended accounting record, if that facility is active, is
 955          * scheduled to be written.  We cannot give up task and project
 956          * membership at this point because that would allow zombies to escape
 957          * from the max-processes resource controls.  Zombies stay in their
 958          * current task and project until the process table slot is released
 959          * in freeproc().
 960          */
 961         tk = p->p_task;
 962 
 963         mutex_enter(&p->p_zone->zone_nlwps_lock);
 964         tk->tk_nlwps--;
 965         tk->tk_proj->kpj_nlwps--;
 966         p->p_zone->zone_nlwps--;
 967         mutex_exit(&p->p_zone->zone_nlwps_lock);
 968 
 969         /*
 970          * Clear the lwp directory and the lwpid hash table
 971          * now that /proc can't bother us any more.
 972          * We free the memory below, after dropping p->p_lock.
 973          */
 974         lwpdir = p->p_lwpdir;
 975         lwpdir_sz = p->p_lwpdir_sz;
 976         tidhash = p->p_tidhash;
 977         tidhash_sz = p->p_tidhash_sz;
 978         ret_tidhash = p->p_ret_tidhash;
 979         p->p_lwpdir = NULL;
 980         p->p_lwpfree = NULL;
 981         p->p_lwpdir_sz = 0;
 982         p->p_tidhash = NULL;
 983         p->p_tidhash_sz = 0;
 984         p->p_ret_tidhash = NULL;
 985 
 986         /*
 987          * If the process has context ops installed, call the exit routine
 988          * on behalf of this last remaining thread. Normally exitpctx() is
 989          * called during thread_exit() or lwp_exit(), but because this is the
 990          * last thread in the process, we must call it here. By the time
 991          * thread_exit() is called (below), the association with the relevant
 992          * process has been lost.
 993          *
 994          * We also free the context here.
 995          */
 996         if (p->p_pctx) {
 997                 kpreempt_disable();
 998                 exitpctx(p);
 999                 kpreempt_enable();
1000 
1001                 freepctx(p, 0);
1002         }
1003 
1004         /*
1005          * curthread's proc pointer is changed to point to the 'sched'
1006          * process for the corresponding zone, except in the case when
1007          * the exiting process is in fact a zsched instance, in which
1008          * case the proc pointer is set to p0.  We do so, so that the
1009          * process still points at the right zone when we call the VN_RELE()
1010          * below.
1011          *
1012          * This is because curthread's original proc pointer can be freed as
1013          * soon as the child sends a SIGCLD to its parent.  We use zsched so
1014          * that for user processes, even in the final moments of death, the
1015          * process is still associated with its zone.
1016          */
1017         if (p != t->t_procp->p_zone->zone_zsched)
1018                 t->t_procp = t->t_procp->p_zone->zone_zsched;
1019         else
1020                 t->t_procp = &p0;
1021 
1022         mutex_exit(&p->p_lock);
1023         if (!evaporate) {
1024                 /*
1025                  * The brand specific code only happens when the brand has a
1026                  * function to call in place of sigcld and the parent of the
1027                  * exiting process is not the global zone init. If the parent
1028                  * is the global zone init, then the process was reparented,
1029                  * and we don't want brand code delivering possibly strange
1030                  * signals to init. Also, init is not branded, so any brand
1031                  * specific exit data will not be picked up by init anyway.
1032                  */
1033                 if (PROC_IS_BRANDED(p) &&
1034                     BROP(p)->b_exit_with_sig != NULL &&
1035                     p->p_ppid != 1) {
1036                         /*
1037                          * The code for _fini that could unload the brand_t
1038                          * blocks until the count of zones using the module
1039                          * reaches zero. Zones decrement the refcount on their
1040                          * brands only after all user tasks in that zone have
1041                          * exited and been waited on. The decrement on the
1042                          * brand's refcount happen in zone_destroy(). That
1043                          * depends on zone_shutdown() having been completed.
1044                          * zone_shutdown() includes a call to zone_empty(),
1045                          * where the zone waits for itself to reach the state
1046                          * ZONE_IS_EMPTY. This state is only set in either
1047                          * zone_shutdown(), when there are no user processes as
1048                          * the zone enters this function, or in
1049                          * zone_task_rele(). zone_task_rele() is called from
1050                          * code triggered by waiting on processes, not by the
1051                          * processes exiting through proc_exit().  This means
1052                          * all the branded processes that could exist for a
1053                          * specific brand_t must exit and get reaped before the
1054                          * refcount on the brand_t can reach 0. _fini will
1055                          * never unload the corresponding brand module before
1056                          * proc_exit finishes execution for all processes
1057                          * branded with a particular brand_t, which makes the
1058                          * operation below safe to do. Brands that wish to use
1059                          * this mechanism must wait in _fini as described
1060                          * above.
1061                          */
1062                         BROP(p)->b_exit_with_sig(p, sqp);
1063                 } else {
1064                         p->p_pidflag &= ~CLDPEND;
1065                         sigcld(p, sqp);
1066                 }
1067 
1068         } else {
1069                 /*
1070                  * Do what sigcld() would do if the disposition
1071                  * of the SIGCHLD signal were set to be ignored.
1072                  */
1073                 cv_broadcast(&p->p_srwchan_cv);
1074                 freeproc(p);
1075         }
1076         mutex_exit(&pidlock);
1077 
1078         /*
1079          * We don't release u_cdir and u_rdir until SZOMB is set.
1080          * This protects us against dofusers().
1081          */
1082         if (cdir)
1083                 VN_RELE(cdir);
1084         if (rdir)
1085                 VN_RELE(rdir);
1086         if (cwd)
1087                 refstr_rele(cwd);
1088 
1089         /*
1090          * task_rele() may ultimately cause the zone to go away (or
1091          * may cause the last user process in a zone to go away, which
1092          * signals zsched to go away).  So prior to this call, we must
1093          * no longer point at zsched.
1094          */
1095         t->t_procp = &p0;
1096 
1097         kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
1098         kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
1099         while (ret_tidhash != NULL) {
1100                 ret_tidhash_t *next = ret_tidhash->rth_next;
1101                 kmem_free(ret_tidhash->rth_tidhash,
1102                     ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
1103                 kmem_free(ret_tidhash, sizeof (*ret_tidhash));
1104                 ret_tidhash = next;
1105         }
1106 
1107         thread_exit();
1108         /* NOTREACHED */
1109 }
1110 
1111 /*
1112  * Format siginfo structure for wait system calls.
1113  */
1114 void
1115 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
1116 {
1117         ASSERT(MUTEX_HELD(&pidlock));
1118 
1119         bzero(ip, sizeof (k_siginfo_t));
1120         ip->si_signo = SIGCLD;
1121         ip->si_code = pp->p_wcode;
1122         ip->si_pid = pp->p_pid;
1123         ip->si_ctid = PRCTID(pp);
1124         ip->si_zoneid = pp->p_zone->zone_id;
1125         ip->si_status = pp->p_wdata;
1126         ip->si_stime = pp->p_stime;
1127         ip->si_utime = pp->p_utime;
1128 
1129         if (waitflag) {
1130                 pp->p_wcode = 0;
1131                 pp->p_wdata = 0;
1132                 pp->p_pidflag &= ~CLDPEND;
1133         }
1134 }
1135 
/*
 * Wait system call.
 * Search for a terminated (zombie) child,
 * finally lay it to rest, and collect its status.
 * Look also for stopped children,
 * and pass back status from them.
 *
 * Arguments:
 *	idtype	- P_PID, P_PGID or P_ALL; anything else is rejected.
 *	id	- pid or process group id to match, for P_PID and P_PGID.
 *	ip	- siginfo structure filled in (via winfo()) for the child
 *		  that is reported; zeroed when WNOHANG finds nothing ready.
 *	options	- W* option flags; at least one must be set and none may
 *		  fall outside WOPTMASK.
 *
 * Returns 0 on success, or an errno value:
 *	EINVAL	- bad options, bad idtype, or id out of range.
 *	ECHILD	- no child of interest exists, or none could change state.
 *	EINTR	- cv_wait_sig_swap() returned zero while we were waiting.
 * A brand's b_waitid_helper hook may also supply the return value.
 *
 * Unless WNOWAIT is given, reporting an exited child also frees it.
 */
int
waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
{
	proc_t *cp, *pp;
	/* Non-zero when the reported status should be consumed. */
	int waitflag = !(options & WNOWAIT);
	boolean_t have_brand_helper = B_FALSE;

	/*
	 * Obsolete flag, defined here only for binary compatibility
	 * with old statically linked executables.  Delete this when
	 * we no longer care about these old and broken applications.
	 */
#define _WNOCHLD	0400
	options &= ~_WNOCHLD;

	if (options == 0 || (options & ~WOPTMASK))
		return (EINVAL);

	switch (idtype) {
	case P_PID:
	case P_PGID:
		if (id < 0 || id >= maxpid)
			return (EINVAL);
		/* FALLTHROUGH */
	case P_ALL:
		break;
	default:
		return (EINVAL);
	}

	pp = ttoproc(curthread);

	/*
	 * Anytime you are looking for a process, you take pidlock to prevent
	 * things from changing as you look.
	 */
	mutex_enter(&pidlock);

	/*
	 * if we are only looking for exited processes and child_ns list
	 * is empty no reason to look at all children.
	 */
	if (idtype == P_ALL &&
	    (options & ~WNOWAIT) == (WNOHANG | WEXITED) &&
	    pp->p_child_ns == NULL) {
		if (pp->p_child) {
			/* Children exist but none has a new state yet. */
			mutex_exit(&pidlock);
			bzero(ip, sizeof (k_siginfo_t));
			return (0);
		}
		mutex_exit(&pidlock);
		return (ECHILD);
	}

	if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) {
		have_brand_helper = B_TRUE;
	}

	/*
	 * Each pass rescans the children from scratch; we come back here
	 * after every wakeup from cv_wait_sig_swap() at the bottom.
	 */
	while (pp->p_child != NULL || have_brand_helper) {
		boolean_t brand_wants_wait = B_FALSE;
		/* Matching exited children skipped because !WEXITED. */
		int proc_gone = 0;
		/* Matching children seen on the p_child list. */
		int found = 0;

		/*
		 * Give the brand a chance to return synthetic results from
		 * this waitid() call before we do the real thing.
		 */
		if (have_brand_helper) {
			int ret;

			if (BROP(pp)->b_waitid_helper(idtype, id, ip, options,
			    &brand_wants_wait, &ret) == 0) {
				mutex_exit(&pidlock);
				return (ret);
			}

			if (pp->p_child == NULL) {
				goto no_real_children;
			}
		}

		/*
		 * Look for interesting children in the newstate list.
		 */
		VERIFY(pp->p_child != NULL);
		for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
			/* CLDWAITPID children are only visible to P_PID. */
			if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
				continue;
			if (idtype == P_PID && id != cp->p_pid)
				continue;
			if (idtype == P_PGID && id != cp->p_pgrp)
				continue;
			if (PROC_IS_BRANDED(pp)) {
				if (BROP(pp)->b_wait_filter != NULL &&
				    BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
					continue;
			}

			switch (cp->p_wcode) {

			case CLD_TRAPPED:
			case CLD_STOPPED:
			case CLD_CONTINUED:
				cmn_err(CE_PANIC,
				    "waitid: wrong state %d on the p_newstate"
				    " list", cp->p_wcode);
				break;

			case CLD_EXITED:
			case CLD_DUMPED:
			case CLD_KILLED:
				if (!(options & WEXITED)) {
					/*
					 * Count how many are already gone
					 * for good.
					 */
					proc_gone++;
					break;
				}
				if (!waitflag) {
					winfo(cp, ip, 0);
				} else {
					/* Consume the status and reap it. */
					winfo(cp, ip, 1);
					freeproc(cp);
				}
				mutex_exit(&pidlock);
				if (waitflag) {		/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);
			}

			/*
			 * For P_PID we only get here for the entry whose
			 * pid matched, so there is nothing more to scan.
			 */
			if (idtype == P_PID)
				break;
		}

		/*
		 * None of the children on the newstate list were of
		 * interest. Check all the kids!
		 */
		for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
			if (idtype == P_PID && id != cp->p_pid)
				continue;
			if (idtype == P_PGID && id != cp->p_pgrp)
				continue;
			if (PROC_IS_BRANDED(pp)) {
				if (BROP(pp)->b_wait_filter != NULL &&
				    BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
					continue;
			}

			/*
			 * A "break" out of this switch falls through to the
			 * found++ below; a "continue" skips it.
			 */
			switch (cp->p_wcode) {
			case CLD_TRAPPED:
				if (!(options & WTRAPPED))
					break;
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {		/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_STOPPED:
				if (!(options & WSTOPPED))
					break;
				/* Is it still stopped? */
				mutex_enter(&cp->p_lock);
				if (!jobstopped(cp)) {
					mutex_exit(&cp->p_lock);
					break;
				}
				mutex_exit(&cp->p_lock);
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {		/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_CONTINUED:
				if (!(options & WCONTINUED))
					break;
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {		/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_EXITED:
			case CLD_DUMPED:
			case CLD_KILLED:
				if (idtype != P_PID &&
				    (cp->p_pidflag & CLDWAITPID))
					continue;
				/*
				 * Don't complain if a process was found in
				 * the first loop but we broke out of the loop
				 * because of the arguments passed to us.
				 */
				if (proc_gone == 0) {
					cmn_err(CE_PANIC,
					    "waitid: wrong state on the"
					    " p_child list");
				} else {
					break;
				}
			}

			found++;

			if (idtype == P_PID)
				break;
		}

no_real_children:
		/*
		 * If we found no interesting processes at all,
		 * break out and return ECHILD.
		 */
		if (!brand_wants_wait && (found + proc_gone == 0))
			break;

		if (options & WNOHANG) {
			mutex_exit(&pidlock);
			bzero(ip, sizeof (k_siginfo_t));
			/*
			 * We should set ip->si_signo = SIGCLD,
			 * but there is an SVVS test that expects
			 * ip->si_signo to be zero in this case.
			 */
			return (0);
		}

		/*
		 * If we found no processes of interest that could
		 * change state while we wait, we don't wait at all.
		 * Get out with ECHILD according to SVID.
		 */
		if (!brand_wants_wait && (found == proc_gone))
			break;

		/*
		 * Sleep on our own p_cv with pidlock as the associated
		 * mutex; a zero return is reported to the caller as EINTR.
		 */
		if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
			mutex_exit(&pidlock);
			return (EINTR);
		}
	}
	mutex_exit(&pidlock);
	return (ECHILD);
}
1397 
1398 int
1399 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1400 {
1401         int error;
1402         k_siginfo_t info;
1403 
1404         if (error = waitid(idtype, id, &info, options))
1405                 return (set_errno(error));
1406         if (copyout(&info, infop, sizeof (k_siginfo_t)))
1407                 return (set_errno(EFAULT));
1408         return (0);
1409 }
1410 
1411 #ifdef _SYSCALL32_IMPL
1412 
1413 int
1414 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1415 {
1416         int error;
1417         k_siginfo_t info;
1418         siginfo32_t info32;
1419 
1420         if (error = waitid(idtype, id, &info, options))
1421                 return (set_errno(error));
1422         siginfo_kto32(&info, &info32);
1423         if (copyout(&info32, infop, sizeof (info32)))
1424                 return (set_errno(EFAULT));
1425         return (0);
1426 }
1427 
1428 #endif  /* _SYSCALL32_IMPL */
1429 
1430 void
1431 proc_detach(proc_t *p)
1432 {
1433         proc_t *q;
1434 
1435         ASSERT(MUTEX_HELD(&pidlock));
1436 
1437         q = p->p_parent;
1438         ASSERT(q != NULL);
1439 
1440         /*
1441          * Take it off the newstate list of its parent
1442          */
1443         delete_ns(q, p);
1444 
1445         if (q->p_child == p) {
1446                 q->p_child = p->p_sibling;
1447                 /*
1448                  * If the parent has no children, it better not
1449                  * have any with new states either!
1450                  */
1451                 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL);
1452         }
1453 
1454         if (p->p_sibling) {
1455                 p->p_sibling->p_psibling = p->p_psibling;
1456         }
1457 
1458         if (p->p_psibling) {
1459                 p->p_psibling->p_sibling = p->p_sibling;
1460         }
1461 }
1462 
1463 /*
1464  * Remove zombie children from the process table.
1465  */
1466 void
1467 freeproc(proc_t *p)
1468 {
1469         proc_t *q;
1470         task_t *tk;
1471 
1472         ASSERT(p->p_stat == SZOMB);
1473         ASSERT(p->p_tlist == NULL);
1474         ASSERT(MUTEX_HELD(&pidlock));
1475 
1476         sigdelq(p, NULL, 0);
1477         if (p->p_killsqp) {
1478                 siginfofree(p->p_killsqp);
1479                 p->p_killsqp = NULL;
1480         }
1481 
1482         /* Clear any remaining brand data */
1483         if (PROC_IS_BRANDED(p)) {
1484                 brand_clearbrand(p, B_FALSE);
1485         }
1486 
1487 
1488         prfree(p);      /* inform /proc */
1489 
1490         /*
1491          * Don't free the init processes.
1492          * Other dying processes will access it.
1493          */
1494         if (p == proc_init)
1495                 return;
1496 
1497 
1498         /*
1499          * We wait until now to free the cred structure because a
1500          * zombie process's credentials may be examined by /proc.
1501          * No cred locking needed because there are no threads at this point.
1502          */
1503         upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
1504         crfree(p->p_cred);
1505         if (p->p_corefile != NULL) {
1506                 corectl_path_rele(p->p_corefile);
1507                 p->p_corefile = NULL;
1508         }
1509         if (p->p_content != NULL) {
1510                 corectl_content_rele(p->p_content);
1511                 p->p_content = NULL;
1512         }
1513 
1514         if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) ||
1515             (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) {
1516                 /*
1517                  * This should still do the right thing since p_utime/stime
1518                  * get set to the correct value on process exit, so it
1519                  * should get properly updated
1520                  */
1521                 p->p_nextofkin->p_cutime += p->p_utime;
1522                 p->p_nextofkin->p_cstime += p->p_stime;
1523 
1524                 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER];
1525                 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM];
1526                 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP];
1527                 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT];
1528                 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT];
1529                 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT];
1530                 p->p_nextofkin->p_cacct[LMS_USER_LOCK]
1531                     += p->p_acct[LMS_USER_LOCK];
1532                 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP];
1533                 p->p_nextofkin->p_cacct[LMS_WAIT_CPU]
1534                     += p->p_acct[LMS_WAIT_CPU];
1535                 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED];
1536 
1537                 p->p_nextofkin->p_cru.minflt      += p->p_ru.minflt;
1538                 p->p_nextofkin->p_cru.majflt      += p->p_ru.majflt;
1539                 p->p_nextofkin->p_cru.nswap       += p->p_ru.nswap;
1540                 p->p_nextofkin->p_cru.inblock     += p->p_ru.inblock;
1541                 p->p_nextofkin->p_cru.oublock     += p->p_ru.oublock;
1542                 p->p_nextofkin->p_cru.msgsnd      += p->p_ru.msgsnd;
1543                 p->p_nextofkin->p_cru.msgrcv      += p->p_ru.msgrcv;
1544                 p->p_nextofkin->p_cru.nsignals    += p->p_ru.nsignals;
1545                 p->p_nextofkin->p_cru.nvcsw       += p->p_ru.nvcsw;
1546                 p->p_nextofkin->p_cru.nivcsw      += p->p_ru.nivcsw;
1547                 p->p_nextofkin->p_cru.sysc        += p->p_ru.sysc;
1548                 p->p_nextofkin->p_cru.ioch        += p->p_ru.ioch;
1549 
1550         }
1551 
1552         q = p->p_nextofkin;
1553         if (q && q->p_orphan == p)
1554                 q->p_orphan = p->p_nextorph;
1555         else if (q) {
1556                 for (q = q->p_orphan; q; q = q->p_nextorph)
1557                         if (q->p_nextorph == p)
1558                                 break;
1559                 ASSERT(q && q->p_nextorph == p);
1560                 q->p_nextorph = p->p_nextorph;
1561         }
1562 
1563         /*
1564          * The process table slot is being freed, so it is now safe to give up
1565          * task and project membership.
1566          */
1567         mutex_enter(&p->p_lock);
1568         tk = p->p_task;
1569         task_detach(p);
1570         mutex_exit(&p->p_lock);
1571 
1572         proc_detach(p);
1573         pid_exit(p, tk);        /* frees pid and proc structure */
1574 
1575         task_rele(tk);
1576 }
1577 
1578 /*
1579  * Delete process "child" from the newstate list of process "parent"
1580  */
1581 void
1582 delete_ns(proc_t *parent, proc_t *child)
1583 {
1584         proc_t **ns;
1585 
1586         ASSERT(MUTEX_HELD(&pidlock));
1587         ASSERT(child->p_parent == parent);
1588         for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) {
1589                 if (*ns == child) {
1590 
1591                         ASSERT((*ns)->p_parent == parent);
1592 
1593                         *ns = child->p_sibling_ns;
1594                         child->p_sibling_ns = NULL;
1595                         return;
1596                 }
1597         }
1598 }
1599 
1600 /*
1601  * Add process "child" to the new state list of process "parent"
1602  */
1603 void
1604 add_ns(proc_t *parent, proc_t *child)
1605 {
1606         ASSERT(child->p_sibling_ns == NULL);
1607         child->p_sibling_ns = parent->p_child_ns;
1608         parent->p_child_ns = child;
1609 }