1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2018 Joyent, Inc.
  25  * Copyright 2020 Oxide Computer Company
  26  * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
  27  */
  28 
  29 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  30 
  31 #include <sys/types.h>
  32 #include <sys/param.h>
  33 #include <sys/sysmacros.h>
  34 #include <sys/systm.h>
  35 #include <sys/cred.h>
  36 #include <sys/user.h>
  37 #include <sys/errno.h>
  38 #include <sys/proc.h>
  39 #include <sys/ucontext.h>
  40 #include <sys/procfs.h>
  41 #include <sys/vnode.h>
  42 #include <sys/acct.h>
  43 #include <sys/var.h>
  44 #include <sys/cmn_err.h>
  45 #include <sys/debug.h>
  46 #include <sys/wait.h>
  47 #include <sys/siginfo.h>
  48 #include <sys/procset.h>
  49 #include <sys/class.h>
  50 #include <sys/file.h>
  51 #include <sys/session.h>
  52 #include <sys/kmem.h>
  53 #include <sys/vtrace.h>
  54 #include <sys/prsystm.h>
  55 #include <sys/ipc.h>
  56 #include <sys/sem_impl.h>
  57 #include <c2/audit.h>
  58 #include <sys/aio_impl.h>
  59 #include <vm/as.h>
  60 #include <sys/poll.h>
  61 #include <sys/door.h>
  62 #include <sys/lwpchan_impl.h>
  63 #include <sys/utrap.h>
  64 #include <sys/task.h>
  65 #include <sys/exacct.h>
  66 #include <sys/cyclic.h>
  67 #include <sys/schedctl.h>
  68 #include <sys/rctl.h>
  69 #include <sys/contract_impl.h>
  70 #include <sys/contract/process_impl.h>
  71 #include <sys/list.h>
  72 #include <sys/dtrace.h>
  73 #include <sys/pool.h>
  74 #include <sys/sdt.h>
  75 #include <sys/corectl.h>
  76 #include <sys/core.h>
  77 #include <sys/brand.h>
  78 #include <sys/libc_kernel.h>
  79 
  80 /*
  81  * convert code/data pair into old style wait status
  82  */
  83 int
  84 wstat(int code, int data)
  85 {
  86         int stat = (data & 0377);
  87 
  88         switch (code) {
  89         case CLD_EXITED:
  90                 stat <<= 8;
  91                 break;
  92         case CLD_DUMPED:
  93                 stat |= WCOREFLG;
  94                 break;
  95         case CLD_KILLED:
  96                 break;
  97         case CLD_TRAPPED:
  98         case CLD_STOPPED:
  99                 stat <<= 8;
 100                 stat |= WSTOPFLG;
 101                 break;
 102         case CLD_CONTINUED:
 103                 stat = WCONTFLG;
 104                 break;
 105         default:
 106                 cmn_err(CE_PANIC, "wstat: bad code");
 107                 /* NOTREACHED */
 108         }
 109         return (stat);
 110 }
 111 
 112 static char *
 113 exit_reason(char *buf, size_t bufsz, int what, int why)
 114 {
 115         switch (why) {
 116         case CLD_EXITED:
 117                 (void) snprintf(buf, bufsz, "exited with status %d", what);
 118                 break;
 119         case CLD_KILLED:
 120                 (void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
 121                 break;
 122         case CLD_DUMPED:
 123                 (void) snprintf(buf, bufsz, "core dumped on signal %d", what);
 124                 break;
 125         default:
 126                 (void) snprintf(buf, bufsz, "encountered unknown error "
 127                     "(%d, %d)", why, what);
 128                 break;
 129         }
 130 
 131         return (buf);
 132 }
 133 
 134 /*
 135  * exit system call: pass back caller's arg.
 136  */
 137 void
 138 rexit(int rval)
 139 {
 140         exit(CLD_EXITED, rval);
 141 }
 142 
 143 /*
 144  * Bump the init_restarts kstat and let interested parties know about the
 145  * restart.
 146  */
 147 static void
 148 restart_init_notify(zone_t *zone)
 149 {
 150         nvlist_t *nvl = NULL;
 151 
 152         zone->zone_proc_init_restarts++;
 153 
 154         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0 &&
 155             nvlist_add_uint32(nvl, ZONE_CB_RESTARTS,
 156             zone->zone_proc_init_restarts) == 0) {
 157                 zone_sysevent_publish(zone, ZONE_EVENT_INIT_CLASS,
 158                     ZONE_EVENT_INIT_RESTART_SC, nvl);
 159         }
 160 
 161         nvlist_free(nvl);
 162 }
 163 
 164 /*
 165  * Called by proc_exit() when a zone's init exits, presumably because
 166  * it failed.  As long as the given zone is still in the "running"
 167  * state, we will re-exec() init, but first we need to reset things
 168  * which are usually inherited across exec() but will break init's
 169  * assumption that it is being exec()'d from a virgin process.  Most
 170  * importantly this includes closing all file descriptors (exec only
 171  * closes those marked close-on-exec) and resetting signals (exec only
 172  * resets handled signals, and we need to clear any signals which
 173  * killed init).  Anything else that exec(2) says would be inherited,
 174  * but would affect the execution of init, needs to be reset.
 175  */
 176 static int
 177 restart_init(int what, int why)
 178 {
 179         kthread_t *t = curthread;
 180         klwp_t *lwp = ttolwp(t);
 181         proc_t *p = ttoproc(t);
 182         proc_t *pp = p->p_zone->zone_zsched;
 183         user_t *up = PTOU(p);
 184 
 185         vnode_t *oldcd, *oldrd;
 186         int i, err;
 187         char reason_buf[64];
 188 
 189         /*
 190          * Let zone admin (and global zone admin if this is for a non-global
 191          * zone) know that init has failed and will be restarted.
 192          */
 193         zcmn_err(p->p_zone->zone_id, CE_WARN,
 194             "init(1M) %s: restarting automatically",
 195             exit_reason(reason_buf, sizeof (reason_buf), what, why));
 196 
 197         if (!INGLOBALZONE(p)) {
 198                 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
 199                     "restarting automatically",
 200                     p->p_zone->zone_name, p->p_pid, reason_buf);
 201         }
 202 
 203         /*
 204          * Remove any fpollinfo_t's for this (last) thread from our file
 205          * descriptors so closeall() can ASSERT() that they're all gone.
 206          * Then close all open file descriptors in the process.
 207          */
 208         pollcleanup();
 209         closeall(P_FINFO(p));
 210 
 211         /*
 212          * Grab p_lock and begin clearing miscellaneous global process
 213          * state that needs to be reset before we exec the new init(1M).
 214          */
 215 
 216         mutex_enter(&p->p_lock);
 217         prbarrier(p);
 218 
 219         p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
 220         up->u_cmask = CMASK;
 221 
 222         sigemptyset(&t->t_hold);
 223         sigemptyset(&t->t_sig);
 224         sigemptyset(&t->t_extsig);
 225 
 226         sigemptyset(&p->p_sig);
 227         sigemptyset(&p->p_extsig);
 228 
 229         sigdelq(p, t, 0);
 230         sigdelq(p, NULL, 0);
 231 
 232         if (p->p_killsqp) {
 233                 siginfofree(p->p_killsqp);
 234                 p->p_killsqp = NULL;
 235         }
 236 
 237         /*
 238          * Reset any signals that are ignored back to the default disposition.
 239          * Other u_signal members will be cleared when exec calls sigdefault().
 240          */
 241         for (i = 1; i < NSIG; i++) {
 242                 if (up->u_signal[i - 1] == SIG_IGN) {
 243                         up->u_signal[i - 1] = SIG_DFL;
 244                         sigemptyset(&up->u_sigmask[i - 1]);
 245                 }
 246         }
 247 
 248         /*
 249          * Clear the current signal, any signal info associated with it, and
 250          * any signal information from contracts and/or contract templates.
 251          */
 252         lwp->lwp_cursig = 0;
 253         lwp->lwp_extsig = 0;
 254         if (lwp->lwp_curinfo != NULL) {
 255                 siginfofree(lwp->lwp_curinfo);
 256                 lwp->lwp_curinfo = NULL;
 257         }
 258         lwp_ctmpl_clear(lwp, B_FALSE);
 259 
 260         /*
 261          * Reset both the process root directory and the current working
 262          * directory to the root of the zone just as we do during boot.
 263          */
 264         VN_HOLD(p->p_zone->zone_rootvp);
 265         oldrd = up->u_rdir;
 266         up->u_rdir = p->p_zone->zone_rootvp;
 267 
 268         VN_HOLD(p->p_zone->zone_rootvp);
 269         oldcd = up->u_cdir;
 270         up->u_cdir = p->p_zone->zone_rootvp;
 271 
 272         if (up->u_cwd != NULL) {
 273                 refstr_rele(up->u_cwd);
 274                 up->u_cwd = NULL;
 275         }
 276 
 277         /* Reset security flags */
 278         mutex_enter(&pp->p_lock);
 279         p->p_secflags = pp->p_secflags;
 280         mutex_exit(&pp->p_lock);
 281 
 282         mutex_exit(&p->p_lock);
 283 
 284         if (oldrd != NULL)
 285                 VN_RELE(oldrd);
 286         if (oldcd != NULL)
 287                 VN_RELE(oldcd);
 288 
 289         /*
 290          * It's possible that a zone's init will have become privilege aware
 291          * and modified privilege sets; reset them.
 292          */
 293         cred_t *oldcr, *newcr;
 294 
 295         mutex_enter(&p->p_crlock);
 296         oldcr = p->p_cred;
 297         mutex_enter(&pp->p_crlock);
 298         crhold(newcr = p->p_cred = pp->p_cred);
 299         mutex_exit(&pp->p_crlock);
 300         mutex_exit(&p->p_crlock);
 301         crfree(oldcr);
 302         /* Additional hold for the current thread - expected by crset() */
 303         crhold(newcr);
 304         crset(p, newcr);
 305 
 306         /* Free the controlling tty.  (freectty() always assumes curproc.) */
 307         ASSERT(p == curproc);
 308         (void) freectty(B_TRUE);
 309 
 310         restart_init_notify(p->p_zone);
 311 
 312         /*
 313          * Now exec() the new init(1M) on top of the current process.  If we
 314          * succeed, the caller will treat this like a successful system call.
 315          * If we fail, we issue messages and the caller will proceed with exit.
 316          */
 317         err = exec_init(p->p_zone->zone_initname, NULL);
 318 
 319         if (err == 0)
 320                 return (0);
 321 
 322         zcmn_err(p->p_zone->zone_id, CE_WARN,
 323             "failed to restart init(1M) (err=%d): system reboot required", err);
 324 
 325         if (!INGLOBALZONE(p)) {
 326                 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
 327                     "(pid %d, err=%d): zoneadm(1M) boot required",
 328                     p->p_zone->zone_name, p->p_pid, err);
 329         }
 330 
 331         return (-1);
 332 }
 333 
 334 /*
 335  * Release resources.
 336  * Enter zombie state.
 337  * Wake up parent and init processes,
 338  * and dispose of children.
 339  */
 340 void
 341 exit(int why, int what)
 342 {
 343         /*
 344          * If proc_exit() fails, then some other lwp in the process
 345          * got there first.  We just have to call lwp_exit() to allow
 346          * the other lwp to finish exiting the process.  Otherwise we're
 347          * restarting init, and should return.
 348          */
 349         if (proc_exit(why, what) != 0) {
 350                 mutex_enter(&curproc->p_lock);
 351                 ASSERT(curproc->p_flag & SEXITLWPS);
 352                 lwp_exit();
 353                 /* NOTREACHED */
 354         }
 355 }
 356 
 357 /*
 358  * Set the SEXITING flag on the process, after making sure /proc does
 359  * not have it locked.  This is done in more places than proc_exit(),
 360  * so it is a separate function.
 361  */
 362 void
 363 proc_is_exiting(proc_t *p)
 364 {
 365         mutex_enter(&p->p_lock);
 366         prbarrier(p);
 367         p->p_flag |= SEXITING;
 368         mutex_exit(&p->p_lock);
 369 }
 370 
 371 /*
 372  * Return true if zone's init is restarted, false if exit processing should
 373  * proceeed.
 374  */
 375 static boolean_t
 376 zone_init_exit(zone_t *z, int why, int what)
 377 {
 378         /*
 379          * Typically we don't let the zone's init exit unless zone_start_init()
 380          * failed its exec, or we are shutting down the zone or the machine,
 381          * although the various flags handled within this function will control
 382          * the behavior.
 383          *
 384          * Since we are single threaded, we don't need to lock the following
 385          * accesses to zone_proc_initpid.
 386          */
 387         if (z->zone_boot_err != 0 ||
 388             zone_status_get(z) >= ZONE_IS_SHUTTING_DOWN ||
 389             zone_status_get(global_zone) >= ZONE_IS_SHUTTING_DOWN) {
 390                 /*
 391                  * Clear the zone's init pid and proceed with exit processing.
 392                  */
 393                 z->zone_proc_initpid = -1;
 394                 return (B_FALSE);
 395         }
 396 
 397         /*
 398          * There are a variety of configuration flags on the zone to control
 399          * init exit behavior.
 400          *
 401          * If the init process should be restarted, the "zone_restart_init"
 402          * member will be set.
 403          */
 404         if (!z->zone_restart_init) {
 405                 /*
 406                  * The zone has been setup to halt when init exits.
 407                  */
 408                 z->zone_init_status = wstat(why, what);
 409                 (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, zone_kcred());
 410                 z->zone_proc_initpid = -1;
 411                 return (B_FALSE);
 412         }
 413 
 414         /*
 415          * At this point we know we're configured to restart init, but there
 416          * are various modifiers to that behavior.
 417          */
 418 
 419         if (z->zone_reboot_on_init_exit) {
 420                 /*
 421                  * Some init programs in branded zones do not tolerate a
 422                  * restart in the traditional manner; setting
 423                  * "zone_reboot_on_init_exit" will cause the entire zone to be
 424                  * rebooted instead.
 425                  */
 426 
 427                 if (z->zone_restart_init_0) {
 428                         /*
 429                          * Some init programs in branded zones only want to
 430                          * restart if they exit 0, otherwise the zone should
 431                          * shutdown. Setting the "zone_restart_init_0" member
 432                          * controls this behavior.
 433                          */
 434                         if (why == CLD_EXITED && what == 0) {
 435                                 /* Trigger a zone reboot */
 436                                 (void) zone_kadmin(A_REBOOT, 0, NULL,
 437                                     zone_kcred());
 438                         } else {
 439                                 /* Shutdown instead of reboot */
 440                                 (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
 441                                     zone_kcred());
 442                         }
 443                 } else {
 444                         /* Trigger a zone reboot */
 445                         (void) zone_kadmin(A_REBOOT, 0, NULL, zone_kcred());
 446                 }
 447 
 448                 z->zone_init_status = wstat(why, what);
 449                 z->zone_proc_initpid = -1;
 450                 return (B_FALSE);
 451         }
 452 
 453         if (z->zone_restart_init_0) {
 454                 /*
 455                  * Some init programs in branded zones only want to restart if
 456                  * they exit 0, otherwise the zone should shutdown. Setting the
 457                  * "zone_restart_init_0" member controls this behavior.
 458                  *
 459                  * In this case we only restart init if it exited successfully.
 460                  */
 461                 if (why == CLD_EXITED && what == 0 &&
 462                     restart_init(what, why) == 0) {
 463                         return (B_TRUE);
 464                 }
 465         } else {
 466                 /*
 467                  * No restart modifiers on the zone, attempt to restart init.
 468                  */
 469                 if (restart_init(what, why) == 0) {
 470                         return (B_TRUE);
 471                 }
 472         }
 473 
 474         /*
 475          * The restart failed, or the criteria for a restart are not met;
 476          * the zone will shut down.
 477          */
 478         z->zone_init_status = wstat(why, what);
 479         (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, zone_kcred());
 480         z->zone_proc_initpid = -1;
 481         return (B_FALSE);
 482 }
 483 
 484 /*
 485  * Return value:
 486  *   1 - exitlwps() failed, call (or continue) lwp_exit()
 487  *   0 - restarting init.  Return through system call path
 488  */
 489 int
 490 proc_exit(int why, int what)
 491 {
 492         kthread_t *t = curthread;
 493         klwp_t *lwp = ttolwp(t);
 494         proc_t *p = ttoproc(t);
 495         zone_t *z = p->p_zone;
 496         timeout_id_t tmp_id;
 497         int rv;
 498         proc_t *q;
 499         task_t *tk;
 500         vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
 501         sigqueue_t *sqp;
 502         lwpdir_t *lwpdir;
 503         uint_t lwpdir_sz;
 504         tidhash_t *tidhash;
 505         uint_t tidhash_sz;
 506         ret_tidhash_t *ret_tidhash;
 507         refstr_t *cwd;
 508         hrtime_t hrutime, hrstime;
 509         int evaporate;
 510 
 511         /*
 512          * Stop and discard the process's lwps except for the current one,
 513          * unless some other lwp beat us to it.  If exitlwps() fails then
 514          * return and the calling lwp will call (or continue in) lwp_exit().
 515          */
 516         proc_is_exiting(p);
 517         if (exitlwps(0) != 0)
 518                 return (1);
 519 
 520         mutex_enter(&p->p_lock);
 521         if (p->p_ttime > 0) {
 522                 /*
 523                  * Account any remaining ticks charged to this process
 524                  * on its way out.
 525                  */
 526                 (void) task_cpu_time_incr(p->p_task, p->p_ttime);
 527                 p->p_ttime = 0;
 528         }
 529         mutex_exit(&p->p_lock);
 530 
 531         /*
 532          * Don't let init exit unless zone_start_init() failed its exec, or
 533          * we are shutting down the zone or the machine.
 534          *
 535          * Since we are single threaded, we don't need to lock the
 536          * following accesses to zone_proc_initpid.
 537          */
 538         if (p->p_pid == z->zone_proc_initpid) {
 539                 /* If zone's init restarts, we're done here. */
 540                 if (zone_init_exit(z, why, what))
 541                         return (0);
 542         }
 543 
 544         /*
 545          * Delay firing probes (and performing brand cleanup) until after the
 546          * zone_proc_initpid check. Cases which result in zone shutdown or
 547          * restart via zone_kadmin eventually result in a call back to
 548          * proc_exit.
 549          */
 550         DTRACE_PROC(lwp__exit);
 551         DTRACE_PROC1(exit, int, why);
 552 
 553         /*
 554          * Will perform any brand specific proc exit processing. Since this
 555          * is always the last lwp, will also perform lwp exit/free and proc
 556          * exit. Brand data will be freed when the process is reaped.
 557          */
 558         if (PROC_IS_BRANDED(p)) {
 559                 BROP(p)->b_lwpexit(lwp);
 560                 BROP(p)->b_proc_exit(p);
 561                 /*
 562                  * To ensure that b_proc_exit has access to brand-specific data
 563                  * contained by the one remaining lwp, call the freelwp hook as
 564                  * the last part of this clean-up process.
 565                  */
 566                 BROP(p)->b_freelwp(lwp);
 567                 lwp_detach_brand_hdlrs(lwp);
 568         }
 569 
 570         lwp_pcb_exit();
 571 
 572         /*
 573          * Allocate a sigqueue now, before we grab locks.
 574          * It will be given to sigcld(), below.
 575          * Special case:  If we will be making the process disappear
 576          * without a trace because it is either:
 577          *      * an exiting SSYS process, or
 578          *      * a posix_spawn() vfork child who requests it,
 579          * we don't bother to allocate a useless sigqueue.
 580          */
 581         evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
 582             why == CLD_EXITED && what == _EVAPORATE);
 583         if (!evaporate)
 584                 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
 585 
 586         /*
 587          * revoke any doors created by the process.
 588          */
 589         if (p->p_door_list)
 590                 door_exit();
 591 
 592         /*
 593          * Release schedctl data structures.
 594          */
 595         if (p->p_pagep)
 596                 schedctl_proc_cleanup();
 597 
 598         /*
 599          * make sure all pending kaio has completed.
 600          */
 601         if (p->p_aio)
 602                 aio_cleanup_exit();
 603 
 604         /*
 605          * discard the lwpchan cache.
 606          */
 607         if (p->p_lcp != NULL)
 608                 lwpchan_destroy_cache(0);
 609 
 610         /*
 611          * Clean up any DTrace helper actions or probes for the process.
 612          */
 613         if (p->p_dtrace_helpers != NULL) {
 614                 ASSERT(dtrace_helpers_cleanup != NULL);
 615                 (*dtrace_helpers_cleanup)(p);
 616         }
 617 
 618         /*
 619          * Clean up any signalfd state for the process.
 620          */
 621         if (p->p_sigfd != NULL) {
 622                 VERIFY(sigfd_exit_helper != NULL);
 623                 (*sigfd_exit_helper)();
 624         }
 625 
 626         /* untimeout the realtime timers */
 627         if (p->p_itimer != NULL)
 628                 timer_exit();
 629 
 630         if ((tmp_id = p->p_alarmid) != 0) {
 631                 p->p_alarmid = 0;
 632                 (void) untimeout(tmp_id);
 633         }
 634 
 635         /*
 636          * If we had generated any upanic(2) state, free that now.
 637          */
 638         if (p->p_upanic != NULL) {
 639                 kmem_free(p->p_upanic, PRUPANIC_BUFLEN);
 640                 p->p_upanic = NULL;
 641         }
 642 
 643         /*
 644          * Remove any fpollinfo_t's for this (last) thread from our file
 645          * descriptors so closeall() can ASSERT() that they're all gone.
 646          */
 647         pollcleanup();
 648 
 649         if (p->p_rprof_cyclic != CYCLIC_NONE) {
 650                 mutex_enter(&cpu_lock);
 651                 cyclic_remove(p->p_rprof_cyclic);
 652                 mutex_exit(&cpu_lock);
 653         }
 654 
 655         mutex_enter(&p->p_lock);
 656 
 657         /*
 658          * Clean up any DTrace probes associated with this process.
 659          */
 660         if (p->p_dtrace_probes) {
 661                 ASSERT(dtrace_fasttrap_exit_ptr != NULL);
 662                 dtrace_fasttrap_exit_ptr(p);
 663         }
 664 
 665         while ((tmp_id = p->p_itimerid) != 0) {
 666                 p->p_itimerid = 0;
 667                 mutex_exit(&p->p_lock);
 668                 (void) untimeout(tmp_id);
 669                 mutex_enter(&p->p_lock);
 670         }
 671 
 672         lwp_cleanup();
 673 
 674         /*
 675          * We are about to exit; prevent our resource associations from
 676          * being changed.
 677          */
 678         pool_barrier_enter();
 679 
 680         /*
 681          * Block the process against /proc now that we have really
 682          * acquired p->p_lock (to manipulate p_tlist at least).
 683          */
 684         prbarrier(p);
 685 
 686         sigfillset(&p->p_ignore);
 687         sigemptyset(&p->p_siginfo);
 688         sigemptyset(&p->p_sig);
 689         sigemptyset(&p->p_extsig);
 690         sigemptyset(&t->t_sig);
 691         sigemptyset(&t->t_extsig);
 692         sigemptyset(&p->p_sigmask);
 693         sigdelq(p, t, 0);
 694         lwp->lwp_cursig = 0;
 695         lwp->lwp_extsig = 0;
 696         p->p_flag &= ~(SKILLED | SEXTKILLED);
 697         if (lwp->lwp_curinfo) {
 698                 siginfofree(lwp->lwp_curinfo);
 699                 lwp->lwp_curinfo = NULL;
 700         }
 701 
 702         t->t_proc_flag |= TP_LWPEXIT;
 703         ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
 704         prlwpexit(t);           /* notify /proc */
 705         lwp_hash_out(p, t->t_tid);
 706         prexit(p);
 707 
 708         p->p_lwpcnt = 0;
 709         p->p_tlist = NULL;
 710         sigqfree(p);
 711         term_mstate(t);
 712         p->p_mterm = gethrtime();
 713 
 714         exec_vp = p->p_exec;
 715         execdir_vp = p->p_execdir;
 716         p->p_exec = NULLVP;
 717         p->p_execdir = NULLVP;
 718         mutex_exit(&p->p_lock);
 719 
 720         pr_free_watched_pages(p);
 721 
 722         closeall(P_FINFO(p));
 723 
 724         /* Free the controlling tty.  (freectty() always assumes curproc.) */
 725         ASSERT(p == curproc);
 726         (void) freectty(B_TRUE);
 727 
 728 #if defined(__sparc)
 729         if (p->p_utraps != NULL)
 730                 utrap_free(p);
 731 #endif
 732         if (p->p_semacct)                    /* IPC semaphore exit */
 733                 semexit(p);
 734         rv = wstat(why, what);
 735 
 736         acct(rv);
 737         exacct_commit_proc(p, rv);
 738 
 739         /*
 740          * Release any resources associated with C2 auditing
 741          */
 742         if (AU_AUDITING()) {
 743                 /*
 744                  * audit exit system call
 745                  */
 746                 audit_exit(why, what);
 747         }
 748 
 749         /*
 750          * Free address space.
 751          */
 752         relvm();
 753 
 754         if (exec_vp) {
 755                 /*
 756                  * Close this executable which has been opened when the process
 757                  * was created by getproc().
 758                  */
 759                 (void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL);
 760                 VN_RELE(exec_vp);
 761         }
 762         if (execdir_vp)
 763                 VN_RELE(execdir_vp);
 764 
 765         /*
 766          * Release held contracts.
 767          */
 768         contract_exit(p);
 769 
 770         /*
 771          * Depart our encapsulating process contract.
 772          */
 773         if ((p->p_flag & SSYS) == 0) {
 774                 ASSERT(p->p_ct_process);
 775                 contract_process_exit(p->p_ct_process, p, rv);
 776         }
 777 
 778         /*
 779          * Remove pool association, and block if requested by pool_do_bind.
 780          */
 781         mutex_enter(&p->p_lock);
 782         ASSERT(p->p_pool->pool_ref > 0);
 783         atomic_dec_32(&p->p_pool->pool_ref);
 784         p->p_pool = pool_default;
 785         /*
 786          * Now that our address space has been freed and all other threads
 787          * in this process have exited, set the PEXITED pool flag.  This
 788          * tells the pools subsystems to ignore this process if it was
 789          * requested to rebind this process to a new pool.
 790          */
 791         p->p_poolflag |= PEXITED;
 792         pool_barrier_exit();
 793         mutex_exit(&p->p_lock);
 794 
 795         mutex_enter(&pidlock);
 796 
 797         /*
 798          * Delete this process from the newstate list of its parent. We
 799          * will put it in the right place in the sigcld in the end.
 800          */
 801         delete_ns(p->p_parent, p);
 802 
 803         /*
 804          * Reassign the orphans to the next of kin.
 805          * Don't rearrange init's orphanage.
 806          */
 807         if ((q = p->p_orphan) != NULL && p != proc_init) {
 808 
 809                 proc_t *nokp = p->p_nextofkin;
 810 
 811                 for (;;) {
 812                         q->p_nextofkin = nokp;
 813                         if (q->p_nextorph == NULL)
 814                                 break;
 815                         q = q->p_nextorph;
 816                 }
 817                 q->p_nextorph = nokp->p_orphan;
 818                 nokp->p_orphan = p->p_orphan;
 819                 p->p_orphan = NULL;
 820         }
 821 
 822         /*
 823          * Reassign the children to init.
 824          * Don't try to assign init's children to init.
 825          */
 826         if ((q = p->p_child) != NULL && p != proc_init) {
 827                 struct proc     *np;
 828                 struct proc     *initp = proc_init;
 829                 pid_t           zone_initpid = 1;
 830                 struct proc     *zoneinitp = NULL;
 831                 boolean_t       setzonetop = B_FALSE;
 832 
 833                 if (!INGLOBALZONE(curproc)) {
 834                         zone_initpid = curproc->p_zone->zone_proc_initpid;
 835 
 836                         ASSERT(MUTEX_HELD(&pidlock));
 837                         zoneinitp = prfind(zone_initpid);
 838                         if (zoneinitp != NULL) {
 839                                 initp = zoneinitp;
 840                         } else {
 841                                 zone_initpid = 1;
 842                                 setzonetop = B_TRUE;
 843                         }
 844                 }
 845 
 846                 pgdetach(p);
 847 
 848                 do {
 849                         np = q->p_sibling;
 850                         /*
 851                          * Delete it from its current parent new state
 852                          * list and add it to init new state list
 853                          */
 854                         delete_ns(q->p_parent, q);
 855 
 856                         q->p_ppid = zone_initpid;
 857 
 858                         q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
 859                         if (setzonetop) {
 860                                 mutex_enter(&q->p_lock);
 861                                 q->p_flag |= SZONETOP;
 862                                 mutex_exit(&q->p_lock);
 863                         }
 864                         q->p_parent = initp;
 865 
 866                         /*
 867                          * Since q will be the first child,
 868                          * it will not have a previous sibling.
 869                          */
 870                         q->p_psibling = NULL;
 871                         if (initp->p_child) {
 872                                 initp->p_child->p_psibling = q;
 873                         }
 874                         q->p_sibling = initp->p_child;
 875                         initp->p_child = q;
 876                         if (q->p_proc_flag & P_PR_PTRACE) {
 877                                 mutex_enter(&q->p_lock);
 878                                 sigtoproc(q, NULL, SIGKILL);
 879                                 mutex_exit(&q->p_lock);
 880                         }
 881                         /*
 882                          * sigcld() will add the child to parents
 883                          * newstate list.
 884                          */
 885                         if (q->p_stat == SZOMB)
 886                                 sigcld(q, NULL);
 887                 } while ((q = np) != NULL);
 888 
 889                 p->p_child = NULL;
 890                 ASSERT(p->p_child_ns == NULL);
 891         }
 892 
 893         TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);
 894 
 895         mutex_enter(&p->p_lock);
 896         CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */
 897 
 898         /*
 899          * Have our task accumulate our resource usage data before they
 900          * become contaminated by p_cacct etc., and before we renounce
 901          * membership of the task.
 902          *
 903          * We do this regardless of whether or not task accounting is active.
 904          * This is to avoid having nonsense data reported for this task if
 905          * task accounting is subsequently enabled. The overhead is minimal;
 906          * by this point, this process has accounted for the usage of all its
 907          * LWPs. We nonetheless do the work here, and under the protection of
 908          * pidlock, so that the movement of the process's usage to the task
 909          * happens at the same time as the removal of the process from the
 910          * task, from the point of view of exacct_snapshot_task_usage().
 911          */
 912         exacct_update_task_mstate(p);
 913 
 914         hrutime = mstate_aggr_state(p, LMS_USER);
 915         hrstime = mstate_aggr_state(p, LMS_SYSTEM);
 916         p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
 917         p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;
 918 
 919         p->p_acct[LMS_USER]  += p->p_cacct[LMS_USER];
 920         p->p_acct[LMS_SYSTEM]        += p->p_cacct[LMS_SYSTEM];
 921         p->p_acct[LMS_TRAP]  += p->p_cacct[LMS_TRAP];
 922         p->p_acct[LMS_TFAULT]        += p->p_cacct[LMS_TFAULT];
 923         p->p_acct[LMS_DFAULT]        += p->p_cacct[LMS_DFAULT];
 924         p->p_acct[LMS_KFAULT]        += p->p_cacct[LMS_KFAULT];
 925         p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
 926         p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP];
 927         p->p_acct[LMS_WAIT_CPU]      += p->p_cacct[LMS_WAIT_CPU];
 928         p->p_acct[LMS_STOPPED]       += p->p_cacct[LMS_STOPPED];
 929 
 930         p->p_ru.minflt       += p->p_cru.minflt;
 931         p->p_ru.majflt       += p->p_cru.majflt;
 932         p->p_ru.nswap        += p->p_cru.nswap;
 933         p->p_ru.inblock      += p->p_cru.inblock;
 934         p->p_ru.oublock      += p->p_cru.oublock;
 935         p->p_ru.msgsnd       += p->p_cru.msgsnd;
 936         p->p_ru.msgrcv       += p->p_cru.msgrcv;
 937         p->p_ru.nsignals += p->p_cru.nsignals;
 938         p->p_ru.nvcsw        += p->p_cru.nvcsw;
 939         p->p_ru.nivcsw       += p->p_cru.nivcsw;
 940         p->p_ru.sysc += p->p_cru.sysc;
 941         p->p_ru.ioch += p->p_cru.ioch;
 942 
 943         p->p_stat = SZOMB;
 944         p->p_proc_flag &= ~P_PR_PTRACE;
 945         p->p_wdata = what;
 946         p->p_wcode = (char)why;
 947 
 948         cdir = PTOU(p)->u_cdir;
 949         rdir = PTOU(p)->u_rdir;
 950         cwd = PTOU(p)->u_cwd;
 951 
 952         ASSERT(cdir != NULL || p->p_parent == &p0);
 953 
 954         /*
 955          * Release resource controls, as they are no longer enforceable.
 956          */
 957         rctl_set_free(p->p_rctls);
 958 
 959         /*
 960          * Decrement tk_nlwps counter for our task.max-lwps resource control.
 961          * An extended accounting record, if that facility is active, is
 962          * scheduled to be written.  We cannot give up task and project
 963          * membership at this point because that would allow zombies to escape
 964          * from the max-processes resource controls.  Zombies stay in their
 965          * current task and project until the process table slot is released
 966          * in freeproc().
 967          */
 968         tk = p->p_task;
 969 
 970         mutex_enter(&p->p_zone->zone_nlwps_lock);
 971         tk->tk_nlwps--;
 972         tk->tk_proj->kpj_nlwps--;
 973         p->p_zone->zone_nlwps--;
 974         mutex_exit(&p->p_zone->zone_nlwps_lock);
 975 
 976         /*
 977          * Clear the lwp directory and the lwpid hash table
 978          * now that /proc can't bother us any more.
 979          * We free the memory below, after dropping p->p_lock.
 980          */
 981         lwpdir = p->p_lwpdir;
 982         lwpdir_sz = p->p_lwpdir_sz;
 983         tidhash = p->p_tidhash;
 984         tidhash_sz = p->p_tidhash_sz;
 985         ret_tidhash = p->p_ret_tidhash;
 986         p->p_lwpdir = NULL;
 987         p->p_lwpfree = NULL;
 988         p->p_lwpdir_sz = 0;
 989         p->p_tidhash = NULL;
 990         p->p_tidhash_sz = 0;
 991         p->p_ret_tidhash = NULL;
 992 
 993         /*
 994          * If the process has context ops installed, call the exit routine
 995          * on behalf of this last remaining thread. Normally exitpctx() is
 996          * called during thread_exit() or lwp_exit(), but because this is the
 997          * last thread in the process, we must call it here. By the time
 998          * thread_exit() is called (below), the association with the relevant
 999          * process has been lost.
1000          *
1001          * We also free the context here.
1002          */
1003         if (p->p_pctx) {
1004                 kpreempt_disable();
1005                 exitpctx(p);
1006                 kpreempt_enable();
1007 
1008                 freepctx(p, 0);
1009         }
1010 
1011         /*
1012          * curthread's proc pointer is changed to point to the 'sched'
1013          * process for the corresponding zone, except in the case when
1014          * the exiting process is in fact a zsched instance, in which
1015          * case the proc pointer is set to p0.  We do so, so that the
1016          * process still points at the right zone when we call the VN_RELE()
1017          * below.
1018          *
1019          * This is because curthread's original proc pointer can be freed as
1020          * soon as the child sends a SIGCLD to its parent.  We use zsched so
1021          * that for user processes, even in the final moments of death, the
1022          * process is still associated with its zone.
1023          */
1024         if (p != t->t_procp->p_zone->zone_zsched)
1025                 t->t_procp = t->t_procp->p_zone->zone_zsched;
1026         else
1027                 t->t_procp = &p0;
1028 
1029         mutex_exit(&p->p_lock);
1030         if (!evaporate) {
1031                 /*
1032                  * The brand specific code only happens when the brand has a
1033                  * function to call in place of sigcld and the parent of the
1034                  * exiting process is not the global zone init. If the parent
1035                  * is the global zone init, then the process was reparented,
1036                  * and we don't want brand code delivering possibly strange
1037                  * signals to init. Also, init is not branded, so any brand
1038                  * specific exit data will not be picked up by init anyway.
1039                  */
1040                 if (PROC_IS_BRANDED(p) &&
1041                     BROP(p)->b_exit_with_sig != NULL &&
1042                     p->p_ppid != 1) {
1043                         /*
1044                          * The code for _fini that could unload the brand_t
1045                          * blocks until the count of zones using the module
1046                          * reaches zero. Zones decrement the refcount on their
1047                          * brands only after all user tasks in that zone have
1048                          * exited and been waited on. The decrement on the
1049                          * brand's refcount happen in zone_destroy(). That
1050                          * depends on zone_shutdown() having been completed.
1051                          * zone_shutdown() includes a call to zone_empty(),
1052                          * where the zone waits for itself to reach the state
1053                          * ZONE_IS_EMPTY. This state is only set in either
1054                          * zone_shutdown(), when there are no user processes as
1055                          * the zone enters this function, or in
1056                          * zone_task_rele(). zone_task_rele() is called from
1057                          * code triggered by waiting on processes, not by the
1058                          * processes exiting through proc_exit().  This means
1059                          * all the branded processes that could exist for a
1060                          * specific brand_t must exit and get reaped before the
1061                          * refcount on the brand_t can reach 0. _fini will
1062                          * never unload the corresponding brand module before
1063                          * proc_exit finishes execution for all processes
1064                          * branded with a particular brand_t, which makes the
1065                          * operation below safe to do. Brands that wish to use
1066                          * this mechanism must wait in _fini as described
1067                          * above.
1068                          */
1069                         BROP(p)->b_exit_with_sig(p, sqp);
1070                 } else {
1071                         p->p_pidflag &= ~CLDPEND;
1072                         sigcld(p, sqp);
1073                 }
1074 
1075         } else {
1076                 /*
1077                  * Do what sigcld() would do if the disposition
1078                  * of the SIGCHLD signal were set to be ignored.
1079                  */
1080                 cv_broadcast(&p->p_srwchan_cv);
1081                 freeproc(p);
1082         }
1083         mutex_exit(&pidlock);
1084 
1085         /*
1086          * We don't release u_cdir and u_rdir until SZOMB is set.
1087          * This protects us against dofusers().
1088          */
1089         if (cdir)
1090                 VN_RELE(cdir);
1091         if (rdir)
1092                 VN_RELE(rdir);
1093         if (cwd)
1094                 refstr_rele(cwd);
1095 
1096         /*
1097          * task_rele() may ultimately cause the zone to go away (or
1098          * may cause the last user process in a zone to go away, which
1099          * signals zsched to go away).  So prior to this call, we must
1100          * no longer point at zsched.
1101          */
1102         t->t_procp = &p0;
1103 
1104         kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
1105         kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
1106         while (ret_tidhash != NULL) {
1107                 ret_tidhash_t *next = ret_tidhash->rth_next;
1108                 kmem_free(ret_tidhash->rth_tidhash,
1109                     ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
1110                 kmem_free(ret_tidhash, sizeof (*ret_tidhash));
1111                 ret_tidhash = next;
1112         }
1113 
1114         thread_exit();
1115         /* NOTREACHED */
1116 }
1117 
1118 /*
1119  * Format siginfo structure for wait system calls.
1120  */
1121 void
1122 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
1123 {
1124         ASSERT(MUTEX_HELD(&pidlock));
1125 
1126         bzero(ip, sizeof (k_siginfo_t));
1127         ip->si_signo = SIGCLD;
1128         ip->si_code = pp->p_wcode;
1129         ip->si_pid = pp->p_pid;
1130         ip->si_ctid = PRCTID(pp);
1131         ip->si_zoneid = pp->p_zone->zone_id;
1132         ip->si_status = pp->p_wdata;
1133         ip->si_stime = pp->p_stime;
1134         ip->si_utime = pp->p_utime;
1135 
1136         if (waitflag) {
1137                 pp->p_wcode = 0;
1138                 pp->p_wdata = 0;
1139                 pp->p_pidflag &= ~CLDPEND;
1140         }
1141 }
1142 
/*
 * Wait system call.
 * Search for a terminated (zombie) child,
 * finally lay it to rest, and collect its status.
 * Look also for stopped children,
 * and pass back status from them.
 *
 * Arguments:
 *      idtype  - P_PID, P_PGID or P_ALL; anything else yields EINVAL.
 *      id      - pid or process group id to match when idtype is P_PID or
 *                P_PGID; must satisfy 0 <= id < maxpid.
 *      ip      - kernel siginfo filled in by winfo() when a child is
 *                reported, or zeroed when WNOHANG finds nothing to report.
 *      options - must be non-zero and contain only bits in WOPTMASK (the
 *                obsolete _WNOCHLD bit is silently discarded first).
 *
 * Returns 0 on success, otherwise an errno value:
 *      EINVAL  - bad options, idtype or id.
 *      ECHILD  - there is no child of interest to wait for.
 *      EINTR   - cv_wait_sig_swap() returned zero while we were waiting.
 * When the calling process is branded and its brand supplies a
 * b_waitid_helper, the helper may instead supply the return value.
 *
 * pidlock is acquired and released internally; every return path drops it.
 */
int
waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
{
        proc_t *cp, *pp;
        /*
         * waitflag is non-zero unless WNOWAIT was requested.  It decides
         * whether a reported status is consumed (see winfo()) and whether
         * an exited child is freed.
         */
        int waitflag = !(options & WNOWAIT);
        boolean_t have_brand_helper = B_FALSE;

        /*
         * Obsolete flag, defined here only for binary compatibility
         * with old statically linked executables.  Delete this when
         * we no longer care about these old and broken applications.
         */
#define _WNOCHLD        0400
        options &= ~_WNOCHLD;

        /* At least one option is required, and only known ones are allowed. */
        if (options == 0 || (options & ~WOPTMASK))
                return (EINVAL);

        /* Only P_PID and P_PGID carry an id that needs range checking. */
        switch (idtype) {
        case P_PID:
        case P_PGID:
                if (id < 0 || id >= maxpid)
                        return (EINVAL);
                /* FALLTHROUGH */
        case P_ALL:
                break;
        default:
                return (EINVAL);
        }

        pp = ttoproc(curthread);

        /*
         * Anytime you are looking for a process, you take pidlock to prevent
         * things from changing as you look.
         */
        mutex_enter(&pidlock);

        /*
         * if we are only looking for exited processes and child_ns list
         * is empty no reason to look at all children.
         *
         * With children present this returns success with a zeroed *ip;
         * with no children at all it returns ECHILD.
         */
        if (idtype == P_ALL &&
            (options & ~WNOWAIT) == (WNOHANG | WEXITED) &&
            pp->p_child_ns == NULL) {
                if (pp->p_child) {
                        mutex_exit(&pidlock);
                        bzero(ip, sizeof (k_siginfo_t));
                        return (0);
                }
                mutex_exit(&pidlock);
                return (ECHILD);
        }

        if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) {
                have_brand_helper = B_TRUE;
        }

        /*
         * Each pass scans the children once.  The loop is entered even
         * without children when a brand helper exists, so that the helper
         * gets a chance to produce a result.
         */
        while (pp->p_child != NULL || have_brand_helper) {
                /* Set by the brand helper to keep us waiting. */
                boolean_t brand_wants_wait = B_FALSE;
                /* Matching exited children seen while WEXITED is not set. */
                int proc_gone = 0;
                /* Matching children examined below with nothing to report. */
                int found = 0;

                /*
                 * Give the brand a chance to return synthetic results from
                 * this waitid() call before we do the real thing.
                 */
                if (have_brand_helper) {
                        int ret;

                        /* A zero return means the helper handled the call. */
                        if (BROP(pp)->b_waitid_helper(idtype, id, ip, options,
                            &brand_wants_wait, &ret) == 0) {
                                mutex_exit(&pidlock);
                                return (ret);
                        }

                        if (pp->p_child == NULL) {
                                goto no_real_children;
                        }
                }

                /*
                 * Look for interesting children in the newstate list.
                 */
                VERIFY(pp->p_child != NULL);
                for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
                        /*
                         * Children flagged CLDWAITPID are only considered
                         * when asked for by pid.
                         */
                        if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
                                continue;
                        if (idtype == P_PID && id != cp->p_pid)
                                continue;
                        if (idtype == P_PGID && id != cp->p_pgrp)
                                continue;
                        /* Let the brand veto this child, if it has a filter. */
                        if (PROC_IS_BRANDED(pp)) {
                                if (BROP(pp)->b_wait_filter != NULL &&
                                    BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
                                        continue;
                        }

                        switch (cp->p_wcode) {

                        /* These states are never expected on this list. */
                        case CLD_TRAPPED:
                        case CLD_STOPPED:
                        case CLD_CONTINUED:
                                cmn_err(CE_PANIC,
                                    "waitid: wrong state %d on the p_newstate"
                                    " list", cp->p_wcode);
                                break;

                        case CLD_EXITED:
                        case CLD_DUMPED:
                        case CLD_KILLED:
                                if (!(options & WEXITED)) {
                                        /*
                                         * Count how many are already gone
                                         * for good.
                                         */
                                        proc_gone++;
                                        break;
                                }
                                /*
                                 * Report the child.  Unless WNOWAIT was
                                 * given, consume its status and free it.
                                 */
                                if (!waitflag) {
                                        winfo(cp, ip, 0);
                                } else {
                                        winfo(cp, ip, 1);
                                        freeproc(cp);
                                }
                                mutex_exit(&pidlock);
                                if (waitflag) {         /* accept SIGCLD */
                                        sigcld_delete(ip);
                                        sigcld_repost();
                                }
                                return (0);
                        }

                        /* A pid match is unique; stop after the first one. */
                        if (idtype == P_PID)
                                break;
                }

                /*
                 * None of the children on the newstate list produced a
                 * result.  Check all the children.
                 */
                for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
                        if (idtype == P_PID && id != cp->p_pid)
                                continue;
                        if (idtype == P_PGID && id != cp->p_pgrp)
                                continue;
                        if (PROC_IS_BRANDED(pp)) {
                                if (BROP(pp)->b_wait_filter != NULL &&
                                    BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
                                        continue;
                        }

                        switch (cp->p_wcode) {
                        case CLD_TRAPPED:
                                if (!(options & WTRAPPED))
                                        break;
                                winfo(cp, ip, waitflag);
                                mutex_exit(&pidlock);
                                if (waitflag) {         /* accept SIGCLD */
                                        sigcld_delete(ip);
                                        sigcld_repost();
                                }
                                return (0);

                        case CLD_STOPPED:
                                if (!(options & WSTOPPED))
                                        break;
                                /* Is it still stopped? */
                                mutex_enter(&cp->p_lock);
                                if (!jobstopped(cp)) {
                                        mutex_exit(&cp->p_lock);
                                        break;
                                }
                                mutex_exit(&cp->p_lock);
                                winfo(cp, ip, waitflag);
                                mutex_exit(&pidlock);
                                if (waitflag) {         /* accept SIGCLD */
                                        sigcld_delete(ip);
                                        sigcld_repost();
                                }
                                return (0);

                        case CLD_CONTINUED:
                                if (!(options & WCONTINUED))
                                        break;
                                winfo(cp, ip, waitflag);
                                mutex_exit(&pidlock);
                                if (waitflag) {         /* accept SIGCLD */
                                        sigcld_delete(ip);
                                        sigcld_repost();
                                }
                                return (0);

                        case CLD_EXITED:
                        case CLD_DUMPED:
                        case CLD_KILLED:
                                /*
                                 * Skipped here for the same reason as in
                                 * the newstate scan; such a child is not
                                 * counted in found.
                                 */
                                if (idtype != P_PID &&
                                    (cp->p_pidflag & CLDWAITPID))
                                        continue;
                                /*
                                 * Don't complain if a process was found in
                                 * the first loop but we broke out of the loop
                                 * because of the arguments passed to us.
                                 */
                                if (proc_gone == 0) {
                                        cmn_err(CE_PANIC,
                                            "waitid: wrong state on the"
                                            " p_child list");
                                } else {
                                        break;
                                }
                        }

                        /* This child matched but had nothing to report. */
                        found++;

                        if (idtype == P_PID)
                                break;
                }

no_real_children:
                /*
                 * If we found no interesting processes at all,
                 * break out and return ECHILD.
                 */
                if (!brand_wants_wait && (found + proc_gone == 0))
                        break;

                if (options & WNOHANG) {
                        mutex_exit(&pidlock);
                        bzero(ip, sizeof (k_siginfo_t));
                        /*
                         * We should set ip->si_signo = SIGCLD,
                         * but there is an SVVS test that expects
                         * ip->si_signo to be zero in this case.
                         */
                        return (0);
                }

                /*
                 * If we found no processes of interest that could
                 * change state while we wait, we don't wait at all.
                 * Get out with ECHILD according to SVID.
                 */
                if (!brand_wants_wait && (found == proc_gone))
                        break;

                /*
                 * Sleep on our own p_cv, then rescan from the top.  A zero
                 * return is reported as EINTR (presumably the wait was
                 * interrupted by a signal -- confirm against
                 * cv_wait_sig_swap()).
                 */
                if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
                        mutex_exit(&pidlock);
                        return (EINTR);
                }
        }
        mutex_exit(&pidlock);
        return (ECHILD);
}
1404 
1405 int
1406 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1407 {
1408         int error;
1409         k_siginfo_t info;
1410 
1411         if (error = waitid(idtype, id, &info, options))
1412                 return (set_errno(error));
1413         if (copyout(&info, infop, sizeof (k_siginfo_t)))
1414                 return (set_errno(EFAULT));
1415         return (0);
1416 }
1417 
1418 #ifdef _SYSCALL32_IMPL
1419 
1420 int
1421 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1422 {
1423         int error;
1424         k_siginfo_t info;
1425         siginfo32_t info32;
1426 
1427         if (error = waitid(idtype, id, &info, options))
1428                 return (set_errno(error));
1429         siginfo_kto32(&info, &info32);
1430         if (copyout(&info32, infop, sizeof (info32)))
1431                 return (set_errno(EFAULT));
1432         return (0);
1433 }
1434 
1435 #endif  /* _SYSCALL32_IMPL */
1436 
1437 void
1438 proc_detach(proc_t *p)
1439 {
1440         proc_t *q;
1441 
1442         ASSERT(MUTEX_HELD(&pidlock));
1443 
1444         q = p->p_parent;
1445         ASSERT(q != NULL);
1446 
1447         /*
1448          * Take it off the newstate list of its parent
1449          */
1450         delete_ns(q, p);
1451 
1452         if (q->p_child == p) {
1453                 q->p_child = p->p_sibling;
1454                 /*
1455                  * If the parent has no children, it better not
1456                  * have any with new states either!
1457                  */
1458                 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL);
1459         }
1460 
1461         if (p->p_sibling) {
1462                 p->p_sibling->p_psibling = p->p_psibling;
1463         }
1464 
1465         if (p->p_psibling) {
1466                 p->p_psibling->p_sibling = p->p_sibling;
1467         }
1468 }
1469 
1470 /*
1471  * Remove zombie children from the process table.
1472  */
1473 void
1474 freeproc(proc_t *p)
1475 {
1476         proc_t *q;
1477         task_t *tk;
1478 
1479         ASSERT(p->p_stat == SZOMB);
1480         ASSERT(p->p_tlist == NULL);
1481         ASSERT(MUTEX_HELD(&pidlock));
1482 
1483         sigdelq(p, NULL, 0);
1484         if (p->p_killsqp) {
1485                 siginfofree(p->p_killsqp);
1486                 p->p_killsqp = NULL;
1487         }
1488 
1489         /* Clear any remaining brand data */
1490         if (PROC_IS_BRANDED(p)) {
1491                 brand_clearbrand(p, B_FALSE);
1492         }
1493 
1494 
1495         prfree(p);      /* inform /proc */
1496 
1497         /*
1498          * Don't free the init processes.
1499          * Other dying processes will access it.
1500          */
1501         if (p == proc_init)
1502                 return;
1503 
1504 
1505         /*
1506          * We wait until now to free the cred structure because a
1507          * zombie process's credentials may be examined by /proc.
1508          * No cred locking needed because there are no threads at this point.
1509          */
1510         upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
1511         crfree(p->p_cred);
1512         if (p->p_corefile != NULL) {
1513                 corectl_path_rele(p->p_corefile);
1514                 p->p_corefile = NULL;
1515         }
1516         if (p->p_content != NULL) {
1517                 corectl_content_rele(p->p_content);
1518                 p->p_content = NULL;
1519         }
1520 
1521         if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) ||
1522             (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) {
1523                 /*
1524                  * This should still do the right thing since p_utime/stime
1525                  * get set to the correct value on process exit, so it
1526                  * should get properly updated
1527                  */
1528                 p->p_nextofkin->p_cutime += p->p_utime;
1529                 p->p_nextofkin->p_cstime += p->p_stime;
1530 
1531                 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER];
1532                 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM];
1533                 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP];
1534                 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT];
1535                 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT];
1536                 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT];
1537                 p->p_nextofkin->p_cacct[LMS_USER_LOCK]
1538                     += p->p_acct[LMS_USER_LOCK];
1539                 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP];
1540                 p->p_nextofkin->p_cacct[LMS_WAIT_CPU]
1541                     += p->p_acct[LMS_WAIT_CPU];
1542                 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED];
1543 
1544                 p->p_nextofkin->p_cru.minflt      += p->p_ru.minflt;
1545                 p->p_nextofkin->p_cru.majflt      += p->p_ru.majflt;
1546                 p->p_nextofkin->p_cru.nswap       += p->p_ru.nswap;
1547                 p->p_nextofkin->p_cru.inblock     += p->p_ru.inblock;
1548                 p->p_nextofkin->p_cru.oublock     += p->p_ru.oublock;
1549                 p->p_nextofkin->p_cru.msgsnd      += p->p_ru.msgsnd;
1550                 p->p_nextofkin->p_cru.msgrcv      += p->p_ru.msgrcv;
1551                 p->p_nextofkin->p_cru.nsignals    += p->p_ru.nsignals;
1552                 p->p_nextofkin->p_cru.nvcsw       += p->p_ru.nvcsw;
1553                 p->p_nextofkin->p_cru.nivcsw      += p->p_ru.nivcsw;
1554                 p->p_nextofkin->p_cru.sysc        += p->p_ru.sysc;
1555                 p->p_nextofkin->p_cru.ioch        += p->p_ru.ioch;
1556 
1557         }
1558 
1559         q = p->p_nextofkin;
1560         if (q && q->p_orphan == p)
1561                 q->p_orphan = p->p_nextorph;
1562         else if (q) {
1563                 for (q = q->p_orphan; q; q = q->p_nextorph)
1564                         if (q->p_nextorph == p)
1565                                 break;
1566                 ASSERT(q && q->p_nextorph == p);
1567                 q->p_nextorph = p->p_nextorph;
1568         }
1569 
1570         /*
1571          * The process table slot is being freed, so it is now safe to give up
1572          * task and project membership.
1573          */
1574         mutex_enter(&p->p_lock);
1575         tk = p->p_task;
1576         task_detach(p);
1577         mutex_exit(&p->p_lock);
1578 
1579         proc_detach(p);
1580         pid_exit(p, tk);        /* frees pid and proc structure */
1581 
1582         task_rele(tk);
1583 }
1584 
1585 /*
1586  * Delete process "child" from the newstate list of process "parent"
1587  */
1588 void
1589 delete_ns(proc_t *parent, proc_t *child)
1590 {
1591         proc_t **ns;
1592 
1593         ASSERT(MUTEX_HELD(&pidlock));
1594         ASSERT(child->p_parent == parent);
1595         for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) {
1596                 if (*ns == child) {
1597 
1598                         ASSERT((*ns)->p_parent == parent);
1599 
1600                         *ns = child->p_sibling_ns;
1601                         child->p_sibling_ns = NULL;
1602                         return;
1603                 }
1604         }
1605 }
1606 
1607 /*
1608  * Add process "child" to the new state list of process "parent"
1609  */
1610 void
1611 add_ns(proc_t *parent, proc_t *child)
1612 {
1613         ASSERT(child->p_sibling_ns == NULL);
1614         child->p_sibling_ns = parent->p_child_ns;
1615         parent->p_child_ns = child;
1616 }