Print this page
OS-4818 contract template disappears on exec
OS-4460 exec brands processes that still have multiple threads
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4151 setbrand hooks should be sane during fork
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-4144 panic in lx_freelwp during zone shutdown
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
OS-4129 lxbrand should not abuse p_brand_data for storing exit signal
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
OS-3820 lxbrand ptrace(2): the next generation
OS-3685 lxbrand PTRACE_O_TRACEFORK race condition
OS-3834 lxbrand 64-bit strace(1) reports 64-bit process as using x32 ABI
OS-3794 lxbrand panic on init signal death
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
OS-3140 In LX zone 'ps fax' does not show all processes
OS-3429 Expose zone's init exit status
OS-3149 lx brand always sends SIGCHLD to parent processes, regardless of how clone was invoked
OS-2887 lxbrand add WALL, WCLONE, WNOTHREAD support to waitid
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2011, Joyent, Inc. All rights reserved.
  25  */
  26 
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  28 
  29 #include <sys/types.h>
  30 #include <sys/param.h>
  31 #include <sys/sysmacros.h>
  32 #include <sys/systm.h>
  33 #include <sys/cred.h>
  34 #include <sys/user.h>
  35 #include <sys/errno.h>
  36 #include <sys/proc.h>
  37 #include <sys/ucontext.h>
  38 #include <sys/procfs.h>
  39 #include <sys/vnode.h>
  40 #include <sys/acct.h>
  41 #include <sys/var.h>
  42 #include <sys/cmn_err.h>
  43 #include <sys/debug.h>
  44 #include <sys/wait.h>


 213          * Reset any signals that are ignored back to the default disposition.
 214          * Other u_signal members will be cleared when exec calls sigdefault().
 215          */
 216         for (i = 1; i < NSIG; i++) {
 217                 if (up->u_signal[i - 1] == SIG_IGN) {
 218                         up->u_signal[i - 1] = SIG_DFL;
 219                         sigemptyset(&up->u_sigmask[i - 1]);
 220                 }
 221         }
 222 
 223         /*
 224          * Clear the current signal, any signal info associated with it, and
 225          * any signal information from contracts and/or contract templates.
 226          */
 227         lwp->lwp_cursig = 0;
 228         lwp->lwp_extsig = 0;
 229         if (lwp->lwp_curinfo != NULL) {
 230                 siginfofree(lwp->lwp_curinfo);
 231                 lwp->lwp_curinfo = NULL;
 232         }
 233         lwp_ctmpl_clear(lwp);
 234 
 235         /*
 236          * Reset both the process root directory and the current working
 237          * directory to the root of the zone just as we do during boot.
 238          */
 239         VN_HOLD(p->p_zone->zone_rootvp);
 240         oldrd = up->u_rdir;
 241         up->u_rdir = p->p_zone->zone_rootvp;
 242 
 243         VN_HOLD(p->p_zone->zone_rootvp);
 244         oldcd = up->u_cdir;
 245         up->u_cdir = p->p_zone->zone_rootvp;
 246 
 247         if (up->u_cwd != NULL) {
 248                 refstr_rele(up->u_cwd);
 249                 up->u_cwd = NULL;
 250         }
 251 
 252         mutex_exit(&p->p_lock);
 253 


 349         /*
 350          * Stop and discard the process's lwps except for the current one,
 351          * unless some other lwp beat us to it.  If exitlwps() fails then
 352          * return and the calling lwp will call (or continue in) lwp_exit().
 353          */
 354         proc_is_exiting(p);
 355         if (exitlwps(0) != 0)
 356                 return (1);
 357 
 358         mutex_enter(&p->p_lock);
 359         if (p->p_ttime > 0) {
 360                 /*
 361                  * Account any remaining ticks charged to this process
 362                  * on its way out.
 363                  */
 364                 (void) task_cpu_time_incr(p->p_task, p->p_ttime);
 365                 p->p_ttime = 0;
 366         }
 367         mutex_exit(&p->p_lock);
 368 
 369         DTRACE_PROC(lwp__exit);
 370         DTRACE_PROC1(exit, int, why);
 371 
 372         /*
 373          * Perform any brand-specific proc exit processing.  Since this
 374          * is always the last lwp, this also performs lwp exit processing
 375          * and frees the brand data.
 376          */
 377         if (PROC_IS_BRANDED(p)) {
 378                 lwp_detach_brand_hdlrs(lwp);
 379                 brand_clearbrand(p, B_FALSE);
 380         }
 381 
 382         /*
 383          * Don't let init exit unless zone_start_init() failed its exec, or
 384          * we are shutting down the zone or the machine.
 385          *
 386          * Since we are single threaded, we don't need to lock the
 387          * following accesses to zone_proc_initpid.
 388          */
 389         if (p->p_pid == z->zone_proc_initpid) {
 390                 if (z->zone_boot_err == 0 &&
 391                     zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
 392                     zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {





















 393                         if (z->zone_restart_init == B_TRUE) {
 394                                 if (restart_init(what, why) == 0)
 395                                         return (0);
 396                         } else {


 397                                 (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
 398                                     CRED());
 399                         }
 400                 }
 401 
 402                 /*
 403                  * Since we didn't or couldn't restart init, we clear
 404                  * the zone's init state and proceed with exit
 405                  * processing.
 406                  */
 407                 z->zone_proc_initpid = -1;
 408         }
 409 


























 410         lwp_pcb_exit();
 411 
 412         /*
 413          * Allocate a sigqueue now, before we grab locks.
 414          * It will be given to sigcld(), below.
 415          * Special case:  If we will be making the process disappear
 416          * without a trace because it is either:
 417          *      * an exiting SSYS process, or
 418          *      * a posix_spawn() vfork child who requests it,
 419          * we don't bother to allocate a useless sigqueue.
 420          */
 421         evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
 422             why == CLD_EXITED && what == _EVAPORATE);
 423         if (!evaporate)
 424                 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
 425 
 426         /*
 427          * revoke any doors created by the process.
 428          */
 429         if (p->p_door_list)


 641                 proc_t *nokp = p->p_nextofkin;
 642 
 643                 for (;;) {
 644                         q->p_nextofkin = nokp;
 645                         if (q->p_nextorph == NULL)
 646                                 break;
 647                         q = q->p_nextorph;
 648                 }
 649                 q->p_nextorph = nokp->p_orphan;
 650                 nokp->p_orphan = p->p_orphan;
 651                 p->p_orphan = NULL;
 652         }
 653 
 654         /*
 655          * Reassign the children to init.
 656          * Don't try to assign init's children to init.
 657          */
 658         if ((q = p->p_child) != NULL && p != proc_init) {
 659                 struct proc     *np;
 660                 struct proc     *initp = proc_init;


 661                 boolean_t       setzonetop = B_FALSE;
 662 
 663                 if (!INGLOBALZONE(curproc))








 664                         setzonetop = B_TRUE;


 665 
 666                 pgdetach(p);
 667 
 668                 do {
 669                         np = q->p_sibling;
 670                         /*
 671                          * Delete it from its current parent new state
 672                          * list and add it to init new state list
 673                          */
 674                         delete_ns(q->p_parent, q);
 675 
 676                         q->p_ppid = 1;

 677                         q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
 678                         if (setzonetop) {
 679                                 mutex_enter(&q->p_lock);
 680                                 q->p_flag |= SZONETOP;
 681                                 mutex_exit(&q->p_lock);
 682                         }
 683                         q->p_parent = initp;
 684 
 685                         /*
 686                          * Since q will be the first child,
 687                          * it will not have a previous sibling.
 688                          */
 689                         q->p_psibling = NULL;
 690                         if (initp->p_child) {
 691                                 initp->p_child->p_psibling = q;
 692                         }
 693                         q->p_sibling = initp->p_child;
 694                         initp->p_child = q;
 695                         if (q->p_proc_flag & P_PR_PTRACE) {
 696                                 mutex_enter(&q->p_lock);


 830         /*
 831          * curthread's proc pointer is changed to point to the 'sched'
 832          * process for the corresponding zone, except in the case when
 833          * the exiting process is in fact a zsched instance, in which
 834          * case the proc pointer is set to p0.  We do so, so that the
 835          * process still points at the right zone when we call the VN_RELE()
 836          * below.
 837          *
 838          * This is because curthread's original proc pointer can be freed as
 839          * soon as the child sends a SIGCLD to its parent.  We use zsched so
 840          * that for user processes, even in the final moments of death, the
 841          * process is still associated with its zone.
 842          */
 843         if (p != t->t_procp->p_zone->zone_zsched)
 844                 t->t_procp = t->t_procp->p_zone->zone_zsched;
 845         else
 846                 t->t_procp = &p0;
 847 
 848         mutex_exit(&p->p_lock);
 849         if (!evaporate) {








































 850                 p->p_pidflag &= ~CLDPEND;
 851                 sigcld(p, sqp);


 852         } else {
 853                 /*
 854                  * Do what sigcld() would do if the disposition
 855                  * of the SIGCHLD signal were set to be ignored.
 856                  */
 857                 cv_broadcast(&p->p_srwchan_cv);
 858                 freeproc(p);
 859         }
 860         mutex_exit(&pidlock);
 861 
 862         /*
 863          * We don't release u_cdir and u_rdir until SZOMB is set.
 864          * This protects us against dofusers().
 865          */
 866         if (cdir)
 867                 VN_RELE(cdir);
 868         if (rdir)
 869                 VN_RELE(rdir);
 870         if (cwd)
 871                 refstr_rele(cwd);


 910         ip->si_stime = pp->p_stime;
 911         ip->si_utime = pp->p_utime;
 912 
 913         if (waitflag) {
 914                 pp->p_wcode = 0;
 915                 pp->p_wdata = 0;
 916                 pp->p_pidflag &= ~CLDPEND;
 917         }
 918 }
 919 
 920 /*
 921  * Wait system call.
 922  * Search for a terminated (zombie) child,
 923  * finally lay it to rest, and collect its status.
 924  * Look also for stopped children,
 925  * and pass back status from them.
 926  */
 927 int
 928 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
 929 {
 930         int found;
 931         proc_t *cp, *pp;
 932         int proc_gone;
 933         int waitflag = !(options & WNOWAIT);

 934 
 935         /*
 936          * Obsolete flag, defined here only for binary compatibility
 937          * with old statically linked executables.  Delete this when
 938          * we no longer care about these old and broken applications.
 939          */
 940 #define _WNOCHLD        0400
 941         options &= ~_WNOCHLD;
 942 
 943         if (options == 0 || (options & ~WOPTMASK))
 944                 return (EINVAL);
 945 
 946         switch (idtype) {
 947         case P_PID:
 948         case P_PGID:
 949                 if (id < 0 || id >= maxpid)
 950                         return (EINVAL);
 951                 /* FALLTHROUGH */
 952         case P_ALL:
 953                 break;
 954         default:
 955                 return (EINVAL);
 956         }
 957 
 958         pp = ttoproc(curthread);
 959 
 960         /*
 961          * Take pidlock so that the sibling chain can be searched.

 962          */
 963         mutex_enter(&pidlock);
 964 
 965         /*
 966          * if we are only looking for exited processes and child_ns list
 967          * is empty no reason to look at all children.
 968          */
 969         if (idtype == P_ALL &&
 970             (options & ~WNOWAIT) == (WNOHANG | WEXITED) &&
 971             pp->p_child_ns == NULL) {
 972                 if (pp->p_child) {
 973                         mutex_exit(&pidlock);
 974                         bzero(ip, sizeof (k_siginfo_t));
 975                         return (0);
 976                 }
 977                 mutex_exit(&pidlock);
 978                 return (ECHILD);
 979         }
 980 
 981         while (pp->p_child != NULL) {


 982 
 983                 proc_gone = 0;



 984 






















 985                 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
 986                         if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
 987                                 continue;
 988                         if (idtype == P_PID && id != cp->p_pid)
 989                                 continue;
 990                         if (idtype == P_PGID && id != cp->p_pgrp)
 991                                 continue;





 992 
 993                         switch (cp->p_wcode) {
 994 
 995                         case CLD_TRAPPED:
 996                         case CLD_STOPPED:
 997                         case CLD_CONTINUED:
 998                                 cmn_err(CE_PANIC,
 999                                     "waitid: wrong state %d on the p_newstate"
1000                                     " list", cp->p_wcode);
1001                                 break;
1002 
1003                         case CLD_EXITED:
1004                         case CLD_DUMPED:
1005                         case CLD_KILLED:
1006                                 if (!(options & WEXITED)) {
1007                                         /*
1008                                          * Count how many are already gone
1009                                          * for good.
1010                                          */
1011                                         proc_gone++;


1016                                 } else {
1017                                         winfo(cp, ip, 1);
1018                                         freeproc(cp);
1019                                 }
1020                                 mutex_exit(&pidlock);
1021                                 if (waitflag) {         /* accept SIGCLD */
1022                                         sigcld_delete(ip);
1023                                         sigcld_repost();
1024                                 }
1025                                 return (0);
1026                         }
1027 
1028                         if (idtype == P_PID)
1029                                 break;
1030                 }
1031 
1032                 /*
1033                  * Wow! None of the processes on the p_sibling_ns list were
1034                  * interesting processes. Check all the kids!
1035                  */
1036                 found = 0;
1037                 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
1038                         if (idtype == P_PID && id != cp->p_pid)
1039                                 continue;
1040                         if (idtype == P_PGID && id != cp->p_pgrp)
1041                                 continue;





1042 
1043                         switch (cp->p_wcode) {
1044                         case CLD_TRAPPED:
1045                                 if (!(options & WTRAPPED))
1046                                         break;
1047                                 winfo(cp, ip, waitflag);
1048                                 mutex_exit(&pidlock);
1049                                 if (waitflag) {         /* accept SIGCLD */
1050                                         sigcld_delete(ip);
1051                                         sigcld_repost();
1052                                 }
1053                                 return (0);
1054 
1055                         case CLD_STOPPED:
1056                                 if (!(options & WSTOPPED))
1057                                         break;
1058                                 /* Is it still stopped? */
1059                                 mutex_enter(&cp->p_lock);
1060                                 if (!jobstopped(cp)) {
1061                                         mutex_exit(&cp->p_lock);


1090                                 /*
1091                                  * Don't complain if a process was found in
1092                                  * the first loop but we broke out of the loop
1093                                  * because of the arguments passed to us.
1094                                  */
1095                                 if (proc_gone == 0) {
1096                                         cmn_err(CE_PANIC,
1097                                             "waitid: wrong state on the"
1098                                             " p_child list");
1099                                 } else {
1100                                         break;
1101                                 }
1102                         }
1103 
1104                         found++;
1105 
1106                         if (idtype == P_PID)
1107                                 break;
1108                 }
1109 

1110                 /*
1111                  * If we found no interesting processes at all,
1112                  * break out and return ECHILD.
1113                  */
1114                 if (found + proc_gone == 0)
1115                         break;
1116 
1117                 if (options & WNOHANG) {
1118                         mutex_exit(&pidlock);
1119                         bzero(ip, sizeof (k_siginfo_t));
1120                         /*
1121                          * We should set ip->si_signo = SIGCLD,
1122                          * but there is an SVVS test that expects
1123                          * ip->si_signo to be zero in this case.
1124                          */
1125                         return (0);
1126                 }
1127 
1128                 /*
1129                  * If we found no processes of interest that could
1130                  * change state while we wait, we don't wait at all.
1131                  * Get out with ECHILD according to SVID.
1132                  */
1133                 if (found == proc_gone)
1134                         break;
1135 
1136                 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
1137                         mutex_exit(&pidlock);
1138                         return (EINTR);
1139                 }
1140         }
1141         mutex_exit(&pidlock);
1142         return (ECHILD);
1143 }
1144 
1145 int
1146 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1147 {
1148         int error;
1149         k_siginfo_t info;
1150 
1151         if (error = waitid(idtype, id, &info, options))
1152                 return (set_errno(error));
1153         if (copyout(&info, infop, sizeof (k_siginfo_t)))


1209 
1210 /*
1211  * Remove zombie children from the process table.
1212  */
1213 void
1214 freeproc(proc_t *p)
1215 {
1216         proc_t *q;
1217         task_t *tk;
1218 
1219         ASSERT(p->p_stat == SZOMB);
1220         ASSERT(p->p_tlist == NULL);
1221         ASSERT(MUTEX_HELD(&pidlock));
1222 
1223         sigdelq(p, NULL, 0);
1224         if (p->p_killsqp) {
1225                 siginfofree(p->p_killsqp);
1226                 p->p_killsqp = NULL;
1227         }
1228 






1229         prfree(p);      /* inform /proc */
1230 
1231         /*
1232          * Don't free the init process.
1233          * Other dying processes will access it.
1234          */
1235         if (p == proc_init)
1236                 return;
1237 
1238 
1239         /*
1240          * We wait until now to free the cred structure because a
1241          * zombie process's credentials may be examined by /proc.
1242          * No cred locking needed because there are no threads at this point.
1243          */
1244         upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
1245         crfree(p->p_cred);
1246         if (p->p_corefile != NULL) {
1247                 corectl_path_rele(p->p_corefile);
1248                 p->p_corefile = NULL;




   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2014 Joyent, Inc. All rights reserved.
  25  */
  26 
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  28 
  29 #include <sys/types.h>
  30 #include <sys/param.h>
  31 #include <sys/sysmacros.h>
  32 #include <sys/systm.h>
  33 #include <sys/cred.h>
  34 #include <sys/user.h>
  35 #include <sys/errno.h>
  36 #include <sys/proc.h>
  37 #include <sys/ucontext.h>
  38 #include <sys/procfs.h>
  39 #include <sys/vnode.h>
  40 #include <sys/acct.h>
  41 #include <sys/var.h>
  42 #include <sys/cmn_err.h>
  43 #include <sys/debug.h>
  44 #include <sys/wait.h>


 213          * Reset any signals that are ignored back to the default disposition.
 214          * Other u_signal members will be cleared when exec calls sigdefault().
 215          */
 216         for (i = 1; i < NSIG; i++) {
 217                 if (up->u_signal[i - 1] == SIG_IGN) {
 218                         up->u_signal[i - 1] = SIG_DFL;
 219                         sigemptyset(&up->u_sigmask[i - 1]);
 220                 }
 221         }
 222 
 223         /*
 224          * Clear the current signal, any signal info associated with it, and
 225          * any signal information from contracts and/or contract templates.
 226          */
 227         lwp->lwp_cursig = 0;
 228         lwp->lwp_extsig = 0;
 229         if (lwp->lwp_curinfo != NULL) {
 230                 siginfofree(lwp->lwp_curinfo);
 231                 lwp->lwp_curinfo = NULL;
 232         }
 233         lwp_ctmpl_clear(lwp, B_FALSE);
 234 
 235         /*
 236          * Reset both the process root directory and the current working
 237          * directory to the root of the zone just as we do during boot.
 238          */
 239         VN_HOLD(p->p_zone->zone_rootvp);
 240         oldrd = up->u_rdir;
 241         up->u_rdir = p->p_zone->zone_rootvp;
 242 
 243         VN_HOLD(p->p_zone->zone_rootvp);
 244         oldcd = up->u_cdir;
 245         up->u_cdir = p->p_zone->zone_rootvp;
 246 
 247         if (up->u_cwd != NULL) {
 248                 refstr_rele(up->u_cwd);
 249                 up->u_cwd = NULL;
 250         }
 251 
 252         mutex_exit(&p->p_lock);
 253 


 349         /*
 350          * Stop and discard the process's lwps except for the current one,
 351          * unless some other lwp beat us to it.  If exitlwps() fails then
 352          * return and the calling lwp will call (or continue in) lwp_exit().
 353          */
 354         proc_is_exiting(p);
 355         if (exitlwps(0) != 0)
 356                 return (1);
 357 
 358         mutex_enter(&p->p_lock);
 359         if (p->p_ttime > 0) {
 360                 /*
 361                  * Account any remaining ticks charged to this process
 362                  * on its way out.
 363                  */
 364                 (void) task_cpu_time_incr(p->p_task, p->p_ttime);
 365                 p->p_ttime = 0;
 366         }
 367         mutex_exit(&p->p_lock);
 368 



 369         /*










 370          * Don't let init exit unless zone_start_init() failed its exec, or
 371          * we are shutting down the zone or the machine.
 372          *
 373          * Since we are single threaded, we don't need to lock the
 374          * following accesses to zone_proc_initpid.
 375          */
 376         if (p->p_pid == z->zone_proc_initpid) {
 377                 if (z->zone_boot_err == 0 &&
 378                     zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
 379                     zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
 380 
 381                         /*
 382                          * If the init process should be restarted, the
 383                          * "zone_restart_init" member will be set.  Some init
 384                          * programs in branded zones do not tolerate a restart
 385                          * in the traditional manner; setting the
 386                          * "zone_reboot_on_init_exit" member will cause the
 387                          * entire zone to be rebooted instead.  If neither of
 388                          * these flags is set the zone will shut down.
 389                          */
 390                         if (z->zone_reboot_on_init_exit == B_TRUE &&
 391                             z->zone_restart_init == B_TRUE) {
 392                                 /*
 393                                  * Trigger a zone reboot and continue
 394                                  * with exit processing.
 395                                  */
 396                                 z->zone_init_status = wstat(why, what);
 397                                 (void) zone_kadmin(A_REBOOT, 0, NULL,
 398                                     zone_kcred());
 399 
 400                         } else {
 401                                 if (z->zone_restart_init == B_TRUE) {
 402                                         if (restart_init(what, why) == 0)
 403                                                 return (0);
 404                                 }
 405 
 406                                 z->zone_init_status = wstat(why, what);
 407                                 (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
 408                                     zone_kcred());
 409                         }
 410                 }
 411 
 412                 /*
 413                  * Since we didn't or couldn't restart init, we clear
 414                  * the zone's init state and proceed with exit
 415                  * processing.
 416                  */
 417                 z->zone_proc_initpid = -1;
 418         }
 419 
 420         /*
 421          * Delay firing probes (and performing brand cleanup) until after the
 422          * zone_proc_initpid check. Cases which result in zone shutdown or
 423          * restart via zone_kadmin eventually result in a call back to
 424          * proc_exit.
 425          */
 426         DTRACE_PROC(lwp__exit);
 427         DTRACE_PROC1(exit, int, why);
 428 
 429         /*
 430          * Will perform any brand specific proc exit processing. Since this
 431          * is always the last lwp, will also perform lwp exit/free and proc
 432          * exit. Brand data will be freed when the process is reaped.
 433          */
 434         if (PROC_IS_BRANDED(p)) {
 435                 BROP(p)->b_lwpexit(lwp);
 436                 BROP(p)->b_proc_exit(p);
 437                 /*
 438                  * To ensure that b_proc_exit has access to brand-specific data
 439                  * contained by the one remaining lwp, call the freelwp hook as
 440                  * the last part of this clean-up process.
 441                  */
 442                 BROP(p)->b_freelwp(lwp);
 443                 lwp_detach_brand_hdlrs(lwp);
 444         }
 445 
 446         lwp_pcb_exit();
 447 
 448         /*
 449          * Allocate a sigqueue now, before we grab locks.
 450          * It will be given to sigcld(), below.
 451          * Special case:  If we will be making the process disappear
 452          * without a trace because it is either:
 453          *      * an exiting SSYS process, or
 454          *      * a posix_spawn() vfork child who requests it,
 455          * we don't bother to allocate a useless sigqueue.
 456          */
 457         evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
 458             why == CLD_EXITED && what == _EVAPORATE);
 459         if (!evaporate)
 460                 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
 461 
 462         /*
 463          * revoke any doors created by the process.
 464          */
 465         if (p->p_door_list)


 677                 proc_t *nokp = p->p_nextofkin;
 678 
 679                 for (;;) {
 680                         q->p_nextofkin = nokp;
 681                         if (q->p_nextorph == NULL)
 682                                 break;
 683                         q = q->p_nextorph;
 684                 }
 685                 q->p_nextorph = nokp->p_orphan;
 686                 nokp->p_orphan = p->p_orphan;
 687                 p->p_orphan = NULL;
 688         }
 689 
 690         /*
 691          * Reassign the children to init.
 692          * Don't try to assign init's children to init.
 693          */
 694         if ((q = p->p_child) != NULL && p != proc_init) {
 695                 struct proc     *np;
 696                 struct proc     *initp = proc_init;
 697                 pid_t           zone_initpid = 1;
 698                 struct proc     *zoneinitp = NULL;
 699                 boolean_t       setzonetop = B_FALSE;
 700 
 701                 if (!INGLOBALZONE(curproc)) {
 702                         zone_initpid = curproc->p_zone->zone_proc_initpid;
 703 
 704                         ASSERT(MUTEX_HELD(&pidlock));
 705                         zoneinitp = prfind(zone_initpid);
 706                         if (zoneinitp != NULL) {
 707                                 initp = zoneinitp;
 708                         } else {
 709                                 zone_initpid = 1;
 710                                 setzonetop = B_TRUE;
 711                         }
 712                 }
 713 
 714                 pgdetach(p);
 715 
 716                 do {
 717                         np = q->p_sibling;
 718                         /*
 719                          * Delete it from its current parent's new state
 720                          * list and add it to init's new state list
 721                          */
 722                         delete_ns(q->p_parent, q);
 723 
 724                         q->p_ppid = zone_initpid;
 725 
 726                         q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
 727                         if (setzonetop) {
 728                                 mutex_enter(&q->p_lock);
 729                                 q->p_flag |= SZONETOP;
 730                                 mutex_exit(&q->p_lock);
 731                         }
 732                         q->p_parent = initp;
 733 
 734                         /*
 735                          * Since q will be the first child,
 736                          * it will not have a previous sibling.
 737                          */
 738                         q->p_psibling = NULL;
 739                         if (initp->p_child) {
 740                                 initp->p_child->p_psibling = q;
 741                         }
 742                         q->p_sibling = initp->p_child;
 743                         initp->p_child = q;
 744                         if (q->p_proc_flag & P_PR_PTRACE) {
 745                                 mutex_enter(&q->p_lock);


 879         /*
 880          * curthread's proc pointer is changed to point to the 'sched'
 881          * process for the corresponding zone, except in the case when
 882          * the exiting process is in fact a zsched instance, in which
 883          * case the proc pointer is set to p0.  We do so, so that the
 884          * process still points at the right zone when we call the VN_RELE()
 885          * below.
 886          *
 887          * This is because curthread's original proc pointer can be freed as
 888          * soon as the child sends a SIGCLD to its parent.  We use zsched so
 889          * that for user processes, even in the final moments of death, the
 890          * process is still associated with its zone.
 891          */
 892         if (p != t->t_procp->p_zone->zone_zsched)
 893                 t->t_procp = t->t_procp->p_zone->zone_zsched;
 894         else
 895                 t->t_procp = &p0;
 896 
 897         mutex_exit(&p->p_lock);
 898         if (!evaporate) {
 899                 /*
 900                  * The brand specific code only happens when the brand has a
 901                  * function to call in place of sigcld and the parent of the
 902                  * exiting process is not the global zone init. If the parent
 903                  * is the global zone init, then the process was reparented,
 904                  * and we don't want brand code delivering possibly strange
 905                  * signals to init. Also, init is not branded, so any brand
 906                  * specific exit data will not be picked up by init anyway.
 907                  */
 908                 if (PROC_IS_BRANDED(p) &&
 909                     BROP(p)->b_exit_with_sig != NULL &&
 910                     p->p_ppid != 1) {
 911                         /*
 912                          * The code for _fini that could unload the brand_t
 913                          * blocks until the count of zones using the module
 914                          * reaches zero. Zones decrement the refcount on their
 915                          * brands only after all user tasks in that zone have
 916                          * exited and been waited on. The decrement on the
 917                          * brand's refcount happens in zone_destroy(). That
 918                          * depends on zone_shutdown() having been completed.
 919                          * zone_shutdown() includes a call to zone_empty(),
 920                          * where the zone waits for itself to reach the state
 921                          * ZONE_IS_EMPTY. This state is only set in either
 922                          * zone_shutdown(), when there are no user processes as
 923                          * the zone enters this function, or in
 924                          * zone_task_rele(). zone_task_rele() is called from
 925                          * code triggered by waiting on processes, not by the
 926                          * processes exiting through proc_exit().  This means
 927                          * all the branded processes that could exist for a
 928                          * specific brand_t must exit and get reaped before the
 929                          * refcount on the brand_t can reach 0. _fini will
 930                          * never unload the corresponding brand module before
 931                          * proc_exit finishes execution for all processes
 932                          * branded with a particular brand_t, which makes the
 933                          * operation below safe to do. Brands that wish to use
 934                          * this mechanism must wait in _fini as described
 935                          * above.
 936                          */
 937                         BROP(p)->b_exit_with_sig(p, sqp);
 938                 } else {
 939                         p->p_pidflag &= ~CLDPEND;
 940                         sigcld(p, sqp);
 941                 }
 942 
 943         } else {
 944                 /*
 945                  * Do what sigcld() would do if the disposition
 946                  * of the SIGCHLD signal were set to be ignored.
 947                  */
 948                 cv_broadcast(&p->p_srwchan_cv);
 949                 freeproc(p);
 950         }
 951         mutex_exit(&pidlock);
 952 
 953         /*
 954          * We don't release u_cdir and u_rdir until SZOMB is set.
 955          * This protects us against dofusers().
 956          */
 957         if (cdir)
 958                 VN_RELE(cdir);
 959         if (rdir)
 960                 VN_RELE(rdir);
 961         if (cwd)
 962                 refstr_rele(cwd);


1001         ip->si_stime = pp->p_stime;
1002         ip->si_utime = pp->p_utime;
1003 
1004         if (waitflag) {
1005                 pp->p_wcode = 0;
1006                 pp->p_wdata = 0;
1007                 pp->p_pidflag &= ~CLDPEND;
1008         }
1009 }
1010 
1011 /*
1012  * Wait system call.
1013  * Search for a terminated (zombie) child,
1014  * finally lay it to rest, and collect its status.
1015  * Look also for stopped children,
1016  * and pass back status from them.
1017  */
1018 int
1019 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
1020 {

1021         proc_t *cp, *pp;

1022         int waitflag = !(options & WNOWAIT);
1023         boolean_t have_brand_helper = B_FALSE;
1024 
1025         /*
1026          * Obsolete flag, defined here only for binary compatibility
1027          * with old statically linked executables.  Delete this when
1028          * we no longer care about these old and broken applications.
1029          */
1030 #define _WNOCHLD        0400
1031         options &= ~_WNOCHLD;
1032 
1033         if (options == 0 || (options & ~WOPTMASK))
1034                 return (EINVAL);
1035 
1036         switch (idtype) {
1037         case P_PID:
1038         case P_PGID:
1039                 if (id < 0 || id >= maxpid)
1040                         return (EINVAL);
1041                 /* FALLTHROUGH */
1042         case P_ALL:
1043                 break;
1044         default:
1045                 return (EINVAL);
1046         }
1047 
1048         pp = ttoproc(curthread);
1049 
1050         /*
1051          * Anytime you are looking for a process, you take pidlock to prevent
1052          * things from changing as you look.
1053          */
1054         mutex_enter(&pidlock);
1055 
1056         /*
1057          * if we are only looking for exited processes and child_ns list
1058          * is empty no reason to look at all children.
1059          */
1060         if (idtype == P_ALL &&
1061             (options & ~WNOWAIT) == (WNOHANG | WEXITED) &&
1062             pp->p_child_ns == NULL) {
1063                 if (pp->p_child) {
1064                         mutex_exit(&pidlock);
1065                         bzero(ip, sizeof (k_siginfo_t));
1066                         return (0);
1067                 }
1068                 mutex_exit(&pidlock);
1069                 return (ECHILD);
1070         }
1071 
1072         if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) {
1073                 have_brand_helper = B_TRUE;
1074         }
1075 
1076         while (pp->p_child != NULL || have_brand_helper) {
1077                 boolean_t brand_wants_wait = B_FALSE;
1078                 int proc_gone = 0;
1079                 int found = 0;
1080 
1081                 /*
1082                  * Give the brand a chance to return synthetic results from
1083                  * this waitid() call before we do the real thing.
1084                  */
1085                 if (have_brand_helper) {
1086                         int ret;
1087 
1088                         if (BROP(pp)->b_waitid_helper(idtype, id, ip, options,
1089                             &brand_wants_wait, &ret) == 0) {
1090                                 mutex_exit(&pidlock);
1091                                 return (ret);
1092                         }
1093 
1094                         if (pp->p_child == NULL) {
1095                                 goto no_real_children;
1096                         }
1097                 }
1098 
1099                 /*
1100                  * Look for interesting children in the newstate list.
1101                  */
1102                 VERIFY(pp->p_child != NULL);
1103                 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
1104                         if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
1105                                 continue;
1106                         if (idtype == P_PID && id != cp->p_pid)
1107                                 continue;
1108                         if (idtype == P_PGID && id != cp->p_pgrp)
1109                                 continue;
1110                         if (PROC_IS_BRANDED(pp)) {
1111                                 if (BROP(pp)->b_wait_filter != NULL &&
1112                                     BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
1113                                         continue;
1114                         }
1115 
1116                         switch (cp->p_wcode) {
1117 
1118                         case CLD_TRAPPED:
1119                         case CLD_STOPPED:
1120                         case CLD_CONTINUED:
1121                                 cmn_err(CE_PANIC,
1122                                     "waitid: wrong state %d on the p_newstate"
1123                                     " list", cp->p_wcode);
1124                                 break;
1125 
1126                         case CLD_EXITED:
1127                         case CLD_DUMPED:
1128                         case CLD_KILLED:
1129                                 if (!(options & WEXITED)) {
1130                                         /*
1131                                          * Count how many are already gone
1132                                          * for good.
1133                                          */
1134                                         proc_gone++;


1139                                 } else {
1140                                         winfo(cp, ip, 1);
1141                                         freeproc(cp);
1142                                 }
1143                                 mutex_exit(&pidlock);
1144                                 if (waitflag) {         /* accept SIGCLD */
1145                                         sigcld_delete(ip);
1146                                         sigcld_repost();
1147                                 }
1148                                 return (0);
1149                         }
1150 
1151                         if (idtype == P_PID)
1152                                 break;
1153                 }
1154 
1155                 /*
1156                  * Wow! None of the processes on the p_child_ns list were
1157                  * interesting. Check all the kids!
1158                  */

1159                 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
1160                         if (idtype == P_PID && id != cp->p_pid)
1161                                 continue;
1162                         if (idtype == P_PGID && id != cp->p_pgrp)
1163                                 continue;
1164                         if (PROC_IS_BRANDED(pp)) {
1165                                 if (BROP(pp)->b_wait_filter != NULL &&
1166                                     BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
1167                                         continue;
1168                         }
1169 
1170                         switch (cp->p_wcode) {
1171                         case CLD_TRAPPED:
1172                                 if (!(options & WTRAPPED))
1173                                         break;
1174                                 winfo(cp, ip, waitflag);
1175                                 mutex_exit(&pidlock);
1176                                 if (waitflag) {         /* accept SIGCLD */
1177                                         sigcld_delete(ip);
1178                                         sigcld_repost();
1179                                 }
1180                                 return (0);
1181 
1182                         case CLD_STOPPED:
1183                                 if (!(options & WSTOPPED))
1184                                         break;
1185                                 /* Is it still stopped? */
1186                                 mutex_enter(&cp->p_lock);
1187                                 if (!jobstopped(cp)) {
1188                                         mutex_exit(&cp->p_lock);


1217                                 /*
1218                                  * Don't complain if a process was found in
1219                                  * the first loop but we broke out of the loop
1220                                  * because of the arguments passed to us.
1221                                  */
1222                                 if (proc_gone == 0) {
1223                                         cmn_err(CE_PANIC,
1224                                             "waitid: wrong state on the"
1225                                             " p_child list");
1226                                 } else {
1227                                         break;
1228                                 }
1229                         }
1230 
1231                         found++;
1232 
1233                         if (idtype == P_PID)
1234                                 break;
1235                 }
1236 
1237 no_real_children:
1238                 /*
1239                  * If we found no interesting processes at all,
1240                  * break out and return ECHILD.
1241                  */
1242                 if (!brand_wants_wait && (found + proc_gone == 0))
1243                         break;
1244 
1245                 if (options & WNOHANG) {
1246                         mutex_exit(&pidlock);
1247                         bzero(ip, sizeof (k_siginfo_t));
1248                         /*
1249                          * We should set ip->si_signo = SIGCLD,
1250                          * but there is an SVVS test that expects
1251                          * ip->si_signo to be zero in this case.
1252                          */
1253                         return (0);
1254                 }
1255 
1256                 /*
1257                  * If we found no processes of interest that could
1258                  * change state while we wait, we don't wait at all.
1259                  * Get out with ECHILD according to SVID.
1260                  */
1261                 if (!brand_wants_wait && (found == proc_gone))
1262                         break;
1263 
1264                 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
1265                         mutex_exit(&pidlock);
1266                         return (EINTR);
1267                 }
1268         }
1269         mutex_exit(&pidlock);
1270         return (ECHILD);
1271 }
1272 
1273 int
1274 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1275 {
1276         int error;
1277         k_siginfo_t info;
1278 
1279         if (error = waitid(idtype, id, &info, options))
1280                 return (set_errno(error));
1281         if (copyout(&info, infop, sizeof (k_siginfo_t)))


1337 
1338 /*
1339  * Remove zombie children from the process table.
1340  */
1341 void
1342 freeproc(proc_t *p)
1343 {
1344         proc_t *q;
1345         task_t *tk;
1346 
1347         ASSERT(p->p_stat == SZOMB);
1348         ASSERT(p->p_tlist == NULL);
1349         ASSERT(MUTEX_HELD(&pidlock));
1350 
1351         sigdelq(p, NULL, 0);
1352         if (p->p_killsqp) {
1353                 siginfofree(p->p_killsqp);
1354                 p->p_killsqp = NULL;
1355         }
1356 
1357         /* Clear any remaining brand data */
1358         if (PROC_IS_BRANDED(p)) {
1359                 brand_clearbrand(p, B_FALSE);
1360         }
1361 
1362 
1363         prfree(p);      /* inform /proc */
1364 
1365         /*
1366          * Don't free the init processes.
1367          * Other dying processes will access it.
1368          */
1369         if (p == proc_init)
1370                 return;
1371 
1372 
1373         /*
1374          * We wait until now to free the cred structure because a
1375          * zombie process's credentials may be examined by /proc.
1376          * No cred locking needed because there are no threads at this point.
1377          */
1378         upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
1379         crfree(p->p_cred);
1380         if (p->p_corefile != NULL) {
1381                 corectl_path_rele(p->p_corefile);
1382                 p->p_corefile = NULL;