1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2018 Joyent, Inc.
25 * Copyright 2020 Oxide Computer Company
26 * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
27 */
28
29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
30
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/sysmacros.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/user.h>
37 #include <sys/errno.h>
38 #include <sys/proc.h>
39 #include <sys/ucontext.h>
40 #include <sys/procfs.h>
41 #include <sys/vnode.h>
42 #include <sys/acct.h>
43 #include <sys/var.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/wait.h>
47 #include <sys/siginfo.h>
48 #include <sys/procset.h>
49 #include <sys/class.h>
50 #include <sys/file.h>
51 #include <sys/session.h>
52 #include <sys/kmem.h>
53 #include <sys/vtrace.h>
54 #include <sys/prsystm.h>
55 #include <sys/ipc.h>
56 #include <sys/sem_impl.h>
57 #include <c2/audit.h>
58 #include <sys/aio_impl.h>
59 #include <vm/as.h>
60 #include <sys/poll.h>
61 #include <sys/door.h>
62 #include <sys/lwpchan_impl.h>
63 #include <sys/utrap.h>
64 #include <sys/task.h>
65 #include <sys/exacct.h>
66 #include <sys/cyclic.h>
67 #include <sys/schedctl.h>
68 #include <sys/rctl.h>
69 #include <sys/contract_impl.h>
70 #include <sys/contract/process_impl.h>
71 #include <sys/list.h>
72 #include <sys/dtrace.h>
73 #include <sys/pool.h>
74 #include <sys/sdt.h>
75 #include <sys/corectl.h>
76 #include <sys/core.h>
77 #include <sys/brand.h>
78 #include <sys/libc_kernel.h>
79
80 /*
81 * convert code/data pair into old style wait status
82 */
83 int
84 wstat(int code, int data)
85 {
86 int stat = (data & 0377);
87
88 switch (code) {
89 case CLD_EXITED:
90 stat <<= 8;
91 break;
92 case CLD_DUMPED:
93 stat |= WCOREFLG;
94 break;
95 case CLD_KILLED:
96 break;
97 case CLD_TRAPPED:
98 case CLD_STOPPED:
99 stat <<= 8;
100 stat |= WSTOPFLG;
101 break;
102 case CLD_CONTINUED:
103 stat = WCONTFLG;
104 break;
105 default:
106 cmn_err(CE_PANIC, "wstat: bad code");
107 /* NOTREACHED */
108 }
109 return (stat);
110 }
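
/*
 * For illustration only, a hedged sketch of the resulting encodings,
 * assuming the usual illumos <sys/wait.h> flag values (WCOREFLG == 0200,
 * WSTOPFLG == 0177, WCONTFLG == 0177777) and signal numbers:
 *
 *	wstat(CLD_EXITED, 2)		== 0x0200  (exit status in high byte)
 *	wstat(CLD_KILLED, SIGKILL)	== 0x0009  (signal number in low byte)
 *	wstat(CLD_DUMPED, SIGSEGV)	== 0x008b  (signal | WCOREFLG)
 *	wstat(CLD_STOPPED, SIGTSTP)	== 0x187f  ((signal << 8) | WSTOPFLG)
 *	wstat(CLD_CONTINUED, 0)		== 0xffff  (WCONTFLG)
 */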
111
112 static char *
113 exit_reason(char *buf, size_t bufsz, int what, int why)
114 {
115 switch (why) {
116 case CLD_EXITED:
117 (void) snprintf(buf, bufsz, "exited with status %d", what);
118 break;
119 case CLD_KILLED:
120 (void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
121 break;
122 case CLD_DUMPED:
123 (void) snprintf(buf, bufsz, "core dumped on signal %d", what);
124 break;
125 default:
126 (void) snprintf(buf, bufsz, "encountered unknown error "
127 "(%d, %d)", why, what);
128 break;
129 }
130
131 return (buf);
132 }
133
134 /*
135 * exit system call: pass back caller's arg.
136 */
137 void
138 rexit(int rval)
139 {
140 exit(CLD_EXITED, rval);
141 }
142
143 /*
144 * Bump the init_restarts kstat and let interested parties know about the
145 * restart.
146 */
147 static void
148 restart_init_notify(zone_t *zone)
149 {
150 nvlist_t *nvl = NULL;
151
152 zone->zone_proc_init_restarts++;
153
154 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0 &&
155 nvlist_add_uint32(nvl, ZONE_CB_RESTARTS,
156 zone->zone_proc_init_restarts) == 0) {
157 zone_sysevent_publish(zone, ZONE_EVENT_INIT_CLASS,
158 ZONE_EVENT_INIT_RESTART_SC, nvl);
159 }
160
161 nvlist_free(nvl);
162 }
163
164 /*
165 * Called by proc_exit() when a zone's init exits, presumably because
166 * it failed. As long as the given zone is still in the "running"
167 * state, we will re-exec() init, but first we need to reset things
168 * which are usually inherited across exec() but will break init's
169 * assumption that it is being exec()'d from a virgin process. Most
170 * importantly this includes closing all file descriptors (exec only
171 * closes those marked close-on-exec) and resetting signals (exec only
172 * resets handled signals, and we need to clear any signals which
173 * killed init). Anything else that exec(2) says would be inherited,
174 * but would affect the execution of init, needs to be reset.
175 */
176 static int
177 restart_init(int what, int why)
178 {
179 kthread_t *t = curthread;
180 klwp_t *lwp = ttolwp(t);
181 proc_t *p = ttoproc(t);
182 proc_t *pp = p->p_zone->zone_zsched;
183 user_t *up = PTOU(p);
184
185 vnode_t *oldcd, *oldrd;
186 int i, err;
187 char reason_buf[64];
188
189 /*
190 * Let zone admin (and global zone admin if this is for a non-global
191 * zone) know that init has failed and will be restarted.
192 */
193 zcmn_err(p->p_zone->zone_id, CE_WARN,
194 "init(1M) %s: restarting automatically",
195 exit_reason(reason_buf, sizeof (reason_buf), what, why));
196
197 if (!INGLOBALZONE(p)) {
198 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
199 "restarting automatically",
200 p->p_zone->zone_name, p->p_pid, reason_buf);
201 }
202
203 /*
204 * Remove any fpollinfo_t's for this (last) thread from our file
205 * descriptors so closeall() can ASSERT() that they're all gone.
206 * Then close all open file descriptors in the process.
207 */
208 pollcleanup();
209 closeall(P_FINFO(p));
210
211 /*
212 * Grab p_lock and begin clearing miscellaneous global process
213 * state that needs to be reset before we exec the new init(1M).
214 */
215
216 mutex_enter(&p->p_lock);
217 prbarrier(p);
218
219 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
220 up->u_cmask = CMASK;
221
222 sigemptyset(&t->t_hold);
223 sigemptyset(&t->t_sig);
224 sigemptyset(&t->t_extsig);
225
226 sigemptyset(&p->p_sig);
227 sigemptyset(&p->p_extsig);
228
229 sigdelq(p, t, 0);
230 sigdelq(p, NULL, 0);
231
232 if (p->p_killsqp) {
233 siginfofree(p->p_killsqp);
234 p->p_killsqp = NULL;
235 }
236
237 /*
238 * Reset any signals that are ignored back to the default disposition.
239 * Other u_signal members will be cleared when exec calls sigdefault().
240 */
241 for (i = 1; i < NSIG; i++) {
242 if (up->u_signal[i - 1] == SIG_IGN) {
243 up->u_signal[i - 1] = SIG_DFL;
244 sigemptyset(&up->u_sigmask[i - 1]);
245 }
246 }
247
248 /*
249 * Clear the current signal, any signal info associated with it, and
250 * any signal information from contracts and/or contract templates.
251 */
252 lwp->lwp_cursig = 0;
253 lwp->lwp_extsig = 0;
254 if (lwp->lwp_curinfo != NULL) {
255 siginfofree(lwp->lwp_curinfo);
256 lwp->lwp_curinfo = NULL;
257 }
258 lwp_ctmpl_clear(lwp, B_FALSE);
259
260 /*
261 * Reset both the process root directory and the current working
262 * directory to the root of the zone just as we do during boot.
263 */
264 VN_HOLD(p->p_zone->zone_rootvp);
265 oldrd = up->u_rdir;
266 up->u_rdir = p->p_zone->zone_rootvp;
267
268 VN_HOLD(p->p_zone->zone_rootvp);
269 oldcd = up->u_cdir;
270 up->u_cdir = p->p_zone->zone_rootvp;
271
272 if (up->u_cwd != NULL) {
273 refstr_rele(up->u_cwd);
274 up->u_cwd = NULL;
275 }
276
277 /* Reset security flags */
278 mutex_enter(&pp->p_lock);
279 p->p_secflags = pp->p_secflags;
280 mutex_exit(&pp->p_lock);
281
282 mutex_exit(&p->p_lock);
283
284 if (oldrd != NULL)
285 VN_RELE(oldrd);
286 if (oldcd != NULL)
287 VN_RELE(oldcd);
288
289 /*
290 * It's possible that a zone's init will have become privilege aware
291 * and modified privilege sets; reset them.
292 */
293 cred_t *oldcr, *newcr;
294
295 mutex_enter(&p->p_crlock);
296 oldcr = p->p_cred;
297 mutex_enter(&pp->p_crlock);
298 crhold(newcr = p->p_cred = pp->p_cred);
299 mutex_exit(&pp->p_crlock);
300 mutex_exit(&p->p_crlock);
301 crfree(oldcr);
302 /* Additional hold for the current thread - expected by crset() */
303 crhold(newcr);
304 crset(p, newcr);
305
306 /* Free the controlling tty. (freectty() always assumes curproc.) */
307 ASSERT(p == curproc);
308 (void) freectty(B_TRUE);
309
310 restart_init_notify(p->p_zone);
311
312 /*
313 * Now exec() the new init(1M) on top of the current process. If we
314 * succeed, the caller will treat this like a successful system call.
315 * If we fail, we issue messages and the caller will proceed with exit.
316 */
317 err = exec_init(p->p_zone->zone_initname, NULL);
318
319 if (err == 0)
320 return (0);
321
322 zcmn_err(p->p_zone->zone_id, CE_WARN,
323 "failed to restart init(1M) (err=%d): system reboot required", err);
324
325 if (!INGLOBALZONE(p)) {
326 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
327 "(pid %d, err=%d): zoneadm(1M) boot required",
328 p->p_zone->zone_name, p->p_pid, err);
329 }
330
331 return (-1);
332 }
333
334 /*
335 * Release resources.
336 * Enter zombie state.
337 * Wake up parent and init processes,
338 * and dispose of children.
339 */
340 void
341 exit(int why, int what)
342 {
343 /*
344 * If proc_exit() fails, then some other lwp in the process
345 * got there first. We just have to call lwp_exit() to allow
346 * the other lwp to finish exiting the process. Otherwise we're
347 * restarting init, and should return.
348 */
349 if (proc_exit(why, what) != 0) {
350 mutex_enter(&curproc->p_lock);
351 ASSERT(curproc->p_flag & SEXITLWPS);
352 lwp_exit();
353 /* NOTREACHED */
354 }
355 }
356
357 /*
358 * Set the SEXITING flag on the process, after making sure /proc does
359 * not have it locked. This is done in more places than proc_exit(),
360 * so it is a separate function.
361 */
362 void
363 proc_is_exiting(proc_t *p)
364 {
365 mutex_enter(&p->p_lock);
366 prbarrier(p);
367 p->p_flag |= SEXITING;
368 mutex_exit(&p->p_lock);
369 }
370
371 /*
372 * Return true if zone's init is restarted, false if exit processing should
373 * proceed.
374 */
375 static boolean_t
376 zone_init_exit(zone_t *z, int why, int what)
377 {
378 /*
379 * Typically we don't let the zone's init exit unless zone_start_init()
380 * failed its exec, or we are shutting down the zone or the machine,
381 * although the various flags handled within this function will control
382 * the behavior.
383 *
384 * Since we are single threaded, we don't need to lock the following
385 * accesses to zone_proc_initpid.
386 */
387 if (z->zone_boot_err != 0 ||
388 zone_status_get(z) >= ZONE_IS_SHUTTING_DOWN ||
389 zone_status_get(global_zone) >= ZONE_IS_SHUTTING_DOWN) {
390 /*
391 * Clear the zone's init pid and proceed with exit processing.
392 */
393 z->zone_proc_initpid = -1;
394 return (B_FALSE);
395 }
396
397 /*
398 * There are a variety of configuration flags on the zone to control
399 * init exit behavior.
400 *
401 * If the init process should be restarted, the "zone_restart_init"
402 * member will be set.
403 */
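
/*
 * As a rough summary of the cases handled below (a sketch only; each
 * flag is described in more detail at its point of use):
 *
 *	!zone_restart_init		halt the zone
 *	zone_reboot_on_init_exit	reboot the zone, unless
 *					zone_restart_init_0 is also set and
 *					init did not exit 0, in which case
 *					halt the zone
 *	zone_restart_init_0		re-exec init only if it exited 0,
 *					otherwise halt the zone
 *	otherwise			re-exec init
 *
 * A failed re-exec of init also results in the zone being halted.
 */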
404 if (!z->zone_restart_init) {
405 /*
406 * The zone has been setup to halt when init exits.
407 */
408 z->zone_init_status = wstat(why, what);
409 (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, zone_kcred());
410 z->zone_proc_initpid = -1;
411 return (B_FALSE);
412 }
413
414 /*
415 * At this point we know we're configured to restart init, but there
416 * are various modifiers to that behavior.
417 */
418
419 if (z->zone_reboot_on_init_exit) {
420 /*
421 * Some init programs in branded zones do not tolerate a
422 * restart in the traditional manner; setting
423 * "zone_reboot_on_init_exit" will cause the entire zone to be
424 * rebooted instead.
425 */
426
427 if (z->zone_restart_init_0) {
428 /*
429 * Some init programs in branded zones only want to
430 * restart if they exit 0, otherwise the zone should
431 * shutdown. Setting the "zone_restart_init_0" member
432 * controls this behavior.
433 */
434 if (why == CLD_EXITED && what == 0) {
435 /* Trigger a zone reboot */
436 (void) zone_kadmin(A_REBOOT, 0, NULL,
437 zone_kcred());
438 } else {
439 /* Shutdown instead of reboot */
440 (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
441 zone_kcred());
442 }
443 } else {
444 /* Trigger a zone reboot */
445 (void) zone_kadmin(A_REBOOT, 0, NULL, zone_kcred());
446 }
447
448 z->zone_init_status = wstat(why, what);
449 z->zone_proc_initpid = -1;
450 return (B_FALSE);
451 }
452
453 if (z->zone_restart_init_0) {
454 /*
455 * Some init programs in branded zones only want to restart if
456 * they exit 0, otherwise the zone should shutdown. Setting the
457 * "zone_restart_init_0" member controls this behavior.
458 *
459 * In this case we only restart init if it exited successfully.
460 */
461 if (why == CLD_EXITED && what == 0 &&
462 restart_init(what, why) == 0) {
463 return (B_TRUE);
464 }
465 } else {
466 /*
467 * No restart modifiers on the zone, attempt to restart init.
468 */
469 if (restart_init(what, why) == 0) {
470 return (B_TRUE);
471 }
472 }
473
474 /*
475 * The restart failed, or the criteria for a restart are not met;
476 * the zone will shut down.
477 */
478 z->zone_init_status = wstat(why, what);
479 (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, zone_kcred());
480 z->zone_proc_initpid = -1;
481 return (B_FALSE);
482 }
483
484 /*
485 * Return value:
486 * 1 - exitlwps() failed, call (or continue) lwp_exit()
487 * 0 - restarting init. Return through system call path
488 */
489 int
490 proc_exit(int why, int what)
491 {
492 kthread_t *t = curthread;
493 klwp_t *lwp = ttolwp(t);
494 proc_t *p = ttoproc(t);
495 zone_t *z = p->p_zone;
496 timeout_id_t tmp_id;
497 int rv;
498 proc_t *q;
499 task_t *tk;
500 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
501 sigqueue_t *sqp;
502 lwpdir_t *lwpdir;
503 uint_t lwpdir_sz;
504 tidhash_t *tidhash;
505 uint_t tidhash_sz;
506 ret_tidhash_t *ret_tidhash;
507 refstr_t *cwd;
508 hrtime_t hrutime, hrstime;
509 int evaporate;
510
511 /*
512 * Stop and discard the process's lwps except for the current one,
513 * unless some other lwp beat us to it. If exitlwps() fails then
514 * return and the calling lwp will call (or continue in) lwp_exit().
515 */
516 proc_is_exiting(p);
517 if (exitlwps(0) != 0)
518 return (1);
519
520 mutex_enter(&p->p_lock);
521 if (p->p_ttime > 0) {
522 /*
523 * Account any remaining ticks charged to this process
524 * on its way out.
525 */
526 (void) task_cpu_time_incr(p->p_task, p->p_ttime);
527 p->p_ttime = 0;
528 }
529 mutex_exit(&p->p_lock);
530
531 /*
532 * Don't let init exit unless zone_start_init() failed its exec, or
533 * we are shutting down the zone or the machine.
534 *
535 * Since we are single threaded, we don't need to lock the
536 * following accesses to zone_proc_initpid.
537 */
538 if (p->p_pid == z->zone_proc_initpid) {
539 /* If zone's init restarts, we're done here. */
540 if (zone_init_exit(z, why, what))
541 return (0);
542 }
543
544 /*
545 * Delay firing probes (and performing brand cleanup) until after the
546 * zone_proc_initpid check. Cases which result in zone shutdown or
547 * restart via zone_kadmin eventually result in a call back to
548 * proc_exit.
549 */
550 DTRACE_PROC(lwp__exit);
551 DTRACE_PROC1(exit, int, why);
552
553 /*
554 * Perform any brand-specific proc exit processing. Since this is
555 * always the last lwp, this will also perform lwp exit/free and proc
556 * exit. Brand data will be freed when the process is reaped.
557 */
558 if (PROC_IS_BRANDED(p)) {
559 BROP(p)->b_lwpexit(lwp);
560 BROP(p)->b_proc_exit(p);
561 /*
562 * To ensure that b_proc_exit has access to brand-specific data
563 * contained by the one remaining lwp, call the freelwp hook as
564 * the last part of this clean-up process.
565 */
566 BROP(p)->b_freelwp(lwp);
567 lwp_detach_brand_hdlrs(lwp);
568 }
569
570 lwp_pcb_exit();
571
572 /*
573 * Allocate a sigqueue now, before we grab locks.
574 * It will be given to sigcld(), below.
575 * Special case: If we will be making the process disappear
576 * without a trace because it is either:
577 * * an exiting SSYS process, or
578 * * a posix_spawn() vfork child who requests it,
579 * we don't bother to allocate a useless sigqueue.
580 */
581 evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
582 why == CLD_EXITED && what == _EVAPORATE);
583 if (!evaporate)
584 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
585
586 /*
587 * revoke any doors created by the process.
588 */
589 if (p->p_door_list)
590 door_exit();
591
592 /*
593 * Release schedctl data structures.
594 */
595 if (p->p_pagep)
596 schedctl_proc_cleanup();
597
598 /*
599 * make sure all pending kaio has completed.
600 */
601 if (p->p_aio)
602 aio_cleanup_exit();
603
604 /*
605 * discard the lwpchan cache.
606 */
607 if (p->p_lcp != NULL)
608 lwpchan_destroy_cache(0);
609
610 /*
611 * Clean up any DTrace helper actions or probes for the process.
612 */
613 if (p->p_dtrace_helpers != NULL) {
614 ASSERT(dtrace_helpers_cleanup != NULL);
615 (*dtrace_helpers_cleanup)(p);
616 }
617
618 /*
619 * Clean up any signalfd state for the process.
620 */
621 if (p->p_sigfd != NULL) {
622 VERIFY(sigfd_exit_helper != NULL);
623 (*sigfd_exit_helper)();
624 }
625
626 /* untimeout the realtime timers */
627 if (p->p_itimer != NULL)
628 timer_exit();
629
630 if ((tmp_id = p->p_alarmid) != 0) {
631 p->p_alarmid = 0;
632 (void) untimeout(tmp_id);
633 }
634
635 /*
636 * If we had generated any upanic(2) state, free that now.
637 */
638 if (p->p_upanic != NULL) {
639 kmem_free(p->p_upanic, PRUPANIC_BUFLEN);
640 p->p_upanic = NULL;
641 }
642
643 /*
644 * Remove any fpollinfo_t's for this (last) thread from our file
645 * descriptors so closeall() can ASSERT() that they're all gone.
646 */
647 pollcleanup();
648
649 if (p->p_rprof_cyclic != CYCLIC_NONE) {
650 mutex_enter(&cpu_lock);
651 cyclic_remove(p->p_rprof_cyclic);
652 mutex_exit(&cpu_lock);
653 }
654
655 mutex_enter(&p->p_lock);
656
657 /*
658 * Clean up any DTrace probes associated with this process.
659 */
660 if (p->p_dtrace_probes) {
661 ASSERT(dtrace_fasttrap_exit_ptr != NULL);
662 dtrace_fasttrap_exit_ptr(p);
663 }
664
665 while ((tmp_id = p->p_itimerid) != 0) {
666 p->p_itimerid = 0;
667 mutex_exit(&p->p_lock);
668 (void) untimeout(tmp_id);
669 mutex_enter(&p->p_lock);
670 }
671
672 lwp_cleanup();
673
674 /*
675 * We are about to exit; prevent our resource associations from
676 * being changed.
677 */
678 pool_barrier_enter();
679
680 /*
681 * Block the process against /proc now that we have really
682 * acquired p->p_lock (to manipulate p_tlist at least).
683 */
684 prbarrier(p);
685
686 sigfillset(&p->p_ignore);
687 sigemptyset(&p->p_siginfo);
688 sigemptyset(&p->p_sig);
689 sigemptyset(&p->p_extsig);
690 sigemptyset(&t->t_sig);
691 sigemptyset(&t->t_extsig);
692 sigemptyset(&p->p_sigmask);
693 sigdelq(p, t, 0);
694 lwp->lwp_cursig = 0;
695 lwp->lwp_extsig = 0;
696 p->p_flag &= ~(SKILLED | SEXTKILLED);
697 if (lwp->lwp_curinfo) {
698 siginfofree(lwp->lwp_curinfo);
699 lwp->lwp_curinfo = NULL;
700 }
701
702 t->t_proc_flag |= TP_LWPEXIT;
703 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
704 prlwpexit(t); /* notify /proc */
705 lwp_hash_out(p, t->t_tid);
706 prexit(p);
707
708 p->p_lwpcnt = 0;
709 p->p_tlist = NULL;
710 sigqfree(p);
711 term_mstate(t);
712 p->p_mterm = gethrtime();
713
714 exec_vp = p->p_exec;
715 execdir_vp = p->p_execdir;
716 p->p_exec = NULLVP;
717 p->p_execdir = NULLVP;
718 mutex_exit(&p->p_lock);
719
720 pr_free_watched_pages(p);
721
722 closeall(P_FINFO(p));
723
724 /* Free the controlling tty. (freectty() always assumes curproc.) */
725 ASSERT(p == curproc);
726 (void) freectty(B_TRUE);
727
728 #if defined(__sparc)
729 if (p->p_utraps != NULL)
730 utrap_free(p);
731 #endif
732 if (p->p_semacct) /* IPC semaphore exit */
733 semexit(p);
734 rv = wstat(why, what);
735
736 acct(rv);
737 exacct_commit_proc(p, rv);
738
739 /*
740 * Release any resources associated with C2 auditing
741 */
742 if (AU_AUDITING()) {
743 /*
744 * audit exit system call
745 */
746 audit_exit(why, what);
747 }
748
749 /*
750 * Free address space.
751 */
752 relvm();
753
754 if (exec_vp) {
755 /*
756 * Close this executable which has been opened when the process
757 * was created by getproc().
758 */
759 (void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL);
760 VN_RELE(exec_vp);
761 }
762 if (execdir_vp)
763 VN_RELE(execdir_vp);
764
765 /*
766 * Release held contracts.
767 */
768 contract_exit(p);
769
770 /*
771 * Depart our encapsulating process contract.
772 */
773 if ((p->p_flag & SSYS) == 0) {
774 ASSERT(p->p_ct_process);
775 contract_process_exit(p->p_ct_process, p, rv);
776 }
777
778 /*
779 * Remove pool association, and block if requested by pool_do_bind.
780 */
781 mutex_enter(&p->p_lock);
782 ASSERT(p->p_pool->pool_ref > 0);
783 atomic_dec_32(&p->p_pool->pool_ref);
784 p->p_pool = pool_default;
785 /*
786 * Now that our address space has been freed and all other threads
787 * in this process have exited, set the PEXITED pool flag. This
788 * tells the pools subsystem to ignore this process if a request
789 * is made to rebind it to a new pool.
790 */
791 p->p_poolflag |= PEXITED;
792 pool_barrier_exit();
793 mutex_exit(&p->p_lock);
794
795 mutex_enter(&pidlock);
796
797 /*
798 * Delete this process from the newstate list of its parent. We
799 * will put it in the right place via sigcld() at the end.
800 */
801 delete_ns(p->p_parent, p);
802
803 /*
804 * Reassign the orphans to the next of kin.
805 * Don't rearrange init's orphanage.
806 */
807 if ((q = p->p_orphan) != NULL && p != proc_init) {
808
809 proc_t *nokp = p->p_nextofkin;
810
811 for (;;) {
812 q->p_nextofkin = nokp;
813 if (q->p_nextorph == NULL)
814 break;
815 q = q->p_nextorph;
816 }
817 q->p_nextorph = nokp->p_orphan;
818 nokp->p_orphan = p->p_orphan;
819 p->p_orphan = NULL;
820 }
821
822 /*
823 * Reassign the children to init.
824 * Don't try to assign init's children to init.
825 */
826 if ((q = p->p_child) != NULL && p != proc_init) {
827 struct proc *np;
828 struct proc *initp = proc_init;
829 pid_t zone_initpid = 1;
830 struct proc *zoneinitp = NULL;
831 boolean_t setzonetop = B_FALSE;
832
833 if (!INGLOBALZONE(curproc)) {
834 zone_initpid = curproc->p_zone->zone_proc_initpid;
835
836 ASSERT(MUTEX_HELD(&pidlock));
837 zoneinitp = prfind(zone_initpid);
838 if (zoneinitp != NULL) {
839 initp = zoneinitp;
840 } else {
841 zone_initpid = 1;
842 setzonetop = B_TRUE;
843 }
844 }
845
846 pgdetach(p);
847
848 do {
849 np = q->p_sibling;
850 /*
851 * Delete it from its current parent new state
852 * list and add it to init new state list
853 */
854 delete_ns(q->p_parent, q);
855
856 q->p_ppid = zone_initpid;
857
858 q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
859 if (setzonetop) {
860 mutex_enter(&q->p_lock);
861 q->p_flag |= SZONETOP;
862 mutex_exit(&q->p_lock);
863 }
864 q->p_parent = initp;
865
866 /*
867 * Since q will be the first child,
868 * it will not have a previous sibling.
869 */
870 q->p_psibling = NULL;
871 if (initp->p_child) {
872 initp->p_child->p_psibling = q;
873 }
874 q->p_sibling = initp->p_child;
875 initp->p_child = q;
876 if (q->p_proc_flag & P_PR_PTRACE) {
877 mutex_enter(&q->p_lock);
878 sigtoproc(q, NULL, SIGKILL);
879 mutex_exit(&q->p_lock);
880 }
881 /*
882 * sigcld() will add the child to the parent's
883 * newstate list.
884 */
885 if (q->p_stat == SZOMB)
886 sigcld(q, NULL);
887 } while ((q = np) != NULL);
888
889 p->p_child = NULL;
890 ASSERT(p->p_child_ns == NULL);
891 }
892
893 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);
894
895 mutex_enter(&p->p_lock);
896 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */
897
898 /*
899 * Have our task accumulate our resource usage data before they
900 * become contaminated by p_cacct etc., and before we renounce
901 * membership of the task.
902 *
903 * We do this regardless of whether or not task accounting is active.
904 * This is to avoid having nonsense data reported for this task if
905 * task accounting is subsequently enabled. The overhead is minimal;
906 * by this point, this process has accounted for the usage of all its
907 * LWPs. We nonetheless do the work here, and under the protection of
908 * pidlock, so that the movement of the process's usage to the task
909 * happens at the same time as the removal of the process from the
910 * task, from the point of view of exacct_snapshot_task_usage().
911 */
912 exacct_update_task_mstate(p);
913
914 hrutime = mstate_aggr_state(p, LMS_USER);
915 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
916 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
917 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;
918
919 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER];
920 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM];
921 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP];
922 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT];
923 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT];
924 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT];
925 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
926 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP];
927 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU];
928 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED];
929
930 p->p_ru.minflt += p->p_cru.minflt;
931 p->p_ru.majflt += p->p_cru.majflt;
932 p->p_ru.nswap += p->p_cru.nswap;
933 p->p_ru.inblock += p->p_cru.inblock;
934 p->p_ru.oublock += p->p_cru.oublock;
935 p->p_ru.msgsnd += p->p_cru.msgsnd;
936 p->p_ru.msgrcv += p->p_cru.msgrcv;
937 p->p_ru.nsignals += p->p_cru.nsignals;
938 p->p_ru.nvcsw += p->p_cru.nvcsw;
939 p->p_ru.nivcsw += p->p_cru.nivcsw;
940 p->p_ru.sysc += p->p_cru.sysc;
941 p->p_ru.ioch += p->p_cru.ioch;
942
943 p->p_stat = SZOMB;
944 p->p_proc_flag &= ~P_PR_PTRACE;
945 p->p_wdata = what;
946 p->p_wcode = (char)why;
947
948 cdir = PTOU(p)->u_cdir;
949 rdir = PTOU(p)->u_rdir;
950 cwd = PTOU(p)->u_cwd;
951
952 ASSERT(cdir != NULL || p->p_parent == &p0);
953
954 /*
955 * Release resource controls, as they are no longer enforceable.
956 */
957 rctl_set_free(p->p_rctls);
958
959 /*
960 * Decrement tk_nlwps counter for our task.max-lwps resource control.
961 * An extended accounting record, if that facility is active, is
962 * scheduled to be written. We cannot give up task and project
963 * membership at this point because that would allow zombies to escape
964 * from the max-processes resource controls. Zombies stay in their
965 * current task and project until the process table slot is released
966 * in freeproc().
967 */
968 tk = p->p_task;
969
970 mutex_enter(&p->p_zone->zone_nlwps_lock);
971 tk->tk_nlwps--;
972 tk->tk_proj->kpj_nlwps--;
973 p->p_zone->zone_nlwps--;
974 mutex_exit(&p->p_zone->zone_nlwps_lock);
975
976 /*
977 * Clear the lwp directory and the lwpid hash table
978 * now that /proc can't bother us any more.
979 * We free the memory below, after dropping p->p_lock.
980 */
981 lwpdir = p->p_lwpdir;
982 lwpdir_sz = p->p_lwpdir_sz;
983 tidhash = p->p_tidhash;
984 tidhash_sz = p->p_tidhash_sz;
985 ret_tidhash = p->p_ret_tidhash;
986 p->p_lwpdir = NULL;
987 p->p_lwpfree = NULL;
988 p->p_lwpdir_sz = 0;
989 p->p_tidhash = NULL;
990 p->p_tidhash_sz = 0;
991 p->p_ret_tidhash = NULL;
992
993 /*
994 * If the process has context ops installed, call the exit routine
995 * on behalf of this last remaining thread. Normally exitpctx() is
996 * called during thread_exit() or lwp_exit(), but because this is the
997 * last thread in the process, we must call it here. By the time
998 * thread_exit() is called (below), the association with the relevant
999 * process has been lost.
1000 *
1001 * We also free the context here.
1002 */
1003 if (p->p_pctx) {
1004 kpreempt_disable();
1005 exitpctx(p);
1006 kpreempt_enable();
1007
1008 freepctx(p, 0);
1009 }
1010
1011 /*
1012 * curthread's proc pointer is changed to point to the 'sched'
1013 * process for the corresponding zone, except in the case when
1014 * the exiting process is in fact a zsched instance, in which
1015 * case the proc pointer is set to p0. We do this so that the
1016 * process still points at the right zone when we call the VN_RELE()
1017 * below.
1018 *
1019 * This is because curthread's original proc pointer can be freed as
1020 * soon as the child sends a SIGCLD to its parent. We use zsched so
1021 * that for user processes, even in the final moments of death, the
1022 * process is still associated with its zone.
1023 */
1024 if (p != t->t_procp->p_zone->zone_zsched)
1025 t->t_procp = t->t_procp->p_zone->zone_zsched;
1026 else
1027 t->t_procp = &p0;
1028
1029 mutex_exit(&p->p_lock);
1030 if (!evaporate) {
1031 /*
1032 * The brand specific code only happens when the brand has a
1033 * function to call in place of sigcld and the parent of the
1034 * exiting process is not the global zone init. If the parent
1035 * is the global zone init, then the process was reparented,
1036 * and we don't want brand code delivering possibly strange
1037 * signals to init. Also, init is not branded, so any brand
1038 * specific exit data will not be picked up by init anyway.
1039 */
1040 if (PROC_IS_BRANDED(p) &&
1041 BROP(p)->b_exit_with_sig != NULL &&
1042 p->p_ppid != 1) {
1043 /*
1044 * The code for _fini that could unload the brand_t
1045 * blocks until the count of zones using the module
1046 * reaches zero. Zones decrement the refcount on their
1047 * brands only after all user tasks in that zone have
1048 * exited and been waited on. The decrement on the
1049 * brand's refcount happens in zone_destroy(). That
1050 * depends on zone_shutdown() having been completed.
1051 * zone_shutdown() includes a call to zone_empty(),
1052 * where the zone waits for itself to reach the state
1053 * ZONE_IS_EMPTY. This state is only set in either
1054 * zone_shutdown(), when there are no user processes as
1055 * the zone enters this function, or in
1056 * zone_task_rele(). zone_task_rele() is called from
1057 * code triggered by waiting on processes, not by the
1058 * processes exiting through proc_exit(). This means
1059 * all the branded processes that could exist for a
1060 * specific brand_t must exit and get reaped before the
1061 * refcount on the brand_t can reach 0. _fini will
1062 * never unload the corresponding brand module before
1063 * proc_exit finishes execution for all processes
1064 * branded with a particular brand_t, which makes the
1065 * operation below safe to do. Brands that wish to use
1066 * this mechanism must wait in _fini as described
1067 * above.
1068 */
1069 BROP(p)->b_exit_with_sig(p, sqp);
1070 } else {
1071 p->p_pidflag &= ~CLDPEND;
1072 sigcld(p, sqp);
1073 }
1074
1075 } else {
1076 /*
1077 * Do what sigcld() would do if the disposition
1078 * of the SIGCHLD signal were set to be ignored.
1079 */
1080 cv_broadcast(&p->p_srwchan_cv);
1081 freeproc(p);
1082 }
1083 mutex_exit(&pidlock);
1084
1085 /*
1086 * We don't release u_cdir and u_rdir until SZOMB is set.
1087 * This protects us against dofusers().
1088 */
1089 if (cdir)
1090 VN_RELE(cdir);
1091 if (rdir)
1092 VN_RELE(rdir);
1093 if (cwd)
1094 refstr_rele(cwd);
1095
1096 /*
1097 * task_rele() may ultimately cause the zone to go away (or
1098 * may cause the last user process in a zone to go away, which
1099 * signals zsched to go away). So prior to this call, we must
1100 * no longer point at zsched.
1101 */
1102 t->t_procp = &p0;
1103
1104 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
1105 kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
1106 while (ret_tidhash != NULL) {
1107 ret_tidhash_t *next = ret_tidhash->rth_next;
1108 kmem_free(ret_tidhash->rth_tidhash,
1109 ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
1110 kmem_free(ret_tidhash, sizeof (*ret_tidhash));
1111 ret_tidhash = next;
1112 }
1113
1114 thread_exit();
1115 /* NOTREACHED */
1116 }
1117
1118 /*
1119 * Format siginfo structure for wait system calls.
1120 */
1121 void
1122 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
1123 {
1124 ASSERT(MUTEX_HELD(&pidlock));
1125
1126 bzero(ip, sizeof (k_siginfo_t));
1127 ip->si_signo = SIGCLD;
1128 ip->si_code = pp->p_wcode;
1129 ip->si_pid = pp->p_pid;
1130 ip->si_ctid = PRCTID(pp);
1131 ip->si_zoneid = pp->p_zone->zone_id;
1132 ip->si_status = pp->p_wdata;
1133 ip->si_stime = pp->p_stime;
1134 ip->si_utime = pp->p_utime;
1135
1136 if (waitflag) {
1137 pp->p_wcode = 0;
1138 pp->p_wdata = 0;
1139 pp->p_pidflag &= ~CLDPEND;
1140 }
1141 }
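
/*
 * For example (a sketch only): for a child that exited normally with
 * status 3, the siginfo filled in above has si_signo == SIGCLD,
 * si_code == CLD_EXITED and si_status == 3, along with the child's pid,
 * contract id, zone id and accumulated user/system time.
 */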
1142
1143 /*
1144 * Wait system call.
1145 * Search for a terminated (zombie) child,
1146 * finally lay it to rest, and collect its status.
1147 * Look also for stopped children,
1148 * and pass back status from them.
1149 */
1150 int
1151 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
1152 {
1153 proc_t *cp, *pp;
1154 int waitflag = !(options & WNOWAIT);
1155 boolean_t have_brand_helper = B_FALSE;
1156
1157 /*
1158 * Obsolete flag, defined here only for binary compatibility
1159 * with old statically linked executables. Delete this when
1160 * we no longer care about these old and broken applications.
1161 */
1162 #define _WNOCHLD 0400
1163 options &= ~_WNOCHLD;
1164
1165 if (options == 0 || (options & ~WOPTMASK))
1166 return (EINVAL);
1167
1168 switch (idtype) {
1169 case P_PID:
1170 case P_PGID:
1171 if (id < 0 || id >= maxpid)
1172 return (EINVAL);
1173 /* FALLTHROUGH */
1174 case P_ALL:
1175 break;
1176 default:
1177 return (EINVAL);
1178 }
1179
1180 pp = ttoproc(curthread);
1181
1182 /*
1183 * Anytime you are looking for a process, you take pidlock to prevent
1184 * things from changing as you look.
1185 */
1186 mutex_enter(&pidlock);
1187
1188 /*
1189 * If we are only looking for exited processes and the child_ns list
1190 * is empty, there is no reason to look at all of the children.
1191 */
1192 if (idtype == P_ALL &&
1193 (options & ~WNOWAIT) == (WNOHANG | WEXITED) &&
1194 pp->p_child_ns == NULL) {
1195 if (pp->p_child) {
1196 mutex_exit(&pidlock);
1197 bzero(ip, sizeof (k_siginfo_t));
1198 return (0);
1199 }
1200 mutex_exit(&pidlock);
1201 return (ECHILD);
1202 }
1203
1204 if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) {
1205 have_brand_helper = B_TRUE;
1206 }
1207
1208 while (pp->p_child != NULL || have_brand_helper) {
1209 boolean_t brand_wants_wait = B_FALSE;
1210 int proc_gone = 0;
1211 int found = 0;
1212
1213 /*
1214 * Give the brand a chance to return synthetic results from
1215 * this waitid() call before we do the real thing.
1216 */
1217 if (have_brand_helper) {
1218 int ret;
1219
1220 if (BROP(pp)->b_waitid_helper(idtype, id, ip, options,
1221 &brand_wants_wait, &ret) == 0) {
1222 mutex_exit(&pidlock);
1223 return (ret);
1224 }
1225
1226 if (pp->p_child == NULL) {
1227 goto no_real_children;
1228 }
1229 }
1230
1231 /*
1232 * Look for interesting children in the newstate list.
1233 */
1234 VERIFY(pp->p_child != NULL);
1235 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
1236 if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
1237 continue;
1238 if (idtype == P_PID && id != cp->p_pid)
1239 continue;
1240 if (idtype == P_PGID && id != cp->p_pgrp)
1241 continue;
1242 if (PROC_IS_BRANDED(pp)) {
1243 if (BROP(pp)->b_wait_filter != NULL &&
1244 BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
1245 continue;
1246 }
1247
1248 switch (cp->p_wcode) {
1249
1250 case CLD_TRAPPED:
1251 case CLD_STOPPED:
1252 case CLD_CONTINUED:
1253 cmn_err(CE_PANIC,
1254 "waitid: wrong state %d on the p_newstate"
1255 " list", cp->p_wcode);
1256 break;
1257
1258 case CLD_EXITED:
1259 case CLD_DUMPED:
1260 case CLD_KILLED:
1261 if (!(options & WEXITED)) {
1262 /*
1263 * Count how many are already gone
1264 * for good.
1265 */
1266 proc_gone++;
1267 break;
1268 }
1269 if (!waitflag) {
1270 winfo(cp, ip, 0);
1271 } else {
1272 winfo(cp, ip, 1);
1273 freeproc(cp);
1274 }
1275 mutex_exit(&pidlock);
1276 if (waitflag) { /* accept SIGCLD */
1277 sigcld_delete(ip);
1278 sigcld_repost();
1279 }
1280 return (0);
1281 }
1282
1283 if (idtype == P_PID)
1284 break;
1285 }
1286
1287 /*
1288 * Wow! None of the processes on the p_sibling_ns list were
1289 * interesting. Check all the kids!
1290 */
1291 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
1292 if (idtype == P_PID && id != cp->p_pid)
1293 continue;
1294 if (idtype == P_PGID && id != cp->p_pgrp)
1295 continue;
1296 if (PROC_IS_BRANDED(pp)) {
1297 if (BROP(pp)->b_wait_filter != NULL &&
1298 BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
1299 continue;
1300 }
1301
1302 switch (cp->p_wcode) {
1303 case CLD_TRAPPED:
1304 if (!(options & WTRAPPED))
1305 break;
1306 winfo(cp, ip, waitflag);
1307 mutex_exit(&pidlock);
1308 if (waitflag) { /* accept SIGCLD */
1309 sigcld_delete(ip);
1310 sigcld_repost();
1311 }
1312 return (0);
1313
1314 case CLD_STOPPED:
1315 if (!(options & WSTOPPED))
1316 break;
1317 /* Is it still stopped? */
1318 mutex_enter(&cp->p_lock);
1319 if (!jobstopped(cp)) {
1320 mutex_exit(&cp->p_lock);
1321 break;
1322 }
1323 mutex_exit(&cp->p_lock);
1324 winfo(cp, ip, waitflag);
1325 mutex_exit(&pidlock);
1326 if (waitflag) { /* accept SIGCLD */
1327 sigcld_delete(ip);
1328 sigcld_repost();
1329 }
1330 return (0);
1331
1332 case CLD_CONTINUED:
1333 if (!(options & WCONTINUED))
1334 break;
1335 winfo(cp, ip, waitflag);
1336 mutex_exit(&pidlock);
1337 if (waitflag) { /* accept SIGCLD */
1338 sigcld_delete(ip);
1339 sigcld_repost();
1340 }
1341 return (0);
1342
1343 case CLD_EXITED:
1344 case CLD_DUMPED:
1345 case CLD_KILLED:
1346 if (idtype != P_PID &&
1347 (cp->p_pidflag & CLDWAITPID))
1348 continue;
1349 /*
1350 * Don't complain if a process was found in
1351 * the first loop but we broke out of the loop
1352 * because of the arguments passed to us.
1353 */
1354 if (proc_gone == 0) {
1355 cmn_err(CE_PANIC,
1356 "waitid: wrong state on the"
1357 " p_child list");
1358 } else {
1359 break;
1360 }
1361 }
1362
1363 found++;
1364
1365 if (idtype == P_PID)
1366 break;
1367 }
1368
1369 no_real_children:
1370 /*
1371 * If we found no interesting processes at all,
1372 * break out and return ECHILD.
1373 */
1374 if (!brand_wants_wait && (found + proc_gone == 0))
1375 break;
1376
1377 if (options & WNOHANG) {
1378 mutex_exit(&pidlock);
1379 bzero(ip, sizeof (k_siginfo_t));
1380 /*
1381 * We should set ip->si_signo = SIGCLD,
1382 * but there is an SVVS test that expects
1383 * ip->si_signo to be zero in this case.
1384 */
1385 return (0);
1386 }
1387
1388 /*
1389 * If we found no processes of interest that could
1390 * change state while we wait, we don't wait at all.
1391 * Get out with ECHILD according to SVID.
1392 */
1393 if (!brand_wants_wait && (found == proc_gone))
1394 break;
1395
1396 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
1397 mutex_exit(&pidlock);
1398 return (EINTR);
1399 }
1400 }
1401 mutex_exit(&pidlock);
1402 return (ECHILD);
1403 }
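
/*
 * As a usage note (an assumption about libc, not something defined in
 * this file): the classic wait(3C) and waitpid(3C) interfaces are
 * implemented on top of this function, adding WEXITED | WTRAPPED to the
 * caller's options and then folding the returned k_siginfo_t back into
 * an old-style status word much as wstat() does above.
 */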
1404
1405 int
1406 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1407 {
1408 int error;
1409 k_siginfo_t info;
1410
1411 if (error = waitid(idtype, id, &info, options))
1412 return (set_errno(error));
1413 if (copyout(&info, infop, sizeof (k_siginfo_t)))
1414 return (set_errno(EFAULT));
1415 return (0);
1416 }
1417
1418 #ifdef _SYSCALL32_IMPL
1419
1420 int
1421 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1422 {
1423 int error;
1424 k_siginfo_t info;
1425 siginfo32_t info32;
1426
1427 if (error = waitid(idtype, id, &info, options))
1428 return (set_errno(error));
1429 siginfo_kto32(&info, &info32);
1430 if (copyout(&info32, infop, sizeof (info32)))
1431 return (set_errno(EFAULT));
1432 return (0);
1433 }
1434
1435 #endif /* _SYSCALL32_IMPL */
1436
1437 void
1438 proc_detach(proc_t *p)
1439 {
1440 proc_t *q;
1441
1442 ASSERT(MUTEX_HELD(&pidlock));
1443
1444 q = p->p_parent;
1445 ASSERT(q != NULL);
1446
1447 /*
1448 * Take it off the newstate list of its parent
1449 */
1450 delete_ns(q, p);
1451
1452 if (q->p_child == p) {
1453 q->p_child = p->p_sibling;
1454 /*
1455 * If the parent has no children, it better not
1456 * have any with new states either!
1457 */
1458 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL);
1459 }
1460
1461 if (p->p_sibling) {
1462 p->p_sibling->p_psibling = p->p_psibling;
1463 }
1464
1465 if (p->p_psibling) {
1466 p->p_psibling->p_sibling = p->p_sibling;
1467 }
1468 }
1469
1470 /*
1471 * Remove zombie children from the process table.
1472 */
1473 void
1474 freeproc(proc_t *p)
1475 {
1476 proc_t *q;
1477 task_t *tk;
1478
1479 ASSERT(p->p_stat == SZOMB);
1480 ASSERT(p->p_tlist == NULL);
1481 ASSERT(MUTEX_HELD(&pidlock));
1482
1483 sigdelq(p, NULL, 0);
1484 if (p->p_killsqp) {
1485 siginfofree(p->p_killsqp);
1486 p->p_killsqp = NULL;
1487 }
1488
1489 /* Clear any remaining brand data */
1490 if (PROC_IS_BRANDED(p)) {
1491 brand_clearbrand(p, B_FALSE);
1492 }
1493
1494
1495 prfree(p); /* inform /proc */
1496
1497 /*
1498 * Don't free the init process.
1499 * Other dying processes will access it.
1500 */
1501 if (p == proc_init)
1502 return;
1503
1504
1505 /*
1506 * We wait until now to free the cred structure because a
1507 * zombie process's credentials may be examined by /proc.
1508 * No cred locking needed because there are no threads at this point.
1509 */
1510 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
1511 crfree(p->p_cred);
1512 if (p->p_corefile != NULL) {
1513 corectl_path_rele(p->p_corefile);
1514 p->p_corefile = NULL;
1515 }
1516 if (p->p_content != NULL) {
1517 corectl_content_rele(p->p_content);
1518 p->p_content = NULL;
1519 }
1520
1521 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) ||
1522 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) {
1523 /*
1524 * This should still do the right thing since p_utime/p_stime
1525 * are set to their correct values on process exit, so they
1526 * will be properly updated here.
1527 */
1528 p->p_nextofkin->p_cutime += p->p_utime;
1529 p->p_nextofkin->p_cstime += p->p_stime;
1530
1531 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER];
1532 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM];
1533 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP];
1534 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT];
1535 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT];
1536 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT];
1537 p->p_nextofkin->p_cacct[LMS_USER_LOCK] +=
1538     p->p_acct[LMS_USER_LOCK];
1539 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP];
1540 p->p_nextofkin->p_cacct[LMS_WAIT_CPU] +=
1541     p->p_acct[LMS_WAIT_CPU];
1542 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED];
1543
1544 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt;
1545 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt;
1546 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap;
1547 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock;
1548 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock;
1549 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd;
1550 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv;
1551 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals;
1552 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw;
1553 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw;
1554 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc;
1555 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch;
1556
1557 }
1558
1559 q = p->p_nextofkin;
1560 if (q && q->p_orphan == p)
1561 q->p_orphan = p->p_nextorph;
1562 else if (q) {
1563 for (q = q->p_orphan; q; q = q->p_nextorph)
1564 if (q->p_nextorph == p)
1565 break;
1566 ASSERT(q && q->p_nextorph == p);
1567 q->p_nextorph = p->p_nextorph;
1568 }
1569
1570 /*
1571 * The process table slot is being freed, so it is now safe to give up
1572 * task and project membership.
1573 */
1574 mutex_enter(&p->p_lock);
1575 tk = p->p_task;
1576 task_detach(p);
1577 mutex_exit(&p->p_lock);
1578
1579 proc_detach(p);
1580 pid_exit(p, tk); /* frees pid and proc structure */
1581
1582 task_rele(tk);
1583 }
1584
1585 /*
1586 * Delete process "child" from the newstate list of process "parent"
1587 */
1588 void
1589 delete_ns(proc_t *parent, proc_t *child)
1590 {
1591 proc_t **ns;
1592
1593 ASSERT(MUTEX_HELD(&pidlock));
1594 ASSERT(child->p_parent == parent);
1595 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) {
1596 if (*ns == child) {
1597
1598 ASSERT((*ns)->p_parent == parent);
1599
1600 *ns = child->p_sibling_ns;
1601 child->p_sibling_ns = NULL;
1602 return;
1603 }
1604 }
1605 }
1606
1607 /*
1608 * Add process "child" to the new state list of process "parent"
1609 */
1610 void
1611 add_ns(proc_t *parent, proc_t *child)
1612 {
1613 ASSERT(child->p_sibling_ns == NULL);
1614 child->p_sibling_ns = parent->p_child_ns;
1615 parent->p_child_ns = child;
1616 }