/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2018 Joyent, Inc.
 * Copyright 2020 Oxide Computer Company
 * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/proc.h>
#include <sys/ucontext.h>
#include <sys/procfs.h>
#include <sys/vnode.h>
#include <sys/acct.h>
#include <sys/var.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/wait.h>
#include <sys/siginfo.h>
#include <sys/procset.h>
#include <sys/class.h>
#include <sys/file.h>
#include <sys/session.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/prsystm.h>
#include <sys/ipc.h>
#include <sys/sem_impl.h>
#include <c2/audit.h>
#include <sys/aio_impl.h>
#include <vm/as.h>
#include <sys/poll.h>
#include <sys/door.h>
#include <sys/lwpchan_impl.h>
#include <sys/utrap.h>
#include <sys/task.h>
#include <sys/exacct.h>
#include <sys/cyclic.h>
#include <sys/schedctl.h>
#include <sys/rctl.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
#include <sys/list.h>
#include <sys/dtrace.h>
#include <sys/pool.h>
#include <sys/sdt.h>
#include <sys/corectl.h>
#include <sys/core.h>
#include <sys/brand.h>
#include <sys/libc_kernel.h>

/*
 * convert code/data pair into old style wait status
 */
int
wstat(int code, int data)
{
	int stat = (data & 0377);

	switch (code) {
	case CLD_EXITED:
		stat <<= 8;
		break;
	case CLD_DUMPED:
		stat |= WCOREFLG;
		break;
	case CLD_KILLED:
		break;
	case CLD_TRAPPED:
	case CLD_STOPPED:
		stat <<= 8;
		stat |= WSTOPFLG;
		break;
	case CLD_CONTINUED:
		stat = WCONTFLG;
		break;
	default:
		cmn_err(CE_PANIC, "wstat: bad code");
		/* NOTREACHED */
	}
	return (stat);
}

static char *
exit_reason(char *buf, size_t bufsz, int what, int why)
{
	switch (why) {
	case CLD_EXITED:
		(void) snprintf(buf, bufsz, "exited with status %d", what);
		break;
	case CLD_KILLED:
		(void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
		break;
	case CLD_DUMPED:
		(void) snprintf(buf, bufsz, "core dumped on signal %d", what);
		break;
	default:
		(void) snprintf(buf, bufsz, "encountered unknown error "
		    "(%d, %d)", why, what);
		break;
	}

	return (buf);
}

/*
 * exit system call: pass back caller's arg.
 */
void
rexit(int rval)
{
	exit(CLD_EXITED, rval);
}

/*
 * Bump the init_restarts kstat and let interested parties know about the
 * restart.
 */
static void
restart_init_notify(zone_t *zone)
{
	nvlist_t *nvl = NULL;

	zone->zone_proc_init_restarts++;

	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0 &&
	    nvlist_add_uint32(nvl, ZONE_CB_RESTARTS,
	    zone->zone_proc_init_restarts) == 0) {
		zone_sysevent_publish(zone, ZONE_EVENT_INIT_CLASS,
		    ZONE_EVENT_INIT_RESTART_SC, nvl);
	}

	nvlist_free(nvl);
}

/*
 * Called by proc_exit() when a zone's init exits, presumably because
 * it failed. As long as the given zone is still in the "running"
 * state, we will re-exec() init, but first we need to reset things
 * which are usually inherited across exec() but will break init's
 * assumption that it is being exec()'d from a virgin process. Most
 * importantly this includes closing all file descriptors (exec only
 * closes those marked close-on-exec) and resetting signals (exec only
 * resets handled signals, and we need to clear any signals which
 * killed init). Anything else that exec(2) says would be inherited,
 * but would affect the execution of init, needs to be reset.
 */
static int
restart_init(int what, int why)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	proc_t *pp = p->p_zone->zone_zsched;
	user_t *up = PTOU(p);

	vnode_t *oldcd, *oldrd;
	int i, err;
	char reason_buf[64];

	/*
	 * Let zone admin (and global zone admin if this is for a non-global
	 * zone) know that init has failed and will be restarted.
	 */
	zcmn_err(p->p_zone->zone_id, CE_WARN,
	    "init(1M) %s: restarting automatically",
	    exit_reason(reason_buf, sizeof (reason_buf), what, why));

	if (!INGLOBALZONE(p)) {
		cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
		    "restarting automatically",
		    p->p_zone->zone_name, p->p_pid, reason_buf);
	}

	/*
	 * Remove any fpollinfo_t's for this (last) thread from our file
	 * descriptors so closeall() can ASSERT() that they're all gone.
	 * Then close all open file descriptors in the process.
	 */
	pollcleanup();
	closeall(P_FINFO(p));

	/*
	 * Grab p_lock and begin clearing miscellaneous global process
	 * state that needs to be reset before we exec the new init(1M).
	 */

	mutex_enter(&p->p_lock);
	prbarrier(p);

	p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
	up->u_cmask = CMASK;

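	/*
	 * Discard any signals that are pending or held for the last
	 * remaining thread and for the process as a whole; a signal that
	 * killed the old init must not be redelivered to the new one.
	 */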
	sigemptyset(&t->t_hold);
	sigemptyset(&t->t_sig);
	sigemptyset(&t->t_extsig);

	sigemptyset(&p->p_sig);
	sigemptyset(&p->p_extsig);

	sigdelq(p, t, 0);
	sigdelq(p, NULL, 0);

	if (p->p_killsqp) {
		siginfofree(p->p_killsqp);
		p->p_killsqp = NULL;
	}

	/*
	 * Reset any signals that are ignored back to the default disposition.
	 * Other u_signal members will be cleared when exec calls sigdefault().
	 */
	for (i = 1; i < NSIG; i++) {
		if (up->u_signal[i - 1] == SIG_IGN) {
			up->u_signal[i - 1] = SIG_DFL;
			sigemptyset(&up->u_sigmask[i - 1]);
		}
	}

	/*
	 * Clear the current signal, any signal info associated with it, and
	 * any signal information from contracts and/or contract templates.
	 */
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	if (lwp->lwp_curinfo != NULL) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}
	lwp_ctmpl_clear(lwp, B_FALSE);

	/*
	 * Reset both the process root directory and the current working
	 * directory to the root of the zone just as we do during boot.
	 */
	VN_HOLD(p->p_zone->zone_rootvp);
	oldrd = up->u_rdir;
	up->u_rdir = p->p_zone->zone_rootvp;

	VN_HOLD(p->p_zone->zone_rootvp);
	oldcd = up->u_cdir;
	up->u_cdir = p->p_zone->zone_rootvp;

	if (up->u_cwd != NULL) {
		refstr_rele(up->u_cwd);
		up->u_cwd = NULL;
	}

	/* Reset security flags */
	mutex_enter(&pp->p_lock);
	p->p_secflags = pp->p_secflags;
	mutex_exit(&pp->p_lock);

	mutex_exit(&p->p_lock);

	if (oldrd != NULL)
		VN_RELE(oldrd);
	if (oldcd != NULL)
		VN_RELE(oldcd);

	/*
	 * It's possible that a zone's init will have become privilege aware
	 * and modified privilege sets; reset them.
	 */
	cred_t *oldcr, *newcr;

	mutex_enter(&p->p_crlock);
	oldcr = p->p_cred;
	mutex_enter(&pp->p_crlock);
	crhold(newcr = p->p_cred = pp->p_cred);
	mutex_exit(&pp->p_crlock);
	mutex_exit(&p->p_crlock);
	crfree(oldcr);
	/* Additional hold for the current thread - expected by crset() */
	crhold(newcr);
	crset(p, newcr);

	/* Free the controlling tty. (freectty() always assumes curproc.) */
	ASSERT(p == curproc);
	(void) freectty(B_TRUE);

	restart_init_notify(p->p_zone);

	/*
	 * Now exec() the new init(1M) on top of the current process. If we
	 * succeed, the caller will treat this like a successful system call.
	 * If we fail, we issue messages and the caller will proceed with exit.
	 */
	err = exec_init(p->p_zone->zone_initname, NULL);

	if (err == 0)
		return (0);

	zcmn_err(p->p_zone->zone_id, CE_WARN,
	    "failed to restart init(1M) (err=%d): system reboot required", err);

	if (!INGLOBALZONE(p)) {
		cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
		    "(pid %d, err=%d): zoneadm(1M) boot required",
		    p->p_zone->zone_name, p->p_pid, err);
	}

	return (-1);
}

/*
 * Release resources.
 * Enter zombie state.
 * Wake up parent and init processes,
 * and dispose of children.
 */
void
exit(int why, int what)
{
	/*
	 * If proc_exit() fails, then some other lwp in the process
	 * got there first. We just have to call lwp_exit() to allow
	 * the other lwp to finish exiting the process. Otherwise we're
	 * restarting init, and should return.
	 */
	if (proc_exit(why, what) != 0) {
		mutex_enter(&curproc->p_lock);
		ASSERT(curproc->p_flag & SEXITLWPS);
		lwp_exit();
		/* NOTREACHED */
	}
}

/*
 * Set the SEXITING flag on the process, after making sure /proc does
 * not have it locked. This is done in more places than proc_exit(),
 * so it is a separate function.
 */
void
proc_is_exiting(proc_t *p)
{
	mutex_enter(&p->p_lock);
	prbarrier(p);
	p->p_flag |= SEXITING;
	mutex_exit(&p->p_lock);
}

/*
 * Return true if the zone's init process is restarted, false if exit
 * processing should proceed.
 */
static boolean_t
zone_init_exit(zone_t *z, int why, int what)
{
	/*
	 * Typically we don't let the zone's init exit unless zone_start_init()
	 * failed its exec, or we are shutting down the zone or the machine,
	 * although the various flags handled within this function will control
	 * the behavior.
	 *
	 * Since we are single threaded, we don't need to lock the following
	 * accesses to zone_proc_initpid.
	 */
	if (z->zone_boot_err != 0 ||
	    zone_status_get(z) >= ZONE_IS_SHUTTING_DOWN ||
	    zone_status_get(global_zone) >= ZONE_IS_SHUTTING_DOWN) {
		/*
		 * Clear the zone's init pid and proceed with exit processing.
		 */
		z->zone_proc_initpid = -1;
		return (B_FALSE);
	}

	/*
	 * There are a variety of configuration flags on the zone to control
	 * init exit behavior.
	 *
	 * If the init process should be restarted, the "zone_restart_init"
	 * member will be set.
	 */
	if (!z->zone_restart_init) {
		/*
		 * The zone has been set up to halt when init exits.
		 */
		z->zone_init_status = wstat(why, what);
		(void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, zone_kcred());
		z->zone_proc_initpid = -1;
		return (B_FALSE);
	}

	/*
	 * At this point we know we're configured to restart init, but there
	 * are various modifiers to that behavior.
	 */

	if (z->zone_reboot_on_init_exit) {
		/*
		 * Some init programs in branded zones do not tolerate a
		 * restart in the traditional manner; setting
		 * "zone_reboot_on_init_exit" will cause the entire zone to be
		 * rebooted instead.
		 */

		if (z->zone_restart_init_0) {
			/*
			 * Some init programs in branded zones only want to
			 * restart if they exit 0, otherwise the zone should
			 * shutdown. Setting the "zone_restart_init_0" member
			 * controls this behavior.
			 */
			if (why == CLD_EXITED && what == 0) {
				/* Trigger a zone reboot */
				(void) zone_kadmin(A_REBOOT, 0, NULL,
				    zone_kcred());
			} else {
				/* Shutdown instead of reboot */
				(void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
				    zone_kcred());
			}
		} else {
			/* Trigger a zone reboot */
			(void) zone_kadmin(A_REBOOT, 0, NULL, zone_kcred());
		}

		z->zone_init_status = wstat(why, what);
		z->zone_proc_initpid = -1;
		return (B_FALSE);
	}

	if (z->zone_restart_init_0) {
		/*
		 * Some init programs in branded zones only want to restart if
		 * they exit 0, otherwise the zone should shutdown. Setting the
		 * "zone_restart_init_0" member controls this behavior.
		 *
		 * In this case we only restart init if it exited successfully.
		 */
		if (why == CLD_EXITED && what == 0 &&
		    restart_init(what, why) == 0) {
			return (B_TRUE);
		}
	} else {
		/*
		 * No restart modifiers on the zone, attempt to restart init.
		 */
		if (restart_init(what, why) == 0) {
			return (B_TRUE);
		}
	}

	/*
	 * The restart failed, the zone will shut down.
	 */
	z->zone_init_status = wstat(why, what);
	(void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, zone_kcred());
	z->zone_proc_initpid = -1;
	return (B_FALSE);
}

/*
 * Return value:
 *	1 - exitlwps() failed, call (or continue) lwp_exit()
 *	0 - restarting init. Return through system call path
 */
int
proc_exit(int why, int what)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	zone_t *z = p->p_zone;
	timeout_id_t tmp_id;
	int rv;
	proc_t *q;
	task_t *tk;
	vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
	sigqueue_t *sqp;
	lwpdir_t *lwpdir;
	uint_t lwpdir_sz;
	tidhash_t *tidhash;
	uint_t tidhash_sz;
	ret_tidhash_t *ret_tidhash;
	refstr_t *cwd;
	hrtime_t hrutime, hrstime;
	int evaporate;

	/*
	 * Stop and discard the process's lwps except for the current one,
	 * unless some other lwp beat us to it. If exitlwps() fails then
	 * return and the calling lwp will call (or continue in) lwp_exit().
	 */
	proc_is_exiting(p);
	if (exitlwps(0) != 0)
		return (1);

	mutex_enter(&p->p_lock);
	if (p->p_ttime > 0) {
		/*
		 * Account any remaining ticks charged to this process
		 * on its way out.
		 */
		(void) task_cpu_time_incr(p->p_task, p->p_ttime);
		p->p_ttime = 0;
	}
	mutex_exit(&p->p_lock);

	if (p->p_pid == z->zone_proc_initpid) {
		/* If zone's init restarts, we're done here. */
		if (zone_init_exit(z, why, what))
			return (0);
	}

	/*
	 * Delay firing probes (and performing brand cleanup) until after the
	 * zone_proc_initpid check. Cases which result in zone shutdown or
	 * restart via zone_kadmin eventually result in a call back to
	 * proc_exit.
	 */
	DTRACE_PROC(lwp__exit);
	DTRACE_PROC1(exit, int, why);

	/*
	 * Will perform any brand specific proc exit processing. Since this
	 * is always the last lwp, will also perform lwp exit/free and proc
	 * exit. Brand data will be freed when the process is reaped.
	 */
	if (PROC_IS_BRANDED(p)) {
		BROP(p)->b_lwpexit(lwp);
		BROP(p)->b_proc_exit(p);
		/*
		 * To ensure that b_proc_exit has access to brand-specific data
		 * contained by the one remaining lwp, call the freelwp hook as
		 * the last part of this clean-up process.
		 */
		BROP(p)->b_freelwp(lwp);
		lwp_detach_brand_hdlrs(lwp);
	}

	lwp_pcb_exit();

	/*
	 * Allocate a sigqueue now, before we grab locks.
	 * It will be given to sigcld(), below.
	 * Special case: If we will be making the process disappear
	 * without a trace because it is either:
	 *	* an exiting SSYS process, or
	 *	* a posix_spawn() vfork child who requests it,
	 * we don't bother to allocate a useless sigqueue.
	 */
	evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
	    why == CLD_EXITED && what == _EVAPORATE);
	if (!evaporate)
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);

	/*
	 * revoke any doors created by the process.
	 */
	if (p->p_door_list)
		door_exit();

	/*
	 * Release schedctl data structures.
	 */
	if (p->p_pagep)
		schedctl_proc_cleanup();

	/*
	 * make sure all pending kaio has completed.
	 */
	if (p->p_aio)
		aio_cleanup_exit();

	/*
	 * discard the lwpchan cache.
	 */
	if (p->p_lcp != NULL)
		lwpchan_destroy_cache(0);

	/*
	 * Clean up any DTrace helper actions or probes for the process.
	 */
	if (p->p_dtrace_helpers != NULL) {
		ASSERT(dtrace_helpers_cleanup != NULL);
		(*dtrace_helpers_cleanup)(p);
	}

	/*
	 * Clean up any signalfd state for the process.
	 */
	if (p->p_sigfd != NULL) {
		VERIFY(sigfd_exit_helper != NULL);
		(*sigfd_exit_helper)();
	}

	/* untimeout the realtime timers */
	if (p->p_itimer != NULL)
		timer_exit();

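	/* Cancel any pending alarm(2) timeout. */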
	if ((tmp_id = p->p_alarmid) != 0) {
		p->p_alarmid = 0;
		(void) untimeout(tmp_id);
	}

	/*
	 * If we had generated any upanic(2) state, free that now.
	 */
	if (p->p_upanic != NULL) {
		kmem_free(p->p_upanic, PRUPANIC_BUFLEN);
		p->p_upanic = NULL;
	}

	/*
	 * Remove any fpollinfo_t's for this (last) thread from our file
	 * descriptors so closeall() can ASSERT() that they're all gone.
	 */
	pollcleanup();

	if (p->p_rprof_cyclic != CYCLIC_NONE) {
		mutex_enter(&cpu_lock);
		cyclic_remove(p->p_rprof_cyclic);
		mutex_exit(&cpu_lock);
	}

	mutex_enter(&p->p_lock);

	/*
	 * Clean up any DTrace probes associated with this process.
	 */
	if (p->p_dtrace_probes) {
		ASSERT(dtrace_fasttrap_exit_ptr != NULL);
		dtrace_fasttrap_exit_ptr(p);
	}

	while ((tmp_id = p->p_itimerid) != 0) {
		p->p_itimerid = 0;
		mutex_exit(&p->p_lock);
		(void) untimeout(tmp_id);
		mutex_enter(&p->p_lock);
	}

	lwp_cleanup();

	/*
	 * We are about to exit; prevent our resource associations from
	 * being changed.
	 */
	pool_barrier_enter();

	/*
	 * Block the process against /proc now that we have really
	 * acquired p->p_lock (to manipulate p_tlist at least).
	 */
	prbarrier(p);

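	/*
	 * Ignore all signals from here on and discard anything that is
	 * already pending; nothing further can be delivered to this process.
	 */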
	sigfillset(&p->p_ignore);
	sigemptyset(&p->p_siginfo);
	sigemptyset(&p->p_sig);
	sigemptyset(&p->p_extsig);
	sigemptyset(&t->t_sig);
	sigemptyset(&t->t_extsig);
	sigemptyset(&p->p_sigmask);
	sigdelq(p, t, 0);
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	p->p_flag &= ~(SKILLED | SEXTKILLED);
	if (lwp->lwp_curinfo) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}

	t->t_proc_flag |= TP_LWPEXIT;
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
	prlwpexit(t);		/* notify /proc */
	lwp_hash_out(p, t->t_tid);
	prexit(p);

	p->p_lwpcnt = 0;
	p->p_tlist = NULL;
	sigqfree(p);
	term_mstate(t);
	p->p_mterm = gethrtime();

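	/*
	 * Detach the executable and its directory vnodes from the process
	 * while p_lock is held; they are released after the lock is dropped.
	 */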
	exec_vp = p->p_exec;
	execdir_vp = p->p_execdir;
	p->p_exec = NULLVP;
	p->p_execdir = NULLVP;
	mutex_exit(&p->p_lock);

	pr_free_watched_pages(p);

	closeall(P_FINFO(p));

	/* Free the controlling tty. (freectty() always assumes curproc.) */
	ASSERT(p == curproc);
	(void) freectty(B_TRUE);

#if defined(__sparc)
	if (p->p_utraps != NULL)
		utrap_free(p);
#endif
	if (p->p_semacct)		/* IPC semaphore exit */
		semexit(p);
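
	/*
	 * Convert the exit code/data pair into an old-style wait status for
	 * use by accounting and the process contract below.
	 */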
	rv = wstat(why, what);

	acct(rv);
	exacct_commit_proc(p, rv);

	/*
	 * Release any resources associated with C2 auditing
	 */
	if (AU_AUDITING()) {
		/*
		 * audit exit system call
		 */
		audit_exit(why, what);
	}

	/*
	 * Free address space.
	 */
	relvm();

	if (exec_vp) {
		/*
		 * Close this executable, which was opened when the process
		 * was created by getproc().
		 */
		(void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL);
		VN_RELE(exec_vp);
	}
	if (execdir_vp)
		VN_RELE(execdir_vp);

	/*
	 * Release held contracts.
	 */
	contract_exit(p);

	/*
	 * Depart our encapsulating process contract.
	 */
	if ((p->p_flag & SSYS) == 0) {
		ASSERT(p->p_ct_process);
		contract_process_exit(p->p_ct_process, p, rv);
	}

	/*
	 * Remove pool association, and block if requested by pool_do_bind.
	 */
	mutex_enter(&p->p_lock);
	ASSERT(p->p_pool->pool_ref > 0);
	atomic_dec_32(&p->p_pool->pool_ref);
	p->p_pool = pool_default;
	/*
	 * Now that our address space has been freed and all other threads
	 * in this process have exited, set the PEXITED pool flag. This
	 * tells the pools subsystem to ignore this process if a request was
	 * made to rebind it to a new pool.
	 */
	p->p_poolflag |= PEXITED;
	pool_barrier_exit();
	mutex_exit(&p->p_lock);

	mutex_enter(&pidlock);

	/*
	 * Delete this process from the newstate list of its parent. We
	 * will put it back in the right place in sigcld() at the end.
	 */
	delete_ns(p->p_parent, p);

	/*
	 * Reassign the orphans to the next of kin.
	 * Don't rearrange init's orphanage.
	 */
	if ((q = p->p_orphan) != NULL && p != proc_init) {

		proc_t *nokp = p->p_nextofkin;

		for (;;) {
			q->p_nextofkin = nokp;
			if (q->p_nextorph == NULL)
				break;
			q = q->p_nextorph;
		}
		q->p_nextorph = nokp->p_orphan;
		nokp->p_orphan = p->p_orphan;
		p->p_orphan = NULL;
	}

	/*
	 * Reassign the children to init.
	 * Don't try to assign init's children to init.
	 */
	if ((q = p->p_child) != NULL && p != proc_init) {
		struct proc	*np;
		struct proc	*initp = proc_init;
		pid_t		zone_initpid = 1;
		struct proc	*zoneinitp = NULL;
		boolean_t	setzonetop = B_FALSE;

		if (!INGLOBALZONE(curproc)) {
			zone_initpid = curproc->p_zone->zone_proc_initpid;

			ASSERT(MUTEX_HELD(&pidlock));
			zoneinitp = prfind(zone_initpid);
			if (zoneinitp != NULL) {
				initp = zoneinitp;
			} else {
				zone_initpid = 1;
				setzonetop = B_TRUE;
			}
		}

		pgdetach(p);

		do {
			np = q->p_sibling;
			/*
			 * Delete it from its current parent's new state
			 * list and add it to init's new state list.
			 */
			delete_ns(q->p_parent, q);

			q->p_ppid = zone_initpid;

			q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
			if (setzonetop) {
				mutex_enter(&q->p_lock);
				q->p_flag |= SZONETOP;
				mutex_exit(&q->p_lock);
			}
			q->p_parent = initp;

			/*
			 * Since q will be the first child,
			 * it will not have a previous sibling.
			 */
			q->p_psibling = NULL;
			if (initp->p_child) {
				initp->p_child->p_psibling = q;
			}
			q->p_sibling = initp->p_child;
			initp->p_child = q;
			if (q->p_proc_flag & P_PR_PTRACE) {
				mutex_enter(&q->p_lock);
				sigtoproc(q, NULL, SIGKILL);
				mutex_exit(&q->p_lock);
			}
			/*
			 * sigcld() will add the child to the parent's
			 * newstate list.
			 */
			if (q->p_stat == SZOMB)
				sigcld(q, NULL);
		} while ((q = np) != NULL);

		p->p_child = NULL;
		ASSERT(p->p_child_ns == NULL);
	}

	TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);

	mutex_enter(&p->p_lock);
	CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */

	/*
	 * Have our task accumulate our resource usage data before they
	 * become contaminated by p_cacct etc., and before we renounce
	 * membership of the task.
	 *
	 * We do this regardless of whether or not task accounting is active.
	 * This is to avoid having nonsense data reported for this task if
	 * task accounting is subsequently enabled. The overhead is minimal;
	 * by this point, this process has accounted for the usage of all its
	 * LWPs. We nonetheless do the work here, and under the protection of
	 * pidlock, so that the movement of the process's usage to the task
	 * happens at the same time as the removal of the process from the
	 * task, from the point of view of exacct_snapshot_task_usage().
	 */
	exacct_update_task_mstate(p);

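	/*
	 * Fold the process's aggregated user and system CPU time, converted
	 * to clock ticks, together with the times of its waited-for children.
	 */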
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
	p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;

	p->p_acct[LMS_USER] += p->p_cacct[LMS_USER];
	p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM];
	p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP];
	p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT];
	p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT];
	p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT];
	p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
	p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP];
	p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU];
	p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED];

	p->p_ru.minflt += p->p_cru.minflt;
	p->p_ru.majflt += p->p_cru.majflt;
	p->p_ru.nswap += p->p_cru.nswap;
	p->p_ru.inblock += p->p_cru.inblock;
	p->p_ru.oublock += p->p_cru.oublock;
	p->p_ru.msgsnd += p->p_cru.msgsnd;
	p->p_ru.msgrcv += p->p_cru.msgrcv;
	p->p_ru.nsignals += p->p_cru.nsignals;
	p->p_ru.nvcsw += p->p_cru.nvcsw;
	p->p_ru.nivcsw += p->p_cru.nivcsw;
	p->p_ru.sysc += p->p_cru.sysc;
	p->p_ru.ioch += p->p_cru.ioch;

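	/*
	 * The process now officially becomes a zombie; record the exit code
	 * and data so that wait()/waitid() can report them.
	 */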
	p->p_stat = SZOMB;
	p->p_proc_flag &= ~P_PR_PTRACE;
	p->p_wdata = what;
	p->p_wcode = (char)why;

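	/*
	 * Stash the current and root directory vnodes and the cwd refstr;
	 * they are not released until after SZOMB is set (see below).
	 */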
	cdir = PTOU(p)->u_cdir;
	rdir = PTOU(p)->u_rdir;
	cwd = PTOU(p)->u_cwd;

	ASSERT(cdir != NULL || p->p_parent == &p0);

	/*
	 * Release resource controls, as they are no longer enforceable.
	 */
	rctl_set_free(p->p_rctls);

	/*
	 * Decrement tk_nlwps counter for our task.max-lwps resource control.
	 * An extended accounting record, if that facility is active, is
	 * scheduled to be written. We cannot give up task and project
	 * membership at this point because that would allow zombies to escape
	 * from the max-processes resource controls. Zombies stay in their
	 * current task and project until the process table slot is released
	 * in freeproc().
	 */
	tk = p->p_task;

	mutex_enter(&p->p_zone->zone_nlwps_lock);
	tk->tk_nlwps--;
	tk->tk_proj->kpj_nlwps--;
	p->p_zone->zone_nlwps--;
	mutex_exit(&p->p_zone->zone_nlwps_lock);

	/*
	 * Clear the lwp directory and the lwpid hash table
	 * now that /proc can't bother us any more.
	 * We free the memory below, after dropping p->p_lock.
	 */
	lwpdir = p->p_lwpdir;
	lwpdir_sz = p->p_lwpdir_sz;
	tidhash = p->p_tidhash;
	tidhash_sz = p->p_tidhash_sz;
	ret_tidhash = p->p_ret_tidhash;
	p->p_lwpdir = NULL;
	p->p_lwpfree = NULL;
	p->p_lwpdir_sz = 0;
	p->p_tidhash = NULL;
	p->p_tidhash_sz = 0;
	p->p_ret_tidhash = NULL;

	/*
	 * If the process has context ops installed, call the exit routine
	 * on behalf of this last remaining thread. Normally exitpctx() is
	 * called during thread_exit() or lwp_exit(), but because this is the
	 * last thread in the process, we must call it here. By the time
	 * thread_exit() is called (below), the association with the relevant
	 * process has been lost.
	 *
	 * We also free the context here.
	 */
	if (p->p_pctx) {
		kpreempt_disable();
		exitpctx(p);
		kpreempt_enable();

		freepctx(p, 0);
	}

	/*
	 * curthread's proc pointer is changed to point to the 'sched'
	 * process for the corresponding zone, except in the case when
	 * the exiting process is in fact a zsched instance, in which
	 * case the proc pointer is set to p0. We do so, so that the
	 * process still points at the right zone when we call the VN_RELE()
	 * below.
	 *
	 * This is because curthread's original proc pointer can be freed as
	 * soon as the child sends a SIGCLD to its parent. We use zsched so
	 * that for user processes, even in the final moments of death, the
	 * process is still associated with its zone.
	 */
	if (p != t->t_procp->p_zone->zone_zsched)
		t->t_procp = t->t_procp->p_zone->zone_zsched;
	else
		t->t_procp = &p0;

	mutex_exit(&p->p_lock);
	if (!evaporate) {
		/*
		 * The brand specific code only happens when the brand has a
		 * function to call in place of sigcld and the parent of the
		 * exiting process is not the global zone init. If the parent
		 * is the global zone init, then the process was reparented,
		 * and we don't want brand code delivering possibly strange
		 * signals to init. Also, init is not branded, so any brand
		 * specific exit data will not be picked up by init anyway.
		 */
		if (PROC_IS_BRANDED(p) &&
		    BROP(p)->b_exit_with_sig != NULL &&
		    p->p_ppid != 1) {
			/*
			 * The code for _fini that could unload the brand_t
			 * blocks until the count of zones using the module
			 * reaches zero. Zones decrement the refcount on their
			 * brands only after all user tasks in that zone have
			 * exited and been waited on. The decrement on the
			 * brand's refcount happens in zone_destroy(). That
			 * depends on zone_shutdown() having been completed.
			 * zone_shutdown() includes a call to zone_empty(),
			 * where the zone waits for itself to reach the state
			 * ZONE_IS_EMPTY. This state is only set in either
			 * zone_shutdown(), when there are no user processes as
			 * the zone enters this function, or in
			 * zone_task_rele(). zone_task_rele() is called from
			 * code triggered by waiting on processes, not by the
			 * processes exiting through proc_exit(). This means
			 * all the branded processes that could exist for a
			 * specific brand_t must exit and get reaped before the
			 * refcount on the brand_t can reach 0. _fini will
			 * never unload the corresponding brand module before
			 * proc_exit finishes execution for all processes
			 * branded with a particular brand_t, which makes the
			 * operation below safe to do. Brands that wish to use
			 * this mechanism must wait in _fini as described
			 * above.
			 */
			BROP(p)->b_exit_with_sig(p, sqp);
		} else {
			p->p_pidflag &= ~CLDPEND;
			sigcld(p, sqp);
		}

	} else {
		/*
		 * Do what sigcld() would do if the disposition
		 * of the SIGCHLD signal were set to be ignored.
		 */
		cv_broadcast(&p->p_srwchan_cv);
		freeproc(p);
	}
	mutex_exit(&pidlock);

	/*
	 * We don't release u_cdir and u_rdir until SZOMB is set.
	 * This protects us against dofusers().
	 */
	if (cdir)
		VN_RELE(cdir);
	if (rdir)
		VN_RELE(rdir);
	if (cwd)
		refstr_rele(cwd);

	/*
	 * task_rele() may ultimately cause the zone to go away (or
	 * may cause the last user process in a zone to go away, which
	 * signals zsched to go away). So prior to this call, we must
	 * no longer point at zsched.
	 */
	t->t_procp = &p0;

	kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
	kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
	while (ret_tidhash != NULL) {
		ret_tidhash_t *next = ret_tidhash->rth_next;
		kmem_free(ret_tidhash->rth_tidhash,
		    ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
		kmem_free(ret_tidhash, sizeof (*ret_tidhash));
		ret_tidhash = next;
	}

	thread_exit();
	/* NOTREACHED */
}

/*
 * Format siginfo structure for wait system calls.
 */
void
winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
{
	ASSERT(MUTEX_HELD(&pidlock));

	bzero(ip, sizeof (k_siginfo_t));
	ip->si_signo = SIGCLD;
	ip->si_code = pp->p_wcode;
	ip->si_pid = pp->p_pid;
	ip->si_ctid = PRCTID(pp);
	ip->si_zoneid = pp->p_zone->zone_id;
	ip->si_status = pp->p_wdata;
	ip->si_stime = pp->p_stime;
	ip->si_utime = pp->p_utime;

	if (waitflag) {
		pp->p_wcode = 0;
		pp->p_wdata = 0;
		pp->p_pidflag &= ~CLDPEND;
	}
}

/*
 * Wait system call.
 * Search for a terminated (zombie) child,
 * finally lay it to rest, and collect its status.
 * Look also for stopped children,
 * and pass back status from them.
 */
int
waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
{
	proc_t *cp, *pp;
	int waitflag = !(options & WNOWAIT);
	boolean_t have_brand_helper = B_FALSE;

	/*
	 * Obsolete flag, defined here only for binary compatibility
	 * with old statically linked executables. Delete this when
	 * we no longer care about these old and broken applications.
	 */
#define	_WNOCHLD	0400
	options &= ~_WNOCHLD;

	if (options == 0 || (options & ~WOPTMASK))
		return (EINVAL);

	switch (idtype) {
	case P_PID:
	case P_PGID:
		if (id < 0 || id >= maxpid)
			return (EINVAL);
		/* FALLTHROUGH */
	case P_ALL:
		break;
	default:
		return (EINVAL);
	}

	pp = ttoproc(curthread);

	/*
	 * Anytime you are looking for a process, you take pidlock to prevent
	 * things from changing as you look.
	 */
	mutex_enter(&pidlock);

	/*
	 * If we are only looking for exited processes and the child_ns list
	 * is empty, there is no reason to look at all children.
	 */
	if (idtype == P_ALL &&
	    (options & ~WNOWAIT) == (WNOHANG | WEXITED) &&
	    pp->p_child_ns == NULL) {
		if (pp->p_child) {
			mutex_exit(&pidlock);
			bzero(ip, sizeof (k_siginfo_t));
			return (0);
		}
		mutex_exit(&pidlock);
		return (ECHILD);
	}

	if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) {
		have_brand_helper = B_TRUE;
	}

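	/*
	 * Scan the children repeatedly, sleeping on pp->p_cv between passes,
	 * until a child of interest is found or none remain.
	 */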
	while (pp->p_child != NULL || have_brand_helper) {
		boolean_t brand_wants_wait = B_FALSE;
		int proc_gone = 0;
		int found = 0;

		/*
		 * Give the brand a chance to return synthetic results from
		 * this waitid() call before we do the real thing.
		 */
		if (have_brand_helper) {
			int ret;

			if (BROP(pp)->b_waitid_helper(idtype, id, ip, options,
			    &brand_wants_wait, &ret) == 0) {
				mutex_exit(&pidlock);
				return (ret);
			}

			if (pp->p_child == NULL) {
				goto no_real_children;
			}
		}

		/*
		 * Look for interesting children in the newstate list.
		 */
		VERIFY(pp->p_child != NULL);
		for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
			if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
				continue;
			if (idtype == P_PID && id != cp->p_pid)
				continue;
			if (idtype == P_PGID && id != cp->p_pgrp)
				continue;
			if (PROC_IS_BRANDED(pp)) {
				if (BROP(pp)->b_wait_filter != NULL &&
				    BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
					continue;
			}

			switch (cp->p_wcode) {

			case CLD_TRAPPED:
			case CLD_STOPPED:
			case CLD_CONTINUED:
				cmn_err(CE_PANIC,
				    "waitid: wrong state %d on the p_newstate"
				    " list", cp->p_wcode);
				break;

			case CLD_EXITED:
			case CLD_DUMPED:
			case CLD_KILLED:
				if (!(options & WEXITED)) {
					/*
					 * Count how many are already gone
					 * for good.
					 */
					proc_gone++;
					break;
				}
				if (!waitflag) {
					winfo(cp, ip, 0);
				} else {
					winfo(cp, ip, 1);
					freeproc(cp);
				}
				mutex_exit(&pidlock);
				if (waitflag) {	/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);
			}

			if (idtype == P_PID)
				break;
		}

		/*
		 * Wow! None of the processes on the p_sibling_ns list were
		 * interesting. Check all the kids!
		 */
		for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
			if (idtype == P_PID && id != cp->p_pid)
				continue;
			if (idtype == P_PGID && id != cp->p_pgrp)
				continue;
			if (PROC_IS_BRANDED(pp)) {
				if (BROP(pp)->b_wait_filter != NULL &&
				    BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
					continue;
			}

			switch (cp->p_wcode) {
			case CLD_TRAPPED:
				if (!(options & WTRAPPED))
					break;
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {	/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_STOPPED:
				if (!(options & WSTOPPED))
					break;
				/* Is it still stopped? */
				mutex_enter(&cp->p_lock);
				if (!jobstopped(cp)) {
					mutex_exit(&cp->p_lock);
					break;
				}
				mutex_exit(&cp->p_lock);
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {	/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_CONTINUED:
				if (!(options & WCONTINUED))
					break;
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {	/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_EXITED:
			case CLD_DUMPED:
			case CLD_KILLED:
				if (idtype != P_PID &&
				    (cp->p_pidflag & CLDWAITPID))
					continue;
				/*
				 * Don't complain if a process was found in
				 * the first loop but we broke out of the loop
				 * because of the arguments passed to us.
				 */
				if (proc_gone == 0) {
					cmn_err(CE_PANIC,
					    "waitid: wrong state on the"
					    " p_child list");
				} else {
					break;
				}
			}

			found++;

			if (idtype == P_PID)
				break;
		}

no_real_children:
		/*
		 * If we found no interesting processes at all,
		 * break out and return ECHILD.
		 */
		if (!brand_wants_wait && (found + proc_gone == 0))
			break;

		if (options & WNOHANG) {
			mutex_exit(&pidlock);
			bzero(ip, sizeof (k_siginfo_t));
			/*
			 * We should set ip->si_signo = SIGCLD,
			 * but there is an SVVS test that expects
			 * ip->si_signo to be zero in this case.
			 */
			return (0);
		}

		/*
		 * If we found no processes of interest that could
		 * change state while we wait, we don't wait at all.
		 * Get out with ECHILD according to SVID.
		 */
		if (!brand_wants_wait && (found == proc_gone))
			break;

		if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
			mutex_exit(&pidlock);
			return (EINTR);
		}
	}
	mutex_exit(&pidlock);
	return (ECHILD);
}

int
waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options)
{
	int error;
	k_siginfo_t info;

	if (error = waitid(idtype, id, &info, options))
		return (set_errno(error));
	if (copyout(&info, infop, sizeof (k_siginfo_t)))
		return (set_errno(EFAULT));
	return (0);
}

#ifdef _SYSCALL32_IMPL

int
waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options)
{
	int error;
	k_siginfo_t info;
	siginfo32_t info32;

	if (error = waitid(idtype, id, &info, options))
		return (set_errno(error));
	siginfo_kto32(&info, &info32);
	if (copyout(&info32, infop, sizeof (info32)))
		return (set_errno(EFAULT));
	return (0);
}

#endif	/* _SYSCALL32_IMPL */

void
proc_detach(proc_t *p)
{
	proc_t *q;

	ASSERT(MUTEX_HELD(&pidlock));

	q = p->p_parent;
	ASSERT(q != NULL);

	/*
	 * Take it off the newstate list of its parent
	 */
	delete_ns(q, p);

	if (q->p_child == p) {
		q->p_child = p->p_sibling;
		/*
		 * If the parent has no children, it better not
		 * have any with new states either!
		 */
		ASSERT(q->p_child ? 1 : q->p_child_ns == NULL);
	}

	if (p->p_sibling) {
		p->p_sibling->p_psibling = p->p_psibling;
	}

	if (p->p_psibling) {
		p->p_psibling->p_sibling = p->p_sibling;
	}
}

/*
 * Remove zombie children from the process table.
 */
void
freeproc(proc_t *p)
{
	proc_t *q;
	task_t *tk;

	ASSERT(p->p_stat == SZOMB);
	ASSERT(p->p_tlist == NULL);
	ASSERT(MUTEX_HELD(&pidlock));

	sigdelq(p, NULL, 0);
	if (p->p_killsqp) {
		siginfofree(p->p_killsqp);
		p->p_killsqp = NULL;
	}

	/* Clear any remaining brand data */
	if (PROC_IS_BRANDED(p)) {
		brand_clearbrand(p, B_FALSE);
	}

	prfree(p);	/* inform /proc */

	/*
	 * Don't free the init process.
	 * Other dying processes will access it.
	 */
	if (p == proc_init)
		return;

	/*
	 * We wait until now to free the cred structure because a
	 * zombie process's credentials may be examined by /proc.
	 * No cred locking needed because there are no threads at this point.
	 */
	upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
	crfree(p->p_cred);
	if (p->p_corefile != NULL) {
		corectl_path_rele(p->p_corefile);
		p->p_corefile = NULL;
	}
	if (p->p_content != NULL) {
		corectl_content_rele(p->p_content);
		p->p_content = NULL;
	}

	if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) ||
	    (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) {
		/*
		 * This should still do the right thing since p_utime/p_stime
		 * are set to their correct values on process exit, so they
		 * get properly updated here.
		 */
		p->p_nextofkin->p_cutime += p->p_utime;
		p->p_nextofkin->p_cstime += p->p_stime;

		p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER];
		p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM];
		p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP];
		p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT];
		p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT];
		p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT];
		p->p_nextofkin->p_cacct[LMS_USER_LOCK]
		    += p->p_acct[LMS_USER_LOCK];
		p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP];
		p->p_nextofkin->p_cacct[LMS_WAIT_CPU]
		    += p->p_acct[LMS_WAIT_CPU];
		p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED];

		p->p_nextofkin->p_cru.minflt += p->p_ru.minflt;
		p->p_nextofkin->p_cru.majflt += p->p_ru.majflt;
		p->p_nextofkin->p_cru.nswap += p->p_ru.nswap;
		p->p_nextofkin->p_cru.inblock += p->p_ru.inblock;
		p->p_nextofkin->p_cru.oublock += p->p_ru.oublock;
		p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd;
		p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv;
		p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals;
		p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw;
		p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw;
		p->p_nextofkin->p_cru.sysc += p->p_ru.sysc;
		p->p_nextofkin->p_cru.ioch += p->p_ru.ioch;

	}

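	/* Unlink this process from its next-of-kin's orphan list. */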
	q = p->p_nextofkin;
	if (q && q->p_orphan == p)
		q->p_orphan = p->p_nextorph;
	else if (q) {
		for (q = q->p_orphan; q; q = q->p_nextorph)
			if (q->p_nextorph == p)
				break;
		ASSERT(q && q->p_nextorph == p);
		q->p_nextorph = p->p_nextorph;
	}

	/*
	 * The process table slot is being freed, so it is now safe to give up
	 * task and project membership.
	 */
	mutex_enter(&p->p_lock);
	tk = p->p_task;
	task_detach(p);
	mutex_exit(&p->p_lock);

	proc_detach(p);
	pid_exit(p, tk);	/* frees pid and proc structure */

	task_rele(tk);
}

/*
 * Delete process "child" from the newstate list of process "parent"
 */
void
delete_ns(proc_t *parent, proc_t *child)
{
	proc_t **ns;

	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(child->p_parent == parent);
	for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) {
		if (*ns == child) {

			ASSERT((*ns)->p_parent == parent);

			*ns = child->p_sibling_ns;
			child->p_sibling_ns = NULL;
			return;
		}
	}
}

/*
 * Add process "child" to the new state list of process "parent"
 */
void
add_ns(proc_t *parent, proc_t *child)
{
	ASSERT(child->p_sibling_ns == NULL);
	child->p_sibling_ns = parent->p_child_ns;
	parent->p_child_ns = child;
}