--- old/usr/src/uts/common/os/exit.c
+++ new/usr/src/uts/common/os/exit.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 - * Copyright 2015 Joyent, Inc. All rights reserved.
24 + * Copyright 2014 Joyent, Inc. All rights reserved.
25 25 */
26 26
27 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 28
29 29 #include <sys/types.h>
30 30 #include <sys/param.h>
31 31 #include <sys/sysmacros.h>
32 32 #include <sys/systm.h>
33 33 #include <sys/cred.h>
34 34 #include <sys/user.h>
35 35 #include <sys/errno.h>
36 36 #include <sys/proc.h>
37 37 #include <sys/ucontext.h>
38 38 #include <sys/procfs.h>
39 39 #include <sys/vnode.h>
40 40 #include <sys/acct.h>
41 41 #include <sys/var.h>
42 42 #include <sys/cmn_err.h>
43 43 #include <sys/debug.h>
44 44 #include <sys/wait.h>
45 45 #include <sys/siginfo.h>
46 46 #include <sys/procset.h>
47 47 #include <sys/class.h>
48 48 #include <sys/file.h>
49 49 #include <sys/session.h>
50 50 #include <sys/kmem.h>
51 51 #include <sys/vtrace.h>
52 52 #include <sys/prsystm.h>
53 53 #include <sys/ipc.h>
54 54 #include <sys/sem_impl.h>
55 55 #include <c2/audit.h>
56 56 #include <sys/aio_impl.h>
57 57 #include <vm/as.h>
58 58 #include <sys/poll.h>
59 59 #include <sys/door.h>
60 60 #include <sys/lwpchan_impl.h>
61 61 #include <sys/utrap.h>
62 62 #include <sys/task.h>
63 63 #include <sys/exacct.h>
64 64 #include <sys/cyclic.h>
65 65 #include <sys/schedctl.h>
66 66 #include <sys/rctl.h>
67 67 #include <sys/contract_impl.h>
68 68 #include <sys/contract/process_impl.h>
69 69 #include <sys/list.h>
70 70 #include <sys/dtrace.h>
71 71 #include <sys/pool.h>
72 72 #include <sys/sdt.h>
73 73 #include <sys/corectl.h>
74 74 #include <sys/brand.h>
75 75 #include <sys/libc_kernel.h>
76 76
77 77 /*
78 78 * convert code/data pair into old style wait status
79 79 */
80 80 int
81 81 wstat(int code, int data)
82 82 {
83 83 int stat = (data & 0377);
84 84
85 85 switch (code) {
86 86 case CLD_EXITED:
87 87 stat <<= 8;
88 88 break;
89 89 case CLD_DUMPED:
90 90 stat |= WCOREFLG;
91 91 break;
92 92 case CLD_KILLED:
93 93 break;
94 94 case CLD_TRAPPED:
95 95 case CLD_STOPPED:
96 96 stat <<= 8;
97 97 stat |= WSTOPFLG;
98 98 break;
99 99 case CLD_CONTINUED:
100 100 stat = WCONTFLG;
101 101 break;
102 102 default:
103 103 cmn_err(CE_PANIC, "wstat: bad code");
104 104 /* NOTREACHED */
105 105 }
106 106 return (stat);
107 107 }
108 108
109 109 static char *
110 110 exit_reason(char *buf, size_t bufsz, int what, int why)
111 111 {
112 112 switch (why) {
113 113 case CLD_EXITED:
114 114 (void) snprintf(buf, bufsz, "exited with status %d", what);
115 115 break;
116 116 case CLD_KILLED:
117 117 (void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
118 118 break;
119 119 case CLD_DUMPED:
120 120 (void) snprintf(buf, bufsz, "core dumped on signal %d", what);
121 121 break;
122 122 default:
123 123 (void) snprintf(buf, bufsz, "encountered unknown error "
124 124 "(%d, %d)", why, what);
125 125 break;
126 126 }
127 127
128 128 return (buf);
129 129 }
130 130
131 131 /*
132 132 * exit system call: pass back caller's arg.
133 133 */
134 134 void
135 135 rexit(int rval)
136 136 {
137 137 exit(CLD_EXITED, rval);
138 138 }
139 139
140 140 /*
141 141 * Called by proc_exit() when a zone's init exits, presumably because
142 142 * it failed. As long as the given zone is still in the "running"
143 143 * state, we will re-exec() init, but first we need to reset things
144 144 * which are usually inherited across exec() but will break init's
145 145 * assumption that it is being exec()'d from a virgin process. Most
146 146 * importantly this includes closing all file descriptors (exec only
147 147 * closes those marked close-on-exec) and resetting signals (exec only
148 148 * resets handled signals, and we need to clear any signals which
149 149 * killed init). Anything else that exec(2) says would be inherited,
150 150 * but would affect the execution of init, needs to be reset.
151 151 */
152 152 static int
153 153 restart_init(int what, int why)
154 154 {
155 155 kthread_t *t = curthread;
156 156 klwp_t *lwp = ttolwp(t);
157 157 proc_t *p = ttoproc(t);
158 158 user_t *up = PTOU(p);
159 159
160 160 vnode_t *oldcd, *oldrd;
161 161 int i, err;
162 162 char reason_buf[64];
163 163
164 164 /*
165 165 * Let zone admin (and global zone admin if this is for a non-global
166 166 * zone) know that init has failed and will be restarted.
167 167 */
168 168 zcmn_err(p->p_zone->zone_id, CE_WARN,
169 169 "init(1M) %s: restarting automatically",
170 170 exit_reason(reason_buf, sizeof (reason_buf), what, why));
171 171
172 172 if (!INGLOBALZONE(p)) {
173 173 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
174 174 "restarting automatically",
175 175 p->p_zone->zone_name, p->p_pid, reason_buf);
176 176 }
177 177
178 178 /*
179 179 * Remove any fpollinfo_t's for this (last) thread from our file
180 180 * descriptors so closeall() can ASSERT() that they're all gone.
181 181 * Then close all open file descriptors in the process.
182 182 */
183 183 pollcleanup();
184 184 closeall(P_FINFO(p));
185 185
186 186 /*
187 187 * Grab p_lock and begin clearing miscellaneous global process
188 188 * state that needs to be reset before we exec the new init(1M).
189 189 */
190 190
191 191 mutex_enter(&p->p_lock);
192 192 prbarrier(p);
193 193
194 194 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
195 195 up->u_cmask = CMASK;
196 196
197 197 sigemptyset(&t->t_hold);
198 198 sigemptyset(&t->t_sig);
199 199 sigemptyset(&t->t_extsig);
200 200
201 201 sigemptyset(&p->p_sig);
202 202 sigemptyset(&p->p_extsig);
203 203
204 204 sigdelq(p, t, 0);
205 205 sigdelq(p, NULL, 0);
206 206
207 207 if (p->p_killsqp) {
208 208 siginfofree(p->p_killsqp);
209 209 p->p_killsqp = NULL;
210 210 }
211 211
212 212 /*
213 213 * Reset any signals that are ignored back to the default disposition.
214 214 * Other u_signal members will be cleared when exec calls sigdefault().
215 215 */
216 216 for (i = 1; i < NSIG; i++) {
217 217 if (up->u_signal[i - 1] == SIG_IGN) {
218 218 up->u_signal[i - 1] = SIG_DFL;
219 219 sigemptyset(&up->u_sigmask[i - 1]);
220 220 }
221 221 }
222 222
223 223 /*
224 224 * Clear the current signal, any signal info associated with it, and
225 225 * any signal information from contracts and/or contract templates.
226 226 */
227 227 lwp->lwp_cursig = 0;
228 228 lwp->lwp_extsig = 0;
229 229 if (lwp->lwp_curinfo != NULL) {
230 230 siginfofree(lwp->lwp_curinfo);
231 231 lwp->lwp_curinfo = NULL;
232 232 }
233 233 lwp_ctmpl_clear(lwp, B_FALSE);
234 234
235 235 /*
236 236 * Reset both the process root directory and the current working
237 237 * directory to the root of the zone just as we do during boot.
238 238 */
239 239 VN_HOLD(p->p_zone->zone_rootvp);
240 240 oldrd = up->u_rdir;
241 241 up->u_rdir = p->p_zone->zone_rootvp;
242 242
243 243 VN_HOLD(p->p_zone->zone_rootvp);
244 244 oldcd = up->u_cdir;
245 245 up->u_cdir = p->p_zone->zone_rootvp;
246 246
247 247 if (up->u_cwd != NULL) {
248 248 refstr_rele(up->u_cwd);
249 249 up->u_cwd = NULL;
250 250 }
251 251
252 252 mutex_exit(&p->p_lock);
253 253
254 254 if (oldrd != NULL)
255 255 VN_RELE(oldrd);
256 256 if (oldcd != NULL)
257 257 VN_RELE(oldcd);
258 258
259 259 /* Free the controlling tty. (freectty() always assumes curproc.) */
260 260 ASSERT(p == curproc);
261 261 (void) freectty(B_TRUE);
262 262
263 263 /*
264 264 * Now exec() the new init(1M) on top of the current process. If we
265 265 * succeed, the caller will treat this like a successful system call.
266 266 * If we fail, we issue messages and the caller will proceed with exit.
267 267 */
268 268 err = exec_init(p->p_zone->zone_initname, NULL);
269 269
270 270 if (err == 0)
271 271 return (0);
272 272
273 273 zcmn_err(p->p_zone->zone_id, CE_WARN,
274 274 "failed to restart init(1M) (err=%d): system reboot required", err);
275 275
276 276 if (!INGLOBALZONE(p)) {
277 277 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
278 278 "(pid %d, err=%d): zoneadm(1M) boot required",
279 279 p->p_zone->zone_name, p->p_pid, err);
280 280 }
281 281
282 282 return (-1);
283 283 }
284 284
285 285 /*
286 286 * Release resources.
287 287 * Enter zombie state.
288 288 * Wake up parent and init processes,
289 289 * and dispose of children.
290 290 */
291 291 void
292 292 exit(int why, int what)
293 293 {
294 294 /*
295 295 * If proc_exit() fails, then some other lwp in the process
296 296 * got there first. We just have to call lwp_exit() to allow
297 297 * the other lwp to finish exiting the process. Otherwise we're
298 298 * restarting init, and should return.
299 299 */
300 300 if (proc_exit(why, what) != 0) {
301 301 mutex_enter(&curproc->p_lock);
302 302 ASSERT(curproc->p_flag & SEXITLWPS);
303 303 lwp_exit();
304 304 /* NOTREACHED */
305 305 }
306 306 }
307 307
308 308 /*
309 309 * Set the SEXITING flag on the process, after making sure /proc does
310 310 * not have it locked. This is done in more places than proc_exit(),
311 311 * so it is a separate function.
312 312 */
313 313 void
314 314 proc_is_exiting(proc_t *p)
315 315 {
316 316 mutex_enter(&p->p_lock);
317 317 prbarrier(p);
318 318 p->p_flag |= SEXITING;
319 319 mutex_exit(&p->p_lock);
320 320 }
321 321
322 322 /*
323 323 * Return value:
324 324 * 1 - exitlwps() failed, call (or continue) lwp_exit()
325 325 * 0 - restarting init. Return through system call path
326 326 */
327 327 int
328 328 proc_exit(int why, int what)
329 329 {
330 330 kthread_t *t = curthread;
331 331 klwp_t *lwp = ttolwp(t);
332 332 proc_t *p = ttoproc(t);
333 333 zone_t *z = p->p_zone;
334 334 timeout_id_t tmp_id;
335 335 int rv;
336 336 proc_t *q;
337 337 task_t *tk;
338 338 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
339 339 sigqueue_t *sqp;
340 340 lwpdir_t *lwpdir;
341 341 uint_t lwpdir_sz;
342 342 tidhash_t *tidhash;
343 343 uint_t tidhash_sz;
344 344 ret_tidhash_t *ret_tidhash;
345 345 refstr_t *cwd;
346 346 hrtime_t hrutime, hrstime;
347 347 int evaporate;
348 348
349 349 /*
350 350 * Stop and discard the process's lwps except for the current one,
351 351 * unless some other lwp beat us to it. If exitlwps() fails then
352 352 * return and the calling lwp will call (or continue in) lwp_exit().
353 353 */
354 354 proc_is_exiting(p);
355 355 if (exitlwps(0) != 0)
356 356 return (1);
357 357
358 358 mutex_enter(&p->p_lock);
359 359 if (p->p_ttime > 0) {
360 360 /*
361 361 * Account any remaining ticks charged to this process
362 362 * on its way out.
363 363 */
364 364 (void) task_cpu_time_incr(p->p_task, p->p_ttime);
365 365 p->p_ttime = 0;
366 366 }
367 367 mutex_exit(&p->p_lock);
368 368
369 369 /*
370 370 * Don't let init exit unless zone_start_init() failed its exec, or
371 371 * we are shutting down the zone or the machine.
372 372 *
373 373 * Since we are single threaded, we don't need to lock the
374 374 * following accesses to zone_proc_initpid.
375 375 */
376 376 if (p->p_pid == z->zone_proc_initpid) {
377 377 if (z->zone_boot_err == 0 &&
378 378 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
379 379 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
380 380
381 381 /*
382 382 * If the init process should be restarted, the
383 383 * "zone_restart_init" member will be set. Some init
384 384 * programs in branded zones do not tolerate a restart
385 385 * in the traditional manner; setting the
386 386 * "zone_reboot_on_init_exit" member will cause the
387 387 * entire zone to be rebooted instead. If neither of
388 388 * these flags is set the zone will shut down.
389 389 */
390 390 if (z->zone_reboot_on_init_exit == B_TRUE &&
391 391 z->zone_restart_init == B_TRUE) {
392 392 /*
393 393 * Trigger a zone reboot and continue
394 394 * with exit processing.
395 395 */
396 396 z->zone_init_status = wstat(why, what);
397 397 (void) zone_kadmin(A_REBOOT, 0, NULL,
398 398 zone_kcred());
399 399
400 400 } else {
401 401 if (z->zone_restart_init == B_TRUE) {
402 402 if (restart_init(what, why) == 0)
403 403 return (0);
404 404 }
405 405
406 406 z->zone_init_status = wstat(why, what);
407 407 (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
408 408 zone_kcred());
409 409 }
410 410 }
411 411
412 412 /*
413 413 * Since we didn't or couldn't restart init, we clear
414 414 * the zone's init state and proceed with exit
415 415 * processing.
416 416 */
417 417 z->zone_proc_initpid = -1;
418 418 }
419 419
420 420 /*
421 421 * Delay firing probes (and performing brand cleanup) until after the
422 422 * zone_proc_initpid check. Cases which result in zone shutdown or
423 423 * restart via zone_kadmin eventually result in a call back to
424 424 * proc_exit.
425 425 */
426 426 DTRACE_PROC(lwp__exit);
427 427 DTRACE_PROC1(exit, int, why);
428 428
429 429 /*
430 430 * Will perform any brand specific proc exit processing. Since this
431 431 * is always the last lwp, will also perform lwp exit/free and proc
432 432 * exit. Brand data will be freed when the process is reaped.
433 433 */
434 434 if (PROC_IS_BRANDED(p)) {
435 435 BROP(p)->b_lwpexit(lwp);
436 436 BROP(p)->b_proc_exit(p);
437 437 /*
438 438 * To ensure that b_proc_exit has access to brand-specific data
439 439 * contained by the one remaining lwp, call the freelwp hook as
440 440 * the last part of this clean-up process.
441 441 */
442 442 BROP(p)->b_freelwp(lwp);
443 443 lwp_detach_brand_hdlrs(lwp);
444 444 }
445 445
446 446 lwp_pcb_exit();
447 447
448 448 /*
449 449 * Allocate a sigqueue now, before we grab locks.
450 450 * It will be given to sigcld(), below.
451 451 * Special case: If we will be making the process disappear
452 452 * without a trace because it is either:
453 453 * * an exiting SSYS process, or
454 454 * * a posix_spawn() vfork child who requests it,
455 455 * we don't bother to allocate a useless sigqueue.
456 456 */
457 457 evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
458 458 why == CLD_EXITED && what == _EVAPORATE);
459 459 if (!evaporate)
460 460 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
461 461
462 462 /*
463 463 * revoke any doors created by the process.
464 464 */
465 465 if (p->p_door_list)
466 466 door_exit();
467 467
468 468 /*
469 469 * Release schedctl data structures.
470 470 */
471 471 if (p->p_pagep)
472 472 schedctl_proc_cleanup();
473 473
474 474 /*
475 475 * make sure all pending kaio has completed.
476 476 */
477 477 if (p->p_aio)
478 478 aio_cleanup_exit();
479 479
480 480 /*
481 481 * discard the lwpchan cache.
482 482 */
483 483 if (p->p_lcp != NULL)
484 484 lwpchan_destroy_cache(0);
485 485
486 486 /*
487 487 * Clean up any DTrace helper actions or probes for the process.
488 488 */
489 489 if (p->p_dtrace_helpers != NULL) {
490 490 ASSERT(dtrace_helpers_cleanup != NULL);
491 491 (*dtrace_helpers_cleanup)();
492 492 }
493 493
494 494 /*
495 495 * Clean up any signalfd state for the process.
496 496 */
497 497 if (p->p_sigfd != NULL) {
498 498 VERIFY(sigfd_exit_helper != NULL);
499 499 (*sigfd_exit_helper)();
500 500 }
501 501
502 502 /* untimeout the realtime timers */
503 503 if (p->p_itimer != NULL)
504 504 timer_exit();
505 505
506 506 if ((tmp_id = p->p_alarmid) != 0) {
507 507 p->p_alarmid = 0;
508 508 (void) untimeout(tmp_id);
509 509 }
510 510
511 511 /*
512 512 * Remove any fpollinfo_t's for this (last) thread from our file
513 513 * descriptors so closeall() can ASSERT() that they're all gone.
514 514 */
515 515 pollcleanup();
516 516
517 517 if (p->p_rprof_cyclic != CYCLIC_NONE) {
518 518 mutex_enter(&cpu_lock);
519 519 cyclic_remove(p->p_rprof_cyclic);
520 520 mutex_exit(&cpu_lock);
521 521 }
522 522
523 523 mutex_enter(&p->p_lock);
524 524
525 525 /*
526 526 * Clean up any DTrace probes associated with this process.
527 527 */
528 528 if (p->p_dtrace_probes) {
529 529 ASSERT(dtrace_fasttrap_exit_ptr != NULL);
530 530 dtrace_fasttrap_exit_ptr(p);
531 531 }
532 532
533 533 while ((tmp_id = p->p_itimerid) != 0) {
534 534 p->p_itimerid = 0;
535 535 mutex_exit(&p->p_lock);
536 536 (void) untimeout(tmp_id);
537 537 mutex_enter(&p->p_lock);
538 538 }
539 539
540 540 lwp_cleanup();
541 541
542 542 /*
543 543 * We are about to exit; prevent our resource associations from
544 544 * being changed.
545 545 */
546 546 pool_barrier_enter();
547 547
548 548 /*
549 549 * Block the process against /proc now that we have really
550 550 * acquired p->p_lock (to manipulate p_tlist at least).
551 551 */
552 552 prbarrier(p);
553 553
554 554 sigfillset(&p->p_ignore);
555 555 sigemptyset(&p->p_siginfo);
556 556 sigemptyset(&p->p_sig);
557 557 sigemptyset(&p->p_extsig);
558 558 sigemptyset(&t->t_sig);
559 559 sigemptyset(&t->t_extsig);
560 560 sigemptyset(&p->p_sigmask);
561 561 sigdelq(p, t, 0);
562 562 lwp->lwp_cursig = 0;
563 563 lwp->lwp_extsig = 0;
564 564 p->p_flag &= ~(SKILLED | SEXTKILLED);
565 565 if (lwp->lwp_curinfo) {
566 566 siginfofree(lwp->lwp_curinfo);
567 567 lwp->lwp_curinfo = NULL;
568 568 }
569 569
570 570 t->t_proc_flag |= TP_LWPEXIT;
571 571 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
572 572 prlwpexit(t); /* notify /proc */
573 573 lwp_hash_out(p, t->t_tid);
574 574 prexit(p);
575 575
576 576 p->p_lwpcnt = 0;
577 577 p->p_tlist = NULL;
578 578 sigqfree(p);
579 579 term_mstate(t);
580 580 p->p_mterm = gethrtime();
581 581
582 582 exec_vp = p->p_exec;
583 583 execdir_vp = p->p_execdir;
584 584 p->p_exec = NULLVP;
585 585 p->p_execdir = NULLVP;
586 586 mutex_exit(&p->p_lock);
587 587
588 588 pr_free_watched_pages(p);
589 589
590 590 closeall(P_FINFO(p));
591 591
592 592 /* Free the controlling tty. (freectty() always assumes curproc.) */
593 593 ASSERT(p == curproc);
594 594 (void) freectty(B_TRUE);
595 595
596 596 #if defined(__sparc)
597 597 if (p->p_utraps != NULL)
598 598 utrap_free(p);
599 599 #endif
600 600 if (p->p_semacct) /* IPC semaphore exit */
601 601 semexit(p);
602 602 rv = wstat(why, what);
603 603
604 604 acct(rv & 0xff);
605 605 exacct_commit_proc(p, rv);
606 606
607 607 /*
608 608 * Release any resources associated with C2 auditing
609 609 */
610 610 if (AU_AUDITING()) {
611 611 /*
612 612 * audit exit system call
613 613 */
614 614 audit_exit(why, what);
615 615 }
616 616
617 617 /*
618 618 * Free address space.
619 619 */
620 620 relvm();
621 621
622 622 if (exec_vp) {
623 623 /*
624 624 * Close this executable which has been opened when the process
625 625 * was created by getproc().
626 626 */
627 627 (void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL);
628 628 VN_RELE(exec_vp);
629 629 }
630 630 if (execdir_vp)
631 631 VN_RELE(execdir_vp);
632 632
633 633 /*
634 634 * Release held contracts.
635 635 */
636 636 contract_exit(p);
637 637
638 638 /*
639 639 * Depart our encapsulating process contract.
640 640 */
641 641 if ((p->p_flag & SSYS) == 0) {
642 642 ASSERT(p->p_ct_process);
643 643 contract_process_exit(p->p_ct_process, p, rv);
644 644 }
645 645
646 646 /*
647 647 * Remove pool association, and block if requested by pool_do_bind.
648 648 */
649 649 mutex_enter(&p->p_lock);
650 650 ASSERT(p->p_pool->pool_ref > 0);
651 651 atomic_dec_32(&p->p_pool->pool_ref);
652 652 p->p_pool = pool_default;
653 653 /*
654 654 * Now that our address space has been freed and all other threads
655 655 * in this process have exited, set the PEXITED pool flag. This
656 656 * tells the pools subsystems to ignore this process if it was
657 657 * requested to rebind this process to a new pool.
658 658 */
659 659 p->p_poolflag |= PEXITED;
660 660 pool_barrier_exit();
661 661 mutex_exit(&p->p_lock);
662 662
663 663 mutex_enter(&pidlock);
664 664
665 665 /*
666 666 * Delete this process from the newstate list of its parent. We
667 667 * will put it in the right place in the sigcld in the end.
668 668 */
669 669 delete_ns(p->p_parent, p);
670 670
671 671 /*
672 672 * Reassign the orphans to the next of kin.
673 673 * Don't rearrange init's orphanage.
674 674 */
675 675 if ((q = p->p_orphan) != NULL && p != proc_init) {
676 676
677 677 proc_t *nokp = p->p_nextofkin;
678 678
679 679 for (;;) {
680 680 q->p_nextofkin = nokp;
681 681 if (q->p_nextorph == NULL)
682 682 break;
683 683 q = q->p_nextorph;
684 684 }
685 685 q->p_nextorph = nokp->p_orphan;
686 686 nokp->p_orphan = p->p_orphan;
687 687 p->p_orphan = NULL;
688 688 }
689 689
690 690 /*
691 691 * Reassign the children to init.
692 692 * Don't try to assign init's children to init.
693 693 */
694 694 if ((q = p->p_child) != NULL && p != proc_init) {
695 695 struct proc *np;
696 696 struct proc *initp = proc_init;
697 697 pid_t zone_initpid = 1;
698 698 struct proc *zoneinitp = NULL;
699 699 boolean_t setzonetop = B_FALSE;
700 700
701 701 if (!INGLOBALZONE(curproc)) {
702 702 zone_initpid = curproc->p_zone->zone_proc_initpid;
703 703
704 704 ASSERT(MUTEX_HELD(&pidlock));
705 705 zoneinitp = prfind(zone_initpid);
706 706 if (zoneinitp != NULL) {
707 707 initp = zoneinitp;
708 708 } else {
709 709 zone_initpid = 1;
710 710 setzonetop = B_TRUE;
711 711 }
712 712 }
713 713
714 714 pgdetach(p);
715 715
716 716 do {
717 717 np = q->p_sibling;
718 718 /*
719 719 * Delete it from its current parent new state
720 720 * list and add it to init new state list
721 721 */
722 722 delete_ns(q->p_parent, q);
723 723
724 724 q->p_ppid = zone_initpid;
725 725
726 726 q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
727 727 if (setzonetop) {
728 728 mutex_enter(&q->p_lock);
729 729 q->p_flag |= SZONETOP;
730 730 mutex_exit(&q->p_lock);
731 731 }
732 732 q->p_parent = initp;
733 733
734 734 /*
735 735 * Since q will be the first child,
736 736 * it will not have a previous sibling.
737 737 */
738 738 q->p_psibling = NULL;
739 739 if (initp->p_child) {
740 740 initp->p_child->p_psibling = q;
741 741 }
742 742 q->p_sibling = initp->p_child;
743 743 initp->p_child = q;
744 744 if (q->p_proc_flag & P_PR_PTRACE) {
745 745 mutex_enter(&q->p_lock);
746 746 sigtoproc(q, NULL, SIGKILL);
747 747 mutex_exit(&q->p_lock);
748 748 }
749 749 /*
750 750 			 * sigcld() will add the child to the parent's
751 751 * newstate list.
752 752 */
753 753 if (q->p_stat == SZOMB)
754 754 sigcld(q, NULL);
755 755 } while ((q = np) != NULL);
756 756
757 757 p->p_child = NULL;
758 758 ASSERT(p->p_child_ns == NULL);
759 759 }
760 760
761 761 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);
762 762
763 763 mutex_enter(&p->p_lock);
764 764 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */
765 765
766 766 /*
767 767 	 * Have our task accumulate our resource usage data before they
768 768 * become contaminated by p_cacct etc., and before we renounce
769 769 * membership of the task.
770 770 *
771 771 * We do this regardless of whether or not task accounting is active.
772 772 * This is to avoid having nonsense data reported for this task if
773 773 * task accounting is subsequently enabled. The overhead is minimal;
774 774 * by this point, this process has accounted for the usage of all its
775 775 * LWPs. We nonetheless do the work here, and under the protection of
776 776 * pidlock, so that the movement of the process's usage to the task
777 777 * happens at the same time as the removal of the process from the
778 778 * task, from the point of view of exacct_snapshot_task_usage().
779 779 */
780 780 exacct_update_task_mstate(p);
781 781
782 782 hrutime = mstate_aggr_state(p, LMS_USER);
783 783 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
784 784 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
785 785 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;
786 786
787 787 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER];
788 788 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM];
789 789 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP];
790 790 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT];
791 791 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT];
792 792 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT];
793 793 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
794 794 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP];
795 795 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU];
796 796 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED];
797 797
798 798 p->p_ru.minflt += p->p_cru.minflt;
799 799 p->p_ru.majflt += p->p_cru.majflt;
800 800 p->p_ru.nswap += p->p_cru.nswap;
801 801 p->p_ru.inblock += p->p_cru.inblock;
802 802 p->p_ru.oublock += p->p_cru.oublock;
803 803 p->p_ru.msgsnd += p->p_cru.msgsnd;
804 804 p->p_ru.msgrcv += p->p_cru.msgrcv;
805 805 p->p_ru.nsignals += p->p_cru.nsignals;
806 806 p->p_ru.nvcsw += p->p_cru.nvcsw;
807 807 p->p_ru.nivcsw += p->p_cru.nivcsw;
808 808 p->p_ru.sysc += p->p_cru.sysc;
809 809 p->p_ru.ioch += p->p_cru.ioch;
810 810
811 811 p->p_stat = SZOMB;
812 812 p->p_proc_flag &= ~P_PR_PTRACE;
813 813 p->p_wdata = what;
814 814 p->p_wcode = (char)why;
815 815
816 816 cdir = PTOU(p)->u_cdir;
817 817 rdir = PTOU(p)->u_rdir;
818 818 cwd = PTOU(p)->u_cwd;
819 819
820 820 ASSERT(cdir != NULL || p->p_parent == &p0);
821 821
822 822 /*
823 823 * Release resource controls, as they are no longer enforceable.
824 824 */
825 825 rctl_set_free(p->p_rctls);
826 826
827 827 /*
828 828 * Decrement tk_nlwps counter for our task.max-lwps resource control.
829 829 * An extended accounting record, if that facility is active, is
830 830 * scheduled to be written. We cannot give up task and project
831 831 * membership at this point because that would allow zombies to escape
832 832 * from the max-processes resource controls. Zombies stay in their
833 833 * current task and project until the process table slot is released
834 834 * in freeproc().
835 835 */
836 836 tk = p->p_task;
837 837
838 838 mutex_enter(&p->p_zone->zone_nlwps_lock);
839 839 tk->tk_nlwps--;
840 840 tk->tk_proj->kpj_nlwps--;
841 841 p->p_zone->zone_nlwps--;
842 842 mutex_exit(&p->p_zone->zone_nlwps_lock);
843 843
844 844 /*
845 845 * Clear the lwp directory and the lwpid hash table
846 846 * now that /proc can't bother us any more.
847 847 * We free the memory below, after dropping p->p_lock.
848 848 */
849 849 lwpdir = p->p_lwpdir;
850 850 lwpdir_sz = p->p_lwpdir_sz;
851 851 tidhash = p->p_tidhash;
852 852 tidhash_sz = p->p_tidhash_sz;
853 853 ret_tidhash = p->p_ret_tidhash;
854 854 p->p_lwpdir = NULL;
855 855 p->p_lwpfree = NULL;
856 856 p->p_lwpdir_sz = 0;
857 857 p->p_tidhash = NULL;
858 858 p->p_tidhash_sz = 0;
859 859 p->p_ret_tidhash = NULL;
860 860
861 861 /*
862 862 * If the process has context ops installed, call the exit routine
863 863 * on behalf of this last remaining thread. Normally exitpctx() is
864 864 * called during thread_exit() or lwp_exit(), but because this is the
865 865 * last thread in the process, we must call it here. By the time
866 866 * thread_exit() is called (below), the association with the relevant
867 867 * process has been lost.
868 868 *
869 869 * We also free the context here.
870 870 */
871 871 if (p->p_pctx) {
872 872 kpreempt_disable();
873 873 exitpctx(p);
874 874 kpreempt_enable();
875 875
876 876 freepctx(p, 0);
877 877 }
878 878
879 879 /*
880 880 * curthread's proc pointer is changed to point to the 'sched'
881 881 * process for the corresponding zone, except in the case when
882 882 * the exiting process is in fact a zsched instance, in which
883 883 * case the proc pointer is set to p0. We do so, so that the
884 884 * process still points at the right zone when we call the VN_RELE()
885 885 * below.
886 886 *
887 887 * This is because curthread's original proc pointer can be freed as
888 888 * soon as the child sends a SIGCLD to its parent. We use zsched so
889 889 * that for user processes, even in the final moments of death, the
890 890 * process is still associated with its zone.
891 891 */
892 892 if (p != t->t_procp->p_zone->zone_zsched)
893 893 t->t_procp = t->t_procp->p_zone->zone_zsched;
894 894 else
895 895 t->t_procp = &p0;
896 896
897 897 mutex_exit(&p->p_lock);
898 898 if (!evaporate) {
899 899 /*
900 900 * The brand specific code only happens when the brand has a
901 901 * function to call in place of sigcld and the parent of the
902 902 * exiting process is not the global zone init. If the parent
903 903 * is the global zone init, then the process was reparented,
904 904 * and we don't want brand code delivering possibly strange
905 905 * signals to init. Also, init is not branded, so any brand
906 906 * specific exit data will not be picked up by init anyway.
907 907 */
908 908 if (PROC_IS_BRANDED(p) &&
909 909 BROP(p)->b_exit_with_sig != NULL &&
910 910 p->p_ppid != 1) {
911 911 /*
912 912 * The code for _fini that could unload the brand_t
913 913 * blocks until the count of zones using the module
914 914 * reaches zero. Zones decrement the refcount on their
915 915 * brands only after all user tasks in that zone have
916 916 * exited and been waited on. The decrement on the
917 917 * brand's refcount happen in zone_destroy(). That
918 918 * depends on zone_shutdown() having been completed.
919 919 * zone_shutdown() includes a call to zone_empty(),
920 920 * where the zone waits for itself to reach the state
921 921 * ZONE_IS_EMPTY. This state is only set in either
922 922 * zone_shutdown(), when there are no user processes as
923 923 * the zone enters this function, or in
924 924 * zone_task_rele(). zone_task_rele() is called from
925 925 * code triggered by waiting on processes, not by the
926 926 * processes exiting through proc_exit(). This means
927 927 * all the branded processes that could exist for a
928 928 * specific brand_t must exit and get reaped before the
929 929 * refcount on the brand_t can reach 0. _fini will
930 930 * never unload the corresponding brand module before
931 931 * proc_exit finishes execution for all processes
932 932 * branded with a particular brand_t, which makes the
933 933 * operation below safe to do. Brands that wish to use
934 934 * this mechanism must wait in _fini as described
935 935 * above.
936 936 */
937 937 BROP(p)->b_exit_with_sig(p, sqp);
938 938 } else {
939 939 p->p_pidflag &= ~CLDPEND;
940 940 sigcld(p, sqp);
941 941 }
942 942
943 943 } else {
944 944 /*
945 945 * Do what sigcld() would do if the disposition
946 946 * of the SIGCHLD signal were set to be ignored.
947 947 */
948 948 cv_broadcast(&p->p_srwchan_cv);
949 949 freeproc(p);
950 950 }
951 951 mutex_exit(&pidlock);
952 952
953 953 /*
954 954 * We don't release u_cdir and u_rdir until SZOMB is set.
955 955 * This protects us against dofusers().
956 956 */
957 957 if (cdir)
958 958 VN_RELE(cdir);
959 959 if (rdir)
960 960 VN_RELE(rdir);
961 961 if (cwd)
962 962 refstr_rele(cwd);
963 963
964 964 /*
965 965 * task_rele() may ultimately cause the zone to go away (or
966 966 * may cause the last user process in a zone to go away, which
967 967 * signals zsched to go away). So prior to this call, we must
968 968 * no longer point at zsched.
969 969 */
970 970 t->t_procp = &p0;
971 971
972 972 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
973 973 kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
974 974 while (ret_tidhash != NULL) {
975 975 ret_tidhash_t *next = ret_tidhash->rth_next;
976 976 kmem_free(ret_tidhash->rth_tidhash,
977 977 ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
978 978 kmem_free(ret_tidhash, sizeof (*ret_tidhash));
979 979 ret_tidhash = next;
980 980 }
981 981
982 982 thread_exit();
983 983 /* NOTREACHED */
984 984 }
985 985
986 986 /*
987 987 * Format siginfo structure for wait system calls.
988 988 */
989 989 void
990 990 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
991 991 {
992 992 ASSERT(MUTEX_HELD(&pidlock));
993 993
994 994 bzero(ip, sizeof (k_siginfo_t));
995 995 ip->si_signo = SIGCLD;
996 996 ip->si_code = pp->p_wcode;
997 997 ip->si_pid = pp->p_pid;
998 998 ip->si_ctid = PRCTID(pp);
999 999 ip->si_zoneid = pp->p_zone->zone_id;
1000 1000 ip->si_status = pp->p_wdata;
1001 1001 ip->si_stime = pp->p_stime;
1002 1002 ip->si_utime = pp->p_utime;
1003 1003
1004 1004 if (waitflag) {
1005 1005 pp->p_wcode = 0;
1006 1006 pp->p_wdata = 0;
1007 1007 pp->p_pidflag &= ~CLDPEND;
1008 1008 }
1009 1009 }
1010 1010
1011 1011 /*
1012 1012 * Wait system call.
1013 1013 * Search for a terminated (zombie) child,
1014 1014 * finally lay it to rest, and collect its status.
1015 1015 * Look also for stopped children,
1016 1016 * and pass back status from them.
1017 1017 */
1018 1018 int
1019 1019 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
1020 1020 {
1021 1021 proc_t *cp, *pp;
1022 1022 int waitflag = !(options & WNOWAIT);
1023 1023 boolean_t have_brand_helper = B_FALSE;
1024 1024
1025 1025 /*
1026 1026 * Obsolete flag, defined here only for binary compatibility
1027 1027 * with old statically linked executables. Delete this when
1028 1028 * we no longer care about these old and broken applications.
1029 1029 */
1030 1030 #define _WNOCHLD 0400
1031 1031 options &= ~_WNOCHLD;
1032 1032
1033 1033 if (options == 0 || (options & ~WOPTMASK))
1034 1034 return (EINVAL);
1035 1035
1036 1036 switch (idtype) {
1037 1037 case P_PID:
1038 1038 case P_PGID:
1039 1039 if (id < 0 || id >= maxpid)
1040 1040 return (EINVAL);
1041 1041 /* FALLTHROUGH */
1042 1042 case P_ALL:
1043 1043 break;
1044 1044 default:
1045 1045 return (EINVAL);
1046 1046 }
1047 1047
1048 1048 pp = ttoproc(curthread);
1049 1049
1050 1050 /*
1051 1051 * Anytime you are looking for a process, you take pidlock to prevent
1052 1052 * things from changing as you look.
1053 1053 */
1054 1054 mutex_enter(&pidlock);
1055 1055
1056 1056 /*
1057 1057 * if we are only looking for exited processes and child_ns list
1058 1058 * is empty no reason to look at all children.
1059 1059 */
1060 1060 if (idtype == P_ALL &&
1061 1061 (options & ~WNOWAIT) == (WNOHANG | WEXITED) &&
1062 1062 pp->p_child_ns == NULL) {
1063 1063 if (pp->p_child) {
1064 1064 mutex_exit(&pidlock);
1065 1065 bzero(ip, sizeof (k_siginfo_t));
1066 1066 return (0);
1067 1067 }
1068 1068 mutex_exit(&pidlock);
1069 1069 return (ECHILD);
1070 1070 }
1071 1071
1072 1072 if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) {
1073 1073 have_brand_helper = B_TRUE;
1074 1074 }
1075 1075
1076 1076 while (pp->p_child != NULL || have_brand_helper) {
1077 1077 boolean_t brand_wants_wait = B_FALSE;
1078 1078 int proc_gone = 0;
1079 1079 int found = 0;
1080 1080
1081 1081 /*
1082 1082 * Give the brand a chance to return synthetic results from
1083 1083 * this waitid() call before we do the real thing.
1084 1084 */
1085 1085 if (have_brand_helper) {
1086 1086 int ret;
1087 1087
1088 1088 if (BROP(pp)->b_waitid_helper(idtype, id, ip, options,
1089 1089 &brand_wants_wait, &ret) == 0) {
1090 1090 mutex_exit(&pidlock);
1091 1091 return (ret);
1092 1092 }
1093 1093
1094 1094 if (pp->p_child == NULL) {
1095 1095 goto no_real_children;
1096 1096 }
1097 1097 }
1098 1098
1099 1099 /*
1100 1100 * Look for interesting children in the newstate list.
1101 1101 */
1102 1102 VERIFY(pp->p_child != NULL);
1103 1103 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
1104 1104 if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
1105 1105 continue;
1106 1106 if (idtype == P_PID && id != cp->p_pid)
1107 1107 continue;
1108 1108 if (idtype == P_PGID && id != cp->p_pgrp)
1109 1109 continue;
1110 1110 if (PROC_IS_BRANDED(pp)) {
1111 1111 if (BROP(pp)->b_wait_filter != NULL &&
1112 1112 BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
1113 1113 continue;
1114 1114 }
1115 1115
1116 1116 switch (cp->p_wcode) {
1117 1117
1118 1118 case CLD_TRAPPED:
1119 1119 case CLD_STOPPED:
1120 1120 case CLD_CONTINUED:
1121 1121 cmn_err(CE_PANIC,
1122 1122 "waitid: wrong state %d on the p_newstate"
1123 1123 " list", cp->p_wcode);
1124 1124 break;
1125 1125
1126 1126 case CLD_EXITED:
1127 1127 case CLD_DUMPED:
1128 1128 case CLD_KILLED:
1129 1129 if (!(options & WEXITED)) {
1130 1130 /*
1131 1131 * Count how many are already gone
1132 1132 * for good.
1133 1133 */
1134 1134 proc_gone++;
1135 1135 break;
1136 1136 }
1137 1137 if (!waitflag) {
1138 1138 winfo(cp, ip, 0);
1139 1139 } else {
1140 1140 winfo(cp, ip, 1);
1141 1141 freeproc(cp);
1142 1142 }
1143 1143 mutex_exit(&pidlock);
1144 1144 if (waitflag) { /* accept SIGCLD */
1145 1145 sigcld_delete(ip);
1146 1146 sigcld_repost();
1147 1147 }
1148 1148 return (0);
1149 1149 }
1150 1150
1151 1151 if (idtype == P_PID)
1152 1152 break;
1153 1153 }
1154 1154
1155 1155 /*
1156 1156 * Wow! None of the threads on the p_sibling_ns list were
1157 1157 * interesting threads. Check all the kids!
1158 1158 */
1159 1159 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
1160 1160 if (idtype == P_PID && id != cp->p_pid)
1161 1161 continue;
1162 1162 if (idtype == P_PGID && id != cp->p_pgrp)
1163 1163 continue;
1164 1164 if (PROC_IS_BRANDED(pp)) {
1165 1165 if (BROP(pp)->b_wait_filter != NULL &&
1166 1166 BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
1167 1167 continue;
1168 1168 }
1169 1169
1170 1170 switch (cp->p_wcode) {
1171 1171 case CLD_TRAPPED:
1172 1172 if (!(options & WTRAPPED))
1173 1173 break;
1174 1174 winfo(cp, ip, waitflag);
1175 1175 mutex_exit(&pidlock);
1176 1176 if (waitflag) { /* accept SIGCLD */
1177 1177 sigcld_delete(ip);
1178 1178 sigcld_repost();
1179 1179 }
1180 1180 return (0);
1181 1181
1182 1182 case CLD_STOPPED:
1183 1183 if (!(options & WSTOPPED))
1184 1184 break;
1185 1185 /* Is it still stopped? */
1186 1186 mutex_enter(&cp->p_lock);
1187 1187 if (!jobstopped(cp)) {
1188 1188 mutex_exit(&cp->p_lock);
1189 1189 break;
1190 1190 }
1191 1191 mutex_exit(&cp->p_lock);
1192 1192 winfo(cp, ip, waitflag);
1193 1193 mutex_exit(&pidlock);
1194 1194 if (waitflag) { /* accept SIGCLD */
1195 1195 sigcld_delete(ip);
1196 1196 sigcld_repost();
1197 1197 }
1198 1198 return (0);
1199 1199
1200 1200 case CLD_CONTINUED:
1201 1201 if (!(options & WCONTINUED))
1202 1202 break;
1203 1203 winfo(cp, ip, waitflag);
1204 1204 mutex_exit(&pidlock);
1205 1205 if (waitflag) { /* accept SIGCLD */
1206 1206 sigcld_delete(ip);
1207 1207 sigcld_repost();
1208 1208 }
1209 1209 return (0);
1210 1210
1211 1211 case CLD_EXITED:
1212 1212 case CLD_DUMPED:
1213 1213 case CLD_KILLED:
1214 1214 if (idtype != P_PID &&
1215 1215 (cp->p_pidflag & CLDWAITPID))
1216 1216 continue;
1217 1217 /*
1218 1218 * Don't complain if a process was found in
1219 1219 * the first loop but we broke out of the loop
1220 1220 * because of the arguments passed to us.
1221 1221 */
1222 1222 if (proc_gone == 0) {
1223 1223 cmn_err(CE_PANIC,
1224 1224 "waitid: wrong state on the"
1225 1225 " p_child list");
1226 1226 } else {
1227 1227 break;
1228 1228 }
1229 1229 }
1230 1230
1231 1231 found++;
1232 1232
1233 1233 if (idtype == P_PID)
1234 1234 break;
1235 1235 }
1236 1236
1237 1237 no_real_children:
1238 1238 /*
1239 1239 * If we found no interesting processes at all,
1240 1240 * break out and return ECHILD.
1241 1241 */
1242 1242 if (!brand_wants_wait && (found + proc_gone == 0))
1243 1243 break;
1244 1244
1245 1245 if (options & WNOHANG) {
1246 1246 mutex_exit(&pidlock);
1247 1247 bzero(ip, sizeof (k_siginfo_t));
1248 1248 /*
1249 1249 * We should set ip->si_signo = SIGCLD,
1250 1250 * but there is an SVVS test that expects
1251 1251 * ip->si_signo to be zero in this case.
1252 1252 */
1253 1253 return (0);
1254 1254 }
1255 1255
1256 1256 /*
1257 1257 * If we found no processes of interest that could
1258 1258 * change state while we wait, we don't wait at all.
1259 1259 * Get out with ECHILD according to SVID.
1260 1260 */
1261 1261 if (!brand_wants_wait && (found == proc_gone))
1262 1262 break;
1263 1263
1264 1264 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
1265 1265 mutex_exit(&pidlock);
1266 1266 return (EINTR);
1267 1267 }
1268 1268 }
1269 1269 mutex_exit(&pidlock);
1270 1270 return (ECHILD);
1271 1271 }
1272 1272
1273 1273 int
1274 1274 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1275 1275 {
1276 1276 int error;
1277 1277 k_siginfo_t info;
1278 1278
1279 1279 if (error = waitid(idtype, id, &info, options))
1280 1280 return (set_errno(error));
1281 1281 if (copyout(&info, infop, sizeof (k_siginfo_t)))
1282 1282 return (set_errno(EFAULT));
1283 1283 return (0);
1284 1284 }
1285 1285
1286 1286 #ifdef _SYSCALL32_IMPL
1287 1287
1288 1288 int
1289 1289 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1290 1290 {
1291 1291 int error;
1292 1292 k_siginfo_t info;
1293 1293 siginfo32_t info32;
1294 1294
1295 1295 if (error = waitid(idtype, id, &info, options))
1296 1296 return (set_errno(error));
1297 1297 siginfo_kto32(&info, &info32);
1298 1298 if (copyout(&info32, infop, sizeof (info32)))
1299 1299 return (set_errno(EFAULT));
1300 1300 return (0);
1301 1301 }
1302 1302
1303 1303 #endif /* _SYSCALL32_IMPL */
1304 1304
1305 1305 void
1306 1306 proc_detach(proc_t *p)
1307 1307 {
1308 1308 proc_t *q;
1309 1309
1310 1310 ASSERT(MUTEX_HELD(&pidlock));
1311 1311
1312 1312 q = p->p_parent;
1313 1313 ASSERT(q != NULL);
1314 1314
1315 1315 /*
1316 1316 * Take it off the newstate list of its parent
1317 1317 */
1318 1318 delete_ns(q, p);
1319 1319
1320 1320 if (q->p_child == p) {
1321 1321 q->p_child = p->p_sibling;
1322 1322 /*
1323 1323 * If the parent has no children, it better not
1324 1324 * have any with new states either!
1325 1325 */
1326 1326 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL);
1327 1327 }
1328 1328
1329 1329 if (p->p_sibling) {
1330 1330 p->p_sibling->p_psibling = p->p_psibling;
1331 1331 }
1332 1332
1333 1333 if (p->p_psibling) {
1334 1334 p->p_psibling->p_sibling = p->p_sibling;
1335 1335 }
1336 1336 }
1337 1337
1338 1338 /*
1339 1339 * Remove zombie children from the process table.
1340 1340 */
1341 1341 void
1342 1342 freeproc(proc_t *p)
1343 1343 {
1344 1344 proc_t *q;
1345 1345 task_t *tk;
1346 1346
1347 1347 ASSERT(p->p_stat == SZOMB);
1348 1348 ASSERT(p->p_tlist == NULL);
1349 1349 ASSERT(MUTEX_HELD(&pidlock));
1350 1350
1351 1351 sigdelq(p, NULL, 0);
1352 1352 if (p->p_killsqp) {
1353 1353 siginfofree(p->p_killsqp);
1354 1354 p->p_killsqp = NULL;
1355 1355 }
1356 1356
1357 1357 /* Clear any remaining brand data */
1358 1358 if (PROC_IS_BRANDED(p)) {
1359 1359 brand_clearbrand(p, B_FALSE);
1360 1360 }
1361 1361
1362 1362
1363 1363 prfree(p); /* inform /proc */
1364 1364
1365 1365 /*
1366 1366 * Don't free the init processes.
1367 1367 * Other dying processes will access it.
1368 1368 */
1369 1369 if (p == proc_init)
1370 1370 return;
1371 1371
1372 1372
1373 1373 /*
1374 1374 * We wait until now to free the cred structure because a
1375 1375 * zombie process's credentials may be examined by /proc.
1376 1376 * No cred locking needed because there are no threads at this point.
1377 1377 */
1378 1378 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
1379 1379 crfree(p->p_cred);
1380 1380 if (p->p_corefile != NULL) {
1381 1381 corectl_path_rele(p->p_corefile);
1382 1382 p->p_corefile = NULL;
1383 1383 }
1384 1384 if (p->p_content != NULL) {
1385 1385 corectl_content_rele(p->p_content);
1386 1386 p->p_content = NULL;
1387 1387 }
1388 1388
1389 1389 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) ||
1390 1390 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) {
1391 1391 /*
1392 1392 * This should still do the right thing since p_utime/stime
1393 1393 * get set to the correct value on process exit, so it
1394 1394 * should get properly updated
1395 1395 */
1396 1396 p->p_nextofkin->p_cutime += p->p_utime;
1397 1397 p->p_nextofkin->p_cstime += p->p_stime;
1398 1398
1399 1399 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER];
1400 1400 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM];
1401 1401 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP];
1402 1402 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT];
1403 1403 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT];
1404 1404 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT];
1405 1405 p->p_nextofkin->p_cacct[LMS_USER_LOCK]
1406 1406 += p->p_acct[LMS_USER_LOCK];
1407 1407 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP];
1408 1408 p->p_nextofkin->p_cacct[LMS_WAIT_CPU]
1409 1409 += p->p_acct[LMS_WAIT_CPU];
1410 1410 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED];
1411 1411
1412 1412 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt;
1413 1413 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt;
1414 1414 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap;
1415 1415 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock;
1416 1416 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock;
1417 1417 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd;
1418 1418 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv;
1419 1419 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals;
1420 1420 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw;
1421 1421 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw;
1422 1422 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc;
1423 1423 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch;
1424 1424
1425 1425 }
1426 1426
1427 1427 q = p->p_nextofkin;
1428 1428 if (q && q->p_orphan == p)
1429 1429 q->p_orphan = p->p_nextorph;
1430 1430 else if (q) {
1431 1431 for (q = q->p_orphan; q; q = q->p_nextorph)
1432 1432 if (q->p_nextorph == p)
1433 1433 break;
1434 1434 ASSERT(q && q->p_nextorph == p);
1435 1435 q->p_nextorph = p->p_nextorph;
1436 1436 }
1437 1437
1438 1438 /*
1439 1439 * The process table slot is being freed, so it is now safe to give up
1440 1440 * task and project membership.
1441 1441 */
1442 1442 mutex_enter(&p->p_lock);
1443 1443 tk = p->p_task;
1444 1444 task_detach(p);
1445 1445 mutex_exit(&p->p_lock);
1446 1446
1447 1447 proc_detach(p);
1448 1448 pid_exit(p, tk); /* frees pid and proc structure */
1449 1449
1450 1450 task_rele(tk);
1451 1451 }
1452 1452
1453 1453 /*
1454 1454 * Delete process "child" from the newstate list of process "parent"
1455 1455 */
1456 1456 void
1457 1457 delete_ns(proc_t *parent, proc_t *child)
1458 1458 {
1459 1459 proc_t **ns;
1460 1460
1461 1461 ASSERT(MUTEX_HELD(&pidlock));
1462 1462 ASSERT(child->p_parent == parent);
1463 1463 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) {
1464 1464 if (*ns == child) {
1465 1465
1466 1466 ASSERT((*ns)->p_parent == parent);
1467 1467
1468 1468 *ns = child->p_sibling_ns;
1469 1469 child->p_sibling_ns = NULL;
1470 1470 return;
1471 1471 }
1472 1472 }
1473 1473 }
1474 1474
1475 1475 /*
1476 1476 * Add process "child" to the new state list of process "parent"
1477 1477 */
1478 1478 void
1479 1479 add_ns(proc_t *parent, proc_t *child)
1480 1480 {
1481 1481 ASSERT(child->p_sibling_ns == NULL);
1482 1482 child->p_sibling_ns = parent->p_child_ns;
1483 1483 parent->p_child_ns = child;
1484 1484 }