Print this page
14019 Allow more control over zone init exit actions (fix mismerge)
14019 Allow more control over zone init exit actions
Portions contributed by: Joshua M. Clulow <jmc@joyent.com>
Portions contributed by: Andy Fiddaman <andy@omnios.org>
Reviewed by: C Fraire <cfraire@me.com>
Reviewed by: Gordon Ross <Gordon.W.Ross@gmail.com>
Approved by: Robert Mustacchi <rm@fingolfin.org>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/os/exit.c
+++ new/usr/src/uts/common/os/exit.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2018 Joyent, Inc.
25 25 * Copyright 2020 Oxide Computer Company
26 26 * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
27 27 */
28 28
29 29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
30 30
31 31 #include <sys/types.h>
32 32 #include <sys/param.h>
33 33 #include <sys/sysmacros.h>
34 34 #include <sys/systm.h>
35 35 #include <sys/cred.h>
36 36 #include <sys/user.h>
37 37 #include <sys/errno.h>
38 38 #include <sys/proc.h>
39 39 #include <sys/ucontext.h>
40 40 #include <sys/procfs.h>
41 41 #include <sys/vnode.h>
42 42 #include <sys/acct.h>
43 43 #include <sys/var.h>
44 44 #include <sys/cmn_err.h>
45 45 #include <sys/debug.h>
46 46 #include <sys/wait.h>
47 47 #include <sys/siginfo.h>
48 48 #include <sys/procset.h>
49 49 #include <sys/class.h>
50 50 #include <sys/file.h>
51 51 #include <sys/session.h>
52 52 #include <sys/kmem.h>
53 53 #include <sys/vtrace.h>
54 54 #include <sys/prsystm.h>
55 55 #include <sys/ipc.h>
56 56 #include <sys/sem_impl.h>
57 57 #include <c2/audit.h>
58 58 #include <sys/aio_impl.h>
59 59 #include <vm/as.h>
60 60 #include <sys/poll.h>
61 61 #include <sys/door.h>
62 62 #include <sys/lwpchan_impl.h>
63 63 #include <sys/utrap.h>
64 64 #include <sys/task.h>
65 65 #include <sys/exacct.h>
66 66 #include <sys/cyclic.h>
67 67 #include <sys/schedctl.h>
68 68 #include <sys/rctl.h>
69 69 #include <sys/contract_impl.h>
70 70 #include <sys/contract/process_impl.h>
71 71 #include <sys/list.h>
72 72 #include <sys/dtrace.h>
73 73 #include <sys/pool.h>
74 74 #include <sys/sdt.h>
75 75 #include <sys/corectl.h>
76 76 #include <sys/core.h>
77 77 #include <sys/brand.h>
78 78 #include <sys/libc_kernel.h>
79 79
80 80 /*
81 81 * convert code/data pair into old style wait status
82 82 */
83 83 int
84 84 wstat(int code, int data)
85 85 {
86 86 int stat = (data & 0377);
87 87
88 88 switch (code) {
89 89 case CLD_EXITED:
90 90 stat <<= 8;
91 91 break;
92 92 case CLD_DUMPED:
93 93 stat |= WCOREFLG;
94 94 break;
95 95 case CLD_KILLED:
96 96 break;
97 97 case CLD_TRAPPED:
98 98 case CLD_STOPPED:
99 99 stat <<= 8;
100 100 stat |= WSTOPFLG;
101 101 break;
102 102 case CLD_CONTINUED:
103 103 stat = WCONTFLG;
104 104 break;
105 105 default:
106 106 cmn_err(CE_PANIC, "wstat: bad code");
107 107 /* NOTREACHED */
108 108 }
109 109 return (stat);
110 110 }
111 111
/*
 * Render a human-readable description of an exit (why/what pair) into
 * the caller-supplied buffer, returning a pointer to that buffer.
 */
static char *
exit_reason(char *buf, size_t bufsz, int what, int why)
{
	if (why == CLD_EXITED) {
		(void) snprintf(buf, bufsz, "exited with status %d", what);
	} else if (why == CLD_KILLED) {
		(void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
	} else if (why == CLD_DUMPED) {
		(void) snprintf(buf, bufsz, "core dumped on signal %d", what);
	} else {
		(void) snprintf(buf, bufsz, "encountered unknown error "
		    "(%d, %d)", why, what);
	}

	return (buf);
}
133 133
134 134 /*
135 135 * exit system call: pass back caller's arg.
136 136 */
void
rexit(int rval)
{
	/* A voluntary exit: report CLD_EXITED with the caller's status. */
	exit(CLD_EXITED, rval);
}
142 142
143 143 /*
144 144 * Bump the init_restarts kstat and let interested parties know about the
145 145 * restart.
146 146 */
147 147 static void
148 148 restart_init_notify(zone_t *zone)
149 149 {
150 150 nvlist_t *nvl = NULL;
151 151
152 152 zone->zone_proc_init_restarts++;
153 153
154 154 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0 &&
155 155 nvlist_add_uint32(nvl, ZONE_CB_RESTARTS,
156 156 zone->zone_proc_init_restarts) == 0) {
157 157 zone_sysevent_publish(zone, ZONE_EVENT_INIT_CLASS,
158 158 ZONE_EVENT_INIT_RESTART_SC, nvl);
|
↓ open down ↓ |
158 lines elided |
↑ open up ↑ |
159 159 }
160 160
161 161 nvlist_free(nvl);
162 162 }
163 163
164 164 /*
165 165 * Called by proc_exit() when a zone's init exits, presumably because
166 166 * it failed. As long as the given zone is still in the "running"
167 167 * state, we will re-exec() init, but first we need to reset things
168 168 * which are usually inherited across exec() but will break init's
169 - * assumption that it is being exec()'d from a virgin process. Most
169 + * assumption that it is being exec()'d from a virgin process. Most
170 170 * importantly this includes closing all file descriptors (exec only
171 171 * closes those marked close-on-exec) and resetting signals (exec only
172 172 * resets handled signals, and we need to clear any signals which
173 173 * killed init). Anything else that exec(2) says would be inherited,
174 174 * but would affect the execution of init, needs to be reset.
175 175 */
static int
restart_init(int what, int why)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	/* zsched is used below as the template for creds and secflags. */
	proc_t *pp = p->p_zone->zone_zsched;
	user_t *up = PTOU(p);

	vnode_t *oldcd, *oldrd;
	int i, err;
	char reason_buf[64];

	/*
	 * Let zone admin (and global zone admin if this is for a non-global
	 * zone) know that init has failed and will be restarted.
	 */
	zcmn_err(p->p_zone->zone_id, CE_WARN,
	    "init(1M) %s: restarting automatically",
	    exit_reason(reason_buf, sizeof (reason_buf), what, why));

	if (!INGLOBALZONE(p)) {
		cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
		    "restarting automatically",
		    p->p_zone->zone_name, p->p_pid, reason_buf);
	}

	/*
	 * Remove any fpollinfo_t's for this (last) thread from our file
	 * descriptors so closeall() can ASSERT() that they're all gone.
	 * Then close all open file descriptors in the process.
	 */
	pollcleanup();
	closeall(P_FINFO(p));

	/*
	 * Grab p_lock and begin clearing miscellaneous global process
	 * state that needs to be reset before we exec the new init(1M).
	 */

	mutex_enter(&p->p_lock);
	prbarrier(p);

	p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
	up->u_cmask = CMASK;

	/* Clear held and pending signal sets on the remaining thread. */
	sigemptyset(&t->t_hold);
	sigemptyset(&t->t_sig);
	sigemptyset(&t->t_extsig);

	/* ... and on the process itself. */
	sigemptyset(&p->p_sig);
	sigemptyset(&p->p_extsig);

	/* Discard any queued siginfo for the thread and the process. */
	sigdelq(p, t, 0);
	sigdelq(p, NULL, 0);

	if (p->p_killsqp) {
		siginfofree(p->p_killsqp);
		p->p_killsqp = NULL;
	}

	/*
	 * Reset any signals that are ignored back to the default disposition.
	 * Other u_signal members will be cleared when exec calls sigdefault().
	 */
	for (i = 1; i < NSIG; i++) {
		if (up->u_signal[i - 1] == SIG_IGN) {
			up->u_signal[i - 1] = SIG_DFL;
			sigemptyset(&up->u_sigmask[i - 1]);
		}
	}

	/*
	 * Clear the current signal, any signal info associated with it, and
	 * any signal information from contracts and/or contract templates.
	 */
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	if (lwp->lwp_curinfo != NULL) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}
	lwp_ctmpl_clear(lwp, B_FALSE);

	/*
	 * Reset both the process root directory and the current working
	 * directory to the root of the zone just as we do during boot.
	 * The old vnodes are released after p_lock is dropped, below.
	 */
	VN_HOLD(p->p_zone->zone_rootvp);
	oldrd = up->u_rdir;
	up->u_rdir = p->p_zone->zone_rootvp;

	VN_HOLD(p->p_zone->zone_rootvp);
	oldcd = up->u_cdir;
	up->u_cdir = p->p_zone->zone_rootvp;

	if (up->u_cwd != NULL) {
		refstr_rele(up->u_cwd);
		up->u_cwd = NULL;
	}

	/* Reset security flags from the zone's zsched process. */
	mutex_enter(&pp->p_lock);
	p->p_secflags = pp->p_secflags;
	mutex_exit(&pp->p_lock);

	mutex_exit(&p->p_lock);

	/* Now that p_lock is dropped, release the old root and cwd vnodes. */
	if (oldrd != NULL)
		VN_RELE(oldrd);
	if (oldcd != NULL)
		VN_RELE(oldcd);

	/*
	 * It's possible that a zone's init will have become privilege aware
	 * and modified privilege sets; reset them.
	 */
	cred_t *oldcr, *newcr;

	/* Take a reference to zsched's cred while both cred locks are held. */
	mutex_enter(&p->p_crlock);
	oldcr = p->p_cred;
	mutex_enter(&pp->p_crlock);
	crhold(newcr = p->p_cred = pp->p_cred);
	mutex_exit(&pp->p_crlock);
	mutex_exit(&p->p_crlock);
	crfree(oldcr);
	/* Additional hold for the current thread - expected by crset() */
	crhold(newcr);
	crset(p, newcr);

	/* Free the controlling tty.  (freectty() always assumes curproc.) */
	ASSERT(p == curproc);
	(void) freectty(B_TRUE);

	restart_init_notify(p->p_zone);

	/*
	 * Now exec() the new init(1M) on top of the current process.  If we
	 * succeed, the caller will treat this like a successful system call.
	 * If we fail, we issue messages and the caller will proceed with exit.
	 */
	err = exec_init(p->p_zone->zone_initname, NULL);

	if (err == 0)
		return (0);

	zcmn_err(p->p_zone->zone_id, CE_WARN,
	    "failed to restart init(1M) (err=%d): system reboot required", err);

	if (!INGLOBALZONE(p)) {
		cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
		    "(pid %d, err=%d): zoneadm(1M) boot required",
		    p->p_zone->zone_name, p->p_pid, err);
	}

	return (-1);
}
333 333
/*
 * Release resources.
 * Enter zombie state.
 * Wake up parent and init processes,
 * and dispose of children.
 */
340 340 void
341 341 exit(int why, int what)
342 342 {
343 343 /*
344 344 * If proc_exit() fails, then some other lwp in the process
345 345 * got there first. We just have to call lwp_exit() to allow
346 - * the other lwp to finish exiting the process. Otherwise we're
346 + * the other lwp to finish exiting the process. Otherwise we're
347 347 * restarting init, and should return.
348 348 */
349 349 if (proc_exit(why, what) != 0) {
350 350 mutex_enter(&curproc->p_lock);
351 351 ASSERT(curproc->p_flag & SEXITLWPS);
352 352 lwp_exit();
353 353 /* NOTREACHED */
354 354 }
355 355 }
356 356
357 357 /*
358 358 * Set the SEXITING flag on the process, after making sure /proc does
359 - * not have it locked. This is done in more places than proc_exit(),
359 + * not have it locked. This is done in more places than proc_exit(),
360 360 * so it is a separate function.
361 361 */
void
proc_is_exiting(proc_t *p)
{
	mutex_enter(&p->p_lock);
	/* Wait for /proc to release the process before marking it. */
	prbarrier(p);
	p->p_flag |= SEXITING;
	mutex_exit(&p->p_lock);
}
370 370
/*
 * Return true if zone's init is restarted, false if exit processing should
 * proceed.
 */
static boolean_t
zone_init_exit(zone_t *z, int why, int what)
{
	/*
	 * Typically we don't let the zone's init exit unless zone_start_init()
	 * failed its exec, or we are shutting down the zone or the machine,
	 * although the various flags handled within this function will control
	 * the behavior.
	 *
	 * Since we are single threaded, we don't need to lock the following
	 * accesses to zone_proc_initpid.
	 */
	if (z->zone_boot_err != 0 ||
	    zone_status_get(z) >= ZONE_IS_SHUTTING_DOWN ||
	    zone_status_get(global_zone) >= ZONE_IS_SHUTTING_DOWN) {
		/*
		 * Clear the zone's init pid and proceed with exit processing.
		 */
		z->zone_proc_initpid = -1;
		return (B_FALSE);
	}

	/*
	 * There are a variety of configuration flags on the zone to control
	 * init exit behavior.
	 *
	 * If the init process should be restarted, the "zone_restart_init"
	 * member will be set.
	 */
	if (!z->zone_restart_init) {
		/*
		 * The zone has been setup to halt when init exits.  Record
		 * init's wait status for later retrieval before shutting down.
		 */
		z->zone_init_status = wstat(why, what);
		(void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, zone_kcred());
		z->zone_proc_initpid = -1;
		return (B_FALSE);
	}

	/*
	 * At this point we know we're configured to restart init, but there
	 * are various modifiers to that behavior.
	 */

	if (z->zone_reboot_on_init_exit) {
		/*
		 * Some init programs in branded zones do not tolerate a
		 * restart in the traditional manner; setting
		 * "zone_reboot_on_init_exit" will cause the entire zone to be
		 * rebooted instead.
		 */

		if (z->zone_restart_init_0) {
			/*
			 * Some init programs in branded zones only want to
			 * restart if they exit 0, otherwise the zone should
			 * shutdown.  Setting the "zone_restart_init_0" member
			 * controls this behavior.
			 */
			if (why == CLD_EXITED && what == 0) {
				/* Trigger a zone reboot */
				(void) zone_kadmin(A_REBOOT, 0, NULL,
				    zone_kcred());
			} else {
				/* Shutdown instead of reboot */
				(void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
				    zone_kcred());
			}
		} else {
			/* Trigger a zone reboot */
			(void) zone_kadmin(A_REBOOT, 0, NULL, zone_kcred());
		}

		z->zone_init_status = wstat(why, what);
		z->zone_proc_initpid = -1;
		return (B_FALSE);
	}

	if (z->zone_restart_init_0) {
		/*
		 * Some init programs in branded zones only want to restart if
		 * they exit 0, otherwise the zone should shutdown. Setting the
		 * "zone_restart_init_0" member controls this behavior.
		 *
		 * In this case we only restart init if it exited successfully.
		 */
		if (why == CLD_EXITED && what == 0 &&
		    restart_init(what, why) == 0) {
			return (B_TRUE);
		}
	} else {
		/*
		 * No restart modifiers on the zone, attempt to restart init.
		 */
		if (restart_init(what, why) == 0) {
			return (B_TRUE);
		}
	}

	/*
	 * The restart failed, or the criteria for a restart are not met;
	 * the zone will shut down.
	 */
	z->zone_init_status = wstat(why, what);
	(void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, zone_kcred());
	z->zone_proc_initpid = -1;
	return (B_FALSE);
}
483 483
484 484 /*
485 485 * Return value:
486 486 * 1 - exitlwps() failed, call (or continue) lwp_exit()
487 487 * 0 - restarting init. Return through system call path
488 488 */
489 489 int
490 490 proc_exit(int why, int what)
491 491 {
492 492 kthread_t *t = curthread;
493 493 klwp_t *lwp = ttolwp(t);
494 494 proc_t *p = ttoproc(t);
495 495 zone_t *z = p->p_zone;
496 496 timeout_id_t tmp_id;
497 497 int rv;
498 498 proc_t *q;
499 499 task_t *tk;
500 500 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
501 501 sigqueue_t *sqp;
502 502 lwpdir_t *lwpdir;
|
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
503 503 uint_t lwpdir_sz;
504 504 tidhash_t *tidhash;
505 505 uint_t tidhash_sz;
506 506 ret_tidhash_t *ret_tidhash;
507 507 refstr_t *cwd;
508 508 hrtime_t hrutime, hrstime;
509 509 int evaporate;
510 510
511 511 /*
512 512 * Stop and discard the process's lwps except for the current one,
513 - * unless some other lwp beat us to it. If exitlwps() fails then
513 + * unless some other lwp beat us to it. If exitlwps() fails then
514 514 * return and the calling lwp will call (or continue in) lwp_exit().
515 515 */
516 516 proc_is_exiting(p);
517 517 if (exitlwps(0) != 0)
518 518 return (1);
519 519
520 520 mutex_enter(&p->p_lock);
521 521 if (p->p_ttime > 0) {
522 522 /*
523 523 * Account any remaining ticks charged to this process
524 524 * on its way out.
525 525 */
526 526 (void) task_cpu_time_incr(p->p_task, p->p_ttime);
527 527 p->p_ttime = 0;
528 528 }
529 529 mutex_exit(&p->p_lock);
530 530
531 + /*
532 + * Don't let init exit unless zone_start_init() failed its exec, or
533 + * we are shutting down the zone or the machine.
534 + *
535 + * Since we are single threaded, we don't need to lock the
536 + * following accesses to zone_proc_initpid.
537 + */
531 538 if (p->p_pid == z->zone_proc_initpid) {
532 539 /* If zone's init restarts, we're done here. */
533 540 if (zone_init_exit(z, why, what))
534 541 return (0);
535 542 }
536 543
537 544 /*
538 545 * Delay firing probes (and performing brand cleanup) until after the
539 546 * zone_proc_initpid check. Cases which result in zone shutdown or
540 547 * restart via zone_kadmin eventually result in a call back to
541 548 * proc_exit.
542 549 */
543 550 DTRACE_PROC(lwp__exit);
544 551 DTRACE_PROC1(exit, int, why);
545 552
546 553 /*
547 554 * Will perform any brand specific proc exit processing. Since this
548 555 * is always the last lwp, will also perform lwp exit/free and proc
549 556 * exit. Brand data will be freed when the process is reaped.
550 557 */
551 558 if (PROC_IS_BRANDED(p)) {
552 559 BROP(p)->b_lwpexit(lwp);
553 560 BROP(p)->b_proc_exit(p);
554 561 /*
555 562 * To ensure that b_proc_exit has access to brand-specific data
556 563 * contained by the one remaining lwp, call the freelwp hook as
557 564 * the last part of this clean-up process.
558 565 */
559 566 BROP(p)->b_freelwp(lwp);
560 567 lwp_detach_brand_hdlrs(lwp);
561 568 }
562 569
563 570 lwp_pcb_exit();
564 571
565 572 /*
566 573 * Allocate a sigqueue now, before we grab locks.
567 574 * It will be given to sigcld(), below.
568 575 * Special case: If we will be making the process disappear
569 576 * without a trace because it is either:
570 577 * * an exiting SSYS process, or
571 578 * * a posix_spawn() vfork child who requests it,
572 579 * we don't bother to allocate a useless sigqueue.
573 580 */
574 581 evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
575 582 why == CLD_EXITED && what == _EVAPORATE);
576 583 if (!evaporate)
577 584 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
578 585
579 586 /*
580 587 * revoke any doors created by the process.
581 588 */
582 589 if (p->p_door_list)
583 590 door_exit();
584 591
585 592 /*
586 593 * Release schedctl data structures.
587 594 */
588 595 if (p->p_pagep)
589 596 schedctl_proc_cleanup();
590 597
591 598 /*
592 599 * make sure all pending kaio has completed.
593 600 */
594 601 if (p->p_aio)
595 602 aio_cleanup_exit();
596 603
597 604 /*
598 605 * discard the lwpchan cache.
599 606 */
600 607 if (p->p_lcp != NULL)
601 608 lwpchan_destroy_cache(0);
602 609
603 610 /*
604 611 * Clean up any DTrace helper actions or probes for the process.
605 612 */
606 613 if (p->p_dtrace_helpers != NULL) {
607 614 ASSERT(dtrace_helpers_cleanup != NULL);
608 615 (*dtrace_helpers_cleanup)(p);
609 616 }
610 617
611 618 /*
612 619 * Clean up any signalfd state for the process.
613 620 */
614 621 if (p->p_sigfd != NULL) {
615 622 VERIFY(sigfd_exit_helper != NULL);
616 623 (*sigfd_exit_helper)();
617 624 }
618 625
619 626 /* untimeout the realtime timers */
620 627 if (p->p_itimer != NULL)
621 628 timer_exit();
622 629
623 630 if ((tmp_id = p->p_alarmid) != 0) {
624 631 p->p_alarmid = 0;
625 632 (void) untimeout(tmp_id);
626 633 }
627 634
628 635 /*
629 636 * If we had generated any upanic(2) state, free that now.
630 637 */
631 638 if (p->p_upanic != NULL) {
632 639 kmem_free(p->p_upanic, PRUPANIC_BUFLEN);
633 640 p->p_upanic = NULL;
634 641 }
635 642
636 643 /*
637 644 * Remove any fpollinfo_t's for this (last) thread from our file
638 645 * descriptors so closeall() can ASSERT() that they're all gone.
639 646 */
640 647 pollcleanup();
641 648
642 649 if (p->p_rprof_cyclic != CYCLIC_NONE) {
643 650 mutex_enter(&cpu_lock);
644 651 cyclic_remove(p->p_rprof_cyclic);
645 652 mutex_exit(&cpu_lock);
646 653 }
647 654
648 655 mutex_enter(&p->p_lock);
649 656
650 657 /*
651 658 * Clean up any DTrace probes associated with this process.
652 659 */
653 660 if (p->p_dtrace_probes) {
654 661 ASSERT(dtrace_fasttrap_exit_ptr != NULL);
655 662 dtrace_fasttrap_exit_ptr(p);
656 663 }
657 664
658 665 while ((tmp_id = p->p_itimerid) != 0) {
659 666 p->p_itimerid = 0;
660 667 mutex_exit(&p->p_lock);
661 668 (void) untimeout(tmp_id);
662 669 mutex_enter(&p->p_lock);
663 670 }
664 671
665 672 lwp_cleanup();
666 673
667 674 /*
668 675 * We are about to exit; prevent our resource associations from
669 676 * being changed.
670 677 */
671 678 pool_barrier_enter();
672 679
673 680 /*
674 681 * Block the process against /proc now that we have really
675 682 * acquired p->p_lock (to manipulate p_tlist at least).
676 683 */
677 684 prbarrier(p);
678 685
679 686 sigfillset(&p->p_ignore);
680 687 sigemptyset(&p->p_siginfo);
681 688 sigemptyset(&p->p_sig);
682 689 sigemptyset(&p->p_extsig);
683 690 sigemptyset(&t->t_sig);
684 691 sigemptyset(&t->t_extsig);
685 692 sigemptyset(&p->p_sigmask);
686 693 sigdelq(p, t, 0);
687 694 lwp->lwp_cursig = 0;
688 695 lwp->lwp_extsig = 0;
689 696 p->p_flag &= ~(SKILLED | SEXTKILLED);
690 697 if (lwp->lwp_curinfo) {
691 698 siginfofree(lwp->lwp_curinfo);
692 699 lwp->lwp_curinfo = NULL;
693 700 }
694 701
695 702 t->t_proc_flag |= TP_LWPEXIT;
696 703 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
697 704 prlwpexit(t); /* notify /proc */
698 705 lwp_hash_out(p, t->t_tid);
699 706 prexit(p);
700 707
701 708 p->p_lwpcnt = 0;
702 709 p->p_tlist = NULL;
703 710 sigqfree(p);
704 711 term_mstate(t);
705 712 p->p_mterm = gethrtime();
706 713
707 714 exec_vp = p->p_exec;
708 715 execdir_vp = p->p_execdir;
709 716 p->p_exec = NULLVP;
710 717 p->p_execdir = NULLVP;
711 718 mutex_exit(&p->p_lock);
712 719
713 720 pr_free_watched_pages(p);
714 721
715 722 closeall(P_FINFO(p));
716 723
717 724 /* Free the controlling tty. (freectty() always assumes curproc.) */
718 725 ASSERT(p == curproc);
719 726 (void) freectty(B_TRUE);
720 727
721 728 #if defined(__sparc)
722 729 if (p->p_utraps != NULL)
723 730 utrap_free(p);
724 731 #endif
725 732 if (p->p_semacct) /* IPC semaphore exit */
726 733 semexit(p);
727 734 rv = wstat(why, what);
728 735
729 736 acct(rv);
730 737 exacct_commit_proc(p, rv);
731 738
732 739 /*
733 740 * Release any resources associated with C2 auditing
734 741 */
735 742 if (AU_AUDITING()) {
736 743 /*
737 744 * audit exit system call
738 745 */
739 746 audit_exit(why, what);
740 747 }
741 748
742 749 /*
743 750 * Free address space.
744 751 */
745 752 relvm();
746 753
747 754 if (exec_vp) {
748 755 /*
749 756 * Close this executable which has been opened when the process
750 757 * was created by getproc().
751 758 */
752 759 (void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL);
753 760 VN_RELE(exec_vp);
754 761 }
755 762 if (execdir_vp)
756 763 VN_RELE(execdir_vp);
757 764
758 765 /*
759 766 * Release held contracts.
760 767 */
761 768 contract_exit(p);
762 769
763 770 /*
764 771 * Depart our encapsulating process contract.
765 772 */
766 773 if ((p->p_flag & SSYS) == 0) {
767 774 ASSERT(p->p_ct_process);
768 775 contract_process_exit(p->p_ct_process, p, rv);
769 776 }
770 777
771 778 /*
772 779 * Remove pool association, and block if requested by pool_do_bind.
773 780 */
774 781 mutex_enter(&p->p_lock);
775 782 ASSERT(p->p_pool->pool_ref > 0);
776 783 atomic_dec_32(&p->p_pool->pool_ref);
777 784 p->p_pool = pool_default;
778 785 /*
779 786 * Now that our address space has been freed and all other threads
780 787 * in this process have exited, set the PEXITED pool flag. This
781 788 * tells the pools subsystems to ignore this process if it was
782 789 * requested to rebind this process to a new pool.
783 790 */
784 791 p->p_poolflag |= PEXITED;
785 792 pool_barrier_exit();
786 793 mutex_exit(&p->p_lock);
787 794
788 795 mutex_enter(&pidlock);
789 796
790 797 /*
791 798 * Delete this process from the newstate list of its parent. We
792 799 * will put it in the right place in the sigcld in the end.
793 800 */
794 801 delete_ns(p->p_parent, p);
795 802
796 803 /*
797 804 * Reassign the orphans to the next of kin.
798 805 * Don't rearrange init's orphanage.
799 806 */
800 807 if ((q = p->p_orphan) != NULL && p != proc_init) {
801 808
802 809 proc_t *nokp = p->p_nextofkin;
803 810
804 811 for (;;) {
805 812 q->p_nextofkin = nokp;
806 813 if (q->p_nextorph == NULL)
807 814 break;
808 815 q = q->p_nextorph;
809 816 }
810 817 q->p_nextorph = nokp->p_orphan;
811 818 nokp->p_orphan = p->p_orphan;
812 819 p->p_orphan = NULL;
813 820 }
814 821
815 822 /*
816 823 * Reassign the children to init.
817 824 * Don't try to assign init's children to init.
818 825 */
819 826 if ((q = p->p_child) != NULL && p != proc_init) {
820 827 struct proc *np;
821 828 struct proc *initp = proc_init;
822 829 pid_t zone_initpid = 1;
823 830 struct proc *zoneinitp = NULL;
824 831 boolean_t setzonetop = B_FALSE;
825 832
826 833 if (!INGLOBALZONE(curproc)) {
827 834 zone_initpid = curproc->p_zone->zone_proc_initpid;
828 835
829 836 ASSERT(MUTEX_HELD(&pidlock));
830 837 zoneinitp = prfind(zone_initpid);
831 838 if (zoneinitp != NULL) {
832 839 initp = zoneinitp;
833 840 } else {
834 841 zone_initpid = 1;
835 842 setzonetop = B_TRUE;
836 843 }
837 844 }
838 845
839 846 pgdetach(p);
840 847
841 848 do {
842 849 np = q->p_sibling;
843 850 /*
844 851 * Delete it from its current parent new state
845 852 * list and add it to init new state list
846 853 */
847 854 delete_ns(q->p_parent, q);
848 855
849 856 q->p_ppid = zone_initpid;
850 857
851 858 q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
852 859 if (setzonetop) {
853 860 mutex_enter(&q->p_lock);
854 861 q->p_flag |= SZONETOP;
855 862 mutex_exit(&q->p_lock);
856 863 }
857 864 q->p_parent = initp;
858 865
859 866 /*
860 867 * Since q will be the first child,
861 868 * it will not have a previous sibling.
862 869 */
863 870 q->p_psibling = NULL;
864 871 if (initp->p_child) {
865 872 initp->p_child->p_psibling = q;
866 873 }
867 874 q->p_sibling = initp->p_child;
868 875 initp->p_child = q;
869 876 if (q->p_proc_flag & P_PR_PTRACE) {
870 877 mutex_enter(&q->p_lock);
871 878 sigtoproc(q, NULL, SIGKILL);
872 879 mutex_exit(&q->p_lock);
873 880 }
874 881 /*
875 882 * sigcld() will add the child to parents
876 883 * newstate list.
877 884 */
878 885 if (q->p_stat == SZOMB)
879 886 sigcld(q, NULL);
880 887 } while ((q = np) != NULL);
881 888
882 889 p->p_child = NULL;
883 890 ASSERT(p->p_child_ns == NULL);
884 891 }
885 892
886 893 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);
887 894
888 895 mutex_enter(&p->p_lock);
889 896 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */
890 897
891 898 /*
892 899 * Have our task accummulate our resource usage data before they
893 900 * become contaminated by p_cacct etc., and before we renounce
894 901 * membership of the task.
895 902 *
896 903 * We do this regardless of whether or not task accounting is active.
897 904 * This is to avoid having nonsense data reported for this task if
898 905 * task accounting is subsequently enabled. The overhead is minimal;
899 906 * by this point, this process has accounted for the usage of all its
900 907 * LWPs. We nonetheless do the work here, and under the protection of
901 908 * pidlock, so that the movement of the process's usage to the task
902 909 * happens at the same time as the removal of the process from the
903 910 * task, from the point of view of exacct_snapshot_task_usage().
904 911 */
905 912 exacct_update_task_mstate(p);
906 913
907 914 hrutime = mstate_aggr_state(p, LMS_USER);
908 915 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
909 916 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
910 917 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;
911 918
912 919 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER];
913 920 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM];
914 921 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP];
915 922 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT];
916 923 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT];
917 924 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT];
918 925 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
919 926 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP];
920 927 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU];
921 928 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED];
922 929
923 930 p->p_ru.minflt += p->p_cru.minflt;
924 931 p->p_ru.majflt += p->p_cru.majflt;
925 932 p->p_ru.nswap += p->p_cru.nswap;
926 933 p->p_ru.inblock += p->p_cru.inblock;
927 934 p->p_ru.oublock += p->p_cru.oublock;
928 935 p->p_ru.msgsnd += p->p_cru.msgsnd;
929 936 p->p_ru.msgrcv += p->p_cru.msgrcv;
930 937 p->p_ru.nsignals += p->p_cru.nsignals;
931 938 p->p_ru.nvcsw += p->p_cru.nvcsw;
932 939 p->p_ru.nivcsw += p->p_cru.nivcsw;
933 940 p->p_ru.sysc += p->p_cru.sysc;
934 941 p->p_ru.ioch += p->p_cru.ioch;
935 942
936 943 p->p_stat = SZOMB;
937 944 p->p_proc_flag &= ~P_PR_PTRACE;
938 945 p->p_wdata = what;
939 946 p->p_wcode = (char)why;
940 947
941 948 cdir = PTOU(p)->u_cdir;
942 949 rdir = PTOU(p)->u_rdir;
943 950 cwd = PTOU(p)->u_cwd;
944 951
945 952 ASSERT(cdir != NULL || p->p_parent == &p0);
946 953
947 954 /*
948 955 * Release resource controls, as they are no longer enforceable.
949 956 */
950 957 rctl_set_free(p->p_rctls);
951 958
952 959 /*
953 960 * Decrement tk_nlwps counter for our task.max-lwps resource control.
954 961 * An extended accounting record, if that facility is active, is
955 962 * scheduled to be written. We cannot give up task and project
956 963 * membership at this point because that would allow zombies to escape
957 964 * from the max-processes resource controls. Zombies stay in their
958 965 * current task and project until the process table slot is released
959 966 * in freeproc().
960 967 */
961 968 tk = p->p_task;
962 969
963 970 mutex_enter(&p->p_zone->zone_nlwps_lock);
964 971 tk->tk_nlwps--;
965 972 tk->tk_proj->kpj_nlwps--;
966 973 p->p_zone->zone_nlwps--;
967 974 mutex_exit(&p->p_zone->zone_nlwps_lock);
968 975
969 976 /*
970 977 * Clear the lwp directory and the lwpid hash table
971 978 * now that /proc can't bother us any more.
972 979 * We free the memory below, after dropping p->p_lock.
973 980 */
974 981 lwpdir = p->p_lwpdir;
975 982 lwpdir_sz = p->p_lwpdir_sz;
976 983 tidhash = p->p_tidhash;
977 984 tidhash_sz = p->p_tidhash_sz;
978 985 ret_tidhash = p->p_ret_tidhash;
979 986 p->p_lwpdir = NULL;
980 987 p->p_lwpfree = NULL;
981 988 p->p_lwpdir_sz = 0;
982 989 p->p_tidhash = NULL;
983 990 p->p_tidhash_sz = 0;
984 991 p->p_ret_tidhash = NULL;
985 992
986 993 /*
987 994 * If the process has context ops installed, call the exit routine
988 995 * on behalf of this last remaining thread. Normally exitpctx() is
989 996 * called during thread_exit() or lwp_exit(), but because this is the
990 997 * last thread in the process, we must call it here. By the time
991 998 * thread_exit() is called (below), the association with the relevant
992 999 * process has been lost.
993 1000 *
994 1001 * We also free the context here.
995 1002 */
996 1003 if (p->p_pctx) {
997 1004 kpreempt_disable();
|
↓ open down ↓ |
457 lines elided |
↑ open up ↑ |
998 1005 exitpctx(p);
999 1006 kpreempt_enable();
1000 1007
1001 1008 freepctx(p, 0);
1002 1009 }
1003 1010
1004 1011 /*
1005 1012 * curthread's proc pointer is changed to point to the 'sched'
1006 1013 * process for the corresponding zone, except in the case when
1007 1014 * the exiting process is in fact a zsched instance, in which
1008 - * case the proc pointer is set to p0. We do so, so that the
1015 + * case the proc pointer is set to p0. We do so, so that the
1009 1016 * process still points at the right zone when we call the VN_RELE()
1010 1017 * below.
1011 1018 *
1012 1019 * This is because curthread's original proc pointer can be freed as
1013 1020 * soon as the child sends a SIGCLD to its parent. We use zsched so
1014 1021 * that for user processes, even in the final moments of death, the
1015 1022 * process is still associated with its zone.
1016 1023 */
1017 1024 if (p != t->t_procp->p_zone->zone_zsched)
1018 1025 t->t_procp = t->t_procp->p_zone->zone_zsched;
1019 1026 else
1020 1027 t->t_procp = &p0;
1021 1028
1022 1029 mutex_exit(&p->p_lock);
1023 1030 if (!evaporate) {
1024 1031 /*
1025 1032 * The brand specific code only happens when the brand has a
1026 1033 * function to call in place of sigcld and the parent of the
1027 1034 * exiting process is not the global zone init. If the parent
1028 1035 * is the global zone init, then the process was reparented,
1029 1036 * and we don't want brand code delivering possibly strange
1030 1037 * signals to init. Also, init is not branded, so any brand
1031 1038 * specific exit data will not be picked up by init anyway.
1032 1039 */
1033 1040 if (PROC_IS_BRANDED(p) &&
1034 1041 BROP(p)->b_exit_with_sig != NULL &&
1035 1042 p->p_ppid != 1) {
1036 1043 /*
1037 1044 * The code for _fini that could unload the brand_t
1038 1045 * blocks until the count of zones using the module
1039 1046 * reaches zero. Zones decrement the refcount on their
1040 1047 * brands only after all user tasks in that zone have
1041 1048 * exited and been waited on. The decrement on the
1042 1049 * brand's refcount happen in zone_destroy(). That
1043 1050 * depends on zone_shutdown() having been completed.
1044 1051 * zone_shutdown() includes a call to zone_empty(),
1045 1052 * where the zone waits for itself to reach the state
1046 1053 * ZONE_IS_EMPTY. This state is only set in either
1047 1054 * zone_shutdown(), when there are no user processes as
1048 1055 * the zone enters this function, or in
1049 1056 * zone_task_rele(). zone_task_rele() is called from
1050 1057 * code triggered by waiting on processes, not by the
1051 1058 * processes exiting through proc_exit(). This means
1052 1059 * all the branded processes that could exist for a
1053 1060 * specific brand_t must exit and get reaped before the
1054 1061 * refcount on the brand_t can reach 0. _fini will
1055 1062 * never unload the corresponding brand module before
1056 1063 * proc_exit finishes execution for all processes
1057 1064 * branded with a particular brand_t, which makes the
1058 1065 * operation below safe to do. Brands that wish to use
1059 1066 * this mechanism must wait in _fini as described
1060 1067 * above.
1061 1068 */
1062 1069 BROP(p)->b_exit_with_sig(p, sqp);
1063 1070 } else {
1064 1071 p->p_pidflag &= ~CLDPEND;
1065 1072 sigcld(p, sqp);
1066 1073 }
1067 1074
1068 1075 } else {
1069 1076 /*
1070 1077 * Do what sigcld() would do if the disposition
1071 1078 * of the SIGCHLD signal were set to be ignored.
1072 1079 */
1073 1080 cv_broadcast(&p->p_srwchan_cv);
1074 1081 freeproc(p);
1075 1082 }
1076 1083 mutex_exit(&pidlock);
1077 1084
1078 1085 /*
1079 1086 * We don't release u_cdir and u_rdir until SZOMB is set.
1080 1087 * This protects us against dofusers().
1081 1088 */
|
↓ open down ↓ |
63 lines elided |
↑ open up ↑ |
1082 1089 if (cdir)
1083 1090 VN_RELE(cdir);
1084 1091 if (rdir)
1085 1092 VN_RELE(rdir);
1086 1093 if (cwd)
1087 1094 refstr_rele(cwd);
1088 1095
1089 1096 /*
1090 1097 * task_rele() may ultimately cause the zone to go away (or
1091 1098 * may cause the last user process in a zone to go away, which
1092 - * signals zsched to go away). So prior to this call, we must
1099 + * signals zsched to go away). So prior to this call, we must
1093 1100 * no longer point at zsched.
1094 1101 */
1095 1102 t->t_procp = &p0;
1096 1103
1097 1104 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
1098 1105 kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
1099 1106 while (ret_tidhash != NULL) {
1100 1107 ret_tidhash_t *next = ret_tidhash->rth_next;
1101 1108 kmem_free(ret_tidhash->rth_tidhash,
1102 1109 ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
1103 1110 kmem_free(ret_tidhash, sizeof (*ret_tidhash));
1104 1111 ret_tidhash = next;
1105 1112 }
1106 1113
1107 1114 thread_exit();
1108 1115 /* NOTREACHED */
1109 1116 }
1110 1117
1111 1118 /*
1112 1119 * Format siginfo structure for wait system calls.
1113 1120 */
1114 1121 void
1115 1122 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
1116 1123 {
1117 1124 ASSERT(MUTEX_HELD(&pidlock));
1118 1125
1119 1126 bzero(ip, sizeof (k_siginfo_t));
1120 1127 ip->si_signo = SIGCLD;
1121 1128 ip->si_code = pp->p_wcode;
1122 1129 ip->si_pid = pp->p_pid;
1123 1130 ip->si_ctid = PRCTID(pp);
1124 1131 ip->si_zoneid = pp->p_zone->zone_id;
1125 1132 ip->si_status = pp->p_wdata;
1126 1133 ip->si_stime = pp->p_stime;
1127 1134 ip->si_utime = pp->p_utime;
1128 1135
1129 1136 if (waitflag) {
1130 1137 pp->p_wcode = 0;
1131 1138 pp->p_wdata = 0;
1132 1139 pp->p_pidflag &= ~CLDPEND;
1133 1140 }
1134 1141 }
1135 1142
/*
 * Wait system call.
 * Search for a terminated (zombie) child,
 * finally lay it to rest, and collect its status.
 * Look also for stopped children,
 * and pass back status from them.
 *
 * idtype/id select the children of interest (one pid, one process
 * group, or all children); options is a mask of W* flags (WEXITED,
 * WSTOPPED, WTRAPPED, WCONTINUED, WNOHANG, WNOWAIT).  On success the
 * child's status is written to *ip and 0 is returned; otherwise an
 * errno value (EINVAL, ECHILD, EINTR) is returned directly.
 */
int
waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
{
	proc_t *cp, *pp;
	/* WNOWAIT means "peek": leave the child waitable afterwards. */
	int waitflag = !(options & WNOWAIT);
	boolean_t have_brand_helper = B_FALSE;

	/*
	 * Obsolete flag, defined here only for binary compatibility
	 * with old statically linked executables. Delete this when
	 * we no longer care about these old and broken applications.
	 */
#define	_WNOCHLD	0400
	options &= ~_WNOCHLD;

	/* At least one event must be requested and no unknown bits set. */
	if (options == 0 || (options & ~WOPTMASK))
		return (EINVAL);

	switch (idtype) {
	case P_PID:
	case P_PGID:
		if (id < 0 || id >= maxpid)
			return (EINVAL);
		/* FALLTHROUGH */
	case P_ALL:
		break;
	default:
		return (EINVAL);
	}

	pp = ttoproc(curthread);

	/*
	 * Anytime you are looking for a process, you take pidlock to prevent
	 * things from changing as you look.
	 */
	mutex_enter(&pidlock);

	/*
	 * if we are only looking for exited processes and child_ns list
	 * is empty no reason to look at all children.
	 */
	if (idtype == P_ALL &&
	    (options & ~WNOWAIT) == (WNOHANG | WEXITED) &&
	    pp->p_child_ns == NULL) {
		if (pp->p_child) {
			mutex_exit(&pidlock);
			bzero(ip, sizeof (k_siginfo_t));
			return (0);
		}
		mutex_exit(&pidlock);
		return (ECHILD);
	}

	if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) {
		have_brand_helper = B_TRUE;
	}

	/*
	 * Loop until a matching status is found, WNOHANG short-circuits,
	 * there is provably nothing to wait for, or a signal arrives.
	 */
	while (pp->p_child != NULL || have_brand_helper) {
		boolean_t brand_wants_wait = B_FALSE;
		int proc_gone = 0;
		int found = 0;

		/*
		 * Give the brand a chance to return synthetic results from
		 * this waitid() call before we do the real thing.
		 */
		if (have_brand_helper) {
			int ret;

			if (BROP(pp)->b_waitid_helper(idtype, id, ip, options,
			    &brand_wants_wait, &ret) == 0) {
				mutex_exit(&pidlock);
				return (ret);
			}

			if (pp->p_child == NULL) {
				goto no_real_children;
			}
		}

		/*
		 * Look for interesting children in the newstate list.
		 */
		VERIFY(pp->p_child != NULL);
		for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
			if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
				continue;
			if (idtype == P_PID && id != cp->p_pid)
				continue;
			if (idtype == P_PGID && id != cp->p_pgrp)
				continue;
			if (PROC_IS_BRANDED(pp)) {
				if (BROP(pp)->b_wait_filter != NULL &&
				    BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
					continue;
			}

			switch (cp->p_wcode) {

			case CLD_TRAPPED:
			case CLD_STOPPED:
			case CLD_CONTINUED:
				/* Only exit states belong on p_child_ns. */
				cmn_err(CE_PANIC,
				    "waitid: wrong state %d on the p_newstate"
				    " list", cp->p_wcode);
				break;

			case CLD_EXITED:
			case CLD_DUMPED:
			case CLD_KILLED:
				if (!(options & WEXITED)) {
					/*
					 * Count how many are already gone
					 * for good.
					 */
					proc_gone++;
					break;
				}
				if (!waitflag) {
					winfo(cp, ip, 0);
				} else {
					winfo(cp, ip, 1);
					freeproc(cp);
				}
				mutex_exit(&pidlock);
				if (waitflag) {	/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);
			}

			if (idtype == P_PID)
				break;
		}

		/*
		 * Wow! None of the threads on the p_sibling_ns list were
		 * interesting threads. Check all the kids!
		 */
		for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
			if (idtype == P_PID && id != cp->p_pid)
				continue;
			if (idtype == P_PGID && id != cp->p_pgrp)
				continue;
			if (PROC_IS_BRANDED(pp)) {
				if (BROP(pp)->b_wait_filter != NULL &&
				    BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
					continue;
			}

			switch (cp->p_wcode) {
			case CLD_TRAPPED:
				if (!(options & WTRAPPED))
					break;
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {	/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_STOPPED:
				if (!(options & WSTOPPED))
					break;
				/* Is it still stopped? */
				mutex_enter(&cp->p_lock);
				if (!jobstopped(cp)) {
					mutex_exit(&cp->p_lock);
					break;
				}
				mutex_exit(&cp->p_lock);
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {	/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_CONTINUED:
				if (!(options & WCONTINUED))
					break;
				winfo(cp, ip, waitflag);
				mutex_exit(&pidlock);
				if (waitflag) {	/* accept SIGCLD */
					sigcld_delete(ip);
					sigcld_repost();
				}
				return (0);

			case CLD_EXITED:
			case CLD_DUMPED:
			case CLD_KILLED:
				if (idtype != P_PID &&
				    (cp->p_pidflag & CLDWAITPID))
					continue;
				/*
				 * Don't complain if a process was found in
				 * the first loop but we broke out of the loop
				 * because of the arguments passed to us.
				 */
				if (proc_gone == 0) {
					cmn_err(CE_PANIC,
					    "waitid: wrong state on the"
					    " p_child list");
				} else {
					break;
				}
			}

			found++;

			if (idtype == P_PID)
				break;
		}

no_real_children:
		/*
		 * If we found no interesting processes at all,
		 * break out and return ECHILD.
		 */
		if (!brand_wants_wait && (found + proc_gone == 0))
			break;

		if (options & WNOHANG) {
			mutex_exit(&pidlock);
			bzero(ip, sizeof (k_siginfo_t));
			/*
			 * We should set ip->si_signo = SIGCLD,
			 * but there is an SVVS test that expects
			 * ip->si_signo to be zero in this case.
			 */
			return (0);
		}

		/*
		 * If we found no processes of interest that could
		 * change state while we wait, we don't wait at all.
		 * Get out with ECHILD according to SVID.
		 */
		if (!brand_wants_wait && (found == proc_gone))
			break;

		if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
			mutex_exit(&pidlock);
			return (EINTR);
		}
	}
	mutex_exit(&pidlock);
	return (ECHILD);
}
1397 1404
1398 1405 int
1399 1406 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1400 1407 {
1401 1408 int error;
1402 1409 k_siginfo_t info;
1403 1410
1404 1411 if (error = waitid(idtype, id, &info, options))
1405 1412 return (set_errno(error));
1406 1413 if (copyout(&info, infop, sizeof (k_siginfo_t)))
1407 1414 return (set_errno(EFAULT));
1408 1415 return (0);
1409 1416 }
1410 1417
1411 1418 #ifdef _SYSCALL32_IMPL
1412 1419
1413 1420 int
1414 1421 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1415 1422 {
1416 1423 int error;
1417 1424 k_siginfo_t info;
1418 1425 siginfo32_t info32;
1419 1426
1420 1427 if (error = waitid(idtype, id, &info, options))
1421 1428 return (set_errno(error));
1422 1429 siginfo_kto32(&info, &info32);
1423 1430 if (copyout(&info32, infop, sizeof (info32)))
1424 1431 return (set_errno(EFAULT));
1425 1432 return (0);
1426 1433 }
1427 1434
1428 1435 #endif /* _SYSCALL32_IMPL */
1429 1436
1430 1437 void
1431 1438 proc_detach(proc_t *p)
1432 1439 {
1433 1440 proc_t *q;
1434 1441
1435 1442 ASSERT(MUTEX_HELD(&pidlock));
1436 1443
1437 1444 q = p->p_parent;
1438 1445 ASSERT(q != NULL);
1439 1446
1440 1447 /*
1441 1448 * Take it off the newstate list of its parent
1442 1449 */
1443 1450 delete_ns(q, p);
1444 1451
1445 1452 if (q->p_child == p) {
1446 1453 q->p_child = p->p_sibling;
1447 1454 /*
1448 1455 * If the parent has no children, it better not
1449 1456 * have any with new states either!
1450 1457 */
1451 1458 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL);
1452 1459 }
1453 1460
1454 1461 if (p->p_sibling) {
1455 1462 p->p_sibling->p_psibling = p->p_psibling;
1456 1463 }
1457 1464
1458 1465 if (p->p_psibling) {
1459 1466 p->p_psibling->p_sibling = p->p_sibling;
1460 1467 }
1461 1468 }
1462 1469
/*
 * Remove zombie children from the process table.
 *
 * Called (with pidlock held) once a zombie's status has been accepted:
 * releases its remaining per-process state, rolls its resource usage
 * into its next-of-kin, detaches it from parent/orphan lists, and
 * frees its pid and proc-table slot.
 */
void
freeproc(proc_t *p)
{
	proc_t *q;
	task_t *tk;

	ASSERT(p->p_stat == SZOMB);
	ASSERT(p->p_tlist == NULL);
	ASSERT(MUTEX_HELD(&pidlock));

	/* Discard any still-queued signals, including a pending kill. */
	sigdelq(p, NULL, 0);
	if (p->p_killsqp) {
		siginfofree(p->p_killsqp);
		p->p_killsqp = NULL;
	}

	/* Clear any remaining brand data */
	if (PROC_IS_BRANDED(p)) {
		brand_clearbrand(p, B_FALSE);
	}


	prfree(p);	/* inform /proc */

	/*
	 * Don't free the init processes.
	 * Other dying processes will access it.
	 */
	if (p == proc_init)
		return;


	/*
	 * We wait until now to free the cred structure because a
	 * zombie process's credentials may be examined by /proc.
	 * No cred locking needed because there are no threads at this point.
	 */
	upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
	crfree(p->p_cred);
	if (p->p_corefile != NULL) {
		corectl_path_rele(p->p_corefile);
		p->p_corefile = NULL;
	}
	if (p->p_content != NULL) {
		corectl_content_rele(p->p_content);
		p->p_content = NULL;
	}

	/*
	 * Roll the zombie's CPU time, microstate accounting and resource
	 * usage into its next-of-kin's child totals, unless the kin has
	 * disclaimed interest (SNOWAIT or SIGCLD set to SIG_IGN).
	 */
	if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) ||
	    (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) {
		/*
		 * This should still do the right thing since p_utime/stime
		 * get set to the correct value on process exit, so it
		 * should get properly updated
		 */
		p->p_nextofkin->p_cutime += p->p_utime;
		p->p_nextofkin->p_cstime += p->p_stime;

		p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER];
		p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM];
		p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP];
		p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT];
		p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT];
		p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT];
		p->p_nextofkin->p_cacct[LMS_USER_LOCK]
		    += p->p_acct[LMS_USER_LOCK];
		p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP];
		p->p_nextofkin->p_cacct[LMS_WAIT_CPU]
		    += p->p_acct[LMS_WAIT_CPU];
		p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED];

		p->p_nextofkin->p_cru.minflt += p->p_ru.minflt;
		p->p_nextofkin->p_cru.majflt += p->p_ru.majflt;
		p->p_nextofkin->p_cru.nswap += p->p_ru.nswap;
		p->p_nextofkin->p_cru.inblock += p->p_ru.inblock;
		p->p_nextofkin->p_cru.oublock += p->p_ru.oublock;
		p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd;
		p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv;
		p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals;
		p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw;
		p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw;
		p->p_nextofkin->p_cru.sysc += p->p_ru.sysc;
		p->p_nextofkin->p_cru.ioch += p->p_ru.ioch;

	}

	/* Remove p from its next-of-kin's singly-linked orphan list. */
	q = p->p_nextofkin;
	if (q && q->p_orphan == p)
		q->p_orphan = p->p_nextorph;
	else if (q) {
		for (q = q->p_orphan; q; q = q->p_nextorph)
			if (q->p_nextorph == p)
				break;
		ASSERT(q && q->p_nextorph == p);
		q->p_nextorph = p->p_nextorph;
	}

	/*
	 * The process table slot is being freed, so it is now safe to give up
	 * task and project membership.
	 */
	mutex_enter(&p->p_lock);
	tk = p->p_task;
	task_detach(p);
	mutex_exit(&p->p_lock);

	proc_detach(p);
	pid_exit(p, tk);	/* frees pid and proc structure */

	task_rele(tk);
}
1577 1584
1578 1585 /*
1579 1586 * Delete process "child" from the newstate list of process "parent"
1580 1587 */
1581 1588 void
1582 1589 delete_ns(proc_t *parent, proc_t *child)
1583 1590 {
1584 1591 proc_t **ns;
1585 1592
1586 1593 ASSERT(MUTEX_HELD(&pidlock));
1587 1594 ASSERT(child->p_parent == parent);
1588 1595 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) {
1589 1596 if (*ns == child) {
1590 1597
1591 1598 ASSERT((*ns)->p_parent == parent);
1592 1599
1593 1600 *ns = child->p_sibling_ns;
1594 1601 child->p_sibling_ns = NULL;
1595 1602 return;
1596 1603 }
1597 1604 }
1598 1605 }
1599 1606
1600 1607 /*
1601 1608 * Add process "child" to the new state list of process "parent"
1602 1609 */
1603 1610 void
1604 1611 add_ns(proc_t *parent, proc_t *child)
1605 1612 {
1606 1613 ASSERT(child->p_sibling_ns == NULL);
1607 1614 child->p_sibling_ns = parent->p_child_ns;
1608 1615 parent->p_child_ns = child;
1609 1616 }
|
↓ open down ↓ |
507 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX