1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, Joyent Inc. All rights reserved.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 * Copyright 2017 Nexenta Systems, Inc.
27 */
28
29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
30 /* All Rights Reserved */
31
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/time.h>
35 #include <sys/sysmacros.h>
36 #include <sys/proc.h>
37 #include <sys/systm.h>
38 #include <sys/cred.h>
39 #include <sys/user.h>
40 #include <sys/utsname.h>
41 #include <sys/errno.h>
42 #include <sys/signal.h>
43 #include <sys/siginfo.h>
44 #include <sys/fault.h>
45 #include <sys/syscall.h>
46 #include <sys/ucontext.h>
47 #include <sys/prsystm.h>
48 #include <sys/vnode.h>
49 #include <sys/var.h>
50 #include <sys/file.h>
51 #include <sys/pathname.h>
52 #include <sys/vfs.h>
53 #include <sys/exec.h>
54 #include <sys/debug.h>
55 #include <sys/stack.h>
56 #include <sys/kmem.h>
57 #include <sys/schedctl.h>
58 #include <sys/core.h>
59 #include <sys/corectl.h>
60 #include <sys/cmn_err.h>
61 #include <vm/as.h>
62 #include <sys/rctl.h>
63 #include <sys/nbmlock.h>
64 #include <sys/stat.h>
65 #include <sys/zone.h>
66 #include <sys/contract/process_impl.h>
67 #include <sys/ddi.h>
68 #include <sys/fm/protocol.h>
69 #include <sys/fm/util.h>
70 #include <sys/fm/sw/core.h>
71 #include <sys/sysevent.h>
72
/*
 * A process running in a zone may leave a core file in up to three places,
 * governed respectively by its own settings, its zone's settings, and the
 * global zone's settings.
 *
 * The zone-wide and global-zone settings are both referred to as "global"
 * settings because they apply to a whole system (or zone) rather than to
 * a single process.
 */
enum core_types {
	CORE_PROC = 0,		/* Use per-process settings */
	CORE_ZONE = 1,		/* Use per-zone settings */
	CORE_GLOBAL = 2		/* Use global zone settings */
};
86
87 /*
88 * Log information about "global" core dumps to syslog.
89 */
90 static void
91 core_log(struct core_globals *cg, int error, const char *why, const char *path,
92 zoneid_t zoneid)
93 {
94 proc_t *p = curproc;
95 pid_t pid = p->p_pid;
96 char *fn = PTOU(p)->u_comm;
97
98 if (!(cg->core_options & CC_GLOBAL_LOG))
99 return;
100
101 if (path == NULL)
102 zcmn_err(zoneid, CE_NOTE, "core_log: %s[%d] %s", fn, pid, why);
103 else if (error == 0)
104 zcmn_err(zoneid, CE_NOTE, "core_log: %s[%d] %s: %s", fn, pid,
105 why, path);
106 else
107 zcmn_err(zoneid, CE_NOTE, "core_log: %s[%d] %s, errno=%d: %s",
108 fn, pid, why, error, path);
109 }
110
111 /*
112 * Generate FMA e-report for a core.
113 */
114 static void
115 gen_ereport(const char *path, int sig)
116 {
117 nvlist_t *ereport = NULL;
118 nvlist_t *fmri = NULL;
119 nvlist_t *sw_obj = NULL;
120 uint64_t ena;
121 proc_t *p = curproc;
122 int err = 0;
123
124 if ((ereport = fm_nvlist_create(NULL)) == NULL)
125 return;
126 if ((fmri = fm_nvlist_create(NULL)) == NULL)
127 goto out;
128 if ((sw_obj = fm_nvlist_create(NULL)) == NULL)
129 goto out;
130 ena = fm_ena_generate(0, FM_ENA_FMT1);
131
132 err |= nvlist_add_uint8(fmri, FM_VERSION, FM_SW_SCHEME_VERSION);
133 err |= nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_SW);
134 err |= nvlist_add_string(sw_obj, FM_FMRI_SW_OBJ_PATH, path);
135 err |= nvlist_add_nvlist(fmri, FM_FMRI_SW_OBJ, sw_obj);
136
137 if (err != 0)
138 goto out;
139
140 fm_ereport_set(ereport, FM_EREPORT_VERSION, CORE_ERROR_CLASS,
141 ena, fmri, NULL);
142
143 fm_payload_set(ereport,
144 FM_EREPORT_PAYLOAD_CORE_COMMAND, DATA_TYPE_STRING,
145 p->p_exec->v_path ? p->p_exec->v_path : p->p_user.u_comm,
146 FM_EREPORT_PAYLOAD_CORE_PSARGS, DATA_TYPE_STRING,
147 p->p_user.u_psargs,
148 FM_EREPORT_PAYLOAD_CORE_SIGNAL, DATA_TYPE_INT32, sig,
149 FM_EREPORT_PAYLOAD_CORE_PATH, DATA_TYPE_STRING, path,
150 NULL);
151
152 fm_ereport_post(ereport, EVCH_SLEEP);
153
154 out:
155 fm_nvlist_destroy(sw_obj, FM_NVA_FREE);
156 fm_nvlist_destroy(ereport, FM_NVA_FREE);
157 fm_nvlist_destroy(fmri, FM_NVA_FREE);
158 }
159
/*
 * Private version of vn_remove().
 * Refuse to unlink a directory or an unwritable file.
 * Also allow the process to access files normally inaccessible due to
 * chroot(2) or Zone limitations.
 *
 * fp		path of the core file to remove (kernel address space)
 * core_type	selects the root and starting directory used for the lookup
 *
 * Returns 0 if the file was removed or did not exist; otherwise an errno
 * value: whatever pn_get(), lookuppnvp(), VOP_ACCESS() or VOP_REMOVE()
 * returned, EACCES for a non-regular file or an NBMAND share conflict, or
 * EROFS when the parent directory's filesystem is mounted read-only.
 */
static int
remove_core_file(char *fp, enum core_types core_type)
{
	vnode_t *vp = NULL;		/* entry vnode */
	vnode_t *dvp;			/* ptr to parent dir vnode */
	vfs_t *dvfsp;
	int error;
	int in_crit = 0;		/* set once nbl_start_crit() is held */
	pathname_t pn;			/* name of entry */
	vnode_t *startvp, *rootvp;

	if ((error = pn_get(fp, UIO_SYSSPACE, &pn)) != 0)
		return (error);
	/*
	 * Determine what rootvp to use.
	 *
	 * CORE_PROC honours the process's own root (u_rdir, or the zone root
	 * when u_rdir is unset) and resolves relative paths from u_cdir.
	 * CORE_ZONE and CORE_GLOBAL always start at the zone root and at
	 * rootdir respectively.
	 */
	if (core_type == CORE_PROC) {
		rootvp = (PTOU(curproc)->u_rdir == NULL ?
		    curproc->p_zone->zone_rootvp : PTOU(curproc)->u_rdir);
		startvp = (fp[0] == '/' ? rootvp : PTOU(curproc)->u_cdir);
	} else if (core_type == CORE_ZONE) {
		startvp = curproc->p_zone->zone_rootvp;
		rootvp = curproc->p_zone->zone_rootvp;
	} else {
		ASSERT(core_type == CORE_GLOBAL);
		startvp = rootdir;
		rootvp = rootdir;
	}
	/*
	 * NOTE(review): no matching VN_RELE() for these holds appears in this
	 * function; presumably lookuppnvp() consumes them -- confirm.
	 */
	VN_HOLD(startvp);
	if (rootvp != rootdir)
		VN_HOLD(rootvp);
	/* NO_FOLLOW: a symlink in the final component is not followed. */
	if ((error = lookuppnvp(&pn, NULL, NO_FOLLOW, &dvp, &vp, rootvp,
	    startvp, CRED())) != 0) {
		pn_free(&pn);
		return (error);
	}
	/*
	 * Succeed if there is no file.
	 * Fail if the file is not a regular file.
	 * Fail if the filesystem is mounted read-only.
	 * Fail if the file is not writeable.
	 * Fail if the file has NBMAND share reservations.
	 */
	if (vp == NULL)
		error = 0;
	else if (vp->v_type != VREG)
		error = EACCES;
	else if ((dvfsp = dvp->v_vfsp) != NULL &&
	    (dvfsp->vfs_flag & VFS_RDONLY))
		error = EROFS;
	else if ((error = VOP_ACCESS(vp, VWRITE, 0, CRED(), NULL)) == 0) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
			if (nbl_share_conflict(vp, NBL_REMOVE, NULL)) {
				error = EACCES;
			}
		}
		if (!error) {
			error = VOP_REMOVE(dvp, pn.pn_path, CRED(), NULL, 0);
		}
	}

	/* Common exit: leave the critical region before dropping the vnode. */
	pn_free(&pn);
	if (vp != NULL) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
	}
	VN_RELE(dvp);
	return (error);
}
238
239 /*
240 * Create the core file in a location that may be normally inaccessible due
241 * to chroot(2) or Zone limitations.
242 */
243 static int
244 create_core_file(char *fp, enum core_types core_type, vnode_t **vpp)
245 {
246 int error;
247 mode_t perms = (S_IRUSR | S_IWUSR);
248 pathname_t pn;
249 char *file;
250 vnode_t *vp;
251 vnode_t *dvp;
252 vattr_t vattr;
253 cred_t *credp = CRED();
254
255 if (core_type == CORE_PROC) {
256 file = fp;
257 dvp = NULL; /* regular lookup */
258 } else {
259 vnode_t *startvp, *rootvp;
260
261 ASSERT(core_type == CORE_ZONE || core_type == CORE_GLOBAL);
262 /*
263 * This is tricky because we want to dump the core in
264 * a location which may normally be inaccessible
265 * to us (due to chroot(2) limitations, or zone
266 * membership), and hence need to overcome u_rdir
267 * restrictions. The basic idea is to separate
268 * the path from the filename, lookup the
269 * pathname separately (starting from the global
270 * zone's root directory), and then open the
271 * file starting at the directory vnode.
272 */
273 if (error = pn_get(fp, UIO_SYSSPACE, &pn))
274 return (error);
275
276 if (core_type == CORE_ZONE) {
277 startvp = rootvp = curproc->p_zone->zone_rootvp;
278 } else {
279 startvp = rootvp = rootdir;
280 }
281 /*
282 * rootvp and startvp will be VN_RELE()'d by lookuppnvp() if
283 * necessary.
284 */
285 VN_HOLD(startvp);
286 if (rootvp != rootdir)
287 VN_HOLD(rootvp);
288 /*
289 * Do a lookup on the full path, ignoring the actual file, but
290 * finding the vnode for the directory. It's OK if the file
291 * doesn't exist -- it most likely won't since we just removed
292 * it.
293 */
294 error = lookuppnvp(&pn, NULL, FOLLOW, &dvp, NULLVPP,
295 rootvp, startvp, credp);
296 pn_free(&pn);
297 if (error != 0)
298 return (error);
299 ASSERT(dvp != NULL);
300 /*
301 * Now find the final component in the path (ie, the name of
302 * the core file).
303 */
304 if (error = pn_get(fp, UIO_SYSSPACE, &pn)) {
305 VN_RELE(dvp);
306 return (error);
307 }
308 pn_setlast(&pn);
309 file = pn.pn_path;
310 }
311 error = vn_openat(file, UIO_SYSSPACE,
312 FWRITE | FTRUNC | FEXCL | FCREAT | FOFFMAX,
313 perms, &vp, CRCREAT, PTOU(curproc)->u_cmask, dvp, -1);
314 if (core_type != CORE_PROC) {
315 VN_RELE(dvp);
316 pn_free(&pn);
317 }
318 /*
319 * Don't dump a core file owned by "nobody".
320 */
321 vattr.va_mask = AT_UID;
322 if (error == 0 &&
323 (VOP_GETATTR(vp, &vattr, 0, credp, NULL) != 0 ||
324 vattr.va_uid != crgetuid(credp))) {
325 (void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0,
326 credp, NULL);
327 VN_RELE(vp);
328 (void) remove_core_file(fp, core_type);
329 error = EACCES;
330 }
331 *vpp = vp;
332 return (error);
333 }
334
335 /*
336 * Install the specified held cred into the process, and return a pointer to
337 * the held cred which was previously the value of p->p_cred.
338 */
339 static cred_t *
340 set_cred(proc_t *p, cred_t *newcr)
341 {
342 cred_t *oldcr;
343 uid_t olduid, newuid;
344
345 /*
346 * Place a hold on the existing cred, and then install the new
347 * cred into the proc structure.
348 */
349 mutex_enter(&p->p_crlock);
350 oldcr = p->p_cred;
351 crhold(oldcr);
352 p->p_cred = newcr;
353 mutex_exit(&p->p_crlock);
354
355 ASSERT(crgetzoneid(oldcr) == crgetzoneid(newcr));
356
357 /*
358 * If the real uid is changing, keep the per-user process
359 * counts accurate.
360 */
361 olduid = crgetruid(oldcr);
362 newuid = crgetruid(newcr);
363 if (olduid != newuid) {
364 zoneid_t zoneid = crgetzoneid(newcr);
365
366 mutex_enter(&pidlock);
367 upcount_dec(olduid, zoneid);
368 upcount_inc(newuid, zoneid);
369 mutex_exit(&pidlock);
370 }
371
372 /*
373 * Broadcast the new cred to all the other threads. The old
374 * cred can be safely returned because we have a hold on it.
375 */
376 crset(p, newcr);
377 return (oldcr);
378 }
379
/*
 * Write one core file for the current process to the already-expanded
 * pathname 'fp'.
 *
 * fp		pathname of the core file
 * sig		signal number passed through to the exec module's core routine
 * core_type	CORE_PROC, CORE_ZONE or CORE_GLOBAL; selects which size limit,
 *		content setting and lookup root are used
 * cg		core settings consulted for limit, content and set-id policy
 *
 * Returns 0 on success or an errno value: EFBIG if the applicable core size
 * limit is zero, ENOTSUP if the process is set-id (or has SNOCD set) and the
 * relevant set-id option is not enabled in 'cg', ENOSYS if no exec module
 * can be found to write the core, or whatever remove_core_file(),
 * create_core_file(), the exec module's exec_core routine or VOP_CLOSE()
 * returned.
 */
static int
do_core(char *fp, int sig, enum core_types core_type, struct core_globals *cg)
{
	proc_t *p = curproc;
	cred_t *credp = CRED();
	rlim64_t rlimit;
	vnode_t *vp;
	int error = 0;
	struct execsw *eswp;
	cred_t *ocredp = NULL;	/* original cred, non-NULL while swapped */
	int is_setid = 0;
	core_content_t content;
	uid_t uid;
	gid_t gid;

	/*
	 * "Global" dumps take their limit and content from 'cg'; per-process
	 * dumps take them from the process's resource controls and p_content.
	 */
	if (core_type == CORE_GLOBAL || core_type == CORE_ZONE) {
		mutex_enter(&cg->core_lock);
		content = cg->core_content;
		mutex_exit(&cg->core_lock);
		rlimit = cg->core_rlimit;
	} else {
		mutex_enter(&p->p_lock);
		rlimit = rctl_enforced_value(rctlproc_legacy[RLIMIT_CORE],
		    p->p_rctls, p);
		content = corectl_content_value(p->p_content);
		mutex_exit(&p->p_lock);
	}

	if (rlimit == 0)
		return (EFBIG);

	/*
	 * If SNOCD is set, or if the effective, real, and saved ids do
	 * not match up, no one but a privileged user is allowed to view
	 * this core file.  Set the credentials and the owner to root.
	 */
	if ((p->p_flag & SNOCD) ||
	    (uid = crgetuid(credp)) != crgetruid(credp) ||
	    uid != crgetsuid(credp) ||
	    (gid = crgetgid(credp)) != crgetrgid(credp) ||
	    gid != crgetsgid(credp)) {
		/*
		 * Because this is insecure against certain forms of file
		 * system attack, do it only if set-id core files have been
		 * enabled via corectl(CC_GLOBAL_SETID | CC_PROCESS_SETID).
		 */
		if (((core_type == CORE_GLOBAL || core_type == CORE_ZONE) &&
		    !(cg->core_options & CC_GLOBAL_SETID)) ||
		    (core_type == CORE_PROC &&
		    !(cg->core_options & CC_PROCESS_SETID)))
			return (ENOTSUP);

		is_setid = 1;
	}

	/*
	 * If we are doing a "global" core dump or a set-id core dump,
	 * use kcred to do the dumping.
	 */
	if (core_type == CORE_GLOBAL || core_type == CORE_ZONE || is_setid) {
		/*
		 * Use the zone's "kcred" to prevent privilege
		 * escalation.
		 */
		credp = zone_get_kcred(getzoneid());
		ASSERT(credp != NULL);
		/* set_cred() returns the previous cred with a hold on it. */
		ocredp = set_cred(p, credp);
	}

	/*
	 * First remove any existing core file, then
	 * open the new core file with (O_EXCL|O_CREAT).
	 *
	 * The reasons for doing this are manifold:
	 *
	 * For security reasons, we don't want root processes
	 * to dump core through a symlink because that would
	 * allow a malicious user to clobber any file on
	 * the system if they could convince a root process,
	 * perhaps a set-uid root process that they started,
	 * to dump core in a directory writable by that user.
	 * Similar security reasons apply to hard links.
	 * For symmetry we do this unconditionally, not
	 * just for root processes.
	 *
	 * If the process has the core file mmap()d into the
	 * address space, we would be modifying the address
	 * space that we are trying to dump if we did not first
	 * remove the core file.  (The command "file core"
	 * is the canonical example of this possibility.)
	 *
	 * Opening the core file with O_EXCL|O_CREAT ensures that
	 * two concurrent core dumps don't clobber each other.
	 * One is bound to lose; we don't want to make both lose.
	 */
	if ((error = remove_core_file(fp, core_type)) == 0) {
		error = create_core_file(fp, core_type, &vp);
	}

	/*
	 * Now that vn_open is complete, reset the process's credentials if
	 * we changed them, and make 'credp' point to kcred used
	 * above.  We use 'credp' to do i/o on the core file below, but leave
	 * p->p_cred set to the original credential to allow the core file
	 * to record this information.
	 */
	if (ocredp != NULL)
		credp = set_cred(p, ocredp);

	if (error == 0) {
		int closerr;
#if defined(__sparc)
		(void) flush_user_windows_to_stack(NULL);
#endif
		/*
		 * Locate the exec module matching this process's magic
		 * number and let it write the core image.  The rw_exit()
		 * below drops the module's exec_lock; NOTE(review):
		 * presumably findexec_by_magic() returns with it held --
		 * confirm.
		 */
		if ((eswp = PTOU(curproc)->u_execsw) == NULL ||
		    (eswp = findexec_by_magic(eswp->exec_magic)) == NULL) {
			error = ENOSYS;
		} else {
			error = eswp->exec_core(vp, p, credp, rlimit, sig,
			    content);
			rw_exit(eswp->exec_lock);
		}

		/* A close failure is reported only if the dump succeeded. */
		closerr = VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, credp, NULL);
		VN_RELE(vp);
		if (error == 0)
			error = closerr;
	}

	/*
	 * When the cred was swapped, 'credp' is the held kcred handed back
	 * by the second set_cred() call; drop that hold now.
	 */
	if (ocredp != NULL)
		crfree(credp);

	return (error);
}
514
/*
 * Convert a core name pattern to a pathname.
 *
 * pat		NUL-terminated pattern; '%' introduces a conversion
 * fp		output buffer that receives the NUL-terminated result
 * size		size of 'fp' in bytes
 * cr		credential passed to vnodetopath() for the %d conversion
 *
 * Conversions: %p pid, %u effective uid, %g effective gid, %f exec'd file
 * name, %d exec'd directory (leading slash removed), %n node name,
 * %m machine name, %t time in seconds, %z zone name, %Z zone root path
 * (with a trailing "/root/" removed), %% a literal '%'.  Any other
 * "%<char>" is copied through unchanged, and a lone '%' at the end of the
 * pattern is emitted literally.
 *
 * Returns 0 on success, or ENAMETOOLONG if the expansion does not fit.
 */
static int
expand_string(const char *pat, char *fp, int size, cred_t *cr)
{
	proc_t *p = curproc;
	char buf[24];		/* scratch for numeric and "%x" expansions */
	int len, i;
	char *s;		/* text to append for the current conversion */
	char c;

	while ((c = *pat++) != '\0') {
		/* Need room for at least one character plus the final NUL. */
		if (size < 2)
			return (ENAMETOOLONG);
		if (c != '%') {
			size--;
			*fp++ = c;
			continue;
		}
		/* Pattern ends with a bare '%': emit it and stop. */
		if ((c = *pat++) == '\0') {
			size--;
			*fp++ = '%';
			break;
		}
		switch (c) {
		case 'p':	/* pid */
			(void) sprintf((s = buf), "%d", p->p_pid);
			break;
		case 'u':	/* effective uid */
			(void) sprintf((s = buf), "%u", crgetuid(p->p_cred));
			break;
		case 'g':	/* effective gid */
			(void) sprintf((s = buf), "%u", crgetgid(p->p_cred));
			break;
		case 'f':	/* exec'd filename */
			s = PTOU(p)->u_comm;
			break;
		case 'd':	/* exec'd dirname */
			/*
			 * Even if pathname caching is disabled, we should
			 * be able to lookup the pathname for a directory.
			 *
			 * The path is generated directly into the output
			 * buffer, so this case advances fp/size itself and
			 * bypasses the common copy below.  If the lookup
			 * fails the conversion expands to nothing.
			 */
			if (p->p_execdir != NULL && vnodetopath(NULL,
			    p->p_execdir, fp, size, cr) == 0) {
				len = (int)strlen(fp);
				ASSERT(len < size);
				ASSERT(len >= 1);
				ASSERT(fp[0] == '/');

				/*
				 * Strip off the leading slash.  The loop runs
				 * to i == len - 1 so the terminating NUL is
				 * shifted down as well.
				 */
				for (i = 0; i < len; i++) {
					fp[i] = fp[i + 1];
				}

				len--;

				size -= len;
				fp += len;
			} else {
				*fp = '\0';
			}

			continue;
		case 'n':	/* system nodename */
			s = uts_nodename();
			break;
		case 'm':	/* machine (sun4u, etc) */
			s = utsname.machine;
			break;
		case 't':	/* decimal value of time(2) */
			(void) sprintf((s = buf), "%ld", gethrestime_sec());
			break;
		case 'z':	/* zone name */
			s = p->p_zone->zone_name;
			break;
		case 'Z':
			/* This is zonepath + "/root/", except for GZ */
			s = p->p_zone->zone_rootpath;
			break;
		case '%':	/* literal percent sign */
			(void) strcpy((s = buf), "%");
			break;
		default:	/* unknown conversion: pass "%<c>" through */
			s = buf;
			buf[0] = '%';
			buf[1] = c;
			buf[2] = '\0';
			break;
		}
		/*
		 * Common tail: append 's'.  The <= 0 test leaves at least one
		 * byte for the terminating NUL.
		 */
		len = (int)strlen(s);
		if ((size -= len) <= 0)
			return (ENAMETOOLONG);
		(void) strcpy(fp, s);
		/*
		 * strip trailing "/root/" from non-GZ zonepath string
		 *
		 * Only the output pointer is pulled back; 'size' has already
		 * been charged the full length, and the stale "/root/" bytes
		 * are overwritten by whatever is written next.
		 */
		if (c == 'Z' && len > 6) {
			len -= 6;
			ASSERT(strncmp(fp + len, "/root/", 6) == 0);
		}
		fp += len;
	}

	*fp = '\0';
	return (0);
}
622
623 static int
624 dump_one_core(int sig, rlim64_t rlimit, enum core_types core_type,
625 struct core_globals *cg, char **name)
626 {
627 refstr_t *rp;
628 proc_t *p = curproc;
629 zoneid_t zoneid;
630 int error;
631 char *fp;
632 cred_t *cr;
633
634 ASSERT(core_type == CORE_ZONE || core_type == CORE_GLOBAL);
635 zoneid = (core_type == CORE_ZONE ? getzoneid() : GLOBAL_ZONEID);
636
637 mutex_enter(&cg->core_lock);
638 if ((rp = cg->core_file) != NULL)
639 refstr_hold(rp);
640 mutex_exit(&cg->core_lock);
641 if (rp == NULL) {
642 core_log(cg, 0, "no global core file pattern exists", NULL,
643 zoneid);
644 return (1); /* core file not generated */
645 }
646 fp = kmem_alloc(MAXPATHLEN, KM_SLEEP);
647 cr = zone_get_kcred(getzoneid());
648 error = expand_string(refstr_value(rp), fp, MAXPATHLEN, cr);
649 crfree(cr);
650 if (error != 0) {
651 core_log(cg, 0, "global core file pattern too long",
652 refstr_value(rp), zoneid);
653 } else if ((error = do_core(fp, sig, core_type, cg)) == 0) {
654 core_log(cg, 0, "core dumped", fp, zoneid);
655 } else if (error == ENOTSUP) {
656 core_log(cg, 0, "setid process, core not dumped", fp, zoneid);
657 } else if (error == ENOSPC) {
658 core_log(cg, 0, "no space left on device, core truncated",
659 fp, zoneid);
660 } else if (error == EFBIG) {
661 if (rlimit == 0)
662 core_log(cg, 0, "core rlimit is zero, core not dumped",
663 fp, zoneid);
664 else
665 core_log(cg, 0, "core rlimit exceeded, core truncated",
666 fp, zoneid);
667 /*
668 * In addition to the core result logging, we
669 * may also have explicit actions defined on
670 * core file size violations via the resource
671 * control framework.
672 */
673 mutex_enter(&p->p_lock);
674 (void) rctl_action(rctlproc_legacy[RLIMIT_CORE],
675 p->p_rctls, p, RCA_SAFE);
676 mutex_exit(&p->p_lock);
677 } else {
678 core_log(cg, error, "core dump failed", fp, zoneid);
679 }
680 refstr_rele(rp);
681 if (name != NULL)
682 *name = fp;
683 else
684 kmem_free(fp, MAXPATHLEN);
685 return (error);
686 }
687
/*
 * Dump core for the current process in response to signal 'sig'.
 *
 * Up to three dumps are attempted: one using the per-process pattern (when
 * CC_PROCESS_PATH is set for the process's zone), one using the zone's own
 * "global" pattern, and, when the process is not in the global zone, one
 * using the global zone's pattern.
 *
 * sig		the signal being handled; must be the lwp's current signal
 * ext		when non-zero, the process contract is not notified
 *		(NOTE(review): the meaning of 'ext' beyond that is not visible
 *		here -- confirm against the callers)
 *
 * Returns 0 if at least one dump attempt returned success, non-zero if no
 * core file was created.
 */
int
core(int sig, int ext)
{
	proc_t *p = curproc;
	klwp_t *lwp = ttolwp(curthread);
	refstr_t *rp;
	char *fp_process = NULL, *fp_global = NULL, *fp_zone = NULL;
	int error1 = 1;		/* per-process dump; non-zero == no core */
	int error2 = 1;		/* dump using this zone's settings */
	int error3 = 1;		/* dump using the global zone's settings */
	k_sigset_t sigmask;
	k_sigset_t sighold;
	rlim64_t rlimit;
	struct core_globals *my_cg, *global_cg;

	global_cg = zone_getspecific(core_zone_key, global_zone);
	ASSERT(global_cg != NULL);

	my_cg = zone_getspecific(core_zone_key, curproc->p_zone);
	ASSERT(my_cg != NULL);

	/* core files suppressed? */
	if (!(my_cg->core_options & (CC_PROCESS_PATH|CC_GLOBAL_PATH)) &&
	    !(global_cg->core_options & CC_GLOBAL_PATH)) {
		/* Still tell the contract, reporting no core file names. */
		if (!ext && p->p_ct_process != NULL)
			contract_process_core(p->p_ct_process, p, sig,
			    NULL, NULL, NULL);
		return (1);
	}

	/*
	 * Block all signals except SIGHUP, SIGINT, SIGKILL, and SIGTERM; no
	 * other signal may interrupt a core dump.  For each signal, we
	 * explicitly unblock it and set it in p_siginfo to allow for some
	 * minimal error reporting.  Additionally, we get the current limit on
	 * core file size for handling later error reporting.
	 *
	 * A signal of those four stays blocked if it was already blocked in
	 * the thread's previous hold mask.
	 */
	mutex_enter(&p->p_lock);

	p->p_flag |= SDOCORE;
	schedctl_finish_sigblock(curthread);
	sigmask = curthread->t_hold;	/* remember for later */
	sigfillset(&sighold);
	if (!sigismember(&sigmask, SIGHUP))
		sigdelset(&sighold, SIGHUP);
	if (!sigismember(&sigmask, SIGINT))
		sigdelset(&sighold, SIGINT);
	if (!sigismember(&sigmask, SIGKILL))
		sigdelset(&sighold, SIGKILL);
	if (!sigismember(&sigmask, SIGTERM))
		sigdelset(&sighold, SIGTERM);

	sigaddset(&p->p_siginfo, SIGHUP);
	sigaddset(&p->p_siginfo, SIGINT);
	sigaddset(&p->p_siginfo, SIGKILL);
	sigaddset(&p->p_siginfo, SIGTERM);

	curthread->t_hold = sighold;

	rlimit = rctl_enforced_value(rctlproc_legacy[RLIMIT_CORE], p->p_rctls,
	    p);

	mutex_exit(&p->p_lock);

	/*
	 * Undo any watchpoints.
	 */
	pr_free_watched_pages(p);

	/*
	 * The presence of a current signal prevents file i/o
	 * from succeeding over a network.  We copy the current
	 * signal information to the side and cancel the current
	 * signal so that the core dump will succeed.
	 */
	ASSERT(lwp->lwp_cursig == sig);
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	if (lwp->lwp_curinfo == NULL) {
		/* No queued siginfo: synthesize a minimal one. */
		bzero(&lwp->lwp_siginfo, sizeof (k_siginfo_t));
		lwp->lwp_siginfo.si_signo = sig;
		lwp->lwp_siginfo.si_code = SI_NOINFO;
	} else {
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &lwp->lwp_siginfo, sizeof (k_siginfo_t));
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}

	/*
	 * Convert the core file name patterns into path names
	 * and call do_core() to write the core files.
	 */

	if (my_cg->core_options & CC_PROCESS_PATH) {
		mutex_enter(&p->p_lock);
		if (p->p_corefile != NULL)
			rp = corectl_path_value(p->p_corefile);
		else
			rp = NULL;
		mutex_exit(&p->p_lock);
		if (rp != NULL) {
			fp_process = kmem_alloc(MAXPATHLEN, KM_SLEEP);
			error1 = expand_string(refstr_value(rp),
			    fp_process, MAXPATHLEN, p->p_cred);
			if (error1 == 0)
				error1 = do_core(fp_process, sig, CORE_PROC,
				    my_cg);
			refstr_rele(rp);
		}
	}

	/*
	 * Note the naming: fp_global receives the path written using this
	 * zone's settings (CORE_ZONE), while fp_zone receives the path
	 * written using the global zone's settings (CORE_GLOBAL).  The
	 * second dump is skipped when the process is in the global zone,
	 * since both settings are then the same object.
	 */
	if (my_cg->core_options & CC_GLOBAL_PATH)
		error2 = dump_one_core(sig, rlimit, CORE_ZONE, my_cg,
		    &fp_global);
	if (global_cg != my_cg && (global_cg->core_options & CC_GLOBAL_PATH))
		error3 = dump_one_core(sig, rlimit, CORE_GLOBAL, global_cg,
		    &fp_zone);

	/*
	 * Restore the signal hold mask.
	 */
	mutex_enter(&p->p_lock);
	curthread->t_hold = sigmask;
	mutex_exit(&p->p_lock);

	/* Report only the paths of dumps that actually succeeded. */
	if (!ext && p->p_ct_process != NULL)
		contract_process_core(p->p_ct_process, p, sig,
		    error1 == 0 ? fp_process : NULL,
		    error2 == 0 ? fp_global : NULL,
		    error3 == 0 ? fp_zone : NULL);

	/*
	 * FMA ereport is currently generated only for global zone cores
	 * with global path.
	 */
	if (error2 == 0 && global_cg == my_cg)
		gen_ereport(fp_global, sig);

	/* The path buffers are owned here regardless of dump outcome. */
	if (fp_process != NULL)
		kmem_free(fp_process, MAXPATHLEN);
	if (fp_global != NULL)
		kmem_free(fp_global, MAXPATHLEN);
	if (fp_zone != NULL)
		kmem_free(fp_zone, MAXPATHLEN);

	/*
	 * Return non-zero if no core file was created.
	 */
	return (error1 != 0 && error2 != 0 && error3 != 0);
}
839
/*
 * Maximum chunk size for dumping core files,
 * size in pages, patchable in /etc/system.
 * core_seg() limits each core_write() call to core_chunk * PAGESIZE bytes.
 */
uint_t core_chunk = 32;

/*
 * The delay between core_write() calls, in microseconds.  The default
 * matches one "normal" clock tick, or 10 milliseconds.
 *
 * NOTE(review): nothing in the visible portion of this file reads this
 * variable; confirm whether it is still consumed elsewhere before relying
 * on it as a tunable.
 */
clock_t core_delay_usec = 10000;
851
852 /*
853 * Common code to core dump process memory. The core_seg routine does i/o
854 * using core_write() below, and so it has the same failure semantics.
855 */
856 int
857 core_seg(proc_t *p, vnode_t *vp, offset_t offset, caddr_t addr, size_t size,
858 rlim64_t rlimit, cred_t *credp)
859 {
860 caddr_t eaddr;
861 caddr_t base;
862 size_t len;
863 int err = 0;
864
865 eaddr = addr + size;
866 for (base = addr; base < eaddr; base += len) {
867 len = eaddr - base;
868 if (as_memory(p->p_as, &base, &len) != 0)
869 return (0);
870
871 /*
872 * Reduce len to a reasonable value so that we don't
873 * overwhelm the VM system with a monstrously large
874 * single write and cause pageout to stop running.
875 */
876 if (len > (size_t)core_chunk * PAGESIZE)
877 len = (size_t)core_chunk * PAGESIZE;
878
879 err = core_write(vp, UIO_USERSPACE,
880 offset + (size_t)(base - addr), base, len, rlimit, credp);
881
882 if (err)
883 return (err);
884
885 /*
886 * If we have taken a signal, return EINTR to allow the dump
887 * to be aborted.
888 */
889 if (issig(JUSTLOOKING) && issig(FORREAL))
890 return (EINTR);
891 }
892
893 return (0);
894 }
895
896 /*
897 * Wrapper around vn_rdwr to perform writes to a core file. For core files,
898 * we always want to write as much as we possibly can, and then make sure to
899 * return either 0 to the caller (for success), or the actual errno value.
900 * By using this function, the caller can omit additional code for handling
901 * retries and errors for partial writes returned by vn_rdwr. If vn_rdwr
902 * unexpectedly returns zero but no progress has been made, we return ENOSPC.
903 */
904 int
905 core_write(vnode_t *vp, enum uio_seg segflg, offset_t offset,
906 const void *buf, size_t len, rlim64_t rlimit, cred_t *credp)
907 {
908 ssize_t resid = len;
909 int error = 0;
910
911 while (len != 0) {
912 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, len, offset,
913 segflg, 0, rlimit, credp, &resid);
914
915 if (error != 0)
916 break;
917
918 if (resid >= len)
919 return (ENOSPC);
920
921 buf = (const char *)buf + len - resid;
922 offset += len - resid;
923 len = resid;
924 }
925
926 return (error);
927 }