1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2011, Joyent Inc. All rights reserved.
  25  * Copyright (c) 2016 by Delphix. All rights reserved.
  26  * Copyright 2017 Nexenta Systems, Inc.
  27  */
  28 
  29 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  30 /*        All Rights Reserved   */
  31 
  32 #include <sys/param.h>
  33 #include <sys/types.h>
  34 #include <sys/time.h>
  35 #include <sys/sysmacros.h>
  36 #include <sys/proc.h>
  37 #include <sys/systm.h>
  38 #include <sys/cred.h>
  39 #include <sys/user.h>
  40 #include <sys/utsname.h>
  41 #include <sys/errno.h>
  42 #include <sys/signal.h>
  43 #include <sys/siginfo.h>
  44 #include <sys/fault.h>
  45 #include <sys/syscall.h>
  46 #include <sys/ucontext.h>
  47 #include <sys/prsystm.h>
  48 #include <sys/vnode.h>
  49 #include <sys/var.h>
  50 #include <sys/file.h>
  51 #include <sys/pathname.h>
  52 #include <sys/vfs.h>
  53 #include <sys/exec.h>
  54 #include <sys/debug.h>
  55 #include <sys/stack.h>
  56 #include <sys/kmem.h>
  57 #include <sys/schedctl.h>
  58 #include <sys/core.h>
  59 #include <sys/corectl.h>
  60 #include <sys/cmn_err.h>
  61 #include <vm/as.h>
  62 #include <sys/rctl.h>
  63 #include <sys/nbmlock.h>
  64 #include <sys/stat.h>
  65 #include <sys/zone.h>
  66 #include <sys/contract/process_impl.h>
  67 #include <sys/ddi.h>
  68 #include <sys/fm/protocol.h>
  69 #include <sys/fm/util.h>
  70 #include <sys/fm/sw/core.h>
  71 #include <sys/sysevent.h>
  72 
/*
 * Processes running within a zone potentially dump core in 3 locations,
 * based on the per-process, per-zone, and the global zone's core settings.
 *
 * Per-zone and global zone settings are often referred to as "global"
 * settings since they apply to the system (or zone) as a whole, as
 * opposed to a particular process.
 *
 * A core_types value tells the routines in this file which of those three
 * sets of settings a particular dump is being made for.  It selects the
 * root and starting directories used for path lookups in
 * remove_core_file() and create_core_file(), and the source of the size
 * limit, content mask and set-id option consulted by do_core().
 */
enum core_types {
	CORE_PROC,	/* Use per-process settings */
	CORE_ZONE,	/* Use per-zone settings */
	CORE_GLOBAL	/* Use global zone settings */
};
  86 
  87 /*
  88  * Log information about "global" core dumps to syslog.
  89  */
  90 static void
  91 core_log(struct core_globals *cg, int error, const char *why, const char *path,
  92     zoneid_t zoneid)
  93 {
  94         proc_t *p = curproc;
  95         pid_t pid = p->p_pid;
  96         char *fn = PTOU(p)->u_comm;
  97 
  98         if (!(cg->core_options & CC_GLOBAL_LOG))
  99                 return;
 100 
 101         if (path == NULL)
 102                 zcmn_err(zoneid, CE_NOTE, "core_log: %s[%d] %s", fn, pid, why);
 103         else if (error == 0)
 104                 zcmn_err(zoneid, CE_NOTE, "core_log: %s[%d] %s: %s", fn, pid,
 105                     why, path);
 106         else
 107                 zcmn_err(zoneid, CE_NOTE, "core_log: %s[%d] %s, errno=%d: %s",
 108                     fn, pid, why, error, path);
 109 }
 110 
 111 /*
 112  * Generate FMA e-report for a core.
 113  */
 114 static void
 115 gen_ereport(const char *path, int sig)
 116 {
 117         nvlist_t *ereport = NULL;
 118         nvlist_t *fmri = NULL;
 119         nvlist_t *sw_obj = NULL;
 120         uint64_t ena;
 121         proc_t *p = curproc;
 122         int err = 0;
 123 
 124         if ((ereport = fm_nvlist_create(NULL)) == NULL)
 125                 return;
 126         if ((fmri = fm_nvlist_create(NULL)) == NULL)
 127                 goto out;
 128         if ((sw_obj = fm_nvlist_create(NULL)) == NULL)
 129                 goto out;
 130         ena = fm_ena_generate(0, FM_ENA_FMT1);
 131 
 132         err |= nvlist_add_uint8(fmri, FM_VERSION, FM_SW_SCHEME_VERSION);
 133         err |= nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_SW);
 134         err |= nvlist_add_string(sw_obj, FM_FMRI_SW_OBJ_PATH, path);
 135         err |= nvlist_add_nvlist(fmri, FM_FMRI_SW_OBJ, sw_obj);
 136 
 137         if (err != 0)
 138                 goto out;
 139 
 140         fm_ereport_set(ereport, FM_EREPORT_VERSION, CORE_ERROR_CLASS,
 141             ena, fmri, NULL);
 142 
 143         fm_payload_set(ereport,
 144             FM_EREPORT_PAYLOAD_CORE_COMMAND, DATA_TYPE_STRING,
 145             p->p_exec->v_path ? p->p_exec->v_path : p->p_user.u_comm,
 146             FM_EREPORT_PAYLOAD_CORE_PSARGS, DATA_TYPE_STRING,
 147             p->p_user.u_psargs,
 148             FM_EREPORT_PAYLOAD_CORE_SIGNAL, DATA_TYPE_INT32, sig,
 149             FM_EREPORT_PAYLOAD_CORE_PATH, DATA_TYPE_STRING, path,
 150             NULL);
 151 
 152         fm_ereport_post(ereport, EVCH_SLEEP);
 153 
 154 out:
 155         fm_nvlist_destroy(sw_obj, FM_NVA_FREE);
 156         fm_nvlist_destroy(ereport, FM_NVA_FREE);
 157         fm_nvlist_destroy(fmri, FM_NVA_FREE);
 158 }
 159 
 160 /*
 161  * Private version of vn_remove().
 162  * Refuse to unlink a directory or an unwritable file.
 163  * Also allow the process to access files normally inaccessible due to
 164  * chroot(2) or Zone limitations.
 165  */
 166 static int
 167 remove_core_file(char *fp, enum core_types core_type)
 168 {
 169         vnode_t *vp = NULL;             /* entry vnode */
 170         vnode_t *dvp;                   /* ptr to parent dir vnode */
 171         vfs_t *dvfsp;
 172         int error;
 173         int in_crit = 0;
 174         pathname_t pn;                  /* name of entry */
 175         vnode_t *startvp, *rootvp;
 176 
 177         if ((error = pn_get(fp, UIO_SYSSPACE, &pn)) != 0)
 178                 return (error);
 179         /*
 180          * Determine what rootvp to use.
 181          */
 182         if (core_type == CORE_PROC) {
 183                 rootvp = (PTOU(curproc)->u_rdir == NULL ?
 184                     curproc->p_zone->zone_rootvp : PTOU(curproc)->u_rdir);
 185                 startvp = (fp[0] == '/' ? rootvp : PTOU(curproc)->u_cdir);
 186         } else if (core_type == CORE_ZONE) {
 187                 startvp = curproc->p_zone->zone_rootvp;
 188                 rootvp = curproc->p_zone->zone_rootvp;
 189         } else {
 190                 ASSERT(core_type == CORE_GLOBAL);
 191                 startvp = rootdir;
 192                 rootvp = rootdir;
 193         }
 194         VN_HOLD(startvp);
 195         if (rootvp != rootdir)
 196                 VN_HOLD(rootvp);
 197         if ((error = lookuppnvp(&pn, NULL, NO_FOLLOW, &dvp, &vp, rootvp,
 198             startvp, CRED())) != 0) {
 199                 pn_free(&pn);
 200                 return (error);
 201         }
 202         /*
 203          * Succeed if there is no file.
 204          * Fail if the file is not a regular file.
 205          * Fail if the filesystem is mounted read-only.
 206          * Fail if the file is not writeable.
 207          * Fail if the file has NBMAND share reservations.
 208          */
 209         if (vp == NULL)
 210                 error = 0;
 211         else if (vp->v_type != VREG)
 212                 error = EACCES;
 213         else if ((dvfsp = dvp->v_vfsp) != NULL &&
 214             (dvfsp->vfs_flag & VFS_RDONLY))
 215                 error = EROFS;
 216         else if ((error = VOP_ACCESS(vp, VWRITE, 0, CRED(), NULL)) == 0) {
 217                 if (nbl_need_check(vp)) {
 218                         nbl_start_crit(vp, RW_READER);
 219                         in_crit = 1;
 220                         if (nbl_share_conflict(vp, NBL_REMOVE, NULL)) {
 221                                 error = EACCES;
 222                         }
 223                 }
 224                 if (!error) {
 225                         error = VOP_REMOVE(dvp, pn.pn_path, CRED(), NULL, 0);
 226                 }
 227         }
 228 
 229         pn_free(&pn);
 230         if (vp != NULL) {
 231                 if (in_crit)
 232                         nbl_end_crit(vp);
 233                 VN_RELE(vp);
 234         }
 235         VN_RELE(dvp);
 236         return (error);
 237 }
 238 
 239 /*
 240  * Create the core file in a location that may be normally inaccessible due
 241  * to chroot(2) or Zone limitations.
 242  */
 243 static int
 244 create_core_file(char *fp, enum core_types core_type, vnode_t **vpp)
 245 {
 246         int error;
 247         mode_t perms = (S_IRUSR | S_IWUSR);
 248         pathname_t pn;
 249         char *file;
 250         vnode_t *vp;
 251         vnode_t *dvp;
 252         vattr_t vattr;
 253         cred_t *credp = CRED();
 254 
 255         if (core_type == CORE_PROC) {
 256                 file = fp;
 257                 dvp = NULL;     /* regular lookup */
 258         } else {
 259                 vnode_t *startvp, *rootvp;
 260 
 261                 ASSERT(core_type == CORE_ZONE || core_type == CORE_GLOBAL);
 262                 /*
 263                  * This is tricky because we want to dump the core in
 264                  * a location which may normally be inaccessible
 265                  * to us (due to chroot(2) limitations, or zone
 266                  * membership), and hence need to overcome u_rdir
 267                  * restrictions.  The basic idea is to separate
 268                  * the path from the filename, lookup the
 269                  * pathname separately (starting from the global
 270                  * zone's root directory), and then open the
 271                  * file starting at the directory vnode.
 272                  */
 273                 if (error = pn_get(fp, UIO_SYSSPACE, &pn))
 274                         return (error);
 275 
 276                 if (core_type == CORE_ZONE) {
 277                         startvp = rootvp = curproc->p_zone->zone_rootvp;
 278                 } else {
 279                         startvp = rootvp = rootdir;
 280                 }
 281                 /*
 282                  * rootvp and startvp will be VN_RELE()'d by lookuppnvp() if
 283                  * necessary.
 284                  */
 285                 VN_HOLD(startvp);
 286                 if (rootvp != rootdir)
 287                         VN_HOLD(rootvp);
 288                 /*
 289                  * Do a lookup on the full path, ignoring the actual file, but
 290                  * finding the vnode for the directory.  It's OK if the file
 291                  * doesn't exist -- it most likely won't since we just removed
 292                  * it.
 293                  */
 294                 error = lookuppnvp(&pn, NULL, FOLLOW, &dvp, NULLVPP,
 295                     rootvp, startvp, credp);
 296                 pn_free(&pn);
 297                 if (error != 0)
 298                         return (error);
 299                 ASSERT(dvp != NULL);
 300                 /*
 301                  * Now find the final component in the path (ie, the name of
 302                  * the core file).
 303                  */
 304                 if (error = pn_get(fp, UIO_SYSSPACE, &pn)) {
 305                         VN_RELE(dvp);
 306                         return (error);
 307                 }
 308                 pn_setlast(&pn);
 309                 file = pn.pn_path;
 310         }
 311         error =  vn_openat(file, UIO_SYSSPACE,
 312             FWRITE | FTRUNC | FEXCL | FCREAT | FOFFMAX,
 313             perms, &vp, CRCREAT, PTOU(curproc)->u_cmask, dvp, -1);
 314         if (core_type != CORE_PROC) {
 315                 VN_RELE(dvp);
 316                 pn_free(&pn);
 317         }
 318         /*
 319          * Don't dump a core file owned by "nobody".
 320          */
 321         vattr.va_mask = AT_UID;
 322         if (error == 0 &&
 323             (VOP_GETATTR(vp, &vattr, 0, credp, NULL) != 0 ||
 324             vattr.va_uid != crgetuid(credp))) {
 325                 (void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0,
 326                     credp, NULL);
 327                 VN_RELE(vp);
 328                 (void) remove_core_file(fp, core_type);
 329                 error = EACCES;
 330         }
 331         *vpp = vp;
 332         return (error);
 333 }
 334 
 335 /*
 336  * Install the specified held cred into the process, and return a pointer to
 337  * the held cred which was previously the value of p->p_cred.
 338  */
 339 static cred_t *
 340 set_cred(proc_t *p, cred_t *newcr)
 341 {
 342         cred_t *oldcr;
 343         uid_t olduid, newuid;
 344 
 345         /*
 346          * Place a hold on the existing cred, and then install the new
 347          * cred into the proc structure.
 348          */
 349         mutex_enter(&p->p_crlock);
 350         oldcr = p->p_cred;
 351         crhold(oldcr);
 352         p->p_cred = newcr;
 353         mutex_exit(&p->p_crlock);
 354 
 355         ASSERT(crgetzoneid(oldcr) == crgetzoneid(newcr));
 356 
 357         /*
 358          * If the real uid is changing, keep the per-user process
 359          * counts accurate.
 360          */
 361         olduid = crgetruid(oldcr);
 362         newuid = crgetruid(newcr);
 363         if (olduid != newuid) {
 364                 zoneid_t zoneid = crgetzoneid(newcr);
 365 
 366                 mutex_enter(&pidlock);
 367                 upcount_dec(olduid, zoneid);
 368                 upcount_inc(newuid, zoneid);
 369                 mutex_exit(&pidlock);
 370         }
 371 
 372         /*
 373          * Broadcast the new cred to all the other threads.  The old
 374          * cred can be safely returned because we have a hold on it.
 375          */
 376         crset(p, newcr);
 377         return (oldcr);
 378 }
 379 
/*
 * Write one core file for the current process to the path 'fp'.
 *
 * fp         fully expanded path name of the core file
 * sig        signal number, passed through to the exec module's
 *            exec_core() routine
 * core_type  CORE_PROC takes the size limit and content from the process
 *            (RLIMIT_CORE and p_content); CORE_ZONE and CORE_GLOBAL take
 *            them from 'cg'
 * cg         core settings; also consulted for the set-id options
 *
 * Any existing file at 'fp' is removed and a new one created exclusively.
 * For CORE_ZONE/CORE_GLOBAL dumps, and for set-id processes, the file is
 * created and written using the zone's kcred; the process credential is
 * swapped only for the duration of the remove/create step.
 *
 * Returns 0 on success, otherwise an errno value:
 *   EFBIG    the applicable size limit is zero
 *   ENOTSUP  the process is set-id (or has SNOCD set) and set-id core
 *            files are not enabled for this core_type
 *   ENOSYS   no exec module could be found for the process
 *   other    the error from removing, creating, writing or closing the file
 */
static int
do_core(char *fp, int sig, enum core_types core_type, struct core_globals *cg)
{
	proc_t *p = curproc;
	cred_t *credp = CRED();
	rlim64_t rlimit;
	vnode_t *vp;
	int error = 0;
	struct execsw *eswp;
	cred_t *ocredp = NULL;
	int is_setid = 0;
	core_content_t content;
	uid_t uid;
	gid_t gid;

	/*
	 * Fetch the size limit and content mask from the settings that
	 * apply to this kind of dump.
	 */
	if (core_type == CORE_GLOBAL || core_type == CORE_ZONE) {
		mutex_enter(&cg->core_lock);
		content = cg->core_content;
		mutex_exit(&cg->core_lock);
		rlimit = cg->core_rlimit;
	} else {
		mutex_enter(&p->p_lock);
		rlimit = rctl_enforced_value(rctlproc_legacy[RLIMIT_CORE],
		    p->p_rctls, p);
		content = corectl_content_value(p->p_content);
		mutex_exit(&p->p_lock);
	}

	if (rlimit == 0)
		return (EFBIG);

	/*
	 * If SNOCD is set, or if the effective, real, and saved ids do
	 * not match up, no one but a privileged user is allowed to view
	 * this core file.  Set the credentials and the owner to root.
	 */
	if ((p->p_flag & SNOCD) ||
	    (uid = crgetuid(credp)) != crgetruid(credp) ||
	    uid != crgetsuid(credp) ||
	    (gid = crgetgid(credp)) != crgetrgid(credp) ||
	    gid != crgetsgid(credp)) {
		/*
		 * Because this is insecure against certain forms of file
		 * system attack, do it only if set-id core files have been
		 * enabled via corectl(CC_GLOBAL_SETID | CC_PROCESS_SETID).
		 */
		if (((core_type == CORE_GLOBAL || core_type == CORE_ZONE) &&
		    !(cg->core_options & CC_GLOBAL_SETID)) ||
		    (core_type == CORE_PROC &&
		    !(cg->core_options & CC_PROCESS_SETID)))
			return (ENOTSUP);

		is_setid = 1;
	}

	/*
	 * If we are doing a "global" core dump or a set-id core dump,
	 * use kcred to do the dumping.
	 */
	if (core_type == CORE_GLOBAL || core_type == CORE_ZONE || is_setid) {
		/*
		 * Use the zone's "kcred" to prevent privilege
		 * escalation.
		 */
		credp = zone_get_kcred(getzoneid());
		ASSERT(credp != NULL);
		/* ocredp is the process's own cred, held by set_cred(). */
		ocredp = set_cred(p, credp);
	}

	/*
	 * First remove any existing core file, then
	 * open the new core file with (O_EXCL|O_CREAT).
	 *
	 * The reasons for doing this are manifold:
	 *
	 * For security reasons, we don't want root processes
	 * to dump core through a symlink because that would
	 * allow a malicious user to clobber any file on
	 * the system if they could convince a root process,
	 * perhaps a set-uid root process that they started,
	 * to dump core in a directory writable by that user.
	 * Similar security reasons apply to hard links.
	 * For symmetry we do this unconditionally, not
	 * just for root processes.
	 *
	 * If the process has the core file mmap()d into the
	 * address space, we would be modifying the address
	 * space that we are trying to dump if we did not first
	 * remove the core file.  (The command "file core"
	 * is the canonical example of this possibility.)
	 *
	 * Opening the core file with O_EXCL|O_CREAT ensures that
	 * two concurrent core dumps don't clobber each other.
	 * One is bound to lose; we don't want to make both lose.
	 */
	if ((error = remove_core_file(fp, core_type)) == 0) {
		error = create_core_file(fp, core_type, &vp);
	}

	/*
	 * Now that vn_open is complete, reset the process's credentials if
	 * we changed them, and make 'credp' point to kcred used
	 * above.  We use 'credp' to do i/o on the core file below, but leave
	 * p->p_cred set to the original credential to allow the core file
	 * to record this information.
	 */
	if (ocredp != NULL)
		credp = set_cred(p, ocredp);

	if (error == 0) {
		int closerr;
#if defined(__sparc)
		(void) flush_user_windows_to_stack(NULL);
#endif
		/*
		 * Find the exec module for this process and let it write
		 * the core image.  NOTE(review): the rw_exit() below implies
		 * findexec_by_magic() returns with exec_lock held -- confirm.
		 */
		if ((eswp = PTOU(curproc)->u_execsw) == NULL ||
		    (eswp = findexec_by_magic(eswp->exec_magic)) == NULL) {
			error = ENOSYS;
		} else {
			error = eswp->exec_core(vp, p, credp, rlimit, sig,
			    content);
			rw_exit(eswp->exec_lock);
		}

		/* Always close and release; report a close error only if */
		/* writing the core itself succeeded. */
		closerr = VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, credp, NULL);
		VN_RELE(vp);
		if (error == 0)
			error = closerr;
	}

	/*
	 * If the credentials were swapped, 'credp' is now the held cred
	 * returned by the second set_cred() call; drop that hold.
	 */
	if (ocredp != NULL)
		crfree(credp);

	return (error);
}
 514 
 515 /*
 516  * Convert a core name pattern to a pathname.
 517  */
 518 static int
 519 expand_string(const char *pat, char *fp, int size, cred_t *cr)
 520 {
 521         proc_t *p = curproc;
 522         char buf[24];
 523         int len, i;
 524         char *s;
 525         char c;
 526 
 527         while ((c = *pat++) != '\0') {
 528                 if (size < 2)
 529                         return (ENAMETOOLONG);
 530                 if (c != '%') {
 531                         size--;
 532                         *fp++ = c;
 533                         continue;
 534                 }
 535                 if ((c = *pat++) == '\0') {
 536                         size--;
 537                         *fp++ = '%';
 538                         break;
 539                 }
 540                 switch (c) {
 541                 case 'p':       /* pid */
 542                         (void) sprintf((s = buf), "%d", p->p_pid);
 543                         break;
 544                 case 'u':       /* effective uid */
 545                         (void) sprintf((s = buf), "%u", crgetuid(p->p_cred));
 546                         break;
 547                 case 'g':       /* effective gid */
 548                         (void) sprintf((s = buf), "%u", crgetgid(p->p_cred));
 549                         break;
 550                 case 'f':       /* exec'd filename */
 551                         s = PTOU(p)->u_comm;
 552                         break;
 553                 case 'd':       /* exec'd dirname */
 554                         /*
 555                          * Even if pathname caching is disabled, we should
 556                          * be able to lookup the pathname for a directory.
 557                          */
 558                         if (p->p_execdir != NULL && vnodetopath(NULL,
 559                             p->p_execdir, fp, size, cr) == 0) {
 560                                 len = (int)strlen(fp);
 561                                 ASSERT(len < size);
 562                                 ASSERT(len >= 1);
 563                                 ASSERT(fp[0] == '/');
 564 
 565                                 /*
 566                                  * Strip off the leading slash.
 567                                  */
 568                                 for (i = 0; i < len; i++) {
 569                                         fp[i] = fp[i + 1];
 570                                 }
 571 
 572                                 len--;
 573 
 574                                 size -= len;
 575                                 fp += len;
 576                         } else {
 577                                 *fp = '\0';
 578                         }
 579 
 580                         continue;
 581                 case 'n':       /* system nodename */
 582                         s = uts_nodename();
 583                         break;
 584                 case 'm':       /* machine (sun4u, etc) */
 585                         s = utsname.machine;
 586                         break;
 587                 case 't':       /* decimal value of time(2) */
 588                         (void) sprintf((s = buf), "%ld", gethrestime_sec());
 589                         break;
 590                 case 'z':
 591                         s = p->p_zone->zone_name;
 592                         break;
 593                 case 'Z':
 594                         /* This is zonepath + "/root/", except for GZ */
 595                         s = p->p_zone->zone_rootpath;
 596                         break;
 597                 case '%':
 598                         (void) strcpy((s = buf), "%");
 599                         break;
 600                 default:
 601                         s = buf;
 602                         buf[0] = '%';
 603                         buf[1] = c;
 604                         buf[2] = '\0';
 605                         break;
 606                 }
 607                 len = (int)strlen(s);
 608                 if ((size -= len) <= 0)
 609                         return (ENAMETOOLONG);
 610                 (void) strcpy(fp, s);
 611                 /* strip trailing "/root/" from non-GZ zonepath string */
 612                 if (c == 'Z' && len > 6) {
 613                         len -= 6;
 614                         ASSERT(strncmp(fp + len, "/root/", 6) == 0);
 615                 }
 616                 fp += len;
 617         }
 618 
 619         *fp = '\0';
 620         return (0);
 621 }
 622 
 623 static int
 624 dump_one_core(int sig, rlim64_t rlimit, enum core_types core_type,
 625     struct core_globals *cg, char **name)
 626 {
 627         refstr_t *rp;
 628         proc_t *p = curproc;
 629         zoneid_t zoneid;
 630         int error;
 631         char *fp;
 632         cred_t *cr;
 633 
 634         ASSERT(core_type == CORE_ZONE || core_type == CORE_GLOBAL);
 635         zoneid = (core_type == CORE_ZONE ? getzoneid() : GLOBAL_ZONEID);
 636 
 637         mutex_enter(&cg->core_lock);
 638         if ((rp = cg->core_file) != NULL)
 639                 refstr_hold(rp);
 640         mutex_exit(&cg->core_lock);
 641         if (rp == NULL) {
 642                 core_log(cg, 0, "no global core file pattern exists", NULL,
 643                     zoneid);
 644                 return (1);     /* core file not generated */
 645         }
 646         fp = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 647         cr = zone_get_kcred(getzoneid());
 648         error = expand_string(refstr_value(rp), fp, MAXPATHLEN, cr);
 649         crfree(cr);
 650         if (error != 0) {
 651                 core_log(cg, 0, "global core file pattern too long",
 652                     refstr_value(rp), zoneid);
 653         } else if ((error = do_core(fp, sig, core_type, cg)) == 0) {
 654                 core_log(cg, 0, "core dumped", fp, zoneid);
 655         } else if (error == ENOTSUP) {
 656                 core_log(cg, 0, "setid process, core not dumped", fp, zoneid);
 657         } else if (error == ENOSPC) {
 658                 core_log(cg, 0, "no space left on device, core truncated",
 659                     fp, zoneid);
 660         } else if (error == EFBIG) {
 661                 if (rlimit == 0)
 662                         core_log(cg, 0, "core rlimit is zero, core not dumped",
 663                             fp, zoneid);
 664                 else
 665                         core_log(cg, 0, "core rlimit exceeded, core truncated",
 666                             fp, zoneid);
 667                 /*
 668                  * In addition to the core result logging, we
 669                  * may also have explicit actions defined on
 670                  * core file size violations via the resource
 671                  * control framework.
 672                  */
 673                 mutex_enter(&p->p_lock);
 674                 (void) rctl_action(rctlproc_legacy[RLIMIT_CORE],
 675                     p->p_rctls, p, RCA_SAFE);
 676                 mutex_exit(&p->p_lock);
 677         } else {
 678                 core_log(cg, error, "core dump failed", fp, zoneid);
 679         }
 680         refstr_rele(rp);
 681         if (name != NULL)
 682                 *name = fp;
 683         else
 684                 kmem_free(fp, MAXPATHLEN);
 685         return (error);
 686 }
 687 
/*
 * Dump core for the current process in response to signal 'sig'.
 *
 * Up to three core files are attempted, each independently:
 *   - per-process, when the zone's settings have CC_PROCESS_PATH set and
 *     the process has a core file path;
 *   - per-zone, when the zone's settings have CC_GLOBAL_PATH set;
 *   - global zone, when the global zone's settings have CC_GLOBAL_PATH set
 *     and differ from the zone's own settings (i.e. the process is not
 *     already using the global zone's settings).
 *
 * sig  the signal causing the dump; must be the lwp's current signal
 * ext  when zero and the process belongs to a process contract, the
 *      contract is notified of the result via contract_process_core()
 *
 * While dumping, all signals other than SIGHUP, SIGINT, SIGKILL and SIGTERM
 * are held; the previous hold mask is restored afterwards.
 *
 * Returns 0 if at least one core file was created and non-zero otherwise.
 */
int
core(int sig, int ext)
{
	proc_t *p = curproc;
	klwp_t *lwp = ttolwp(curthread);
	refstr_t *rp;
	char *fp_process = NULL, *fp_global = NULL, *fp_zone = NULL;
	/* Non-zero means "no core produced" for the corresponding dump. */
	int error1 = 1;
	int error2 = 1;
	int error3 = 1;
	k_sigset_t sigmask;
	k_sigset_t sighold;
	rlim64_t rlimit;
	struct core_globals *my_cg, *global_cg;

	global_cg = zone_getspecific(core_zone_key, global_zone);
	ASSERT(global_cg != NULL);

	my_cg = zone_getspecific(core_zone_key, curproc->p_zone);
	ASSERT(my_cg != NULL);

	/* core files suppressed? */
	if (!(my_cg->core_options & (CC_PROCESS_PATH|CC_GLOBAL_PATH)) &&
	    !(global_cg->core_options & CC_GLOBAL_PATH)) {
		if (!ext && p->p_ct_process != NULL)
			contract_process_core(p->p_ct_process, p, sig,
			    NULL, NULL, NULL);
		return (1);
	}

	/*
	 * Block all signals except SIGHUP, SIGINT, SIGKILL, and SIGTERM; no
	 * other signal may interrupt a core dump.  For each signal, we
	 * explicitly unblock it and set it in p_siginfo to allow for some
	 * minimal error reporting.  Additionally, we get the current limit on
	 * core file size for handling later error reporting.
	 */
	mutex_enter(&p->p_lock);

	p->p_flag |= SDOCORE;
	schedctl_finish_sigblock(curthread);
	sigmask = curthread->t_hold; /* remember for later */
	sigfillset(&sighold);
	/* A signal the thread was already holding stays held. */
	if (!sigismember(&sigmask, SIGHUP))
		sigdelset(&sighold, SIGHUP);
	if (!sigismember(&sigmask, SIGINT))
		sigdelset(&sighold, SIGINT);
	if (!sigismember(&sigmask, SIGKILL))
		sigdelset(&sighold, SIGKILL);
	if (!sigismember(&sigmask, SIGTERM))
		sigdelset(&sighold, SIGTERM);

	sigaddset(&p->p_siginfo, SIGHUP);
	sigaddset(&p->p_siginfo, SIGINT);
	sigaddset(&p->p_siginfo, SIGKILL);
	sigaddset(&p->p_siginfo, SIGTERM);

	curthread->t_hold = sighold;

	rlimit = rctl_enforced_value(rctlproc_legacy[RLIMIT_CORE], p->p_rctls,
	    p);

	mutex_exit(&p->p_lock);

	/*
	 * Undo any watchpoints.
	 */
	pr_free_watched_pages(p);

	/*
	 * The presence of a current signal prevents file i/o
	 * from succeeding over a network.  We copy the current
	 * signal information to the side and cancel the current
	 * signal so that the core dump will succeed.
	 */
	ASSERT(lwp->lwp_cursig == sig);
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	if (lwp->lwp_curinfo == NULL) {
		/* No queued siginfo: synthesize a minimal one. */
		bzero(&lwp->lwp_siginfo, sizeof (k_siginfo_t));
		lwp->lwp_siginfo.si_signo = sig;
		lwp->lwp_siginfo.si_code = SI_NOINFO;
	} else {
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &lwp->lwp_siginfo, sizeof (k_siginfo_t));
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}

	/*
	 * Convert the core file name patterns into path names
	 * and call do_core() to write the core files.
	 */

	/* Per-process core file. */
	if (my_cg->core_options & CC_PROCESS_PATH) {
		mutex_enter(&p->p_lock);
		if (p->p_corefile != NULL)
			rp = corectl_path_value(p->p_corefile);
		else
			rp = NULL;
		mutex_exit(&p->p_lock);
		if (rp != NULL) {
			fp_process = kmem_alloc(MAXPATHLEN, KM_SLEEP);
			error1 = expand_string(refstr_value(rp),
			    fp_process, MAXPATHLEN, p->p_cred);
			if (error1 == 0)
				error1 = do_core(fp_process, sig, CORE_PROC,
				    my_cg);
			refstr_rele(rp);
		}
	}

	/*
	 * Per-zone and global-zone core files.
	 * NOTE(review): the CORE_ZONE path is stored in fp_global and the
	 * CORE_GLOBAL path in fp_zone; the names look swapped relative to
	 * the core types -- confirm against the parameter order expected by
	 * contract_process_core().
	 */
	if (my_cg->core_options & CC_GLOBAL_PATH)
		error2 = dump_one_core(sig, rlimit, CORE_ZONE, my_cg,
		    &fp_global);
	if (global_cg != my_cg && (global_cg->core_options & CC_GLOBAL_PATH))
		error3 = dump_one_core(sig, rlimit, CORE_GLOBAL, global_cg,
		    &fp_zone);

	/*
	 * Restore the signal hold mask.
	 */
	mutex_enter(&p->p_lock);
	curthread->t_hold = sigmask;
	mutex_exit(&p->p_lock);

	/* Report only the paths of the dumps that actually succeeded. */
	if (!ext && p->p_ct_process != NULL)
		contract_process_core(p->p_ct_process, p, sig,
		    error1 == 0 ? fp_process : NULL,
		    error2 == 0 ? fp_global : NULL,
		    error3 == 0 ? fp_zone : NULL);

	/*
	 * FMA ereport is currently generated only for global zone cores
	 * with global path.
	 */
	if (error2 == 0 && global_cg == my_cg)
		gen_ereport(fp_global, sig);

	if (fp_process != NULL)
		kmem_free(fp_process, MAXPATHLEN);
	if (fp_global != NULL)
		kmem_free(fp_global, MAXPATHLEN);
	if (fp_zone != NULL)
		kmem_free(fp_zone, MAXPATHLEN);

	/*
	 * Return non-zero if no core file was created.
	 */
	return (error1 != 0 && error2 != 0 && error3 != 0);
}
 839 
/*
 * Maximum size, in pages, of a single write issued by core_seg() while
 * dumping process memory to a core file.  Patchable in /etc/system.
 */
 844 uint_t  core_chunk = 32;
 845 
 846 /*
 847  * The delay between core_write() calls, in microseconds.  The default
 848  * matches one "normal" clock tick, or 10 milliseconds.
 849  */
 850 clock_t core_delay_usec = 10000;
 851 
 852 /*
 853  * Common code to core dump process memory.  The core_seg routine does i/o
 854  * using core_write() below, and so it has the same failure semantics.
 855  */
 856 int
 857 core_seg(proc_t *p, vnode_t *vp, offset_t offset, caddr_t addr, size_t size,
 858     rlim64_t rlimit, cred_t *credp)
 859 {
 860         caddr_t eaddr;
 861         caddr_t base;
 862         size_t len;
 863         int err = 0;
 864 
 865         eaddr = addr + size;
 866         for (base = addr; base < eaddr; base += len) {
 867                 len = eaddr - base;
 868                 if (as_memory(p->p_as, &base, &len) != 0)
 869                         return (0);
 870 
 871                 /*
 872                  * Reduce len to a reasonable value so that we don't
 873                  * overwhelm the VM system with a monstrously large
 874                  * single write and cause pageout to stop running.
 875                  */
 876                 if (len > (size_t)core_chunk * PAGESIZE)
 877                         len = (size_t)core_chunk * PAGESIZE;
 878 
 879                 err = core_write(vp, UIO_USERSPACE,
 880                     offset + (size_t)(base - addr), base, len, rlimit, credp);
 881 
 882                 if (err)
 883                         return (err);
 884 
 885                 /*
 886                  * If we have taken a signal, return EINTR to allow the dump
 887                  * to be aborted.
 888                  */
 889                 if (issig(JUSTLOOKING) && issig(FORREAL))
 890                         return (EINTR);
 891         }
 892 
 893         return (0);
 894 }
 895 
 896 /*
 897  * Wrapper around vn_rdwr to perform writes to a core file.  For core files,
 898  * we always want to write as much as we possibly can, and then make sure to
 899  * return either 0 to the caller (for success), or the actual errno value.
 900  * By using this function, the caller can omit additional code for handling
 901  * retries and errors for partial writes returned by vn_rdwr.  If vn_rdwr
 902  * unexpectedly returns zero but no progress has been made, we return ENOSPC.
 903  */
 904 int
 905 core_write(vnode_t *vp, enum uio_seg segflg, offset_t offset,
 906     const void *buf, size_t len, rlim64_t rlimit, cred_t *credp)
 907 {
 908         ssize_t resid = len;
 909         int error = 0;
 910 
 911         while (len != 0) {
 912                 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, len, offset,
 913                     segflg, 0, rlimit, credp, &resid);
 914 
 915                 if (error != 0)
 916                         break;
 917 
 918                 if (resid >= len)
 919                         return (ENOSPC);
 920 
 921                 buf = (const char *)buf + len - resid;
 922                 offset += len - resid;
 923                 len = resid;
 924         }
 925 
 926         return (error);
 927 }