1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  27 /*        All Rights Reserved   */
  28 /*
  29  * Copyright 2016 Joyent, Inc.
  30  */
  31 
  32 #include <sys/types.h>
  33 #include <sys/param.h>
  34 #include <sys/thread.h>
  35 #include <sys/sysmacros.h>
  36 #include <sys/signal.h>
  37 #include <sys/cred.h>
  38 #include <sys/user.h>
  39 #include <sys/errno.h>
  40 #include <sys/vnode.h>
  41 #include <sys/mman.h>
  42 #include <sys/kmem.h>
  43 #include <sys/proc.h>
  44 #include <sys/pathname.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/systm.h>
  47 #include <sys/elf.h>
  48 #include <sys/vmsystm.h>
  49 #include <sys/debug.h>
  50 #include <sys/auxv.h>
  51 #include <sys/exec.h>
  52 #include <sys/prsystm.h>
  53 #include <vm/as.h>
  54 #include <vm/rm.h>
  55 #include <vm/seg.h>
  56 #include <vm/seg_vn.h>
  57 #include <sys/modctl.h>
  58 #include <sys/systeminfo.h>
  59 #include <sys/vmparam.h>
  60 #include <sys/machelf.h>
  61 #include <sys/shm_impl.h>
  62 #include <sys/archsystm.h>
  63 #include <sys/fasttrap.h>
  64 #include <sys/brand.h>
  65 #include "elf_impl.h"
  66 #include <sys/sdt.h>
  67 #include <sys/siginfo.h>
  68 
  69 #if defined(__x86)
  70 #include <sys/comm_page_util.h>
  71 #endif /* defined(__x86) */
  72 
  73 
  74 extern int at_flags;
  75 
  76 #define ORIGIN_STR      "ORIGIN"
  77 #define ORIGIN_STR_SIZE 6
  78 
  79 static int getelfhead(vnode_t *, cred_t *, Ehdr *, int *, int *, int *);
  80 static int getelfphdr(vnode_t *, cred_t *, const Ehdr *, int, caddr_t *,
  81     ssize_t *);
  82 static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *,
  83     ssize_t *, caddr_t *, ssize_t *);
  84 static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *);
  85 static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t,
  86     Phdr **, Phdr **, Phdr **, Phdr **, Phdr *,
  87     caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *);
  88 
  /*
   * Identifies each section name handled by the shstrtab_*() routines.  A
   * value of this type indexes both shstrtab_data[] and the sst_ndx[] array
   * of a shstrtab_t.  STR_NUM is the count of names, not a name itself.
   */
  typedef enum {
          STR_CTF = 0,
          STR_SYMTAB,
          STR_DYNSYM,
          STR_STRTAB,
          STR_DYNSTR,
          STR_SHSTRTAB,
          STR_NUM
  } shstrtype_t;
  98 
  99 static const char *shstrtab_data[] = {
 100         ".SUNW_ctf",
 101         ".symtab",
 102         ".dynsym",
 103         ".strtab",
 104         ".dynstr",
 105         ".shstrtab"
 106 };
 107 
 108 typedef struct shstrtab {
 109         int     sst_ndx[STR_NUM];
 110         int     sst_cur;
 111 } shstrtab_t;
 112 
 113 static void
 114 shstrtab_init(shstrtab_t *s)
 115 {
 116         bzero(&s->sst_ndx, sizeof (s->sst_ndx));
 117         s->sst_cur = 1;
 118 }
 119 
 120 static int
 121 shstrtab_ndx(shstrtab_t *s, shstrtype_t type)
 122 {
 123         int ret;
 124 
 125         if ((ret = s->sst_ndx[type]) != 0)
 126                 return (ret);
 127 
 128         ret = s->sst_ndx[type] = s->sst_cur;
 129         s->sst_cur += strlen(shstrtab_data[type]) + 1;
 130 
 131         return (ret);
 132 }
 133 
 134 static size_t
 135 shstrtab_size(const shstrtab_t *s)
 136 {
 137         return (s->sst_cur);
 138 }
 139 
 140 static void
 141 shstrtab_dump(const shstrtab_t *s, char *buf)
 142 {
 143         int i, ndx;
 144 
 145         *buf = '\0';
 146         for (i = 0; i < STR_NUM; i++) {
 147                 if ((ndx = s->sst_ndx[i]) != 0)
 148                         (void) strcpy(buf + ndx, shstrtab_data[i]);
 149         }
 150 }
 151 
 152 static int
 153 dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
 154 {
 155         ASSERT(phdrp->p_type == PT_SUNWDTRACE);
 156 
 157         /*
 158          * See the comment in fasttrap.h for information on how to safely
 159          * update this program header.
 160          */
 161         if (phdrp->p_memsz < PT_SUNWDTRACE_SIZE ||
 162             (phdrp->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X))
 163                 return (-1);
 164 
 165         args->thrptr = phdrp->p_vaddr + base;
 166 
 167         return (0);
 168 }
 169 
 170 /*
 171  * Map in the executable pointed to by vp. Returns 0 on success.  Note that
 172  * this function currently has the maximum number of arguments allowed by
 173  * modstubs on x86 (MAXNARG)!  Do _not_ add to this function signature without
 174  * adding to MAXNARG.  (Better yet, do not add to this monster of a function
 175  * signature!)
 176  */
 177 int
 178 mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
 179     intptr_t *voffset, caddr_t exec_file, char **interpp, caddr_t *bssbase,
 180     caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap, uintptr_t *minaddrp)
 181 {
 182         size_t          len;
 183         struct vattr    vat;
 184         caddr_t         phdrbase = NULL;
 185         ssize_t         phdrsize;
 186         int             nshdrs, shstrndx, nphdrs;
 187         int             error = 0;
 188         Phdr            *uphdr = NULL;
 189         Phdr            *junk = NULL;
 190         Phdr            *dynphdr = NULL;
 191         Phdr            *dtrphdr = NULL;
 192         char            *interp = NULL;
 193         uintptr_t       lddata;
 194         long            execsz;
 195         intptr_t        minaddr;
 196 
 197         if (lddatap != NULL)
 198                 *lddatap = NULL;
 199 
 200         if (minaddrp != NULL)
 201                 *minaddrp = NULL;
 202 
 203         if (error = execpermissions(vp, &vat, args)) {
 204                 uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);
 205                 return (error);
 206         }
 207 
 208         if ((error = getelfhead(vp, CRED(), ehdr, &nshdrs, &shstrndx,
 209             &nphdrs)) != 0 ||
 210             (error = getelfphdr(vp, CRED(), ehdr, nphdrs, &phdrbase,
 211             &phdrsize)) != 0) {
 212                 uprintf("%s: Cannot read %s\n", exec_file, args->pathname);
 213                 return (error);
 214         }
 215 
 216         if ((len = elfsize(ehdr, nphdrs, phdrbase, &lddata)) == 0) {
 217                 uprintf("%s: Nothing to load in %s", exec_file, args->pathname);
 218                 kmem_free(phdrbase, phdrsize);
 219                 return (ENOEXEC);
 220         }
 221         if (lddatap != NULL)
 222                 *lddatap = lddata;
 223 
 224         if (error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr,
 225             &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr,
 226             len, &execsz, brksize)) {
 227                 uprintf("%s: Cannot map %s\n", exec_file, args->pathname);
 228                 if (uphdr != NULL && uphdr->p_flags == 0)
 229                         kmem_free(uphdr, sizeof (Phdr));
 230                 kmem_free(phdrbase, phdrsize);
 231                 return (error);
 232         }
 233 
 234         if (minaddrp != NULL)
 235                 *minaddrp = minaddr;
 236 
 237         /*
 238          * If the executable requires an interpreter, determine its name.
 239          */
 240         if (dynphdr != NULL) {
 241                 ssize_t resid;
 242 
 243                 if (dynphdr->p_filesz > MAXPATHLEN || dynphdr->p_filesz == 0) {
 244                         uprintf("%s: Invalid interpreter\n", exec_file);
 245                         kmem_free(phdrbase, phdrsize);
 246                         return (ENOEXEC);
 247                 }
 248 
 249                 interp = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 250 
 251                 if ((error = vn_rdwr(UIO_READ, vp, interp, dynphdr->p_filesz,
 252                     (offset_t)dynphdr->p_offset, UIO_SYSSPACE, 0,
 253                     (rlim64_t)0, CRED(), &resid)) != 0 || resid != 0 ||
 254                     interp[dynphdr->p_filesz - 1] != '\0') {
 255                         uprintf("%s: Cannot obtain interpreter pathname\n",
 256                             exec_file);
 257                         kmem_free(interp, MAXPATHLEN);
 258                         kmem_free(phdrbase, phdrsize);
 259                         return (error != 0 ? error : ENOEXEC);
 260                 }
 261         }
 262 
 263         /*
 264          * If this is a statically linked executable, voffset should indicate
 265          * the address of the executable itself (it normally holds the address
 266          * of the interpreter).
 267          */
 268         if (ehdr->e_type == ET_EXEC && interp == NULL)
 269                 *voffset = minaddr;
 270 
 271         /*
 272          * If the caller has asked for the interpreter name, return it (it's
 273          * up to the caller to free it); if the caller hasn't asked for it,
 274          * free it ourselves.
 275          */
 276         if (interpp != NULL) {
 277                 *interpp = interp;
 278         } else if (interp != NULL) {
 279                 kmem_free(interp, MAXPATHLEN);
 280         }
 281 
 282         if (uphdr != NULL) {
 283                 *uphdr_vaddr = uphdr->p_vaddr;
 284 
 285                 if (uphdr->p_flags == 0)
 286                         kmem_free(uphdr, sizeof (Phdr));
 287         } else if (ehdr->e_type == ET_DYN) {
 288                 /*
 289                  * If we don't have a uphdr, we'll apply the logic found
 290                  * in mapelfexec() and use the p_vaddr of the first PT_LOAD
 291                  * section as the base address of the object.
 292                  */
 293                 Phdr *phdr = (Phdr *)phdrbase;
 294                 int i, hsize = ehdr->e_phentsize;
 295 
 296                 for (i = nphdrs; i > 0; i--) {
 297                         if (phdr->p_type == PT_LOAD) {
 298                                 *uphdr_vaddr = (uintptr_t)phdr->p_vaddr +
 299                                     ehdr->e_phoff;
 300                                 break;
 301                         }
 302 
 303                         phdr = (Phdr *)((caddr_t)phdr + hsize);
 304                 }
 305 
 306                 /*
 307                  * If we don't have a PT_LOAD segment, we should have returned
 308                  * ENOEXEC when elfsize() returned 0, above.
 309                  */
 310                 VERIFY(i > 0);
 311         } else {
 312                 *uphdr_vaddr = (Addr)-1;
 313         }
 314 
 315         kmem_free(phdrbase, phdrsize);
 316         return (error);
 317 }
 318 
 319 /*ARGSUSED*/
 320 int
 321 elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
 322     int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
 323     int *brand_action)
 324 {
 325         caddr_t         phdrbase = NULL;
 326         caddr_t         bssbase = 0;
 327         caddr_t         brkbase = 0;
 328         size_t          brksize = 0;
 329         ssize_t         dlnsize, nsize = 0;
 330         aux_entry_t     *aux;
 331         int             error;
 332         ssize_t         resid;
 333         int             fd = -1;
 334         intptr_t        voffset;
 335         Phdr            *dyphdr = NULL;
 336         Phdr            *stphdr = NULL;
 337         Phdr            *uphdr = NULL;
 338         Phdr            *junk = NULL;
 339         size_t          len;
 340         ssize_t         phdrsize;
 341         int             postfixsize = 0;
 342         int             i, hsize;
 343         Phdr            *phdrp;
 344         Phdr            *dataphdrp = NULL;
 345         Phdr            *dtrphdr;
 346         Phdr            *capphdr = NULL;
 347         Cap             *cap = NULL;
 348         ssize_t         capsize;
 349         int             hasu = 0;
 350         int             hasauxv = 0;
 351         int             hasdy = 0;
 352         int             branded = 0;
 353         int             dynuphdr = 0;
 354 
 355         struct proc *p = ttoproc(curthread);
 356         struct user *up = PTOU(p);
 357         struct bigwad {
 358                 Ehdr    ehdr;
 359                 aux_entry_t     elfargs[__KERN_NAUXV_IMPL];
 360                 char            dl_name[MAXPATHLEN];
 361                 char            pathbuf[MAXPATHLEN];
 362                 struct vattr    vattr;
 363                 struct execenv  exenv;
 364         } *bigwad;      /* kmem_alloc this behemoth so we don't blow stack */
 365         Ehdr            *ehdrp;
 366         int             nshdrs, shstrndx, nphdrs;
 367         char            *dlnp;
 368         char            *pathbufp;
 369         rlim64_t        limit;
 370         rlim64_t        roundlimit;
 371 
 372         ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);
 373 
 374         bigwad = kmem_alloc(sizeof (struct bigwad), KM_SLEEP);
 375         ehdrp = &bigwad->ehdr;
 376         dlnp = bigwad->dl_name;
 377         pathbufp = bigwad->pathbuf;
 378 
 379         /*
 380          * Obtain ELF and program header information.
 381          */
 382         if ((error = getelfhead(vp, CRED(), ehdrp, &nshdrs, &shstrndx,
 383             &nphdrs)) != 0 ||
 384             (error = getelfphdr(vp, CRED(), ehdrp, nphdrs, &phdrbase,
 385             &phdrsize)) != 0)
 386                 goto out;
 387 
 388         /*
 389          * Prevent executing an ELF file that has no entry point.
 390          */
 391         if (ehdrp->e_entry == 0) {
 392                 uprintf("%s: Bad entry point\n", exec_file);
 393                 goto bad;
 394         }
 395 
 396         /*
 397          * Put data model that we're exec-ing to into the args passed to
 398          * exec_args(), so it will know what it is copying to on new stack.
 399          * Now that we know whether we are exec-ing a 32-bit or 64-bit
 400          * executable, we can set execsz with the appropriate NCARGS.
 401          */
 402 #ifdef  _LP64
 403         if (ehdrp->e_ident[EI_CLASS] == ELFCLASS32) {
 404                 args->to_model = DATAMODEL_ILP32;
 405                 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS32-1);
 406         } else {
 407                 args->to_model = DATAMODEL_LP64;
 408                 if (!args->stk_prot_override) {
 409                         args->stk_prot &= ~PROT_EXEC;
 410                 }
 411 #if defined(__i386) || defined(__amd64)
 412                 args->dat_prot &= ~PROT_EXEC;
 413 #endif
 414                 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS64-1);
 415         }
 416 #else   /* _LP64 */
 417         args->to_model = DATAMODEL_ILP32;
 418         *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS-1);
 419 #endif  /* _LP64 */
 420 
 421         /*
 422          * We delay invoking the brand callback until we've figured out what
 423          * kind of elf binary we're trying to run, 32-bit or 64-bit.  We do this
 424          * because now the brand library can just check args->to_model to see if
 425          * the target is 32-bit or 64-bit without having to duplicate all the
 426          * code above.
 427          *
 428          * We also give the brand a chance to indicate that based on the ELF
 429          * OSABI of the target binary it should become unbranded and optionally
 430          * indicate that it should be treated as existing in a specific prefix.
 431          *
 432          * Note that if a brand opts to go down this route it does not actually
 433          * end up being debranded. In other words, future programs that exec
 434          * will still be considered for branding unless this escape hatch is
 435          * used. Consider the case of lx brand for example. If a user runs
 436          * /native/usr/sbin/dtrace -c /bin/ls, the isaexec and normal executable
 437          * of DTrace that's in /native will take this escape hatch and be run
 438          * and interpreted using the normal system call table; however, the
 439          * execution of a non-illumos binary in the form of /bin/ls will still
 440          * be branded and be subject to all of the normal actions of the brand.
 441          *
 442          * The level checks associated with brand handling below are used to
 443          * prevent a loop since the brand elfexec function typically comes back
 444          * through this function. We must check <= here since the nested
 445          * handling in the #! interpreter code will increment the level before
 446          * calling gexec to run the final elfexec interpreter.
 447          */
 448         if ((level <= INTP_MAXDEPTH) && (*brand_action != EBA_NATIVE) &&
 449             (PROC_IS_BRANDED(p)) && (BROP(p)->b_native_exec != NULL)) {
 450                 if (BROP(p)->b_native_exec(ehdrp->e_ident[EI_OSABI],
 451                     &args->brand_nroot) == B_TRUE) {
 452                         ASSERT(ehdrp->e_ident[EI_OSABI]);
 453                         *brand_action = EBA_NATIVE;
 454                         /* Add one for the trailing '/' in the path */
 455                         if (args->brand_nroot != NULL)
 456                                 nsize = strlen(args->brand_nroot) + 1;
 457                 }
 458         }
 459 
 460         if ((level <= INTP_MAXDEPTH) &&
 461             (*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
 462                 error = BROP(p)->b_elfexec(vp, uap, args,
 463                     idatap, level + 1, execsz, setid, exec_file, cred,
 464                     brand_action);
 465                 goto out;
 466         }
 467 
 468         /*
 469          * Determine aux size now so that stack can be built
 470          * in one shot (except actual copyout of aux image),
 471          * determine any non-default stack protections,
 472          * and still have this code be machine independent.
 473          */
 474         hsize = ehdrp->e_phentsize;
 475         phdrp = (Phdr *)phdrbase;
 476         for (i = nphdrs; i > 0; i--) {
 477                 switch (phdrp->p_type) {
 478                 case PT_INTERP:
 479                         hasauxv = hasdy = 1;
 480                         break;
 481                 case PT_PHDR:
 482                         hasu = 1;
 483                         break;
 484                 case PT_SUNWSTACK:
 485                         args->stk_prot = PROT_USER;
 486                         if (phdrp->p_flags & PF_R)
 487                                 args->stk_prot |= PROT_READ;
 488                         if (phdrp->p_flags & PF_W)
 489                                 args->stk_prot |= PROT_WRITE;
 490                         if (phdrp->p_flags & PF_X)
 491                                 args->stk_prot |= PROT_EXEC;
 492                         break;
 493                 case PT_LOAD:
 494                         dataphdrp = phdrp;
 495                         break;
 496                 case PT_SUNWCAP:
 497                         capphdr = phdrp;
 498                         break;
 499                 }
 500                 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
 501         }
 502 
 503         if (ehdrp->e_type != ET_EXEC) {
 504                 dataphdrp = NULL;
 505                 hasauxv = 1;
 506         }
 507 
 508         /* Copy BSS permissions to args->dat_prot */
 509         if (dataphdrp != NULL) {
 510                 args->dat_prot = PROT_USER;
 511                 if (dataphdrp->p_flags & PF_R)
 512                         args->dat_prot |= PROT_READ;
 513                 if (dataphdrp->p_flags & PF_W)
 514                         args->dat_prot |= PROT_WRITE;
 515                 if (dataphdrp->p_flags & PF_X)
 516                         args->dat_prot |= PROT_EXEC;
 517         }
 518 
 519         /*
 520          * If an auxvector will be required - reserve the space for
 521          * it now.  This may be increased by exec_args if there are
 522          * ISA-specific types (included in __KERN_NAUXV_IMPL).
 523          */
 524         if (hasauxv) {
 525                 /*
 526                  * If an AUX vector is being built - the base AUX
 527                  * entries are:
 528                  *
 529                  *      AT_BASE
 530                  *      AT_FLAGS
 531                  *      AT_PAGESZ
 532                  *      AT_RANDOM
 533                  *      AT_SUN_AUXFLAGS
 534                  *      AT_SUN_HWCAP
 535                  *      AT_SUN_HWCAP2
 536                  *      AT_SUN_PLATFORM (added in stk_copyout)
 537                  *      AT_SUN_EXECNAME (added in stk_copyout)
 538                  *      AT_NULL
 539                  *
 540                  * total == 10
 541                  */
 542                 if (hasdy && hasu) {
 543                         /*
 544                          * Has PT_INTERP & PT_PHDR - the auxvectors that
 545                          * will be built are:
 546                          *
 547                          *      AT_PHDR
 548                          *      AT_PHENT
 549                          *      AT_PHNUM
 550                          *      AT_ENTRY
 551                          *      AT_LDDATA
 552                          *
 553                          * total = 5
 554                          */
 555                         args->auxsize = (10 + 5) * sizeof (aux_entry_t);
 556                 } else if (hasdy) {
 557                         /*
 558                          * Has PT_INTERP but no PT_PHDR
 559                          *
 560                          *      AT_EXECFD
 561                          *      AT_LDDATA
 562                          *
 563                          * total = 2
 564                          */
 565                         args->auxsize = (10 + 2) * sizeof (aux_entry_t);
 566                 } else {
 567                         args->auxsize = 10 * sizeof (aux_entry_t);
 568                 }
 569         } else {
 570                 args->auxsize = 0;
 571         }
 572 
 573         /*
 574          * If this binary is using an emulator, we need to add an
 575          * AT_SUN_EMULATOR aux entry.
 576          */
 577         if (args->emulator != NULL)
 578                 args->auxsize += sizeof (aux_entry_t);
 579 
 580         /*
 581          * If this is a native binary that's been given a modified interpreter
 582          * root, inform it that the native system exists at that root.
 583          */
 584         if (args->brand_nroot != NULL) {
 585                 args->auxsize += sizeof (aux_entry_t);
 586         }
 587 
 588 
 589         /*
 590          * On supported kernels (x86_64) make room in the auxv for the
 591          * AT_SUN_COMMPAGE entry.  This will go unpopulated on i86xpv systems
 592          * which do not provide such functionality.
 593          */
 594 #if defined(__amd64)
 595         args->auxsize += sizeof (aux_entry_t);
 596 #endif /* defined(__amd64) */
 597 
 598         /*
 599          * If we have user credentials, we'll supply the following entries:
 600          *      AT_SUN_UID
 601          *      AT_SUN_RUID
 602          *      AT_SUN_GID
 603          *      AT_SUN_RGID
 604          */
 605         if (cred != NULL) {
 606                 args->auxsize += 4 * sizeof (aux_entry_t);
 607         }
 608 
 609         if ((*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
 610                 branded = 1;
 611                 /*
 612                  * We will be adding 5 entries to the aux vectors.  One for
 613                  * the brandname and 4 for the brand specific aux vectors.
 614                  */
 615                 args->auxsize += 5 * sizeof (aux_entry_t);
 616         }
 617 
 618         /* Hardware/Software capabilities */
 619         if (capphdr != NULL &&
 620             (capsize = capphdr->p_filesz) > 0 &&
 621             capsize <= 16 * sizeof (*cap)) {
 622                 int ncaps = capsize / sizeof (*cap);
 623                 Cap *cp;
 624 
 625                 cap = kmem_alloc(capsize, KM_SLEEP);
 626                 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)cap,
 627                     capsize, (offset_t)capphdr->p_offset,
 628                     UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) != 0) {
 629                         uprintf("%s: Cannot read capabilities section\n",
 630                             exec_file);
 631                         goto out;
 632                 }
 633                 for (cp = cap; cp < cap + ncaps; cp++) {
 634                         if (cp->c_tag == CA_SUNW_SF_1 &&
 635                             (cp->c_un.c_val & SF1_SUNW_ADDR32)) {
 636                                 if (args->to_model == DATAMODEL_LP64)
 637                                         args->addr32 = 1;
 638                                 break;
 639                         }
 640                 }
 641         }
 642 
 643         aux = bigwad->elfargs;
 644         /*
 645          * Move args to the user's stack.
 646          * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries.
 647          */
 648         if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
 649                 if (error == -1) {
 650                         error = ENOEXEC;
 651                         goto bad;
 652                 }
 653                 goto out;
 654         }
 655         /* we're single threaded after this point */
 656 
 657         /*
 658          * If this is an ET_DYN executable (shared object),
 659          * determine its memory size so that mapelfexec() can load it.
 660          */
 661         if (ehdrp->e_type == ET_DYN)
 662                 len = elfsize(ehdrp, nphdrs, phdrbase, NULL);
 663         else
 664                 len = 0;
 665 
 666         dtrphdr = NULL;
 667 
 668         if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &dyphdr,
 669             &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
 670             len, execsz, &brksize)) != 0)
 671                 goto bad;
 672 
 673         if (uphdr != NULL) {
 674                 /*
 675                  * Our uphdr has been dynamically allocated if (and only if)
 676                  * its program header flags are clear.
 677                  */
 678                 dynuphdr = (uphdr->p_flags == 0);
 679         }
 680 
 681         if (uphdr != NULL && dyphdr == NULL)
 682                 goto bad;
 683 
 684         if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
 685                 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file);
 686                 goto bad;
 687         }
 688 
 689         if (dyphdr != NULL) {
 690                 size_t          len;
 691                 uintptr_t       lddata;
 692                 char            *p;
 693                 struct vnode    *nvp;
 694 
 695                 dlnsize = dyphdr->p_filesz + nsize;
 696 
 697                 if (dlnsize > MAXPATHLEN || dlnsize <= 0)
 698                         goto bad;
 699 
 700                 if (nsize != 0) {
 701                         bcopy(args->brand_nroot, dlnp, nsize - 1);
 702                         dlnp[nsize - 1] = '/';
 703                 }
 704 
 705                 /*
 706                  * Read in "interpreter" pathname.
 707                  */
 708                 if ((error = vn_rdwr(UIO_READ, vp, dlnp + nsize,
 709                     dyphdr->p_filesz, (offset_t)dyphdr->p_offset, UIO_SYSSPACE,
 710                     0, (rlim64_t)0, CRED(), &resid)) != 0) {
 711                         uprintf("%s: Cannot obtain interpreter pathname\n",
 712                             exec_file);
 713                         goto bad;
 714                 }
 715 
 716                 if (resid != 0 || dlnp[dlnsize - 1] != '\0')
 717                         goto bad;
 718 
 719                 /*
 720                  * Search for '$ORIGIN' token in interpreter path.
 721                  * If found, expand it.
 722                  */
 723                 for (p = dlnp; p = strchr(p, '$'); ) {
 724                         uint_t  len, curlen;
 725                         char    *_ptr;
 726 
 727                         if (strncmp(++p, ORIGIN_STR, ORIGIN_STR_SIZE))
 728                                 continue;
 729 
 730                         /*
 731                          * We don't support $ORIGIN on setid programs to close
 732                          * a potential attack vector.
 733                          */
 734                         if ((setid & EXECSETID_SETID) != 0) {
 735                                 error = ENOEXEC;
 736                                 goto bad;
 737                         }
 738 
 739                         curlen = 0;
 740                         len = p - dlnp - 1;
 741                         if (len) {
 742                                 bcopy(dlnp, pathbufp, len);
 743                                 curlen += len;
 744                         }
 745                         if (_ptr = strrchr(args->pathname, '/')) {
 746                                 len = _ptr - args->pathname;
 747                                 if ((curlen + len) > MAXPATHLEN)
 748                                         break;
 749 
 750                                 bcopy(args->pathname, &pathbufp[curlen], len);
 751                                 curlen += len;
 752                         } else {
 753                                 /*
 754                                  * executable is a basename found in the
 755                                  * current directory.  So - just substitute
 756                                  * '.' for ORIGIN.
 757                                  */
 758                                 pathbufp[curlen] = '.';
 759                                 curlen++;
 760                         }
 761                         p += ORIGIN_STR_SIZE;
 762                         len = strlen(p);
 763 
 764                         if ((curlen + len) > MAXPATHLEN)
 765                                 break;
 766                         bcopy(p, &pathbufp[curlen], len);
 767                         curlen += len;
 768                         pathbufp[curlen++] = '\0';
 769                         bcopy(pathbufp, dlnp, curlen);
 770                 }
 771 
 772                 /*
 773                  * /usr/lib/ld.so.1 is known to be a symlink to /lib/ld.so.1
 774                  * (and /usr/lib/64/ld.so.1 is a symlink to /lib/64/ld.so.1).
 775                  * Just in case /usr is not mounted, change it now.
 776                  */
 777                 if (strcmp(dlnp, USR_LIB_RTLD) == 0)
 778                         dlnp += 4;
 779                 error = lookupname(dlnp, UIO_SYSSPACE, FOLLOW, NULLVPP, &nvp);
 780                 if (error && dlnp != bigwad->dl_name) {
 781                         /* new kernel, old user-level */
 782                         error = lookupname(dlnp -= 4, UIO_SYSSPACE, FOLLOW,
 783                             NULLVPP, &nvp);
 784                 }
 785                 if (error) {
 786                         uprintf("%s: Cannot find %s\n", exec_file, dlnp);
 787                         goto bad;
 788                 }
 789 
 790                 /*
 791                  * Setup the "aux" vector.
 792                  */
 793                 if (uphdr) {
 794                         if (ehdrp->e_type == ET_DYN) {
 795                                 /* don't use the first page */
 796                                 bigwad->exenv.ex_brkbase = (caddr_t)PAGESIZE;
 797                                 bigwad->exenv.ex_bssbase = (caddr_t)PAGESIZE;
 798                         } else {
 799                                 bigwad->exenv.ex_bssbase = bssbase;
 800                                 bigwad->exenv.ex_brkbase = brkbase;
 801                         }
 802                         bigwad->exenv.ex_brksize = brksize;
 803                         bigwad->exenv.ex_magic = elfmagic;
 804                         bigwad->exenv.ex_vp = vp;
 805                         setexecenv(&bigwad->exenv);
 806 
 807                         ADDAUX(aux, AT_PHDR, uphdr->p_vaddr + voffset)
 808                         ADDAUX(aux, AT_PHENT, ehdrp->e_phentsize)
 809                         ADDAUX(aux, AT_PHNUM, nphdrs)
 810                         ADDAUX(aux, AT_ENTRY, ehdrp->e_entry + voffset)
 811                 } else {
 812                         if ((error = execopen(&vp, &fd)) != 0) {
 813                                 VN_RELE(nvp);
 814                                 goto bad;
 815                         }
 816 
 817                         ADDAUX(aux, AT_EXECFD, fd)
 818                 }
 819 
 820                 if ((error = execpermissions(nvp, &bigwad->vattr, args)) != 0) {
 821                         VN_RELE(nvp);
 822                         uprintf("%s: Cannot execute %s\n", exec_file, dlnp);
 823                         goto bad;
 824                 }
 825 
 826                 /*
 827                  * Now obtain the ELF header along with the entire program
 828                  * header contained in "nvp".
 829                  */
 830                 kmem_free(phdrbase, phdrsize);
 831                 phdrbase = NULL;
 832                 if ((error = getelfhead(nvp, CRED(), ehdrp, &nshdrs,
 833                     &shstrndx, &nphdrs)) != 0 ||
 834                     (error = getelfphdr(nvp, CRED(), ehdrp, nphdrs, &phdrbase,
 835                     &phdrsize)) != 0) {
 836                         VN_RELE(nvp);
 837                         uprintf("%s: Cannot read %s\n", exec_file, dlnp);
 838                         goto bad;
 839                 }
 840 
 841                 /*
 842                  * Determine memory size of the "interpreter's" loadable
 843                  * sections.  This size is then used to obtain the virtual
 844                  * address of a hole, in the user's address space, large
 845                  * enough to map the "interpreter".
 846                  */
 847                 if ((len = elfsize(ehdrp, nphdrs, phdrbase, &lddata)) == 0) {
 848                         VN_RELE(nvp);
 849                         uprintf("%s: Nothing to load in %s\n", exec_file, dlnp);
 850                         goto bad;
 851                 }
 852 
 853                 dtrphdr = NULL;
 854 
 855                 error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, NULL, &junk,
 856                     &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len,
 857                     execsz, NULL);
 858 
 859                 if (error || junk != NULL) {
 860                         VN_RELE(nvp);
 861                         uprintf("%s: Cannot map %s\n", exec_file, dlnp);
 862                         goto bad;
 863                 }
 864 
 865                 /*
 866                  * We use the DTrace program header to initialize the
 867                  * architecture-specific user per-LWP location. The dtrace
 868                  * fasttrap provider requires ready access to per-LWP scratch
 869                  * space. We assume that there is only one such program header
 870                  * in the interpreter.
 871                  */
 872                 if (dtrphdr != NULL &&
 873                     dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
 874                         VN_RELE(nvp);
 875                         uprintf("%s: Bad DTrace phdr in %s\n", exec_file, dlnp);
 876                         goto bad;
 877                 }
 878 
 879                 VN_RELE(nvp);
 880                 ADDAUX(aux, AT_SUN_LDDATA, voffset + lddata)
 881         }
 882 
 883         if (hasauxv) {
 884                 int auxf = AF_SUN_HWCAPVERIFY;
 885 
 886                 /*
 887                  * Note: AT_SUN_PLATFORM and AT_RANDOM were filled in via
 888                  * exec_args()
 889                  */
 890                 ADDAUX(aux, AT_BASE, voffset)
 891                 ADDAUX(aux, AT_FLAGS, at_flags)
 892                 ADDAUX(aux, AT_PAGESZ, PAGESIZE)
 893                 /*
 894                  * Linker flags. (security)
 895                  * p_flag not yet set at this time.
 896                  * We rely on gexec() to provide us with the information.
 897                  * If the application is set-uid but this is not reflected
 898                  * in a mismatch between real/effective uids/gids, then
 899                  * don't treat this as a set-uid exec.  So we care about
 900                  * the EXECSETID_UGIDS flag but not the ...SETID flag.
 901                  */
 902                 if ((setid &= ~EXECSETID_SETID) != 0)
 903                         auxf |= AF_SUN_SETUGID;
 904 
 905                 /*
 906                  * If we're running a native process from within a branded
 907                  * zone under pfexec then we clear the AF_SUN_SETUGID flag so
 908                  * that the native ld.so.1 is able to link with the native
 909                  * libraries instead of using the brand libraries that are
 910                  * installed in the zone.  We only do this for processes
 911                  * which we trust because we see they are already running
 912                  * under pfexec (where uid != euid).  This prevents a
 913                  * malicious user within the zone from crafting a wrapper to
 914                  * run native suid commands with unsecure libraries interposed.
 915                  */
 916                 if ((*brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
 917                     (setid &= ~EXECSETID_SETID) != 0))
 918                         auxf &= ~AF_SUN_SETUGID;
 919 
 920                 /*
 921                  * Record the user addr of the auxflags aux vector entry
 922                  * since brands may optionally want to manipulate this field.
 923                  */
 924                 args->auxp_auxflags =
 925                     (char *)((char *)args->stackend +
 926                     ((char *)&aux->a_type -
 927                     (char *)bigwad->elfargs));
 928                 ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
 929 
 930                 /*
 931                  * Record information about the real and effective user and
 932                  * group IDs.
 933                  */
 934                 if (cred != NULL) {
 935                         ADDAUX(aux, AT_SUN_UID, crgetuid(cred));
 936                         ADDAUX(aux, AT_SUN_RUID, crgetruid(cred));
 937                         ADDAUX(aux, AT_SUN_GID, crgetgid(cred));
 938                         ADDAUX(aux, AT_SUN_RGID, crgetrgid(cred));
 939                 }
 940 
 941                 /*
 942                  * Hardware capability flag word (performance hints)
 943                  * Used for choosing faster library routines.
 944                  * (Potentially different between 32-bit and 64-bit ABIs)
 945                  */
 946 #if defined(_LP64)
 947                 if (args->to_model == DATAMODEL_NATIVE) {
 948                         ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
 949                         ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
 950                 } else {
 951                         ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap32)
 952                         ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap32_2)
 953                 }
 954 #else
 955                 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
 956                 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
 957 #endif
 958                 if (branded) {
 959                         /*
 960                          * Reserve space for the brand-private aux vectors,
 961                          * and record the user addr of that space.
 962                          */
 963                         args->auxp_brand =
 964                             (char *)((char *)args->stackend +
 965                             ((char *)&aux->a_type -
 966                             (char *)bigwad->elfargs));
 967                         ADDAUX(aux, AT_SUN_BRAND_AUX1, 0)
 968                         ADDAUX(aux, AT_SUN_BRAND_AUX2, 0)
 969                         ADDAUX(aux, AT_SUN_BRAND_AUX3, 0)
 970                         ADDAUX(aux, AT_SUN_BRAND_AUX4, 0)
 971                 }
 972 
 973                 /*
 974                  * Add the comm page auxv entry, mapping it in if needed.
 975                  */
 976 #if defined(__amd64)
 977                 if (args->commpage != NULL ||
 978                     (args->commpage = (uintptr_t)comm_page_mapin()) != NULL) {
 979                         ADDAUX(aux, AT_SUN_COMMPAGE, args->commpage)
 980                 } else {
 981                         /*
 982                          * If the comm page cannot be mapped, pad out the auxv
 983                          * to satisfy later size checks.
 984                          */
 985                         ADDAUX(aux, AT_NULL, 0)
 986                 }
 987 #endif /* defined(__amd64) */
 988 
 989                 ADDAUX(aux, AT_NULL, 0)
 990                 postfixsize = (char *)aux - (char *)bigwad->elfargs;
 991 
 992                 /*
 993                  * We make assumptions above when we determine how many aux
 994                  * vector entries we will be adding. However, if we have an
 995                  * invalid elf file, it is possible that mapelfexec might
 996                  * behave differently (but not return an error), in which case
 997                  * the number of aux entries we actually add will be different.
 998                  * We detect that now and error out.
 999                  */
1000                 if (postfixsize != args->auxsize) {
1001                         DTRACE_PROBE2(elfexec_badaux, int, postfixsize,
1002                             int, args->auxsize);
1003                         goto bad;
1004                 }
1005                 ASSERT(postfixsize <= __KERN_NAUXV_IMPL * sizeof (aux_entry_t));
1006         }
1007 
1008         /*
1009          * For the 64-bit kernel, the limit is big enough that rounding it up
1010          * to a page can overflow the 64-bit limit, so we check for btopr()
1011          * overflowing here by comparing it with the unrounded limit in pages.
1012          * If it hasn't overflowed, compare the exec size with the rounded up
1013          * limit in pages.  Otherwise, just compare with the unrounded limit.
1014          */
1015         limit = btop(p->p_vmem_ctl);
1016         roundlimit = btopr(p->p_vmem_ctl);
1017         if ((roundlimit > limit && *execsz > roundlimit) ||
1018             (roundlimit < limit && *execsz > limit)) {
1019                 mutex_enter(&p->p_lock);
1020                 (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
1021                     RCA_SAFE);
1022                 mutex_exit(&p->p_lock);
1023                 error = ENOMEM;
1024                 goto bad;
1025         }
1026 
1027         bzero(up->u_auxv, sizeof (up->u_auxv));
1028         up->u_commpagep = args->commpage;
1029         if (postfixsize) {
1030                 int num_auxv;
1031 
1032                 /*
1033                  * Copy the aux vector to the user stack.
1034                  */
1035                 error = execpoststack(args, bigwad->elfargs, postfixsize);
1036                 if (error)
1037                         goto bad;
1038 
1039                 /*
1040                  * Copy auxv to the process's user structure for use by /proc.
1041                  * If this is a branded process, the brand's exec routine will
1042                  * copy its private entries to the user structure later. It
1043                  * relies on the fact that the blank entries are at the end.
1044                  */
1045                 num_auxv = postfixsize / sizeof (aux_entry_t);
1046                 ASSERT(num_auxv <= sizeof (up->u_auxv) / sizeof (auxv_t));
1047                 aux = bigwad->elfargs;
1048                 for (i = 0; i < num_auxv; i++) {
1049                         up->u_auxv[i].a_type = aux[i].a_type;
1050                         up->u_auxv[i].a_un.a_val = (aux_val_t)aux[i].a_un.a_val;
1051                 }
1052         }
1053 
1054         /*
1055          * Pass back the starting address so we can set the program counter.
1056          */
1057         args->entry = (uintptr_t)(ehdrp->e_entry + voffset);
1058 
1059         if (!uphdr) {
1060                 if (ehdrp->e_type == ET_DYN) {
1061                         /*
1062                          * If we are executing a shared library which doesn't
1063                          * have an interpreter (probably ld.so.1) then
1064                          * we don't set the brkbase now.  Instead we
1065                          * delay its setting until the first call
1066                          * via grow.c::brk().  This permits ld.so.1 to
1067                          * initialize brkbase to the tail of the executable it
1068                          * loads (which is where it needs to be).
1069                          */
1070                         bigwad->exenv.ex_brkbase = (caddr_t)0;
1071                         bigwad->exenv.ex_bssbase = (caddr_t)0;
1072                         bigwad->exenv.ex_brksize = 0;
1073                 } else {
1074                         bigwad->exenv.ex_brkbase = brkbase;
1075                         bigwad->exenv.ex_bssbase = bssbase;
1076                         bigwad->exenv.ex_brksize = brksize;
1077                 }
1078                 bigwad->exenv.ex_magic = elfmagic;
1079                 bigwad->exenv.ex_vp = vp;
1080                 setexecenv(&bigwad->exenv);
1081         }
1082 
1083         ASSERT(error == 0);
1084         goto out;
1085 
1086 bad:
1087         if (fd != -1)           /* did we open the a.out yet */
1088                 (void) execclose(fd);
1089 
1090         psignal(p, SIGKILL);
1091 
1092         if (error == 0)
1093                 error = ENOEXEC;
1094 out:
1095         if (dynuphdr)
1096                 kmem_free(uphdr, sizeof (Phdr));
1097         if (phdrbase != NULL)
1098                 kmem_free(phdrbase, phdrsize);
1099         if (cap != NULL)
1100                 kmem_free(cap, capsize);
1101         kmem_free(bigwad, sizeof (struct bigwad));
1102         return (error);
1103 }
1104 
1105 /*
1106  * Compute the memory size requirement for the ELF file.
1107  */
1108 static size_t
1109 elfsize(Ehdr *ehdrp, int nphdrs, caddr_t phdrbase, uintptr_t *lddata)
1110 {
1111         size_t  len;
1112         Phdr    *phdrp = (Phdr *)phdrbase;
1113         int     hsize = ehdrp->e_phentsize;
1114         int     first = 1;
1115         int     dfirst = 1;     /* first data segment */
1116         uintptr_t loaddr = 0;
1117         uintptr_t hiaddr = 0;
1118         uintptr_t lo, hi;
1119         int     i;
1120 
1121         for (i = nphdrs; i > 0; i--) {
1122                 if (phdrp->p_type == PT_LOAD) {
1123                         lo = phdrp->p_vaddr;
1124                         hi = lo + phdrp->p_memsz;
1125                         if (first) {
1126                                 loaddr = lo;
1127                                 hiaddr = hi;
1128                                 first = 0;
1129                         } else {
1130                                 if (loaddr > lo)
1131                                         loaddr = lo;
1132                                 if (hiaddr < hi)
1133                                         hiaddr = hi;
1134                         }
1135 
1136                         /*
1137                          * save the address of the first data segment
1138                          * of a object - used for the AT_SUNW_LDDATA
1139                          * aux entry.
1140                          */
1141                         if ((lddata != NULL) && dfirst &&
1142                             (phdrp->p_flags & PF_W)) {
1143                                 *lddata = lo;
1144                                 dfirst = 0;
1145                         }
1146                 }
1147                 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
1148         }
1149 
1150         len = hiaddr - (loaddr & PAGEMASK);
1151         len = roundup(len, PAGESIZE);
1152 
1153         return (len);
1154 }
1155 
1156 /*
1157  * Read in the ELF header and program header table.
1158  * SUSV3 requires:
1159  *      ENOEXEC File format is not recognized
1160  *      EINVAL  Format recognized but execution not supported
1161  */
1162 static int
1163 getelfhead(vnode_t *vp, cred_t *credp, Ehdr *ehdr, int *nshdrs, int *shstrndx,
1164     int *nphdrs)
1165 {
1166         int error;
1167         ssize_t resid;
1168 
1169         /*
1170          * We got here by the first two bytes in ident,
1171          * now read the entire ELF header.
1172          */
1173         if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)ehdr,
1174             sizeof (Ehdr), (offset_t)0, UIO_SYSSPACE, 0,
1175             (rlim64_t)0, credp, &resid)) != 0)
1176                 return (error);
1177 
1178         /*
1179          * Since a separate version is compiled for handling 32-bit and
1180          * 64-bit ELF executables on a 64-bit kernel, the 64-bit version
1181          * doesn't need to be able to deal with 32-bit ELF files.
1182          */
1183         if (resid != 0 ||
1184             ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
1185             ehdr->e_ident[EI_MAG3] != ELFMAG3)
1186                 return (ENOEXEC);
1187 
1188         if ((ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) ||
1189 #if defined(_ILP32) || defined(_ELF32_COMPAT)
1190             ehdr->e_ident[EI_CLASS] != ELFCLASS32 ||
1191 #else
1192             ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
1193 #endif
1194             !elfheadcheck(ehdr->e_ident[EI_DATA], ehdr->e_machine,
1195             ehdr->e_flags))
1196                 return (EINVAL);
1197 
1198         *nshdrs = ehdr->e_shnum;
1199         *shstrndx = ehdr->e_shstrndx;
1200         *nphdrs = ehdr->e_phnum;
1201 
1202         /*
1203          * If e_shnum, e_shstrndx, or e_phnum is its sentinel value, we need
1204          * to read in the section header at index zero to acces the true
1205          * values for those fields.
1206          */
1207         if ((*nshdrs == 0 && ehdr->e_shoff != 0) ||
1208             *shstrndx == SHN_XINDEX || *nphdrs == PN_XNUM) {
1209                 Shdr shdr;
1210 
1211                 if (ehdr->e_shoff == 0)
1212                         return (EINVAL);
1213 
1214                 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&shdr,
1215                     sizeof (shdr), (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0,
1216                     (rlim64_t)0, credp, &resid)) != 0)
1217                         return (error);
1218 
1219                 if (*nshdrs == 0)
1220                         *nshdrs = shdr.sh_size;
1221                 if (*shstrndx == SHN_XINDEX)
1222                         *shstrndx = shdr.sh_link;
1223                 if (*nphdrs == PN_XNUM && shdr.sh_info != 0)
1224                         *nphdrs = shdr.sh_info;
1225         }
1226 
1227         return (0);
1228 }
1229 
1230 #ifdef _ELF32_COMPAT
1231 extern size_t elf_nphdr_max;
1232 #else
1233 size_t elf_nphdr_max = 1000;
1234 #endif
1235 
1236 static int
1237 getelfphdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr, int nphdrs,
1238     caddr_t *phbasep, ssize_t *phsizep)
1239 {
1240         ssize_t resid, minsize;
1241         int err;
1242 
1243         /*
1244          * Since we're going to be using e_phentsize to iterate down the
1245          * array of program headers, it must be 8-byte aligned or else
1246          * a we might cause a misaligned access. We use all members through
1247          * p_flags on 32-bit ELF files and p_memsz on 64-bit ELF files so
1248          * e_phentsize must be at least large enough to include those
1249          * members.
1250          */
1251 #if !defined(_LP64) || defined(_ELF32_COMPAT)
1252         minsize = offsetof(Phdr, p_flags) + sizeof (((Phdr *)NULL)->p_flags);
1253 #else
1254         minsize = offsetof(Phdr, p_memsz) + sizeof (((Phdr *)NULL)->p_memsz);
1255 #endif
1256         if (ehdr->e_phentsize < minsize || (ehdr->e_phentsize & 3))
1257                 return (EINVAL);
1258 
1259         *phsizep = nphdrs * ehdr->e_phentsize;
1260 
1261         if (*phsizep > sizeof (Phdr) * elf_nphdr_max) {
1262                 if ((*phbasep = kmem_alloc(*phsizep, KM_NOSLEEP)) == NULL)
1263                         return (ENOMEM);
1264         } else {
1265                 *phbasep = kmem_alloc(*phsizep, KM_SLEEP);
1266         }
1267 
1268         if ((err = vn_rdwr(UIO_READ, vp, *phbasep, *phsizep,
1269             (offset_t)ehdr->e_phoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1270             credp, &resid)) != 0) {
1271                 kmem_free(*phbasep, *phsizep);
1272                 *phbasep = NULL;
1273                 return (err);
1274         }
1275 
1276         return (0);
1277 }
1278 
1279 #ifdef _ELF32_COMPAT
1280 extern size_t elf_nshdr_max;
1281 extern size_t elf_shstrtab_max;
1282 #else
1283 size_t elf_nshdr_max = 10000;
1284 size_t elf_shstrtab_max = 100 * 1024;
1285 #endif
1286 
1287 
1288 static int
1289 getelfshdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr,
1290     int nshdrs, int shstrndx, caddr_t *shbasep, ssize_t *shsizep,
1291     char **shstrbasep, ssize_t *shstrsizep)
1292 {
1293         ssize_t resid, minsize;
1294         int err;
1295         Shdr *shdr;
1296 
1297         /*
1298          * Since we're going to be using e_shentsize to iterate down the
1299          * array of section headers, it must be 8-byte aligned or else
1300          * a we might cause a misaligned access. We use all members through
1301          * sh_entsize (on both 32- and 64-bit ELF files) so e_shentsize
1302          * must be at least large enough to include that member. The index
1303          * of the string table section must also be valid.
1304          */
1305         minsize = offsetof(Shdr, sh_entsize) + sizeof (shdr->sh_entsize);
1306         if (ehdr->e_shentsize < minsize || (ehdr->e_shentsize & 3) ||
1307             shstrndx >= nshdrs)
1308                 return (EINVAL);
1309 
1310         *shsizep = nshdrs * ehdr->e_shentsize;
1311 
1312         if (*shsizep > sizeof (Shdr) * elf_nshdr_max) {
1313                 if ((*shbasep = kmem_alloc(*shsizep, KM_NOSLEEP)) == NULL)
1314                         return (ENOMEM);
1315         } else {
1316                 *shbasep = kmem_alloc(*shsizep, KM_SLEEP);
1317         }
1318 
1319         if ((err = vn_rdwr(UIO_READ, vp, *shbasep, *shsizep,
1320             (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1321             credp, &resid)) != 0) {
1322                 kmem_free(*shbasep, *shsizep);
1323                 return (err);
1324         }
1325 
1326         /*
1327          * Pull the section string table out of the vnode; fail if the size
1328          * is zero.
1329          */
1330         shdr = (Shdr *)(*shbasep + shstrndx * ehdr->e_shentsize);
1331         if ((*shstrsizep = shdr->sh_size) == 0) {
1332                 kmem_free(*shbasep, *shsizep);
1333                 return (EINVAL);
1334         }
1335 
1336         if (*shstrsizep > elf_shstrtab_max) {
1337                 if ((*shstrbasep = kmem_alloc(*shstrsizep,
1338                     KM_NOSLEEP)) == NULL) {
1339                         kmem_free(*shbasep, *shsizep);
1340                         return (ENOMEM);
1341                 }
1342         } else {
1343                 *shstrbasep = kmem_alloc(*shstrsizep, KM_SLEEP);
1344         }
1345 
1346         if ((err = vn_rdwr(UIO_READ, vp, *shstrbasep, *shstrsizep,
1347             (offset_t)shdr->sh_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
1348             credp, &resid)) != 0) {
1349                 kmem_free(*shbasep, *shsizep);
1350                 kmem_free(*shstrbasep, *shstrsizep);
1351                 return (err);
1352         }
1353 
1354         /*
1355          * Make sure the strtab is null-terminated to make sure we
1356          * don't run off the end of the table.
1357          */
1358         (*shstrbasep)[*shstrsizep - 1] = '\0';
1359 
1360         return (0);
1361 }
1362 
1363 
1364 #ifdef _ELF32_COMPAT
1365 int
1366 elf32readhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, int *nphdrs,
1367     caddr_t *phbasep, ssize_t *phsizep)
1368 #else
1369 int
1370 elfreadhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, int *nphdrs,
1371     caddr_t *phbasep, ssize_t *phsizep)
1372 #endif
1373 {
1374         int error, nshdrs, shstrndx;
1375 
1376         if ((error = getelfhead(vp, credp, ehdrp, &nshdrs, &shstrndx,
1377             nphdrs)) != 0 ||
1378             (error = getelfphdr(vp, credp, ehdrp, *nphdrs, phbasep,
1379             phsizep)) != 0) {
1380                 return (error);
1381         }
1382         return (0);
1383 }
1384 
1385 
1386 static int
1387 mapelfexec(
1388         vnode_t *vp,
1389         Ehdr *ehdr,
1390         int nphdrs,
1391         caddr_t phdrbase,
1392         Phdr **uphdr,
1393         Phdr **dyphdr,
1394         Phdr **stphdr,
1395         Phdr **dtphdr,
1396         Phdr *dataphdrp,
1397         caddr_t *bssbase,
1398         caddr_t *brkbase,
1399         intptr_t *voffset,
1400         intptr_t *minaddr,
1401         size_t len,
1402         long *execsz,
1403         size_t *brksize)
1404 {
1405         Phdr *phdr;
1406         int i, prot, error, lastprot = 0;
1407         caddr_t addr = NULL;
1408         size_t zfodsz;
1409         int ptload = 0;
1410         int page;
1411         off_t offset;
1412         int hsize = ehdr->e_phentsize;
1413         caddr_t mintmp = (caddr_t)-1;
1414         uintptr_t lastaddr = NULL;
1415         extern int use_brk_lpg;
1416 
1417         if (ehdr->e_type == ET_DYN) {
1418                 caddr_t vaddr;
1419 
1420                 /*
1421                  * Despite the fact that mmapobj(2) refuses to load them, we
1422                  * need to support executing ET_DYN objects that have a
1423                  * non-NULL p_vaddr.  When found in the wild, these objects
1424                  * are likely to be due to an old (and largely obviated) Linux
1425                  * facility, prelink(8), that rewrites shared objects to
1426                  * prefer specific (disjoint) virtual address ranges.  (Yes,
1427                  * this is putatively for performance -- and yes, it has
1428                  * limited applicability, many edge conditions and grisly
1429                  * failure modes; even for Linux, it's insane.)  As ELF
1430                  * mandates that the PT_LOAD segments be in p_vaddr order, we
1431                  * find the lowest p_vaddr by finding the first PT_LOAD
1432                  * segment.
1433                  */
1434                 phdr = (Phdr *)phdrbase;
1435                 for (i = nphdrs; i > 0; i--) {
1436                         if (phdr->p_type == PT_LOAD) {
1437                                 addr = (caddr_t)(uintptr_t)phdr->p_vaddr;
1438                                 break;
1439                         }
1440                         phdr = (Phdr *)((caddr_t)phdr + hsize);
1441                 }
1442 
1443                 /*
1444                  * We have a non-zero p_vaddr in the first PT_LOAD segment --
1445                  * presumably because we're directly executing a prelink(8)'d
1446                  * ld-linux.so.  While we could correctly execute such an
1447                  * object without locating it at its desired p_vaddr (it is,
1448                  * after all, still relocatable), our inner antiquarian
1449                  * derives a perverse pleasure in accommodating the steampunk
1450                  * prelink(8) contraption -- goggles on!
1451                  */
1452                 if ((vaddr = addr) != NULL) {
1453                         if (as_gap(curproc->p_as, len,
1454                             &addr, &len, AH_LO, NULL) == -1 || addr != vaddr) {
1455                                 addr = NULL;
1456                         }
1457                 }
1458 
1459                 if (addr == NULL) {
1460                         /*
1461                          * We either have a NULL p_vaddr (the common case, by
1462                          * many orders of magnitude) or we have a non-NULL
1463                          * p_vaddr and we were unable to obtain the specified
1464                          * VA range (presumably because it's an illegal
1465                          * address).  Either way, obtain an address in which
1466                          * to map the interpreter.
1467                          */
1468                         map_addr(&addr, len, (offset_t)0, 1, 0);
1469                         if (addr == NULL)
1470                                 return (ENOMEM);
1471                 }
1472 
1473                 /*
1474                  * Our voffset is the difference between where we landed and
1475                  * where we wanted to be.
1476                  */
1477                 *voffset = (uintptr_t)addr - (uintptr_t)vaddr;
1478         } else {
1479                 *voffset = 0;
1480         }
1481 
1482         phdr = (Phdr *)phdrbase;
1483         for (i = nphdrs; i > 0; i--) {
1484                 switch (phdr->p_type) {
1485                 case PT_LOAD:
1486                         ptload = 1;
1487                         prot = PROT_USER;
1488                         if (phdr->p_flags & PF_R)
1489                                 prot |= PROT_READ;
1490                         if (phdr->p_flags & PF_W)
1491                                 prot |= PROT_WRITE;
1492                         if (phdr->p_flags & PF_X)
1493                                 prot |= PROT_EXEC;
1494 
1495                         addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);
1496 
1497                         if ((*dyphdr != NULL) && uphdr != NULL &&
1498                             (*uphdr == NULL)) {
1499                                 /*
1500                                  * The PT_PHDR program header is, strictly
1501                                  * speaking, optional.  If we find that this
1502                                  * is missing, we will determine the location
1503                                  * of the program headers based on the address
1504                                  * of the lowest PT_LOAD segment (namely, this
1505                                  * one):  we subtract the p_offset to get to
1506                                  * the ELF header and then add back the program
1507                                  * header offset to get to the program headers.
1508                                  * We then cons up a Phdr that corresponds to
1509                                  * the (missing) PT_PHDR, setting the flags
1510                                  * to 0 to denote that this is artificial and
1511                                  * should (must) be freed by the caller.
1512                                  */
1513                                 Phdr *cons;
1514 
1515                                 cons = kmem_zalloc(sizeof (Phdr), KM_SLEEP);
1516 
1517                                 cons->p_flags = 0;
1518                                 cons->p_type = PT_PHDR;
1519                                 cons->p_vaddr = ((uintptr_t)addr -
1520                                     phdr->p_offset) + ehdr->e_phoff;
1521 
1522                                 *uphdr = cons;
1523                         }
1524 
1525                         /*
1526                          * Keep track of the segment with the lowest starting
1527                          * address.
1528                          */
1529                         if (addr < mintmp)
1530                                 mintmp = addr;
1531 
1532                         /*
1533                          * Segments need not correspond to page boundaries:
1534                          * they are permitted to share a page.  If two PT_LOAD
1535                          * segments share the same page, and the permissions
1536                          * of the segments differ, the behavior is historically
1537                          * that the permissions of the latter segment are used
1538                          * for the page that the two segments share.  This is
1539                          * also historically a non-issue:  binaries generated
1540                          * by most anything will make sure that two PT_LOAD
1541                          * segments with differing permissions don't actually
1542                          * share any pages.  However, there exist some crazy
1543                          * things out there (including at least an obscure
1544                          * Portuguese teaching language called G-Portugol) that
1545                          * actually do the wrong thing and expect it to work:
1546                          * they have a segment with execute permission share
1547                          * a page with a subsequent segment that does not
1548                          * have execute permissions and expect the resulting
1549                          * shared page to in fact be executable.  To accommodate
1550                          * such broken link editors, we take advantage of a
1551                          * latitude explicitly granted to the loader:  it is
1552                          * permitted to make _any_ PT_LOAD segment executable
1553                          * (provided that it is readable or writable).  If we
1554                          * see that we're sharing a page and that the previous
1555                          * page was executable, we will add execute permissions
1556                          * to our segment.
1557                          */
1558                         if (btop(lastaddr) == btop((uintptr_t)addr) &&
1559                             (phdr->p_flags & (PF_R | PF_W)) &&
1560                             (lastprot & PROT_EXEC)) {
1561                                 prot |= PROT_EXEC;
1562                         }
1563 
1564                         lastaddr = (uintptr_t)addr + phdr->p_filesz;
1565                         lastprot = prot;
1566 
1567                         zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz;
1568 
1569                         offset = phdr->p_offset;
1570                         if (((uintptr_t)offset & PAGEOFFSET) ==
1571                             ((uintptr_t)addr & PAGEOFFSET) &&
1572                             (!(vp->v_flag & VNOMAP))) {
1573                                 page = 1;
1574                         } else {
1575                                 page = 0;
1576                         }
1577 
1578                         /*
1579                          * Set the heap pagesize for OOB when the bss size
1580                          * is known and use_brk_lpg is not 0.
1581                          */
1582                         if (brksize != NULL && use_brk_lpg &&
1583                             zfodsz != 0 && phdr == dataphdrp &&
1584                             (prot & PROT_WRITE)) {
1585                                 size_t tlen = P2NPHASE((uintptr_t)addr +
1586                                     phdr->p_filesz, PAGESIZE);
1587 
1588                                 if (zfodsz > tlen) {
1589                                         curproc->p_brkpageszc =
1590                                             page_szc(map_pgsz(MAPPGSZ_HEAP,
1591                                             curproc, addr + phdr->p_filesz +
1592                                             tlen, zfodsz - tlen, 0));
1593                                 }
1594                         }
1595 
1596                         if (curproc->p_brkpageszc != 0 && phdr == dataphdrp &&
1597                             (prot & PROT_WRITE)) {
1598                                 uint_t  szc = curproc->p_brkpageszc;
1599                                 size_t pgsz = page_get_pagesize(szc);
1600                                 caddr_t ebss = addr + phdr->p_memsz;
1601                                 size_t extra_zfodsz;
1602 
1603                                 ASSERT(pgsz > PAGESIZE);
1604 
1605                                 extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz);
1606 
1607                                 if (error = execmap(vp, addr, phdr->p_filesz,
1608                                     zfodsz + extra_zfodsz, phdr->p_offset,
1609                                     prot, page, szc))
1610                                         goto bad;
1611                                 if (brksize != NULL)
1612                                         *brksize = extra_zfodsz;
1613                         } else {
1614                                 if (error = execmap(vp, addr, phdr->p_filesz,
1615                                     zfodsz, phdr->p_offset, prot, page, 0))
1616                                         goto bad;
1617                         }
1618 
1619                         if (bssbase != NULL && addr >= *bssbase &&
1620                             phdr == dataphdrp) {
1621                                 *bssbase = addr + phdr->p_filesz;
1622                         }
1623                         if (brkbase != NULL && addr >= *brkbase) {
1624                                 *brkbase = addr + phdr->p_memsz;
1625                         }
1626 
1627                         *execsz += btopr(phdr->p_memsz);
1628                         break;
1629 
1630                 case PT_INTERP:
1631                         /*
1632                          * The ELF specification is unequivocal about the
1633                          * PT_INTERP program header with respect to any PT_LOAD
1634                          * program header:  "If it is present, it must precede
1635                          * any loadable segment entry." Linux, however, makes
1636                          * no attempt to enforce this -- which has allowed some
1637                          * binary editing tools to get away with generating
1638                          * invalid ELF binaries in the respect that PT_INTERP
1639                          * occurs after the first PT_LOAD program header.  This
1640                          * is unfortunate (and of course, disappointing) but
1641                          * it's no worse than that: there is no reason that we
1642                          * can't process the PT_INTERP entry (if present) after
1643                          * one or more PT_LOAD entries.  We therefore
1644                          * deliberately do not check ptload here and always
1645                          * store dyphdr to be the PT_INTERP program header.
1646                          */
1647                         *dyphdr = phdr;
1648                         break;
1649 
1650                 case PT_SHLIB:
1651                         *stphdr = phdr;
1652                         break;
1653 
1654                 case PT_PHDR:
1655                         if (ptload || phdr->p_flags == 0)
1656                                 goto bad;
1657 
1658                         if (uphdr != NULL)
1659                                 *uphdr = phdr;
1660 
1661                         break;
1662 
1663                 case PT_NULL:
1664                 case PT_DYNAMIC:
1665                 case PT_NOTE:
1666                         break;
1667 
1668                 case PT_SUNWDTRACE:
1669                         if (dtphdr != NULL)
1670                                 *dtphdr = phdr;
1671                         break;
1672 
1673                 default:
1674                         break;
1675                 }
1676                 phdr = (Phdr *)((caddr_t)phdr + hsize);
1677         }
1678 
1679         if (minaddr != NULL) {
1680                 ASSERT(mintmp != (caddr_t)-1);
1681                 *minaddr = (intptr_t)mintmp;
1682         }
1683 
1684         return (0);
1685 bad:
1686         if (error == 0)
1687                 error = EINVAL;
1688         return (error);
1689 }
1690 
1691 int
1692 elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc,
1693     rlim64_t rlimit, cred_t *credp)
1694 {
1695         Note note;
1696         int error;
1697 
1698         bzero(¬e, sizeof (note));
1699         bcopy("CORE", note.name, 4);
1700         note.nhdr.n_type = type;
1701         /*
1702          * The System V ABI states that n_namesz must be the length of the
1703          * string that follows the Nhdr structure including the terminating
1704          * null. The ABI also specifies that sufficient padding should be
1705          * included so that the description that follows the name string
1706          * begins on a 4- or 8-byte boundary for 32- and 64-bit binaries
1707          * respectively. However, since this change was not made correctly
1708          * at the time of the 64-bit port, both 32- and 64-bit binaries
1709          * descriptions are only guaranteed to begin on a 4-byte boundary.
1710          */
1711         note.nhdr.n_namesz = 5;
1712         note.nhdr.n_descsz = roundup(descsz, sizeof (Word));
1713 
1714         if (error = core_write(vp, UIO_SYSSPACE, *offsetp, ¬e,
1715             sizeof (note), rlimit, credp))
1716                 return (error);
1717 
1718         *offsetp += sizeof (note);
1719 
1720         if (error = core_write(vp, UIO_SYSSPACE, *offsetp, desc,
1721             note.nhdr.n_descsz, rlimit, credp))
1722                 return (error);
1723 
1724         *offsetp += note.nhdr.n_descsz;
1725         return (0);
1726 }
1727 
1728 /*
1729  * Copy the section data from one vnode to the section of another vnode.
1730  */
1731 static void
1732 copy_scn(Shdr *src, vnode_t *src_vp, Shdr *dst, vnode_t *dst_vp, Off *doffset,
1733     void *buf, size_t size, cred_t *credp, rlim64_t rlimit)
1734 {
1735         ssize_t resid;
1736         size_t len, n = src->sh_size;
1737         offset_t off = 0;
1738 
1739         while (n != 0) {
1740                 len = MIN(size, n);
1741                 if (vn_rdwr(UIO_READ, src_vp, buf, len, src->sh_offset + off,
1742                     UIO_SYSSPACE, 0, (rlim64_t)0, credp, &resid) != 0 ||
1743                     resid >= len ||
1744                     core_write(dst_vp, UIO_SYSSPACE, *doffset + off,
1745                     buf, len - resid, rlimit, credp) != 0) {
1746                         dst->sh_size = 0;
1747                         dst->sh_offset = 0;
1748                         return;
1749                 }
1750 
1751                 ASSERT(n >= len - resid);
1752 
1753                 n -= len - resid;
1754                 off += len - resid;
1755         }
1756 
1757         *doffset += src->sh_size;
1758 }
1759 
/*
 * Upper bound on the scratch buffer that process_scns() will allocate for
 * copying section data into a core file.  The variable is defined here
 * unless _ELF32_COMPAT is set, in which case only a declaration is emitted
 * and the definition is expected to come from elsewhere (presumably the
 * non-compat build of this same file).
 */
#ifndef _ELF32_COMPAT
size_t elf_datasz_max = 1024 * 1024;
#else
extern size_t elf_datasz_max;
#endif
1765 
1766 /*
1767  * This function processes mappings that correspond to load objects to
1768  * examine their respective sections for elfcore(). It's called once with
1769  * v set to NULL to count the number of sections that we're going to need
1770  * and then again with v set to some allocated buffer that we fill in with
1771  * all the section data.
1772  */
1773 static int
1774 process_scns(core_content_t content, proc_t *p, cred_t *credp, vnode_t *vp,
1775     Shdr *v, int nv, rlim64_t rlimit, Off *doffsetp, int *nshdrsp)
1776 {
1777         vnode_t *lastvp = NULL;
1778         struct seg *seg;
1779         int i, j;
1780         void *data = NULL;
1781         size_t datasz = 0;
1782         shstrtab_t shstrtab;
1783         struct as *as = p->p_as;
1784         int error = 0;
1785 
1786         if (v != NULL)
1787                 shstrtab_init(&shstrtab);
1788 
1789         i = 1;
1790         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1791                 uint_t prot;
1792                 vnode_t *mvp;
1793                 void *tmp = NULL;
1794                 caddr_t saddr = seg->s_base;
1795                 caddr_t naddr;
1796                 caddr_t eaddr;
1797                 size_t segsize;
1798 
1799                 Ehdr ehdr;
1800                 int nshdrs, shstrndx, nphdrs;
1801                 caddr_t shbase;
1802                 ssize_t shsize;
1803                 char *shstrbase;
1804                 ssize_t shstrsize;
1805 
1806                 Shdr *shdr;
1807                 const char *name;
1808                 size_t sz;
1809                 uintptr_t off;
1810 
1811                 int ctf_ndx = 0;
1812                 int symtab_ndx = 0;
1813 
1814                 /*
1815                  * Since we're just looking for text segments of load
1816                  * objects, we only care about the protection bits; we don't
1817                  * care about the actual size of the segment so we use the
1818                  * reserved size. If the segment's size is zero, there's
1819                  * something fishy going on so we ignore this segment.
1820                  */
1821                 if (seg->s_ops != &segvn_ops ||
1822                     SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
1823                     mvp == lastvp || mvp == NULL || mvp->v_type != VREG ||
1824                     (segsize = pr_getsegsize(seg, 1)) == 0)
1825                         continue;
1826 
1827                 eaddr = saddr + segsize;
1828                 prot = pr_getprot(seg, 1, &tmp, &saddr, &naddr, eaddr);
1829                 pr_getprot_done(&tmp);
1830 
1831                 /*
1832                  * Skip this segment unless the protection bits look like
1833                  * what we'd expect for a text segment.
1834                  */
1835                 if ((prot & (PROT_WRITE | PROT_EXEC)) != PROT_EXEC)
1836                         continue;
1837 
1838                 if (getelfhead(mvp, credp, &ehdr, &nshdrs, &shstrndx,
1839                     &nphdrs) != 0 ||
1840                     getelfshdr(mvp, credp, &ehdr, nshdrs, shstrndx,
1841                     &shbase, &shsize, &shstrbase, &shstrsize) != 0)
1842                         continue;
1843 
1844                 off = ehdr.e_shentsize;
1845                 for (j = 1; j < nshdrs; j++, off += ehdr.e_shentsize) {
1846                         Shdr *symtab = NULL, *strtab;
1847 
1848                         shdr = (Shdr *)(shbase + off);
1849 
1850                         if (shdr->sh_name >= shstrsize)
1851                                 continue;
1852 
1853                         name = shstrbase + shdr->sh_name;
1854 
1855                         if (strcmp(name, shstrtab_data[STR_CTF]) == 0) {
1856                                 if ((content & CC_CONTENT_CTF) == 0 ||
1857                                     ctf_ndx != 0)
1858                                         continue;
1859 
1860                                 if (shdr->sh_link > 0 &&
1861                                     shdr->sh_link < nshdrs) {
1862                                         symtab = (Shdr *)(shbase +
1863                                             shdr->sh_link * ehdr.e_shentsize);
1864                                 }
1865 
1866                                 if (v != NULL && i < nv - 1) {
1867                                         if (shdr->sh_size > datasz &&
1868                                             shdr->sh_size <= elf_datasz_max) {
1869                                                 if (data != NULL)
1870                                                         kmem_free(data, datasz);
1871 
1872                                                 datasz = shdr->sh_size;
1873                                                 data = kmem_alloc(datasz,
1874                                                     KM_SLEEP);
1875                                         }
1876 
1877                                         v[i].sh_name = shstrtab_ndx(&shstrtab,
1878                                             STR_CTF);
1879                                         v[i].sh_addr = (Addr)(uintptr_t)saddr;
1880                                         v[i].sh_type = SHT_PROGBITS;
1881                                         v[i].sh_addralign = 4;
1882                                         *doffsetp = roundup(*doffsetp,
1883                                             v[i].sh_addralign);
1884                                         v[i].sh_offset = *doffsetp;
1885                                         v[i].sh_size = shdr->sh_size;
1886                                         if (symtab == NULL)  {
1887                                                 v[i].sh_link = 0;
1888                                         } else if (symtab->sh_type ==
1889                                             SHT_SYMTAB &&
1890                                             symtab_ndx != 0) {
1891                                                 v[i].sh_link =
1892                                                     symtab_ndx;
1893                                         } else {
1894                                                 v[i].sh_link = i + 1;
1895                                         }
1896 
1897                                         copy_scn(shdr, mvp, &v[i], vp,
1898                                             doffsetp, data, datasz, credp,
1899                                             rlimit);
1900                                 }
1901 
1902                                 ctf_ndx = i++;
1903 
1904                                 /*
1905                                  * We've already dumped the symtab.
1906                                  */
1907                                 if (symtab != NULL &&
1908                                     symtab->sh_type == SHT_SYMTAB &&
1909                                     symtab_ndx != 0)
1910                                         continue;
1911 
1912                         } else if (strcmp(name,
1913                             shstrtab_data[STR_SYMTAB]) == 0) {
1914                                 if ((content & CC_CONTENT_SYMTAB) == 0 ||
1915                                     symtab != 0)
1916                                         continue;
1917 
1918                                 symtab = shdr;
1919                         }
1920 
1921                         if (symtab != NULL) {
1922                                 if ((symtab->sh_type != SHT_DYNSYM &&
1923                                     symtab->sh_type != SHT_SYMTAB) ||
1924                                     symtab->sh_link == 0 ||
1925                                     symtab->sh_link >= nshdrs)
1926                                         continue;
1927 
1928                                 strtab = (Shdr *)(shbase +
1929                                     symtab->sh_link * ehdr.e_shentsize);
1930 
1931                                 if (strtab->sh_type != SHT_STRTAB)
1932                                         continue;
1933 
1934                                 if (v != NULL && i < nv - 2) {
1935                                         sz = MAX(symtab->sh_size,
1936                                             strtab->sh_size);
1937                                         if (sz > datasz &&
1938                                             sz <= elf_datasz_max) {
1939                                                 if (data != NULL)
1940                                                         kmem_free(data, datasz);
1941 
1942                                                 datasz = sz;
1943                                                 data = kmem_alloc(datasz,
1944                                                     KM_SLEEP);
1945                                         }
1946 
1947                                         if (symtab->sh_type == SHT_DYNSYM) {
1948                                                 v[i].sh_name = shstrtab_ndx(
1949                                                     &shstrtab, STR_DYNSYM);
1950                                                 v[i + 1].sh_name = shstrtab_ndx(
1951                                                     &shstrtab, STR_DYNSTR);
1952                                         } else {
1953                                                 v[i].sh_name = shstrtab_ndx(
1954                                                     &shstrtab, STR_SYMTAB);
1955                                                 v[i + 1].sh_name = shstrtab_ndx(
1956                                                     &shstrtab, STR_STRTAB);
1957                                         }
1958 
1959                                         v[i].sh_type = symtab->sh_type;
1960                                         v[i].sh_addr = symtab->sh_addr;
1961                                         if (ehdr.e_type == ET_DYN ||
1962                                             v[i].sh_addr == 0)
1963                                                 v[i].sh_addr +=
1964                                                     (Addr)(uintptr_t)saddr;
1965                                         v[i].sh_addralign =
1966                                             symtab->sh_addralign;
1967                                         *doffsetp = roundup(*doffsetp,
1968                                             v[i].sh_addralign);
1969                                         v[i].sh_offset = *doffsetp;
1970                                         v[i].sh_size = symtab->sh_size;
1971                                         v[i].sh_link = i + 1;
1972                                         v[i].sh_entsize = symtab->sh_entsize;
1973                                         v[i].sh_info = symtab->sh_info;
1974 
1975                                         copy_scn(symtab, mvp, &v[i], vp,
1976                                             doffsetp, data, datasz, credp,
1977                                             rlimit);
1978 
1979                                         v[i + 1].sh_type = SHT_STRTAB;
1980                                         v[i + 1].sh_flags = SHF_STRINGS;
1981                                         v[i + 1].sh_addr = symtab->sh_addr;
1982                                         if (ehdr.e_type == ET_DYN ||
1983                                             v[i + 1].sh_addr == 0)
1984                                                 v[i + 1].sh_addr +=
1985                                                     (Addr)(uintptr_t)saddr;
1986                                         v[i + 1].sh_addralign =
1987                                             strtab->sh_addralign;
1988                                         *doffsetp = roundup(*doffsetp,
1989                                             v[i + 1].sh_addralign);
1990                                         v[i + 1].sh_offset = *doffsetp;
1991                                         v[i + 1].sh_size = strtab->sh_size;
1992 
1993                                         copy_scn(strtab, mvp, &v[i + 1], vp,
1994                                             doffsetp, data, datasz, credp,
1995                                             rlimit);
1996                                 }
1997 
1998                                 if (symtab->sh_type == SHT_SYMTAB)
1999                                         symtab_ndx = i;
2000                                 i += 2;
2001                         }
2002                 }
2003 
2004                 kmem_free(shstrbase, shstrsize);
2005                 kmem_free(shbase, shsize);
2006 
2007                 lastvp = mvp;
2008         }
2009 
2010         if (v == NULL) {
2011                 if (i == 1)
2012                         *nshdrsp = 0;
2013                 else
2014                         *nshdrsp = i + 1;
2015                 goto done;
2016         }
2017 
2018         if (i != nv - 1) {
2019                 cmn_err(CE_WARN, "elfcore: core dump failed for "
2020                     "process %d; address space is changing", p->p_pid);
2021                 error = EIO;
2022                 goto done;
2023         }
2024 
2025         v[i].sh_name = shstrtab_ndx(&shstrtab, STR_SHSTRTAB);
2026         v[i].sh_size = shstrtab_size(&shstrtab);
2027         v[i].sh_addralign = 1;
2028         *doffsetp = roundup(*doffsetp, v[i].sh_addralign);
2029         v[i].sh_offset = *doffsetp;
2030         v[i].sh_flags = SHF_STRINGS;
2031         v[i].sh_type = SHT_STRTAB;
2032 
2033         if (v[i].sh_size > datasz) {
2034                 if (data != NULL)
2035                         kmem_free(data, datasz);
2036 
2037                 datasz = v[i].sh_size;
2038                 data = kmem_alloc(datasz,
2039                     KM_SLEEP);
2040         }
2041 
2042         shstrtab_dump(&shstrtab, data);
2043 
2044         if ((error = core_write(vp, UIO_SYSSPACE, *doffsetp,
2045             data, v[i].sh_size, rlimit, credp)) != 0)
2046                 goto done;
2047 
2048         *doffsetp += v[i].sh_size;
2049 
2050 done:
2051         if (data != NULL)
2052                 kmem_free(data, datasz);
2053 
2054         return (error);
2055 }
2056 
2057 int
2058 elfcore(vnode_t *vp, proc_t *p, cred_t *credp, rlim64_t rlimit, int sig,
2059     core_content_t content)
2060 {
2061         offset_t poffset, soffset;
2062         Off doffset;
2063         int error, i, nphdrs, nshdrs;
2064         int overflow = 0;
2065         struct seg *seg;
2066         struct as *as = p->p_as;
2067         union {
2068                 Ehdr ehdr;
2069                 Phdr phdr[1];
2070                 Shdr shdr[1];
2071         } *bigwad;
2072         size_t bigsize;
2073         size_t phdrsz, shdrsz;
2074         Ehdr *ehdr;
2075         Phdr *v;
2076         caddr_t brkbase;
2077         size_t brksize;
2078         caddr_t stkbase;
2079         size_t stksize;
2080         int ntries = 0;
2081         klwp_t *lwp = ttolwp(curthread);
2082 
2083 top:
2084         /*
2085          * Make sure we have everything we need (registers, etc.).
2086          * All other lwps have already stopped and are in an orderly state.
2087          */
2088         ASSERT(p == ttoproc(curthread));
2089         prstop(0, 0);
2090 
2091         AS_LOCK_ENTER(as, RW_WRITER);
2092         nphdrs = prnsegs(as, 0) + 2;            /* two CORE note sections */
2093 
2094         /*
2095          * Count the number of section headers we're going to need.
2096          */
2097         nshdrs = 0;
2098         if (content & (CC_CONTENT_CTF | CC_CONTENT_SYMTAB)) {
2099                 (void) process_scns(content, p, credp, NULL, NULL, NULL, 0,
2100                     NULL, &nshdrs);
2101         }
2102         AS_LOCK_EXIT(as);
2103 
2104         ASSERT(nshdrs == 0 || nshdrs > 1);
2105 
2106         /*
2107          * The core file contents may required zero section headers, but if
2108          * we overflow the 16 bits allotted to the program header count in
2109          * the ELF header, we'll need that program header at index zero.
2110          */
2111         if (nshdrs == 0 && nphdrs >= PN_XNUM)
2112                 nshdrs = 1;
2113 
2114         phdrsz = nphdrs * sizeof (Phdr);
2115         shdrsz = nshdrs * sizeof (Shdr);
2116 
2117         bigsize = MAX(sizeof (*bigwad), MAX(phdrsz, shdrsz));
2118         bigwad = kmem_alloc(bigsize, KM_SLEEP);
2119 
2120         ehdr = &bigwad->ehdr;
2121         bzero(ehdr, sizeof (*ehdr));
2122 
2123         ehdr->e_ident[EI_MAG0] = ELFMAG0;
2124         ehdr->e_ident[EI_MAG1] = ELFMAG1;
2125         ehdr->e_ident[EI_MAG2] = ELFMAG2;
2126         ehdr->e_ident[EI_MAG3] = ELFMAG3;
2127         ehdr->e_ident[EI_CLASS] = ELFCLASS;
2128         ehdr->e_type = ET_CORE;
2129 
2130 #if !defined(_LP64) || defined(_ELF32_COMPAT)
2131 
2132 #if defined(__sparc)
2133         ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
2134         ehdr->e_machine = EM_SPARC;
2135 #elif defined(__i386) || defined(__i386_COMPAT)
2136         ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
2137         ehdr->e_machine = EM_386;
2138 #else
2139 #error "no recognized machine type is defined"
2140 #endif
2141 
2142 #else   /* !defined(_LP64) || defined(_ELF32_COMPAT) */
2143 
2144 #if defined(__sparc)
2145         ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
2146         ehdr->e_machine = EM_SPARCV9;
2147 #elif defined(__amd64)
2148         ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
2149         ehdr->e_machine = EM_AMD64;
2150 #else
2151 #error "no recognized 64-bit machine type is defined"
2152 #endif
2153 
2154 #endif  /* !defined(_LP64) || defined(_ELF32_COMPAT) */
2155 
2156         /*
2157          * If the count of program headers or section headers or the index
2158          * of the section string table can't fit in the mere 16 bits
2159          * shortsightedly allotted to them in the ELF header, we use the
2160          * extended formats and put the real values in the section header
2161          * as index 0.
2162          */
2163         ehdr->e_version = EV_CURRENT;
2164         ehdr->e_ehsize = sizeof (Ehdr);
2165 
2166         if (nphdrs >= PN_XNUM)
2167                 ehdr->e_phnum = PN_XNUM;
2168         else
2169                 ehdr->e_phnum = (unsigned short)nphdrs;
2170 
2171         ehdr->e_phoff = sizeof (Ehdr);
2172         ehdr->e_phentsize = sizeof (Phdr);
2173 
2174         if (nshdrs > 0) {
2175                 if (nshdrs >= SHN_LORESERVE)
2176                         ehdr->e_shnum = 0;
2177                 else
2178                         ehdr->e_shnum = (unsigned short)nshdrs;
2179 
2180                 if (nshdrs - 1 >= SHN_LORESERVE)
2181                         ehdr->e_shstrndx = SHN_XINDEX;
2182                 else
2183                         ehdr->e_shstrndx = (unsigned short)(nshdrs - 1);
2184 
2185                 ehdr->e_shoff = ehdr->e_phoff + ehdr->e_phentsize * nphdrs;
2186                 ehdr->e_shentsize = sizeof (Shdr);
2187         }
2188 
2189         if (error = core_write(vp, UIO_SYSSPACE, (offset_t)0, ehdr,
2190             sizeof (Ehdr), rlimit, credp))
2191                 goto done;
2192 
2193         poffset = sizeof (Ehdr);
2194         soffset = sizeof (Ehdr) + phdrsz;
2195         doffset = sizeof (Ehdr) + phdrsz + shdrsz;
2196 
2197         v = &bigwad->phdr[0];
2198         bzero(v, phdrsz);
2199 
2200         setup_old_note_header(&v[0], p);
2201         v[0].p_offset = doffset = roundup(doffset, sizeof (Word));
2202         doffset += v[0].p_filesz;
2203 
2204         setup_note_header(&v[1], p);
2205         v[1].p_offset = doffset = roundup(doffset, sizeof (Word));
2206         doffset += v[1].p_filesz;
2207 
2208         mutex_enter(&p->p_lock);
2209 
2210         brkbase = p->p_brkbase;
2211         brksize = p->p_brksize;
2212 
2213         stkbase = p->p_usrstack - p->p_stksize;
2214         stksize = p->p_stksize;
2215 
2216         mutex_exit(&p->p_lock);
2217 
2218         AS_LOCK_ENTER(as, RW_WRITER);
2219         i = 2;
2220         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
2221                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2222                 caddr_t saddr, naddr;
2223                 void *tmp = NULL;
2224                 extern struct seg_ops segspt_shmops;
2225 
2226                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2227                         uint_t prot;
2228                         size_t size;
2229                         int type;
2230                         vnode_t *mvp;
2231 
2232                         prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2233                         prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
2234                         if ((size = (size_t)(naddr - saddr)) == 0)
2235                                 continue;
2236                         if (i == nphdrs) {
2237                                 overflow++;
2238                                 continue;
2239                         }
2240                         v[i].p_type = PT_LOAD;
2241                         v[i].p_vaddr = (Addr)(uintptr_t)saddr;
2242                         v[i].p_memsz = size;
2243                         if (prot & PROT_READ)
2244                                 v[i].p_flags |= PF_R;
2245                         if (prot & PROT_WRITE)
2246                                 v[i].p_flags |= PF_W;
2247                         if (prot & PROT_EXEC)
2248                                 v[i].p_flags |= PF_X;
2249 
2250                         /*
2251                          * Figure out which mappings to include in the core.
2252                          */
2253                         type = SEGOP_GETTYPE(seg, saddr);
2254 
2255                         if (saddr == stkbase && size == stksize) {
2256                                 if (!(content & CC_CONTENT_STACK))
2257                                         goto exclude;
2258 
2259                         } else if (saddr == brkbase && size == brksize) {
2260                                 if (!(content & CC_CONTENT_HEAP))
2261                                         goto exclude;
2262 
2263                         } else if (seg->s_ops == &segspt_shmops) {
2264                                 if (type & MAP_NORESERVE) {
2265                                         if (!(content & CC_CONTENT_DISM))
2266                                                 goto exclude;
2267                                 } else {
2268                                         if (!(content & CC_CONTENT_ISM))
2269                                                 goto exclude;
2270                                 }
2271 
2272                         } else if (seg->s_ops != &segvn_ops) {
2273                                 goto exclude;
2274 
2275                         } else if (type & MAP_SHARED) {
2276                                 if (shmgetid(p, saddr) != SHMID_NONE) {
2277                                         if (!(content & CC_CONTENT_SHM))
2278                                                 goto exclude;
2279 
2280                                 } else if (SEGOP_GETVP(seg, seg->s_base,
2281                                     &mvp) != 0 || mvp == NULL ||
2282                                     mvp->v_type != VREG) {
2283                                         if (!(content & CC_CONTENT_SHANON))
2284                                                 goto exclude;
2285 
2286                                 } else {
2287                                         if (!(content & CC_CONTENT_SHFILE))
2288                                                 goto exclude;
2289                                 }
2290 
2291                         } else if (SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
2292                             mvp == NULL || mvp->v_type != VREG) {
2293                                 if (!(content & CC_CONTENT_ANON))
2294                                         goto exclude;
2295 
2296                         } else if (prot == (PROT_READ | PROT_EXEC)) {
2297                                 if (!(content & CC_CONTENT_TEXT))
2298                                         goto exclude;
2299 
2300                         } else if (prot == PROT_READ) {
2301                                 if (!(content & CC_CONTENT_RODATA))
2302                                         goto exclude;
2303 
2304                         } else {
2305                                 if (!(content & CC_CONTENT_DATA))
2306                                         goto exclude;
2307                         }
2308 
2309                         doffset = roundup(doffset, sizeof (Word));
2310                         v[i].p_offset = doffset;
2311                         v[i].p_filesz = size;
2312                         doffset += size;
2313 exclude:
2314                         i++;
2315                 }
2316                 ASSERT(tmp == NULL);
2317         }
2318         AS_LOCK_EXIT(as);
2319 
2320         if (overflow || i != nphdrs) {
2321                 if (ntries++ == 0) {
2322                         kmem_free(bigwad, bigsize);
2323                         overflow = 0;
2324                         goto top;
2325                 }
2326                 cmn_err(CE_WARN, "elfcore: core dump failed for "
2327                     "process %d; address space is changing", p->p_pid);
2328                 error = EIO;
2329                 goto done;
2330         }
2331 
2332         if ((error = core_write(vp, UIO_SYSSPACE, poffset,
2333             v, phdrsz, rlimit, credp)) != 0)
2334                 goto done;
2335 
2336         if ((error = write_old_elfnotes(p, sig, vp, v[0].p_offset, rlimit,
2337             credp)) != 0)
2338                 goto done;
2339 
2340         if ((error = write_elfnotes(p, sig, vp, v[1].p_offset, rlimit,
2341             credp, content)) != 0)
2342                 goto done;
2343 
2344         for (i = 2; i < nphdrs; i++) {
2345                 prkillinfo_t killinfo;
2346                 sigqueue_t *sq;
2347                 int sig, j;
2348 
2349                 if (v[i].p_filesz == 0)
2350                         continue;
2351 
2352                 /*
2353                  * If dumping out this segment fails, rather than failing
2354                  * the core dump entirely, we reset the size of the mapping
2355                  * to zero to indicate that the data is absent from the core
2356                  * file and OR in the PF_SUNW_FAILURE flag to differentiate
2357                  * this from mappings that were excluded due to the core file
2358                  * content settings.
2359                  */
2360                 if ((error = core_seg(p, vp, v[i].p_offset,
2361                     (caddr_t)(uintptr_t)v[i].p_vaddr, v[i].p_filesz,
2362                     rlimit, credp)) == 0) {
2363                         continue;
2364                 }
2365 
2366                 if ((sig = lwp->lwp_cursig) == 0) {
2367                         /*
2368                          * We failed due to something other than a signal.
2369                          * Since the space reserved for the segment is now
2370                          * unused, we stash the errno in the first four
2371                          * bytes. This undocumented interface will let us
2372                          * understand the nature of the failure.
2373                          */
2374                         (void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
2375                             &error, sizeof (error), rlimit, credp);
2376 
2377                         v[i].p_filesz = 0;
2378                         v[i].p_flags |= PF_SUNW_FAILURE;
2379                         if ((error = core_write(vp, UIO_SYSSPACE,
2380                             poffset + sizeof (v[i]) * i, &v[i], sizeof (v[i]),
2381                             rlimit, credp)) != 0)
2382                                 goto done;
2383 
2384                         continue;
2385                 }
2386 
2387                 /*
2388                  * We took a signal.  We want to abort the dump entirely, but
2389                  * we also want to indicate what failed and why.  We therefore
2390                  * use the space reserved for the first failing segment to
2391                  * write our error (which, for purposes of compatibility with
2392                  * older core dump readers, we set to EINTR) followed by any
2393                  * siginfo associated with the signal.
2394                  */
2395                 bzero(&killinfo, sizeof (killinfo));
2396                 killinfo.prk_error = EINTR;
2397 
2398                 sq = sig == SIGKILL ? curproc->p_killsqp : lwp->lwp_curinfo;
2399 
2400                 if (sq != NULL) {
2401                         bcopy(&sq->sq_info, &killinfo.prk_info,
2402                             sizeof (sq->sq_info));
2403                 } else {
2404                         killinfo.prk_info.si_signo = lwp->lwp_cursig;
2405                         killinfo.prk_info.si_code = SI_NOINFO;
2406                 }
2407 
2408 #if (defined(_SYSCALL32_IMPL) || defined(_LP64))
2409                 /*
2410                  * If this is a 32-bit process, we need to translate from the
2411                  * native siginfo to the 32-bit variant.  (Core readers must
2412                  * always have the same data model as their target or must
2413                  * be aware of -- and compensate for -- data model differences.)
2414                  */
2415                 if (curproc->p_model == DATAMODEL_ILP32) {
2416                         siginfo32_t si32;
2417 
2418                         siginfo_kto32((k_siginfo_t *)&killinfo.prk_info, &si32);
2419                         bcopy(&si32, &killinfo.prk_info, sizeof (si32));
2420                 }
2421 #endif
2422 
2423                 (void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
2424                     &killinfo, sizeof (killinfo), rlimit, credp);
2425 
2426                 /*
2427                  * For the segment on which we took the signal, indicate that
2428                  * its data now refers to a siginfo.
2429                  */
2430                 v[i].p_filesz = 0;
2431                 v[i].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED |
2432                     PF_SUNW_SIGINFO;
2433 
2434                 /*
2435                  * And for every other segment, indicate that its absence
2436                  * is due to a signal.
2437                  */
2438                 for (j = i + 1; j < nphdrs; j++) {
2439                         v[j].p_filesz = 0;
2440                         v[j].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED;
2441                 }
2442 
2443                 /*
2444                  * Finally, write out our modified program headers.
2445                  */
2446                 if ((error = core_write(vp, UIO_SYSSPACE,
2447                     poffset + sizeof (v[i]) * i, &v[i],
2448                     sizeof (v[i]) * (nphdrs - i), rlimit, credp)) != 0)
2449                         goto done;
2450 
2451                 break;
2452         }
2453 
2454         if (nshdrs > 0) {
2455                 bzero(&bigwad->shdr[0], shdrsz);
2456 
2457                 if (nshdrs >= SHN_LORESERVE)
2458                         bigwad->shdr[0].sh_size = nshdrs;
2459 
2460                 if (nshdrs - 1 >= SHN_LORESERVE)
2461                         bigwad->shdr[0].sh_link = nshdrs - 1;
2462 
2463                 if (nphdrs >= PN_XNUM)
2464                         bigwad->shdr[0].sh_info = nphdrs;
2465 
2466                 if (nshdrs > 1) {
2467                         AS_LOCK_ENTER(as, RW_WRITER);
2468                         if ((error = process_scns(content, p, credp, vp,
2469                             &bigwad->shdr[0], nshdrs, rlimit, &doffset,
2470                             NULL)) != 0) {
2471                                 AS_LOCK_EXIT(as);
2472                                 goto done;
2473                         }
2474                         AS_LOCK_EXIT(as);
2475                 }
2476 
2477                 if ((error = core_write(vp, UIO_SYSSPACE, soffset,
2478                     &bigwad->shdr[0], shdrsz, rlimit, credp)) != 0)
2479                         goto done;
2480         }
2481 
2482 done:
2483         kmem_free(bigwad, bigsize);
2484         return (error);
2485 }
2486 
2487 #ifndef _ELF32_COMPAT
2488 
2489 static struct execsw esw = {
2490 #ifdef  _LP64
2491         elf64magicstr,
2492 #else   /* _LP64 */
2493         elf32magicstr,
2494 #endif  /* _LP64 */
2495         0,
2496         5,
2497         elfexec,
2498         elfcore
2499 };
2500 
2501 static struct modlexec modlexec = {
2502         &mod_execops, "exec module for elf", &esw
2503 };
2504 
2505 #ifdef  _LP64
2506 extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args,
2507                         intpdata_t *idatap, int level, long *execsz,
2508                         int setid, caddr_t exec_file, cred_t *cred,
2509                         int *brand_action);
2510 extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp,
2511                         rlim64_t rlimit, int sig, core_content_t content);
2512 
2513 static struct execsw esw32 = {
2514         elf32magicstr,
2515         0,
2516         5,
2517         elf32exec,
2518         elf32core
2519 };
2520 
2521 static struct modlexec modlexec32 = {
2522         &mod_execops, "32-bit exec module for elf", &esw32
2523 };
2524 #endif  /* _LP64 */
2525 
2526 static struct modlinkage modlinkage = {
2527         MODREV_1,
2528         (void *)&modlexec,
2529 #ifdef  _LP64
2530         (void *)&modlexec32,
2531 #endif  /* _LP64 */
2532         NULL
2533 };
2534 
2535 int
2536 _init(void)
2537 {
2538         return (mod_install(&modlinkage));
2539 }
2540 
2541 int
2542 _fini(void)
2543 {
2544         return (mod_remove(&modlinkage));
2545 }
2546 
2547 int
2548 _info(struct modinfo *modinfop)
2549 {
2550         return (mod_info(&modlinkage, modinfop));
2551 }
2552 
2553 #endif  /* !_ELF32_COMPAT */